1 Introduction DNA (deoxyribonucleic acid) that carries the genetic information
ID: 3811713 • Letter: 1
Question
1 Introduction. DNA (deoxyribonucleic acid) is a molecule that carries the genetic information used in the growth, development, and reproduction of all known living organisms and many viruses. DNA has four building blocks called Adenine (A), Cytosine (C), Guanine (G) and Thymine (T). All information in the DNA is stored as a code made up of these four chemical bases. Sub-strings of a DNA sequence having length k are called k-mers. For example, Figure 1 shows the k-mers of length 4 (k = 4) of the sequence "GATTACA". In this example w refers to the step size with which the sliding window is moved. With w = 1 the window moves 1 letter to the right at each step, so that the first k-mer starts at position 1 and the second starts at position 2 and so on. The usage data over the sequence "GATTACA" in this case is [(GATT,1), (ATTA,1), (TTAC,1), (TACA,1)], where each couple represents a k-mer (k = 4) and its frequency of occurrence in this sequence. When w = 2, the first k-mer starts at position 1, the second k-mer starts at position 3, and the third would start at position 5, but since there are fewer than 4 nucleotides left, the count stops. The usage data for this case is [(GATT,1), (TTAC,1)]. Figure 1: Example of k-mers where k = 4 and w = 1 (G A T T A C A -> GATT, ATTA, TTAC, TACA).
Explanation / Answer
Answer::
1)
import java.util.*;
import java.io.*;
import java.text.*;
/**
 * Reads pairs of lines (a region name followed by a nucleotide sequence) from an
 * input file and writes a report for each pair to an output file: the upper-cased
 * sequence, per-nucleotide counts, mass percentages, the codon list, and a
 * YES/NO verdict from {@link #proteinTest}.
 */
public class DNA
{
    /** Minimum number of codons a codon list must hold for {@link #mNCTest} to pass. */
    public static final int MNC = 5;
    /** Minimum combined C + G mass percentage for {@link #percentageTest} to pass. */
    public static final double CG = 30.0;
    /** Number of nucleotide letters reported in the printed counts and percentages. */
    public static final int UN = 4;
    /** Number of nucleotides grouped into one codon. */
    public static final int NPC = 3;

    /** Symbols tallied by {@link #nuc_Count}, in the index order used by every array below. */
    private static final char[] SYMBOLS = {'A', 'C', 'G', 'T', '-'};
    /** Mass multiplier for each entry of {@link #SYMBOLS}, same index order. */
    private static final double[] SYMBOL_MASSES = {135.128, 111.103, 151.128, 125.107, 100.000};

    /**
     * Single shared reader for standard input. Creating one Scanner per prompt can
     * lose input, because a Scanner may buffer more of the stream than the token
     * it returns.
     */
    private static Scanner console;

    /**
     * Prompts for the input and output file names, then processes the input file
     * two lines at a time.
     *
     * @param args unused
     * @throws FileNotFoundException if either file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException
    {
        String inputText = input();
        String outputText = output();
        // try-with-resources closes both files even if processing fails part-way.
        try (Scanner in = new Scanner(new File(inputText));
             PrintStream out = new PrintStream(new File(outputText)))
        {
            while (in.hasNextLine())
            {
                String regionName = in.nextLine();
                if (!in.hasNextLine())
                {
                    // A trailing name line with no sequence line: nothing to report.
                    break;
                }
                name(regionName, out);
                String sequenceDNA = sequence(in.nextLine(), out);
                int[] counts = nuc_Count(sequenceDNA, out);
                // Prints the mass percentages and total mass; returns the percentages.
                double[] percentages = masses(counts, out);
                String codonList = groupCodons(sequenceDNA, out);
                proteinTest(codonList, percentages, out);
            }
        }
    }

    /** Returns the shared standard-input Scanner, creating it on first use. */
    private static Scanner console()
    {
        if (console == null)
        {
            console = new Scanner(System.in);
        }
        return console;
    }

    /**
     * Prints the program banner and asks for the input file name.
     *
     * @return the next whitespace-delimited token typed by the user
     */
    public static String input() throws FileNotFoundException
    {
        System.out.println("This program reports information about DNA");
        System.out.println("nucleotides sequences that may encode proteins.");
        System.out.print("Input file name? ");
        return console().next();
    }

    /**
     * Asks for the output file name.
     *
     * @return the next whitespace-delimited token typed by the user
     */
    public static String output()
    {
        System.out.print("Output file name? ");
        return console().next();
    }

    /**
     * Writes the region-name line of the report.
     *
     * @param name region name as read from the input file
     * @param out  report destination
     */
    public static void name(String name, PrintStream out)
    {
        out.println("Region Name: " + name);
    }

    /**
     * Upper-cases the sequence and writes it to the report.
     *
     * @param sequenceDNA raw sequence line
     * @param out         report destination
     * @return the upper-cased sequence
     */
    public static String sequence(String sequenceDNA, PrintStream out)
    {
        String upper = sequenceDNA.toUpperCase();
        out.println("Nucleotides: " + upper);
        return upper;
    }

    /**
     * Counts occurrences of A, C, G, T and '-' in the sequence and prints the
     * first {@link #UN} counts.
     *
     * @param sequenceDNA upper-cased sequence; other characters are ignored
     * @param out         report destination
     * @return counts indexed A, C, G, T, '-' (length {@code UN + 1})
     */
    public static int[] nuc_Count(String sequenceDNA, PrintStream out) throws FileNotFoundException
    {
        int[] counts = new int[UN + 1];
        for (int i = 0; i < sequenceDNA.length(); i++)
        {
            char c = sequenceDNA.charAt(i);
            for (int j = 0; j < SYMBOLS.length; j++)
            {
                if (c == SYMBOLS[j])
                {
                    counts[j]++;
                    break; // symbols are distinct, so at most one can match
                }
            }
        }
        // Only the four nucleotide counts are printed; the '-' count is returned but hidden.
        out.println("Nuc. Counts: " + Arrays.toString(Arrays.copyOf(counts, UN)));
        return counts;
    }

    /**
     * Converts counts to masses, prints the first {@link #UN} mass percentages and
     * the total mass, and returns all percentages.
     *
     * @param nuc_Count counts indexed A, C, G, T, '-' as returned by {@link #nuc_Count}
     * @param out       report destination
     * @return mass percentages in the same index order, rounded to one decimal
     */
    public static double[] masses(int[] nuc_Count, PrintStream out) throws FileNotFoundException
    {
        double[] massPerSymbol = new double[SYMBOL_MASSES.length];
        double totalMass = 0;
        for (int i = 0; i < SYMBOL_MASSES.length; i++)
        {
            massPerSymbol[i] = nuc_Count[i] * SYMBOL_MASSES[i];
            totalMass += massPerSymbol[i];
        }
        double[] percentages = convert_Percentage(massPerSymbol, totalMass);
        out.print("Total Mass%: " + Arrays.toString(Arrays.copyOf(percentages, UN)) + " of ");
        out.printf("%.1f", totalMass);
        out.println();
        return percentages;
    }

    /**
     * Expresses each mass as a percentage of the total, rounded to one decimal.
     * When {@code totalMass} is 0 and every mass is 0 the result is all zeros,
     * because {@code Math.round(NaN)} is 0.
     *
     * @param masses_Nuc mass per symbol
     * @param totalMass  sum of all masses
     * @return percentages, same length and order as {@code masses_Nuc}
     */
    public static double[] convert_Percentage(double[] masses_Nuc, double totalMass) throws FileNotFoundException
    {
        double[] percentages = new double[masses_Nuc.length];
        for (int i = 0; i < masses_Nuc.length; i++)
        {
            percentages[i] = Math.round((masses_Nuc[i] / totalMass * 100) * 10.0) / 10.0;
        }
        return percentages;
    }

    /**
     * Removes '-' characters, splits the remainder into groups of {@link #NPC}
     * letters (dropping an incomplete trailing group), and prints the list.
     *
     * @param sequenceDNA upper-cased sequence
     * @param out         report destination
     * @return the codons formatted by {@code Arrays.toString}, e.g. {@code [ATG, CCC, TAA]}
     */
    public static String groupCodons(String sequenceDNA, PrintStream out) throws FileNotFoundException
    {
        String stripped = sequenceDNA.replace("-", "");
        String[] codons = new String[stripped.length() / NPC];
        for (int k = 0; k < codons.length; k++)
        {
            codons[k] = stripped.substring(k * NPC, (k + 1) * NPC);
        }
        String codonList = Arrays.toString(codons);
        out.println("Codons List: " + codonList);
        return codonList;
    }

    /**
     * Writes "Is Protein?: YES" when all four checks pass, otherwise
     * "Is Protein?: NO", followed by a blank line.
     *
     * @param codonList   codon list as returned by {@link #groupCodons}
     * @param percentages mass percentages as returned by {@link #masses}
     * @param out         report destination
     */
    public static void proteinTest(String codonList, double[] percentages, PrintStream out)
    {
        boolean isProtein = startTest(codonList)
                && stopTest(codonList)
                && mNCTest(codonList)
                && percentageTest(percentages);
        out.println(isProtein ? "Is Protein?: YES" : "Is Protein?: NO");
        out.println();
    }

    /**
     * Checks that the first codon is ATG.
     *
     * @param codonList bracketed codon list, e.g. {@code [ATG, CCC, TAA]}
     * @return true if the three characters after the opening bracket are "ATG";
     *         false otherwise, including when the list is too short
     */
    public static boolean startTest(String codonList)
    {
        // startsWith with an offset returns false instead of throwing on short input.
        return codonList.startsWith("ATG", 1);
    }

    /**
     * Checks that the last codon is TAA, TAG or TGA.
     *
     * @param codonList bracketed codon list, e.g. {@code [ATG, CCC, TAA]}
     * @return true if the three characters before the closing bracket form one of
     *         those codons; false otherwise, including when the list is too short
     */
    public static boolean stopTest(String codonList)
    {
        int end = codonList.length() - 1;   // index of the closing bracket
        int start = end - NPC;
        if (start < 0)
        {
            return false;                   // e.g. "[]": no codon to inspect
        }
        String last = codonList.substring(start, end);
        return last.equals("TAA") || last.equals("TAG") || last.equals("TGA");
    }

    /**
     * Checks that the list holds at least {@link #MNC} codons.
     *
     * @param codonList bracketed codon list as produced by {@code Arrays.toString}
     * @return true if a comma follows the {@code (MNC - 1)}-th codon, meaning
     *         another codon comes after it; false otherwise, including when the
     *         list is too short to reach that position
     */
    public static boolean mNCTest(String codonList)
    {
        // Each codon occupies NPC letters plus a two-character ", " separator, after
        // a one-character "[". The comma after codon (MNC - 1) is therefore at
        // (MNC - 1) * (NPC + 2) - 1, which is 19 for MNC = 5 and NPC = 3.
        int commaIndex = (MNC - 1) * (NPC + 2) - 1;
        return codonList.length() > commaIndex && codonList.charAt(commaIndex) == ',';
    }

    /**
     * Checks that the C and G percentages together reach {@link #CG}.
     *
     * @param percentages mass percentages indexed A, C, G, T, '-'
     * @return true if {@code percentages[1] + percentages[2] >= CG}
     */
    public static boolean percentageTest(double[] percentages)
    {
        return percentages[1] + percentages[2] >= CG;
    }
}
2)
import java.io.*;
import java.util.*;
/**
 * Prompts for a sequence file name and counts the lines in that file that
 * start with '>'.
 */
public class test
{
    /**
     * Reads a file name from standard input, scans the file line by line, and
     * counts the lines beginning with '>'. Prints a message instead when the file
     * cannot be opened.
     *
     * @param args unused
     * @throws IOException if reading or closing the file fails
     */
    public static void main(String[] args) throws IOException
    {
        Scanner input = new Scanner(System.in);
        System.out.print ("Enter the name of the sequence file: ");
        String fileName = input.nextLine();
        int count = 0;
        // try-with-resources closes the reader on every path, replacing the manual finally block.
        try (BufferedReader bf = new BufferedReader(new FileReader(fileName)))
        {
            String line;
            while ((line = bf.readLine()) != null)
            {
                if (line.startsWith(">"))
                {
                    count++;
                }
            }
            // NOTE(review): the original listing is cut off before the count is
            // reported; this line is an assumption about the missing tail, added so
            // the result is observable. Confirm the intended wording.
            System.out.println("Number of sequences: " + count);
        }
        catch (FileNotFoundException e)
        {
            System.out.println(fileName + " does not exist");
        }
    }
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.