in C language. Genes are substrings of DNA which code for proteins and carry the

ID: 3731363 • Letter: I

Question

in C language.

Genes are substrings of DNA which code for proteins and carry the heritable information from our parents. Genes start with the sequence of three letters ATG, called the start codon, and end with one of the three sequences TGA, TAA, or TAG, called stop codons. The stretch of sequence between the start codon and any of the stop codons is a potential gene. Each codon codes for an amino acid represented by a letter of the alphabet. There is a total of 19 amino acids. Strung together, amino acids form proteins. A substring of a DNA sequence is a translatable sequence if: (1) it has a length that is a multiple of three, (2) it starts with a start codon and ends with a stop codon, and (3) it can be translated into an amino acid sequence. For example, DNA sequence AATTAAGATGGGGCTCTAAAAT contains such a translatable sequence, starting at the 8th position and of length 12 (ATGGGGCTCTAA), thus consisting of 4 codons. This sequence can be translated using a codon table into the length-three amino acid sequence MGL. Note that the start codon codes for amino acid M, while the stop codons don't code for any amino acids. On the other hand, DNA sequence AATCAATCTACT is not a translatable sequence. Write a program dna_translate.c that takes two command line arguments: an input file name, containing DNA sequences, and an output file name, in which you will store the translated protein sequences. For each sequence, the program should identify the longest possible translatable sub-sequence, if one exists, and translate it into a protein using the codon table given in the file codeoflife.txt. See example below.

Explanation / Answer

//I have included the explanation of the code in comments like these. If you found this helpful, please leave a like.

#include <ctype.h> //For toupper()
#include <stdio.h>
#include <stdlib.h>
#include <string.h> //For the str*() string routines
// Node of the singly linked list of DNA sequences.
// Each node stores the text of one sequence plus the link to the next node.
typedef struct dnode
{
    char dseq[256];     // sequence text, kept as a C string
    struct dnode *next; // following node; NULL marks the end of the list
} dnode;
// Node of the singly linked list that represents the codon table.
// Each node pairs one three-letter codon with its amino-acid letter.
typedef struct cnode
{
    char alph;          // amino-acid letter for this codon
    char cod[4];        // three codon letters plus room for a terminator
    struct cnode *next; // following node; NULL marks the end of the list
} cnode;

// Builds the linked list of DNA sequences from a text file.
//
// filename: path of the input file, one sequence per line.
// Returns the head of the list (nodes in file order), or NULL when the file
// cannot be opened or contains no lines. The caller owns the list.
//
// Each line is read straight into the node's own buffer, so a read can never
// exceed the size of dseq. A line longer than the buffer is continued in the
// following node(s). The trailing newline, when present, is kept in dseq.
// If memory runs out, the nodes built so far are returned.
dnode *builddna(char *filename)
{
    if (filename == NULL)
    {
        fprintf(stderr, "builddna: no input file name given\n");
        return NULL;
    }

    FILE *dnaseq = fopen(filename, "r"); //Opening the DNA sequence file
    if (dnaseq == NULL)
    {
        perror(filename);
        return NULL;
    }

    dnode *dhead = NULL; //first node of the list
    dnode *dtail = NULL; //last node, where the next one is appended

    for (;;)
    {
        dnode *dtemp = malloc(sizeof *dtemp);
        if (dtemp == NULL)
        {
            perror("malloc");
            break;
        }

        //Bounded by the destination itself, so the copy cannot overflow
        if (fgets(dtemp->dseq, (int)sizeof dtemp->dseq, dnaseq) == NULL)
        {
            free(dtemp); //end of file (or read error): node is not needed
            break;
        }
        dtemp->next = NULL;

        if (dhead == NULL)
        {
            dhead = dtemp;
        }
        else
        {
            dtail->next = dtemp;
        }
        dtail = dtemp;
    }

    fclose(dnaseq); //Closing the file
    return dhead;
}

// Builds the linked list of codon-table entries from "codeoflife.txt".
//
// For every non-blank line the first character is stored as the amino-acid
// letter; after skipping the spaces/tabs that follow it, the next (up to)
// three characters are stored as the codon.
//
// Returns the head of the list (entries in file order), or NULL when the file
// cannot be opened or has no entries. The caller owns the list.
// If memory runs out, the entries built so far are returned.
cnode *buildcod()
{
    FILE *codt = fopen("codeoflife.txt", "r"); //Opening codeoflife.txt
    if (codt == NULL)
    {
        perror("codeoflife.txt");
        return NULL;
    }

    cnode *chead = NULL; //first entry of the table
    cnode *ctail = NULL; //last entry, where the next one is appended
    char str[258];

    while (fgets(str, (int)sizeof str, codt) != NULL) //One table entry per line
    {
        if (str[0] == '\n' || str[0] == '\r')
        {
            continue; //blank line: nothing to store
        }

        int i = 1;
        while (str[i] == ' ' || str[i] == '\t') //skip the separator after the letter
        {
            i++;
        }

        cnode *ctemp = malloc(sizeof *ctemp);
        if (ctemp == NULL)
        {
            perror("malloc");
            break;
        }

        ctemp->alph = str[0];
        strncpy(ctemp->cod, &str[i], 3);
        ctemp->cod[3] = '\0'; //strncpy does not terminate when 3 chars are copied
        ctemp->next = NULL;

        if (chead == NULL)
        {
            chead = ctemp;
        }
        else
        {
            ctail->next = ctemp;
        }
        ctail = ctemp;
    }

    fclose(codt);
    return chead;
}

// Looks up a codon in the codon-table list.
//
// test:  pointer to the three codon letters to look up.
// chead: head of the codon-table list (may be NULL for an empty table).
// Returns the amino-acid letter of the first entry whose first three codon
// characters match, or 'X' when no entry matches.
char findcod(char *test,cnode *chead)
{
    cnode *entry;

    for (entry = chead; entry != NULL; entry = entry->next)
    {
        if (strncmp(test, entry->cod, 3) != 0)
        {
            continue; //not this codon, keep walking the list
        }
        return entry->alph;
    }

    return 'X';
}

// Returns nonzero when the three characters at p spell a stop codon
// (TGA, TAA or TAG).
static int is_stop_codon(const char *p)
{
    return strncmp(p, "TGA", 3) == 0 ||
           strncmp(p, "TAA", 3) == 0 ||
           strncmp(p, "TAG", 3) == 0;
}

// Translates the longest translatable stretch of every DNA sequence and
// writes one result line per sequence to the output file.
//
// dhead:    head of the DNA sequence list (may be NULL: nothing is written).
// chead:    head of the codon-table list used by findcod().
// filename: path of the output file; it is created or truncated.
//
// For each sequence (upper-cased, cut at the first newline) every start codon
// ATG is tried. From it the codons are read in frame up to the FIRST stop
// codon; that stretch, stop codon excluded, is a candidate. The longest
// candidate wins, the earliest one on a tie. Start codons that overlap an
// earlier candidate are still tried, since they may begin a longer one in
// another reading frame. The winner is translated codon by codon through
// findcod(); a sequence without any candidate produces the line "none".
void finddnaseq(dnode *dhead,cnode *chead,char *filename)
{
    FILE *protseq = fopen(filename, "w"); //Opens the protein output file
    if (protseq == NULL)
    {
        perror(filename);
        return;
    }

    for (dnode *dtrav = dhead; dtrav != NULL; dtrav = dtrav->next)
    {
        char str[258];
        int len = 0;

        //Upper-cased copy of the line, without its newline. The bound keeps
        //the read inside dseq even if it is not terminated.
        while (len < (int)sizeof dtrav->dseq &&
               dtrav->dseq[len] != '\n' && dtrav->dseq[len] != '\0')
        {
            //toupper() needs a value representable as unsigned char
            str[len] = (char)toupper((unsigned char)dtrav->dseq[len]);
            len++;
        }
        str[len] = '\0';

        int best_start = 0; //index of the start codon of the best candidate
        int best_len = 0;   //its length without the stop codon; 0 = none yet

        for (int i = 0; i + 2 < len; i++)
        {
            if (strncmp(&str[i], "ATG", 3) != 0) //Searching for start codon
            {
                continue;
            }
            for (int j = i + 3; j + 2 < len; j += 3)
            {
                if (is_stop_codon(&str[j])) //Searching for stop codon
                {
                    if (j - i > best_len)
                    {
                        best_start = i;
                        best_len = j - i;
                    }
                    break; //a candidate always ends at its first stop codon
                }
            }
        }

        if (best_len == 0) //If no translatable sequences are found
        {
            fprintf(protseq, "none\n");
        }
        else
        {
            //best_len is a multiple of three because j advances by codons
            for (int k = best_start; k < best_start + best_len; k += 3)
            {
                fputc(findcod(&str[k], chead), protseq); //one amino acid per codon
            }
            fputc('\n', protseq);
        }
    }

    if (fclose(protseq) != 0) //Closing the file; reports a failed final write
    {
        perror(filename);
    }
}

// Frees every node of the codon-table list.
//
// chead: head of the list; NULL (empty list) is accepted and does nothing.
// All nodes are released, including the last one, so the caller must not use
// any pointer into the list afterwards.
void cfree(cnode *chead)
{
    while (chead != NULL)
    {
        cnode *following = chead->next; //read the link before the node is gone
        free(chead);
        chead = following;
    }
}

// Frees every node of the DNA sequence list.
//
// dhead: head of the list; NULL (empty list) is accepted and does nothing.
// All nodes are released, including the last one, so the caller must not use
// any pointer into the list afterwards.
void dfree(dnode *dhead)
{
    while (dhead != NULL)
    {
        dnode *following = dhead->next; //read the link before the node is gone
        free(dhead);
        dhead = following;
    }
}

// Program entry point.
//
// argv[1]: input file with the DNA sequences.
// argv[2]: output file that receives the translated protein sequences.
// Returns 0 after processing, or EXIT_FAILURE when the two file names are
// not supplied.
int main(int argc,char* argv[])
{
    if (argc < 3) //both file names are required before argv[1]/argv[2] are read
    {
        fprintf(stderr, "Usage: %s <dna-input-file> <protein-output-file>\n",
                argc > 0 ? argv[0] : "dna_translate");
        return EXIT_FAILURE;
    }

    dnode *dhead = builddna(argv[1]); //Builds the DNA Sequence linked list
    cnode *chead = buildcod();        //Builds the codon table linked list

    finddnaseq(dhead, chead, argv[2]); //Finds the longest translatable DNA sequence

    //Either list is NULL when its file was empty; only free what exists
    if (chead != NULL)
    {
        cfree(chead); //Frees up the codon table linked list
    }
    if (dhead != NULL)
    {
        dfree(dhead); //Frees up the DNA Sequence linked list
    }
    return 0;
}

Navigate

in C language Project: You will create a program which contains 4 functions: 1)

in C language. Provided the following broken implementation for push() onto a st

Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.

in C language. Genes are substrings of DNA which code for proteins and carry the

Question

Explanation / Answer

Related Questions

Navigate