Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

1. In this question, you will investigate the nucleotides at the splicing sites

ID: 3910094 • Letter: 1

Question

1. In this question, you will investigate the nucleotides at the splicing sites (the intersections of exons and introns) within protein-coding genes in the human genome. You are given a FASTA file called "gene fasta chr12.fa" which contains the sequences of 2,412 randomly selected protein-coding genes from human chromosome 12. Each sequence includes both the exon and intron portions of the gene. The nucleotides in exons are uppercase and the ones in introns are lowercase. Implement programs to compute the following [100 points]: (a) the average number of exons in a gene; (b) the average number of introns in a gene; (c) the length of the longest and shortest intron; (d) the length of the longest and shortest exon; (e) look at the positions immediately after each exon (the donor site, i.e. the first two bases of each intron) in all the genes and count the frequency of all possible 2-mers at those locations (GT is expected to have the highest frequency); (f) look at the positions immediately before internal exons (the splice acceptor site, i.e. the last two bases of each intron) in all the genes and count the frequency of all possible 2-mers at those locations (AG is expected to have the highest frequency).

Explanation / Answer

#include <string.h>

#include <cctype>
#include <climits>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;

/**
 * Folds one exon segment into the running exon statistics.
 *
 * The segment measured is `exon` followed by `c`; both strings are taken by
 * value, so nothing the caller owns is modified except the three counters.
 *
 * @param c     Text appended to `exon` to form the segment being measured.
 * @param max   In/out: raised to the segment length when that length is larger.
 * @param avg   In/out: incremented once per call (a segment counter; the
 *              caller divides it to obtain an average).
 * @param min   In/out: lowered to the segment length when that length is smaller.
 * @param exon  Prefix of the segment (may be empty).
 */
void chkexon(std::string c,int *max,int *avg,int*min,std::string exon){
    (*avg)++;

    // Measure in size_t, then clamp into int so the comparisons below are
    // signed. Comparing size() directly against *max / *min would convert a
    // negative bound to a huge unsigned value and give the wrong answer.
    const std::string::size_type total=exon.size()+c.size();
    const int len=
        total>static_cast<std::string::size_type>(INT_MAX) ? INT_MAX : static_cast<int>(total);

    if(len>*max){
        *max=len;
    }
    if(len<*min){
        *min=len;
    }
}

/**
 * Folds one intron segment into the running intron statistics.
 *
 * The segment measured is `intron` followed by `c`; both strings are taken by
 * value, so nothing the caller owns is modified except the three counters.
 *
 * @param c       Text appended to `intron` to form the segment being measured.
 * @param max1    In/out: raised to the segment length when that length is larger.
 * @param avg1    In/out: incremented once per call (a segment counter; the
 *                caller divides it to obtain an average).
 * @param min1    In/out: lowered to the segment length when that length is smaller.
 * @param intron  Prefix of the segment (may be empty).
 */
void chkintron(std::string c,int *max1,int *avg1,int *min1,std::string intron){
    (*avg1)++;

    // Measure in size_t, then clamp into int so the comparisons below are
    // signed. Comparing size() directly against *max1 / *min1 would convert a
    // negative bound to a huge unsigned value and give the wrong answer.
    const std::string::size_type total=intron.size()+c.size();
    const int len=
        total>static_cast<std::string::size_type>(INT_MAX) ? INT_MAX : static_cast<int>(total);

    if(len>*max1){
        *max1=len;
    }
    if(len<*min1){
        *min1=len;
    }
}

// Running totals for one segment kind (exons or introns).
struct SegmentStats{
    int longest;   // longest segment seen so far
    int shortest;  // shortest segment seen so far (INT_MAX until one is seen)
    int count;     // number of segments seen so far
    SegmentStats():longest(0),shortest(INT_MAX),count(0){}
};

// Hands the pending run (if any) to the matching tally helper and clears it.
static void flushRun(std::string &run,bool runIsExon,SegmentStats &exons,SegmentStats &introns){
    if(run.empty()){
        return;
    }
    // The whole run is passed as the text to append to an empty prefix, so the
    // helper measures exactly one complete exon/intron per call.
    if(runIsExon){
        chkexon(run,&exons.longest,&exons.count,&exons.shortest,std::string());
    }else{
        chkintron(run,&introns.longest,&introns.count,&introns.shortest,std::string());
    }
    run.clear();
}

/**
 * Reads the gene FASTA file and reports exon/intron statistics.
 *
 * Uppercase letters are treated as exon bases and lowercase letters as intron
 * bases. A maximal run of same-case letters is one exon or one intron; runs
 * continue across line breaks and end at a case change, a '>' header line, or
 * end of file. Averages are per gene, where the number of genes is the number
 * of '>' header lines (sequence data appearing before any header counts as one
 * gene).
 *
 * NOTE: the donor/acceptor 2-mer frequency tallies are not computed here.
 *
 * @return 0 on success, 1 if the input file cannot be opened.
 */
int main(){
    const char *path="gen_fasta_chr12.txt"; // given fasta file converted to txt

    // Read-only stream: the file is never written, and requesting write access
    // would make the open fail on a read-only file.
    std::ifstream f1(path);
    if(!f1){
        std::cerr<<"cannot open "<<path<<std::endl;
        return 1;
    }

    SegmentStats exons;
    SegmentStats introns;
    int genes=0;
    bool inGene=false;     // true once the current record has been counted
    bool runIsExon=false;  // case of the pending run; meaningful only when run is non-empty
    std::string run;
    std::string line;

    while(std::getline(f1,line)){
        if(!line.empty() && line[0]=='>'){
            // Header line: close the previous gene's last run and start a new gene.
            flushRun(run,runIsExon,exons,introns);
            ++genes;
            inGene=true;
            continue;
        }

        for(std::string::size_type i=0;i<line.size();++i){
            // <cctype> classifiers require a value representable as unsigned char.
            const unsigned char ch=static_cast<unsigned char>(line[i]);
            const bool upper=std::isupper(ch)!=0;
            const bool lower=std::islower(ch)!=0;
            if(!upper && !lower){
                continue; // whitespace, '\r', digits etc. do not end a run
            }
            if(!inGene){
                // Sequence data with no preceding header: count it as one gene.
                ++genes;
                inGene=true;
            }
            if(!run.empty() && upper!=runIsExon){
                flushRun(run,runIsExon,exons,introns);
            }
            runIsExon=upper;
            run.push_back(line[i]);
        }
    }
    flushRun(run,runIsExon,exons,introns);

    // Report 0 instead of the INT_MAX placeholder when no segment was seen.
    if(exons.count==0){
        exons.shortest=0;
    }
    if(introns.count==0){
        introns.shortest=0;
    }

    const double avgExons=genes>0 ? static_cast<double>(exons.count)/genes : 0.0;
    const double avgIntrons=genes>0 ? static_cast<double>(introns.count)/genes : 0.0;

    std::cout<<"max length of exon "<<exons.longest<<std::endl;
    std::cout<<"min length of exon "<<exons.shortest<<std::endl;
    std::cout<<"avg no of exon "<<avgExons<<std::endl;
    std::cout<<"max length of intron "<<introns.longest<<std::endl;
    std::cout<<"min length of intron "<<introns.shortest<<std::endl;
    std::cout<<"avg no of intron "<<avgIntrons<<std::endl;

    return 0;
}