Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Python programming, convert DNA to RNA: the process of converting DNA to RNA to protein

ID: 3727800 • Letter: P

Question

Python programming, convert DNA to RNA

The process of converting DNA to RNA to protein is referred to as the Central Dogma of Biology. Use a class "Biomolecule" and develop methods such that:
a. The class is initially constructed with a DNA sequence.
b. A method "transcribes" the DNA to RNA (converting all "T" to "U") and stores the RNA in a new variable within the class.
c. A method finds and stores the position of the first detected "open reading frame" (ORF). An ORF is a sequence that starts with an "AUG" and ends with a STOP codon (UAA, UAG or UGA).
d. A method translates the ORF RNA to an amino acid sequence. The provided csv file contains a key of the three RNA bases which correspond to each amino acid. Remember, amino acid translation terminates at the STOP codon.
Save this program as "hw5_q3.py"

RNA bases Amino Acid UUU Phe UUC Phe UUA Leu UUG Leu CUU Leu CUC Leu CUA Leu CUG Leu AUU Ile AUC Ile AUA Ile AUG Met GUU Val GUC Val GUA Val GUG Val UCU Ser UCC Ser UCA Ser UCG Ser CCU Pro CCC Pro CCA Pro CCG Pro ACU Thr ACC Thr ACA Thr ACG Thr GCU Ala GCC Ala GCA Ala GCG Ala UAU Tyr UAC Tyr UAA STOP UAG STOP CAU His CAC His CAA Gln CAG Gln AAU Asn AAC Asn AAA Lys AAG Lys GAU Asp GAC Asp GAA Glu GAG Glu UGU Cys UGC Cys UGA STOP UGG Trp CGU Arg CGC Arg CGA Arg CGG Arg AGU Ser AGC Ser AGA Arg AGG Arg GGU Gly GGC Gly GGA Gly GGG Gly

Explanation / Answer

import pickle
'''

It then translates the DNA code into mRNA code, and again into tRNA code.
'''


def main():
    """Interactively translate a DNA sequence and report the results.

    The DNA sequence is read either from a file (via ``get_file``) or
    from the keyboard, upper-cased, stripped of whitespace, and
    re-requested until ``check_sequence`` accepts it.  The sequence is
    then converted to mRNA, tRNA, amino-acid names and amino-acid
    symbols; all five results are printed and handed to ``dump_data``.
    """
    # Ask whether the sequence should be loaded from a file; whitespace is
    # removed so answers such as " y " are accepted.
    prompt = "Do you want to load a file to translate [Y/N]"
    open_choice = remove_spaces(input(prompt).upper())
    while open_choice not in ('Y', 'N'):
        open_choice = remove_spaces(input(prompt).upper())

    if open_choice == 'Y':
        sequence = get_file().upper()
    else:
        sequence = input("Enter the DNA sequence to convert it: ").upper()

    sequence = remove_spaces(sequence)

    # Keep asking until the sequence contains only valid DNA bases.
    while not check_sequence(sequence, 'dna'):
        sequence = remove_spaces(input("Please enter a correct sequence: ").upper())

    # Original DNA grouped into space-separated triplets for display.
    original_sequence = ' '.join(
        sequence[i:i + 3] for i in range(0, len(sequence), 3)
    )
    mrna = convert_sequence(sequence, 'dna')
    trna = convert_sequence(remove_spaces(mrna), 'rna')
    # The trailing space terminates the final codon for convert_to_proteins.
    proteins = convert_to_proteins(mrna + " ")
    symbols = convert_symbols(proteins)

    print('DNA: ' + original_sequence)
    print('mRNA: ' + mrna)
    print('tRNA: ' + trna)
    print(" ".join(proteins))
    print(" ".join(symbols))

    dump_data(original_sequence, mrna, trna, " ".join(proteins), " ".join(symbols))

    # Wait for Enter so a console window does not close immediately.
    input()


def check_sequence(sequence, type):
    """Return True when every character of *sequence* is a valid base.

    Args:
        sequence: Nucleotide string to validate.  The check is
            case-sensitive and whitespace counts as invalid, so callers
            should upper-case and strip the input first.
        type: ``'rna'`` validates against the bases ``AUCG``; any other
            value (callers pass ``'dna'``) validates against ``ATCG``.
            The name shadows the builtin but is kept so existing callers
            keep working.

    Returns:
        bool: False if any character is not an allowed base, True
        otherwise.  An empty sequence is considered valid.
    """
    valid_bases = 'AUCG' if type == 'rna' else 'ATCG'
    return all(base in valid_bases for base in sequence)


def convert_sequence(sequence, sequence_type):
    """Return the complementary RNA strand of *sequence*, grouped in triplets.

    Each base is replaced by its RNA complement (not merely ``T`` -> ``U``):
    for DNA input ``A->U, T->A, C->G, G->C``; for RNA input
    ``A->U, U->A, C->G, G->C``.

    Args:
        sequence: Upper-case nucleotide string without whitespace.
        sequence_type: ``'dna'`` selects the DNA table; any other value
            selects the RNA table.

    Returns:
        str: The converted bases joined into groups of three separated by
        single spaces (the last group may be shorter).  An empty input
        yields an empty string.

    Raises:
        KeyError: If *sequence* contains a character missing from the
            selected table.
    """
    if sequence_type == 'dna':
        complement = {'A': 'U', 'T': 'A', 'C': 'G', 'G': 'C'}
    else:
        complement = {'A': 'U', 'U': 'A', 'C': 'G', 'G': 'C'}

    converted = ''.join([complement[base] for base in sequence])

    # Insert a space after every third base so codons are easy to read.
    return ' '.join(converted[i:i + 3] for i in range(0, len(converted), 3))


def convert_to_proteins(sequence):
    """Translate whitespace-separated mRNA codons into amino-acid names.

    The codon table is loaded from the pickle file ``mRNA_to_protein.p``
    in the current working directory.  Codons are translated in order;
    translation stops at the first group shorter than three bases (an
    incomplete trailing codon).

    Args:
        sequence: mRNA codons separated by whitespace, e.g. ``"AUG CGU"``.
            A trailing space is accepted but no longer required for the
            last codon to be translated.

    Returns:
        list: The table entry for each complete codon, in order.

    Raises:
        KeyError: If a codon is not present in the table.
        OSError: If the table file cannot be opened.
    """
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the table is a trusted file shipped alongside the script.
    with open('mRNA_to_protein.p', 'rb') as table_file:
        mrna_to_protein = pickle.load(table_file)

    protein_sequence = []
    for codon in sequence.split():
        if len(codon) < 3:
            # Leftover bases that do not form a full codon end translation.
            break
        protein_sequence.append(mrna_to_protein[codon])

    return protein_sequence


def convert_symbols(proteins):
    """Map each entry of *proteins* to its symbol.

    The lookup table is loaded from the pickle file ``symbols.p`` in the
    current working directory.

    Args:
        proteins: Iterable of keys present in the symbol table (``main``
            passes the list returned by ``convert_to_proteins``).

    Returns:
        list: The table value for each entry of *proteins*, in order.

    Raises:
        KeyError: If an entry is not present in the table.
        OSError: If the table file cannot be opened.
    """
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the table is a trusted file shipped alongside the script.
    with open('symbols.p', 'rb') as table_file:
        symbols = pickle.load(table_file)

    return [symbols[name] for name in proteins]


def remove_spaces(x):
    """Return the string *x* with every whitespace character removed.

    ``str.split()`` with no arguments splits on any run of whitespace
    (spaces, tabs, newlines) and discards leading/trailing whitespace, so
    joining the pieces leaves no whitespace behind.
    """
    return ''.join(x.split())


def get_file():
    """Prompt for a file name until a file with a valid DNA sequence is read.

    The user is asked again when the file does not exist or when its
    content (after removing whitespace and upper-casing) is not accepted
    by ``check_sequence(..., 'dna')``.  Each newly entered name is
    re-opened and re-validated.

    Returns:
        str: The raw, unmodified content of the accepted file.
    """
    file_name = input("Enter file name: ")

    while True:
        try:
            # The context manager closes the file as soon as it is read.
            with open(file_name, 'r') as f:
                sequence = f.read()
        except FileNotFoundError:
            file_name = input(" The file '{}' was not found. Please enter an accurate file name/path: ".format(file_name))
            continue

        if check_sequence(remove_spaces(sequence).upper(), 'dna'):
            return sequence

        # Invalid content: ask for another file and loop to read it.
        file_name = input(" Please provide a file with a correct DNA sequence: ")

def dump_data(dna, mrna, trna, aa, s):
    """Write the translation results to ``results.txt``.

    The file is created (or overwritten) in the current working
    directory and closed before the function returns.

    Args:
        dna: DNA sequence text, written after the label ``DNA: ``.
        mrna: mRNA sequence text, written after the label ``mRNA: ``.
        trna: tRNA sequence text, written after the label ``tRNA: ``.
        aa: Amino-acid text, written without a label.
        s: Amino-acid symbol text, written without a label.

    Returns:
        bool: Always True.
    """
    # NOTE(review): each field is followed by a single space; a newline may
    # have been intended as the separator -- confirm before changing.
    with open('results.txt', 'w') as results:
        results.write('DNA: ' + dna + " ")
        results.write('mRNA: ' + mrna + " ")
        results.write('tRNA: ' + trna + " ")
        results.write(aa + " ")
        results.write(s + " ")

    return True


# Script entry point: start the interactive translator.
# NOTE: there is no ``if __name__ == "__main__"`` guard, so this call also
# runs when the module is imported.
main()