# Rosalind exercise 8 (rosalind练习题八)

# Problem

# The 20 commonly occurring amino acids are abbreviated by using 20 letters from the English alphabet (all letters except for B, J, O, U, X, and Z). Protein strings are constructed from these 20 symbols. Henceforth, the term genetic string will incorporate protein strings along with DNA strings and RNA strings.

# The RNA codon table dictates the details regarding the encoding of specific codons into the amino acid alphabet.

# Given: An RNA string s corresponding to a strand of mRNA (of length at most 10 kbp).

# Return: The protein string encoded by s.

# Sample Dataset

# AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA

# Sample Output

# MAMAPRTEINSTRING

# RNA序列翻译成蛋白序列

# 代码：

# Standard RNA codon table: each three-base codon maps to its one-letter
# amino acid. The three stop codons (UAA, UAG, UGA) map to the empty string,
# which is falsy, so a caller can test the looked-up value to detect the end
# of translation.
codon_table = {
    # Middle base U
    "UUU": "F", "CUU": "L", "AUU": "I", "GUU": "V",
    "UUC": "F", "CUC": "L", "AUC": "I", "GUC": "V",
    "UUA": "L", "CUA": "L", "AUA": "I", "GUA": "V",
    "UUG": "L", "CUG": "L", "AUG": "M", "GUG": "V",
    # Middle base C
    "UCU": "S", "CCU": "P", "ACU": "T", "GCU": "A",
    "UCC": "S", "CCC": "P", "ACC": "T", "GCC": "A",
    "UCA": "S", "CCA": "P", "ACA": "T", "GCA": "A",
    "UCG": "S", "CCG": "P", "ACG": "T", "GCG": "A",
    # Middle base A (UAA and UAG are stop codons)
    "UAU": "Y", "CAU": "H", "AAU": "N", "GAU": "D",
    "UAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D",
    "UAA": "", "CAA": "Q", "AAA": "K", "GAA": "E",
    "UAG": "", "CAG": "Q", "AAG": "K", "GAG": "E",
    # Middle base G (UGA is a stop codon)
    "UGU": "C", "CGU": "R", "AGU": "S", "GGU": "G",
    "UGC": "C", "CGC": "R", "AGC": "S", "GGC": "G",
    "UGA": "", "CGA": "R", "AGA": "R", "GGA": "G",
    "UGG": "W", "CGG": "R", "AGG": "R", "GGG": "G",
}

# Sample dataset from the problem statement above; its expected translation
# is MAMAPRTEINSTRING.
rna = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"

# 常规方法

def translate(rna, *, table=None):
    """Translate an mRNA string into its protein string.

    The string is read in consecutive, non-overlapping three-base codons
    starting at index 0, and each codon is looked up in the codon table.

    Translation ends at the first codon whose lookup yields an empty value.
    That covers three cases, which are deliberately not distinguished:

    * a stop codon (mapped to ``''`` in the table),
    * a codon that is not in the table at all (e.g. lowercase input or a
      DNA ``T``),
    * a trailing fragment shorter than three bases.

    Args:
        rna: The mRNA sequence, e.g. ``"AUGGCCUAA"``.
        table: Optional mapping from codon to one-letter amino acid, with
            stop codons mapped to ``''``. Defaults to the module-level
            ``codon_table``; pass another mapping to use a different
            genetic code.

    Returns:
        The protein string encoded by ``rna`` up to (not including) the
        point where translation ended. Empty input yields ``''``.
    """
    # Resolve the default at call time so the module-level table is only
    # needed when the caller does not supply one.
    lookup = codon_table if table is None else table

    # Collect residues in a list and join once, instead of growing a string
    # with repeated ``+=``.
    residues = []
    for start in range(0, len(rna), 3):
        residue = lookup.get(rna[start:start + 3], '')
        if not residue:
            break
        residues.append(residue)
    return ''.join(residues)

# Translate the sample RNA with the plain-Python implementation and print
# the resulting protein string.
protein = translate(rna)

print(protein)

# biopython方法

from Bio.Seq import Seq

# Wrap the sample RNA in a Biopython Seq so its translate() method can be used.
seq_rna = Seq(rna)

# to_stop=True ends translation at the first in-frame stop codon and leaves
# the stop symbol out of the result. With the default (to_stop=False) the
# terminal stop codon is rendered as "*", so the output would be
# "MAMAPRTEINSTRING*" rather than the expected "MAMAPRTEINSTRING".
protein = seq_rna.translate(to_stop=True)

print(protein)

# Rosalind exercise 8 (rosalind练习题八)

# Page footer: "Recommended reading: more great content" (推荐阅读更多精彩内容)