# Working with Genetic Sequences

In [2]:
%load_ext autoreload
%autoreload 2

import sys,os; sys.path.append(os.environ['BMESAHMETDIR']); import bmes
bmes.pipinstall('biopython','Bio')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# TODO: implement seq_transcribe() -- placeholder stub, no body has been written yet.
    

## Reading Fasta Files

In [4]:
from Bio import SeqIO

# SeqIO.parse returns an "iterator" object, which can be consumed efficiently in a for loop.
# It is materialized into a list here for demonstration purposes only; for a file with
# many entries this is not efficient and is not recommended.
genes = list(SeqIO.parse('samplegenes.fasta', 'fasta'))

print(genes[0])

ID: NM_012490.2
Name: NM_012490.2
Description: NM_012490.2 Rattus norvegicus acrosin (Acr), mRNA
Number of features: 0
Seq('GGCCAGGTTAGGGCAGGAGTATGGTAGAGATGCTGCCAACTGTCGTTGCGCTGG...ATA')


In [6]:
# Pull the identifier and the full sequence out of the first record.
gene = genes[0]
print('ID: ' + str(gene.id))
print('Seq: ' + str(gene.seq))

ID: NM_012490.2
Seq: GGCCAGGTTAGGGCAGGAGTATGGTAGAGATGCTGCCAACTGTCGTTGCGCTGGTCCTGGCAGTGTCCGTGGTTGCCAAGGATAACACCACGTGTGATGGCCCCTGTGGGTTACGATTCAGGCAGAACCCACAAGCAGGTATCCGGATTGTCGGAGGGCAGACTTCGTCGCCTGGGGCCTGGCCCTGGATGGTCAGTTTACAGATCTTCACGTCCCATAACAGCCGTAGGTATCACGCCTGCGGAGGCAGCCTACTGAACTCCCACTGGGTGCTCACGGCTGCTCACTGCTTCGATAACAAAAAAAAAGTCTATGACTGGAGACTGGTTTTTGGAGCCCATGAAATTGAATACGGAAGAAACAAGCCAGTGAAAGAGCCCCAGCAGGAAAGATACGTGCAGAAAATTGTCATCCACGAGAAATACAACGCTGTGACCGAGGGGAACGACATTGCCCTCTTGAAAGTCACTCCTCCTGTTACATGTGGGGACTTCGTTGGGCCTGGCTGCCTACCTCATTTTAAGTCTGGTCCTCCCAGAATCCCCCACACCTGCTACGTGACTGGGTGGGGATACATAAAAGATAACGCCCCCAGACCATCACCTGTCCTGATGGAGGCCCGCGTGGATCTCATTGACCTCGACCTGTGTAACTCCACCCAGTGGTACAATGGGCGTGTCACATCAACTAATGTGTGCGCAGGGTATCCTGAAGGCAAGATTGACACCTGCCAGGGGGACAGTGGTGGGCCTCTCATGTGCAGAGACAGCGTCGACAGCCCCTTTGTGATCGTGGGGATCACGAGCTGGGGGGTAGGCTGTGCCCGTGCTAAGCGTCCTGGAGTCTACACAGCCACCTGGGACTACCTGGACTGGATTGCTTCCAAGATCGGCCCTACCGCCTTGCACTTGATTCAACCGGCCACCCCTCACCCACCTACAACCCAGCAACCGGTCATCTCTTTCCACCCTCCTTCGAT

## Indexing a String/Sequence

In [7]:
seq = genes[0].seq

# Slices are half-open: seq[a:b] covers positions a .. b-1.
# Print the three-letter windows starting at positions 0, 1 and 2,
# i.e. seq[0:3], seq[1:4] and seq[2:5].
for offset in range(3):
    print(seq[offset:offset + 3])


GGC
GCC
CCA


## Converting a codon to an amino acid

In [8]:
# You could create your own "genetic code" table.
# Keys are DNA codons (three letters from T, C, A, G); values are one-letter
# amino acid codes. The three stop codons (TAA, TAG, TGA) map to '*' so that
# all 64 codons are present and looking up a stop codon does not raise KeyError.
mygeneticcode = {
    # First base T
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
    'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
    'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
    # First base C
    'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
    # First base A
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    # First base G
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
}

# Look up a single codon; as the last expression of the cell, the result is displayed.
codon = 'ATG'
mygeneticcode[codon]

'M'

In [9]:
# But it is better to use a standard library/function, if that functionality is already available:
import Bio.Data.CodonTable

# `t` is kept as the name because later cells refer to it.
t = Bio.Data.CodonTable.standard_dna_table

# Print the table itself, then its forward_table and back_table attributes,
# each preceded by a banner line.
for banner, content in (
    ('******** Codon Table: ', t),
    ('******** Forward Table: ', t.forward_table),
    ('******** Backward Table: ', t.back_table),
):
    print(banner)
    print(content)

******** Codon Table: 
Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V   | GCG A   | GAG E   | GGG G   | G
--+---------+---------+---------+---------+--

In [11]:
# Look up the amino acid for a single codon in the forward table of `t`.
amino_acid = t.forward_table['CCA']
print(amino_acid)

P
