Harvard:Biophysics 101/Notebook:ZS/2007-2-20: Difference between revisions
From OpenWetWare
Jump to navigationJump to search
(New page: ==Assignment 3, Due 2/20==) |
|||
Line 1: | Line 1: | ||
==Assignment 3, Due 2/20== | ==Assignment 3, Due 2/20== | ||
<pre> | |||
#Zachary Sun, Assignment 3 (alpha edition) | |||
#2.20.07, final version forthcoming | |||
#!/usr/bin/env python | |||
import os | |||
from Bio import Clustalw | |||
from Bio import GenBank, Seq | |||
from Bio.Seq import Seq,translate | |||
# Codons that terminate translation (DNA alphabet).
STOP_CODONS = ('TAA', 'TAG', 'TGA')


def find_stop_site(ref_seq, start_site):
    """Return the index of the first in-frame stop codon after start_site.

    ref_seq is the reference sequence as a plain string and start_site is
    the non-negative index of its start codon. Codons are read in steps of
    three beginning with the codon that follows the start codon, and the
    last complete codon of the sequence is included in the scan.

    Returns the index of the first base of the stop codon, or None when no
    in-frame stop codon exists.
    """
    # NOTE(review): ref_seq is taken from the alignment, so it may contain
    # '-' gap characters; a gap inside the coding region would shift the
    # reading frame used here -- confirm against the input data.
    for pos in range(start_site + 3, len(ref_seq) - 2, 3):
        if ref_seq[pos:pos + 3] in STOP_CODONS:
            return pos
    return None


def find_coding_span(ref_seq):
    """Return (start_site, stop_site) for the first ATG in ref_seq.

    Raises ValueError when ref_seq has no 'ATG' or when no in-frame stop
    codon follows it, instead of failing later on an undefined name.
    """
    start_site = ref_seq.find('ATG')  # position of start codon
    if start_site < 0:
        raise ValueError("no ATG start codon found in reference sequence")
    stop_site = find_stop_site(ref_seq, start_site)
    if stop_site is None:
        raise ValueError(
            "no in-frame stop codon found after start codon at %d" % start_site)
    return start_site, stop_site


def mismatch_columns(columns):
    """Yield (index, column) for every column holding more than one symbol.

    columns is an iterable of alignment columns, each a string with one
    character per aligned sequence. A gap character counts as a symbol.
    """
    for index, col in enumerate(columns):
        if len(set(col)) > 1:  # multiple distinct elements indicate a mismatch
            yield index, col


def region_label(index, start_site, stop_site):
    """Return the label printed for a mismatch at alignment column index.

    The label text is kept exactly as in the original script so the
    printed report does not change.
    """
    # NOTE(review): columns outside the ATG..stop span are tagged "Exon" and
    # columns inside it "Intron". That looks inverted, and the span really
    # separates coding from non-coding positions rather than exons from
    # introns -- confirm the intended wording before changing the labels.
    if index < start_site or index > stop_site:
        return "Exon: "
    return "Intron: "


def main():
    """Align apoe.fasta with ClustalW and report every mismatching column."""
    cmdline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
    cmdline.set_output('test.aln')
    align = Clustalw.do_alignment(cmdline)

    # The first sequence of the alignment is assumed to be the reference
    # coding sequence.
    ref_seq = align.get_all_seqs()[0].seq.tostring()
    start_site, stop_site = find_coding_span(ref_seq)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("%s %s" % ("Start codon site: ", start_site))
    print("%s %s" % ("Stop codon site: ", stop_site))

    columns = (align.get_column(i)
               for i in range(align.get_alignment_length()))
    for index, col in mismatch_columns(columns):
        print("%s %s %s" % (region_label(index, start_site, stop_site),
                            index, col))


if __name__ == "__main__":
    main()
</pre> | |||
output: | |||
<pre> | |||
Start codon site: 60 | |||
Stop codon site: 1011 | |||
Exon: 3 AAT | |||
Intron: 658 AGG | |||
Intron: 802 C-A | |||
Intron: 803 C-C | |||
Intron: 804 G-G | |||
Intron: 805 A-A | |||
Intron: 806 G-G | |||
</pre> | |||
Note: the apoe.fasta file was slightly modified to introduce a mutation before the start codon. |
Latest revision as of 00:47, 20 February 2007
Assignment 3, Due 2/20
#Zachary Sun, Assignment 3 (alpha edition)
#2.20.07, final version forthcoming
#!/usr/bin/env python
import os

# Codons that terminate translation (DNA alphabet).
STOP_CODONS = ('TAA', 'TAG', 'TGA')


def find_stop_site(ref_seq, start_site):
    """Return the index of the first in-frame stop codon after start_site.

    ref_seq is the reference sequence as a plain string and start_site is
    the non-negative index of its start codon. Codons are read in steps of
    three beginning with the codon that follows the start codon, and the
    last complete codon of the sequence is included in the scan.

    Returns the index of the first base of the stop codon, or None when no
    in-frame stop codon exists.
    """
    # NOTE(review): ref_seq is taken from the alignment, so it may contain
    # '-' gap characters; a gap inside the coding region would shift the
    # reading frame used here -- confirm against the input data.
    for pos in range(start_site + 3, len(ref_seq) - 2, 3):
        if ref_seq[pos:pos + 3] in STOP_CODONS:
            return pos
    return None


def find_coding_span(ref_seq):
    """Return (start_site, stop_site) for the first ATG in ref_seq.

    Raises ValueError when ref_seq has no 'ATG' or when no in-frame stop
    codon follows it, instead of failing later on an undefined name.
    """
    start_site = ref_seq.find('ATG')  # position of start codon
    if start_site < 0:
        raise ValueError("no ATG start codon found in reference sequence")
    stop_site = find_stop_site(ref_seq, start_site)
    if stop_site is None:
        raise ValueError(
            "no in-frame stop codon found after start codon at %d" % start_site)
    return start_site, stop_site


def mismatch_columns(columns):
    """Yield (index, column) for every column holding more than one symbol.

    columns is an iterable of alignment columns, each a string with one
    character per aligned sequence. A gap character counts as a symbol.
    """
    for index, col in enumerate(columns):
        if len(set(col)) > 1:  # multiple distinct elements indicate a mismatch
            yield index, col


def region_label(index, start_site, stop_site):
    """Return the label printed for a mismatch at alignment column index.

    The label text is kept exactly as in the original script so the
    printed report does not change.
    """
    # NOTE(review): columns outside the ATG..stop span are tagged "Exon" and
    # columns inside it "Intron". That looks inverted, and the span really
    # separates coding from non-coding positions rather than exons from
    # introns -- confirm the intended wording before changing the labels.
    if index < start_site or index > stop_site:
        return "Exon: "
    return "Intron: "


def main():
    """Align apoe.fasta with ClustalW and report every mismatching column."""
    # Biopython is imported here so the helpers above stay importable
    # without it. GenBank, Seq and translate are carried over from the
    # original import list; this script does not use them.
    from Bio import Clustalw
    from Bio import GenBank, Seq
    from Bio.Seq import Seq, translate

    cmdline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
    cmdline.set_output('test.aln')
    align = Clustalw.do_alignment(cmdline)

    # The first sequence of the alignment is assumed to be the reference
    # coding sequence.
    ref_seq = align.get_all_seqs()[0].seq.tostring()
    start_site, stop_site = find_coding_span(ref_seq)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("%s %s" % ("Start codon site: ", start_site))
    print("%s %s" % ("Stop codon site: ", stop_site))

    columns = (align.get_column(i)
               for i in range(align.get_alignment_length()))
    for index, col in mismatch_columns(columns):
        print("%s %s %s" % (region_label(index, start_site, stop_site),
                            index, col))


if __name__ == "__main__":
    main()
output:
Start codon site: 60 Stop codon site: 1011 Exon: 3 AAT Intron: 658 AGG Intron: 802 C-A Intron: 803 C-C Intron: 804 G-G Intron: 805 A-A Intron: 806 G-G
Note: the apoe.fasta file was slightly modified to introduce a mutation before the start codon.