Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-2-27: Difference between revisions
From OpenWetWare
Jump to navigation | Jump to search
No edit summary |
|||
Line 127: | Line 127: | ||
Silent point mutation A 742 C in variable sequence two resulting in E 248 E | Silent point mutation A 742 C in variable sequence two resulting in E 248 E | ||
</pre> | </pre> | ||
==Notes== | |||
* I considered the first sequence in the apoe.fasta file to be the reference sequence, but this could also be downloaded directly from GenBank. |
Revision as of 21:34, 26 February 2007
Program Code
#!/usr/bin/env python
"""Report mutations in aligned APOE sequences relative to a reference.

The sequences in ``apoe.fasta`` are aligned with ClustalW through Biopython's
``Bio.Clustalw`` wrapper, which writes the alignment to ``test.aln``.  The
first aligned sequence is treated as the reference; every other sequence is
a "variable sequence".  For each sequence the open reading frame (ORF) is
cut out of the gapped alignment row and translated, and the script prints
every alignment column at which a variable sequence differs from the
reference.

Conventions used in the printed report:

* nucleotide positions are 0-based offsets from the ``A`` of the start codon;
* residue numbers are 1-based;
* codons are counted in alignment columns (gaps included), so the
  silent/non-silent call for a point mutation located downstream of a
  frameshifting gap should not be trusted.
"""
import os

STOP_CODONS = ('TAA', 'TAG', 'TGA')
GAP = '-'

# Spelled-out names used in the report for the first few variable sequences.
_VARIANT_NAMES = ('one', 'two', 'three', 'four', 'five',
                  'six', 'seven', 'eight', 'nine')

_INDEL_TEMPLATE = ('%d base pair %s in variable sequence %s beginning at '
                   'position %d \n resulting in a %s mutation.')
_POINT_TEMPLATE = ('%s point mutation %s %d %s in variable sequence %s '
                   'resulting in %s %d %s')


def find_orf(sequence):
    """Return ``(orf, start, stop)`` for the first ORF in *sequence*.

    ``start`` is the index of the first ``ATG`` and ``stop`` the index of
    the first in-frame stop codon after it; ``orf`` spans both codons.

    If there is no in-frame stop codon, ``orf`` runs from the start codon to
    the last complete codon and ``stop`` is -1.  If there is no ``ATG`` at
    all the result is ``('', -1, -1)``.
    """
    start = sequence.find('ATG')
    if start == -1:
        return '', -1, -1
    position = start + 3
    while position + 3 <= len(sequence):
        if sequence[position:position + 3] in STOP_CODONS:
            # Keep all three bases of the stop codon so the ORF length stays
            # a multiple of three.
            return sequence[start:position + 3], start, position
        position += 3
    return sequence[start:position], start, -1


def _variant_label(number):
    """Return the report name of variable sequence *number* (1-based)."""
    if number <= len(_VARIANT_NAMES):
        return _VARIANT_NAMES[number - 1]
    return str(number)


def _indel_kind(reference_base, variant_base):
    """Classify one alignment column as 'deletion', 'insertion' or None."""
    if variant_base == GAP and reference_base != GAP:
        return 'deletion'
    if reference_base == GAP and variant_base != GAP:
        return 'insertion'
    return None


def _indel_length(reference, variant, start, kind):
    """Count the consecutive columns of *kind* beginning at *start*."""
    limit = min(len(reference), len(variant))
    end = start
    while end < limit and _indel_kind(reference[end], variant[end]) == kind:
        end += 1
    return end - start


def describe_differences(orfs, proteins):
    """Return the report lines for all variants, in column order.

    *orfs* holds the gapped ORF strings and *proteins* their translations;
    index 0 of each is the reference, the rest are the variable sequences.
    Columns are visited left to right and, within a column, variants are
    visited in order.  A run of gap columns is reported once, at its first
    column.
    """
    lines = []
    if not orfs:
        return lines
    reference = orfs[0]
    for i in range(len(reference)):
        for number in range(1, len(orfs)):
            variant = orfs[number]
            # A variant ORF may end earlier than the reference ORF; there is
            # nothing left to compare past its end.
            if i >= len(variant) or variant[i] == reference[i]:
                continue
            label = _variant_label(number)
            kind = _indel_kind(reference[i], variant[i])
            if kind is None:
                codon = i // 3
                ref_residue = proteins[0][codon]
                var_residue = proteins[number][codon]
                if ref_residue == var_residue:
                    effect = 'Silent'
                else:
                    effect = 'Non-silent'
                lines.append(_POINT_TEMPLATE % (
                    effect, reference[i], i, variant[i], label,
                    ref_residue, codon + 1, var_residue))
                continue
            if i > 0 and _indel_kind(reference[i - 1], variant[i - 1]) == kind:
                # Interior of a gap run that was reported at its first column.
                continue
            length = _indel_length(reference, variant, i, kind)
            if length % 3 == 0:
                frame = 'non-frameshift'
            else:
                frame = 'frameshift'
            lines.append(_INDEL_TEMPLATE % (length, kind, label, i, frame))
    return lines


def main():
    """Align ``apoe.fasta`` and print the ORF location and mutation report."""
    # Biopython (and the ClustalW program it drives) is only needed for this
    # step, so it is imported here; the helpers above stay importable on a
    # machine without it.  The import list is the script's original one.
    from Bio import GenBank, Seq
    from Bio.Seq import Seq, translate
    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL
    from Bio.Align import AlignInfo

    cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
    cline.set_output('test.aln')
    alignment = Clustalw.do_alignment(cline)

    sequences = [record.seq.tostring() for record in alignment.get_all_seqs()]
    if not sequences:
        return

    found = [find_orf(sequence) for sequence in sequences]
    orfs = [orf for orf, _start, _stop in found]
    proteins = [translate(orf) for orf in orfs]

    # Header describes the reference (first) sequence.
    _orf, ref_start, ref_stop = found[0]
    print("5'->3' ORF: Start Codon Position: %d  Stop Codon Position: %d"
          % (ref_start, ref_stop))
    for line in describe_differences(orfs, proteins):
        print(line)


if __name__ == '__main__':
    main()
Output of Program
5'->3' ORF: Start Codon Position: 60 Stop Codon Position: 1011
Silent point mutation G 598 A in variable sequence two resulting in P 200 P
5 base pair deletion in variable sequence one beginning at position 742
 resulting in a frameshift mutation.
Silent point mutation A 742 C in variable sequence two resulting in E 248 E
Notes
- I considered the first sequence in the apoe.fasta file to be the reference sequence, but this could also be downloaded directly from GenBank.