Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-2-20: Difference between revisions
From OpenWetWare
Jump to navigation | Jump to search
(New page: == Script == #!/usr/bin/env python import os from Bio import GenBank, Seq from Bio.Seq import Seq,translate from Bio import Clustalw from Bio.Clustalw import MultipleAlignCL...) |
(No difference)
|
Revision as of 07:21, 20 February 2007
Script
#!/usr/bin/env python
"""Align the sequences in ``apoe.fasta`` with ClustalW and report differences.

The script prints every alignment column that contains a gap ("Deletion") or
more than one distinct base ("Mismatch").  It then extracts the first open
reading frame (ORF) of every aligned sequence, translates it, and reports
whether each protein differs from the protein of the first sequence.

Written for Python 2 and the legacy ``Bio.Clustalw`` Biopython interface.
"""
import os

GAP = '-'
STOP_CODONS = ('TAA', 'TAG', 'TGA')


def classify_columns(columns):
    """Sort alignment columns into gapped and mismatched ones.

    columns -- iterable of strings, one string per alignment column, each
               holding that column's character from every sequence.

    Returns a ``(deletions, mismatches)`` pair.  Each is a list of
    ``(index, column)`` tuples in column order.  A column is a deletion when
    it contains a gap character, and a mismatch when it contains more than
    one distinct non-gap character; a column can be both.
    """
    deletions = []
    mismatches = []
    for index, column in enumerate(columns):
        residues = set(column)
        if GAP in residues:
            deletions.append((index, column))
            # Gaps must not count as a differing base in the mismatch test.
            residues.discard(GAP)
        if len(residues) > 1:
            mismatches.append((index, column))
    return deletions, mismatches


def find_orf(sequence):
    """Return the first open reading frame found in *sequence*.

    sequence -- nucleotide string; may contain alignment gap characters.

    The ORF starts at the first ``ATG`` and runs, codon by codon, up to and
    including the first in-frame stop codon.  If no in-frame stop codon is
    found, the ORF ends at the last complete codon.  Returns an empty string
    when the sequence contains no ``ATG``.
    """
    # Aligned sequences carry gap padding, which would shift the codon frame
    # and cannot be translated, so it is removed before searching.
    bases = sequence.replace(GAP, '')
    start = bases.find('ATG')
    if start < 0:
        return ''
    codons = []
    # Stop at len - 2 so that only complete three-base codons are read.
    for position in range(start, len(bases) - 2, 3):
        codon = bases[position:position + 3]
        codons.append(codon)
        if codon in STOP_CODONS:
            break
    return ''.join(codons)


def main():
    """Run the alignment and print the column and protein reports."""
    # Biopython is imported here rather than at module level so the pure
    # helpers above can be imported without Biopython installed.  The
    # original Biopython import list is kept intact, even where a name is
    # not used below.
    from Bio import GenBank, Seq
    from Bio.Seq import Seq, translate
    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL
    from Bio.Align import AlignInfo

    cline = Clustalw.MultipleAlignCL(os.path.join(os.curdir, 'apoe.fasta'))
    cline.set_output('test.aln')
    alignment = Clustalw.do_alignment(cline)

    columns = [alignment.get_column(index)
               for index in range(alignment.get_alignment_length())]
    deletions, mismatches = classify_columns(columns)
    for index, column in deletions:
        print('Deletion %s at %s' % (column, index))
    for index, column in mismatches:
        print('Mismatch %s at %s' % (column, index))

    proteins = []
    for record in alignment.get_all_seqs():
        orf = find_orf(record.seq.tostring())
        proteins.append(translate(orf))

    if not proteins:
        return
    # The first sequence is the reference every other protein is compared to.
    reference = proteins[0]
    for protein in proteins[1:]:
        if protein == reference:
            print('Either %s has no mutations or only has silent mutations.'
                  % protein)
        else:
            print('%s has amino acid changes.' % protein)


if __name__ == '__main__':
    main()
Output
Deletion A-C at 802
Deletion C-C at 803
Deletion G-G at 804
Deletion A-A at 805
Deletion G-G at 806
Mismatch GGA at 658
Mismatch A-C at 802