Harvard:Biophysics 101/2007/Notebook:Michael Wang/2007-2-27: Difference between revisions
From OpenWetWare
Jump to navigationJump to search
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
Notes: | |||
I've decided to make an orf class that indexes to a list of codon classes. | |||
The mutation detection will be built into comparators for the class. It's taking me a bit to figure out exactly how these things all work... | |||
<pre> | |||
#find all orfs within the sequences | #find all orfs within the sequences | ||
import re | import re | ||
Line 90: | Line 95: | ||
print j.sequence | print j.sequence | ||
print j.protein | print j.protein | ||
</pre> |
Revision as of 22:04, 27 February 2007
Notes: I've decided to make an orf class that indexes to a list of codon classes. The mutation detection will be built into comparators for the class. It's taking me a bit to figure out exactly how these things all work...
#find all orfs within the sequences
import re

from Bio import Transcribe
from Bio import Translate
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq

transcriber = Transcribe.unambiguous_transcriber
#translation table with id 1 from Bio.Translate's unambiguous DNA tables
standard_translator = Translate.unambiguous_dna_by_id[1]

#value findORFS stores as the end position of an ORF that has no in-frame stop
NO_STOP = -1


class codon:
    """A DNA codon together with its transcribed and translated forms.

    Attributes:
        sequence: the DNA string exactly as passed in.
        mRNA: ``sequence`` transcribed with ``transcriber``.
        protein: first element of ``sequence`` translated with
            ``standard_translator``.
    """

    def __init__(self, sequence=""):
        #one Seq object serves both the transcription and the translation
        dna = Seq(sequence, IUPAC.unambiguous_dna)
        self.sequence = sequence
        self.mRNA = transcriber.transcribe(dna)
        self.protein = standard_translator.translate(dna)[0]


class mutation:
    """Record of a mutation: a type label and the (start, stop) span it covers."""

    def __init__(self,type="",start=0,stop=0):
        self.type = type
        self.span=(start,stop)


class orf:
    """An open reading frame stored as a list of ``codon`` objects.

    On initialization the sequence is cut into consecutive three-base codons.
    Bases left over after the last complete codon (for example when the orf
    has no stop) are not turned into a codon, but ``self.sequence`` still
    holds the full string that was passed in.
    """

    def __init__(self, sequence=""):
        self.sequence = sequence
        #floor division: range() needs an int on Python 2 and Python 3 alike
        whole_codons = len(sequence) // 3
        self.codons = [codon(sequence[3*i:3*i + 3]) for i in range(whole_codons)]

    #orfs are indexed by codons
    def __getitem__(self,index):
        """Return the codon at ``index``.

        The IndexError raised past the last codon is what lets ``for c in orf``
        iterate over the codons.
        """
        return self.codons[index]

    #comparing two orfs is meant to return a list of mutations
    def __eq__(self,other):
        """Placeholder for the planned mutation-detecting comparison.

        TODO: build and return the list of ``mutation`` objects. Until that is
        written, NotImplemented makes Python fall back to its default
        comparison instead of failing.
        """
        return NotImplemented


def findORFS(sequence, startpos=0):
    """Locate candidate open reading frames in a DNA string.

    Every ``ATG`` in ``sequence`` is paired with the first stop codon
    (``TAA``, ``TGA`` or ``TAG``) that begins a positive multiple of three
    bases after it.

    Args:
        sequence: DNA string to scan; the patterns only match upper-case bases.
        startpos: accepted for interface compatibility; currently unused.

    Returns:
        A list with one ``(start, end)`` tuple per ``ATG``, in order of
        appearance. ``start`` is the index of the ``A``; ``end`` is the index
        just past the stop codon, or ``NO_STOP`` (-1) when no in-frame stop
        exists.

    Progress is printed to stdout as the search runs.
    """
    start_spans = [hit.span() for hit in re.finditer('ATG', sequence)]
    stop_spans = [hit.span() for hit in re.finditer('(TAA|TGA|TAG)', sequence)]

    print("Infunction:  %s" % sequence)
    print("starts:")
    for span in start_spans:
        print(span)
    print("stops:")
    for span in stop_spans:
        print(span)

    all_orfs = []
    for start_span in start_spans:
        begin = start_span[0]
        for stop_span in stop_spans:
            print("checking %s and %s" % (begin, stop_span[0]))
            offset = stop_span[0] - begin
            #in frame means downstream of the start and a whole number of codons away
            if offset > 0 and offset % 3 == 0:
                print("orf at: %s   %s" % (begin, stop_span[1]))
                all_orfs.append((begin, stop_span[1]))
                break
        else:
            #loop ended without break: no in-frame stop for this start
            all_orfs.append((begin, NO_STOP))
    return all_orfs


#Main Program starts here
teststring = "ATG GGG GGG AAT GAT TAA CGT CGT TAA AGT ATG TTT TTT GTA G"
print(teststring)
teststring = re.sub(r"\s+", "", teststring)
print("chomped: %s" % teststring)

allspans = findORFS(teststring)
print("spans")
print(allspans)

allorfs = []
for begin, end in allspans:
    #an unterminated orf runs to the end of the sequence; slicing with the
    #raw -1 sentinel would silently drop the final base
    if end == NO_STOP:
        end = None
    allorfs.append(orf(teststring[begin:end]))

print(allorfs)
for found_orf in allorfs:
    print("orfseq: %s" % found_orf.sequence)
    for found_codon in found_orf:
        print(found_codon.sequence)
        print(found_codon.protein)