Harvard:Biophysics 101/2007/Notebook:Katie Fifer/2007-2-6

From OpenWetWare
Revision as of 06:45, 6 February 2007 by Kfifer (talk | contribs) (New page: #!/usr/bin/env python # Added translate so that we can use it in finding the protein translation from Bio import GenBank, Seq from Bio.Seq import translate # We can create a G...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation | Jump to search
 #!/usr/bin/env python
 
 # Added translate so that we can use it in finding the protein translation
 from Bio import GenBank, Seq
 from Bio.Seq import translate
 # We can create a GenBank object that will parse a raw record
 # This facilitates extracting specific information from the sequences
 # (the record fetched below is accessed through its .id and .seq attributes).
 
 record_parser = GenBank.FeatureParser()
 
 # NCBIDictionary is an interface to Genbank
 # The arguments select the 'nucleotide' database and the 'genbank' format;
 # the parser is applied to every record looked up through this dictionary.
 ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
 
 # If you pass NCBIDictionary a GenBank id, it will download that record
 # PART 1: A different GenBank ID
 # NOTE(review): this lookup presumably needs network access to NCBI -- confirm.
 parsed_record = ncbi_dict['116496513']
 
 print "GenBank id:", parsed_record.id
 
 # Extract the sequence from the parsed_record
 # `s` is used below with len(), str.count() and str.find(), i.e. as a plain string.
 s = parsed_record.seq.tostring()
 print "total sequence length:", len(s)
 # Longest run length searched for: the loops below look for runs of
 # 1 .. max_repeat identical bases.
 max_repeat = 9
 
 print "method 1"
 for i in range(max_repeat):
     # PART 2: Change A to T
     substr = .join(['T' for n in range(i+1)])
     print substr, s.count(substr)
 
 print "\nmethod 2"
 for i in range(max_repeat):
     # PART 2: Change A to T
     substr = .join(['T' for n in range(i + 1)])
     count = 0
     pos = s.find(substr, 0)
     while not pos == -1:
         count = count + 1
         pos = s.find(substr, pos + 1)
     print substr, count
 
 # PART 3: Print the translated protein sequence
 # The whole downloaded sequence string `s` is passed to translate as-is; no
 # coding region or reading frame is selected first.
 # NOTE(review): presumably translation starts at the first base -- confirm
 # against the Bio.Seq.translate documentation.
 print 'protein translation is: '
 my_protein = translate(s)
 print my_protein
 
 # PART 4: New dictionary without parser. Just print raw record.
 
 # No parser argument is given here, so the looked-up record is printed
 # below exactly as this dictionary returns it.
 ncbi_dict2 = GenBank.NCBIDictionary('nucleotide', 'genbank')
 # Same GenBank id as the parsed lookup earlier in the script; this is a
 # second, separate lookup of that record.
 new_raw_record = ncbi_dict2['116496513']
 # Printing '\n' emits that newline plus print's own trailing newline,
 # leaving a visual gap before the record.
 print '\n'
 print new_raw_record