Harvard:Biophysics 101/2007/Notebook:Xiaodi Wu/2007-2-6: Difference between revisions

From OpenWetWare
Jump to navigation | Jump to search
((The aforementioned assignment))
 
mNo edit summary
Line 1: Line 1:
<code>
<pre>
#!/usr/bin/env python
#!/usr/bin/env python


Line 62: Line 62:
print '-' * 40
print '-' * 40
print "Xiaodi Wu, Biophysics 101, 2007-02-06\n"
print "Xiaodi Wu, Biophysics 101, 2007-02-06\n"
</code>
</pre>

Revision as of 19:48, 4 February 2007

#!/usr/bin/env python

from Bio import GenBank, Seq

# Fetch, parse and translate one GenBank nucleotide record.
#
# A FeatureParser is handed to NCBIDictionary as its `parser`, so indexing the
# dictionary by record ID returns the record after it has been run through
# that parser.
record_parser = GenBank.FeatureParser()
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
# Download and parse the record with this ID
parsed_record = ncbi_dict['119392085'] # (IHH gene)

print "GenBank ID:", parsed_record.id

# Extract the nucleotide sequence from parsed_record as a plain string;
# `s` is the text the poly-A / poly-T searches later in the script run on.
s = parsed_record.seq.tostring()
print "total sequence length:", len(s)
# Translate the record's full sequence object.
# NOTE(review): the whole sequence is passed in, so translation presumably
# starts at its first base rather than at an annotated CDS start -- confirm
# that this reading frame is the intended one.
p = Seq.translate(parsed_record.seq)
# partition('*')[0] keeps the text before the first '*'; if there is no '*'
# the whole translated string is kept.
p_string = (p.tostring().partition('*'))[0] # Split at first stop codon, discard rest
print "total translated sequence length:", len(p_string)

max_repeat = 9

print "\npoly-A count -- method 1"
for i in range(max_repeat):
    substr = ''.join(['A' for n in range(i+1)])
    print substr, s.count(substr)

print "\npoly-A count -- method 2"
for i in range(max_repeat):
    substr = ''.join(['A' for n in range(i+1)])
    count = 0
    pos = s.find(substr,0)
    while not pos == -1:
        count = count + 1
        pos = s.find(substr,pos+1)
    print substr, count

print "\npoly-T count -- method 1"
for i in range(max_repeat):
    substr = ''.join(['T' for n in range(i+1)])
    print substr, s.count(substr)

print "\npoly-T count -- method 2"
for i in range(max_repeat):
    substr = ''.join(['T' for n in range(i+1)])
    count = 0
    pos = s.find(substr,0)
    while not pos == -1:
        count = count + 1
        pos = s.find(substr,pos+1)
    print substr, count
    
# Print the translated protein string (already cut at the first stop codon).
print "\ntranslated sequence"
print p_string

# Look the same record ID up a second time, through an NCBIDictionary built
# without a parser, and print whatever it returns.
# NOTE(review): presumably this is the unparsed GenBank text and costs a
# second download of the record -- confirm.
print "\nraw record"
ncbi_dict_raw = GenBank.NCBIDictionary('nucleotide', 'genbank')
raw_record = ncbi_dict_raw['119392085']
print raw_record

# Footer: a 40-character rule followed by the author / course / date line.
# The trailing "\n" in the last string adds one extra blank line of output.
print '-' * 40
print "Xiaodi Wu, Biophysics 101, 2007-02-06\n"