User:Mdwang: Difference between revisions
No edit summary |
No edit summary |
||
Line 10: | Line 10: | ||
==Assignments== | ==Assignments== | ||
===February 1=== | ===February 1=== | ||
#!/usr/bin/env python | |||
from Bio.Seq import translate | |||
from Bio import GenBank, Seq | |||
import sys | |||
#user defines which array element to pick | |||
array_position = int(sys.argv[-1]) | |||
print "Retrieving entry number ",array_position | |||
#Creates a non-parsed library of genes with search terms Xenopus and notch | |||
search_terms = "Xenopus AND notch" | |||
gi_list = GenBank.search_for(search_terms) | |||
print "Searching for", search_terms, "entries" | |||
print gi_list | |||
print "Done searching" | |||
print "Entry ",array_position | |||
ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank') | |||
#prints the raw entry | |||
print ncbi_dict_alpha[gi_list[0]] | |||
# We can create a GenBank object that will parse a raw record | |||
# This facilitates extracting specific information from the sequences | |||
record_parser = GenBank.FeatureParser() | |||
# NCBIDictionary is an interface to Genbank | |||
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser) | |||
# If you pass NCBIDictionary a GenBank id, it will download that record | |||
parsed_record = ncbi_dict[gi_list[0]] | |||
print parsed_record | |||
print "GenBank id:", parsed_record.id | |||
# Extract the sequence from the parsed_record | |||
s = parsed_record.seq.tostring() | |||
print "total sequence length:", len(s) | |||
#Translating the raw sequence | |||
my_protein = translate(s) | |||
print "translated sequence:", my_protein | |||
max_repeat = 9 | |||
print "multiple T analysis" | |||
print "method 1" | |||
for i in range(max_repeat): | |||
substr = ''.join(['T' for n in range(i+1)]) | |||
print substr, s.count(substr) | |||
print "\nmethod 2" | |||
for i in range(max_repeat): | |||
substr = ''.join(['T' for n in range(i+1)]) | |||
count = 0 | |||
pos = s.find(substr,0) | |||
while not pos == -1: | |||
count = count + 1 | |||
pos = s.find(substr,pos+1) | |||
print substr, count |
Revision as of 22:31, 5 February 2007
Profile
Michael Wang
Biochemistry
Harvard College 2004-7
Genomics, Public Health, Informatics
Current Classes:
Biophysics 101, Physics 11b, General Education 187
Assignments
February 1
- !/usr/bin/env python
from Bio.Seq import translate from Bio import GenBank, Seq import sys
- user defines which array element to pick
array_position = int(sys.argv[-1]) print "Retrieving entry number ",array_position
- Creates a non-parsed library of genes with search terms Xenopus and notch
search_terms = "Xenopus AND notch" gi_list = GenBank.search_for(search_terms) print "Searching for", search_terms, "entries" print gi_list print "Done searching" print "Entry ",array_position ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')
- prints the raw entry
print ncbi_dict_alpha[gi_list[0]]
- We can create a GenBank object that will parse a raw record
- This facilitates extracting specific information from the sequences
record_parser = GenBank.FeatureParser()
- NCBIDictionary is an interface to Genbank
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
- If you pass NCBIDictionary a GenBank id, it will download that record
parsed_record = ncbi_dict[gi_list[0]] print parsed_record print "GenBank id:", parsed_record.id
- Extract the sequence from the parsed_record
s = parsed_record.seq.tostring() print "total sequence length:", len(s)
- Translating the raw sequence
my_protein = translate(s) print "translated sequence:", my_protein
max_repeat = 9
print "multiple T analysis" print "method 1" for i in range(max_repeat):
substr = .join(['T' for n in range(i+1)]) print substr, s.count(substr)
print "\nmethod 2" for i in range(max_repeat):
substr = .join(['T' for n in range(i+1)]) count = 0 pos = s.find(substr,0) while not pos == -1: count = count + 1 pos = s.find(substr,pos+1) print substr, count