User:Mdwang: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
No edit summary
No edit summary
Line 10: Line 10:
==Assignments==
==Assignments==
===February 1===
===February 1===
#!/usr/bin/env python
from Bio.Seq import translate
from Bio import GenBank, Seq
import sys
#user defines which array element to pick
array_position = int(sys.argv[-1])
print "Retrieving entry number ",array_position
#Creates a non-parsed library of genes with search terms Xenopus and notch
search_terms = "Xenopus AND notch"
gi_list = GenBank.search_for(search_terms)
print "Searching for", search_terms, "entries"
print gi_list
print "Done searching"
print "Entry ",array_position
ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')
#prints the raw entry
print ncbi_dict_alpha[gi_list[0]]
# We can create a GenBank object that will parse a raw record
# This facilitates extracting specific information from the sequences
record_parser = GenBank.FeatureParser()
# NCBIDictionary is an interface to Genbank
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
# If you pass NCBIDictionary a GenBank id, it will download that record
parsed_record = ncbi_dict[gi_list[0]]
print parsed_record
print "GenBank id:", parsed_record.id
# Extract the sequence from the parsed_record
s = parsed_record.seq.tostring()
print "total sequence length:", len(s)
#Translating the raw sequence
my_protein = translate(s)
print "translated sequence:", my_protein
max_repeat = 9
print "multiple T analysis"
print "method 1"
for i in range(max_repeat):
    substr = ''.join(['T' for n in range(i+1)])
    print substr, s.count(substr)
print "\nmethod 2"
for i in range(max_repeat):
    substr = ''.join(['T' for n in range(i+1)])
    count = 0
    pos = s.find(substr,0)
    while not pos == -1:
        count = count + 1
        pos = s.find(substr,pos+1)
    print substr, count

Revision as of 22:31, 5 February 2007

Profile

Michael Wang
Biochemistry
Harvard College 2004-7
Genomics, Public Health, Informatics
Current Classes: Biophysics 101, Physics 11b, General Education 187


Assignments

February 1

#!/usr/bin/env python

from Bio.Seq import translate from Bio import GenBank, Seq import sys

#user defines which array element to pick

array_position = int(sys.argv[-1]) print "Retrieving entry number ",array_position

#Creates a non-parsed library of genes with search terms Xenopus and notch

search_terms = "Xenopus AND notch" gi_list = GenBank.search_for(search_terms) print "Searching for", search_terms, "entries" print gi_list print "Done searching" print "Entry ",array_position ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')

#prints the raw entry

print ncbi_dict_alpha[gi_list[0]]

# We can create a GenBank object that will parse a raw record
# This facilitates extracting specific information from the sequences

record_parser = GenBank.FeatureParser()

# NCBIDictionary is an interface to Genbank

ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)

# If you pass NCBIDictionary a GenBank id, it will download that record

parsed_record = ncbi_dict[gi_list[0]] print parsed_record print "GenBank id:", parsed_record.id

# Extract the sequence from the parsed_record

s = parsed_record.seq.tostring() print "total sequence length:", len(s)

#Translating the raw sequence

my_protein = translate(s) print "translated sequence:", my_protein

max_repeat = 9

print "multiple T analysis" print "method 1" for i in range(max_repeat):

   substr = ''.join(['T' for n in range(i+1)])
   print substr, s.count(substr)

print "\nmethod 2" for i in range(max_repeat):

   substr = ''.join(['T' for n in range(i+1)])
   count = 0
   pos = s.find(substr,0)
   while not pos == -1:
       count = count + 1
       pos = s.find(substr,pos+1)
   print substr, count