User:Mdwang: Difference between revisions

Revision as of 22:31, 5 February 2007

Profile

Michael Wang
Biochemistry
Harvard College 2004-7
Genomics, Public Health, Informatics
Current Classes: Biophysics 101, Physics 11b, General Education 187

Assignments

February 1

#!/usr/bin/env python
# Search GenBank for "Xenopus AND notch", fetch the hit selected on the
# command line, translate its sequence and count runs of consecutive T's.
#
# Usage: <script> ENTRY_INDEX
#   ENTRY_INDEX is the zero-based position of the wanted record in the list
#   of ids returned by the search.
#
# Every print below uses the single-argument print(...) form with %-formatting:
# it emits the same text as the original print statements under Python 2 and
# also parses under Python 3.

from Bio.Seq import translate
from Bio import GenBank, Seq
import sys

# user defines which array element to pick
if len(sys.argv) < 2:
    sys.exit("usage: %s ENTRY_INDEX" % (sys.argv[0],))
try:
    array_position = int(sys.argv[-1])
except ValueError:
    sys.exit("ENTRY_INDEX must be an integer, got %r" % (sys.argv[-1],))
print("Retrieving entry number  %s" % (array_position,))

# Creates a non-parsed library of genes with search terms Xenopus and notch
search_terms = "Xenopus AND notch"
gi_list = GenBank.search_for(search_terms)
print("Searching for %s entries" % (search_terms,))
print(gi_list)
print("Done searching")

# Fail with a clear message instead of an IndexError when the requested
# position is not among the search results.
if not 0 <= array_position < len(gi_list):
    sys.exit("entry %d is out of range: the search returned %d ids"
             % (array_position, len(gi_list)))

# Use the entry the user asked for (the index was previously hard-coded to 0,
# so the command-line argument had no effect).
gi = gi_list[array_position]
print("Entry  %s" % (array_position,))
ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')

# prints the raw entry
print(ncbi_dict_alpha[gi])

# We can create a GenBank object that will parse a raw record
# This facilitates extracting specific information from the sequences
record_parser = GenBank.FeatureParser()

# NCBIDictionary is an interface to Genbank
ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)

# If you pass NCBIDictionary a GenBank id, it will download that record
parsed_record = ncbi_dict[gi]
print(parsed_record)
print("GenBank id: %s" % (parsed_record.id,))

# Extract the sequence from the parsed_record
s = parsed_record.seq.tostring()
print("total sequence length: %s" % (len(s),))

# Translating the raw sequence
my_protein = translate(s)
print("translated sequence: %s" % (my_protein,))

max_repeat = 9

print("multiple T analysis")

# method 1: str.count only counts non-overlapping occurrences
print("method 1")
for i in range(max_repeat):
    substr = 'T' * (i + 1)
    print("%s %s" % (substr, s.count(substr)))

# method 2: restart the search one character after each hit, so
# overlapping occurrences are counted as well
print("\nmethod 2")
for i in range(max_repeat):
    substr = 'T' * (i + 1)
    count = 0
    pos = s.find(substr)
    while pos != -1:
        count += 1
        pos = s.find(substr, pos + 1)
    print("%s %s" % (substr, count))

User:Mdwang: Difference between revisions

Revision as of 22:31, 5 February 2007

Profile

Assignments

February 1

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools

@@ Line 10: / Line 10: @@
 ==Assignments==
 ===February 1===
+#!/usr/bin/env python
+from Bio.Seq import translate
+from Bio import GenBank, Seq
+import sys
+#user defines which array element to pick
+array_position = int(sys.argv[-1])
+print "Retrieving entry number ",array_position
+#Creates a non-parsed library of genes with search terms Xenopus and notch
+search_terms = "Xenopus AND notch"
+gi_list = GenBank.search_for(search_terms)
+print "Searching for", search_terms, "entries"
+print gi_list
+print "Done searching"
+print "Entry ",array_position
+ncbi_dict_alpha = GenBank.NCBIDictionary('nucleotide', 'genbank')
+#prints the raw entry
+print ncbi_dict_alpha[gi_list[0]]
+# We can create a GenBank object that will parse a raw record
+# This facilitates extracting specific information from the sequences
+record_parser = GenBank.FeatureParser()
+# NCBIDictionary is an interface to Genbank
+ncbi_dict = GenBank.NCBIDictionary('nucleotide', 'genbank', parser = record_parser)
+# If you pass NCBIDictionary a GenBank id, it will download that record
+parsed_record = ncbi_dict[gi_list[0]]
+print parsed_record
+print "GenBank id:", parsed_record.id
+# Extract the sequence from the parsed_record
+s = parsed_record.seq.tostring()
+print "total sequence length:", len(s)
+#Translating the raw sequence
+my_protein = translate(s)
+print "translated sequence:", my_protein
+max_repeat = 9
+print "multiple T analysis"
+print "method 1"
+for i in range(max_repeat):
+    substr = ''.join(['T' for n in range(i+1)])
+    print substr, s.count(substr)
+print "\nmethod 2"
+for i in range(max_repeat):
+    substr = ''.join(['T' for n in range(i+1)])
+    count = 0
+    pos = s.find(substr,0)
+    while not pos == -1:
+        count = count + 1
+        pos = s.find(substr,pos+1)
+    print substr, count