Harvard:Biophysics 101/2007/Notebook:Xiaodi Wu/2007-4-5

From OpenWetWare
Revision as of 08:28, 5 April 2007 by Wuxiaodi (talk | contribs) (script. hurrah.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation | Jump to search

Script for SNP processing:

from Bio import SeqIO
from Bio.Blast import NCBIWWW
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

# Read the first record of the FASTA file and keep its sequence data.
file_handle = open("example.fasta")
try:
    # `records` is an iterator (it is advanced with .next()), so the first
    # record has to be pulled while the file is still open.
    records = SeqIO.parse(file_handle, format="fasta")
    record = records.next()
finally:
    # Close the file even if parsing fails; it was previously left open.
    # NOTE(review): after this point `records` can no longer be advanced --
    # confirm nothing else relies on iterating past the first record.
    file_handle.close()
sequence = record.seq.data
# Parenthesized single-argument print gives the same output as the print
# statement.
print(sequence)

# Submit the sequence to NCBI via qblast and dump the raw response text,
# which is parsed as XML further down.
blast_program = "blastn"
snp_database = "snp/human_9606/human_9606"
result_handle = NCBIWWW.qblast(blast_program, snp_database, sequence)
blast_results = result_handle.read()
print(blast_results)

# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    """Return the concatenated text of the TEXT_NODE entries in *node_list*.

    Only the nodes passed in are inspected -- their children are not
    descended into -- and nodes of any other type are skipped.  An empty
    sequence, or one without text nodes, yields "".
    """
    # Join once instead of growing a string with repeated concatenation.
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )

# extracts snp data
def extract_snp_data(str):
    """Print and return the accession of every <Hit> in an XML string.

    Args:
        str: XML text to parse (here, the BLAST response).  The name shadows
            the builtin but is kept so existing callers are unaffected.

    Returns:
        A list holding, for each <Hit> element in document order, the text
        of its first <Hit_accession> element; or None when the document
        contains no <Hit> elements at all.

    Raises:
        IndexError: if a <Hit> contains no <Hit_accession> element.
    """
    dom = parseString(str)
    hits = dom.getElementsByTagName("Hit")
    if not hits:
        # No hits: callers get None rather than an empty list.
        return None
    accessions = []
    for hit in hits:
        # Only the first <Hit_accession> under each hit is read.
        accession = get_text(
            hit.getElementsByTagName("Hit_accession")[0].childNodes
        )
        accessions.append(accession)
        # Parenthesized single-argument print gives the same output as the
        # print statement.
        print(accession)
    return accessions

# Run the extraction on the BLAST response; the function prints each
# accession itself, and the list it returns is not used here.
extract_snp_data(blast_results)