TChan/Notebook/2007-3-20

From OpenWetWare
Jump to navigation | Jump to search

BLAST SNP XML to OMIM

  • NOTE: This probably doesn't work yet - I will tweak it once I can get a sample XML file to test with.
from Bio.EUtils import DBIdsClient
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

# From SNP BLAST xml to OMIM (using Xiaodi's code as a template...and then just using his code :))

# C-style struct to pass parameters
class rsID:
	"""Bare attribute container for one SNP hit, used like a C struct.

	Instances start out empty; extract_rsID() assigns `rsIDnum` on each one.
	"""

# queries the database and returns all info in an XML format
def BLAST_SNP_search(sample_seq):
	"""Search the "BLAST SNP" database for `sample_seq` and fetch each hit as XML.

	Returns a list holding one `efetch(rettype="xml")` result per search hit,
	in the order the search yields them.
	"""
	eutils_client = DBIdsClient.DBIdsClient()
	fetched = []
	for hit in eutils_client.search(sample_seq, "BLAST SNP"):
		fetched.append(hit.efetch(rettype="xml"))
	return fetched

# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    """Concatenate the data of every text node found directly in `node_list`.

    Nodes of any other type are skipped (their children are not visited).
    Returns "" when `node_list` holds no text nodes.
    """
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )

# extracts rsID data from the BLAST SNP XML, using the rsID struct above
def extract_rsID(xml_str):
	"""Parse BLAST SNP XML text and collect the rsID held by each matching element.

	Args:
		xml_str: the XML document as a string (the caller passes the result of
			`.read()` on a fetched record).

	Returns:
		A list of `rsID` instances, each with `rsIDnum` set to the text found
		directly inside one matching element, or None when nothing matches.
	"""
	dom = parseString(xml_str)
	# NOTE(review): the tag name is kept exactly as written, but '|' is not a
	# legal character in an XML element name, so this lookup is unlikely to
	# match anything -- confirm the real tag against a sample BLAST SNP XML file.
	matches = dom.getElementsByTagName("gn1|dbSNP|")
	if not matches:
		return None
	parsed = []
	for rs in matches:
		b = rsID()  # create empty instance of struct
		# Read the text of the matched element itself. Searching inside `rs`
		# for the same tag again only looks at descendants, so it raised
		# IndexError on the innermost match.
		b.rsIDnum = get_text(rs.childNodes)
		parsed.append(b)
	return parsed

# Read the query sequence as text and close the file right away; the search
# term handed to BLAST_SNP_search is the sequence itself, not the file handle.
with open('sample_sequence.txt') as seq_file:
	seq = seq_file.read().strip()
rsID_array = []

# Collect (and echo) the rsID number of every SNP hit returned for the sequence.
for i in BLAST_SNP_search(seq):
	rsIDs = extract_rsID(i.read())
	if rsIDs is not None:  # None means the record contained no rsID elements
		for b in rsIDs:
			rsID_array.append(b.rsIDnum)
			# Single-argument print(...) prints the same under the print statement.
			print(b.rsIDnum)


# C-style struct to pass parameters
class AllelicVariant:
	"""Bare attribute container for one OMIM allelic variant, used like a C struct.

	Instances start out empty; extract_allelic_variant_data() assigns `name`,
	`mutation` and `description` on each one.
	"""

# queries the database and returns all info in an XML format
def omim_snp_search(dnsnp_id):
	"""Search the "omim" database for `dnsnp_id` and fetch each hit as XML.

	Returns a list holding one `efetch(rettype="xml")` result per search hit,
	in the order the search yields them.
	"""
	eutils_client = DBIdsClient.DBIdsClient()
	fetched = []
	for hit in eutils_client.search(dnsnp_id, "omim"):
		fetched.append(hit.efetch(rettype="xml"))
	return fetched

# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    """Join the data of the text nodes that sit directly in `node_list`.

    Other node types are ignored. Returns "" if there are no text nodes.
    This repeats the get_text defined earlier in the file; being the later
    definition, it is the one in effect for the code below.
    """
    pieces = [n.data for n in node_list if n.nodeType == n.TEXT_NODE]
    return "".join(pieces)

# extracts allelic variant data, as the name implies, using the struct above
def extract_allelic_variant_data(str):
	"""Parse OMIM XML text into a list of AllelicVariant structs.

	For every "Mim-allelic-variant" element the variant's name, mutation text
	and description text are read from the first matching child element.
	Returns None when the document has no such element; a variant that lacks
	one of the expected children raises IndexError.
	"""
	def first(node, tag):
		# First descendant of `node` with the given tag name.
		return node.getElementsByTagName(tag)[0]

	document = parseString(str)
	variant_nodes = document.getElementsByTagName("Mim-allelic-variant")
	if not variant_nodes:
		return None

	parsed = []
	for v in variant_nodes:
		a = AllelicVariant()  # empty struct, filled in below
		name_node = first(v, "Mim-allelic-variant_name")
		a.name = get_text(name_node.childNodes)
		mutation_node = first(first(v, "Mim-allelic-variant_mutation"), "Mim-text_text")
		a.mutation = get_text(mutation_node.childNodes)
		description_node = first(first(v, "Mim-allelic-variant_description"), "Mim-text_text")
		a.description = get_text(description_node.childNodes)
		parsed.append(a)
	return parsed

# Look up every collected rsID in OMIM and print each allelic variant found.
for j in rsID_array:
    # j is already the rsID string itself; indexing rsID_array with it
    # (rsID_array[j]) raised TypeError.
    for i in omim_snp_search(j):
        v = extract_allelic_variant_data(i.read())
        if v is None:  # record contained no allelic variants
            continue
        for a in v:
            # Single-argument print(...) prints the same under the print statement.
            print(a.name)
            print(a.mutation)
            print(a.description)