Harvard:Biophysics 101/2007/Notebook:Xiaodi Wu/2007-4-2
From OpenWetWare
Jump to navigation · Jump to search
Code for parsing OMIM records, carried over from the 20 March notebook.
from Bio.EUtils import DBIdsClient
import xml.dom.minidom
from xml.dom.minidom import parse, parseString


# C-style struct to pass parameters
class AllelicVariant:
    """Plain record describing one allelic variant.

    Instances start empty; extract_allelic_variant_data() assigns the
    three text attributes ``name``, ``mutation`` and ``description``.
    """


# queries the database and returns all info in an XML format
def omim_snp_search(dnsnp_id):
    """Search the "omim" database for ``dnsnp_id`` and fetch each hit as XML.

    Returns a list holding the result of ``efetch(rettype="xml")`` for
    every item of the search result.  The driver loop at the bottom of
    this file calls ``.read()`` on each entry to obtain the XML text.
    """
    client = DBIdsClient.DBIdsClient()
    query = client.search(dnsnp_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]


# basic text extraction from XML; based on
# http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    """Return the concatenated data of the text nodes in ``node_list``.

    Non-text nodes are skipped (their children are not visited); an
    empty list, or one without text nodes, gives "".
    """
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )


def _descendant_text(node, *tag_names):
    """Follow the first element matching each tag in turn; return its text.

    Returns "" as soon as one of the tags has no match, so a record
    that lacks an optional element does not raise IndexError.
    """
    for tag in tag_names:
        matches = node.getElementsByTagName(tag)
        if not matches:
            return ""
        node = matches[0]
    return get_text(node.childNodes)


# extracts allelic variant data, as the name implies, using the struct above
def extract_allelic_variant_data(str):
    """Parse an XML string and collect its allelic variants.

    ``str`` is the XML text to parse.  The parameter name shadows the
    builtin but is kept so existing keyword callers keep working; the
    builtin is not needed inside this function.

    Returns a list with one AllelicVariant per "Mim-allelic-variant"
    element, or None when the document contains no such element.  A
    name, mutation or description that is absent from a variant is
    stored as "".
    """
    dom = parseString(str)
    variants = dom.getElementsByTagName("Mim-allelic-variant")
    if not variants:
        # Callers test the result against None, so keep that contract.
        return None
    parsed = []
    for v in variants:
        a = AllelicVariant()  # create empty instance of struct
        # now populate the struct
        a.name = _descendant_text(v, "Mim-allelic-variant_name")
        a.mutation = _descendant_text(
            v, "Mim-allelic-variant_mutation", "Mim-text_text")
        a.description = _descendant_text(
            v, "Mim-allelic-variant_description", "Mim-text_text")
        parsed.append(a)
    return parsed


# Driver: look up one identifier (presumably a dbSNP rs number -- confirm)
# and print every allelic variant found in each returned record.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for i in omim_snp_search("rs11200638"):
    v = extract_allelic_variant_data(i.read())
    if v is not None:
        for a in v:
            print(a.name)
            print(a.mutation)
            print(a.description)