Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-5-3
From OpenWetWare
(Difference between revisions)
RCharalel (Talk | contribs)
(New page: ==Annotation== *The following code combines the work that both Cynthia and I have done to return mesh terms (all mesh terms as well as just the major mesh terms) that are derived from two ...)
Next diff →
Revision as of 01:39, 3 May 2007
Annotation
- The following code combines the work that Cynthia and I have done to return MeSH terms (both the full set of terms and just the major terms), derived from two sources:
- 1) Parsing OMIM for PMIDs and returning the MeSH terms of those PMIDs
- 2) Searching PubMed for the rs number and returning the MeSH terms of the articles found by that search
Code
from Bio.EUtils import DBIdsClient
import xml.dom.minidom
from xml.dom.minidom import parse, parseString
# Bare record type in the spirit of a C struct: it declares nothing itself.
class PubmedID:
    """Empty attribute holder; fields are assigned on instances after creation."""
# Looks an identifier up in OMIM and fetches every hit as XML.
def omim_snp_search(dnsnp_id):
    """Search the "omim" database for dnsnp_id and fetch each hit as XML.

    dnsnp_id -- the search term handed to the EUtils client (the script in
                this file passes a dbSNP rs number).

    Returns a list holding the result of efetch(rettype="xml") for every
    search hit, in the order the search yields them.
    NOTE(review): callers in this file call .read() on each item, so these
    are presumably file-like handles -- confirm against Bio.EUtils.
    """
    eutils_client = DBIdsClient.DBIdsClient()
    hits = eutils_client.search(dnsnp_id, "omim")
    records = []
    for hit in hits:
        records.append(hit.efetch(rettype="xml"))
    return records
# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    """Return the concatenated character data of the text nodes in node_list.

    node_list -- an iterable of DOM nodes (e.g. an element's childNodes).

    Only nodes whose nodeType equals TEXT_NODE contribute; every other node
    is skipped and is not descended into.  Returns "" when node_list holds
    no text nodes.
    """
    # join() builds the result in one pass instead of re-copying the
    # accumulated string on every iteration.
    return "".join([node.data for node in node_list
                    if node.nodeType == node.TEXT_NODE])
def extract_allelic_variant_pmid(str):
    """Collect the PubMed IDs referenced by an XML document.

    str -- the XML text to parse (the name shadows the builtin but is kept
           so existing callers are unaffected).

    Every "Mim-reference" element in the document is visited and the text of
    its first "Mim-reference_pubmedUID" descendant is collected.
    NOTE(review): despite the function name, nothing here restricts the
    search to allelic-variant references -- confirm whether that is intended.

    Returns a list of PMID strings in document order, or None when the
    document yields no PMID at all, so callers only need a None check before
    indexing the result.
    """
    dom = parseString(str)
    ids = []
    for reference in dom.getElementsByTagName("Mim-reference"):
        uid_nodes = reference.getElementsByTagName("Mim-reference_pubmedUID")
        if not uid_nodes:
            # A reference without a PubMed UID: indexing [0] here would
            # raise IndexError and abort the whole extraction.
            continue
        pmid = get_text(uid_nodes[0].childNodes)
        if pmid:
            # An empty UID element cannot be looked up, so it is dropped.
            ids.append(pmid)
    if not ids:
        return None
    return ids
from Bio import PubMed
from Bio import Medline
import string
# parses a mesh term to remove * and /
def parse_term(str, bool):
    """Return a copy of a MeSH term with its markup characters cleaned up.

    str  -- the raw term text.
    bool -- when true, every '*' is stripped from the term; when false,
            any '*' is left in place.

    Every '/' is replaced by a single space regardless of bool.
    The parameter names shadow builtins; they are kept unchanged so that
    existing callers (including keyword callers) keep working.
    """
    parsed_term = str
    if bool:
        parsed_term = parsed_term.replace('*', '')
    # replace() already leaves the string untouched when '/' is absent, so
    # no separate membership test is needed first.
    return parsed_term.replace('/', ' ')
# parses list of mesh terms
# returns embedded list, one with all terms and one major terms
def parse_mesh(list):
    """Clean a sequence of MeSH headings and split out the major ones.

    list -- the raw heading strings (the name shadows the builtin but is
            kept so existing callers are unaffected).

    A heading counts as major when it contains '*'.  Each heading is cleaned
    with parse_term, which strips the '*' only for major headings.

    Returns a two-element list [all_terms, major_terms]: all_terms holds
    every cleaned heading in input order, major_terms only the major ones.
    """
    all_mesh_terms = []
    major_mesh_terms = []
    for raw_term in list:
        is_major = '*' in raw_term
        mesh_term = parse_term(raw_term, is_major)
        if is_major:
            major_mesh_terms.append(mesh_term)
        all_mesh_terms.append(mesh_term)
    return [all_mesh_terms, major_mesh_terms]
rec_parser = Medline.RecordParser()
medline_dict = PubMed.Dictionary(parser = rec_parser)
all_mesh = []
all_mesh_terms = []
major_mesh_terms = []
for i in omim_snp_search("rs11200638"):
p = extract_allelic_variant_pmid(i.read())
if p != None:
# for s in p:
# print p[0]
cur_record = medline_dict[p[0]]
# print '\n', cur_record.title, cur_record.authors, cur_record.source
mesh_headings = cur_record.mesh_headings
if len(mesh_headings) != 0:
all_mesh = parse_mesh(mesh_headings)
all_mesh_terms.extend(all_mesh[0])
major_mesh_terms.extend(all_mesh[1])
print '\n', all_mesh_terms, '\n', major_mesh_terms
article_ids = PubMed.search_for("rs11200638")
all_mesh = []
all_mesh_terms = []
major_mesh_terms = []
for did in article_ids[0:5]:
cur_record = medline_dict[did]
#print '\n', cur_record.title, cur_record.authors, cur_record.source
mesh_headings = cur_record.mesh_headings
if len(mesh_headings) != 0:
all_mesh = parse_mesh(mesh_headings)
all_mesh_terms.extend(all_mesh[0])
major_mesh_terms.extend(all_mesh[1])
print '\n', all_mesh_terms, '\n', major_mesh_terms
#rest of code returns review articles on topic of interest by searching pubmed
disease = "Age-related Macular Degeneration" #should put a.name here when combined with Xiaodi's previous code
search_term = "Review[ptyp] "+disease
#print search_term
review_ids = PubMed.search_for(search_term)
count = 1
for did in review_ids[0:3]:
cur_record = medline_dict[did]
print '\n', count, ') ', string.rstrip(cur_record.title), cur_record.authors, string.strip(cur_record.source)
count=count+1


