Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-5-3: Difference between revisions

Revision as of 22:42, 2 May 2007

Annotation

The following code combines the work that both Cynthia and I have done to return mesh terms (all mesh terms as well as just the major mesh terms) that are derived from two sources:
- 1) From parsing OMIM for PMIDs and returning meshterms of these PMIDs
- 2) By searching PubMed for rs number and returning meshterms of the articles returned in the search

Code

from Bio.EUtils import DBIdsClient
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

# C-style struct to pass parameters
class PubmedID:
        pass

# queries the database and returns all info in an XML format
def omim_snp_search(dnsnp_id):
        client = DBIdsClient.DBIdsClient()
        query = client.search(dnsnp_id, "omim")
        records = [i.efetch(rettype="xml") for i in query]
        return records

# basic text extraction from XML; based on http://docs.python.org/lib/dom-example.html
def get_text(node_list):
    rc = ""
    for node in node_list:
        if node.nodeType == node.TEXT_NODE:
            rc = rc + node.data
    return rc

def extract_allelic_variant_pmid(str):
    dom = parseString(str)
    pmids = dom.getElementsByTagName("Mim-reference")
    if len(pmids) == 0:
        return
    ids = []
    for p in pmids:
        i = PubmedID()
        i.pmid = get_text(p.getElementsByTagName("Mim-reference_pubmedUID")[0].childNodes)
        ids.append(i.pmid)
    return ids

from Bio import PubMed
from Bio import Medline
import string

# parses a mesh term to remove * and /
def parse_term(str, bool):
    parsed_term = str
    if(bool):
        parsed_term = parsed_term.replace('*', )
    if str.find('/') != -1:
       parsed_term = parsed_term.replace('/', ' ')
    return parsed_term

# parses list of mesh terms
# returns embedded list, one with all terms and one major  terms
def parse_mesh(list):
    all_mesh_terms = []
    major_mesh_terms = []
    mesh_term = 
    for i in range(len(list)):
        major = False
        if list[i].find('*') == -1:
            mesh_term = parse_term(list[i], major)
            all_mesh_terms.append(mesh_term)
        else:
            major = True
            mesh_term = parse_term(list[i], major)
            major_mesh_terms.append(mesh_term)
            all_mesh_terms.append(mesh_term)
    all_mesh = [all_mesh_terms, major_mesh_terms]
    return all_mesh


rec_parser = Medline.RecordParser()
medline_dict = PubMed.Dictionary(parser = rec_parser)

all_mesh = []
all_mesh_terms = []
major_mesh_terms = []

for i in omim_snp_search("rs11200638"):
        p = extract_allelic_variant_pmid(i.read())
        if p != None:
               # for s in p:
               #         print p[0]
                        cur_record = medline_dict[p[0]]
         #   print '\n', cur_record.title, cur_record.authors, cur_record.source
                        mesh_headings = cur_record.mesh_headings
                        if len(mesh_headings) != 0:
                            all_mesh = parse_mesh(mesh_headings)
                            all_mesh_terms.extend(all_mesh[0])
                            major_mesh_terms.extend(all_mesh[1])

print '\n', "All mesh terms from OMIM PMIDs:  ", all_mesh_terms, '\n', "Major mesh terms from OMIM PMIDs:  ", major_mesh_terms

article_ids = PubMed.search_for("rs11200638")

all_mesh = []
all_mesh_terms = []
major_mesh_terms = []
for did in article_ids[0:5]:
    cur_record = medline_dict[did]
    #print '\n', cur_record.title, cur_record.authors, cur_record.source
    mesh_headings = cur_record.mesh_headings
    if len(mesh_headings) != 0:
        all_mesh = parse_mesh(mesh_headings)
        all_mesh_terms.extend(all_mesh[0])
        major_mesh_terms.extend(all_mesh[1])

print '\n', "All mesh terms from rs number:  ", all_mesh_terms, '\n', "Major mesh terms from rs number:  ", major_mesh_terms

#rest of code returns review articles on topic of interest by searching pubmed
disease = "Age-related Macular Degeneration" #should put a.name here when combined with Xiaodi's previous code
search_term = "Review[ptyp] "+disease
#print search_term

review_ids = PubMed.search_for(search_term)

count = 1

for did in review_ids[0:3]:
    cur_record = medline_dict[did]
    print '\n', count, ')  ', string.rstrip(cur_record.title), cur_record.authors, string.strip(cur_record.source)
    count=count+1

Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-5-3: Difference between revisions

Revision as of 22:42, 2 May 2007

Annotation

Code

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools