Harvard:Biophysics 101/2007/Notebook:Resmi Charalel/2007-5-3: Difference between revisions
From OpenWetWare
Jump to navigationJump to search
(New page: ==Annotation== *The following code combines the work that both Cynthia and I have done to return mesh terms (all mesh terms as well as just the major mesh terms) that are derived from two ...) |
(→Code) |
||
Line 94: | Line 94: | ||
major_mesh_terms.extend(all_mesh[1]) | major_mesh_terms.extend(all_mesh[1]) | ||
print '\n', all_mesh_terms, '\n', major_mesh_terms | print '\n', "All mesh terms from OMIM PMIDs: ", all_mesh_terms, '\n', "Major mesh terms from OMIM PMIDs: ", major_mesh_terms | ||
article_ids = PubMed.search_for("rs11200638") | article_ids = PubMed.search_for("rs11200638") | ||
Line 110: | Line 110: | ||
major_mesh_terms.extend(all_mesh[1]) | major_mesh_terms.extend(all_mesh[1]) | ||
print '\n', all_mesh_terms, '\n', major_mesh_terms | print '\n', "All mesh terms from rs number: ", all_mesh_terms, '\n', "Major mesh terms from rs number: ", major_mesh_terms | ||
#rest of code returns review articles on topic of interest by searching pubmed | #rest of code returns review articles on topic of interest by searching pubmed |
Revision as of 22:42, 2 May 2007
Annotation
- The following code combines the work that Cynthia and I have done. It returns MeSH terms (all MeSH terms, as well as just the major MeSH terms) derived from two sources:
- 1) Parsing OMIM for PMIDs and returning the MeSH terms of those PMIDs
- 2) Searching PubMed for the rs number and returning the MeSH terms of the articles found by the search
Code
# MeSH-term lookup for a dbSNP rs number.
#
# Two sources are queried:
#   1) OMIM entries matching the rs number; the first PubMed ID referenced by
#      each entry is looked up in Medline.
#   2) A direct PubMed search for the rs number (first 5 hits).
# Afterwards the first 3 PubMed review articles for DISEASE are listed.
#
# The script targets Python 2 with the legacy Biopython modules Bio.EUtils,
# Bio.PubMed and Bio.Medline. Output goes through _emit() so the syntax is
# also valid on Python 3.

import string  # kept from the original script; no longer used below
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

RS_NUMBER = "rs11200638"
# TODO: should put a.name here when combined with Xiaodi's previous code
DISEASE = "Age-related Macular Degeneration"


# C-style struct to pass parameters.
# No longer used by extract_allelic_variant_pmid(); the name is retained so
# that any code still referring to it keeps working.
class PubmedID:
    pass


def omim_snp_search(dnsnp_id):
    """Query OMIM for *dnsnp_id* and fetch every hit in XML format.

    Returns a list holding one ``efetch(rettype="xml")`` result per search
    hit; the driver reads each of them with ``.read()``.
    """
    # Imported here rather than at module level so the pure parsing helpers
    # below stay importable when Bio.EUtils is not installed.
    from Bio.EUtils import DBIdsClient

    client = DBIdsClient.DBIdsClient()
    query = client.search(dnsnp_id, "omim")
    return [hit.efetch(rettype="xml") for hit in query]


def get_text(node_list):
    """Concatenate the data of all text nodes in *node_list*.

    Non-text nodes are ignored (no recursion into child elements).
    Based on http://docs.python.org/lib/dom-example.html
    """
    return "".join(
        node.data for node in node_list if node.nodeType == node.TEXT_NODE
    )


def extract_allelic_variant_pmid(str):
    """Extract the PubMed IDs referenced by an OMIM XML record.

    *str* is the XML text of the record.

    Returns ``None`` when the record contains no ``Mim-reference`` element.
    Otherwise returns a list with the text of the first
    ``Mim-reference_pubmedUID`` found under each ``Mim-reference``.
    References without such an element, or with an empty one, are skipped,
    so the list may be empty.
    """
    dom = parseString(str)
    references = dom.getElementsByTagName("Mim-reference")
    if not references:
        return None
    ids = []
    for reference in references:
        uid_nodes = reference.getElementsByTagName("Mim-reference_pubmedUID")
        if not uid_nodes:
            # Reference carries no PubMed UID; indexing [0] would raise.
            continue
        pmid = get_text(uid_nodes[0].childNodes)
        if pmid:
            ids.append(pmid)
    return ids


def parse_term(str, bool):
    """Clean up a single MeSH heading.

    Every '/' is replaced by a space. When *bool* is true, every '*' is
    removed as well; parse_mesh() passes true for headings containing '*'.
    """
    parsed_term = str
    if bool:
        parsed_term = parsed_term.replace('*', '')
    return parsed_term.replace('/', ' ')


def parse_mesh(list):
    """Parse a list of MeSH headings.

    Returns ``[all_mesh_terms, major_mesh_terms]``: the first list holds
    every cleaned heading in input order, the second only those whose raw
    heading contained '*'.
    """
    all_mesh_terms = []
    major_mesh_terms = []
    for heading in list:
        major = '*' in heading
        mesh_term = parse_term(heading, major)
        if major:
            major_mesh_terms.append(mesh_term)
        all_mesh_terms.append(mesh_term)
    return [all_mesh_terms, major_mesh_terms]


def _emit(*items):
    """Print *items* on one line with Python 2 ``print a, b, c`` spacing.

    Items are separated by a single space, except directly after an item
    that ends with a newline. A single-argument print call is used because
    it behaves the same on Python 2 and Python 3.
    """
    parts = []
    for item in items:
        # The 1-tuple keeps tuple items from being unpacked by '%'.
        text = "%s" % (item,)
        if parts and not parts[-1].endswith("\n"):
            parts.append(" ")
        parts.append(text)
    print("".join(parts))


def _first_omim_pmids(rs_number):
    """Yield the first referenced PubMed ID of each OMIM hit for *rs_number*."""
    for handle in omim_snp_search(rs_number):
        pmids = extract_allelic_variant_pmid(handle.read())
        # None means no references; an empty list means none had a PubMed UID.
        if pmids:
            # Only the first PMID of each entry is used, as in the original.
            yield pmids[0]


def _collect_mesh_terms(medline_dict, pmids):
    """Return ``(all_terms, major_terms)`` gathered over the records of *pmids*."""
    all_terms = []
    major_terms = []
    for pmid in pmids:
        headings = medline_dict[pmid].mesh_headings
        if headings:
            parsed_all, parsed_major = parse_mesh(headings)
            all_terms.extend(parsed_all)
            major_terms.extend(parsed_major)
    return all_terms, major_terms


def main():
    """Run both MeSH-term lookups, then list review articles for DISEASE."""
    # Deferred for the same reason as in omim_snp_search().
    from Bio import Medline
    from Bio import PubMed

    rec_parser = Medline.RecordParser()
    medline_dict = PubMed.Dictionary(parser=rec_parser)

    # Source 1: PubMed IDs referenced from OMIM.
    all_mesh_terms, major_mesh_terms = _collect_mesh_terms(
        medline_dict, _first_omim_pmids(RS_NUMBER))
    _emit('\n', "All mesh terms from OMIM PMIDs: ", all_mesh_terms,
          '\n', "Major mesh terms from OMIM PMIDs: ", major_mesh_terms)

    # Source 2: direct PubMed search for the rs number (first 5 hits).
    article_ids = PubMed.search_for(RS_NUMBER)
    all_mesh_terms, major_mesh_terms = _collect_mesh_terms(
        medline_dict, article_ids[0:5])
    _emit('\n', "All mesh terms from rs number: ", all_mesh_terms,
          '\n', "Major mesh terms from rs number: ", major_mesh_terms)

    # Rest of code returns review articles on the topic of interest by
    # searching PubMed.
    search_term = "Review[ptyp] " + DISEASE
    review_ids = PubMed.search_for(search_term)
    for index, did in enumerate(review_ids[0:3]):
        cur_record = medline_dict[did]
        _emit('\n', index + 1, ') ', cur_record.title.rstrip(),
              cur_record.authors, cur_record.source.strip())


if __name__ == "__main__":
    main()