TChan/Notebook/2007-4-23: Difference between revisions
From OpenWetWare
Jump to navigationJump to search
(New page: ==Goals Completed== * (Temporary?) URL-parsing to get OUTPUT: MedStory searchpage using INPUT: search_term passed from other parts of the program * HTML-parsing: ** Parse out from MedStor...) |
(No difference)
|
Revision as of 21:03, 23 April 2007
Goals Completed
- (Temporary?) URL-parsing to get OUTPUT: MedStory searchpage using INPUT: search_term passed from other parts of the program
- HTML-parsing:
- Parse out from MedStory's HTML searchpage return:
- Drugs
- Procedures
- Experts
- Drugs in Clinical Trials
- Parse out from MedStory's HTML searchpage return:
- (More) URL-parsing:
- MedStory's search on Clinical Trials
- Got code to work for today's new code (ie. everything not commented out below)
- But when tried to add last week's URL-parsing code (which worked before), got "Token Error: EOF in multi-line statement"
- What does that mean?
- But when tried to add last week's URL-parsing code (which worked before), got "Token Error: EOF in multi-line statement"
Continuing Goals
- XML-Parsing
- Get Clinical Trials in an RSS form out, so that it can be read in an XML reader on the page we present to the patient (? is this the goal ?)
Code
"""Fetch a MedStory search page for a term and pull names out of its HTML.

The script downloads the general MedStory results page for a search term,
saves it to ``<search_term>_medstory.html``, and then scans the saved file
for link texts that follow the ``_Drug">``, ``_Therapy">``, ``_Person">``
and ``_ExperimentalDrug">`` markers.  It also builds the URL of the MedStory
clinical-trials search for the same term.

The code runs unchanged on Python 2 (which it was written for) and Python 3.
"""
import urllib

try:  # Python 3 keeps these helpers in urllib submodules
    from urllib.parse import quote_plus
    from urllib.request import urlopen
except ImportError:  # Python 2 exposes them directly on urllib
    quote_plus = urllib.quote_plus
    urlopen = urllib.urlopen

try:  # Python 2 name for "read one line of text from the user"
    read_input = raw_input
except NameError:  # Python 3 renamed it
    read_input = input


# Definitions of functions

def _encode_search_term(search_term):
    """Return the lower-cased search term encoded for use in a query string.

    ``quote_plus`` yields the same result as the earlier hand-written
    replacements (space -> '+', apostrophe -> '%27') and additionally escapes
    characters such as '&', '#' and '+' that would otherwise corrupt the URL.
    """
    return quote_plus(search_term.lower())


def _open_text(path, mode):
    """Open ``path`` as text in the given mode.

    On Python 2 this is a plain ``open`` (byte strings in and out, as
    before).  On Python 3 the encoding is pinned to UTF-8 with replacement so
    that writing and re-reading the downloaded page does not depend on the
    platform's default encoding.
    """
    if str is bytes:  # Python 2
        return open(path, mode)
    return open(path, mode, encoding='utf-8', errors='replace')


def parse_for_MedStory_genl(search_term):
    """Return the URL of MedStory's general web search for ``search_term``."""
    parsed_term = _encode_search_term(search_term)
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term


def get_MedStory_search_file(search_term, genl_search_file):
    """Download the general MedStory results page and write it to a file.

    ``genl_search_file`` is an already-open, writable text file object; the
    caller remains responsible for closing it.
    """
    url_stream = urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = url_stream.read()
    finally:
        # Release the connection even when the read fails.
        url_stream.close()
    if isinstance(page, bytes) and not isinstance(page, str):
        # Python 3 returns bytes from read(); the file object expects text.
        # NOTE(review): assumes the page is UTF-8 -- confirm against the
        # response's Content-Type header if mis-decoded characters show up.
        page = page.decode('utf-8', 'replace')
    genl_search_file.write(page)


def _collect_link_texts(marker, names, search_file_name):
    """Append to ``names`` every link text that follows ``marker`` in the file.

    For each occurrence of ``marker`` on a line, the text between the end of
    the marker and the next ``</a>`` *after* it is appended.  Searching for
    the closing tag from the marker onward (rather than from the start of the
    line) keeps an earlier link on the same line from producing an empty or
    wrong slice.  If the closing tag is missing, the rest of the line without
    its line ending is used.  Returns ``names``.
    """
    closing_tag = '</a>'
    with _open_text(search_file_name, 'r') as search_file:
        for line in search_file:
            start = line.find(marker)
            while start != -1:
                start += len(marker)
                end = line.find(closing_tag, start)
                if end == -1:
                    names.append(line[start:].rstrip('\r\n'))
                    break
                names.append(line[start:end])
                start = line.find(marker, end)
    return names


def get_drug_names(drug_list, search_file_name):
    """Append the link texts marked ``_Drug">`` to ``drug_list`` and return it."""
    return _collect_link_texts('''_Drug">''', drug_list, search_file_name)


def get_procedure_names(procedure_list, search_file_name):
    """Append the link texts marked ``_Therapy">`` to ``procedure_list`` and return it."""
    return _collect_link_texts('''_Therapy">''', procedure_list, search_file_name)


def get_experts_names(experts_list, search_file_name):
    """Append the link texts marked ``_Person">`` to ``experts_list`` and return it."""
    return _collect_link_texts('''_Person">''', experts_list, search_file_name)


def get_clinical_names(clinical_list, search_file_name):
    """Append the link texts marked ``_ExperimentalDrug">`` to ``clinical_list`` and return it."""
    return _collect_link_texts('''_ExperimentalDrug">''', clinical_list, search_file_name)


# Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Return the URL of MedStory's clinical-trials search for ``search_term``."""
    parsed_term = _encode_search_term(search_term)
    return "http://www.medstory.com/app?service=external&tc=c1&page=Search&q=%s&s=ClinicalTrial&c=true&i=" % parsed_term


# Last week's URL builders, still disabled.
# NOTE: the list literal in return_site_list_for_disease was missing its final
# closing ']'.  An unclosed bracket is what makes Python report
# "Token Error: EOF in multi-line statement" when this code is uncommented;
# the bracket has been added below.
##def parse_for_eMed(search_term):
##    parsed_term = search_term.lower().replace(' ', '%20')
##    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parsed_term
##
##def parse_for_Google_genl(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://www.google.com/search?hl=en&q=%s&btnG=Search" % parsed_term
##
##def parse_for_Google_treatment(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://www.google.com/search?hl=en&q=%s+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1" % parsed_term
##
##def parse_for_Wikipedia(search_term):
##    parsed_term = search_term.lower().capitalize().replace("'", '%27').replace(' ', '_')
##    return "http://en.wikipedia.org/wiki/%s" % parsed_term
##
##def parse_for_WHO(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=%s&Search=Search" % parsed_term
##
##def parse_for_GeneCards(search_term):
##    parsed_term = search_term.lower().replace(" ", '+')
##    # NB: This only gives a functionally correct search if the search_term is a name of a disease
##    # because there are other formats for different inputs and different forms of the input
##    return "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=%s#MICROCARDS" % parsed_term
##
##def return_site_list_for_disease(search_term):
##    # Currently returns site-name and URL list
##    # ex. [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"]]
##    return [["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
##            ["eMedicine", parse_for_eMed(search_term)],
##            ["Google, general search", parse_for_Google_genl(search_term)],
##            ["Google, Treatment search", parse_for_Google_treatment(search_term)],
##            ["Wikipedia", parse_for_Wikipedia(search_term)],
##            ["WHO", parse_for_WHO(search_term)],
##            ["GeneCards", parse_for_GeneCards(search_term)]]


# Stuff actually happening in the program

def main():
    """Prompt for a search term, download its MedStory page and print the results."""
    search_term = read_input('''Please enter in the search_term (will be returned in real version): ''')
    search_file_name = "%s_medstory.html" % search_term

    with _open_text(search_file_name, 'w') as search_file:
        get_MedStory_search_file(search_term, search_file)

    drug_list = get_drug_names([], search_file_name)
    procedure_list = get_procedure_names([], search_file_name)
    experts_list = get_experts_names([], search_file_name)
    # refers to drugs currently in clinical trials
    clinical_list = get_clinical_names([], search_file_name)

    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(drug_list)
    print(procedure_list)
    print(experts_list)
    print(clinical_list)
    print(parse_for_MedStory_clinical(search_term))

    ##final_list = return_site_list_for_disease(search_term)
    ##print(final_list)


if __name__ == "__main__":
    # Guarded so that importing this file does not prompt or hit the network.
    main()