TChan/Notebook/2007-4-23: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
(New page: ==Goals Completed== * (Temporary?) URL-parsing to get OUTPUT: MedStory searchpage using INPUT: search_term passed from other parts of the program * HTML-parsing: ** Parse out from MedStor...)
(No difference)

Revision as of 21:03, 23 April 2007

Goals Completed

  • (Temporary?) URL-parsing to get OUTPUT: MedStory searchpage using INPUT: search_term passed from other parts of the program
  • HTML-parsing:
    • Parse out from MedStory's HTML searchpage return:
      • Drugs
      • Procedures
      • Experts
      • Drugs in Clinical Trials
  • (More) URL-parsing:
    • MedStory's search on Clinical Trials
  • Got today's new code to work (i.e., everything not commented out below)
    • But when I tried to add last week's URL-parsing code (which worked before), I got "Token Error: EOF in multi-line statement"
      • What does that mean?

Continuing Goals

  • XML-Parsing
    • Get the Clinical Trials results out in RSS form, so that they can be read in an XML reader on the page we present to the patient (is this the goal?)


Code

import urllib

# Definitions of functions

def parse_for_MedStory_genl(search_term):
    """Return the MedStory search-page URL (``s=Web``) for ``search_term``.

    The term is lower-cased and then form-encoded, so spaces become ``+``
    and an apostrophe becomes ``%27`` exactly as before, while reserved
    characters such as ``&``, ``#``, ``%`` and ``+`` are now percent-escaped
    instead of corrupting the query string.

    Args:
        search_term: Text to search for (a plain string).

    Returns:
        The complete URL as a string, with the encoded term as the ``q``
        parameter.
    """
    # Imported locally because quote_plus lives in ``urllib`` on Python 2
    # and in ``urllib.parse`` on Python 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    parsed_term = quote_plus(search_term.lower())
    return "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q=%s" % parsed_term

def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory search page for ``search_term`` into a file.

    Args:
        search_term: Text to search for; turned into a URL by
            ``parse_for_MedStory_genl``.
        genl_search_file: An open, writable file-like object. The page
            content is written to it; it is NOT closed here, so the caller
            keeps ownership of it.

    Returns:
        None.
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = URL_stream_genl.read()
    finally:
        # Release the connection even when the read fails part-way.
        URL_stream_genl.close()
    genl_search_file.write(page)
    
def get_drug_names(drug_list, search_file_name):
    """Collect the link texts marked ``_Drug">`` from a saved search page.

    For every line of the file that contains ``_Drug">``, the text between
    the first such marker and the next ``</a>`` after it is appended to
    ``drug_list``. At most one name is taken per line.

    Args:
        drug_list: List that the extracted names are appended to (mutated
            in place).
        search_file_name: Path of the saved HTML file to scan.

    Returns:
        The same ``drug_list`` object, for convenience.
    """
    marker = '_Drug">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            start = marker_pos + len(marker)
            # Look for the closing tag only AFTER the marker, so an earlier
            # link on the same line cannot truncate the name.
            end = line.find(closing_tag, start)
            if end == -1:
                # No closing tag on this line: keep the remainder of the
                # line, minus its line ending.
                drug_list.append(line[start:].rstrip('\r\n'))
            else:
                drug_list.append(line[start:end])
    finally:
        # Close the file even if reading fails.
        search_file.close()
    return drug_list

def get_procedure_names(procedure_list, search_file_name):
    """Collect the link texts marked ``_Therapy">`` from a saved search page.

    For every line of the file that contains ``_Therapy">``, the text
    between the first such marker and the next ``</a>`` after it is
    appended to ``procedure_list``. At most one name is taken per line.

    Args:
        procedure_list: List that the extracted names are appended to
            (mutated in place).
        search_file_name: Path of the saved HTML file to scan.

    Returns:
        The same ``procedure_list`` object, for convenience.
    """
    marker = '_Therapy">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            start = marker_pos + len(marker)
            # Look for the closing tag only AFTER the marker, so an earlier
            # link on the same line cannot truncate the name.
            end = line.find(closing_tag, start)
            if end == -1:
                # No closing tag on this line: keep the remainder of the
                # line, minus its line ending.
                procedure_list.append(line[start:].rstrip('\r\n'))
            else:
                procedure_list.append(line[start:end])
    finally:
        # Close the file even if reading fails.
        search_file.close()
    return procedure_list

def get_experts_names(experts_list, search_file_name):
    """Collect the link texts marked ``_Person">`` from a saved search page.

    For every line of the file that contains ``_Person">``, the text
    between the first such marker and the next ``</a>`` after it is
    appended to ``experts_list``. At most one name is taken per line.

    Args:
        experts_list: List that the extracted names are appended to
            (mutated in place).
        search_file_name: Path of the saved HTML file to scan.

    Returns:
        The same ``experts_list`` object, for convenience.
    """
    marker = '_Person">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            start = marker_pos + len(marker)
            # Look for the closing tag only AFTER the marker, so an earlier
            # link on the same line cannot truncate the name.
            end = line.find(closing_tag, start)
            if end == -1:
                # No closing tag on this line: keep the remainder of the
                # line, minus its line ending.
                experts_list.append(line[start:].rstrip('\r\n'))
            else:
                experts_list.append(line[start:end])
    finally:
        # Close the file even if reading fails.
        search_file.close()
    return experts_list

def get_clinical_names(clinical_list, search_file_name):
    """Collect the link texts marked ``_ExperimentalDrug">`` from a saved page.

    For every line of the file that contains ``_ExperimentalDrug">``, the
    text between the first such marker and the next ``</a>`` after it is
    appended to ``clinical_list``. At most one name is taken per line.

    Args:
        clinical_list: List that the extracted names are appended to
            (mutated in place).
        search_file_name: Path of the saved HTML file to scan.

    Returns:
        The same ``clinical_list`` object, for convenience.
    """
    marker = '_ExperimentalDrug">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            start = marker_pos + len(marker)
            # Look for the closing tag only AFTER the marker, so an earlier
            # link on the same line cannot truncate the name.
            end = line.find(closing_tag, start)
            if end == -1:
                # No closing tag on this line: keep the remainder of the
                # line, minus its line ending.
                clinical_list.append(line[start:].rstrip('\r\n'))
            else:
                clinical_list.append(line[start:end])
    finally:
        # Close the file even if reading fails.
        search_file.close()
    return clinical_list


# Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Return the MedStory ``s=ClinicalTrial`` search URL for ``search_term``.

    The term is lower-cased and then form-encoded, so spaces become ``+``
    and an apostrophe becomes ``%27`` exactly as before, while reserved
    characters such as ``&``, ``#``, ``%`` and ``+`` are now percent-escaped.
    This matters here because further parameters follow ``q=`` in the URL:
    an unescaped ``&`` in the term would cut the query short.

    Args:
        search_term: Text to search for (a plain string).

    Returns:
        The complete URL as a string, with the encoded term as the ``q``
        parameter.
    """
    # Imported locally because quote_plus lives in ``urllib`` on Python 2
    # and in ``urllib.parse`` on Python 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    parsed_term = quote_plus(search_term.lower())
    return "http://www.medstory.com/app?service=external&tc=c1&page=Search&q=%s&s=ClinicalTrial&c=true&i=" % parsed_term

##def parse_for_eMed(search_term):
##    parsed_term = search_term.lower().replace(' ', '%20')
##    return "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=%s" % parsed_term
##
##def parse_for_Google_genl(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://www.google.com/search?hl=en&q=%s&btnG=Search" % parsed_term 
##
##def parse_for_Google_treatment(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://www.google.com/search?hl=en&q=%s+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1" % parsed_term
##
##def parse_for_Wikipedia(search_term):
##    parsed_term = search_term.lower().capitalize().replace("'", '%27').replace(' ', '_')
##    return "http://en.wikipedia.org/wiki/%s" % parsed_term
##
##def parse_for_WHO(search_term):
##    parsed_term = search_term.lower().replace("'", '%27').replace(' ', '+')
##    return "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q=%s&Search=Search" % parsed_term
##
##def parse_for_GeneCards(search_term):
##    parsed_term = search_term.lower().replace(" ", '+')
##    # NB: This only gives a functionally correct search if the search_term is a name of a disease
##    # because there are other formats for different inputs and different forms of the input
##    return "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search=%s#MICROCARDS" % parsed_term
##
##def return_site_list_for_disease(search_term):
##    # Currently returns site-name and URL list
##    # ex. [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"]]
##    return [["MedStory, clinical trials", parse_for_MedStory_clinical(search_term)],
##            ["eMedicine", parse_for_eMed(search_term)],
##            ["Google, general search", parse_for_Google_genl(search_term)],
##            ["Google, Treatment search", parse_for_Google_treatment(search_term)],
##            ["Wikipedia", parse_for_Wikipedia(search_term)],
##            ["WHO", parse_for_WHO(search_term)],
##            ["GeneCards", parse_for_GeneCards(search_term)]]

## Stuff actually happening in the program

# Ask for the term to look up; the saved page is named after it.
search_term = raw_input('''Please enter in the search_term (will be returned in real version): ''')
search_file_name = "%s_medstory.html" % search_term

# Download the MedStory result page to disk. The file is closed in
# ``finally`` so the handle is not leaked when the download fails.
q = open(search_file_name, 'w')
try:
    get_MedStory_search_file(search_term, q)
finally:
    q.close()

drug_list = []
procedure_list = []
experts_list = []
clinical_list = []      # refers to drugs currently in clinical trials

# Each getter re-reads the saved page and appends its matches in place.
get_drug_names(drug_list, search_file_name)
get_procedure_names(procedure_list, search_file_name)
get_experts_names(experts_list, search_file_name)
get_clinical_names(clinical_list, search_file_name)

# Single-argument parenthesised print produces the same output as the
# bare print statement under Python 2.
print(drug_list)
print(procedure_list)
print(experts_list)
print(clinical_list)

print(parse_for_MedStory_clinical(search_term))

##final_list = return_site_list_for_disease(search_term)
##print final_list