TChan/Notebook/2007-4-24

From OpenWetWare

Jump to: navigation, search

Continuing Goals

  • XML-parse MedStory's "Clinical Trials" feed
    • UPDATE: No need, since Deniz's code already does this for the "News" feed, and the only thing we'd need to change would be the URL the XML comes from.
  • Reorganize and document the Project page
    • Should wait until 4.26 to talk to the class, ask about the necessary details, and make sure no one else is already doing this (or something like it) with the documentation

Update

  • Reviewed code to figure out EOF error; found missing end-bracket, fixed

Working Code

import urllib

# Definitions of functions

def parse_for_MedStory_genl(search_term):
    """Build the MedStory general (Web) search URL for ``search_term``.

    The term is lower-cased, apostrophes become ``%27`` and spaces become
    ``+``; no other characters are escaped.
    """
    query = search_term.lower()
    for raw, escaped in (("'", '%27'), (' ', '+')):
        query = query.replace(raw, escaped)
    base_url = "http://www.medstory.com/app?service=external&page=Search&c=true&s=Web&tc=h1&q="
    return base_url + query

def get_MedStory_search_file(search_term, genl_search_file):
    """Download the MedStory general-search page for ``search_term``.

    search_term      -- term to search for; turned into a URL by
                        parse_for_MedStory_genl
    genl_search_file -- writable file-like object that receives the raw
                        page; this function does not close it
    """
    URL_stream_genl = urllib.urlopen(parse_for_MedStory_genl(search_term))
    try:
        page = URL_stream_genl.read()
    finally:
        # Release the connection even if the read fails part-way through.
        URL_stream_genl.close()
    genl_search_file.write(page)
    
def get_drug_names(drug_list, search_file_name):
    """Append the drug names found in a saved MedStory results page.

    A drug link is recognised by the text ``_Drug">`` on a line; the name is
    whatever sits between that marker and the next ``</a>``.  Only the first
    marker on each line is considered.

    drug_list        -- list that receives the names (modified in place)
    search_file_name -- path of the saved HTML page to scan

    Returns the same ``drug_list`` object.
    """
    marker = '_Drug">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(marker)
            # Look for the closing tag only after the marker: an earlier
            # "</a>" on the same line belongs to a different link.
            name_end = line.find(closing_tag, name_start)
            if name_end == -1:
                # No closing tag on this line: keep the rest of the line
                # instead of slicing with -1 and losing a character.
                drug_list.append(line[name_start:].rstrip('\r\n'))
            else:
                drug_list.append(line[name_start:name_end])
    finally:
        # try/finally rather than "with" so old Python 2 releases still work.
        search_file.close()
    return drug_list

def get_procedure_names(procedure_list, search_file_name):
    """Append the procedure names found in a saved MedStory results page.

    A procedure link is recognised by the text ``_Therapy">`` on a line; the
    name is whatever sits between that marker and the next ``</a>``.  Only
    the first marker on each line is considered.

    procedure_list   -- list that receives the names (modified in place)
    search_file_name -- path of the saved HTML page to scan

    Returns the same ``procedure_list`` object.
    """
    marker = '_Therapy">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(marker)
            # Look for the closing tag only after the marker: an earlier
            # "</a>" on the same line belongs to a different link.
            name_end = line.find(closing_tag, name_start)
            if name_end == -1:
                # No closing tag on this line: keep the rest of the line
                # instead of slicing with -1 and losing a character.
                procedure_list.append(line[name_start:].rstrip('\r\n'))
            else:
                procedure_list.append(line[name_start:name_end])
    finally:
        # try/finally rather than "with" so old Python 2 releases still work.
        search_file.close()
    return procedure_list

def get_experts_names(experts_list, search_file_name):
    """Append the expert names found in a saved MedStory results page.

    An expert link is recognised by the text ``_Person">`` on a line; the
    name is whatever sits between that marker and the next ``</a>``.  Only
    the first marker on each line is considered.

    experts_list     -- list that receives the names (modified in place)
    search_file_name -- path of the saved HTML page to scan

    Returns the same ``experts_list`` object.
    """
    marker = '_Person">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(marker)
            # Look for the closing tag only after the marker: an earlier
            # "</a>" on the same line belongs to a different link.
            name_end = line.find(closing_tag, name_start)
            if name_end == -1:
                # No closing tag on this line: keep the rest of the line
                # instead of slicing with -1 and losing a character.
                experts_list.append(line[name_start:].rstrip('\r\n'))
            else:
                experts_list.append(line[name_start:name_end])
    finally:
        # try/finally rather than "with" so old Python 2 releases still work.
        search_file.close()
    return experts_list

def get_clinical_names(clinical_list, search_file_name):
    """Append the experimental-drug names found in a saved MedStory page.

    An experimental-drug link is recognised by the text
    ``_ExperimentalDrug">`` on a line; the name is whatever sits between
    that marker and the next ``</a>``.  Only the first marker on each line
    is considered.

    clinical_list    -- list that receives the names (modified in place)
    search_file_name -- path of the saved HTML page to scan

    Returns the same ``clinical_list`` object.
    """
    marker = '_ExperimentalDrug">'
    closing_tag = '</a>'
    search_file = open(search_file_name, 'r')
    try:
        for line in search_file:
            marker_pos = line.find(marker)
            if marker_pos == -1:
                continue
            name_start = marker_pos + len(marker)
            # Look for the closing tag only after the marker: an earlier
            # "</a>" on the same line belongs to a different link.
            name_end = line.find(closing_tag, name_start)
            if name_end == -1:
                # No closing tag on this line: keep the rest of the line
                # instead of slicing with -1 and losing a character.
                clinical_list.append(line[name_start:].rstrip('\r\n'))
            else:
                clinical_list.append(line[name_start:name_end])
    finally:
        # try/finally rather than "with" so old Python 2 releases still work.
        search_file.close()
    return clinical_list


# Parsing functions to display relevant URLs

def parse_for_MedStory_clinical(search_term):
    """Build the MedStory "Clinical Trials" search URL for ``search_term``.

    The term is lower-cased, apostrophes become ``%27`` and spaces become
    ``+`` before it is placed in the ``q`` parameter.
    """
    lowered = search_term.lower()
    query = '+'.join(lowered.replace("'", '%27').split(' '))
    url_head = "http://www.medstory.com/app?service=external&tc=c1&page=Search&q="
    url_tail = "&s=ClinicalTrial&c=true&i="
    return url_head + query + url_tail

def parse_for_eMed(search_term):
    """Build the eMedicine search URL for ``search_term``.

    The term is lower-cased and each space becomes ``%20``; apostrophes are
    passed through unchanged.
    """
    words = search_term.lower().split(' ')
    query = '%20'.join(words)
    endpoint = "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query="
    return endpoint + query

def parse_for_Google_genl(search_term):
    """Build a general Google search URL for ``search_term``.

    The term is lower-cased, apostrophes become ``%27`` and spaces become
    ``+`` before it is placed in the ``q`` parameter.
    """
    words = search_term.lower().replace("'", '%27').split(' ')
    pieces = ["http://www.google.com/search?hl=en&q=", '+'.join(words), "&btnG=Search"]
    return "".join(pieces)

def parse_for_Google_treatment(search_term):
    """Build the Google treatment-refined search URL for ``search_term``.

    The term is lower-cased, apostrophes become ``%27`` and spaces become
    ``+``; the fixed ``+more:condition_treatment`` refinement and the
    remaining query parameters are appended after it.
    """
    query = search_term.lower()
    query = query.replace(' ', '+')
    query = query.replace("'", '%27')
    refinement = "+more:condition_treatment&cx=disease_for_patients&sa=N&oi=cooptsr&resnum=0&ct=col1&cd=1"
    return "http://www.google.com/search?hl=en&q=" + query + refinement

def parse_for_Wikipedia(search_term):
    """Build the English Wikipedia article URL for ``search_term``.

    The term is lower-cased and then capitalised (first character upper
    case), apostrophes become ``%27`` and spaces become ``_``.
    """
    title = search_term.lower()
    title = title.capitalize()
    title = '_'.join(title.replace("'", '%27').split(' '))
    return "http://en.wikipedia.org/wiki/" + title

def parse_for_WHO(search_term):
    """Build the WHO site-search URL for ``search_term``.

    The term is lower-cased, apostrophes become ``%27`` and spaces become
    ``+`` before it is placed in the ``q`` parameter.
    """
    query = search_term.lower()
    for raw, escaped in (("'", '%27'), (' ', '+')):
        query = query.replace(raw, escaped)
    before_query = "http://search.who.int/search?ie=utf8&site=default_collection&client=WHO&proxystylesheet=WHO&output=xml_no_dtd&oe=utf8&q="
    after_query = "&Search=Search"
    return before_query + query + after_query

def parse_for_GeneCards(search_term):
    """Build the GeneCards keyword-search URL for ``search_term``.

    The term is lower-cased and spaces become ``+``; apostrophes are passed
    through unchanged.
    """
    # NB: This only gives a functionally correct search if the search_term is a name of a disease
    # because there are other formats for different inputs and different forms of the input
    keywords = '+'.join(search_term.lower().split(" "))
    search_url = "http://www.genecards.org/cgi-bin/cardsearch.pl?search_type=kwd&speed=fast&search="
    return search_url + keywords + "#MICROCARDS"

def return_site_list_for_disease(search_term):
    """Return ``[site_name, URL]`` pairs of search links for ``search_term``.

    Currently returns a site-name and URL list, one two-element list per
    site, e.g.
    [["eMedicine", "http://www.emedicine.com/cgi-bin/foxweb.exe/searchengine@/em/searchengine?boolean=and&book=all&maxhits=40&HiddenURL=&query=parsed-term"]]
    """
    # Display name paired with the function that builds that site's URL;
    # the order here is the order of the returned list.
    site_builders = (
        ("MedStory, clinical trials", parse_for_MedStory_clinical),
        ("eMedicine", parse_for_eMed),
        ("Google, general search", parse_for_Google_genl),
        ("Google, Treatment search", parse_for_Google_treatment),
        ("Wikipedia", parse_for_Wikipedia),
        ("WHO", parse_for_WHO),
        ("GeneCards", parse_for_GeneCards),
    )
    return [[site_name, build_url(search_term)]
            for site_name, build_url in site_builders]

# Main script: download the MedStory page for an example term, extract names from it, and print the results

search_term = """Hashimoto's Thyroiditis"""     # example search_term for now; will be returned by rest of program when finished    
search_file_name = "%s_medstory.html" % search_term

q = open(search_file_name, 'w')
get_MedStory_search_file(search_term, q)
q.close()

drug_list = []
procedure_list = []
experts_list = []
clinical_list = []      # refers to drugs currently in clinical trials

get_drug_names(drug_list, search_file_name)
get_procedure_names(procedure_list, search_file_name)
get_experts_names(experts_list, search_file_name)
get_clinical_names(clinical_list, search_file_name)

print drug_list
print procedure_list
print experts_list
print clinical_list

final_list = []
final_list = return_site_list_for_disease(search_term)
print final_list
Personal tools