Http://openwetware.org/wiki/Anugraha Raman/usefultool: Difference between revisions
(New page: <syntax> # # BiPython Script Created By Anugraha Raman # For BP 101 November 27, 2009 # Parse GWAS Get unique traits, SNPs # Get population diversity from NCBI's dbSNP # #from Bio import ...) |
(No difference)
|
Revision as of 14:44, 1 December 2009
<syntax>
# BioPython Script Created By Anugraha Raman
# For BP 101, November 27, 2009
# Parse the GWAS file to get the unique traits and their SNPs
# Get population diversity from NCBI's dbSNP
- from Bio import SeqIO
from Bio.Blast import NCBIWWW from Bio.EUtils import DBIdsClient
- import sre, urllib2, sys, BaseHTTPServer
from xml.dom import minidom from xml.dom.minidom import parse, parseString import csv import os, urllib import sre
- from threading import Thread
- import pickle, sys, time, urllib
# Functions defined in this script file are as follows:
# writeheader(my_file) : Writes a specific HTML header using the my_file handle
# writefooter(my_file) : Writes a specific HTML footer using the my_file handle
# get_snp_url(rsid) : Takes an rsid and returns the URL for retrieving population diversity
# write_traithtml(mydict, myfile) : Writes out the truthmain.html file output
# get_trait(my_file) : Does the main parsing of the tab-delimited GWAS file and gets trait info
#
def writeheader(my_file):
    """Write the opening HTML banner of the report to *my_file*.

    Emits the opening ``<html>`` tag, the course title in a styled
    ``<font>`` element, the course logo image and two paragraph breaks.

    Args:
        my_file: Any object with a ``write(str)`` method (an open text file).
    """
    # Write an extra cool header.  The fragments are kept byte-for-byte as
    # they appear in the script; they are only joined so that the header
    # goes out in a single write.
    fragments = [
        '<html><font face="Trebuchet"size="3" color="#2171B7" > Biophysisc 101: Genomics, Computing and Economics >> ',
        ' TRUTH <P>',
        '</font>',
        '<img border="2" src="2009BP101-logo1.png">',
        '</img>',
        '<p><p>',
    ]
    my_file.write(''.join(fragments))
def writefooter(my_file):
    """Write the closing HTML footer of the report to *my_file*.

    Emits the author credit, a link to the script source, and the closing
    ``</font>`` and ``</html>`` tags.

    Args:
        my_file: Any object with a ``write(str)`` method (an open text file).
    """
    # NOTE(review): the href says "Anugrah_..." while the link text says
    # "Anugraha_..." -- one of them is probably a typo; confirm which file
    # name actually exists before changing either.
    fragments = [
        '<p><p><font face="Trebuchet"size="1" color="#2171B7" > BioPython Scripting By: Anugraha Raman ',
        'Script Source: <a href=Anugrah_gwas-dbsnp_r3.py> Anugraha_gwas-dbsnp_r3.py </a>',
        '</font> </html>',
    ]
    my_file.write(''.join(fragments))
def get_snp_url(rsid):
    """Return the dbSNP reference-page URL for *rsid*, anchored at '#Diversity'.

    Args:
        rsid: SNP identifier string such as ``'rs1234'``.  Surrounding
            whitespace is ignored and the ``rs`` prefix is optional.

    Returns:
        The URL string ``<SNP_URL><number>#Diversity``.
    """
    SNP_URL = 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs='
    # Trim whitespace first: str.strip('rs') treats its argument as a set of
    # characters, so a padded id such as ' rs1234' would keep its prefix and
    # produce a broken link.  Then drop only a leading 'rs' prefix.
    snp_id_number = rsid.strip()
    if snp_id_number.startswith('rs'):
        snp_id_number = snp_id_number[2:]
    return SNP_URL + snp_id_number + '#Diversity'  # population diversity
def write_traithtml(mydict, myfile):
    """Write one HTML section per trait to *myfile*.

    For every trait the output is: the trait name, one link per rsid
    (pointing at the URL returned by ``get_snp_url``), a "More info" line
    with one PubMed link per PubMed id, and a dashed separator line.
    Traits, rsids and PubMed ids are written in sorted order so repeated
    runs produce identical files.

    Args:
        mydict: Mapping of trait name -> ``[rsids, pubmed_ids]`` (two
            iterables of strings), as returned by ``get_trait``.
        myfile: Any object with a ``write(str)`` method.
    """
    # NOTE(review): the bare newlines written below come from string literals
    # that were split across lines in the page this script was recovered
    # from; they look like HTML tags (e.g. <br>) lost in rendering --
    # confirm against the original script.
    for trait in sorted(mydict):
        rsids, pubmed_ids = mydict[trait][0], mydict[trait][1]
        pieces = [trait, ' ']  # the trait
        for rsid in sorted(rsids):
            # link each rsid to its population-diversity page
            pieces.extend(['<a href=', get_snp_url(rsid), '>', rsid, '</a>', ' + '])
        pieces.extend(['\n', 'More info', ' '])
        for pubmed_id in sorted(pubmed_ids):
            # PubMed public URL for the publication
            pieces.extend(['<a href=http://www.ncbi.nlm.nih.gov/pubmed/', pubmed_id,
                           '> pubmed: ', pubmed_id, '</a>', ' + '])
        pieces.append('\n')
        pieces.append('----------------------------------------------------\n')
        myfile.write(''.join(pieces))


def get_trait(my_file):
    """Parse ``gwas.txt`` and group rsids and PubMed ids by trait.

    Reads the tab-delimited file ``gwas.txt`` from the current working
    directory using its ``Disease/Trait``, ``SNPs`` and ``PubMedID``
    columns.  Records whose ``SNPs`` field does not contain ``'rs'`` are
    skipped.  The set of unique traits is printed to stdout.

    Args:
        my_file: Unused; kept so existing callers keep working.

    Returns:
        dict mapping each trait to ``[set_of_SNPs_values, set_of_PubMedIDs]``.
    """
    trait_dict = {}
    with open('gwas.txt', 'r') as gwas_file:
        reader = csv.DictReader(gwas_file, dialect='excel-tab')
        for row in reader:
            snps = row['SNPs'] or ''  # short rows yield None for missing fields
            if 'rs' not in snps:
                # do not include GWAS records without rsid numbers
                continue
            # Group by exact trait name.  A substring comparison would leak
            # the records of e.g. "Body mass" into "Body mass index".
            rs_set, pubmed_set = trait_dict.setdefault(
                row['Disease/Trait'], [set(), set()])
            rs_set.add(snps)
            pubmed_set.add(row['PubMedID'])
    print(set(trait_dict))  # unique set of traits
    return trait_dict
# Main
# All the relevant output will be to a file.
out_file_name = os.path.join(os.getcwd(), 'truthmain.html')
# The with-block closes the file handle even if one of the steps raises, so
# whatever was written is flushed to disk.
with open(out_file_name, 'w') as out_file:
    writeheader(out_file)
    thetrait_dict = get_trait(out_file)  # parse trait info details from GWAS file
    write_traithtml(thetrait_dict, out_file)  # output the main html file
    writefooter(out_file)
# Single-argument print(...) behaves the same under Python 2 and Python 3;
# the separator keeps the path from running into the message text.
print('Completed run: ' + str(out_file_name))
# os.system('explorer ' + out_file_name)
os.system('explorer ' + 'truth.htm')
</syntax>