Http://openwetware.org/wiki/Anugraha Raman/usefultool: Difference between revisions

Revision as of 14:44, 1 December 2009

BioPython Script Created By Anugraha Raman
For BP 101 November 27, 2009
Parse the GWAS file; get the unique traits and SNPs
Get population diversity from NCBI's dbSNP

from Bio import SeqIO

from Bio.Blast import NCBIWWW from Bio.EUtils import DBIdsClient

import sre, urllib2, sys, BaseHTTPServer

from xml.dom import minidom from xml.dom.minidom import parse, parseString import csv import os, urllib import sre

from threading import Thread
import pickle, sys, time, urllib

Functions defined in this script file are as follows:
writeheader(myfile) : Writes a specific HTML header using the myfile handle
writefooter(myfile) : Writes a specific HTML footer using the myfile handle
get_snp_url(rsid) : Takes a rsid and returns url for retrieving population diversity
write_traithtml(mydict,myfile): writes out the truthmain.html file output
get_trait(my_file) : Does the main parsing of the tab delimited GWAS file and gets trait info

def writeheader(my_file):
    """Write the opening HTML of the report to ``my_file``.

    Emits, in order: the ``<html>`` tag with the styled course banner,
    the closing ``</font>``, the logo ``<img>`` element with its closing
    tag, and two paragraph tags.

    my_file -- any object exposing a ``write(text)`` method.
    """
    banner = ''.join((
        '<html><font face="Trebuchet"size="3" color="#2171B7" > Biophysisc 101: Genomics, Computing and Economics >>  ',
        ' TRUTH <P>',
    ))
    # One write() call per fragment, in this exact order.
    fragments = (
        banner,
        '</font>',
        '<img border="2" src="2009BP101-logo1.png">',
        '</img>',
        '<p><p>',
    )
    for fragment in fragments:
        my_file.write(fragment)

def writefooter(my_file):
    """Write the closing credit line and ``</html>`` tag to ``my_file``.

    my_file -- any object exposing a ``write(text)`` method.
    """
    credit = '<p><p><font face="Trebuchet"size="1" color="#2171B7" > BioPython Scripting By: Anugraha Raman '
    # NOTE(review): the href reads 'Anugrah_...' while the link text reads
    # 'Anugraha_...' -- confirm which file name is the real one.
    source_link = 'Script Source: <a href=Anugrah_gwas-dbsnp_r3.py> Anugraha_gwas-dbsnp_r3.py </a>'
    my_file.write(credit + source_link)
    my_file.write('</font> </html>')

def get_snp_url(rsid):
    """Return the dbSNP report URL for ``rsid``, anchored at '#Diversity'.

    rsid -- SNP identifier string such as 'rs12345'. Surrounding
            whitespace is ignored and a leading 'rs' is optional.

    Returns the URL as a string: the snp_ref.cgi address with the
    identifier (minus its 'rs' prefix) as the ``rs`` query value.
    """
    SNP_URL = 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs='
    # Values read from a delimited file can carry stray blanks/newlines,
    # which must not end up inside the URL.
    snp_id_number = rsid.strip()
    # Drop the literal 'rs' prefix only. str.strip('rs') would instead
    # remove any run of 'r'/'s' characters from BOTH ends of the string.
    if snp_id_number.startswith('rs'):
        snp_id_number = snp_id_number[2:]
    return SNP_URL + snp_id_number + '#Diversity'  # population diversity

def write_traithtml(mydict, myfile):
    """Write one HTML section per trait to ``myfile``.

    mydict -- mapping of trait name to a two-item sequence
              ``[rsids, pubmed_ids]`` (the shape get_trait returns).
    myfile -- any object exposing a ``write(text)`` method.

    Each section holds the trait name, one link per rsid (target built
    by get_snp_url), a 'More info' line with one PubMed link per id, and
    a dashed separator. Traits and ids are emitted in sorted order so the
    generated page is the same from run to run.
    """
    # NOTE(review): this script was recovered from a rendered wiki page
    # that swallowed some HTML tags inside string literals (they appeared
    # as blank lines or empty strings). The '<p>' breaks below are a
    # reconstruction -- confirm against the original script. Inline
    # markup around the trait name may also have been lost.
    for trait in sorted(mydict):
        rsids = mydict[trait][0]
        pubmed_ids = mydict[trait][1]

        myfile.write(trait)  # write the trait
        myfile.write('  ')
        for rsid in sorted(rsids):
            # link to the population diversity section for this rsid
            myfile.write('<a href=%s>%s</a> + ' % (get_snp_url(rsid), rsid))
        myfile.write('<p>')

        myfile.write('More info')
        myfile.write(' ')
        for pubmed_id in sorted(pubmed_ids):
            # pubmed public url
            myfile.write('<a href=http://www.ncbi.nlm.nih.gov/pubmed/%s> pubmed: %s</a> + '
                         % (pubmed_id, pubmed_id))
        myfile.write('<p>')
        myfile.write('----------------------------------------------------<p>')


def get_trait(my_file, gwas_path='gwas.txt'):
    """Parse the tab-delimited GWAS file and group its records by trait.

    my_file   -- not used; kept so existing callers keep working.
    gwas_path -- path of the tab-delimited file to read. It must have
                 'Disease/Trait', 'SNPs' and 'PubMedID' columns.
                 Defaults to 'gwas.txt' in the working directory.

    Returns a dict mapping each trait to ``[snp_set, pubmed_set]``: the
    distinct 'SNPs' values and distinct 'PubMedID' values of the rows
    carrying exactly that trait. Rows whose 'SNPs' field does not
    contain 'rs' are skipped.
    """
    trait_dict = {}
    # The with-block closes the input file even if parsing raises.
    with open(gwas_path, 'r') as gwas_file:
        reader = csv.DictReader(gwas_file, dialect='excel-tab')
        for row in reader:
            snps = row['SNPs']
            # do not include GWAS records without rsid numbers
            # (short rows yield None for the missing field)
            if not snps or 'rs' not in snps:
                continue
            # Group on the exact trait string. A substring test would
            # fold e.g. 'Asthma' rows into 'Asthma (childhood onset)'.
            rs_set, pubmed_set = trait_dict.setdefault(
                row['Disease/Trait'], [set(), set()])
            rs_set.add(snps)
            pubmed_set.add(row['PubMedID'])
    # Console output of the unique traits, as the script has always done.
    print(set(trait_dict))
    return trait_dict

Main
All relevant output is written to a file.

# Build truthmain.html in the current working directory.
out_file_name = os.path.join(os.getcwd(), 'truthmain.html')

# The with-block closes the handle -- so the file is actually written to
# disk -- even if one of the steps below raises.
with open(out_file_name, 'w') as out_file:
    writeheader(out_file)
    thetrait_dict = get_trait(out_file)  # parse trait info details from GWAS file
    write_traithtml(thetrait_dict, out_file)  # output the main html file
    writefooter(out_file)

# Single-argument print(...) behaves the same under Python 2 and 3.
print('Completed run: ' + str(out_file_name))

# Open the results in Windows Explorer. The generated path is quoted
# because the working directory may contain spaces.
os.system('explorer "%s"' % out_file_name)

os.system('explorer ' + 'truth.htm')

Http://openwetware.org/wiki/Anugraha Raman/usefultool: Difference between revisions

Revision as of 14:44, 1 December 2009

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools