Http://openwetware.org/wiki/Anugraha Raman/usefultool: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
(New page: <syntax> # # BiPython Script Created By Anugraha Raman # For BP 101 November 27, 2009 # Parse GWAS Get unique traits, SNPs # Get population diversity from NCBI's dbSNP # #from Bio import ...)
(No difference)

Revision as of 14:44, 1 December 2009

<syntax>

#
# BioPython Script Created By Anugraha Raman
# For BP 101 November 27, 2009
# Parse GWAS; get unique traits, SNPs
# Get population diversity from NCBI's dbSNP
#
#from Bio import SeqIO

import csv
import os
import sre
import urllib
from xml.dom import minidom
from xml.dom.minidom import parse, parseString

from Bio.Blast import NCBIWWW
from Bio.EUtils import DBIdsClient

#import sre, urllib2, sys, BaseHTTPServer
#from threading import Thread
#import pickle, sys, time, urllib


# Functions defined in this script file are as follows:
# writeheader(my_file)           : Writes a specific HTML header using the my_file handle
# writefooter(my_file)           : Writes a specific HTML footer using the my_file handle
# get_snp_url(rsid)              : Takes an rsid and returns the URL for retrieving population diversity
# write_traithtml(mydict,myfile) : Writes out the truthmain.html file output
# get_trait(my_file)             : Does the main parsing of the tab-delimited GWAS file and gets trait info

def writeheader(my_file):
    """Write the opening HTML banner of the report to *my_file*.

    Five pieces are written, in this order: the opening ``<html>``/``<font>``
    markup carrying the course title, the closing ``</font>``, the logo
    ``<img>`` tag, its ``</img>`` closer, and two ``<p>`` tags.

    my_file: any object with a ``write(str)`` method (the open output file).
    """
    title_markup = (
        '<html><font face="Trebuchet"size="3" color="#2171B7" > Biophysisc 101: Genomics, Computing and Economics >>  '
        + ' TRUTH <P>'
    )
    banner_pieces = (
        title_markup,
        '</font>',
        '<img border="2" src="2009BP101-logo1.png">',
        '</img>',
        '<p><p>',
    )
    # One write per piece, so the handle sees the same sequence of calls.
    for piece in banner_pieces:
        my_file.write(piece)

def writefooter(my_file):
    """Write the closing credit line and ``</html>`` tag to *my_file*.

    Two writes are issued: the credit text with the script-source link, then
    the closing ``</font> </html>`` markup.

    my_file: any object with a ``write(str)`` method (the open output file).
    """
    credit = '<p><p><font face="Trebuchet"size="1" color="#2171B7" > BioPython Scripting By: Anugraha Raman '
    # NOTE(review): the href target ("Anugrah_...") and the link label
    # ("Anugraha_...") differ by one letter - confirm which file name is right.
    source_link = 'Script Source: <a href=Anugrah_gwas-dbsnp_r3.py> Anugraha_gwas-dbsnp_r3.py </a>'
    for piece in (credit + source_link, '</font> </html>'):
        my_file.write(piece)

def get_snp_url(rsid):
    """Return the dbSNP report URL for *rsid*, anchored at its Diversity section.

    rsid: an rs identifier such as ``'rs12345'``.  Surrounding whitespace is
        ignored, the ``rs`` prefix may be in either case, and a bare number
        (``'12345'``) is accepted as-is.

    Returns the URL string ``<SNP_URL><number>#Diversity``.
    """
    SNP_URL = 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs='
    snp_id_number = rsid.strip()
    # str.strip('rs') treats its argument as a *set of characters* to trim
    # from both ends; remove exactly one leading "rs" prefix instead.
    if snp_id_number[:2].lower() == 'rs':
        snp_id_number = snp_id_number[2:]
    return SNP_URL + snp_id_number + '#Diversity'  # population diversity

def write_traithtml(mydict, myfile):
    """Write one report section per trait to *myfile*.

    mydict: maps a trait name to a two-item sequence; item 0 is an iterable
        of rsid strings and item 1 an iterable of PubMed id strings (the
        structure get_trait() returns).
    myfile: any object with a ``write(str)`` method.

    For each trait this writes the trait name, a ' + '-terminated list of
    rsid links (targets built by get_snp_url), a 'More info' line with
    ' + '-terminated PubMed links, and a dashed separator.
    """
    # NOTE(review): in the wiki copy of this script several literals appear
    # as empty strings or as raw line breaks.  HTML tags may have been
    # swallowed by the wiki renderer - confirm against the original script
    # before relying on the exact markup below.
    for trait, entry in mydict.items():
        myfile.write('')
        myfile.write(trait)  # write the trait
        myfile.write('  ')
        for rsid in entry[0]:  # for the rsids
            myfile.write('<a href=')
            myfile.write(get_snp_url(rsid))  # url link to pop diversity
            myfile.write('>')
            myfile.write(rsid)
            myfile.write('</a>')
            myfile.write(' + ')
        myfile.write('\n\n')
        myfile.write('')
        myfile.write('More info')
        myfile.write(' ')
        for pubmed_id in entry[1]:  # for the pubmedids
            myfile.write('<a href=http://www.ncbi.nlm.nih.gov/pubmed/')  # pubmed public url
            myfile.write(pubmed_id)  # write the pubmedid
            myfile.write('> pubmed: ')
            myfile.write(pubmed_id)
            myfile.write('</a>')
            myfile.write(' + ')
        myfile.write('\n\n')
        myfile.write('----------------------------------------------------\n\n')


def get_trait(my_file, gwas_path='gwas.txt'):
    """Parse the tab-delimited GWAS file and group its records by trait.

    my_file: unused; kept so existing callers that pass the output handle
        continue to work.
    gwas_path: path of the tab-delimited file to read.  It must provide the
        columns 'Disease/Trait', 'SNPs' and 'PubMedID'.  Defaults to
        'gwas.txt' in the current working directory.

    Returns a dict mapping each trait to ``[snp_set, pubmed_set]``, the sets
    of 'SNPs' and 'PubMedID' column values seen for exactly that trait.
    Rows whose 'SNPs' value does not contain 'rs' are skipped.

    Raises KeyError if a required column is missing from the file.
    """
    trait_dict = {}
    gwas_file = open(gwas_path, 'r')
    try:
        reader = csv.DictReader(gwas_file, dialect='excel-tab')
        for row in reader:
            # Short rows yield None for missing cells; treat that as empty.
            snps = row['SNPs'] or ''
            if 'rs' not in snps:
                # do not include GWAS records without rsid numbers
                continue
            # Group on the exact trait name.  A substring test would also
            # attach "Height" records to a trait such as "Height (adult)".
            entry = trait_dict.setdefault(row['Disease/Trait'], [set(), set()])
            entry[0].add(snps)
            entry[1].add(row['PubMedID'])
    finally:
        # Release the input file even if parsing fails part-way.
        gwas_file.close()
    print(set(trait_dict))  # unique set of traits
    return trait_dict

# Main
# All the relevant output will be written to a file.

# Build the report: banner, one section per GWAS trait, then the footer.
out_file_name = os.path.join(os.getcwd(), 'truthmain.html')
out_file = open(out_file_name, 'w')
try:
    writeheader(out_file)
    thetrait_dict = get_trait(out_file)  # parse trait info details from GWAS file
    write_traithtml(thetrait_dict, out_file)  # output the main html file
    writefooter(out_file)
finally:
    # close the file handle so the file is actually written to disk,
    # even when one of the steps above raises
    out_file.close()
print('Completed run' + str(out_file_name))

#os.system('explorer ' + out_file_name)

os.system('explorer ' + 'truth.htm')
</syntax>