User talk:Anugraha Raman/AbnormalStops.py
<syntax type="python>
- BiPython Script Created By Anugraha Raman
- For BP 101 September 24, 2009 Problem #4
from Bio import SeqIO from Bio.Seq import Seq from Bio.Alphabet import IUPAC from random import * import os
- Functions defined in this script file are as follows:
- writeheader(myfile) : Writes a specific HTML header using the myfile
handle
- writefooter(myfile) : Writes a specific HTML footer using the myfile
handle
- writerunsummary(myfile) : Writes a specific set of sumamry information
using the myfile handle
- mutatesinglebp(seq, random_Seed, forevery, prob) : Mutates a single
base pair for forevery
- location range using a probability of prob;
returns mutated sequence
- writeAA(myfile, seq,stop_locs) : Writes the amino acids using the myfile
handle
- findstops(seq) : Finds the stop locations in the given DNA
sequence
def writeheader(my_file):
#Write an extra cool header file header = str ('<html><font face="Trebuchet"size="3" color="#2171B7" >
Biophysisc 101: Genomics, Computing and Economics >> ')
header = header + str(' Problem 4: ') my_file.write(header) my_file.write('</font>') my_file.write('<img border="2" src="2009BP101-logo1.png">') my_file.write('</img>') my_file.write('<p><p>')
- end of function writeheader
def writefooter(my_file):
# write the footer on the html file footer = str ('<p><p><font face="Trebuchet"size="1" color="#2171B7" >
BioPython Scripting By: Anugraha Raman ')
footer = footer + str ('Script Source: <a href=AbnormalStops.py>
AbnormalStops.py </a>')
my_file.write(footer) my_file.write('</font> </html>')
- end of function writefooter
def writerunsummary (runs):
output_file.write('
[Total # of Runs: ' + str(runs)) output_file.write('] ') output_file.write('Normal Stops are noted in blue ') output_file.write('Abnormal Stops are noted in red or ') output_file.write(str(' green '))
- end of function writerunheader
- mutate_singlebp mutates a single base pair for forevery location range
using a
- probability of prob
def mutate_singlebp(seq, random_seed=0, forevery=100, prob=0.01): # reset seed if random_seed == 0: seed() else: seed(random_seed) for x in range(0,forevery): r=randrange(0,1/prob) if r == 0: mutate_pos = randrange(0,len(seq)-1) old_base = seq[mutate_pos] if old_base == 'a': mutated_base = choice(['c', 't', 'g']) elif old_base == 'c': mutated_base = choice(['a', 't', 'g']) elif old_base == 't': mutated_base = choice(['a', 'c', 'g']) else: # 'g' mutated_base = choice(['a', 'c', 't']) # mutable sequence seq is updated seq[mutate_pos] = mutated_base # end if # end for
- end of function mutate_singlebp
def writeAA(my_file, seq,stop_locs): my_file.write('
') my_file.write(Seq.translate(seq).tostring()) my_file.write('
') #my_file.write('
') #my_file.write(str(stop_locs)) #my_file.write('
')
- end of function writeAA
def findstops(seq): stop_array = [] start = 0 stop_pos = 0 while stop_pos != -1: stop_pos = Seq.translate(seq).find('*',start) start = stop_pos + 1 stop_array = stop_array + [stop_pos] stop_array.remove(-1) return stop_array # end of while
- end of function findstops
- Start my script
input_file = open('p53seg.txt', 'r') output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob4.html') output_file = open(output_file_name, 'w') writeheader(output_file)
- only one record for now but can expand this later to include multiple
records for cur_record in SeqIO.parse(input_file, "fasta"): my_seq = cur_record.seq a = findstops(my_seq) writeAA(output_file, my_seq, a) freq_count = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- number of simulations
sim_num = 800 for sim_count in range(0, sim_num): # number of runs within a simulation num_of_runs = 100 pt_array = [] premature_termination_count = 0 # not counting the missing stop abnormality currently only premature terminations #ms_array = [] #missing_stop_count = 0 for run_num in range (0,num_of_runs): # I have to create a mutable sequence to use the mutate_singlebp function rand_seq = my_seq.tomutable() # calling the function that will do msot of the heavy lifting mutate_singlebp(rand_seq) # I have to convert the mutable sequence back to a normal DNA sequence in order to use the transcribe() method new_seq = Seq(rand_seq.toseq().tostring(), IUPAC.unambiguous_dna) b = findstops(new_seq) if a != b: notification_str = str('
[Sim #: ') + str(sim_count) + str(' |Run #: ') + str(run_num) notification_str = notification_str + str('] Stop Locations ') for loc in b: if a.count(loc) == 0:# write the premature termination Stop location in Red notification_str = notification_str + str(' Premature Termination @: ') notification_str = notification_str + str(loc) + str('') premature_termination_count = premature_termination_count + 1 else: # write the normal Stop locations in Blue notification_str = notification_str + str(' ') notification_str = notification_str + str(loc) + str('') # end For loop # actually write out the string created above output_file.write(notification_str) for loc in a: if b.count(loc) == 0:# write the missing Stop locations in Green output_file.write(' Missing Stop Mutation @: ') output_file.write(str(loc) + str('')) #missing_stop_count = missing_stop_count + 1 writeAA(output_file, new_seq, b) # count the number of premature terminations in current simulation pt_array = pt_array + [premature_termination_count] # ms_array = ms_array + [missing_stop_count] # exit the for(run_num) loop # Gather the frequency distribution for i in range(0,10): freq_count[i] = freq_count[i] + pt_array.count(i)
- end for sim_count loop
writerunsummary(num_of_runs) writefooter(output_file) input_file.close()
- close the file handle so the file is actually written to disk
output_file.close() print 'Completed Mutation runs .... finished writing' + str(output_file_name)
- Open internet explorer and display the file
os.system('explorer ' + output_file_name)
- Print final distribution count
print freq_count import numpy as np import matplotlib.pyplot as plt from pylab import * t1 = np.arange(1.0, 11.0, 1.0) title_string = 'BP 101: 9/24, #4 aggregated results By Anugraha Raman: ' title_string += str(sim_num) title_string += ' simulations of ' title_string += str(num_of_runs) title_string += ' runs of p53 seg sequence(1020 bp) mutation set at 1% rate' title(title_string)
- plot #1
subplot(111) for i in range (0,10): s1 = freq_count plt.plot(t1, s1, 'r^') grid(True) xlabel('# of Early Termination Mutations ') ylabel('Frequency of Occurrence') plt.show() </syntax>