User talk:Anugraha Raman/AbnormalStops.py

BiPython Script Created By Anugraha Raman
For BP 101 September 24, 2009 Problem #4

from Bio import SeqIO from Bio.Seq import Seq from Bio.Alphabet import IUPAC from random import * import os

Functions defined in this script file are as follows:
writeheader(myfile) : Writes a specific HTML header using the myfile

handle

writefooter(myfile) : Writes a specific HTML footer using the myfile

handle

writerunsummary(myfile) : Writes a specific set of sumamry information

using the myfile handle

mutatesinglebp(seq, random_Seed, forevery, prob) : Mutates a single

base pair for forevery

location range using a probability of prob;

returns mutated sequence

writeAA(myfile, seq,stop_locs) : Writes the amino acids using the myfile

handle

findstops(seq) : Finds the stop locations in the given DNA

sequence

def writeheader(my_file):

  #Write an extra cool header file
  header = str ('<html><font face="Trebuchet"size="3" color="#2171B7" >

Biophysisc 101: Genomics, Computing and Economics >> ')

  header = header + str(' Problem 4: ')
  my_file.write(header)
  my_file.write('</font>')
  my_file.write('<img border="2" src="2009BP101-logo1.png">')
  my_file.write('</img>')
  my_file.write('<p><p>')

end of function writeheader

def writefooter(my_file):

  # write the footer on the html file
  footer = str ('<p><p><font face="Trebuchet"size="1" color="#2171B7" >

BioPython Scripting By: Anugraha Raman ')

  footer = footer + str ('Script Source: <a href=AbnormalStops.py>

AbnormalStops.py </a>')

  my_file.write(footer)
  my_file.write('</font> </html>')

end of function writefooter

def writerunsummary (runs):

output_file.write('

[Total # of Runs: ' + str(runs)) output_file.write('] ') output_file.write('Normal Stops are noted in blue ') output_file.write('Abnormal Stops are noted in red or ') output_file.write(str(' green '))

end of function writerunheader

mutate_singlebp mutates a single base pair for forevery location range

using a

probability of prob

def mutate_singlebp(seq, random_seed=0, forevery=100, prob=0.01): # reset seed if random_seed == 0: seed() else: seed(random_seed) for x in range(0,forevery): r=randrange(0,1/prob) if r == 0: mutate_pos = randrange(0,len(seq)-1) old_base = seq[mutate_pos] if old_base == 'a': mutated_base = choice(['c', 't', 'g']) elif old_base == 'c': mutated_base = choice(['a', 't', 'g']) elif old_base == 't': mutated_base = choice(['a', 'c', 'g']) else: # 'g' mutated_base = choice(['a', 'c', 't']) # mutable sequence seq is updated seq[mutate_pos] = mutated_base # end if # end for

end of function mutate_singlebp

def writeAA(my_file, seq,stop_locs): my_file.write('

') my_file.write(Seq.translate(seq).tostring()) my_file.write('

') #my_file.write('

') #my_file.write(str(stop_locs)) #my_file.write('

')

end of function writeAA

def findstops(seq): stop_array = [] start = 0 stop_pos = 0 while stop_pos != -1: stop_pos = Seq.translate(seq).find('*',start) start = stop_pos + 1 stop_array = stop_array + [stop_pos] stop_array.remove(-1) return stop_array # end of while

end of function findstops

Start my script

input_file = open('p53seg.txt', 'r') output_file_name = os.path.join(os.getcwd(), 'anugraha-092409-prob4.html') output_file = open(output_file_name, 'w') writeheader(output_file)

only one record for now but can expand this later to include multiple

records for cur_record in SeqIO.parse(input_file, "fasta"): my_seq = cur_record.seq a = findstops(my_seq) writeAA(output_file, my_seq, a) freq_count = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

number of simulations

sim_num = 800 for sim_count in range(0, sim_num): # number of runs within a simulation num_of_runs = 100 pt_array = [] premature_termination_count = 0 # not counting the missing stop abnormality currently only premature terminations #ms_array = [] #missing_stop_count = 0 for run_num in range (0,num_of_runs): # I have to create a mutable sequence to use the mutate_singlebp function rand_seq = my_seq.tomutable() # calling the function that will do msot of the heavy lifting mutate_singlebp(rand_seq) # I have to convert the mutable sequence back to a normal DNA sequence in order to use the transcribe() method new_seq = Seq(rand_seq.toseq().tostring(), IUPAC.unambiguous_dna) b = findstops(new_seq) if a != b: notification_str = str('

[Sim #: ') + str(sim_count) + str(' |Run #: ') + str(run_num) notification_str = notification_str + str('] Stop Locations ') for loc in b: if a.count(loc) == 0:# write the premature termination Stop location in Red notification_str = notification_str + str(' Premature Termination @: ') notification_str = notification_str + str(loc) + str('') premature_termination_count = premature_termination_count + 1 else: # write the normal Stop locations in Blue notification_str = notification_str + str(' ') notification_str = notification_str + str(loc) + str('') # end For loop # actually write out the string created above output_file.write(notification_str) for loc in a: if b.count(loc) == 0:# write the missing Stop locations in Green output_file.write(' Missing Stop Mutation @: ') output_file.write(str(loc) + str('')) #missing_stop_count = missing_stop_count + 1 writeAA(output_file, new_seq, b) # count the number of premature terminations in current simulation pt_array = pt_array + [premature_termination_count] # ms_array = ms_array + [missing_stop_count] # exit the for(run_num) loop # Gather the frequency distribution for i in range(0,10): freq_count[i] = freq_count[i] + pt_array.count(i)

end for sim_count loop

writerunsummary(num_of_runs) writefooter(output_file) input_file.close()

close the file handle so the file is actually written to disk

output_file.close() print 'Completed Mutation runs .... finished writing' + str(output_file_name)

Open internet explorer and display the file

os.system('explorer ' + output_file_name)

Print final distribution count

print freq_count import numpy as np import matplotlib.pyplot as plt from pylab import * t1 = np.arange(1.0, 11.0, 1.0) title_string = 'BP 101: 9/24, #4 aggregated results By Anugraha Raman: ' title_string += str(sim_num) title_string += ' simulations of ' title_string += str(num_of_runs) title_string += ' runs of p53 seg sequence(1020 bp) mutation set at 1% rate' title(title_string)

plot #1

subplot(111) for i in range (0,10): s1 = freq_count plt.plot(t1, s1, 'r^') grid(True) xlabel('# of Early Termination Mutations ') ylabel('Frequency of Occurrence') plt.show() </syntax>

User talk:Anugraha Raman/AbnormalStops.py

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools