IGEM:Harvard/2006/Container Design 4/Python Code: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
No edit summary
Line 1: Line 1:
[[IGEM:Harvard/2006/Container_Design_4/Python_Code/Full_Code_Final|Full Code - Finalized 7/11]]
*[[IGEM:Harvard/2006/Container_Design_4/Python_Code/Full_Code_Final|Full Code - Finalized 7/11]]<br>
[[IGEM:Harvard/2006/Container_Design_4/Python_Code/Split_Scaffold|Split Scaffold]]
*[[IGEM:Harvard/2006/Container_Design_4/Python_Code/Split_Scaffold|Split Scaffold]]


==Split Oligos From User Input==
==Split Oligos From User Input==

Revision as of 08:58, 11 July 2006

Split Oligos From User Input

Script for splitting as many oligos as you want, driven by user input. The first part goes in main; the second part goes in honeycomb_pointers_v1.py.


#####
# Oligo splitting:
# get new list of oligos given user input specifying which oligo to cut
#####

num_to_split = int(raw_input('How many oligos do you want to split?'))
i = 0
new_OTP_ra = OTP_ra[:]
while i < num_to_split:  

        oligo_num = int(raw_input('Enter the number oligo you wish to split:'))
        print '\n'
        print 'How many tokens should the first new oligo be?'
        num_toks = int(raw_input('Number of toks starting from 5 prime: '))

        new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)
        
        print new_OTP_ra
        print len(OTP_ra)
        print len(new_OTP_ra)
        i = i + 1

####
# given an oligo to split, split it and return the new list of oligos
####
def split_oligo(new_OTP_ra, oligo_num, num_toks):
        """Split one oligo of new_OTP_ra in two and return the updated list.

        new_OTP_ra -- list of oligos, each a sequence of tokens; it is
                      modified in place and also returned
        oligo_num  -- index of the oligo to split
        num_toks   -- number of leading tokens kept in the first piece

        The first piece replaces the original entry and the remainder is
        inserted directly after it.  The original oligo and both pieces are
        echoed to stdout.  Raises IndexError if oligo_num is out of range.
        """
        original_oligo = new_OTP_ra[oligo_num]
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(original_oligo)

        oligo_1 = original_oligo[:num_toks]
        oligo_2 = original_oligo[num_toks:]
        print(oligo_1)
        print('\n')
        print(oligo_2)

        new_OTP_ra[oligo_num] = oligo_1
        new_OTP_ra.insert(oligo_num + 1, oligo_2)
        return new_OTP_ra



Split Oligos From File Input

Script for splitting oligos automatically from file input (make sure to import pickle at the top). This part goes in main. The script that writes the files it reads (using pickle) is further down, under "Pickle Split Parameters".

#####
# Oligo splitting - this time reading from a file and not asking for user
# input
#####
fin_barrel = None
fin_lid = None
        
try:
        fin_barrel = open("barrel_oligos_to_split.txt", "r")
        fin_lid = open("lid_oligos_to_split.txt", "r")
except IOError, e:
        print "Error in file IO: ", e
        
# Ask the user if they are running a lid or a barrel
shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):
        oligos_to_split = pickle.load(fin_barrel)
elif (shape == 2):
        oligos_to_split = pickle.load(fin_lid)
else:
        print 'Please modify code or run with lid or 30hb barrel'


new_OTP_ra = OTP_ra[:]
for pair in oligos_to_split:
        oligo_num = pair[0]
        print oligo_num
        num_toks = pair[1]
        print num_toks

        new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)
        
        print new_OTP_ra
        print len(OTP_ra)
        print len(new_OTP_ra)

# if it's the barrel design and so that all the numbers aren't messed
# up, the 7bp token on the start of strand 10 needs to be removed
# because it's going to be left unpaired
if (shape == 1):
        new_OTP_ra = new_OTP_ra[:61] + new_OTP_ra[62:]

if fin_barrel: fin_barrel.close()
if fin_lid: fin_lid.close() 
        

Pickle Split Parameters

### split = [[oligo_num, num_tokens for oligo_1 - from 5prime], ... ]

import pickle

fout_barrel = None
fout_lid = None

try:
        fout_barrel = open("barrel_oligos_to_split.txt", "w")
        fout_lid = open("lid_oligos_to_split.txt", "w")
except IOError, e:
        print "Error in file IO: ", e

barrel_split = [[56, 2], [57, 3], [41, 3], [26, 3], [21, 4]]
lid_split = [[27, 4], [2, 4]]

pickle.dump(barrel_split, fout_barrel)
pickle.dump(lid_split, fout_lid)

# clean up if they're open
if fout_barrel:
        fout_barrel.close()
if fout_lid:
        fout_lid.close()


Add aptamers

#######
# Add aptamers to the ends of the appropriate oligos.
#######

# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'
        
print oligo_ra
num_aptamers = int(raw_input('How many aptamers do you want to add? '))
i = 0   
while i < num_aptamers:
        oligo_num = int(raw_input('Which oligo needs an aptamer? '))
        if oligo_num < len(oligo_ra):
                # Add the aptamer to that oligo
                oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq
                i = i + 1
        else:
                print 'oligo ' + str(oligo_num) + ' out of range.'

print oligo_ra

Add aptamers using File Input rather than User Input

#####
# Add apts this time using file input instead of user input
#####
                        
# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'
                        
fin_barrel = None
                        
try:
        fin_barrel = open("barrel_apts_to_add.txt", "r")
except IOError, e:
        print "Error in file IO: ", e

# Ask the user if they are running a lid or a barrel
shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):   
        apts_to_add = pickle.load(fin_barrel)
        for apt_specs in apts_to_add:
                oligo_num = apt_specs[0]
                type = apt_specs[1]
                if (type == 1):
                        # apt is pointing in so add 'I' as a flag at the end
                        oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'I'
                elif(type == 2):
                        # apt is pointing out so add 'O' as a flag
                        oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'O'
                else:
                        # incorrect type
                        print 'Bad input - aptamer needs to be pointing in or out'


Print Aptamer Oligos

Oligo sorting - find and print out those with aptamers


#####   
# oligo sorting
#####

# sort based on whether or not there's an aptamer attached to the end of
# an oligo
        
apt = re.compile('TTTGGTTGGTGTGGTTGG')
oligo_num = 0
for oligo in oligo_ra:   
        m = apt.search(oligo)
        if m:
                print 'Match found: ', oligo + ' : ' + str(oligo_num)
        else: 
                print 'No match' + str(oligo_num)
        oligo_num = oligo_num + 1


Modifications to honeycomb_v1 scripts

  • Modifications to William's program to print each oligo number next to what tokens it represents
oligo_num = 0
for oligo in OTP_ra:
        for token in oligo:
                print str(oligo_num) + ": ", token
        oligo_num = oligo_num + 1
  • Modifications to William's program to print a grid of oligo numbers completely filled in
  • add this part to main (AAA or BBB)
####
# generate and print the oligo grid
####

# Initialize the grid with all periods
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])

sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
grid_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]
        
oligo_num = 0
for oligo in OTP_ra:
        grid_ra = generate_oligo_path(oligo, oligo_num, grid_ra)
        oligo_num = oligo_num + 1
print grid_ra
        
print_all_oligos(grid_ra, num_strands, num_subzones)

  • add this part to honeycomb_pointers_v1.py
# Stamp the number of one oligo onto the big grid array.  oligo_path is the
# list of tokens of that oligo, oligo_num is the number written into the
# grid, and grid_ra is the grid being filled in; main creates it and prints
# it once every oligo has been added.

def generate_oligo_path(oligo_path, oligo_num, grid_ra):
        # token[0] selects the grid row (strand), token[1] the column (subzone)
        for token in oligo_path:
                grid_ra[token[0]][token[1]] = oligo_num

        # grid_ra was updated in place; it is returned for convenience
        return grid_ra

def print_all_oligos(grid_ra, num_strands, num_subzones):
        """Write the grid to stdout, one line per strand.

        Each cell is followed by as many characters of a three-space pad as
        are needed to reach a width of four.
        """
        pad = '   '
        for row in range(num_strands):
                for col in range(num_subzones):
                        cell_text = str(grid_ra[row][col])
                        sys.stdout.write(cell_text + pad[:4 - len(cell_text)])
                sys.stdout.write('\n')

honeycomb_pointers_v1.py

#!/usr/bin/python

import sys

######
# This function reads in the node array from a text file
# It adds a border of nodes automatically
######
def read_text_format_node_array(filename):
	"""Parse a node lattice text file into a bordered node array.

	Every text line is cut into 3-character fields and every two text
	lines form one row of nodes; for each node the field is taken from the
	first or second line of the pair depending on the parity of
	(row + column).  A field of '...' is an empty node ('.'); any other
	field that is read is converted with int().

	The result is a list of rows surrounded by a border of '.' nodes (one
	extra '.' column is added when the number of columns is odd).

	Returns None, after writing an error message to stdout, when the file
	holds no node rows or when any strand sits on a node of the wrong
	parity.  Lines of unequal length are reported but are not fatal.
	"""
	# Read in file
	input_file = open(filename, 'r')
	try:
		lines = input_file.readlines()
	finally:
		input_file.close()
	# Strip only the line terminator: slicing off the last character would
	# eat a node character when the final line has no trailing newline.
	row_string_ra = [line.rstrip('\r\n') for line in lines]


	# Check to make sure each line is the same length
	# NOTE(review): the original return condition referenced a misspelled
	# length flag, so a length mismatch never blocked loading a file that
	# passed the parity check; it is kept as a warning only - confirm.
	length = 0
	if row_string_ra:
		length = len(row_string_ra[0])
	for row_string in row_string_ra:
		if len(row_string) != length and len(row_string) != 0:
			sys.stdout.write('ERROR: Not all lines of inputted node lattice array are the same length.\n')
			sys.stdout.write('Length is ' + str(len(row_string)) + '\n')


	# Parse into pre node array
	pre_node_ra = []
	for row_string in row_string_ra:
		# floor division: a trailing partial field is ignored
		num_row_nodes = len(row_string)//3
		sub_pre_node_ra = []
		for row_node_num in range(num_row_nodes):
			sub_pre_node_ra.append(row_string[row_node_num*3:row_node_num*3 + 3])
		if sub_pre_node_ra != []:
			pre_node_ra.append(sub_pre_node_ra)

	if not pre_node_ra:
		sys.stdout.write('ERROR: Inputted node lattice array is empty.\n')
		return None


	# Parse pre node array into node array
	num_rows = len(pre_node_ra)//2
	num_row_nodes = len(pre_node_ra[0])
	bordered_width = num_row_nodes + num_row_nodes%2 + 2
	# top border row
	node_ra = [['.' for row_node_num in range(bordered_width)]]
	for row_num in range(num_rows):
		sub_node_ra = ['.']
		for row_node_num in range(num_row_nodes):
			pre_node_string = pre_node_ra[row_num*2 + (row_node_num + row_num)%2][row_node_num]
			if pre_node_string == '...':
				node = '.'
			else:
				node = int(pre_node_string)
			sub_node_ra.append(node)
		sub_node_ra.append('.')
		if num_row_nodes%2 == 1:
			sub_node_ra.append('.')
		node_ra.append(sub_node_ra)
	# bottom border row
	node_ra.append(['.' for row_node_num in range(bordered_width)])


	# Check for parity violations
	# The whole bordered array is scanned.  The border shifts every node by
	# one row and one column, which leaves the parity of (row + column)
	# unchanged, and the border nodes themselves are '.' and are skipped.
	num_parity_violations = 0
	for row_num in range(len(node_ra)):
		for row_node_num in range(len(node_ra[row_num])):
			node = node_ra[row_num][row_node_num]
			if node != '.':
				if (node + row_num + row_node_num)%2 == 1:
					sys.stdout.write('ERROR: Parity violation for strand ' + str(node) + '.\n')
					sys.stdout.write('Parity is the row number plus the row-node number.\n')
					sys.stdout.write('Make sure even-numbered strands are on even-parity nodes.\n')
					sys.stdout.write('Make sure odd-numbered strands are on odd-parity nodes.\n')
					num_parity_violations += 1


	if num_parity_violations > 0:
		return None
	return node_ra






######
# This function prints the node lattice array in a honeycomb format
######
def print_node_lattice_array(node_ra):
	"""Write node_ra to stdout as two staggered text lines per node row.

	Nodes in even columns go on one line and nodes in odd columns on a
	second line that starts three spaces further in; the order of the two
	lines alternates with the row number.  Empty nodes ('.') are shown as
	'...'; strand numbers are zero-padded to at least three characters.
	Returns None.
	"""
	sys.stdout.write('\nNODE LATTICE ARRAY\n')
	for y in range(len(node_ra)):
		even_row_string = ''
		odd_row_string = '   '
		for x in range(len(node_ra[0])):
			string_element = str(node_ra[y][x])
			if string_element == '.':
				string_element = '...'
			else:
				# zfill never truncates or over-pads, so numbers
				# longer than three digits are printed unchanged
				string_element = string_element.zfill(3)
			if x%2 == 0:
				even_row_string += string_element + '   '
			else:
				odd_row_string += string_element + '   '
		if y%2 == 0:
			sys.stdout.write(even_row_string + '\n')
			sys.stdout.write(odd_row_string + '\n')
		else:
			sys.stdout.write(odd_row_string + '\n')
			sys.stdout.write(even_row_string + '\n')
		sys.stdout.write('\n')
	sys.stdout.write('\n\n')

	return






######
# This function inputs the node array and the number of 42bp zones
# and returns a token pointer pair array
######
def token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag):
	"""Build TPP_ra, where TPP_ra[strand][subzone] == [previous_TP, next_TP].

	Each token pointer is a [strand, subzone] list.  Every strand gets
	num_zones*6 subzones.  Along a strand the previous/next pointers step
	to the neighbouring subzone, in opposite directions for even- and
	odd-numbered strands; when periodic_structure_flag is False a step that
	would wrap around the end of the strand is replaced by subzone -1.
	Pointers are then rewired wherever two strands occupy neighbouring
	nodes of node_ra.

	NOTE(review): strand numbers are used directly as indices into TPP_ra,
	so they are presumably 0 .. number_of_strands - 1, and node_ra is
	presumably bordered with '.' nodes so neighbour lookups stay inside
	the array - confirm against the caller.
	"""
	# Neighbour offsets as [row step, column step], selected by node parity
	even_steps = [[ 0,  1], [ 0, -1], [-1, 0]]
	odd_steps  = [[ 0, -1], [ 0,  1], [ 1, 0]]
	steps_by_parity = [even_steps, odd_steps]


	# Collect the distinct strand numbers to learn how many strands exist
	seen_strands = []
	for node_row in node_ra:
		for node in node_row:
			if node != '.' and node not in seen_strands:
				seen_strands.append(node)
	strand_total = len(seen_strands)

	# Pointers along each strand, before any crossover is introduced
	subzone_total = num_zones*6
	open_ended = (periodic_structure_flag == False)
	TPP_ra = []
	for strand in range(strand_total):
		strand_pairs = []
		for tok in range(subzone_total):
			ahead = [strand, (tok + 1)%subzone_total]
			if ahead[1] == 0 and open_ended:
				ahead[1] = -1
			behind = [strand, (tok - 1)%subzone_total]
			if behind[1] == (subzone_total - 1) and open_ended:
				behind[1] = -1
			if strand%2 == 0:
				# even strands: previous is one subzone ahead
				strand_pairs.append([ahead, behind])
			else:
				# odd strands: previous is one subzone behind
				strand_pairs.append([behind, ahead])
		TPP_ra.append(strand_pairs)


	# Introduce crossovers based on the node array
	for donor_y in range(len(node_ra)):
		for donor_x in range(len(node_ra[0])):
			donor = node_ra[donor_y][donor_x]
			if donor == '.':
				continue
			parity = (donor_y + donor_x)%2
			for position, step in enumerate(steps_by_parity[parity]):
				acceptor = node_ra[donor_y + step[0]][donor_x + step[1]]
				if acceptor == '.':
					continue
				# the same subzone within every zone is rewired
				for zone in range(num_zones):
					subzone = zone*6 + position*2 + 1 - parity
					TPP_ra[donor][subzone][1] = [acceptor, subzone]
					TPP_ra[acceptor][subzone][0] = [donor, subzone]

	return TPP_ra






######
# This function inputs the token pointer pair array
# and returns an array of token_pointer_paths
######
def token_pointer_path_array(TPP_ra):
	"""Follow the pointers in TPP_ra and return the list of token paths.

	TPP_ra[strand][subzone] is a [previous_TP, next_TP] pair of
	[strand, subzone] pointers; a pointer whose subzone is -1 ends a path.
	Starting from each token not yet visited, the path is grown upstream
	through the previous pointers and downstream through the next pointers
	until an end marker or an already visited token is reached.

	Returns a list of paths, each a list of [strand, subzone] pointers.
	The total number of tokens visited and a count of paths per path length
	are written to stdout.
	"""
	sub_visits_ra = [0 for i in range(len(TPP_ra[0]))]
	visits_ra = [sub_visits_ra[:] for i in range(len(TPP_ra))]
	path_ra = []


	for strand_num in range(len(TPP_ra)):
		for subzone_num in range(len(TPP_ra[0])):
			if visits_ra[strand_num][subzone_num] == 0:
				previous_TP = TPP_ra[strand_num][subzone_num][0]
				next_TP = TPP_ra[strand_num][subzone_num][1]
				# any non-zero value marks a token as visited
				num_visits = 100 * len(path_ra) + 1
				visits_ra[strand_num][subzone_num] = num_visits
				# The path must be a list of token pointers (it is
				# grown with insert/append below), seeded with the
				# starting token.
				sub_path_ra = [[strand_num, subzone_num]]

				# Grow upstream until an end or a visited token
				while previous_TP[1] != -1 and visits_ra[previous_TP[0]][previous_TP[1]] == 0:
					sub_path_ra.insert(0, previous_TP)
					num_visits += 1
					visits_ra[previous_TP[0]][previous_TP[1]] = num_visits
					previous_TP = TPP_ra[previous_TP[0]][previous_TP[1]][0]

				# Grow downstream until an end or a visited token
				while next_TP[1] != -1 and visits_ra[next_TP[0]][next_TP[1]] == 0:
					sub_path_ra.append(next_TP)
					num_visits += 1
					visits_ra[next_TP[0]][next_TP[1]] = num_visits
					next_TP = TPP_ra[next_TP[0]][next_TP[1]][1]

				# Make sure that the path begins between subzones, not in the middle of a subzone
				if (sub_path_ra[0][0] + sub_path_ra[0][1])%2 == 0:
					sub_path_ra = sub_path_ra[1:] + sub_path_ra[:1]
				path_ra.append(sub_path_ra)



	num_tokens_visited = 0
	for sub_path_ra in path_ra:
		num_tokens_visited += len(sub_path_ra)

	sys.stdout.write('The number of tokens visited is ' + str(num_tokens_visited) + '.\n')


	# Count paths per length in a dict so that arbitrarily long paths are
	# handled (a fixed-size table overflows on very long paths).
	path_length_counts = {}
	for sub_path_ra in path_ra:
		path_length = len(sub_path_ra)
		path_length_counts[path_length] = path_length_counts.get(path_length, 0) + 1

	for length in sorted(path_length_counts):
		sys.stdout.write('The number of paths with length ' + str(length) + ' is ' + str(path_length_counts[length]) + '.\n')

	return path_ra






######
# This function inputs the token pointer path array
# and returns a list of oligos as lists of six token pointers
######
def oligo_token_pointer_array(path_ra, oligo_length=6):
	"""Cut every path into consecutive oligos of oligo_length tokens.

	path_ra      -- list of paths, each a list of token pointers
	oligo_length -- number of token pointers per oligo (default 6)

	Returns one flat list of oligos in path order.  When a path's length is
	not a multiple of oligo_length, its last oligo holds the remaining
	tokens.
	"""
	OTP_ra = []
	for sub_path_ra in path_ra:
		# Stepping through the path by oligo_length yields the full
		# oligos followed by the shorter remainder, if any, without
		# relying on integer division.
		for start in range(0, len(sub_path_ra), oligo_length):
			OTP_ra.append(sub_path_ra[start:start + oligo_length])
	return OTP_ra






######
# This function checks to make sure each token is represented once and only once
# in the oligo token pointer lists
######
def check_token_representation(OTP_ra, TPP_ra):
	"""Report tokens that are missing from, or repeated in, OTP_ra.

	Every [strand, token] index pair of TPP_ra is looked up among all token
	pointers of all oligos in OTP_ra.  A line is written to stdout for each
	pair found zero times or more than once; if there are none, a single
	confirmation line is written instead.  Returns None.
	"""
	# Flatten the oligos into one list of token pointers
	all_TPs = []
	for oligo in OTP_ra:
		all_TPs.extend(oligo)

	strand_total = len(TPP_ra)
	subzone_total = len(TPP_ra[0])
	all_ok = True

	# Check each token for single presence
	for strand in range(strand_total):
		for tok in range(subzone_total):
			wanted = [strand, tok]
			occurrences = all_TPs.count(wanted)
			if occurrences == 0:
				sys.stdout.write(str(wanted) + ' not present.\n')
				all_ok = False
			elif occurrences > 1:
				sys.stdout.write(str(wanted) + ' present more than once.\n')
				all_ok = False

	if all_ok:
		sys.stdout.write('Each token is represented once and only once in the oligo token pointer lists.\n')

	return
	





######
# This function prints the oligo path on the strand token lattice
# It is fun to see how the paths twist around the lattice
# You can use this function to help debug your program
######
def print_path(sub_path_ra, TPP_ra):
	"""Write one path to stdout as a strands-by-subzones lattice.

	The lattice has one row per strand and one column per subzone of
	TPP_ra.  Cells not on the path show '.'; a cell on the path shows the
	position of its token within sub_path_ra.  Returns None.
	"""
	sys.stdout.write('\nONE PATH ARRAY\n')

	strand_total = len(TPP_ra)
	subzone_total = len(TPP_ra[0])

	# Blank lattice: an independent row of '.' for every strand
	lattice = [['.'] * subzone_total for row_num in range(strand_total)]

	# Mark each visited cell with the token's position in the path
	for step, token in enumerate(sub_path_ra):
		lattice[token[0]][token[1]] = step

	# Each cell is followed by enough of the pad to reach a width of four
	pad = '   '
	for lattice_row in lattice:
		for cell in lattice_row:
			cell_text = str(cell)
			sys.stdout.write(cell_text + pad[:4 - len(cell_text)])
		sys.stdout.write('\n')

	sys.stdout.write('\n')

	return
	
# The idea here is to have a function that adds the numbers of one oligo path
# to the appropriate places in the big grid array. Eventually this will be printed
# in main. Also it needs to be initialized in main. Oligo_path is the path of
# one oligo, while grid_ra is the grid that is constantly being updated until
# it is printed in main. oligo_num is the number that will be written into grid_ra.

def generate_oligo_path(oligo_path, oligo_num, grid_ra):
	"""Write oligo_num into grid_ra at every token of oligo_path.

	Each token is indexed as token[0] (strand, the grid row) and token[1]
	(subzone, the grid column).  grid_ra is modified in place and returned.
	"""
	# Indented with tabs only, like the rest of this module: mixing tabs
	# and spaces in one block is rejected by Python 3 and by `python -tt`.
	# Assign visits
	for token in oligo_path:
		strand = token[0]
		subzone = token[1]
		grid_ra[strand][subzone] = oligo_num

	return grid_ra


def print_all_oligos(grid_ra, num_strands, num_subzones):
	"""Write the oligo grid to stdout, one text line per strand.

	Only the first num_strands rows and num_subzones columns of grid_ra
	are printed.  Each entry is followed by as much of a three-space pad
	as is needed to reach a width of four.
	"""
	padding = '   '
	for strand_index in range(num_strands):
		grid_row = grid_ra[strand_index]
		for subzone_index in range(num_subzones):
			entry = str(grid_row[subzone_index])
			sys.stdout.write(entry)
			sys.stdout.write(padding[:4 - len(entry)])
		sys.stdout.write('\n')
        


####
# given an oligo to split, split it and return the new list of oligos
####
def split_oligo(new_OTP_ra, oligo_num, num_toks):
	"""Split one oligo of new_OTP_ra in two and return the updated list.

	new_OTP_ra -- list of oligos, each a sequence of tokens; it is modified
	              in place and also returned
	oligo_num  -- index of the oligo to split
	num_toks   -- number of leading tokens kept in the first piece

	The first piece replaces the original entry and the remainder is
	inserted directly after it.  The original oligo and both pieces are
	echoed to stdout.  Raises IndexError if oligo_num is out of range.
	"""
	original_oligo = new_OTP_ra[oligo_num]
	# Output goes through sys.stdout.write like the rest of this module;
	# the text is the same as the print statements it replaces.
	sys.stdout.write(str(original_oligo) + '\n')

	oligo_1 = original_oligo[:num_toks]
	oligo_2 = original_oligo[num_toks:]
	sys.stdout.write(str(oligo_1) + '\n')
	sys.stdout.write('\n\n')
	sys.stdout.write(str(oligo_2) + '\n')

	new_OTP_ra[oligo_num] = oligo_1
	new_OTP_ra.insert(oligo_num + 1, oligo_2)
	return new_OTP_ra


# Top-level statement: writes a confirmation line to stdout each time this
# file is executed or imported.
sys.stdout.write('Honeycomb pointers module installed.\n')

</pre>