IGEM:Harvard/2006/Container Design 4/Python Code: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
Line 1: Line 1:
= Modified Versions of William's Code - Finalized 7/11/06 =  
= Modified Versions of William's Code - Finalized 7/11/06 =  
main
main - BBB_make_honeycomb_structure_v1.py
<pre>
<pre>
#!/usr/bin/python
#!/usr/bin/python
Line 648: Line 648:
                 sys.stdout.write('\n')   
                 sys.stdout.write('\n')   
</pre>
</pre>
honeycomb_pointers_v1.py
<pre>
#!/usr/bin/python
import sys
######
# This function reads in the node array from a text file
# It adds a border of nodes automatically
######
def read_text_format_node_array(filename):
# Read in file
input_file = file(filename, 'r')
lines = input_file.readlines()
input_file.close()
row_string_ra = [line[:-1] for line in lines]
# Check to make sure each line is the same length
no_length_violation = True
length = len(row_string_ra[0])
for row_string in row_string_ra:
if len(row_string) != length and len(row_string) != 0:
sys.stdout.write('ERROR: Not all lines of inputted node lattice array are the same length.\n')
sys.stdout.write('Length is ' + str(len(row_string)) + '\n')
no_length_violation = False
# Parse into pre node array
pre_node_ra = []
for row_string in row_string_ra:
num_row_nodes = len(row_string)/3
sub_pre_node_ra = []
for row_node_num in range(num_row_nodes):
sub_pre_node_ra.append(row_string[row_node_num*3:row_node_num*3 + 3])
if sub_pre_node_ra != []:
pre_node_ra.append(sub_pre_node_ra)
# Parse pre node array into node array
num_rows = len(pre_node_ra)/2
num_row_nodes = len(pre_node_ra[0])
node_ra = [['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)]]
for row_num in range(num_rows):
sub_node_ra = ['.']
for row_node_num in range(num_row_nodes):
pre_node_string = pre_node_ra[row_num*2 + (row_node_num + row_num)%2][row_node_num]
if pre_node_string == '...':
node = '.'
else:
node = int(pre_node_string)
sub_node_ra.append(node)
sub_node_ra.append('.')
if num_row_nodes%2 == 1:
sub_node_ra.append('.')
node_ra.append(sub_node_ra)
node_ra.append(['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)])
# Check for parity violations
num_parity_violations = 0
for row_num in range(num_rows):
for row_node_num in range(num_row_nodes):
node = node_ra[row_num][row_node_num]
if node != '.':
if (node + row_num + row_node_num)%2 == 1:
sys.stdout.write('ERROR: Parity violation for strand ' + str(node) + '.\n')
sys.stdout.write('Parity is the row number plus the row-node number.\n')
sys.stdout.write('Make sure even-numbered strands are on even-parity nodes.\n')
sys.stdout.write('Make sure odd-numbered strands are on odd-parity nodes.\n')
num_parity_violations += 1
if num_parity_violations > 0 and no_length_violations == True:
return
else:
return node_ra
######
# This function prints the node lattice array in a honeycomb format
######
def print_node_lattice_array(node_ra):
sys.stdout.write('\nNODE LATTICE ARRAY\n')
zeroes = '000'
for y in range(len(node_ra)):
even_row_string = ''
odd_row_string = '  '
for x in range(len(node_ra[0])):
string_element = str(node_ra[y][x])
if string_element == '.':
string_element += '..'
else:
string_element = zeroes[:3 - len(string_element)] + string_element
if x%2 == 0:
even_row_string += string_element + '  '
else:
odd_row_string += string_element + '  '
if y%2 == 0:
sys.stdout.write(even_row_string + '\n')
sys.stdout.write(odd_row_string + '\n')
else:
sys.stdout.write(odd_row_string + '\n')
sys.stdout.write(even_row_string + '\n')
sys.stdout.write('\n')
sys.stdout.write('\n\n')
return
######
# This function inputs the node array and the number of 42bp zones
# and returns a token pointer pair array
######
def token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag):
# Initialize the offset array
even_offset_ra = [[ 0,  1], [ 0, -1], [-1, 0]]
odd_offset_ra  = [[ 0, -1], [ 0,  1], [ 1, 0]]
offset_ra = [even_offset_ra, odd_offset_ra]
#Initialize the token pointer pair array
strand_list_ra = []
for sub_node_ra in node_ra:
for node in sub_node_ra:
if node != '.' and strand_list_ra.count(node) == 0:
strand_list_ra.append(node)
num_strands = len(strand_list_ra)
TPP_ra = []
num_subzones = num_zones*6
for strand_num in range(num_strands):
sub_TPP_ra = []
for token_num in range(num_subzones):
if strand_num%2 == 0:
previous_TP = [strand_num, (token_num + 1)%num_subzones]
if previous_TP[1] == 0 and periodic_structure_flag == False:
previous_TP[1] = -1
next_TP = [strand_num, (token_num - 1)%num_subzones]
if next_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
next_TP[1] = -1
sub_TPP_ra.append([previous_TP, next_TP])
else:
previous_TP = [strand_num, (token_num - 1)%num_subzones]
if previous_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
previous_TP[1] = -1
next_TP = [strand_num, (token_num + 1)%num_subzones]
if next_TP[1] == 0 and periodic_structure_flag == False:
next_TP[1] = -1
sub_TPP_ra.append([previous_TP, next_TP])
TPP_ra.append(sub_TPP_ra)
# Introduce crossovers based on the node array
for donor_y in range(len(node_ra)):
for donor_x in range(len(node_ra[0])):
donor_strand_num = node_ra[donor_y][donor_x]
if donor_strand_num != '.':
for position in range(3):
parity = (donor_y + donor_x)%2
acceptor_y = donor_y + offset_ra[parity][position][0]
acceptor_x = donor_x + offset_ra[parity][position][1]
acceptor_strand_num = node_ra[acceptor_y][acceptor_x]
if acceptor_strand_num != '.':
for zone_num in range(num_zones):
subzone_num = zone_num*6 + position*2 + 1 - parity
TPP_ra[donor_strand_num][subzone_num][1] = [acceptor_strand_num, subzone_num]
TPP_ra[acceptor_strand_num][subzone_num][0] = [donor_strand_num, subzone_num]
return TPP_ra
######
# This function inputs the token pointer pair array
# and returns an array of token_pointer_paths
######
def token_pointer_path_array(TPP_ra):
sub_visits_ra = [0 for i in range(len(TPP_ra[0]))]
visits_ra = [sub_visits_ra[:] for i in range(len(TPP_ra))]
path_ra = []
for strand_num in range(len(TPP_ra)):
for subzone_num in range(len(TPP_ra[0])):
if visits_ra[strand_num][subzone_num] == 0:
previous_TP = TPP_ra[strand_num][subzone_num][0]
next_TP = TPP_ra[strand_num][subzone_num][1]
num_visits = 100 * len(path_ra) + 1
visits_ra[strand_num][subzone_num] = num_visits
sub_path_ra = [[strand_num, subzone_num]]
upstream_done = False
while not upstream_done:
if previous_TP[1] == -1:
upstream_done = True
elif visits_ra[previous_TP[0]][previous_TP[1]] > 0:
upstream_done = True
else:
sub_path_ra.insert(0, previous_TP)
num_visits += 1
visits_ra[previous_TP[0]][previous_TP[1]] = num_visits
previous_TP = TPP_ra[previous_TP[0]][previous_TP[1]][0]
downstream_done = False
while not downstream_done:
if next_TP[1] == -1:
downstream_done = True
elif visits_ra[next_TP[0]][next_TP[1]] > 0:
downstream_done = True
else:
sub_path_ra.append(next_TP)
num_visits += 1
visits_ra[next_TP[0]][next_TP[1]] = num_visits
next_TP = TPP_ra[next_TP[0]][next_TP[1]][1]
# Make sure that the path begins between subzones, not in the middle of a subzone
if (sub_path_ra[0][0] + sub_path_ra[0][1])%2 == 0:
sub_path_ra = sub_path_ra[1:] + sub_path_ra[:1]
path_ra.append(sub_path_ra)
num_tokens_visited = 0
for sub_path_ra in path_ra:
num_tokens_visited += len(sub_path_ra)
sys.stdout.write('The number of tokens visited is ' + str(num_tokens_visited) + '.\n')
path_length_ra = [0 for i in range(1100)]
for sub_path_ra in path_ra:
path_length_ra[len(sub_path_ra)] += 1
for length in range(1100):
if path_length_ra[length] != 0:
sys.stdout.write('The number of paths with length ' + str(length) + ' is ' + str(path_length_ra[length]) + '.\n')
return path_ra
######
# This function inputs the token pointer path array
# and returns a list of oligos as lists of six token pointers
######
def oligo_token_pointer_array(path_ra):
OTP_ra = []
for sub_path_ra in path_ra:
for oligo_num in range(len(sub_path_ra)/6):
OTP_ra.append(sub_path_ra[oligo_num*6:oligo_num*6 + 6])
# Take care of the remainders
if len(sub_path_ra)%6 != 0:
OTP_ra.append(sub_path_ra[-(len(sub_path_ra)%6):])
return OTP_ra
######
# This function checks to make sure each token is represented once and only once
# in the oligo token pointer lists
######
def check_token_representation(OTP_ra, TPP_ra):
CTP_ra = []
for sub_OTP_ra in OTP_ra:
for TP in sub_OTP_ra:
CTP_ra.append(TP)
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])
problems = False
# Check each token for single presence
for strand_num in range(num_strands):
for token_num in range(num_subzones):
if CTP_ra.count([strand_num, token_num]) == 0:
sys.stdout.write(str([strand_num, token_num]) + ' not present.\n')
problems = True
elif CTP_ra.count([strand_num, token_num]) > 1:
sys.stdout.write(str([strand_num, token_num]) + ' present more than once.\n')
problems = True
if problems == False:
sys.stdout.write('Each token is represented once and only once in the oligo token pointer lists.\n')
return
######
# This function prints the oligo path on the strand token lattice
# It is fun to see how the paths twist around the lattice
# You can use this function to help debug your program
######
def print_path(sub_path_ra, TPP_ra):
sys.stdout.write('\nONE PATH ARRAY\n')
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])
num_path_tokens = len(sub_path_ra)
# Initialize strand token lattice
sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
token_visit_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]
# Assign visits
for path_token_num in range(num_path_tokens):
token = sub_path_ra[path_token_num]
strand = token[0]
subzone = token[1]
token_visit_ra[strand][subzone] = path_token_num
# Print out strand token lattice
spacer = '  '
for strand_num in range(num_strands):
for subzone_num in range(num_subzones):
visitor_string = str(token_visit_ra[strand_num][subzone_num])
sys.stdout.write(visitor_string)
sys.stdout.write(spacer[:4 - len(visitor_string)])
sys.stdout.write('\n')
sys.stdout.write('\n')
return
# The idea here is to have a function that adds the numbers of one oligo path
# to the appropriate places in the big grid array. Eventually this will be printed
# in main. Also it needs to be initialized in main. Oligo_path is the path of
# one oligo, while grid_ra is the grid that is constantly being updated until
# it is printed in main. oligo_num is number that will be inputed to the grid_ra.
def generate_oligo_path(oligo_path, oligo_num, grid_ra):
num_path_tokens = len(oligo_path)
       
# Assign visits
        for path_token_num in range(num_path_tokens):
                token = oligo_path[path_token_num]
                strand = token[0]
                subzone = token[1]
                grid_ra[strand][subzone] = oligo_num
               
       
return grid_ra
def print_all_oligos(grid_ra, num_strands, num_subzones):
        spacer = '  '
        for strand_num in range(num_strands):
                for subzone_num in range(num_subzones):
                        visitor_string = str(grid_ra[strand_num][subzone_num])
                        sys.stdout.write(visitor_string)
                        sys.stdout.write(spacer[:4 - len(visitor_string)])
                sys.stdout.write('\n')
       
####
# given an oligo to split, split it and return the new list of oligos
####
def split_oligo(new_OTP_ra, oligo_num, num_toks):
print new_OTP_ra[oligo_num]
original_oligo = new_OTP_ra[oligo_num]
oligo_1 = original_oligo[:num_toks]
oligo_2 = original_oligo[num_toks:]
print oligo_1
print'\n'
print oligo_2
new_OTP_ra[oligo_num] = oligo_1
new_OTP_ra.insert(oligo_num + 1, oligo_2)
return new_OTP_ra
sys.stdout.write('Honeycomb pointers module installed.\n')
<\pre>

Revision as of 08:47, 11 July 2006

Modified Versions of William's Code - Finalized 7/11/06

main - BBB_make_honeycomb_structure_v1.py

#!/usr/bin/python

import sys
# for regular expression matching
import re
# for reading structures in from files
import pickle
from honeycomb_pointers_v1 import *
from pointers_to_sequences_v1 import *

filename = raw_input('Enter the filename for the text-based node lattice array: ')
num_zones = int(raw_input('Enter the number of 42bp zones per double helix: '))
#filename = 'lid_1_ascii.txt'
#num_zones = 3
periodic_structure_flag = False

node_ra = read_text_format_node_array(filename)
print_node_lattice_array(node_ra)
TPP_ra = token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag)
path_ra = token_pointer_path_array(TPP_ra)
OTP_ra = oligo_token_pointer_array(path_ra)
check_token_representation(OTP_ra, TPP_ra)


# Print out the longest path, just for fun
longest_path = []
for path in path_ra:
	if len(path) > len(longest_path):
		longest_path = path
print_path(longest_path, TPP_ra)

#####
# Oligo splitting - this time reading from a file and not asking for user
# input
#####
fin_barrel = None
fin_lid = None

try:
	fin_barrel = open("barrel_oligos_to_split.txt", "r")
	fin_lid = open("lid_oligos_to_split.txt", "r")
except IOError, e:
	print "Error in file IO: ", e

# Ask the user if they are running a lid or a barrel

shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):
	oligos_to_split = pickle.load(fin_barrel)
elif (shape == 2):
	oligos_to_split = pickle.load(fin_lid)
else:
	print 'Please modify code or run with lid or 30hb barrel'
	

new_OTP_ra = OTP_ra[:]
for pair in oligos_to_split:
	oligo_num = pair[0]
	print oligo_num
	num_toks = pair[1]
	print num_toks
	
       	new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)

       	print new_OTP_ra
     	print len(OTP_ra)
       	print len(new_OTP_ra)

# if it's the barrel design and so that all the numbers aren't messed
# up, the 7bp token on the start of strand 10 needs to be removed
# because it's going to be left unpaired

if (shape == 1):
	new_OTP_ra = new_OTP_ra[:61] + new_OTP_ra[62:]

if fin_barrel: fin_barrel.close()
if fin_lid: fin_lid.close()




####
# generate and print the oligo token grid
####

# Initialize the grid with all periods
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])

sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
grid_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]

oligo_num = 0
for oligo in new_OTP_ra:
	grid_ra = generate_oligo_path(oligo, oligo_num, grid_ra) 
	oligo_num = oligo_num + 1
print grid_ra

print_all_oligos(grid_ra, num_strands, num_subzones)

####
# Generate oligo sequences
####
strand_ra = strand_array('lid_1_scaffold.txt', TPP_ra)
token_ra = token_array(strand_ra)
oligo_ra = oligo_array(new_OTP_ra, token_ra)

# The array of oligos that does have the oligos split but does not have the
# aptamers or latches added.
original_oligo_ra = oligo_ra[:]
latch_2_oligo_ra = oligo_ra[:]

#####
# Add apts this time using file input instead of user input
#####

# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'

fin_barrel = None
        
try:
        fin_barrel = open("barrel_apts_to_add.txt", "r")
except IOError, e:
        print "Error in file IO: ", e
        
if (shape == 1):
    	apts_to_add = pickle.load(fin_barrel)

	for apt_specs in apts_to_add:
		oligo_num = apt_specs[0]
		type = apt_specs[1]
		if (type == 1):
			# apt is pointing in so add 'I' as a flag at the end
			oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'I'
		elif (type == 2):
			# apt is pointing out so add 'O' as a flag
			oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'O'
		else:
			# incorrect type
			print 'Bad input - aptamer needs to be pointing in or out'

#####
# Add latches
#####

fin_barrel = None
fin_lid1 = None
fin_lid2 = None
fin_barrel_design2 = None
fin_lid1_design2 = None
fin_lid2_design2 = None
                
try:
        fin_barrel = open("barrel_latches_to_add.txt", "r")
	fin_lid1 = open("lid1_latches_to_add.txt", "r")
	fin_lid2 = open("lid2_latches_to_add.txt", "r")
	fin_barrel_design2 = open("barrel_latches_to_add_design2.txt", "r")
	fin_lid1_design2 = open("lid1_latches_to_add_design2.txt", "r")
	fin_lid2_design2 = open("lid2_latches_to_add_design2.txt", "r")
except IOError, e:
        print "Error in file IO: ", e


if (shape == 1):
        latches_to_add = pickle.load(fin_barrel)
	latches_to_add_d2 = pickle.load(fin_barrel_design2)
elif (shape == 2):
	# Ask which lid is being run
	print 'What lid are you running?'
	type = int(raw_input('Enter 1 if lid1, 2 if lid2: '))
	if (type == 1):
		latches_to_add = pickle.load(fin_lid1)
		latches_to_add_d2 = pickle.load(fin_lid1_design2)
	elif (type == 2):
		latches_to_add = pickle.load(fin_lid2)
		latches_to_add_d2 = pickle.load(fin_lid2_design2)

        
# note: latch sequence includes any linker Ts already
for latch_specs in latches_to_add:
	oligo_num = latch_specs[0]
        latch = latch_specs[1]
	oligo_ra[oligo_num] = oligo_ra[oligo_num] + latch

for latch_specs in latches_to_add_d2:
	oligo_num = latch_specs[0]
	latch = latch_specs[1]
	latch_2_oligo_ra[oligo_num] = latch_2_oligo_ra[oligo_num] + latch


#####
# oligo sorting
#####
#
barrel_core = [0, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77]
barrel_latch = [21, 27, 43, 60] 
barrel_aptamer_out = [2, 7, 17, 23] 
barrel_aptamer_in = [1, 11, 36, 47]
lid_core = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34]
lid_latch = [2, 29]

# output all oligos
output_file = file('lid_1_all_oligos.txt', 'w')
for oligo in oligo_ra:
	output_file.write(oligo + '\n')
output_file.close()

if (shape == 1):
	# barrel output
	
	barrel_oligos_file = file('barrel_oligos_sorted.txt', 'w')
	# output barrel core oligos
	barrel_oligos_file.write('Core Oligos' + '\n')
	barrel_oligos_file.close()
	# to append the file needs to be reopened in append mode.
	barrel_oligos_file = file('barrel_oligos_sorted.txt', 'a')

	for oligo_num in barrel_core:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# out + apts
	barrel_oligos_file.write('\n' + 'Out + Apts' + '\n')
	for oligo_num in barrel_aptamer_out:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	# out - apts
	barrel_oligos_file.write('\n' + 'Out - Apts' + '\n')
	for oligo_num in barrel_aptamer_out:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')
	
	# in - apts
	barrel_oligos_file.write('\n' + 'In - Apts' + '\n')
	for oligo_num in barrel_aptamer_in:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

	# in + apts
	barrel_oligos_file.write('\n' + 'In + Apts' +'\n')
	for oligo_num in barrel_aptamer_in:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	
	# barrel oligos with latches - two oligo design
	barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 1' + '\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')
	
	# barrel oligos with latches - four oligo design
	barrel_oligos_file.write('\n' + 'Oligos with Latches - Design 2\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' Design 2: ' + latch_2_oligo_ra[oligo_num] + '\n')

	# barrel oligos replacing all latches
	barrel_oligos_file.write('\n' + 'Oligos replacing all latches\n')
	for oligo_num in barrel_latch:
		barrel_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')

	
	barrel_oligos_file.close()


if (shape == 2):
	# lid output

	# output lid core oligos
	# we first open the file for writing because we want to clear 
	# anything that was there before. Then we close it and reopen it
	# in append mode.
	lid_oligos_file = file('lid1_oligos_sorted.txt', 'w')
	lid_oligos_file.write('Lid Core' + '\n')
	lid_oligos_file.close()
	lid_oligos_file = file('lid1_oligos_sorted.txt', 'a')
	for oligo_num in lid_core:
		lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# output lid oligos with latches -- 2 oligo design
	lid_oligos_file.write('\nLid Latch Oligos - 2 Oligo Design\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' : ' + oligo_ra[oligo_num] + '\n')

	# output lid oligos with latches -- 4 oligo design
	lid_oligos_file.write('\nLid Latch Oligos - 4 Oligo Design\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' Design 2 : ' + latch_2_oligo_ra[oligo_num] + '\n')

	# output lid oligos that replace those with latches
	lid_oligos_file.write('\nLid Replacement Oligos - No Latches\n')
	for oligo_num in lid_latch:
		lid_oligos_file.write(str(oligo_num) + ' : ' + original_oligo_ra[oligo_num] + '\n')
	
	lid_oligos_file.close()	

sys.stdout.write('\n\n\n\n\n\n')


Split Scaffold Sequence into Chunks

--Vlau 16:12, 10 July 2006 (EDT)

#!/usr/bin/python

import string
import sys
import fileinput

def nowhite(s):
  return ''.join([c for c in s if c in string.letters])

seq = ''

# read in sequence
for line in fileinput.input("-"):
  seq = seq + nowhite(line)

lid_1 = 96 * 14
linker_1 = 200
box = 29 * 96
linker_2 = 200
lid_2 = 96 * 14
linker_3 = 200
box_2 = 96

#store first segment up to end of lid 1
lid_1_seq = seq[:lid_1]
#reset sequence to start at end of lid 1
seq = seq[lid_1:]
linker_1_seq = seq[:linker_1]
seq = seq[linker_1:]
box_1_seq = seq[:box]
seq = seq[box:]
linker_2_seq = seq[:linker_2]
seq = seq[linker_2:]
lid_2_seq = seq[:lid_2]
seq = seq[lid_2:]
linker_3_seq = seq[:linker_3]
seq = seq[linker_3:]
box_2_seq = seq[:box_2]
seq = seq[box_2:]
extra_scaffold = seq
barrel = box_1_seq + box_2_seq

#print out the length of each segment and its sequence

barrel_file = file('barrel_scaffold.txt', 'w')
lid_1_file = file('lid_1_scaffold.txt', 'w')
lid_2_file = file('lid_2_scaffold.txt', 'w')

print len(lid_1_seq)
print lid_1_seq
lid_1_file.write(lid_1_seq)
print len(linker_1_seq)
print linker_1_seq
print len(barrel)
print barrel
barrel_file.write(barrel)
print len(linker_2_seq)
print linker_2_seq  
print len(lid_2_seq)
print lid_2_seq
lid_2_file.write(lid_2_seq)
print len(linker_3_seq)
print linker_3_seq

Split Oligos From User Input

Script for splitting up as many oligos as you want (USING USER INPUT). The first part goes in main. The second part goes in honeycomb_pointers_v1.py


#####
# Oligo splitting:
# get new list of oligos given user input specifying which oligo to cut
#####

num_to_split = int(raw_input('How many oligos do you want to split?'))
i = 0
new_OTP_ra = OTP_ra[:]
while i < num_to_split:  

        oligo_num = int(raw_input('Enter the number oligo you wish to split:'))
        print '\n'
        print 'How many tokens should the first new oligo be?'
        num_toks = int(raw_input('Number of toks starting from 5 prime: '))

        new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)
        
        print new_OTP_ra
        print len(OTP_ra)
        print len(new_OTP_ra)
        i = i + 1

####
# given an oligo to split, split it and return the new list of oligos
####    
def split_oligo(new_OTP_ra, oligo_num, num_toks):
        
        print new_OTP_ra[oligo_num]

        original_oligo = new_OTP_ra[oligo_num]
        
        oligo_1 = original_oligo[:num_toks]  
        oligo_2 = original_oligo[num_toks:]
        print oligo_1
        print'\n'
        print oligo_2
                
        new_OTP_ra[oligo_num] = oligo_1
        new_OTP_ra.insert(oligo_num + 1, oligo_2)
        return new_OTP_ra    



Split Oligos From File Input

Script for splitting up oligos automatically (FILE INPUT). (make sure to import pickle at the top). This part goes in main. Further down is the file to read from essentially (uses pickle)

#####
# Oligo splitting - this time reading from a file and not asking for user
# input
#####
fin_barrel = None
fin_lid = None
        
try:
        fin_barrel = open("barrel_oligos_to_split.txt", "r")
        fin_lid = open("lid_oligos_to_split.txt", "r")
except IOError, e:
        print "Error in file IO: ", e
        
# Ask the user if they are running a lid or a barrel
shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):
        oligos_to_split = pickle.load(fin_barrel)
elif (shape == 2):
        oligos_to_split = pickle.load(fin_lid)
else:
        print 'Please modify code or run with lid or 30hb barrel'


new_OTP_ra = OTP_ra[:]
for pair in oligos_to_split:
        oligo_num = pair[0]
        print oligo_num
        num_toks = pair[1]
        print num_toks

        new_OTP_ra = split_oligo(new_OTP_ra, oligo_num, num_toks)
        
        print new_OTP_ra
        print len(OTP_ra)
        print len(new_OTP_ra)

# if it's the barrel design and so that all the numbers aren't messed
# up, the 7bp token on the start of strand 10 needs to be removed
# because it's going to be left unpaired
if (shape == 1):
        new_OTP_ra = new_OTP_ra[:61] + new_OTP_ra[62:]

if fin_barrel: fin_barrel.close()
if fin_lid: fin_lid.close() 
        

Pickle Split Parameters

### split = [[oligo_num, num_tokens for oligo_1 - from 5prime], ... ]

import pickle

fout_barrel = None
fout_lid = None

try:
        fout_barrel = open("barrel_oligos_to_split.txt", "w")
        fout_lid = open("lid_oligos_to_split.txt", "w")
except IOError, e:
        print "Error in file IO: ", e

barrel_split = [[56, 2], [57, 3], [41, 3], [26, 3], [21, 4]]
lid_split = [[27, 4], [2, 4]]

pickle.dump(barrel_split, fout_barrel)
pickle.dump(lid_split, fout_lid)

# clean up if they're open
if fout_barrel:
        fout_barrel.close()
if fout_lid:
        fout_lid.close()


Add aptamers

#######
# Add aptamers to the ends of the appropriate oligos.
#######

# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'
        
print oligo_ra
num_aptamers = int(raw_input('How many aptamers do you want to add? '))
i = 0   
while i < num_aptamers:
        oligo_num = int(raw_input('Which oligo needs an aptamer? '))
        if oligo_num < len(oligo_ra):
                # Add the aptamer to that oligo
                oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq
                i = i + 1
        else:
                print 'oligo ' + str(oligo_num) + ' out of range.'

print oligo_ra

Add aptamers using File Input rather than User Input

#####
# Add apts this time using file input instead of user input
#####
                        
# Constants
apt_seq = 'GGTTGGTGTGGTTGG'
T_linker = 'TTT'
                        
fin_barrel = None
                        
try:
        fin_barrel = open("barrel_apts_to_add.txt", "r")
except IOError, e:
        print "Error in file IO: ", e

# Ask the user if they are running a lid or a barrel
shape = int(raw_input("Enter 1 if you are running a barrel, 2 if lid: "))
if (shape == 1):   
        apts_to_add = pickle.load(fin_barrel)
        for apt_specs in apts_to_add:
                oligo_num = apt_specs[0]
                type = apt_specs[1]
                if (type == 1):
                        # apt is pointing in so add 'I' as a flag at the end
                        oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'I'
                elif(type == 2):
                        # apt is pointing out so add 'O' as a flag
                        oligo_ra[oligo_num] = oligo_ra[oligo_num] + T_linker + apt_seq + 'O'
                else:
                        # incorrect type
                        print 'Bad input - aptamer needs to be pointing in or out'


Print Aptamer Oligos

Oligo sorting - find an print out those with aptamers


#####   
# oligo sorting
#####

# sort based on whether or not there's an aptamer attached to the end of
# an oligo
        
apt = re.compile('TTTGGTTGGTGTGGTTGG')
oligo_num = 0
for oligo in oligo_ra:   
        m = apt.search(oligo)
        if m:
                print 'Match found: ', oligo + ' : ' + str(oligo_num)
        else: 
                print 'No match' + str(oligo_num)
        oligo_num = oligo_num + 1


Modifications to honeycomb_v1 scripts

  • Modifications to William's program to print each oligo number next to what tokens it represents
oligo_num = 0
for oligo in OTP_ra:
        for token in oligo:
                print str(oligo_num) + ": ", token
        oligo_num = oligo_num + 1
  • Modifications to William's program to print a grid of oligo numbers completely filled in
  • add this part to main (AAA or BBB)
####
# generate and print the oligo grid
####

# Initialize the grid with all periods
num_strands = len(TPP_ra)
num_subzones = len(TPP_ra[0])

sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
grid_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]
        
oligo_num = 0
for oligo in OTP_ra:
        grid_ra = generate_oligo_path(oligo, oligo_num, grid_ra)
        oligo_num = oligo_num + 1
print grid_ra
        
print_all_oligos(grid_ra, num_strands, num_subzones)

  • add this part to honeycomb_pointers_v1.py
# The idea here is to have a function that adds the numbers of one oligo path
# to the appropriate places in the big grid array. Eventually this will be printed
# in main. Also it needs to be initialized in main. Oligo_path is the path of
# one oligo, while grid_ra is the grid that is constantly being updated until 
# it is printed in main. oligo_num is number that will be inputed to the grid_ra.
                        
def generate_oligo_path(oligo_path, oligo_num, grid_ra):
        num_path_tokens = len(oligo_path)
                
# Assign visits
        for path_token_num in range(num_path_tokens):
                token = oligo_path[path_token_num]
                strand = token[0]
                subzone = token[1]
                grid_ra[strand][subzone] = oligo_num
        
        
        return grid_ra

def print_all_oligos(grid_ra, num_strands, num_subzones):
        spacer = '   '
        for strand_num in range(num_strands):
                for subzone_num in range(num_subzones):
                        visitor_string = str(grid_ra[strand_num][subzone_num])   
                        sys.stdout.write(visitor_string)
                        sys.stdout.write(spacer[:4 - len(visitor_string)])
                sys.stdout.write('\n')   


honeycomb_pointers_v1.py

#!/usr/bin/python

import sys

######
# This function reads in the node array from a text file
# It adds a border of nodes automatically
######
def read_text_format_node_array(filename):
	# Read in file
	input_file = file(filename, 'r')
	lines = input_file.readlines()
	input_file.close()
	row_string_ra = [line[:-1] for line in lines]

	
	# Check to make sure each line is the same length
	no_length_violation = True
	length = len(row_string_ra[0])
	for row_string in row_string_ra:
		if len(row_string) != length and len(row_string) != 0:
			sys.stdout.write('ERROR: Not all lines of inputted node lattice array are the same length.\n')
			sys.stdout.write('Length is ' + str(len(row_string)) + '\n')
			no_length_violation = False

		
	# Parse into pre node array
	pre_node_ra = []
	for row_string in row_string_ra:
		num_row_nodes = len(row_string)/3
		sub_pre_node_ra = []
		for row_node_num in range(num_row_nodes):
			sub_pre_node_ra.append(row_string[row_node_num*3:row_node_num*3 + 3])
		if sub_pre_node_ra != []:
			pre_node_ra.append(sub_pre_node_ra)	

	
	# Parse pre node array into node array
	num_rows = len(pre_node_ra)/2
	num_row_nodes = len(pre_node_ra[0])
	node_ra = '.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)
	for row_num in range(num_rows):
		sub_node_ra = ['.']
		for row_node_num in range(num_row_nodes):
			pre_node_string = pre_node_ra[row_num*2 + (row_node_num + row_num)%2][row_node_num]
			if pre_node_string == '...':
				node = '.'
			else:
				node = int(pre_node_string)
			sub_node_ra.append(node)
		sub_node_ra.append('.')
		if num_row_nodes%2 == 1:
			sub_node_ra.append('.')			
		node_ra.append(sub_node_ra)
	node_ra.append(['.' for row_node_num in range(num_row_nodes + num_row_nodes%2 + 2)])

	
	# Check for parity violations
	num_parity_violations = 0
	for row_num in range(num_rows):
		for row_node_num in range(num_row_nodes):
			node = node_ra[row_num][row_node_num]
			if node != '.':
				if (node + row_num + row_node_num)%2 == 1:
					sys.stdout.write('ERROR: Parity violation for strand ' + str(node) + '.\n')
					sys.stdout.write('Parity is the row number plus the row-node number.\n')
					sys.stdout.write('Make sure even-numbered strands are on even-parity nodes.\n')
					sys.stdout.write('Make sure odd-numbered strands are on odd-parity nodes.\n')
					num_parity_violations += 1

	
	if num_parity_violations > 0 and no_length_violations == True:
		return
	else:
		return node_ra






######
# This function prints the node lattice array in a honeycomb format
######
def print_node_lattice_array(node_ra):
	sys.stdout.write('\nNODE LATTICE ARRAY\n')
	zeroes = '000'
	for y in range(len(node_ra)):
		even_row_string = 
		odd_row_string = '   '
		for x in range(len(node_ra[0])):
			string_element = str(node_ra[y][x])
			if string_element == '.':
				string_element += '..'
			else:
				string_element = zeroes[:3 - len(string_element)] + string_element
			if x%2 == 0:
				even_row_string += string_element + '   '
			else:
				odd_row_string += string_element + '   '
		if y%2 == 0:
			sys.stdout.write(even_row_string + '\n')
			sys.stdout.write(odd_row_string + '\n')
		else:
			sys.stdout.write(odd_row_string + '\n')
			sys.stdout.write(even_row_string + '\n')
		sys.stdout.write('\n')
	sys.stdout.write('\n\n')

	return






######
# This function inputs the node array and the number of 42bp zones
# and returns a token pointer pair array
######
def token_pointer_pair_array(node_ra, num_zones, periodic_structure_flag):
	# Initialize the offset array
	even_offset_ra = [[ 0,  1], [ 0, -1], [-1, 0]]
	odd_offset_ra  = [[ 0, -1], [ 0,  1], [ 1, 0]]
	offset_ra = [even_offset_ra, odd_offset_ra]

			
	#Initialize the token pointer pair array
	strand_list_ra = []
	for sub_node_ra in node_ra:
		for node in sub_node_ra:
			if node != '.' and strand_list_ra.count(node) == 0:
				strand_list_ra.append(node)
	num_strands = len(strand_list_ra)

	TPP_ra = []
	num_subzones = num_zones*6
	for strand_num in range(num_strands):
		sub_TPP_ra = []
		for token_num in range(num_subzones):
			if strand_num%2 == 0:
				previous_TP = [strand_num, (token_num + 1)%num_subzones]
				if previous_TP[1] == 0 and periodic_structure_flag == False:
					previous_TP[1] = -1
				next_TP = [strand_num, (token_num - 1)%num_subzones]
				if next_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
					next_TP[1] = -1
				sub_TPP_ra.append([previous_TP, next_TP])
			else:
				previous_TP = [strand_num, (token_num - 1)%num_subzones]
				if previous_TP[1] == (num_subzones - 1) and periodic_structure_flag == False:
					previous_TP[1] = -1
				next_TP = [strand_num, (token_num + 1)%num_subzones]
				if next_TP[1] == 0 and periodic_structure_flag == False:
					next_TP[1] = -1
				sub_TPP_ra.append([previous_TP, next_TP])
		TPP_ra.append(sub_TPP_ra)


	# Introduce crossovers based on the node array
	for donor_y in range(len(node_ra)):
		for donor_x in range(len(node_ra[0])):
			donor_strand_num = node_ra[donor_y][donor_x]
			if donor_strand_num != '.':
				for position in range(3):
					parity = (donor_y + donor_x)%2
					acceptor_y = donor_y + offset_ra[parity][position][0]
					acceptor_x = donor_x + offset_ra[parity][position][1]
					acceptor_strand_num = node_ra[acceptor_y][acceptor_x]
					if acceptor_strand_num != '.':
						for zone_num in range(num_zones):
							subzone_num = zone_num*6 + position*2 + 1 - parity
							TPP_ra[donor_strand_num][subzone_num][1] = [acceptor_strand_num, subzone_num]
							TPP_ra[acceptor_strand_num][subzone_num][0] = [donor_strand_num, subzone_num]
							
	return TPP_ra






######
# This function inputs the token pointer pair array
# and returns an array of token_pointer_paths
######
def token_pointer_path_array(TPP_ra):
	sub_visits_ra = [0 for i in range(len(TPP_ra[0]))]
	visits_ra = [sub_visits_ra[:] for i in range(len(TPP_ra))]
	path_ra = []


	for strand_num in range(len(TPP_ra)):
		for subzone_num in range(len(TPP_ra[0])):
			if visits_ra[strand_num][subzone_num] == 0:	
				previous_TP = TPP_ra[strand_num][subzone_num][0]
				next_TP = TPP_ra[strand_num][subzone_num][1]
				num_visits = 100 * len(path_ra) + 1
				visits_ra[strand_num][subzone_num] = num_visits
				sub_path_ra = strand_num, subzone_num
							
				upstream_done = False
				while not upstream_done:
					if previous_TP[1] == -1:
						upstream_done = True
					elif visits_ra[previous_TP[0]][previous_TP[1]] > 0:
						upstream_done = True
					else:
						sub_path_ra.insert(0, previous_TP)
						num_visits += 1
						visits_ra[previous_TP[0]][previous_TP[1]] = num_visits
						previous_TP = TPP_ra[previous_TP[0]][previous_TP[1]][0]
			
				downstream_done = False
				while not downstream_done:
					if next_TP[1] == -1:
						downstream_done = True
					elif visits_ra[next_TP[0]][next_TP[1]] > 0:
						downstream_done = True
					else:
						sub_path_ra.append(next_TP)
						num_visits += 1
						visits_ra[next_TP[0]][next_TP[1]] = num_visits
						next_TP = TPP_ra[next_TP[0]][next_TP[1]][1]
			
				# Make sure that the path begins between subzones, not in the middle of a subzone
				if (sub_path_ra[0][0] + sub_path_ra[0][1])%2 == 0:
					sub_path_ra = sub_path_ra[1:] + sub_path_ra[:1]
				path_ra.append(sub_path_ra)



	num_tokens_visited = 0
	for sub_path_ra in path_ra:
		num_tokens_visited += len(sub_path_ra)

	sys.stdout.write('The number of tokens visited is ' + str(num_tokens_visited) + '.\n')
			

	path_length_ra = [0 for i in range(1100)]
	for sub_path_ra in path_ra:
		path_length_ra[len(sub_path_ra)] += 1

	for length in range(1100):
		if path_length_ra[length] != 0:
			sys.stdout.write('The number of paths with length ' + str(length) + ' is ' + str(path_length_ra[length]) + '.\n')
	
	return path_ra	






######
# This function inputs the token pointer path array
# and returns a list of oligos as lists of six token pointers
######
def oligo_token_pointer_array(path_ra):
	OTP_ra = []
	for sub_path_ra in path_ra:
		for oligo_num in range(len(sub_path_ra)/6):
			OTP_ra.append(sub_path_ra[oligo_num*6:oligo_num*6 + 6])
		# Take care of the remainders
		if len(sub_path_ra)%6 != 0:
			OTP_ra.append(sub_path_ra[-(len(sub_path_ra)%6):])
	return OTP_ra






######
# This function checks to make sure each token is represented once and only once
# in the oligo token pointer lists
######
def check_token_representation(OTP_ra, TPP_ra):
	CTP_ra = []
	for sub_OTP_ra in OTP_ra:
		for TP in sub_OTP_ra:
			CTP_ra.append(TP)

	num_strands = len(TPP_ra)
	num_subzones = len(TPP_ra[0])
	problems = False

	# Check each token for single presence
	for strand_num in range(num_strands):
		for token_num in range(num_subzones):
			if CTP_ra.count([strand_num, token_num]) == 0:
				sys.stdout.write(str([strand_num, token_num]) + ' not present.\n')
				problems = True
			elif CTP_ra.count([strand_num, token_num]) > 1:
				sys.stdout.write(str([strand_num, token_num]) + ' present more than once.\n')
				problems = True
	
	if problems == False:
		sys.stdout.write('Each token is represented once and only once in the oligo token pointer lists.\n')

	return
	





######
# This function prints the oligo path on the strand token lattice
# It is fun to see how the paths twist around the lattice
# You can use this function to help debug your program
######
def print_path(sub_path_ra, TPP_ra):
	sys.stdout.write('\nONE PATH ARRAY\n')

	num_strands = len(TPP_ra)
	num_subzones = len(TPP_ra[0])
	num_path_tokens = len(sub_path_ra)
	
	# Initialize strand token lattice
	sub_token_visit_ra = ['.' for subzone_num in range(num_subzones)]
	token_visit_ra = [sub_token_visit_ra[:] for strand_num in range(num_strands)]


	# Assign visits
	for path_token_num in range(num_path_tokens):
		token = sub_path_ra[path_token_num]
		strand = token[0]
		subzone = token[1]
		token_visit_ra[strand][subzone] = path_token_num

	# Print out strand token lattice
	spacer = '   '
	for strand_num in range(num_strands):
		for subzone_num in range(num_subzones):
			visitor_string = str(token_visit_ra[strand_num][subzone_num])
			sys.stdout.write(visitor_string)
			sys.stdout.write(spacer[:4 - len(visitor_string)])
		sys.stdout.write('\n')
	
	sys.stdout.write('\n')
	
	return
	
# The idea here is to have a function that adds the numbers of one oligo path 
# to the appropriate places in the big grid array. Eventually this will be printed
# in main. Also it needs to be initialized in main. Oligo_path is the path of 
# one oligo, while grid_ra is the grid that is constantly being updated until 
# it is printed in main. oligo_num is number that will be inputed to the grid_ra.

def generate_oligo_path(oligo_path, oligo_num, grid_ra):
	num_path_tokens = len(oligo_path)
        
# Assign visits
        for path_token_num in range(num_path_tokens):
                token = oligo_path[path_token_num]
                strand = token[0]
                subzone = token[1]
                grid_ra[strand][subzone] = oligo_num
                
        
	return grid_ra


def print_all_oligos(grid_ra, num_strands, num_subzones):
        spacer = '   '
        for strand_num in range(num_strands):
                for subzone_num in range(num_subzones):
                        visitor_string = str(grid_ra[strand_num][subzone_num])
                        sys.stdout.write(visitor_string)
                        sys.stdout.write(spacer[:4 - len(visitor_string)])
                sys.stdout.write('\n')
        


####
# given an oligo to split, split it and return the new list of oligos
####
def split_oligo(new_OTP_ra, oligo_num, num_toks):

	print new_OTP_ra[oligo_num]

	original_oligo = new_OTP_ra[oligo_num]

	oligo_1 = original_oligo[:num_toks]
	oligo_2 = original_oligo[num_toks:]
	print oligo_1
	print'\n'
	print oligo_2
	
	new_OTP_ra[oligo_num] = oligo_1
	new_OTP_ra.insert(oligo_num + 1, oligo_2)
	return new_OTP_ra


sys.stdout.write('Honeycomb pointers module installed.\n')

<\pre>