User:ShawnDouglas/scripts/make-pcr-oligos.py
From OpenWetWare
Jump to navigationJump to search
Used for breaking a sequence into 30mers for PCR assembly --smd 14:01, 4 November 2005 (EST)
- Reads a sequence file from STDIN
- Outputs list of oligos that overlap by 15 bases which can be hierarchically assembled to form desired product
- Run with -s flag to see an ASCII schematic of the original sequence and corresponding oligos
#!/usr/bin/env python
import fileinput
import re
import string
import sys
##
# reads sequence from standard input
# outputs list of n-mers
##
def nowhite(s):
return ''.join([c for c in s if c in string.letters])
def rev(s):
return s[::-1]
complement = string.maketrans('ACGTacgt','TGCAtgca')
def comp(s):
return rev(s.translate(complement))
schematic = False
if len(sys.argv) > 1:
if re.match('-s$',sys.argv[1]):
schematic = True
# declarations
n = 30 # n-mer length
o = 15 # overlap length
s, a = [], [] # sense and antisense lists
# read in sequence
for line in fileinput.input("-"):
sequence = nowhite(string.rstrip(line))
seqlen = len(sequence)
# record sense n-mers at base i and antisense n-mers at base i+o
for i in range(0,seqlen,n):
s.append(''.join(sequence[i:i+n]))
a.append(''.join(comp(sequence[i+o:i+n+o])))
# shift last two oligos so antisense terminates sequence
s.pop()
s.append(''.join(sequence[seqlen-n-o:seqlen-o]))
if len(a[-1]) == 0:
a.pop()
a.pop()
a.append(''.join(comp(sequence[seqlen-n:seqlen])))
# print schematic if -s option specified
if schematic:
print s[0]
if len(s) > 2:
for i in range(1,len(s)-1):
print ''.join([' ' for j in range(n*i-1)]),
print ''.join(s[i])
print ''.join([' ' for j in range(seqlen-n-o-1)]),
print ''.join(s[len(s)-1])
print '\n' + sequence + '\n'
for i in range(len(a)-1):
print ''.join([' ' for j in range(o-2)]), # initial o spaces
print ''.join([' ' for j in range(n*i)]), #
print ''.join(a[i])
print ''.join([' ' for j in range(seqlen-n-1)]),
print ''.join(a[len(a)-1])
else:
for i in range(len(a)):
print s[i]
print a[i]