User:ShawnDouglas/scripts/make-pcr-oligos.py: Difference between revisions
From OpenWetWare
< User:ShawnDouglas | scripts
ShawnDouglas (talk | contribs) mNo edit summary |
ShawnDouglas (talk | contribs) mNo edit summary |
||
Line 1: | Line 1: | ||
Used for breaking a sequence into 30mers for PCR assembly --[[User:ShawnDouglas|smd]] 14:01, 4 November 2005 (EST) | Used for breaking a sequence into 30mers for PCR assembly --[[User:ShawnDouglas|smd]] 14:01, 4 November 2005 (EST) | ||
*Reads a sequence file from STDIN | |||
*Outputs list of oligos that overlap by 15 bases which can be hierarchically assembled to form desired product | |||
*Run with -s flag to see an ASCII schematic of the original sequence and corresponding oligos | |||
<pre> | <pre> | ||
#!/usr/bin/python | #!/usr/bin/env python | ||
import fileinput | import fileinput | ||
import re | import re | ||
import string | import string | ||
import sys | import sys | ||
Line 14: | Line 16: | ||
# outputs list of n-mers | # outputs list of n-mers | ||
## | ## | ||
def nowhite(s): | |||
return ''.join([c for c in s if c in string.letters]) | |||
def rev(s): | |||
return s[::-1] | |||
complement = string.maketrans('ACGTacgt','TGCAtgca') | |||
def comp(s): | |||
return rev(s.translate(complement)) | |||
schematic = False | schematic = False | ||
Line 27: | Line 37: | ||
# read in sequence | # read in sequence | ||
for line in fileinput.input("-"): | for line in fileinput.input("-"): | ||
sequence = | sequence = nowhite(string.rstrip(line)) | ||
seqlen = len(sequence) | seqlen = len(sequence) | ||
Line 33: | Line 43: | ||
for i in range(0,seqlen,n): | for i in range(0,seqlen,n): | ||
s.append(''.join(sequence[i:i+n])) | s.append(''.join(sequence[i:i+n])) | ||
a.append(''.join( | a.append(''.join(comp(sequence[i+o:i+n+o]))) | ||
# shift last two oligos so antisense terminates sequence | # shift last two oligos so antisense terminates sequence | ||
Line 41: | Line 51: | ||
a.pop() | a.pop() | ||
a.pop() | a.pop() | ||
a.append(''.join( | a.append(''.join(comp(sequence[seqlen-n:seqlen]))) | ||
# print schematic if -s option specified | # print schematic if -s option specified |
Latest revision as of 16:37, 25 September 2006
Used for breaking a sequence into 30mers for PCR assembly --smd 14:01, 4 November 2005 (EST)
- Reads a sequence file from STDIN
- Outputs list of oligos that overlap by 15 bases which can be hierarchically assembled to form desired product
- Run with -s flag to see an ASCII schematic of the original sequence and corresponding oligos
#!/usr/bin/env python import fileinput import re import string import sys ## # reads sequence from standard input # outputs list of n-mers ## def nowhite(s): return ''.join([c for c in s if c in string.letters]) def rev(s): return s[::-1] complement = string.maketrans('ACGTacgt','TGCAtgca') def comp(s): return rev(s.translate(complement)) schematic = False if len(sys.argv) > 1: if re.match('-s$',sys.argv[1]): schematic = True # declarations n = 30 # n-mer length o = 15 # overlap length s, a = [], [] # sense and antisense lists # read in sequence for line in fileinput.input("-"): sequence = nowhite(string.rstrip(line)) seqlen = len(sequence) # record sense n-mers at base i and antisense n-mers at base i+o for i in range(0,seqlen,n): s.append(''.join(sequence[i:i+n])) a.append(''.join(comp(sequence[i+o:i+n+o]))) # shift last two oligos so antisense terminates sequence s.pop() s.append(''.join(sequence[seqlen-n-o:seqlen-o])) if len(a[-1]) == 0: a.pop() a.pop() a.append(''.join(comp(sequence[seqlen-n:seqlen]))) # print schematic if -s option specified if schematic: print s[0] if len(s) > 2: for i in range(1,len(s)-1): print ''.join([' ' for j in range(n*i-1)]), print ''.join(s[i]) print ''.join([' ' for j in range(seqlen-n-o-1)]), print ''.join(s[len(s)-1]) print '\n' + sequence + '\n' for i in range(len(a)-1): print ''.join([' ' for j in range(o-2)]), # initial o spaces print ''.join([' ' for j in range(n*i)]), # print ''.join(a[i]) print ''.join([' ' for j in range(seqlen-n-1)]), print ''.join(a[len(a)-1]) else: for i in range(len(a)): print s[i] print a[i]