User:ShawnDouglas/scripts/make-pcr-oligos.py: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
mNo edit summary
 
mNo edit summary
 
Line 1: Line 1:
Used for breaking a sequence into 30mers for PCR assembly  --[[User:ShawnDouglas|smd]] 14:01, 4 November 2005 (EST)
Used for breaking a sequence into 30mers for PCR assembly  --[[User:ShawnDouglas|smd]] 14:01, 4 November 2005 (EST)


*Reads a sequence file from STDIN
*Outputs list of oligos that overlap by 15 bases which can be hierarchically assembled to form desired product
*Run with -s flag to see an ASCII schematic of the original sequence and corresponding oligos
<pre>
<pre>
#!/usr/bin/python
#!/usr/bin/env python


import fileinput
import fileinput
import re
import re
import seq
import string
import string
import sys
import sys
Line 14: Line 16:
# outputs list of n-mers
# outputs list of n-mers
##
##
def nowhite(s):
  return ''.join([c for c in s if c in string.letters])
def rev(s):
  return s[::-1]
complement = string.maketrans('ACGTacgt','TGCAtgca')
def comp(s):
  return rev(s.translate(complement))


schematic = False
schematic = False
Line 27: Line 37:
# read in sequence
# read in sequence
for line in fileinput.input("-"):
for line in fileinput.input("-"):
   sequence = seq.nowhite(string.rstrip(line))
   sequence = nowhite(string.rstrip(line))
   seqlen = len(sequence)
   seqlen = len(sequence)


Line 33: Line 43:
   for i in range(0,seqlen,n):
   for i in range(0,seqlen,n):
     s.append(''.join(sequence[i:i+n]))
     s.append(''.join(sequence[i:i+n]))
     a.append(''.join(seq.comp(sequence[i+o:i+n+o])))
     a.append(''.join(comp(sequence[i+o:i+n+o])))


   # shift last two oligos so antisense terminates sequence
   # shift last two oligos so antisense terminates sequence
Line 41: Line 51:
     a.pop()
     a.pop()
   a.pop()
   a.pop()
   a.append(''.join(seq.comp(sequence[seqlen-n:seqlen])))
   a.append(''.join(comp(sequence[seqlen-n:seqlen])))


   # print schematic if -s option specified
   # print schematic if -s option specified

Latest revision as of 16:37, 25 September 2006

Used for breaking a sequence into 30mers for PCR assembly --smd 14:01, 4 November 2005 (EST)

  • Reads a sequence file from STDIN
  • Outputs list of oligos that overlap by 15 bases which can be hierarchically assembled to form desired product
  • Run with -s flag to see an ASCII schematic of the original sequence and corresponding oligos
#!/usr/bin/env python

import fileinput
import re
import string
import sys

##
# reads sequence from standard input
# outputs list of n-mers
##

def nowhite(s):
  return ''.join([c for c in s if c in string.letters])
def rev(s):
  return s[::-1]
complement = string.maketrans('ACGTacgt','TGCAtgca')
def comp(s):
  return rev(s.translate(complement))

schematic = False
if len(sys.argv) > 1:
  if re.match('-s$',sys.argv[1]):
    schematic = True

# declarations
n = 30          # n-mer length
o = 15          # overlap length
s, a = [], []   # sense and antisense lists

# read in sequence
for line in fileinput.input("-"):
  sequence = nowhite(string.rstrip(line))
  seqlen = len(sequence)

  # record sense n-mers at base i and antisense n-mers at base i+o
  for i in range(0,seqlen,n):
    s.append(''.join(sequence[i:i+n]))
    a.append(''.join(comp(sequence[i+o:i+n+o])))

  # shift last two oligos so antisense terminates sequence
  s.pop()
  s.append(''.join(sequence[seqlen-n-o:seqlen-o]))
  if len(a[-1]) == 0:
    a.pop()
  a.pop()
  a.append(''.join(comp(sequence[seqlen-n:seqlen])))

  # print schematic if -s option specified
  if schematic:
    print s[0]

    if len(s) > 2:
      for i in range(1,len(s)-1):
        print ''.join([' ' for j in range(n*i-1)]),
        print ''.join(s[i])

    print ''.join([' ' for j in range(seqlen-n-o-1)]),
    print ''.join(s[len(s)-1])

    print  '\n' + sequence + '\n'
    for i in range(len(a)-1):
      print ''.join([' ' for j in range(o-2)]),         # initial o spaces
      print ''.join([' ' for j in range(n*i)]),         # 
      print ''.join(a[i])

    print ''.join([' ' for j in range(seqlen-n-1)]),  
    print ''.join(a[len(a)-1])
  else:
    for i in range(len(a)):
      print s[i]
      print a[i]