Alignreads.py: Difference between revisions
From OpenWetWare
Jump to navigationJump to search
No edit summary |
No edit summary |
||
| Line 12: | Line 12: | ||
debugLog = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied | debugLog = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied | ||
cwd = os.getcwd() | cwd = os.getcwd() | ||
progName, progVersion = ('alignreads.py','2. | progName, progVersion = ('alignreads.py','2.23') | ||
progUsage = 'python %s [options] <Reads in .fa file> <Reference> OR...\n python %s [options] <YASRA folder>' % (progName, progName) | progUsage = 'python %s [options] <Reads in .fa file> <Reference> OR...\n python %s [options] <YASRA folder>' % (progName, progName) | ||
mulitipleRuns = False | mulitipleRuns = False | ||
| Line 145: | Line 145: | ||
cmndLineParser.add_option_group(sumqualGroup) | cmndLineParser.add_option_group(sumqualGroup) | ||
cmndLineParser.add_option_group(qualtofaGroup) | cmndLineParser.add_option_group(qualtofaGroup) | ||
cmndLineParser.add_option_group(maskingGroup) | |||
runyasraArgs = {'read_type' : '--read-type', 'read_orientation' : '--orientation', 'percent_identity' : '--percent-identity', 'single_step' : '--single-step', 'dot_replace_reads' : '--remove-dots-reads', 'external_makefile' : '--make-path', 'verbose' : '--verbose', 'dos2unix_ref' : '--dos2unix-ref'} | runyasraArgs = {'read_type' : '--read-type', 'read_orientation' : '--orientation', 'percent_identity' : '--percent-identity', 'single_step' : '--single-step', 'dot_replace_reads' : '--remove-dots-reads', 'external_makefile' : '--make-path', 'verbose' : '--verbose', 'dos2unix_ref' : '--dos2unix-ref'} | ||
nucmerArgs = {'break_length' : '--breaklen', 'min_cluster' : '--mincluster', 'diag_factor' : '--diagfactor', 'no_extend' : '--[no]extend', 'forward_only' : '--forward','max_gap' : '--maxgap', 'coords' : '--coords', 'no_optimize' : '--[no]optimize', 'prefix' : '--prefix', 'no_simplify' : '--[no]simplify'} | nucmerArgs = {'break_length' : '--breaklen', 'min_cluster' : '--mincluster', 'diag_factor' : '--diagfactor', 'no_extend' : '--[no]extend', 'forward_only' : '--forward','max_gap' : '--maxgap', 'coords' : '--coords', 'no_optimize' : '--[no]optimize', 'prefix' : '--prefix', 'no_simplify' : '--[no]simplify'} | ||
| Line 258: | Line 259: | ||
try: yasraRef = runyasraFolder + '/' + os.path.basename(yasraRef) | try: yasraRef = runyasraFolder + '/' + os.path.basename(yasraRef) | ||
except NameError: errorExit('Unable to find path to reference file from yasra Makefile at %s' % makePath) | except NameError: errorExit('Unable to find path to reference file from yasra Makefile at %s' % makePath) | ||
############################################################################################################################ | ############################################################################################################################ | ||
Latest revision as of 00:04, 23 September 2010
Copy and past the following into a text editor and save it as alignreads.py to use the script.
#!/usr/bin/env python
###Imports / Variable Initilization##########################################################################################
import os, string, sys, time, copy
from optparse import *
from subprocess import *
argList = sys.argv #argumen list supplied by user
argNum = len(argList) #the number of arguments supplied
minArgNum = 1 #the smallest amount of arguments with which it is possible to run the script
debugLog = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
cwd = os.getcwd()
progName, progVersion = ('alignreads.py','2.23')
progUsage = 'python %s [options] <Reads in .fa file> <Reference> OR...\n python %s [options] <YASRA folder>' % (progName, progName)
mulitipleRuns = False
yasraSubFolderName = 'YASRA_related_files'
nucmerSubFolderName = 'NUCmer_related_files'
multRunSubFolderName = 'aligned_data'
#############################################################################################################################
def errorExit(message=None,exitStatus=1):
'''Version 1.0
Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
if message:
print '%s: Error: %s' % (progName, message)
else:
print progName +': Unknown error, printing debug log...'
for line in debugLog: print line #print debug log
sys.exit(exitStatus)
###Command Line Parser Initilization#########################################################################################
cmndLineParser = OptionParser(usage=progUsage, version="Version %s" % progVersion)
yasraGroup = OptionGroup(cmndLineParser, "YASRA-Related Modifiers") #Yasra Modifiers
nucmerGroup = OptionGroup(cmndLineParser, "NUCmer-Related Modifiers") #Nucmer Modifiers
deltaGroup = OptionGroup(cmndLineParser, "Delta-Filter-Related Modifiers") #delta-filter Modifiers
sumqualGroup = OptionGroup(cmndLineParser, "sumqual-Related Modifiers") #sumqual Modifiers'
qualtofaGroup = OptionGroup(cmndLineParser, "qualtofa-Related Modifiers") #qualtofa Modifiers
maskingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
#############################################################################################################################
def maskingCallback(option, opt_str, value, parser):
value = []
def floatable(str):
try:
float(str)
return True
except ValueError: return False
for arg in parser.rargs:
if arg[:2] == "--" and len(arg) > 2: break # stop on --foo like options
if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break # stop on -a, but not on -3 or -3.0
value.append(arg)
if len(value) >= 2: break
if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
del parser.rargs[:len(value)]
if len(value) == 1: value.append(0)
value[0] = int(value[0])
value[1] = float(value[1])
setattr(parser.values, option.dest, value)
###Basic Option Initilization#######################################################################################################
cmndLineParser.add_option( "-H", "--advanced-help", action="store_true", default=False, dest="advanced_help", help="Display help information for all supported options (Default: only basic options are shown)")
cmndLineParser.add_option( "-z", "--import-options", action="store", default="", type="string", dest="import_options", metavar="STRING", help="specify the path to a Command_Line_Record.txt fie from a previous run, or the folder that contains one. Any other options used with this one are overwritten. (Default: use options supplied)")
yasraGroup.add_option( "-d", "--silent", action="store_false", default=True, dest="verbose", help="Nothing is printed to the screen (Default: print the output of yasra to the screen)")
yasraGroup.add_option( "-t", "--read-type", action="store", default="solexa", type="choice", dest="read_type", choices=["454","solexa"], metavar="454 or solexa", help="Specify the type of reads. (Default: solexa)")
yasraGroup.add_option( "-o", "--read-orientation", action="store", default="circular", type="choice", dest="read_orientation", choices=["circular","linear"], metavar="circular or linear", help="Specify orientation of the sequence. (Default: circular")
yasraGroup.add_option( "-p", "--percent-identity", action="store", default="same", type="choice", dest="percent_identity", choices=["same","high","medium","low","verylow","desperate"], metavar="same, high, medium, low or very low", help="The percent identity (PID in yasra). The settings correspond to different percent values depending on the read type (-t). (Defalt: same)")
yasraGroup.add_option( "-a", "--single-step", action="store_true", default=False, dest="single_step", help="Activate yasra's single_step option (Default: run yasra normally)")
nucmerGroup.add_option( "-f", "--prefix", action="store", default="out", type="string", dest="prefix", metavar="STRING", help="Set the output file prefix (Default: out)")
nucmerGroup.add_option( "-b", "--break-length", action="store", default=200, type="int", dest="break_length", metavar="INT", help="Distance an alignment extension will attempt to extend poor scoring regions before giving up (Default: 200)")
nucmerGroup.add_option( "-j", "--alternate-ref", action="store", default="", type="string", dest="alternate_ref", metavar="INT", help="Specify a new refence to be used in the rest of the alignment after yasra. (Default: use YASRA's reference)")
deltaGroup.add_option( "-y", "--min-identity", action="store", default=80, type="int", dest="min_identity", metavar="INT", help="Set the minimum alignment identity [0, 100], (Default: 80)")
deltaGroup.add_option( "-l", "--min-align-length", action="store", default=100, type="int", dest="min_align_length", metavar="INT", help="Set the minimum alignment length (Default: 100)")
#sumqualGroup.add_option( "-e", "--dont-extend", action="store_false", default=True, dest="extend", help="Dont include parts of contigs that extend past the start and end of the reference in the consensus (Default: extend contigs)")
qualtofaGroup.add_option( "-c", "--exclude-contigs", action="store_false", default=True, dest="include_contigs", help="Dont include each contig on its own line (Default: include contigs)")
qualtofaGroup.add_option( "-i", "--no-match-overlap", action="store_true", default=False, help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
qualtofaGroup.add_option( "-e", "--no-overlap", action="store_true", default=False, help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
qualtofaGroup.add_option( "-k", "--keep-contained", action="store_true", default=False, dest="keep_contained", help="Include contained contigs (Defalt: save sequences of contained contigs to a separate file)")
qualtofaGroup.add_option( "-q", "--end-trim-qual", action="store", default=0, type="int", dest="end_trim_qual", metavar="INT", help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount (Default: 0)")
qualtofaGroup.add_option( "-s", "--dont-save-SNPs", action="store_false", default=True, dest="save_SNPs", help="Dont save SNPs to a .qual file(Default: Save SNP file)")
maskingGroup.add_option( "-m", "--mask-contigs", action="callback", default=[0,0], metavar='INT FLOAT', callback=maskingCallback, dest='mask_contigs', help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Cannot be used with the -c modifier.(Default: 0, 0)")
maskingGroup.add_option( "-n", "--mask-contig-SNPs", action="callback", default=[0,0], metavar='INT FLOAT', callback=maskingCallback, dest='mask_contig_SNPs', help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Cannot be used with the -c modifier.(Default: 0, 0)")
maskingGroup.add_option( "-w", "--mask-consensus", action="callback", default=[0,0], metavar='INT FLOAT', callback=maskingCallback, dest='mask_consensus', help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
maskingGroup.add_option( "-x", "--mask-SNPs", action="callback", default=[0,0], metavar='INT FLOAT', callback=maskingCallback, dest='mask_SNPs', help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
#############################################################################################################################
###Simple Help Menu##########################################################################################################
if len(argList) == 1: #if no arguments are supplied
cmndLineParser.add_option_group(yasraGroup)
cmndLineParser.add_option_group(nucmerGroup)
cmndLineParser.add_option_group(deltaGroup)
#cmndLineParser.add_option_group(sumqualGroup)
cmndLineParser.add_option_group(qualtofaGroup)
cmndLineParser.add_option_group(maskingGroup)
cmndLineParser.print_help()
sys.exit(0)
#############################################################################################################################
###Advanced Option Initilization#############################################################################################
cmndLineParser.add_option( "-Z", "--debug", action="store_true", default=False, dest="debug", help="Save the debug log to the current working directory. (Default: dont save)")
yasraGroup.add_option( "-E", "--external-makefile", action="store", default=False, type="string", dest="external_makefile", metavar="FILEPATH", help="Specify path to external makefile used by YASRA. (Default: use the makefile built in to runyasra)")
yasraGroup.add_option( "-Q", "--no-dot-replace-reads",action="store_false", default=True, dest="dot_replace_reads", help="Do NOT replace N's with dots (.) in the microreads file before running yasra/ (Default: replace dots)")
yasraGroup.add_option( "-I", "--no-dos2unix-ref", action="store_false", default=True, dest="dos2unix_ref", help="Do NOT run dos2unix on the reference before running yasra/ (Default: run dos2unix)")
nucmerGroup.add_option( "-A", "--anchor-uniqueness", action="store", default="ref", type="choice", dest="anchor_uniqueness", choices=["mum","ref","max"], metavar="mum, ref, or max", help="Specify how NUCmer chooses anchor matches using one of three settings: mum = Use anchor matches that are unique in both the reference and query, ref = Use anchor matches that are unique in the reference but not necessarily unique in the query, max = Use all anchor matches regardless of their uniqueness. (Default = ref)")
nucmerGroup.add_option( "-T", "--min-cluster", action="store", default=65, type="int", dest="min_cluster", metavar="INT", help="Minimum cluster length used in the NUCmer analysis. (Default: 65)")
nucmerGroup.add_option( "-D", "--diag-factor", action="store", default=0.12, type="float", dest="diag_factor", metavar="FLOAT", help="Maximum diagonal difference factor for clustering, i.e. diagonal difference / match separation used by NUCmer. (Default: 0.12)")
nucmerGroup.add_option( "-J", "--no-extend", action="store_true", default=False, dest="no_extend", help="Prevent alignment extensions from their anchoring clusters but still align the DNA between clustered matches in NUCmer. (Default: extend)")
nucmerGroup.add_option( "-F", "--forward-only", action="store_true", default=False, dest="forward_only", help="Align only the forward strands of each sequence. (Default: forward and reverse)")
nucmerGroup.add_option( "-X", "--max-gap", action="store", default=90, type="int", dest="max_gap", metavar="INT", help="Maximum gap between two adjacent matches in a cluster. (Default: 90)")
nucmerGroup.add_option( "-M", "--min-match", action="store", default=20, type="int", dest="min_match", metavar="INT", help="Minimum length of an maximal exact match. (Default: 20)")
nucmerGroup.add_option( "-C", "--coords", action="store_true", default=False, dest="coords", help="Automatically generate the <prefix>.coords file using the 'show-coords' program with the -r option. (Default: dont)")
nucmerGroup.add_option( "-O", "--no-optimize", action="store_true", default=False, dest="no_optimize", help="Toggle alignment score optimization. Setting --nooptimize will prevent alignment score optimization and result in sometimes longer, but lower scoring alignments (default: optimize)")
nucmerGroup.add_option( "-S", "--no-simplify", action="store_true", default=False, dest="no_simplify", help="Simplify alignments by removing shadowed clusters. Turn this option off if aligning a sequence to itself to look for repeats. (Default: simplify)")
deltaGroup.add_option( "-K", "--max-overlap", action="store", default=100, type="float", dest="max_overlap", metavar="FLOAT", help="Set the maximum alignment overlap for -r and -q options as a percent of the alignment length [0, 100]. (Default 100)")
deltaGroup.add_option( "-B", "--query-alignment", action="store_true", default=False, dest="query_alignment", help="Query alignment using length*identity weighted LIS. For each query, leave only the alignments which form the longest consistent set for the query. (Defualt: global alignment)")
deltaGroup.add_option( "-R", "--ref-alignment", action="store_true", default=False, dest="ref_alignment", help="Reference alignment using length*identity weighted LIS. For each reference, leave only the alignments which form the longest consistent set for the reference. (Defualt: global alignment)")
deltaGroup.add_option( "-G", "--global-alignment", action="store_false", default=True, dest="global_alignment", help="Global alignment using length*identity weighted LIS (longest increasing subset). For every reference-query pair, leave only the alignments which form the longest mutually consistent set. (this is the default)")
deltaGroup.add_option( "-U", "--min-uniqueness", action="store", default=0, type="float", dest="min_uniqueness", metavar="FLOAT", help="Set the minimum alignment uniqueness, i.e. percent of the alignment matching to unique reference AND query sequence [0, 100]. (Default 0)")
sumqualGroup.add_option( "-Y", "--save-ref-dels", action="store_true", default=False, dest="save_ref_dels", help="Save the sequence of the reference that corresponds to empty gaps in the consensus in a fasta file. (Default: dont save)")
#sumqualGroup.add_option( "-V", "--remove-ref-dels", action="store_true", default=False, dest="remove_ref_dels", help="Only include portions of the reference that are represented by contig quality values (i.e. no empty bases in the consensus). (Defualt: included all of the reference)")
qualtofaGroup.add_option( "-W", "--dont-align-contigs", action="store_true", default=False, dest="dont_align_contigs", help="Do NOT align contigs to the reference using '-'s at the start of each contig; independent of the consensus. (Default: align contigs)")
#qualtofaGroup.add_option( "-P", "--only-matching", action="store_true", default=False, dest="only_matching", help="Only include regions of the contigs that NUCmer identifies as matching (Default: include non-matching regions)")
qualtofaGroup.add_option( "-N", "--end-trim-num", action="store", default=0, type="int", dest="end_trim_num", metavar="INT", help="Trim the ends of the contigs by the specified number of bases. (Default: 0)")
qualtofaGroup.add_option( "-L", "--min-match-length", action="store", default=50, type="int", dest="min_match_length", metavar="INT", help="Set minimum length of the matching region of the contigs. (Default: 50)")
#############################################################################################################################
def strCmndLine(cmndLine):
out = ''
for part in cmndLine:
out += str(part) + ' '
return out[:-1]
def argNum(cmndLine):
out = 0
for part in cmndLine:
if part[0] != '-':
out += 1
return out
###Command Line Parsing and Advanced Help####################################################################################
cmndLineParser.add_option_group(yasraGroup)
cmndLineParser.add_option_group(nucmerGroup)
cmndLineParser.add_option_group(deltaGroup)
cmndLineParser.add_option_group(sumqualGroup)
cmndLineParser.add_option_group(qualtofaGroup)
cmndLineParser.add_option_group(maskingGroup)
runyasraArgs = {'read_type' : '--read-type', 'read_orientation' : '--orientation', 'percent_identity' : '--percent-identity', 'single_step' : '--single-step', 'dot_replace_reads' : '--remove-dots-reads', 'external_makefile' : '--make-path', 'verbose' : '--verbose', 'dos2unix_ref' : '--dos2unix-ref'}
nucmerArgs = {'break_length' : '--breaklen', 'min_cluster' : '--mincluster', 'diag_factor' : '--diagfactor', 'no_extend' : '--[no]extend', 'forward_only' : '--forward','max_gap' : '--maxgap', 'coords' : '--coords', 'no_optimize' : '--[no]optimize', 'prefix' : '--prefix', 'no_simplify' : '--[no]simplify'}
deltaArgs = {'global_alignment' : '-g','min_identity' : '-i','min_align_length' : '-l','query_alignment' : '-q','ref_alignment' : '-r','min_uniqueness' : '-u','max_overlap' : '-o'}
sumqualArgs = {'save_ref_dels' : '--save-gaps', 'verbose' : '--verbose'}
qualtofaArgs = {'verbose' : '--verbose', 'include_contigs' : '-c','mask_contigs' : '--mask-contigs','mask_contig_SNPs' : '--mask-contig-SNPs', 'dont_align_contigs' : '--dont-align-contigs', 'no_match_overlap' : '--no-match-overlap','no_overlap' : '--no-overlap', 'mask_consensus' : '--mask-consensus', 'mask_SNPs' : '--mask-SNPs', 'keep_contained' : '--keep-contained', 'end_trim_num' : '--end-trim', 'end_trim_qual' : '--end-trim-qual', 'min_match_length' : '--min-match-length', 'save_SNPs' : '--save-SNPs'}
(options, args) = cmndLineParser.parse_args(argList[1:])
if len(args) > 2:
errorExit('Too many argments supplied. alignreads.py takes either 1 directory or 2 files as arguments. %s arguments supplied...' % str(len(args)))
if options.advanced_help:
cmndLineParser.print_help()
sys.exit(0)
############################################################################################################################
###Option import modifier###################################################################################################
if len(options.import_options) > 0 and os.path.exists(options.import_options):
if os.path.isdir(options.import_options):
recordPath = options.import_options + '/Command_Line_Record.txt'
elif os.path.exists(options.import_options) and options.import_options[-23:] == 'Command_Line_Record.txt':
recordPath = options.import_options
else:
errorExit('incorrect path to command line record')
record = open(recordPath)
newCmndLine = record.readline()
argList = newCmndLine.split()
(options, notArgs) = cmndLineParser.parse_args(argList[1:])
record.close()
############################################################################################################################
def getProgPath(progName):
prevCWD = os.getcwd()
allPaths = os.environ['PATH'].split(':')
matchs = []
for dirPath in allPaths:
path = dirPath + '/' + progName
if os.path.exists(path):
matchs.append([os.stat(path).st_mtime,path])
matchs.sort()
matchs.reverse()
if len(matchs) >= 1:
return matchs[0][1]
else:
return -1
def getOptCmndLine(args,converterKey):
out = []
for equivalancy in converterKey.iteritems(): #equivalancy = (alignreads option, yasra option)
option = getattr(args, equivalancy[0])
if option != False: #if the option is a boolean and is the default value, therefore not nessesary (always false in this script)
out.append(str(equivalancy[1]))
if option != True:
if type(option) != list: option = [option]
for opt in option: out.append(str(opt))
return out
def mvFiles(folder):
'''takes all of the files in the current working directory and moves them to a new directory within it caled folder'''
fileList = os.listdir(os.getcwd())
os.mkdir(folder)
for path in fileList:
if os.path.isdir(path) == False:
old = './' + path
new = './' + os.path.basename(folder) + '/' + path
os.rename(old,new)
###YASRA####################################################################################################################
#yasraPath = getProgPath('runyasra')
runyasraFolder = ''
pastRuns = False #see end of script
if len(args) == 2: #if runyasra is to be used...
yasraQuery = os.path.join(cwd,args[0])
yasraRef = os.path.join(cwd,args[1])
runyasraCmndLine = ['runyasra.py'] + getOptCmndLine(options,runyasraArgs) + [yasraQuery,yasraRef]
sys.argv = runyasraCmndLine
if options.verbose: print 'alignreads: implimenting YASRA with runyasra.py using the following command line: %s' % ' '.join(runyasraCmndLine)
import runyasra
yasraFolder = runyasra.outDirPath
os.chdir(yasraFolder)
mvFiles(yasraSubFolderName)
runyasraFolder = os.path.join(yasraFolder,yasraSubFolderName)
elif len(args) == 1 and os.path.isdir(args[0]): #if the user supplied the folder of a previous yasra run, insead of running it again
yasraFolder = os.path.join(cwd,args[0])
runyasraFolder = os.path.join(yasraFolder,yasraSubFolderName)
os.chdir(yasraFolder)
if os.path.exists(runyasraFolder): #if the full pipeline had been run on the yasra data
pastRuns = True #see end of script
makePath = os.path.join(runyasraFolder, 'Makefile')
makefileHandle = open(makePath, 'r')
nucmerSubFolderPath = os.path.join(yasraFolder,nucmerSubFolderName)
if os.path.exists(nucmerSubFolderPath): #if the input directory has gone through the pipeline exactly once before
mvFiles(multRunSubFolderName + '_1')
newNSFP = os.path.join(yasraFolder,multRunSubFolderName + '_1',nucmerSubFolderName)
os.rename(nucmerSubFolderPath,newNSFP)
nucmerSubFolderPath = newNSFP
if options.verbose:
nextRunNum = max([int(name[len(multRunSubFolderName) + 1:]) for name in os.listdir(yasraFolder) if os.path.isdir(name) and name.startswith(multRunSubFolderName)]) + 1
print 'alignreads: Using previous alignreads output at %s. Saving new ouput to %s.' % (yasraFolder,os.path.join(yasraFolder,multRunSubFolderName + '_' + str(nextRunNum)))
else:
if options.verbose: print 'alignreads: Using previous yasra ouput at %s (New data will be saved within this folder).' % yasraFolder
makePath = os.path.join(os.getcwd(),'Makefile')
makefileHandle = open(makePath, 'r')
mvFiles(yasraSubFolderName)
makefile = makefileHandle.readlines()
makefileHandle.close()
for line in makefile: #the makefile used in the previously run yasra folder is searched to find the names of the input files
if line.find('READS=') == 0:
yasraQuery = line.strip().replace('READS=','')
elif line.find('TEMPLATE=') == 0:
yasraRef = line.strip().replace('TEMPLATE=','')
break
try: yasraQuery = runyasraFolder + '/' + os.path.basename(yasraQuery)
except NameError: errorExit('Unable to find path to read file from yasra Makefile at %s' % makePath)
try: yasraRef = runyasraFolder + '/' + os.path.basename(yasraRef)
except NameError: errorExit('Unable to find path to reference file from yasra Makefile at %s' % makePath)
############################################################################################################################
def printProcess(process):
'''Prints the standard ouput and error messages of a process while the process ia running.'''
outLen = 0
errLen = 0
out = ''
err = ''
while process.poll() is None: #while it hasnt finished...
(outData, errData) = process.communicate()
outDiff = len(outData) - outLen
errDiff = len(errData) - errLen
if outDiff > 0:
outLen += outDiff
out += outData[-outDiff:]
print outData[-outDiff:]
if errDiff > 0:
errLen += errDiff
err += errData[-errDiff:]
print errData[-errDiff:]
return (out, err)
###NUCmer###################################################################################################################
if options.alternate_ref != "":
nucRef = options.alternate_ref
else:
nucRef = yasraRef
nucmerPath = getProgPath('nucmer')
nucQuery = os.path.join(runyasraFolder,'Final_Assembly_%s_%s' % (os.path.basename(yasraQuery), os.path.basename(yasraRef)))
nucmerCmndLine = [nucmerPath] + getOptCmndLine(options,nucmerArgs) + [nucRef,nucQuery] #USAGE: nucmer [options] <Reference> <Query>
nucmerStdOut = open(os.path.join(yasraFolder,'nucmer_standard_output.txt'), 'w')
nucmerStdErr = open(os.path.join(yasraFolder,'nucmer_standard_error.txt'), 'w')
nucmerProcess = Popen(nucmerCmndLine, stdout=PIPE, stderr=PIPE)
if options.verbose:
(nucOutData, nucErrData) = printProcess(nucmerProcess)
else:
(nucOutData, nucErrData) = nucmerProcess.communicate()
nucmerProcess.wait()
nucmerStdOut.write(nucOutData)
nucmerStdOut.write(nucErrData)
nucmerStdOut.close()
nucmerStdOut.close()
if nucmerProcess.returncode != 0: errorExit('Nucmer returned a non-zero code; it may have not completed successfully')
############################################################################################################################
###delta-filter#############################################################################################################
deltaPath = getProgPath('delta-filter')
nucOut = os.path.join(yasraFolder,options.prefix + '.delta')
deltaCmndLine = [deltaPath] + getOptCmndLine(options,deltaArgs) + [nucOut] #USAGE: delta-filter [options] <deltafile>
deltaOut = os.path.join(yasraFolder,options.prefix + '_filtered.delta')
filtered = open(deltaOut, 'w')
if call(deltaCmndLine, stdout=filtered) != 0: errorExit('delta-filter returned a non-zero code; it may have not completed successfully')
filtered.close()
mvFiles(nucmerSubFolderName)
############################################################################################################################
###sumqual and qualtofa#####################################################################################################
deltaPath = os.path.join(yasraFolder,nucmerSubFolderName,os.path.basename(deltaOut))
qualPath = os.path.join(runyasraFolder,'Assembly_%s_%s.qual' % (os.path.basename(yasraQuery), os.path.basename(yasraRef)))
sys.argv = ['sumqual.py'] + getOptCmndLine(options,sumqualArgs) + ['--save-run-info'] + [qualPath,deltaPath,nucRef] #sumqual.py [options] <quality file path> <NUCmer file path> <Reference Path>
import sumqual
sumqualOutPath = os.path.join(yasraFolder,os.path.basename(deltaOut) + '.qual')
sys.argv = ['qualtofa.py'] + getOptCmndLine(options,qualtofaArgs) + ['--save-run-info'] + [sumqualOutPath]
import qualtofa
############################################################################################################################
def listToString(List):
out = ''
for item in List:
out += str(item) + ' '
return out[:-1]
###Command Line Record Saving###############################################################################################
cmndLineOut = open('Command_Line_Record.txt', 'w')
strLine = ''
for part in argList:
strLine += str(part) + ' '
strLine = strLine[:-1]
cmndLineOut.write(strLine)
cmndLineOut.close()
#############################################################################################################################
def getNum():
nums = []
for path in os.listdir(os.getcwd()):
baseName = os.path.basename(path)
if baseName.startswith('aligned_data_'):
nums.append(int(baseName[13:]))
if len(nums) > 0:
return max(nums)
else:
return 0
###########################################################################################################################
if pastRuns:
os.chdir(yasraFolder)
nextRunNum = max([int(name[len(multRunSubFolderName) + 1:]) for name in os.listdir(yasraFolder) if os.path.isdir(name) and name.startswith(multRunSubFolderName)]) + 1
multRunSubFolderName = '%s_%s' % (multRunSubFolderName,nextRunNum)
multRunSubFolderPath = os.path.join(yasraFolder,multRunSubFolderName)
nucmerSubFolderPath = os.path.join(yasraFolder,nucmerSubFolderName)
mvFiles(multRunSubFolderName)
os.rename(nucmerSubFolderPath,os.path.join(multRunSubFolderPath,nucmerSubFolderName))
############################################################################################################################