Alignreads.py: Difference between revisions

Latest revision as of 17:04, 22 September 2010

Copy and past the following into a text editor and save it as alignreads.py to use the script.
#!/usr/bin/env python
###Imports / Variable Initilization##########################################################################################
import os, string, sys, time, copy
from optparse import *
from subprocess import *
argList     = sys.argv #argumen list supplied by user
argNum      = len(argList) #the number of arguments supplied
minArgNum   = 1 #the smallest amount of arguments with which it is possible to run the script
debugLog    = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
cwd         = os.getcwd()
progName, progVersion = ('alignreads.py','2.23')
progUsage = 'python %s [options] <Reads in .fa file> <Reference>  OR...\n       python %s [options] <YASRA folder>' % (progName, progName)
mulitipleRuns = False
yasraSubFolderName = 'YASRA_related_files'
nucmerSubFolderName = 'NUCmer_related_files'
multRunSubFolderName = 'aligned_data'
#############################################################################################################################

def errorExit(message=None,exitStatus=1):
    '''Version 1.0
    Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
    
    if message:
        print '%s: Error: %s' % (progName, message)
    else:
        print progName +': Unknown error, printing debug log...'
        for line in debugLog: print line   #print debug log
    sys.exit(exitStatus)

###Command Line Parser Initilization#########################################################################################
cmndLineParser  = OptionParser(usage=progUsage, version="Version %s" % progVersion)
yasraGroup      = OptionGroup(cmndLineParser, "YASRA-Related Modifiers") #Yasra Modifiers
nucmerGroup     = OptionGroup(cmndLineParser, "NUCmer-Related Modifiers") #Nucmer Modifiers
deltaGroup      = OptionGroup(cmndLineParser, "Delta-Filter-Related Modifiers") #delta-filter Modifiers
sumqualGroup    = OptionGroup(cmndLineParser, "sumqual-Related Modifiers") #sumqual Modifiers'
qualtofaGroup   = OptionGroup(cmndLineParser, "qualtofa-Related Modifiers") #qualtofa Modifiers
maskingGroup    = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
#############################################################################################################################

def maskingCallback(option, opt_str, value, parser):
    value = []
    def floatable(str):
        try:
            float(str)
            return True
        except ValueError: return False
    for arg in parser.rargs:         
        if arg[:2] == "--" and len(arg) > 2: break   # stop on --foo like options             
        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break   # stop on -a, but not on -3 or -3.0
        value.append(arg)
        if len(value) >= 2: break
    if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
    del parser.rargs[:len(value)]
    if len(value) == 1: value.append(0)
    value[0] = int(value[0])
    value[1] = float(value[1])
    setattr(parser.values, option.dest, value)

###Basic Option Initilization#######################################################################################################
cmndLineParser.add_option(  "-H",   "--advanced-help",      action="store_true",    default=False,                      dest="advanced_help",                                                                                                           help="Display help information for all supported options (Default: only basic options are shown)")
cmndLineParser.add_option(  "-z",   "--import-options",     action="store",         default="",         type="string",  dest="import_options",                                                          metavar="STRING",                               help="specify the path to a Command_Line_Record.txt fie from a previous run, or the folder that contains one. Any other options used with this one are overwritten. (Default: use options supplied)")
yasraGroup.add_option(      "-d",   "--silent",             action="store_false",   default=True,                       dest="verbose",                                                                                                                 help="Nothing is printed to the screen (Default: print the output of yasra to the screen)")
yasraGroup.add_option(      "-t",   "--read-type",          action="store",         default="solexa",   type="choice",  dest="read_type",           choices=["454","solexa"],                           metavar="454 or solexa",                        help="Specify the type of reads. (Default: solexa)")
yasraGroup.add_option(      "-o",   "--read-orientation",   action="store",         default="circular", type="choice",  dest="read_orientation",    choices=["circular","linear"],                      metavar="circular or linear",                   help="Specify orientation of the sequence. (Default: circular")
yasraGroup.add_option(      "-p",   "--percent-identity",   action="store",         default="same",     type="choice",  dest="percent_identity",    choices=["same","high","medium","low","verylow","desperate"],   metavar="same, high, medium, low or very low",  help="The percent identity (PID in yasra). The settings correspond to different percent values depending on the read type (-t). (Defalt: same)")
yasraGroup.add_option(      "-a",   "--single-step",        action="store_true",    default=False,                      dest="single_step",                                                                                                             help="Activate yasra's single_step option (Default: run yasra normally)")
nucmerGroup.add_option(     "-f",   "--prefix",             action="store",         default="out",      type="string",  dest="prefix",                                                                  metavar="STRING",                               help="Set the output file prefix (Default: out)")
nucmerGroup.add_option(     "-b",   "--break-length",       action="store",         default=200,        type="int",     dest="break_length",                                                            metavar="INT",                                  help="Distance an alignment extension will attempt to extend poor scoring regions before giving up (Default: 200)")
nucmerGroup.add_option(     "-j",   "--alternate-ref",      action="store",         default="",         type="string",  dest="alternate_ref",                                                           metavar="INT",                                  help="Specify a new refence to be used in the rest of the alignment after yasra. (Default: use YASRA's reference)")
deltaGroup.add_option(      "-y",   "--min-identity",       action="store",         default=80,         type="int",     dest="min_identity",                                                            metavar="INT",                                  help="Set the minimum alignment identity [0, 100], (Default: 80)")
deltaGroup.add_option(      "-l",   "--min-align-length",   action="store",         default=100,        type="int",     dest="min_align_length",                                                        metavar="INT",                                  help="Set the minimum alignment length (Default: 100)")
#sumqualGroup.add_option(    "-e",   "--dont-extend",        action="store_false",   default=True,                       dest="extend",                                                                                                                  help="Dont include parts of contigs that extend past the start and end of the reference in the consensus (Default: extend contigs)")
qualtofaGroup.add_option(   "-c",   "--exclude-contigs",    action="store_false",   default=True,                       dest="include_contigs",                                                                                                         help="Dont include each contig on its own line (Default: include contigs)")
qualtofaGroup.add_option(   "-i",   "--no-match-overlap",  action="store_true",    default=False,                                          help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
qualtofaGroup.add_option(   "-e",   "--no-overlap",         action="store_true",    default=False,                                         help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
qualtofaGroup.add_option(   "-k",   "--keep-contained",     action="store_true",    default=False,                      dest="keep_contained",                                                                                                          help="Include contained contigs (Defalt: save sequences of contained contigs to a separate file)")
qualtofaGroup.add_option(   "-q",   "--end-trim-qual",      action="store",         default=0,          type="int",     dest="end_trim_qual",                                                           metavar="INT",                                  help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount (Default: 0)")
qualtofaGroup.add_option(   "-s",   "--dont-save-SNPs",     action="store_false",   default=True,                       dest="save_SNPs",                                                                                                               help="Dont save SNPs to a .qual file(Default: Save SNP file)")
maskingGroup.add_option(   "-m",   "--mask-contigs",       action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Cannot be used with the -c modifier.(Default: 0, 0)")
maskingGroup.add_option(   "-n",   "--mask-contig-SNPs",   action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Cannot be used with the -c modifier.(Default: 0, 0)")
maskingGroup.add_option(   "-w",   "--mask-consensus",     action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
maskingGroup.add_option(   "-x",   "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_SNPs',         help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
#############################################################################################################################

###Simple Help Menu##########################################################################################################
if len(argList) == 1: #if no arguments are supplied
    cmndLineParser.add_option_group(yasraGroup)
    cmndLineParser.add_option_group(nucmerGroup)
    cmndLineParser.add_option_group(deltaGroup)
    #cmndLineParser.add_option_group(sumqualGroup)
    cmndLineParser.add_option_group(qualtofaGroup)
    cmndLineParser.add_option_group(maskingGroup)
    cmndLineParser.print_help()
    sys.exit(0)
#############################################################################################################################

###Advanced Option Initilization#############################################################################################
cmndLineParser.add_option(  "-Z",   "--debug",              action="store_true",    default=False,                      dest="debug",                                                                                                               help="Save the debug log to the current working directory. (Default: dont save)")
yasraGroup.add_option(      "-E",   "--external-makefile",  action="store",         default=False,      type="string",  dest="external_makefile",                                                   metavar="FILEPATH",                             help="Specify path to external makefile used by YASRA. (Default: use the makefile built in to runyasra)")
yasraGroup.add_option(      "-Q",   "--no-dot-replace-reads",action="store_false",  default=True,                       dest="dot_replace_reads",                                                                                                   help="Do NOT replace N's with dots (.) in the microreads file before running yasra/ (Default: replace dots)")
yasraGroup.add_option(      "-I",   "--no-dos2unix-ref",    action="store_false",   default=True,                       dest="dos2unix_ref",                                                                                                        help="Do NOT run dos2unix on the reference before running yasra/ (Default: run dos2unix)")
nucmerGroup.add_option(     "-A",   "--anchor-uniqueness",  action="store",         default="ref",      type="choice",  dest="anchor_uniqueness",   choices=["mum","ref","max"],                    metavar="mum, ref, or max",                     help="Specify how NUCmer chooses anchor matches using one of three settings: mum = Use anchor matches that are unique in both the reference and query, ref =  Use anchor matches that are unique in the reference but not necessarily unique in the query, max = Use all anchor matches regardless of their uniqueness. (Default = ref)")
nucmerGroup.add_option(     "-T",   "--min-cluster",        action="store",         default=65,         type="int",     dest="min_cluster",                                                         metavar="INT",                                  help="Minimum cluster length used in the NUCmer analysis. (Default: 65)")
nucmerGroup.add_option(     "-D",   "--diag-factor",        action="store",         default=0.12,       type="float",   dest="diag_factor",                                                         metavar="FLOAT",                                help="Maximum diagonal difference factor for clustering, i.e. diagonal difference / match separation used by NUCmer. (Default: 0.12)")
nucmerGroup.add_option(     "-J",   "--no-extend",          action="store_true",    default=False,                      dest="no_extend",                                                                                                           help="Prevent alignment extensions from their anchoring clusters but still align the DNA between clustered matches in NUCmer. (Default: extend)")
nucmerGroup.add_option(     "-F",   "--forward-only",       action="store_true",    default=False,                      dest="forward_only",                                                                                                        help="Align only the forward strands of each sequence. (Default: forward and reverse)")
nucmerGroup.add_option(     "-X",   "--max-gap",            action="store",         default=90,         type="int",     dest="max_gap",                                                             metavar="INT",                                  help="Maximum gap between two adjacent matches in a cluster. (Default: 90)")             
nucmerGroup.add_option(     "-M",   "--min-match",          action="store",         default=20,         type="int",     dest="min_match",                                                           metavar="INT",                                  help="Minimum length of an maximal exact match. (Default: 20)")
nucmerGroup.add_option(     "-C",   "--coords",             action="store_true",    default=False,                      dest="coords",                                                                                                              help="Automatically generate the <prefix>.coords file using the 'show-coords' program with the -r option. (Default: dont)")
nucmerGroup.add_option(     "-O",   "--no-optimize",        action="store_true",    default=False,                      dest="no_optimize",                                                                                                         help="Toggle alignment score optimization. Setting --nooptimize will prevent alignment score optimization and result in sometimes longer, but lower scoring alignments (default: optimize)")
nucmerGroup.add_option(     "-S",   "--no-simplify",        action="store_true",    default=False,                      dest="no_simplify",                                                                                                         help="Simplify alignments by removing shadowed clusters. Turn this option off if aligning a sequence to itself to look for repeats. (Default: simplify)")
deltaGroup.add_option(      "-K",   "--max-overlap",        action="store",         default=100,        type="float",   dest="max_overlap",                                                         metavar="FLOAT",                                help="Set the maximum alignment overlap for -r and -q options as a percent of the alignment length [0, 100]. (Default 100)")
deltaGroup.add_option(      "-B",   "--query-alignment",    action="store_true",    default=False,                      dest="query_alignment",                                                                                                     help="Query alignment using length*identity weighted LIS. For each query, leave only the alignments which form the longest consistent set for the query. (Defualt: global alignment)")
deltaGroup.add_option(      "-R",   "--ref-alignment",      action="store_true",    default=False,                      dest="ref_alignment",                                                                                                       help="Reference alignment using length*identity weighted LIS. For each reference, leave only the alignments which form the longest consistent set for the reference. (Defualt: global alignment)")
deltaGroup.add_option(      "-G",   "--global-alignment",   action="store_false",   default=True,                       dest="global_alignment",                                                                                                    help="Global alignment using length*identity weighted LIS (longest increasing subset). For every reference-query pair, leave only the alignments which form the longest mutually consistent set. (this is the default)")
deltaGroup.add_option(      "-U",   "--min-uniqueness",     action="store",         default=0,          type="float",   dest="min_uniqueness",                                                      metavar="FLOAT",                                help="Set the minimum alignment uniqueness, i.e. percent of the alignment matching to unique reference AND query sequence [0, 100]. (Default 0)")
sumqualGroup.add_option(    "-Y",   "--save-ref-dels",      action="store_true",    default=False,                      dest="save_ref_dels",                                                                                                       help="Save the sequence of the reference that corresponds to empty gaps in the consensus in a fasta file. (Default: dont save)")
#sumqualGroup.add_option(    "-V",   "--remove-ref-dels",    action="store_true",    default=False,                      dest="remove_ref_dels",                                                                                                     help="Only include portions of the reference that are represented by contig quality values (i.e. no empty bases in the consensus). (Defualt: included all of the reference)")
qualtofaGroup.add_option(   "-W",   "--dont-align-contigs", action="store_true",    default=False,                      dest="dont_align_contigs",                                                                                                  help="Do NOT align contigs to the reference using '-'s at the start of each contig; independent of the consensus. (Default: align contigs)")
#qualtofaGroup.add_option(   "-P",   "--only-matching",      action="store_true",    default=False,                      dest="only_matching",                                                                                                       help="Only include regions of the contigs that NUCmer identifies as matching (Default: include non-matching regions)")
qualtofaGroup.add_option(   "-N",   "--end-trim-num",       action="store",         default=0,          type="int",     dest="end_trim_num",                                                        metavar="INT",                                  help="Trim the ends of the contigs by the specified number of bases. (Default: 0)")
qualtofaGroup.add_option(   "-L",   "--min-match-length",   action="store",         default=50,         type="int",     dest="min_match_length",                                                    metavar="INT",                                  help="Set minimum length of the matching region of the contigs. (Default: 50)")
#############################################################################################################################

def strCmndLine(cmndLine):
    out = ''
    for part in cmndLine:
        out += str(part) + ' '
    return out[:-1]

def argNum(cmndLine):
    out = 0
    for part in cmndLine:
        if part[0] != '-':
            out += 1
    return out

###Command Line Parsing and Advanced Help####################################################################################
cmndLineParser.add_option_group(yasraGroup)
cmndLineParser.add_option_group(nucmerGroup)
cmndLineParser.add_option_group(deltaGroup)
cmndLineParser.add_option_group(sumqualGroup)
cmndLineParser.add_option_group(qualtofaGroup)
cmndLineParser.add_option_group(maskingGroup)
runyasraArgs = {'read_type' : '--read-type', 'read_orientation' : '--orientation', 'percent_identity' : '--percent-identity', 'single_step' : '--single-step', 'dot_replace_reads' : '--remove-dots-reads', 'external_makefile' : '--make-path', 'verbose' : '--verbose', 'dos2unix_ref' : '--dos2unix-ref'}
nucmerArgs = {'break_length' : '--breaklen', 'min_cluster' : '--mincluster', 'diag_factor' : '--diagfactor', 'no_extend' : '--[no]extend', 'forward_only' : '--forward','max_gap' : '--maxgap', 'coords' : '--coords', 'no_optimize' : '--[no]optimize', 'prefix' : '--prefix', 'no_simplify' : '--[no]simplify'}
deltaArgs = {'global_alignment' : '-g','min_identity' : '-i','min_align_length' : '-l','query_alignment' : '-q','ref_alignment' : '-r','min_uniqueness' : '-u','max_overlap' : '-o'}
sumqualArgs = {'save_ref_dels' : '--save-gaps', 'verbose' : '--verbose'}
qualtofaArgs = {'verbose' : '--verbose', 'include_contigs' : '-c','mask_contigs' : '--mask-contigs','mask_contig_SNPs' : '--mask-contig-SNPs', 'dont_align_contigs' : '--dont-align-contigs', 'no_match_overlap' : '--no-match-overlap','no_overlap' : '--no-overlap', 'mask_consensus' : '--mask-consensus', 'mask_SNPs' : '--mask-SNPs', 'keep_contained' : '--keep-contained', 'end_trim_num' : '--end-trim', 'end_trim_qual' : '--end-trim-qual', 'min_match_length' : '--min-match-length', 'save_SNPs' : '--save-SNPs'}
(options, args) = cmndLineParser.parse_args(argList[1:])
if len(args) > 2:
    errorExit('Too many argments supplied. alignreads.py takes either 1 directory or 2 files as arguments. %s arguments supplied...' % str(len(args)))
if options.advanced_help:
    cmndLineParser.print_help()
    sys.exit(0)
############################################################################################################################

###Option import modifier###################################################################################################
if len(options.import_options) > 0 and os.path.exists(options.import_options):    
    if os.path.isdir(options.import_options):
        recordPath = options.import_options + '/Command_Line_Record.txt'
    elif os.path.exists(options.import_options) and options.import_options[-23:] == 'Command_Line_Record.txt':
        recordPath = options.import_options
    else:
        errorExit('incorrect path to command line record') 
    record = open(recordPath)
    newCmndLine = record.readline()
    argList = newCmndLine.split()
    (options, notArgs) = cmndLineParser.parse_args(argList[1:])
    record.close()
############################################################################################################################

def getProgPath(progName):
    prevCWD = os.getcwd()
    allPaths = os.environ['PATH'].split(':')
    matchs = []
    for dirPath in allPaths:
        path = dirPath + '/' + progName
        if os.path.exists(path):
            matchs.append([os.stat(path).st_mtime,path])
    matchs.sort()
    matchs.reverse()
    if len(matchs) >= 1:
        return matchs[0][1]
    else:
        return -1
            
def getOptCmndLine(args,converterKey):
    out = []
    for equivalancy in converterKey.iteritems(): #equivalancy = (alignreads option, yasra option) 
        option = getattr(args, equivalancy[0])
        if option != False: #if the option is a boolean and is the default value, therefore not nessesary (always false in this script)
            out.append(str(equivalancy[1]))
            if option != True:
                if type(option) != list: option = [option]
                for opt in option: out.append(str(opt))
    return out

def mvFiles(folder):
    '''takes all of the files in the current working directory and moves them to a new directory within it caled folder'''
    fileList = os.listdir(os.getcwd())
    os.mkdir(folder)
    for path in fileList:
        if os.path.isdir(path) == False:
            old = './' + path
            new = './' + os.path.basename(folder) + '/' + path
            os.rename(old,new)

###YASRA####################################################################################################################
#yasraPath = getProgPath('runyasra') 
runyasraFolder = ''
pastRuns = False #see end of script
if len(args) == 2: #if runyasra is to be used...
    yasraQuery = os.path.join(cwd,args[0])
    yasraRef   = os.path.join(cwd,args[1])
    runyasraCmndLine = ['runyasra.py'] + getOptCmndLine(options,runyasraArgs) + [yasraQuery,yasraRef]
    sys.argv = runyasraCmndLine
    if options.verbose: print 'alignreads: implimenting YASRA with runyasra.py using the following command line: %s' % ' '.join(runyasraCmndLine)
    import runyasra
    yasraFolder = runyasra.outDirPath
    os.chdir(yasraFolder)
    mvFiles(yasraSubFolderName)
    runyasraFolder = os.path.join(yasraFolder,yasraSubFolderName)
elif len(args) == 1 and os.path.isdir(args[0]): #if the user supplied the folder of a previous yasra run, insead of running it again
    yasraFolder = os.path.join(cwd,args[0])
    runyasraFolder = os.path.join(yasraFolder,yasraSubFolderName)
    os.chdir(yasraFolder)
    if os.path.exists(runyasraFolder): #if the full pipeline had been run on the yasra data
        pastRuns = True #see end of script
        makePath = os.path.join(runyasraFolder, 'Makefile')
        makefileHandle = open(makePath, 'r')
        nucmerSubFolderPath = os.path.join(yasraFolder,nucmerSubFolderName)
        if os.path.exists(nucmerSubFolderPath): #if the input directory has gone through the pipeline exactly once before
            mvFiles(multRunSubFolderName + '_1')
            newNSFP = os.path.join(yasraFolder,multRunSubFolderName + '_1',nucmerSubFolderName)
            os.rename(nucmerSubFolderPath,newNSFP)
            nucmerSubFolderPath = newNSFP
        if options.verbose: 
            nextRunNum = max([int(name[len(multRunSubFolderName) + 1:]) for name in os.listdir(yasraFolder) if os.path.isdir(name) and name.startswith(multRunSubFolderName)]) + 1
            print 'alignreads: Using previous alignreads output at %s. Saving new ouput to %s.' % (yasraFolder,os.path.join(yasraFolder,multRunSubFolderName + '_' + str(nextRunNum)))
    else:
        if options.verbose: print 'alignreads: Using previous yasra ouput at %s (New data will be saved within this folder).' % yasraFolder
        makePath = os.path.join(os.getcwd(),'Makefile')
        makefileHandle = open(makePath, 'r')
        mvFiles(yasraSubFolderName)
    makefile = makefileHandle.readlines()
    makefileHandle.close()
    for line in makefile: #the makefile used in the previously run yasra folder is searched to find the names of the input files
        if line.find('READS=') == 0:
            yasraQuery = line.strip().replace('READS=','')
        elif line.find('TEMPLATE=') == 0:
            yasraRef = line.strip().replace('TEMPLATE=','')
            break
    try: yasraQuery = runyasraFolder + '/' + os.path.basename(yasraQuery)
    except NameError: errorExit('Unable to find path to read file from yasra Makefile at %s' % makePath)
    try: yasraRef = runyasraFolder + '/' + os.path.basename(yasraRef)
    except NameError:  errorExit('Unable to find path to reference file from yasra Makefile at %s' % makePath)
############################################################################################################################

def printProcess(process):
    '''Prints the standard ouput and error messages of a process while the process ia running.'''
    outLen = 0
    errLen = 0
    out = ''
    err = ''
    while process.poll() is None:  #while it hasnt finished...
        (outData, errData) = process.communicate()
        outDiff =  len(outData) - outLen
        errDiff =  len(errData) - errLen
        if outDiff > 0:
            outLen += outDiff
            out += outData[-outDiff:]
            print outData[-outDiff:]
        if errDiff > 0:
            errLen += errDiff
            err += errData[-errDiff:]
            print errData[-errDiff:]
    return (out, err)

###NUCmer###################################################################################################################
if options.alternate_ref != "":
    nucRef = options.alternate_ref
else:
    nucRef = yasraRef
nucmerPath = getProgPath('nucmer')
nucQuery = os.path.join(runyasraFolder,'Final_Assembly_%s_%s' % (os.path.basename(yasraQuery), os.path.basename(yasraRef)))
nucmerCmndLine = [nucmerPath] + getOptCmndLine(options,nucmerArgs) + [nucRef,nucQuery] #USAGE: nucmer  [options]  <Reference>  <Query>
nucmerStdOut = open(os.path.join(yasraFolder,'nucmer_standard_output.txt'), 'w')
nucmerStdErr = open(os.path.join(yasraFolder,'nucmer_standard_error.txt'), 'w')
nucmerProcess = Popen(nucmerCmndLine, stdout=PIPE, stderr=PIPE)
if options.verbose:
    (nucOutData, nucErrData) = printProcess(nucmerProcess)
else:
    (nucOutData, nucErrData) = nucmerProcess.communicate()
nucmerProcess.wait()
nucmerStdOut.write(nucOutData)
nucmerStdOut.write(nucErrData)
nucmerStdOut.close()
nucmerStdOut.close()
if nucmerProcess.returncode != 0: errorExit('Nucmer returned a non-zero code; it may have not completed successfully')
############################################################################################################################

###delta-filter#############################################################################################################
deltaPath = getProgPath('delta-filter')
nucOut = os.path.join(yasraFolder,options.prefix + '.delta')
deltaCmndLine = [deltaPath] + getOptCmndLine(options,deltaArgs) + [nucOut] #USAGE: delta-filter  [options]  <deltafile>
deltaOut = os.path.join(yasraFolder,options.prefix + '_filtered.delta')
filtered = open(deltaOut, 'w')
if call(deltaCmndLine, stdout=filtered) != 0: errorExit('delta-filter returned a non-zero code; it may have not completed successfully')
filtered.close()
mvFiles(nucmerSubFolderName)
############################################################################################################################

###sumqual and qualtofa#####################################################################################################
deltaPath = os.path.join(yasraFolder,nucmerSubFolderName,os.path.basename(deltaOut))
qualPath = os.path.join(runyasraFolder,'Assembly_%s_%s.qual' % (os.path.basename(yasraQuery), os.path.basename(yasraRef)))
sys.argv = ['sumqual.py'] + getOptCmndLine(options,sumqualArgs) + ['--save-run-info'] + [qualPath,deltaPath,nucRef] #sumqual.py [options] <quality file path> <NUCmer file path> <Reference Path>
import sumqual
sumqualOutPath = os.path.join(yasraFolder,os.path.basename(deltaOut) + '.qual')
sys.argv = ['qualtofa.py'] + getOptCmndLine(options,qualtofaArgs) + ['--save-run-info'] + [sumqualOutPath]
import qualtofa
############################################################################################################################

def listToString(List):
    out = ''
    for item in List:
        out += str(item) + ' '
    return out[:-1]

###Command Line Record Saving###############################################################################################
cmndLineOut = open('Command_Line_Record.txt', 'w')
strLine = ''
for part in argList:
    strLine += str(part) + ' '
strLine = strLine[:-1]
cmndLineOut.write(strLine)
cmndLineOut.close()
#############################################################################################################################

def getNum():
    nums = []
    for path in os.listdir(os.getcwd()):
        baseName = os.path.basename(path)
        if baseName.startswith('aligned_data_'):
            nums.append(int(baseName[13:]))
    if len(nums) > 0:
        return max(nums)
    else:
        return 0
    
###########################################################################################################################    
if pastRuns:
    os.chdir(yasraFolder)   
    nextRunNum = max([int(name[len(multRunSubFolderName) + 1:]) for name in os.listdir(yasraFolder) if os.path.isdir(name) and name.startswith(multRunSubFolderName)]) + 1
    multRunSubFolderName = '%s_%s' % (multRunSubFolderName,nextRunNum)
    multRunSubFolderPath = os.path.join(yasraFolder,multRunSubFolderName)
    nucmerSubFolderPath = os.path.join(yasraFolder,nucmerSubFolderName)
    mvFiles(multRunSubFolderName)
    os.rename(nucmerSubFolderPath,os.path.join(multRunSubFolderPath,nucmerSubFolderName))
############################################################################################################################
Alignreads.py: Difference between revisions

Latest revision as of 17:04, 22 September 2010

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools