Qualtofa.py: Difference between revisions

Latest revision as of 12:11, 8 September 2010

Copy and past the following into a text editor and save it as qualtofa.py to use the script.

#!/usr/bin/env python

###Imports / Variable Initilization##########################################################################################
import os, string, sys, time, copy
from datetime import *
from optparse import *
from subprocess import *
commandLine = copy.deepcopy(sys.argv)
cwd = os.getcwd()
timeStamp = datetime.now().ctime().replace(' ','-').replace(':','-').replace('--','-')  #Ex: 'Mon-Jun-14-11-08-55-2010'
cntgMatchRanges = None
progName, progVersion = ('qualtofa.py','2.52')
progUsage = 'python %s [options] <.qual file>' % progName
printLog = []
#############################################################################################################################

def errorExit(message=None,exitStatus=1):
    '''Version 1.0
    Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
    
    if message:
        print '%s: Error: %s' % (progName, message)
    else:
        print progName +': Unknown error, printing debug log...'
        for line in debugLog: print line   #print debug log
    sys.exit(exitStatus)

def mkRange(numList):
    '''Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)'''
    outList = []
    if len(numList) > 0:
        start   = numList[0]
        current = numList[0]
        numList.append(numList[-1])
        for num in numList[1:]:
            if (num - 1) != current:
                outList.append([start,current])
                start = num
            current = num
    return outList

def readLFile(filePath, numLinesToRead = None):
    fileHandle = open(filePath, 'r')
    if numLinesToRead is None:
        fileData = fileHandle.readlines()
    else:
        fileData = [fileHandle.readline() for count in range(0,numLinesToRead)]
    fileHandle.close()
    return fileData

def writeLFile(filePath, fileData):
    fileHandle = open(filePath, 'w')
    for line in fileData:
        fileHandle.write(fileData)
    fileHandle.close()

def saveQualStandard(filePath, contigs, qualValues, headers, schema=None):
    if schema is None: schema = ['Position in the contig(1 based)', 'Called base', "Number of reads suppoting 'A'", "Number of reads supporting 'C'", "Number of reads supporting 'G'", "Number of reads supporting 'T'", "Number of reads supporting '_'", 'Coverage']
    if len(contigs) == 0: errorExit('saveQualStandard: empty list supplied')
    if len(contigs) != len(qualValues) or len(contigs) != len(headers): errorExit('saveQualStandard: Unequal number of entries is input list')
    outHandle = open(filePath, 'w')
    outHandle.write('Schema:'+ '\n' + '\t'.join(schema) + '\n')
    for cIndex in range(0,len(contigs)):
        outHandle.write(headers[cIndex] + '\n')
        for bIndex in range(0,len(contigs[cIndex])): outHandle.write('\t'.join([contigs[cIdnex][bIndex]] + qualValues[cIndex]) + '\n')
    outHandle.close()
            
def parseQualStandard(filePath):
    columbWidth = 8
    fileData = [line.strip().split('\t') for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file      
    if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
    schema = fileData[0]   #extract the tab-deliminated schema into a list
    headers, contigs, qualValues = ([], [], [])   #initilizes multiple lists at a time
    index = 0
    for line in fileData[1:]:
        if len(line) == 1:   #if it is a header
            headers.append(line[0])
            contigs.append([])
            qualValues.append([])
        elif len(line) == columbWidth:   #if it is  quality value
            contigs[-1].append(line[1])
            qualValues[-1].append([int(num) for num in line[2:]])   #converts all of the quality values into integer objects and saves them in qualvalues
        else: errorExit('parseQualStandard: Quality file at %s deviated from the expected format on line %d.' % (filePath, index))
        index += 1        
    return (contigs, qualValues, headers, schema)

def parseQualAlignment(filePath):
    columbWidth = 9
    headColNum = 2
    fileData = [[part.strip() for part in line.split('\t')] for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file  
    if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
    schema = fileData[0]   #extract the tab-deliminated schema into a list
    refHeader = fileData[1][0]
    reference = [line[1] for line in fileData[2:]]   #extracts the reference from the second columb
    qualData = [line[headColNum:] for line in fileData[2:]]   #removes the first lines and columbs of fileData, which dont contain unextracted data
    headers, contigs, cntgStarts, qualValues = ([], [], [], [])   #initilizes multiple lists at a time
    refPos = 0
    for line in qualData:
        if len(line) > 0:
            for index in range(0,len(line),columbWidth):
                if headers.count(line[index]) == 0:   #if the contig header has not been encountered before
                    headers.append(line[index])
                    cntgStarts.append(refPos)
                    contigs.append([])
                    qualValues.append([])
                headIndex = headers.index(line[index])
                contigs[headIndex].append(line[index + 2])   #append the called base character to its contig
                qualValues[headIndex].append([int(num) for num in line[index + 3: index + columbWidth]])   #converts all of the quality values into integer objects and saves them in qualvalues 
        refPos += 1
    alignmentIndexs = [[cntgStarts[index], cntgStarts[index] + len(contigs[index]) - 1] for index in range(0,len(cntgStarts))]
    return (contigs, qualValues, headers, alignmentIndexs, reference, refHeader, schema)

def parseQual(filePath):
    columbWidth = 9
    headColNum = 2
    fileHead = readLFile(filePath, 2)
    if fileHead[0].strip() == 'Schema:': del fileHead[0]   #removes the unessesary first line of the file, if present
    schema = fileHead[0].strip().split('\t')
    if schema[1] == 'Reference' and len(schema) % columbWidth == headColNum:
        return parseQualAlignment(filePath)
    elif len(schema) == 8:
        return parseQualStandard(filePath)
    else: errorExit('parseQual: The quality file at %s has an unreconized format based on the schema on line 2.' % filePath)

def listToStr(aList):
    out = ''
    for char in aList: out += char
    return out

def saveFasta(filePath, headers, sequences):
    outHandle = open(filePath, 'w')
    if len(headers) != 0 and len(sequences) != 0:
        if len(headers) != len(sequences): errorExit('saveFastaFile: different number of headers and sequences')
        if type(headers) == str and type(sequences) == str:
            headers = [headers]
            sequences = [sequences]
        if type(sequences[0]) == list: sequences = [listToStr(seq) for seq in sequences]
        for index in range(0,len(headers)):
            outHandle.write('>%s\n' % headers[index])
            if type(sequences[index]) == str: sequences[index] = [sequences[index]]  #if there is only one line of sequence for a given header, make it a one-item list for the next loop
            for seq in sequences[index]:
                outHandle.write(seq + '\n')
    outHandle.close()

def parseFasta(filePath):
    headers = []
    seqs = []
    fileHandle = open(filePath, 'r')
    fileData = fileHandle.readlines() + ['>']   #the additional header line is there make to following for loop process the last sequence 
    fileHandle.close()
    currSeq = ''
    for line in fileData:
        if line[0] == '>':   #if the line is a fasta header
            headers.append(line[1:].strip() )   #the fisrt character is the '>' and the last is a newline '\n'
            seqs.append(currSeq)
            currSeq = ''
        else:
            currSeq += line.strip()
    return (headers[:-1], seqs[1:])

def removeRepeats(aList):
    for entry in aList:
        if aList.count(entry) > 1:
            aList.remove(entry)
            return removeRepeats(aList)
    return aList

def maskingCallback(option, opt_str, value, parser):
    value = []
    def floatable(str):
        try:
            float(str)
            return True
        except ValueError: return False
    for arg in parser.rargs:         
        if arg[:2] == "--" and len(arg) > 2: break   # stop on --foo like options             
        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break   # stop on -a, but not on -3 or -3.0
        value.append(arg)
        if len(value) == 2: break 
    if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
    del parser.rargs[:len(value)]
    if len(value) == 1: value.append(0)
    value[0] = int(value[0])
    value[1] = float(value[1])
    if value[0] < 0: errorExit("The first argument of the option '%s' must be a positive integer; %d supplied." % (option.dest,value[0]))
    if value[1] < 0 or value[1] > 1: errorExit("The second argument of the option '%s' must be a decimal between 0 and 1; %f supplied." % (option.dest,value[1]))
    setattr(parser.values, option.dest, value)

###Command Line Parser#######################################################################################################
cmndLineParser  = OptionParser(usage=progUsage, version="Version %s" % progVersion)
maksingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
cmndLineParser.add_option(   "-c",   "--include-contigs",    action="store_true",    default=False,                                                             help="Include each contig on its own line. (Default: only output the consensus)")
maksingGroup.add_option(   "-m",   "--mask-contigs",       action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(   "-s",   "--mask-contig-SNPs",   action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(   "-M",   "--mask-consensus",     action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
maksingGroup.add_option(   "-S",   "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_SNPs',         help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
cmndLineParser.add_option_group(maksingGroup)
cmndLineParser.add_option(   "-t",   "--end-trim",           action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs by a specified number of bases. (Default: 0)")
cmndLineParser.add_option(   "-q",   "--end-trim-qual",      action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount. This option is implemented after standard end trimming (see -t option). (Default: 0)")
cmndLineParser.add_option(   "-l",   "--min-match-length",   action="store",         default=0,          type="int",    metavar="INT",      help="Only include contigs that have a least one match longer or as long as the specified amount. This is implemented after any end trimming (see -q and -t options). (Default: 0)")
cmndLineParser.add_option(   "-n",   "--no-overlap",         action="store_true",    default=False,                                         help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(   "-a",   "--no-match-overlap",   action="store_true",    default=False,                                          help="Add deletions to the reference to accommodate overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(   "-k",   "--keep-contained",     action="store_true",    default=False,                                         help="Include contained contigs in the alignment and consensus. (Default: save sequences of contained contigs to a separate file)")
cmndLineParser.add_option(   "-p",   "--save-SNPs",          action="store_true",    default=False,                                         help="Save SNPs to a .qual file. (Default: Do not save SNP file)")
cmndLineParser.add_option(   "-d",   "--dont-align-contigs", action="store_true",    default=False,                                         help="Do not align contigs to the reference. Requires the use of the -c modifier. (Default: align contigs to the reference)")
cmndLineParser.add_option(   "-r",   "--external-ref",       action="store",         default=None,       type="string", metavar="PATH",     help="Specify a reference to use instead of the one contained in the quality file.")
cmndLineParser.add_option(   "-v",   "--verbose",            action="store_true",    default=False,                                         help="Print progress updates and relevant statistics to the standard output. (Default: run silently)")
cmndLineParser.add_option(   "-i",   "--save-run-info",      action="store_true",    default=False,     help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)")
(options, args) = cmndLineParser.parse_args(commandLine)
if options.include_contigs == False:
    if sum(options.mask_contigs) > 1: errorExit('The option -m/--mask-contigs requires the use of the option -c/--include-contigs.')
    if sum(options.mask_contig_SNPs) > 1: errorExit('The option -s/--mask-contig-SNPs requires the use of the option -c/--include-contigs.')
    if options.dont_align_contigs: errorExit('The option -d/--dont-align-contigs requires the use of the option -c/--include-contigs.')
if len(args) == 1:
    cmndLineParser.print_help()
    sys.exit(0)
argNum = len(args) - 1   #counts the amount of arguments, negating the 'qualtofa' at the start of the command line
if argNum != 1: errorExit('qualtofa takes exactly 1 argument; %d supplied' % argNum, 0)
qualPath = args[1]   #The file path to the input file supplied by the user
#############################################################################################################################

def verbose(toPrint):
    if options.verbose:
        print toPrint,
        printLog.append(toPrint)

###Quality file parsing######################################################################################################
verbose('qualtofa: Loading data from %s...' % qualPath)
qualData = parseQual(qualPath)
verbose('Complete\n')
if len(qualData) == 4:   #if the quality file contains a list of unaligned contigs (e.g. one directly from yasra)
    isAlignment = False
    if options.no_match_overlap: errorExit('The option -a/--no-match-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_consensus): errorExit('The option -M/--mask-consensus requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_SNPs): errorExit('The option -S/--mask-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_contig_SNPs): errorExit('The option -s/--mask-contig-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.keep_contained: errorExit('The option -k/--keep-contained requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.min_match_length: errorExit('The option -l/--min-match-length requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.save_SNPs: errorExit('The option -p/--save-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.no_overlap: errorExit('The option -n/--no-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.external_ref: errorExit('The option -r/--external-ref requires an aligned quality consensous file, like that given by sumqual.py.')
    contigs, qualities, headers, schema = qualData
elif len(qualData) == 7:   #if the quality file contains an alignment (i.e. from sumqual)
    isAlignment = True
    contigs, qualities, headers, alignmentIndexs, reference, refHeader, schema = qualData
else: errorExit()
#############################################################################################################################

###External reference parsing(-r option implimintation)######################################################################
if options.external_ref is not None:
    verbose('qualtofa: option -r/--external-ref: loading reference at %s...' % options.external_ref)
    (seqs,headers) = parseFasta(options.external_ref)
    reference, refHeader = seqs[0], headers[0]
    verbose('Complete\n')
#############################################################################################################################

def delContig(index):
    del contigs[index]
    del headers[index]
    del qualities[index]
    if isAlignment:
        del alignmentIndexs[index]
        if cntgMatchRanges is not None: del cntgMatchRanges[index]

def delContigs(indexes):
    if len(indexes) > 0:    
        indexes.sort()
        indexes.reverse()
        indexes = removeRepeats(indexes)
        for index in indexes: delContig(index)   #deletes all contigs

def delBase(cntgIndex, baseIndex):
    del qualities[cntgIndex][baseIndex]
    del contigs[cntgIndex][baseIndex]
    if isAlignment:
        if baseIndex == 0: alignmentIndexs[cntgIndex][0] += 1
        else: alignmentIndexs[cntgIndex][1] -= 1 

def delBases(cntgIndex, baseIndexes):
    baseIndexes.sort()
    baseIndexes.reverse()
    for index in baseIndexes: delBase(cntgIndex, index)

###Contig end Trimming (-q and -t option implimentation)#####################################################################
if options.end_trim > 0:
    verbose('qualtofa: option -t/--end-trim: Trimming %d bases from both ends of each contig...' % options.end_trim)
    rejectIndexes = [index for index in range(0,len(contigs)) if len(contigs[index]) <= options.end_trim * 2]   #makes a list of all the indexs of contigs that are too short to be trimmed. 
    delContigs(rejectIndexes)   #deletes all rejected contigs
    contigs = [cntg[options.end_trim : -options.end_trim] for cntg in contigs]   #trims all remaining contigs
    qualities = [qual[options.end_trim : -options.end_trim] for qual in qualities]   #trims all remaining contigs
    alignmentIndexs = [[indexRange[0] + options.end_trim,indexRange[1] - options.end_trim]  for indexRange in alignmentIndexs]
    if options.verbose:
        verbose('Complete\n')
        if len(rejectIndexes) > 0: verbose('option -t/--end-trim: %d contig(s) were removed from the analysis because they were shorter than twice the trim length of %d.\n' % (len(rejectIndexes),options.end_trim))

if options.end_trim_qual > 0:
    verbose('qualtofa: option -q/--end-trim-qual: Trimming contig ends until a base with a coverage of %d or more is reached...' % options.end_trim_qual)
    for index in range(0, len(contigs)):
        while len(qualities[index]) > 0 and qualities[index][0][-1] < options.end_trim_qual: delBase(index,0)
        while len(qualities[index]) > 0 and qualities[index][-1][-1] < options.end_trim_qual: delBase(index,-1)
    rejectIndexes = [index for index in range(0,len(qualities)) if len(qualities[index]) == 0] #makes a list of all the indexs of contigs that dont contain sequence after trimming
    delContigs(rejectIndexes)   #deletes all rejected contigs
    if options.verbose:
        verbose('Complete\n')
        if len(rejectIndexes) > 0: verbose('option -q/--end-trim-qual: %d contig(s) removed from the analysis because all of their sequence had quality values less than %d.\n' % (len(rejectIndexes),options.end_trim_qual))
#############################################################################################################################

###Contigs Match Coordinate Extraction (-l option implimentation)############################################################
if isAlignment:
    cntgMatchRanges = [mkRange([index for index in range(0,len(cntg)) if cntg[index].isupper()]) for cntg in contigs]
    if options.min_match_length > 0:
        verbose('qualtofa: option -l/--min-match-length: filtering contigs without a match of at least %d base pairs...' % options.min_match_length)
        maxMatchLens = [max([match[1] - match[0] + 1 for match in matchList] + [0]) for matchList in cntgMatchRanges]
        rejectIndexes = [index for index in range(0,len(contigs)) if maxMatchLens[index] < options.min_match_length]
        delContigs(rejectIndexes)
        if options.verbose:
            verbose('Complete\n')
            if len(rejectIndexes) > 0: verbose('option -l/--min-match-length: %d contig(s) removed from the analysis due to the lack of a match %d base pairs long.\n' % (len(rejectIndexes),options.min_match_length))
#############################################################################################################################

###Contained contig extraction (-k option implimentation)####################################################################
if isAlignment:
    verbose('qualtofa: Searching for contained contigs...')
    matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
    containedIndexes = [[bIndex for bIndex in range(0,len(matchRanges)) if matchRanges[aIndex][0] <= matchRanges[bIndex][0] and matchRanges[aIndex][1] >= matchRanges[bIndex][1] and aIndex != bIndex] for aIndex in range(0,len(matchRanges))]
    containedIndexes = [index for contained in containedIndexes for index in contained]
    containedContigs = [(headers[index], contigs[index]) for index in containedIndexes]
    verbose('Complete\n')
    if options.keep_contained == False:
        delContigs(containedIndexes)
        if len(containedIndexes) > 0: verbose('qualtofa: %d contained contig(s) removed from the analysis.\n' % len(containedIndexes))
    elif len(containedIndexes) > 0: verbose('qualtofa: option -k/--keep-contained: %d contained contig(s) included in the analysis.\n' % len(containedIndexes))
#############################################################################################################################

def getIUPAC(baseValues):
    '''Takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code. Assumes only input characters to be ACTG-~N'''
    def IUPAC(bases):
        conversion = {'CT':'Y','AG':'R','AT':'W','CG':'S','GT':'K','AC':'M','AGT':'D','ACG':'V','ACT':'H','CGT':'B','ACGT':'N'}
        bases.sort()  #['A', 'C', 'G', 'T']
        if bases[0].islower():
            bases = map(str.upper,bases)
            return str.lower(conversion[''.join(bases)])
        else: return conversion[''.join(bases)]
    chars = removeRepeats(baseValues)
    chars.sort()  #['-', 'A', 'C', 'G', 'N', 'T', 'a', 'c', 'g', 'n', 't', '~']
    if len(chars) == 0: return '-'
    if len(chars) == 1: return chars[0]
    if chars[-1] == '~':   #converts '~' to '-'
        del chars[-1]
        if chars[0] != '-': chars.insert(0,'-')
    priorityList = ('ACGT','N','-','acgt','n')
    for group in priorityList:
        matchs = [base for base in chars if group.find(base) != -1]
        if len(matchs) == 0: continue
        elif len(matchs) == 1: return matchs[0]
        else: return IUPAC(matchs)

###Overlap handleing (-i, -n options implimentation)############################################################################
if isAlignment:
    if options.no_match_overlap or options.no_overlap:
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: Adding deletions to the reference to accomodate all overlapping sequence...')
        else: verbose('qualtofa: option -a/--no-match-overlap: Adding deletions to the reference to accomodate overlapping matches...')
        matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
        if options.no_overlap: interSeqGaps = [alignmentIndexs[index + 1][0] - alignmentIndexs[index][1] -1 for index in range(0,len(contigs) - 1)]
        else: interSeqGaps = [matchRanges[index + 1][0] - matchRanges[index][1] - 1 for index in range(0,len(contigs) - 1)]
        verbose('Complete\n')
        gapSum = sum([abs(gap) for gap in interSeqGaps if gap < 0])
        totalLen = sum(map(len,contigs))
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: %d of %d total bases (%d percent) in the consensus is overlaping sequence.\n' % (gapSum,totalLen,gapSum*100/len(reference)))
        else: verbose('qualtofa: option -a/--no-match-overlap: %d of %d total bases (%.1f%%) in the consensus are overlapping matchs.\n' % (gapSum,totalLen,float(gapSum)*100/len(reference)))
        refOffset = 0
        for index in range(0,len(interSeqGaps)):
            if interSeqGaps[index] < 0:   #if matching sequence overlaps...
                for count in range(0,abs(interSeqGaps[index])): reference.insert(matchRanges[index][1] + refOffset + 1, '-')
                refOffset += abs(interSeqGaps[index])
            alignmentIndexs[index + 1][0] += refOffset
            alignmentIndexs[index + 1][1] += refOffset
##########################################################################################################################################

def getProp(call, qual):
    equivalancies = {'A':0,'C':1,'G':2,'T':3,'N':4}
    try: callIndex = equivalancies[call.upper()]
    except KeyError: errorExit('getProp: Non-neucleotide base %s encountered...' % call)
    if callIndex == 4:
        return float(max(qual[:4]))/float(qual[-1])
    return float(qual[callIndex])/float(qual[-1])    
    
###Consensous Creation (-M, -S options implimentation)############################################################################
if isAlignment:
    verbose('qualtofa: Creating consensus...')
    numMasked, numQualMasked, SNPmasked, SNPqualmasked, numPropMasked, SNPpropMasked, SNPcount = (0,0,0,0,0,0,0)
    if sum(options.mask_SNPs + options.mask_consensus) > 0: includeMaskedCon = True
    else: includeMaskedCon = False
    rawConsensous = [[] for index in range(0,len(reference))]
    if includeMaskedCon: maskedRawCon = [[] for index in range(0,len(reference))]
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            rIndex = alignmentIndexs[cIndex][0] + bIndex
            refChar = reference[rIndex]
            cntgChar = contigs[cIndex][bIndex]
            rawConsensous[rIndex].append(cntgChar)
            if includeMaskedCon:
                if cntgChar.isupper() and refChar.isalpha() and refChar != cntgChar:
                    SNPcount += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0]:
                        cntgChar = 'N'
                        SNPqualmasked += 1
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        cntgChar = 'N'
                        SNPpropMasked += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        SNPmasked += 1
                if contigs[cIndex][bIndex].isalpha():
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0]:
                        numQualMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numPropMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numMasked += 1
                maskedRawCon[rIndex].append(cntgChar)           
    conSequence = [getIUPAC(charList) for charList in rawConsensous]
    if includeMaskedCon: maskedConSeq = [getIUPAC(charList) for charList in maskedRawCon]
    verbose('Complete\n')
    totalLen = len(conSequence)
    if sum(options.mask_SNPs) > 0: verbose('qualtofa: %d of %d total consensus SNPs (%.1f%%) were masked.\n' % (SNPmasked,SNPcount, float(SNPmasked*100)/SNPcount))
    if options.mask_SNPs[0] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPqualmasked,SNPcount, float(SNPqualmasked*100)/SNPcount, options.mask_SNPs[0]))
    if options.mask_SNPs[1] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropMasked,SNPcount, float(SNPpropMasked*100)/SNPcount, options.mask_SNPs[1]))
    if options.mask_consensus[0] > 0: verbose('qualtofa: %d of %d total consensus bases (%.1f%% )were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))        
    if options.mask_consensus[0] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_consensus[0]))        
    if options.mask_consensus[1] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_consensus[1]))        
#############################################################################################################################

        
###Contigs SNP Detection/Masking(-s option implimentation)###################################################################
if isAlignment:
    SNPcovNum, SNPpropNum, SNPmaskedNum, SNPcount = (0,0,0,0)
    SNPs = []
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            refPos = bIndex +  alignmentIndexs[cIndex][0]
            refChar = reference[refPos]
            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
            if cntgChar.isupper() and refChar.isalpha() and cntgChar.isalpha() and refChar != cntgChar:
                SNPcount += 1
                SNPs.append([refPos,refChar,headers[cIndex],bIndex,cntgChar] + qualities[cIndex][bIndex])
                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0]:
                    contigs[cIndex][bIndex] = 'N'
                    SNPcovNum += 1
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
                    contigs[cIndex][bIndex] = 'N'
                    SNPpropNum += 1
                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
                    SNPmaskedNum += 1
    totalLen = sum(map(len,contigs))
    if sum(options.mask_contig_SNPs) > 0: verbose('qualtofa: %d of %d total SNPs (%.1f%%) were masked\n' % (SNPmaskedNum,SNPcount,float(SNPmaskedNum*100)/SNPcount)) 
    if options.mask_contig_SNPs[0] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPcovNum,SNPcount,float(SNPcovNum*100)/SNPcount,options.mask_contig_SNPs[0])) 
    if options.mask_contig_SNPs[1] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropNum,SNPcount,float(SNPpropNum*100)/SNPcount,options.mask_contig_SNPs[1]))     
#############################################################################################################################

###Contigs Masking(-m option implimentation)#################################################################################
if sum(options.mask_contigs) > 0:
    numMasked, numQualMasked, numPropMasked = (0,0,0)
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
            if cntgChar.isalpha():
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    numMasked += 1
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numQualMasked += 1
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numPropMasked += 1             
    totalLen = sum(map(len,contigs))
    verbose('qualtofa: %d bases of %d total bases (%.1f%%) were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen)) 
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_contigs[0]))  
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_contigs[1]))      
#############################################################################################################################

###Contianed Contigs file saving#############################################################################################
if len(containedContigs) > 0:
    outPath = qualPath + '_contained_contigs.fsa'
    verbose('qualtofa: Saving contained contigs to %s...' % outPath)
    saveFasta(outPath, *zip(*containedContigs))
    verbose('Complete\n')
#############################################################################################################################

###SNP file saving###########################################################################################################
if options.save_SNPs:
    outPath = qualPath + '_SNPs.qual'
    verbose('qualtofa: Saving %d SNPs to %s...' % (len(SNPs),outPath))
    outHandle = open(outPath, 'w')
    outHandle.write('Schema:\nPosition\tReference\tName\tIndex\tcall\tA\tC\tG\tT\tdash\tsum\n')
    for SNP in SNPs: outHandle.write('\t'.join(map(str,SNP)) + '\n')
    outHandle.close()
    verbose('Complete\n')
#############################################################################################################################

###Contig sequence alignment#################################################################################################
if isAlignment and options.dont_align_contigs == False:
    contigs = [['-'] * alignmentIndexs[index][0] + contigs[index] for index in range(0,len(contigs))]
#############################################################################################################################

###Output file Saving########################################################################################################
verbose('qualtofa: Saving fasta output to %s...' % (qualPath + '.fsa'))
conHeader = 'Consensus'
maskedConHeader = 'Masked Consensus'    
if isAlignment:
    outputHeads, outputSeqs = ([refHeader,conHeader],[reference,conSequence])
    if includeMaskedCon:
        outputHeads.append(maskedConHeader)
        outputSeqs.append(maskedConSeq)
else: outputHeads, outputSeqs = ([],[])
if options.include_contigs:
    outputHeads += headers
    outputSeqs += contigs
outPath = os.path.join(os.getcwd(), os.path.basename(qualPath) + '.fsa')
saveFasta(outPath, outputHeads, outputSeqs)
verbose('Complete\n')
if options.save_run_info:
    runtimeOutPath = os.path.join(os.getcwd(), 'qualtofa_runtime_output_%s.txt' % timeStamp)
    verbose('qualtofa: option -i/--save-run-info: saving run-time output to %s...' % runtimeOutPath)
    runHandle = open(runtimeOutPath, 'w')
    for line in printLog: runHandle.write(line)
    runHandle.close()
    verbose('Complete\n')
#############################################################################################################################

Qualtofa.py: Difference between revisions

Latest revision as of 12:11, 8 September 2010

Navigation menu

Page actions

Page actions

Personal tools

Navigation

Search

research

Tools

@@ Line 2: / Line 2: @@
 <pre>
 #!/usr/bin/env python
-#############################################################################################################################
-# qualtofa [options] <YASRA/sumqual quality file path>                                                                      #
-#---------------------------------------------------------------------------------------------------------------------------#
-# Example: python qualtofa.py -i -c q 20 r -q 5 -s 20 ../myValues.qual                                                      #
-#---------------------------------------------------------------------------------------------------------------------------#
-# Version: 2.3   Last Modified: 4/19/2010     Author: Zachary S. L. Foster                                                  #
-# Intended uses:                                                                                                            #
-#  1)Produces alignments in FASTA format from sumquals output.                                                              #
-#  2)Extract the sequences of multiple contigs from a YASRA quality file.                                                   #
-#---------------------------------------------------------------------------------------------------------------------------#
-# Modifiers:                                                                                                                #
-#  -c  : Include each contig on its own line... (EX: -c q 20 r)                                                             #
-#        q  : Set masking value for contigs (Analogous to -q modifier below; reference NOT needed).                         #
-#        s* : Set SNP masking value for contigs (Analogous to -s modifier below).                                           #
-#        r* : Do NOT align to the reference using '-'s for spacers.                                                         #
-#  -d  : Save debug log to current working directory.                                                                       #
-#  -i* : Condense all overlapping regions into IUPAC ambiguity codes...                                                     #
-#        Default: Add deletions to the reference to accommodate the overlapping sequence.                                   #
-#  -m* : Only include matching regions from the quality file.                                                               #
-#  -p  : Print output to screen.                                                                                            #
-#  -q* : Set minimum masking value (a new masked sequence will be added to the output file).                                #
-#  -s* : Set minimum masking value for SNPs.                                                                                #
-#  -a  : Include contained contigs (defalt: save sequences of contiained contigs to a seperate file)                        #
-#  -t  : Trim the ends of the contigs by the specified number of bases.                                                     #
-#  -v  : Trim all the bases on either end of all contigs that have a quality value less than the specified amount.          #
-#  -b  : Verbose output; print progress reports and statistics.                                                             #
-#  -l  : Set minimum match length                                                                                           #
-#  -S  : Silent; Do not print anything to the standard output (aka screen)                                                  #
-#  -n  : save SNPs to a .qual file
-#                                                                                                                           #
-# *Note - these modifiers rely on the reference included in the output of sumqual. If your quality                          #
-#         file is a list of unaligned contigs (e.g. from YASRA), these modifiers cannot be used.                            #
-#############################################################################################################################
 ###Imports / Variable Initilization##########################################################################################
 import os, string, sys, time, copy
-defArgList  = ['qualtofa.py','-b','-c','s','1','-c','r','-i','-n','-s','100','C:/Python26/AGCT.filtered.delta.qual']
+from datetime import *
-#defArgList  = ['qualtofa.py','-i','-q','20','-s','100','-c','-v','15','-b','-l','100','C:/Python26/out.delta.qual']
+from optparse import *
-#defArgList  = ['qualtofa.py','-v','50','-c','q','20','s','100','C:/Python26/out.delta.qual']
+from subprocess import *
-#defArgList  = ['qualtofa.py','-t','10','-c','q','20','s','100','C:/Python26/out.delta.qual']
+commandLine = copy.deepcopy(sys.argv)
-argList     = sys.argv #argumen list supplied by user
+cwd = os.getcwd()
-argNum      = len(argList) #the number of arguments supplied
+timeStamp = datetime.now().ctime().replace(' ','-').replace(':','-').replace('--','-')  #Ex: 'Mon-Jun-14-11-08-55-2010'
-minArgNum   = 1 #the smallest amount of arguments with which it is possible to run the script
+cntgMatchRanges = None
-saveDebug   = False #is True if the debug is to be saved
+progName, progVersion = ('qualtofa.py','2.52')
-warning     = False #is set to 'True' if the program encounters any minor errors during the analysis; recorded in debug log
+progUsage = 'python %s [options] <.qual file>' % progName
-printOut    = False #is True when no the -p modifier is supplied; #The results will be printed to the standard output (usually the screen)
+printLog = []
-helpOnly    = False #is 'True' when no arguments are given; only help menu is printed
-includeCntgs= False #is True when the -c modifier is used
-IUPAC       = False #is True when the -i modifier is used
-matchsOnly  = False #is True when the -m modifier is used
-alignCntgs  = True #is False when the r argument of the -c modifier is used
-SQaligned   = False #if the input file has the sumqual output format
-verbose     = False #is True when the -b modifier is used
-SNPs        = False
-silent      = False
-minMatchLen = 50 #Minimum match length accepted (after contig end trimming by -t and -v modifiers)
-trimNum     = 0 #number of bases removed from the ends of all the contigs
-trimQual    = 0 #minimum quality value needed to not be removed from the ends of the contigs
-containedCngts = False #is True when the -a modifier is used
-maskingChar = 'N' #What represents a "unknown" base
-minQualScore= 0 #determined by the presence of the -q modifier
-cntgQualScore= 0 #determined by the presence of the -c modifier, with argument q
-cntgSNPScore= 0 #determined by the presence of the -c modifier, with argument s
-minSNPScore = 0 #determined by the presence of the -s modifier
-savePath    = './' #the directory in which the output will be saved
-defSavePath = 'C:/Python26/' #save path used during script testing with the python GUI IDLE
-modifiers   = [] #eventually contains a list of all modifiers and their arguments supplied by the user
-allMods     = 'cdeimpqastvblSn' #all the modifiers recognized by the script
-activeMods  = '' #a string containing all modifiers specified by the user
-modArgs     = [] #arguments supplied for the modifiers; assumed in the same order as the modifiers
-qualData    = []
-refPath     = ''
-refData     = []
-outData     = [] #eventually holds the data that will be saved in the output file
-debugLog    = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
 #############################################################################################################################
-#Error handling function#####################################################################################################
+def errorExit(message=None,exitStatus=1):
-#>Is called when the script encounters a fatal error                                                                        #
+    '''Version 1.0
-#>Prints the debug log to standard out (usually the screen)                                                                 #
+    Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
-def errorExit():
-    print 'The program was forced to exit prematurely, printing debug log...\n'
+     if message:
-     for line in debugLog:
+        print '%s: Error: %s' % (progName, message)
-        print line
+     else:
-     sys.exit()
+        print progName +': Unknown error, printing debug log...'
-#############################################################################################################################
+        for line in debugLog: print line   #print debug log
+     sys.exit(exitStatus)
-def printHelp(): #is called if no arguments are supplied
-    print '/--------------------------------------------------------------------------------------------------\\'
-    print '| qualtofa [options] <YASRA/sumqual quality file path>                                             |'
-    print '|--------------------------------------------------------------------------------------------------|'
-    print '| Example: python qualtofa.py -i -c q 20 r -q 5 -s 20 ../myValues.qual                             |'
-    print '|--------------------------------------------------------------------------------------------------|'
-    print '| verboseOut: 2.4   Last Modified: 6/24/2010     Author: Zachary S. L. Foster                          |'
-    print '| Intended uses:                                                                                   |'
-    print '|  1)Produces alignments in FASTA format from sumquals output.                                     |'
-    print '|  2)Extract the sequences of multiple contigs from a YASRA quality file.                          |'
-    print '|--------------------------------------------------------------------------------------------------|'
-    print '| Modifiers:                                                                                       |'
-    print '|  -c  : Include each contig on its own line... (EX: -c q 20 r)                                    |'
-     print '|        q  : Set masking value for contigs (Analogous to -q modifier below; reference NOT needed).|'
-    print '|        s* : Set SNP masking value for contigs (Analogous to -s modifier below).                  |'
-    print '|        r* : Align to the reference using dashs for spacers.                                      |'
-    print '|  -d  : Save debug log to current working directory.                                              |'
-    print '|  -i* : Condense all overlapping regions into IUPAC ambiguity codes...                            |'
-    print '|        Default: Add deletions to the reference to accommodate the overlapping sequence.          |'
-    print '|  -m* : Only include matching regions from the quality file.                                      |'
-    print '|  -p  : Print output to screen.                                                                   |'
-    print '|  -q* : Set minimum masking value (a new masked sequence will be added to the output file).       |'
-    print '|  -s* : Set minimum masking value for SNPs.                                                       |'
-    print '|  -a  : Include contained contigs Defalt: save sequences of contiained contigs to a seperate file |'
-    print '|  -t  : Trim the ends of the contigs by the specified number of bases.                            |'
-    print '|  -v  : Trim all the bases on either end of all contigs that have a quality value less than       |'
-     print '|        the specified amount.                                                                     |'
-    print '|  -b  : Verbose output; print progress reports and statistics.                                    |'
-    print '|  -l  : Set minimum match length                                                                  |'
-    print '|  -S  : Silent; Do not print anything to the standard output (aka screen)                         |'
-    print '|                                                                                                  |'
-    print '| *Note - these modifiers rely on the reference included in the output of sumqual. If your quality |'
-    print '|         file is a list of unaligned contigs (e.g. from YASRA), these modifiers cannot be used.   |'
-    print '\\--------------------------------------------------------------------------------------------------/'
-#############################################################################################################################
-#>function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)               #
 def mkRange(numList):
+    '''Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)'''
      outList = []
-     start   = numList[0]
+     if len(numList) > 0:
-    current = numList[0]
+        start   = numList[0]
-    numList.append(numList[-1])
+        current = numList[0]
-    for num in numList[1:]:
+        numList.append(numList[-1])
-        if (num - 1) != current:
+        for num in numList[1:]:
-            outList.append([start,current])
+            if (num - 1) != current:
-            start = num
+                outList.append([start,current])
-        current = num
+                start = num
+            current = num
      return outList
-#############################################################################################################################
-###Argument Interprtation####################################################################################################
+def readLFile(filePath, numLinesToRead = None):
-#>Parses the arguments supplied by the user, or the default values if the script is being run on IDLE                       #
+     fileHandle = open(filePath, 'r')
-#>If no arguments are given, the help menu is printed                                                                       #
+    if numLinesToRead is None:
-#>Modifiers and their arguments are isolated from the raw input for later processing (in Modifier Interpretation)           #
+        fileData = fileHandle.readlines()
-#if __name__ == '__main__': #If the program is being called independent of the Python GUI, IDLE...
+     else:
-if argNum > minArgNum: #If at least the minimum number of arguments necessary is supplied...
+        fileData = [fileHandle.readline() for count in range(0,numLinesToRead)]
-     if os.path.exists(argList[-1]) == 0: #if the path dose not exist
+     fileHandle.close()
-        debugLog.append('Error: Invalid file path to input data')
+    return fileData
-        errorExit() #end the program
-elif argNum == 1: #If no arguments are supplied...
-    helpOnly = True
-    printHelp() #prints help menu
-else:
-    debugLog.append('Error: Too few arguments supplied\n')
-     errorExit()
-#else: #If the script is being imported on to IDLE
-#    argList  = defArgList #use default arguments
-#    argNum   = len(defArgList)
-#    if argNum == 1: #If no arguments are supplied...
-#        helpOnly = True
-#        printHelp() #prints help menu
-#    savePath = defSavePath #sets the save path to the default, specified in the variable initialization section
-#    debugLog.append('Alert: default debugging input arguments are being used\n')
-if helpOnly == False: #if arguments were supplied...
-    qualPath = argList[-1] #the path to the qual file containing the input data
-     if argNum > minArgNum + 1: #if modifiers are present (i.e. more the minimum number of arguments)
-        modifiers = argList[1:-minArgNum] #everything before the required arguments are modifiers and their arguments
-#############################################################################################################################
-if helpOnly == False:
-    ###Modifier Interpretation###############################################################################################
+def writeLFile(filePath, fileData):
-    #>Parses any modifiers and modifier arguments determined by the previous section of code, "Argument Interpretation"     #
+    fileHandle = open(filePath, 'w')
-    #>Given arguments are compared against a list of known arguments                                                        #
+    for line in fileData:
-    #>Matches found change the appropriate variable for the desired effect of the modifier the script                       #
+        fileHandle.write(fileData)
-    if len(modifiers) > 0: #if modifiers are supplied
+    fileHandle.close()
-        for mod in modifiers: #loops through the list of modifiers and modifier arguments
-            if mod[0] == '-' and len(mod) == 2: #list entry considered modifier if it starts with - and is only two characters
-                activeMods += mod[1:] #sorts the modifiers into activeMods...
-            else:
-                modArgs.append(mod) #assumes everything else to be a modifier argument
-        for letter in activeMods: #checks if the modifiers are recognized
-            if string.find(allMods,letter) == -1: #checks if the modifier is recognized by this script
-                debugLog.append('Warning: Unexpected modifier: ' + letter + '\n')
-                warning = True #if the input modifier is not found
-            else:
-                if letter == 'c': #if -c is supplied...
-                    if len(modArgs) > 0 and modArgs[0] == 'q': #if there is at least two non-processed modifier arguments
-                        if len(modArgs) > 1:
-                            cntgQualScore = int(modArgs[1])
-                            del modArgs[:2] #the modifier arguments are deleted from the list
-                        else:
-                            debugLog.append('Error: Modifier argument not supplied\n')
-                            errorExit() #exit the script
-                    if len(modArgs) > 0 and modArgs[0] == 's': #if there is at least two non-processed modifier arguments
-                        if len(modArgs) > 1:
-                            cntgSNPScore = int(modArgs[1])
-                            del modArgs[:2] #the modifier arguments are deleted from the list
-                        else:
-                            debugLog.append('Error: Modifier argument not supplied\n')
-                            errorExit() #exit the script
-                    if len(modArgs) > 0 and modArgs[0] == 'r': #if there is at least one non-processed modifier argument
-                        alignCntgs = False
-                        del modArgs[0] #the modifier argument is deleted from the list
-                    includeCntgs = True #include each contig on its own line
-                elif letter == 'd': #if -d is supplied...
-                    saveDebug = True #The debug log will be saved to the current working directory
-                elif letter == 'a': #if -a is supplied...
-                    containedCngts = True
-                elif letter == 'e': #if -e is supplied...
-                    saveEditLog = True #save log of edits made to the contig
-                elif letter == 'i': #if -i is supplied...
-                    IUPAC = True #condense all overlapping regions into IUPAC ambiguity codes
-                elif letter == 'm': #if -m is supplied...
-                    matchsOnly = True #only include matching regions of from the qual file
-                elif letter == 'p': #if -p is supplied...
-                    printOut = True #The results will be printed to the standard output (usually the screen)
-                elif letter == 'b': #if -b is supplied...
-                    verbose = True #Verbose output
-                elif letter == 'S': #if -S is supplied...
-                    silent = True #Verbose output
-                elif letter == 'n': #if -n is supplied...
-                    SNPs = True
-                elif letter == 'q': #if -q is supplied...
-                    if len(modArgs) > 0: #if there is at least one non-processed modifier argument
-                        minQualScore = int(modArgs[0])
-                        del modArgs[0] #the original argument is deleted from the list
-                    else: #if the list of modifier arguments is empty...
-                        print 'Error: Modifier argument not supplied\n'
-                        sys.exit() #exit the script
-                elif letter == 's': #if -s is supplied...
-                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
-                        minSNPScore = int(modArgs[0]) #The string specifying the desired lines to process from the input file
-                        del modArgs[0] #the original argument is deleted from the list
-                    else: #if the list of modifier arguments is empty...
-                        print 'Error: Modifier argument not supplied\n'
-                        sys.exit() #exit the script
-                elif letter == 't': #if -t is supplied...
-                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
-                        trimNum = int(modArgs[0]) #The string specifying the desired lines to process from the input file
-                        del modArgs[0] #the original argument is deleted from the list
-                    else: #if the list of modifier arguments is empty...
-                        print 'Error: Modifier argument not supplied\n'
-                        sys.exit() #exit the script
-                elif letter == 'v': #if -v is supplied...
-                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
-                        trimQual = int(modArgs[0]) #The string specifying the desired lines to process from the input file
-                        del modArgs[0] #the original argument is deleted from the list
-                    else: #if the list of modifier arguments is empty...
-                        print 'Error: Modifier argument not supplied\n'
-                        sys.exit() #exit the script
-                elif letter == 'l': #if -l is supplied...
-                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
-                        minMatchLen = int(modArgs[0]) #The string specifying the desired lines to process from the input file
-                        del modArgs[0] #the original argument is deleted from the list
-                    else: #if the list of modifier arguments is empty...
-                        print 'Error: Modifier argument not supplied\n'
-                        sys.exit() #exit the script
-    if verbose:
-        print 'qualtofa: Version: 2.4   Last Modified: 6/24/2010 by Zachary S. L. Foster'
-    #########################################################################################################################
-     if verbose:
+def saveQualStandard(filePath, contigs, qualValues, headers, schema=None):
-         print 'Parsing input file ' + qualPath + '.',
+    if schema is None: schema = ['Position in the contig(1 based)', 'Called base', "Number of reads suppoting 'A'", "Number of reads supporting 'C'", "Number of reads supporting 'G'", "Number of reads supporting 'T'", "Number of reads supporting '_'", 'Coverage']
+    if len(contigs) == 0: errorExit('saveQualStandard: empty list supplied')
+     if len(contigs) != len(qualValues) or len(contigs) != len(headers): errorExit('saveQualStandard: Unequal number of entries is input list')
+    outHandle = open(filePath, 'w')
+    outHandle.write('Schema:'+ '\n' + '\t'.join(schema) + '\n')
+    for cIndex in range(0,len(contigs)):
+         outHandle.write(headers[cIndex] + '\n')
+        for bIndex in range(0,len(contigs[cIndex])): outHandle.write('\t'.join([contigs[cIdnex][bIndex]] + qualValues[cIndex]) + '\n')
+    outHandle.close()
+def parseQualStandard(filePath):
+    columbWidth = 8
+    fileData = [line.strip().split('\t') for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file
+    if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
+    schema = fileData[0]   #extract the tab-deliminated schema into a list
+    headers, contigs, qualValues = ([], [], [])   #initilizes multiple lists at a time
+    index = 0
+    for line in fileData[1:]:
+        if len(line) == 1:   #if it is a header
+            headers.append(line[0])
+            contigs.append([])
+            qualValues.append([])
+        elif len(line) == columbWidth:   #if it is  quality value
+            contigs[-1].append(line[1])
+            qualValues[-1].append([int(num) for num in line[2:]])   #converts all of the quality values into integer objects and saves them in qualvalues
+        else: errorExit('parseQualStandard: Quality file at %s deviated from the expected format on line %d.' % (filePath, index))
+        index += 1
+    return (contigs, qualValues, headers, schema)
-     ###Quality Value File Parsing############################################################################################
+def parseQualAlignment(filePath):
-     qualHandle = open(qualPath, 'r') #opens the file containing the quality values
+     columbWidth = 9
-     qualRaw    = qualHandle.readlines() #saves all of the file into qualRaw
+     headColNum = 2
-    qualHandle.close() #closes the file object
+     fileData = [[part.strip() for part in line.split('\t')] for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file
-     qualName = qualRaw[2][:-1] #the name of the first contig is the 3rd line minus the newline
+     if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
-     qualVals = [] #will contain a list of quality values for each contig
+     schema = fileData[0]   #extract the tab-deliminated schema into a list
-     qualData = [] #the data structure holding the parsed data
+     refHeader = fileData[1][0]
-     qualRaw.append('randomline') #place mark needed to process the last contig
+     reference = [line[1] for line in fileData[2:]]   #extracts the reference from the second columb
-     for line in qualRaw[3:]: #for every line past the first 4...
+     qualData = [line[headColNum:] for line in fileData[2:]]   #removes the first lines and columbs of fileData, which dont contain unextracted data
-        splitLine = string.split(line) #make a list of all the parts of line separated by whitespace
+    headers, contigs, cntgStarts, qualValues = ([], [], [], [])   #initilizes multiple lists at a time
-         if len(splitLine) == 1 and splitLine[0].isdigit() == False: #if there is only one "word" (i.e. it is a name of a contig)
+    refPos = 0
-            qualData.append([qualName,qualVals]) #add the previous contig to the parsed data
+    for line in qualData:
-            qualName = splitLine[0] #new contig name
+         if len(line) > 0:
-            qualVals = []
+            for index in range(0,len(line),columbWidth):
-        else: #if the line is a set of quality values...
+                if headers.count(line[index]) == 0:   #if the contig header has not been encountered before
-            qualVals.append(splitLine) #append them to the list of quality values for this contig
+                    headers.append(line[index])
-     #########################################################################################################################
+                    cntgStarts.append(refPos)
+                    contigs.append([])
+                    qualValues.append([])
+                headIndex = headers.index(line[index])
+                contigs[headIndex].append(line[index + 2])   #append the called base character to its contig
+                qualValues[headIndex].append([int(num) for num in line[index + 3: index + columbWidth]])   #converts all of the quality values into integer objects and saves them in qualvalues
+        refPos += 1
+    alignmentIndexs = [[cntgStarts[index], cntgStarts[index] + len(contigs[index]) - 1] for index in range(0,len(cntgStarts))]
+     return (contigs, qualValues, headers, alignmentIndexs, reference, refHeader, schema)
-     if verbose:
+def parseQual(filePath):
-         print '.',
+    columbWidth = 9
+    headColNum = 2
+    fileHead = readLFile(filePath, 2)
+     if fileHead[0].strip() == 'Schema:': del fileHead[0]   #removes the unessesary first line of the file, if present
+    schema = fileHead[0].strip().split('\t')
+    if schema[1] == 'Reference' and len(schema) % columbWidth == headColNum:
+        return parseQualAlignment(filePath)
+    elif len(schema) == 8:
+         return parseQualStandard(filePath)
+    else: errorExit('parseQual: The quality file at %s has an unreconized format based on the schema on line 2.' % filePath)
-    ###Reference Extraction##################################################################################################
+def listToStr(aList):
-    schema = string.split(qualRaw[1])
+     out = ''
-     if len(schema) > 0 and schema[1] == 'Reference':
+    for char in aList: out += char
-        SQaligned = True
+     return out
-        reference = []
-        for qual in qualData[0][1]:
-            reference.append(qual[1])
-     #########################################################################################################################
-     if verbose:
+def saveFasta(filePath, headers, sequences):
-         print '.',
+    outHandle = open(filePath, 'w')
+     if len(headers) != 0 and len(sequences) != 0:
+         if len(headers) != len(sequences): errorExit('saveFastaFile: different number of headers and sequences')
+        if type(headers) == str and type(sequences) == str:
+            headers = [headers]
+            sequences = [sequences]
+        if type(sequences[0]) == list: sequences = [listToStr(seq) for seq in sequences]
+        for index in range(0,len(headers)):
+            outHandle.write('>%s\n' % headers[index])
+            if type(sequences[index]) == str: sequences[index] = [sequences[index]]  #if there is only one line of sequence for a given header, make it a one-item list for the next loop
+            for seq in sequences[index]:
+                outHandle.write(seq + '\n')
+    outHandle.close()
-     ###Contig Extraction####################################################################################################
+def parseFasta(filePath):
-     contigs = [] #will hold a list of contig names and their sequences (ex: [['name1',['A','C','G']],['name2',['G','C','A']],...])
+     headers = []
-     def addBase(cntgName,baseQuals):
+     seqs = []
-        for cntgIndex in range(0,len(contigs)):
+    fileHandle = open(filePath, 'r')
-            if contigs[cntgIndex][0] == cntgName:
+    fileData = fileHandle.readlines() + ['>']   #the additional header line is there make to following for loop process the last sequence
-                contigs[cntgIndex][1].append(baseQuals)
+    fileHandle.close()
-                return
+     currSeq = ''
-        if SQaligned:
+    for line in fileData:
-             contigs.append([cntgName,[baseQuals],refIndex])
+        if line[0] == '>':   #if the line is a fasta header
+            headers.append(line[1:].strip() )   #the fisrt character is the '>' and the last is a newline '\n'
+             seqs.append(currSeq)
+            currSeq = ''
          else:
-             contigs.append([cntgName,[baseQuals]])
+             currSeq += line.strip()
+    return (headers[:-1], seqs[1:])
-     if SQaligned:
-        qualNum = 9
+def removeRepeats(aList):
-        refColNum = 3
+     for entry in aList:
-     else:
+        if aList.count(entry) > 1:
-         qualNum = 7
+            aList.remove(entry)
-         refColNum = 0
+            return removeRepeats(aList)
-     for cntgIndex in range(0,len(qualData)): #for every contig...
+    return aList
-         for baseIndex in range(0,len(qualData[cntgIndex][1])): #for every base of every contig
-            baseDepth = (len(qualData[cntgIndex][1][baseIndex]) - refColNum + 1) / qualNum #the number of distinct bases (quality value data) per index
+def maskingCallback(option, opt_str, value, parser):
-            for depth in range(0,baseDepth): #for every quality value for every base in every contig
+    value = []
-                startIndex = (depth * qualNum) + refColNum + 1 #the index of the character value for that quality value
+     def floatable(str):
-                endIndex = startIndex + 5
+         try:
-                if SQaligned:
+            float(str)
-                    name = qualData[cntgIndex][1][baseIndex][startIndex - 2]
+            return True
-                    refIndex = qualData[cntgIndex][1][baseIndex][0]
+         except ValueError: return False
-                else:
+     for arg in parser.rargs:
-                    name = qualData[cntgIndex][0]
+        if arg[:2] == "--" and len(arg) > 2: break   # stop on --foo like options
-                quals = qualData[cntgIndex][1][baseIndex][startIndex:endIndex + 2]
+        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break   # stop on -a, but not on -3 or -3.0
-                addBase(name,quals) #add the character value to the list
+        value.append(arg)
-     #########################################################################################################################
+         if len(value) == 2: break
+    if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
+    del parser.rargs[:len(value)]
+    if len(value) == 1: value.append(0)
+    value[0] = int(value[0])
+    value[1] = float(value[1])
+    if value[0] < 0: errorExit("The first argument of the option '%s' must be a positive integer; %d supplied." % (option.dest,value[0]))
+    if value[1] < 0 or value[1] > 1: errorExit("The second argument of the option '%s' must be a decimal between 0 and 1; %f supplied." % (option.dest,value[1]))
+    setattr(parser.values, option.dest, value)
+###Command Line Parser#######################################################################################################
+cmndLineParser  = OptionParser(usage=progUsage, version="Version %s" % progVersion)
+maksingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
+cmndLineParser.add_option(   "-c",   "--include-contigs",    action="store_true",    default=False,                                                             help="Include each contig on its own line. (Default: only output the consensus)")
+maksingGroup.add_option(   "-m",   "--mask-contigs",       action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
+maksingGroup.add_option(   "-s",   "--mask-contig-SNPs",   action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
+maksingGroup.add_option(   "-M",   "--mask-consensus",     action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
+maksingGroup.add_option(   "-S",   "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_SNPs',         help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
+cmndLineParser.add_option_group(maksingGroup)
+cmndLineParser.add_option(   "-t",   "--end-trim",           action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs by a specified number of bases. (Default: 0)")
+cmndLineParser.add_option(   "-q",   "--end-trim-qual",      action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount. This option is implemented after standard end trimming (see -t option). (Default: 0)")
+cmndLineParser.add_option(   "-l",   "--min-match-length",   action="store",         default=0,          type="int",    metavar="INT",      help="Only include contigs that have a least one match longer or as long as the specified amount. This is implemented after any end trimming (see -q and -t options). (Default: 0)")
+cmndLineParser.add_option(   "-n",   "--no-overlap",         action="store_true",    default=False,                                         help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
+cmndLineParser.add_option(   "-a",   "--no-match-overlap",   action="store_true",    default=False,                                          help="Add deletions to the reference to accommodate overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
+cmndLineParser.add_option(   "-k",   "--keep-contained",     action="store_true",    default=False,                                         help="Include contained contigs in the alignment and consensus. (Default: save sequences of contained contigs to a separate file)")
+cmndLineParser.add_option(   "-p",   "--save-SNPs",          action="store_true",    default=False,                                         help="Save SNPs to a .qual file. (Default: Do not save SNP file)")
+cmndLineParser.add_option(   "-d",   "--dont-align-contigs", action="store_true",    default=False,                                         help="Do not align contigs to the reference. Requires the use of the -c modifier. (Default: align contigs to the reference)")
+cmndLineParser.add_option(   "-r",   "--external-ref",       action="store",         default=None,       type="string", metavar="PATH",     help="Specify a reference to use instead of the one contained in the quality file.")
+cmndLineParser.add_option(   "-v",   "--verbose",            action="store_true",    default=False,                                         help="Print progress updates and relevant statistics to the standard output. (Default: run silently)")
+cmndLineParser.add_option(   "-i",   "--save-run-info",      action="store_true",    default=False,     help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)")
+(options, args) = cmndLineParser.parse_args(commandLine)
+if options.include_contigs == False:
+    if sum(options.mask_contigs) > 1: errorExit('The option -m/--mask-contigs requires the use of the option -c/--include-contigs.')
+    if sum(options.mask_contig_SNPs) > 1: errorExit('The option -s/--mask-contig-SNPs requires the use of the option -c/--include-contigs.')
+    if options.dont_align_contigs: errorExit('The option -d/--dont-align-contigs requires the use of the option -c/--include-contigs.')
+if len(args) == 1:
+    cmndLineParser.print_help()
+    sys.exit(0)
+argNum = len(args) - 1   #counts the amount of arguments, negating the 'qualtofa' at the start of the command line
+if argNum != 1: errorExit('qualtofa takes exactly 1 argument; %d supplied' % argNum, 0)
+qualPath = args[1]   #The file path to the input file supplied by the user
+#############################################################################################################################
+def verbose(toPrint):
+    if options.verbose:
+        print toPrint,
+        printLog.append(toPrint)
+###Quality file parsing######################################################################################################
+verbose('qualtofa: Loading data from %s...' % qualPath)
+qualData = parseQual(qualPath)
+verbose('Complete\n')
+if len(qualData) == 4:   #if the quality file contains a list of unaligned contigs (e.g. one directly from yasra)
+    isAlignment = False
+    if options.no_match_overlap: errorExit('The option -a/--no-match-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
+    if any(options.mask_consensus): errorExit('The option -M/--mask-consensus requires an aligned quality consensous file, like that given by sumqual.py.')
+    if any(options.mask_SNPs): errorExit('The option -S/--mask-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
+    if any(options.mask_contig_SNPs): errorExit('The option -s/--mask-contig-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
+    if options.keep_contained: errorExit('The option -k/--keep-contained requires an aligned quality consensous file, like that given by sumqual.py.')
+    if options.min_match_length: errorExit('The option -l/--min-match-length requires an aligned quality consensous file, like that given by sumqual.py.')
+    if options.save_SNPs: errorExit('The option -p/--save-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
+    if options.no_overlap: errorExit('The option -n/--no-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
+    if options.external_ref: errorExit('The option -r/--external-ref requires an aligned quality consensous file, like that given by sumqual.py.')
+    contigs, qualities, headers, schema = qualData
+elif len(qualData) == 7:   #if the quality file contains an alignment (i.e. from sumqual)
+    isAlignment = True
+     contigs, qualities, headers, alignmentIndexs, reference, refHeader, schema = qualData
+else: errorExit()
+#############################################################################################################################
-    ###Conitg Match Index addition#########################################################################################
+###External reference parsing(-r option implimintation)######################################################################
-    if SQaligned:
+if options.external_ref is not None:
-        rejects = []
+    verbose('qualtofa: option -r/--external-ref: loading reference at %s...' % options.external_ref)
-        for cntgIndex in range(0,len(contigs)):
+    (seqs,headers) = parseFasta(options.external_ref)
-            inMatch = False
+    reference, refHeader = seqs[0], headers[0]
-            cntgEnd = 0
+    verbose('Complete\n')
-            cntgStart = 0
+#############################################################################################################################
-            for baseIndex in range(0,len(contigs[cntgIndex][1])):
-                if contigs[cntgIndex][1][baseIndex][0].isupper() and inMatch == False:
-                    cntgStart = baseIndex + 1
-                    inMatch = True
-                elif contigs[cntgIndex][1][baseIndex][0].islower() and inMatch == True:
-                    cntgEnd = baseIndex
-                    break
-            if cntgEnd == 0:
-                cntgEnd = len(contigs[cntgIndex][1])
-            refStart = int(contigs[cntgIndex][2]) + cntgStart - 1
-            refEnd = int(contigs[cntgIndex][2]) + cntgEnd - 1
-            del contigs[cntgIndex][2]
-            if refEnd - refStart < minMatchLen:
-                rejects.append(cntgIndex)
-            else:
-                contigs[cntgIndex].append([refStart,refEnd,cntgStart,cntgEnd])
-        rejects.reverse()
-        for index in rejects:
-            del contigs[index]
-        if verbose:
-            print 'Complete'
-            print '    ' + str(len(contigs)) + 'contigs will be included in the anaylsis.'
-            if len(rejects) == 1:
-                print '    1 contig was not included due to insufficient sequence length ( < ' + str(minMatchLen) + ' ).'
-            elif len(rejects) > 1:
-                print '    ' + str(len(rejects)) + ' contigs were not included due to insufficient sequence length ( < ' + str(minMatchLen) + ' ).'
+def delContig(index):
-    #########################################################################################################################
+    del contigs[index]
+    del headers[index]
-    ###Contig end trimming###################################################################################################
+    del qualities[index]
-    def trim(cntgIndex,startT,endT):
+     if isAlignment:
-        if startT + endT + minMatchLen >= (contigs[cntgIndex][2][1] - contigs[cntgIndex][2][0] + 1):
+        del alignmentIndexs[index]
-            del contigs[cntgIndex]
+        if cntgMatchRanges is not None: del cntgMatchRanges[index]
-            return 1
-        else:
-            unMatchedStart = contigs[cntgIndex][2][2] - 1 #the number of bases before the match
-            unMatchedEnd   = len(contigs[cntgIndex][1]) - contigs[cntgIndex][2][3] #the number of bases after the match
-            matchedStart   = startT  - unMatchedStart
-            matchedEnd     = endT - unMatchedEnd
-            if matchedStart > 0: #if the unmatched sequence is too small to accomodate the whole trim length
-                contigs[cntgIndex][2][0] += matchedStart
-                contigs[cntgIndex][2][2] = 1
-                contigs[cntgIndex][2][3] -= startT #+ matchedStart
-            else: #if the entire trim length can be taken out of the non-matching sequence
-                contigs[cntgIndex][2][2] -= startT
-                contigs[cntgIndex][2][3] -= startT
-            if matchedEnd > 0:
-                contigs[cntgIndex][2][1] -= matchedEnd
-                contigs[cntgIndex][2][3] -= matchedEnd
-            del contigs[cntgIndex][1][:startT]
-            if endT > 0:
-                del contigs[cntgIndex][1][-endT:]
-        return 0
-     if trimNum > 0: #if -t modifier is supplied
+def delContigs(indexes):
-         if verbose:
+     if len(indexes) > 0:
-            print '-t Modifier: Contig ends are being trimed by ' + str(trimNum) + '...',
+         indexes.sort()
-        delCount = 0
+         indexes.reverse()
-        for cntgIndex in range(-len(contigs) + 1,1):
+         indexes = removeRepeats(indexes)
-            cntgIndex *= -1
+         for index in indexes: delContig(index)   #deletes all contigs
-            if trim(cntgIndex,trimNum,trimNum) == 1:
-                delCount += 1
-         if verbose:
-            print 'Complete'
-            if delCount == 1:
-                print '    1 contig was removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
-            elif delCount > 1:
-                print '    ' + str(delCount) + ' contigs were removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
-    if trimQual > 0: #if -v modifier is supplied
-         delCount = 0
-        if verbose:
-            print '-v Modifier: Contig ends are being trimed until a base with a quality value of at least ' + str(trimQual) + ' is encountered...',
-         for cntgIndex in range(-len(contigs) + 1,1):
-            cntgIndex *= -1
-            startTrim = 0
-            endTrim = 0
-            for baseIndex in range(0,len(contigs[cntgIndex][1])):
-                if int(contigs[cntgIndex][1][baseIndex][-1]) < trimQual:
-                    startTrim += 1
-                else:
-                    break
-            for baseIndex in range(-len(contigs[cntgIndex][1]) + 1,1): #loops backwards
-                baseIndex *= -1
-                if int(contigs[cntgIndex][1][baseIndex][-1]) < trimQual:
-                    endTrim += 1
-                else:
-                    break
-            if trim(cntgIndex,startTrim,endTrim) == 1:
-                delCount += 1
-        if verbose:
-            print 'Complete'
-            if delCount == 1:
-                print '    1 contig was removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
-            elif delCount > 1:
-                print '    ' + str(delCount) + ' contigs were removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
-    #########################################################################################################################
-    ###Non-matching contig end triming#######################################################################################
+def delBase(cntgIndex, baseIndex):
-    if matchsOnly:
+    del qualities[cntgIndex][baseIndex]
-        if verbose:
+    del contigs[cntgIndex][baseIndex]
-            print '-m Modifier: Removing non-matching sequence...',
+    if isAlignment:
-        if SQaligned:
+        if baseIndex == 0: alignmentIndexs[cntgIndex][0] += 1
-            for cntgIndex in range(0,len(contigs)):
+        else: alignmentIndexs[cntgIndex][1] -= 1
-                del contigs[cntgIndex][1][contigs[cntgIndex][2][3]:]
-                del contigs[cntgIndex][1][:contigs[cntgIndex][2][2] - 1]
-                contigs[cntgIndex][2][3] -= contigs[cntgIndex][2][2] - 1
-                contigs[cntgIndex][2][2] = 1
-        else:
-            debugLog.append('Error: the -m modifier must be used with an aligned quality file from sumqual.')
-            errorExit() #exit the script
-        if verbose:
-            print 'Complete'
-    #########################################################################################################################
-    ###Contained contig extraction###########################################################################################
+def delBases(cntgIndex, baseIndexes):
-    contCntgSeqs = []
+    baseIndexes.sort()
-    if containedCngts == False and includeCntgs:
+    baseIndexes.reverse()
-        if verbose:
+    for index in baseIndexes: delBase(cntgIndex, index)
-            print 'Removing contianed contigs from the anaylsis...',
-        contained = []
-        contCntgs = []
-        for cntgIndex in range(0,len(contigs)):
-            for compIndex in range(0,len(contigs)):
-                if contigs[compIndex][2][0] >= contigs[cntgIndex][2][0] and contigs[compIndex][2][1] <= contigs[cntgIndex][2][1] and compIndex != cntgIndex:
-                    contained.append(compIndex)
-        contained.reverse()
-        for index in contained:
-            contCntgs.append(contigs[index])
-            del contigs[index]
-        for cntg in contCntgs:
-            contCntgSeqs.append('>' + cntg[0])
-            outStr = ''
-            for qual in cntg[1]:
-                outStr += str(qual[0])
-            contCntgSeqs.append(outStr)
-        if verbose:
-            print 'Complete'
-            if len(contained) == 0:
-                print '    No contained contigs found.'
-            elif len(contained) == 1:
-                print '    1 contained contig found.'
-            else:
-                print '    ' + str(len(contained)) + ' contained contigs found.'
-    elif verbose:
-        print '-a Modifier: Contained contigs will be included in the anaylsis.'
-    #########################################################################################################################
-     sortIndex = 0
+###Contig end Trimming (-q and -t option implimentation)#####################################################################
-     def sortComp(cntgA,cntgB):
+if options.end_trim > 0:
-        cntgAStart = cntgA[2][0] - cntgA[2][2]
+     verbose('qualtofa: option -t/--end-trim: Trimming %d bases from both ends of each contig...' % options.end_trim)
-        cntgBStart = cntgB[2][0] - cntgB[2][2]
+    rejectIndexes = [index for index in range(0,len(contigs)) if len(contigs[index]) <= options.end_trim * 2]   #makes a list of all the indexs of contigs that are too short to be trimmed.
-        if cntgAStart > cntgBStart:
+     delContigs(rejectIndexes)   #deletes all rejected contigs
-            return 1
+    contigs = [cntg[options.end_trim : -options.end_trim] for cntg in contigs]   #trims all remaining contigs
-         elif cntgAStart == cntgBStart:
+    qualities = [qual[options.end_trim : -options.end_trim] for qual in qualities]   #trims all remaining contigs
-            return 0
+    alignmentIndexs = [[indexRange[0] + options.end_trim,indexRange[1] - options.end_trim]  for indexRange in alignmentIndexs]
-        else:
+    if options.verbose:
-            return -1
+        verbose('Complete\n')
+         if len(rejectIndexes) > 0: verbose('option -t/--end-trim: %d contig(s) were removed from the analysis because they were shorter than twice the trim length of %d.\n' % (len(rejectIndexes),options.end_trim))
-    #########################################################################################################################
+if options.end_trim_qual > 0:
-    #>Funtion used by the IUPAC function to remove repeated characters in list                                              #
+     verbose('qualtofa: option -q/--end-trim-qual: Trimming contig ends until a base with a coverage of %d or more is reached...' % options.end_trim_qual)
-     #>ex: ['A','C','C'] becomes ['A','C']                                                                                   #
+     for index in range(0, len(contigs)):
-     def removeRepeats(charList):
+         while len(qualities[index]) > 0 and qualities[index][0][-1] < options.end_trim_qual: delBase(index,0)
-         for charIndex in range(0,len(charList)):
+        while len(qualities[index]) > 0 and qualities[index][-1][-1] < options.end_trim_qual: delBase(index,-1)
-            for index in range(0,len(charList)):
+    rejectIndexes = [index for index in range(0,len(qualities)) if len(qualities[index]) == 0] #makes a list of all the indexs of contigs that dont contain sequence after trimming
-                if charIndex != index and charList[charIndex] == charList[index]:
+    delContigs(rejectIndexes)   #deletes all rejected contigs
-                    if charList[charIndex].islower():
+    if options.verbose:
-                        del charList[charIndex]
+        verbose('Complete\n')
-                    else:
+         if len(rejectIndexes) > 0: verbose('option -q/--end-trim-qual: %d contig(s) removed from the analysis because all of their sequence had quality values less than %d.\n' % (len(rejectIndexes),options.end_trim_qual))
-                        del charList[index]
+#############################################################################################################################
-                    return removeRepeats(charList)
-         return charList
-    #########################################################################################################################
-    #########################################################################################################################
+###Contigs Match Coordinate Extraction (-l option implimentation)############################################################
-    #>Function that takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code             #
+if isAlignment:
-    #>Assumes only input characters to be ACTG-~N                                                                           #
+     cntgMatchRanges = [mkRange([index for index in range(0,len(cntg)) if cntg[index].isupper()]) for cntg in contigs]
-    #>Ex: ['A','C'] becomes 'M'                                                                                             #
+    if options.min_match_length > 0:
-     def getIUPAC(baseValues):
+         verbose('qualtofa: option -l/--min-match-length: filtering contigs without a match of at least %d base pairs...' % options.min_match_length)
-        returnChar = ''
+        maxMatchLens = [max([match[1] - match[0] + 1 for match in matchList] + [0]) for matchList in cntgMatchRanges]
-        x = removeRepeats(baseValues)
+        rejectIndexes = [index for index in range(0,len(contigs)) if maxMatchLens[index] < options.min_match_length]
-        if len(x) == 0:
+         delContigs(rejectIndexes)
-            return '-'
+         if options.verbose:
-        elif len(x) == 1:
+             verbose('Complete\n')
-            return x[0]
+             if len(rejectIndexes) > 0: verbose('option -l/--min-match-length: %d contig(s) removed from the analysis due to the lack of a match %d base pairs long.\n' % (len(rejectIndexes),options.min_match_length))
-        for index in range(0,len(x)):
+#############################################################################################################################
-            if x[index] == '~':
-                x[index] = '-'
-        x.sort()
-        for index in range(-len(x) + 1, 1):
-            index *= -1
-            if len(x) > 1 and x[index] == '-':
-                del x[index]
-         for index in range(-len(x) + 1, 1):
-            index *= -1
-            if len(x) > 1 and x[index].upper() == 'N':
-                del x[index]
-        if len(x) > 1:
-            returnLower = True
-            for char in x:
-                if char.isupper():
-                    returnLower = False
-                    break
-            if returnLower == False:
-                for index in range(-len(x) + 1, 1):
-                    index *= -1
-                    if len(x) > 1 and x[index].islower():
-                        del x[index]
-            else:
-                for index in range(0,len(x)):
-                    x[index] = x[index].upper()
-         if len(x) == 1:
-            return x[0]
-         elif len(x) == 2:
-            if (x[0] == 'C' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'C'):
-                returnChar = 'Y'
-             elif (x[0] == 'A' and x[1] == 'G') or (x[0] == 'G' and x[1] == 'A'):
-                returnChar = 'R'
-             elif (x[0] == 'A' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'A'):
-                returnChar = 'W'
-            elif (x[0] == 'C' and x[1] == 'G') or (x[0] == 'G' and x[1] == 'C'):
-                returnChar = 'S'
-            elif (x[0] == 'G' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'G'):
-                returnChar = 'K'
-            elif (x[0] == 'C' and x[1] == 'A') or (x[0] == 'A' and x[1] == 'C'):
-                returnChar = 'M'
-        elif len(x) == 3:
-            if x[0] != 'C' and x[1] != 'C' and x[2] != 'C':
-                returnChar = 'D'
-            elif x[0] != 'T' and x[1] != 'T' and x[2] != 'T':
-                returnChar = 'V'
-            elif x[0] != 'G' and x[1] != 'G' and x[2] != 'G':
-                returnChar = 'H'
-            elif x[0] != 'A' and x[1] != 'A' and x[2] != 'A':
-                returnChar = 'B'
-        else:
-            returnChar = 'N'
-        if returnLower:
-            returnChar = returnChar.lower()
-        return returnChar
-    #########################################################################################################################
-    SNPquals   = []
-    alignedSeq = ''
-    maskedSeq  = ''
-    contigs.sort(sortComp)
-     def getMasked(cntg,baseIndex,refIndex,minBaseVal,minSNPVal,minBaseProp,minSNPProp):
+###Contained contig extraction (-k option implimentation)####################################################################
-        char = cntg[1][baseIndex][0]
+if isAlignment:
-        qual = int(cntg[1][baseIndex][-1])
+     verbose('qualtofa: Searching for contained contigs...')
-        if char != '-':
+    matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
-            ref = reference[refIndex].upper()
+    containedIndexes = [[bIndex for bIndex in range(0,len(matchRanges)) if matchRanges[aIndex][0] <= matchRanges[bIndex][0] and matchRanges[aIndex][1] >= matchRanges[bIndex][1] and aIndex != bIndex] for aIndex in range(0,len(matchRanges))]
-            if char.isupper() and char.upper() != ref.upper() and ref != '-': #is SNP
+    containedIndexes = [index for contained in containedIndexes for index in contained]
-                if qual < minSNPVal:
+    containedContigs = [(headers[index], contigs[index]) for index in containedIndexes]
-                    char = maskingChar.upper()
+    verbose('Complete\n')
-                qual = [ref] + [refIndex + 1] + [cntg[0]] + [baseIndex + 1] + cntg[1][baseIndex]
+    if options.keep_contained == False:
-                SNPquals.append(qual)
+        delContigs(containedIndexes)
-            elif minBaseVal > 0 and int(cntg[1][baseIndex][-1]) < minBaseVal:
+        if len(containedIndexes) > 0: verbose('qualtofa: %d contained contig(s) removed from the analysis.\n' % len(containedIndexes))
-                if cntg[1][baseIndex][0].isupper():
+    elif len(containedIndexes) > 0: verbose('qualtofa: option -k/--keep-contained: %d contained contig(s) included in the analysis.\n' % len(containedIndexes))
-                    char = maskingChar.upper()
+#############################################################################################################################
-                else:
-                    char = maskingChar.lower()
-        return char
-     ###IUPAC Alignment Generation############################################################################################
+def getIUPAC(baseValues):
-     if IUPAC:
+     '''Takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code. Assumes only input characters to be ACTG-~N'''
-         if SQaligned:
+     def IUPAC(bases):
-            if verbose:
+         conversion = {'CT':'Y','AG':'R','AT':'W','CG':'S','GT':'K','AC':'M','AGT':'D','ACG':'V','ACT':'H','CGT':'B','ACGT':'N'}
-                print '-i Modifier: Generating IUPAC-format consensous alignment...',
+        bases.sort()  #['A', 'C', 'G', 'T']
-            for refIndex in range(0,len(reference)):
+        if bases[0].islower():
-                maskedChars = []
+            bases = map(str.upper,bases)
-                origChars = []
+            return str.lower(conversion[''.join(bases)])
-                for cntg in contigs:
+        else: return conversion[''.join(bases)]
-                    start = cntg[2][0] - cntg[2][2]
+    chars = removeRepeats(baseValues)
-                    end   = cntg[2][1] + (len(cntg[1]) - cntg[2][3]) - 1
+    chars.sort()  #['-', 'A', 'C', 'G', 'N', 'T', 'a', 'c', 'g', 'n', 't', '~']
-                    if refIndex >= start and refIndex <= end:
+    if len(chars) == 0: return '-'
-                        baseIndex = refIndex - start
+    if len(chars) == 1: return chars[0]
-                        maskedChars.append(getMasked(cntg,baseIndex,refIndex,minQualScore,minSNPScore,minBaseProp,minSNPProp))
+    if chars[-1] == '~':   #converts '~' to '-'
-                        origChars.append(cntg[1][baseIndex][0])
+        del chars[-1]
-                maskedSeq += getIUPAC(maskedChars)
+        if chars[0] != '-': chars.insert(0,'-')
-                alignedSeq += getIUPAC(origChars)
+    priorityList = ('ACGT','N','-','acgt','n')
-            outData.append(['Aligned_Contigs',alignedSeq])
+    for group in priorityList:
-            if minQualScore != 0 or minSNPScore != 0:
+        matchs = [base for base in chars if group.find(base) != -1]
-                outData.append(['Masked_Contigs',maskedSeq])
+        if len(matchs) == 0: continue
-            if verbose:
+        elif len(matchs) == 1: return matchs[0]
-                print 'Complete'
+         else: return IUPAC(matchs)
-         else:
-            debugLog.append('Error: the -i modifier must be used with an aligned quality file from sumqual.')
-            errorExit() #exit the script
-    #########################################################################################################################
-     def contigsOverlap(index1,index2):
+###Overlap handleing (-i, -n options implimentation)############################################################################
-         cntg = contigs[index1]
+if isAlignment:
-        comp = contigs[index2]
+     if options.no_match_overlap or options.no_overlap:
-        compStart = comp[2][0] - comp[2][2]
+        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: Adding deletions to the reference to accomodate all overlapping sequence...')
-         cntgEnd   = cntg[2][1] + (len(cntg[1]) - cntg[2][3]) - 1
+        else: verbose('qualtofa: option -a/--no-match-overlap: Adding deletions to the reference to accomodate overlapping matches...')
-         out = cntgEnd - compStart + 1
+         matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
-         if out >= 0:
+         if options.no_overlap: interSeqGaps = [alignmentIndexs[index + 1][0] - alignmentIndexs[index][1] -1 for index in range(0,len(contigs) - 1)]
-             return out
+        else: interSeqGaps = [matchRanges[index + 1][0] - matchRanges[index][1] - 1 for index in range(0,len(contigs) - 1)]
-        else:
+        verbose('Complete\n')
-             return 0
+        gapSum = sum([abs(gap) for gap in interSeqGaps if gap < 0])
+         totalLen = sum(map(len,contigs))
+        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: %d of %d total bases (%d percent) in the consensus is overlaping sequence.\n' % (gapSum,totalLen,gapSum*100/len(reference)))
+        else: verbose('qualtofa: option -a/--no-match-overlap: %d of %d total bases (%.1f%%) in the consensus are overlapping matchs.\n' % (gapSum,totalLen,float(gapSum)*100/len(reference)))
+         refOffset = 0
+        for index in range(0,len(interSeqGaps)):
+             if interSeqGaps[index] < 0:   #if matching sequence overlaps...
+                for count in range(0,abs(interSeqGaps[index])): reference.insert(matchRanges[index][1] + refOffset + 1, '-')
+                refOffset += abs(interSeqGaps[index])
+             alignmentIndexs[index + 1][0] += refOffset
+            alignmentIndexs[index + 1][1] += refOffset
+##########################################################################################################################################
-     def matchsOverlap(index1,index2):
+def getProp(call, qual):
-         compStart = contigs[index2][2][0]
+    equivalancies = {'A':0,'C':1,'G':2,'T':3,'N':4}
-        cntgEnd   = contigs[index1][2][1]
+    try: callIndex = equivalancies[call.upper()]
-        out = cntgEnd - compStart + 1
+    except KeyError: errorExit('getProp: Non-neucleotide base %s encountered...' % call)
-        if out >= 0:
+    if callIndex == 4:
-            return out
+        return float(max(qual[:4]))/float(qual[-1])
-        else:
+    return float(qual[callIndex])/float(qual[-1])
-            return 0
+###Consensous Creation (-M, -S options implimentation)############################################################################
+if isAlignment:
+    verbose('qualtofa: Creating consensus...')
+    numMasked, numQualMasked, SNPmasked, SNPqualmasked, numPropMasked, SNPpropMasked, SNPcount = (0,0,0,0,0,0,0)
+    if sum(options.mask_SNPs + options.mask_consensus) > 0: includeMaskedCon = True
+    else: includeMaskedCon = False
+    rawConsensous = [[] for index in range(0,len(reference))]
+    if includeMaskedCon: maskedRawCon = [[] for index in range(0,len(reference))]
+     for cIndex in range(0,len(contigs)):
+         for bIndex in range(0,len(contigs[cIndex])):
+            rIndex = alignmentIndexs[cIndex][0] + bIndex
+            refChar = reference[rIndex]
+            cntgChar = contigs[cIndex][bIndex]
+            rawConsensous[rIndex].append(cntgChar)
+            if includeMaskedCon:
+                if cntgChar.isupper() and refChar.isalpha() and refChar != cntgChar:
+                    SNPcount += 1
+                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0]:
+                        cntgChar = 'N'
+                        SNPqualmasked += 1
+                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
+                        cntgChar = 'N'
+                        SNPpropMasked += 1
+                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
+                        SNPmasked += 1
+                if contigs[cIndex][bIndex].isalpha():
+                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0]:
+                        numQualMasked += 1
+                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
+                        else: cntgChar = 'n'
+                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
+                        numPropMasked += 1
+                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
+                        else: cntgChar = 'n'
+                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
+                        numMasked += 1
+                maskedRawCon[rIndex].append(cntgChar)
+    conSequence = [getIUPAC(charList) for charList in rawConsensous]
+    if includeMaskedCon: maskedConSeq = [getIUPAC(charList) for charList in maskedRawCon]
+    verbose('Complete\n')
+    totalLen = len(conSequence)
+    if sum(options.mask_SNPs) > 0: verbose('qualtofa: %d of %d total consensus SNPs (%.1f%%) were masked.\n' % (SNPmasked,SNPcount, float(SNPmasked*100)/SNPcount))
+    if options.mask_SNPs[0] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPqualmasked,SNPcount, float(SNPqualmasked*100)/SNPcount, options.mask_SNPs[0]))
+    if options.mask_SNPs[1] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropMasked,SNPcount, float(SNPpropMasked*100)/SNPcount, options.mask_SNPs[1]))
+    if options.mask_consensus[0] > 0: verbose('qualtofa: %d of %d total consensus bases (%.1f%% )were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))
+    if options.mask_consensus[0] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_consensus[0]))
+    if options.mask_consensus[1] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_consensus[1]))
+#############################################################################################################################
-    ###Spaced alignment Generation###########################################################################################
-    if IUPAC == False and SQaligned:
+###Contigs SNP Detection/Masking(-s option implimentation)###################################################################
-        if verbose:
+if isAlignment:
-            print 'Generating consensous alignment...',
+    SNPcovNum, SNPpropNum, SNPmaskedNum, SNPcount = (0,0,0,0)
-        start = contigs[0][2][0] - contigs[0][2][2]
+    SNPs = []
-        if start > 1:
+    for cIndex in range(0,len(contigs)):
-            for count in range(0,start):
+         for bIndex in range(0,len(contigs[cIndex])):
-                alignedSeq += '-'
+             refPos = bIndex +  alignmentIndexs[cIndex][0]
-                maskedSeq += '-'
+             refChar = reference[refPos]
-        refDels = []
+             cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
-         for qualIndex in range(0,len(contigs[0][1])):
+            if cntgChar.isupper() and refChar.isalpha() and cntgChar.isalpha() and refChar != cntgChar:
-             alignedSeq += contigs[0][1][qualIndex][0]
+                SNPcount += 1
-             refIndex = contigs[0][2][0] - contigs[0][2][2] + qualIndex
+                SNPs.append([refPos,refChar,headers[cIndex],bIndex,cntgChar] + qualities[cIndex][bIndex])
-             maskedSeq += getMasked(contigs[0],qualIndex,refIndex,minQualScore,minSNPScore)
+                 if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0]:
-        for cntgIndex in range(0,len(contigs) - 1):
+                    contigs[cIndex][bIndex] = 'N'
-            compIndex = cntgIndex + 1
+                    SNPcovNum += 1
-            matchNum = matchsOverlap(cntgIndex,compIndex) #the amount of bases that the matching regions of the adjacent conigs overlap
+                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
-            cntgNum = contigsOverlap(cntgIndex,compIndex)
+                    contigs[cIndex][bIndex] = 'N'
-            delNum = cntgNum  #= (contig length - end of ref match) + match overlap size
+                    SNPpropNum += 1
-            refInsert = int(contigs[cntgIndex][2][1]) #the index at which the deletion will be inserted
+                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
-            for count in range(0,delNum):
+                    SNPmaskedNum += 1
-                 reference.insert(refInsert, '-')
+    totalLen = sum(map(len,contigs))
-            for cIndex in range(compIndex,len(contigs)):
+    if sum(options.mask_contig_SNPs) > 0: verbose('qualtofa: %d of %d total SNPs (%.1f%%) were masked\n' % (SNPmaskedNum,SNPcount,float(SNPmaskedNum*100)/SNPcount))
-                contigs[cIndex][2][0] += delNum #the contig with the larger starting match index must be moved up
+    if options.mask_contig_SNPs[0] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPcovNum,SNPcount,float(SNPcovNum*100)/SNPcount,options.mask_contig_SNPs[0]))
-                contigs[cIndex][2][1] += delNum
+    if options.mask_contig_SNPs[1] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropNum,SNPcount,float(SNPpropNum*100)/SNPcount,options.mask_contig_SNPs[1]))
-            cntgEnd = contigs[cntgIndex][2][1] + (len(contigs[cntgIndex][1]) - contigs[cntgIndex][2][3]) - 1
+#############################################################################################################################
-            compStart = contigs[compIndex][2][0] - contigs[compIndex][2][2]
-            if cntgEnd + 1 < compStart:
-                count = compStart - cntgEnd - 1
-                for x in range(0,count):
-                    alignedSeq += '-'
-                    maskedSeq += '-'
-            for qualIndex in range(0,len(contigs[compIndex][1])):
-                alignedSeq += contigs[compIndex][1][qualIndex][0]
-                refIndex = contigs[compIndex][2][0] - contigs[compIndex][2][2] + qualIndex
-                maskedSeq += getMasked(contigs[compIndex],qualIndex,refIndex,minQualScore,minSNPScore)
-        outData.append(['Aligned_Contigs',alignedSeq])
-        if minQualScore != 0 or minSNPScore != 0:
-            outData.append(['Masked_Contigs',maskedSeq])
-        if verbose:
-            print 'Complete'
-     #########################################################################################################################
+###Contigs Masking(-m option implimentation)#################################################################################
+if sum(options.mask_contigs) > 0:
+     numMasked, numQualMasked, numPropMasked = (0,0,0)
+    for cIndex in range(0,len(contigs)):
+        for bIndex in range(0,len(contigs[cIndex])):
+            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
+            if cntgChar.isalpha():
+                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
+                    numMasked += 1
+                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0]:
+                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
+                    else: contigs[cIndex][bIndex] = 'n'
+                    numQualMasked += 1
+                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
+                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
+                    else: contigs[cIndex][bIndex] = 'n'
+                    numPropMasked += 1
+    totalLen = sum(map(len,contigs))
+    verbose('qualtofa: %d bases of %d total bases (%.1f%%) were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))
+    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_contigs[0]))
+    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_contigs[1]))
+#############################################################################################################################
-    ###Contig Addition#######################################################################################################
+###Contianed Contigs file saving#############################################################################################
-    if includeCntgs:
+if len(containedContigs) > 0:
-        if verbose:
+    outPath = qualPath + '_contained_contigs.fsa'
-            print '-c Modifier: Extracting/Aligning Contigs...',
+    verbose('qualtofa: Saving contained contigs to %s...' % outPath)
-        for cntg in contigs:
+    saveFasta(outPath, *zip(*containedContigs))
-            cntgSeq = ''
+    verbose('Complete\n')
-            cntgStart = cntg[2][0] - cntg[2][2] + 1
+#############################################################################################################################
-            if SQaligned and alignCntgs:
-                for count in range(1,cntgStart):
-                    cntgSeq += '-'
-            qualIndex = 0
-            for qual in cntg[1]:
-                refIndex = cntg[2][0] - cntg[2][2] + qualIndex
-                cntgSeq += getMasked(cntg,qualIndex,refIndex,cntgQualScore,cntgSNPScore)
-                qualIndex += 1
-            outData.append([cntg[0],cntgSeq])
-        if verbose:
-            print 'Complete'
-    #########################################################################################################################
-    refStr = ''
-    for char in reference:
-        refStr += char
-    outData.insert(0,['Reference',refStr])
-    ###Pointless spacer removal##############################################################################################
+###SNP file saving###########################################################################################################
-    if verbose:
+if options.save_SNPs:
-        print 'Pointless index detection and removal...',
+     outPath = qualPath + '_SNPs.qual'
-     delList = [] #will contain the indices at which all of the sequences have '-' and must be deleted
+     verbose('qualtofa: Saving %d SNPs to %s...' % (len(SNPs),outPath))
-     for refIndex in range(0,len(outData[0][1])): #loops through each index in terms of the reference
+    outHandle = open(outPath, 'w')
-        pointlessSpace = True #each indice is asumed to be a space
+    outHandle.write('Schema:\nPosition\tReference\tName\tIndex\tcall\tA\tC\tG\tT\tdash\tsum\n')
-        for cntgIndex in range(0,len(outData)): #loops through all the sequences that are being checked for spaces
+    for SNP in SNPs: outHandle.write('\t'.join(map(str,SNP)) + '\n')
-            seqLen = len(outData[cntgIndex][1])
+    outHandle.close()
-            if refIndex < seqLen: #if the index being tested is present on this contig...
+     verbose('Complete\n')
-                bp = outData[cntgIndex][1][refIndex]
+#############################################################################################################################
-                if bp != '-': #if the base is not a space...
-                    pointlessSpace = False
-        if pointlessSpace:
-            delList.append(refIndex)
-    if len(delList) > 0:
-        for deletion in delList:
-            for index in range(0,len(SNPquals)):
-                if deletion < SNPquals[index][1]:
-                    SNPquals[index][1] -= 1
-        delList = mkRange(delList) #makes a list of indices into a list of index ranges
-        delList.reverse() #reverses the order of the list to preserve the accuracy of the index values in delList as they are deleted from faData
-        for theRange in delList: #for every range specifed by delList...
-            for cntgIndex in range(0,len(outData)): #for every contig is faData...
-                outData[cntgIndex][1] = outData[cntgIndex][1][:theRange[0]] + outData[cntgIndex][1][theRange[1] + 1:] #remove the section determined to be spaces
-     if verbose:
-        print 'Complete'
-    #########################################################################################################################
-    ###Out file writing and saveing procedures###############################################################################
+###Contig sequence alignment#################################################################################################
-    fileSavePath = savePath + os.path.basename(qualPath) + '.fsa' #the path to where the output is saved
+if isAlignment and options.dont_align_contigs == False:
-    if verbose:
+    contigs = [['-'] * alignmentIndexs[index][0] + contigs[index] for index in range(0,len(contigs))]
-        print 'Generating output files...',
+#############################################################################################################################
-    outHandle = open(fileSavePath, 'w') #opens the file object for saving the output
-    for seq in outData: #prints every line of outSeqs to the output file...
-        outHandle.write('>' + seq[0] + '\n')
-        outHandle.write(seq[1] + '\n')
-    if printOut: #if the -p modifier is supplied...
-        for seq in outData: #prints every line of outSeqs to the screen...
-            print seq[0]
-            print seq[1]
-    if len(contCntgSeqs) > 0:
-        contCntgSavePath = savePath + os.path.basename(qualPath) + '_ContatiedContigs.fsa' #the path to where contained contigs are saved is saved
-        CCHandle = open(contCntgSavePath, 'w')
-        for line in contCntgSeqs:
-            CCHandle.write(line + '\n')
-        CCHandle.close()
-    outHandle.close() #closes file object
-    if SNPs:
-        snpSavePath = savePath + os.path.basename(qualPath) + '_SNPs.qual' #the path to where SNPs are saved
-        snpHandle = open(snpSavePath, 'w')
-        for qual in SNPquals:
-            line = ''
-            for entry in qual:
-                line += str(entry) + '\t'
-            line = line[:-1]
-            snpHandle.write(line + '\n')
-        snpHandle.close()
-    if verbose:
-        print 'Complete'
-        print '    ' + str(len(outData)) + ' sequences saved to ' + fileSavePath + '.'
-        if SNPs:
-            print '    ' + str(len(SNPquals)) + ' SNPs saved to ' + snpSavePath + '.'
-        if len(contCntgSeqs) > 0:
-            if len(contained) == 1:
-                print '    1 contained contig sequence saved to ' + contCntgSavePath + '.'
-            elif len(contained) > 1:
-                print '    ' + str(len(contained)) + ' contained contig sequences saved to ' + contCntgSavePath + '.'
-    #########################################################################################################################
-    ###Debug Saving procedures###############################################################################################
+###Output file Saving########################################################################################################
-     if saveDebug: #if the -d modifier is supplied...
+verbose('qualtofa: Saving fasta output to %s...' % (qualPath + '.fsa'))
-        debugHandle = open(savePath + os.path.basename(qualPath) + '_debug.txt', 'w')
+conHeader = 'Consensus'
-        for line in debugLog:
+maskedConHeader = 'Masked Consensus'
-            debugHandle.write(line)
+if isAlignment:
-        debugHandle.close()
+    outputHeads, outputSeqs = ([refHeader,conHeader],[reference,conSequence])
-     #########################################################################################################################
+     if includeMaskedCon:
+        outputHeads.append(maskedConHeader)
+        outputSeqs.append(maskedConSeq)
+else: outputHeads, outputSeqs = ([],[])
+if options.include_contigs:
+    outputHeads += headers
+    outputSeqs += contigs
+outPath = os.path.join(os.getcwd(), os.path.basename(qualPath) + '.fsa')
+saveFasta(outPath, outputHeads, outputSeqs)
+verbose('Complete\n')
+if options.save_run_info:
+    runtimeOutPath = os.path.join(os.getcwd(), 'qualtofa_runtime_output_%s.txt' % timeStamp)
+    verbose('qualtofa: option -i/--save-run-info: saving run-time output to %s...' % runtimeOutPath)
+    runHandle = open(runtimeOutPath, 'w')
+    for line in printLog: runHandle.write(line)
+    runHandle.close()
+     verbose('Complete\n')
+#############################################################################################################################
 </pre>