Qualtofa.py: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
mNo edit summary
No edit summary
 
Line 2: Line 2:
<pre>
<pre>
#!/usr/bin/env python
#!/usr/bin/env python
#############################################################################################################################
# qualtofa [options] <YASRA/sumqual quality file path>                                                                      #
#---------------------------------------------------------------------------------------------------------------------------#
# Example: python qualtofa.py -i -c q 20 r -q 5 -s 20 ../myValues.qual                                                      #
#---------------------------------------------------------------------------------------------------------------------------#
# Version: 2.3  Last Modified: 4/19/2010    Author: Zachary S. L. Foster                                                  #
# Intended uses:                                                                                                            #
#  1)Produces alignments in FASTA format from sumquals output.                                                              #
#  2)Extract the sequences of multiple contigs from a YASRA quality file.                                                  #
#---------------------------------------------------------------------------------------------------------------------------#
# Modifiers:                                                                                                                #
#  -c  : Include each contig on its own line... (EX: -c q 20 r)                                                            #
#        q  : Set masking value for contigs (Analogous to -q modifier below; reference NOT needed).                        #
#        s* : Set SNP masking value for contigs (Analogous to -s modifier below).                                          #
#        r* : Do NOT align to the reference using '-'s for spacers.                                                        #
#  -d  : Save debug log to current working directory.                                                                      #
#  -i* : Condense all overlapping regions into IUPAC ambiguity codes...                                                    #
#        Default: Add deletions to the reference to accommodate the overlapping sequence.                                  #
#  -m* : Only include matching regions from the quality file.                                                              #
#  -p  : Print output to screen.                                                                                            #
#  -q* : Set minimum masking value (a new masked sequence will be added to the output file).                                #
#  -s* : Set minimum masking value for SNPs.                                                                                #
#  -a  : Include contained contigs (defalt: save sequences of contiained contigs to a seperate file)                        #
#  -t  : Trim the ends of the contigs by the specified number of bases.                                                    #
#  -v  : Trim all the bases on either end of all contigs that have a quality value less than the specified amount.          #
#  -b  : Verbose output; print progress reports and statistics.                                                            #
#  -l  : Set minimum match length                                                                                          #
#  -S  : Silent; Do not print anything to the standard output (aka screen)                                                  #
#  -n  : save SNPs to a .qual file
#                                                                                                                          #
# *Note - these modifiers rely on the reference included in the output of sumqual. If your quality                          #
#        file is a list of unaligned contigs (e.g. from YASRA), these modifiers cannot be used.                            #
#############################################################################################################################


###Imports / Variable Initilization##########################################################################################
###Imports / Variable Initilization##########################################################################################
import os, string, sys, time, copy
import os, string, sys, time, copy
defArgList  = ['qualtofa.py','-b','-c','s','1','-c','r','-i','-n','-s','100','C:/Python26/AGCT.filtered.delta.qual']
from datetime import *
#defArgList  = ['qualtofa.py','-i','-q','20','-s','100','-c','-v','15','-b','-l','100','C:/Python26/out.delta.qual']
from optparse import *
#defArgList  = ['qualtofa.py','-v','50','-c','q','20','s','100','C:/Python26/out.delta.qual']
from subprocess import *
#defArgList  = ['qualtofa.py','-t','10','-c','q','20','s','100','C:/Python26/out.delta.qual']
commandLine = copy.deepcopy(sys.argv)
argList    = sys.argv #argumen list supplied by user
cwd = os.getcwd()
argNum      = len(argList) #the number of arguments supplied
timeStamp = datetime.now().ctime().replace(' ','-').replace(':','-').replace('--','-') #Ex: 'Mon-Jun-14-11-08-55-2010'
minArgNum  = 1 #the smallest amount of arguments with which it is possible to run the script
cntgMatchRanges = None
saveDebug  = False #is True if the debug is to be saved
progName, progVersion = ('qualtofa.py','2.52')
warning    = False #is set to 'True' if the program encounters any minor errors during the analysis; recorded in debug log
progUsage = 'python %s [options] <.qual file>' % progName
printOut    = False #is True when no the -p modifier is supplied; #The results will be printed to the standard output (usually the screen)
printLog = []
helpOnly    = False #is 'True' when no arguments are given; only help menu is printed
includeCntgs= False #is True when the -c modifier is used
IUPAC      = False #is True when the -i modifier is used
matchsOnly  = False #is True when the -m modifier is used
alignCntgs  = True #is False when the r argument of the -c modifier is used
SQaligned  = False #if the input file has the sumqual output format
verbose    = False #is True when the -b modifier is used
SNPs        = False
silent      = False
minMatchLen = 50 #Minimum match length accepted (after contig end trimming by -t and -v modifiers)
trimNum    = 0 #number of bases removed from the ends of all the contigs
trimQual    = 0 #minimum quality value needed to not be removed from the ends of the contigs
containedCngts = False #is True when the -a modifier is used
maskingChar = 'N' #What represents a "unknown" base
minQualScore= 0 #determined by the presence of the -q modifier
cntgQualScore= 0 #determined by the presence of the -c modifier, with argument q
cntgSNPScore= 0 #determined by the presence of the -c modifier, with argument s
minSNPScore = 0 #determined by the presence of the -s modifier
savePath    = './' #the directory in which the output will be saved
defSavePath = 'C:/Python26/' #save path used during script testing with the python GUI IDLE
modifiers  = [] #eventually contains a list of all modifiers and their arguments supplied by the user
allMods    = 'cdeimpqastvblSn' #all the modifiers recognized by the script
activeMods  = '' #a string containing all modifiers specified by the user
modArgs    = [] #arguments supplied for the modifiers; assumed in the same order as the modifiers
qualData    = []
refPath    = ''
refData    = []
outData    = [] #eventually holds the data that will be saved in the output file
debugLog    = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
#############################################################################################################################
#############################################################################################################################


#Error handling function#####################################################################################################
def errorExit(message=None,exitStatus=1):
#>Is called when the script encounters a fatal error                                                                       #
    '''Version 1.0
#>Prints the debug log to standard out (usually the screen)                                                                 #
    Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
def errorExit():
      
    print 'The program was forced to exit prematurely, printing debug log...\n'
     if message:
     for line in debugLog:
        print '%s: Error: %s' % (progName, message)
        print line
     else:
     sys.exit()
        print progName +': Unknown error, printing debug log...'
#############################################################################################################################
        for line in debugLog: print line  #print debug log
 
     sys.exit(exitStatus)
def printHelp(): #is called if no arguments are supplied
    print '/--------------------------------------------------------------------------------------------------\\'
    print '| qualtofa [options] <YASRA/sumqual quality file path>                                            |'
    print '|--------------------------------------------------------------------------------------------------|'
    print '| Example: python qualtofa.py -i -c q 20 r -q 5 -s 20 ../myValues.qual                            |'
    print '|--------------------------------------------------------------------------------------------------|'
    print '| verboseOut: 2.4  Last Modified: 6/24/2010    Author: Zachary S. L. Foster                          |'
    print '| Intended uses:                                                                                  |'
    print '|  1)Produces alignments in FASTA format from sumquals output.                                    |'
    print '|  2)Extract the sequences of multiple contigs from a YASRA quality file.                          |'
    print '|--------------------------------------------------------------------------------------------------|'                     
    print '| Modifiers:                                                                                      |'
    print '|  -c  : Include each contig on its own line... (EX: -c q 20 r)                                   |'
     print '|        q  : Set masking value for contigs (Analogous to -q modifier below; reference NOT needed).|'
    print '|        s* : Set SNP masking value for contigs (Analogous to -s modifier below).                  |'
    print '|        r* : Align to the reference using dashs for spacers.                                      |'
    print '|  -d  : Save debug log to current working directory.                                              |'
    print '|  -i* : Condense all overlapping regions into IUPAC ambiguity codes...                           |'
    print '|        Default: Add deletions to the reference to accommodate the overlapping sequence.          |'
    print '|  -m* : Only include matching regions from the quality file.                                      |'
    print '|  -p  : Print output to screen.                                                                  |'
    print '|  -q* : Set minimum masking value (a new masked sequence will be added to the output file).      |'
    print '|  -s* : Set minimum masking value for SNPs.                                                      |'
    print '|  -a  : Include contained contigs Defalt: save sequences of contiained contigs to a seperate file |'
    print '|  -t  : Trim the ends of the contigs by the specified number of bases.                            |'
    print '|  -v  : Trim all the bases on either end of all contigs that have a quality value less than      |'
     print '|        the specified amount.                                                                     |'
    print '|  -b  : Verbose output; print progress reports and statistics.                                    |'
    print '|  -l  : Set minimum match length                                                                  |'
    print '|  -S  : Silent; Do not print anything to the standard output (aka screen)                         |'
    print '|                                                                                                  |'
    print '| *Note - these modifiers rely on the reference included in the output of sumqual. If your quality |'
    print '|        file is a list of unaligned contigs (e.g. from YASRA), these modifiers cannot be used.  |'
    print '\\--------------------------------------------------------------------------------------------------/'


#############################################################################################################################
#>function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)              #
def mkRange(numList):
def mkRange(numList):
    '''Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)'''
     outList = []
     outList = []
     start  = numList[0]
     if len(numList) > 0:
    current = numList[0]
        start  = numList[0]
    numList.append(numList[-1])
        current = numList[0]
    for num in numList[1:]:
        numList.append(numList[-1])
        if (num - 1) != current:
        for num in numList[1:]:
            outList.append([start,current])
            if (num - 1) != current:
            start = num
                outList.append([start,current])
        current = num
                start = num
            current = num
     return outList
     return outList
#############################################################################################################################


###Argument Interprtation####################################################################################################
def readLFile(filePath, numLinesToRead = None):
#>Parses the arguments supplied by the user, or the default values if the script is being run on IDLE                      #
     fileHandle = open(filePath, 'r')
#>If no arguments are given, the help menu is printed                                                                      #
    if numLinesToRead is None:
#>Modifiers and their arguments are isolated from the raw input for later processing (in Modifier Interpretation)           #
        fileData = fileHandle.readlines()
#if __name__ == '__main__': #If the program is being called independent of the Python GUI, IDLE...
     else:
if argNum > minArgNum: #If at least the minimum number of arguments necessary is supplied...
        fileData = [fileHandle.readline() for count in range(0,numLinesToRead)]
     if os.path.exists(argList[-1]) == 0: #if the path dose not exist
     fileHandle.close()
        debugLog.append('Error: Invalid file path to input data')
    return fileData
        errorExit() #end the program
elif argNum == 1: #If no arguments are supplied...
    helpOnly = True
    printHelp() #prints help menu
else:
    debugLog.append('Error: Too few arguments supplied\n')
     errorExit()
#else: #If the script is being imported on to IDLE
#    argList  = defArgList #use default arguments
#    argNum  = len(defArgList)
#    if argNum == 1: #If no arguments are supplied...
#        helpOnly = True
#        printHelp() #prints help menu
#    savePath = defSavePath #sets the save path to the default, specified in the variable initialization section
#    debugLog.append('Alert: default debugging input arguments are being used\n')
if helpOnly == False: #if arguments were supplied...
    qualPath = argList[-1] #the path to the qual file containing the input data
     if argNum > minArgNum + 1: #if modifiers are present (i.e. more the minimum number of arguments)
        modifiers = argList[1:-minArgNum] #everything before the required arguments are modifiers and their arguments
#############################################################################################################################
       
if helpOnly == False:


    ###Modifier Interpretation###############################################################################################
def writeLFile(filePath, fileData):
    #>Parses any modifiers and modifier arguments determined by the previous section of code, "Argument Interpretation"    #
    fileHandle = open(filePath, 'w')
    #>Given arguments are compared against a list of known arguments                                                        #
    for line in fileData:
    #>Matches found change the appropriate variable for the desired effect of the modifier the script                      #
        fileHandle.write(fileData)
    if len(modifiers) > 0: #if modifiers are supplied
    fileHandle.close()
        for mod in modifiers: #loops through the list of modifiers and modifier arguments
            if mod[0] == '-' and len(mod) == 2: #list entry considered modifier if it starts with - and is only two characters       
                activeMods += mod[1:] #sorts the modifiers into activeMods...
            else: 
                modArgs.append(mod) #assumes everything else to be a modifier argument
        for letter in activeMods: #checks if the modifiers are recognized 
            if string.find(allMods,letter) == -1: #checks if the modifier is recognized by this script
                debugLog.append('Warning: Unexpected modifier: ' + letter + '\n')
                warning = True #if the input modifier is not found
            else:
                if letter == 'c': #if -c is supplied...
                    if len(modArgs) > 0 and modArgs[0] == 'q': #if there is at least two non-processed modifier arguments
                        if len(modArgs) > 1:
                            cntgQualScore = int(modArgs[1])
                            del modArgs[:2] #the modifier arguments are deleted from the list
                        else:
                            debugLog.append('Error: Modifier argument not supplied\n')
                            errorExit() #exit the script
                    if len(modArgs) > 0 and modArgs[0] == 's': #if there is at least two non-processed modifier arguments
                        if len(modArgs) > 1:
                            cntgSNPScore = int(modArgs[1])
                            del modArgs[:2] #the modifier arguments are deleted from the list
                        else:
                            debugLog.append('Error: Modifier argument not supplied\n')
                            errorExit() #exit the script
                    if len(modArgs) > 0 and modArgs[0] == 'r': #if there is at least one non-processed modifier argument
                        alignCntgs = False
                        del modArgs[0] #the modifier argument is deleted from the list
                    includeCntgs = True #include each contig on its own line
                elif letter == 'd': #if -d is supplied...
                    saveDebug = True #The debug log will be saved to the current working directory
                elif letter == 'a': #if -a is supplied...
                    containedCngts = True
                elif letter == 'e': #if -e is supplied...
                    saveEditLog = True #save log of edits made to the contig
                elif letter == 'i': #if -i is supplied...
                    IUPAC = True #condense all overlapping regions into IUPAC ambiguity codes
                elif letter == 'm': #if -m is supplied...
                    matchsOnly = True #only include matching regions of from the qual file
                elif letter == 'p': #if -p is supplied...
                    printOut = True #The results will be printed to the standard output (usually the screen)
                elif letter == 'b': #if -b is supplied...
                    verbose = True #Verbose output
                elif letter == 'S': #if -S is supplied...
                    silent = True #Verbose output
                elif letter == 'n': #if -n is supplied...
                    SNPs = True
                elif letter == 'q': #if -q is supplied...
                    if len(modArgs) > 0: #if there is at least one non-processed modifier argument
                        minQualScore = int(modArgs[0])  
                        del modArgs[0] #the original argument is deleted from the list
                    else: #if the list of modifier arguments is empty...
                        print 'Error: Modifier argument not supplied\n'
                        sys.exit() #exit the script
                elif letter == 's': #if -s is supplied...
                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
                        minSNPScore = int(modArgs[0]) #The string specifying the desired lines to process from the input file
                        del modArgs[0] #the original argument is deleted from the list
                    else: #if the list of modifier arguments is empty...
                        print 'Error: Modifier argument not supplied\n'
                        sys.exit() #exit the script
                elif letter == 't': #if -t is supplied...
                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
                        trimNum = int(modArgs[0]) #The string specifying the desired lines to process from the input file
                        del modArgs[0] #the original argument is deleted from the list
                    else: #if the list of modifier arguments is empty...
                        print 'Error: Modifier argument not supplied\n'
                        sys.exit() #exit the script
                elif letter == 'v': #if -v is supplied...
                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
                        trimQual = int(modArgs[0]) #The string specifying the desired lines to process from the input file
                        del modArgs[0] #the original argument is deleted from the list
                    else: #if the list of modifier arguments is empty...
                        print 'Error: Modifier argument not supplied\n'
                        sys.exit() #exit the script
                elif letter == 'l': #if -l is supplied...
                    if len(modArgs) > 0: #if there are at least two non-processed modifier arguments
                        minMatchLen = int(modArgs[0]) #The string specifying the desired lines to process from the input file
                        del modArgs[0] #the original argument is deleted from the list
                    else: #if the list of modifier arguments is empty...
                        print 'Error: Modifier argument not supplied\n'
                        sys.exit() #exit the script
    if verbose:
        print 'qualtofa: Version: 2.4  Last Modified: 6/24/2010 by Zachary S. L. Foster'
    #########################################################################################################################


     if verbose:
def saveQualStandard(filePath, contigs, qualValues, headers, schema=None):
         print 'Parsing input file ' + qualPath + '.',
    if schema is None: schema = ['Position in the contig(1 based)', 'Called base', "Number of reads suppoting 'A'", "Number of reads supporting 'C'", "Number of reads supporting 'G'", "Number of reads supporting 'T'", "Number of reads supporting '_'", 'Coverage']
    if len(contigs) == 0: errorExit('saveQualStandard: empty list supplied')
     if len(contigs) != len(qualValues) or len(contigs) != len(headers): errorExit('saveQualStandard: Unequal number of entries is input list')
    outHandle = open(filePath, 'w')
    outHandle.write('Schema:'+ '\n' + '\t'.join(schema) + '\n')
    for cIndex in range(0,len(contigs)):
         outHandle.write(headers[cIndex] + '\n')
        for bIndex in range(0,len(contigs[cIndex])): outHandle.write('\t'.join([contigs[cIdnex][bIndex]] + qualValues[cIndex]) + '\n')
    outHandle.close()
           
def parseQualStandard(filePath):
    columbWidth = 8
    fileData = [line.strip().split('\t') for line in readLFile(filePath)]  #fileData is a list of lists representing the tab-deliminated structure of the .qual file     
    if fileData[0] == ['Schema:']: del fileData[0]  #removes the unessesary first line of the file, if present
    schema = fileData[0]  #extract the tab-deliminated schema into a list
    headers, contigs, qualValues = ([], [], [])  #initilizes multiple lists at a time
    index = 0
    for line in fileData[1:]:
        if len(line) == 1:  #if it is a header
            headers.append(line[0])
            contigs.append([])
            qualValues.append([])
        elif len(line) == columbWidth:  #if it is  quality value
            contigs[-1].append(line[1])
            qualValues[-1].append([int(num) for num in line[2:]])  #converts all of the quality values into integer objects and saves them in qualvalues
        else: errorExit('parseQualStandard: Quality file at %s deviated from the expected format on line %d.' % (filePath, index))
        index += 1       
    return (contigs, qualValues, headers, schema)


     ###Quality Value File Parsing############################################################################################
def parseQualAlignment(filePath):
     qualHandle = open(qualPath, 'r') #opens the file containing the quality values
     columbWidth = 9
     qualRaw    = qualHandle.readlines() #saves all of the file into qualRaw
     headColNum = 2
    qualHandle.close() #closes the file object
     fileData = [[part.strip() for part in line.split('\t')] for line in readLFile(filePath)]  #fileData is a list of lists representing the tab-deliminated structure of the .qual file
     qualName = qualRaw[2][:-1] #the name of the first contig is the 3rd line minus the newline
     if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
     qualVals = [] #will contain a list of quality values for each contig
     schema = fileData[0]   #extract the tab-deliminated schema into a list
     qualData = [] #the data structure holding the parsed data
     refHeader = fileData[1][0]
     qualRaw.append('randomline') #place mark needed to process the last contig
     reference = [line[1] for line in fileData[2:]]  #extracts the reference from the second columb
     for line in qualRaw[3:]: #for every line past the first 4...
     qualData = [line[headColNum:] for line in fileData[2:]#removes the first lines and columbs of fileData, which dont contain unextracted data
        splitLine = string.split(line) #make a list of all the parts of line separated by whitespace
    headers, contigs, cntgStarts, qualValues = ([], [], [], [])   #initilizes multiple lists at a time
         if len(splitLine) == 1 and splitLine[0].isdigit() == False: #if there is only one "word" (i.e. it is a name of a contig)
    refPos = 0
            qualData.append([qualName,qualVals]) #add the previous contig to the parsed data
    for line in qualData:
            qualName = splitLine[0] #new contig name
         if len(line) > 0:
            qualVals = []  
            for index in range(0,len(line),columbWidth):
        else: #if the line is a set of quality values...
                if headers.count(line[index]) == 0:   #if the contig header has not been encountered before
            qualVals.append(splitLine) #append them to the list of quality values for this contig
                    headers.append(line[index])
     #########################################################################################################################
                    cntgStarts.append(refPos)
                    contigs.append([])
                    qualValues.append([])
                headIndex = headers.index(line[index])
                contigs[headIndex].append(line[index + 2])  #append the called base character to its contig
                qualValues[headIndex].append([int(num) for num in line[index + 3: index + columbWidth]])  #converts all of the quality values into integer objects and saves them in qualvalues
        refPos += 1
    alignmentIndexs = [[cntgStarts[index], cntgStarts[index] + len(contigs[index]) - 1] for index in range(0,len(cntgStarts))]
     return (contigs, qualValues, headers, alignmentIndexs, reference, refHeader, schema)


     if verbose:
def parseQual(filePath):
         print '.',
    columbWidth = 9
    headColNum = 2
    fileHead = readLFile(filePath, 2)
     if fileHead[0].strip() == 'Schema:': del fileHead[0]  #removes the unessesary first line of the file, if present
    schema = fileHead[0].strip().split('\t')
    if schema[1] == 'Reference' and len(schema) % columbWidth == headColNum:
        return parseQualAlignment(filePath)
    elif len(schema) == 8:
         return parseQualStandard(filePath)
    else: errorExit('parseQual: The quality file at %s has an unreconized format based on the schema on line 2.' % filePath)


    ###Reference Extraction##################################################################################################
def listToStr(aList):
    schema = string.split(qualRaw[1])
     out = ''
     if len(schema) > 0 and schema[1] == 'Reference':
    for char in aList: out += char
        SQaligned = True
     return out
        reference = []
        for qual in qualData[0][1]:
            reference.append(qual[1])
     #########################################################################################################################


     if verbose:
def saveFasta(filePath, headers, sequences):
         print '.',
    outHandle = open(filePath, 'w')
     if len(headers) != 0 and len(sequences) != 0:
         if len(headers) != len(sequences): errorExit('saveFastaFile: different number of headers and sequences')
        if type(headers) == str and type(sequences) == str:
            headers = [headers]
            sequences = [sequences]
        if type(sequences[0]) == list: sequences = [listToStr(seq) for seq in sequences]
        for index in range(0,len(headers)):
            outHandle.write('>%s\n' % headers[index])
            if type(sequences[index]) == str: sequences[index] = [sequences[index]]  #if there is only one line of sequence for a given header, make it a one-item list for the next loop
            for seq in sequences[index]:
                outHandle.write(seq + '\n')
    outHandle.close()


     ###Contig Extraction####################################################################################################
def parseFasta(filePath):
     contigs = [] #will hold a list of contig names and their sequences (ex: [['name1',['A','C','G']],['name2',['G','C','A']],...])
     headers = []
     def addBase(cntgName,baseQuals):
     seqs = []
        for cntgIndex in range(0,len(contigs)):
    fileHandle = open(filePath, 'r')
            if contigs[cntgIndex][0] == cntgName:
    fileData = fileHandle.readlines() + ['>']   #the additional header line is there make to following for loop process the last sequence
                contigs[cntgIndex][1].append(baseQuals)
    fileHandle.close()
                return
     currSeq = ''
        if SQaligned:   
    for line in fileData:
             contigs.append([cntgName,[baseQuals],refIndex])
        if line[0] == '>':   #if the line is a fasta header
            headers.append(line[1:].strip() )  #the fisrt character is the '>' and the last is a newline '\n'
             seqs.append(currSeq)
            currSeq = ''
         else:
         else:
             contigs.append([cntgName,[baseQuals]])
             currSeq += line.strip()
       
    return (headers[:-1], seqs[1:])
     if SQaligned:
 
        qualNum = 9
def removeRepeats(aList):
        refColNum = 3
     for entry in aList:
     else:
        if aList.count(entry) > 1:
         qualNum = 7
            aList.remove(entry)
         refColNum = 0
            return removeRepeats(aList)
     for cntgIndex in range(0,len(qualData)): #for every contig...
    return aList
         for baseIndex in range(0,len(qualData[cntgIndex][1])): #for every base of every contig
 
            baseDepth = (len(qualData[cntgIndex][1][baseIndex]) - refColNum + 1) / qualNum #the number of distinct bases (quality value data) per index
def maskingCallback(option, opt_str, value, parser):
            for depth in range(0,baseDepth): #for every quality value for every base in every contig
    value = []
                startIndex = (depth * qualNum) + refColNum + 1 #the index of the character value for that quality value
     def floatable(str):
                endIndex = startIndex + 5
         try:
                if SQaligned:
            float(str)
                    name = qualData[cntgIndex][1][baseIndex][startIndex - 2]
            return True
                    refIndex = qualData[cntgIndex][1][baseIndex][0]
         except ValueError: return False
                else:
     for arg in parser.rargs:       
                    name = qualData[cntgIndex][0]
        if arg[:2] == "--" and len(arg) > 2: break  # stop on --foo like options           
                quals = qualData[cntgIndex][1][baseIndex][startIndex:endIndex + 2]
        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break  # stop on -a, but not on -3 or -3.0
                addBase(name,quals) #add the character value to the list
        value.append(arg)
     #########################################################################################################################
         if len(value) == 2: break
    if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
    del parser.rargs[:len(value)]
    if len(value) == 1: value.append(0)
    value[0] = int(value[0])
    value[1] = float(value[1])
    if value[0] < 0: errorExit("The first argument of the option '%s' must be a positive integer; %d supplied." % (option.dest,value[0]))
    if value[1] < 0 or value[1] > 1: errorExit("The second argument of the option '%s' must be a decimal between 0 and 1; %f supplied." % (option.dest,value[1]))
    setattr(parser.values, option.dest, value)
 
###Command Line Parser#######################################################################################################
cmndLineParser  = OptionParser(usage=progUsage, version="Version %s" % progVersion)
maksingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
cmndLineParser.add_option(  "-c",  "--include-contigs",    action="store_true",    default=False,                                                            help="Include each contig on its own line. (Default: only output the consensus)")
maksingGroup.add_option(  "-m",  "--mask-contigs",      action="callback",      default=[0,0],      metavar='INT FLOAT',  callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(   "-s",  "--mask-contig-SNPs",  action="callback",      default=[0,0],      metavar='INT FLOAT',  callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(  "-M",  "--mask-consensus",    action="callback",      default=[0,0],      metavar='INT FLOAT',  callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
maksingGroup.add_option(  "-S",  "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',  callback=maskingCallback,    dest='mask_SNPs',        help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
cmndLineParser.add_option_group(maksingGroup)
cmndLineParser.add_option(  "-t",  "--end-trim",          action="store",        default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs by a specified number of bases. (Default: 0)")
cmndLineParser.add_option(   "-q",  "--end-trim-qual",      action="store",        default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount. This option is implemented after standard end trimming (see -t option). (Default: 0)")
cmndLineParser.add_option(  "-l",  "--min-match-length",  action="store",        default=0,          type="int",    metavar="INT",      help="Only include contigs that have a least one match longer or as long as the specified amount. This is implemented after any end trimming (see -q and -t options). (Default: 0)")
cmndLineParser.add_option(  "-n",  "--no-overlap",         action="store_true",    default=False,                                        help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(  "-a",  "--no-match-overlap",  action="store_true",    default=False,                                          help="Add deletions to the reference to accommodate overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(  "-k",  "--keep-contained",    action="store_true",    default=False,                                        help="Include contained contigs in the alignment and consensus. (Default: save sequences of contained contigs to a separate file)")
cmndLineParser.add_option(  "-p",  "--save-SNPs",          action="store_true",    default=False,                                        help="Save SNPs to a .qual file. (Default: Do not save SNP file)")
cmndLineParser.add_option(  "-d",  "--dont-align-contigs", action="store_true",    default=False,                                        help="Do not align contigs to the reference. Requires the use of the -c modifier. (Default: align contigs to the reference)")
cmndLineParser.add_option(  "-r",  "--external-ref",      action="store",        default=None,      type="string", metavar="PATH",    help="Specify a reference to use instead of the one contained in the quality file.")
cmndLineParser.add_option(  "-v",  "--verbose",            action="store_true",    default=False,                                        help="Print progress updates and relevant statistics to the standard output. (Default: run silently)")
cmndLineParser.add_option(  "-i",  "--save-run-info",      action="store_true",    default=False,    help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)")
(options, args) = cmndLineParser.parse_args(commandLine)
if options.include_contigs == False:
    if sum(options.mask_contigs) > 1: errorExit('The option -m/--mask-contigs requires the use of the option -c/--include-contigs.')
    if sum(options.mask_contig_SNPs) > 1: errorExit('The option -s/--mask-contig-SNPs requires the use of the option -c/--include-contigs.')
    if options.dont_align_contigs: errorExit('The option -d/--dont-align-contigs requires the use of the option -c/--include-contigs.')
if len(args) == 1:
    cmndLineParser.print_help()
    sys.exit(0)
argNum = len(args) - 1  #counts the amount of arguments, negating the 'qualtofa' at the start of the command line
if argNum != 1: errorExit('qualtofa takes exactly 1 argument; %d supplied' % argNum, 0)
qualPath = args[1]   #The file path to the input file supplied by the user
#############################################################################################################################
 
def verbose(toPrint):
    if options.verbose:
        print toPrint,
        printLog.append(toPrint)
 
###Quality file parsing######################################################################################################
verbose('qualtofa: Loading data from %s...' % qualPath)
qualData = parseQual(qualPath)
verbose('Complete\n')
if len(qualData) == 4:  #if the quality file contains a list of unaligned contigs (e.g. one directly from yasra)
    isAlignment = False
    if options.no_match_overlap: errorExit('The option -a/--no-match-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_consensus): errorExit('The option -M/--mask-consensus requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_SNPs): errorExit('The option -S/--mask-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_contig_SNPs): errorExit('The option -s/--mask-contig-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.keep_contained: errorExit('The option -k/--keep-contained requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.min_match_length: errorExit('The option -l/--min-match-length requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.save_SNPs: errorExit('The option -p/--save-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.no_overlap: errorExit('The option -n/--no-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.external_ref: errorExit('The option -r/--external-ref requires an aligned quality consensous file, like that given by sumqual.py.')
    contigs, qualities, headers, schema = qualData
elif len(qualData) == 7:  #if the quality file contains an alignment (i.e. from sumqual)
    isAlignment = True
     contigs, qualities, headers, alignmentIndexs, reference, refHeader, schema = qualData
else: errorExit()
#############################################################################################################################


    ###Conitg Match Index addition#########################################################################################
###External reference parsing(-r option implimintation)######################################################################
    if SQaligned:
if options.external_ref is not None:
        rejects = []
    verbose('qualtofa: option -r/--external-ref: loading reference at %s...' % options.external_ref)
        for cntgIndex in range(0,len(contigs)):
    (seqs,headers) = parseFasta(options.external_ref)
            inMatch = False
    reference, refHeader = seqs[0], headers[0]
            cntgEnd = 0
    verbose('Complete\n')
            cntgStart = 0
#############################################################################################################################
            for baseIndex in range(0,len(contigs[cntgIndex][1])):
                if contigs[cntgIndex][1][baseIndex][0].isupper() and inMatch == False:
                    cntgStart = baseIndex + 1
                    inMatch = True
                elif contigs[cntgIndex][1][baseIndex][0].islower() and inMatch == True:
                    cntgEnd = baseIndex
                    break
            if cntgEnd == 0:
                cntgEnd = len(contigs[cntgIndex][1])
            refStart = int(contigs[cntgIndex][2]) + cntgStart - 1
            refEnd = int(contigs[cntgIndex][2]) + cntgEnd - 1
            del contigs[cntgIndex][2]
            if refEnd - refStart < minMatchLen:
                rejects.append(cntgIndex)
            else:
                contigs[cntgIndex].append([refStart,refEnd,cntgStart,cntgEnd])
        rejects.reverse()
        for index in rejects:
            del contigs[index]
        if verbose:
            print 'Complete'
            print '    ' + str(len(contigs)) + 'contigs will be included in the anaylsis.'
            if len(rejects) == 1:
                print '    1 contig was not included due to insufficient sequence length ( < ' + str(minMatchLen) + ' ).'
            elif len(rejects) > 1:
                print '    ' + str(len(rejects)) + ' contigs were not included due to insufficient sequence length ( < ' + str(minMatchLen) + ' ).'


       
def delContig(index):
    #########################################################################################################################
    del contigs[index]
    del headers[index]
    ###Contig end trimming###################################################################################################
    del qualities[index]
    def trim(cntgIndex,startT,endT):
     if isAlignment:
        if startT + endT + minMatchLen >= (contigs[cntgIndex][2][1] - contigs[cntgIndex][2][0] + 1):
        del alignmentIndexs[index]
            del contigs[cntgIndex]
        if cntgMatchRanges is not None: del cntgMatchRanges[index]
            return 1
        else:
            unMatchedStart = contigs[cntgIndex][2][2] - 1 #the number of bases before the match
            unMatchedEnd  = len(contigs[cntgIndex][1]) - contigs[cntgIndex][2][3] #the number of bases after the match
            matchedStart  = startT  - unMatchedStart
            matchedEnd     = endT - unMatchedEnd
            if matchedStart > 0: #if the unmatched sequence is too small to accomodate the whole trim length
                contigs[cntgIndex][2][0] += matchedStart
                contigs[cntgIndex][2][2] = 1
                contigs[cntgIndex][2][3] -= startT #+ matchedStart
            else: #if the entire trim length can be taken out of the non-matching sequence
                contigs[cntgIndex][2][2] -= startT
                contigs[cntgIndex][2][3] -= startT               
            if matchedEnd > 0:
                contigs[cntgIndex][2][1] -= matchedEnd
                contigs[cntgIndex][2][3] -= matchedEnd
            del contigs[cntgIndex][1][:startT]
            if endT > 0:
                del contigs[cntgIndex][1][-endT:]
        return 0


     if trimNum > 0: #if -t modifier is supplied
def delContigs(indexes):
         if verbose:
     if len(indexes) > 0:  
            print '-t Modifier: Contig ends are being trimed by ' + str(trimNum) + '...',
         indexes.sort()
        delCount = 0
         indexes.reverse()
        for cntgIndex in range(-len(contigs) + 1,1):
         indexes = removeRepeats(indexes)
            cntgIndex *= -1
         for index in indexes: delContig(index)   #deletes all contigs
            if trim(cntgIndex,trimNum,trimNum) == 1:
                delCount += 1
         if verbose:
            print 'Complete'
            if delCount == 1:
                print '    1 contig was removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
            elif delCount > 1:
                print '    ' + str(delCount) + ' contigs were removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
    if trimQual > 0: #if -v modifier is supplied
         delCount = 0
        if verbose:
            print '-v Modifier: Contig ends are being trimed until a base with a quality value of at least ' + str(trimQual) + ' is encountered...',
         for cntgIndex in range(-len(contigs) + 1,1):
            cntgIndex *= -1
            startTrim = 0
            endTrim = 0
            for baseIndex in range(0,len(contigs[cntgIndex][1])):
                if int(contigs[cntgIndex][1][baseIndex][-1]) < trimQual:
                    startTrim += 1
                else:
                    break
            for baseIndex in range(-len(contigs[cntgIndex][1]) + 1,1): #loops backwards
                baseIndex *= -1
                if int(contigs[cntgIndex][1][baseIndex][-1]) < trimQual:
                    endTrim += 1
                else:
                    break
            if trim(cntgIndex,startTrim,endTrim) == 1:
                delCount += 1
        if verbose:
            print 'Complete'
            if delCount == 1:
                print '    1 contig was removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
            elif delCount > 1:
                print '    ' + str(delCount) + ' contigs were removed due to insufficient sequence length ( < ' + str(minMatchLen) + ' ) after trimming.'
    #########################################################################################################################


    ###Non-matching contig end triming#######################################################################################
def delBase(cntgIndex, baseIndex):
    if matchsOnly:
    del qualities[cntgIndex][baseIndex]
        if verbose:
    del contigs[cntgIndex][baseIndex]
            print '-m Modifier: Removing non-matching sequence...',
    if isAlignment:
        if SQaligned:
        if baseIndex == 0: alignmentIndexs[cntgIndex][0] += 1
            for cntgIndex in range(0,len(contigs)):
        else: alignmentIndexs[cntgIndex][1] -= 1  
                del contigs[cntgIndex][1][contigs[cntgIndex][2][3]:]
                del contigs[cntgIndex][1][:contigs[cntgIndex][2][2] - 1]
                contigs[cntgIndex][2][3] -= contigs[cntgIndex][2][2] - 1
                contigs[cntgIndex][2][2] = 1
        else:
            debugLog.append('Error: the -m modifier must be used with an aligned quality file from sumqual.')
            errorExit() #exit the script
        if verbose:
            print 'Complete'
    #########################################################################################################################


    ###Contained contig extraction###########################################################################################
def delBases(cntgIndex, baseIndexes):
    contCntgSeqs = []
    baseIndexes.sort()
    if containedCngts == False and includeCntgs:
    baseIndexes.reverse()
        if verbose:
    for index in baseIndexes: delBase(cntgIndex, index)
            print 'Removing contianed contigs from the anaylsis...',
        contained = []
        contCntgs = []
        for cntgIndex in range(0,len(contigs)):
            for compIndex in range(0,len(contigs)):
                if contigs[compIndex][2][0] >= contigs[cntgIndex][2][0] and contigs[compIndex][2][1] <= contigs[cntgIndex][2][1] and compIndex != cntgIndex:
                    contained.append(compIndex)
        contained.reverse()
        for index in contained:
            contCntgs.append(contigs[index])
            del contigs[index]
        for cntg in contCntgs:
            contCntgSeqs.append('>' + cntg[0])
            outStr = ''
            for qual in cntg[1]:
                outStr += str(qual[0])
            contCntgSeqs.append(outStr)
        if verbose:
            print 'Complete'
            if len(contained) == 0:
                print '    No contained contigs found.'
            elif len(contained) == 1:
                print '    1 contained contig found.'
            else:
                print '    ' + str(len(contained)) + ' contained contigs found.'
    elif verbose:
        print '-a Modifier: Contained contigs will be included in the anaylsis.'
    #########################################################################################################################


     sortIndex = 0
###Contig end Trimming (-q and -t option implimentation)#####################################################################
     def sortComp(cntgA,cntgB):
if options.end_trim > 0:
        cntgAStart = cntgA[2][0] - cntgA[2][2]
     verbose('qualtofa: option -t/--end-trim: Trimming %d bases from both ends of each contig...' % options.end_trim)
        cntgBStart = cntgB[2][0] - cntgB[2][2]
    rejectIndexes = [index for index in range(0,len(contigs)) if len(contigs[index]) <= options.end_trim * 2]  #makes a list of all the indexs of contigs that are too short to be trimmed.
        if cntgAStart > cntgBStart:
     delContigs(rejectIndexes)   #deletes all rejected contigs
            return 1
    contigs = [cntg[options.end_trim : -options.end_trim] for cntg in contigs]  #trims all remaining contigs
         elif cntgAStart == cntgBStart:
    qualities = [qual[options.end_trim : -options.end_trim] for qual in qualities]   #trims all remaining contigs
            return 0
    alignmentIndexs = [[indexRange[0] + options.end_trim,indexRange[1] - options.end_trim] for indexRange in alignmentIndexs]
        else:
    if options.verbose:
            return -1
        verbose('Complete\n')
         if len(rejectIndexes) > 0: verbose('option -t/--end-trim: %d contig(s) were removed from the analysis because they were shorter than twice the trim length of %d.\n' % (len(rejectIndexes),options.end_trim))


    #########################################################################################################################
if options.end_trim_qual > 0:
    #>Funtion used by the IUPAC function to remove repeated characters in list                                              #
     verbose('qualtofa: option -q/--end-trim-qual: Trimming contig ends until a base with a coverage of %d or more is reached...' % options.end_trim_qual)
     #>ex: ['A','C','C'] becomes ['A','C']                                                                                  #
     for index in range(0, len(contigs)):
     def removeRepeats(charList):
         while len(qualities[index]) > 0 and qualities[index][0][-1] < options.end_trim_qual: delBase(index,0)
         for charIndex in range(0,len(charList)):
        while len(qualities[index]) > 0 and qualities[index][-1][-1] < options.end_trim_qual: delBase(index,-1)
            for index in range(0,len(charList)):
    rejectIndexes = [index for index in range(0,len(qualities)) if len(qualities[index]) == 0] #makes a list of all the indexs of contigs that dont contain sequence after trimming
                if charIndex != index and charList[charIndex] == charList[index]:
    delContigs(rejectIndexes)   #deletes all rejected contigs
                    if charList[charIndex].islower():
    if options.verbose:
                        del charList[charIndex]
        verbose('Complete\n')
                    else:
         if len(rejectIndexes) > 0: verbose('option -q/--end-trim-qual: %d contig(s) removed from the analysis because all of their sequence had quality values less than %d.\n' % (len(rejectIndexes),options.end_trim_qual))
                        del charList[index]                       
#############################################################################################################################
                    return removeRepeats(charList)
         return charList
    #########################################################################################################################


    #########################################################################################################################
###Contigs Match Coordinate Extraction (-l option implimentation)############################################################
    #>Function that takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code            #
if isAlignment:
    #>Assumes only input characters to be ACTG-~N                                                                          #
     cntgMatchRanges = [mkRange([index for index in range(0,len(cntg)) if cntg[index].isupper()]) for cntg in contigs]
    #>Ex: ['A','C'] becomes 'M'                                                                                            #
    if options.min_match_length > 0:
     def getIUPAC(baseValues):
         verbose('qualtofa: option -l/--min-match-length: filtering contigs without a match of at least %d base pairs...' % options.min_match_length)
        returnChar = ''
        maxMatchLens = [max([match[1] - match[0] + 1 for match in matchList] + [0]) for matchList in cntgMatchRanges]
        x = removeRepeats(baseValues)
        rejectIndexes = [index for index in range(0,len(contigs)) if maxMatchLens[index] < options.min_match_length]
        if len(x) == 0:
         delContigs(rejectIndexes)
            return '-'
         if options.verbose:
        elif len(x) == 1:
             verbose('Complete\n')
            return x[0]
             if len(rejectIndexes) > 0: verbose('option -l/--min-match-length: %d contig(s) removed from the analysis due to the lack of a match %d base pairs long.\n' % (len(rejectIndexes),options.min_match_length))
        for index in range(0,len(x)):
#############################################################################################################################
            if x[index] == '~':
                x[index] = '-'
        x.sort()
        for index in range(-len(x) + 1, 1):
            index *= -1
            if len(x) > 1 and x[index] == '-':
                del x[index]
         for index in range(-len(x) + 1, 1):
            index *= -1
            if len(x) > 1 and x[index].upper() == 'N':
                del x[index]
        if len(x) > 1:
            returnLower = True
            for char in x:
                if char.isupper():
                    returnLower = False
                    break
            if returnLower == False:
                for index in range(-len(x) + 1, 1):
                    index *= -1
                    if len(x) > 1 and x[index].islower():
                        del x[index]
            else:
                for index in range(0,len(x)):
                    x[index] = x[index].upper()
         if len(x) == 1:
            return x[0]
         elif len(x) == 2:
            if (x[0] == 'C' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'C'):
                returnChar = 'Y'
             elif (x[0] == 'A' and x[1] == 'G') or (x[0] == 'G' and x[1] == 'A'):
                returnChar = 'R'
             elif (x[0] == 'A' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'A'):
                returnChar = 'W'
            elif (x[0] == 'C' and x[1] == 'G') or (x[0] == 'G' and x[1] == 'C'):
                returnChar = 'S'
            elif (x[0] == 'G' and x[1] == 'T') or (x[0] == 'T' and x[1] == 'G'):
                returnChar = 'K'
            elif (x[0] == 'C' and x[1] == 'A') or (x[0] == 'A' and x[1] == 'C'):
                returnChar = 'M'
        elif len(x) == 3:
            if x[0] != 'C' and x[1] != 'C' and x[2] != 'C':
                returnChar = 'D'
            elif x[0] != 'T' and x[1] != 'T' and x[2] != 'T':
                returnChar = 'V'
            elif x[0] != 'G' and x[1] != 'G' and x[2] != 'G':
                returnChar = 'H'
            elif x[0] != 'A' and x[1] != 'A' and x[2] != 'A':
                returnChar = 'B'
        else:
            returnChar = 'N'
        if returnLower:
            returnChar = returnChar.lower()
        return returnChar
    #########################################################################################################################
    SNPquals  = []
    alignedSeq = ''
    maskedSeq  = ''
    contigs.sort(sortComp)


     def getMasked(cntg,baseIndex,refIndex,minBaseVal,minSNPVal,minBaseProp,minSNPProp):
###Contained contig extraction (-k option implimentation)####################################################################
        char = cntg[1][baseIndex][0]
if isAlignment:
        qual = int(cntg[1][baseIndex][-1])
     verbose('qualtofa: Searching for contained contigs...')
        if char != '-':
    matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
            ref = reference[refIndex].upper()
    containedIndexes = [[bIndex for bIndex in range(0,len(matchRanges)) if matchRanges[aIndex][0] <= matchRanges[bIndex][0] and matchRanges[aIndex][1] >= matchRanges[bIndex][1] and aIndex != bIndex] for aIndex in range(0,len(matchRanges))]
            if char.isupper() and char.upper() != ref.upper() and ref != '-': #is SNP
    containedIndexes = [index for contained in containedIndexes for index in contained]
                if qual < minSNPVal:
    containedContigs = [(headers[index], contigs[index]) for index in containedIndexes]
                    char = maskingChar.upper()
    verbose('Complete\n')
                qual = [ref] + [refIndex + 1] + [cntg[0]] + [baseIndex + 1] + cntg[1][baseIndex]
    if options.keep_contained == False:
                SNPquals.append(qual)
        delContigs(containedIndexes)
            elif minBaseVal > 0 and int(cntg[1][baseIndex][-1]) < minBaseVal:
        if len(containedIndexes) > 0: verbose('qualtofa: %d contained contig(s) removed from the analysis.\n' % len(containedIndexes))
                if cntg[1][baseIndex][0].isupper():
    elif len(containedIndexes) > 0: verbose('qualtofa: option -k/--keep-contained: %d contained contig(s) included in the analysis.\n' % len(containedIndexes))
                    char = maskingChar.upper()
#############################################################################################################################
                else:
                    char = maskingChar.lower()
        return char


     ###IUPAC Alignment Generation############################################################################################
def getIUPAC(baseValues):
     if IUPAC:
     '''Takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code. Assumes only input characters to be ACTG-~N'''
         if SQaligned:
     def IUPAC(bases):
            if verbose:
         conversion = {'CT':'Y','AG':'R','AT':'W','CG':'S','GT':'K','AC':'M','AGT':'D','ACG':'V','ACT':'H','CGT':'B','ACGT':'N'}
                print '-i Modifier: Generating IUPAC-format consensous alignment...',
        bases.sort() #['A', 'C', 'G', 'T']
            for refIndex in range(0,len(reference)):
        if bases[0].islower():
                maskedChars = []
            bases = map(str.upper,bases)
                origChars = []
            return str.lower(conversion[''.join(bases)])
                for cntg in contigs:
        else: return conversion[''.join(bases)]
                    start = cntg[2][0] - cntg[2][2]
    chars = removeRepeats(baseValues)
                    end  = cntg[2][1] + (len(cntg[1]) - cntg[2][3]) - 1
    chars.sort()  #['-', 'A', 'C', 'G', 'N', 'T', 'a', 'c', 'g', 'n', 't', '~']
                    if refIndex >= start and refIndex <= end:
    if len(chars) == 0: return '-'
                        baseIndex = refIndex - start
    if len(chars) == 1: return chars[0]
                        maskedChars.append(getMasked(cntg,baseIndex,refIndex,minQualScore,minSNPScore,minBaseProp,minSNPProp))
    if chars[-1] == '~':  #converts '~' to '-'
                        origChars.append(cntg[1][baseIndex][0])
        del chars[-1]
                maskedSeq += getIUPAC(maskedChars)
        if chars[0] != '-': chars.insert(0,'-')
                alignedSeq += getIUPAC(origChars)
    priorityList = ('ACGT','N','-','acgt','n')
            outData.append(['Aligned_Contigs',alignedSeq])
    for group in priorityList:
            if minQualScore != 0 or minSNPScore != 0:
        matchs = [base for base in chars if group.find(base) != -1]
                outData.append(['Masked_Contigs',maskedSeq])
        if len(matchs) == 0: continue
            if verbose:
        elif len(matchs) == 1: return matchs[0]
                print 'Complete'
         else: return IUPAC(matchs)
               
         else:
            debugLog.append('Error: the -i modifier must be used with an aligned quality file from sumqual.')
            errorExit() #exit the script
    #########################################################################################################################


     def contigsOverlap(index1,index2):
###Overlap handleing (-i, -n options implimentation)############################################################################
         cntg = contigs[index1]
if isAlignment:
        comp = contigs[index2]
     if options.no_match_overlap or options.no_overlap:
        compStart = comp[2][0] - comp[2][2]
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: Adding deletions to the reference to accomodate all overlapping sequence...')
         cntgEnd  = cntg[2][1] + (len(cntg[1]) - cntg[2][3]) - 1
        else: verbose('qualtofa: option -a/--no-match-overlap: Adding deletions to the reference to accomodate overlapping matches...')
         out = cntgEnd - compStart + 1
         matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
         if out >= 0:
         if options.no_overlap: interSeqGaps = [alignmentIndexs[index + 1][0] - alignmentIndexs[index][1] -1 for index in range(0,len(contigs) - 1)]
             return out
        else: interSeqGaps = [matchRanges[index + 1][0] - matchRanges[index][1] - 1 for index in range(0,len(contigs) - 1)]
        else:
        verbose('Complete\n')
             return 0
        gapSum = sum([abs(gap) for gap in interSeqGaps if gap < 0])
         totalLen = sum(map(len,contigs))
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: %d of %d total bases (%d percent) in the consensus is overlaping sequence.\n' % (gapSum,totalLen,gapSum*100/len(reference)))
        else: verbose('qualtofa: option -a/--no-match-overlap: %d of %d total bases (%.1f%%) in the consensus are overlapping matchs.\n' % (gapSum,totalLen,float(gapSum)*100/len(reference)))
         refOffset = 0
        for index in range(0,len(interSeqGaps)):
             if interSeqGaps[index] < 0:  #if matching sequence overlaps...
                for count in range(0,abs(interSeqGaps[index])): reference.insert(matchRanges[index][1] + refOffset + 1, '-')
                refOffset += abs(interSeqGaps[index])
             alignmentIndexs[index + 1][0] += refOffset
            alignmentIndexs[index + 1][1] += refOffset
##########################################################################################################################################


     def matchsOverlap(index1,index2):
def getProp(call, qual):
         compStart = contigs[index2][2][0]
    equivalancies = {'A':0,'C':1,'G':2,'T':3,'N':4}
        cntgEnd  = contigs[index1][2][1]  
    try: callIndex = equivalancies[call.upper()]
        out = cntgEnd - compStart + 1
    except KeyError: errorExit('getProp: Non-neucleotide base %s encountered...' % call)
        if out >= 0:
    if callIndex == 4:
            return out
        return float(max(qual[:4]))/float(qual[-1])
        else:
    return float(qual[callIndex])/float(qual[-1])   
            return 0
   
###Consensous Creation (-M, -S options implimentation)############################################################################
if isAlignment:
    verbose('qualtofa: Creating consensus...')
    numMasked, numQualMasked, SNPmasked, SNPqualmasked, numPropMasked, SNPpropMasked, SNPcount = (0,0,0,0,0,0,0)
    if sum(options.mask_SNPs + options.mask_consensus) > 0: includeMaskedCon = True
    else: includeMaskedCon = False
    rawConsensous = [[] for index in range(0,len(reference))]
    if includeMaskedCon: maskedRawCon = [[] for index in range(0,len(reference))]
     for cIndex in range(0,len(contigs)):
         for bIndex in range(0,len(contigs[cIndex])):
            rIndex = alignmentIndexs[cIndex][0] + bIndex
            refChar = reference[rIndex]
            cntgChar = contigs[cIndex][bIndex]
            rawConsensous[rIndex].append(cntgChar)
            if includeMaskedCon:
                if cntgChar.isupper() and refChar.isalpha() and refChar != cntgChar:
                    SNPcount += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0]:
                        cntgChar = 'N'
                        SNPqualmasked += 1
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        cntgChar = 'N'
                        SNPpropMasked += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        SNPmasked += 1
                if contigs[cIndex][bIndex].isalpha():
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0]:
                        numQualMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numPropMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numMasked += 1
                maskedRawCon[rIndex].append(cntgChar)         
    conSequence = [getIUPAC(charList) for charList in rawConsensous]
    if includeMaskedCon: maskedConSeq = [getIUPAC(charList) for charList in maskedRawCon]
    verbose('Complete\n')
    totalLen = len(conSequence)
    if sum(options.mask_SNPs) > 0: verbose('qualtofa: %d of %d total consensus SNPs (%.1f%%) were masked.\n' % (SNPmasked,SNPcount, float(SNPmasked*100)/SNPcount))
    if options.mask_SNPs[0] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPqualmasked,SNPcount, float(SNPqualmasked*100)/SNPcount, options.mask_SNPs[0]))
    if options.mask_SNPs[1] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropMasked,SNPcount, float(SNPpropMasked*100)/SNPcount, options.mask_SNPs[1]))
    if options.mask_consensus[0] > 0: verbose('qualtofa: %d of %d total consensus bases (%.1f%% )were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))       
    if options.mask_consensus[0] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_consensus[0]))       
    if options.mask_consensus[1] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_consensus[1]))       
#############################################################################################################################


    ###Spaced alignment Generation###########################################################################################
       
    if IUPAC == False and SQaligned:
###Contigs SNP Detection/Masking(-s option implimentation)###################################################################
        if verbose:
if isAlignment:
            print 'Generating consensous alignment...',
    SNPcovNum, SNPpropNum, SNPmaskedNum, SNPcount = (0,0,0,0)
        start = contigs[0][2][0] - contigs[0][2][2]
    SNPs = []
        if start > 1:
    for cIndex in range(0,len(contigs)):
            for count in range(0,start):
         for bIndex in range(0,len(contigs[cIndex])):
                alignedSeq += '-'
             refPos = bIndex + alignmentIndexs[cIndex][0]
                maskedSeq += '-'
             refChar = reference[refPos]
        refDels = []
             cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
         for qualIndex in range(0,len(contigs[0][1])):
            if cntgChar.isupper() and refChar.isalpha() and cntgChar.isalpha() and refChar != cntgChar:
             alignedSeq += contigs[0][1][qualIndex][0]
                SNPcount += 1
             refIndex = contigs[0][2][0] - contigs[0][2][2] + qualIndex
                SNPs.append([refPos,refChar,headers[cIndex],bIndex,cntgChar] + qualities[cIndex][bIndex])
             maskedSeq += getMasked(contigs[0],qualIndex,refIndex,minQualScore,minSNPScore)
                 if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0]:
        for cntgIndex in range(0,len(contigs) - 1):
                    contigs[cIndex][bIndex] = 'N'
            compIndex = cntgIndex + 1
                    SNPcovNum += 1
            matchNum = matchsOverlap(cntgIndex,compIndex) #the amount of bases that the matching regions of the adjacent conigs overlap
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
            cntgNum = contigsOverlap(cntgIndex,compIndex)
                    contigs[cIndex][bIndex] = 'N'
            delNum = cntgNum  #= (contig length - end of ref match) + match overlap size
                    SNPpropNum += 1
            refInsert = int(contigs[cntgIndex][2][1]) #the index at which the deletion will be inserted
                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
            for count in range(0,delNum):
                    SNPmaskedNum += 1
                 reference.insert(refInsert, '-')
    totalLen = sum(map(len,contigs))
            for cIndex in range(compIndex,len(contigs)):
    if sum(options.mask_contig_SNPs) > 0: verbose('qualtofa: %d of %d total SNPs (%.1f%%) were masked\n' % (SNPmaskedNum,SNPcount,float(SNPmaskedNum*100)/SNPcount))  
                contigs[cIndex][2][0] += delNum #the contig with the larger starting match index must be moved up
    if options.mask_contig_SNPs[0] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPcovNum,SNPcount,float(SNPcovNum*100)/SNPcount,options.mask_contig_SNPs[0]))  
                contigs[cIndex][2][1] += delNum
    if options.mask_contig_SNPs[1] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropNum,SNPcount,float(SNPpropNum*100)/SNPcount,options.mask_contig_SNPs[1]))   
            cntgEnd = contigs[cntgIndex][2][1] + (len(contigs[cntgIndex][1]) - contigs[cntgIndex][2][3]) - 1
#############################################################################################################################
            compStart = contigs[compIndex][2][0] - contigs[compIndex][2][2]
            if cntgEnd + 1 < compStart:
                count = compStart - cntgEnd - 1
                for x in range(0,count):
                    alignedSeq += '-'
                    maskedSeq += '-'
            for qualIndex in range(0,len(contigs[compIndex][1])):
                alignedSeq += contigs[compIndex][1][qualIndex][0]
                refIndex = contigs[compIndex][2][0] - contigs[compIndex][2][2] + qualIndex
                maskedSeq += getMasked(contigs[compIndex],qualIndex,refIndex,minQualScore,minSNPScore)
        outData.append(['Aligned_Contigs',alignedSeq])
        if minQualScore != 0 or minSNPScore != 0:
            outData.append(['Masked_Contigs',maskedSeq])
        if verbose:
            print 'Complete'


     #########################################################################################################################
###Contigs Masking(-m option implimentation)#################################################################################
if sum(options.mask_contigs) > 0:
     numMasked, numQualMasked, numPropMasked = (0,0,0)
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
            if cntgChar.isalpha():
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    numMasked += 1
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numQualMasked += 1
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numPropMasked += 1           
    totalLen = sum(map(len,contigs))
    verbose('qualtofa: %d bases of %d total bases (%.1f%%) were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_contigs[0])) 
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_contigs[1]))     
#############################################################################################################################


    ###Contig Addition#######################################################################################################
###Contianed Contigs file saving#############################################################################################
    if includeCntgs:
if len(containedContigs) > 0:
        if verbose:
    outPath = qualPath + '_contained_contigs.fsa'
            print '-c Modifier: Extracting/Aligning Contigs...',
    verbose('qualtofa: Saving contained contigs to %s...' % outPath)
        for cntg in contigs:
    saveFasta(outPath, *zip(*containedContigs))
            cntgSeq = ''
    verbose('Complete\n')
            cntgStart = cntg[2][0] - cntg[2][2] + 1
#############################################################################################################################
            if SQaligned and alignCntgs:
                for count in range(1,cntgStart):
                    cntgSeq += '-'
            qualIndex = 0
            for qual in cntg[1]:
                refIndex = cntg[2][0] - cntg[2][2] + qualIndex
                cntgSeq += getMasked(cntg,qualIndex,refIndex,cntgQualScore,cntgSNPScore)
                qualIndex += 1
            outData.append([cntg[0],cntgSeq])
        if verbose:
            print 'Complete'
    #########################################################################################################################
    refStr = ''
    for char in reference:
        refStr += char
    outData.insert(0,['Reference',refStr])


    ###Pointless spacer removal##############################################################################################
###SNP file saving###########################################################################################################
    if verbose:
if options.save_SNPs:
        print 'Pointless index detection and removal...',
     outPath = qualPath + '_SNPs.qual'
     delList = [] #will contain the indices at which all of the sequences have '-' and must be deleted
     verbose('qualtofa: Saving %d SNPs to %s...' % (len(SNPs),outPath))
     for refIndex in range(0,len(outData[0][1])): #loops through each index in terms of the reference
    outHandle = open(outPath, 'w')
        pointlessSpace = True #each indice is asumed to be a space
    outHandle.write('Schema:\nPosition\tReference\tName\tIndex\tcall\tA\tC\tG\tT\tdash\tsum\n')
        for cntgIndex in range(0,len(outData)): #loops through all the sequences that are being checked for spaces
    for SNP in SNPs: outHandle.write('\t'.join(map(str,SNP)) + '\n')
            seqLen = len(outData[cntgIndex][1])
    outHandle.close()
            if refIndex < seqLen: #if the index being tested is present on this contig...  
     verbose('Complete\n')
                bp = outData[cntgIndex][1][refIndex]
#############################################################################################################################
                if bp != '-': #if the base is not a space...
                    pointlessSpace = False
        if pointlessSpace:
            delList.append(refIndex)
    if len(delList) > 0:
        for deletion in delList:
            for index in range(0,len(SNPquals)):
                if deletion < SNPquals[index][1]:
                    SNPquals[index][1] -= 1
        delList = mkRange(delList) #makes a list of indices into a list of index ranges
        delList.reverse() #reverses the order of the list to preserve the accuracy of the index values in delList as they are deleted from faData
        for theRange in delList: #for every range specifed by delList...
            for cntgIndex in range(0,len(outData)): #for every contig is faData...
                outData[cntgIndex][1] = outData[cntgIndex][1][:theRange[0]] + outData[cntgIndex][1][theRange[1] + 1:] #remove the section determined to be spaces
     if verbose:
        print 'Complete'
    #########################################################################################################################


    ###Out file writing and saveing procedures###############################################################################
###Contig sequence alignment#################################################################################################
    fileSavePath = savePath + os.path.basename(qualPath) + '.fsa' #the path to where the output is saved
if isAlignment and options.dont_align_contigs == False:
    if verbose:
    contigs = [['-'] * alignmentIndexs[index][0] + contigs[index] for index in range(0,len(contigs))]
        print 'Generating output files...',
#############################################################################################################################
    outHandle = open(fileSavePath, 'w') #opens the file object for saving the output
    for seq in outData: #prints every line of outSeqs to the output file...
        outHandle.write('>' + seq[0] + '\n')
        outHandle.write(seq[1] + '\n')
    if printOut: #if the -p modifier is supplied...
        for seq in outData: #prints every line of outSeqs to the screen...
            print seq[0]
            print seq[1]
    if len(contCntgSeqs) > 0:
        contCntgSavePath = savePath + os.path.basename(qualPath) + '_ContatiedContigs.fsa' #the path to where contained contigs are saved is saved
        CCHandle = open(contCntgSavePath, 'w')
        for line in contCntgSeqs:
            CCHandle.write(line + '\n')
        CCHandle.close()
    outHandle.close() #closes file object
    if SNPs:
        snpSavePath = savePath + os.path.basename(qualPath) + '_SNPs.qual' #the path to where SNPs are saved
        snpHandle = open(snpSavePath, 'w')
        for qual in SNPquals:
            line = ''
            for entry in qual:
                line += str(entry) + '\t'
            line = line[:-1]
            snpHandle.write(line + '\n')
        snpHandle.close()       
    if verbose:
        print 'Complete'
        print '    ' + str(len(outData)) + ' sequences saved to ' + fileSavePath + '.'
        if SNPs:
            print '    ' + str(len(SNPquals)) + ' SNPs saved to ' + snpSavePath + '.'
        if len(contCntgSeqs) > 0:
            if len(contained) == 1:
                print '    1 contained contig sequence saved to ' + contCntgSavePath + '.'
            elif len(contained) > 1:
                print '    ' + str(len(contained)) + ' contained contig sequences saved to ' + contCntgSavePath + '.'
    #########################################################################################################################


    ###Debug Saving procedures###############################################################################################
###Output file Saving########################################################################################################
     if saveDebug: #if the -d modifier is supplied...
verbose('qualtofa: Saving fasta output to %s...' % (qualPath + '.fsa'))
        debugHandle = open(savePath + os.path.basename(qualPath) + '_debug.txt', 'w')
conHeader = 'Consensus'
        for line in debugLog:
maskedConHeader = 'Masked Consensus'   
            debugHandle.write(line)
if isAlignment:
        debugHandle.close()
    outputHeads, outputSeqs = ([refHeader,conHeader],[reference,conSequence])
     #########################################################################################################################  
     if includeMaskedCon:
        outputHeads.append(maskedConHeader)
        outputSeqs.append(maskedConSeq)
else: outputHeads, outputSeqs = ([],[])
if options.include_contigs:
    outputHeads += headers
    outputSeqs += contigs
outPath = os.path.join(os.getcwd(), os.path.basename(qualPath) + '.fsa')
saveFasta(outPath, outputHeads, outputSeqs)
verbose('Complete\n')
if options.save_run_info:
    runtimeOutPath = os.path.join(os.getcwd(), 'qualtofa_runtime_output_%s.txt' % timeStamp)
    verbose('qualtofa: option -i/--save-run-info: saving run-time output to %s...' % runtimeOutPath)
    runHandle = open(runtimeOutPath, 'w')
    for line in printLog: runHandle.write(line)
    runHandle.close()
     verbose('Complete\n')
#############################################################################################################################
</pre>
</pre>

Latest revision as of 12:11, 8 September 2010

Copy and past the following into a text editor and save it as qualtofa.py to use the script.

#!/usr/bin/env python

###Imports / Variable Initilization##########################################################################################
import os, string, sys, time, copy
from datetime import *
from optparse import *
from subprocess import *
commandLine = copy.deepcopy(sys.argv)
cwd = os.getcwd()
timeStamp = datetime.now().ctime().replace(' ','-').replace(':','-').replace('--','-')  #Ex: 'Mon-Jun-14-11-08-55-2010'
cntgMatchRanges = None
progName, progVersion = ('qualtofa.py','2.52')
progUsage = 'python %s [options] <.qual file>' % progName
printLog = []
#############################################################################################################################

def errorExit(message=None,exitStatus=1):
    '''Version 1.0
    Is called when the script encounters a fatal error. Prints the debug log to standard out (usually the screen)'''
    
    if message:
        print '%s: Error: %s' % (progName, message)
    else:
        print progName +': Unknown error, printing debug log...'
        for line in debugLog: print line   #print debug log
    sys.exit(exitStatus)

def mkRange(numList):
    '''Function designed to make a list of ranges (e.g. 2-5, 8-10) out of a list of integers (e.g. 2,3,4,5,8,9,10)'''
    outList = []
    if len(numList) > 0:
        start   = numList[0]
        current = numList[0]
        numList.append(numList[-1])
        for num in numList[1:]:
            if (num - 1) != current:
                outList.append([start,current])
                start = num
            current = num
    return outList

def readLFile(filePath, numLinesToRead = None):
    fileHandle = open(filePath, 'r')
    if numLinesToRead is None:
        fileData = fileHandle.readlines()
    else:
        fileData = [fileHandle.readline() for count in range(0,numLinesToRead)]
    fileHandle.close()
    return fileData

def writeLFile(filePath, fileData):
    fileHandle = open(filePath, 'w')
    for line in fileData:
        fileHandle.write(fileData)
    fileHandle.close()

def saveQualStandard(filePath, contigs, qualValues, headers, schema=None):
    if schema is None: schema = ['Position in the contig(1 based)', 'Called base', "Number of reads suppoting 'A'", "Number of reads supporting 'C'", "Number of reads supporting 'G'", "Number of reads supporting 'T'", "Number of reads supporting '_'", 'Coverage']
    if len(contigs) == 0: errorExit('saveQualStandard: empty list supplied')
    if len(contigs) != len(qualValues) or len(contigs) != len(headers): errorExit('saveQualStandard: Unequal number of entries is input list')
    outHandle = open(filePath, 'w')
    outHandle.write('Schema:'+ '\n' + '\t'.join(schema) + '\n')
    for cIndex in range(0,len(contigs)):
        outHandle.write(headers[cIndex] + '\n')
        for bIndex in range(0,len(contigs[cIndex])): outHandle.write('\t'.join([contigs[cIdnex][bIndex]] + qualValues[cIndex]) + '\n')
    outHandle.close()
            
def parseQualStandard(filePath):
    columbWidth = 8
    fileData = [line.strip().split('\t') for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file      
    if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
    schema = fileData[0]   #extract the tab-deliminated schema into a list
    headers, contigs, qualValues = ([], [], [])   #initilizes multiple lists at a time
    index = 0
    for line in fileData[1:]:
        if len(line) == 1:   #if it is a header
            headers.append(line[0])
            contigs.append([])
            qualValues.append([])
        elif len(line) == columbWidth:   #if it is  quality value
            contigs[-1].append(line[1])
            qualValues[-1].append([int(num) for num in line[2:]])   #converts all of the quality values into integer objects and saves them in qualvalues
        else: errorExit('parseQualStandard: Quality file at %s deviated from the expected format on line %d.' % (filePath, index))
        index += 1        
    return (contigs, qualValues, headers, schema)

def parseQualAlignment(filePath):
    columbWidth = 9
    headColNum = 2
    fileData = [[part.strip() for part in line.split('\t')] for line in readLFile(filePath)]   #fileData is a list of lists representing the tab-deliminated structure of the .qual file  
    if fileData[0] == ['Schema:']: del fileData[0]   #removes the unessesary first line of the file, if present
    schema = fileData[0]   #extract the tab-deliminated schema into a list
    refHeader = fileData[1][0]
    reference = [line[1] for line in fileData[2:]]   #extracts the reference from the second columb
    qualData = [line[headColNum:] for line in fileData[2:]]   #removes the first lines and columbs of fileData, which dont contain unextracted data
    headers, contigs, cntgStarts, qualValues = ([], [], [], [])   #initilizes multiple lists at a time
    refPos = 0
    for line in qualData:
        if len(line) > 0:
            for index in range(0,len(line),columbWidth):
                if headers.count(line[index]) == 0:   #if the contig header has not been encountered before
                    headers.append(line[index])
                    cntgStarts.append(refPos)
                    contigs.append([])
                    qualValues.append([])
                headIndex = headers.index(line[index])
                contigs[headIndex].append(line[index + 2])   #append the called base character to its contig
                qualValues[headIndex].append([int(num) for num in line[index + 3: index + columbWidth]])   #converts all of the quality values into integer objects and saves them in qualvalues 
        refPos += 1
    alignmentIndexs = [[cntgStarts[index], cntgStarts[index] + len(contigs[index]) - 1] for index in range(0,len(cntgStarts))]
    return (contigs, qualValues, headers, alignmentIndexs, reference, refHeader, schema)

def parseQual(filePath):
    columbWidth = 9
    headColNum = 2
    fileHead = readLFile(filePath, 2)
    if fileHead[0].strip() == 'Schema:': del fileHead[0]   #removes the unessesary first line of the file, if present
    schema = fileHead[0].strip().split('\t')
    if schema[1] == 'Reference' and len(schema) % columbWidth == headColNum:
        return parseQualAlignment(filePath)
    elif len(schema) == 8:
        return parseQualStandard(filePath)
    else: errorExit('parseQual: The quality file at %s has an unreconized format based on the schema on line 2.' % filePath)

def listToStr(aList):
    out = ''
    for char in aList: out += char
    return out

def saveFasta(filePath, headers, sequences):
    outHandle = open(filePath, 'w')
    if len(headers) != 0 and len(sequences) != 0:
        if len(headers) != len(sequences): errorExit('saveFastaFile: different number of headers and sequences')
        if type(headers) == str and type(sequences) == str:
            headers = [headers]
            sequences = [sequences]
        if type(sequences[0]) == list: sequences = [listToStr(seq) for seq in sequences]
        for index in range(0,len(headers)):
            outHandle.write('>%s\n' % headers[index])
            if type(sequences[index]) == str: sequences[index] = [sequences[index]]  #if there is only one line of sequence for a given header, make it a one-item list for the next loop
            for seq in sequences[index]:
                outHandle.write(seq + '\n')
    outHandle.close()

def parseFasta(filePath):
    headers = []
    seqs = []
    fileHandle = open(filePath, 'r')
    fileData = fileHandle.readlines() + ['>']   #the additional header line is there make to following for loop process the last sequence 
    fileHandle.close()
    currSeq = ''
    for line in fileData:
        if line[0] == '>':   #if the line is a fasta header
            headers.append(line[1:].strip() )   #the fisrt character is the '>' and the last is a newline '\n'
            seqs.append(currSeq)
            currSeq = ''
        else:
            currSeq += line.strip()
    return (headers[:-1], seqs[1:])

def removeRepeats(aList):
    for entry in aList:
        if aList.count(entry) > 1:
            aList.remove(entry)
            return removeRepeats(aList)
    return aList

def maskingCallback(option, opt_str, value, parser):
    value = []
    def floatable(str):
        try:
            float(str)
            return True
        except ValueError: return False
    for arg in parser.rargs:         
        if arg[:2] == "--" and len(arg) > 2: break   # stop on --foo like options             
        if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): break   # stop on -a, but not on -3 or -3.0
        value.append(arg)
        if len(value) == 2: break 
    if len(value) == 0: errorExit("option '%s' requires one or two arguments; none supplied." % option.dest)
    del parser.rargs[:len(value)]
    if len(value) == 1: value.append(0)
    value[0] = int(value[0])
    value[1] = float(value[1])
    if value[0] < 0: errorExit("The first argument of the option '%s' must be a positive integer; %d supplied." % (option.dest,value[0]))
    if value[1] < 0 or value[1] > 1: errorExit("The second argument of the option '%s' must be a decimal between 0 and 1; %f supplied." % (option.dest,value[1]))
    setattr(parser.values, option.dest, value)

###Command Line Parser#######################################################################################################
cmndLineParser  = OptionParser(usage=progUsage, version="Version %s" % progVersion)
maksingGroup = OptionGroup(cmndLineParser, 'Coverage and Call Proportion Masking','The following options take one integer argument and one decimal argument between 0 and 1, if the second is not supplied it is assumed to be 0.')
cmndLineParser.add_option(   "-c",   "--include-contigs",    action="store_true",    default=False,                                                             help="Include each contig on its own line. (Default: only output the consensus)")
maksingGroup.add_option(   "-m",   "--mask-contigs",       action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contigs',      help="Set minimum coverage depth and call proportion for contig masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(   "-s",   "--mask-contig-SNPs",   action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_contig_SNPs',  help="Set minimum coverage depth and call proportion for contig SNP masking; independent of the consensus. Requires the use of the -c modifier.(Default: 0, 0)")
maksingGroup.add_option(   "-M",   "--mask-consensus",     action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_consensus',    help="Set minimum coverage depth and call proportion for the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
maksingGroup.add_option(   "-S",   "--mask-SNPs",          action="callback",      default=[0,0],      metavar='INT FLOAT',   callback=maskingCallback,    dest='mask_SNPs',         help="Set minimum coverage depth and call proportion for SNPs in the consensus; a new masked sequence will be added to the output file. (Default: 0, 0)")
cmndLineParser.add_option_group(maksingGroup)
cmndLineParser.add_option(   "-t",   "--end-trim",           action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs by a specified number of bases. (Default: 0)")
cmndLineParser.add_option(   "-q",   "--end-trim-qual",      action="store",         default=0,          type="int",    metavar="INT",      help="Trim all the bases on either end of all contigs that have a quality value less than the specified amount. This option is implemented after standard end trimming (see -t option). (Default: 0)")
cmndLineParser.add_option(   "-l",   "--min-match-length",   action="store",         default=0,          type="int",    metavar="INT",      help="Only include contigs that have a least one match longer or as long as the specified amount. This is implemented after any end trimming (see -q and -t options). (Default: 0)")
cmndLineParser.add_option(   "-n",   "--no-overlap",         action="store_true",    default=False,                                         help="Add deletions (i.e. -'s) to the reference to accommodate any overlapping sequence, including unmatched sequence. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(   "-a",   "--no-match-overlap",   action="store_true",    default=False,                                          help="Add deletions to the reference to accommodate overlapping matches. (Default: Condense all overlapping regions of the consensus into IUPAC ambiguity codes.)")
cmndLineParser.add_option(   "-k",   "--keep-contained",     action="store_true",    default=False,                                         help="Include contained contigs in the alignment and consensus. (Default: save sequences of contained contigs to a separate file)")
cmndLineParser.add_option(   "-p",   "--save-SNPs",          action="store_true",    default=False,                                         help="Save SNPs to a .qual file. (Default: Do not save SNP file)")
cmndLineParser.add_option(   "-d",   "--dont-align-contigs", action="store_true",    default=False,                                         help="Do not align contigs to the reference. Requires the use of the -c modifier. (Default: align contigs to the reference)")
cmndLineParser.add_option(   "-r",   "--external-ref",       action="store",         default=None,       type="string", metavar="PATH",     help="Specify a reference to use instead of the one contained in the quality file.")
cmndLineParser.add_option(   "-v",   "--verbose",            action="store_true",    default=False,                                         help="Print progress updates and relevant statistics to the standard output. (Default: run silently)")
cmndLineParser.add_option(   "-i",   "--save-run-info",      action="store_true",    default=False,     help="Save a txt file of anything printed to the screen. Requires the use of -v/--verbose. (Default: Do not save)")
(options, args) = cmndLineParser.parse_args(commandLine)
if options.include_contigs == False:
    if sum(options.mask_contigs) > 1: errorExit('The option -m/--mask-contigs requires the use of the option -c/--include-contigs.')
    if sum(options.mask_contig_SNPs) > 1: errorExit('The option -s/--mask-contig-SNPs requires the use of the option -c/--include-contigs.')
    if options.dont_align_contigs: errorExit('The option -d/--dont-align-contigs requires the use of the option -c/--include-contigs.')
if len(args) == 1:
    cmndLineParser.print_help()
    sys.exit(0)
argNum = len(args) - 1   #counts the amount of arguments, negating the 'qualtofa' at the start of the command line
if argNum != 1: errorExit('qualtofa takes exactly 1 argument; %d supplied' % argNum, 0)
qualPath = args[1]   #The file path to the input file supplied by the user
#############################################################################################################################

def verbose(toPrint):
    if options.verbose:
        print toPrint,
        printLog.append(toPrint)

###Quality file parsing######################################################################################################
verbose('qualtofa: Loading data from %s...' % qualPath)
qualData = parseQual(qualPath)
verbose('Complete\n')
if len(qualData) == 4:   #if the quality file contains a list of unaligned contigs (e.g. one directly from yasra)
    isAlignment = False
    if options.no_match_overlap: errorExit('The option -a/--no-match-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_consensus): errorExit('The option -M/--mask-consensus requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_SNPs): errorExit('The option -S/--mask-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if any(options.mask_contig_SNPs): errorExit('The option -s/--mask-contig-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.keep_contained: errorExit('The option -k/--keep-contained requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.min_match_length: errorExit('The option -l/--min-match-length requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.save_SNPs: errorExit('The option -p/--save-SNPs requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.no_overlap: errorExit('The option -n/--no-overlap requires an aligned quality consensous file, like that given by sumqual.py.')
    if options.external_ref: errorExit('The option -r/--external-ref requires an aligned quality consensous file, like that given by sumqual.py.')
    contigs, qualities, headers, schema = qualData
elif len(qualData) == 7:   #if the quality file contains an alignment (i.e. from sumqual)
    isAlignment = True
    contigs, qualities, headers, alignmentIndexs, reference, refHeader, schema = qualData
else: errorExit()
#############################################################################################################################

###External reference parsing(-r option implimintation)######################################################################
if options.external_ref is not None:
    verbose('qualtofa: option -r/--external-ref: loading reference at %s...' % options.external_ref)
    (seqs,headers) = parseFasta(options.external_ref)
    reference, refHeader = seqs[0], headers[0]
    verbose('Complete\n')
#############################################################################################################################

def delContig(index):
    del contigs[index]
    del headers[index]
    del qualities[index]
    if isAlignment:
        del alignmentIndexs[index]
        if cntgMatchRanges is not None: del cntgMatchRanges[index]

def delContigs(indexes):
    if len(indexes) > 0:    
        indexes.sort()
        indexes.reverse()
        indexes = removeRepeats(indexes)
        for index in indexes: delContig(index)   #deletes all contigs

def delBase(cntgIndex, baseIndex):
    del qualities[cntgIndex][baseIndex]
    del contigs[cntgIndex][baseIndex]
    if isAlignment:
        if baseIndex == 0: alignmentIndexs[cntgIndex][0] += 1
        else: alignmentIndexs[cntgIndex][1] -= 1 

def delBases(cntgIndex, baseIndexes):
    baseIndexes.sort()
    baseIndexes.reverse()
    for index in baseIndexes: delBase(cntgIndex, index)

###Contig end Trimming (-q and -t option implimentation)#####################################################################
if options.end_trim > 0:
    verbose('qualtofa: option -t/--end-trim: Trimming %d bases from both ends of each contig...' % options.end_trim)
    rejectIndexes = [index for index in range(0,len(contigs)) if len(contigs[index]) <= options.end_trim * 2]   #makes a list of all the indexs of contigs that are too short to be trimmed. 
    delContigs(rejectIndexes)   #deletes all rejected contigs
    contigs = [cntg[options.end_trim : -options.end_trim] for cntg in contigs]   #trims all remaining contigs
    qualities = [qual[options.end_trim : -options.end_trim] for qual in qualities]   #trims all remaining contigs
    alignmentIndexs = [[indexRange[0] + options.end_trim,indexRange[1] - options.end_trim]  for indexRange in alignmentIndexs]
    if options.verbose:
        verbose('Complete\n')
        if len(rejectIndexes) > 0: verbose('option -t/--end-trim: %d contig(s) were removed from the analysis because they were shorter than twice the trim length of %d.\n' % (len(rejectIndexes),options.end_trim))

if options.end_trim_qual > 0:
    verbose('qualtofa: option -q/--end-trim-qual: Trimming contig ends until a base with a coverage of %d or more is reached...' % options.end_trim_qual)
    for index in range(0, len(contigs)):
        while len(qualities[index]) > 0 and qualities[index][0][-1] < options.end_trim_qual: delBase(index,0)
        while len(qualities[index]) > 0 and qualities[index][-1][-1] < options.end_trim_qual: delBase(index,-1)
    rejectIndexes = [index for index in range(0,len(qualities)) if len(qualities[index]) == 0] #makes a list of all the indexs of contigs that dont contain sequence after trimming
    delContigs(rejectIndexes)   #deletes all rejected contigs
    if options.verbose:
        verbose('Complete\n')
        if len(rejectIndexes) > 0: verbose('option -q/--end-trim-qual: %d contig(s) removed from the analysis because all of their sequence had quality values less than %d.\n' % (len(rejectIndexes),options.end_trim_qual))
#############################################################################################################################

###Contigs Match Coordinate Extraction (-l option implimentation)############################################################
if isAlignment:
    cntgMatchRanges = [mkRange([index for index in range(0,len(cntg)) if cntg[index].isupper()]) for cntg in contigs]
    if options.min_match_length > 0:
        verbose('qualtofa: option -l/--min-match-length: filtering contigs without a match of at least %d base pairs...' % options.min_match_length)
        maxMatchLens = [max([match[1] - match[0] + 1 for match in matchList] + [0]) for matchList in cntgMatchRanges]
        rejectIndexes = [index for index in range(0,len(contigs)) if maxMatchLens[index] < options.min_match_length]
        delContigs(rejectIndexes)
        if options.verbose:
            verbose('Complete\n')
            if len(rejectIndexes) > 0: verbose('option -l/--min-match-length: %d contig(s) removed from the analysis due to the lack of a match %d base pairs long.\n' % (len(rejectIndexes),options.min_match_length))
#############################################################################################################################

###Contained contig extraction (-k option implimentation)####################################################################
if isAlignment:
    verbose('qualtofa: Searching for contained contigs...')
    matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
    containedIndexes = [[bIndex for bIndex in range(0,len(matchRanges)) if matchRanges[aIndex][0] <= matchRanges[bIndex][0] and matchRanges[aIndex][1] >= matchRanges[bIndex][1] and aIndex != bIndex] for aIndex in range(0,len(matchRanges))]
    containedIndexes = [index for contained in containedIndexes for index in contained]
    containedContigs = [(headers[index], contigs[index]) for index in containedIndexes]
    verbose('Complete\n')
    if options.keep_contained == False:
        delContigs(containedIndexes)
        if len(containedIndexes) > 0: verbose('qualtofa: %d contained contig(s) removed from the analysis.\n' % len(containedIndexes))
    elif len(containedIndexes) > 0: verbose('qualtofa: option -k/--keep-contained: %d contained contig(s) included in the analysis.\n' % len(containedIndexes))
#############################################################################################################################

def getIUPAC(baseValues):
    '''Takes a list of base character values (ACTG-~N) and condenses them into a single IUPAC code. Assumes only input characters to be ACTG-~N'''
    def IUPAC(bases):
        conversion = {'CT':'Y','AG':'R','AT':'W','CG':'S','GT':'K','AC':'M','AGT':'D','ACG':'V','ACT':'H','CGT':'B','ACGT':'N'}
        bases.sort()  #['A', 'C', 'G', 'T']
        if bases[0].islower():
            bases = map(str.upper,bases)
            return str.lower(conversion[''.join(bases)])
        else: return conversion[''.join(bases)]
    chars = removeRepeats(baseValues)
    chars.sort()  #['-', 'A', 'C', 'G', 'N', 'T', 'a', 'c', 'g', 'n', 't', '~']
    if len(chars) == 0: return '-'
    if len(chars) == 1: return chars[0]
    if chars[-1] == '~':   #converts '~' to '-'
        del chars[-1]
        if chars[0] != '-': chars.insert(0,'-')
    priorityList = ('ACGT','N','-','acgt','n')
    for group in priorityList:
        matchs = [base for base in chars if group.find(base) != -1]
        if len(matchs) == 0: continue
        elif len(matchs) == 1: return matchs[0]
        else: return IUPAC(matchs)

###Overlap handleing (-i, -n options implimentation)############################################################################
if isAlignment:
    if options.no_match_overlap or options.no_overlap:
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: Adding deletions to the reference to accomodate all overlapping sequence...')
        else: verbose('qualtofa: option -a/--no-match-overlap: Adding deletions to the reference to accomodate overlapping matches...')
        matchRanges = [[cntgMatchRanges[index][0][0] + alignmentIndexs[index][0],cntgMatchRanges[index][-1][1] + alignmentIndexs[index][0]] for index in range(0,len(cntgMatchRanges))]
        if options.no_overlap: interSeqGaps = [alignmentIndexs[index + 1][0] - alignmentIndexs[index][1] -1 for index in range(0,len(contigs) - 1)]
        else: interSeqGaps = [matchRanges[index + 1][0] - matchRanges[index][1] - 1 for index in range(0,len(contigs) - 1)]
        verbose('Complete\n')
        gapSum = sum([abs(gap) for gap in interSeqGaps if gap < 0])
        totalLen = sum(map(len,contigs))
        if options.no_overlap: verbose('qualtofa: option -n/--no-overlap: %d of %d total bases (%d percent) in the consensus is overlaping sequence.\n' % (gapSum,totalLen,gapSum*100/len(reference)))
        else: verbose('qualtofa: option -a/--no-match-overlap: %d of %d total bases (%.1f%%) in the consensus are overlapping matchs.\n' % (gapSum,totalLen,float(gapSum)*100/len(reference)))
        refOffset = 0
        for index in range(0,len(interSeqGaps)):
            if interSeqGaps[index] < 0:   #if matching sequence overlaps...
                for count in range(0,abs(interSeqGaps[index])): reference.insert(matchRanges[index][1] + refOffset + 1, '-')
                refOffset += abs(interSeqGaps[index])
            alignmentIndexs[index + 1][0] += refOffset
            alignmentIndexs[index + 1][1] += refOffset
##########################################################################################################################################

def getProp(call, qual):
    equivalancies = {'A':0,'C':1,'G':2,'T':3,'N':4}
    try: callIndex = equivalancies[call.upper()]
    except KeyError: errorExit('getProp: Non-neucleotide base %s encountered...' % call)
    if callIndex == 4:
        return float(max(qual[:4]))/float(qual[-1])
    return float(qual[callIndex])/float(qual[-1])    
    
###Consensous Creation (-M, -S options implimentation)############################################################################
if isAlignment:
    verbose('qualtofa: Creating consensus...')
    numMasked, numQualMasked, SNPmasked, SNPqualmasked, numPropMasked, SNPpropMasked, SNPcount = (0,0,0,0,0,0,0)
    if sum(options.mask_SNPs + options.mask_consensus) > 0: includeMaskedCon = True
    else: includeMaskedCon = False
    rawConsensous = [[] for index in range(0,len(reference))]
    if includeMaskedCon: maskedRawCon = [[] for index in range(0,len(reference))]
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            rIndex = alignmentIndexs[cIndex][0] + bIndex
            refChar = reference[rIndex]
            cntgChar = contigs[cIndex][bIndex]
            rawConsensous[rIndex].append(cntgChar)
            if includeMaskedCon:
                if cntgChar.isupper() and refChar.isalpha() and refChar != cntgChar:
                    SNPcount += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0]:
                        cntgChar = 'N'
                        SNPqualmasked += 1
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        cntgChar = 'N'
                        SNPpropMasked += 1
                    if qualities[cIndex][bIndex][-1] < options.mask_SNPs[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_SNPs[1]:
                        SNPmasked += 1
                if contigs[cIndex][bIndex].isalpha():
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0]:
                        numQualMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numPropMasked += 1
                        if contigs[cIndex][bIndex].isupper(): cntgChar = 'N'
                        else: cntgChar = 'n'
                    if qualities[cIndex][bIndex][-1] < options.mask_consensus[0] or getProp(contigs[cIndex][bIndex],qualities[cIndex][bIndex]) < options.mask_consensus[1]:
                        numMasked += 1
                maskedRawCon[rIndex].append(cntgChar)           
    conSequence = [getIUPAC(charList) for charList in rawConsensous]
    if includeMaskedCon: maskedConSeq = [getIUPAC(charList) for charList in maskedRawCon]
    verbose('Complete\n')
    totalLen = len(conSequence)
    if sum(options.mask_SNPs) > 0: verbose('qualtofa: %d of %d total consensus SNPs (%.1f%%) were masked.\n' % (SNPmasked,SNPcount, float(SNPmasked*100)/SNPcount))
    if options.mask_SNPs[0] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPqualmasked,SNPcount, float(SNPqualmasked*100)/SNPcount, options.mask_SNPs[0]))
    if options.mask_SNPs[1] > 0: verbose('qualtofa: option -S/--mask-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropMasked,SNPcount, float(SNPpropMasked*100)/SNPcount, options.mask_SNPs[1]))
    if options.mask_consensus[0] > 0: verbose('qualtofa: %d of %d total consensus bases (%.1f%% )were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen))        
    if options.mask_consensus[0] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_consensus[0]))        
    if options.mask_consensus[1] > 0: verbose('qualtofa: option -M/--mask-consensus: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_consensus[1]))        
#############################################################################################################################

        
###Contigs SNP Detection/Masking(-s option implimentation)###################################################################
if isAlignment:
    SNPcovNum, SNPpropNum, SNPmaskedNum, SNPcount = (0,0,0,0)
    SNPs = []
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            refPos = bIndex +  alignmentIndexs[cIndex][0]
            refChar = reference[refPos]
            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
            if cntgChar.isupper() and refChar.isalpha() and cntgChar.isalpha() and refChar != cntgChar:
                SNPcount += 1
                SNPs.append([refPos,refChar,headers[cIndex],bIndex,cntgChar] + qualities[cIndex][bIndex])
                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0]:
                    contigs[cIndex][bIndex] = 'N'
                    SNPcovNum += 1
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
                    contigs[cIndex][bIndex] = 'N'
                    SNPpropNum += 1
                if qualities[cIndex][bIndex][-1] < options.mask_contig_SNPs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contig_SNPs[1]:
                    SNPmaskedNum += 1
    totalLen = sum(map(len,contigs))
    if sum(options.mask_contig_SNPs) > 0: verbose('qualtofa: %d of %d total SNPs (%.1f%%) were masked\n' % (SNPmaskedNum,SNPcount,float(SNPmaskedNum*100)/SNPcount)) 
    if options.mask_contig_SNPs[0] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than %d coverage depth.\n' % (SNPcovNum,SNPcount,float(SNPcovNum*100)/SNPcount,options.mask_contig_SNPs[0])) 
    if options.mask_contig_SNPs[1] > 0: verbose('qualtofa: option -s/--mask-contig-SNPs: %d of %d total SNPs (%.1f%%) have less than a %.2f call proportion.\n' % (SNPpropNum,SNPcount,float(SNPpropNum*100)/SNPcount,options.mask_contig_SNPs[1]))     
#############################################################################################################################

###Contigs Masking(-m option implimentation)#################################################################################
if sum(options.mask_contigs) > 0:
    numMasked, numQualMasked, numPropMasked = (0,0,0)
    for cIndex in range(0,len(contigs)):
        for bIndex in range(0,len(contigs[cIndex])):
            cntgChar = copy.deepcopy(contigs[cIndex][bIndex])
            if cntgChar.isalpha():
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0] or getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    numMasked += 1
                if qualities[cIndex][bIndex][-1] < options.mask_contigs[0]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numQualMasked += 1
                if getProp(cntgChar,qualities[cIndex][bIndex]) < options.mask_contigs[1]:
                    if contigs[cIndex][bIndex].isupper(): contigs[cIndex][bIndex] = 'N'
                    else: contigs[cIndex][bIndex] = 'n'
                    numPropMasked += 1             
    totalLen = sum(map(len,contigs))
    verbose('qualtofa: %d bases of %d total bases (%.1f%%) were masked.\n' % (numMasked,totalLen, float(numMasked*100)/totalLen)) 
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than %d coverage depth.\n' % (numQualMasked,totalLen, float(numQualMasked*100)/totalLen, options.mask_contigs[0]))  
    verbose('qualtofa: option -m/--mask-contigs: %d of %d total bases (%.1f%%) have less than a %.2f call proportion.\n' % (numPropMasked,totalLen, float(numPropMasked*100)/totalLen, options.mask_contigs[1]))      
#############################################################################################################################

###Contianed Contigs file saving#############################################################################################
if len(containedContigs) > 0:
    outPath = qualPath + '_contained_contigs.fsa'
    verbose('qualtofa: Saving contained contigs to %s...' % outPath)
    saveFasta(outPath, *zip(*containedContigs))
    verbose('Complete\n')
#############################################################################################################################

###SNP file saving###########################################################################################################
if options.save_SNPs:
    outPath = qualPath + '_SNPs.qual'
    verbose('qualtofa: Saving %d SNPs to %s...' % (len(SNPs),outPath))
    outHandle = open(outPath, 'w')
    outHandle.write('Schema:\nPosition\tReference\tName\tIndex\tcall\tA\tC\tG\tT\tdash\tsum\n')
    for SNP in SNPs: outHandle.write('\t'.join(map(str,SNP)) + '\n')
    outHandle.close()
    verbose('Complete\n')
#############################################################################################################################

###Contig sequence alignment#################################################################################################
if isAlignment and options.dont_align_contigs == False:
    contigs = [['-'] * alignmentIndexs[index][0] + contigs[index] for index in range(0,len(contigs))]
#############################################################################################################################

###Output file Saving########################################################################################################
verbose('qualtofa: Saving fasta output to %s...' % (qualPath + '.fsa'))
conHeader = 'Consensus'
maskedConHeader = 'Masked Consensus'    
if isAlignment:
    outputHeads, outputSeqs = ([refHeader,conHeader],[reference,conSequence])
    if includeMaskedCon:
        outputHeads.append(maskedConHeader)
        outputSeqs.append(maskedConSeq)
else: outputHeads, outputSeqs = ([],[])
if options.include_contigs:
    outputHeads += headers
    outputSeqs += contigs
outPath = os.path.join(os.getcwd(), os.path.basename(qualPath) + '.fsa')
saveFasta(outPath, outputHeads, outputSeqs)
verbose('Complete\n')
if options.save_run_info:
    runtimeOutPath = os.path.join(os.getcwd(), 'qualtofa_runtime_output_%s.txt' % timeStamp)
    verbose('qualtofa: option -i/--save-run-info: saving run-time output to %s...' % runtimeOutPath)
    runHandle = open(runtimeOutPath, 'w')
    for line in printLog: runHandle.write(line)
    runHandle.close()
    verbose('Complete\n')
#############################################################################################################################