Allcomb.py

From OpenWetWare
Jump to navigationJump to search

Copy and paste the following into a text editor and save it as allcomb.py to use the script.

#!/usr/bin/env python
 
#############################################################################################################################
#allcomb [options] <file path to text>                                                                                      #
#Example: python allcomb.py -i 2 -g 2 ../myFile.txt                                                                         #
#Version: 1.2   Last Modified: 2/25/2010     Author: Zachary S. L. Foster                                                   #
#############################################################################################################################

####################################################################################################
# allcomb [options] <file path to text>                                                            #
#--------------------------------------------------------------------------------------------------#
# Example: python allcomb.py -i 2 -g 2 ../myFile.txt                                               #
#--------------------------------------------------------------------------------------------------#
# Version: 1.2   Last Modified: 2/25/2010     Author: Zachary S. L. Foster                         #
# Intended uses:                                                                                   #
#  1)Find all possible combinations of elements (or group of elements) for every line of a         #
#    tab-delimitated text file and output every combination, on its own line, in a .txt file.      #
#--------------------------------------------------------------------------------------------------#
# Modifiers:                                                                                       #
#  -d  : Save debug log to current working directory.                                              #
#  -g  : Specify the number of elements in to treat as a group when combining.                     # 
#  -i  : Specify the number of columns in front to ignore when making combinations.                #
#        (Data in ignored columns is preserved in the first columns of the output file)            #
####################################################################################################


###Imports / Variable Initialization#########################################################################################
import os, string, sys, time
defArgList  = ['allcomb.py','-g','1','-i','1','C:/Python26/allcombdef.txt'] #argument list substituted for sys.argv when the script is imported rather than run directly (e.g. while testing in the Python GUI, IDLE)
argList     = sys.argv #argument list supplied by the user; the first entry is the script name
argNum      = len(argList) #the number of arguments supplied, counting the script name itself
minArgNum   = 1 #the number of required arguments (the input file path), not counting the script name
saveDebug   = False #is True if the debug log is to be saved (-d modifier)
warning     = False #is set to True if the program encounters any minor errors (e.g. an unrecognized modifier); details are recorded in the debug log
printOut    = False #is set to True when the -p modifier is supplied; NOTE(review): meant to print the results to standard output, but nothing in the visible code reads it
helpOnly    = False #is True when no arguments are given; only the help menu is printed
savePath    = './' #the directory in which the output will be saved
defSavePath = 'C:/Python26/' #save path used instead of savePath when the script is imported (e.g. while testing in IDLE)
modifiers   = [] #eventually contains a list of all modifiers and their arguments supplied by the user
allMods     = 'dpig' #all the modifiers recognized by the script, one letter each
activeMods  = '' #a string containing the letters of all modifiers specified by the user
modArgs     = [] #arguments supplied for the modifiers; assumed to be in the same order as the modifiers
outData     = [] #eventually holds the data that will be saved in the output file
debugLog    = ['***********DEBUG LOG***********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
groupSize   = 1 #the number of consecutive elements treated as one group when combining (set by -g)
ignoreSize  = 1 #the number of columns to preserve at the start of each line (set by -i)
#############################################################################################################################

#Error handling function#####################################################################################################
#>Is called when the script encounters a fatal error                                                                        #
#>Prints the debug log to standard out (usually the screen)                                                                 #
def errorExit():
    """Report a fatal error and terminate the script.

    Prints a notice followed by every entry of the module-level debugLog to
    standard output, then exits with a non-zero status so that shells and
    pipelines calling the script can tell that the run failed.

    Raises:
        SystemExit: always, with exit status 1.
    """
    print('The program was forced to exit prematurely, printing debug log...\n')
    for entry in debugLog:
        print(entry)
    sys.exit(1) #a bare sys.exit() would report success (status 0) after a fatal error
#############################################################################################################################

def printHelp():
    """Print the boxed usage/help menu to standard output, one row per line."""
    helpRows = (
        '/--------------------------------------------------------------------------------------------------\\',
        '| allcomb [options] <file path to text>                                                            |',
        '|--------------------------------------------------------------------------------------------------|',
        '| Example: python allcomb.py -i 2 -g 2 ../myFile.txt                                               |',
        '|--------------------------------------------------------------------------------------------------|',
        '| Version: 1.2   Last Modified: 2/25/2010     Author: Zachary S. L. Foster                         |',
        '| Intended uses:                                                                                   |',
        '|  1)Find all possible combinations of elements (or group of elements) for every line of a         |',
        '|    tab-delimitated text file and output every combination, on its own line, in a .txt file.      |',
        '|--------------------------------------------------------------------------------------------------|',
        '| Modifiers:                                                                                       |',
        '|  -d  : Save debug log to current working directory.                                              |',
        '|  -g  : Specify the number of elements in to treat as a group when combining.                     |',
        '|  -i  : Specify the number of columns in front to ignore when making combinations.                |',
        '|        Data in ignored columns is preserved in the first columns of the output file.             |',
        '\\--------------------------------------------------------------------------------------------------/',
    )
    for row in helpRows: #each row is emitted exactly as written, followed by a newline
        print(row)


###Argument Interprtation####################################################################################################
#>Parses the arguments supplied by the user, or the default values if the script is being run on IDLE                       #
#>If no arguments are given, the help menu is printed                                                                       #
#>Modifiers and their arguments are isolated from the raw input for later processing (in Modifier Interpretation)           #
#Selects the argument list (command line, or the built-in defaults when the script is imported), decides whether only the
#help menu should be shown, validates the input path and separates the modifiers from the required trailing argument.
if __name__ == '__main__': #If the program is being run directly rather than imported (e.g. by the Python GUI, IDLE)...
    if argNum <= minArgNum: #If fewer than the minimum number of arguments necessary are supplied...
        if argNum == 1: #If nothing but the script name is supplied...
            helpOnly = True
            printHelp() #prints help menu
        else:
            debugLog.append('Error: Too few arguments supplied\n')
            errorExit()
else: #If the script is being imported (e.g. into IDLE)
    argList  = defArgList #use default arguments
    argNum   = len(defArgList)
    if argNum == 1: #If the default list holds nothing but the script name...
        helpOnly = True
        printHelp() #prints help menu
    savePath = defSavePath #sets the save path to the default, specified in the variable initialization section
    debugLog.append('Alert: default debugging input arguments are being used\n')
if not helpOnly: #if arguments were supplied...
    inPath = argList[-1] #the path to the file containing the input data is always the last argument
    #The path is checked for both the command-line and the default argument list; a directory is rejected as well,
    #because it would pass a plain existence test and then fail when the script tries to open it.
    if not os.path.exists(inPath) or os.path.isdir(inPath):
        debugLog.append('Error: Invalid file path to input data')
        errorExit() #end the program
    if argNum > minArgNum + 1: #if modifiers are present (i.e. more than the minimum number of arguments)
        modifiers = argList[1:-minArgNum] #everything between the script name and the required arguments is a modifier or modifier argument
#############################################################################################################################
        
if helpOnly == False:
    ###Modifier Interpretation###############################################################################################
    #>Parses any modifiers and modifier arguments determined by the previous section of code, "Argument Interpretation"     #
    #>Given modifiers are compared against the list of known modifiers (allMods)                                            #
    #>Matches found change the appropriate variable to produce the desired effect of the modifier                           #
    #>Invalid or missing modifier arguments are recorded in the debug log and end the script via errorExit()                #
    for mod in modifiers: #loops through the list of modifiers and modifier arguments
        if len(mod) == 2 and mod.startswith('-'): #list entry considered a modifier if it is a dash followed by one character
            activeMods += mod[1] #collects the modifier letters in the order they were given
        else:
            modArgs.append(mod) #assumes everything else to be a modifier argument
    for letter in activeMods: #applies every modifier in turn
        if letter not in allMods: #if the modifier is not recognized by this script...
            debugLog.append('Warning: Unexpected modifier: ' + letter + '\n')
            warning = True
        elif letter == 'd': #if -d is supplied...
            saveDebug = True #The debug log will be saved to the current working directory
        elif letter == 'p': #if -p is supplied...
            printOut = True #The results will be printed to the standard output (usually the screen)
        elif letter in 'gi': #-g and -i each consume the next non-processed modifier argument
            if len(modArgs) == 0: #if the list of modifier arguments is empty...
                debugLog.append('Error: Modifier argument not supplied for -' + letter + '\n')
                errorExit() #exit the script
            rawValue = modArgs.pop(0) #the argument is removed from the list so the next modifier gets the following one
            try:
                modValue = int(rawValue)
            except ValueError: #the argument is not a whole number
                debugLog.append('Error: Argument for -' + letter + ' must be an integer, not "' + rawValue + '"\n')
                errorExit() #exit the script
            if letter == 'g':
                if modValue < 1: #a group must hold at least one element
                    debugLog.append('Error: Argument for -g must be at least 1\n')
                    errorExit() #exit the script
                groupSize = modValue
            else:
                if modValue < 0: #a negative number of columns cannot be ignored
                    debugLog.append('Error: Argument for -i must not be negative\n')
                    errorExit() #exit the script
                ignoreSize = modValue

    #########################################################################################################################

    ###Input File Parsing and File procedures################################################################################
    #Reads the input file and turns every non-blank line into [ignoredColumns, groups]:
    #  ignoredColumns - the first ignoreSize columns, re-joined with tabs (an empty string when ignoreSize is 0)
    #  groups         - the remaining columns, re-joined with tabs in consecutive runs of groupSize
    inData = []
    if groupSize < 1 or ignoreSize < 0: #guards the modulo and slicing below against unusable sizes
        debugLog.append('Error: Group size must be at least 1 and the number of ignored columns must not be negative\n')
        errorExit()
    inHandle = open(inPath, 'r') #opens the file containing the input data
    try:
        inRaw = inHandle.readlines() #saves all of the file into inRaw
    finally:
        inHandle.close() #closes the file object even if reading fails
    for lineIndex, line in enumerate(inRaw):
        lineParts = line.split() #splits on any run of whitespace (tabs or spaces)
        if len(lineParts) == 0:
            continue #blank lines carry no data
        ignored = '\t'.join(lineParts[:ignoreSize]) #the leading columns are set by -i, independently of the group size
        elements = lineParts[ignoreSize:]
        if len(elements) % groupSize != 0: #the remaining columns must divide evenly into groups
            debugLog.append('Error: Invalid input data: line ' + str(lineIndex + 1) + ' has ' + str(len(elements)) +
                            ' element(s) after the ignored columns, which is not a multiple of the group size (' +
                            str(groupSize) + ')\n')
            errorExit()
        grouped = ['\t'.join(elements[start:start + groupSize]) for start in range(0, len(elements), groupSize)]
        inData.append([ignored, grouped])
    #########################################################################################################################

    ###Output data generation################################################################################################
    #Builds the output pieces: for every input line, each unordered pair of groups (in input order) becomes one
    #output line, preceded by the line's preserved leading columns when there are any.
    outData = []
    for preserved, groups in inData:
        for position, firstGroup in enumerate(groups):
            for secondGroup in groups[position + 1:]: #only groups after the current one, so no pair is repeated
                if preserved:
                    outData.append(preserved + '\t')
                outData.append(firstGroup + '\t' + secondGroup + '\n')
    #########################################################################################################################

    ###Out file writing and saving procedures################################################################################
    #The output file is named after the input file and is placed in the save directory.
    fileSavePath = '%s%s_all_combinations.txt' % (savePath, os.path.basename(inPath))
    outHandle = open(fileSavePath, 'w') #opens the file object for saving the output
    outHandle.writelines(outData) #writes every piece in order, exactly as generated
    outHandle.close() #closes file object
    #########################################################################################################################

    ###Debug Saving procedures###############################################################################################
    if saveDebug: #only when the -d modifier was supplied
        debugPath = '%s%s_debug.txt' % (savePath, os.path.basename(inPath)) #named after the input file, in the save directory
        debugHandle = open(debugPath, 'w')
        debugHandle.writelines(debugLog) #entries are written back to back, exactly as recorded
        debugHandle.close()
    #########################################################################################################################