Baseanno.py: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
mNo edit summary
mNo edit summary
 
Line 1: Line 1:
Copy and past the following into a text editor and save it as sumqual.py to use the script.
Copy and past the following into a text editor and save it as baseanno.py to use the script.
<pre>
<pre>
#!/usr/bin/env python
#!/usr/bin/env python

Latest revision as of 17:47, 14 January 2010

Copy and paste the following into a text editor and save it as baseanno.py to use the script.

#!/usr/bin/env python

#############################################################################################################################
#baseanno.py [options] <text file with annotations and start/end indices>                                                   #
#Example: python baseanno.py -p myAnnotations.txt                                                                           #
#Version: 1.0   Last Modified: 12/22/2009   Author: Zachary S. L. Foster                                                    #
#---------------------------------------------------------------------------------------------------------------------------#
#Argument: a whitespace-delimitated text (.txt) file which contains a list of annotations in the following format:          #
#   Annotation     Start index    End index     ...                                                                         #
#   *The name of the annotation must have no spaces; if it does, use the -t modifier                                        #
#---------------------------------------------------------------------------------------------------------------------------#
#>Outputs a tab-delimitated list of base indices and their respective annotations                                           #
#>If a single base has more than one annotation, then the additional annotations are added after the first on the same line #
#---------------------------------------------------------------------------------------------------------------------------#
#Modifiers:                                                                                                                 #
#   -d      : Save debug log to current working directory                                                                   #
#   -p      : Print output to standard out (usually the shell/screen being called from)                                     #
#   -t      : Use if the name of your annotations have spaces AND the input data is tab-delimitated                         #
#############################################################################################################################

###Imports / Variable Initialization#########################################################################################
import os, string, sys
defArgList  = ['baseanno.py','-p','-t','C:/Python26/baseannoTestData.txt'] #argument list used while testing the script with the Python GUI IDLE
argList     = sys.argv #argument list supplied by user
argNum      = len(argList) #the number of arguments supplied
minArgNum   = 1 #the smallest amount of arguments with which it is possible to run the script
saveDebug   = False #is 'True' if the -d modifier is supplied; the debug log is then saved (name must match the one read when saving the debug log)
printOut    = False #is 'True' if the -p modifier is supplied; the output is then also printed to standard out
warning     = False #is set to 'True' if the program encounters any minor errors during the analysis; recorded in debug log
tabDelim    = False #is 'True' if the -t modifier is supplied; enforces tab-delimitation in input file
helpOnly    = False #is 'True' when no arguments are given; only help menu is printed
debugLog    = ['*********DEBUG LOG*********\n'] #where all errors/anomalies are recorded; saved if the -d modifier is supplied
savePath    = './' #the directory in which the output will be saved
defSavePath = 'C:/Python26/' #save path used during script testing with the python GUI IDLE
inData      = [] #a list of lists containing the parsed lines of the input file [[annotation,start,end],...]
parsedData  = [] #NOTE(review): not referenced by the rest of the visible script; kept so the name stays defined
outData     = [] #a list of strings corresponding to each line in the output file
modifiers   = [] #eventually contains a list of all modifiers and their arguments supplied by the user
allMods     = 'dpt' #all the modifiers recognized by the script
activeMods  = '' #a string containing all modifiers specified by the user
modArgs     = [] #arguments supplied for the modifiers; assumed in the same order as the modifiers
#############################################################################################################################

def printHelp():
    """Print the boxed usage/help menu to standard out.

    Takes no arguments and returns None. Each line is printed with the
    single-argument, parenthesized form of print, which behaves the same
    under the Python 2 print statement and the Python 3 print function.
    """
    helpLines = (
        '/---------------------------------------------------------------------------------------\\',
        '| baseanno.py [options] <text file with annotations and start/end indices>              |',
        '|---------------------------------------------------------------------------------------|',
        '| Example: python baseanno.py -p myAnnotations.txt                                      |',
        '|---------------------------------------------------------------------------------------|',
        '| Version: 1.0   Last Edited: 12/22/2009                                                |',
        '|  >Outputs a tab-delimitated list of base indices and their respective annotations     |',
        '|  >If a single base has more than one annotation, then the additional annotations are  |',
        '|    added after the first on the same line                                             |',
        '|---------------------------------------------------------------------------------------|',
        '| Modifiers:                                                                            |',
        '|  -d  : Save debug log                                                                 |',
        '|  -p  : Print output to standard out (usually the shell being called from)             |',
        '|  -t  : Use if your annotations have spaces AND the input data is tab-delimitated      |',
        '\\---------------------------------------------------------------------------------------/',
    )
    for helpLine in helpLines:
        print(helpLine)

#Error handling function#####################################################################################################
#>Is called when the script encounters a fatal error                                                                        #
#>Prints the debug log to standard out (usually the screen)                                                                 #
def errorExit():
    """Print every entry of the module-level debugLog, then terminate.

    Raises SystemExit with status 1 so that a calling shell or pipeline can
    tell the run failed (a bare sys.exit() would report success).
    """
    print('The program was forced to exit prematurely, printing debug log...\n')
    for line in debugLog:
        print(line)
    sys.exit(1) #non-zero status: this function is only reached on fatal errors
#############################################################################################################################

###Argument Interpretation###################################################################################################
#>Parses the arguments supplied by the user, or the default values if the script is being run on IDLE                       #
#>If no arguments are given, the help menu is printed                                                                       #
#>Modifiers and their arguments are isolated from the raw input for later processing (in Modifier Interpretation)           #
if __name__ == '__main__': #Run as a stand-alone script, i.e. not imported from the Python GUI, IDLE
    if argNum <= minArgNum: #Not enough arguments to name an input file...
        if argNum == 1: #only the script name was given: show the help menu and do nothing else
            helpOnly = True
            printHelp()
        else:
            debugLog.append('Error: Too few arguments supplied\n')
            errorExit()
    elif not os.path.exists(argList[-1]): #the last argument must be an existing path to the input data
        debugLog.append('Error: Invalid file path to input data')
        errorExit() #end the program
else: #Imported (e.g. from IDLE): fall back to the default testing arguments and save path
    argList  = defArgList
    argNum   = len(argList)
    savePath = defSavePath #the default save path specified in the variable initialization section
    if argNum == 1: #the default list holds only the script name: show the help menu
        helpOnly = True
        printHelp()
    debugLog.append('Alert: default debugging input arguments are being used\n')
if not helpOnly: #arguments were supplied...
    inPath = argList[-1] #the last argument is the path to the input file
    if argNum > minArgNum + 1: #more than the script name and input path: modifiers are present
        modifiers = argList[1:-minArgNum] #everything between the script name and the required arguments
#############################################################################################################################


if not helpOnly:
    ###Modifier Interpretation###############################################################################################
    #>Parses any modifiers and modifier arguments determined by the previous section of code, "Argument Interpretation"     #
    #>Given arguments are compared against a list of known arguments                                                        #
    #>Matches found change the appropriate variable for the desired effect of the modifier the script                       #
    for mod in modifiers: #loops through the list of modifiers and modifier arguments (no iterations if none were given)
        #an entry is a modifier if it is exactly two characters and starts with '-';
        #the length is tested first so that an empty-string argument cannot raise an IndexError
        if len(mod) == 2 and mod.startswith('-'):
            activeMods += mod[1:] #collects the modifier letters into activeMods...
        else:
            modArgs.append(mod) #assumes everything else to be a modifier argument
    for letter in activeMods: #loops through supplied modifiers
        if letter not in allMods: #the modifier is not recognized by this script
            debugLog.append('Warning: Unexpected modifier: ' + letter + '\n')
            warning = True #recorded as a minor error; the run continues
        elif letter == 'd': #if -d is supplied...
            saveDebug = True #The debug log will be saved to the current working directory
        elif letter == 'p': #if -p is supplied...
            printOut = True #The results will be printed to the standard output (usually the screen)
        elif letter == 't': #if -t is supplied...
            tabDelim = True #tab-delimitation is enforced for input file
    #########################################################################################################################

    ###Input data parsing and file procedures################################################################################
    #>Every line with at least three fields is stored in inData as [annotation,start,end]; shorter lines are skipped
    #>A start/end field that is not an integer raises ValueError
    with open(inPath, 'r') as inHandle: #'with' closes the file even if a line fails to parse
        for line in inHandle: #loops through the lines of the input file
            if tabDelim:
                lineParts = line.split('\t') #the contents of the line, delimitated by tabs only
            else:
                lineParts = line.split() #the contents of the line, delimitated by any run of spaces/tabs
            if len(lineParts) >= 3:
                annotation = lineParts[0]
                start = int(lineParts[1]) #int() tolerates surrounding whitespace such as a trailing newline
                end = int(lineParts[2])
                inData.append([annotation,start,end]) #adds parsed line to the list that will be analyzed
    #########################################################################################################################

    ###Consensus creation####################################################################################################
    #>Builds one output line per base index (1..highest index seen) and appends, tab-separated, every annotation covering it
    #>An annotation whose start is greater than its end is treated as covering the same span (end..start)
    #>Indices below 1 are clamped to 1, since a negative list index would silently annotate bases at the END of the table
    lastBaseIndex = 0
    for anno in inData: #the table must reach the largest index mentioned by any annotation, start or end
        lastBaseIndex = max(lastBaseIndex, anno[1], anno[2])
    outData = [str(index) for index in range(1, lastBaseIndex + 1)]
    for anno in inData:
        start = min(anno[1], anno[2])
        end = max(anno[1], anno[2])
        if start < 1:
            debugLog.append('Warning: Annotation ' + anno[0] + ' starts before base 1; clamped to base 1\n')
            warning = True
            start = 1
        for index in range(start - 1,end): #base i lives at list position i-1
            outData[index] += '\t' + anno[0]
    #########################################################################################################################

    ###Out file writing and saving procedures################################################################################
    #>The output file is named after the input file, with its extension (of any length, or none) replaced by '_byBase.txt'
    savePath += os.path.splitext(os.path.basename(inPath))[0] + '_byBase.txt' #the path to where the output is saved
    with open(savePath, 'w') as outHandle: #'with' closes the file even if a write fails
        for line in outData: #writes every line of outData to the output file...
            outHandle.write(line + '\n')
    #activeMods is always defined, so this test is safe whether or not -p was supplied
    if 'p' in activeMods: #if the -p modifier is supplied
        for line in outData: #prints every line to the standard output...
            print(line)
    #########################################################################################################################

    ###Debug Saving procedures###############################################################################################
    #activeMods is always defined, so this test is safe whether or not -d was supplied
    if 'd' in activeMods: #if the -d modifier is supplied...
        DebugSavePath = savePath[:-4] + '.BADebug.txt' #the output path with its last four characters replaced
        with open(DebugSavePath, 'w') as debugHandle: #opens the path computed above for saving the debug log
            for line in debugLog:
                debugHandle.write(line)
    #########################################################################################################################