TChan/Notebook/2007-5-3: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
No edit summary
No edit summary
Line 1: Line 1:
=Presentation=
** Lessons learned
** Stuff done
=Allelic Frequency=
=Allelic Frequency=
* '''Input''': rs#
* '''Input''': rs# (string)
* '''Output''': allelic frequency
* '''Output''': allelic frequency data (list of lists (of lists, in some cases))
 
==Sample Input==
<pre>"rs11200538"</pre>
 
==Code==
<pre>
import urllib
 
# Definitions of functions
 
# Returns the dbSNP URL for the search term
def parse_for_dbSNP_search(search_term):
    #search_term will be initial input, the RS# in a string (ie. "rs11200538" or "11200538")
    parsed_term = search_term.replace("rs", "")
    return "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=%s" % parsed_term
   
# Grabs the dbSNP HTML search_file
def get_dbSNP_search_file(URL, genl_search_file):
    URL_stream_genl = urllib.urlopen(URL)
    page = URL_stream_genl.read()
    URL_stream_genl.close()
    genl_search_file.write(page)
 
# Extracts out the relevant allelic frequency line from the dbSNP HTML search file
def extract_allelic_freq_line(dbSNP_file):
    for line in dbSNP_file:
        if line.find('''Alleles</TH></TR><TR ><TH  bgcolor="silver">ss''') != -1:
            return line
        elif line.find('''There is no frequency data.''') != -1:
            better_luck_next_time = "There is no frequency data."
            return better_luck_next_time
 
# Divides the relevant allelic frequency line into separate HTML-table 'rows', which delineate the populations
def divide_freq_line_into_TRs(freq_line):
    TR_list = []
    while freq_line.rfind("<TR") != -1:
        TR_instance = freq_line.rfind("<TR")
        TR_list.insert(0, freq_line[TR_instance:(len(freq_line))])
        freq_line = freq_line[0:TR_instance]
    TR_list.insert(0, freq_line)
    return TR_list
 
# Parses out (1) categories, and (2) population rows
def extract_categories_and_population_TRs(categories, population_list, TR_list):
    for element in TR_list:
        if element.find('''ss#''') != -1:
            categories = element
        elif element.find('''<td ><a href="snp_viewTable.cgi?pop=''') != -1:
            population_list.append(element)
    return categories, population_list
 
def parse_IMG_tags_out_of_category(category):
    if "<IMG" in category:
        category = category[0:category.find("<IMG")]
    return category
 
def parse_BR_tags_out_of_category(category):
    br = "<BR>", "<br>"
    if category.endswith(br):
        category = category[0:len(category)-4]
    category = category.replace("<BR>", ' ')
    category = category.replace("<br>", ' ')
    return category
 
# Returns cleaned-up categories (ie. ss#, Population, etc.)
def parse_categories(categories):
    categories_list = []
    while categories.rfind('''<TH  bgcolor="silver">''') != -1:
        category_instance = categories.rfind('''<TH  bgcolor="silver">''')
        end_tag_instance = categories.rfind('''</TH>''')
        categories_list.insert(0, categories[(category_instance+22):end_tag_instance])
        categories = categories[0:category_instance]
   
    for index in range(len(categories_list)):
        categories_list[index] = parse_IMG_tags_out_of_category(categories_list[index])
        categories_list[index] = parse_BR_tags_out_of_category(categories_list[index])
    return categories_list
 
 
# Extraction functions to parse allelic frequency data from populations
 
# Returns whether or not the particular population in population_list has an ss_numb
def ss_numb_in_population(population):
    if '''<a href="snp_ss.cgi?ss=''' in population:
        return True
    else:
        return False
 
def extract_ss_numb(population):
    #SS_numb START: after '''<a href="snp_ss.cgi?ss='''
    #SS_numb END: before the '''">''' immediately after '''<a href="snp_ss.cgi?ss='''
    if ss_numb_in_population(population):
        ss_numb = population[population.find('''<a href="snp_ss.cgi?ss=''')+23:population.find('''">''',
                            population.find('''<a href="snp_ss.cgi?ss='''))]
        last_index = population.find('''">''', population.find('''<a href="snp_ss.cgi?ss=''')) + 2
    else:
        ss_numb = ''
        last_index = 0
    return ss_numb, last_index
 
def extract_population_name(population, last_index):
    #population_name START: after the '''">''' immediately after '''<a href="snp_viewTable.cgi?pop='''
    #population_name END: before the '''</a>''' that occurs after '''<a href="snp_viewTable.cgi?pop='''
    population_name = population[population.find('''">''', last_index)+2:
                                    population.find('''</a>''', population.find('''<a href="snp_viewTable.cgi?pop='''))]
    last_index = population.find('''</a>''', population.find('''<a href="snp_viewTable.cgi?pop=''')) + 5
    return population_name, last_index
 
def extract_group(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    group = population[start_point:population.find('''</td>''', start_point)]
    last_index = population.find('''</td>''', start_point) + 5
    return group, last_index
 
def extract_chrom_cnt(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    chrom_cnt = population[start_point:population.find('''</td>''', start_point)]
    chrom_cnt = chrom_cnt.strip()
    last_index = population.find('''</td>''', start_point)
    return chrom_cnt, last_index
 
def extract_source(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    source = population[start_point:population.find('''</td>''', start_point)]
    source = source.strip()
    last_index = population.find('''</td>''', start_point)
    return source, last_index
 
def extract_allele_combos(num_of_allele_combos, population, last_index):
    # This function works even if there are identical allele combos
    allele_combos = []
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    for i in range(num_of_allele_combos):
        allele_combo = population[start_point:population.find('''</FONT>''', start_point)]
        allele_combos.append(allele_combo)
        last_index = start_point + 5
        start_point = population.find('''<FONT  size="-1">''', population.find('''</FONT>''', start_point)) + 17
    for j in range(num_of_allele_combos):
        allele_combos[j] = allele_combos[j].strip()
    return allele_combos, last_index
 
def extract_HWP(population, last_index):
    # This function works even if the last allele_combo was ''
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    HWP = population[start_point:population.find('''</FONT>''', start_point)]
    HWP = HWP.strip()
    last_index = population.find('''</FONT>''', start_point)
    return HWP, last_index
   
def extract_alleles(num_of_alleles, population, last_index):
    alleles = []
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    for i in range(num_of_alleles):
        allele = population[start_point:population.find('''</FONT>''', start_point)]
        alleles.append(allele)
        last_index = start_point + 1
        start_point = population.find('''<FONT  size="-1">''', population.find('''</FONT>''', start_point)) + 17
    for j in range(num_of_alleles):
        alleles[j] = alleles[j].strip()
    return alleles, last_index
 
# Master function to compile the list of lists (of lists) that holds all the interesting allelic frequency data
def parse_population_list(num_of_allele_combos, num_of_alleles, population_list, master_data_list):
    for index in range(len(population_list)):
        last_index = 0
        ss_numb = ''
        ss_numb, last_index = extract_ss_numb(population_list[index])
        population_name, last_index = extract_population_name(population_list[index], last_index)
        group, last_index = extract_group(population_list[index], last_index)
        chrom_cnt, last_index = extract_chrom_cnt(population_list[index], last_index)
        source, last_index = extract_source(population_list[index], last_index)
        allele_combos, last_index = extract_allele_combos(num_of_allele_combos, population_list[index], last_index)
        HWP, last_index = extract_HWP(population_list[index], last_index)
        alleles, last_index = extract_alleles(num_of_alleles, population_list[index], last_index)
        master_data_list.append([ss_numb, population_name, group, chrom_cnt, source, allele_combos, HWP, alleles])
    return master_data_list
 
 
#BEGIN ACTUAL PROGRAM
search_term = "rs11200538"    # example search_term for now; will be returned by rest of program when finished   
search_file_name = "%s_dbSNP.html" % search_term
 
dbSNP_file = open(search_file_name, 'w')
URL = parse_for_dbSNP_search(search_term)
get_dbSNP_search_file(URL, dbSNP_file)
dbSNP_file.close()


* On further investigation of GeneCards, I found that they just get their [http://www.genecards.org/info.shtml#snp allele frequency data from dbSNP].  Also, since the GeneCard is for the gene relevant to our rs#, allelic frequency data in the GeneCard "SNP/Variants" box does not necessarily give the frequency of our one particular allele.  The allelic frequency data is of all alleles for the gene, ranked by whether or not there is any frequency data on that allele.
dbSNP_file = open(search_file_name, 'r')
freq_line = extract_allelic_freq_line(dbSNP_file)
dbSNP_file.close()


* Thus, I will write code to parse dbSNP XML for the allele frequency data, and return that or "No data available," if dbSNP doesn't have any information.
TR_list = divide_freq_line_into_TRs(freq_line)
categories = ''
population_list = []


* The data for "Population Diversity" is somewhat indecipherable, and the only documentation of it is the following:
categories, population_list = extract_categories_and_population_TRs(categories, population_list, TR_list)


''Population Diversity Data
categories_list = []
categories_list = parse_categories(categories)
num_of_categories = len(categories_list)
num_of_allele_combos = categories_list.count('A/A') + categories_list.count('A/T') + categories_list.count('A/C') + categories_list.count('A/G') + categories_list.count('T/A') + categories_list.count('T/T') +categories_list.count('T/C') + categories_list.count('T/G') + categories_list.count('C/A') + categories_list.count('C/T') + categories_list.count('C/C') + categories_list.count('C/G') + categories_list.count('G/A') + categories_list.count('G/T') + categories_list.count('G/C') + categories_list.count('G/G')
num_of_alleles = categories_list.count('A') + categories_list.count('T') + categories_list.count('C') + categories_list.count('G')


The best single measure of a variation's diversity in different populations is its average heterozygosity. This measure serves as the general probability that both alleles are in a diploid individual or in a sample of two chromosomes. Estimates of average heterozygosity have an accompanying standard error based on the sample sizes of the underlying data, which reflects the overall uncertainty of the estimate. dbSNP’s computation of average heterozygosity and standard error for RefSNP clusters is available online. Please note that dbSNP computes heterozygosity based on the submitted allele frequency for each SNP. If the frequency data for a SNP is not submitted, we cannot compute the heterozygosity value, and therefore the refSNP report will show no heterozygosity estimate.
master_data_list = []
master_data_list.append(categories_list)


Additional population diversity data include population counts, individuals sampled for a variation, genotype frequencies, and Hardy Weinberg probabilities.''
master_data_list = parse_population_list(num_of_allele_combos, num_of_alleles, population_list, master_data_list)
print master_data_list


Thus, it seems like a good idea to output the heterozygosity, though it is somewhat difficult to understand for a user.
</pre>


==Sample Output==
<pre>
['ss#', 'Population', 'Individual Group', 'Chrom. Sample Cnt.', 'Source', 'A/A', 'A/G', 'G/G', 'HWP', 'A', 'G']
['ss16081968', 'HapMap-CEU', 'European', '118', 'IG', ['0.983', '0.017', ''], '1.000', ['0.992', '0.008']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1410">HapMap-HCB', 'Asian', '90', 'IG', ['0.556', '0.356', '0.089'], '0.584', ['0.733', '0.267']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1411">HapMap-JPT', 'Asian', '90', 'IG', ['0.533', '0.356', '0.111'], '0.371', ['0.711', '0.289']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1412">HapMap-YRI', 'Sub-Saharan African', '120', 'IG', ['1.000', '', ''], '1.000', ['', 'tom"><td ></td><td ><a href="snp_viewTable.cgi?pop=1412">HapMap-YRI</a></td><td >Sub-Saharan African</td><td >  120</td><td >IG</td><td ><FONT  size="-1"> 1.000']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1771">CHMJ', 'Asian', '74', 'IG', ['', '', ''], '0.757', ['0.243', 'tom"><td ></td><td ><a href="snp_viewTable.cgi?pop=1771">CHMJ</a></td><td >Asian</td><td >    74</td><td >IG</td><td ><FONT  size="-1">']]
['ss24106683', 'AFD_EUR_PANEL', 'European', '48', 'IG', ['0.917', '0.083', ''], '1.000', ['0.958', '0.042']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1372">AFD_AFR_PANEL', 'African American', '44', 'IG', ['1.000', '', ''], '1.000', ['', 'tom"><td ></td><td ><a href="snp_viewTable.cgi?pop=1372">AFD_AFR_PANEL</a></td><td >African American</td><td >    44</td><td >IG</td><td ><FONT  size="-1"> 1.000']]
['', '<td ></td><td ><a href="snp_viewTable.cgi?pop=1373">AFD_CHN_PANEL', 'Asian', '48', 'IG', ['0.583', '0.333', '0.083'], '0.655', ['0.750', '0.250']]
<pre>


=Presentation=
The categories in master_data_list[0] correspond to the data items in each of the following rows.  For convenience, the allele_combos frequencies and the allele frequencies were collected in to their own lists.
** Lessons learned
** Stuff done

Revision as of 01:32, 3 May 2007

Presentation

    • Lessons learned
    • Stuff done


Allelic Frequency

  • Input: rs# (string)
  • Output: allelic frequency data (list of lists (of lists, in some cases))

Sample Input

"rs11200538"

Code

import urllib

# Definitions of functions

# Returns the dbSNP URL for the search term
def parse_for_dbSNP_search(search_term):
    #search_term will be initial input, the RS# in a string (ie. "rs11200538" or "11200538")
    parsed_term = search_term.replace("rs", "")
    return "http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=%s" % parsed_term
    
# Grabs the dbSNP HTML search_file
def get_dbSNP_search_file(URL, genl_search_file):
    URL_stream_genl = urllib.urlopen(URL)
    page = URL_stream_genl.read()
    URL_stream_genl.close()
    genl_search_file.write(page)

# Extracts out the relevant allelic frequency line from the dbSNP HTML search file
def extract_allelic_freq_line(dbSNP_file):
    for line in dbSNP_file:
        if line.find('''Alleles</TH></TR><TR ><TH  bgcolor="silver">ss''') != -1:
            return line
        elif line.find('''There is no frequency data.''') != -1:
            better_luck_next_time = "There is no frequency data."
            return better_luck_next_time

# Divides the relevant allelic frequency line into separate HTML-table 'rows', which delineate the populations
def divide_freq_line_into_TRs(freq_line):
    TR_list = []
    while freq_line.rfind("<TR") != -1:
        TR_instance = freq_line.rfind("<TR")
        TR_list.insert(0, freq_line[TR_instance:(len(freq_line))])
        freq_line = freq_line[0:TR_instance]
    TR_list.insert(0, freq_line)
    return TR_list

# Parses out (1) categories, and (2) population rows
def extract_categories_and_population_TRs(categories, population_list, TR_list):
    for element in TR_list:
        if element.find('''ss#''') != -1:
            categories = element
        elif element.find('''<td ><a href="snp_viewTable.cgi?pop=''') != -1:
            population_list.append(element)
    return categories, population_list

def parse_IMG_tags_out_of_category(category):
    if "<IMG" in category:
        category = category[0:category.find("<IMG")]
    return category

def parse_BR_tags_out_of_category(category):
    br = "<BR>", "<br>"
    if category.endswith(br):
        category = category[0:len(category)-4]
    category = category.replace("<BR>", ' ')
    category = category.replace("<br>", ' ')
    return category

# Returns cleaned-up categories (ie. ss#, Population, etc.)
def parse_categories(categories):
    categories_list = []
    while categories.rfind('''<TH  bgcolor="silver">''') != -1:
        category_instance = categories.rfind('''<TH  bgcolor="silver">''')
        end_tag_instance = categories.rfind('''</TH>''')
        categories_list.insert(0, categories[(category_instance+22):end_tag_instance])
        categories = categories[0:category_instance]
    
    for index in range(len(categories_list)):
        categories_list[index] = parse_IMG_tags_out_of_category(categories_list[index])
        categories_list[index] = parse_BR_tags_out_of_category(categories_list[index])
    return categories_list


# Extraction functions to parse allelic frequency data from populations

# Returns whether or not the particular population in population_list has an ss_numb
def ss_numb_in_population(population):
    if '''<a href="snp_ss.cgi?ss=''' in population:
        return True
    else:
        return False

def extract_ss_numb(population):
    #SS_numb START: after '''<a href="snp_ss.cgi?ss='''
    #SS_numb END: before the '''">''' immediately after '''<a href="snp_ss.cgi?ss=''' 
    if ss_numb_in_population(population):
        ss_numb = population[population.find('''<a href="snp_ss.cgi?ss=''')+23:population.find('''">''',
                             population.find('''<a href="snp_ss.cgi?ss='''))]
        last_index = population.find('''">''', population.find('''<a href="snp_ss.cgi?ss=''')) + 2
    else:
        ss_numb = ''
        last_index = 0
    return ss_numb, last_index

def extract_population_name(population, last_index):
    #population_name START: after the '''">''' immediately after '''<a href="snp_viewTable.cgi?pop='''
    #population_name END: before the '''</a>''' that occurs after '''<a href="snp_viewTable.cgi?pop='''
    population_name = population[population.find('''">''', last_index)+2:
                                     population.find('''</a>''', population.find('''<a href="snp_viewTable.cgi?pop='''))]
    last_index = population.find('''</a>''', population.find('''<a href="snp_viewTable.cgi?pop=''')) + 5
    return population_name, last_index

def extract_group(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    group = population[start_point:population.find('''</td>''', start_point)]
    last_index = population.find('''</td>''', start_point) + 5
    return group, last_index

def extract_chrom_cnt(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    chrom_cnt = population[start_point:population.find('''</td>''', start_point)]
    chrom_cnt = chrom_cnt.strip()
    last_index = population.find('''</td>''', start_point)
    return chrom_cnt, last_index

def extract_source(population, last_index):
    start_point = population.find('''<td >''', last_index) + 5
    source = population[start_point:population.find('''</td>''', start_point)]
    source = source.strip()
    last_index = population.find('''</td>''', start_point)
    return source, last_index

def extract_allele_combos(num_of_allele_combos, population, last_index):
    # This function works even if there are identical allele combos
    allele_combos = []
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    for i in range(num_of_allele_combos):
        allele_combo = population[start_point:population.find('''</FONT>''', start_point)]
        allele_combos.append(allele_combo)
        last_index = start_point + 5
        start_point = population.find('''<FONT  size="-1">''', population.find('''</FONT>''', start_point)) + 17
    for j in range(num_of_allele_combos):
        allele_combos[j] = allele_combos[j].strip()
    return allele_combos, last_index

def extract_HWP(population, last_index):
    # This function works even if the last allele_combo was ''
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    HWP = population[start_point:population.find('''</FONT>''', start_point)]
    HWP = HWP.strip()
    last_index = population.find('''</FONT>''', start_point)
    return HWP, last_index
    
def extract_alleles(num_of_alleles, population, last_index):
    alleles = []
    start_point = population.find('''<FONT  size="-1">''', last_index) + 17
    for i in range(num_of_alleles):
        allele = population[start_point:population.find('''</FONT>''', start_point)]
        alleles.append(allele)
        last_index = start_point + 1
        start_point = population.find('''<FONT  size="-1">''', population.find('''</FONT>''', start_point)) + 17
    for j in range(num_of_alleles):
        alleles[j] = alleles[j].strip()
    return alleles, last_index

# Master function to compile the list of lists (of lists) that holds all the interesting allelic frequency data
def parse_population_list(num_of_allele_combos, num_of_alleles, population_list, master_data_list):
    for index in range(len(population_list)):
        last_index = 0
        ss_numb = ''
        ss_numb, last_index = extract_ss_numb(population_list[index])
        population_name, last_index = extract_population_name(population_list[index], last_index)
        group, last_index = extract_group(population_list[index], last_index)
        chrom_cnt, last_index = extract_chrom_cnt(population_list[index], last_index)
        source, last_index = extract_source(population_list[index], last_index)
        allele_combos, last_index = extract_allele_combos(num_of_allele_combos, population_list[index], last_index)
        HWP, last_index = extract_HWP(population_list[index], last_index)
        alleles, last_index = extract_alleles(num_of_alleles, population_list[index], last_index)
        master_data_list.append([ss_numb, population_name, group, chrom_cnt, source, allele_combos, HWP, alleles])
    return master_data_list


#BEGIN ACTUAL PROGRAM
search_term = "rs11200538"     # example search_term for now; will be returned by rest of program when finished    
search_file_name = "%s_dbSNP.html" % search_term

dbSNP_file = open(search_file_name, 'w')
URL = parse_for_dbSNP_search(search_term)
get_dbSNP_search_file(URL, dbSNP_file)
dbSNP_file.close()

dbSNP_file = open(search_file_name, 'r')
freq_line = extract_allelic_freq_line(dbSNP_file)
dbSNP_file.close()

TR_list = divide_freq_line_into_TRs(freq_line)
categories = ''
population_list = []

categories, population_list = extract_categories_and_population_TRs(categories, population_list, TR_list)

categories_list = []
categories_list = parse_categories(categories)
num_of_categories = len(categories_list)
num_of_allele_combos = categories_list.count('A/A') + categories_list.count('A/T') + categories_list.count('A/C') + categories_list.count('A/G') + categories_list.count('T/A') + categories_list.count('T/T') +categories_list.count('T/C') + categories_list.count('T/G') + categories_list.count('C/A') + categories_list.count('C/T') + categories_list.count('C/C') + categories_list.count('C/G') + categories_list.count('G/A') + categories_list.count('G/T') + categories_list.count('G/C') + categories_list.count('G/G')
num_of_alleles = categories_list.count('A') + categories_list.count('T') + categories_list.count('C') + categories_list.count('G')

master_data_list = []
master_data_list.append(categories_list)

master_data_list = parse_population_list(num_of_allele_combos, num_of_alleles, population_list, master_data_list)
print master_data_list

Sample Output

['ss#', 'Population', 'Individual Group', 'Chrom. Sample Cnt.', 'Source', 'A/A', 'A/G', 'G/G', 'HWP', 'A', 'G']
['ss16081968', 'HapMap-CEU', 'European', '118', 'IG', ['0.983', '0.017', ], '1.000', ['0.992', '0.008']]
[, '<a href="snp_viewTable.cgi?pop=1410">HapMap-HCB', 'Asian', '90', 'IG', ['0.556', '0.356', '0.089'], '0.584', ['0.733', '0.267']]
[, '<a href="snp_viewTable.cgi?pop=1411">HapMap-JPT', 'Asian', '90', 'IG', ['0.533', '0.356', '0.111'], '0.371', ['0.711', '0.289']]
[, '<a href="snp_viewTable.cgi?pop=1412">HapMap-YRI', 'Sub-Saharan African', '120', 'IG', ['1.000', , ], '1.000', [, 'tom"><a href="snp_viewTable.cgi?pop=1412">HapMap-YRI</a>Sub-Saharan African   120IG 1.000']]
[, '<a href="snp_viewTable.cgi?pop=1771">CHMJ', 'Asian', '74', 'IG', [, , ], '0.757', ['0.243', 'tom"><a href="snp_viewTable.cgi?pop=1771">CHMJ</a>Asian    74IG']]
['ss24106683', 'AFD_EUR_PANEL', 'European', '48', 'IG', ['0.917', '0.083', ], '1.000', ['0.958', '0.042']]
[, '<a href="snp_viewTable.cgi?pop=1372">AFD_AFR_PANEL', 'African American', '44', 'IG', ['1.000', , ], '1.000', [, 'tom"><a href="snp_viewTable.cgi?pop=1372">AFD_AFR_PANEL</a>African American    44IG 1.000']]
[, '<a href="snp_viewTable.cgi?pop=1373">AFD_CHN_PANEL', 'Asian', '48', 'IG', ['0.583', '0.333', '0.083'], '0.655', ['0.750', '0.250']]

The categories in master_data_list[0] correspond to the data items in each of the following rows.  For convenience, the allele_combos frequencies and the allele frequencies were collected in to their own lists.