#!/usr/bin/env python

#from Bio import SeqIO
#from scipy import stats

#
# project3.py
# Author: [YOUR NAME HERE]
# Date Created:
# Last Modified:
#
# [DESCRIPTION OF PROGRAM FUNCTION HERE]
#


# CONSTANTS
# Confidence threshold: main() is meant to separate sequences whose genus
# confidence score is above this value from those below it.
CUTOFF = 0.8
# Input files. These point at the small sample data; update these to the
# full data files when you are sure your code works!
FASTQ = "rand_100.fastq"
CLASSIF = "rand_100.txt"
# additional constants go here

def main():
    """Load the input files and report the average N count per group.

    Reads the FASTQ file and the classification file named by the
    module-level constants, then prints the average of the "below" list
    followed by the average of the "above" list via printListAvg.

    The step that fills the two lists is still to be written, so both
    lists are empty when they are printed.
    """
    sequences_by_id = read_fastq(FASTQ)
    classification_by_id = read_classifications(CLASSIF)

    # N counts collected per group; both groups start out empty
    n_counts = {"below": [], "above": []}

    # TODO: use getGenusConfidence to find sequences whose confidence
    # scores are above or below the cutoff (CUTOFF), and add the number of
    # N's in each sequence (countNs) to n_counts["below"] or
    # n_counts["above"], as appropriate.

    # then print the average number of N's in each group, "below" first
    for group in ("below", "above"):
        printListAvg(group, n_counts[group])


def printListAvg(label, data):
    """Print a label followed by the average of a list of numbers.

    Args:
        label: string printed in front of the average.
        data: list of numeric values to average.

    Returns:
        None; the result is written to standard output only.

    An empty list has no average, so "<label>: no data" is printed instead
    of raising ZeroDivisionError.
    """
    count = len(data)
    if count == 0:
        print("%s: no data" % label)
        return
    # float() forces true division even where "/" on two ints truncates
    average = sum(data) / float(count)
    print("%s: %s" % (label, average))


def getGenusConfidence(clstring):
    """Placeholder for extracting the genus confidence score.

    Not implemented yet: the argument is ignored and None is returned.

    Intended contract (from the template notes): given a classification
    string, return the floating point value of its 12th element if that
    element exists, otherwise return zero.

    NOTE(review): the template's example string is cut off, so the field
    separator of a classification string is not shown here -- confirm it
    against the classification file before implementing.
    """
    return None


def countNs(sequence):
    """Return the number of Ns in a sequence.

    Args:
        sequence: the sequence as a string.

    Returns:
        Integer count of "N" characters; 0 for an empty sequence. Only
        uppercase "N" is counted.
    """
    return sequence.count("N")


def read_fastq(filename):
    """Placeholder for the FASTQ reader.

    Not implemented yet: no file is opened and None is returned.

    Intended contract (from the template notes): take the name of the
    fastq file, open it, read the fastq-formatted records from it, and
    return a dict whose keys are the rest of the id and whose values are
    the sequences.
    """
    return None


def read_classifications(filename):
    """Placeholder for the classification file reader.

    Not implemented yet: no file is opened and None is returned.

    Intended contract (from the template notes): take the name of the
    classification file, read from it, remove the ">39 " chars from the
    start of each id, and create a dict that maps sequence ids to
    classification strings.
    """
    return None


# main program: calls the main function, but only when this file is run as
# a script, so that importing the module (e.g. to test one of the helper
# functions) does not trigger any file reading or printing
#
if __name__ == "__main__":
    main()