# Reformat the input file (PICRUSt output): create a new header line holding
# the sample class of each sample (HP or RC), and get rid of the last column
# of every row.  The output file name suggests the result is meant as LEfSe
# input -- NOTE(review): confirm against the downstream tool.

TAB = "\t"  # column separator used by both the input and the output file

INPUT_PATH = "picrust.txt"            # name of input file (PICRUSt output)
OUTPUT_PATH = "pythonlefseinput.txt"  # results are written here


def build_header(sample_line):
    """Return the output header line built from the sample-name line.

    The sample class is taken to be the last two characters of each sample
    name (per the original script: "HP" or "RC").  The first column is
    replaced by the literal "#KEGG" and the rightmost column is dropped, so
    the header has the same number of columns as the data rows written by
    ``strip_last_column``.

    sample_line -- the second line of the input file, tab-separated.
    Returns the header string, terminated by a newline.
    """
    fields = sample_line.split(TAB)
    # fields[0] is the row-label column and fields[-1] is the column that is
    # being discarded; everything in between is a sample name.
    classes = [name[-2:] for name in fields[1:-1]]
    return TAB.join(["#KEGG"] + classes) + "\n"


def strip_last_column(line):
    """Return *line* without its rightmost tab-separated column.

    The original line terminator lives in the dropped column, so a fresh
    newline is appended to the result.
    """
    return TAB.join(line.split(TAB)[:-1]) + "\n"


def reformat(in_path=INPUT_PATH, out_path=OUTPUT_PATH):
    """Read *in_path* and write the reformatted table to *out_path*.

    The first input line is ignored, the second is turned into the
    sample-class header, and every remaining line is copied with its last
    column removed.  Both files are closed even if an error occurs midway.
    Raises IOError/OSError if either file cannot be opened.
    """
    # The input is opened first so that a missing input file fails before
    # the output file is created or truncated.
    with open(in_path, "r") as infile, open(out_path, "w") as outfile:
        infile.readline()  # first line is ignored
        outfile.write(build_header(infile.readline()))
        for line in infile:
            outfile.write(strip_last_column(line))


def main():
    """Run the conversion on the default file names and report completion."""
    reformat()
    # Single-argument call form prints identically on Python 2 and Python 3.
    print("All done; look at pythonlefseinput.txt to see the data file.")


if __name__ == "__main__":
    main()