/*
 * Decompiled with CFR 0.152.
 */
package analysis.cassava.gbs;

import Utils.TextWriter;
import format.text.Bins;
import format.text.Table;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.TreeSet;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.snp.ExportUtils;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.dna.tag.TagsByTaxa;
import net.maizegenetics.dna.tag.TagsByTaxaByte;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.taxa.Taxon;

public class cassavaUtils {
    /**
     * Summarises per-site statistics into fixed-width bins and writes the
     * result as a tab-delimited text file.
     *
     * <p>Columns 3, 4, 5 and 6 (0-based) of every row of the statistic table
     * are parsed as an integer coordinate and three double values. Judging by
     * the output header these are MAF, missing rate and heterozygosity; the
     * layout of the input table itself is not visible here.
     *
     * <p>Three {@code Bins} objects are built with start 1, end equal to the
     * coordinate of the last row, and a bin size of 500000. For every bin the
     * mean and the SD of each statistic are written.
     *
     * <p>NOTE(review): the column labelled {@code BinStart} is filled from
     * {@code getBinEnd(i)} - confirm which of the two is intended.
     *
     * @param statisticFileS path of the statistic table to read
     * @param binFileS       path of the bin summary file to write
     */
    public void mkBinSummary(String statisticFileS, String binFileS) {
        Table stats = new Table(statisticFileS);
        int rowCount = stats.getRowNumber();
        int[] coordinate = new int[rowCount];
        double[] mafValues = new double[rowCount];
        double[] missingValues = new double[rowCount];
        double[] hetValues = new double[rowCount];
        for (int row = 0; row < rowCount; ++row) {
            coordinate[row] = Integer.parseInt(stats.content[row][3]);
            mafValues[row] = Double.parseDouble(stats.content[row][4]);
            missingValues[row] = Double.parseDouble(stats.content[row][5]);
            hetValues[row] = Double.parseDouble(stats.content[row][6]);
        }

        // The coordinate of the last row is used as the upper bound of the binned range.
        int lastCoordinate = coordinate[coordinate.length - 1];
        Bins mafBin = new Bins(1, lastCoordinate, 500000, coordinate, mafValues);
        Bins missingBin = new Bins(1, lastCoordinate, 500000, coordinate, missingValues);
        Bins hetBin = new Bins(1, lastCoordinate, 500000, coordinate, hetValues);

        TextWriter out = new TextWriter(binFileS);
        out.write("BinID\tBinStart\tMeanMAF\tMeanMissing\tMeanHet\tSDMAF\tSDMissing\tSDHet");
        out.newLine();
        int binCount = mafBin.getBinNum();
        for (int bin = 0; bin < binCount; ++bin) {
            String means = (bin + 1) + "\t" + mafBin.getBinEnd(bin)
                    + "\t" + mafBin.getBinMean(bin)
                    + "\t" + missingBin.getBinMean(bin)
                    + "\t" + hetBin.getBinMean(bin);
            String deviations = "\t" + mafBin.getBinSD(bin)
                    + "\t" + missingBin.getBinSD(bin)
                    + "\t" + hetBin.getBinSD(bin);
            out.write(means);
            out.write(deviations);
            out.newLine();
        }
        out.close();
    }

    /**
     * Writes one HDF5 genotype file per chromosome of the input genotype table.
     *
     * <p>The input is loaded with {@code ImportUtils.readGuessFormat}. For each
     * chromosome a filtered table is exported to
     * {@code genotypeFilterByHets.chr<chromosomeNumber>.h5} inside the output
     * directory, and a one-line summary is printed to standard output.
     *
     * @param inputGenotypeFileS path of the genotype file to split
     * @param outputDirS         directory that receives the per-chromosome files
     */
    public void splictH5ToChromosomes(String inputGenotypeFileS, String outputDirS) {
        GenotypeTable genotypes = ImportUtils.readGuessFormat(inputGenotypeFileS);
        for (Chromosome chromosome : genotypes.chromosomes()) {
            String fileName = "genotypeFilterByHets.chr" + chromosome.getChromosomeNumber() + ".h5";
            String targetPath = new File(outputDirS, fileName).getAbsolutePath();
            FilterGenotypeTable singleChr = FilterGenotypeTable.getInstance(genotypes, chromosome);
            ExportUtils.writeGenotypeHDF5(singleChr, targetPath);
            System.out.println(singleChr.numberOfSites() + " sites, "
                    + singleChr.numberOfTaxa() + " taxa, on chromosome "
                    + chromosome.getName() + " is written to " + targetPath);
        }
    }

    /**
     * Splits a HapMap text file into one HapMap file per chromosome.
     *
     * <p>Output files are named {@code genotypeFilterByHets.chr<N>.hmp.txt}
     * for N = 1..chrNum and are created in {@code outputHapMapDirS}. Every
     * output file receives all lines that precede the header, then the header
     * itself (the first line starting with {@code "rs"}), then the data lines
     * whose third tab-separated field equals N.
     *
     * <p>Data lines whose chromosome field is missing, not an integer, or
     * outside 1..chrNum are skipped, and the number of skipped lines is
     * reported on standard error. If no header line is found, the output
     * files are left empty and a message is printed.
     *
     * <p>I/O failures are reported with a stack trace and are not propagated,
     * as before. All files are closed on every exit path.
     *
     * @param inputHapMapFileS path of the HapMap file to split
     * @param outputHapMapDirS directory that receives the per-chromosome files
     * @param chrNum           number of chromosomes, numbered 1..chrNum
     */
    public void splitHapMapToChromosomes(String inputHapMapFileS, String outputHapMapDirS, int chrNum) {
        BufferedWriter[] writers = new BufferedWriter[chrNum];
        try {
            for (int i = 0; i < chrNum; ++i) {
                File outfile = new File(outputHapMapDirS, "genotypeFilterByHets.chr" + (i + 1) + ".hmp.txt");
                writers[i] = new BufferedWriter(new FileWriter(outfile), 65536);
            }
            try (BufferedReader br = new BufferedReader(new FileReader(inputHapMapFileS), 65536)) {
                // Collect everything in front of the header so it can be replicated into each output.
                ArrayList<String> preHeaderLines = new ArrayList<String>();
                String line;
                while ((line = br.readLine()) != null && !line.startsWith("rs")) {
                    preHeaderLines.add(line);
                }
                if (line == null) {
                    System.err.println("No header line starting with \"rs\" found in " + inputHapMapFileS);
                    return;
                }
                String header = line;
                for (BufferedWriter writer : writers) {
                    for (String preHeaderLine : preHeaderLines) {
                        writer.write(preHeaderLine);
                        writer.newLine();
                    }
                    writer.write(header);
                    writer.newLine();
                }

                int skipped = 0;
                while ((line = br.readLine()) != null) {
                    // Only the first three fields are needed; the limit avoids splitting all genotype columns.
                    String[] fields = line.split("\t", 4);
                    int chr = -1;
                    if (fields.length > 2) {
                        try {
                            chr = Integer.parseInt(fields[2]);
                        }
                        catch (NumberFormatException notNumeric) {
                            chr = -1;
                        }
                    }
                    if (chr < 1 || chr > chrNum) {
                        ++skipped;
                        continue;
                    }
                    // Chromosome IDs are 1..chrNum, so the writer index is simply chr - 1.
                    writers[chr - 1].write(line);
                    writers[chr - 1].newLine();
                }
                if (skipped > 0) {
                    System.err.println(skipped + " line(s) skipped: chromosome not in 1.." + chrNum);
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // close() also flushes; each writer is closed independently so one failure does not leak the rest.
            for (BufferedWriter writer : writers) {
                if (writer == null) {
                    continue;
                }
                try {
                    writer.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * One-off routine working on hard-coded paths under
     * {@code M:\pipelineTest\cassava\gbs}.
     *
     * <p>It reads the filtered chromosome 1 HapMap file, restricts it to the
     * first chromosome between positions 19000000 and 20420071 and writes that
     * subset. It then keeps only the taxa whose heterozygous count divided by
     * the number of sites in the subset is below 0.1 and writes that second
     * subset as well.
     */
    public void filterSiteEndChr1() {
        // These two paths are declared but not read by this method.
        String inputGenotypeFileS = "M:\\pipelineTest\\cassava\\gbs\\original_genotype\\chr1.hmp.txt";
        String endChr1GenotypeFileS = "M:\\pipelineTest\\cassava\\gbs\\filtered_genotype\\endChr1.hmp.txt";

        String filterGenotypeFileS = "M:\\pipelineTest\\cassava\\gbs\\filtered_genotype\\chr1_filter.hmp.txt";
        String filterEndChr1GenotypeFileS = "M:\\pipelineTest\\cassava\\gbs\\filtered_genotype\\endChr1_filter.hmp.txt";
        String filterEndChr1GenotypeHomoTaxaFileS = "M:\\pipelineTest\\cassava\\gbs\\filtered_genotype\\endChr1_filter_homoTaxa.hmp.txt";
        int startPos = 19000000;
        int endPos = 20420071;

        // Step 1: cut the region out of the first chromosome and save it.
        GenotypeTable wholeChr = ImportUtils.readFromHapmap(filterGenotypeFileS);
        Chromosome firstChromosome = wholeChr.chromosomes()[0];
        GenotypeTable region = FilterGenotypeTable.getInstance(wholeChr, firstChromosome, startPos, endPos);
        ExportUtils.writeToHapmap(region, filterEndChr1GenotypeFileS);

        // Step 2: retain taxa with a heterozygous fraction below 0.1 within the region.
        TaxaListBuilder lowHetTaxa = new TaxaListBuilder();
        for (int taxon = 0; taxon < region.numberOfTaxa(); ++taxon) {
            double hetFraction = (double)region.heterozygousCountForTaxon(taxon) / (double)region.numberOfSites();
            if (hetFraction < 0.1) {
                lowHetTaxa.add((Taxon)region.taxa().get(taxon));
            }
        }
        TaxaList keptTaxa = lowHetTaxa.build();
        GenotypeTable regionLowHetTaxa = FilterGenotypeTable.getInstance(region, keptTaxa);
        ExportUtils.writeToHapmap(regionLowHetTaxa, filterEndChr1GenotypeHomoTaxaFileS);
    }

    /**
     * Filters the sites of a genotype table, chromosome by chromosome, in
     * consecutive windows of 1000 sites, and writes the retained sites to an
     * HDF5 genotype file.
     *
     * <p>Per chromosome:
     * <ol>
     *   <li>Sites whose non-missing fraction exceeds {@code coverageForSite}
     *       and whose minor allele frequency exceeds {@code minMaf} are
     *       collected as "high coverage" sites.</li>
     *   <li>Within each window, a taxon is selected when it has at least one
     *       non-missing genotype on the high coverage sites and its
     *       heterozygous fraction on them is below {@code maxHetForTaxon}.</li>
     *   <li>Every site of the window (not only the high coverage ones) is kept
     *       when, among the selected taxa, at least one genotype is
     *       non-missing, the heterozygous fraction is below
     *       {@code maxHetForSite}, and the site's minor allele frequency
     *       exceeds 0.01.</li>
     * </ol>
     *
     * <p>A genotype equal to {@code -1} is treated as missing. Progress
     * figures are printed to standard output.
     *
     * <p>NOTE(review): the last test uses the fixed threshold 0.01 rather than
     * {@code minMaf} - confirm this is intended.
     *
     * @param inputGenotype   path of the genotype file to read
     * @param outputGenotype  path of the HDF5 file to write
     * @param coverageForSite minimum (exclusive) non-missing fraction of a high coverage site
     * @param minMaf          minimum (exclusive) minor allele frequency of a high coverage site
     * @param maxHetForTaxon  maximum (exclusive) heterozygous fraction of a selected taxon
     * @param maxHetForSite   maximum (exclusive) heterozygous fraction of a kept site
     */
    public void filterSite(String inputGenotype, String outputGenotype, double coverageForSite, double minMaf, double maxHetForTaxon, double maxHetForSite) {
        final int window = 1000;
        GenotypeTable gt = ImportUtils.readGuessFormat(inputGenotype);
        Chromosome[] chromosomes = gt.chromosomes();
        ArrayList<Integer> keptSites = new ArrayList<Integer>();

        for (Chromosome chromosome : chromosomes) {
            System.out.println("Filtering chromosome " + chromosome.getName());
            int[] firstLast = gt.firstLastSiteOfChromosome(chromosome);
            int firstSite = firstLast[0];
            int endExclusive = firstLast[1] + 1;

            // Pass 1: sites with enough coverage and a high enough minor allele frequency.
            ArrayList<Integer> coveredList = new ArrayList<Integer>();
            for (int site = firstSite; site < endExclusive; ++site) {
                double coverage = (double)gt.totalNonMissingForSite(site) / (double)gt.numberOfTaxa();
                if (coverage > coverageForSite && gt.minorAlleleFrequency(site) > minMaf) {
                    coveredList.add(site);
                }
            }
            // Filled in ascending site order, so the array can be binary-searched below.
            Integer[] coveredSites = coveredList.toArray(new Integer[coveredList.size()]);
            System.out.println(coveredSites.length);

            int siteCount = firstLast[1] - firstSite + 1;
            int remainder = siteCount % window;
            int windowCount = siteCount / window;
            if (remainder != 0) {
                ++windowCount;
            }

            int keptOnChromosome = 0;
            for (int w = 0; w < windowCount; ++w) {
                // Only the final window can be shorter than the nominal size.
                boolean shortLastWindow = remainder != 0 && w == windowCount - 1;
                int windowStart = firstSite + window * w;
                int windowEnd = windowStart + (shortLastWindow ? remainder : window);

                // Pass 2: taxa that are mostly homozygous on the high coverage sites of this window.
                ArrayList<Integer> selectedTaxa = new ArrayList<Integer>();
                // Reset for each taxon; the printed value is the one left by the last taxon.
                int coveredInWindow = 0;
                for (int taxon = 0; taxon < gt.numberOfTaxa(); ++taxon) {
                    int called = 0;
                    int hets = 0;
                    coveredInWindow = 0;
                    for (int site = windowStart; site < windowEnd; ++site) {
                        if (Arrays.binarySearch(coveredSites, Integer.valueOf(site)) >= 0) {
                            ++coveredInWindow;
                            if (gt.genotype(taxon, site) != -1) {
                                ++called;
                                if (gt.isHeterozygous(taxon, site)) {
                                    ++hets;
                                }
                            }
                        }
                    }
                    if (called != 0 && (double)hets / (double)called < maxHetForTaxon) {
                        selectedTaxa.add(taxon);
                    }
                }
                System.out.println(coveredSites.length + "\t" + coveredInWindow + "\t");
                System.out.println(((Position)gt.positions().get(windowStart)).getPosition() + "\t" + windowStart + " Homo Taxa num is " + selectedTaxa.size());

                // Pass 3: judge every site of the window using only the selected taxa.
                for (int site = windowStart; site < windowEnd; ++site) {
                    int called = 0;
                    int hets = 0;
                    for (int taxon : selectedTaxa) {
                        if (gt.genotype(taxon, site) != -1) {
                            ++called;
                        }
                        if (gt.isHeterozygous(taxon, site)) {
                            ++hets;
                        }
                    }
                    if (called != 0
                            && (double)hets / (double)called < maxHetForSite
                            && gt.minorAlleleFrequency(site) > 0.01) {
                        keptSites.add(site);
                        ++keptOnChromosome;
                    }
                }
            }
            System.out.println("Keep " + keptOnChromosome + " sites. " + ((double)keptOnChromosome / (double)siteCount) + "\n");
        }

        int[] keptSiteArray = new int[keptSites.size()];
        int next = 0;
        for (int site : keptSites) {
            keptSiteArray[next++] = site;
        }
        FilterGenotypeTable filtered = FilterGenotypeTable.getInstance(gt, keptSiteArray);
        ExportUtils.writeGenotypeHDF5(filtered, outputGenotype);
    }

    /**
     * Writes the taxa names of a genotype file to a text file, one name per
     * line, preceded by the header line {@code Taxa_FullName}.
     *
     * <p>The writer is closed (and therefore flushed) on every exit path.
     * Previously it was never closed, so the buffered content was normally
     * lost, and the header and names were written without line separators.
     *
     * <p>I/O failures are reported with a stack trace and are not propagated.
     *
     * @param genotypeFileS path of the genotype file to read
     * @param taxaNameFileS path of the taxa name file to write
     */
    public void mkTaxaNameFileS(String genotypeFileS, String taxaNameFileS) {
        GenotypeTable gt = ImportUtils.readGuessFormat(genotypeFileS);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(taxaNameFileS), 65536)) {
            bw.write("Taxa_FullName");
            bw.newLine();
            int taxaCount = gt.numberOfTaxa();
            for (int i = 0; i < taxaCount; ++i) {
                bw.write(gt.taxaName(i));
                bw.newLine();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Merges the taxa of a byte-packed TagsByTaxa file that share the same
     * label and writes the merged table as a new binary file.
     *
     * <p>The label of a taxon is the first and the last colon-separated field
     * of its name, joined by a colon. Read counts of all taxa with the same
     * label are summed per tag and capped at 127 before being written as one
     * byte.
     *
     * <p>Output layout, in order: tag count (int), tag size in longs (int),
     * number of merged taxa (int), the merged taxa names (UTF), then for each
     * tag its longs, its length (byte) and one count byte per merged taxon.
     * Merged taxa appear in sorted label order.
     *
     * <p>A group with a single member keeps its full name. A larger group is
     * named from its lexicographically smallest member as
     * {@code field0:field1:0:field3}, which requires that name to have at
     * least four colon-separated fields.
     *
     * <p>Changes from the previous version:
     * <ul>
     *   <li>The output stream is closed on every exit path.</li>
     *   <li>Merged names are built before the output file is created, so a
     *       malformed name no longer leaves a truncated file.</li>
     *   <li>Each taxon is mapped to its group directly, replacing the
     *       per-group binary search over member names, which could leave
     *       default index 0 entries if taxa names repeated.</li>
     * </ul>
     *
     * <p>Failures are reported with a stack trace and are not propagated.
     *
     * @param sourceTBTFileS path of the TagsByTaxa file to read
     * @param targetTBTFileS path of the merged TagsByTaxa file to write
     */
    public void mergeTaxaTBT(String sourceTBTFileS, String targetTBTFileS) {
        TagsByTaxaByte tbt = new TagsByTaxaByte(sourceTBTFileS, TagsByTaxa.FilePacking.Byte);
        int taxaCount = tbt.getTaxaCount();

        // Derive the merge label of every taxon once.
        String[] taxonName = new String[taxaCount];
        String[] taxonLabel = new String[taxaCount];
        TreeSet<String> labelSet = new TreeSet<String>();
        for (int taxon = 0; taxon < taxaCount; ++taxon) {
            taxonName[taxon] = tbt.getTaxaName(taxon);
            String[] fields = taxonName[taxon].split(":");
            taxonLabel[taxon] = fields[0] + ":" + fields[fields.length - 1];
            labelSet.add(taxonLabel[taxon]);
        }
        // A TreeSet iterates in natural order, so the array is already sorted for binarySearch.
        String[] labels = labelSet.toArray(new String[labelSet.size()]);

        // Map each taxon to its group and remember the smallest member name of every group.
        int[] groupOfTaxon = new int[taxaCount];
        int[] groupSize = new int[labels.length];
        String[] smallestName = new String[labels.length];
        for (int taxon = 0; taxon < taxaCount; ++taxon) {
            int group = Arrays.binarySearch(labels, taxonLabel[taxon]);
            groupOfTaxon[taxon] = group;
            ++groupSize[group];
            if (smallestName[group] == null || taxonName[taxon].compareTo(smallestName[group]) < 0) {
                smallestName[group] = taxonName[taxon];
            }
        }

        try {
            String[] mergedName = new String[labels.length];
            for (int group = 0; group < labels.length; ++group) {
                if (groupSize[group] == 1) {
                    mergedName[group] = smallestName[group];
                    continue;
                }
                String[] fields = smallestName[group].split(":");
                mergedName[group] = fields[0] + ":" + fields[1] + ":0:" + fields[3];
            }

            try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(targetTBTFileS), 65536))) {
                int tagCount = tbt.getTagCount();
                int tagSizeInLong = tbt.getTagSizeInLong();
                out.writeInt(tagCount);
                out.writeInt(tagSizeInLong);
                out.writeInt(labels.length);
                for (String name : mergedName) {
                    out.writeUTF(name);
                }

                int[] mergedCount = new int[labels.length];
                for (int tagIndex = 0; tagIndex < tagCount; ++tagIndex) {
                    long[] tag = tbt.getTag(tagIndex);
                    for (int j = 0; j < tagSizeInLong; ++j) {
                        out.writeLong(tag[j]);
                    }
                    out.writeByte(tbt.getTagLength(tagIndex));

                    Arrays.fill(mergedCount, 0);
                    for (int taxon = 0; taxon < taxaCount; ++taxon) {
                        mergedCount[groupOfTaxon[taxon]] += tbt.getReadCountForTagTaxon(tagIndex, taxon);
                    }
                    for (int group = 0; group < labels.length; ++group) {
                        // Counts are stored in one signed byte, hence the cap at 127.
                        out.writeByte(Math.min(mergedCount[group], 127));
                    }
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a tag count file from a byte-packed TagsByTaxa file.
     *
     * <p>Output layout, in order: tag count (int), tag size in longs (int),
     * then for each tag its longs, its length (byte) and the sum of its read
     * counts over all taxa (int).
     *
     * <p>On success the output path and the smallest per-tag total are
     * printed to standard output; for a file without tags the minimum is
     * reported as 0.
     *
     * <p>Changes from the previous version:
     * <ul>
     *   <li>The output stream is closed on every exit path.</li>
     *   <li>The success messages are no longer printed after a failure.</li>
     *   <li>The minimum is no longer seeded with 10000, which capped the
     *       reported value at 10000.</li>
     * </ul>
     *
     * <p>Failures are reported with a stack trace and are not propagated.
     *
     * @param tbtFileS      path of the TagsByTaxa file to read
     * @param tagCountFileS path of the tag count file to write
     */
    public void mkTagCountFromTBT(String tbtFileS, String tagCountFileS) {
        TagsByTaxaByte tbt = new TagsByTaxaByte(tbtFileS, TagsByTaxa.FilePacking.Byte);
        int tagCount = tbt.getTagCount();
        int taxaCount = tbt.getTaxaCount();
        int minCount = Integer.MAX_VALUE;
        try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tagCountFileS), 65536))) {
            dos.writeInt(tagCount);
            dos.writeInt(tbt.getTagSizeInLong());
            for (int i = 0; i < tagCount; ++i) {
                long[] tag = tbt.getTag(i);
                for (int j = 0; j < tag.length; ++j) {
                    dos.writeLong(tag[j]);
                }
                dos.writeByte(tbt.getTagLength(i));
                int count = 0;
                for (int j = 0; j < taxaCount; ++j) {
                    count += tbt.getReadCountForTagTaxon(i, j);
                }
                dos.writeInt(count);
                minCount = Math.min(minCount, count);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            // Do not claim success below when the file could not be written completely.
            return;
        }
        System.out.println("tagCountFile is created from TBT at " + tagCountFileS);
        System.out.println("MinCount is " + String.valueOf(tagCount == 0 ? 0 : minCount));
    }
}

