/*
 * Decompiled with CFR 0.152.
 */
package net.maizegenetics.analysis.imputation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Random;
import java.util.regex.Pattern;
import net.maizegenetics.analysis.imputation.AGPMap;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.BitSet;
import org.apache.log4j.Logger;

public class ImputationUtils {
    private static final Logger myLogger = Logger.getLogger(ImputationUtils.class);
    public static Pattern tab = Pattern.compile("\t");

    public static int[] order(int[] array) {
        int n = array.length;
        class SortElement
        implements Comparable<SortElement> {
            int val;
            int ndx;

            SortElement(int x, int index) {
                this.val = x;
                this.ndx = index;
            }

            @Override
            public int compareTo(SortElement se) {
                return this.val - se.val;
            }
        }
        Object[] sortArray = new SortElement[n];
        for (int i = 0; i < n; ++i) {
            sortArray[i] = new SortElement(array[i], i);
        }
        Arrays.sort(sortArray);
        int[] order = new int[n];
        for (int i = 0; i < n; ++i) {
            order[i] = ((SortElement)sortArray[i]).ndx;
        }
        return order;
    }

    public static int[] reverseOrder(int[] array) {
        int n = array.length;
        class SortElement
        implements Comparable<SortElement> {
            int val;
            int ndx;

            SortElement(int x, int index) {
                this.val = x;
                this.ndx = index;
            }

            @Override
            public int compareTo(SortElement se) {
                return se.val - this.val;
            }
        }
        Object[] sortArray = new SortElement[n];
        for (int i = 0; i < n; ++i) {
            sortArray[i] = new SortElement(array[i], i);
        }
        Arrays.sort(sortArray);
        int[] order = new int[n];
        for (int i = 0; i < n; ++i) {
            order[i] = ((SortElement)sortArray[i]).ndx;
        }
        return order;
    }

    public static GenotypeTable[] getTwoClusters(GenotypeTable gt, int[] parentIndex) {
        float dist;
        float[] tloc;
        int t;
        float prevdist;
        float[] loc2;
        float[] loc1;
        int maxiter = 5;
        int ntaxa = gt.numberOfTaxa();
        int nsnps = gt.numberOfSites();
        int seed1 = parentIndex[0];
        int seed2 = parentIndex[1];
        Random rand = new Random();
        if (seed1 == -1) {
            if (seed2 == -1) {
                seed1 = rand.nextInt(ntaxa);
                loc1 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
                while (seed2 == -1 || seed1 == seed2) {
                    seed2 = rand.nextInt(ntaxa);
                }
                loc2 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
            } else {
                loc1 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(0, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(0, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
                loc2 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
                seed1 = 0;
                prevdist = ImputationUtils.getManhattanDistance(loc2, loc1, nsnps);
                for (t = 1; t < ntaxa; ++t) {
                    BitSet[] bitSetArray = new BitSet[]{gt.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Minor)};
                    tloc = ImputationUtils.snpsAsFloatVector(bitSetArray, nsnps);
                    dist = ImputationUtils.getManhattanDistance(loc2, tloc, nsnps);
                    if (!(dist > prevdist)) continue;
                    prevdist = dist;
                    loc1 = tloc;
                    seed1 = t;
                }
            }
        } else if (seed2 == -1) {
            loc1 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
            loc2 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(0, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(0, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
            seed2 = 0;
            prevdist = ImputationUtils.getManhattanDistance(loc1, loc2, nsnps);
            for (t = 1; t < ntaxa; ++t) {
                BitSet[] bitSetArray = new BitSet[]{gt.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Minor)};
                tloc = ImputationUtils.snpsAsFloatVector(bitSetArray, nsnps);
                dist = ImputationUtils.getManhattanDistance(loc1, tloc, nsnps);
                if (!(dist > prevdist)) continue;
                prevdist = dist;
                loc2 = tloc;
                seed2 = t;
            }
        } else {
            loc1 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed1, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
            loc2 = ImputationUtils.snpsAsFloatVector(new BitSet[]{gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(seed2, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
        }
        int[] size1 = new int[nsnps];
        int[] size2 = new int[nsnps];
        for (int i = 0; i < nsnps; ++i) {
            if (loc1[i] >= 0.0f) {
                size1[i] = 1;
            }
            if (!(loc2[i] >= 0.0f)) continue;
            size2[i] = 1;
        }
        boolean[] isInCluster1 = new boolean[ntaxa];
        isInCluster1[seed1] = true;
        isInCluster1[seed2] = false;
        for (int t2 = 0; t2 < ntaxa; ++t2) {
            float dist2;
            if (t2 == seed1 || t2 == seed2) continue;
            BitSet[] bitSetArray = new BitSet[]{gt.allelePresenceForAllSites(t2, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t2, GenotypeTable.WHICH_ALLELE.Minor)};
            float[] tloc2 = ImputationUtils.snpsAsFloatVector(bitSetArray, nsnps);
            float dist1 = ImputationUtils.getManhattanDistance(loc1, tloc2, nsnps);
            if (dist1 <= (dist2 = ImputationUtils.getManhattanDistance(loc2, tloc2, nsnps))) {
                isInCluster1[t2] = true;
                loc1 = ImputationUtils.getMeanLocation(loc1, size1, tloc2, true, nsnps);
                continue;
            }
            isInCluster1[t2] = false;
            loc2 = ImputationUtils.getMeanLocation(loc2, size2, tloc2, true, nsnps);
        }
        for (int iter = 0; iter < maxiter; ++iter) {
            boolean noChanges = true;
            for (int t3 = 0; t3 < ntaxa; ++t3) {
                float dist2;
                BitSet[] bitSetArray = new BitSet[]{gt.allelePresenceForAllSites(t3, GenotypeTable.WHICH_ALLELE.Major), gt.allelePresenceForAllSites(t3, GenotypeTable.WHICH_ALLELE.Minor)};
                float[] tloc3 = ImputationUtils.snpsAsFloatVector(bitSetArray, nsnps);
                float dist1 = ImputationUtils.getManhattanDistance(loc1, tloc3, nsnps);
                if (dist1 <= (dist2 = ImputationUtils.getManhattanDistance(loc2, tloc3, nsnps)) && !isInCluster1[t3]) {
                    isInCluster1[t3] = true;
                    loc1 = ImputationUtils.getMeanLocation(loc1, size1, tloc3, true, nsnps);
                    loc2 = ImputationUtils.getMeanLocation(loc2, size2, tloc3, false, nsnps);
                    noChanges = false;
                    continue;
                }
                if (!(dist1 > dist2) || !isInCluster1[t3]) continue;
                isInCluster1[t3] = false;
                loc1 = ImputationUtils.getMeanLocation(loc1, size1, tloc3, false, nsnps);
                loc2 = ImputationUtils.getMeanLocation(loc2, size2, tloc3, true, nsnps);
                noChanges = false;
            }
            if (noChanges) break;
        }
        System.out.println("distance between clusters = " + ImputationUtils.getManhattanDistance(loc1, loc2, nsnps));
        TaxaListBuilder builder1 = new TaxaListBuilder();
        TaxaListBuilder builder2 = new TaxaListBuilder();
        for (int t4 = 0; t4 < ntaxa; ++t4) {
            if (isInCluster1[t4]) {
                builder1.add((Taxon)gt.taxa().get(t4));
                continue;
            }
            builder2.add((Taxon)gt.taxa().get(t4));
        }
        GenotypeTable gt1 = FilterGenotypeTable.getInstance(gt, builder1.build());
        GenotypeTable gt2 = FilterGenotypeTable.getInstance(gt, builder2.build());
        return new GenotypeTable[]{gt1, gt2};
    }

    public static GenotypeTable[] getTwoClusters(GenotypeTable inputAlignment, int minGametesPerTaxon) {
        int ntaxa = inputAlignment.numberOfTaxa();
        TaxaListBuilder builder = new TaxaListBuilder();
        for (int t = 0; t < ntaxa; ++t) {
            if (inputAlignment.totalNonMissingForTaxon(t) < minGametesPerTaxon) continue;
            builder.add((Taxon)inputAlignment.taxa().get(t));
        }
        TaxaList adequatelyCoveredTaxa = builder.build();
        if (adequatelyCoveredTaxa.size() < 10) {
            myLogger.info((Object)("Included lines less than 10 in getTwoClusters, poor coverage in interval starting at " + inputAlignment.siteName(0)));
            return null;
        }
        GenotypeTable myGenotypes = FilterGenotypeTable.getInstance(inputAlignment, adequatelyCoveredTaxa);
        int ntrials = 5;
        int maxiter = 5;
        ntaxa = myGenotypes.numberOfTaxa();
        int nsnps = myGenotypes.numberOfSites();
        boolean[][] isInCluster1 = new boolean[ntrials][ntaxa];
        int bestTrial = -1;
        float maxDistance = 0.0f;
        Random rand = new Random();
        float[][] taxaLocs = new float[ntaxa][nsnps];
        for (int t = 0; t < ntaxa; ++t) {
            taxaLocs[t] = ImputationUtils.snpsAsFloatVector(new BitSet[]{myGenotypes.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Major), myGenotypes.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Minor)}, nsnps);
        }
        for (int trial = 0; trial < ntrials; ++trial) {
            int seed1 = rand.nextInt(ntaxa);
            int seed2 = -1;
            while (seed2 == -1 || seed1 == seed2) {
                seed2 = rand.nextInt(ntaxa);
            }
            isInCluster1[trial][seed1] = true;
            isInCluster1[trial][seed2] = false;
            for (int t = 0; t < ntaxa; ++t) {
                float dist2;
                if (t == seed1 || t == seed2) continue;
                float dist1 = ImputationUtils.getManhattanDistance(taxaLocs[seed1], taxaLocs[t], nsnps);
                isInCluster1[trial][t] = dist1 < (dist2 = ImputationUtils.getManhattanDistance(taxaLocs[seed2], taxaLocs[t], nsnps)) ? true : (dist1 > dist2 ? false : rand.nextDouble() > 0.5);
            }
            float[][] meanLocs = new float[2][];
            boolean badclusters = false;
            for (int iter = 0; iter < maxiter; ++iter) {
                int t;
                boolean noChanges = true;
                int nCluster1 = 0;
                int nCluster2 = 0;
                for (int t2 = 0; t2 < ntaxa; ++t2) {
                    if (isInCluster1[trial][t2]) {
                        ++nCluster1;
                        continue;
                    }
                    ++nCluster2;
                }
                if (nCluster1 == 0 || nCluster2 == 0) {
                    badclusters = true;
                    break;
                }
                float[][] cluster1Locs = new float[nCluster1][];
                float[][] cluster2Locs = new float[nCluster2][];
                int countCluster1 = 0;
                int countCluster2 = 0;
                for (t = 0; t < ntaxa; ++t) {
                    if (isInCluster1[trial][t]) {
                        cluster1Locs[countCluster1++] = taxaLocs[t];
                        continue;
                    }
                    cluster2Locs[countCluster2++] = taxaLocs[t];
                }
                meanLocs = new float[][]{ImputationUtils.getMeanLocation(cluster1Locs), ImputationUtils.getMeanLocation(cluster2Locs)};
                for (t = 0; t < ntaxa; ++t) {
                    float dist2;
                    BitSet[] bitSetArray = new BitSet[]{myGenotypes.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Major), myGenotypes.allelePresenceForAllSites(t, GenotypeTable.WHICH_ALLELE.Minor)};
                    float[] tloc = ImputationUtils.snpsAsFloatVector(bitSetArray, nsnps);
                    float dist1 = ImputationUtils.getManhattanDistance(meanLocs[0], tloc, nsnps);
                    if (dist1 < (dist2 = ImputationUtils.getManhattanDistance(meanLocs[1], tloc, nsnps)) && !isInCluster1[trial][t]) {
                        isInCluster1[trial][t] = true;
                        noChanges = false;
                        continue;
                    }
                    if (!(dist1 > dist2) || !isInCluster1[trial][t]) continue;
                    isInCluster1[trial][t] = false;
                    noChanges = false;
                }
                if (noChanges) break;
            }
            if (badclusters) {
                System.out.println("Trial " + trial + ": bad clustering, no distance could be calculated");
                continue;
            }
            float distanceBetweenClusters = ImputationUtils.getManhattanDistance(meanLocs[0], meanLocs[1], nsnps);
            if (distanceBetweenClusters > maxDistance) {
                maxDistance = distanceBetweenClusters;
                bestTrial = trial;
            }
            System.out.println("Trial " + trial + ": distance between clusters = " + distanceBetweenClusters);
        }
        TaxaListBuilder builder1 = new TaxaListBuilder();
        TaxaListBuilder builder2 = new TaxaListBuilder();
        for (int t = 0; t < ntaxa; ++t) {
            if (isInCluster1[bestTrial][t]) {
                builder1.add((Taxon)myGenotypes.taxa().get(t));
                continue;
            }
            builder2.add((Taxon)myGenotypes.taxa().get(t));
        }
        GenotypeTable gt1 = FilterGenotypeTable.getInstance(myGenotypes, builder1.build());
        GenotypeTable gt2 = FilterGenotypeTable.getInstance(myGenotypes, builder2.build());
        return new GenotypeTable[]{gt1, gt2};
    }

    public static float[] snpsAsFloatVector(BitSet[] alleles, int nsnps) {
        float[] result = new float[nsnps];
        for (int s = 0; s < nsnps; ++s) {
            if (alleles[0].fastGet(s)) {
                result[s] = 2.0f;
                if (!alleles[1].fastGet(s)) continue;
                result[s] = 1.0f;
                continue;
            }
            result[s] = alleles[1].fastGet(s) ? 0.0f : 1.0f;
        }
        return result;
    }

    public static float getManhattanDistance(float[] loc, float[] t, int nsnps) {
        float d = 0.0f;
        int nsites = 0;
        for (int s = 0; s < nsnps; ++s) {
            if (!(loc[s] >= 0.0f) || !(t[s] >= 0.0f)) continue;
            d += Math.abs(loc[s] - t[s]);
            ++nsites;
        }
        return d / (float)nsites;
    }

    public static float[] getMeanLocation(float[] loc, int[] size, float[] t, boolean add, int nsnps) {
        float[] result = new float[nsnps];
        if (add) {
            for (int s = 0; s < nsnps; ++s) {
                if (t[s] >= 0.0f) {
                    if (size[s] > 0) {
                        result[s] = (loc[s] * (float)size[s] + t[s]) / (float)(size[s] + 1);
                        int n = s;
                        size[n] = size[n] + 1;
                        continue;
                    }
                    result[s] = t[s];
                    size[s] = 1;
                    continue;
                }
                result[s] = loc[s];
            }
        } else {
            for (int s = 0; s < nsnps; ++s) {
                if (t[s] >= 0.0f) {
                    if (size[s] > 1) {
                        result[s] = (loc[s] * (float)size[s] - t[s]) / (float)(size[s] - 1);
                        int n = s;
                        size[n] = size[n] - 1;
                        continue;
                    }
                    if (size[s] != 1) continue;
                    result[s] = 0.0f;
                    size[s] = 0;
                    continue;
                }
                result[s] = loc[s];
            }
        }
        return result;
    }

    public static float[] getMeanLocation(float[][] locs) {
        int nsites = locs[0].length;
        int ntaxa = locs.length;
        float[] result = new float[nsites];
        for (int s = 0; s < nsites; ++s) {
            int count = 0;
            float sum = 0.0f;
            for (int t = 0; t < ntaxa; ++t) {
                if (!Float.isNaN(locs[t][s])) {
                    ++count;
                    sum += locs[t][s];
                }
                result[s] = count > 0 ? sum / (float)count : Float.NaN;
            }
        }
        return result;
    }

    public static void printAlleleStats(GenotypeTable gt, String name) {
        int monoCount = 0;
        int polyCount = 0;
        int[] binCount = new int[21];
        int nsites = gt.numberOfSites();
        for (int s = 0; s < nsites; ++s) {
            int bin;
            if (gt.majorAlleleFrequency(s) > 0.75) {
                ++monoCount;
                continue;
            }
            ++polyCount;
            int n = bin = (int)Math.floor(20.0 * gt.majorAlleleFrequency(s));
            binCount[n] = binCount[n] + 1;
        }
        System.out.println(name);
        System.out.println("mono count = " + monoCount + ", poly count = " + polyCount);
        System.out.print("bins: ");
        for (int i = 0; i < 20; ++i) {
            System.out.print(" " + binCount[i]);
        }
        System.out.println();
        System.out.println();
    }

    public static void mergeNonconsensusFiles(String dir, String match, String outfileName) {
        int i;
        File[] mergeFiles = ImputationUtils.filterFiles(dir, match);
        int nfiles = mergeFiles.length;
        String[] colLabel = new String[nfiles];
        HashMap<String, String[]> taxonMap = new HashMap<String, String[]>();
        for (int f = 0; f < nfiles; ++f) {
            System.out.println("processing" + mergeFiles[f].getName());
            try {
                BufferedReader br = new BufferedReader(new FileReader(mergeFiles[f]));
                br.readLine();
                String input = br.readLine();
                String[] info = tab.split(input);
                colLabel[f] = info[1];
                while (input != null) {
                    info = tab.split(input);
                    String[] values = (String[])taxonMap.get(info[0]);
                    if (values == null) {
                        values = new String[nfiles];
                        for (i = 0; i < nfiles; ++i) {
                            values[i] = "";
                        }
                        taxonMap.put(info[0], values);
                    }
                    values[f] = info[2];
                    input = br.readLine();
                }
                br.close();
                continue;
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
        File outfile = new File(dir, outfileName);
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
            StringBuilder sb = new StringBuilder("Taxon");
            for (i = 0; i < nfiles; ++i) {
                sb.append("\t").append(colLabel[i]);
            }
            bw.write(sb.toString());
            bw.newLine();
            LinkedList taxaList = new LinkedList(taxonMap.keySet());
            Collections.sort(taxaList);
            for (String taxon : taxaList) {
                sb = new StringBuilder(taxon);
                String[] values = (String[])taxonMap.get(taxon);
                for (int f = 0; f < nfiles; ++f) {
                    sb.append("\t").append(values[f]);
                }
                bw.write(sb.toString());
                bw.newLine();
            }
            bw.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static File[] filterFiles(String dir, String match) {
        File matchdir = new File(dir);
        final String pattern = new String(match);
        File[] filteredFiles = matchdir.listFiles(new FilenameFilter(){

            @Override
            public boolean accept(File dir, String name) {
                return name.matches(pattern);
            }
        });
        return filteredFiles;
    }

    public static void mergeFiles(File[] mergeFiles, int idcol, int datacol, int[] colOrder, String outfile) {
        int nfiles = mergeFiles.length;
        if (colOrder == null) {
            colOrder = new int[]{0, 2, 3, 4, 5, 6, 7, 8, 9, 1};
        }
        HashMap<String, String[]> taxonMap = new HashMap<String, String[]>();
        for (int f = 0; f < nfiles; ++f) {
            System.out.println("processing" + mergeFiles[f].getName());
            try {
                BufferedReader br = new BufferedReader(new FileReader(mergeFiles[f]));
                br.readLine();
                String input = br.readLine();
                String[] info = tab.split(input);
                while (input != null) {
                    info = tab.split(input);
                    String[] values = (String[])taxonMap.get(info[idcol]);
                    if (values == null) {
                        values = new String[nfiles];
                        for (int i = 0; i < nfiles; ++i) {
                            values[i] = "";
                        }
                        taxonMap.put(info[idcol], values);
                    }
                    values[f] = info[datacol];
                    input = br.readLine();
                }
                br.close();
                continue;
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
            StringBuilder sb = new StringBuilder("Taxon");
            for (int i = 0; i < nfiles; ++i) {
                sb.append("\t").append(i + 1);
            }
            sb.append("\t").append("average");
            bw.write(sb.toString());
            bw.newLine();
            LinkedList taxaList = new LinkedList(taxonMap.keySet());
            Collections.sort(taxaList);
            for (String taxon : taxaList) {
                sb = new StringBuilder(taxon);
                String[] values = (String[])taxonMap.get(taxon);
                double sum = 0.0;
                double count = 0.0;
                for (int f = 0; f < nfiles; ++f) {
                    sb.append("\t").append(values[f]);
                    try {
                        sum += Double.parseDouble(values[f]);
                        count += 1.0;
                        continue;
                    }
                    catch (NumberFormatException e) {
                        // empty catch block
                    }
                }
                sb.append("\t").append(sum / count);
                bw.write(sb.toString());
                bw.newLine();
            }
            bw.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void imputeLinkageMarkers(double interval, boolean hapmapFormat, String origsnpFile, String snpfilePattern, String outfilePattern) {
        String[] nuc = new String[]{"A", "M", "C"};
        HashMap<Byte, String> byteToNumberString = new HashMap<Byte, String>();
        byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("A"), "0");
        byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("M"), "1");
        byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("C"), "2");
        HashMap<Byte, Double> byteToNumber = new HashMap<Byte, Double>();
        byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("A"), 0.0);
        byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("M"), 1.0);
        byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("C"), 2.0);
        Pattern tab = Pattern.compile("\t");
        BufferedWriter bw = null;
        byte missingByte = NucleotideAlignmentConstants.getNucleotideDiploidByte("NN");
        boolean chromosome = true;
        String outFilename = String.format(outfilePattern, interval);
        String snpFilename = origsnpFile;
        AGPMap agpmap = new AGPMap();
        int ntaxa = 0;
        try {
            BufferedReader br = new BufferedReader(new FileReader(snpFilename));
            bw = new BufferedWriter(new FileWriter(outFilename));
            String header = br.readLine();
            String[] info = tab.split(header);
            int ncol = info.length;
            ntaxa = ncol - 11;
            if (hapmapFormat) {
                bw.write("rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode");
            } else {
                bw.write("Snp\tallele\tchr\tpos\tcm");
            }
            for (int t = 11; t < ncol; ++t) {
                bw.write("\t");
                bw.write(info[t]);
            }
            bw.newLine();
            br.close();
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        String[] family = new String[]{"Z001", "Z002", "Z003", "Z004", "Z005", "Z006", "Z007", "Z008", "Z009", "Z010", "Z011", "Z012", "Z013", "Z014", "Z015", "Z016", "Z018", "Z019", "Z020", "Z021", "Z022", "Z023", "Z024", "Z025", "Z026"};
        for (int chr = 1; chr <= 10; ++chr) {
            String chrstr = Integer.toString(chr);
            for (int fam = 0; fam < 25; ++fam) {
                System.out.println("Imputing data for chromosome " + chr + ", family " + family[fam] + ".");
                snpFilename = String.format(snpfilePattern, chr, family[fam]);
                GenotypeTable a = ImportUtils.readFromHapmap(snpFilename);
                int nsnps = a.numberOfSites();
                double startgenpos = agpmap.getCmFromPosition(chr, a.positions().chromosomalPosition(0));
                startgenpos = Math.ceil(startgenpos / interval) * interval;
                double endgenpos = agpmap.getCmFromPosition(chr, a.positions().chromosomalPosition(nsnps - 1));
                endgenpos = Math.floor(endgenpos / interval) * interval;
                int leftflank = 0;
                int rightflank = 0;
                try {
                    for (double curpos = startgenpos; curpos <= endgenpos; curpos += interval) {
                        byte rightByte;
                        byte leftByte;
                        int leftndx;
                        int rightndx;
                        int t;
                        int physpos = agpmap.getPositionFromCm(chr, curpos);
                        String physposString = Integer.toString(physpos);
                        String genpos = Double.toString(curpos);
                        bw.write("S_");
                        bw.write(physposString);
                        bw.write("\timputed\t");
                        bw.write(chrstr);
                        bw.write("\t");
                        bw.write(physposString);
                        bw.write("\t");
                        bw.write(genpos);
                        if (hapmapFormat) {
                            bw.write("\tNA\tNA\tNA\tNA\tNA\tNA");
                        }
                        while (physpos > a.positions().chromosomalPosition(rightflank)) {
                            ++rightflank;
                        }
                        leftflank = rightflank - 1;
                        if (hapmapFormat) {
                            for (t = 0; t < ntaxa; ++t) {
                                bw.write("\t");
                                rightndx = rightflank;
                                for (leftndx = leftflank; a.genotype(t, leftndx) == missingByte && leftndx > 0; --leftndx) {
                                }
                                while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) {
                                    ++rightndx;
                                }
                                leftByte = a.genotype(t, leftndx);
                                rightByte = a.genotype(t, rightndx);
                                if (leftByte == missingByte) {
                                    if (rightByte == missingByte) {
                                        bw.write("N");
                                        continue;
                                    }
                                    bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte));
                                    continue;
                                }
                                if (rightByte == missingByte) {
                                    bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
                                    continue;
                                }
                                if (a.genotype(t, leftndx) == a.genotype(t, rightndx)) {
                                    bw.write(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
                                    continue;
                                }
                                bw.write("N");
                            }
                        } else {
                            for (t = 0; t < ntaxa; ++t) {
                                bw.write("\t");
                                rightndx = rightflank;
                                for (leftndx = leftflank; a.genotype(t, leftndx) == missingByte && leftndx > 0; --leftndx) {
                                }
                                while (a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1) {
                                    ++rightndx;
                                }
                                leftByte = a.genotype(t, leftndx);
                                rightByte = a.genotype(t, rightndx);
                                if (leftByte == missingByte) {
                                    if (rightByte == missingByte) {
                                        bw.write("-");
                                        continue;
                                    }
                                    bw.write((String)byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte)));
                                    continue;
                                }
                                if (rightByte == missingByte) {
                                    bw.write((String)byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte)));
                                    continue;
                                }
                                if (a.genotype(t, leftndx) == a.genotype(t, rightndx)) {
                                    bw.write((String)byteToNumberString.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte)));
                                    continue;
                                }
                                double leftval = (Double)byteToNumber.get(NucleotideAlignmentConstants.getNucleotideIUPAC(leftByte));
                                double rightval = (Double)byteToNumber.get(NucleotideAlignmentConstants.getNucleotideIUPAC(rightByte));
                                int leftpos = a.positions().chromosomalPosition(leftndx);
                                int rightpos = a.positions().chromosomalPosition(rightndx);
                                double pd = (double)(physpos - leftpos) / (double)(rightpos - leftpos);
                                double thisval = leftval * (1.0 - pd) + rightval * pd;
                                bw.write(Double.toString(thisval));
                            }
                        }
                        bw.newLine();
                    }
                    continue;
                }
                catch (IOException e) {
                    e.printStackTrace();
                    System.exit(-1);
                }
            }
        }
        try {
            bw.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("Finished imputing markers.");
    }

    public static void imputeLinkageMarkersAcrossFamilies(double interval, boolean hapmapFormat, boolean excludeTaxa) {
        LinkedList<String> excludeList = ImputationUtils.getListOfTaxa("/Volumes/Macintosh HD 2/results/recombination study/nam/final.Panzea.consolidated.B/Nam.exclude.release.1.txt");
        String[] nuc = new String[]{"A", "M", "C"};
        Pattern tab = Pattern.compile("\t");
        byte missingByte = NucleotideAlignmentConstants.getNucleotideDiploidByte("NN");
        AGPMap agpmap = new AGPMap();
        for (int chr = 1; chr <= 10; ++chr) {
            File snpfiledir = new File("/Volumes/Macintosh HD 2/data/zea/build2.6/nam/imputed.var.plusfounders");
            final String chrname = "chr" + chr + ".family";
            File[] snpFiles = snpfiledir.listFiles(new FilenameFilter(){

                @Override
                public boolean accept(File dir, String name) {
                    return name.startsWith("USNAM_build2.6_imputed_var") && name.contains(chrname);
                }
            });
            String chrstr = Integer.toString(chr);
            int minpos = Integer.MAX_VALUE;
            int maxpos = 0;
            for (File snpfile : snpFiles) {
                try {
                    String testInput;
                    BufferedReader br = new BufferedReader(new FileReader(snpfile));
                    br.readLine();
                    String input = br.readLine();
                    String[] info = tab.split(input, 5);
                    int firstpos = Integer.parseInt(info[3]);
                    while ((testInput = br.readLine()) != null) {
                        input = testInput;
                    }
                    info = tab.split(input, 5);
                    int lastpos = Integer.parseInt(info[3]);
                    minpos = Math.min(minpos, firstpos);
                    maxpos = Math.max(maxpos, lastpos);
                    br.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                    System.exit(-1);
                }
            }
            double startgenpos = agpmap.getCmFromPosition(chr, minpos);
            startgenpos = Math.ceil(startgenpos / interval) * interval;
            int nIntervals = (int)((agpmap.getCmFromPosition(chr, maxpos) - startgenpos) / interval);
            int nImputedSnps = nIntervals + 1;
            class ImputedSnp {
                int physicalPos;
                double geneticPos;
                StringBuilder sb = new StringBuilder();

                ImputedSnp() {
                }
            }
            LinkedList<ImputedSnp> snpList = new LinkedList<ImputedSnp>();
            double curpos = startgenpos;
            for (int i = 0; i < nImputedSnps; ++i) {
                ImputedSnp snp = new ImputedSnp();
                snp.geneticPos = curpos;
                snp.physicalPos = agpmap.getPositionFromCm(chr, curpos += interval);
                snpList.add(snp);
            }
            StringBuilder taxaHeader = new StringBuilder();
            for (File snpfile : snpFiles) {
                System.out.println("Imputing data for " + snpfile.getName() + ".");
                GenotypeTable a = ImportUtils.readFromHapmap(snpfile.getPath());
                boolean b73isA = ImputationUtils.isB73HaplotypeA(a);
                HashMap<Byte, String> byteToNumberString = new HashMap<Byte, String>();
                HashMap<Byte, Double> byteToNumber = new HashMap<Byte, Double>();
                HashMap<Byte, String> byteToNucleotide = new HashMap<Byte, String>();
                if (b73isA) {
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "0");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "1");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "1");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "2");
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), 0.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), 1.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), 1.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), 2.0);
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "A");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "M");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "M");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "C");
                } else {
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "2");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "1");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "1");
                    byteToNumberString.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "0");
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), 2.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), 1.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), 1.0);
                    byteToNumber.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), 0.0);
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AA"), "C");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("AC"), "M");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CA"), "M");
                    byteToNucleotide.put(NucleotideAlignmentConstants.getNucleotideDiploidByte("CC"), "A");
                }
                int nsnps = a.numberOfSites();
                int ntaxa = a.numberOfTaxa();
                int leftflank = 0;
                int rightflank = 0;
                TaxaList myTaxa = a.taxa();
                for (int t = 0; t < ntaxa; ++t) {
                    if (!ImputationUtils.useTaxon(myTaxa.taxaName(t), excludeList)) continue;
                    taxaHeader.append("\t").append(myTaxa.taxaName(t));
                }
                for (ImputedSnp isnp : snpList) {
                    byte rightByte;
                    int leftndx;
                    byte leftByte;
                    int t;
                    while (rightflank < nsnps && isnp.physicalPos > a.positions().chromosomalPosition(rightflank)) {
                        ++rightflank;
                    }
                    leftflank = rightflank - 1;
                    if (hapmapFormat) {
                        for (t = 0; t < ntaxa; ++t) {
                            if (!ImputationUtils.useTaxon(a.taxa().taxaName(t), excludeList)) continue;
                            isnp.sb.append("\t");
                            if (leftflank < 0) {
                                leftByte = missingByte;
                            } else {
                                for (leftndx = leftflank; a.genotype(t, leftndx) == missingByte && leftndx > 0; --leftndx) {
                                }
                                leftByte = a.genotype(t, leftndx);
                            }
                            if (rightflank > nsnps - 1) {
                                rightByte = missingByte;
                            } else {
                                int rightndx;
                                for (rightndx = rightflank; a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1; ++rightndx) {
                                }
                                rightByte = a.genotype(t, rightndx);
                            }
                            if (leftByte == missingByte) {
                                if (rightByte == missingByte) {
                                    isnp.sb.append("N");
                                    continue;
                                }
                                isnp.sb.append((String)byteToNucleotide.get(rightByte));
                                continue;
                            }
                            if (rightByte == missingByte) {
                                isnp.sb.append((String)byteToNucleotide.get(leftByte));
                                continue;
                            }
                            if (leftByte == rightByte) {
                                isnp.sb.append((String)byteToNucleotide.get(leftByte));
                                continue;
                            }
                            isnp.sb.append("N");
                        }
                        continue;
                    }
                    for (t = 0; t < ntaxa; ++t) {
                        int rightndx;
                        if (!ImputationUtils.useTaxon(a.taxa().taxaName(t), excludeList)) continue;
                        isnp.sb.append("\t");
                        if (leftflank < 0) {
                            leftByte = missingByte;
                        } else {
                            for (leftndx = leftflank; a.genotype(t, leftndx) == missingByte && leftndx > 0; --leftndx) {
                            }
                            leftByte = a.genotype(t, leftndx);
                        }
                        if (rightflank > nsnps - 1) {
                            rightByte = missingByte;
                        } else {
                            for (rightndx = rightflank; a.genotype(t, rightndx) == missingByte && rightndx < nsnps - 1; ++rightndx) {
                            }
                            rightByte = a.genotype(t, rightndx);
                        }
                        if (leftByte == missingByte) {
                            if (rightByte == missingByte) {
                                isnp.sb.append("-");
                                continue;
                            }
                            isnp.sb.append((String)byteToNumberString.get(rightByte));
                            continue;
                        }
                        if (rightByte == missingByte) {
                            isnp.sb.append((String)byteToNumberString.get(leftByte));
                            continue;
                        }
                        if (leftByte == rightByte) {
                            isnp.sb.append((String)byteToNumberString.get(rightByte));
                            continue;
                        }
                        double leftval = (Double)byteToNumber.get(leftByte);
                        double rightval = (Double)byteToNumber.get(rightByte);
                        int leftpos = a.positions().chromosomalPosition(leftndx);
                        int rightpos = a.positions().chromosomalPosition(rightndx);
                        double pd = (double)(isnp.physicalPos - leftpos) / (double)(rightpos - leftpos);
                        double thisval = leftval * (1.0 - pd) + rightval * pd;
                        isnp.sb.append(Double.toString(thisval));
                    }
                }
            }
            File outfile = hapmapFormat ? new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr + "." + interval + "cm.USNAM2.6.imputed.var.hmp.txt") : new File(snpfiledir, "imputed.Markers/imputedMarkers.chr" + chrstr + "." + interval + "cm.USNAM2.6.imputed.var.txt");
            try {
                BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
                if (hapmapFormat) {
                    bw.write("rs#\talleles\tchrom\tpos\tcm\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode");
                } else {
                    bw.write("Snp\tallele\tchr\tpos\tcm");
                }
                bw.write(taxaHeader.toString());
                bw.write("\n");
                for (ImputedSnp isnp : snpList) {
                    bw.write(String.format("S%d_%d\tNA\t", chr, isnp.physicalPos));
                    bw.write(chrstr);
                    bw.write("\t");
                    bw.write(Integer.toString(isnp.physicalPos));
                    bw.write("\t");
                    bw.write(Double.toString(isnp.geneticPos));
                    if (hapmapFormat) {
                        bw.write("\tNA\tNA\tNA\tNA\tNA\tNA");
                    }
                    bw.write(isnp.sb.toString());
                    bw.write("\n");
                }
                bw.close();
                continue;
            }
            catch (IOException e) {
                // empty catch block
            }
        }
    }

    public static boolean useTaxon(String name, LinkedList<String> excludelist) {
        return excludelist != null ? name.startsWith("Z0") & !excludelist.contains(name) : name.startsWith("Z0");
    }

    public static boolean isB73HaplotypeA(GenotypeTable a) {
        TaxaList myTaxa = a.taxa();
        int ndx = myTaxa.indexOf("B73(PI550473):MRG:2:250027110");
        int nsnps = a.numberOfSites();
        HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
        for (int s = 0; s < nsnps; ++s) {
            Byte allele = a.genotype(ndx, s);
            Integer count = (Integer)alleleCounts.get(allele);
            if (count == null) {
                alleleCounts.put(allele, 1);
                continue;
            }
            alleleCounts.put(allele, 1 + count);
        }
        Byte bestAllele = -1;
        int maxCount = 0;
        for (Byte b : alleleCounts.keySet()) {
            int count = (Integer)alleleCounts.get(b);
            if (count <= maxCount) continue;
            maxCount = count;
            bestAllele = b;
        }
        return bestAllele == NucleotideAlignmentConstants.getNucleotideDiploidByte('A');
    }

    public static void imputeLinkageMarkersFrom1106(double interval) {
        String snpFilename = "/Volumes/Macintosh HD 2/data/namgbs/ImputedMarkerGenotypes_flowering_traits_092909.txt";
        String outFilename = "/Volumes/Macintosh HD 2/results/namgbs/jointlinkage/array1106/imputedMarkers.1106.allchr.txt";
        ArrayList<float[]> genotypes = new ArrayList<float[]>();
        ArrayList<String> taxanames = new ArrayList<String>();
        int nmarkers = 1106;
        AGPMap agpmap = new AGPMap();
        BufferedWriter bw = null;
        try {
            String input;
            BufferedReader br = new BufferedReader(new FileReader(snpFilename));
            br.readLine();
            while ((input = br.readLine()) != null) {
                String[] info = tab.split(input);
                float[] geno = new float[nmarkers];
                for (int i = 0; i < nmarkers; ++i) {
                    geno[i] = Float.parseFloat(info[i + 5]);
                }
                genotypes.add(geno);
                taxanames.add(info[0]);
            }
            br.close();
            bw = new BufferedWriter(new FileWriter(outFilename));
            bw.write("Snp\tallele\tchr\tpos\tcm");
            for (String taxon : taxanames) {
                bw.write("\t");
                bw.write(taxon);
            }
            bw.write("\n");
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        int ntaxa = taxanames.size();
        for (int chr = 1; chr <= 10; ++chr) {
            String chrstr = Integer.toString(chr);
            double startgenpos = agpmap.getFirstGeneticPosition(chr);
            startgenpos = Math.floor(startgenpos / interval) * interval;
            double endgenpos = agpmap.getLastGeneticPosition(chr);
            endgenpos = Math.ceil(endgenpos / interval) * interval;
            try {
                for (double curpos = startgenpos; curpos <= endgenpos; curpos += interval) {
                    int physpos = agpmap.getPositionFromCm(chr, curpos);
                    String physposString = Integer.toString(physpos);
                    String genpos = Double.toString(curpos);
                    bw.write("S");
                    bw.write(chrstr);
                    bw.write("_");
                    bw.write(physposString);
                    bw.write("\timputed\t");
                    bw.write(chrstr);
                    bw.write("\t");
                    bw.write(physposString);
                    bw.write("\t");
                    bw.write(genpos);
                    int[] flanks = agpmap.getFlankingMarkerIndices(chr, curpos);
                    for (int t = 0; t < ntaxa; ++t) {
                        double val;
                        bw.write("\t");
                        if (flanks[0] < 0) {
                            val = ((float[])genotypes.get(t))[flanks[1]];
                        } else if (flanks[1] >= nmarkers) {
                            val = ((float[])genotypes.get(t))[flanks[0]];
                        } else if (flanks[0] == flanks[1]) {
                            val = ((float[])genotypes.get(t))[flanks[0]];
                        } else {
                            double pd = (curpos - agpmap.getGeneticPos(flanks[0])) / (agpmap.getGeneticPos(flanks[1]) - agpmap.getGeneticPos(flanks[0]));
                            val = (double)((float[])genotypes.get(t))[flanks[0]] * (1.0 - pd) + (double)((float[])genotypes.get(t))[flanks[1]] * pd;
                        }
                        bw.write(Double.toString(val));
                    }
                    bw.newLine();
                }
                continue;
            }
            catch (IOException e) {
                e.printStackTrace();
                System.exit(-1);
            }
        }
        try {
            bw.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("Finished imputing markers.");
    }

    public static LinkedList<String> getListOfTaxa(String filename) {
        LinkedList<String> taxaList = new LinkedList<String>();
        try {
            String taxon;
            BufferedReader br = new BufferedReader(new FileReader(filename));
            while ((taxon = br.readLine()) != null) {
                taxaList.add(taxon);
            }
            br.close();
        }
        catch (IOException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        return taxaList;
    }
}

