/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.utils.clustering;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.VectorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Dumps clusters stored in sequence files as text, either to a file or to the console.
 *
 * <p>For every cluster read from {@code seqFileDir/part-*} the dumper writes the cluster's
 * {@code asFormatString} representation, then (when a dictionary is available) the
 * highest-weighted terms of the cluster center, then (when points were loaded from a points
 * directory) the weighted points assigned to that cluster.
 */
public final class ClusterDumper extends AbstractJob {
    public static final String OUTPUT_OPTION = "output";
    public static final String DICTIONARY_TYPE_OPTION = "dictionaryType";
    public static final String DICTIONARY_OPTION = "dictionary";
    public static final String POINTS_DIR_OPTION = "pointsDir";
    public static final String NUM_WORDS_OPTION = "numWords";
    public static final String SUBSTRING_OPTION = "substring";
    public static final String SEQ_FILE_DIR_OPTION = "seqFileDir";
    private static final Logger log = LoggerFactory.getLogger(ClusterDumper.class);

    private Path seqFileDir;
    private Path pointsDir;
    private String termDictionary;
    private String dictionaryFormat;
    private String outputFile;
    /** Maximum number of characters of the formatted cluster to print; effectively unlimited by default. */
    private int subString = Integer.MAX_VALUE;
    /** Number of top-weighted terms printed per cluster when a dictionary is available. */
    private int numTopFeatures = 10;
    // Defaults to an empty map so printClusters() is safe on an instance built with the no-arg
    // constructor and configured through setters only (previously this was null until init() ran).
    private Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints = Collections.emptyMap();

    /**
     * Creates a dumper for the given cluster directory and immediately loads the clustered points.
     *
     * @param seqFileDir directory holding the cluster sequence files ({@code part-*})
     * @param pointsDir  directory holding the clustered points, or {@code null} to skip point output
     */
    public ClusterDumper(Path seqFileDir, Path pointsDir) {
        this.seqFileDir = seqFileDir;
        this.pointsDir = pointsDir;
        init();
    }

    /** Creates an unconfigured dumper; intended to be configured through {@link #run(String[])}. */
    public ClusterDumper() {
        setConf(new Configuration());
    }

    public static void main(String[] args) throws Exception {
        new ClusterDumper().run(args);
    }

    /**
     * Parses the command-line options, loads the points (if a points directory was given) and
     * prints the clusters.
     *
     * @param args command-line arguments
     * @return {@code -1} when {@code parseArguments} returns {@code null}, {@code 0} otherwise
     * @throws Exception if argument parsing, reading or writing fails
     */
    public int run(String[] args) throws Exception {
        addOption(SEQ_FILE_DIR_OPTION, "s", "The directory containing Sequence Files for the Clusters", true);
        addOption(OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console.");
        addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
        addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
        addOption(POINTS_DIR_OPTION, "p", "The directory containing points sequence files mapping input vectors to their cluster.  If specified, then the program will output the points associated with a cluster");
        addOption(DICTIONARY_OPTION, "d", "The dictionary file");
        addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text");
        if (parseArguments(args) == null) {
            return -1;
        }

        seqFileDir = new Path(getOption(SEQ_FILE_DIR_OPTION));
        if (hasOption(POINTS_DIR_OPTION)) {
            pointsDir = new Path(getOption(POINTS_DIR_OPTION));
        }
        outputFile = getOption(OUTPUT_OPTION);
        if (hasOption(SUBSTRING_OPTION)) {
            int sub = Integer.parseInt(getOption(SUBSTRING_OPTION));
            // Negative values are ignored and leave the default in place.
            if (sub >= 0) {
                subString = sub;
            }
        }
        termDictionary = getOption(DICTIONARY_OPTION);
        dictionaryFormat = getOption(DICTIONARY_TYPE_OPTION);
        if (hasOption(NUM_WORDS_OPTION)) {
            numTopFeatures = Integer.parseInt(getOption(NUM_WORDS_OPTION));
        }

        init();
        printClusters(null);
        return 0;
    }

    /**
     * Writes every cluster found under {@code seqFileDir/part-*} to the configured output file, or
     * to {@code System.out} when no output file is set.
     *
     * @param dictionary term dictionary used for formatting; replaced by the dictionary loaded from
     *                   the configured dictionary file when one is set; may be {@code null}
     * @throws IOException              if reading the dictionary or writing the output fails
     * @throws IllegalArgumentException if a dictionary file is set with an unknown format
     */
    public void printClusters(String[] dictionary) throws IOException {
        Configuration conf = new Configuration();

        if (termDictionary != null) {
            if ("text".equals(dictionaryFormat)) {
                dictionary = VectorHelper.loadTermDictionary(new File(termDictionary));
            } else if ("sequencefile".equals(dictionaryFormat)) {
                dictionary = VectorHelper.loadTermDictionary(conf, termDictionary);
            } else {
                throw new IllegalArgumentException("Invalid dictionary format");
            }
        }

        boolean shouldClose = outputFile != null;
        Writer writer = shouldClose
            ? Files.newWriter(new File(outputFile), Charsets.UTF_8)
            : new OutputStreamWriter(System.out);
        try {
            Path clusterGlob = new Path(seqFileDir, "part-*");
            for (Cluster cluster
                : new SequenceFileDirValueIterable<Cluster>(clusterGlob, PathType.GLOB, conf)) {
                writeCluster(writer, cluster, dictionary);
            }
        } finally {
            if (shouldClose) {
                writer.close();
            } else {
                // System.out must stay open, but the writer buffers internally: without a flush
                // the tail of the output (or all of it, for small dumps) would never be printed.
                writer.flush();
            }
        }
    }

    /** Writes one cluster: its formatted description, its top terms and its points, as available. */
    private void writeCluster(Writer writer, Cluster cluster, String[] dictionary) throws IOException {
        String formatted = cluster.asFormatString(dictionary);
        if (subString > 0 && formatted.length() > subString) {
            // Truncated output is prefixed with ':' and cut to subString characters.
            writer.write(':');
            writer.write(formatted, 0, subString);
        } else {
            writer.write(formatted);
        }
        writer.write('\n');

        if (dictionary != null) {
            writer.write("\tTop Terms: ");
            writer.write(getTopFeatures(cluster.getCenter(), dictionary, numTopFeatures));
            writer.write('\n');
        }

        List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId());
        if (points != null) {
            writePoints(writer, points, dictionary);
        }
    }

    /** Writes the "weight: vector" listing for the points of one cluster, one point per line. */
    private static void writePoints(Writer writer, List<WeightedVectorWritable> points, String[] dictionary)
        throws IOException {
        writer.write("\tWeight:  Point:\n\t");
        Iterator<WeightedVectorWritable> remaining = points.iterator();
        while (remaining.hasNext()) {
            WeightedVectorWritable point = remaining.next();
            writer.write(String.valueOf(point.getWeight()));
            writer.write(": ");
            writer.write(AbstractCluster.formatVector(point.getVector(), dictionary));
            if (remaining.hasNext()) {
                writer.write("\n\t");
            }
        }
        writer.write('\n');
    }

    /** Loads the cluster-id to points mapping when a points directory is set, else uses an empty map. */
    private void init() {
        if (pointsDir != null) {
            clusterIdToPoints = readPoints(pointsDir, new Configuration());
        } else {
            clusterIdToPoints = Collections.emptyMap();
        }
    }

    public String getOutputFile() {
        return outputFile;
    }

    public void setOutputFile(String outputFile) {
        this.outputFile = outputFile;
    }

    public int getSubString() {
        return subString;
    }

    public void setSubString(int subString) {
        this.subString = subString;
    }

    public Map<Integer, List<WeightedVectorWritable>> getClusterIdToPoints() {
        return clusterIdToPoints;
    }

    public String getTermDictionary() {
        return termDictionary;
    }

    /**
     * Sets the dictionary file and its format.
     *
     * @param termDictionary path of the dictionary file
     * @param dictionaryType {@code "text"} or {@code "sequencefile"}; any other value makes
     *                       {@link #printClusters(String[])} throw {@link IllegalArgumentException}
     */
    public void setTermDictionary(String termDictionary, String dictionaryType) {
        this.termDictionary = termDictionary;
        this.dictionaryFormat = dictionaryType;
    }

    public void setNumTopFeatures(int num) {
        this.numTopFeatures = num;
    }

    public int getNumTopFeatures() {
        return numTopFeatures;
    }

    /**
     * Reads all clustered points below the given directory and groups them by cluster id.
     *
     * @param pointsPathDir directory containing the points sequence files
     * @param conf          Hadoop configuration used to read the files
     * @return a map, sorted by cluster id, from cluster id to the points read for that id
     */
    public static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, Configuration conf) {
        Map<Integer, List<WeightedVectorWritable>> result =
            new TreeMap<Integer, List<WeightedVectorWritable>>();
        for (Pair<IntWritable, WeightedVectorWritable> record
            : new SequenceFileDirIterable<IntWritable, WeightedVectorWritable>(
                pointsPathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            int clusterId = record.getFirst().get();
            List<WeightedVectorWritable> pointList = result.get(clusterId);
            if (pointList == null) {
                pointList = new ArrayList<WeightedVectorWritable>();
                result.put(clusterId, pointList);
            }
            pointList.add(record.getSecond());
        }
        return result;
    }

    /**
     * Formats the highest-weighted non-zero entries of a vector as "term => weight" lines.
     *
     * <p>At most {@code numTerms} of the heaviest entries are considered. Entries whose index has
     * no dictionary term (a {@code null} slot or an index beyond the end of the dictionary) are
     * logged and skipped; they still count towards {@code numTerms}.
     *
     * @param vector     vector whose non-zero entries are ranked by descending weight
     * @param dictionary term for each vector index; must not be {@code null}
     * @param numTerms   maximum number of entries to consider
     * @return one {@code "\n\t\t"}-prefixed line per printed term, or an empty string
     */
    public static String getTopFeatures(Vector vector, String[] dictionary, int numTerms) {
        List<TermIndexWeight> vectorTerms = new ArrayList<TermIndexWeight>();
        Iterator<Vector.Element> iter = vector.iterateNonZero();
        while (iter.hasNext()) {
            Vector.Element elt = iter.next();
            vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
        }

        // Heaviest terms first.
        Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
            @Override
            public int compare(TermIndexWeight one, TermIndexWeight two) {
                return Double.compare(two.weight, one.weight);
            }
        });

        StringBuilder sb = new StringBuilder(100);
        int limit = Math.min(vectorTerms.size(), numTerms);
        for (int i = 0; i < limit; ++i) {
            TermIndexWeight entry = vectorTerms.get(i);
            // An index past the end of the dictionary is handled like a null slot instead of
            // aborting the whole dump with an ArrayIndexOutOfBoundsException.
            String dictTerm = entry.index < dictionary.length ? dictionary[entry.index] : null;
            if (dictTerm == null) {
                log.error("Dictionary entry missing for {}", (Object) entry.index);
                continue;
            }
            sb.append("\n\t\t");
            sb.append(StringUtils.rightPad(dictTerm, 40));
            sb.append("=>");
            sb.append(StringUtils.leftPad(Double.toString(entry.weight), 20));
        }
        return sb.toString();
    }

    /** Immutable (vector index, weight) pair used to rank the entries of a cluster center. */
    private static class TermIndexWeight {
        private final int index;
        private final double weight;

        TermIndexWeight(int index, double weight) {
            this.index = index;
            this.weight = weight;
        }
    }
}

