/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.example.bigram;

import com.google.common.collect.Iterators;
import edu.umd.cloud9.io.SequenceFileUtils;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;
import tl.lin.data.pair.PairOfWritables;

/**
 * Command-line tool that loads (bigram, count) pairs from a directory of
 * sequence files and prints summary statistics: the number of distinct
 * bigrams, the sum of all counts, the number of bigrams whose count is
 * exactly one, and the most frequent bigrams.
 *
 * <p>Usage: {@code AnalyzeBigramCount -input <path>}
 */
public class AnalyzeBigramCount {
    /** Name of the required command-line option holding the input path. */
    private static final String INPUT = "input";

    /** How many of the most frequent bigrams are printed. */
    private static final int TOP_K = 10;

    /**
     * Parses the command line, reads every record under the {@code -input}
     * path, and prints the statistics to standard output.
     *
     * <p>Exits with status {@code -1} when the command line cannot be parsed
     * or the {@code -input} option is missing.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        // OptionBuilder accumulates its settings in static state, so these
        // calls configure the single option produced by create(INPUT).
        Options options = new Options();
        OptionBuilder.withArgName("path");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("input path");
        options.addOption(OptionBuilder.create(INPUT));

        CommandLine cmdline = null;
        GnuParser parser = new GnuParser();
        try {
            cmdline = parser.parse(options, args);
        }
        catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
            System.exit(-1);
        }

        if (!cmdline.hasOption(INPUT)) {
            System.out.println("args: " + Arrays.toString(args));
            HelpFormatter formatter = new HelpFormatter();
            formatter.setWidth(120);
            formatter.printHelp(AnalyzeBigramCount.class.getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            System.exit(-1);
        }

        String inputPath = cmdline.getOptionValue(INPUT);
        System.out.println("input path: " + inputPath);

        // Fully parameterized so elements need no casts and the sort below is type-checked.
        List<PairOfWritables<Text, IntWritable>> bigrams =
                SequenceFileUtils.readDirectory(new Path(inputPath));

        // Order by count, highest first; equal counts fall back to the
        // natural ordering of the bigram text so the output is deterministic.
        Collections.sort(bigrams, new Comparator<PairOfWritables<Text, IntWritable>>(){

            @Override
            public int compare(PairOfWritables<Text, IntWritable> e1, PairOfWritables<Text, IntWritable> e2) {
                int byCount = e2.getRightElement().compareTo(e1.getRightElement());
                if (byCount != 0) {
                    return byCount;
                }
                return e1.getLeftElement().compareTo(e2.getLeftElement());
            }
        });

        int singletons = 0;
        // long: the sum of all counts can exceed Integer.MAX_VALUE on a large
        // corpus, and an int accumulator would wrap silently.
        long sum = 0L;
        for (PairOfWritables<Text, IntWritable> bigram : bigrams) {
            int count = bigram.getRightElement().get();
            sum += count;
            if (count == 1) {
                ++singletons;
            }
        }

        System.out.println("total number of unique bigrams: " + bigrams.size());
        System.out.println("total number of bigrams: " + sum);
        System.out.println("number of bigrams that appear only once: " + singletons);
        System.out.println("\nten most frequent bigrams: ");

        // The list is already sorted by descending count, so the first TOP_K
        // entries are the most frequent; limit() copes with shorter lists.
        Iterator<PairOfWritables<Text, IntWritable>> iter = Iterators.limit(bigrams.iterator(), TOP_K);
        while (iter.hasNext()) {
            PairOfWritables<Text, IntWritable> bigram = iter.next();
            System.out.println(bigram.getLeftElement() + "\t" + bigram.getRightElement());
        }
    }
}

