/*
 * Decompiled with CFR 0.152.
 */
package cmu.arktweetnlp;

import cmu.arktweetnlp.Tagger;
import cmu.arktweetnlp.Twokenize;
import cmu.arktweetnlp.impl.ModelSentence;
import cmu.arktweetnlp.impl.Sentence;
import cmu.arktweetnlp.impl.features.WordClusterPaths;
import cmu.arktweetnlp.io.CoNLLReader;
import cmu.arktweetnlp.io.JsonTweetReader;
import cmu.arktweetnlp.util.BasicFileIO;
import edu.stanford.nlp.util.StringUtils;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashSet;

/**
 * Command-line driver that tokenizes tweets and, unless {@link #justTokenize} is set,
 * tags them with a {@link Tagger}.
 *
 * <p>Two modes are supported, selected by {@link #inputFormat}:
 * <ul>
 *   <li>{@code "json"} / {@code "text"} / {@code "auto"}: one tweet per input line (the line is
 *       split on tabs and field {@link #inputField} is used); results are written to
 *       {@link #outputStream}.</li>
 *   <li>{@code "conll"}: the file is read with {@link CoNLLReader}, each sentence is tagged and
 *       compared against the labels it came with, and an accuracy summary is printed to stderr.</li>
 * </ul>
 *
 * <p>Typical use mirrors {@link #main(String[])}: set the option fields, call
 * {@link #finalizeOptions()}, then {@link #runTagger()}.
 */
public class RunTagger {
    Tagger tagger;

    // Option fields; main() fills these in from the command line.
    String inputFormat = "auto";
    String outputFormat = "auto";
    int inputField = 1;
    String inputFilename;
    String modelFilename = "/cmu/arktweetnlp/model.20120919";
    public boolean noOutput = false;
    public boolean justTokenize = false;
    public Decoder decoder = Decoder.GREEDY;
    public boolean showConfidence = true;

    // Assigned in the constructor because creating it can throw a checked exception.
    PrintStream outputStream;
    Iterable<Sentence> inputIterable = null;

    // Lazily built by wordsInCluster().
    private static HashSet<String> _wordsInCluster;

    // Evaluation counters, accumulated across sentences.
    int numTokensCorrect = 0;
    int numTokens = 0;
    int oovTokensCorrect = 0;
    int oovTokens = 0;
    int clusterTokensCorrect = 0;
    int clusterTokens = 0;

    /**
     * Creates a runner that writes UTF-8 encoded, auto-flushed output to {@code System.out}.
     *
     * @throws UnsupportedEncodingException if the runtime does not support UTF-8
     */
    public RunTagger() throws UnsupportedEncodingException {
        this.outputStream = new PrintStream((OutputStream) System.out, true, "UTF-8");
    }

    /**
     * Prints {@code string} to stderr and terminates the JVM with exit status -1.
     *
     * @param string the message to print
     */
    public static void die(String string) {
        System.err.println(string);
        System.exit(-1);
    }

    /**
     * Sets {@link #inputFormat} to {@code "json"} or {@code "text"} depending on whether
     * {@link JsonTweetReader#isJson} accepts the given sample, and reports the choice on stderr.
     *
     * @param string a sample of the input (the tweet field of the first line)
     * @throws IOException declared for callers; not thrown by the code visible here
     */
    public void detectAndSetInputFormat(String string) throws IOException {
        JsonTweetReader jsonTweetReader = new JsonTweetReader();
        if (jsonTweetReader.isJson(string)) {
            System.err.println("Detected JSON input format");
            this.inputFormat = "json";
        } else {
            System.err.println("Detected text input format");
            this.inputFormat = "text";
        }
    }

    /**
     * Runs the tool over {@link #inputFilename}.
     *
     * <p>Creates the {@link Tagger}, loads the model unless {@link #justTokenize} is set, and then
     * either delegates to {@link #runTaggerInEvalMode()} (conll input) or processes the input one
     * line per tweet, writing each result to {@link #outputStream} and a throughput summary to
     * stderr.
     *
     * @throws IOException if reading the input fails
     * @throws ClassNotFoundException propagated from model loading / evaluation
     */
    public void runTagger() throws IOException, ClassNotFoundException {
        this.tagger = new Tagger();
        if (!this.justTokenize) {
            this.tagger.loadModel(this.modelFilename);
        }
        if (this.inputFormat.equals("conll")) {
            this.runTaggerInEvalMode();
            return;
        }

        JsonTweetReader jsonTweetReader = new JsonTweetReader();
        LineNumberReader reader = new LineNumberReader(BasicFileIO.openFileToReadUTF8(this.inputFilename));
        // try/finally so the reader is released even when a line fails to process.
        try {
            long startMillis = System.currentTimeMillis();
            int tokenCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                String tweetData = fields[this.inputField - 1];

                // Auto-detection looks only at the first line of input.
                if (reader.getLineNumber() == 1 && this.inputFormat.equals("auto")) {
                    this.detectAndSetInputFormat(tweetData);
                }

                String text;
                if (this.inputFormat.equals("json")) {
                    text = jsonTweetReader.getText(tweetData);
                    if (text == null) {
                        System.err.println("Warning, null text (JSON parse error?), using blank string instead");
                        text = "";
                    }
                } else {
                    text = tweetData;
                }

                Sentence sentence = new Sentence();
                sentence.tokens = Twokenize.tokenizeRawTweetText(text);

                // Stays null for empty sentences and in tokenize-only mode.
                ModelSentence modelSentence = null;
                if (sentence.T() > 0 && !this.justTokenize) {
                    modelSentence = new ModelSentence(sentence.T());
                    this.tagger.featureExtractor.computeFeatures(sentence, modelSentence);
                    this.goDecode(modelSentence);
                }

                if (this.outputFormat.equals("conll")) {
                    this.outputJustTagging(sentence, modelSentence);
                } else {
                    this.outputPrependedTagging(sentence, modelSentence, this.justTokenize, line);
                }
                tokenCount += sentence.T();
            }

            long finishMillis = System.currentTimeMillis();
            double elapsedSeconds = (double) (finishMillis - startMillis) / 1000.0;
            int tweetCount = reader.getLineNumber();
            System.err.printf("Tokenized%s %d tweets (%d tokens) in %.1f seconds: %.1f tweets/sec, %.1f tokens/sec\n", this.justTokenize ? "" : " and tagged", tweetCount, tokenCount, elapsedSeconds, (double) tweetCount / elapsedSeconds, (double) tokenCount / elapsedSeconds);
        } finally {
            reader.close();
        }
    }

    /**
     * Runs the decoder selected by {@link #decoder} on the given sentence.
     *
     * @param modelSentence the sentence whose features have already been computed
     */
    public void goDecode(ModelSentence modelSentence) {
        if (this.decoder == Decoder.GREEDY) {
            this.tagger.model.greedyDecode(modelSentence, this.showConfidence);
        } else if (this.decoder == Decoder.VITERBI) {
            this.tagger.model.viterbiDecode(modelSentence);
        }
    }

    /**
     * Tags every sentence of the CoNLL file {@link #inputFilename}, optionally prints the
     * taggings (unless {@link #noOutput}), and reports accuracy and throughput on stderr.
     *
     * <p>Expects {@link #tagger} to have been created already; {@link #runTagger()} does that.
     *
     * @throws IOException if the input cannot be read
     * @throws ClassNotFoundException declared for callers; not thrown by the code visible here
     */
    public void runTaggerInEvalMode() throws IOException, ClassNotFoundException {
        long startMillis = System.currentTimeMillis();
        int sentenceCount = 0;
        ArrayList<Sentence> sentences = CoNLLReader.readFile(this.inputFilename);
        this.inputIterable = sentences;

        for (Sentence sentence : sentences) {
            ++sentenceCount;
            ModelSentence modelSentence = new ModelSentence(sentence.T());
            this.tagger.featureExtractor.computeFeatures(sentence, modelSentence);
            this.goDecode(modelSentence);
            if (!this.noOutput) {
                this.outputJustTagging(sentence, modelSentence);
            }
            this.evaluateSentenceTagging(sentence, modelSentence);
        }

        double accuracy = (double) this.numTokensCorrect * 1.0 / (double) this.numTokens;
        System.err.printf("%d / %d correct = %.4f acc, %.4f err\n", this.numTokensCorrect, this.numTokens, accuracy, 1.0 - accuracy);
        double elapsedSeconds = (double) (System.currentTimeMillis() - startMillis) / 1000.0;
        System.err.printf("%d tweets in %.1f seconds, %.1f tweets/sec\n", sentenceCount, elapsedSeconds, (double) sentenceCount * 1.0 / elapsedSeconds);
    }

    /**
     * Updates {@link #oovTokens} / {@link #oovTokensCorrect} for the tokens of one sentence.
     *
     * <p>NOTE(review): the counters are named "oov", but a token is counted only when it IS
     * contained in {@link #wordsInCluster()}. Behavior is kept as found; confirm the intent
     * before relying on these numbers.
     */
    private void evaluateOOV(Sentence sentence, ModelSentence modelSentence) throws FileNotFoundException, IOException, ClassNotFoundException {
        for (int t = 0; t < modelSentence.T; ++t) {
            int trueLabel = this.tagger.model.labelVocab.num(sentence.labels.get(t));
            int predLabel = modelSentence.labels[t];
            if (RunTagger.wordsInCluster().contains(sentence.tokens.get(t))) {
                this.oovTokensCorrect += trueLabel == predLabel ? 1 : 0;
                ++this.oovTokens;
            }
        }
    }

    /**
     * Adds one sentence to a confusion matrix indexed as {@code [trueLabel][predictedLabel]}.
     * Tokens whose true label maps to -1 are skipped (presumably labels unknown to the
     * vocabulary; confirm against {@code labelVocab.num}).
     */
    private void getconfusion(Sentence sentence, ModelSentence modelSentence, int[][] nArray) {
        for (int t = 0; t < modelSentence.T; ++t) {
            int trueLabel = this.tagger.model.labelVocab.num(sentence.labels.get(t));
            int predLabel = modelSentence.labels[t];
            if (trueLabel != -1) {
                nArray[trueLabel][predLabel]++;
            }
        }
    }

    /**
     * Compares the predicted labels against the sentence's own labels and accumulates the result
     * into {@link #numTokens} and {@link #numTokensCorrect}.
     *
     * @param sentence      the sentence carrying the reference labels
     * @param modelSentence the decoded sentence carrying the predicted label ids
     */
    public void evaluateSentenceTagging(Sentence sentence, ModelSentence modelSentence) {
        for (int t = 0; t < modelSentence.T; ++t) {
            int trueLabel = this.tagger.model.labelVocab.num(sentence.labels.get(t));
            int predLabel = modelSentence.labels[t];
            if (trueLabel == predLabel) {
                ++this.numTokensCorrect;
            }
            ++this.numTokens;
        }
    }

    /** Formats a confidence value with four digits after the decimal point. */
    private String formatConfidence(double d) {
        return String.format("%.4f", d);
    }

    /**
     * Writes one token per line as {@code token \t tag [\t confidence]}, followed by an empty
     * line. Only valid when {@link #outputFormat} is {@code "conll"}; any other format
     * terminates the program via {@link #die(String)}.
     *
     * @param sentence      the tokenized sentence
     * @param modelSentence the decoded sentence; must be non-null when the sentence has tokens
     */
    public void outputJustTagging(Sentence sentence, ModelSentence modelSentence) {
        if (!this.outputFormat.equals("conll")) {
            RunTagger.die("bad output format for just tagging: " + this.outputFormat);
            return;
        }
        for (int t = 0; t < sentence.T(); ++t) {
            this.outputStream.printf("%s\t%s", sentence.tokens.get(t), this.tagger.model.labelVocab.name(modelSentence.labels[t]));
            if (modelSentence.confidences != null) {
                this.outputStream.printf("\t%s", this.formatConfidence(modelSentence.confidences[t]));
            }
            this.outputStream.print("\n");
        }
        this.outputStream.println("");
    }

    /**
     * Writes one output line consisting of the joined tokens, then (unless {@code bl}) the joined
     * tags, then (if {@link #showConfidence}) the joined confidences, then the original input
     * line, all tab-separated.
     *
     * @param sentence      the tokenized sentence
     * @param modelSentence the decoded sentence; only dereferenced when the sentence has tokens
     * @param bl            true when running tokenize-only, which suppresses the tag column
     * @param string        the original input line, appended unchanged
     */
    public void outputPrependedTagging(Sentence sentence, ModelSentence modelSentence, boolean bl, String string) {
        int length = sentence.T();
        String[] tokens = new String[length];
        String[] tags = new String[length];
        String[] confidences = new String[length];
        for (int t = 0; t < length; ++t) {
            tokens[t] = sentence.tokens.get(t);
            if (!bl) {
                tags[t] = this.tagger.model.labelVocab.name(modelSentence.labels[t]);
            }
            if (this.showConfidence) {
                confidences[t] = this.formatConfidence(modelSentence.confidences[t]);
            }
        }

        StringBuilder out = new StringBuilder();
        // The explicit Object[] casts keep overload resolution the same as in the original call sites.
        out.append(StringUtils.join((Object[]) tokens));
        out.append("\t");
        if (!bl) {
            out.append(StringUtils.join((Object[]) tags));
            out.append("\t");
        }
        if (this.showConfidence) {
            out.append(StringUtils.join((Object[]) confidences));
            out.append("\t");
        }
        out.append(string);
        this.outputStream.println(out.toString());
    }

    /**
     * Command-line entry point: parses options, determines the input file (stdin when no file or
     * {@code "-"} is given), then runs the tagger.
     *
     * @param stringArray the command-line arguments
     * @throws IOException if input cannot be read
     * @throws ClassNotFoundException propagated from {@link #runTagger()}
     */
    public static void main(String[] stringArray) throws IOException, ClassNotFoundException {
        String[] args = stringArray;
        if (args.length > 0 && (args[0].equals("-h") || args[0].equals("--help"))) {
            RunTagger.usage();
        }

        RunTagger runTagger = new RunTagger();
        int i = 0;
        // A lone "-" is the stdin placeholder handled after the loop, not an option.
        while (i < args.length && args[i].startsWith("-") && !args[i].equals("-")) {
            String option = args[i];
            if (option.equals("--model")) {
                runTagger.modelFilename = RunTagger.requireOptionValue(args, i);
                i += 2;
            } else if (option.equals("--just-tokenize")) {
                runTagger.justTokenize = true;
                ++i;
            } else if (option.equals("--decoder")) {
                String decoderName = RunTagger.requireOptionValue(args, i);
                if (decoderName.equals("viterbi")) {
                    runTagger.decoder = Decoder.VITERBI;
                } else if (decoderName.equals("greedy")) {
                    runTagger.decoder = Decoder.GREEDY;
                } else {
                    RunTagger.die("unknown decoder " + decoderName);
                }
                i += 2;
            } else if (option.equals("--quiet")) {
                runTagger.noOutput = true;
                ++i;
            } else if (option.equals("--input-format")) {
                String format = RunTagger.requireOptionValue(args, i);
                if (!(format.equals("json") || format.equals("text") || format.equals("conll"))) {
                    RunTagger.usage("input format must be: json, text, or conll");
                }
                runTagger.inputFormat = format;
                i += 2;
            } else if (option.equals("--output-format")) {
                runTagger.outputFormat = RunTagger.requireOptionValue(args, i);
                i += 2;
            } else if (option.equals("--input-field")) {
                runTagger.inputField = RunTagger.parseInputField(RunTagger.requireOptionValue(args, i));
                i += 2;
            } else if (option.equals("--word-clusters")) {
                WordClusterPaths.clusterResourceName = RunTagger.requireOptionValue(args, i);
                // This option consumes its value, so skip both arguments.
                i += 2;
            } else if (option.equals("--no-confidence")) {
                runTagger.showConfidence = false;
                ++i;
            } else {
                System.out.println("bad option " + option);
                RunTagger.usage();
            }
        }

        // At most one positional argument (the input file) may remain.
        if (args.length - i > 1) {
            RunTagger.usage();
        }
        if (args.length == i || args[i].equals("-")) {
            System.err.println("Listening on stdin for input.  (-h for help)");
            runTagger.inputFilename = "/dev/stdin";
        } else {
            runTagger.inputFilename = args[i];
        }
        runTagger.finalizeOptions();
        runTagger.runTagger();
    }

    /**
     * Returns the argument following {@code args[index]}, or prints usage and exits when the
     * option is the last argument and therefore has no value.
     */
    private static String requireOptionValue(String[] args, int index) {
        if (index + 1 >= args.length) {
            RunTagger.usage("option " + args[index] + " requires an argument");
        }
        return args[index + 1];
    }

    /**
     * Parses the 1-indexed {@code --input-field} value, printing usage and exiting when it is
     * not a positive integer.
     */
    private static int parseInputField(String value) {
        int field;
        try {
            field = Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Reported through usage() below together with out-of-range values.
            field = 0;
        }
        if (field < 1) {
            RunTagger.usage("--input-field must be a positive integer, got: " + value);
        }
        return field;
    }

    /**
     * Resolves option defaults that depend on other options: picks the output format when it is
     * {@code "auto"} and turns off confidence output for Viterbi decoding and tokenize-only runs.
     *
     * @throws IOException declared for callers; not thrown by the code visible here
     */
    public void finalizeOptions() throws IOException {
        if (this.outputFormat.equals("auto")) {
            this.outputFormat = this.inputFormat.equals("conll") ? "conll" : "pretsv";
        }
        if (this.showConfidence && this.decoder == Decoder.VITERBI) {
            System.err.println("Confidence output is unimplemented in Viterbi, turning it off.");
            this.showConfidence = false;
        }
        if (this.justTokenize) {
            this.showConfidence = false;
        }
    }

    /** Prints the usage text to stdout and exits with status 1. */
    public static void usage() {
        RunTagger.usage(null);
    }

    /**
     * Prints the usage text to stdout, followed by {@code "ERROR: " + string} when
     * {@code string} is non-null, and exits with status 1.
     *
     * @param string an optional error message, or null
     */
    public static void usage(String string) {
        System.out.println("RunTagger [options] [ExamplesFilename]\n  runs the CMU ARK Twitter tagger on tweets from ExamplesFilename, \n  writing taggings to standard output. Listens on stdin if no input filename.\n\nOptions:\n  --model <Filename>        Specify model filename. (Else use built-in.)\n  --just-tokenize           Only run the tokenizer; no POS tags.\n  --quiet                   Quiet: no output\n  --input-format <Format>   Default: auto\n                            Options: json, text, conll\n  --output-format <Format>  Default: automatically decide from input format.\n                            Options: pretsv, conll\n  --input-field NUM         Default: 1\n                            Which tab-separated field contains the input\n                            (1-indexed, like unix 'cut')\n                            Only for {json, text} input formats.\n  --word-clusters <File>    Alternate word clusters file (see FeatureExtractor)\n  --no-confidence           Don't output confidence probabilities\n  --decoder <Decoder>       Change the decoding algorithm (default: greedy)\n\nTweet-per-line input formats:\n   json: Every input line has a JSON object containing the tweet,\n         as per the Streaming API. (The 'text' field is used.)\n   text: Every input line has the text for one tweet.\nWe actually assume input lines are TSV and the tweet data is one field.\n(Therefore tab characters are not allowed in tweets.\nTwitter's own JSON formats guarantee this;\nif you extract the text yourself, you must remove tabs and newlines.)\nTweet-per-line output format is\n   pretsv: Prepend the tokenization and tagging as new TSV fields, \n           so the output includes a complete copy of the input.\nBy default, three TSV fields are prepended:\n   Tokenization \\t POSTags \\t Confidences \\t (original data...)\nThe tokenization and tags are parallel space-separated lists.\nThe 'conll' format is token-per-line, blank spaces separating tweets.\n");
        if (string != null) {
            System.out.println("ERROR: " + string);
        }
        System.exit(1);
    }

    /**
     * Returns the set of words that have a cluster path, copied from
     * {@code WordClusterPaths.wordToPath} on first use and cached afterwards.
     *
     * @return the cached set of clustered words
     */
    public static HashSet<String> wordsInCluster() {
        if (_wordsInCluster == null) {
            _wordsInCluster = new HashSet<String>(WordClusterPaths.wordToPath.keySet());
        }
        return _wordsInCluster;
    }

    /** Decoding algorithms selectable with {@code --decoder}. */
    public static enum Decoder {
        GREEDY,
        VITERBI;

    }
}

