/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.StringLabelFactory;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.CNFTransformers;
import edu.stanford.nlp.parser.lexparser.CollinsPuncTransformer;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.PostSplitter;
import edu.stanford.nlp.parser.lexparser.TrainOptions;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.CompositeTreeTransformer;
import edu.stanford.nlp.trees.CompositeTreebank;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeLeafLabelTransformer;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.FileFilter;
import java.io.PrintWriter;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

public class TreeAnnotatorAndBinarizer
implements TreeTransformer {
    /** Redwood logging channel used by this class and by the nested {@code TreeNullAnnotator}. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(TreeAnnotatorAndBinarizer.class);
    /** Factory (producing {@code CategoryWordTag} labels) used by {@code addRoot} to build new nodes. */
    private final TreeFactory tf;
    /** Language pack taken from the parser params; supplies the start symbol in {@code addRoot}. */
    private final TreebankLanguagePack tlp;
    /** First stage of {@code transformTree}: a {@code TreeAnnotator} or a {@code TreeNullAnnotator}. */
    private final TreeTransformer annotator;
    /** Binarization stage, applied after the root/boundary nodes have been added. */
    private final TreeBinarizer binarizer;
    /** {@code null} unless {@code trainOptions.selectivePostSplit} was set when this object was built. */
    private final PostSplitter postSplitter;
    /** When true, {@code transformTree} also runs a {@code CNFTransformers.ToCNFTransformer} on its result. */
    private final boolean forceCNF;
    /** Reference to {@code op.trainOptions} (not a copy); its flags are re-read on every call. */
    private final TrainOptions trainOptions;
    /** {@code null} unless {@code trainOptions.printAnnotatedRuleCounts} was set at construction. */
    private final ClassicCounter<Tree> annotatedRuleCounts;
    /** {@code null} unless {@code trainOptions.printAnnotatedStateCounts} was set at construction. */
    private final ClassicCounter<String> annotatedStateCounts;

    /**
     * Convenience constructor: delegates to the seven-argument constructor, passing the
     * result of {@code tlpParams.headFinder()} as both the annotation head finder and the
     * binarization head finder (the method is called once for each).
     *
     * @param tlpParams language-specific parser parameters; also the source of the head finders
     * @param forceCNF whether {@code transformTree} should additionally apply the CNF transformer
     * @param insideFactor forwarded unchanged to the {@code TreeBinarizer} constructor
     * @param doSubcategorization if true a {@code TreeAnnotator} is used, otherwise a {@code TreeNullAnnotator}
     * @param op parser options; {@code op.trainOptions} is retained by reference
     */
    public TreeAnnotatorAndBinarizer(TreebankLangParserParams tlpParams, boolean forceCNF, boolean insideFactor, boolean doSubcategorization, Options op) {
        this(tlpParams.headFinder(), tlpParams.headFinder(), tlpParams, forceCNF, insideFactor, doSubcategorization, op);
    }

    public TreeAnnotatorAndBinarizer(HeadFinder annotationHF, HeadFinder binarizationHF, TreebankLangParserParams tlpParams, boolean forceCNF, boolean insideFactor, boolean doSubcategorization, Options op) {
        this.trainOptions = op.trainOptions;
        this.annotator = doSubcategorization ? new TreeAnnotator(annotationHF, tlpParams, op) : new TreeNullAnnotator(annotationHF);
        this.binarizer = new TreeBinarizer(binarizationHF, tlpParams.treebankLanguagePack(), insideFactor, this.trainOptions.markovFactor, this.trainOptions.markovOrder, this.trainOptions.compactGrammar() > 0, this.trainOptions.compactGrammar() > 1, this.trainOptions.HSEL_CUT, this.trainOptions.markFinalStates, this.trainOptions.simpleBinarizedLabels, this.trainOptions.noRebinarization);
        this.postSplitter = this.trainOptions.selectivePostSplit ? new PostSplitter(tlpParams, op) : null;
        this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        this.tlp = tlpParams.treebankLanguagePack();
        this.forceCNF = forceCNF;
        this.annotatedRuleCounts = this.trainOptions.printAnnotatedRuleCounts ? new ClassicCounter() : null;
        this.annotatedStateCounts = this.trainOptions.printAnnotatedStateCounts ? new ClassicCounter() : null;
    }

    /**
     * Asks the post-splitter to dump its statistics. Does nothing when
     * {@code trainOptions.selectivePostSplit} is currently off.
     */
    public void dumpStats() {
        if (!this.trainOptions.selectivePostSplit) {
            return;
        }
        this.postSplitter.dumpStats();
    }

    /**
     * Forwards the selective-split switch to the underlying {@code TreeBinarizer}.
     *
     * @param doSelectiveSplit value passed unchanged to {@code TreeBinarizer.setDoSelectiveSplit}
     */
    public void setDoSelectiveSplit(boolean doSelectiveSplit) {
        this.binarizer.setDoSelectiveSplit(doSelectiveSplit);
    }

    /**
     * Relabels the top node of {@code t} with the language pack's start symbol and appends
     * a boundary pre-terminal (category {@code ".$$."} over the word {@code ".$."}) as its
     * last child. The tree is modified in place.
     *
     * <p>NOTE(review): when {@code t} is a leaf, a new parent node is created and all
     * modifications are applied to that new node; because the method returns nothing,
     * the caller's reference still points at the untouched leaf. Confirm whether callers
     * are expected to ever pass a leaf.
     *
     * @param t the tree whose root is to be relabelled and extended
     */
    public void addRoot(Tree t) {
        Tree root = t;
        if (root.isLeaf()) {
            log.info("Warning: tree is leaf: " + root);
            root = this.tf.newTreeNode(this.tlp.startSymbol(), Collections.singletonList(root));
        }
        root.setLabel(new CategoryWordTag(this.tlp.startSymbol(), ".$.", ".$$."));

        // Build the boundary pre-terminal: (.$$. .$.)
        List<Tree> boundaryKids = new ArrayList<Tree>();
        boundaryKids.add(this.tf.newLeaf(new Word(".$.")));
        Tree boundaryNode = this.tf.newTreeNode(new CategoryWordTag(".$$.", ".$.", ".$$."), boundaryKids);

        // Append it after the existing children.
        List<Tree> kids = root.getChildrenAsList();
        kids.add(boundaryNode);
        root.setChildren(kids);
    }

    /**
     * Runs one tree through the full pipeline: annotate, optionally post-split, optionally
     * record rule/state counts, add the root and boundary nodes, binarize, and optionally
     * convert to CNF.
     *
     * <p>While {@code trainOptions.printTreeTransformations} is positive the intermediate
     * trees are printed, and the counter is decremented once per call.
     *
     * @param t the tree to transform
     * @return the binarized (and, if {@code forceCNF} is set, CNF-converted) tree
     */
    @Override
    public Tree transformTree(Tree t) {
        if (this.trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(null, "ORIGINAL TREE:", t);
        }

        Tree annotated = this.annotator.transformTree(t);
        if (this.trainOptions.selectivePostSplit) {
            annotated = this.postSplitter.transformTree(annotated);
        }
        if (this.trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(this.trainOptions.printAnnotatedPW, "ANNOTATED TREE:", annotated);
        }

        if (this.trainOptions.printAnnotatedRuleCounts) {
            this.tallyAnnotatedRules(annotated);
        }
        if (this.trainOptions.printAnnotatedStateCounts) {
            this.tallyAnnotatedStates(annotated);
        }

        // The root/boundary nodes are added after counting, so they are not counted.
        this.addRoot(annotated);
        Tree result = this.binarizer.transformTree(annotated);

        if (this.trainOptions.printTreeTransformations > 0) {
            TrainOptions.printTrainTree(this.trainOptions.printBinarizedPW, "BINARIZED TREE:", result);
            this.trainOptions.printTreeTransformations--;
        }

        if (!this.forceCNF) {
            return result;
        }
        return new CNFTransformers.ToCNFTransformer().transformTree(result);
    }

    /** Counts every local tree of a string-labelled deep copy of {@code annotated}. */
    private void tallyAnnotatedRules(Tree annotated) {
        Tree stringLabelled = annotated.deepCopy(new LabeledScoredTreeFactory(), new StringLabelFactory());
        for (Tree localTree : stringLabelled.localTrees()) {
            this.annotatedRuleCounts.incrementCount(localTree);
        }
    }

    /** Counts the label value of every non-leaf node of {@code annotated}. */
    private void tallyAnnotatedStates(Tree annotated) {
        for (Tree node : annotated) {
            if (!node.isLeaf()) {
                this.annotatedStateCounts.incrementCount(node.label().value());
            }
        }
    }

    /**
     * Logs the collected annotated rule counts, one rule per log entry, in the form
     * {@code count<TAB>parent --> child1 child2 ...}.
     *
     * <p>Previously the parent, each separating space and each child were emitted through
     * separate {@code log.info} calls, so a single rule was scattered over many log
     * entries. The line is now assembled first and logged once.
     *
     * <p>Does nothing if rule counting was not enabled when this object was constructed
     * (the counter is then {@code null}).
     */
    public void printRuleCounts() {
        if (this.annotatedRuleCounts == null) {
            return;
        }
        log.info();
        for (Tree rule : this.annotatedRuleCounts.keySet()) {
            StringBuilder line = new StringBuilder();
            line.append(this.annotatedRuleCounts.getCount(rule))
                .append('\t')
                .append(rule.label().value())
                .append(" -->");
            for (Tree dtr : rule.getChildrenAsList()) {
                line.append(' ').append(dtr.label().value());
            }
            log.info(line.toString());
        }
    }

    /**
     * Logs an empty entry, a header, and then every annotated state with its count,
     * one {@code state<TAB>count} entry per state, in sorted order of the state names.
     */
    public void printStateCounts() {
        log.info();
        log.info("Annotated state counts");
        List<String> sortedStates = new ArrayList<String>(this.annotatedStateCounts.keySet());
        Collections.sort(sortedStates);
        for (int k = 0; k < sortedStates.size(); k++) {
            String state = sortedStates.get(k);
            log.info(state + "\t" + this.annotatedStateCounts.getCount(state));
        }
    }

    /**
     * Counts the consecutive arguments following {@code args[index]} that do not start
     * with {@code '-'}, i.e. the sub-arguments belonging to the option at {@code index}.
     *
     * <p>An empty-string argument counts as a sub-argument; the previous
     * {@code charAt(0)} test threw {@code StringIndexOutOfBoundsException} on it.
     *
     * @param args the full command line
     * @param index position of the option whose sub-arguments are counted
     * @return the number of sub-arguments; 0 if the next argument is an option or absent
     */
    private static int numSubArgs(String[] args, int index) {
        int next = index + 1;
        while (next < args.length && !args[next].startsWith("-")) {
            next++;
        }
        return next - index - 1;
    }

    /**
     * Removes from {@code op.trainOptions.splitters} every entry matched by an entry of
     * {@code op.trainOptions.deleteSplitters}. A splitter matches when it equals the
     * delete entry, or, if the delete entry is itself a basic category, when the
     * splitter's basic category equals it. No-op when {@code deleteSplitters} is null.
     *
     * @param tlp language pack used to reduce labels to their basic category
     * @param op options holding both sets; {@code splitters} is modified in place
     */
    private static void removeDeleteSplittersFromSplitters(TreebankLanguagePack tlp, Options op) {
        if (op.trainOptions.deleteSplitters == null) {
            return;
        }
        List<String> removed = new ArrayList<String>();
        for (String toDelete : op.trainOptions.deleteSplitters) {
            String toDeleteBase = tlp.basicCategory(toDelete);
            // Only a bare basic category is allowed to match by basic category.
            boolean matchOnBase = toDelete.equals(toDeleteBase);
            for (Iterator<String> splitterIt = op.trainOptions.splitters.iterator(); splitterIt.hasNext(); ) {
                String splitter = splitterIt.next();
                String splitterBase = tlp.basicCategory(splitter);
                if ((matchOnBase && splitterBase.equals(toDeleteBase)) || splitter.equals(toDelete)) {
                    splitterIt.remove();
                    removed.add(splitter);
                }
            }
        }
        if (op.testOptions.verbose) {
            log.info("Removed from vertical splitters: " + removed);
        }
    }

    /**
     * Builds the training transformer chain described by {@code op} and applies it to the
     * given treebanks.
     *
     * <p>Side effects on {@code op}: {@code op.trainOptions.splitters} is overwritten when
     * {@code selectiveSplit} is set, {@code op.trainOptions.postSplitters} is overwritten
     * when {@code selectivePostSplit} is set, and {@code printTreeTransformations} is
     * temporarily set to 0 during the {@code hSelSplit} pre-pass.
     *
     * @param trainTreebank primary training treebank
     * @param secondaryTreebank optional additional training treebank; may be null
     * @param tuneTreebank optional tuning treebank; may be null
     * @param op parser options (read and, as noted above, modified)
     * @return the transformed (train, secondary, tune) treebanks; a null input stays null
     */
    public static Triple<Treebank, Treebank, Treebank> getAnnotatedBinaryTreebankFromTreebank(Treebank trainTreebank, Treebank secondaryTreebank, Treebank tuneTreebank, Options op) {
        TreebankLangParserParams tlpParams = op.tlpParams;
        TreebankLanguagePack tlp = tlpParams.treebankLanguagePack();
        // In verbose mode, print a textual summary of the training data to stderr.
        if (op.testOptions.verbose) {
            PrintWriter pwErr = tlpParams.pw(System.err);
            pwErr.print("Training ");
            pwErr.println(trainTreebank.textualSummary(tlp));
            if (secondaryTreebank != null) {
                pwErr.print("Secondary training ");
                pwErr.println(secondaryTreebank.textualSummary(tlp));
            }
        }
        // Assemble the chain in order: preTransformer, Collins punctuation, annotate+binarize, word function.
        CompositeTreeTransformer trainTransformer = new CompositeTreeTransformer();
        if (op.trainOptions.preTransformer != null) {
            trainTransformer.addTransformer(op.trainOptions.preTransformer);
        }
        if (op.trainOptions.collinsPunc) {
            CollinsPuncTransformer collinsPuncTransformer = new CollinsPuncTransformer(tlp);
            trainTransformer.addTransformer(collinsPuncTransformer);
        }
        log.info("Binarizing trees...");
        // With leftToRight set, binarization uses a LeftHeadFinder while annotation keeps the language head finder.
        TreeAnnotatorAndBinarizer binarizer = !op.trainOptions.leftToRight ? new TreeAnnotatorAndBinarizer(tlpParams, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op) : new TreeAnnotatorAndBinarizer(tlpParams.headFinder(), new LeftHeadFinder(), tlpParams, op.forceCNF, !op.trainOptions.outsideFactor(), !op.trainOptions.predictSplits, op);
        trainTransformer.addTransformer(binarizer);
        if (op.wordFunction != null) {
            TreeLeafLabelTransformer wordFunctionTransformer = new TreeLeafLabelTransformer(op.wordFunction);
            trainTransformer.addTransformer(wordFunctionTransformer);
        }
        // Statistics below are gathered over train + secondary; the tune treebank is not included.
        Treebank wholeTreebank = secondaryTreebank == null ? trainTreebank : new CompositeTreebank(trainTreebank, secondaryTreebank);
        if (op.trainOptions.selectiveSplit) {
            // Compute the parent-annotation split categories, then drop any listed in deleteSplitters.
            op.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, tlp);
            TreeAnnotatorAndBinarizer.removeDeleteSplittersFromSplitters(tlp, op);
            if (op.testOptions.verbose) {
                ArrayList<String> list = new ArrayList<String>(op.trainOptions.splitters);
                Collections.sort(list);
                log.info("Parent split categories: " + list);
            }
        }
        if (op.trainOptions.selectivePostSplit) {
            // Post-split categories are computed on the annotated treebank.
            // NOTE(review): wholeTreebank is replaced by its annotated form here, so the
            // hSelSplit pre-pass below would iterate over already-annotated trees — confirm intended.
            TreeAnnotator myTransformer = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);
            wholeTreebank = wholeTreebank.transform(myTransformer);
            op.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(wholeTreebank, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, tlp);
            if (op.testOptions.verbose) {
                log.info("Parent post annotation split categories: " + op.trainOptions.postSplitters);
            }
        }
        if (op.trainOptions.hSelSplit) {
            // Pre-pass: run every tree through the chain with selective split off and tree
            // printing silenced; the transformed trees are discarded. Presumably this lets the
            // binarizer gather the statistics it needs for selective splitting — TODO confirm in TreeBinarizer.
            // NOTE(review): this pass presumably also reaches binarizer.transformTree, which
            // increments the annotated rule/state counters when those options are on — confirm
            // whether the counts printed below are meant to include it.
            int ptt = op.trainOptions.printTreeTransformations;
            op.trainOptions.printTreeTransformations = 0;
            binarizer.setDoSelectiveSplit(false);
            for (Tree tree : wholeTreebank) {
                trainTransformer.transformTree(tree);
            }
            binarizer.setDoSelectiveSplit(true);
            op.trainOptions.printTreeTransformations = ptt;
        }
        // Main pass over the training data.
        trainTreebank = trainTreebank.transform(trainTransformer);
        if (secondaryTreebank != null) {
            secondaryTreebank = secondaryTreebank.transform(trainTransformer);
        }
        // Counts are printed here, i.e. before the tune treebank is handed to the transformer.
        if (op.trainOptions.printAnnotatedStateCounts) {
            binarizer.printStateCounts();
        }
        if (op.trainOptions.printAnnotatedRuleCounts) {
            binarizer.printRuleCounts();
        }
        if (tuneTreebank != null) {
            tuneTreebank = tuneTreebank.transform(trainTransformer);
        }
        if (op.testOptions.verbose) {
            binarizer.dumpStats();
        }
        return new Triple<Treebank, Treebank, Treebank>(trainTreebank, secondaryTreebank, tuneTreebank);
    }

    /**
     * Command-line entry point: loads a treebank, annotates and binarizes it, and prints
     * each original tree followed by its binarized form to standard output.
     *
     * <p>Recognized here: {@code -train treebankPath [fileRanges | low high]}. Every other
     * option is handed to {@code Options.setOption}. The usage message is logged (and
     * nothing else is done) when unparsed arguments remain or when no {@code -train}
     * option was given; previously a missing {@code -train} passed a null path on to
     * {@code loadPath}.
     *
     * @param args command-line arguments
     * @throws RuntimeException if {@code -train} is not followed by a treebank path
     */
    public static void main(String[] args) {
        Options op = new Options();
        String treebankPath = null;
        FileFilter trainFilter = null;
        int i = 0;
        while (i < args.length && args[i].startsWith("-")) {
            if (!args[i].equalsIgnoreCase("-train")) {
                i = op.setOption(args, i);
                continue;
            }
            int subArgCount = numSubArgs(args, i);
            if (subArgCount < 1) {
                throw new RuntimeException("Error: -train option must have treebankPath as first argument.");
            }
            treebankPath = args[i + 1];
            if (subArgCount == 2) {
                // A single extra argument is a file-range specification string.
                trainFilter = new NumberRangesFileFilter(args[i + 2], true);
            } else if (subArgCount >= 3) {
                // Two extra arguments are the low and high file numbers.
                int low = Integer.parseInt(args[i + 2]);
                int high = Integer.parseInt(args[i + 3]);
                trainFilter = new NumberRangeFileFilter(low, high, true);
            }
            // Skip the option, the path and at most two filter arguments; any further
            // sub-arguments are left unparsed and trigger the usage message below.
            i += 1 + Math.min(subArgCount, 3);
        }
        if (i < args.length || treebankPath == null) {
            log.info("usage: java TreeAnnotatorAndBinarizer options*");
            log.info("  Options are like for lexicalized parser including -train treebankPath [fileRange]");
            return;
        }
        log.info("Annotating from treebank dir: " + treebankPath);
        DiskTreebank trainTreebank = op.tlpParams.diskTreebank();
        if (trainFilter == null) {
            trainTreebank.loadPath(treebankPath);
        } else {
            trainTreebank.loadPath(treebankPath, trainFilter);
        }
        Treebank binaryTrainTreebank = getAnnotatedBinaryTreebankFromTreebank(trainTreebank, null, null, op).first();
        // Walk the original treebank in step with the transformed one to print them pairwise.
        Iterator<Tree> originals = trainTreebank.iterator();
        for (Tree binarized : binaryTrainTreebank) {
            System.out.println("Original tree:");
            originals.next().pennPrint();
            System.out.println("Binarized tree:");
            binarized.pennPrint();
            System.out.println();
        }
    }

    /**
     * Annotator that adds no subcategories: it copies the tree and only rewrites labels,
     * giving leaves a {@code Word} label and every other node a {@code CategoryWordTag}
     * carrying its category plus the word and tag found via the head finder.
     */
    static class TreeNullAnnotator implements TreeTransformer {
        private final TreeFactory tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        private final HeadFinder hf;

        /**
         * @param hf head finder used to pick the head child of each non-leaf node
         */
        public TreeNullAnnotator(HeadFinder hf) {
            this.hf = hf;
        }

        /**
         * Relabels a skeleton copy of {@code t}; the input tree itself is not relabelled.
         *
         * @param t the tree to copy and relabel
         * @return the relabelled copy
         */
        @Override
        public Tree transformTree(Tree t) {
            return this.transformTreeHelper(t.treeSkeletonCopy(this.tf));
        }

        /**
         * Relabels {@code t} in place, children first, and returns it ({@code null} in,
         * {@code null} out).
         */
        private Tree transformTreeHelper(Tree t) {
            if (t == null) {
                return null;
            }
            String cat = t.label().value();
            if (t.isLeaf()) {
                t.setLabel(new Word(cat));
                return t;
            }

            // Children must be relabelled first so a non-leaf head already carries a CategoryWordTag.
            for (Tree kid : t.children()) {
                this.transformTreeHelper(kid);
            }

            String headWord = null;
            String headTag = null;
            Tree head = this.hf.determineHead(t);
            if (head == null) {
                // No head found: log it and leave word and tag null.
                log.error("null head for tree\n" + t.toString());
            } else if (head.isLeaf()) {
                // Pre-terminal: its own category is the tag, the leaf's value is the word.
                headTag = cat;
                headWord = head.label().value();
            } else {
                CategoryWordTag headLabel = (CategoryWordTag) head.label();
                headWord = headLabel.word();
                headTag = headLabel.tag();
            }
            t.setLabel(new CategoryWordTag(cat, headWord, headTag));
            return t;
        }
    }
}

