/*
 * Decompiled with CFR 0.152.
 */
package datafu.pig.text.opennlp;

import datafu.pig.text.opennlp.CachedFile;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class POSTag
extends EvalFunc<DataBag> {
    private POSTaggerME tagger = null;
    private static final String MODEL_FILE = "pos";
    private TupleFactory tf = TupleFactory.getInstance();
    private BagFactory bf = BagFactory.getInstance();
    private String modelPath;

    public POSTag(String modelPath) {
        this.modelPath = modelPath;
    }

    public List<String> getCacheFiles() {
        ArrayList<String> list = new ArrayList<String>(1);
        list.add(this.modelPath + "#" + MODEL_FILE);
        return list;
    }

    public DataBag exec(Tuple input) throws IOException {
        DataBag inputBag = null;
        if (input.size() != 1) {
            throw new IOException();
        }
        inputBag = (DataBag)input.get(0);
        DataBag outBag = this.bf.newDefaultBag();
        if (this.tagger == null) {
            String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
            FileInputStream modelIn = new FileInputStream(loadFile);
            BufferedInputStream buffer = new BufferedInputStream(modelIn);
            POSModel model = new POSModel((InputStream)buffer);
            this.tagger = new POSTaggerME(model);
        }
        int bagLength = (int)inputBag.size();
        String[] words = new String[bagLength];
        Iterator itr = inputBag.iterator();
        int i = 0;
        while (itr.hasNext()) {
            words[i] = (String)((Tuple)itr.next()).get(0);
            ++i;
        }
        String[] tags = this.tagger.tag(words);
        double[] probs = this.tagger.probs();
        for (int j = 0; j < tags.length; ++j) {
            Tuple newTuple = this.tf.newTuple(3);
            newTuple.set(0, (Object)words[j]);
            newTuple.set(1, (Object)tags[j]);
            newTuple.set(2, (Object)probs[j]);
            outBag.add(newTuple);
        }
        return outBag;
    }

    public Schema outputSchema(Schema input) {
        try {
            Schema.FieldSchema inputFieldSchema = input.getField(0);
            if (inputFieldSchema.type != 120) {
                throw new RuntimeException("Expected a BAG as input");
            }
            Schema inputBagSchema = inputFieldSchema.schema;
            if (inputBagSchema == null) {
                return null;
            }
            if (inputBagSchema.getField((int)0).type != 110) {
                throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s", DataType.findTypeName((byte)inputBagSchema.getField((int)0).type)));
            }
            Schema inputTupleSchema = inputBagSchema.getField((int)0).schema;
            if (inputTupleSchema.size() != 1) {
                throw new RuntimeException("Expected one field for the token data");
            }
            if (inputTupleSchema.getField((int)0).type != 55) {
                throw new RuntimeException(String.format("Expected source to be a CHARARRAY, but instead found %s", DataType.findTypeName((byte)inputTupleSchema.getField((int)0).type)));
            }
            Schema tupleSchema = new Schema();
            tupleSchema.add(new Schema.FieldSchema("token", 55));
            tupleSchema.add(new Schema.FieldSchema("tag", 55));
            tupleSchema.add(new Schema.FieldSchema("probability", 25));
            return new Schema(new Schema.FieldSchema(this.getSchemaName(((Object)((Object)this)).getClass().getName().toLowerCase(), input), tupleSchema, 120));
        }
        catch (FrontendException e) {
            throw new RuntimeException(e);
        }
    }
}

