/*
 * Decompiled with CFR 0.152.
 */
package datafu.pig.text.opennlp;

import java.io.IOException;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class TokenizeWhitespace
extends EvalFunc<DataBag> {
    private WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
    private TupleFactory tf = TupleFactory.getInstance();
    private BagFactory bf = BagFactory.getInstance();

    public DataBag exec(Tuple input) throws IOException {
        String[] tokens;
        if (input.size() != 1) {
            throw new IOException();
        }
        String inputString = input.get(0).toString();
        if (inputString == null || inputString == "") {
            return null;
        }
        DataBag outBag = this.bf.newDefaultBag();
        for (String token : tokens = this.tokenizer.tokenize(inputString)) {
            Tuple outTuple = this.tf.newTuple((Object)token);
            outBag.add(outTuple);
        }
        return outBag;
    }

    public Schema outputSchema(Schema input) {
        try {
            Schema.FieldSchema inputFieldSchema = input.getField(0);
            if (inputFieldSchema.type != 55) {
                throw new RuntimeException("Expected a CHARARRAY as input, but got a " + inputFieldSchema.toString());
            }
            Schema tupleSchema = new Schema();
            tupleSchema.add(new Schema.FieldSchema("token", 55));
            return new Schema(new Schema.FieldSchema(this.getSchemaName(((Object)((Object)this)).getClass().getName().toLowerCase(), input), tupleSchema, 120));
        }
        catch (FrontendException e) {
            throw new RuntimeException(e);
        }
    }
}

