/*
 * Decompiled with CFR 0.152.
 */
package elki.datasource.parser;

import elki.data.FeatureVector;
import elki.data.LabelList;
import elki.data.SparseFloatVector;
import elki.data.SparseNumberVector;
import elki.data.type.SimpleTypeInformation;
import elki.data.type.VectorFieldTypeInformation;
import elki.data.type.VectorTypeInformation;
import elki.datasource.parser.CSVReaderFormat;
import elki.datasource.parser.NumberVectorLabelParser;
import elki.logging.Logging;
import elki.utilities.exceptions.AbortException;
import elki.utilities.io.ParseUtil;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import it.unimi.dsi.fastutil.ints.Int2DoubleMap;
import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import java.util.ArrayList;

/**
 * Parser for sparse "term value term value ..." lines.
 *
 * <p>Each input line is read token by token. Tokens in label columns become
 * labels; the remaining tokens are consumed as alternating (term, number)
 * pairs. Every distinct term is assigned the next free dimension index the
 * first time it is seen, and the number becomes the value of that dimension
 * in the sparse vector produced for the line. A term that is not followed by
 * a parseable number is treated as a label instead.
 *
 * <p>When {@code normalize} is set, the values of each line are divided by
 * their sum, so that term counts become term frequencies.
 *
 * <p>The parser keeps per-line scratch state ({@link #values},
 * {@link #labels}) as well as the growing term dictionary in instance
 * fields.
 *
 * @param <V> sparse vector type produced by this parser
 */
public class TermFrequencyParser<V extends SparseNumberVector>
extends NumberVectorLabelParser<V> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(TermFrequencyParser.class);

    /** Number of distinct terms seen so far; also the next free dimension index. */
    int numterms;

    /** Maps each term to its dimension index; returns -1 for unknown terms. */
    Object2IntOpenHashMap<String> keymap;

    /** Whether to divide the values of each line by their sum. */
    boolean normalize;

    /** Same factory as passed to the superclass, kept with its sparse type. */
    private final SparseNumberVector.Factory<V> sparsefactory;

    /** Scratch buffer (dimension to value) for the current line; cleared after each line. */
    Int2DoubleOpenHashMap values = new Int2DoubleOpenHashMap();

    /** Scratch buffer for the labels of the current line; cleared after each line. */
    ArrayList<String> labels = new ArrayList<>();

    /**
     * Constructor using the default CSV format and no explicit label columns.
     *
     * @param normalize whether to normalize each vector by the sum of its values
     * @param factory factory used to build the sparse vectors
     */
    public TermFrequencyParser(boolean normalize, SparseNumberVector.Factory<V> factory) {
        this(normalize, CSVReaderFormat.DEFAULT_FORMAT, null, factory);
    }

    /**
     * Full constructor.
     *
     * @param normalize whether to normalize each vector by the sum of its values
     * @param format reader format, passed through to the superclass
     * @param labelIndices label column selection, passed through to the superclass
     * @param factory factory used to build the sparse vectors
     */
    public TermFrequencyParser(boolean normalize, CSVReaderFormat format, long[] labelIndices, SparseNumberVector.Factory<V> factory) {
        super(format, labelIndices, factory);
        this.normalize = normalize;
        this.keymap = new Object2IntOpenHashMap<>();
        // -1 marks "term not seen yet"; valid dimension indexes start at 0.
        this.keymap.defaultReturnValue(-1);
        this.sparsefactory = factory;
        // NOTE(review): presumably marks the superclass's dimensionality warning
        // as already issued, since the dimensionality grows with every new term
        // - confirm against NumberVectorLabelParser.
        this.warnedDim = true;
    }

    /**
     * Parse the current line of the tokenizer into {@code curvec} and
     * {@code curlbl}.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean parseLineInternal() {
        double len = 0.;
        // Term waiting for its numeric value, or null when a term is expected next.
        String curterm = null;
        int c = 0;
        for (/* tokenizer is already positioned on the line */; tokenizer.valid(); tokenizer.advance()) {
            if (isLabelColumn(c++)) {
                labels.add(tokenizer.getSubstring());
                continue;
            }
            if (curterm == null) {
                curterm = tokenizer.getSubstring();
                continue;
            }
            try {
                double attribute = tokenizer.getDouble();
                int curdim = keymap.getInt(curterm);
                if (curdim < 0) {
                    // First occurrence of this term: assign the next free dimension.
                    curdim = numterms++;
                    keymap.put(curterm, curdim);
                }
                // NOTE(review): a term repeated within one line overwrites the
                // stored value here, while len still accumulates both values.
                // Confirm whether repeated terms should be summed instead.
                values.put(curdim, attribute);
                len += attribute;
                curterm = null;
            }
            catch (NumberFormatException e) {
                // Warn (once) only for the overflow cases; any other token that
                // fails to parse is silently treated as text.
                if (!warnedPrecision && (e == ParseUtil.PRECISION_OVERFLOW || e == ParseUtil.EXPONENT_OVERFLOW)) {
                    getLogger().warning("Too many digits in what looked like a double number - treating as string: " + tokenizer.getSubstring());
                    warnedPrecision = true;
                }
                // The pending term had no value: keep it as a label, and let the
                // current token become the new pending term.
                labels.add(curterm);
                curterm = tokenizer.getSubstring();
            }
        }
        // A trailing term without a value is a label, too.
        if (curterm != null) {
            labels.add(curterm);
        }
        haslabels |= !labels.isEmpty();
        // Skip the division when the sum is zero: dividing would only produce
        // NaN or infinite entries. Also skip it when the sum already is 1.
        if (normalize && len != 0. && Math.abs(len - 1.0) > Double.MIN_NORMAL) {
            for (ObjectIterator<Int2DoubleMap.Entry> iter = values.int2DoubleEntrySet().fastIterator(); iter.hasNext();) {
                Int2DoubleMap.Entry entry = iter.next();
                entry.setValue(entry.getDoubleValue() / len);
            }
        }
        curvec = sparsefactory.newNumberVector(values, numterms);
        curlbl = LabelList.make(labels);
        // Reset the scratch buffers for the next line.
        values.clear();
        labels.clear();
        return true;
    }

    /**
     * Build the type information for the vectors read.
     *
     * @param mindim minimum dimensionality
     * @param maxdim maximum dimensionality
     * @return a vector field type if both bounds agree, a variable
     *         dimensionality vector type if {@code mindim < maxdim}
     * @throws AbortException if {@code mindim > maxdim}
     */
    @Override
    protected SimpleTypeInformation<V> getTypeInformation(int mindim, int maxdim) {
        if (mindim == maxdim) {
            return new VectorFieldTypeInformation<>(factory, mindim);
        }
        if (mindim < maxdim) {
            return new VectorTypeInformation<>(factory, factory.getDefaultSerializer(), mindim, maxdim);
        }
        throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    /**
     * Parameterization class.
     *
     * @param <V> sparse vector type produced by the parser
     */
    public static class Par<V extends SparseNumberVector>
    extends NumberVectorLabelParser.Par<V> {
        /** Flag to enable normalization of each vector. */
        public static final OptionID NORMALIZE_FLAG = new OptionID("tf.normalize", "Normalize vectors to manhattan length 1 (convert term counts to term frequencies)");

        /** Whether normalization was requested; off unless the flag is set. */
        boolean normalize = false;

        @Override
        public void configure(Parameterization config) {
            super.configure(config);
            new Flag(NORMALIZE_FLAG).grab(config, x -> normalize = x);
        }

        /**
         * Read the vector factory, restricted to sparse vector factories and
         * defaulting to {@link SparseFloatVector.Factory}.
         *
         * @param config parameterization to read from
         */
        @Override
        protected void getFactory(Parameterization config) {
            new ObjectParameter<SparseNumberVector.Factory<V>>(VECTOR_TYPE_ID, SparseNumberVector.Factory.class, SparseFloatVector.Factory.class) //
                    .grab(config, x -> factory = x);
        }

        @Override
        public TermFrequencyParser<V> make() {
            // getFactory() only accepts sparse factories, so this downcast is
            // expected to hold.
            return new TermFrequencyParser<>(normalize, format, labelIndices, (SparseNumberVector.Factory<V>) factory);
        }
    }
}

