/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier;

import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.Random;
import org.apache.commons.io.Charsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.vectorizer.encoders.ConstantValueEncoder;
import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
import org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder;

/**
 * Turns a newsgroup message file into a hashed feature vector.
 *
 * <p>Each instance owns a random source, a {@link StandardAnalyzer} used for tokenizing, a
 * word encoder named {@code "body"} and a constant encoder named {@code "Intercept"}.
 */
public final class NewsgroupHelper {
    /**
     * Date renderings selected by {@code leakType % 3}: an empty pattern, month and year, and a
     * full timestamp. The instances are shared by every helper, and {@link SimpleDateFormat} is
     * not thread-safe, so all formatting goes through {@link #formatDate(int, long)}.
     */
    private static final SimpleDateFormat[] DATE_FORMATS = {
        new SimpleDateFormat("", Locale.ENGLISH),
        new SimpleDateFormat("MMM-yyyy", Locale.ENGLISH),
        new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ENGLISH)
    };

    /** Cardinality of the vectors produced by {@link #encodeFeatureVector}. */
    public static final int FEATURES = 10000;

    /** Base timestamp in seconds since the epoch (1997-01-15 00:01:00 UTC). */
    private static final long DATE_REFERENCE = 853286460L;

    /** Thirty days, in seconds. */
    private static final long MONTH = 2592000L;

    /** Seven days, in seconds. */
    private static final long WEEK = 604800L;

    private final Random rand = RandomUtils.getRandom();
    private final Analyzer analyzer = new StandardAnalyzer();
    private final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
    private final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");

    /** Returns the encoder used for the counted words. */
    public FeatureVectorEncoder getEncoder() {
        return this.encoder;
    }

    /** Returns the encoder used for the constant (intercept) term. */
    public FeatureVectorEncoder getBias() {
        return this.bias;
    }

    /** Returns the random source used to jitter the synthetic date. */
    public Random getRandom() {
        return this.rand;
    }

    /**
     * Reads a message file and encodes its words into a sparse vector of {@link #FEATURES}
     * dimensions.
     *
     * <p>A synthetic date is derived from {@code actual} (one {@code MONTH} per unit on top of
     * {@code DATE_REFERENCE}, plus a random offset of less than one {@code WEEK}) and its
     * rendering is always tokenized. The header section is then read up to the first empty line
     * or end of file, and the remainder is treated as the body. What gets counted depends on
     * {@code leakType}:
     * <ul>
     *   <li>{@code leakType % 3} picks the date rendering: empty, {@code MMM-yyyy}, or
     *       {@code dd-MMM-yyyy HH:mm:ss};</li>
     *   <li>{@code leakType < 6}: the {@code From:}, {@code Subject:}, {@code Keywords:} and
     *       {@code Summary:} headers, including continuation lines that start with a space,
     *       are counted;</li>
     *   <li>{@code leakType < 3}: the body is counted as well.</li>
     * </ul>
     *
     * <p>Every distinct word is added to the vector with weight {@code log1p(count)}, after a
     * constant term of weight 1.
     *
     * @param file          message file, read as UTF-8
     * @param actual        value used to shift the synthetic date by whole months
     * @param leakType      selects which parts of the message are counted; a negative value
     *                      produces a negative array index and therefore an
     *                      {@link ArrayIndexOutOfBoundsException}
     * @param overallCounts running counts updated by every {@link #countWords} call
     * @return the encoded feature vector
     * @throws IOException if the file cannot be read or tokenized
     */
    public Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts) throws IOException {
        // Seconds are converted to milliseconds only after the random offset has been added.
        long date = (long) (1000.0 * (DATE_REFERENCE + (long) actual * MONTH + WEEK * this.rand.nextDouble()));
        Multiset<String> words = ConcurrentHashMultiset.create();

        try (BufferedReader reader = Files.newReader(file, Charsets.UTF_8)) {
            String line = reader.readLine();
            Reader dateString = new StringReader(formatDate(leakType, date));
            countWords(this.analyzer, words, dateString, overallCounts);

            // Header section: ends at the first empty line or at end of file.
            while (line != null && !line.isEmpty()) {
                boolean countHeader = (line.startsWith("From:") || line.startsWith("Subject:")
                        || line.startsWith("Keywords:") || line.startsWith("Summary:")) && leakType < 6;
                // Consume the header line plus any continuation lines (those starting with a space).
                do {
                    if (countHeader) {
                        countWords(this.analyzer, words, new StringReader(line), overallCounts);
                    }
                    line = reader.readLine();
                } while (line != null && line.startsWith(" "));
            }

            if (leakType < 3) {
                // Whatever is left in the reader is the message body.
                countWords(this.analyzer, words, reader, overallCounts);
            }
        }

        Vector v = new RandomAccessSparseVector(FEATURES);
        this.bias.addToVector("", 1.0, v);
        for (String word : words.elementSet()) {
            this.encoder.addToVector(word, Math.log1p(words.count(word)), v);
        }
        return v;
    }

    /**
     * Tokenizes {@code in} with {@code analyzer}, adds every token to {@code words}, and then adds
     * the complete contents of {@code words} to {@code overallCounts}. The token stream is always
     * closed, even when tokenizing fails; an {@link IOException} raised by the close itself is
     * handled by {@code Closeables.close(ts, true)}.
     *
     * <p>NOTE(review): {@code overallCounts} receives all of {@code words}, not only the tokens
     * read by this call, so a collection reused across calls is re-added each time. This is the
     * existing behaviour and is kept as is - confirm with callers before changing it.
     *
     * @param analyzer      analyzer that produces the token stream
     * @param words         collection receiving one entry per token
     * @param in            text to tokenize
     * @param overallCounts multiset receiving the contents of {@code words}
     * @throws IOException if tokenizing fails
     */
    public static void countWords(Analyzer analyzer, Collection<String> words, Reader in, Multiset<String> overallCounts) throws IOException {
        TokenStream ts = analyzer.tokenStream("text", in);
        try {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                words.add(term.toString());
            }
            overallCounts.addAll(words);
            ts.end();
        } finally {
            // Close on every path so a failure part-way through does not leave the stream open.
            Closeables.close(ts, true);
        }
    }

    /**
     * Renders {@code millis} with the format selected by {@code leakType % 3}, holding the
     * format's monitor because the {@link SimpleDateFormat} instances are shared statics.
     */
    private static String formatDate(int leakType, long millis) {
        SimpleDateFormat format = DATE_FORMATS[leakType % DATE_FORMATS.length];
        synchronized (format) {
            return format.format(new Date(millis));
        }
    }
}

