/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.integration.webgraph;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import edu.umd.cloud9.integration.IntegrationUtils;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.webgraph.data.AnchorText;
import edu.umd.cloud9.webgraph.driver.TrecDriver;
import edu.umd.cloud9.webgraph.normalizer.AnchorTextBasicNormalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.junit.Assert;
import org.junit.Test;

/**
 * Integration test that launches {@link TrecDriver} through a {@code hadoop jar} command
 * and then spot-checks the {@code weightedReverseWebGraph} output it is expected to produce.
 *
 * <p>For each of two part files ({@code part-00000} and {@code part-00010}) the second
 * record is read, and every anchor in it whose text appears in the expected tables below
 * is compared against the expected weight and the expected set of document ids.
 * Anchors whose text is not in the tables are ignored.
 */
public class VerifyGov2Webgraph {
    private static final Random rand = new Random();

    // A random suffix gives each run its own output directory under /tmp.
    private static final String tmp = "/tmp/tmp-" + VerifyGov2Webgraph.class.getSimpleName() + rand.nextInt(10000);
    private static final String collectionPath = "/shared/collections/gov2/collection.raw/gov2-corpus/GX000";
    private static final String docnoMapping = "/shared/indexes/gov2/docno-mapping.dat";
    private static final String collectionOutput = tmp + "/webgraph-gov2";

    // Expected anchor text -> weight for the second record of part-00000.
    private final ImmutableMap<String, Float> anchorList1 = ImmutableMap.of(
            "mine safety health administration", 5.5f,
            "mine safety health administration msha", 1.25f,
            "msha", 1.25f,
            "safety health mining", 0.25f);

    // Expected anchor text -> document ids for the second record of part-00000.
    private final ImmutableMap<String, ImmutableSet<Integer>> anchorSources1 = ImmutableMap.of(
            "mine safety health administration",
            ImmutableSet.of(28502, 11970, 11445, 65562, 67427, 6338),
            "mine safety health administration msha",
            ImmutableSet.of(25765, 24550, 14962, 82536, 68902, 46419, 35554, 6461, 17709),
            "msha",
            ImmutableSet.of(25765, 1050, 35317),
            "safety health mining",
            ImmutableSet.of(29107));

    // Expected anchor text -> weight for the second record of part-00010.
    private final ImmutableMap<String, Float> anchorList2 = ImmutableMap.of(
            "hanford", 3.5f,
            "richland operations office rl", 0.5f);

    // Expected anchor text -> document ids for the second record of part-00010.
    private final ImmutableMap<String, ImmutableSet<Integer>> anchorSources2 = ImmutableMap.of(
            "hanford",
            ImmutableSet.of(55133, 89334, 51706, 52487, 44864, 39214),
            "richland operations office rl",
            ImmutableSet.of(51706));

    /**
     * Runs the driver and then verifies its output. The two steps are kept in a single
     * test method because verification reads the files the driver step writes.
     *
     * @throws Exception if the driver command or any file system access fails
     */
    @Test
    public void runTests() throws Exception {
        this.runTrecDriver();
        this.verifyAnchors();
    }

    /**
     * Checks that the input collection exists, removes any previous output directory,
     * and executes {@link TrecDriver} via {@link IntegrationUtils#exec}.
     *
     * @throws Exception if file system access or command execution fails
     */
    private void runTrecDriver() throws Exception {
        Configuration conf = IntegrationUtils.getBespinConfiguration();
        FileSystem fs = FileSystem.get(conf);
        Assert.assertTrue(fs.exists(new Path(collectionPath)));
        fs.delete(new Path(collectionOutput), true);

        // Jars handed to the job through -libjars.
        List<String> jars = Lists.newArrayList();
        jars.add(IntegrationUtils.getJar("dist", "cloud9"));
        jars.add(IntegrationUtils.getJar("lib", "guava"));
        jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
        jars.add(IntegrationUtils.getJar("lib", "fastutil"));
        jars.add(IntegrationUtils.getJar("lib", "sux4j"));
        jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
        jars.add(IntegrationUtils.getJar("lib", "commons-lang"));
        jars.add(IntegrationUtils.getJar("lib", "tools"));
        jars.add(IntegrationUtils.getJar("lib", "htmlparser"));
        jars.add(IntegrationUtils.getJar("lib", "pcj"));

        String[] args = new String[]{
            "hadoop jar",
            IntegrationUtils.getJar("dist", "cloud9"),
            TrecDriver.class.getCanonicalName(),
            String.format("-libjars=%s", Joiner.on(",").join(jars)),
            "-input", collectionPath,
            "-output", collectionOutput,
            "-collection", "gov2",
            "-docno", docnoMapping,
            "-caw",
            "-normalizer", AnchorTextBasicNormalizer.class.getCanonicalName()};
        IntegrationUtils.exec(Joiner.on(" ").join(args));
    }

    /**
     * Reads the second record of {@code part-00000} and {@code part-00010} and checks the
     * anchors found there against the expected weight and source tables.
     *
     * @throws Exception if a part file cannot be read
     */
    private void verifyAnchors() throws Exception {
        Configuration conf = IntegrationUtils.getBespinConfiguration();
        FileSystem fs = FileSystem.get(conf);

        ArrayListWritable<AnchorText> first = this.readSecondRecord(fs, "part-00000");
        this.verifyWeights(this.anchorList1, first);
        this.verifySources(this.anchorSources1, first);

        ArrayListWritable<AnchorText> second = this.readSecondRecord(fs, "part-00010");
        this.verifyWeights(this.anchorList2, second);
        this.verifySources(this.anchorSources2, second);
    }

    /**
     * Opens the named part file under {@code weightedReverseWebGraph} and returns the value
     * of its second record.
     *
     * <p>Fails the test if the file holds fewer than two records; otherwise the checks that
     * follow would run against an empty list and pass without verifying anything. The
     * reader is closed even when reading or an assertion fails.
     *
     * @param fs file system holding the driver output
     * @param partFile file name of the part to read, e.g. {@code part-00000}
     * @return the value of the second record in the file
     * @throws Exception if the file cannot be opened or read
     */
    private ArrayListWritable<AnchorText> readSecondRecord(FileSystem fs, String partFile) throws Exception {
        Path path = new Path(collectionOutput + "/" + "weightedReverseWebGraph" + "/" + partFile);
        IntWritable key = new IntWritable();
        ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(path));
        try {
            // The first record is skipped; only the second one is inspected.
            Assert.assertTrue("no first record in " + path, reader.next(key, value));
            Assert.assertTrue("no second record in " + path, reader.next(key, value));
        } finally {
            reader.close();
        }
        return value;
    }

    /**
     * Asserts that every anchor in {@code value} whose text is a key of {@code anchor} has
     * the mapped weight, within a tolerance of 1e-5.
     *
     * @param anchor expected weight per anchor text
     * @param value anchors read from the driver output
     */
    private void verifyWeights(Map<String, Float> anchor, ArrayListWritable<AnchorText> value) {
        for (int i = 0; i < value.size(); ++i) {
            AnchorText actual = value.get(i);
            Float expected = anchor.get(actual.getText());
            if (expected == null) {
                // Not one of the anchors this test has expectations for.
                continue;
            }
            Assert.assertEquals((double) expected.floatValue(), (double) actual.getWeight(), 1.0E-5);
        }
    }

    /**
     * Asserts that every anchor in {@code value} whose text is a key of {@code anchor}
     * reports as many document ids as the mapped set holds, and that each reported id is a
     * member of that set.
     *
     * @param anchor expected document ids per anchor text
     * @param value anchors read from the driver output
     */
    private void verifySources(Map<String, ImmutableSet<Integer>> anchor, ArrayListWritable<AnchorText> value) {
        for (int i = 0; i < value.size(); ++i) {
            AnchorText actual = value.get(i);
            ImmutableSet<Integer> expected = anchor.get(actual.getText());
            if (expected == null) {
                // Not one of the anchors this test has expectations for.
                continue;
            }
            int[] srcs = actual.getDocuments();
            Assert.assertEquals(expected.size(), srcs.length);
            for (int src : srcs) {
                Assert.assertTrue(expected.contains(src));
            }
        }
    }

    /**
     * Exposes this JUnit 4 test class through the JUnit 3 suite API.
     *
     * @return an adapter wrapping this class
     */
    public static junit.framework.Test suite() {
        return new JUnit4TestAdapter(VerifyGov2Webgraph.class);
    }
}

