/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.integration.webgraph;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import edu.umd.cloud9.integration.IntegrationUtils;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.webgraph.data.AnchorText;
import edu.umd.cloud9.webgraph.data.AnchorTextConstants;
import edu.umd.cloud9.webgraph.driver.ClueWebDriver;
import edu.umd.cloud9.webgraph.normalizer.AnchorTextBasicNormalizer;
import java.util.ArrayList;
import java.util.Map;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.junit.Assert;
import org.junit.Test;

public class VerifyClueWeb09EN01Webgraph {
    private static final Random rand = new Random();
    private static final String tmp = "/tmp/tmp-" + VerifyClueWeb09EN01Webgraph.class.getSimpleName() + rand.nextInt(10000);
    private static final String collectionPath = "/shared/collections/ClueWeb09/collection.compressed.block/";
    private static final String docnoMapping = "/shared/collections/ClueWeb09/docno-mapping.dat";
    private static final String collectionOutput = tmp + "/webgraph-clueweb09";
    private ImmutableMap<Integer, String> urlMap = ImmutableMap.of((Object)200, (Object)"http://160.254.123.37/adr_index_performance_review.jsp", (Object)600, (Object)"http://207.218.246.235/s/spiderman4/", (Object)10, (Object)"http://00perdomain.com/computers/", (Object)610, (Object)"http://207.218.246.235/s/startrek11/news/863_Tyler_Perry_Joins_Star_Trek_11_Cast.html");
    private ImmutableMap<Integer, ImmutableSet<Integer>> internalLinkMap = ImmutableMap.of((Object)200, (Object)ImmutableSet.of((Object)207, (Object)208, (Object)209, (Object)210, (Object)201, (Object)202, (Object[])new Integer[]{203, 204, 205, 206}), (Object)600, (Object)ImmutableSet.of((Object)520, (Object)615, (Object)616, (Object)619, (Object)526, (Object)480, (Object[])new Integer[]{481, 529, 533, 487, 629, 601, 585, 492, 591, 641, 596, 646, 506, 507, 602, 603, 604, 605, 559, 651, 467, 468}), (Object)10, (Object)ImmutableSet.of((Object)11, (Object)13, (Object)6), (Object)610, (Object)ImmutableSet.of((Object)520, (Object)615, (Object)619, (Object)480, (Object)481, (Object)626, (Object[])new Integer[]{486, 487, 629, 600, 614, 492, 533, 591, 640, 641, 548, 596, 646, 506, 507, 651, 605, 559, 467, 468}));
    private ImmutableMap<Integer, ImmutableSet<Integer>> externalLinkMap = ImmutableMap.of((Object)600, (Object)ImmutableSet.of((Object)31937044));

    @Test
    public void runTests() throws Exception {
        this.runClueDriver();
        this.verifyWebGraph();
    }

    private void runClueDriver() throws Exception {
        Configuration conf = IntegrationUtils.getBespinConfiguration();
        FileSystem fs = FileSystem.get((Configuration)conf);
        Assert.assertTrue((boolean)fs.exists(new Path(collectionPath)));
        fs.delete(new Path(collectionOutput), true);
        ArrayList jars = Lists.newArrayList();
        jars.add(IntegrationUtils.getJar("dist", "cloud9"));
        jars.add(IntegrationUtils.getJar("lib", "guava"));
        jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
        jars.add(IntegrationUtils.getJar("lib", "fastutil"));
        jars.add(IntegrationUtils.getJar("lib", "sux4j"));
        jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
        jars.add(IntegrationUtils.getJar("lib", "commons-lang"));
        jars.add(IntegrationUtils.getJar("lib", "tools"));
        jars.add(IntegrationUtils.getJar("lib", "htmlparser"));
        jars.add(IntegrationUtils.getJar("lib", "pcj"));
        Object[] args = new String[]{"hadoop jar", IntegrationUtils.getJar("dist", "cloud9"), ClueWebDriver.class.getCanonicalName(), String.format("-libjars=%s", Joiner.on((String)",").join((Iterable)jars)), "-input", collectionPath, "-output", collectionOutput, "-docno", docnoMapping, "-begin", "1", "-end", "1", "-il", "-normalizer", AnchorTextBasicNormalizer.class.getCanonicalName()};
        IntegrationUtils.exec(Joiner.on((String)" ").join(args));
    }

    private void verifyWebGraph() throws Exception {
        Configuration conf = IntegrationUtils.getBespinConfiguration();
        FileSystem fs = FileSystem.get((Configuration)conf);
        IntWritable key = new IntWritable();
        ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), new SequenceFile.Reader.Option[]{SequenceFile.Reader.file((Path)new Path(collectionOutput + "/" + "webGraph" + "/part-00000"))});
        reader.next((Writable)key, value);
        this.verifyURLs(200, (Map<Integer, String>)this.urlMap, value);
        this.verifyLinks(200, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, (Map<Integer, ImmutableSet<Integer>>)this.internalLinkMap, value);
        reader.next((Writable)key, value);
        reader.next((Writable)key, value);
        this.verifyURLs(600, (Map<Integer, String>)this.urlMap, value);
        this.verifyLinks(600, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, (Map<Integer, ImmutableSet<Integer>>)this.internalLinkMap, value);
        this.verifyLinks(600, AnchorTextConstants.Type.EXTERNAL_OUT_LINK.val, (Map<Integer, ImmutableSet<Integer>>)this.externalLinkMap, value);
        reader.close();
        reader = new SequenceFile.Reader(fs.getConf(), new SequenceFile.Reader.Option[]{SequenceFile.Reader.file((Path)new Path(collectionOutput + "/" + "webGraph" + "/part-00010"))});
        reader.next((Writable)key, value);
        this.verifyURLs(10, (Map<Integer, String>)this.urlMap, value);
        this.verifyLinks(10, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, (Map<Integer, ImmutableSet<Integer>>)this.internalLinkMap, value);
        reader.next((Writable)key, value);
        reader.next((Writable)key, value);
        reader.next((Writable)key, value);
        this.verifyURLs(610, (Map<Integer, String>)this.urlMap, value);
        this.verifyLinks(610, AnchorTextConstants.Type.INTERNAL_OUT_LINK.val, (Map<Integer, ImmutableSet<Integer>>)this.internalLinkMap, value);
        reader.close();
    }

    private void verifyURLs(int key, Map<Integer, String> urls, ArrayListWritable<AnchorText> value) {
        for (int i = 0; i < value.size(); ++i) {
            if (!((AnchorText)value.get(i)).isURL()) continue;
            Assert.assertEquals((Object)urls.get(key), (Object)((AnchorText)value.get(i)).getText());
            break;
        }
    }

    private void verifyLinks(int key, byte type, Map<Integer, ImmutableSet<Integer>> links, ArrayListWritable<AnchorText> value) {
        for (int i = 0; i < value.size(); ++i) {
            if ((!((AnchorText)value.get(i)).isInternalOutLink() || type != AnchorTextConstants.Type.INTERNAL_OUT_LINK.val) && (!((AnchorText)value.get(i)).isExternalOutLink() || type != AnchorTextConstants.Type.EXTERNAL_OUT_LINK.val)) continue;
            int[] targets = ((AnchorText)value.get(i)).getDocuments();
            Assert.assertEquals((long)links.get(key).size(), (long)targets.length);
            for (int j = 0; j < targets.length; ++j) {
                Assert.assertTrue((boolean)links.get(key).contains((Object)targets[j]));
            }
        }
    }

    public static junit.framework.Test suite() {
        return new JUnit4TestAdapter(VerifyClueWeb09EN01Webgraph.class);
    }
}

