/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.integration.collection.clue;

import com.google.common.base.Joiner;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMapping;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMappingBuilder;
import edu.umd.cloud9.collection.clue.ClueWarcForwardIndex;
import edu.umd.cloud9.collection.clue.ClueWarcForwardIndexBuilder;
import edu.umd.cloud9.collection.clue.CountClueWarcRecords;
import edu.umd.cloud9.integration.IntegrationUtils;
import java.io.InputStream;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import junit.framework.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.junit.Assert;

/**
 * End-to-end integration test for the ClueWeb09 tooling: docno mapping,
 * record counting (repacked and raw input) and the forward index.
 *
 * <p>Each step shells out to {@code hadoop jar} via
 * {@link IntegrationUtils#exec} and then checks the artifacts the job wrote
 * to the file system. The steps are driven from the single {@link #runTests()}
 * entry point because they are order-dependent: the mapping file produced by
 * the first step is passed to every later step.
 */
public class IntegrationTest {
    private static final Random random = new Random();
    private static final Path collectionPathRepacked = new Path("/shared/collections/ClueWeb09/collection.compressed.block/en.01");
    private static final Path collectionPathRaw = new Path("/shared/collections/ClueWeb09/collection.raw/");
    // A random suffix keeps temporary outputs of separate runs from colliding.
    private static final String tmpPrefix = "tmp-" + IntegrationTest.class.getCanonicalName() + "-" + random.nextInt(10000);
    private static final String mappingFile = tmpPrefix + "-mapping.dat";
    // Record count / last docno the tests expect for the collection under test.
    private static final int expectedDocCount = 50220423;

    /**
     * Runs all integration steps in the order required by their data
     * dependencies (the docno mapping must exist before anything else runs).
     *
     * @throws Exception if any job invocation, file-system access or assertion fails
     */
    @org.junit.Test
    public void runTests() throws Exception {
        this.testDocnoMapping();
        this.testDemoCountDocsRepacked();
        this.testDemoCountDocsRaw();
        this.testForwardIndex();
    }

    /**
     * Builds the docno mapping from the repacked collection and spot-checks
     * docid/docno lookups in both directions.
     */
    private void testDocnoMapping() throws Exception {
        FileSystem fs = openFileSystem();
        Assert.assertTrue(fs.exists(collectionPathRepacked));

        runHadoopTool(ClueWarcDocnoMappingBuilder.class,
                "-collection=" + collectionPathRepacked,
                "-docnoMapping=" + mappingFile);

        ClueWarcDocnoMapping mapping = new ClueWarcDocnoMapping();
        mapping.loadMapping(new Path(mappingFile), fs);
        Assert.assertEquals("clueweb09-en0000-00-00000", mapping.getDocid(1));
        Assert.assertEquals("clueweb09-en0000-29-13313", mapping.getDocid(1000000));
        Assert.assertEquals(1, mapping.getDocno("clueweb09-en0000-00-00000"));
        Assert.assertEquals(1000000, mapping.getDocno("clueweb09-en0000-29-13313"));
    }

    /**
     * Counts records in the repacked collection and checks the reported total.
     */
    private void testDemoCountDocsRepacked() throws Exception {
        FileSystem fs = openFileSystem();
        Assert.assertTrue(fs.exists(collectionPathRepacked));

        // NOTE(review): this is the same output path the raw-collection test
        // uses; a stale file from one could mask a failure in the other.
        String records = tmpPrefix + "-records.txt";
        runHadoopTool(CountClueWarcRecords.class,
                "-repacked",
                "-path=" + collectionPathRepacked,
                "-docnoMapping=" + mappingFile,
                "-countOutput=" + records);

        Assert.assertEquals(expectedDocCount, readFirstLineAsInt(fs, new Path(records)));
    }

    /**
     * Counts records in segment 1 of the raw collection and checks the
     * reported total.
     */
    private void testDemoCountDocsRaw() throws Exception {
        FileSystem fs = openFileSystem();
        Assert.assertTrue(fs.exists(collectionPathRaw));

        // NOTE(review): shares its output path with the repacked-collection test.
        String records = tmpPrefix + "-records.txt";
        runHadoopTool(CountClueWarcRecords.class,
                "-original",
                "-segment=1",
                "-path=" + collectionPathRaw,
                "-docnoMapping=" + mappingFile,
                "-countOutput=" + records);

        Assert.assertEquals(expectedDocCount, readFirstLineAsInt(fs, new Path(records)));
    }

    /**
     * Builds the forward index over the repacked collection, then verifies
     * document retrieval by docno and by docid plus the docno range.
     */
    private void testForwardIndex() throws Exception {
        FileSystem fs = openFileSystem();
        Assert.assertTrue(fs.exists(collectionPathRepacked));

        String index = tmpPrefix + "-findex.dat";
        runHadoopTool(ClueWarcForwardIndexBuilder.class,
                "-collection=" + collectionPathRepacked,
                "-index=" + index);

        ClueWarcForwardIndex findex = new ClueWarcForwardIndex();
        findex.loadIndex(new Path(index), new Path(mappingFile), fs);
        Assert.assertTrue(findex.getDocument(14069750).getContent().contains("Vizergy: How Design and SEO work together"));
        Assert.assertTrue(findex.getDocument("clueweb09-en0008-76-19728").getContent().contains("Jostens - Homeschool Yearbooks"));
        Assert.assertEquals(1, findex.getFirstDocno());
        Assert.assertEquals(expectedDocCount, findex.getLastDocno());
    }

    /**
     * Returns the file system for the configuration supplied by
     * {@link IntegrationUtils#getBespinConfiguration()}.
     */
    private static FileSystem openFileSystem() throws Exception {
        Configuration conf = IntegrationUtils.getBespinConfiguration();
        return FileSystem.get(conf);
    }

    /**
     * Runs {@code tool} from the cloud9 jar through {@code hadoop jar}, with
     * the guava jar on {@code -libjars}, followed by the tool-specific arguments.
     *
     * @param tool     class whose canonical name is passed as the program to run
     * @param toolArgs arguments appended after the common command prefix
     */
    private static void runHadoopTool(Class<?> tool, String... toolArgs) throws Exception {
        String[] command = new String[4 + toolArgs.length];
        command[0] = "hadoop jar";
        command[1] = IntegrationUtils.getJar("dist", "cloud9");
        command[2] = tool.getCanonicalName();
        command[3] = "-libjars=" + IntegrationUtils.getJar("lib", "guava");
        System.arraycopy(toolArgs, 0, command, 4, toolArgs.length);
        IntegrationUtils.exec(Joiner.on(" ").join(command));
    }

    /**
     * Reads the first line of {@code path} and parses it as an int.
     *
     * <p>The reader is closed in a {@code finally} block so the underlying
     * stream is released even when reading fails.
     *
     * @param fs   file system to read from
     * @param path file whose first line holds the number
     * @return the parsed value of the first line
     */
    private static int readFirstLineAsInt(FileSystem fs, Path path) throws Exception {
        InputStream in = fs.open(path);
        LineReader reader = new LineReader(in);
        Text line = new Text();
        try {
            reader.readLine(line);
        } finally {
            reader.close();
        }
        return Integer.parseInt(line.toString());
    }

    /**
     * Exposes this JUnit 4 test class through a {@link JUnit4TestAdapter}.
     *
     * @return an adapter wrapping this class
     */
    public static Test suite() {
        return new JUnit4TestAdapter(IntegrationTest.class);
    }
}

