/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.collection.DocumentForwardIndex;
import edu.umd.cloud9.collection.Indexable;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMapping;
import edu.umd.cloud9.mapred.NullInputFormat;
import edu.umd.cloud9.mapred.NullMapper;
import edu.umd.cloud9.mapred.NullOutputFormat;
import edu.umd.cloud9.webgraph.data.IndexableAnchorText;
import edu.umd.cloud9.webgraph.data.IndexableAnchorTextForwardIndex;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Random;
import javax.servlet.Servlet;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Logger;
import org.mortbay.jetty.HandlerContainer;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.servlet.Context;
import org.mortbay.jetty.servlet.ServletHolder;

public class ClueWebAnchorTextForwardIndexHttpServer {
    /** Logger shared by the driver, the mapper and the servlets of this class. */
    private static final Logger LOG = Logger.getLogger(ClueWebAnchorTextForwardIndexHttpServer.class);
    /** Delimiter used to pack the ClueWeb forward index paths into one job property. */
    private static final String SEPARATOR = ",";
    /** Last docno of each loaded document forward index; slot i pairs with docForwardIndex[i]. */
    private static final int[] lastDocs = new int[10];
    /** Trimmed paths of the ClueWeb forward index files, in the order they are loaded. */
    private static final ArrayList<String> clueweb = new ArrayList<String>();
    /** Anchor text forward index queried by the servlets; set by the server mapper. */
    private static IndexableAnchorTextForwardIndex sForwardIndex;
    /** One document forward index per entry of {@link #clueweb}; set by the server mapper. */
    private static DocumentForwardIndex<Indexable>[] docForwardIndex;

    /** Private constructor: this class only exposes static members and nested classes. */
    private ClueWebAnchorTextForwardIndexHttpServer() {
    }

    /**
     * Submits a single-mapper job that hosts the HTTP server, then waits until the
     * mapper publishes its host name through a temporary file and logs it.
     *
     * @param args generic Hadoop options followed by exactly three arguments: the
     *             anchor text index file, the docno mapping data file and the root
     *             directory holding {@code findex.en.01.dat} .. {@code findex.en.10.dat}
     * @throws Exception if the job cannot be submitted or the host file cannot be read
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.out.println("usage: [index-file] [docno-mapping-data-files] [clue-forward-index-root]");
            System.exit(-1);
        }
        String indexFile = otherArgs[0];
        String mappingFile = otherArgs[1];
        String clueIndexRoot = otherArgs[2].endsWith("/") ? otherArgs[2] : otherArgs[2] + "/";

        // Build "root/findex.en.01.dat, root/findex.en.02.dat, ..., root/findex.en.10.dat".
        StringBuilder cluewebForwardIndex = new StringBuilder();
        for (int i = 1; i < 10; ++i) {
            cluewebForwardIndex.append(clueIndexRoot).append("findex.en.0").append(i).append(".dat")
                    .append(SEPARATOR).append(" ");
        }
        cluewebForwardIndex.append(clueIndexRoot).append("findex.en.10.dat");

        LOG.info("Launching DocumentForwardIndexHttpServer");
        LOG.info(" - index file: " + indexFile);
        LOG.info(" - docno mapping data file: " + mappingFile);
        LOG.info(" - ClueWeb09 index root:" + clueIndexRoot);

        // The mapper writes its host name to this randomly named file once it is set up.
        FileSystem fs = FileSystem.get(conf);
        Random rand = new Random();
        int r = rand.nextInt();
        Path tmpPath = new Path("/tmp/" + r);
        if (fs.exists(tmpPath)) {
            fs.delete(tmpPath, true);
        }

        JobConf job = new JobConf(conf, ClueWebAnchorTextForwardIndexHttpServer.class);
        job.setJobName("ForwardIndexServer:" + indexFile);
        job.set("mapred.child.java.opts", "-Xmx2048m");
        job.setNumMapTasks(1);
        job.setNumReduceTasks(0);
        job.setInputFormat(NullInputFormat.class);
        job.setOutputFormat(NullOutputFormat.class);
        job.setMapperClass(ServerMapper.class);
        job.set("IndexFile", indexFile);
        job.set("DocnoMappingDataFile", mappingFile);
        job.set("TmpPath", tmpPath.toString());
        job.set("ClueWebIndexFiles", cluewebForwardIndex.toString());

        JobClient client = new JobClient(job);
        client.submitJob(job);

        LOG.info("Waiting for server to start up...");
        while (!fs.exists(tmpPath)) {
            Thread.sleep(50000L);
            LOG.info("...");
        }

        String host;
        FSDataInputStream in = fs.open(tmpPath);
        try {
            host = in.readUTF();
        } finally {
            // Close the stream even when the host name cannot be read.
            in.close();
        }
        LOG.info("host: " + host);
        LOG.info("port: 8888");
    }

    /**
     * Accessor (marked synthetic by the decompiler) that stores the given array in
     * {@code docForwardIndex} and hands the same array back to the caller.
     *
     * @param x0 the array of document forward indexes to install
     * @return {@code x0}
     */
    static /* synthetic */ DocumentForwardIndex[] access$302(DocumentForwardIndex[] x0) {
        return docForwardIndex = x0;
    }

    public static class FetchDocnoServlet
    extends HttpServlet {
        static final long serialVersionUID = 5970126341L;

        public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            this.doPost(req, res);
        }

        public void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            LOG.info((Object)"triggered servlet for fetching document by docno");
            int docno = 0;
            try {
                IndexableAnchorText doc;
                if (req.getParameterValues("docno") != null) {
                    docno = Integer.parseInt(req.getParameterValues("docno")[0]);
                }
                if ((doc = sForwardIndex.getDocument(docno)) == null) {
                    throw new Exception();
                }
                LOG.info((Object)("fetched: " + ((Indexable)doc).getDocid() + " = docno " + docno));
                res.setContentType(((Indexable)doc).getDisplayContentType());
                PrintWriter out = res.getWriter();
                out.print(((Indexable)doc).getContent().replace("<body>", "<body><a href=\"/fetch_content?docno=" + docno + "\"> Fetch content for docno: " + docno + "</a><br><br>"));
                out.close();
            }
            catch (Exception e) {
                LOG.info((Object)("trapped error fetching " + docno));
                res.setContentType("text/html");
                PrintWriter out = res.getWriter();
                out.print("<html><head><title>Invalid docno!</title><head>\n");
                out.print("<body>\n");
                out.print("<h1>Error!</h1>\n");
                out.print("<h3>Invalid docno: " + docno + "</h3>\n");
                out.print("</body></html>\n");
                out.close();
            }
        }
    }

    public static class FetchDocidServlet
    extends HttpServlet {
        static final long serialVersionUID = 3986721097L;

        public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            this.doPost(req, res);
        }

        public void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            LOG.info((Object)"triggered servlet for fetching document by docid");
            String docid = null;
            try {
                IndexableAnchorText doc;
                if (req.getParameterValues("docid") != null) {
                    docid = req.getParameterValues("docid")[0];
                }
                if ((doc = sForwardIndex.getDocument(docid)) == null) {
                    throw new Exception();
                }
                LOG.info((Object)("fetched: " + ((Indexable)doc).getDocid()));
                res.setContentType(((Indexable)doc).getDisplayContentType());
                PrintWriter out = res.getWriter();
                out.print(((Indexable)doc).getContent().replace("<body>", "<body><a href=\"/fetch_content?docid=" + docid + "\"> Fetch content for docid: " + docid + "</a><br><br>"));
                out.close();
            }
            catch (Exception e) {
                LOG.info((Object)("trapped error fetching " + docid));
                res.setContentType("text/html");
                PrintWriter out = res.getWriter();
                out.print("<html><head><title>Invalid docid!</title><head>\n");
                out.print("<body>\n");
                out.print("<h1>Error!</h1>\n");
                out.print("<h3>Invalid docid: " + docid + "</h3>\n");
                out.print("</body></html>\n");
                out.close();
            }
        }
    }

    public static class FetchDocContentServlet
    extends HttpServlet {
        static final long serialVersionUID = 5970126341L;

        public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            this.doPost(req, res);
        }

        public void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            LOG.info((Object)"triggered servlet for fetching document content");
            int docno = 0;
            try {
                if (req.getParameterValues("docno") != null) {
                    docno = Integer.parseInt(req.getParameterValues("docno")[0]);
                } else if (req.getParameterValues("docid") != null) {
                    docno = sForwardIndex.getDocno(req.getParameterValues("docid")[0]);
                }
                Indexable doc = null;
                int i = 0;
                for (i = 0; i < lastDocs.length; ++i) {
                    if (docno > lastDocs[i]) continue;
                    doc = docForwardIndex[i].getDocument(docno);
                    break;
                }
                if (doc == null) {
                    throw new Exception();
                }
                LOG.info((Object)("fetched: " + doc.getDocid() + " = docno " + docno));
                res.setContentType(doc.getDisplayContentType());
                PrintWriter out = res.getWriter();
                out.print(doc.getContent().replaceAll("<\\s*/\\s*[bB][oO][dD][Yy]\\s*>", "<br><br><a href=\"/fetch_docno?docno=" + docno + "\"> Fetch anchor text for docno: " + docno + "</a></body>"));
                out.close();
            }
            catch (Exception e) {
                LOG.info((Object)("trapped error fetching " + docno));
                res.setContentType("text/html");
                PrintWriter out = res.getWriter();
                out.print("<html><head><title>Invalid docno!</title><head>\n");
                out.print("<body>\n");
                out.print("<h1>Error!</h1>\n");
                out.print("<h3>Invalid docno: " + docno + "</h3>\n");
                out.print("</body></html>\n");
                out.close();
            }
        }
    }

    /**
     * Landing page: shows the collection path, links to the first and last document,
     * a few random example links, and forms that post to the fetch servlets.
     */
    public static class HomeServlet
    extends HttpServlet {
        static final long serialVersionUID = 8253865405L;
        static final Random r = new Random();

        /**
         * Writes the landing page.
         *
         * @param req the incoming request (not inspected)
         * @param res the response the HTML page is written to
         * @throws IOException if the page cannot be written
         */
        public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
            res.setContentType("text/html");
            PrintWriter out = res.getWriter();
            out.println("<html><head><title>Collection Access: " + sForwardIndex.getCollectionPath() + "</title></head>");
            out.println("<body>");
            out.println("<h3>Collection Access: " + sForwardIndex.getCollectionPath() + "</h3>");
            int firstDocno = sForwardIndex.getFirstDocno();
            int lastDocno = sForwardIndex.getLastDocno();
            // The docno range is inclusive on both ends; never hand Random a bound below 1.
            int numDocs = Math.max(1, lastDocno - firstDocno + 1);
            String firstDocid = sForwardIndex.getDocid(firstDocno);
            String lastDocid = sForwardIndex.getDocid(lastDocno);
            out.println("First document: docno <a href=\"/fetch_docno?docno=" + firstDocno + "\">" + firstDocno + "</a> or <a href=\"/fetch_docid?docid=" + firstDocid + "\">" + firstDocid + "</a><br/>");
            out.println("Last document: docno <a href=\"/fetch_docno?docno=" + lastDocno + "\">" + lastDocno + "</a> or <a href=\"/fetch_docid?docid=" + lastDocid + "\">" + lastDocid + "</a>");

            out.println("<h3>Fetch a docid</h3>");
            out.println("<p>(random examples: ");
            String id = sForwardIndex.getDocid(randomDocno(firstDocno, numDocs));
            out.println("<a href=\"/fetch_docid?docid=" + id + "\">" + id + "</a>, ");
            id = sForwardIndex.getDocid(randomDocno(firstDocno, numDocs));
            out.println("<a href=\"/fetch_docid?docid=" + id + "\">" + id + "</a>, ");
            id = sForwardIndex.getDocid(randomDocno(firstDocno, numDocs));
            out.println("<a href=\"/fetch_docid?docid=" + id + "\">" + id + "</a>)</p>");
            out.println("<p>");
            out.println("<form method=\"post\" action=\"fetch_docid\">");
            out.println("<input type=\"text\" name=\"docid\" size=\"60\" />");
            out.println("<input type=\"submit\" value=\"Fetch!\" />");
            out.println("</form>");
            out.println("</p>");
            out.println("<p>");
            out.println("<form method=\"post\" action=\"fetch_content\">");
            out.println("<input type=\"text\" name=\"docid\" size=\"60\" />");
            out.println("<input type=\"submit\" value=\"Fetch Content!\" />");
            out.println("</form>");
            out.println("</p>");

            out.println("<h3>Fetch a docno</h3>");
            out.println("<p>(random examples: ");
            int n = randomDocno(firstDocno, numDocs);
            out.println("<a href=\"/fetch_docno?docno=" + n + "\">" + n + "</a>, ");
            n = randomDocno(firstDocno, numDocs);
            out.println("<a href=\"/fetch_docno?docno=" + n + "\">" + n + "</a>, ");
            n = randomDocno(firstDocno, numDocs);
            out.println("<a href=\"/fetch_docno?docno=" + n + "\">" + n + "</a>)</p>");
            out.println("<p>");
            out.println("<form method=\"post\" action=\"fetch_docno\">");
            out.println("<input type=\"text\" name=\"docno\" size=\"60\" />");
            out.println("<input type=\"submit\" value=\"Fetch!\" />");
            out.println("</form>");
            out.println("</p>");
            out.println("<p>");
            out.println("<form method=\"post\" action=\"fetch_content\">");
            out.println("<input type=\"text\" name=\"docno\" size=\"60\" />");
            out.println("<input type=\"submit\" value=\"Fetch Content!\" />");
            out.println("</form>");
            out.println("</p>");
            out.print("</body></html>\n");
            out.close();
        }

        /** Picks a random docno in {@code [firstDocno, firstDocno + numDocs)}. */
        private static int randomDocno(int firstDocno, int numDocs) {
            return r.nextInt(numDocs) + firstDocno;
        }
    }

    /**
     * Mapper whose {@code run} method loads the forward indexes, starts the HTTP
     * server on port 8888, publishes the host name through the temporary file and
     * then stays alive so the server keeps serving.
     */
    private static class ServerMapper
    extends NullMapper {
        private ServerMapper() {
        }

        /**
         * Loads the anchor text index and every ClueWeb document index named in the
         * job configuration, registers the servlets, starts the server and writes the
         * local host name to the {@code TmpPath} file.
         *
         * <p>The host file is written only after the server has started, so its
         * existence tells the driver that the server is up. The method returns only
         * when the thread is interrupted.
         *
         * @param conf     job configuration carrying {@code IndexFile},
         *                 {@code DocnoMappingDataFile}, {@code TmpPath} and
         *                 {@code ClueWebIndexFiles}
         * @param reporter used to signal progress while the server is idle
         * @throws IOException if an index header or the host file cannot be accessed
         * @throws RuntimeException if an index cannot be initialized or the server
         *                          cannot be started
         */
        @Override
        public void run(JobConf conf, Reporter reporter) throws IOException {
            int port = 8888;
            String indexFile = conf.get("IndexFile");
            String mappingFile = conf.get("DocnoMappingDataFile");
            Path tmpPath = new Path(conf.get("TmpPath"));
            String[] cluewebIndexFiles = conf.get("ClueWebIndexFiles").split(ClueWebAnchorTextForwardIndexHttpServer.SEPARATOR);
            String host = InetAddress.getLocalHost().toString();
            FileSystem fs = FileSystem.get(conf);

            LOG.info("host: " + host);
            LOG.info("port: " + port);
            LOG.info("forward index: " + indexFile);
            String indexClass = readLeadingString(fs, new Path(indexFile));
            LOG.info("index class: " + indexClass);

            try {
                sForwardIndex = new IndexableAnchorTextForwardIndex(new ClueWarcDocnoMapping());
                sForwardIndex.loadIndex(new Path(indexFile), new Path(mappingFile), fs);
            }
            catch (Exception e) {
                throw new RuntimeException("Error initializing forward index!", e);
            }

            for (String s : cluewebIndexFiles) {
                clueweb.add(s.trim());
            }
            docForwardIndex = new DocumentForwardIndex[clueweb.size()];
            for (int i = 0; i < clueweb.size(); ++i) {
                Path cluewebIndex = new Path(clueweb.get(i));
                // Each index file starts with the name of the class able to load it.
                String indexClueWebClass = readLeadingString(fs, cluewebIndex);
                try {
                    docForwardIndex[i] = (DocumentForwardIndex)Class.forName(indexClueWebClass).newInstance();
                    docForwardIndex[i].loadIndex(cluewebIndex, new Path(mappingFile), fs);
                    lastDocs[i] = docForwardIndex[i].getLastDocno();
                }
                catch (Exception e) {
                    throw new RuntimeException("Error initializing forward index!", e);
                }
            }

            Server server = new Server(port);
            Context root = new Context(server, "/", Context.SESSIONS);
            root.addServlet(new ServletHolder(new FetchDocidServlet()), "/fetch_docid");
            root.addServlet(new ServletHolder(new FetchDocnoServlet()), "/fetch_docno");
            root.addServlet(new ServletHolder(new FetchDocContentServlet()), "/fetch_content");
            root.addServlet(new ServletHolder(new HomeServlet()), "/");

            try {
                server.start();
            }
            catch (Exception e) {
                // Fail the task instead of advertising a server that is not listening.
                throw new RuntimeException("Error starting HTTP server on port " + port, e);
            }

            FSDataOutputStream out = fs.create(tmpPath, true);
            try {
                out.writeUTF(host);
            } finally {
                out.close();
            }

            // Stay alive without spinning a CPU core; wake up periodically to report progress.
            while (true) {
                if (reporter != null) {
                    reporter.progress();
                }
                try {
                    Thread.sleep(1000L);
                }
                catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /** Reads the UTF string stored at the start of {@code file}, always closing the stream. */
        private static String readLeadingString(FileSystem fs, Path file) throws IOException {
            FSDataInputStream in = fs.open(file);
            try {
                return in.readUTF();
            } finally {
                in.close();
            }
        }
    }
}

