/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.cloud9.collection.wikipedia;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import edu.umd.cloud9.collection.Indexable;
import info.bliki.wiki.filter.ITextConverter;
import info.bliki.wiki.filter.PlainTextConverter;
import info.bliki.wiki.model.WikiModel;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.io.WritableUtils;

/**
 * A single Wikipedia page held as its raw XML, together with helpers that
 * derive plain-text content, HTML display content and link information from
 * that XML.
 *
 * <p>The class only stores and queries the page; locating the title, id,
 * text offsets and the page-type flags is delegated to subclasses through
 * {@link #processPage(String)}.
 *
 * <p>Instances reuse one {@code WikiModel} and one {@code PlainTextConverter}
 * across calls. NOTE(review): that reuse suggests instances are not meant to
 * be shared between threads; confirm before using one concurrently.
 */
public abstract class WikipediaPage
extends Indexable {
    /** Start delimiter of a page element: {@code <page>}. */
    public static final String XML_START_TAG = "<page>";
    /** End delimiter of a page element: {@code </page>}. */
    public static final String XML_END_TAG = "</page>";
    /** Start delimiter of the title element. */
    protected static final String XML_START_TAG_TITLE = "<title>";
    /** End delimiter of the title element. */
    protected static final String XML_END_TAG_TITLE = "</title>";
    /** Start delimiter of the namespace element. */
    protected static final String XML_START_TAG_NAMESPACE = "<ns>";
    /** End delimiter of the namespace element. */
    protected static final String XML_END_TAG_NAMESPACE = "</ns>";
    /** Start delimiter of the id element. */
    protected static final String XML_START_TAG_ID = "<id>";
    /** End delimiter of the id element. */
    protected static final String XML_END_TAG_ID = "</id>";
    /** Start delimiter of the text element, including its {@code xml:space} attribute. */
    protected static final String XML_START_TAG_TEXT = "<text xml:space=\"preserve\">";
    /** End delimiter of the text element. */
    protected static final String XML_END_TAG_TEXT = "</text>";

    /** Raw XML of the page, as handed to {@link #readPage(WikipediaPage, String)}. */
    protected String page;
    /** Page title; this class only reads it, subclasses are expected to assign it. */
    protected String title;
    /** Page id, returned as the document id; this class only reads it. */
    protected String mId;
    /**
     * Offset in {@link #page} at which the text start tag begins, or {@code -1}
     * when the page has no text (see {@link #isEmpty()}).
     */
    protected int textStart;
    /** Offset in {@link #page} at which the wiki markup ends (exclusive). */
    protected int textEnd;
    /** Page-type flags; this class only exposes them, it never assigns them. */
    protected boolean isRedirect;
    protected boolean isDisambig;
    protected boolean isStub;
    protected boolean isArticle;
    /** Language code of the page; serialized as {@code "unk"} when {@code null}. */
    protected String language;

    private final WikiModel wikiModel = new WikiModel("", "");
    private final PlainTextConverter textConverter = new PlainTextConverter();

    private static final Pattern REF = Pattern.compile("<ref>.*?</ref>");
    private static final Pattern LANG_LINKS = Pattern.compile("\\[\\[[a-z\\-]+:[^\\]]+\\]\\]");
    private static final Pattern DOUBLE_CURLY = Pattern.compile("\\{\\{.*?\\}\\}");
    private static final Pattern URL = Pattern.compile("http://[^ <]+");
    private static final Pattern HTML_TAG = Pattern.compile("<[^!][^>]*>");
    // DOTALL so that comments spanning several lines are matched as one unit.
    private static final Pattern HTML_COMMENT = Pattern.compile("<!--.*?-->", Pattern.DOTALL);

    /**
     * Serializes this page: the raw XML as a length-prefixed UTF-8 byte array,
     * followed by the language code ({@code "unk"} if none is set).
     *
     * @param out sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    public void write(DataOutput out) throws IOException {
        byte[] bytes = page.getBytes("UTF-8");
        WritableUtils.writeVInt(out, bytes.length);
        out.write(bytes, 0, bytes.length);
        out.writeUTF(language == null ? "unk" : language);
    }

    /**
     * Deserializes a page written by {@link #write(DataOutput)}.
     *
     * <p>The language is assigned after the page has been processed, so the
     * serialized value overrides anything {@link #processPage(String)} may set.
     *
     * @param in source to read from
     * @throws IOException if reading from {@code in} fails
     */
    public void readFields(DataInput in) throws IOException {
        int length = WritableUtils.readVInt(in);
        byte[] bytes = new byte[length];
        in.readFully(bytes, 0, length);
        WikipediaPage.readPage(this, new String(bytes, "UTF-8"));
        language = in.readUTF();
    }

    /**
     * Returns the page id as the document id.
     */
    @Override
    public String getDocid() {
        return mId;
    }

    /**
     * Sets the language code of this page.
     *
     * @param language language code to store
     */
    @Deprecated
    public void setLanguage(String language) {
        this.language = language;
    }

    /**
     * Returns the language code of this page, possibly {@code null}.
     */
    public String getLanguage() {
        return language;
    }

    /**
     * Returns the page as plain text: the title, a newline, then the rendered
     * wiki markup with references, comments, URLs, templates and HTML tags
     * replaced by spaces.
     *
     * <p>For a page without text ({@link #isEmpty()}) the markup is treated as
     * the empty string rather than failing with a {@code NullPointerException}.
     *
     * @return cleaned plain-text content
     */
    @Override
    public String getContent() {
        String markup = getWikiMarkup();
        if (markup == null) {
            // Empty pages have no markup; matching against null would throw.
            markup = "";
        }

        // Inter-language links are stripped before rendering.
        String s = LANG_LINKS.matcher(markup).replaceAll(" ");

        wikiModel.setUp();
        try {
            s = getTitle() + "\n" + wikiModel.render(textConverter, s);
        } finally {
            // Always release the model, even when rendering fails.
            wikiModel.tearDown();
        }

        // Unescaping is applied twice; presumably some entities arrive double-encoded.
        s = StringEscapeUtils.unescapeHtml(StringEscapeUtils.unescapeHtml(s));

        s = REF.matcher(s).replaceAll(" ");
        // Comments must go before URLs: the URL pattern runs up to the next space
        // or '<' and would otherwise swallow a comment terminator that directly
        // follows a URL.
        s = HTML_COMMENT.matcher(s).replaceAll(" ");
        s = URL.matcher(s).replaceAll(" ");
        s = DOUBLE_CURLY.matcher(s).replaceAll(" ");
        s = HTML_TAG.matcher(s).replaceAll(" ");
        return s;
    }

    /**
     * Returns the page rendered for display: an {@code <h1>} title followed by
     * the rendered wiki markup, with any remaining {@code {{...}}} spans
     * replaced by spaces.
     *
     * @return display content of the page
     */
    @Override
    public String getDisplayContent() {
        String s;
        wikiModel.setUp();
        try {
            s = "<h1>" + getTitle() + "</h1>\n" + wikiModel.render(getWikiMarkup());
        } finally {
            // Always release the model, even when rendering fails.
            wikiModel.tearDown();
        }
        return DOUBLE_CURLY.matcher(s).replaceAll(" ");
    }

    /**
     * Returns the MIME type of {@link #getDisplayContent()}, {@code "text/html"}.
     */
    @Override
    public String getDisplayContentType() {
        return "text/html";
    }

    /**
     * Returns the raw XML of this page.
     */
    public String getRawXML() {
        return page;
    }

    /**
     * Returns the wiki markup between the text start tag and {@link #textEnd}.
     *
     * @return the markup, or {@code null} if the page has no text
     */
    public String getWikiMarkup() {
        if (textStart == -1) {
            return null;
        }
        // textStart points at the start tag itself, so skip over it.
        return page.substring(textStart + XML_START_TAG_TEXT.length(), textEnd);
    }

    /**
     * Returns the title of this page.
     */
    public String getTitle() {
        return title;
    }

    /**
     * Returns the disambiguation flag of this page.
     */
    public boolean isDisambiguation() {
        return isDisambig;
    }

    /**
     * Returns the redirect flag of this page.
     */
    public boolean isRedirect() {
        return isRedirect;
    }

    /**
     * Returns {@code true} if this page has no text, i.e. {@link #textStart} is {@code -1}.
     */
    public boolean isEmpty() {
        return textStart == -1;
    }

    /**
     * Returns the stub flag of this page.
     */
    public boolean isStub() {
        return isStub;
    }

    /**
     * Returns the article flag of this page.
     */
    public boolean isArticle() {
        return isArticle;
    }

    /**
     * Finds the first link of the form {@code [[lang:target]]} in the raw XML
     * and returns its target.
     *
     * @param lang language prefix to look for
     * @return the link target, or {@code null} if there is no such link, it is
     *         not closed, it is empty, or it contains a line break
     */
    public String findInterlanguageLink(String lang) {
        String prefix = "[[" + lang + ":";
        int start = page.indexOf(prefix);
        if (start < 0) {
            return null;
        }
        int end = page.indexOf("]]", start);
        if (end < 0) {
            return null;
        }
        String link = page.substring(start + prefix.length(), end);
        if (link.length() == 0 || link.indexOf("\n") != -1) {
            return null;
        }
        return link;
    }

    /**
     * Extracts all {@code [[...]]} links from the raw XML, in order of
     * appearance. Links containing a colon and links whose target is empty
     * are skipped.
     *
     * @return the extracted links; never {@code null}
     */
    public List<Link> extractLinks() {
        List<Link> links = Lists.newArrayList();
        int start = 0;
        while (true) {
            start = page.indexOf("[[", start);
            if (start < 0) {
                break;
            }
            int end = page.indexOf("]]", start);
            if (end < 0) {
                break;
            }
            Link link = parseLink(page.substring(start + 2, end));
            if (link != null) {
                links.add(link);
            }
            start = end + 1;
        }
        return links;
    }

    /**
     * Turns the text between {@code [[} and {@code ]]} into a link.
     *
     * @param text link body without the surrounding brackets
     * @return the link, or {@code null} if the body is empty, contains a colon,
     *         or has an empty target
     */
    private static Link parseLink(String text) {
        if (text.length() == 0 || text.indexOf(":") != -1) {
            return null;
        }
        String target = text;
        String anchor = null;
        int pipe = target.indexOf("|");
        if (pipe != -1) {
            anchor = target.substring(pipe + 1);
            target = target.substring(0, pipe);
        }
        // Drop a section fragment from the target.
        int hash = target.indexOf("#");
        if (hash != -1) {
            target = target.substring(0, hash);
        }
        if (target.length() == 0) {
            return null;
        }
        // Without an explicit anchor the target doubles as the anchor text.
        return new Link(anchor == null ? target : anchor, target);
    }

    /**
     * Returns the targets of all links found by {@link #extractLinks()}, as a
     * transformed view of that list.
     *
     * @return link targets in order of appearance
     */
    public List<String> extractLinkTargets() {
        return Lists.transform(extractLinks(), new Function<Link, String>() {

            @Override
            @Nullable
            public String apply(@Nullable Link link) {
                return link == null ? null : link.getTarget();
            }
        });
    }

    /**
     * Stores {@code s} as the raw XML of {@code page} and lets the page
     * process it.
     *
     * @param page page to populate
     * @param s raw XML of the page
     */
    public static void readPage(WikipediaPage page, String s) {
        page.page = s;
        page.processPage(s);
    }

    /**
     * Hook invoked by {@link #readPage(WikipediaPage, String)} after the raw
     * XML has been stored. NOTE(review): implementations are presumably
     * responsible for the title, id, text offsets and page-type flags, since
     * this class never assigns them; confirm against the subclasses.
     *
     * @param var1 raw XML of the page
     */
    protected abstract void processPage(String var1);

    /**
     * A link found in a page: the text shown for it and the page it points to.
     */
    public static class Link {
        private final String anchor;
        private final String target;

        private Link(String anchor, String target) {
            this.anchor = anchor;
            this.target = target;
        }

        /**
         * Returns the anchor text of this link.
         */
        public String getAnchorText() {
            return anchor;
        }

        /**
         * Returns the target of this link.
         */
        public String getTarget() {
            return target;
        }

        @Override
        public String toString() {
            return String.format("[target: %s, anchor: %s]", target, anchor);
        }
    }
}

