/*
 * Decompiled with CFR 0.152.
 */
package org.archive.resource.html;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.format.text.html.ParseObserver;
import org.archive.resource.html.HTMLMetaData;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;

public class ExtractingParseObserver
implements ParseObserver {
    /** Sink that receives every href, link, script, meta entry, title and base href this observer extracts. */
    HTMLMetaData data;
    /** One attribute list per currently open anchor; pushed by the anchor extractor and popped in handleTagClose. */
    Stack<ArrayList<String>> openAnchors;
    /** Text collected so far for each open anchor; pushed and popped in step with {@link #openAnchors}. */
    Stack<StringBuilder> openAnchorTexts;
    /** Most recent text seen while {@link #inTitle} is set; handed to the metadata sink and reset on the next close tag. */
    String title = null;
    /** True between a non-self-closed TITLE open tag and the next close tag. */
    boolean inTitle = false;
    /** Regex source for CSS {@code url(...)}; group 1 is the argument, including any surrounding quotes or backslashes. */
    protected static String cssUrlPatString = "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)";
    /** Regex source for {@code @import} rules written without {@code url(...)}; group 1 is the quoted, parenthesised or bare target. */
    protected static String cssImportNoUrlPatString = "@import\\s+(('[^']+')|(\"[^\"]+\")|(\\('[^']+'\\))|(\\(\"[^\"]+\"\\))|(\\([^)]+\\))|([a-z0-9_.:/\\\\-]+))\\s*;";
    // The compiled patterns must stay below the strings they compile: static initialisers run in textual order.
    protected static Pattern cssImportNoUrlPattern = Pattern.compile(cssImportNoUrlPatString);
    protected static Pattern cssUrlPattern = Pattern.compile(cssUrlPatString);
    /** Cap, in characters, on collected text; handleTextNode and handleTagClose truncate at this same value. */
    private static final int MAX_TEXT_LEN = 100;
    /** Key under which the "TAG@/attr" location of an extracted URL is stored. */
    private static final String PATH = "path";
    /** Separator placed between tag name and attribute name by makePath. */
    private static final String PATH_SEPARATOR = "@/";
    /** Tag name to extractor lookup used by handleTagOpen; filled once by the static initialiser further down. */
    private static final Map<String, TagExtractor> extractors = new HashMap<String, TagExtractor>();

    /**
     * Creates an observer that reports everything it extracts to {@code data}.
     *
     * @param data metadata sink; the reference is stored as-is (no copy, no null check)
     */
    public ExtractingParseObserver(HTMLMetaData data) {
        // Explicit type arguments instead of raw Stack: same runtime objects, no unchecked warnings.
        this.openAnchorTexts = new Stack<StringBuilder>();
        this.openAnchors = new Stack<ArrayList<String>>();
        this.data = data;
    }

    /**
     * No-op: this observer does no work for the document-start callback.
     */
    @Override
    public void handleDocumentStart() {
    }

    /**
     * No-op: this observer does no work for the document-complete callback.
     * Anchors still on the open-anchor stacks at this point are not flushed here.
     */
    @Override
    public void handleDocumentComplete() {
    }

    /**
     * Treats an empty tag exactly like an opening tag by delegating to
     * {@link #handleTagOpen(TagNode)}.
     *
     * @param tag the tag reported by the parser
     */
    @Override
    public void handleTagEmpty(TagNode tag) {
        handleTagOpen(tag);
    }

    /**
     * Processes an opening tag.
     * <ul>
     *   <li>TITLE only toggles title collection and is otherwise ignored.</li>
     *   <li>Any other tag has its {@code background} attribute, if present, recorded as an href.</li>
     *   <li>The tag is then passed to the extractor registered for its name, if there is one.</li>
     * </ul>
     *
     * @param tag the tag reported by the parser
     */
    @Override
    public void handleTagOpen(TagNode tag) {
        final String tagName = tag.getTagName();

        if (tagName.equals("TITLE")) {
            // A self-closed <title/> has no text to wait for.
            inTitle = !tag.isEmptyXmlTag();
            return;
        }

        final String background = tag.getAttribute("background");
        if (background != null) {
            data.addHref(PATH, makePath(tagName, "background"), "url", background);
        }

        final TagExtractor handler = extractors.get(tagName);
        if (handler == null) {
            return;
        }
        handler.extract(data, tag, this);
    }

    /**
     * Processes a closing tag.
     * <p>
     * While a title is being collected, the first close tag of any name ends the
     * title: the collected text is handed to the metadata sink and nothing else
     * happens for that tag. Otherwise a closing A pops the innermost open anchor
     * and, if that anchor carried attributes, records it together with its
     * whitespace-normalised, length-capped text.
     *
     * @param tag the closing tag reported by the parser
     */
    @Override
    public void handleTagClose(TagNode tag) {
        if (inTitle) {
            inTitle = false;
            data.setTitle(title);
            title = null;
            return;
        }

        // Tag name is tested first, exactly as before, so evaluation order is unchanged.
        if (!tag.getTagName().equals("A") || openAnchors.isEmpty()) {
            return;
        }

        final ArrayList<String> anchorAttrs = openAnchors.pop();
        final StringBuilder collected = openAnchorTexts.pop();
        if (anchorAttrs == null || anchorAttrs.isEmpty()) {
            // Anchor without href (e.g. a named anchor): nothing to report.
            return;
        }

        if (collected != null) {
            String anchorText = collected.toString().trim().replaceAll("\\s+", " ");
            if (anchorText.length() > MAX_TEXT_LEN) {
                anchorText = anchorText.substring(0, MAX_TEXT_LEN);
            }
            if (!anchorText.isEmpty()) {
                anchorAttrs.add("text");
                anchorAttrs.add(anchorText);
            }
        }
        data.addHref(anchorAttrs);
    }

    /**
     * Collects text for the title or for every currently open anchor.
     * <p>
     * Whitespace runs are collapsed to one space and the chunk is cut to the text
     * cap. Inside a title the chunk replaces any previously seen title text.
     * Outside a title it is appended to each open anchor's buffer, walking the
     * stack from the outermost anchor inwards and stopping at the first buffer
     * that is already full.
     *
     * @param text the text node reported by the parser
     */
    @Override
    public void handleTextNode(TextNode text) {
        String chunk = text.getText().replaceAll("\\s+", " ");
        if (chunk.length() > MAX_TEXT_LEN) {
            chunk = chunk.substring(0, MAX_TEXT_LEN);
        }

        if (inTitle) {
            title = chunk;
            return;
        }

        for (StringBuilder buffer : openAnchorTexts) {
            final int room = MAX_TEXT_LEN - buffer.length();
            if (room <= 0) {
                break;
            }
            // Append the whole chunk when it fits, otherwise only as much as fills the buffer.
            buffer.append(chunk, 0, Math.min(chunk.length(), room));
        }
    }

    /**
     * No-op: inline script text is ignored by this observer.
     *
     * @param text the script text node (unused)
     */
    @Override
    public void handleScriptNode(TextNode text) {
    }

    /**
     * Extracts URLs from inline style text: first {@code url(...)} references,
     * then {@code @import} rules written without {@code url(...)}.
     *
     * @param text the style text node reported by the parser
     */
    @Override
    public void handleStyleNode(TextNode text) {
        final String css = text.getText();
        patternCSSExtract(data, cssUrlPattern, css);
        patternCSSExtract(data, cssImportNoUrlPattern, css);
    }

    /**
     * No-op: comments are ignored by this observer.
     *
     * @param remark the comment node (unused)
     */
    @Override
    public void handleRemarkNode(RemarkNode remark) {
    }

    /**
     * Builds the location label for an extracted URL: tag name, the path
     * separator, then the attribute name (for example {@code A@/href}).
     *
     * @param tag  tag name; must not be null
     * @param attr attribute name; must not be null
     * @return the concatenated label
     */
    private static String makePath(String tag, String attr) {
        return tag.concat(PATH_SEPARATOR).concat(attr);
    }

    /**
     * Records one href entry for every listed attribute that is present on the
     * tag. Each entry carries only the location label and the URL.
     *
     * @param data  metadata sink
     * @param node  tag to inspect
     * @param attrs names of the URL-bearing attributes to look for
     */
    private static void addBasicHrefs(HTMLMetaData data, TagNode node, String ... attrs) {
        for (int i = 0; i < attrs.length; i++) {
            final String attrName = attrs[i];
            final String url = node.getAttribute(attrName);
            if (url != null) {
                data.addHref(PATH, makePath(node.getTagName(), attrName), "url", url);
            }
        }
    }

    /**
     * Collects the listed attributes that are present on the tag as a flat
     * name, value, name, value ... list, in the order given.
     *
     * @param node  tag to inspect
     * @param attrs attribute names to look for
     * @return the flat list, or {@code null} when none of the attributes is present
     */
    private static ArrayList<String> getAttrList(TagNode node, String ... attrs) {
        final ArrayList<String> pairs = new ArrayList<String>();
        for (int i = 0; i < attrs.length; i++) {
            final String value = node.getAttribute(attrs[i]);
            if (value != null) {
                pairs.add(attrs[i]);
                pairs.add(value);
            }
        }
        return pairs.isEmpty() ? null : pairs;
    }

    /**
     * Builds the flat key/value list describing a URL-bearing attribute: the
     * location label, the URL itself, then every optional attribute that is
     * present on the tag.
     *
     * @param node          tag to inspect
     * @param urlAttr       name of the attribute holding the URL
     * @param optionalAttrs extra attributes to copy when present
     * @return the flat list, or {@code null} when {@code urlAttr} is absent
     */
    private static ArrayList<String> getAttrListUrl(TagNode node, String urlAttr, String ... optionalAttrs) {
        final String url = node.getAttribute(urlAttr);
        if (url == null) {
            return null;
        }

        final ArrayList<String> entry = new ArrayList<String>();
        entry.add(PATH);
        entry.add(makePath(node.getTagName(), urlAttr));
        entry.add("url");
        entry.add(url);

        for (int i = 0; i < optionalAttrs.length; i++) {
            final String value = node.getAttribute(optionalAttrs[i]);
            if (value != null) {
                entry.add(optionalAttrs[i]);
                entry.add(value);
            }
        }
        return entry;
    }

    /**
     * Records an href entry for {@code hrefAttr}, enriched with any of the
     * optional attributes that are present. Does nothing when the tag has no
     * {@code hrefAttr}.
     *
     * @param data          metadata sink
     * @param node          tag to inspect
     * @param hrefAttr      name of the attribute holding the URL
     * @param optionalAttrs extra attributes to copy when present
     */
    private static void addHrefWithAttrs(HTMLMetaData data, TagNode node, String hrefAttr, String ... optionalAttrs) {
        final ArrayList<String> entry = getAttrListUrl(node, hrefAttr, optionalAttrs);
        if (entry == null) {
            return;
        }
        data.addHref(entry);
    }

    /**
     * Scans CSS text with {@code pattern} and records every URL captured by
     * group 1 as an href located at {@code STYLE/#text}.
     * <p>
     * Fixes over the previous implementation:
     * <ul>
     *   <li>Matching resumes right after each match. Previously the search index
     *       was advanced by the URL length a second time, so URLs that followed
     *       closely (e.g. {@code url(a.png) url(b.png)}) were skipped.</li>
     *   <li>Delimiters are stripped only when actually present, so malformed
     *       input such as {@code @import (");} or {@code url(\ab)} no longer
     *       throws {@code StringIndexOutOfBoundsException}, and an unterminated
     *       quote no longer costs the URL its last character.</li>
     *   <li>One-character URLs are reported; only empty results are dropped.</li>
     * </ul>
     *
     * @param data    metadata sink
     * @param pattern compiled pattern whose group 1 captures the raw URL token
     * @param content CSS text; {@code null} or empty is ignored
     */
    private void patternCSSExtract(HTMLMetaData data, Pattern pattern, String content) {
        if (content == null || content.length() == 0) {
            return;
        }
        Matcher m = pattern.matcher(content);
        while (m.find()) {
            String url = cleanCssUrl(m.group(1));
            if (url.length() > 0) {
                data.addHref(PATH, "STYLE/#text", "href", url);
            }
        }
    }

    /**
     * Removes the decoration CSS allows around a URL token: one enclosing pair
     * of parentheses, then any leading and trailing quotes (single or double,
     * each optionally backslash-escaped), plus surrounding whitespace.
     *
     * @param raw the captured token; may be {@code null}
     * @return the bare URL, possibly empty, never {@code null}
     */
    private static String cleanCssUrl(String raw) {
        if (raw == null) {
            return "";
        }
        String url = raw.trim();
        int len = url.length();
        if (len >= 2 && url.charAt(0) == '(' && url.charAt(len - 1) == ')') {
            url = url.substring(1, len - 1).trim();
        }

        int start = 0;
        int end = url.length();

        // Leading quotes: a quote character, optionally preceded by one backslash.
        while (start < end) {
            int q = start;
            if (url.charAt(q) == '\\') {
                q++;
            }
            if (q < end && (url.charAt(q) == '"' || url.charAt(q) == '\'')) {
                start = q + 1;
            } else {
                // A backslash not followed by a quote belongs to the URL itself.
                break;
            }
        }

        // Trailing quotes: a quote character, optionally preceded by one backslash.
        while (end > start && (url.charAt(end - 1) == '"' || url.charAt(end - 1) == '\'')) {
            end--;
            if (end > start && url.charAt(end - 1) == '\\') {
                end--;
            }
        }
        return url.substring(start, end).trim();
    }

    static {
        extractors.put("A", new AnchorTagExtractor());
        extractors.put("APPLET", new AppletTagExtractor());
        extractors.put("AREA", new AreaTagExtractor());
        extractors.put("BASE", new BaseTagExtractor());
        extractors.put("EMBED", new EmbedTagExtractor());
        extractors.put("FORM", new FormTagExtractor());
        extractors.put("FRAME", new FrameTagExtractor());
        extractors.put("IFRAME", new IFrameTagExtractor());
        extractors.put("IMG", new ImgTagExtractor());
        extractors.put("INPUT", new InputTagExtractor());
        extractors.put("LINK", new LinkTagExtractor());
        extractors.put("META", new MetaTagExtractor());
        extractors.put("OBJECT", new ObjectTagExtractor());
        extractors.put("SCRIPT", new ScriptTagExtractor());
    }

    /**
     * Reports the {@code src} of a script tag, together with its {@code type}
     * when present, through {@code data.addScript}.
     */
    private static class ScriptTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            final ArrayList<String> scriptAttrs = getAttrListUrl(node, "src", "type");
            if (scriptAttrs == null) {
                // No src attribute: nothing to report.
                return;
            }
            data.addScript(scriptAttrs);
        }
    }

    /**
     * Records the URL-bearing attributes of an OBJECT tag as hrefs.
     * <p>
     * {@code data} is now included: in HTML it is the attribute that holds the
     * URI of the object's data and was previously never extracted.
     * {@code codebase} and {@code cdata} are kept so existing output is unchanged.
     */
    private static class ObjectTagExtractor
    implements TagExtractor {
        private ObjectTagExtractor() {
        }

        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            ExtractingParseObserver.addBasicHrefs(data, node, "codebase", "cdata", "data");
        }
    }

    /**
     * Reports whichever of {@code name}, {@code rel}, {@code content} and
     * {@code http-equiv} a META tag carries, through {@code data.addMeta}.
     */
    private static class MetaTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            final ArrayList<String> metaAttrs = getAttrList(node, "name", "rel", "content", "http-equiv");
            if (metaAttrs == null) {
                // None of the attributes of interest is present.
                return;
            }
            data.addMeta(metaAttrs);
        }
    }

    /**
     * Reports the {@code href} of a LINK tag, with {@code rel} and {@code type}
     * when present, through {@code data.addLink}.
     */
    private static class LinkTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            final ArrayList<String> linkAttrs = getAttrListUrl(node, "href", "rel", "type");
            if (linkAttrs == null) {
                // No href attribute: nothing to report.
                return;
            }
            data.addLink(linkAttrs);
        }
    }

    /** Records the {@code src} attribute of an INPUT tag as an href. */
    private static class InputTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addBasicHrefs(data, node, "src");
        }
    }

    /**
     * Records the {@code src} attribute of an IMG tag as an href, carrying
     * {@code alt} and {@code title} along when present.
     */
    private static class ImgTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addHrefWithAttrs(data, node, "src", "alt", "title");
        }
    }

    /** Records the {@code src} attribute of an IFRAME tag as an href. */
    private static class IFrameTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addBasicHrefs(data, node, "src");
        }
    }

    /** Records the {@code src} attribute of a FRAME tag as an href. */
    private static class FrameTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addBasicHrefs(data, node, "src");
        }
    }

    /**
     * Records the {@code action} attribute of a FORM tag as an href, carrying
     * {@code target} and {@code method} along when present. A form without
     * {@code action} is ignored.
     */
    private static class FormTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            final String action = node.getAttribute("action");
            if (action == null) {
                return;
            }

            final ArrayList<String> entry = new ArrayList<String>();
            entry.add(PATH);
            entry.add(makePath("FORM", "action"));
            entry.add("url");
            entry.add(action);

            final String[] extras = {"target", "method"};
            for (int i = 0; i < extras.length; i++) {
                final String value = node.getAttribute(extras[i]);
                if (value != null) {
                    entry.add(extras[i]);
                    entry.add(value);
                }
            }
            data.addHref(entry);
        }
    }

    /** Records the {@code src} attribute of an EMBED tag as an href. */
    private static class EmbedTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addBasicHrefs(data, node, "src");
        }
    }

    /** Passes the {@code href} of a BASE tag to {@code data.setBaseHref}; a BASE without href is ignored. */
    private static class BaseTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            final String baseHref = node.getAttribute("href");
            if (baseHref == null) {
                return;
            }
            data.setBaseHref(baseHref);
        }
    }

    /** Records the {@code href} attribute of an AREA tag as an href. */
    private static class AreaTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            addBasicHrefs(data, node, "href");
        }
    }

    /**
     * Records the {@code codebase} and {@code cdata} attributes of an APPLET tag
     * as hrefs, when present.
     */
    private static class AppletTagExtractor
    implements TagExtractor {
        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            // NOTE(review): "cdata" is not a standard APPLET attribute; confirm it is intended.
            addBasicHrefs(data, node, "codebase", "cdata");
        }
    }

    /**
     * Handles an A tag.
     * <p>
     * When the tag has an {@code href}, an entry is built from the location
     * label, the URL and any of {@code target}, {@code alt}, {@code title} that
     * are present. A self-closed anchor is reported immediately. An ordinary
     * anchor is pushed on the observer's open-anchor stacks so that its text can
     * be collected and the entry reported when the tag closes.
     * <p>
     * Fix: a self-closed anchor without {@code href} used to pass an empty list
     * to {@code data.addHref}. It is now skipped, matching the close-tag path,
     * which already ignores anchors whose list is empty.
     */
    private static class AnchorTagExtractor
    implements TagExtractor {
        private AnchorTagExtractor() {
        }

        @Override
        public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) {
            ArrayList<String> l = new ArrayList<String>();
            String url = node.getAttribute("href");
            if (url != null) {
                l.add(ExtractingParseObserver.PATH);
                l.add(ExtractingParseObserver.makePath("A", "href"));
                l.add("url");
                l.add(url);
                for (String a : new String[]{"target", "alt", "title"}) {
                    String v = node.getAttribute(a);
                    if (v == null) {
                        continue;
                    }
                    l.add(a);
                    l.add(v);
                }
            }
            if (node.isEmptyXmlTag()) {
                // No text can follow a self-closed anchor; report it now, but only if it has an href.
                if (!l.isEmpty()) {
                    data.addHref(l);
                }
            } else {
                // An empty list is still pushed so that open and close tags stay paired on the stacks.
                obs.openAnchors.push(l);
                obs.openAnchorTexts.push(new StringBuilder());
            }
        }
    }

    private static interface TagExtractor {
        public void extract(HTMLMetaData var1, TagNode var2, ExtractingParseObserver var3);
    }
}

