/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.crawl;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Strings;
import de.tblsoft.solr.crawl.Breadcrumb;
import de.tblsoft.solr.crawl.BreadcrumbEntry;
import de.tblsoft.solr.crawl.Custom;
import de.tblsoft.solr.crawl.Webpage;
import de.tblsoft.solr.crawl.attr.AttributeExtractor;
import de.tblsoft.solr.crawl.attr.AttributeExtractorFactory;
import de.tblsoft.solr.crawl.attr.Attributes;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

/**
 * Extracts structured information from a single HTML page using jsoup and
 * collects it in a {@link Webpage}: title, meta tags, canonical link, headings,
 * links, images, referenced domains and JSON-LD blocks. Optional extractors
 * (attributes, breadcrumb, custom selectors) add further data on request.
 *
 * <p>Instances hold mutable state and are not synchronized.
 */
public class JSoupAnalyzer {
    /**
     * Lenient mapper used to normalize JSON-LD snippets. Configured once and
     * shared, because creating an ObjectMapper per script is needlessly expensive.
     */
    private static final ObjectMapper JSON_LD_MAPPER = JSoupAnalyzer.createJsonLdMapper();

    private Webpage webpage;
    private long startTime;
    private long responseTime;
    private String url;
    private Document jsoupDocument;

    /**
     * Parses the given HTML.
     *
     * <p>The page URL is passed to jsoup as base URI so that relative
     * {@code href}/{@code src} values can later be resolved by {@code absUrl};
     * without a base URI jsoup returns an empty string for every relative URL.
     *
     * @param url  the URL the HTML was fetched from; may be {@code null}
     * @param html the HTML markup to parse
     */
    public JSoupAnalyzer(String url, String html) {
        this.url = url;
        this.webpage = new Webpage();
        this.webpage.setRawHtml(html);
        this.jsoupDocument = Jsoup.parse(html, url == null ? "" : url);
    }

    /**
     * Runs the standard extraction and stores the result in a fresh {@link Webpage}.
     *
     * <p>NOTE(review): this replaces the {@code Webpage} created by the constructor,
     * so the raw HTML stored there (and anything added by the {@code extract*}
     * methods before this call) is not carried over - confirm this is intended.
     *
     * <p>The parse time is computed relative to {@link #setStartTime(long)}; if the
     * start time was never set it is measured from 0.
     */
    public void analyze() {
        this.webpage = new Webpage();
        this.webpage.setUrl(this.url);
        this.webpage.setBaseUrl(this.extractBaseUrl());
        this.webpage.setTitle(this.getFirstElement("title"));
        this.webpage.setMetaDescription(this.getMeta("description"));
        this.webpage.setCanonical(this.getCanonical());
        this.webpage.setH1(this.getAllElements("h1"));
        this.webpage.setH2(this.getAllElements("h2"));
        this.webpage.setH3(this.getAllElements("h3"));
        this.webpage.setH4(this.getAllElements("h4"));
        this.webpage.setH5(this.getAllElements("h5"));
        this.webpage.setH6(this.getAllElements("h6"));
        this.webpage.setLinks(this.getAbsoluteLinks());
        this.webpage.setImages(this.getAbsoluteImages());
        // Must run after links and images are set: getDomains() reads them from the webpage.
        this.webpage.setDomains(this.getDomains());
        this.webpage.setJsonLd(this.getJsonLd());
        // Must run before extractImage(), which reads og:image from the collected meta data.
        this.extractAllMeta();
        this.webpage.setImage(this.extractImage());
        this.webpage.setParseTime(System.currentTimeMillis() - this.startTime);
    }

    /**
     * Derives {@code scheme://host[:port]} from the page URL.
     *
     * @return the base URL, or {@code null} if the URL is missing, malformed or
     *         has no scheme or host (e.g. a relative or {@code mailto:} URL)
     */
    public String extractBaseUrl() {
        if (this.url == null) {
            return null;
        }
        try {
            URI uri = new URI(this.url);
            String scheme = uri.getScheme();
            String host = uri.getHost();
            if (scheme == null || host == null) {
                // Previously this produced strings such as "null://null".
                return null;
            }
            StringBuilder base = new StringBuilder(scheme).append("://").append(host);
            if (uri.getPort() != -1) {
                // Keep an explicit port, otherwise root-relative URLs resolve to the wrong origin.
                base.append(':').append(uri.getPort());
            }
            return base.toString();
        }
        catch (java.net.URISyntaxException ignored) {
            // A malformed page URL simply has no base URL.
            return null;
        }
    }

    /**
     * Extracts attributes with the extractor matching the configured strategy and
     * attaches the result to the webpage.
     *
     * @param attributes the extraction configuration; receives the extracted values
     */
    public void extractAttributes(Attributes attributes) {
        AttributeExtractor attributeExtractor = AttributeExtractorFactory.create(attributes.getStrategy(), this);
        attributes.setAttributes(attributeExtractor.extractAttributes(attributes));
        this.webpage.setAttributes(attributes);
    }

    /**
     * Collects every {@code <a>} below the breadcrumb selector as a breadcrumb entry
     * (id and link are the raw {@code href}, name is the link text) and attaches the
     * breadcrumb to the webpage.
     *
     * @param breadcrumb the breadcrumb configuration; ignored when {@code null}
     */
    public void extractBreadcrumb(Breadcrumb breadcrumb) {
        if (breadcrumb == null) {
            return;
        }
        Elements anchors = this.jsoupDocument.select(breadcrumb.getSelector()).select("a");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            BreadcrumbEntry entry = new BreadcrumbEntry();
            entry.setId(href);
            entry.setLink(href);
            entry.setName(anchor.text());
            breadcrumb.addBreadcrumbEntry(entry);
        }
        this.webpage.setBreadcrumb(breadcrumb);
    }

    /**
     * Returns the {@code og:image} meta property as an absolute URL.
     *
     * @return the image URL, or {@code null} if no meta properties are available
     *         or {@code og:image} is not present
     */
    public String extractImage() {
        if (this.webpage.getMeta() == null || this.webpage.getMeta().getProperty() == null) {
            return null;
        }
        String ogImage = this.webpage.getMeta().getProperty().get("og:image");
        return this.getAbsoluteUrl(ogImage);
    }

    /**
     * Evaluates each custom selector against the document and stores the result on
     * the webpage under the custom's field name. For every custom this sets the
     * concatenated string form of all matches as HTML, the attributes of the first
     * match, and the combined text of all matches.
     *
     * @param customs the custom extractions to run; {@code null} is treated as empty
     */
    public void extractCustom(List<Custom> customs) {
        if (this.webpage.getCustom() == null) {
            this.webpage.setCustom(new HashMap<String, Custom>());
        }
        if (customs == null) {
            return;
        }
        for (Custom custom : customs) {
            // Evaluate the selector once instead of once per derived value.
            Elements matches = this.jsoupDocument.select(custom.getJsoupSelector());
            StringBuilder html = new StringBuilder();
            for (Element match : matches) {
                html.append(match);
            }
            custom.setHtml(html.toString());
            Element first = matches.first();
            if (first != null) {
                for (Attribute attribute : first.attributes().asList()) {
                    custom.putAttribute(attribute.getKey(), attribute.getValue());
                }
            }
            custom.setText(matches.text());
            this.webpage.getCustom().put(custom.getFieldName(), custom);
        }
    }

    /**
     * Returns the current webpage, running {@link #analyze()} first if no webpage
     * is set (which only happens after {@code setWebpage(null)}, since the
     * constructor always creates one).
     *
     * @return the webpage
     */
    public Webpage getWebpage() {
        if (this.webpage == null) {
            this.analyze();
        }
        return this.webpage;
    }

    /**
     * Collects the distinct hosts of all links and images currently stored on the
     * webpage. URLs that are {@code null}, cannot be parsed, or have no host are skipped.
     *
     * @return the set of hosts; empty if no links or images are set
     */
    public Collection<String> getDomains() {
        List<String> urls = new ArrayList<>();
        JSoupAnalyzer.addAllIfPresent(urls, this.webpage.getLinks());
        JSoupAnalyzer.addAllIfPresent(urls, this.webpage.getImages());
        Collection<String> domains = new HashSet<>();
        for (String candidate : urls) {
            if (candidate == null) {
                continue;
            }
            try {
                String host = new URI(candidate).getHost();
                if (host != null) {
                    domains.add(host);
                }
            }
            catch (java.net.URISyntaxException ignored) {
                // Best effort: pages routinely contain malformed URLs; they contribute no domain.
            }
        }
        return domains;
    }

    /**
     * Returns the distinct absolute targets of all {@code <a href>} elements.
     *
     * @return the absolute link URLs, without entries that could not be resolved
     */
    public Collection<String> getAbsoluteLinks() {
        return this.collectAbsoluteUrls("a", "href");
    }

    /**
     * Returns the distinct absolute sources of all {@code <img src>} elements.
     *
     * @return the absolute image URLs, without entries that could not be resolved
     */
    public Collection<String> getAbsoluteImages() {
        return this.collectAbsoluteUrls("img", "src");
    }

    /**
     * Parses every {@code application/ld+json} script leniently and returns each
     * one re-serialized as a JSON string.
     *
     * @return the JSON-LD documents in document order
     * @throws RuntimeException if a script cannot be parsed or serialized
     */
    public Collection<String> getJsonLd() {
        List<String> jsonLdList = new ArrayList<>();
        Elements jsonLdScripts = this.jsoupDocument.select("script[type=application/ld+json]");
        for (Element script : jsonLdScripts) {
            try {
                JsonNode json = JSON_LD_MAPPER.readTree(script.data());
                jsonLdList.add(JSON_LD_MAPPER.writeValueAsString(json));
            }
            catch (Exception e) {
                throw new RuntimeException("Could not process JSON-LD of " + this.url, e);
            }
        }
        return jsonLdList;
    }

    /**
     * Returns the {@code href} of the first {@code <link rel=canonical>}.
     *
     * @return the canonical URL, or an empty string if the page declares none
     */
    public String getCanonical() {
        Elements canonicalLinks = this.jsoupDocument.select("link[rel=canonical]");
        if (canonicalLinks.isEmpty()) {
            return "";
        }
        return canonicalLinks.get(0).attr("href");
    }

    /**
     * Reads an attribute from a selection via {@code Elements.attr}.
     *
     * @param element the selection to read from
     * @param attr    the attribute name
     * @return the attribute value as reported by jsoup, or {@code null} if the
     *         selection is empty
     */
    public String getFirstElementAttr(Elements element, String attr) {
        if (element.isEmpty()) {
            return null;
        }
        return element.attr(attr);
    }

    /**
     * Returns the text of every element matching the selector. The text of one
     * match is built from the text nodes of the element and all of its
     * descendants, each followed by a space, with the result trimmed.
     *
     * @param selector the jsoup CSS selector
     * @return one text per matching element, in document order
     */
    public List<String> getAllElements(String selector) {
        List<String> texts = new ArrayList<>();
        for (Element match : this.jsoupDocument.select(selector)) {
            StringBuilder value = new StringBuilder();
            for (Element nested : match.getAllElements()) {
                for (TextNode textNode : nested.textNodes()) {
                    value.append(textNode.text()).append(" ");
                }
            }
            texts.add(value.toString().trim());
        }
        return texts;
    }

    /**
     * Returns the text of the first element matching the selector.
     *
     * @param selector the jsoup CSS selector
     * @return the element text, or {@code null} if nothing matches
     */
    public String getFirstElement(String selector) {
        Elements elements = this.jsoupDocument.select(selector);
        if (elements.isEmpty()) {
            return null;
        }
        return elements.get(0).text();
    }

    /**
     * Returns the {@code content} of the {@code <meta>} tag with the given name.
     *
     * @param metaName the value of the {@code name} attribute; inserted into the
     *                 selector unquoted
     * @return the content value, or {@code null} if no such meta tag exists
     */
    public String getMeta(String metaName) {
        Elements meta = this.jsoupDocument.select("meta[name=" + metaName + "]");
        return this.getFirstElementAttr(meta, "content");
    }

    /**
     * Returns the combined text of all elements marked {@code itemprop=articleBody}.
     *
     * @return the article body text as produced by {@code Elements.text()}
     */
    public String getItempropArticleBody() {
        return this.jsoupDocument.select("[itemprop=articleBody]").text();
    }

    /**
     * Copies all {@code <meta>} tags into the webpage's meta data: tags with a
     * {@code name} are added by name, tags with a {@code property} or an
     * {@code itemprop} are both added as properties.
     */
    public void extractAllMeta() {
        for (Element element : this.jsoupDocument.select("meta")) {
            String name = element.attr("name");
            String property = element.attr("property");
            String itemprop = element.attr("itemprop");
            String content = element.attr("content");
            if (!Strings.isNullOrEmpty(name)) {
                this.webpage.getMeta().addName(name, content);
            }
            if (!Strings.isNullOrEmpty(property)) {
                this.webpage.getMeta().addProperty(property, content);
            }
            if (!Strings.isNullOrEmpty(itemprop)) {
                this.webpage.getMeta().addProperty(itemprop, content);
            }
        }
    }

    /** Builds the lenient mapper used for JSON-LD (comments, raw control chars, any escape). */
    private static ObjectMapper createJsonLdMapper() {
        ObjectMapper mapper = new ObjectMapper();
        mapper.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
        mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
        mapper.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
        return mapper;
    }

    /** Adds all elements of {@code source} to {@code target}, treating {@code null} as empty. */
    private static void addAllIfPresent(Collection<String> target, Collection<? extends String> source) {
        if (source != null) {
            target.addAll(source);
        }
    }

    /**
     * Resolves the given attribute of every element matching the selector to an
     * absolute URL, skipping elements where jsoup cannot produce one (it reports
     * those as an empty string).
     */
    private Collection<String> collectAbsoluteUrls(String selector, String attribute) {
        Collection<String> absoluteUrls = new HashSet<>();
        for (Element element : this.jsoupDocument.select(selector)) {
            String absUrl = element.absUrl(attribute);
            if (!absUrl.isEmpty()) {
                absoluteUrls.add(absUrl);
            }
        }
        return absoluteUrls;
    }

    /**
     * Makes a URL absolute relative to the webpage's base URL. Handles
     * protocol-relative ({@code //host/path}) and root-relative ({@code /path})
     * URLs; anything else is returned unchanged.
     */
    private String getAbsoluteUrl(String url) {
        if (url == null) {
            return null;
        }
        if (url.startsWith("http")) {
            return url;
        }
        String baseUrl = this.webpage.getBaseUrl();
        if (url.startsWith("//")) {
            // Protocol-relative: reuse the page's scheme instead of prepending the whole base URL.
            int schemeEnd = baseUrl == null ? -1 : baseUrl.indexOf("://");
            String scheme = schemeEnd > 0 ? baseUrl.substring(0, schemeEnd) : "https";
            return scheme + ":" + url;
        }
        if (url.startsWith("/") && baseUrl != null) {
            return baseUrl + url;
        }
        return url;
    }

    /** Replaces the webpage that the extraction methods write to. */
    public void setWebpage(Webpage webpage) {
        this.webpage = webpage;
    }

    /** Returns the reference timestamp (epoch millis) used to compute the parse time. */
    public long getStartTime() {
        return this.startTime;
    }

    /** Sets the reference timestamp (epoch millis) used to compute the parse time. */
    public void setStartTime(long startTime) {
        this.startTime = startTime;
    }

    /** Returns the response time as set by the caller; not used by this class. */
    public long getResponseTime() {
        return this.responseTime;
    }

    /** Stores the response time; not used by this class. */
    public void setResponseTime(long responseTime) {
        this.responseTime = responseTime;
    }

    /** Returns the page URL. */
    public String getUrl() {
        return this.url;
    }

    /** Sets the page URL; the base URI of the already parsed document is not changed. */
    public void setUrl(String url) {
        this.url = url;
    }

    /** Returns the parsed jsoup document. */
    public Document getJsoupDocument() {
        return this.jsoupDocument;
    }

    /** Replaces the jsoup document that all extraction methods read from. */
    public void setJsoupDocument(Document jsoupDocument) {
        this.jsoupDocument = jsoupDocument;
    }
}

