/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Strings;
import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.filter.HttpWorker;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline filter that parses the HTML stored in one field of a document with
 * jsoup and copies selected parts of it (canonical link, title, description,
 * headings, links, JSON-LD scripts, article body and meta tags) into fields
 * of that document.
 *
 * <p>Configuration properties read in {@link #init()}:
 * <ul>
 *   <li>{@code htmlField} - field holding the HTML, default {@code html}</li>
 *   <li>{@code deleteHtmlField} - remove the HTML field after extraction,
 *       default {@code false}</li>
 *   <li>{@code metaPrefix} - prefix of the fields created for
 *       {@code <meta name=...>} tags, default {@code __meta_}</li>
 *   <li>{@code mapping} - list of {@code fieldName->selector} entries; the text
 *       of the first element matching the selector is stored in the field</li>
 * </ul>
 *
 * <p>The filter keeps the document currently being processed in instance
 * fields, so one instance must not process several documents concurrently.
 */
public class HtmlJsoupFilter
extends AbstractFilter {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlJsoupFilter.class);

    /** Separator between field name and selector in a {@code mapping} entry. */
    private static final String MAPPING_SEPARATOR = "->";

    /** Shared lenient mapper for JSON-LD; it is configured once and only used for reading/writing afterwards. */
    private static final ObjectMapper JSON_LD_MAPPER = HtmlJsoupFilter.createJsonLdMapper();

    protected Document jsoupDocument;
    protected String html;
    protected de.tblsoft.solr.pipeline.bean.Document document;
    protected String metaPrefix = "__meta_";
    protected String htmlField;
    private boolean deleteHtmlField;
    private Map<String, String> mapping;

    /**
     * Builds the mapper used for JSON-LD scripts. It tolerates comments,
     * unquoted control characters and arbitrary backslash escapes.
     */
    private static ObjectMapper createJsonLdMapper() {
        ObjectMapper mapper = new ObjectMapper();
        mapper.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
        mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
        mapper.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
        return mapper;
    }

    /**
     * Reads the filter configuration and then delegates to the parent's
     * {@code init()}.
     *
     * @throws IllegalArgumentException if a {@code mapping} entry is not of the
     *         form {@code fieldName->selector}
     */
    @Override
    public void init() {
        this.htmlField = this.getProperty("htmlField", "html");
        this.deleteHtmlField = this.getPropertyAsBoolean("deleteHtmlField", false);
        this.metaPrefix = this.getProperty("metaPrefix", "__meta_");
        List<String> mappingConfiguration = this.getPropertyAsList("mapping", new ArrayList<String>());
        this.mapping = new HashMap<String, String>();
        this.readConfig(mappingConfiguration);
        super.init();
    }

    /**
     * Parses {@code fieldName->selector} entries into {@link #mapping}.
     *
     * <p>Only the first separator splits the entry, so a selector may itself
     * contain {@code ->}.
     *
     * @param mappingConfiguration raw mapping entries from the configuration
     * @throws IllegalArgumentException if an entry has no separator or an empty
     *         field name or selector
     */
    private void readConfig(List<String> mappingConfiguration) {
        for (String entry : mappingConfiguration) {
            String[] parts = entry.split(MAPPING_SEPARATOR, 2);
            if (parts.length != 2 || parts[0].isEmpty() || parts[1].isEmpty()) {
                throw new IllegalArgumentException(
                        "Invalid mapping entry '" + entry + "', expected 'fieldName" + MAPPING_SEPARATOR + "selector'");
            }
            this.mapping.put(parts[0], parts[1]);
        }
    }

    /**
     * Extracts the configured values from the document's HTML field and adds
     * them to the document, then hands the document to the parent's
     * {@code document(...)}.
     *
     * <p>A document without the HTML field is passed on unchanged. Any
     * exception during extraction is logged together with the document's
     * {@code loc} field and the document is still passed on.
     *
     * @param document the pipeline document to enrich
     */
    @Override
    public void document(de.tblsoft.solr.pipeline.bean.Document document) {
        this.document = document;
        try {
            this.html = document.getFieldValue(this.htmlField);
            if (this.html == null) {
                super.document(document);
                return;
            }
            this.jsoupDocument = Jsoup.parse(this.html);
            document.addField("canonical", this.getCanonical());
            this.mapFirstElement("title", "title");
            this.mapMeta("description", "description");
            this.mapAllElements("h1", "h1");
            this.mapAllElements("h2", "h2");
            this.mapAllElements("h3", "h3");
            this.mapAllElements("h4", "h4");
            // Configured mappings: key is the target field, value the selector.
            for (Map.Entry<String, String> mappingEntry : this.mapping.entrySet()) {
                this.mapFirstElement(mappingEntry.getValue(), mappingEntry.getKey());
            }
            document.setField("links", this.getAbsoluteLinks());
            document.setField("jsonld", this.getJsonLd());
            this.mapItempropArticleBody();
            this.extractAllMeta();
            if (this.deleteHtmlField) {
                document.deleteField(this.htmlField);
            }
        }
        catch (Exception e) {
            String url = document.getFieldValue("loc");
            LOG.error("There was an error processing the html for url: {} Error: ", url, e);
        }
        super.document(document);
    }

    /**
     * Collects the distinct absolute URLs of all {@code <a href>} elements.
     *
     * <p>Links for which jsoup cannot produce an absolute URL (it returns an
     * empty string in that case) are left out.
     *
     * @return the set of absolute link URLs, possibly empty
     */
    public Collection<String> getAbsoluteLinks() {
        HashSet<String> absoluteUrls = new HashSet<String>();
        for (Element anchor : this.jsoupDocument.select("a")) {
            String absUrl = anchor.absUrl("href");
            if (!absUrl.isEmpty()) {
                absoluteUrls.add(absUrl);
            }
        }
        return absoluteUrls;
    }

    /**
     * Returns the content of all {@code application/ld+json} scripts,
     * re-serialized by Jackson.
     *
     * <p>Blank scripts are ignored. A script that cannot be parsed is logged
     * and skipped so that the remaining scripts are still returned.
     *
     * @return the JSON-LD documents in page order, possibly empty
     */
    public Collection<String> getJsonLd() {
        ArrayList<String> jsonLdList = new ArrayList<String>();
        for (Element script : this.jsoupDocument.select("script[type=application/ld+json]")) {
            String jsonLd = script.data();
            if (StringUtils.isBlank(jsonLd)) {
                continue;
            }
            try {
                JsonNode json = JSON_LD_MAPPER.readTree(jsonLd);
                jsonLdList.add(JSON_LD_MAPPER.writeValueAsString(json));
            }
            catch (Exception e) {
                LOG.warn("Skipping JSON-LD script that could not be parsed", e);
            }
        }
        return jsonLdList;
    }

    /**
     * Returns the {@code href} of the first {@code <link rel=canonical>}.
     *
     * @return the canonical URL, or an empty string if the page has none
     */
    public String getCanonical() {
        Elements canonicalLinks = this.jsoupDocument.select("link[rel=canonical]");
        if (canonicalLinks.isEmpty()) {
            return "";
        }
        return canonicalLinks.get(0).attr("href");
    }

    /**
     * Adds the content of every {@code <meta>} tag to the document.
     *
     * <p>For each tag the {@code name}, {@code property} and {@code itemprop}
     * attributes are handled independently: the attribute value is recorded in
     * {@code metanames}, {@code propertynames} or {@code itempropnames}, and the
     * tag's {@code content} is stored under the configured meta prefix,
     * {@code __property_} or {@code __itemprop_} followed by the attribute
     * value. Colons in property names are replaced by underscores in the field
     * name.
     */
    public void extractAllMeta() {
        for (Element element : this.jsoupDocument.select("meta")) {
            String name = element.attr("name");
            String property = element.attr("property");
            String itemprop = element.attr("itemprop");
            String content = element.attr("content");
            if (StringUtils.isNotEmpty(name)) {
                this.document.addField("metanames", name);
                this.document.addField(this.metaPrefix + name, content);
            }
            if (StringUtils.isNotEmpty(property)) {
                this.document.addField("propertynames", property);
                this.document.addField("__property_" + property.replace(':', '_'), content);
            }
            if (StringUtils.isNotEmpty(itemprop)) {
                this.document.addField("itempropnames", itemprop);
                this.document.addField("__itemprop_" + itemprop, content);
            }
        }
    }

    /**
     * Adds the value of {@code attr} to {@code fieldName} if {@code element}
     * contains at least one element.
     *
     * @param element   the matched elements
     * @param attr      the attribute to read
     * @param fieldName the target field
     */
    public void mapFirstElementAttr(Elements element, String attr, String fieldName) {
        if (!element.isEmpty()) {
            this.document.addField(fieldName, element.attr(attr));
        }
    }

    /**
     * Adds one value to {@code fieldName} for every element matching
     * {@code selector}. The value is the text of all text nodes of the element
     * and its descendants, separated by single spaces and trimmed.
     *
     * @param selector  the CSS selector
     * @param fieldName the target field
     */
    public void mapAllElements(String selector, String fieldName) {
        for (Element element : this.jsoupDocument.select(selector)) {
            StringBuilder value = new StringBuilder();
            // getAllElements() includes the element itself plus all descendants.
            for (Element subElement : element.getAllElements()) {
                for (TextNode textNode : subElement.textNodes()) {
                    value.append(textNode.text()).append(" ");
                }
            }
            this.document.addField(fieldName, value.toString().trim());
        }
    }

    /**
     * Returns the text of the first element matching {@code selector}.
     *
     * @param selector the CSS selector
     * @return the element text, or {@code null} if nothing matches
     */
    public String getFirstElement(String selector) {
        Elements elements = this.jsoupDocument.select(selector);
        if (elements.isEmpty()) {
            return null;
        }
        return elements.get(0).text();
    }

    /**
     * Adds the text of the first element matching {@code selector} to
     * {@code fieldName}; nothing is added when there is no match or the text is
     * empty.
     *
     * @param selector  the CSS selector
     * @param fieldName the target field
     */
    public void mapFirstElement(String selector, String fieldName) {
        String value = this.getFirstElement(selector);
        if (!Strings.isNullOrEmpty(value)) {
            this.document.addField(fieldName, value);
        }
    }

    /**
     * Adds the {@code content} of the {@code <meta name=metaName>} tag to
     * {@code fieldName} if such a tag exists.
     *
     * @param metaName  value of the meta tag's {@code name} attribute
     * @param fieldName the target field
     */
    public void mapMeta(String metaName, String fieldName) {
        Elements meta = this.jsoupDocument.select("meta[name=" + metaName + "]");
        this.mapFirstElementAttr(meta, "content", fieldName);
    }

    /**
     * Adds the combined text of all elements carrying
     * {@code itemprop=articleBody} to the {@code articleBody} field. The field
     * is added even when the text is empty.
     */
    public void mapItempropArticleBody() {
        Elements articleBody = this.jsoupDocument.select("[itemprop=articleBody]");
        this.document.addField("articleBody", articleBody.text());
    }
}

