/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import com.jayway.jsonpath.Predicate;
import de.tblsoft.solr.crawl.Breadcrumb;
import de.tblsoft.solr.crawl.Custom;
import de.tblsoft.solr.crawl.JSoupAnalyzer;
import de.tblsoft.solr.crawl.Webpage;
import de.tblsoft.solr.crawl.attr.Attribute;
import de.tblsoft.solr.crawl.attr.Attributes;
import de.tblsoft.solr.http.ElasticHelper;
import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.bean.Document;
import de.tblsoft.solr.pipeline.filter.HttpWorker;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline filter that analyzes the HTML stored in a document field with a
 * {@link JSoupAnalyzer} and copies selected results back onto the document.
 *
 * <p>Configuration properties read in {@link #init()}:
 * <ul>
 *   <li>{@code htmlField} (default {@code "html"}) - field holding the HTML.</li>
 *   <li>{@code urlField} (default {@code "url"}) - field holding the page URL.</li>
 *   <li>{@code attributeStrategy}, {@code attributeSelector} - passed to the
 *       {@link Attributes} extraction request.</li>
 *   <li>{@code breadCrumbSelector} - when non-empty, a {@link Breadcrumb}
 *       extraction is requested with this selector.</li>
 *   <li>{@code mapping} - list of entries of the form
 *       {@code webpage:<jsonPath>-><fieldName>} or
 *       {@code jsoup:<selector>-><fieldName>}.</li>
 * </ul>
 */
public class HtmlFilter
extends AbstractFilter {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlFilter.class);

    /** Shared JSON serializer; created once instead of once per document. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** JsonPath configuration with {@link Option#SUPPRESS_EXCEPTIONS} enabled. */
    private static final Configuration JSON_PATH_CONFIGURATION =
            Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS);

    /** Separator between the source expression and the target field name in a mapping entry. */
    private static final String MAPPING_SEPARATOR = "->";

    protected String html;
    protected String htmlField;
    protected String urlField;
    protected String attributeStrategy;
    protected List<String> attributeSelector;
    protected String breadCrumbSelector;
    private Map<String, String> webPageMapping;
    private Map<String, String> jsoupMapping;

    /**
     * Reads the filter configuration and splits the {@code mapping} property
     * into the {@code webpage} and {@code jsoup} mappings.
     *
     * @throws IllegalArgumentException if a mapping entry with a known prefix
     *         does not contain a {@code ->} separated target
     */
    @Override
    public void init() {
        this.htmlField = this.getProperty("htmlField", "html");
        this.urlField = this.getProperty("urlField", "url");
        this.attributeStrategy = this.getProperty("attributeStrategy", null);
        this.attributeSelector = this.getPropertyAsList("attributeSelector", null);
        this.breadCrumbSelector = this.getProperty("breadCrumbSelector", null);
        List<String> mappingConfiguration = this.getPropertyAsList("mapping", new ArrayList<String>());
        this.webPageMapping = this.readConfig(mappingConfiguration, "webpage");
        this.jsoupMapping = this.readConfig(mappingConfiguration, "jsoup");
        super.init();
    }

    /**
     * Collects all mapping entries that start with {@code prefix + ":"}.
     *
     * <p>The prefix is matched literally, including the colon, and removed from
     * the start of the entry only. The remainder is split on {@code ->}; the
     * first part becomes the map key and the second part the map value.
     *
     * @param mappingConfiguration raw mapping entries
     * @param prefix               entry prefix without the trailing colon
     * @return map from source expression to target field name
     * @throws IllegalArgumentException if a matching entry has no {@code ->} target
     */
    private Map<String, String> readConfig(List<String> mappingConfiguration, String prefix) {
        final String marker = prefix + ":";
        Map<String, String> mapping = new HashMap<String, String>();
        for (String entry : mappingConfiguration) {
            if (!entry.startsWith(marker)) {
                continue;
            }
            String[] parts = entry.substring(marker.length()).split(MAPPING_SEPARATOR);
            if (parts.length < 2) {
                // Fail with the offending entry instead of a bare ArrayIndexOutOfBoundsException.
                throw new IllegalArgumentException("Invalid mapping entry '" + entry
                        + "': expected '" + marker + "<source>" + MAPPING_SEPARATOR + "<target>'");
            }
            mapping.put(parts[0], parts[1]);
        }
        return mapping;
    }

    /**
     * Analyzes the HTML of {@code document}, writes the mapped values and the
     * extracted attributes onto it, and passes it on via
     * {@code super.document(document)}.
     *
     * <p>If the analyzed {@link Webpage} cannot be serialized to JSON, the
     * error is logged and the document is not passed on.
     *
     * @param document the document to enrich
     */
    @Override
    public void document(Document document) {
        String url = document.getFieldValue(this.urlField);
        String html = document.getFieldValue(this.htmlField);

        JSoupAnalyzer jSoupAnalyzer = new JSoupAnalyzer(url, html);
        Attributes attributes = new Attributes();
        attributes.setStrategy(this.attributeStrategy);
        attributes.setSelector(this.attributeSelector);
        jSoupAnalyzer.analyze();
        jSoupAnalyzer.extractAttributes(attributes);

        if (StringUtils.isNotEmpty(this.breadCrumbSelector)) {
            Breadcrumb breadcrumb = new Breadcrumb();
            breadcrumb.setSelector(this.breadCrumbSelector);
            jSoupAnalyzer.extractBreadcrumb(breadcrumb);
        }

        // One Custom extraction request per configured "jsoup:" mapping entry.
        ArrayList<Custom> customs = new ArrayList<Custom>();
        for (Map.Entry<String, String> entry : this.jsoupMapping.entrySet()) {
            Custom custom = new Custom();
            custom.setJsoupSelector(entry.getKey());
            custom.setFieldName(entry.getValue());
            customs.add(custom);
        }
        jSoupAnalyzer.extractCustom(customs);

        Webpage webpage = jSoupAnalyzer.getWebpage();
        StringWriter writer = new StringWriter();
        try {
            // Serialize the webpage to JSON so the "webpage:" mappings can be
            // evaluated as JsonPath expressions against it.
            OBJECT_MAPPER.writeValue(writer, webpage);
            DocumentContext context = JsonPath.using(JSON_PATH_CONFIGURATION).parse(writer.toString());
            this.applyWebPageMapping(document, context);
            this.applyAttributes(document, webpage);
            super.document(document);
        }
        catch (IOException e) {
            LOG.error("Could not serialize analyzed webpage for url {}; document is not forwarded", url, e);
        }
    }

    /** Copies every non-null JsonPath result of the {@code webpage} mapping into its target field. */
    private void applyWebPageMapping(Document document, DocumentContext context) {
        for (Map.Entry<String, String> mapping : this.webPageMapping.entrySet()) {
            Object value = context.read(mapping.getKey());
            if (value == null) {
                continue;
            }
            document.setField(mapping.getValue(), value);
        }
    }

    /** Writes the extracted attributes and their derived key/datatype fields onto the document. */
    private void applyAttributes(Document document, Webpage webpage) {
        if (webpage.getAttributes() == null || webpage.getAttributes().getAttributes() == null) {
            return;
        }
        for (Attribute attribute : webpage.getAttributes().getAttributes()) {
            String key = ElasticHelper.normalizeKey(attribute.getName());
            document.addField("attributes", attribute.getValue());
            document.setField("attr_" + key, attribute.getValue());
            document.addField("datatypes", key + "=" + ElasticHelper.guessDatatype(attribute.getValue()));
            document.addField("attributeKeys", key + "=" + attribute.getName());
        }
    }
}

