/*
 * Decompiled with CFR 0.152.
 */
package org.metafacture.html;

import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Decodes an HTML document, read from a {@link Reader}, into a single record of
 * stream events.
 *
 * <p>Each call to {@link #process(Reader)} emits one record whose ID is a random
 * UUID. Every HTML element becomes an entity named after the element's node name;
 * every attribute becomes a literal inside that entity; the element's text (or,
 * if the text is empty, its data) becomes a literal named {@code value}.
 *
 * <p>In addition, the "attribute values as subfields" mapping (see
 * {@link #setAttrValsAsSubfields(String)}) can turn the <em>value</em> of one
 * attribute into the <em>name</em> of an extra literal, e.g. with the default
 * mapping {@code link.rel=href}, {@code <link rel="canonical" href="x">}
 * additionally emits the literal {@code canonical = x}.
 */
@Description(value="Decode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel=\"canonical\" href=\"http://example.org\"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class`")
@In(value=Reader.class)
@Out(value=StreamReceiver.class)
@FluxCommand(value="decode-html")
public class HtmlDecoder extends DefaultObjectPipe<Reader, StreamReceiver> {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlDecoder.class);
    private static final String DEFAULT_ATTR_VALS_AS_SUBFIELDS = "meta.name=content&meta.property=content&link.rel=href&a.rel=href";

    /**
     * Maps {@code element.attribute} keys to the name of the attribute whose value
     * is used as the subfield value; an empty mapping value means "use the element text".
     */
    private Map<String, String> attrValsAsSubfields;

    /**
     * Creates a decoder configured with the default attribute-value-as-subfield mapping.
     */
    public HtmlDecoder() {
        // Parse directly instead of calling the overridable public setter from the constructor.
        this.attrValsAsSubfields = parseAttrValsAsSubfields(DEFAULT_ATTR_VALS_AS_SUBFIELDS);
    }

    /**
     * Reads the complete HTML document from {@code reader}, parses it, and emits it
     * as one record to the downstream receiver.
     *
     * <p>If reading fails with an {@link IOException}, the error is logged and no
     * events are emitted for this input.
     *
     * @param reader the source of the HTML text
     */
    public void process(Reader reader) {
        // Read and parse BEFORE opening the record, so that an I/O failure cannot
        // leave the receiver with a startRecord that is never matched by an endRecord.
        final Document document;
        try {
            document = Jsoup.parse(IOUtils.toString(reader));
        }
        catch (IOException e) {
            LOG.error(e.getMessage(), e);
            return;
        }
        StreamReceiver receiver = this.getReceiver();
        receiver.startRecord(UUID.randomUUID().toString());
        this.process(document, receiver);
        receiver.endRecord();
    }

    /**
     * Recursively emits one entity per child element of {@code parent}.
     *
     * @param parent   the element whose children are emitted
     * @param receiver the receiver of the generated events
     */
    private void process(Element parent, StreamReceiver receiver) {
        for (Element element : parent.children()) {
            receiver.startEntity(element.nodeName());
            Attributes attributes = element.attributes();
            boolean addedValueAsSubfield = false;
            for (Attribute attribute : attributes) {
                // Accumulate with |= (non-short-circuit, so the handler always runs):
                // a later, unrelated attribute must not reset the flag, otherwise the
                // element text would be emitted both as a subfield and as "value".
                addedValueAsSubfield |= this.handleAttributeValuesAsSubfields(receiver, element, attributes, attribute);
                receiver.literal(attribute.getKey(), attribute.getValue());
            }
            String text = element.text().trim();
            // Fall back to the element's data when it has no text.
            String value = text.isEmpty() ? element.data() : text;
            if (!value.isEmpty() && !addedValueAsSubfield) {
                receiver.literal("value", value);
            }
            this.process(element, receiver);
            receiver.endEntity();
        }
    }

    /**
     * Emits an extra literal named after the value of {@code attribute} if
     * {@code element.attribute} is configured in the subfield mapping.
     *
     * @param receiver   the receiver of the generated literal
     * @param element    the element that owns the attribute
     * @param attributes all attributes of {@code element}
     * @param attribute  the attribute currently being handled
     * @return {@code true} only if the element's text was emitted as the subfield
     *         value (so the caller should not emit it again as {@code value});
     *         {@code false} if nothing was emitted or another attribute's value was used
     */
    private boolean handleAttributeValuesAsSubfields(StreamReceiver receiver, Element element, Attributes attributes, Attribute attribute) {
        String fullFieldKey = element.nodeName() + "." + attribute.getKey();
        // Mapping values are never null, so a null lookup result means "not configured".
        String configValue = this.attrValsAsSubfields.get(fullFieldKey);
        if (configValue == null) {
            return false;
        }
        if (configValue.trim().isEmpty()) {
            // No source attribute configured: use the element text as the subfield value.
            receiver.literal(attribute.getValue(), element.text().trim());
            return true;
        }
        receiver.literal(attribute.getValue(), attributes.get(configValue));
        return false;
    }

    /**
     * Sets which attribute values are emitted as subfields.
     *
     * <p>The argument is an HTTP-style query string of {@code element.attribute=sourceAttribute}
     * pairs separated by {@code &}. A pair without a value (e.g. {@code title.lang}) uses the
     * element text as the subfield value. If the string starts with {@code &}, the pairs are
     * added to the defaults; otherwise they replace the defaults.
     *
     * @param mapString the mapping as a query string; must not be {@code null}
     */
    public void setAttrValsAsSubfields(String mapString) {
        String input = mapString.startsWith("&") ? DEFAULT_ATTR_VALS_AS_SUBFIELDS + mapString : mapString;
        this.attrValsAsSubfields = parseAttrValsAsSubfields(input);
    }

    /**
     * Parses a query string of URL-encoded {@code key=value} pairs into a map.
     * Pairs with an empty key are skipped; a pair without {@code =} gets an empty value.
     *
     * @param queryString the {@code &}-separated pairs
     * @return a new mutable map of the decoded pairs
     */
    private static Map<String, String> parseAttrValsAsSubfields(String queryString) {
        Map<String, String> result = new HashMap<>();
        String utf8 = StandardCharsets.UTF_8.name();
        for (String nameValuePair : queryString.split("&")) {
            // Limit 2: always yields at least one element (a bare "=" no longer produces
            // an empty array) and keeps any further '=' as part of the value.
            String[] nameValue = nameValuePair.split("=", 2);
            if (nameValue[0].isEmpty()) {
                // An empty key can never match "element.attribute"; ignore stray separators.
                continue;
            }
            try {
                String key = URLDecoder.decode(nameValue[0], utf8);
                String val = nameValue.length > 1 ? URLDecoder.decode(nameValue[1], utf8) : "";
                result.put(key, val);
            }
            catch (UnsupportedEncodingException e) {
                LOG.error(e.getMessage(), e);
            }
        }
        return result;
    }
}

