/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.xpath.CachedXPathAPI;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.ExtractException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Extracts text from an HTML stream by parsing it into a DOM with NekoHTML's
 * {@link DOMParser} and concatenating the text content of every node selected
 * by a configurable XPath expression.
 *
 * <p>Parser features and properties can be registered through
 * {@link #addFeature(String, String)} and {@link #addProperty(String, String)};
 * they are applied to each new parser created by {@link #getDomParser()}.
 *
 * <p>The configuration fields (maps, pattern, XPath) are plain mutable fields
 * without synchronization; NOTE(review): presumably they are configured once
 * before the extractor is shared between crawler threads - confirm.
 */
public class HtmlXpathExtractor
extends AbstractXmlExtractor
implements Extractor {
    /** Collapses any run of whitespace in the extracted text to a single space. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    /**
     * Matches a {@code <meta>} tag whose {@code content} attribute carries a
     * {@code ; charset=NAME} part; group 1 captures the charset name.
     * Compiled case-insensitively and in multiline mode.
     */
    protected Pattern metaCharsetPattern = Pattern.compile(
            "<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Parser feature ids mapped to {@code "true"}/other; applied by {@link #getDomParser()}. */
    protected Map<String, String> featureMap = new HashMap<String, String>();

    /** Parser property ids mapped to their values; applied by {@link #getDomParser()}. */
    protected Map<String, String> propertyMap = new HashMap<String, String>();

    /**
     * XPath expression selecting the nodes whose text is extracted. The default
     * selects the {@code BODY} element plus every {@code alt} and {@code title}
     * attribute. NOTE(review): the upper-case element names presumably rely on
     * the parser's element-name normalization - confirm against the parser
     * configuration in use.
     */
    protected String targetNodePath = "//HTML/BODY | //@alt | //@title";

    /** Per-thread XPath evaluator, created lazily by {@link #getXPathAPI()}. */
    private final ThreadLocal<CachedXPathAPI> xpathAPI = new ThreadLocal<CachedXPathAPI>();

    /**
     * Parses the given HTML stream and returns the whitespace-normalized text of
     * all nodes matched by {@link #getTargetNodePath()}.
     *
     * <p>This method does not explicitly close {@code in}.
     *
     * @param in the HTML input stream; must not be {@code null}
     * @param params extraction parameters (not used by this implementation)
     * @return the extracted text wrapped in an {@link ExtractData}
     * @throws CrawlerSystemException if {@code in} is {@code null}
     * @throws ExtractException if encoding detection, parsing or XPath
     *         evaluation fails for any reason
     */
    @Override
    public ExtractData getText(InputStream in, Map<String, String> params) {
        if (in == null) {
            throw new CrawlerSystemException("The inputstream is null.");
        }
        try {
            BufferedInputStream bis = new BufferedInputStream(in);
            // getEncoding is inherited; the same buffered stream is handed to
            // the parser afterwards.
            String enc = this.getEncoding(bis);
            DOMParser parser = this.getDomParser();
            InputSource inputSource = new InputSource(bis);
            inputSource.setEncoding(enc);
            parser.parse(inputSource);
            Document document = parser.getDocument();
            StringBuilder buf = new StringBuilder(255);
            NodeList nodeList = this.getXPathAPI().selectNodeList(document, this.targetNodePath);
            for (int i = 0; i < nodeList.getLength(); ++i) {
                String text = nodeList.item(i).getTextContent();
                // getTextContent() is null for some node types (e.g. Document);
                // appending it would put the literal "null" into the result.
                if (text != null) {
                    buf.append(text).append(' ');
                }
            }
            return new ExtractData(WHITESPACE_PATTERN.matcher(buf).replaceAll(" ").trim());
        }
        catch (Exception e) {
            throw new ExtractException(e);
        }
        finally {
            // A CachedXPathAPI keeps its per-document caches until it is
            // discarded; dropping it after each document stops them from
            // accumulating on long-lived crawler threads.
            this.xpathAPI.remove();
        }
    }

    /**
     * Returns the calling thread's {@link CachedXPathAPI}, creating and
     * registering one if the thread has none yet.
     *
     * @return the XPath evaluator bound to the current thread
     */
    protected CachedXPathAPI getXPathAPI() {
        CachedXPathAPI cachedXPathAPI = this.xpathAPI.get();
        if (cachedXPathAPI == null) {
            cachedXPathAPI = new CachedXPathAPI();
            this.xpathAPI.set(cachedXPathAPI);
        }
        return cachedXPathAPI;
    }

    /**
     * Creates a new {@link DOMParser} and applies every configured feature and
     * property to it. A feature is enabled only when its value equals
     * {@code "true"} (ignoring case); any other value disables it.
     *
     * @return a freshly configured parser
     * @throws CrawlerSystemException if applying a feature or property fails
     */
    protected DOMParser getDomParser() {
        DOMParser parser = new DOMParser();
        try {
            for (Map.Entry<String, String> entry : this.featureMap.entrySet()) {
                parser.setFeature(entry.getKey(), "true".equalsIgnoreCase(entry.getValue()));
            }
            for (Map.Entry<String, String> entry : this.propertyMap.entrySet()) {
                parser.setProperty(entry.getKey(), entry.getValue());
            }
        }
        catch (Exception e) {
            throw new CrawlerSystemException("Invalid parser configuration.", e);
        }
        return parser;
    }

    /**
     * Returns the pattern used to find the charset declaration.
     *
     * @return the current meta-charset pattern
     */
    @Override
    protected Pattern getEncodingPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * Returns the tag pattern; this implementation supplies none.
     *
     * @return always {@code null}
     */
    @Override
    protected Pattern getTagPattern() {
        return null;
    }

    /**
     * Registers a parser feature to be applied by {@link #getDomParser()}.
     *
     * @param key the feature id; must not be blank
     * @param value {@code "true"} (any case) to enable, anything else to
     *        disable; must not be blank
     * @throws CrawlerSystemException if {@code key} or {@code value} is blank
     */
    public void addFeature(String key, String value) {
        if (StringUtil.isBlank(key) || StringUtil.isBlank(value)) {
            throw new CrawlerSystemException("key or value is null.");
        }
        this.featureMap.put(key, value);
    }

    /**
     * Registers a parser property to be applied by {@link #getDomParser()}.
     *
     * @param key the property id; must not be blank
     * @param value the property value; must not be blank
     * @throws CrawlerSystemException if {@code key} or {@code value} is blank
     */
    public void addProperty(String key, String value) {
        if (StringUtil.isBlank(key) || StringUtil.isBlank(value)) {
            throw new CrawlerSystemException("key or value is null.");
        }
        this.propertyMap.put(key, value);
    }

    /**
     * @return the live (not copied) map of configured parser features
     */
    public Map<String, String> getFeatureMap() {
        return this.featureMap;
    }

    /**
     * @param featureMap the map of parser features to use; stored by reference
     */
    public void setFeatureMap(Map<String, String> featureMap) {
        this.featureMap = featureMap;
    }

    /**
     * @return the live (not copied) map of configured parser properties
     */
    public Map<String, String> getPropertyMap() {
        return this.propertyMap;
    }

    /**
     * @param propertyMap the map of parser properties to use; stored by reference
     */
    public void setPropertyMap(Map<String, String> propertyMap) {
        this.propertyMap = propertyMap;
    }

    /**
     * @return the pattern used to locate the meta charset declaration
     */
    public Pattern getMetaCharsetPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * @param metaCharsetPattern the pattern used to locate the meta charset
     *        declaration
     */
    public void setMetaCharsetPattern(Pattern metaCharsetPattern) {
        this.metaCharsetPattern = metaCharsetPattern;
    }

    /**
     * @return the XPath expression selecting the nodes to extract text from
     */
    public String getTargetNodePath() {
        return this.targetNodePath;
    }

    /**
     * @param targetNodePath the XPath expression selecting the nodes to extract
     *        text from
     */
    public void setTargetNodePath(String targetNodePath) {
        this.targetNodePath = targetNodePath;
    }
}

