/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.handler.extraction;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import org.apache.commons.io.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
import org.apache.solr.handler.loader.ContentStreamLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.TextSerializer;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class ExtractingDocumentLoader
extends ContentStreamLoader {
    private static final Logger log = LoggerFactory.getLogger(ExtractingDocumentLoader.class);
    public static final String TEXT_FORMAT = "text";
    public static final String XML_FORMAT = "xml";
    private static final XPathParser PARSER = new XPathParser("xhtml", "http://www.w3.org/1999/xhtml");
    final IndexSchema schema;
    final SolrParams params;
    final UpdateRequestProcessor processor;
    final boolean ignoreTikaException;
    protected AutoDetectParser autoDetectParser;
    private final AddUpdateCommand templateAdd;
    protected TikaConfig config;
    protected SolrContentHandlerFactory factory;

    public ExtractingDocumentLoader(SolrQueryRequest req, UpdateRequestProcessor processor, TikaConfig config, SolrContentHandlerFactory factory) {
        this.params = req.getParams();
        this.schema = req.getSchema();
        this.config = config;
        this.processor = processor;
        this.templateAdd = new AddUpdateCommand(req);
        this.templateAdd.overwrite = this.params.getBool("overwrite", true);
        this.templateAdd.commitWithin = this.params.getInt("commitWithin", -1);
        this.autoDetectParser = new AutoDetectParser(config);
        this.factory = factory;
        this.ignoreTikaException = this.params.getBool("ignoreTikaException", false);
    }

    void doAdd(SolrContentHandler handler, AddUpdateCommand template) throws IOException {
        template.solrDoc = handler.newDocument();
        this.processor.processAdd(template);
    }

    void addDoc(SolrContentHandler handler) throws IOException {
        this.templateAdd.clear();
        this.doAdd(handler, this.templateAdd);
    }

    public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
        AutoDetectParser parser = null;
        String streamType = req.getParams().get("stream.type", null);
        if (streamType != null) {
            MediaType mt = MediaType.parse((String)streamType.trim().toLowerCase(Locale.ENGLISH));
            parser = (Parser)new DefaultParser(this.config.getMediaTypeRegistry()).getParsers().get(mt);
        } else {
            parser = this.autoDetectParser;
        }
        if (parser != null) {
            Metadata metadata = new Metadata();
            String resourceName = req.getParams().get("resource.name", null);
            if (resourceName != null) {
                metadata.add("resourceName", resourceName);
            }
            if (stream.getContentType() != null) {
                metadata.add("Content-Type", stream.getContentType());
            }
            InputStream inputStream = null;
            try {
                SolrContentHandler handler;
                inputStream = stream.getStream();
                metadata.add("stream_name", stream.getName());
                metadata.add("stream_source_info", stream.getSourceInfo());
                metadata.add("stream_size", String.valueOf(stream.getSize()));
                metadata.add("stream_content_type", stream.getContentType());
                String charset = ContentStreamBase.getCharsetFromContentType((String)stream.getContentType());
                if (charset != null) {
                    metadata.add("Content-Encoding", charset);
                }
                String xpathExpr = this.params.get("xpath");
                boolean extractOnly = this.params.getBool("extractOnly", false);
                SolrContentHandler parsingHandler = handler = this.factory.createSolrContentHandler(metadata, this.params, this.schema);
                StringWriter writer = null;
                TextSerializer serializer = null;
                if (extractOnly) {
                    String extractFormat = this.params.get("extractFormat", XML_FORMAT);
                    writer = new StringWriter();
                    if (extractFormat.equals(TEXT_FORMAT)) {
                        serializer = new TextSerializer();
                        serializer.setOutputCharStream((Writer)writer);
                        serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true));
                    } else {
                        serializer = new XMLSerializer((Writer)writer, new OutputFormat("XML", "UTF-8", true));
                    }
                    if (xpathExpr != null) {
                        Matcher matcher = PARSER.parse(xpathExpr);
                        serializer.startDocument();
                        parsingHandler = new MatchingContentHandler((ContentHandler)serializer, matcher);
                    } else {
                        parsingHandler = serializer;
                    }
                } else if (xpathExpr != null) {
                    Matcher matcher = PARSER.parse(xpathExpr);
                    parsingHandler = new MatchingContentHandler((ContentHandler)handler, matcher);
                }
                try {
                    ParseContext context = new ParseContext();
                    parser.parse(inputStream, (ContentHandler)parsingHandler, metadata, context);
                }
                catch (TikaException e) {
                    if (this.ignoreTikaException) {
                        log.warn("skip extracting text due to " + e.getLocalizedMessage() + ". metadata=" + metadata.toString());
                    }
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, (Throwable)e);
                }
                if (!extractOnly) {
                    this.addDoc(handler);
                }
                if (xpathExpr != null) {
                    serializer.endDocument();
                }
                rsp.add(stream.getName(), (Object)writer.toString());
                writer.close();
                String[] names = metadata.names();
                NamedList metadataNL = new NamedList();
                for (int i = 0; i < names.length; ++i) {
                    String[] vals = metadata.getValues(names[i]);
                    metadataNL.add(names[i], (Object)vals);
                }
                rsp.add(stream.getName() + "_metadata", (Object)metadataNL);
            }
            catch (SAXException e) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, (Throwable)e);
            }
            finally {
                IOUtils.closeQuietly((InputStream)inputStream);
            }
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stream type of " + streamType + " didn't match any known parsers.  Please supply the " + "stream.type" + " parameter.");
        }
    }
}

