/*
 * Decompiled with CFR 0.152.
 */
package org.apache.jackrabbit.oak.plugins.index.search.spi.binary;

import com.google.common.collect.Lists;
import com.google.common.io.ByteSource;
import com.google.common.io.CountingInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeoutException;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.BlobByteSource;
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.TextExtractionStats;
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.TikaParserConfig;
import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Extracts plain text from binary property values using Apache Tika so that the
 * text can be handed to a full-text index.
 *
 * <p>Extraction results (including failures) are stored in the supplied
 * {@link ExtractedTextCache}, and timing/size figures are accumulated in a
 * {@link TextExtractionStats} instance that is reported by {@link #done(boolean)}.
 *
 * <p>NOTE(review): the parser, Tika config and media-type sets are initialised
 * lazily without any synchronisation; presumably an instance is only used from
 * a single thread at a time - confirm against callers.
 */
public class FulltextBinaryTextExtractor {
    private static final Logger log = LoggerFactory.getLogger(FulltextBinaryTextExtractor.class);

    /** Parser shared by all extractors whose index definition has no custom Tika config. */
    private static final Parser defaultParser = createDefaultParser();

    /**
     * Binaries longer than this many bytes are parsed via
     * {@link ExtractedTextCache#process}; shorter ones are parsed inline.
     * Overridable through the {@code oak.search.smallBinary} system property.
     */
    private static final long SMALL_BINARY = Long.getLong("oak.search.smallBinary", 16384L);

    /** Marker value returned (instead of extracted text) when extraction fails. */
    private static final String TEXT_EXTRACTION_ERROR = "TextExtractionError";

    private final TextExtractionStats textExtractionStats = new TextExtractionStats();
    private final ExtractedTextCache extractedTextCache;
    private final IndexDefinition definition;
    private final boolean reindex;

    /** Lazily created by {@link #getParser()}. */
    private Parser parser;

    /** Lazily created by {@link #getTikaConfig()}. */
    private TikaConfigHolder tikaConfig;

    /** Media types supported by the parser in use; lazily populated. */
    private Set<MediaType> supportedMediaTypes;

    /** Media types the Tika configuration marks as not to be indexed; lazily populated. */
    private Set<MediaType> nonIndexedMediaType;

    /**
     * Creates an extractor bound to one index definition.
     *
     * @param extractedTextCache cache consulted before, and updated after, each extraction
     * @param definition         index definition supplying Tika config, mime-type mapping
     *                           and the maximum extract length
     * @param reindex            forwarded to the cache lookup for every binary
     */
    public FulltextBinaryTextExtractor(ExtractedTextCache extractedTextCache, IndexDefinition definition, boolean reindex) {
        this.extractedTextCache = extractedTextCache;
        this.definition = definition;
        this.reindex = reindex;
    }

    /**
     * Logs the accumulated extraction statistics and passes them on to the cache.
     *
     * @param reindex forwarded to {@link TextExtractionStats#log}; note this is the
     *                method argument, not the value given to the constructor
     */
    public void done(boolean reindex) {
        textExtractionStats.log(reindex);
        textExtractionStats.collectStats(extractedTextCache);
    }

    /**
     * Extracts text from every binary value of the given property.
     *
     * <p>The node's {@code jcr:mimeType} (after mapping through the index definition)
     * must be present and supported, otherwise nothing is extracted.
     *
     * @param property binary property whose values are to be extracted
     * @param state    node holding the property; read for {@code jcr:mimeType} and
     *                 {@code jcr:encoding}
     * @param path     path used in log messages and as cache lookup key
     * @return one entry per binary value that produced a non-null result; empty when
     *         the mime type is missing or unsupported
     */
    public List<String> newBinary(PropertyState property, NodeState state, String path) {
        List<String> values = new ArrayList<>();
        Metadata metadata = new Metadata();

        String type = definition.getTikaMappedMimeType(state.getString("jcr:mimeType"));
        if (type == null || !isSupportedMediaType(type)) {
            log.trace("[{}] Ignoring binary content for node {} due to unsupported (or null) jcr:mimeType [{}]", getIndexName(), path, type);
            return values;
        }

        metadata.set("Content-Type", type);
        if ("jcr:data".equals(property.getName())) {
            // The encoding is optional; only pass it to Tika when the node declares one.
            String encoding = state.getString("jcr:encoding");
            if (encoding != null) {
                metadata.set("Content-Encoding", encoding);
            }
        }

        for (Blob v : property.getValue(Type.BINARIES)) {
            String value = parseStringValue(v, metadata, path, property.getName());
            if (value != null) {
                values.add(value);
            }
        }
        return values;
    }

    /**
     * Returns the text for one blob, preferring a cached result over a fresh extraction.
     */
    private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
        String text = extractedTextCache.get(path, propertyName, v, reindex);
        if (text == null) {
            text = parseStringValue0(v, metadata, path);
        }
        return text;
    }

    /**
     * Runs the Tika parser over one blob and records the outcome in the cache.
     *
     * @return the extracted text (possibly truncated when the configured write limit
     *         was reached), or {@code "TextExtractionError"} when extraction failed
     */
    private String parseStringValue0(Blob v, final Metadata metadata, String path) {
        final WriteOutContentHandler handler = new WriteOutContentHandler(definition.getMaxExtractLength());
        long start = System.currentTimeMillis();
        long bytesRead = 0L;
        long length = v.length();
        if (log.isDebugEnabled()) {
            log.debug("Extracting {}, {} bytes, id {}", path, length, v.getContentIdentity());
        }
        try {
            final CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
            try {
                if (length > SMALL_BINARY) {
                    // Larger binaries are handed to the cache's process() hook together
                    // with a descriptive task name; it may fail with a TimeoutException.
                    String name = "Extracting " + path + ", " + length + " bytes";
                    extractedTextCache.process(name, new Callable<Void>() {
                        @Override
                        public Void call() throws Exception {
                            getParser().parse(stream, handler, metadata, new ParseContext());
                            return null;
                        }
                    });
                } else {
                    getParser().parse(stream, handler, metadata, new ParseContext());
                }
            } finally {
                // Capture the byte count before closing, even when parsing failed.
                bytesRead = stream.getCount();
                stream.close();
            }
        } catch (LinkageError e) {
            log.debug("[{}] Failed to extract text from a binary property: {}. This often happens when some media types are disabled by configuration. The stack trace is included to flag some 'unintended' failures", getIndexName(), path, e);
            extractedTextCache.put(v, ExtractedText.ERROR);
            return TEXT_EXTRACTION_ERROR;
        } catch (TimeoutException t) {
            log.warn("[{}] Failed to extract text from a binary property due to timeout: {}.", getIndexName(), path);
            extractedTextCache.put(v, ExtractedText.ERROR);
            extractedTextCache.putTimeout(v, ExtractedText.ERROR);
            return TEXT_EXTRACTION_ERROR;
        } catch (Throwable t) {
            if (!handler.isWriteLimitReached(t)) {
                log.debug("[{}] Failed to extract text from a binary property: {}. This is a fairly common case, and nothing to worry about. The stack trace is included to help improve the text extraction feature.", getIndexName(), path, t);
                extractedTextCache.put(v, ExtractedText.ERROR);
                return TEXT_EXTRACTION_ERROR;
            }
            // Hitting the write limit is not an error: fall through and keep what was written so far.
            log.debug("Extracted text size exceeded configured limit({})", (Object) definition.getMaxExtractLength());
        }

        String result = handler.toString();
        if (bytesRead > 0L) {
            long time = System.currentTimeMillis() - start;
            int len = result.length();
            recordTextExtractionStats(time, bytesRead, len);
            if (log.isDebugEnabled()) {
                log.debug("Extracting {} took {} ms, {} bytes read, {} text size", path, time, bytesRead, len);
            }
        }
        extractedTextCache.put(v, new ExtractedText(ExtractedText.ExtractionResult.SUCCESS, result));
        return result;
    }

    /** Adds one extraction's figures to the running statistics. */
    private void recordTextExtractionStats(long timeInMillis, long bytesRead, int textLength) {
        textExtractionStats.addStats(timeInMillis, bytesRead, textLength);
    }

    /** Returns the index name from the definition; used as a log message prefix. */
    private String getIndexName() {
        return definition.getIndexName();
    }

    /**
     * Returns the Tika configuration for this extractor's index definition,
     * loading it on first use.
     *
     * @return the loaded configuration
     */
    public TikaConfig getTikaConfig() {
        if (tikaConfig == null) {
            tikaConfig = initializeTikaConfig(definition);
        }
        return tikaConfig.config;
    }

    /** Returns the parser for this extractor, creating it on first use. */
    private Parser getParser() {
        if (parser == null) {
            parser = initializeTikaParser(definition);
        }
        return parser;
    }

    /**
     * Tells whether the given media type is supported by the parser and not listed
     * among the non-indexed media types. Both sets are computed on first call.
     */
    private boolean isSupportedMediaType(String type) {
        if (supportedMediaTypes == null) {
            supportedMediaTypes = getParser().getSupportedTypes(new ParseContext());
            nonIndexedMediaType = getNonIndexedMediaTypes();
        }
        MediaType mediaType = MediaType.parse(type);
        return supportedMediaTypes.contains(mediaType) && !nonIndexedMediaType.contains(mediaType);
    }

    /**
     * Reads the set of non-indexed media types from the custom Tika config of the
     * index definition, or from the bundled {@code tika-config.xml} otherwise.
     *
     * @return the configured set, or an empty set when no configuration stream is
     *         available or it cannot be read
     */
    private Set<MediaType> getNonIndexedMediaTypes() {
        InputStream configStream = null;
        String configSource = null;
        try {
            if (definition.hasCustomTikaConfig()) {
                configSource = String.format("Custom config at %s", definition.getIndexPath());
                configStream = definition.getTikaConfig();
            } else {
                URL configUrl = FulltextIndexEditorContext.class.getResource("tika-config.xml");
                configSource = "Default : tika-config.xml";
                if (configUrl != null) {
                    configStream = configUrl.openStream();
                }
            }
            if (configStream != null) {
                return TikaParserConfig.getNonIndexedMediaTypes(configStream);
            }
        } catch (IOException | TikaException | SAXException e) {
            log.warn("Tika configuration not available : " + configSource, e);
        } finally {
            IOUtils.closeQuietly(configStream);
        }
        return Collections.emptySet();
    }

    /**
     * Loads a Tika configuration, temporarily switching the thread context class
     * loader to the one that loaded {@link FulltextIndexEditorContext}.
     *
     * @param definition index definition whose custom config is preferred; may be
     *                   {@code null}, in which case the bundled {@code tika-config.xml}
     *                   is looked up
     * @return holder for the loaded configuration, or for Tika's default configuration
     *         when no stream is available or loading fails
     */
    private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition definition) {
        ClassLoader current = Thread.currentThread().getContextClassLoader();
        InputStream configStream = null;
        String configSource = null;
        try {
            Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
            if (definition != null && definition.hasCustomTikaConfig()) {
                log.debug("[{}] Using custom tika config", definition.getIndexName());
                configSource = "Custom config at " + definition.getIndexPath();
                configStream = definition.getTikaConfig();
            } else {
                URL configUrl = FulltextIndexEditorContext.class.getResource("tika-config.xml");
                if (configUrl != null) {
                    configSource = configUrl.toString();
                    configStream = configUrl.openStream();
                }
            }
            if (configStream != null) {
                return new TikaConfigHolder(new TikaConfig(configStream), configSource);
            }
        } catch (IOException | TikaException | SAXException e) {
            log.warn("Tika configuration not available : " + configSource, e);
        } finally {
            // Always release the stream and restore the caller's class loader.
            IOUtils.closeQuietly(configStream);
            Thread.currentThread().setContextClassLoader(current);
        }
        return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config");
    }

    /**
     * Creates a dedicated parser when the definition carries a custom Tika config;
     * otherwise returns the shared default parser.
     */
    private Parser initializeTikaParser(IndexDefinition definition) {
        ClassLoader current = Thread.currentThread().getContextClassLoader();
        try {
            if (definition.hasCustomTikaConfig()) {
                Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
                return new AutoDetectParser(getTikaConfig());
            }
        } finally {
            Thread.currentThread().setContextClassLoader(current);
        }
        return defaultParser;
    }

    /**
     * Builds the shared default parser from the bundled Tika configuration, falling
     * back to a plain {@link AutoDetectParser} when that fails.
     */
    private static AutoDetectParser createDefaultParser() {
        ClassLoader current = Thread.currentThread().getContextClassLoader();
        TikaConfigHolder configHolder = null;
        try {
            configHolder = initializeTikaConfig(null);
            Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
            log.info("Loaded default Tika Config from classpath {}", configHolder);
            return new AutoDetectParser(configHolder.config);
        } catch (Exception e) {
            log.warn("Tika configuration not available : " + configHolder, e);
        } finally {
            Thread.currentThread().setContextClassLoader(current);
        }
        return new AutoDetectParser();
    }

    /** Pairs a Tika configuration with a description of where it was loaded from. */
    private static final class TikaConfigHolder {
        final TikaConfig config;
        final String sourceInfo;

        public TikaConfigHolder(TikaConfig config, String sourceInfo) {
            this.config = config;
            this.sourceInfo = sourceInfo;
        }

        /** Returns the source description, which is what log messages print. */
        @Override
        public String toString() {
            return sourceInfo;
        }
    }
}

