/*
 * Decompiled with CFR 0.152.
 */
package com.liferay.portal.tika.internal.extract;

import com.liferay.petra.concurrent.NoticeableFuture;
import com.liferay.petra.io.StreamUtil;
import com.liferay.petra.io.unsync.UnsyncBufferedInputStream;
import com.liferay.petra.io.unsync.UnsyncByteArrayInputStream;
import com.liferay.petra.process.ProcessCallable;
import com.liferay.petra.process.ProcessChannel;
import com.liferay.petra.process.ProcessException;
import com.liferay.petra.process.ProcessExecutor;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.util.ArrayUtil;
import com.liferay.portal.kernel.util.TextExtractor;
import com.liferay.portal.tika.internal.configuration.helper.TikaConfigurationHelper;
import com.liferay.portal.tika.internal.util.ProcessConfigUtil;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.txt.UniversalEncodingDetector;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

@Component(service={TextExtractor.class})
public class TextExtractorImpl
implements TextExtractor {
    private static final Log _log = LogFactoryUtil.getLog(TextExtractorImpl.class);
    @Reference
    private ProcessExecutor _processExecutor;
    @Reference
    private TikaConfigurationHelper _tikaConfigurationHelper;

    public String extractText(InputStream inputStream, int maxStringLength) {
        String text;
        block7: {
            if (maxStringLength == 0) {
                return "";
            }
            text = null;
            try {
                Tika tika = new Tika(this._tikaConfigurationHelper.getTikaConfig());
                tika.setMaxStringLength(maxStringLength);
                if (!inputStream.markSupported()) {
                    inputStream = new UnsyncBufferedInputStream(inputStream);
                }
                if (this._tikaConfigurationHelper.useForkProcess(tika.detect(inputStream))) {
                    InputStream finalInputStream = inputStream;
                    ProcessChannel processChannel = this._processExecutor.execute(ProcessConfigUtil.getProcessConfig(), (ProcessCallable)new ExtractTextProcessCallable(tika.getParser(), tika.getDetector(), tika.getMaxStringLength(), StreamUtil.toByteArray((InputStream)finalInputStream)));
                    NoticeableFuture future = processChannel.getProcessNoticeableFuture();
                    text = (String)future.get();
                } else {
                    text = TextExtractorImpl._parseToString(tika.getParser(), tika.getDetector(), tika.getMaxStringLength(), inputStream);
                }
            }
            catch (Exception exception) {
                if (!_log.isWarnEnabled()) break block7;
                _log.warn((Throwable)exception);
            }
        }
        if (_log.isDebugEnabled()) {
            _log.debug((Object)("Extracted text: " + text));
        }
        return text;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private static String _parseToString(Parser parser, final Detector detector, int maxStringLength, InputStream inputStream) throws IOException, TikaException {
        inputStream.mark(1);
        try {
            if (inputStream.read() == -1) {
                String string = "";
                return string;
            }
        }
        finally {
            inputStream.reset();
        }
        UniversalEncodingDetector universalEncodingDetector = new UniversalEncodingDetector();
        Metadata metadata = new Metadata();
        Charset charset = universalEncodingDetector.detect(inputStream, metadata);
        String contentEncoding = "";
        if (charset != null) {
            contentEncoding = charset.name();
        }
        if (!contentEncoding.equals("")) {
            metadata.set("Content-Encoding", contentEncoding);
            metadata.set("Content-Type", "text/plain; charset=" + contentEncoding);
        }
        WriteOutContentHandler writeOutContentHandler = new WriteOutContentHandler(maxStringLength);
        try {
            ParseContext parseContext = new ParseContext();
            parseContext.set(EmbeddedDocumentExtractor.class, (Object)new ParsingEmbeddedDocumentExtractor(parseContext){

                public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml) throws IOException, SAXException {
                    MediaType mediaType = detector.detect(inputStream, new Metadata());
                    if (Objects.equals("image/png", mediaType.toString())) {
                        return;
                    }
                    super.parseEmbedded(inputStream, contentHandler, metadata, outputHtml);
                }
            });
            parseContext.set(Parser.class, (Object)parser);
            parser.parse(inputStream, (ContentHandler)new BodyContentHandler((ContentHandler)writeOutContentHandler), metadata, parseContext);
        }
        catch (SAXException saxException) {
            if (!writeOutContentHandler.isWriteLimitReached((Throwable)saxException)) {
                throw new TikaException(saxException.getMessage(), (Throwable)saxException);
            }
        }
        finally {
            inputStream.close();
        }
        return writeOutContentHandler.toString();
    }

    private static class ExtractTextProcessCallable
    implements ProcessCallable<String> {
        private static final long serialVersionUID = 1L;
        private final byte[] _data;
        private final Detector _detector;
        private final int _maxStringLength;
        private final Parser _parser;

        public String call() throws ProcessException {
            if (ArrayUtil.isEmpty((byte[])this._data)) {
                return "";
            }
            Logger logger = Logger.getLogger("org.apache.tika.parser.SQLite3Parser");
            logger.setLevel(Level.SEVERE);
            logger = Logger.getLogger("org.apache.tika.parsers.PDFParser");
            logger.setLevel(Level.SEVERE);
            try {
                return TextExtractorImpl._parseToString(this._parser, this._detector, this._maxStringLength, (InputStream)new UnsyncByteArrayInputStream(this._data));
            }
            catch (Exception exception) {
                throw new ProcessException((Throwable)exception);
            }
        }

        private ExtractTextProcessCallable(Parser parser, Detector detector, int maxStringLength, byte[] data) {
            this._parser = parser;
            this._detector = detector;
            this._maxStringLength = maxStringLength;
            this._data = data;
        }
    }
}

