/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
import org.apache.pdfbox.text.PDFTextStripper;
import org.codelibs.core.lang.ThreadUtil;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.ExtractException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.extractor.ExtractorFactory;
import org.codelibs.fess.crawler.extractor.impl.PasswordBasedExtractor;
import org.codelibs.fess.crawler.helper.MimeTypeHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts text from PDF documents using PDFBox.
 *
 * <p>Besides the page text, the content of embedded files (from the document's
 * embedded-files name tree) and of file-attachment annotations is appended to
 * the extracted text by delegating to the extractor registered for each
 * attachment's MIME type. Document-information entries are copied to the
 * resulting {@link ExtractData} as metadata.</p>
 *
 * <p>The PDFBox work runs on a separate thread so that it can be abandoned when
 * it does not finish within {@link #getTimeout()} milliseconds.</p>
 */
public class PdfExtractor extends PasswordBasedExtractor {
    private static final Logger logger = LoggerFactory.getLogger(PdfExtractor.class);

    /** Maximum time, in milliseconds, to wait for the PDFBox worker thread. */
    protected long timeout = 30000L;

    /** Whether the PDFBox worker thread is started as a daemon thread. */
    protected boolean isDaemonThread = false;

    /**
     * Extracts the text and metadata of a PDF document.
     *
     * @param in the PDF content; must not be {@code null}
     * @param params extraction parameters, used to look up the document password
     * @return the extracted text together with the document-information metadata
     * @throws CrawlerSystemException if {@code in} is {@code null}
     * @throws ExtractException if the document cannot be loaded or parsed, if the
     *         extraction does not finish within the timeout, or if the calling
     *         thread is interrupted while waiting
     */
    @Override
    public ExtractData getText(final InputStream in, final Map<String, String> params) {
        if (in == null) {
            throw new CrawlerSystemException("The inputstream is null.");
        }
        final String password = getPassword(params);
        // Loading happens inside the try so that load failures are wrapped like any
        // other extraction failure and the document is always closed.
        try (PDDocument document = PDDocument.load(in, password)) {
            final StringWriter writer = new StringWriter();
            final PDFTextStripper stripper = new PDFTextStripper();
            final AtomicBoolean done = new AtomicBoolean(false);
            // Written by the worker before done.set(true) and read here only after
            // done.get() returned true, so the plain HashSet is safely published.
            final HashSet<Exception> exceptionSet = new HashSet<>();
            final Thread task = new Thread(() -> {
                try {
                    stripper.writeText(document, writer);
                    extractEmbeddedDocuments(document, writer);
                    extractAnnotations(document, writer);
                } catch (final Exception e) {
                    exceptionSet.add(e);
                } finally {
                    done.set(true);
                }
            }, Thread.currentThread().getName() + "-pdf");
            task.setDaemon(isDaemonThread);
            task.start();
            try {
                task.join(timeout);
            } catch (final InterruptedException e) {
                // Stop the worker and keep the caller's interrupt status visible;
                // the exception itself is wrapped by the outer catch below.
                task.interrupt();
                Thread.currentThread().interrupt();
                throw e;
            }
            if (!done.get()) {
                // Keep interrupting for up to ~10 seconds to give the worker a chance to stop.
                for (int i = 0; i < 100 && !done.get(); i++) {
                    task.interrupt();
                    ThreadUtil.sleep(100L);
                }
                throw new ExtractException("PDFBox process cannot finish in " + timeout + " ms.");
            }
            if (!exceptionSet.isEmpty()) {
                throw exceptionSet.iterator().next();
            }
            writer.flush();
            final ExtractData extractData = new ExtractData(writer.toString());
            extractMetadata(document, extractData);
            return extractData;
        } catch (final Exception e) {
            throw new ExtractException(e);
        }
    }

    /**
     * Appends the text of files attached through file-attachment annotations.
     * A page whose annotations cannot be read is logged and skipped.
     *
     * @param doc the document whose pages are scanned
     * @param writer the writer receiving the extracted attachment text
     */
    protected void extractAnnotations(final PDDocument doc, final StringWriter writer) {
        for (final PDPage page : doc.getPages()) {
            try {
                for (final PDAnnotation annotation : page.getAnnotations()) {
                    if (!(annotation instanceof PDAnnotationFileAttachment)) {
                        continue;
                    }
                    final PDFileSpecification fileSpec = ((PDAnnotationFileAttachment) annotation).getFile();
                    if (!(fileSpec instanceof PDComplexFileSpecification)) {
                        continue;
                    }
                    final PDComplexFileSpecification complexFileSpec = (PDComplexFileSpecification) fileSpec;
                    final PDEmbeddedFile embeddedFile = getEmbeddedFile(complexFileSpec);
                    extractFile(complexFileSpec.getFilename(), embeddedFile, writer);
                }
            } catch (final IOException e) {
                logger.warn("Failed to parse annotation.", e);
            }
        }
    }

    /**
     * Extracts the text of one embedded file and appends it, followed by a line
     * feed, to the writer. Nothing is written when the file is missing, when no
     * MIME type or extractor can be determined from the file name, or when the
     * nested extractor yields no content. Failures of the nested extractor are
     * logged at debug level and otherwise ignored (best effort).
     *
     * @param filename the name of the embedded file, used for MIME type detection
     * @param embeddedFile the embedded file, may be {@code null}
     * @param writer the writer receiving the extracted text
     */
    protected void extractFile(final String filename, final PDEmbeddedFile embeddedFile, final StringWriter writer) {
        if (embeddedFile == null) {
            // A file specification without any embedded stream has nothing to extract.
            return;
        }
        final MimeTypeHelper mimeTypeHelper = getMimeTypeHelper();
        final ExtractorFactory extractorFactory = getExtractorFactory();
        final String mimeType = mimeTypeHelper.getContentType(null, filename);
        if (mimeType == null) {
            return;
        }
        final Extractor extractor = extractorFactory.getExtractor(mimeType);
        if (extractor == null) {
            return;
        }
        try (InputStream is = embeddedFile.createInputStream()) {
            final Map<String, String> map = new HashMap<>();
            map.put("resourceName", filename);
            final String content = extractor.getText(is, map).getContent();
            // StringWriter.write(String) would append the literal text "null".
            if (content != null) {
                writer.write(content);
                writer.write('\n');
            }
        } catch (final Exception e) {
            logger.debug("Exception in an internal extractor.", e);
        }
    }

    /**
     * Appends the text of the files listed in the document's embedded-files name
     * tree. The names of the root node are used when present; otherwise the names
     * of each direct kid of the root are processed.
     *
     * @param document the document to inspect
     * @param writer the writer receiving the extracted text
     */
    protected void extractEmbeddedDocuments(final PDDocument document, final StringWriter writer) {
        final PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
        final PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
        if (efTree == null) {
            return;
        }
        try {
            final Map<String, PDComplexFileSpecification> embeddedFileNames = efTree.getNames();
            if (embeddedFileNames != null) {
                processEmbeddedDocNames(embeddedFileNames, writer);
                return;
            }
            final List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
            if (kids == null) {
                return;
            }
            for (final PDNameTreeNode<PDComplexFileSpecification> node : kids) {
                processEmbeddedDocNames(node.getNames(), writer);
            }
        } catch (final IOException e) {
            logger.warn("Failed to parse embedded documents.", e);
        }
    }

    /**
     * Extracts every embedded file of a name-to-specification map. Entries with a
     * {@code null} specification are skipped.
     *
     * @param embeddedFileNames the embedded file specifications keyed by name, may be {@code null}
     * @param writer the writer receiving the extracted text
     */
    protected void processEmbeddedDocNames(final Map<String, PDComplexFileSpecification> embeddedFileNames,
            final StringWriter writer) {
        if (embeddedFileNames == null || embeddedFileNames.isEmpty()) {
            return;
        }
        for (final Map.Entry<String, PDComplexFileSpecification> entry : embeddedFileNames.entrySet()) {
            final PDComplexFileSpecification spec = entry.getValue();
            if (spec != null) {
                extractFile(entry.getKey(), getEmbeddedFile(spec), writer);
            }
        }
    }

    /**
     * Returns the first available embedded file of a file specification, trying
     * the Unicode, DOS, Mac, Unix and generic entries in that order.
     *
     * @param fileSpec the file specification, may be {@code null}
     * @return the embedded file, or {@code null} if none is available
     */
    protected PDEmbeddedFile getEmbeddedFile(final PDComplexFileSpecification fileSpec) {
        if (fileSpec == null) {
            return null;
        }
        PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFileUnicode();
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileDos();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileMac();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFileUnix();
        }
        if (embeddedFile == null) {
            embeddedFile = fileSpec.getEmbeddedFile();
        }
        return embeddedFile;
    }

    /**
     * Copies the document-information entries of the document into the extract
     * data. Entries whose value is {@code null} are omitted.
     *
     * @param document the document to read the information dictionary from
     * @param extractData the target of the metadata values
     */
    protected void extractMetadata(final PDDocument document, final ExtractData extractData) {
        final PDDocumentInformation info = document.getDocumentInformation();
        if (info == null) {
            return;
        }
        for (final String key : info.getMetadataKeys()) {
            addMetadata(extractData, key, info.getCustomMetadataValue(key));
        }
    }

    /**
     * Stores a metadata value unless it is {@code null}.
     *
     * @param extractData the target of the metadata value
     * @param name the metadata key
     * @param value the metadata value, ignored when {@code null}
     */
    protected void addMetadata(final ExtractData extractData, final String name, final String value) {
        if (value != null) {
            extractData.putValue(name, value);
        }
    }

    /**
     * Returns the extraction timeout.
     *
     * @return the timeout in milliseconds
     */
    public long getTimeout() {
        return timeout;
    }

    /**
     * Sets the extraction timeout.
     *
     * @param timeout the timeout in milliseconds
     */
    public void setTimeout(final long timeout) {
        this.timeout = timeout;
    }

    /**
     * Sets whether the PDFBox worker thread is started as a daemon thread.
     *
     * @param isDaemonThread {@code true} to use a daemon thread
     */
    public void setDaemonThread(final boolean isDaemonThread) {
        this.isDaemonThread = isDaemonThread;
    }
}

