/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.io.SinkHandlerLogThread;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.ContentExtractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.modules.extractor.PDFParser;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.FileUtils;

/**
 * Content extractor that pulls URIs out of PDF documents and records them as
 * outlinks on the {@link CrawlURI} being processed.
 *
 * <p>The recorded content body is copied to a temporary file, handed to
 * {@link PDFParser}, and every URI the parser reports is added as a
 * {@link Hop#NAVLINK} outlink with {@link LinkContext#NAVLINK_MISC} context.
 * Documents whose recorded size exceeds {@link #getMaxSizeToParse()} are
 * skipped.
 */
public class ExtractorPDF
extends ContentExtractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorPDF.class.getName());

    /** Key under which the size limit is stored in {@code kp}. */
    private static final String MAX_SIZE_TO_PARSE_KEY = "maxSizeToParse";

    /**
     * Size limit installed by the constructor: 10 * 1024 * 1024 (0xA00000).
     * Presumably expressed in bytes, since it is compared against the
     * recorded input size -- confirm against the recorder API.
     */
    private static final long DEFAULT_MAX_SIZE_TO_PARSE = 10L * 1024L * 1024L;

    /**
     * Returns the largest recorded input size this extractor will attempt to
     * parse.
     *
     * @return the current limit as stored under {@code "maxSizeToParse"}
     */
    public long getMaxSizeToParse() {
        return (Long)this.kp.get(MAX_SIZE_TO_PARSE_KEY);
    }

    /**
     * Sets the largest recorded input size this extractor will attempt to
     * parse.
     *
     * @param threshold the new limit; inputs strictly larger than this are
     *        skipped by {@link #shouldExtract(CrawlURI)}
     */
    public void setMaxSizeToParse(long threshold) {
        this.kp.put(MAX_SIZE_TO_PARSE_KEY, threshold);
    }

    /**
     * Creates an extractor with the default size limit
     * ({@code 10 * 1024 * 1024}).
     */
    public ExtractorPDF() {
        this.setMaxSizeToParse(DEFAULT_MAX_SIZE_TO_PARSE);
    }

    /**
     * Decides whether the given URI's content should be parsed as a PDF.
     *
     * @param uri the URI whose recorded content is being considered
     * @return {@code false} when the recorded input is larger than
     *         {@link #getMaxSizeToParse()}; otherwise {@code true} only if
     *         the content type is non-null and starts with
     *         {@code "application/pdf"}
     */
    @Override
    protected boolean shouldExtract(CrawlURI uri) {
        long max = this.getMaxSizeToParse();
        if (uri.getRecorder().getRecordedInput().getSize() > max) {
            return false;
        }
        String ct = uri.getContentType();
        return ct != null && ct.startsWith("application/pdf");
    }

    /**
     * Copies the recorded content body to a temporary file, extracts URIs
     * from it with {@link PDFParser}, and adds each one as an outlink.
     *
     * <p>The temporary file is always handed to
     * {@link FileUtils#deleteSoonerOrLater(File)} once parsing has been
     * attempted, whether or not it succeeded.
     *
     * @param curi the URI whose content is to be parsed
     * @return {@code false} if copying or parsing failed (the exception is
     *         added to the URI's non-fatal failures); {@code true} otherwise,
     *         including when the parser returns {@code null}
     * @throws RuntimeException wrapping the {@link IOException} if the
     *         temporary file cannot be created
     */
    @Override
    protected boolean innerExtract(CrawlURI curi) {
        // Make the temp-file prefix distinct per worker thread: use the
        // thread's serial number when available, else its identity hash.
        Thread thread = Thread.currentThread();
        int sn = thread instanceof SinkHandlerLogThread
                ? ((SinkHandlerLogThread)thread).getSerialNumber()
                : System.identityHashCode(thread);

        File tempFile;
        try {
            tempFile = File.createTempFile("tt" + sn, "tmp.pdf");
        }
        catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }

        ArrayList<String> uris;
        try {
            curi.getRecorder().copyContentBodyTo(tempFile);
            try (PDFParser parser = new PDFParser(tempFile.getAbsolutePath())) {
                uris = parser.extractURIs();
            }
        }
        catch (IOException | RuntimeException e) {
            // Parsing problems are not fatal for the crawl of this URI;
            // record them and report that extraction did not complete.
            curi.getNonFatalFailures().add(e);
            return false;
        }
        finally {
            FileUtils.deleteSoonerOrLater(tempFile);
        }

        if (uris == null) {
            return true;
        }

        for (String link : uris) {
            try {
                UURI dest = UURIFactory.getInstance(link);
                this.addOutlink(curi, dest, LinkContext.NAVLINK_MISC, Hop.NAVLINK);
            }
            catch (URIException e1) {
                this.logUriError(e1, curi.getUURI(), link);
            }
        }

        // Note: the counter is advanced by the number of URIs the parser
        // reported, including any that failed to convert to a UURI above.
        this.numberOfLinksExtracted.addAndGet(uris.size());
        LOGGER.fine(curi + " has " + uris.size() + " links.");
        return true;
    }
}

