/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.nio.charset.CoderMalfunctionError;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.modules.Processor;
import org.archive.modules.extractor.ExtractorParameters;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.modules.extractor.UriErrorLoggerModule;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.UriUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Base class for processors that extract outlinks from a {@link CrawlURI}.
 *
 * <p>Subclasses implement {@link #extract(CrawlURI)}; this class wraps that
 * call so that a small set of known failure types is recorded on the URI
 * instead of propagating, and it offers helpers for turning discovered link
 * strings into outlinks of the source URI. All outlink helpers skip
 * {@code data:} URIs.
 */
public abstract class Extractor extends Processor {
    /** Single class-wide logger (previously declared twice under two names). */
    private static final Logger LOGGER = Logger.getLogger(Extractor.class.getName());

    /** Running count of extracted links; persisted in checkpoints and shown in {@link #report()}. */
    protected AtomicLong numberOfLinksExtracted = new AtomicLong(0L);

    /** Parameters installed by the constructor until other parameters are set. */
    public static final ExtractorParameters DEFAULT_PARAMETERS = new ExtractorParameters(){

        @Override
        public int getMaxOutlinks() {
            return 6000;
        }

        @Override
        public boolean getExtractIndependently() {
            return false;
        }

        @Override
        public boolean getExtract404s() {
            return false;
        }
    };

    /** Sink for URI errors; may be {@code null} if never injected. */
    protected transient UriErrorLoggerModule loggerModule;

    /** Active extraction parameters; initialised to {@link #DEFAULT_PARAMETERS}. */
    protected transient ExtractorParameters extractorParameters;

    /** Creates an extractor configured with {@link #DEFAULT_PARAMETERS}. */
    public Extractor() {
        this.setExtractorParameters(DEFAULT_PARAMETERS);
    }

    /** @return the module URI errors are reported to, or {@code null} if none was set */
    public UriErrorLoggerModule getLoggerModule() {
        return this.loggerModule;
    }

    /** @param loggerModule the module that should receive URI errors */
    @Autowired
    public void setLoggerModule(UriErrorLoggerModule loggerModule) {
        this.loggerModule = loggerModule;
    }

    /** @return the extraction parameters currently in effect */
    public ExtractorParameters getExtractorParameters() {
        return this.extractorParameters;
    }

    /** @param helper the extraction parameters to use from now on */
    @Autowired(required=false)
    public void setExtractorParameters(ExtractorParameters helper) {
        this.extractorParameters = helper;
    }

    /**
     * Runs {@link #extract(CrawlURI)} and records selected failures on the
     * URI rather than letting them propagate.
     *
     * <p>Only {@link NullPointerException}, {@link StackOverflowError} and
     * {@link CoderMalfunctionError} are intercepted; anything else thrown by
     * {@code extract} propagates to the caller.
     *
     * @param uri the URI being processed
     */
    @Override
    protected final void innerProcess(CrawlURI uri) throws InterruptedException {
        try {
            this.extract(uri);
        }
        catch (NullPointerException npe) {
            this.handleException(uri, npe);
        }
        catch (StackOverflowError soe) {
            this.handleException(uri, soe);
        }
        catch (CoderMalfunctionError cme) {
            this.handleException(uri, cme);
        }
    }

    /**
     * Records a failure on the URI: adds an {@code err=<class name>}
     * annotation, appends the throwable to the non-fatal failures, and logs
     * it at INFO level.
     */
    private void handleException(CrawlURI uri, Throwable t) {
        uri.getAnnotations().add("err=" + t.getClass().getName());
        uri.getNonFatalFailures().add(t);
        LOGGER.log(Level.INFO, "Exception", t);
    }

    /**
     * Performs the actual link extraction for the given URI.
     *
     * @param var1 the URI to extract links from
     */
    protected abstract void extract(CrawlURI var1);

    /**
     * Resolves {@code uri} against {@code curi}'s own URI and adds the result
     * to {@code curi}'s outlinks.
     *
     * @param curi    the source URI receiving the outlink
     * @param uri     the discovered link text
     * @param context the link context passed to the new CrawlURI
     * @param hop     the hop type passed to the new CrawlURI
     * @return the added outlink, or {@code null} if {@code uri} is a data URI
     *         or could not be turned into a valid URI (the latter is reported
     *         through {@link #logUriError})
     */
    protected CrawlURI addOutlink(CrawlURI curi, String uri, LinkContext context, Hop hop) {
        if (UriUtils.isDataUri(uri)) {
            return null;
        }
        try {
            UURI dest = UURIFactory.getInstance(curi.getUURI(), uri);
            CrawlURI link = curi.createCrawlURI(dest, context, hop);
            curi.getOutLinks().add(link);
            return link;
        }
        catch (URIException e) {
            this.logUriError(e, curi.getUURI(), uri);
            return null;
        }
    }

    /**
     * Adds an already-built {@link UURI} to {@code curi}'s outlinks unless its
     * scheme is {@code data} (compared case-insensitively). A
     * {@link URIException} raised while creating the outlink is reported
     * through {@link #logUriError} and otherwise ignored.
     *
     * @param curi    the source URI receiving the outlink
     * @param uuri    the destination URI
     * @param context the link context passed to the new CrawlURI
     * @param hop     the hop type passed to the new CrawlURI
     */
    protected void addOutlink(CrawlURI curi, UURI uuri, LinkContext context, Hop hop) {
        if ("data".equalsIgnoreCase(uuri.getScheme())) {
            return;
        }
        try {
            CrawlURI link = curi.createCrawlURI(uuri, context, hop);
            curi.getOutLinks().add(link);
        }
        catch (URIException e) {
            this.logUriError(e, curi.getUURI(), uuri.toString());
        }
    }

    /**
     * Reports a URI error to the configured {@link UriErrorLoggerModule}.
     *
     * <p>If no module has been set, the error is written to this class's
     * logger instead, so that one malformed link does not surface as a
     * {@link NullPointerException} from inside an {@code addOutlink} call.
     *
     * @param e    the URI problem
     * @param uuri the URI of the document the link was found in
     * @param l    the offending link text
     */
    public void logUriError(URIException e, UURI uuri, CharSequence l) {
        if (this.loggerModule == null) {
            LOGGER.log(Level.INFO, "URI error (no UriErrorLoggerModule set): base=" + uuri + " link=" + l, e);
            return;
        }
        this.loggerModule.logUriError(e, uuri, l);
    }

    /**
     * Extends the superclass checkpoint state with the extracted-link count
     * under the key {@code numberOfLinksExtracted}.
     */
    @Override
    protected JSONObject toCheckpointJson() throws JSONException {
        JSONObject json = super.toCheckpointJson();
        json.put("numberOfLinksExtracted", this.numberOfLinksExtracted.get());
        return json;
    }

    /**
     * Restores the superclass state and then the extracted-link count from the
     * key {@code numberOfLinksExtracted}.
     */
    @Override
    protected void fromCheckpointJson(JSONObject json) throws JSONException {
        super.fromCheckpointJson(json);
        this.numberOfLinksExtracted.set(json.getLong("numberOfLinksExtracted"));
    }

    /**
     * @return the superclass report followed by one line giving the number of
     *         links extracted and the number of CrawlURIs handled
     */
    @Override
    public String report() {
        // Method-local buffer: no synchronisation needed, so StringBuilder suffices.
        StringBuilder ret = new StringBuilder();
        ret.append(super.report());
        ret.append("  ")
           .append(this.numberOfLinksExtracted)
           .append(" links from ")
           .append(this.getURICount())
           .append(" CrawlURIs\n");
        return ret.toString();
    }

    /**
     * Resolves {@code newUri} against {@code uri}'s base URI and adds it as an
     * outlink, subject to the {@code max} outlink limit.
     *
     * @param uri     the source URI receiving the outlink
     * @param max     the maximum number of outlinks {@code uri} may hold
     * @param newUri  the discovered link text
     * @param context the link context passed to the new CrawlURI
     * @param hop     the hop type passed to the new CrawlURI
     * @return the added outlink, or {@code null} if {@code newUri} is a data
     *         URI or the limit was already reached
     * @throws URIException if the link cannot be turned into a valid URI
     */
    public static CrawlURI addRelativeToBase(CrawlURI uri, int max, CharSequence newUri, LinkContext context, Hop hop) throws URIException {
        if (UriUtils.isDataUri(newUri)) {
            return null;
        }
        UURI dest = UURIFactory.getInstance(uri.getBaseURI(), newUri.toString());
        return Extractor.add2(uri, max, dest, context, hop);
    }

    /**
     * Resolves {@code newUri} against {@code uri}'s via and adds it as an
     * outlink, subject to the {@code max} outlink limit.
     *
     * <p>When the via is {@code null} the base URI is used instead; the first
     * time this happens for a given {@code uri} it is logged and the URI is
     * annotated with {@code usedBaseForVia}.
     *
     * @param uri     the source URI receiving the outlink
     * @param max     the maximum number of outlinks {@code uri} may hold
     * @param newUri  the discovered link text
     * @param context the link context passed to the new CrawlURI
     * @param hop     the hop type passed to the new CrawlURI
     * @return the added outlink, or {@code null} if {@code newUri} is a data
     *         URI or the limit was already reached
     * @throws URIException if the link cannot be turned into a valid URI
     */
    public static CrawlURI addRelativeToVia(CrawlURI uri, int max, String newUri, LinkContext context, Hop hop) throws URIException {
        if (UriUtils.isDataUri(newUri)) {
            return null;
        }
        UURI relTo = uri.getVia();
        if (relTo == null) {
            // The annotation doubles as a "already warned" marker for this URI.
            if (!uri.getAnnotations().contains("usedBaseForVia")) {
                LOGGER.info("no via where expected; using base instead: " + uri);
                uri.getAnnotations().add("usedBaseForVia");
            }
            relTo = uri.getBaseURI();
        }
        UURI dest = UURIFactory.getInstance(relTo, newUri);
        return Extractor.add2(uri, max, dest, context, hop);
    }

    /**
     * Adds {@code newUri}, taken as-is without resolving against another URI,
     * as an outlink of {@code uri}, subject to the {@code max} outlink limit.
     *
     * <p>Data URIs are skipped, matching {@link #addRelativeToBase} and
     * {@link #addRelativeToVia}.
     *
     * @param uri     the source URI receiving the outlink
     * @param max     the maximum number of outlinks {@code uri} may hold
     * @param newUri  the discovered link text
     * @param context the link context passed to the new CrawlURI
     * @param hop     the hop type passed to the new CrawlURI
     * @throws URIException if the link cannot be turned into a valid URI
     */
    public static void add(CrawlURI uri, int max, String newUri, LinkContext context, Hop hop) throws URIException {
        if (UriUtils.isDataUri(newUri)) {
            return;
        }
        UURI dest = UURIFactory.getInstance(newUri);
        Extractor.add2(uri, max, dest, context, hop);
    }

    /**
     * Adds {@code dest} as an outlink of {@code curi} while fewer than
     * {@code max} outlinks are present; otherwise increments the
     * discarded-outlink counter on {@code curi}.
     *
     * @return the added outlink, or {@code null} if it was discarded
     */
    private static CrawlURI add2(CrawlURI curi, int max, UURI dest, LinkContext context, Hop hop) throws URIException {
        if (curi.getOutLinks().size() < max) {
            CrawlURI link = curi.createCrawlURI(dest, context, hop);
            curi.getOutLinks().add(link);
            return link;
        }
        curi.incrementDiscardedOutLinks();
        return null;
    }
}

