/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.io.IOException;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.exception.NestableRuntimeException;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.ContentExtractor;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.net.UURI;
import org.archive.util.DevUtils;
import org.archive.util.TextUtils;
import org.archive.util.UriUtils;

/**
 * Content extractor that scans JavaScript-like content for quoted string
 * literals and records the ones that look like URIs as speculative outlinks
 * of the {@link CrawlURI} being processed.
 *
 * <p>Candidate strings are located with {@link #JAVASCRIPT_STRING_EXTRACTOR},
 * filtered through {@code UriUtils.isPossibleUri}, JavaScript-unescaped,
 * passed through {@code UriUtils.speculativeFixup} and finally accepted or
 * rejected by {@link #shouldAddUri(String)}.
 */
public class ExtractorJS
extends ContentExtractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorJS.class.getName());

    /**
     * Regular expression used to find quoted strings.
     *
     * <p>Group 1 is the opening delimiter: zero to eight backslashes
     * (matched possessively) followed by a single quote, a double quote, or
     * the literal text {@code u0022} / {@code u0027}. Group 2 is the string
     * body: up to 2083 characters containing neither kind of quote. The
     * expression ends with a back-reference to group 1, so the closing
     * delimiter must be textually identical to the opening one.
     */
    protected static final String JAVASCRIPT_STRING_EXTRACTOR = "(\\\\{0,8}+(?:['\"]|u002[27]))([^'\"]{0,2083})(?:\\1)";

    /** Number of URIs passed to {@link #innerExtract(CrawlURI)}; a plain, non-atomic counter. */
    protected long numberOfCURIsHandled = 0L;

    /**
     * Decides whether the given URI's content should be scanned by this
     * extractor.
     *
     * <p>Extraction is attempted when, in order: the content type mentions
     * {@code javascript}, {@code jscript} or {@code ecmascript}; the content
     * type starts with {@code application/json}; the URI string ends with
     * {@code .js}; or the via-context's string form starts with
     * {@code script}. All comparisons are case-insensitive and use
     * {@link Locale#ROOT} so the outcome does not depend on the JVM's default
     * locale (e.g. the Turkish dotless-i mapping of {@code "SCRIPT"}).
     *
     * @param uri the URI under consideration
     * @return {@code true} if the content should be scanned; {@code false}
     *         otherwise, including whenever the content type is {@code null}
     */
    @Override
    protected boolean shouldExtract(CrawlURI uri) {
        String contentType = uri.getContentType();
        if (contentType == null) {
            return false;
        }
        // Media types are case-insensitive, so normalise once before matching.
        String normalizedType = contentType.toLowerCase(Locale.ROOT);
        if (normalizedType.contains("javascript")
                || normalizedType.contains("jscript")
                || normalizedType.contains("ecmascript")
                || normalizedType.startsWith("application/json")) {
            return true;
        }
        if (uri.toString().toLowerCase(Locale.ROOT).endsWith(".js")) {
            return true;
        }
        LinkContext context = uri.getViaContext();
        if (context == null) {
            return false;
        }
        return context.toString().toLowerCase(Locale.ROOT).startsWith("script");
    }

    /**
     * Scans the recorded content of {@code curi} for URI-like strings and
     * adds the number found to {@code numberOfLinksExtracted}.
     *
     * <p>A {@link StackOverflowError} raised while scanning is reported via
     * {@code DevUtils.warnHandle} and otherwise ignored; the method still
     * returns {@code true} in that case.
     *
     * @param curi the URI whose recorded content is scanned
     * @return {@code true} once scanning was attempted; {@code false} if the
     *         content could not be obtained because of an {@link IOException},
     *         which is then appended to the URI's non-fatal failures
     */
    @Override
    protected boolean innerExtract(CrawlURI curi) {
        ++this.numberOfCURIsHandled;
        try {
            // NOTE(review): the sequence is not closed here; presumably the
            // recorder owns its lifetime -- confirm before adding a close().
            ReplayCharSequence cs = curi.getRecorder().getContentReplayCharSequence();
            try {
                this.numberOfLinksExtracted.addAndGet(this.considerStrings(curi, cs));
            }
            catch (StackOverflowError e) {
                DevUtils.warnHandle(e, "ExtractorJS StackOverflowError");
            }
            return true;
        }
        catch (IOException e) {
            curi.getNonFatalFailures().add(e);
            return false;
        }
    }

    /**
     * Scans {@code cs} on behalf of this extractor, treating the content as a
     * standalone JavaScript file (links are added relative to the via URI).
     *
     * @param curi the URI the content belongs to
     * @param cs the content to scan
     * @return the number of links added
     */
    protected long considerStrings(CrawlURI curi, CharSequence cs) {
        return this.considerStrings(this, curi, cs, true);
    }

    /**
     * Scans {@code cs} on behalf of another extractor, treating the content
     * as script that is not a standalone file (links are added relative to
     * the base URI).
     *
     * @param ext the extractor supplying parameters and error logging
     * @param curi the URI the content belongs to
     * @param cs the content to scan
     * @return the number of links added
     */
    public long considerStrings(Extractor ext, CrawlURI curi, CharSequence cs) {
        return this.considerStrings(ext, curi, cs, false);
    }

    /**
     * Finds every quoted string in {@code cs} and offers the URI-like ones to
     * {@link #considerString(Extractor, CrawlURI, boolean, String)}.
     *
     * @param ext the extractor supplying parameters and error logging
     * @param curi the URI the content belongs to
     * @param cs the content to scan
     * @param handlingJSFile {@code true} to add links relative to the via
     *        URI, {@code false} to add them relative to the base URI
     * @return the number of strings that were added as links
     */
    public long considerStrings(Extractor ext, CrawlURI curi, CharSequence cs, boolean handlingJSFile) {
        long foundLinks = 0L;
        Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
        try {
            int startIndex = 0;
            while (strings.find(startIndex)) {
                CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2));
                if (UriUtils.isPossibleUri(subsequence)
                        && this.considerString(ext, curi, handlingJSFile, subsequence.toString())) {
                    ++foundLinks;
                }
                // Resume right after the opening delimiter rather than after
                // the whole match, so the closing quote of this string may
                // also act as the opening quote of the next candidate.
                startIndex = strings.end(1);
            }
        }
        finally {
            // Hand the matcher back even if matching or link handling throws
            // (innerExtract anticipates a StackOverflowError from this call).
            TextUtils.recycleMatcher(strings);
        }
        return foundLinks;
    }

    /**
     * Unescapes and fixes up a single candidate string and, if
     * {@link #shouldAddUri(String)} accepts it, adds it to {@code curi} as a
     * speculative {@code JS_MISC} link.
     *
     * <p>If JavaScript unescaping fails, a warning is logged and the raw
     * candidate is used as-is.
     *
     * @param ext the extractor supplying the outlink limit and error logging
     * @param curi the URI to add the link to
     * @param handlingJSFile {@code true} to resolve relative to the via URI,
     *        {@code false} to resolve relative to the base URI
     * @param candidate the raw string taken from the content
     * @return {@code true} if a link was added; {@code false} if the
     *         candidate was rejected or adding it raised a
     *         {@link URIException} (which is reported through
     *         {@code ext.logUriError})
     */
    protected boolean considerString(Extractor ext, CrawlURI curi, boolean handlingJSFile, String candidate) {
        try {
            candidate = StringEscapeUtils.unescapeJavaScript(candidate);
        }
        catch (NestableRuntimeException e) {
            // Best effort: keep going with the un-unescaped text.
            LOGGER.log(Level.WARNING, "problem unescaping some javascript", e);
        }
        candidate = UriUtils.speculativeFixup(candidate, curi.getUURI());
        if (!this.shouldAddUri(candidate)) {
            return false;
        }
        try {
            int max = ext.getExtractorParameters().getMaxOutlinks();
            if (handlingJSFile) {
                ExtractorJS.addRelativeToVia(curi, max, candidate, LinkContext.JS_MISC, Hop.SPECULATIVE);
            } else {
                ExtractorJS.addRelativeToBase(curi, max, candidate, LinkContext.JS_MISC, Hop.SPECULATIVE);
            }
            return true;
        }
        catch (URIException e) {
            ext.logUriError(e, curi.getUURI(), candidate);
            return false;
        }
    }

    /**
     * Final acceptance test for a fixed-up candidate; subclasses may override
     * to apply a different policy.
     *
     * @param candidate the unescaped, fixed-up candidate string
     * @return the result of {@code UriUtils.isVeryLikelyUri(candidate)}
     */
    protected boolean shouldAddUri(String candidate) {
        return UriUtils.isVeryLikelyUri(candidate);
    }
}

