/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.url.LaxURLCodec;
import org.archive.util.TextUtils;

/**
 * Extractor that inspects the URIs of a {@link CrawlURI}'s existing outlinks
 * and speculatively adds any absolute http(s) URIs found inside their query
 * strings as further outlinks.
 *
 * <p>A candidate is taken either from the whole decoded query string or from
 * the decoded value of an individual {@code key=value} parameter, and must
 * match {@link #ABS_HTTP_URI_PATTERN}.
 */
public class ExtractorURI extends Extractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorURI.class.getName());

    /** Matches a string that is, in its entirety, an absolute http or https URI. */
    protected static final String ABS_HTTP_URI_PATTERN = "^https?://[^\\s<>]*$";

    /**
     * Accepts every URI; this extractor applies no filter of its own.
     *
     * @param uri the URI being considered (unused)
     * @return always {@code true}
     */
    @Override
    protected boolean shouldProcess(CrawlURI uri) {
        return true;
    }

    /**
     * Runs {@link #extractLink(CrawlURI, CrawlURI)} over each outlink currently
     * recorded on {@code curi}.
     *
     * @param curi the URI whose outlinks are examined and extended
     */
    @Override
    public void extract(CrawlURI curi) {
        // Iterate over a snapshot: extractLink() calls addOutlink(curi, ...),
        // which presumably appends to the same collection returned by
        // getOutLinks() (not visible in this file). Iterating the live
        // collection would then risk a ConcurrentModificationException.
        List<CrawlURI> snapshot = new ArrayList<CrawlURI>(curi.getOutLinks());
        for (CrawlURI link : snapshot) {
            this.extractLink(curi, link);
        }
    }

    /**
     * Looks for http(s) URIs embedded in the query string of {@code wref} and
     * adds each one to {@code curi} as a speculative outlink.
     *
     * <p>URIs that cannot be parsed are logged at {@link Level#FINE} and skipped.
     *
     * @param curi the URI that receives any newly discovered outlinks
     * @param wref an existing outlink whose URI is searched for embedded URIs
     */
    protected void extractLink(CrawlURI curi, CrawlURI wref) {
        UURI source;
        try {
            source = UURIFactory.getInstance(wref.getURI());
        } catch (URIException e) {
            LOGGER.log(Level.FINE, "bad URI", e);
            return;
        }
        if (source == null) {
            return;
        }
        for (String uri : ExtractorURI.extractQueryStringLinks(source)) {
            try {
                UURI dest = UURIFactory.getInstance(uri);
                this.addOutlink(curi, dest, LinkContext.SPECULATIVE_MISC, Hop.SPECULATIVE);
                this.numberOfLinksExtracted.incrementAndGet();
            } catch (URIException e) {
                LOGGER.log(Level.FINE, "bad URI", e);
            }
        }
    }

    /**
     * Collects strings from the query portion of {@code source} that look like
     * absolute http(s) URIs.
     *
     * <p>Two places are checked, in this order:
     * <ol>
     *   <li>the entire decoded query string;</li>
     *   <li>the decoded value of each {@code &}-separated parameter that splits
     *       on {@code =} into exactly a key and a value.</li>
     * </ol>
     *
     * @param source the URI whose query string is examined
     * @return the matching candidates in discovery order; empty (never
     *         {@code null}) when there is no query, the query cannot be read,
     *         or nothing matches
     */
    protected static List<String> extractQueryStringLinks(UURI source) {
        List<String> results = new ArrayList<String>();
        String decodedQuery;
        try {
            decodedQuery = source.getQuery();
        } catch (URIException e) {
            // Query cannot be decoded: nothing to extract.
            return results;
        }
        if (decodedQuery == null) {
            return results;
        }
        Matcher m = TextUtils.getMatcher(ABS_HTTP_URI_PATTERN, decodedQuery);
        try {
            // First check whether the whole query string is itself a URI.
            if (m.matches()) {
                results.add(decodedQuery);
            }
            // Then check each individual parameter value.
            String rawQuery = new String(source.getRawQuery());
            for (String param : rawQuery.split("&")) {
                String[] keyVal = param.split("=");
                if (keyVal.length != 2) {
                    continue;
                }
                String candidate;
                try {
                    candidate = LaxURLCodec.DEFAULT.decode(keyVal[1]);
                } catch (DecoderException e) {
                    // Value cannot be decoded, so it is not a usable candidate.
                    continue;
                }
                m.reset(candidate);
                if (m.matches()) {
                    results.add(candidate);
                }
            }
        } finally {
            // Hand the matcher back only after its last use, and on every
            // path; recycling it earlier would mean reusing a matcher that
            // has already been released.
            TextUtils.recycleMatcher(m);
        }
        return results;
    }
}

