/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;

/**
 * Extractor that derives additional ("implied") URIs from the outlinks
 * already recorded on a {@link CrawlURI}.
 *
 * <p>Each existing outlink is tested against the configured {@code regex}.
 * When the whole outlink URI matches, a new URI is built by applying the
 * configured {@code format} as a regex replacement string, and the result is
 * added as an inferred outlink. Optionally, the outlink that triggered the
 * match is removed from the outlinks collection.
 *
 * <p>Defaults set by the constructor: {@code regex = "^(.*)$"},
 * {@code format = ""}, {@code removeTriggerUris = false}.
 */
public class ExtractorImpliedURI
extends Extractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorImpliedURI.class.getName());

    /**
     * @return the pattern an outlink URI must fully match to trigger
     *         creation of an implied URI
     */
    public Pattern getRegex() {
        return (Pattern)this.kp.get("regex");
    }

    /**
     * @param regex the trigger pattern; its capture groups may be referenced
     *        from the format string
     */
    public void setRegex(Pattern regex) {
        this.kp.put("regex", regex);
    }

    /**
     * @return the replacement string used to build the implied URI
     */
    public String getFormat() {
        return (String)this.kp.get("format");
    }

    /**
     * @param format the replacement string, in the syntax accepted by
     *        {@link Matcher#replaceFirst(String)} (e.g. {@code $1} refers to
     *        the first capture group of the trigger pattern)
     */
    public void setFormat(String format) {
        this.kp.put("format", format);
    }

    /**
     * @return {@code true} if an outlink that produced an implied URI should
     *         itself be removed from the outlinks collection
     */
    public boolean getRemoveTriggerUris() {
        return (Boolean)this.kp.get("removeTriggerUris");
    }

    /**
     * @param remove whether to remove the triggering outlink once its implied
     *        URI has been added
     */
    public void setRemoveTriggerUris(boolean remove) {
        this.kp.put("removeTriggerUris", remove);
    }

    /**
     * Creates an extractor with the default settings: trigger pattern
     * {@code ^(.*)$}, an empty format string, and trigger removal disabled.
     */
    public ExtractorImpliedURI() {
        this.setRegex(Pattern.compile("^(.*)$"));
        this.setFormat("");
        this.setRemoveTriggerUris(false);
    }

    /**
     * This extractor is applied to every URI it is handed.
     *
     * @param uri the URI under consideration (not inspected)
     * @return always {@code true}
     */
    @Override
    protected boolean shouldProcess(CrawlURI uri) {
        return true;
    }

    /**
     * Examines every outlink currently on {@code curi} and, for each one that
     * matches the trigger pattern, adds the implied URI as an inferred
     * outlink ({@link LinkContext#INFERRED_MISC}, {@link Hop#INFERRED}).
     *
     * <p>An implied string that cannot be turned into a {@link UURI} is
     * logged at FINE level and skipped; processing continues with the next
     * outlink.
     *
     * @param curi the URI whose outlinks are scanned and extended
     */
    @Override
    public void extract(CrawlURI curi) {
        // The trigger and format do not depend on the individual outlink, so
        // look them up once rather than on every iteration.
        Pattern trigger = this.getRegex();
        String build = this.getFormat();

        // Iterate over a snapshot: the loop body removes from the live
        // outlinks collection (and addOutlink presumably adds to it), which
        // must not disturb the iteration.
        for (CrawlURI link : new ArrayList<CrawlURI>(curi.getOutLinks())) {
            CharSequence dest = link.getUURI();
            String implied = ExtractorImpliedURI.extractImplied(dest, trigger, build);
            if (implied == null) {
                continue;
            }
            try {
                UURI target = UURIFactory.getInstance(implied);
                this.addOutlink(curi, target, LinkContext.INFERRED_MISC, Hop.INFERRED);
                this.numberOfLinksExtracted.incrementAndGet();
                if (this.getRemoveTriggerUris()) {
                    this.removeTriggerLink(curi, link);
                }
            } catch (URIException e) {
                LOGGER.log(Level.FINE, "bad URI", e);
            }
        }
    }

    /**
     * Removes the outlink that triggered an implied URI from the live
     * outlinks collection of {@code curi}, keeping the extracted-links
     * counter in step and logging the outcome at FINE level.
     */
    private void removeTriggerLink(CrawlURI curi, CrawlURI link) {
        boolean removed = curi.getOutLinks().remove(link);
        // Only build the log message when FINE is actually enabled; this runs
        // once per removed trigger and the concatenation is otherwise wasted.
        boolean fine = LOGGER.isLoggable(Level.FINE);
        if (removed) {
            if (fine) {
                LOGGER.log(Level.FINE, link.getURI() + " has been removed from " + curi.getURI() + " outlinks list.");
            }
            // One link in, one link out: undo the increment made for the
            // implied URI so the counter reflects the net change.
            this.numberOfLinksExtracted.decrementAndGet();
        } else if (fine) {
            LOGGER.log(Level.FINE, "Failed to remove " + link.getURI() + " from " + curi.getURI() + " outlinks list.");
        }
    }

    /**
     * Builds the implied URI string for {@code uri}, if any.
     *
     * <p>The trigger must match the <em>entire</em> input
     * ({@link Matcher#matches()}). When it does, the result is produced by
     * {@link Matcher#replaceFirst(String)} with {@code build} as the
     * replacement string, so group references such as {@code $1} are
     * expanded.
     *
     * @param uri the candidate URI text; may be {@code null}
     * @param trigger the pattern that must match the whole of {@code uri};
     *        may be {@code null}
     * @param build the replacement string; may be {@code null}
     * @return the implied URI string, or {@code null} if any argument is
     *         {@code null} or {@code uri} does not match {@code trigger}
     */
    protected static String extractImplied(CharSequence uri, Pattern trigger, String build) {
        // A missing trigger already meant "nothing implied"; treat a missing
        // URI or format the same way instead of failing with an NPE.
        if (uri == null || trigger == null || build == null) {
            return null;
        }
        Matcher m = trigger.matcher(uri);
        if (!m.matches()) {
            return null;
        }
        return m.replaceFirst(build);
    }
}

