/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.lang.StringUtils;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.CrawlURI;
import org.archive.modules.Processor;
import org.archive.util.TextUtils;

/**
 * Processor that computes a SHA1 digest over the replayed character content of
 * a fetched URI and stores it on the {@link CrawlURI} via
 * {@code setContentDigest}, replacing whatever digest was there before.
 *
 * <p>Before digesting, every match of the configured "strip" regular
 * expression is replaced by a single space, so that volatile fragments of a
 * document do not change its digest. With an empty or {@code null} regex the
 * content is digested unmodified.
 *
 * <p>Only URIs whose content type starts with {@code "text"} and whose content
 * size does not exceed the configured maximum are processed.
 */
public class HTTPContentDigest extends Processor {
    private static final long serialVersionUID = 3L;
    private static final Logger logger = Logger.getLogger(HTTPContentDigest.class.getName());

    /** Digest algorithm name, used both for the JCA lookup and as the scheme set on the URI. */
    private static final String SHA1 = "SHA1";

    /** Default value for the "maxSizeToDigest" setting (0x100000 = 1,048,576). */
    private static final long DEFAULT_MAX_SIZE_TO_DIGEST = 0x100000L;

    /**
     * Returns the regular expression whose matches are replaced by a space
     * before digesting.
     *
     * @return the value stored under the "stripRegex" key; empty by default
     */
    public String getStripRegex() {
        return (String) this.kp.get("stripRegex");
    }

    /**
     * Sets the regular expression whose matches are replaced by a space before
     * digesting.
     *
     * @param regex the pattern to strip; an empty string disables stripping
     */
    public void setStripRegex(String regex) {
        this.kp.put("stripRegex", regex);
    }

    /**
     * Returns the largest content size that will still be digested.
     *
     * @return the value stored under the "maxSizeToDigest" key; {@code -1} or
     *         lower means "no limit" (see {@link #shouldProcess(CrawlURI)})
     */
    public long getMaxSizeToDigest() {
        return (Long) this.kp.get("maxSizeToDigest");
    }

    /**
     * Sets the largest content size that will still be digested.
     *
     * @param threshold the size limit, compared against
     *        {@code CrawlURI.getContentSize()}; {@code -1} or lower disables
     *        the limit
     */
    public void setMaxSizeToDigest(long threshold) {
        this.kp.put("maxSizeToDigest", threshold);
    }

    /**
     * Creates a processor with an empty strip regex and the default size limit.
     */
    public HTTPContentDigest() {
        this.setStripRegex("");
        this.setMaxSizeToDigest(DEFAULT_MAX_SIZE_TO_DIGEST);
    }

    /**
     * Decides whether the given URI should be digested.
     *
     * @param uri the URI under consideration
     * @return {@code true} when the content type starts with {@code "text"}
     *         and the content size is within the configured limit (or the
     *         limit is disabled); {@code false} otherwise, including when the
     *         URI reports no content type at all
     */
    @Override
    protected boolean shouldProcess(CrawlURI uri) {
        String contentType = uri.getContentType();
        // A missing content type cannot be "text"; skip rather than fail with an NPE.
        if (contentType == null || !contentType.startsWith("text")) {
            return false;
        }
        long maxSize = this.getMaxSizeToDigest();
        return maxSize <= -1L || maxSize >= uri.getContentSize();
    }

    /**
     * Computes the SHA1 digest of the (optionally stripped) content and sets
     * it on the URI.
     *
     * <p>Any exception raised while reading or digesting the content is added
     * to the URI's non-fatal failures and logged as a warning; the URI's
     * digest is then left untouched. If the SHA1 algorithm is unavailable the
     * problem is logged and the method returns without modifying the URI.
     *
     * @param curi the URI whose recorded content is digested
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void innerProcess(CrawlURI curi) throws InterruptedException {
        String regex = this.getStripRegex();
        try {
            // NOTE(review): the sequence is not closed here; SOURCE does not show
            // whether the recorder owns its lifecycle — confirm before adding a close().
            ReplayCharSequence cs = curi.getRecorder().getContentReplayCharSequence();

            MessageDigest digest;
            try {
                digest = MessageDigest.getInstance(SHA1);
            } catch (NoSuchAlgorithmException e) {
                logger.log(Level.SEVERE, "Digest algorithm " + SHA1 + " is not available", e);
                return;
            }

            String text;
            if (StringUtils.isEmpty(regex)) {
                text = cs.toString();
            } else {
                Matcher matcher = TextUtils.getMatcher(regex, cs);
                try {
                    text = matcher.replaceAll(" ");
                } finally {
                    // Always hand the matcher back, even if replaceAll throws.
                    TextUtils.recycleMatcher(matcher);
                }
            }

            // Use a fixed charset so the digest does not depend on the JVM's
            // platform default encoding.
            digest.update(text.getBytes(StandardCharsets.UTF_8));
            curi.setContentDigest(SHA1, digest.digest());
        } catch (Exception e) {
            curi.getNonFatalFailures().add(e);
            logger.warning("Failed get of replay char sequence " + curi.toString() + " " + e.getMessage() + " " + Thread.currentThread().getName());
        }
    }
}

