/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.writer;

import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.archive.format.warc.WARCConstants;
import org.archive.io.WriterPool;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPool;
import org.archive.io.warc.WARCWriterPoolSettings;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.revisit.IdenticalPayloadDigestRevisit;
import org.archive.modules.writer.WriterPoolProcessor;
import org.archive.spring.ConfigPath;
import org.archive.uid.RecordIDGenerator;
import org.archive.uid.UUIDGenerator;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.ANVLRecord;

/**
 * Common base for processors that write through a pool of {@link WARCWriter}s.
 *
 * <p>This class supplies the pieces shared by such writers: the record-id
 * generator, default pool settings, the ANVL metadata block returned by
 * {@link #getMetadata()}, running per-record-type statistics, and the
 * bookkeeping applied to a {@link CrawlURI} after its records were written.
 */
public abstract class BaseWARCWriterProcessor
extends WriterPoolProcessor
implements WARCWriterPoolSettings {
    private static final Logger logger = Logger.getLogger(BaseWARCWriterProcessor.class.getName());

    /** Number of writes that produced at least one record (see {@link #updateMetadataAfterWrite}). */
    protected AtomicLong urlsWritten = new AtomicLong();

    /** Running totals, keyed first by stats group and then by stat name. */
    protected ConcurrentMap<String, ConcurrentMap<String, AtomicLong>> stats = new ConcurrentHashMap<String, ConcurrentMap<String, AtomicLong>>();

    /** Source of record identifiers; a {@link UUIDGenerator} unless replaced via the setter. */
    protected RecordIDGenerator generator = new UUIDGenerator();

    /** Consulted by {@link #getMetadata()}; see the note there. */
    private transient List<String> cachedMetadata;

    /**
     * @return the live statistics map held by this processor (not a copy)
     */
    public ConcurrentMap<String, ConcurrentMap<String, AtomicLong>> getStats() {
        return this.stats;
    }

    /**
     * @return the generator currently used to mint record ids
     */
    public RecordIDGenerator getRecordIDGenerator() {
        return this.generator;
    }

    /**
     * @param generator the generator to use for subsequent record ids
     */
    public void setRecordIDGenerator(RecordIDGenerator generator) {
        this.generator = generator;
    }

    /**
     * Asks the configured generator for a new record id.
     *
     * @return the id produced by the generator
     * @throws IOException declared for subclasses and callers; propagated as-is
     */
    protected URI getRecordID() throws IOException {
        return this.generator.getRecordID();
    }

    /**
     * @return the default maximum file size, 1,000,000,000 bytes
     */
    @Override
    public long getDefaultMaxFileSize() {
        return 1000000000L;
    }

    /**
     * @return a new mutable list holding the single default store path {@code "warcs"}
     */
    @Override
    public List<ConfigPath> getDefaultStorePaths() {
        List<ConfigPath> paths = new ArrayList<ConfigPath>();
        paths.add(new ConfigPath("warcs default store path", "warcs"));
        return paths;
    }

    /**
     * Installs a {@link WARCWriterPool} configured from this processor's settings.
     *
     * @param serialNo serial-number counter handed to the pool
     */
    @Override
    protected void setupPool(AtomicInteger serialNo) {
        this.setPool(new WARCWriterPool(serialNo, this, this.getPoolMaxActive(), this.getMaxWaitForIdleMs()));
    }

    /**
     * Builds the ANVL metadata block describing this crawl: software version,
     * local host address/name, format identifiers, and the non-blank fields of
     * the {@link CrawlMetadata} provider.
     *
     * @return a single-element immutable list containing the rendered ANVL record
     */
    @Override
    public List<String> getMetadata() {
        // NOTE(review): nothing in this class assigns cachedMetadata, so this
        // guard never fires and the record is rebuilt on every call. Behaviour
        // is intentionally left as-is: caching here would also freeze the
        // result of a transiently failed host lookup below.
        if (this.cachedMetadata != null) {
            return this.cachedMetadata;
        }
        ANVLRecord record = new ANVLRecord();
        record.addLabelValue("software", "Heritrix/" + ArchiveUtils.VERSION + " http://crawler.archive.org");
        try {
            InetAddress host = InetAddress.getLocalHost();
            record.addLabelValue("ip", host.getHostAddress());
            record.addLabelValue("hostname", host.getCanonicalHostName());
        }
        catch (UnknownHostException e) {
            // Best effort: the record is still produced, just without ip/hostname.
            logger.log(Level.WARNING, "unable to obtain local crawl engine host", e);
        }
        record.addLabelValue("format", "WARC File Format 1.0");
        record.addLabelValue("conformsTo", "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
        CrawlMetadata provider = this.getMetadataProvider();
        this.addIfNotBlank(record, "operator", provider.getOperator());
        this.addIfNotBlank(record, "publisher", provider.getOrganization());
        this.addIfNotBlank(record, "audience", provider.getAudience());
        this.addIfNotBlank(record, "isPartOf", provider.getJobName());
        this.addIfNotBlank(record, "description", provider.getDescription());
        // Guard against an unset policy name, and lowercase independently of the
        // JVM's default locale so the written value is the same on every host.
        String robotsPolicy = provider.getRobotsPolicyName();
        this.addIfNotBlank(record, "robots", robotsPolicy == null ? null : robotsPolicy.toLowerCase(Locale.ROOT));
        this.addIfNotBlank(record, "http-header-user-agent", provider.getUserAgent());
        this.addIfNotBlank(record, "http-header-from", provider.getOperatorFrom());
        return Collections.singletonList(record.toString());
    }

    /**
     * Adds {@code label: value} to {@code record} unless the value is null,
     * empty, or whitespace only.
     *
     * @param record the record to extend
     * @param label  the field label
     * @param value  the candidate value; skipped when blank
     */
    protected void addIfNotBlank(ANVLRecord record, String label, String value) {
        if (StringUtils.isNotBlank(value)) {
            record.addLabelValue(label, value);
        }
    }

    /**
     * Folds a set of per-write statistics into the running {@link #stats} totals.
     *
     * <p>Missing groups and counters are created with {@code putIfAbsent}, so a
     * concurrent caller that loses the insertion race adds onto the winner's
     * entry instead of overwriting it.
     *
     * @param substats group -> (stat name -> amount) to add
     */
    protected void addStats(Map<String, Map<String, Long>> substats) {
        for (Map.Entry<String, Map<String, Long>> group : substats.entrySet()) {
            ConcurrentMap<String, AtomicLong> totals = this.stats.get(group.getKey());
            if (totals == null) {
                // Look up first so an empty map is only allocated when the group is new.
                ConcurrentMap<String, AtomicLong> fresh = new ConcurrentHashMap<String, AtomicLong>();
                totals = this.stats.putIfAbsent(group.getKey(), fresh);
                if (totals == null) {
                    totals = fresh;
                }
            }
            for (Map.Entry<String, Long> stat : group.getValue().entrySet()) {
                long amount = stat.getValue();
                AtomicLong counter = totals.get(stat.getKey());
                if (counter == null) {
                    counter = totals.putIfAbsent(stat.getKey(), new AtomicLong(amount));
                    if (counter == null) {
                        // Our new counter went in already holding this amount.
                        continue;
                    }
                }
                counter.addAndGet(amount);
            }
        }
    }

    /**
     * Logs the accumulated stats and renders a human-readable summary.
     *
     * @return the multi-line report text
     */
    @Override
    public String report() {
        logger.info("final stats: " + this.stats);

        long revisitRecords = WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.revisit.toString(), "numRecords");
        long contentBytes = WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.response.toString(), "contentBytes")
                + WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.resource.toString(), "contentBytes");
        long uncompressedBytes = WARCWriter.getStat(this.stats, "totals", "totalBytes");
        // Read once so the raw and the formatted figure on the same line agree.
        long bytesOnDisk = this.getTotalBytesWritten();

        StringBuilder buf = new StringBuilder();
        buf.append("Processor: ").append(this.getClass().getName()).append("\n");
        buf.append("  Function:          Writes WARCs\n");
        buf.append("  Total CrawlURIs:   ").append(this.urlsWritten).append("\n");
        buf.append("  Revisit records:   ").append(revisitRecords).append("\n");
        buf.append("  Crawled content bytes (including http headers): ").append(contentBytes)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(contentBytes)).append(")\n");
        buf.append("  Total uncompressed bytes (including all warc records): ").append(uncompressedBytes)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(uncompressedBytes)).append(")\n");
        buf.append("  Total size on disk (").append(this.getCompress() ? "compressed" : "uncompressed").append("): ")
                .append(bytesOnDisk)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(bytesOnDisk)).append(")\n");
        return buf.toString();
    }

    /**
     * Copies a two-level stats map: the outer map and each inner map are new
     * {@link HashMap}s, while keys and {@code Long} values are shared.
     *
     * @param orig the map to copy; inner maps must be non-null
     * @return an independent copy of {@code orig}
     */
    protected Map<String, Map<String, Long>> copyStats(Map<String, Map<String, Long>> orig) {
        Map<String, Map<String, Long>> copy = new HashMap<String, Map<String, Long>>(orig.size());
        for (Map.Entry<String, Map<String, Long>> group : orig.entrySet()) {
            copy.put(group.getKey(), new HashMap<String, Long>(group.getValue()));
        }
        return copy;
    }

    /**
     * Performs the bookkeeping that follows a write for {@code curi}: merges the
     * writer's temporary stats into the running totals, accounts for the bytes
     * written, annotates the URI with file name/offset/stats, tags the newest
     * fetch-history entry, and updates the content-digest history from the
     * records the writer logged.
     *
     * @param curi          the URI whose records were just written
     * @param writer        the writer that was used
     * @param startPosition the writer position before the write began
     */
    protected void updateMetadataAfterWrite(CrawlURI curi, WARCWriter writer, long startPosition) {
        Map<String, Map<String, Long>> writeStats = writer.getTmpStats();

        // Only count the URI (and merge stats) when at least one record was produced.
        if (WARCWriter.getStat(writeStats, "totals", "numRecords") > 0L) {
            this.addStats(writeStats);
            this.urlsWritten.incrementAndGet();
        }
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("wrote " + WARCWriter.getStat(writeStats, "totals", "sizeOnDisk") + " bytes to " + writer.getFile().getName() + " for " + curi);
        }
        this.addTotalBytesWritten(writer.getPosition() - startPosition);

        curi.addExtraInfo("warcFilename", writer.getFilenameWithoutOccupiedSuffix());
        curi.addExtraInfo("warcFileOffset", startPosition);
        curi.getData().put("warc-stats", this.copyStats(writeStats));

        // An empty history array has no newest entry to tag.
        HashMap<String, Object>[] history = curi.getFetchHistory();
        if (history != null && history.length > 0 && history[0] != null) {
            history[0].put("write-tag", writer.getFilenameWithoutOccupiedSuffix());
        }

        if (curi.getContentDigest() == null || !curi.hasContentDigestHistory()) {
            return;
        }
        for (WARCRecordInfo warcRecord : writer.getTmpRecordLog()) {
            WARCConstants.WARCRecordType type = warcRecord.getType();
            boolean carriesPayload = (type == WARCConstants.WARCRecordType.response || type == WARCConstants.WARCRecordType.resource)
                    && warcRecord.getContentStream() != null
                    && warcRecord.getContentLength() > 0L;
            if (carriesPayload) {
                // A record with content was written: remember where it lives and
                // restart the digest count at one.
                Map<String, Object> digestHistory = curi.getContentDigestHistory();
                digestHistory.put("original-url", warcRecord.getUrl());
                digestHistory.put("warc-record-id", warcRecord.getRecordId().toString());
                digestHistory.put("warc-filename", warcRecord.getWARCFilename());
                digestHistory.put("warc-file-offset", warcRecord.getWARCFileOffset());
                digestHistory.put("content-written-date", warcRecord.getCreate14DigitDate());
                digestHistory.put("content-digest-count", 1);
            } else if (type == WARCConstants.WARCRecordType.revisit
                    && curi.getRevisitProfile() instanceof IdenticalPayloadDigestRevisit) {
                // Identical-payload revisit: bump the count, treating a missing count as one.
                Map<String, Object> digestHistory = curi.getContentDigestHistory();
                Integer oldCount = (Integer) digestHistory.get("content-digest-count");
                if (oldCount == null) {
                    oldCount = 1;
                }
                digestHistory.put("content-digest-count", oldCount + 1);
            }
        }
    }
}

