/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.warc;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Collection;
import org.apache.commons.lang.StringUtils;
import org.archive.format.warc.WARCConstants;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.modules.CrawlURI;
import org.archive.modules.warc.BaseWARCRecordBuilder;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.ANVLRecord;

/**
 * Builds a WARC {@code metadata} record for a {@link CrawlURI}.
 *
 * <p>The record body is an ANVL field list ({@code application/warc-fields})
 * holding crawl provenance (seed / via / hops / source tag), fetch duration,
 * FTP fetch status, charset information and the discovered outlinks.
 */
public class MetadataRecordBuilder
extends BaseWARCRecordBuilder {

    /**
     * Decides whether a metadata record is built for the given URI.
     *
     * @param curi the URI being written
     * @return {@code true} when the lower-cased URI scheme starts with
     *         {@code "http"} or equals {@code "ftp"} or {@code "sftp"}
     */
    @Override
    public boolean shouldBuildRecord(CrawlURI curi) {
        String scheme = curi.getUURI().getScheme().toLowerCase();
        return scheme.startsWith("http") || "ftp".equals(scheme) || "sftp".equals(scheme);
    }

    /**
     * Assembles the metadata record for {@code curi}.
     *
     * @param curi         the URI whose fetch is being described
     * @param concurrentTo id of the record this one accompanies; when non-null
     *                     it is emitted as a {@code WARC-Concurrent-To} header
     * @return a populated record whose content stream holds the UTF-8 encoded
     *         ANVL fields and whose content length matches that payload
     * @throws IOException declared by the overridden method; may be raised
     *                     while encoding the ANVL fields
     */
    @Override
    public WARCRecordInfo buildRecord(CrawlURI curi, URI concurrentTo) throws IOException {
        WARCRecordInfo recordInfo = new WARCRecordInfo();
        recordInfo.setType(WARCConstants.WARCRecordType.metadata);
        recordInfo.setRecordId(generateRecordID());
        if (concurrentTo != null) {
            recordInfo.addExtraHeader("WARC-Concurrent-To", "<" + concurrentTo + ">");
        }
        recordInfo.setUrl(curi.toString());
        recordInfo.setCreate14DigitDate(ArchiveUtils.getLog14Date(curi.getFetchBeginTime()));
        recordInfo.setMimetype("application/warc-fields");
        recordInfo.setEnforceLength(true);

        ANVLRecord fields = new ANVLRecord();

        // Provenance: a seed is only labelled as such; anything else records
        // how it was reached.
        if (curi.isSeed()) {
            fields.addLabel("seed");
        } else {
            if (curi.forceFetch()) {
                fields.addLabel("force-fetch");
            }
            String via = viaAsString(curi);
            if (StringUtils.isNotBlank(via)) {
                fields.addLabelValue("via", via);
            }
            String pathFromSeed = curi.getPathFromSeed();
            if (StringUtils.isNotBlank(pathFromSeed)) {
                fields.addLabelValue("hopsFromSeed", pathFromSeed);
            }
            if (curi.containsDataKey("source")) {
                fields.addLabelValue("sourceTag", (String) curi.getData().get("source"));
            }
        }

        // A negative duration is not written.
        long fetchDurationMs = curi.getFetchCompletedTime() - curi.getFetchBeginTime();
        if (fetchDurationMs > -1L) {
            fields.addLabelValue("fetchTimeMs", Long.toString(fetchDurationMs));
        }

        // Single lookup; a key mapped to null is treated like an absent key
        // instead of failing on toString().
        Object ftpFetchStatus = curi.getData().get("ftp-fetch-status");
        if (ftpFetchStatus != null) {
            fields.addLabelValue("ftpFetchStatus", ftpFetchStatus.toString());
        }

        if (curi.getRecorder() != null && curi.getRecorder().getCharset() != null) {
            fields.addLabelValue("charsetForLinkExtraction", curi.getRecorder().getCharset().name());
        }

        // Charset annotations are expected in "label:value" form.
        for (String annotation : curi.getAnnotations()) {
            if (annotation.startsWith("usingCharsetIn") || annotation.startsWith("inconsistentCharsetIn")) {
                String[] labelAndValue = annotation.split(":", 2);
                // Skip an annotation without a ':' rather than failing the
                // whole record with an ArrayIndexOutOfBoundsException.
                if (labelAndValue.length == 2) {
                    fields.addLabelValue(labelAndValue[0], labelAndValue[1]);
                }
            }
        }

        Collection<CrawlURI> outLinks = curi.getOutLinks();
        if (outLinks != null) {
            for (CrawlURI outLink : outLinks) {
                fields.addLabelValue("outlink",
                        outLink.getURI() + " " + outLink.getLastHop() + " " + outLink.getViaContext());
            }
        }

        byte[] payload = fields.getUTF8Bytes();
        recordInfo.setContentStream(new ByteArrayInputStream(payload));
        recordInfo.setContentLength(payload.length);
        return recordInfo;
    }

    /**
     * Returns the string form of the URI's via, or {@code null} when the URI
     * has no via, so callers can blank-check it without risking a
     * {@link NullPointerException}.
     */
    private static String viaAsString(CrawlURI curi) {
        Object via = curi.getVia();
        return via == null ? null : via.toString();
    }
}

