/*
 * Decompiled with CFR 0.152.
 */
package org.archive.crawler.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.crawler.util.SeedUrlNotFoundException;
import org.archive.util.ArchiveUtils;

/**
 * Parses a recover log and builds, in memory, the relationship between seed
 * URLs and the URLs that were discovered from them.
 *
 * <p>Only lines whose first character is {@code 'F'} are examined:
 * <ul>
 *   <li>{@code "F+ url"} (exactly two whitespace-separated tokens) registers
 *       {@code url} as a seed;</li>
 *   <li>{@code "F+ url ... via"} (three or more tokens) records {@code url} as
 *       discovered through {@code via}, the last token on the line;</li>
 *   <li>{@code "Fs url"} records {@code url} as successfully crawled.</li>
 * </ul>
 * Lines whose URL starts with {@code "dns:"} are ignored.
 */
public class RecoveryLogMapper {
    private static final char LOG_LINE_START_CHAR = "F+ ".charAt(0);
    private static final Logger logger = Logger.getLogger(RecoveryLogMapper.class.getName());

    /** Optional sink for "seed not found" messages; when null such lines raise an exception instead. */
    private PrintWriter seedNotFoundPrintWriter = null;
    /** Discovered URL -> the seed URL it was attributed to. */
    private final Map<String, String> crawledUrlToSeedMap = new HashMap<String, String>();
    /** Seed URL -> set of URLs attributed to that seed. */
    private final Map<String, Set<String>> seedUrlToDiscoveredUrlsMap = new HashMap<String, Set<String>>();
    /** URLs that appeared on an {@code "Fs "} line. */
    private final Set<String> successfullyCrawledUrls = new HashSet<String>();

    /**
     * Loads the given recover log. A discovered URL whose seed cannot be
     * resolved aborts loading with a {@link SeedUrlNotFoundException}.
     *
     * @param recoverLogFileName path of the recover log to read
     * @throws FileNotFoundException declared for callers; propagated from opening the log
     * @throws IOException if reading the log fails
     * @throws SeedUrlNotFoundException if an added URL refers to a seed that was never listed
     */
    public RecoveryLogMapper(String recoverLogFileName) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        this.load(recoverLogFileName);
    }

    /**
     * Loads the given recover log, writing "seed not found" messages to a file
     * instead of aborting.
     *
     * @param recoverLogFileName path of the recover log to read
     * @param seedNotFoundLogFileName path of the file that receives one message per unresolved seed
     * @throws FileNotFoundException if the message file cannot be opened for writing
     * @throws IOException if reading the log fails
     * @throws SeedUrlNotFoundException declared for callers of {@link #load(String)}
     */
    public RecoveryLogMapper(String recoverLogFileName, String seedNotFoundLogFileName) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        this.seedNotFoundPrintWriter = new PrintWriter(new FileOutputStream(seedNotFoundLogFileName));
        this.load(recoverLogFileName);
    }

    /**
     * Reads the recover log line by line and populates the internal maps.
     * The log reader and the optional seed-not-found writer are closed when
     * this method exits, whether it returns normally or throws.
     *
     * @param recoverLogFileName path of the recover log to read
     * @throws FileNotFoundException declared for callers; propagated from opening the log
     * @throws IOException if reading the log fails
     * @throws SeedUrlNotFoundException if an added URL's seed is unknown and no
     *         seed-not-found writer was configured
     */
    protected void load(String recoverLogFileName) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        try {
            LineNumberReader reader = new LineNumberReader(ArchiveUtils.getBufferedReader(new File(recoverLogFileName)));
            try {
                String curLine;
                while ((curLine = reader.readLine()) != null) {
                    // getLineNumber() now reports the number of the line just read.
                    this.processLine(curLine, recoverLogFileName, reader.getLineNumber());
                }
            } finally {
                reader.close();
            }
        } finally {
            // Close the message sink even when opening or reading the log failed.
            if (this.seedNotFoundPrintWriter != null) {
                this.seedNotFoundPrintWriter.close();
            }
        }
    }

    /** Dispatches one log line to the add/success handlers; irrelevant or malformed lines are skipped. */
    private void processLine(String curLine, String recoverLogFileName, int lineNumber) throws SeedUrlNotFoundException {
        if (curLine.length() == 0 || curLine.charAt(0) != LOG_LINE_START_CHAR) {
            return;
        }
        String[] args = curLine.split("\\s+");
        if (args.length < 2) {
            // A tag with no URL after it; nothing to record.
            return;
        }
        String firstUrl = args[1];
        if (firstUrl.startsWith("dns:")) {
            return;
        }
        if (curLine.startsWith("F+ ")) {
            this.processAddLine(args, firstUrl, recoverLogFileName, lineNumber);
        } else if (curLine.startsWith("Fs ")) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("F_SUCCESS for URL " + firstUrl);
            }
            this.successfullyCrawledUrls.add(firstUrl);
        }
    }

    /** Handles an {@code "F+ "} line: registers a seed (two tokens) or attributes a discovered URL to its seed. */
    private void processAddLine(String[] args, String firstUrl, String recoverLogFileName, int lineNumber) throws SeedUrlNotFoundException {
        if (args.length == 2) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("F_ADD with 2 words --> seed URL (" + firstUrl + ")");
            }
            // Keep the existing set if this seed was already listed.
            if (this.seedUrlToDiscoveredUrlsMap.get(firstUrl) == null) {
                this.seedUrlToDiscoveredUrlsMap.put(firstUrl, new HashSet<String>());
            }
            return;
        }

        String viaUrl = args[args.length - 1];
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("F_ADD with 3+ words --> new URL " + firstUrl + " via URL " + viaUrl);
        }
        String seedForFirstUrl = this.crawledUrlToSeedMap.get(viaUrl);
        if (seedForFirstUrl == null) {
            // The via URL was not itself discovered through anything, so treat it as the seed.
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("\tvia URL is a seed");
            }
            seedForFirstUrl = viaUrl;
        } else if (logger.isLoggable(Level.FINE)) {
            logger.fine("\tvia URL discovered via seed URL " + seedForFirstUrl);
        }
        this.crawledUrlToSeedMap.put(firstUrl, seedForFirstUrl);

        Set<String> theSeedUrlList = this.seedUrlToDiscoveredUrlsMap.get(seedForFirstUrl);
        if (theSeedUrlList == null) {
            String message = "recover log " + recoverLogFileName + " at line " + lineNumber + " listed F+ URL (" + viaUrl + ") for which found no seed list.";
            if (this.seedNotFoundPrintWriter == null) {
                throw new SeedUrlNotFoundException(message);
            }
            this.seedNotFoundPrintWriter.println(message);
            return;
        }
        theSeedUrlList.add(firstUrl);
    }

    /**
     * Returns the seed associated with a URL.
     *
     * @param urlString the URL to look up
     * @return {@code urlString} itself if it is a registered seed; otherwise the
     *         seed recorded for it, or {@code null} if the URL is unknown
     */
    public String getSeedForUrl(String urlString) {
        return this.seedUrlToDiscoveredUrlsMap.get(urlString) != null ? urlString : this.crawledUrlToSeedMap.get(urlString);
    }

    /**
     * @return the internal (live, mutable) map from seed URL to the set of
     *         URLs attributed to that seed
     */
    public Map<String, Set<String>> getSeedUrlToDiscoveredUrlsMap() {
        return this.seedUrlToDiscoveredUrlsMap;
    }

    /**
     * @return the internal (live, mutable) set of URLs seen on {@code "Fs "} lines
     */
    public Set<String> getSuccessfullyCrawledUrls() {
        return this.successfullyCrawledUrls;
    }

    /**
     * @return the logger shared by this class
     */
    public static Logger getLogger() {
        return logger;
    }

    /**
     * Returns an iterator over the URLs attributed to a seed that were also
     * recorded as successfully crawled.
     *
     * @param seedUrlString the seed URL whose discovered URLs should be iterated
     * @return an iterator that does not support {@code remove()}
     * @throws SeedUrlNotFoundException if {@code seedUrlString} is not a registered seed
     */
    public Iterator<String> getIteratorOfURLsSuccessfullyCrawledFromSeedUrl(String seedUrlString) throws SeedUrlNotFoundException {
        return new SuccessfullyCrawledURLsIterator(seedUrlString);
    }

    /**
     * @return the key set of the seed map, i.e. all registered seed URLs
     */
    public Collection<String> getSeedCollection() {
        return this.seedUrlToDiscoveredUrlsMap.keySet();
    }

    /**
     * Command-line entry point: loads the recover log named by the first
     * argument and prints, for every seed, the URLs successfully crawled from it.
     *
     * @param args {@code args[0]} is the recover log file name
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Usage: RecoveryLogMapper recoverLogFileName");
            Runtime.getRuntime().exit(-1);
        }
        String recoverLogFileName = args[0];
        try {
            RecoveryLogMapper myRecoveryLogMapper = new RecoveryLogMapper(recoverLogFileName);
            for (String curSeedUrl : myRecoveryLogMapper.getSeedCollection()) {
                System.out.println("URLs successfully crawled from seed URL " + curSeedUrl);
                Iterator<String> iteratorOfUrlsCrawledFromSeedUrl = myRecoveryLogMapper.getIteratorOfURLsSuccessfullyCrawledFromSeedUrl(curSeedUrl);
                while (iteratorOfUrlsCrawledFromSeedUrl.hasNext()) {
                    String curCrawledUrlString = iteratorOfUrlsCrawledFromSeedUrl.next();
                    System.out.println("    -> " + curCrawledUrlString);
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Iterates the URLs attributed to one seed, yielding only those that are
     * also present in the successfully-crawled set.
     */
    private class SuccessfullyCrawledURLsIterator
    implements Iterator<String> {
        /** Look-ahead slot: the next matching URL, or null if none has been fetched yet. */
        private String nextValue = null;
        private Iterator<String> discoveredUrlsIterator;

        /**
         * @param seedUrlString the seed whose discovered URLs are filtered
         * @throws SeedUrlNotFoundException if the seed has no entry in the seed map
         */
        public SuccessfullyCrawledURLsIterator(String seedUrlString) throws SeedUrlNotFoundException {
            Set<String> discoveredUrlList = RecoveryLogMapper.this.getSeedUrlToDiscoveredUrlsMap().get(seedUrlString);
            if (discoveredUrlList == null) {
                throw new SeedUrlNotFoundException("Seed URL " + seedUrlString + "  not found in seed list");
            }
            this.discoveredUrlsIterator = discoveredUrlList.iterator();
        }

        /** Advances the underlying iterator until a successfully crawled URL fills the look-ahead slot or input runs out. */
        private void populateNextValue() {
            while (this.nextValue == null && this.discoveredUrlsIterator.hasNext()) {
                String curDiscoveredUrl = this.discoveredUrlsIterator.next();
                boolean succCrawled = RecoveryLogMapper.this.getSuccessfullyCrawledUrls().contains(curDiscoveredUrl);
                if (RecoveryLogMapper.getLogger().isLoggable(Level.FINE)) {
                    RecoveryLogMapper.getLogger().fine("populateNextValue: curDiscoveredUrl=" + curDiscoveredUrl + ", succCrawled=" + succCrawled);
                }
                if (succCrawled) {
                    this.nextValue = curDiscoveredUrl;
                }
            }
        }

        @Override
        public boolean hasNext() {
            this.populateNextValue();
            return this.nextValue != null;
        }

        /**
         * @return the next successfully crawled URL
         * @throws NoSuchElementException if no further matching URL exists
         */
        @Override
        public String next() {
            this.populateNextValue();
            if (this.nextValue == null) {
                throw new NoSuchElementException("No more successfully crawled URLs for this seed.");
            }
            String returnValue = this.nextValue;
            this.nextValue = null;
            return returnValue;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("SuccessfullyCrawledURLsIterator.remove: not supported.");
        }
    }
}

