/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.crawler;

import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.fetcher.CustomFetchStatus;
import edu.uci.ics.crawler4j.fetcher.PageFetchResult;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.frontier.DocIDServer;
import edu.uci.ics.crawler4j.frontier.Frontier;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.parser.ParseData;
import edu.uci.ics.crawler4j.parser.Parser;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.WebURL;
import java.util.ArrayList;
import org.apache.log4j.Logger;

public class WebCrawler
implements Runnable {
    protected static final Logger logger = Logger.getLogger((String)WebCrawler.class.getName());
    protected int myId;
    protected CrawlController myController;
    private Thread myThread;
    private Parser parser;
    private PageFetcher pageFetcher;
    private RobotstxtServer robotstxtServer;
    private DocIDServer docIdServer;
    private Frontier frontier;
    private boolean isWaitingForNewURLs;

    public void init(int id, CrawlController crawlController) {
        this.myId = id;
        this.pageFetcher = crawlController.getPageFetcher();
        this.robotstxtServer = crawlController.getRobotstxtServer();
        this.docIdServer = crawlController.getDocIdServer();
        this.frontier = crawlController.getFrontier();
        this.parser = new Parser(crawlController.getConfig());
        this.myController = crawlController;
        this.isWaitingForNewURLs = false;
    }

    public int getMyId() {
        return this.myId;
    }

    public CrawlController getMyController() {
        return this.myController;
    }

    public void onStart() {
    }

    public void onBeforeExit() {
    }

    protected void handlePageStatusCode(WebURL webUrl, int statusCode, String statusDescription) {
    }

    protected void onContentFetchError(WebURL webUrl) {
    }

    protected void onParseError(WebURL webUrl) {
    }

    public Object getMyLocalData() {
        return null;
    }

    /*
     * Unable to fully structure code
     */
    @Override
    public void run() {
        this.onStart();
        block2: while (true) {
            assignedURLs = new ArrayList<WebURL>(50);
            this.isWaitingForNewURLs = true;
            this.frontier.getNextURLs(50, assignedURLs);
            this.isWaitingForNewURLs = false;
            if (assignedURLs.size() == 0) {
                if (this.frontier.isFinished()) {
                    return;
                }
                try {
                    Thread.sleep(3000L);
                }
                catch (InterruptedException e) {
                    e.printStackTrace();
                }
                continue;
            }
            i$ = assignedURLs.iterator();
            do {
                if (i$.hasNext()) ** break;
                continue block2;
                curURL = (WebURL)i$.next();
                if (curURL == null) continue;
                this.processPage(curURL);
                this.frontier.setProcessed(curURL);
            } while (!this.myController.isShuttingDown());
            break;
        }
        WebCrawler.logger.info((Object)"Exiting because of controller shutdown.");
    }

    public boolean shouldVisit(WebURL url) {
        return true;
    }

    public void visit(Page page) {
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void processPage(WebURL curURL) {
        if (curURL == null) {
            return;
        }
        PageFetchResult fetchResult = null;
        try {
            fetchResult = this.pageFetcher.fetchHeader(curURL);
            int statusCode = fetchResult.getStatusCode();
            this.handlePageStatusCode(curURL, statusCode, CustomFetchStatus.getStatusDescription(statusCode));
            if (statusCode != 200) {
                if (statusCode == 301 || statusCode == 302) {
                    if (this.myController.getConfig().isFollowRedirects()) {
                        String movedToUrl = fetchResult.getMovedToUrl();
                        if (movedToUrl == null) {
                            return;
                        }
                        int newDocId = this.docIdServer.getDocId(movedToUrl);
                        if (newDocId > 0) {
                            return;
                        }
                        WebURL webURL = new WebURL();
                        webURL.setURL(movedToUrl);
                        webURL.setParentDocid(curURL.getParentDocid());
                        webURL.setParentUrl(curURL.getParentUrl());
                        webURL.setDepth(curURL.getDepth());
                        webURL.setDocid(-1);
                        webURL.setAnchor(curURL.getAnchor());
                        if (this.shouldVisit(webURL) && this.robotstxtServer.allows(webURL)) {
                            webURL.setDocid(this.docIdServer.getNewDocID(movedToUrl));
                            this.frontier.schedule(webURL);
                        }
                    }
                } else if (fetchResult.getStatusCode() == 1001) {
                    logger.info((Object)("Skipping a page which was bigger than max allowed size: " + curURL.getURL()));
                }
                return;
            }
            if (!curURL.getURL().equals(fetchResult.getFetchedUrl())) {
                if (this.docIdServer.isSeenBefore(fetchResult.getFetchedUrl())) {
                    return;
                }
                curURL.setURL(fetchResult.getFetchedUrl());
                curURL.setDocid(this.docIdServer.getNewDocID(fetchResult.getFetchedUrl()));
            }
            Page page = new Page(curURL);
            int docid = curURL.getDocid();
            if (!fetchResult.fetchContent(page)) {
                this.onContentFetchError(curURL);
                return;
            }
            if (!this.parser.parse(page, curURL.getURL())) {
                this.onParseError(curURL);
                return;
            }
            ParseData parseData = page.getParseData();
            if (parseData instanceof HtmlParseData) {
                HtmlParseData htmlParseData = (HtmlParseData)parseData;
                ArrayList<WebURL> toSchedule = new ArrayList<WebURL>();
                int maxCrawlDepth = this.myController.getConfig().getMaxDepthOfCrawling();
                for (WebURL webURL : htmlParseData.getOutgoingUrls()) {
                    webURL.setParentDocid(docid);
                    webURL.setParentUrl(curURL.getURL());
                    int newdocid = this.docIdServer.getDocId(webURL.getURL());
                    if (newdocid > 0) {
                        webURL.setDepth((short)-1);
                        webURL.setDocid(newdocid);
                        continue;
                    }
                    webURL.setDocid(-1);
                    webURL.setDepth((short)(curURL.getDepth() + 1));
                    if (maxCrawlDepth != -1 && curURL.getDepth() >= maxCrawlDepth || !this.shouldVisit(webURL) || !this.robotstxtServer.allows(webURL)) continue;
                    webURL.setDocid(this.docIdServer.getNewDocID(webURL.getURL()));
                    toSchedule.add(webURL);
                }
                this.frontier.scheduleAll(toSchedule);
            }
            try {
                this.visit(page);
            }
            catch (Exception e) {
                logger.error((Object)("Exception while running the visit method. Message: '" + e.getMessage() + "' at " + e.getStackTrace()[0]));
            }
        }
        catch (Exception e) {
            logger.error((Object)(e.getMessage() + ", while processing: " + curURL.getURL()));
        }
        finally {
            if (fetchResult != null) {
                fetchResult.discardContentIfNotConsumed();
            }
        }
    }

    public Thread getThread() {
        return this.myThread;
    }

    public void setThread(Thread myThread) {
        this.myThread = myThread;
    }

    public boolean isNotWaitingForNewURLs() {
        return !this.isWaitingForNewURLs;
    }
}

