/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Resource;
import org.apache.commons.io.IOUtils;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.lang.SystemUtil;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.CrawlerStatus;
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
import org.codelibs.fess.crawler.client.CrawlerClient;
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
import org.codelibs.fess.crawler.container.CrawlerContainer;
import org.codelibs.fess.crawler.entity.AccessResult;
import org.codelibs.fess.crawler.entity.RequestData;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.crawler.exception.ChildUrlsException;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.helper.LogHelper;
import org.codelibs.fess.crawler.log.LogType;
import org.codelibs.fess.crawler.processor.ResponseProcessor;
import org.codelibs.fess.crawler.rule.Rule;
import org.codelibs.fess.crawler.service.DataService;
import org.codelibs.fess.crawler.service.UrlQueueService;
import org.codelibs.fess.crawler.util.CrawlingParameterUtil;

/**
 * Worker loop of a crawling session.
 *
 * <p>Each pass of {@link #run()} polls one entry from the URL queue, asks the client factory for a
 * {@link CrawlerClient} matching the URL, executes the request and passes the response to the
 * {@link ResponseProcessor} of the {@link Rule} selected by the context's rule manager. Redirects,
 * child URLs reported through {@link ChildUrlsException} and sitemaps held by the context are
 * pushed back onto the queue.
 *
 * <p>The number of threads that are currently crawling is tracked in
 * {@code crawlerContext.activeThreadCount}, guarded by {@code crawlerContext.activeThreadCountLock}.
 */
public class CrawlerThread implements Runnable {
    // Codes handed to crawlerContext.intervalController.delay(int). The names describe the point in
    // run() at which this class uses each code.
    // NOTE(review): these presumably mirror constants declared by the interval controller type - confirm.
    private static final int DELAY_BEFORE_CRAWL = 1;
    private static final int DELAY_AFTER_CRAWL = 2;
    private static final int DELAY_NO_VALID_URL = 4;
    private static final int DELAY_BEFORE_NEXT_POLL = 8;

    // HTTP status a HEAD response must carry for the entry to be treated as unchanged.
    private static final int HTTP_OK = 200;
    // Status written to the response (setStatus and setHttpStatusCode) when the entry is unchanged.
    private static final int NOT_MODIFIED_STATUS = 304;

    @Resource
    protected UrlQueueService<UrlQueue<?>> urlQueueService;
    @Resource
    protected DataService<AccessResult<?>> dataService;
    @Resource
    protected CrawlerContainer crawlerContainer;
    @Resource
    protected LogHelper logHelper;
    protected CrawlerClientFactory clientFactory;
    protected CrawlerContext crawlerContext;
    protected boolean noWaitOnFolder = false;

    /**
     * Increments the context's active-thread counter while holding its lock.
     */
    protected void startCrawling() {
        synchronized (crawlerContext.activeThreadCountLock) {
            crawlerContext.activeThreadCount++;
        }
    }

    /**
     * Decrements the context's active-thread counter while holding its lock.
     */
    protected void finishCrawling() {
        synchronized (crawlerContext.activeThreadCountLock) {
            crawlerContext.activeThreadCount--;
        }
    }

    /**
     * Decides whether the worker loop should run another pass.
     *
     * @param tcCount number of consecutive passes in which no valid queue entry was found
     * @return {@code false} when the container is no longer available; while {@code tcCount} is below
     *         the context's {@code maxThreadCheckCount}, {@code true} unless a positive maximum access
     *         count has been reached; afterwards {@code true} only while the active-thread counter is
     *         positive
     */
    protected boolean isContinue(int tcCount) {
        if (!crawlerContainer.available()) {
            return false;
        }
        if (tcCount < crawlerContext.maxThreadCheckCount) {
            final long maxAccessCount = crawlerContext.getMaxAccessCount();
            final boolean limitReached = maxAccessCount > 0L && crawlerContext.getAccessCount() >= maxAccessCount;
            return !limitReached;
        }
        // This thread found nothing for a while, but a thread that is still crawling may add URLs.
        return crawlerContext.activeThreadCount > 0;
    }

    /**
     * Forwards a log event to the given helper; does nothing when the helper is {@code null}.
     *
     * @param logHelper helper receiving the event, may be {@code null}
     * @param key type of the event
     * @param objs arguments passed through to the helper
     */
    protected void log(LogHelper logHelper, LogType key, Object... objs) {
        if (logHelper != null) {
            logHelper.log(key, objs);
        }
    }

    /**
     * Runs the worker loop until the context status is {@link CrawlerStatus#DONE} or
     * {@link #isContinue(int)} returns {@code false}.
     *
     * <p>The context, URL queue service and data service are published through
     * {@link CrawlingParameterUtil} for the duration of the loop and cleared afterwards. Any
     * {@link Throwable} raised while handling a single URL is logged and the loop goes on; a
     * {@link Throwable} escaping the loop itself is logged as {@link LogType#SYSTEM_ERROR}.
     */
    @Override
    public void run() {
        log(logHelper, LogType.START_THREAD, crawlerContext);
        int threadCheckCount = 0;
        CrawlingParameterUtil.setCrawlerContext(crawlerContext);
        CrawlingParameterUtil.setUrlQueueService(urlQueueService);
        CrawlingParameterUtil.setDataService(dataService);
        try {
            while (crawlerContext.getStatus() != CrawlerStatus.DONE && isContinue(threadCheckCount)) {
                final UrlQueue<?> urlQueue = urlQueueService.poll(crawlerContext.sessionId);
                if (isValid(urlQueue)) {
                    ResponseData responseData = null;
                    // Set only after startCrawling() returned, so the counter is decremented exactly
                    // as often as it was incremented.
                    boolean crawlingStarted = false;
                    log(logHelper, LogType.START_CRAWLING, crawlerContext, urlQueue);
                    try {
                        final CrawlerClient client = getClient(urlQueue.getUrl());
                        if (client == null) {
                            log(logHelper, LogType.UNSUPPORTED_URL_AT_CRAWLING_STARTED, crawlerContext, urlQueue);
                            continue;
                        }
                        startCrawling();
                        crawlingStarted = true;
                        CrawlingParameterUtil.setUrlQueue(urlQueue);
                        delayIfConfigured(DELAY_BEFORE_CRAWL);
                        if (isContentUpdated(client, urlQueue)) {
                            log(logHelper, LogType.GET_CONTENT, crawlerContext, urlQueue);
                            final long startTime = SystemUtil.currentTimeMillis();
                            responseData = client.execute(
                                    RequestDataBuilder.newRequestData().method(urlQueue.getMethod()).url(urlQueue.getUrl()).build());
                            responseData.setExecutionTime(SystemUtil.currentTimeMillis() - startTime);
                            responseData.setParentUrl(urlQueue.getParentUrl());
                            responseData.setSessionId(crawlerContext.sessionId);
                            if (responseData.getRedirectLocation() == null) {
                                log(logHelper, LogType.PROCESS_RESPONSE, crawlerContext, urlQueue, responseData);
                                processResponse(urlQueue, responseData);
                            } else {
                                log(logHelper, LogType.REDIRECT_LOCATION, crawlerContext, urlQueue, responseData);
                                // The redirect target is queued as a child of the current URL.
                                storeChildUrl(responseData.getRedirectLocation(), urlQueue.getUrl(), null, childDepthOf(urlQueue));
                            }
                        }
                        log(logHelper, LogType.FINISHED_CRAWLING, crawlerContext, urlQueue);
                    } catch (final ChildUrlsException e) {
                        try {
                            final Set<RequestData> childUrlSet = e.getChildUrlList();
                            log(logHelper, LogType.PROCESS_CHILD_URLS_BY_EXCEPTION, crawlerContext, urlQueue, childUrlSet);
                            storeChildUrls(childUrlSet, urlQueue.getUrl(), childDepthOf(urlQueue));
                        } catch (final Exception e1) {
                            log(logHelper, LogType.CRAWLING_EXCETPION, crawlerContext, urlQueue, e1);
                        }
                        if (noWaitOnFolder) {
                            // Skip the wait at the end of the pass and poll the next entry right away.
                            continue;
                        }
                    } catch (final CrawlingAccessException e) {
                        log(logHelper, LogType.CRAWLING_ACCESS_EXCEPTION, crawlerContext, urlQueue, e);
                    } catch (final Throwable e) {
                        log(logHelper, LogType.CRAWLING_EXCETPION, crawlerContext, urlQueue, e);
                    } finally {
                        // This block must complete normally: a jump out of it would discard the
                        // pending outcome of the try/catch above and skip the wait below.
                        try {
                            addSitemapsFromRobotsTxt(urlQueue);
                            if (responseData != null) {
                                IOUtils.closeQuietly(responseData);
                            }
                            delayIfConfigured(DELAY_AFTER_CRAWL);
                            threadCheckCount = 0;
                            CrawlingParameterUtil.setUrlQueue(null);
                        } finally {
                            if (crawlingStarted) {
                                finishCrawling();
                            }
                        }
                    }
                } else {
                    log(logHelper, LogType.NO_URL_IN_QUEUE, crawlerContext, urlQueue, threadCheckCount);
                    delayIfConfigured(DELAY_NO_VALID_URL);
                    ++threadCheckCount;
                }
                delayIfConfigured(DELAY_BEFORE_NEXT_POLL);
            }
        } catch (final Throwable t) {
            log(logHelper, LogType.SYSTEM_ERROR, t);
        } finally {
            CrawlingParameterUtil.setCrawlerContext(null);
            CrawlingParameterUtil.setUrlQueueService(null);
            CrawlingParameterUtil.setDataService(null);
        }
        log(logHelper, LogType.FINISHED_THREAD, crawlerContext);
    }

    /**
     * Takes the sitemap URLs returned by {@code crawlerContext.removeSitemaps()} and queues each one
     * as a child of the given entry. A failure for one sitemap is logged and does not stop the
     * remaining ones.
     *
     * @param urlQueue entry whose URL and depth are used as parent URL and base depth
     */
    protected void addSitemapsFromRobotsTxt(UrlQueue<?> urlQueue) {
        final String[] sitemaps = crawlerContext.removeSitemaps();
        if (sitemaps == null) {
            return;
        }
        for (final String childUrl : sitemaps) {
            try {
                storeChildUrl(childUrl, urlQueue.getUrl(), null, childDepthOf(urlQueue));
            } catch (final Exception e) {
                log(logHelper, LogType.PROCESS_CHILD_URL_BY_EXCEPTION, crawlerContext, urlQueue, childUrl, e);
            }
        }
    }

    /**
     * Looks up the client for a URL in the client factory.
     *
     * @param url URL to be crawled
     * @return the factory's result; {@link #run()} treats {@code null} as an unsupported URL
     */
    protected CrawlerClient getClient(String url) {
        return clientFactory.getClient(url);
    }

    /**
     * Checks whether the content behind a queue entry has to be fetched.
     *
     * <p>Entries without a last-modified value are always reported as updated. Otherwise a HEAD
     * request is executed; when its response has status 200 and a last-modified time that is not
     * later than the entry's, the response is marked with status 304, passed to
     * {@link #processResponse(UrlQueue, ResponseData)} and the entry is reported as not updated.
     * The HEAD response is closed in every case.
     *
     * @param client client used for the HEAD request
     * @param urlQueue entry to check
     * @return {@code false} when the entry was handled as not modified, {@code true} otherwise
     */
    protected boolean isContentUpdated(CrawlerClient client, UrlQueue<?> urlQueue) {
        if (urlQueue.getLastModified() == null) {
            return true;
        }
        log(logHelper, LogType.CHECK_LAST_MODIFIED, crawlerContext, urlQueue);
        final long startTime = SystemUtil.currentTimeMillis();
        ResponseData responseData = null;
        try {
            responseData = client.execute(RequestDataBuilder.newRequestData().head().url(urlQueue.getUrl()).build());
            final boolean notModified = responseData != null //
                    && responseData.getLastModified() != null //
                    && responseData.getLastModified().getTime() <= urlQueue.getLastModified() //
                    && responseData.getHttpStatusCode() == HTTP_OK;
            if (!notModified) {
                return true;
            }
            log(logHelper, LogType.NOT_MODIFIED, crawlerContext, urlQueue);
            responseData.setExecutionTime(SystemUtil.currentTimeMillis() - startTime);
            responseData.setParentUrl(urlQueue.getParentUrl());
            responseData.setSessionId(crawlerContext.sessionId);
            responseData.setStatus(NOT_MODIFIED_STATUS);
            responseData.setHttpStatusCode(NOT_MODIFIED_STATUS);
            processResponse(urlQueue, responseData);
            return false;
        } finally {
            if (responseData != null) {
                IOUtils.closeQuietly(responseData);
            }
        }
    }

    /**
     * Hands a response to the processor of the rule selected by the context's rule manager. A missing
     * rule or a rule without a processor is logged and the response is left unprocessed.
     *
     * @param urlQueue entry the response belongs to (used for logging)
     * @param responseData response to process; receives the id of the selected rule
     */
    protected void processResponse(UrlQueue<?> urlQueue, ResponseData responseData) {
        final Rule rule = crawlerContext.ruleManager.getRule(responseData);
        if (rule == null) {
            log(logHelper, LogType.NO_RULE, crawlerContext, urlQueue, responseData);
            return;
        }
        responseData.setRuleId(rule.getRuleId());
        final ResponseProcessor responseProcessor = rule.getResponseProcessor();
        if (responseProcessor == null) {
            log(logHelper, LogType.NO_RESPONSE_PROCESSOR, crawlerContext, urlQueue, responseData, rule);
            return;
        }
        responseProcessor.process(responseData);
    }

    /**
     * Queues a set of child URLs found under a parent URL.
     *
     * <p>Nothing is queued when a non-negative maximum depth is configured and {@code depth} exceeds
     * it. Children with a blank URL, children repeating an already seen URL/meta-data pair, and
     * children rejected by the context's URL filter are dropped; the remaining ones are offered to
     * the queue service in one call.
     *
     * @param childUrlList child requests to queue
     * @param url parent URL recorded on every queued child
     * @param depth depth recorded on every queued child
     */
    protected void storeChildUrls(Set<RequestData> childUrlList, String url, int depth) {
        if (exceedsMaxDepth(depth)) {
            return;
        }
        final Set<String> seen = new HashSet<>();
        final List<UrlQueue<?>> childList = new ArrayList<>();
        for (final RequestData child : childUrlList) {
            final String childUrl = child.getUrl();
            if (StringUtil.isBlank(childUrl)) {
                continue;
            }
            // URL plus meta data identifies a child; duplicates inside one set are queued once.
            if (!seen.add(childUrl + "\n" + child.getMetaData())) {
                continue;
            }
            if (!crawlerContext.urlFilter.match(childUrl)) {
                continue;
            }
            childList.add(newUrlQueue(childUrl, url, child.getMetaData(), depth));
        }
        urlQueueService.offerAll(crawlerContext.sessionId, childList);
    }

    /**
     * Queues a single child URL.
     *
     * <p>Nothing is queued when a non-negative maximum depth is configured and {@code depth} exceeds
     * it, when the URL is blank, or when the context's URL filter rejects it.
     *
     * @param childUrl URL to queue
     * @param parentUrl parent URL recorded on the queued entry
     * @param metaData meta data recorded on the queued entry, may be {@code null}
     * @param depth depth recorded on the queued entry
     */
    protected void storeChildUrl(String childUrl, String parentUrl, String metaData, int depth) {
        if (exceedsMaxDepth(depth)) {
            return;
        }
        if (StringUtil.isBlank(childUrl) || !crawlerContext.urlFilter.match(childUrl)) {
            return;
        }
        final List<UrlQueue<?>> childList = new ArrayList<>(1);
        childList.add(newUrlQueue(childUrl, parentUrl, metaData, depth));
        urlQueueService.offerAll(crawlerContext.sessionId, childList);
    }

    /**
     * Checks whether a polled queue entry should be crawled.
     *
     * @param urlQueue polled entry, may be {@code null}
     * @return {@code false} for a {@code null} entry, a blank URL, or a depth beyond a non-negative
     *         configured maximum; otherwise the verdict of the context's URL filter. An entry without
     *         a depth is never rejected by the depth check.
     */
    protected boolean isValid(UrlQueue<?> urlQueue) {
        if (urlQueue == null) {
            return false;
        }
        if (StringUtil.isBlank(urlQueue.getUrl())) {
            return false;
        }
        // The depth may be null (the child-depth computation handles that case too); comparing it
        // directly would unbox null and end the worker thread with a NullPointerException.
        final Integer depth = urlQueue.getDepth();
        if (depth != null && exceedsMaxDepth(depth)) {
            return false;
        }
        return crawlerContext.urlFilter.match(urlQueue.getUrl());
    }

    public boolean isNoWaitOnFolder() {
        return noWaitOnFolder;
    }

    public void setNoWaitOnFolder(boolean noWaitOnFolder) {
        this.noWaitOnFolder = noWaitOnFolder;
    }

    /** Calls the context's interval controller with the given code when a controller is set. */
    private void delayIfConfigured(int type) {
        if (crawlerContext.intervalController != null) {
            crawlerContext.intervalController.delay(type);
        }
    }

    /** Tells whether a non-negative maximum depth is configured and the given depth exceeds it. */
    private boolean exceedsMaxDepth(int depth) {
        return crawlerContext.getMaxDepth() >= 0 && depth > crawlerContext.getMaxDepth();
    }

    /** Returns the depth for children of the given entry: its depth plus one, or 1 when it has none. */
    private static int childDepthOf(UrlQueue<?> urlQueue) {
        final Integer depth = urlQueue.getDepth();
        return depth == null ? 1 : depth + 1;
    }

    /** Obtains a fresh "urlQueue" component from the container and fills it as a GET entry of this session. */
    private UrlQueue<?> newUrlQueue(String url, String parentUrl, String metaData, int depth) {
        final UrlQueue<?> uq = (UrlQueue<?>) crawlerContainer.getComponent("urlQueue");
        uq.setCreateTime(SystemUtil.currentTimeMillis());
        uq.setDepth(depth);
        uq.setMethod("GET");
        uq.setParentUrl(parentUrl);
        uq.setSessionId(crawlerContext.sessionId);
        uq.setUrl(url);
        uq.setMetaData(metaData);
        return uq;
    }
}

