/*
 * Decompiled with CFR 0.152.
 */
package com.mule.mulechain.crawler.internal.helpers;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers used by the crawler: fetching pages (statically through Jsoup or
 * through a headless Chrome session), extracting links, meta tags, text and simple
 * statistics from a parsed {@link Document}, and a few URL / file-name utilities.
 *
 * <p>All methods are static and the class keeps no mutable state.
 */
public class crawlingHelper {
    private static final Logger LOGGER = LoggerFactory.getLogger(crawlingHelper.class);

    /** Shared serializer for {@link #convertToJSON(Object)}; never reconfigured after construction. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Element selectors counted by {@link #getPageInsights} when the caller supplies no tags. */
    private static final String[] DEFAULT_COUNTED_TAGS = {"div", "p", "h1", "h2", "h3", "h4", "h5"};

    /**
     * Fetches the page at {@code url} through Jsoup and returns the parsed document.
     *
     * @param url address of the page to fetch
     * @return the parsed document
     * @throws IOException if Jsoup fails to retrieve the page
     */
    public static Document getDocument(String url) throws IOException {
        return Jsoup.connect(url).get();
    }

    /**
     * Loads {@code url} in a headless Chrome session and parses the resulting page
     * source, using {@code url} as the document's base URI.
     *
     * <p>The driver is always shut down, whether loading succeeds or fails.
     *
     * @param url address of the page to load
     * @return the document parsed from the browser's page source
     * @throws Exception whatever the driver or parser threw; it is logged and rethrown unchanged
     */
    public static Document getDocumentDynamic(String url) throws Exception {
        ChromeOptions options = new ChromeOptions();
        options.addArguments(
                "--headless",
                "--disable-gpu",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--allow-running-insecure-content");
        ChromeDriver driver = new ChromeDriver(options);
        try {
            driver.get(url);
            return Jsoup.parse(driver.getPageSource(), url);
        } catch (Exception e) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            LOGGER.error("Error in loading dynamic content: {}", e.toString(), e);
            throw e;
        } finally {
            driver.quit();
        }
    }

    /**
     * Derives a file name from the last path segment of {@code url}.
     *
     * <p>The query string (everything from the first {@code '?'}) is removed before the
     * last {@code '/'} is located, so slashes inside query parameters cannot shift the
     * segment boundary. When the segment contains no {@code '.'}, {@code ".jpg"} is appended.
     *
     * @param url URL (or path) to take the file name from; must not be {@code null}
     * @return the last path segment, with {@code ".jpg"} appended if it has no dot
     */
    public static String extractFileNameFromUrl(String url) {
        int queryStart = url.indexOf('?');
        String withoutQuery = queryStart >= 0 ? url.substring(0, queryStart) : url;
        String fileName = withoutQuery.substring(withoutQuery.lastIndexOf('/') + 1);
        return fileName.contains(".") ? fileName : fileName + ".jpg";
    }

    /**
     * Returns the URL-decoded value of the {@code url} query parameter of {@code fullUrl},
     * or {@code fullUrl} itself when there is no query string or no non-empty
     * {@code url} parameter.
     *
     * @param fullUrl URL that may carry the real target in a {@code url=...} parameter
     * @return the decoded parameter value, or {@code fullUrl} unchanged
     * @throws UnsupportedEncodingException declared by {@link URLDecoder#decode(String, String)}
     * @throws MalformedURLException if {@code fullUrl} cannot be parsed as a URL
     */
    public static String extractAndDecodeUrl(String fullUrl) throws UnsupportedEncodingException, MalformedURLException {
        String query = new URL(fullUrl).getQuery();
        if (query == null) {
            return fullUrl;
        }
        for (String param : query.split("&")) {
            // Limit of 2 keeps any '=' that is part of the value (e.g. a nested query string).
            String[] pair = param.split("=", 2);
            if (pair.length == 2 && "url".equals(pair[0]) && !pair[1].isEmpty()) {
                return URLDecoder.decode(pair[1], StandardCharsets.UTF_8.name());
            }
        }
        return fullUrl;
    }

    /**
     * Serializes {@code contentToSerialize} to a JSON string with Jackson.
     *
     * @param contentToSerialize object to serialize
     * @return the JSON representation
     * @throws JsonProcessingException if Jackson cannot serialize the object
     */
    public static String convertToJSON(Object contentToSerialize) throws JsonProcessingException {
        return OBJECT_MAPPER.writeValueAsString(contentToSerialize);
    }

    /**
     * Collects the {@code <meta>} tags of {@code document} into a name-to-content map.
     *
     * <p>The key is the tag's {@code name} attribute, falling back to {@code property}
     * when {@code name} is empty. Tags with an empty key or empty {@code content} are
     * skipped; a later tag with the same key overwrites an earlier one.
     *
     * @param document parsed page
     * @return map from meta name/property to its content
     */
    public static Map<String, String> getPageMetaTags(Document document) {
        HashMap<String, String> metaTagData = new HashMap<>();
        for (Element metaTag : document.select("meta")) {
            String name = metaTag.attr("name");
            if (name.isEmpty()) {
                name = metaTag.attr("property");
            }
            String content = metaTag.attr("content");
            if (!name.isEmpty() && !content.isEmpty()) {
                metaTagData.put(name, content);
            }
        }
        return metaTagData;
    }

    /**
     * Builds a map of insights about {@code document}, restricted by {@code insight}.
     *
     * <p>The result always contains {@code "url"} (the document's base URI) and
     * {@code "title"}. Depending on {@code insight} it additionally contains:
     * <ul>
     *   <li>{@code "links"} - a map with any of the keys {@code "internal"},
     *       {@code "external"}, {@code "reference"} and {@code "images"}, each holding a
     *       set of absolute URLs (present for {@code ALL} and the four link insights);</li>
     *   <li>{@code "pageStats"} - element counts per selector, the sizes of the four link
     *       sets, and {@code "wordCount"} (present for {@code ALL} and
     *       {@code ELEMENTCOUNTSTATS}).</li>
     * </ul>
     *
     * <p>Links and images are also gathered when only statistics are requested, so the
     * link counts in {@code "pageStats"} reflect the page rather than being zero.
     * Attribute values that cannot be resolved to an absolute URL (for which Jsoup's
     * {@code absUrl} yields an empty string) are ignored.
     *
     * @param document parsed page; its base URI is used to classify links
     * @param tags     selectors to count and to take the word count from; when
     *                 {@code null} or empty a default set of tags is counted and the word
     *                 count covers the whole document text
     * @param insight  which insight to compute
     * @return the insight map described above
     * @throws MalformedURLException if links have to be classified and the document's base
     *                               URI is not a parseable URL
     */
    public static Map<String, Object> getPageInsights(Document document, List<String> tags, PageInsightType insight) throws MalformedURLException {
        boolean all = insight == PageInsightType.ALL;
        boolean wantsInternal = all || insight == PageInsightType.INTERNALLINKS;
        boolean wantsExternal = all || insight == PageInsightType.EXTERNALLINKS;
        boolean wantsReference = all || insight == PageInsightType.REFERENCELINKS;
        boolean wantsImages = all || insight == PageInsightType.IMAGELINKS;
        boolean wantsStats = all || insight == PageInsightType.ELEMENTCOUNTSTATS;
        boolean wantsAnchorLinks = wantsInternal || wantsExternal || wantsReference;

        HashSet<String> internalLinks = new HashSet<>();
        HashSet<String> externalLinks = new HashSet<>();
        HashSet<String> referenceLinks = new HashSet<>();
        HashSet<String> imageLinks = new HashSet<>();

        // Statistics report the sizes of these sets, so fill them for stats-only requests too.
        if (wantsAnchorLinks || wantsStats) {
            collectAnchorLinks(document, internalLinks, externalLinks, referenceLinks);
        }
        if (wantsImages || wantsStats) {
            collectImageLinks(document, imageLinks);
        }

        HashMap<String, HashSet<String>> linksMap = new HashMap<>();
        if (wantsInternal) {
            linksMap.put("internal", internalLinks);
        }
        if (wantsExternal) {
            linksMap.put("external", externalLinks);
        }
        if (wantsReference) {
            linksMap.put("reference", referenceLinks);
        }
        if (wantsImages) {
            linksMap.put("images", imageLinks);
        }

        HashMap<String, Object> pageInsightData = new HashMap<>();
        if (wantsStats) {
            HashMap<String, Integer> elementCounts = countElements(document, tags);
            elementCounts.put("internal", internalLinks.size());
            elementCounts.put("external", externalLinks.size());
            elementCounts.put("reference", referenceLinks.size());
            elementCounts.put("images", imageLinks.size());
            elementCounts.put("wordCount", countWords(getPageContent(document, tags)));
            pageInsightData.put("pageStats", elementCounts);
        }
        pageInsightData.put("url", document.baseUri());
        pageInsightData.put("title", document.title());
        if (wantsAnchorLinks || wantsImages) {
            pageInsightData.put("links", linksMap);
        }
        return pageInsightData;
    }

    /**
     * Sorts every resolvable {@code a[href]} target of {@code document} into exactly one
     * of the three sets: external first, then reference (same page plus fragment),
     * otherwise internal.
     */
    private static void collectAnchorLinks(Document document, Set<String> internalLinks, Set<String> externalLinks, Set<String> referenceLinks) throws MalformedURLException {
        String baseUrl = document.baseUri();
        for (Element link : document.select("a[href]")) {
            String href = link.absUrl("href");
            if (href.isEmpty()) {
                continue; // href could not be resolved to an absolute URL
            }
            if (isExternalLink(baseUrl, href)) {
                externalLinks.add(href);
            } else if (isReferenceLink(baseUrl, href)) {
                referenceLinks.add(href);
            } else {
                internalLinks.add(href);
            }
        }
    }

    /** Adds the resolvable absolute {@code src} of every {@code img[src]} element to {@code imageLinks}. */
    private static void collectImageLinks(Document document, Set<String> imageLinks) {
        for (Element img : document.select("img[src]")) {
            String imageUrl = img.absUrl("src");
            if (!imageUrl.isEmpty()) {
                imageLinks.add(imageUrl);
            }
        }
    }

    /** Counts the elements matching each selector in {@code tags}, or the default tags when none are given. */
    private static HashMap<String, Integer> countElements(Document document, List<String> tags) {
        String[] elementsToCount = (tags != null && !tags.isEmpty())
                ? tags.toArray(new String[0])
                : DEFAULT_COUNTED_TAGS;
        HashMap<String, Integer> elementCounts = new HashMap<>();
        for (String tag : elementsToCount) {
            elementCounts.put(tag, document.select(tag).size());
        }
        return elementCounts;
    }

    /**
     * Returns the text of {@code document}.
     *
     * <p>With a non-empty {@code tags} list, the text of every element matching each
     * selector is concatenated, separated by single spaces, in selector order. Otherwise
     * the text of the whole document is returned. The result is trimmed.
     *
     * @param document parsed page
     * @param tags     selectors to restrict the text to; may be {@code null} or empty
     * @return the collected text, trimmed
     */
    public static String getPageContent(Document document, List<String> tags) {
        if (tags == null || tags.isEmpty()) {
            return document.text().trim();
        }
        StringBuilder collectedText = new StringBuilder();
        for (String selector : tags) {
            for (Element element : document.select(selector)) {
                collectedText.append(element.text()).append(" ");
            }
        }
        return collectedText.toString().trim();
    }

    /** Counts whitespace-separated words in {@code text}; {@code null} or blank text counts as 0. */
    private static int countWords(String text) {
        if (text == null) {
            return 0;
        }
        String trimmed = text.trim();
        return trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
    }

    /**
     * Turns {@code title} into a string usable as a file name: each of the characters
     * {@code \ / : * ? " < > |} becomes {@code '_'} and all space characters are removed.
     *
     * @param title text to sanitize; must not be {@code null}
     * @return the sanitized text
     */
    public static String getSanitizedFilename(String title) {
        return title.replaceAll("[\\\\/:*?\"<>|]", "_").replace(" ", "");
    }

    /**
     * Tells whether {@code linkToCheck} points at the page {@code baseUrl} itself with a
     * fragment: same scheme, host and path, and a non-null fragment.
     *
     * <p>Components are compared null-safely. If either URL is not a valid URI the
     * problem is logged and {@code false} is returned.
     */
    private static boolean isReferenceLink(String baseUrl, String linkToCheck) {
        try {
            URI baseUri = new URI(baseUrl);
            URI linkUri = new URI(linkToCheck);
            return linkUri.getFragment() != null
                    && Objects.equals(baseUri.getScheme(), linkUri.getScheme())
                    && Objects.equals(baseUri.getHost(), linkUri.getHost())
                    && Objects.equals(baseUri.getPath(), linkUri.getPath());
        } catch (URISyntaxException e) {
            LOGGER.error(e.toString());
            return false;
        }
    }

    /**
     * Tells whether {@code linkToCheck} leaves the site of {@code baseUrl}.
     *
     * <p>A link is internal when its host equals the base host or is a sub-domain of it
     * (case-insensitively). Hosts are compared rather than searching the link text, so a
     * foreign URL that merely mentions the base domain in its path or query is still
     * external. Links that cannot be parsed as a URL, or that have no host while the
     * base has one, are treated as external.
     *
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    private static boolean isExternalLink(String baseUrl, String linkToCheck) throws MalformedURLException {
        String baseHost = hostOf(new URL(baseUrl));
        String linkHost;
        try {
            linkHost = hostOf(new URL(linkToCheck));
        } catch (MalformedURLException e) {
            return true;
        }
        if (baseHost.isEmpty()) {
            // Host-less base (e.g. a file: URL): only host-less links can belong to it.
            return !linkHost.isEmpty();
        }
        boolean sameSite = linkHost.equals(baseHost) || linkHost.endsWith("." + baseHost);
        return !sameSite;
    }

    /** Returns the lower-cased host of {@code url}, or an empty string when it has none. */
    private static String hostOf(URL url) {
        String host = url.getHost();
        return host == null ? "" : host.toLowerCase(Locale.ROOT);
    }

    /**
     * Sleeps for {@code delayMillis} milliseconds; does nothing for values of 0 or less.
     *
     * <p>If the sleep is interrupted the event is logged and the thread's interrupt
     * flag is restored.
     *
     * @param delayMillis pause length in milliseconds
     */
    public static void addDelay(int delayMillis) {
        if (delayMillis <= 0) {
            return;
        }
        try {
            LOGGER.info("Adding delay of {} ms before fetching contents for the next URL.", delayMillis);
            Thread.sleep(delayMillis);
        } catch (InterruptedException e) {
            LOGGER.error("Thread interrupted during delay: {}", e.getMessage());
            Thread.currentThread().interrupt();
        }
    }

    /** Selects which part of the page insights {@link #getPageInsights} computes. */
    public static enum PageInsightType {
        /** Every link category plus the page statistics. */
        ALL,
        /** Links classified as internal. */
        INTERNALLINKS,
        /** Links classified as external. */
        EXTERNALLINKS,
        /** Links to the same page that carry a fragment. */
        REFERENCELINKS,
        /** Absolute URLs of {@code img[src]} elements. */
        IMAGELINKS,
        /** Element counts, link-set sizes and word count. */
        ELEMENTCOUNTSTATS;

    }
}

