/*
 * Decompiled with CFR 0.152.
 */
package org.mule.extension.webcrawler.internal.helper.page;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.text.SimpleDateFormat;
import java.util.Base64;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mule.extension.webcrawler.internal.constant.Constants;
import org.mule.extension.webcrawler.internal.error.WebCrawlerErrorType;
import org.mule.extension.webcrawler.internal.helper.webdriver.WebDriverManager;
import org.mule.extension.webcrawler.internal.util.URLUtils;
import org.mule.extension.webcrawler.internal.util.Utils;
import org.mule.runtime.extension.api.error.ErrorTypeDefinition;
import org.mule.runtime.extension.api.exception.ModuleException;
import org.openqa.selenium.WebDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PageHelper {
    private static final Logger LOGGER = LoggerFactory.getLogger(PageHelper.class);

    /**
     * Fetches the page at {@code url} with JSoup and returns the parsed document.
     *
     * @param url       address of the page to retrieve
     * @param userAgent value to send as user agent; not set when {@code null} or empty
     * @param referrer  value to send as referrer; not set when {@code null} or empty
     * @return the document returned by {@code Connection.get()}
     * @throws IOException if JSoup fails to retrieve the page
     */
    public static Document getDocument(String url, String userAgent, String referrer) throws IOException {
        LOGGER.debug("Retrieving JSoup Document for url {} with user agent {} and referrer {}", url, userAgent, referrer);
        Connection connection = Jsoup.connect(url);
        // A null header value means "not configured" just like an empty one; do not NPE on it.
        if (userAgent != null && !userAgent.isEmpty()) {
            connection.userAgent(userAgent);
        }
        if (referrer != null && !referrer.isEmpty()) {
            connection.referrer(referrer);
        }
        return connection.get();
    }

    /**
     * Loads {@code url} in the WebDriver obtained from {@link WebDriverManager} and parses the
     * resulting page source with JSoup, using {@code url} as the base URI.
     *
     * @param url        address of the page to load
     * @param userAgent  passed to {@code WebDriverManager.getDriver}
     * @param quitDriver when {@code true}, {@code WebDriverManager.quitDriver()} is called after
     *                   the attempt, whether it succeeded or not; {@code null} is treated as
     *                   {@code false}
     * @return the parsed document
     * @throws Exception whatever the driver or the parser throws; it is logged and rethrown
     */
    public static Document getDocumentDynamic(String url, String userAgent, Boolean quitDriver) throws Exception {
        WebDriver driver = WebDriverManager.getDriver(userAgent);
        try {
            driver.get(url);
            return Jsoup.parse(driver.getPageSource(), url);
        }
        catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error("Error in loading dynamic content: {}", e.toString(), e);
            throw e;
        }
        finally {
            // Null-safe check: an NPE thrown from finally would replace the real failure.
            if (Boolean.TRUE.equals(quitDriver)) {
                WebDriverManager.quitDriver();
            }
        }
    }

    /**
     * Collects the {@code <meta>} tags of a document as a JSON array.
     *
     * <p>A tag is kept only when it has a non-empty {@code content} and at least one of
     * {@code name} / {@code property} is non-empty. Each entry holds {@code "content"} plus
     * either {@code "property"} (preferred when present) or {@code "name"}.
     *
     * @param document the parsed page
     * @return one JSON object per retained meta tag, in document order
     */
    public static JSONArray getPageMetaTags(Document document) {
        JSONArray collected = new JSONArray();
        for (Element meta : document.select("meta")) {
            String nameAttr = meta.attr("name");
            String propertyAttr = meta.attr("property");
            String contentAttr = meta.attr("content");

            boolean hasIdentifier = !nameAttr.isEmpty() || !propertyAttr.isEmpty();
            if (hasIdentifier && !contentAttr.isEmpty()) {
                boolean useProperty = !propertyAttr.isEmpty();
                JSONObject entry = new JSONObject();
                entry.put(useProperty ? "property" : "name", useProperty ? propertyAttr : nameAttr);
                entry.put("content", contentAttr);
                collected.put(entry);
            }
        }
        return collected;
    }

    /**
     * Extracts link and element statistics from a parsed page.
     *
     * <p>The returned map always contains {@code "url"} (the document base URI) and
     * {@code "title"}. Depending on {@code insight} it also contains:
     * <ul>
     *   <li>{@code "links"} - a map with any of the keys {@code documents}, {@code internal},
     *       {@code external}, {@code reference}, {@code images}, each a set of absolute URLs;</li>
     *   <li>{@code "pageStats"} - element counts per tag plus {@code internal}, {@code external},
     *       {@code reference}, {@code images} and {@code wordCount}.</li>
     * </ul>
     *
     * <p>Anchors and images are scanned whenever the statistics are requested, so the link and
     * image counts in {@code pageStats} reflect the page even when only
     * {@code ELEMENTCOUNTSTATS} is asked for. URLs that JSoup cannot make absolute (returned as
     * an empty string by {@code absUrl}) are ignored.
     *
     * @param document the parsed page
     * @param tags     selectors to count for the statistics; when {@code null} or empty the
     *                 defaults {@code div, p, h1..h5} are used. Also forwarded to
     *                 {@code getPageContent} for the word count.
     * @param insight  which insight(s) to compute
     * @return the insight data described above
     * @throws ModuleException with {@code PAGE_OPERATIONS_FAILURE} wrapping any failure
     */
    public static HashMap<String, Object> getPageInsights(Document document, List<String> tags, Constants.PageInsightType insight) {
        HashMap<String, Object> pageInsightData = new HashMap<>();
        try {
            boolean all = insight == Constants.PageInsightType.ALL;
            boolean wantDocuments = all || insight == Constants.PageInsightType.DOCUMENTLINKS;
            boolean wantInternal = all || insight == Constants.PageInsightType.INTERNALLINKS;
            boolean wantExternal = all || insight == Constants.PageInsightType.EXTERNALLINKS;
            boolean wantReference = all || insight == Constants.PageInsightType.REFERENCELINKS;
            boolean wantImages = all || insight == Constants.PageInsightType.IMAGELINKS;
            boolean wantStats = all || insight == Constants.PageInsightType.ELEMENTCOUNTSTATS;
            boolean wantAnchorLinks = wantDocuments || wantInternal || wantExternal || wantReference;

            HashSet<String> documentLinks = new HashSet<>();
            HashSet<String> internalLinks = new HashSet<>();
            HashSet<String> externalLinks = new HashSet<>();
            HashSet<String> referenceLinks = new HashSet<>();
            HashSet<String> imageLinks = new HashSet<>();
            HashMap<String, HashSet<String>> linksMap = new HashMap<>();
            String baseUrl = document.baseUri();

            // The statistics report link counts, so anchors must be classified for them as well.
            if (wantAnchorLinks || wantStats) {
                for (Element link : document.select("a[href]")) {
                    String href = link.absUrl("href");
                    if (href.isEmpty()) {
                        // absUrl yields "" when the href cannot be resolved to an absolute URL.
                        continue;
                    }
                    if (URLUtils.isDocumentUrl(href)) {
                        documentLinks.add(href);
                    } else if (URLUtils.isExternalLink(baseUrl, href)) {
                        externalLinks.add(href);
                    } else if (URLUtils.isReferenceLink(baseUrl, href)) {
                        referenceLinks.add(href);
                    } else {
                        internalLinks.add(href);
                    }
                }
            }
            if (wantDocuments) {
                linksMap.put("documents", documentLinks);
            }
            if (wantInternal) {
                linksMap.put("internal", internalLinks);
            }
            if (wantExternal) {
                linksMap.put("external", externalLinks);
            }
            if (wantReference) {
                linksMap.put("reference", referenceLinks);
            }

            // Same reasoning as for anchors: the image count is part of the statistics.
            if (wantImages || wantStats) {
                for (Element img : document.select("img[src]")) {
                    String imageUrl = img.absUrl("src");
                    if (!imageUrl.isEmpty()) {
                        imageLinks.add(imageUrl);
                    }
                }
            }
            if (wantImages) {
                linksMap.put("images", imageLinks);
            }

            if (wantStats) {
                HashMap<String, Integer> elementCounts = new HashMap<>();
                String[] elementsToCount = new String[]{"div", "p", "h1", "h2", "h3", "h4", "h5"};
                if (tags != null && !tags.isEmpty()) {
                    elementsToCount = tags.toArray(new String[0]);
                }
                for (String tag : elementsToCount) {
                    elementCounts.put(tag, document.select(tag).size());
                }
                elementCounts.put("internal", internalLinks.size());
                elementCounts.put("external", externalLinks.size());
                elementCounts.put("reference", referenceLinks.size());
                elementCounts.put("images", imageLinks.size());
                elementCounts.put("wordCount", Utils.countWords(PageHelper.getPageContent(document, tags, false)));
                pageInsightData.put("pageStats", elementCounts);
            }

            pageInsightData.put("url", document.baseUri());
            pageInsightData.put("title", document.title());
            if (wantAnchorLinks || wantImages) {
                pageInsightData.put("links", linksMap);
            }
        }
        catch (Exception e) {
            throw new ModuleException(String.format("Error while getting page insights for %s.", document.baseUri()), WebCrawlerErrorType.PAGE_OPERATIONS_FAILURE, e);
        }
        return pageInsightData;
    }

    /**
     * Returns the content of a page either as raw HTML or as plain text.
     *
     * @param document the parsed page
     * @param tags     optional selectors restricting which elements are extracted
     * @param rawHtml  {@code true} for HTML output, {@code false} for text output
     * @return the extracted content, trimmed
     */
    public static String getPageContent(Document document, List<String> tags, Boolean rawHtml) {
        return rawHtml.booleanValue()
                ? PageHelper.getPageRawHtmlContent(document, tags)
                : PageHelper.getPageContent(document, tags);
    }

    /**
     * Extracts the text of a page.
     *
     * <p>Without selectors the whole document text is returned. With selectors, the text of
     * every matching element is appended (separated by a space), skipping elements that sit
     * inside an element that was already collected earlier in the iteration.
     *
     * @param document the parsed page
     * @param tags     selectors to extract, or {@code null}/empty for the whole document
     * @return the collected text, trimmed
     */
    private static String getPageContent(Document document, List<String> tags) {
        if (tags == null || tags.isEmpty()) {
            return document.text().trim();
        }

        Set<Element> alreadyCollected = new HashSet<>();
        StringBuilder text = new StringBuilder();
        for (String cssQuery : tags) {
            for (Element match : document.select(cssQuery)) {
                if (!PageHelper.isNestedInsideAnotherSelected(match, alreadyCollected)) {
                    text.append(match.text());
                    text.append(" ");
                    alreadyCollected.add(match);
                }
            }
        }
        return text.toString().trim();
    }

    /**
     * Extracts the HTML of a page.
     *
     * <p>Without selectors the whole document HTML is returned. With selectors, the outer HTML
     * of every matching element is appended (one per line), skipping elements that sit inside
     * an element that was already collected earlier in the iteration.
     *
     * @param document the parsed page
     * @param tags     selectors to extract, or {@code null}/empty for the whole document
     * @return the collected HTML, trimmed
     */
    private static String getPageRawHtmlContent(Document document, List<String> tags) {
        if (tags == null || tags.isEmpty()) {
            return document.html().trim();
        }

        Set<Element> alreadyCollected = new HashSet<>();
        StringBuilder html = new StringBuilder();
        for (String cssQuery : tags) {
            for (Element match : document.select(cssQuery)) {
                if (!PageHelper.isNestedInsideAnotherSelected(match, alreadyCollected)) {
                    html.append(match.outerHtml());
                    html.append("\n");
                    alreadyCollected.add(match);
                }
            }
        }
        return html.toString().trim();
    }

    /**
     * Tells whether {@code element} lies inside any of the elements in {@code selectedElements}.
     *
     * @param element          the element to test
     * @param selectedElements elements collected so far
     * @return {@code true} if one of the selected elements is an ancestor of {@code element}
     */
    private static boolean isNestedInsideAnotherSelected(Element element, Set<Element> selectedElements) {
        boolean nested = false;
        for (Element candidateAncestor : selectedElements) {
            if (PageHelper.isDescendant(candidateAncestor, element)) {
                nested = true;
                break;
            }
        }
        return nested;
    }

    /**
     * Tells whether {@code parent} appears among the ancestors of {@code element}.
     * The comparison is by reference, not by {@code equals}.
     *
     * @param parent  the potential ancestor
     * @param element the element whose ancestor chain is inspected
     * @return {@code true} if {@code parent} is one of {@code element.parents()}
     */
    private static boolean isDescendant(Element parent, Element element) {
        Elements ancestors = element.parents();
        for (int i = 0; i < ancestors.size(); i++) {
            if (ancestors.get(i) == parent) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes {@code results} as UTF-8 JSON text into a new file under {@code downloadPath}.
     *
     * <p>The file name is the sanitized {@code title} (via {@code Utils.getSanitizedFilename}),
     * followed by a {@code yyyyMMddHHmmssSSS} timestamp and the {@code .json} extension. Missing
     * parent directories are created.
     *
     * @param results      the JSON payload to persist
     * @param downloadPath directory the file is written to
     * @param title        page title used as the file name prefix
     * @return the name (not the full path) of the file that was written
     * @throws IOException if the directory cannot be created or the file cannot be written;
     *                     the failure is logged before being rethrown
     */
    public static String savePageContents(JSONObject results, String downloadPath, String title) throws IOException {
        String timestamp = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
        String fileName = Utils.getSanitizedFilename(title) + "_" + timestamp + ".json";
        File file = new File(downloadPath, fileName);
        try {
            // getParentFile() is null when no directory component exists (e.g. null downloadPath).
            File parent = file.getParentFile();
            if (parent != null) {
                Files.createDirectories(parent.toPath());
            }
            // JSON is written as UTF-8 explicitly instead of relying on the platform charset.
            try (BufferedWriter writer = Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
                writer.write(results.toString());
            }
        }
        catch (IOException e) {
            // Do not report a file name for content that was never stored.
            LOGGER.error("An error occurred while writing to the file: " + e.getMessage(), e);
            throw e;
        }
        LOGGER.info("Saved content to file: " + fileName);
        return file.getName();
    }

    /**
     * Downloads the images of a page into {@code saveDirectory}, without a sub-folder.
     * Convenience overload of the four-argument variant.
     *
     * @param document      the parsed page
     * @param saveDirectory target directory
     * @param maxNumber     forwarded unchanged to the four-argument variant
     * @return the array produced by the four-argument variant
     * @throws IOException declared by the delegate
     */
    public static JSONArray downloadWebsiteImages(Document document, String saveDirectory, int maxNumber) throws IOException {
        final String noSubFolder = "";
        JSONArray downloaded = PageHelper.downloadWebsiteImages(document, saveDirectory, noSubFolder, maxNumber);
        return downloaded;
    }

    /**
     * Downloads the images referenced by {@code img[src]} elements of a page.
     *
     * <p>The image URLs are taken from {@code getPageInsights(..., IMAGELINKS)}. Each URL is
     * handed to {@code downloadSingleImage}; images that could not be saved (a {@code null}
     * result) are left out of the returned array and do not count towards the limit.
     *
     * @param document        the parsed page
     * @param saveDirectory   base target directory
     * @param imagesSubFolder sub-folder below {@code saveDirectory}, or empty
     * @param maxNumber       stop after this many successful downloads; zero or negative means
     *                        no limit
     * @return one JSON object per downloaded image
     * @throws IOException declared by {@code downloadSingleImage}
     */
    public static JSONArray downloadWebsiteImages(Document document, String saveDirectory, String imagesSubFolder, int maxNumber) throws IOException {
        JSONArray imagesJSONArray = new JSONArray();
        // getPageInsights stores the link sets as Map<String, Set<String>> under "links".
        @SuppressWarnings("unchecked")
        Map<String, Set<String>> linksMap = (Map<String, Set<String>>) PageHelper.getPageInsights(document, null, Constants.PageInsightType.IMAGELINKS).get("links");
        Set<String> imageUrls = new HashSet<>();
        if (linksMap != null) {
            imageUrls = linksMap.get("images");
        }
        if (imageUrls != null) {
            LOGGER.info("Number of img[src] elements found : " + imageUrls.size());
            for (String imageUrl : imageUrls) {
                JSONObject imageJSONObject = PageHelper.downloadSingleImage(imageUrl, saveDirectory, imagesSubFolder);
                if (imageJSONObject != null) {
                    imagesJSONArray.put(imageJSONObject);
                }
                if (maxNumber > 0 && imagesJSONArray.length() >= maxNumber) {
                    break;
                }
            }
        }
        return imagesJSONArray;
    }

    /**
     * Downloads one image into {@code saveDirectory}, without a sub-folder.
     * Convenience overload of the three-argument variant.
     *
     * @param imageUrl      URL (or data URL) of the image
     * @param saveDirectory target directory
     * @return the result of the three-argument variant
     * @throws IOException declared by the delegate
     */
    public static JSONObject downloadSingleImage(String imageUrl, String saveDirectory) throws IOException {
        final String noSubFolder = "";
        JSONObject imageInfo = PageHelper.downloadSingleImage(imageUrl, saveDirectory, noSubFolder);
        return imageInfo;
    }

    /**
     * Saves a single image to {@code saveDirectory/imagesSubFolder}.
     *
     * <p>Two kinds of source are supported: {@code data:image/...} URLs, whose base64 payload is
     * decoded and written to a generated {@code image_<timestamp>.<subtype>} file, and regular
     * URLs, which are streamed to a file named after the URL.
     *
     * @param imageUrl        URL or data URL of the image
     * @param saveDirectory   base target directory
     * @param imagesSubFolder sub-folder below {@code saveDirectory}; {@code null} is treated as
     *                        empty
     * @return a JSON object with {@code url}, {@code fileName}, {@code mimeType} and, when a
     *         sub-folder is given, {@code relativePath}; {@code null} when the image could not
     *         be decoded, fetched or written
     * @throws IOException kept for signature compatibility; I/O failures are logged and
     *                     reported as a {@code null} result
     */
    public static JSONObject downloadSingleImage(String imageUrl, String saveDirectory, String imagesSubFolder) throws IOException {
        LOGGER.info("Processing image: " + imageUrl);
        String subFolder = imagesSubFolder == null ? "" : imagesSubFolder;
        String imagesSaveDirectory = saveDirectory + "/" + subFolder;
        JSONObject jsonObject = new JSONObject();
        try {
            jsonObject.put("url", imageUrl);
            if (!subFolder.isEmpty()) {
                jsonObject.put("relativePath", subFolder);
            }
            if (imageUrl.startsWith("data:image/")) {
                return PageHelper.saveDataUrlImage(imageUrl, imagesSaveDirectory, jsonObject);
            }

            URL url = new URL(imageUrl);
            String decodedUrl = URLUtils.extractAndDecodeUrl(imageUrl);
            // The URL comes from crawled content: keep only the last path component so the
            // file cannot land outside the target directory.
            // NOTE(review): extractFileNameFromUrl may already guarantee this - confirm.
            String fileName = new File(URLUtils.extractFileNameFromUrl(decodedUrl)).getName();
            String mimeType = URLUtils.detectMimeTypeFromFileName(fileName);
            File file = new File(imagesSaveDirectory, fileName);
            file.getParentFile().mkdirs();
            try (InputStream in = url.openStream();
                 FileOutputStream out = new FileOutputStream(file)) {
                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = in.read(buffer)) != -1) {
                    out.write(buffer, 0, bytesRead);
                }
            }
            LOGGER.debug("Image saved: " + file.getAbsolutePath());
            jsonObject.put("fileName", fileName);
            jsonObject.put("mimeType", mimeType);
            return jsonObject;
        }
        catch (IOException e) {
            LOGGER.error("Error saving image: " + imageUrl, e);
            return null;
        }
    }

    /**
     * Decodes a base64 {@code data:image/...} URL and writes it to a new file in
     * {@code targetDirectory}, then completes {@code jsonObject} with the file name and MIME type.
     * Returns {@code null} when the data URL is malformed or carries no decodable payload.
     */
    private static JSONObject saveDataUrlImage(String dataUrl, String targetDirectory, JSONObject jsonObject) throws IOException {
        int commaIndex = dataUrl.indexOf(',');
        if (commaIndex < 0) {
            LOGGER.info("Data URL has no payload separator: " + dataUrl);
            return null;
        }
        String base64Data = dataUrl.substring(commaIndex + 1);
        if (base64Data.isEmpty()) {
            LOGGER.info("Base64 data is empty for URL: " + dataUrl);
            return null;
        }
        byte[] imageBytes;
        try {
            imageBytes = Base64.getDecoder().decode(base64Data);
        }
        catch (IllegalArgumentException e) {
            LOGGER.info("Error decoding base64 data: " + e.getMessage());
            return null;
        }
        if (imageBytes.length == 0) {
            LOGGER.info("Decoded image bytes are empty for URL: " + dataUrl);
            return null;
        }

        // Header is the part between "data:" and the comma, e.g. "image/png;base64".
        // The ";..." parameters are optional, so their absence must not break parsing.
        String header = dataUrl.substring("data:".length(), commaIndex);
        int parameterIndex = header.indexOf(';');
        String mimeType = parameterIndex >= 0 ? header.substring(0, parameterIndex) : header;
        String[] mimeParts = mimeType.split("/");
        if (mimeParts.length < 2 || mimeParts[1].isEmpty()) {
            LOGGER.info("Data URL has no image subtype: " + dataUrl);
            return null;
        }
        String fileExtension = mimeParts[1];

        String baseName = "image_" + new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
        File file = new File(targetDirectory, baseName + "." + fileExtension);
        // Several inline images can be processed within the same millisecond; add a numeric
        // suffix rather than overwriting the previous one.
        for (int suffix = 1; file.exists(); suffix++) {
            file = new File(targetDirectory, baseName + "_" + suffix + "." + fileExtension);
        }
        file.getParentFile().mkdirs();
        try (FileOutputStream out = new FileOutputStream(file)) {
            out.write(imageBytes);
        }
        LOGGER.info("Data URL image saved: " + file.getAbsolutePath());
        jsonObject.put("fileName", file.getName());
        jsonObject.put("mimeType", mimeType);
        return jsonObject;
    }

    /**
     * Downloads the documents linked from a page into {@code saveDir}, without a sub-folder.
     * Convenience overload of the four-argument variant.
     *
     * @param document  the parsed page
     * @param saveDir   target directory
     * @param maxNumber forwarded unchanged to the four-argument variant
     * @return the array produced by the four-argument variant
     * @throws IOException declared by the delegate
     */
    public static JSONArray downloadFiles(Document document, String saveDir, int maxNumber) throws IOException {
        final String noSubFolder = "";
        JSONArray downloaded = PageHelper.downloadFiles(document, saveDir, noSubFolder, maxNumber);
        return downloaded;
    }

    /**
     * Downloads the documents linked from a page.
     *
     * <p>The document URLs are taken from {@code getPageInsights(..., DOCUMENTLINKS)}. Each URL
     * is handed to {@code downloadFile}; failed downloads (a {@code null} result) are left out
     * of the returned array and do not count towards the limit.
     *
     * @param document       the parsed page
     * @param saveDir        base target directory
     * @param filesSubFolder sub-folder below {@code saveDir}, or empty
     * @param maxNumber      stop after this many successful downloads; zero or negative means
     *                       no limit
     * @return one JSON object per downloaded document
     * @throws IOException kept for signature compatibility
     */
    public static JSONArray downloadFiles(Document document, String saveDir, String filesSubFolder, int maxNumber) throws IOException {
        JSONArray documentsJSONArray = new JSONArray();
        // getPageInsights stores the link sets as Map<String, Set<String>> under "links".
        @SuppressWarnings("unchecked")
        Map<String, Set<String>> linksMap = (Map<String, Set<String>>) PageHelper.getPageInsights(document, null, Constants.PageInsightType.DOCUMENTLINKS).get("links");
        Set<String> documentURLs = new HashSet<>();
        if (linksMap != null) {
            documentURLs = linksMap.get("documents");
        }
        if (documentURLs != null) {
            LOGGER.debug("Number of documents found : " + documentURLs.size());
            for (String documentURL : documentURLs) {
                JSONObject documentJSONObject = PageHelper.downloadFile(documentURL, saveDir, filesSubFolder);
                if (documentJSONObject != null) {
                    documentsJSONArray.put(documentJSONObject);
                }
                if (maxNumber > 0 && documentsJSONArray.length() >= maxNumber) {
                    break;
                }
            }
        }
        return documentsJSONArray;
    }

    /**
     * Downloads one file into {@code saveDir}, without a sub-folder.
     * Convenience overload of the three-argument variant.
     *
     * @param fileURL URL of the file
     * @param saveDir target directory
     * @return the result of the three-argument variant
     */
    public static JSONObject downloadFile(String fileURL, String saveDir) {
        final String noSubFolder = "";
        JSONObject fileInfo = PageHelper.downloadFile(fileURL, saveDir, noSubFolder);
        return fileInfo;
    }

    /**
     * Downloads a file over HTTP(S) into {@code saveDir/filesSubFolder}.
     *
     * <p>The file name is taken from the {@code Content-Disposition} response header when it
     * carries a {@code filename=} parameter, otherwise from the last path segment of the URL.
     * In both cases it is URL-decoded and reduced to a bare name before use. The target
     * directory is created when missing.
     *
     * @param fileURL        URL of the file
     * @param saveDir        base target directory
     * @param filesSubFolder sub-folder below {@code saveDir}; {@code null} is treated as empty
     * @return a JSON object with {@code url}, {@code fileName}, {@code mimeType} and, when a
     *         sub-folder is given, {@code relativePath}; {@code null} when the server did not
     *         answer 200, the URL is not HTTP(S), no usable file name could be derived, the
     *         directory could not be created, or the transfer failed
     */
    public static JSONObject downloadFile(String fileURL, String saveDir, String filesSubFolder) {
        String subFolder = filesSubFolder == null ? "" : filesSubFolder;
        File directory = new File(saveDir + "/" + subFolder);
        JSONObject jsonObject = new JSONObject();
        HttpURLConnection httpConn = null;
        try {
            jsonObject.put("url", fileURL);
            if (!subFolder.isEmpty()) {
                jsonObject.put("relativePath", subFolder);
            }

            URLConnection connection = new URL(fileURL).openConnection();
            if (!(connection instanceof HttpURLConnection)) {
                // e.g. ftp: or file: links found on a page; only HTTP(S) is handled here.
                LOGGER.error("Unsupported protocol for file download: " + fileURL);
                return null;
            }
            httpConn = (HttpURLConnection) connection;
            httpConn.setRequestMethod("GET");
            int responseCode = httpConn.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                LOGGER.debug("No file to download. Server replied HTTP code: " + responseCode);
                return null;
            }

            String fileName = PageHelper.resolveDownloadFileName(httpConn.getHeaderField("Content-Disposition"), fileURL);
            if (fileName.isEmpty()) {
                LOGGER.error("Could not determine a file name for: " + fileURL);
                return null;
            }
            LOGGER.debug("Downloading file {} at {}", fileName, fileURL);

            if (!directory.exists()) {
                if (directory.mkdirs()) {
                    LOGGER.debug("Directory created: " + directory.getAbsolutePath());
                } else {
                    LOGGER.error("Failed to create directory: " + directory.getAbsolutePath());
                    return null;
                }
            }

            // Resolve against the directory so a separator is always present between the
            // sub-folder and the file name.
            File target = new File(directory, fileName);
            try (InputStream inputStream = new BufferedInputStream(httpConn.getInputStream());
                 FileOutputStream outputStream = new FileOutputStream(target)) {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
            }
            LOGGER.debug("File downloaded: " + target.getAbsolutePath());

            jsonObject.put("fileName", fileName);
            jsonObject.put("mimeType", URLUtils.detectMimeTypeFromFileName(fileName));
            return jsonObject;
        }
        catch (IOException e) {
            // A failed transfer must not be reported as a downloaded file.
            LOGGER.error("Error downloading file: " + e.getMessage(), e);
            return null;
        }
        finally {
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
    }

    /**
     * Derives a bare file name from the {@code Content-Disposition} header value or, when it
     * has no {@code filename=} parameter, from the last path segment of {@code fileURL}.
     * Returns an empty string when no usable name can be derived.
     */
    private static String resolveDownloadFileName(String disposition, String fileURL) throws IOException {
        String rawName;
        if (disposition != null && disposition.contains("filename=")) {
            String value = disposition.substring(disposition.indexOf("filename=") + "filename=".length()).trim();
            if (value.startsWith("\"")) {
                // Quoted value: take what sits between the quotes.
                int closingQuote = value.indexOf('"', 1);
                value = closingQuote > 0 ? value.substring(1, closingQuote) : value.substring(1);
            } else {
                // Unquoted value ends at the next header parameter, if any.
                int parameterEnd = value.indexOf(';');
                if (parameterEnd >= 0) {
                    value = value.substring(0, parameterEnd).trim();
                }
            }
            rawName = value;
        } else {
            int queryStart = fileURL.indexOf('?');
            String urlPath = queryStart >= 0 ? fileURL.substring(0, queryStart) : fileURL;
            rawName = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        }

        String decoded;
        try {
            decoded = URLDecoder.decode(rawName, StandardCharsets.UTF_8.name());
        }
        catch (IllegalArgumentException e) {
            // Malformed %-escape: fall back to the undecoded name instead of failing.
            decoded = rawName;
        }

        // Header and URL are server-controlled: drop any directory part so the file cannot be
        // written outside the target directory.
        String bareName = new File(decoded).getName();
        if (".".equals(bareName) || "..".equals(bareName)) {
            return "";
        }
        return bareName;
    }
}

