/*
 * Decompiled with CFR 0.152.
 */
package io.github.cdimascio.essence.cleaners;

import io.github.cdimascio.essence.cleaners.ScoreCleaner;
import io.github.cdimascio.essence.util.NodeHeuristics;
import io.github.cdimascio.essence.util.TraversalHelpers;
import io.github.cdimascio.essence.util.TraversalHelpersKt;
import io.github.cdimascio.essence.words.StopWords;
import io.github.cdimascio.essence.words.StopWordsStatistics;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.TypeCastException;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

@Metadata(mv={1, 1, 13}, bv={1, 0, 3}, k=1, d1={"\u00006\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u0006\n\u0002\b\u0004\u0018\u00002\u00020\u0001B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0014\u0010\u0005\u001a\u0004\u0018\u00010\u00062\b\u0010\u0007\u001a\u0004\u0018\u00010\u0006H\u0002J\u001e\u0010\b\u001a\n\u0018\u00010\u0006j\u0004\u0018\u0001`\t2\u000e\u0010\n\u001a\n\u0018\u00010\u0006j\u0004\u0018\u0001`\tJ\u001e\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f2\u0006\u0010\u000e\u001a\u00020\u00062\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u0012\u0010\u0011\u001a\u00020\u00102\b\u0010\u0012\u001a\u0004\u0018\u00010\u0006H\u0002J\u0012\u0010\u0013\u001a\u0004\u0018\u00010\u00062\u0006\u0010\u0007\u001a\u00020\u0006H\u0002R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0014"}, d2={"Lio/github/cdimascio/essence/cleaners/ScoreCleaner;", "", "stopWords", "Lio/github/cdimascio/essence/words/StopWords;", "(Lio/github/cdimascio/essence/words/StopWords;)V", "addSiblingsToTopNode", "Lorg/jsoup/nodes/Element;", "targetNode", "clean", "Lio/github/cdimascio/essence/scorers/ScoredElement;", "element", "getSiblingsContent", "", "", "node", "score", "", "getSiblingsScore", "topNode", "skipNonTextualTopNodes", "essence"})
public final class ScoreCleaner {
    /*
     * Post-processes a candidate content element:
     *   1. descends through wrappers that hold nothing but a single child element,
     *   2. prepends qualifying paragraph text found in the previous siblings,
     *   3. removes child elements that fail the NodeHeuristics checks.
     *
     * This file is decompiler output of a Kotlin class; the Intrinsics calls are the
     * Kotlin compiler's generated null checks and are kept where they can still fire.
     */

    /** Baseline returned by {@link #getSiblingsScore} when no paragraph qualifies (or the node is null). */
    private static final double DEFAULT_BASELINE_SCORE = 100000.0;

    /** Fraction of the baseline a sibling paragraph's stop-word count must exceed to be pulled in. */
    private static final double SIBLING_SCORE_FACTOR = 0.3;

    /** Paragraphs with this many stop words or fewer are ignored when computing the baseline. */
    private static final int MAX_IGNORED_STOP_WORDS = 2;

    private final StopWords stopWords;

    /**
     * Creates a cleaner that scores paragraphs with the given stop-word source.
     *
     * @param stopWords stop-word source used for paragraph scoring; must not be null
     */
    public ScoreCleaner(@NotNull StopWords stopWords) {
        Intrinsics.checkParameterIsNotNull(stopWords, "stopWords");
        this.stopWords = stopWords;
    }

    /**
     * Cleans the given element in place and returns the node that was cleaned.
     *
     * <p>A direct child of the cleaned node is kept when the paragraph-or-anchor predicate
     * accepts it, or when it does not have high link density, is not a table/list without
     * paragraphs, and meets the node threshold. Every other child that still has a parent
     * is removed.
     *
     * @param element the element to clean; may be null
     * @return null when {@code element} is null, otherwise the node reached by
     *         {@link #skipNonTextualTopNodes} (which may be a descendant of {@code element})
     */
    @Nullable
    public final Element clean(@Nullable Element element) {
        if (element == null) {
            return null;
        }
        // NOTE(review): the right-hand side is the decompiler's rendering of a Kotlin lambda
        // singleton and is not valid Java. The predicate's body is not visible in this file
        // (presumably it tests for <p>/<a> tags -- confirm against the Kotlin source).
        Function1<? super Element, Boolean> isParagraphOrAnchor = clean.isParagraphOrAnchor.1.INSTANCE;
        Element topNode = this.skipNonTextualTopNodes(element);
        Element updatedElement = this.addSiblingsToTopNode(topNode);
        if (updatedElement != null) {
            for (Element child : updatedElement.children()) {
                Intrinsics.checkExpressionValueIsNotNull(child, "child");
                if (isParagraphOrAnchor.invoke(child)) {
                    continue;
                }
                boolean passesHeuristics = !NodeHeuristics.INSTANCE.hasHighLinkDensity(child)
                        && !NodeHeuristics.INSTANCE.isTableOrListWithNoParagraphs(child)
                        && NodeHeuristics.INSTANCE.isNodeThresholdMet(updatedElement, child);
                if (passesHeuristics) {
                    continue;
                }
                if (child.hasParent()) {
                    child.remove();
                }
            }
        }
        return topNode;
    }

    /**
     * Walks down through wrapper elements: as long as the current element has blank own
     * text and exactly one child node that is itself an element, moves to that child.
     *
     * @param targetNode starting element
     * @return the first element on that chain that does not satisfy the wrapper condition
     */
    private final Element skipNonTextualTopNodes(Element targetNode) {
        Element current = targetNode;
        while (true) {
            String ownText = current.ownText();
            Intrinsics.checkExpressionValueIsNotNull(ownText, "targetNode.ownText()");
            if (!StringsKt.isBlank(ownText) || current.childNodeSize() != 1) {
                return current;
            }
            Node onlyChild = current.childNodes().get(0);
            if (!(onlyChild instanceof Element)) {
                return current;
            }
            current = (Element) onlyChild;
        }
    }

    /**
     * Prepends, as text nodes, the qualifying paragraph text of every previous sibling
     * element of {@code targetNode}.
     *
     * @param targetNode node to extend; may be null
     * @return {@code targetNode} itself (mutated), or null when it was null
     */
    private final Element addSiblingsToTopNode(Element targetNode) {
        if (targetNode == null) {
            return null;
        }
        double baselineParagraphSiblingScore = this.getSiblingsScore(targetNode);

        // Only element siblings can contribute paragraphs; other node types are skipped.
        List<Element> siblingElements = new ArrayList<Element>();
        for (Node sibling : TraversalHelpers.INSTANCE.getAllPreviousSiblings(targetNode)) {
            if (sibling instanceof Element) {
                siblingElements.add((Element) sibling);
            }
        }

        for (Element sibling : siblingElements) {
            for (String content : this.getSiblingsContent(sibling, baselineParagraphSiblingScore)) {
                if (StringsKt.isBlank(content)) {
                    continue;
                }
                // Each piece is inserted at the front, so later pieces end up before earlier ones.
                targetNode.prependChild(new TextNode(content));
            }
        }
        return targetNode;
    }

    /**
     * Collects the text a sibling element contributes.
     *
     * <p>A {@code p} element with non-blank text contributes exactly that text. Otherwise
     * each {@code p} found under the node contributes its text when the text is non-blank,
     * its stop-word count is strictly greater than {@code score * 0.3}, and it does not
     * have high link density.
     *
     * @param node  sibling element to inspect
     * @param score baseline score to compare paragraph stop-word counts against
     * @return the contributed texts; empty when nothing qualifies
     */
    private final List<String> getSiblingsContent(Element node, double score) {
        if ("p".equals(node.tagName())) {
            String ownParagraphText = node.text();
            Intrinsics.checkExpressionValueIsNotNull(ownParagraphText, "node.text()");
            if (!StringsKt.isBlank(ownParagraphText)) {
                return CollectionsKt.listOf(ownParagraphText);
            }
        }

        Elements candidateParagraphs = TraversalHelpersKt.find(node, "p");
        if (candidateParagraphs.isEmpty()) {
            return CollectionsKt.emptyList();
        }

        double requiredScore = score * SIBLING_SCORE_FACTOR;
        List<String> contents = new ArrayList<String>();
        for (Element paragraph : candidateParagraphs) {
            String text = paragraph.text();
            Intrinsics.checkExpressionValueIsNotNull(text, "text");
            if (StringsKt.isBlank(text)) {
                continue;
            }
            StopWordsStatistics stats = this.stopWords.statistics(text);
            int paragraphScore = stats.getStopWords().size();
            boolean hasHighLinkDensity = NodeHeuristics.INSTANCE.hasHighLinkDensity(paragraph);
            if (requiredScore < (double) paragraphScore && !hasHighLinkDensity) {
                contents.add(text);
            }
        }
        return contents;
    }

    /**
     * Computes the baseline score for sibling comparison: the mean stop-word count over
     * the {@code p} elements found under {@code topNode} that have more than two stop
     * words and do not have high link density.
     *
     * @param topNode node whose paragraphs are scored; may be null
     * @return the mean described above, or {@code 100000.0} when {@code topNode} is null
     *         or no paragraph qualifies
     */
    private final double getSiblingsScore(Element topNode) {
        if (topNode == null) {
            return DEFAULT_BASELINE_SCORE;
        }
        int qualifyingParagraphs = 0;
        double stopWordTotal = 0.0;
        for (Element paragraph : TraversalHelpersKt.find(topNode, "p")) {
            String text = paragraph.text();
            Intrinsics.checkExpressionValueIsNotNull(text, "text");
            StopWordsStatistics stats = this.stopWords.statistics(text);
            boolean hasHighLinkDensity = NodeHeuristics.INSTANCE.hasHighLinkDensity(paragraph);
            int stopWordCount = stats.getStopWords().size();
            if (stopWordCount <= MAX_IGNORED_STOP_WORDS || hasHighLinkDensity) {
                continue;
            }
            ++qualifyingParagraphs;
            stopWordTotal += (double) stopWordCount;
        }
        if (qualifyingParagraphs > 0) {
            return stopWordTotal / (double) qualifyingParagraphs;
        }
        return DEFAULT_BASELINE_SCORE;
    }
}

