/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.util.Arrays;
import java.util.List;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.clustering.lingo.IFeatureScorer;
import org.carrot2.clustering.lingo.ILabelAssigner;
import org.carrot2.clustering.lingo.LingoProcessingContext;
import org.carrot2.clustering.lingo.SimpleLabelAssigner;
import org.carrot2.clustering.lingo.UniqueLabelAssigner;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.vsm.ITermWeighting;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.GraphUtils;
import org.carrot2.util.LinearApproximation;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

@Bindable(prefix="LingoClusteringAlgorithm")
public class ClusterBuilder {
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=10.0)
    @Group(value="Labels")
    @Level(value=AttributeLevel.MEDIUM)
    @Label(value="Phrase label boost")
    public double phraseLabelBoost = 1.5;
    @Input
    @Processing
    @Attribute
    @IntRange(min=2, max=8)
    @Group(value="Labels")
    @Level(value=AttributeLevel.ADVANCED)
    @Label(value="Phrase length penalty start")
    public int phraseLengthPenaltyStart = 8;
    @Input
    @Processing
    @Attribute
    @IntRange(min=2, max=8)
    @Group(value="Labels")
    @Level(value=AttributeLevel.ADVANCED)
    @Label(value="Phrase length penalty stop")
    public int phraseLengthPenaltyStop = 8;
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Group(value="Clusters")
    @Level(value=AttributeLevel.MEDIUM)
    @Label(value="Cluster merging threshold")
    public double clusterMergingThreshold = 0.7;
    public IFeatureScorer featureScorer = null;
    @Input
    @Processing
    @Attribute
    @Required
    @ImplementingClasses(classes={UniqueLabelAssigner.class, SimpleLabelAssigner.class})
    @Group(value="Labels")
    @Level(value=AttributeLevel.ADVANCED)
    @Label(value="Cluster label assignment method")
    public ILabelAssigner labelAssigner = new UniqueLabelAssigner();
    private LinearApproximation documentSizeCoefficients = new LinearApproximation(new double[]{1.0, 1.5, 1.3, 0.9, 0.7, 0.6, 0.3, 0.05, 0.05, 0.05, 0.05}, 0.0, 1.0);

    void buildLabels(LingoProcessingContext context, ITermWeighting termWeighting) {
        int featureIndex;
        PreprocessingContext preprocessingContext = context.preprocessingContext;
        VectorSpaceModelContext vsmContext = context.vsmContext;
        DoubleMatrix2D reducedTdMatrix = context.reducedVsmContext.baseMatrix;
        int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
        int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
        int[] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex;
        int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
        BitSet[] labelsDocumentIndices = preprocessingContext.allLabels.documentIndices;
        int wordCount = preprocessingContext.allWords.image.length;
        int documentCount = preprocessingContext.documents.size();
        BitSet oneWordCandidateStemIndices = new BitSet();
        for (int i = 0; i < labelsFeatureIndex.length && (featureIndex = labelsFeatureIndex[i]) < wordCount; ++i) {
            oneWordCandidateStemIndices.set((long)wordsStemIndex[featureIndex]);
        }
        IntIntHashMap stemToRowIndex = vsmContext.stemToRowIndex;
        IntIntHashMap filteredRowToStemIndex = new IntIntHashMap();
        IntArrayList filteredRows = new IntArrayList();
        int filteredRowIndex = 0;
        for (IntIntCursor it : stemToRowIndex) {
            if (!oneWordCandidateStemIndices.get(it.key)) continue;
            filteredRowToStemIndex.put(filteredRowIndex++, it.key);
            filteredRows.add(it.value);
        }
        double[] featureScores = this.featureScorer != null ? this.featureScorer.getFeatureScores(context) : null;
        int[] wordLabelIndex = new int[wordCount];
        Arrays.fill(wordLabelIndex, -1);
        for (int i = 0; i < labelsFeatureIndex.length; ++i) {
            int featureIndex2 = labelsFeatureIndex[i];
            if (featureIndex2 >= wordCount) continue;
            wordLabelIndex[featureIndex2] = i;
        }
        DoubleMatrix2D stemCos = reducedTdMatrix.viewSelection(filteredRows.toArray(), null).copy();
        for (int r = 0; r < stemCos.rows(); ++r) {
            int labelIndex = wordLabelIndex[mostFrequentOriginalWordIndex[filteredRowToStemIndex.get(r)]];
            double penalty = this.getDocumentCountPenalty(labelIndex, documentCount, labelsDocumentIndices);
            if (featureScores != null) {
                penalty *= featureScores[labelIndex];
            }
            stemCos.viewRow(r).assign(Functions.mult((double)penalty));
        }
        DoubleMatrix2D phraseMatrix = vsmContext.termPhraseMatrix;
        int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
        DoubleMatrix2D phraseCos = null;
        if (phraseMatrix != null) {
            phraseCos = phraseMatrix.zMult(reducedTdMatrix, null, 1.0, 0.0, false, false);
            if (this.phraseLengthPenaltyStop < this.phraseLengthPenaltyStart) {
                this.phraseLengthPenaltyStop = this.phraseLengthPenaltyStart;
            }
            double penaltyStep = 1.0 / (double)(this.phraseLengthPenaltyStop - this.phraseLengthPenaltyStart + 1);
            for (int row = 0; row < phraseCos.rows(); ++row) {
                double penalty;
                int phraseFeature = labelsFeatureIndex[row + firstPhraseIndex];
                int[] phraseWordIndices = phrasesWordIndices[phraseFeature - wordCount];
                if (phraseWordIndices.length >= this.phraseLengthPenaltyStop) {
                    penalty = 0.0;
                } else {
                    penalty = this.getDocumentCountPenalty(row + firstPhraseIndex, documentCount, labelsDocumentIndices);
                    if (phraseWordIndices.length >= this.phraseLengthPenaltyStart) {
                        penalty *= 1.0 - penaltyStep * (double)(phraseWordIndices.length - this.phraseLengthPenaltyStart + 1);
                    }
                    if (featureScores != null) {
                        penalty *= featureScores[row + firstPhraseIndex];
                    }
                }
                phraseCos.viewRow(row).assign(Functions.mult((double)(penalty * this.phraseLabelBoost)));
            }
        }
        this.labelAssigner.assignLabels(context, stemCos, filteredRowToStemIndex, phraseCos);
    }

    private double getDocumentCountPenalty(int labelIndex, int documentCount, BitSet[] labelsDocumentIndices) {
        return this.documentSizeCoefficients.getValue((double)labelsDocumentIndices[labelIndex].cardinality() / (double)documentCount);
    }

    void assignDocuments(LingoProcessingContext context) {
        int[] clusterLabelFeatureIndex = context.clusterLabelFeatureIndex;
        BitSet[] clusterDocuments = new BitSet[clusterLabelFeatureIndex.length];
        int[] labelsFeatureIndex = context.preprocessingContext.allLabels.featureIndex;
        BitSet[] documentIndices = context.preprocessingContext.allLabels.documentIndices;
        IntIntHashMap featureValueToIndex = new IntIntHashMap();
        for (int i = 0; i < labelsFeatureIndex.length; ++i) {
            featureValueToIndex.put(labelsFeatureIndex[i], i);
        }
        for (int clusterIndex = 0; clusterIndex < clusterDocuments.length; ++clusterIndex) {
            clusterDocuments[clusterIndex] = documentIndices[featureValueToIndex.get(clusterLabelFeatureIndex[clusterIndex])];
        }
        context.clusterDocuments = clusterDocuments;
    }

    void merge(LingoProcessingContext context) {
        final BitSet[] clusterDocuments = context.clusterDocuments;
        int[] clusterLabelFeatureIndex = context.clusterLabelFeatureIndex;
        double[] clusterLabelScore = context.clusterLabelScore;
        List<IntArrayList> mergedClusters = GraphUtils.findCoherentSubgraphs(clusterDocuments.length, new GraphUtils.IArcPredicate(){
            private BitSet temp = new BitSet();

            @Override
            public boolean isArcPresent(int clusterA, int clusterB) {
                int size;
                this.temp.clear();
                BitSet setA = clusterDocuments[clusterA];
                BitSet setB = clusterDocuments[clusterB];
                if (setA.cardinality() < setB.cardinality()) {
                    this.temp.or(setA);
                    this.temp.intersect(setB);
                    size = (int)setB.cardinality();
                } else {
                    this.temp.or(setB);
                    this.temp.intersect(setA);
                    size = (int)setA.cardinality();
                }
                return (double)this.temp.cardinality() / (double)size >= ClusterBuilder.this.clusterMergingThreshold;
            }
        }, true);
        for (IntArrayList clustersToMerge : mergedClusters) {
            int clusterIndex;
            int i;
            int mergeBaseClusterIndex = -1;
            double maxScore = -1.0;
            int[] buf = clustersToMerge.buffer;
            int max = clustersToMerge.size();
            for (i = 0; i < max; ++i) {
                clusterIndex = buf[i];
                if (!(clusterLabelScore[clusterIndex] > maxScore)) continue;
                mergeBaseClusterIndex = clusterIndex;
                maxScore = clusterLabelScore[clusterIndex];
            }
            for (i = 0; i < max; ++i) {
                clusterIndex = buf[i];
                if (clusterIndex == mergeBaseClusterIndex) continue;
                clusterDocuments[mergeBaseClusterIndex].or(clusterDocuments[clusterIndex]);
                clusterLabelFeatureIndex[clusterIndex] = -1;
                clusterDocuments[clusterIndex] = null;
            }
        }
    }
}

