/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.hebrew.TokenFilters;

import com.code972.hebmorph.DescFlag;
import com.code972.hebmorph.HebrewToken;
import com.code972.hebmorph.Lemmatizer;
import com.code972.hebmorph.PrefixType;
import com.code972.hebmorph.Token;
import com.code972.hebmorph.datastructures.DictHebMorph;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hebrew.HebrewTokenTypeAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public final class HebrewLemmatizerTokenFilter
extends TokenFilter {
    private final CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute)this.addAttribute(PositionIncrementAttribute.class);
    private final OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
    private final HebrewTokenTypeAttribute hebrewTypeAtt = (HebrewTokenTypeAttribute)this.addAttribute(HebrewTokenTypeAttribute.class);
    private Lemmatizer lemmatizer;
    private List<Token> previousLemmas = new ArrayList<Token>();
    private int previousStartOffset;
    private int previousEndOffset;
    private boolean previousTolerated = false;
    private boolean lemmatizeExactHebrewWords;
    private boolean lemmatizeExactNonHebrewWords;
    private HebrewTokenTypeAttribute.HebrewType previousType;
    private final Set<String> duplicateLemmas = new HashSet<String>(20);
    private final List<HebrewToken> tokensList = new ArrayList<HebrewToken>(20);

    public HebrewLemmatizerTokenFilter(TokenStream input, DictHebMorph dict) {
        this(input, dict, true, true);
    }

    public HebrewLemmatizerTokenFilter(TokenStream input, DictHebMorph dict, boolean lemmatizeExactHebrewWords, boolean lemmatizeExactNonHebrewWords) {
        super(input);
        this.lemmatizer = new Lemmatizer(dict);
        this.lemmatizeExactHebrewWords = lemmatizeExactHebrewWords;
        this.lemmatizeExactNonHebrewWords = lemmatizeExactNonHebrewWords;
    }

    public boolean incrementToken() throws IOException {
        if (!this.previousLemmas.isEmpty()) {
            HebrewToken hebToken;
            this.clearAttributes();
            String tokenVal = this.previousType == HebrewTokenTypeAttribute.HebrewType.Hebrew || this.previousType == HebrewTokenTypeAttribute.HebrewType.Acronym || this.previousType == HebrewTokenTypeAttribute.HebrewType.Construct ? ((hebToken = (HebrewToken)this.previousLemmas.remove(0)).getLemma() == null ? hebToken.getText().substring(hebToken.getPrefixLength()) : hebToken.getLemma()) : this.previousLemmas.remove(0).getText();
            this.termAtt.setEmpty().append(tokenVal);
            this.hebrewTypeAtt.setType(HebrewTokenTypeAttribute.HebrewType.Lemma);
            this.posIncrAtt.setPositionIncrement(0);
            this.offsetAtt.setOffset(this.previousStartOffset, this.previousEndOffset);
            return true;
        }
        if (!this.input.incrementToken()) {
            return false;
        }
        if (this.hebrewTypeAtt.isNumeric() || this.hebrewTypeAtt.isExact() && (!this.lemmatizeExactHebrewWords && this.hebrewTypeAtt.isHebrew() || !this.lemmatizeExactNonHebrewWords && this.hebrewTypeAtt.getType() == HebrewTokenTypeAttribute.HebrewType.NonHebrew)) {
            return true;
        }
        this.previousLemmas.clear();
        this.duplicateLemmas.clear();
        this.previousStartOffset = this.offsetAtt.startOffset();
        this.previousEndOffset = this.offsetAtt.endOffset();
        this.previousType = this.hebrewTypeAtt.getType();
        if (this.hebrewTypeAtt.isHebrew()) {
            this.previousTolerated = false;
            String word = this.termAtt.toString();
            this.tokensList.clear();
            this.lemmatizer.lemmatize(word, this.tokensList);
            if (this.tokensList.isEmpty()) {
                this.lemmatizer.lemmatizeTolerant(word, this.tokensList);
                this.previousTolerated = true;
            }
            this.tokensList.sort(Comparator.reverseOrder());
            for (HebrewToken hebToken : this.tokensList) {
                if (!this.isValidToken(hebToken) && this.previousTolerated || !this.duplicateLemmas.add(hebToken.getLemma())) continue;
                this.previousLemmas.add(hebToken);
            }
            if (!this.tokensList.isEmpty() && this.previousLemmas.isEmpty()) {
                for (HebrewToken hebToken : this.tokensList) {
                    if (!this.duplicateLemmas.add(hebToken.getLemma())) continue;
                    this.previousLemmas.add(hebToken);
                }
            }
            if (this.previousLemmas.isEmpty()) {
                this.previousLemmas.add(new HebrewToken(this.termAtt.toString(), 0, DescFlag.D_EMPTY, word, PrefixType.PS_EMPTY, 1.0f));
            }
        } else {
            this.previousLemmas.add(new Token(this.termAtt.toString()));
        }
        return true;
    }

    public void reset() throws IOException {
        super.reset();
    }

    public boolean isValidToken(HebrewToken t) {
        if (t.getScore() < 0.7f) {
            return false;
        }
        return t.getMask() != DescFlag.D_VERB || !(t.getScore() < 0.85f);
    }
}

