/*
 * Decompiled with CFR 0.152.
 */
package com.code972.hebmorph;

import com.code972.hebmorph.DescFlag;
import com.code972.hebmorph.HebrewToken;
import com.code972.hebmorph.HebrewUtils;
import com.code972.hebmorph.Lemmatizer;
import com.code972.hebmorph.PrefixType;
import com.code972.hebmorph.Reference;
import com.code972.hebmorph.Token;
import com.code972.hebmorph.Tokenizer;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.datastructures.DictRadix;
import java.io.IOException;
import java.io.Reader;
import java.util.List;

/**
 * A {@link Lemmatizer} that pulls its input one token at a time from a
 * {@link Tokenizer} wrapped around a {@link Reader}, and lemmatizes each
 * token as it is read.
 *
 * <p>The source offsets of the most recently read token are available through
 * {@link #getStartOffset()} and {@link #getEndOffset()}.
 */
public class StreamLemmatizer extends Lemmatizer {
    private final Tokenizer _tokenizer;
    private int _startOffset;
    private int _endOffset;
    // When true, a Hebrew token with no exact lemmas is retried with lemmatizeTolerant().
    private boolean tolerateWhenLemmatizingStream = true;

    /**
     * Creates a lemmatizer over {@code input} with no special tokenization cases.
     *
     * @param input the reader to tokenize
     * @param dict  the dictionary handed to the superclass; may be {@code null} as far as
     *              the tokenizer construction is concerned
     */
    public StreamLemmatizer(Reader input, DictHebMorph dict) {
        this(input, dict, null);
    }

    /**
     * Creates a lemmatizer over {@code input}.
     *
     * @param input                    the reader to tokenize
     * @param dict                     the dictionary handed to the superclass; its
     *                                 {@code getPref()} result is passed to the tokenizer,
     *                                 or {@code null} when {@code dict} itself is {@code null}
     * @param specialTokenizationCases passed through to the tokenizer unchanged
     */
    public StreamLemmatizer(Reader input, DictHebMorph dict, DictRadix<Byte> specialTokenizationCases) {
        super(dict);
        this._tokenizer = new Tokenizer(input, dict == null ? null : dict.getPref(), specialTokenizationCases);
    }

    /**
     * Points the underlying tokenizer at a new reader and zeroes both stored offsets.
     *
     * @param input the new reader to tokenize
     */
    public void reset(Reader input) {
        this._tokenizer.reset(input);
        this._startOffset = 0;
        this._endOffset = 0;
    }

    /** Returns the tokenizer offset recorded for the most recently read token. */
    public int getStartOffset() {
        return this._startOffset;
    }

    /** Returns the start offset plus the source length of the most recently read token. */
    public int getEndOffset() {
        return this._endOffset;
    }

    /**
     * Forwards the exact-match suffix setting to the underlying tokenizer.
     *
     * @param suffixForExactMatch value passed through unchanged
     */
    public void setSuffixForExactMatch(Character suffixForExactMatch) {
        this._tokenizer.setSuffixForExactMatch(suffixForExactMatch);
    }

    /**
     * Reads the next usable token from the stream and fills {@code retTokens} with the
     * tokens produced for it.
     *
     * <p>{@code retTokens} is cleared first. Tokens flagged Construct or Acronym whose
     * niqqud-stripped text is a legal prefix are skipped and the next token is read.
     * For the token finally selected:
     * <ul>
     *   <li>a type of {@code 0} returns immediately with nothing added (presumably end of
     *       stream - confirm against {@code Tokenizer.nextToken});</li>
     *   <li>a non-Hebrew token is added as a single {@link Token}, built with the numeric
     *       constructor form when the Numeric flag is set;</li>
     *   <li>a Hebrew token flagged Exact returns with nothing added;</li>
     *   <li>a Mixed or Custom token made of a legal Hebrew prefix followed only by
     *       non-Hebrew characters is reduced to the non-Hebrew part, added as a plain
     *       {@link Token}, and reported as {@code NonHebrew};</li>
     *   <li>otherwise the word is lemmatized; if that yields nothing, an acronym gets a
     *       single {@code D_ACRONYM} token, and any other word is retried with
     *       {@code lemmatizeTolerant} when tolerant mode is on.</li>
     * </ul>
     *
     * @param nextToken out-parameter; its {@code ref} holds the token text, updated in place
     *                  when niqqud or a prefix is stripped
     * @param retTokens output list, cleared and then filled with the resulting tokens
     *                  (may be left empty)
     * @return the token type flags from the tokenizer, with the Acronym flag cleared when the
     *         prefix-stripped acronym no longer contains a double quote, or replaced by
     *         {@code NonHebrew} when a Hebrew prefix was stripped from a mixed token
     * @throws IOException declared for the underlying tokenizer read
     */
    public final int getLemmatizeNextToken(Reference<String> nextToken, List<Token> retTokens) throws IOException {
        retTokens.clear();

        int tokenType;
        while (true) {
            tokenType = this._tokenizer.nextToken(nextToken);
            this._startOffset = this._tokenizer.getOffset();
            this._endOffset = this._startOffset + this._tokenizer.getLengthInSource();

            if (tokenType == 0) {
                return tokenType;
            }

            if (!isFlagSet(tokenType, Tokenizer.TokenType.Hebrew)) {
                if (isFlagSet(tokenType, Tokenizer.TokenType.Numeric)) {
                    retTokens.add(new Token(nextToken.ref, true));
                } else {
                    retTokens.add(new Token(nextToken.ref));
                }
                return tokenType;
            }

            nextToken.ref = StreamLemmatizer.removeNiqqud(nextToken.ref);

            // A construct/acronym "word" that is nothing but a legal prefix is noise: read on.
            boolean prefixCandidate = isFlagSet(tokenType, Tokenizer.TokenType.Construct)
                    || isFlagSet(tokenType, Tokenizer.TokenType.Acronym);
            if (prefixCandidate && this.isLegalPrefix(nextToken.ref)) {
                continue;
            }
            break;
        }

        if (isFlagSet(tokenType, Tokenizer.TokenType.Exact)) {
            return tokenType;
        }

        if (isFlagSet(tokenType, Tokenizer.TokenType.Mixed) || isFlagSet(tokenType, Tokenizer.TokenType.Custom)) {
            String nonHebrewTail = this.nonHebrewTailAfterLegalPrefix(nextToken.ref);
            if (nonHebrewTail != null) {
                nextToken.ref = nonHebrewTail;
                retTokens.add(new Token(nonHebrewTail));
                return Tokenizer.TokenType.NonHebrew;
            }
        }

        if (isFlagSet(tokenType, Tokenizer.TokenType.Acronym)) {
            String stripped = this.tryStrippingPrefix(nextToken.ref);
            nextToken.ref = stripped;
            // Without a double quote left in the word, stop reporting it as an acronym.
            if (stripped.indexOf('"') == -1) {
                tokenType &= ~Tokenizer.TokenType.Acronym;
            }
        }

        String word = nextToken.ref;
        List<HebrewToken> exactLemmas = this.lemmatize(word);
        if (exactLemmas != null && !exactLemmas.isEmpty()) {
            retTokens.addAll(exactLemmas);
        }
        if (!retTokens.isEmpty()) {
            return tokenType;
        }

        if (isFlagSet(tokenType, Tokenizer.TokenType.Acronym)) {
            // Unknown acronym: emit it as-is, as its own lemma.
            retTokens.add(new HebrewToken(word, 0, DescFlag.D_ACRONYM, word, PrefixType.PS_NONDEF, 1.0f));
            return tokenType;
        }

        if (this.tolerateWhenLemmatizingStream) {
            List<HebrewToken> tolerantLemmas = this.lemmatizeTolerant(word);
            if (tolerantLemmas != null && !tolerantLemmas.isEmpty()) {
                retTokens.addAll(tolerantLemmas);
            }
        }
        return tokenType;
    }

    /** Tests a token-type flag using the same {@code (flags & flag) > 0} comparison throughout. */
    private static boolean isFlagSet(int flags, int flag) {
        return (flags & flag) > 0;
    }

    /**
     * Returns the non-Hebrew remainder of {@code word} when the word is a legal Hebrew
     * prefix followed exclusively by non-Hebrew characters (at least two of them), or
     * {@code null} when the word does not have that shape.
     */
    private String nonHebrewTailAfterLegalPrefix(String word) {
        final int length = word.length();

        int prefixEnd = 0;
        while (prefixEnd < length && HebrewUtils.isHebrewLetter(word.charAt(prefixEnd))) {
            prefixEnd++;
        }
        if (prefixEnd <= 0 || prefixEnd >= length - 1 || !this.isLegalPrefix(word.substring(0, prefixEnd))) {
            return null;
        }

        // Any Hebrew letter after the prefix means this is not "prefix + foreign word".
        for (int i = prefixEnd; i < length; i++) {
            if (HebrewUtils.isHebrewLetter(word.charAt(i))) {
                return null;
            }
        }
        return word.substring(prefixEnd);
    }
}

