/*
 * Decompiled with CFR 0.152.
 */
package com.tencent.tcvdbtext.tokenizer;

import com.google.common.collect.Lists;
import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.WordDictionary;
import com.tencent.tcvdbtext.hash.BaseHash;
import com.tencent.tcvdbtext.hash.Mm3BaseHash;
import com.tencent.tcvdbtext.tokenizer.BaseTokenizer;
import com.tencent.tcvdbtext.tokenizer.StopWords;
import java.nio.file.Paths;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;

public class JiebaTokenizer
extends BaseTokenizer {
    private JiebaSegmenter segmenter;

    public JiebaTokenizer(BaseHash hash, Boolean enableStopWords, Set<String> stopWords, Boolean lowerCase, String dictFilePath) {
        super(hash, enableStopWords, stopWords, lowerCase, dictFilePath);
        if (!dictFilePath.isEmpty()) {
            WordDictionary.getInstance().init(Paths.get(dictFilePath, new String[0]));
        }
        this.segmenter = new JiebaSegmenter();
    }

    public JiebaTokenizer(BaseHash hash, Boolean enableStopWords, Set<String> stopWords, Boolean lowerCase, Boolean cutAll, String dictFilePath) {
        super(hash, enableStopWords, stopWords, lowerCase, cutAll, dictFilePath);
        if (!dictFilePath.isEmpty()) {
            WordDictionary.getInstance().init(Paths.get(dictFilePath, new String[0]));
        }
        this.segmenter = new JiebaSegmenter();
    }

    public JiebaTokenizer() {
        this.hash = new Mm3BaseHash();
        this.segmenter = new JiebaSegmenter();
        this.stopWords = StopWords.getStopWordsFromFile("data/stopwords.txt");
    }

    public JiebaTokenizer(String dictFilePath) {
        this.hash = new Mm3BaseHash();
        this.segmenter = new JiebaSegmenter();
        this.stopWords = StopWords.getStopWordsFromFile("data/stopwords.txt");
        WordDictionary.getInstance().loadUserDict(Paths.get(dictFilePath, new String[0]));
    }

    public void setDict(String dicFile) {
        if (!dicFile.isEmpty()) {
            WordDictionary.getInstance().loadUserDict(Paths.get(dicFile, new String[0]));
        }
    }

    @Override
    public void setStopWords(String stopWordsFile) {
        if (!stopWordsFile.isEmpty()) {
            this.stopWords = StopWords.getStopWordsFromFilePath(stopWordsFile);
        }
    }

    @Override
    public List<String> tokenize(String sentence) {
        if (sentence.isEmpty()) {
            return Lists.newArrayList();
        }
        if (this.lowerCase != null && this.lowerCase.booleanValue()) {
            sentence = sentence.toLowerCase();
        }
        List<String> words = this.cutAll != null && this.cutAll != false ? this.segmenter.process(sentence, JiebaSegmenter.SegMode.INDEX).stream().map(word -> word.word).collect(Collectors.toList()) : this.segmenter.process(sentence, JiebaSegmenter.SegMode.SEARCH).stream().map(word -> word.word).collect(Collectors.toList());
        words = words.stream().filter(word -> {
            if (word.equals(" ") || word.equals("\u3000")) {
                return false;
            }
            return this.enableStopWords == null || this.enableStopWords == false || this.stopWords == null || !this.stopWords.contains(word);
        }).collect(Collectors.toList());
        return words;
    }

    @Override
    public List<Long> encode(String text) {
        List<String> tokenize = this.tokenize(text);
        return tokenize.stream().map(word -> this.hash.hash((String)word)).collect(Collectors.toList());
    }

    @Override
    public String decode(List<Integer> tokens) {
        return null;
    }

    public void updateParameter(BaseHash hash, Set<String> stopWords, Boolean lowerCase, String dictFilePath) {
        this.hash = hash;
        this.stopWords = stopWords;
        this.lowerCase = lowerCase;
        this.dictFilePath = dictFilePath;
        this.loadDict(dictFilePath);
    }

    @Override
    public void loadDict(String dictFile) {
        WordDictionary.getInstance().loadUserDict(Paths.get(dictFile, new String[0]));
    }

    @Override
    public void setLowerCase(Boolean lowerCase) {
        this.lowerCase = lowerCase;
    }

    public static class Builder {
        private BaseHash hash;
        private Set<String> stopWords;
        private Boolean lowerCase;
        private String dictFilePath;
        private Boolean enableStopWords;
        private Boolean cutAll;

        public Builder withHash(BaseHash hash) {
            this.hash = hash;
            return this;
        }

        public Builder withStopWords(Set<String> stopWords) {
            this.stopWords = stopWords;
            return this;
        }

        public Builder withLowerCase(Boolean lowerCase) {
            this.lowerCase = lowerCase;
            return this;
        }

        public Builder withDictFilePath(String dictFilePath) {
            this.dictFilePath = dictFilePath;
            return this;
        }

        public Builder withEnableStopWords(Boolean enableStopWords) {
            this.enableStopWords = enableStopWords;
            return this;
        }

        public Builder withCutAll(Boolean cutAll) {
            this.cutAll = cutAll;
            return this;
        }

        public JiebaTokenizer build() {
            return new JiebaTokenizer(this.hash, this.enableStopWords, this.stopWords, this.lowerCase, this.cutAll, this.dictFilePath);
        }
    }
}

