/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.text.tokenization.tokenizerfactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.TreeMap;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceStreamTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.BertWordPieceTokenizer;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;

/**
 * {@link TokenizerFactory} that produces word-piece tokenizers backed by a shared vocabulary.
 *
 * <p>The vocabulary maps each token to an integer index. When loaded through one of the
 * {@code loadVocab} methods, the index of a token is its zero-based line number in the
 * vocabulary source, and the map is ordered by the reverse of the natural {@link String} order.
 *
 * <p>The pre-processor and the lower-case flag are mutable; each tokenizer created by this
 * factory receives the values that are set at the time {@code create} is called.
 */
public class BertWordPieceTokenizerFactory implements TokenizerFactory {
    /** Token-to-index vocabulary handed to every tokenizer this factory creates. */
    private final NavigableMap<String, Integer> vocab;
    /** Optional pre-processor passed to created tokenizers; may be {@code null}. */
    private TokenPreProcess tokenPreProcess;
    /** Flag forwarded to the tokenizer constructors; defaults to {@code false}. */
    private boolean lowerCaseOnly;

    /**
     * Creates a factory that uses the given vocabulary.
     *
     * @param vocab token-to-index vocabulary; must not be {@code null}. The map is used as-is
     *              (it is not copied).
     * @throws NullPointerException if {@code vocab} is {@code null}
     */
    public BertWordPieceTokenizerFactory(NavigableMap<String, Integer> vocab) {
        // Fail fast here rather than with an obscure NPE when a tokenizer is first used.
        this.vocab = Objects.requireNonNull(vocab, "vocab");
    }

    /**
     * Creates a factory whose vocabulary is read from a UTF-8 encoded file, one token per line.
     *
     * @param pathToVocab vocabulary file
     * @throws IOException if the file cannot be opened or read
     */
    public BertWordPieceTokenizerFactory(File pathToVocab) throws IOException {
        this(loadVocab(pathToVocab));
    }

    /**
     * Creates a factory whose vocabulary is read from a UTF-8 encoded stream, one token per line.
     * The stream is closed once it has been read.
     *
     * @param vocabInputStream stream providing the vocabulary
     * @throws IOException if the stream cannot be read
     */
    public BertWordPieceTokenizerFactory(InputStream vocabInputStream) throws IOException {
        this(loadVocab(vocabInputStream));
    }

    /**
     * Creates a {@link BertWordPieceTokenizer} over the given text, configured with this
     * factory's vocabulary, lower-case flag and current pre-processor.
     *
     * @param toTokenize text to tokenize
     * @return the configured tokenizer
     */
    @Override
    public Tokenizer create(String toTokenize) {
        BertWordPieceTokenizer tokenizer = new BertWordPieceTokenizer(toTokenize, vocab, lowerCaseOnly);
        tokenizer.setTokenPreProcessor(tokenPreProcess);
        return tokenizer;
    }

    /**
     * Creates a {@link BertWordPieceStreamTokenizer} over the given stream, configured with this
     * factory's vocabulary, lower-case flag and current pre-processor.
     *
     * @param toTokenize stream to tokenize
     * @return the configured tokenizer
     */
    @Override
    public Tokenizer create(InputStream toTokenize) {
        BertWordPieceStreamTokenizer tokenizer =
                new BertWordPieceStreamTokenizer(toTokenize, vocab, lowerCaseOnly);
        tokenizer.setTokenPreProcessor(tokenPreProcess);
        return tokenizer;
    }

    /**
     * Sets the pre-processor handed to tokenizers created from now on.
     *
     * @param preProcessor the pre-processor, or {@code null} for none
     */
    @Override
    public void setTokenPreProcessor(TokenPreProcess preProcessor) {
        this.tokenPreProcess = preProcessor;
    }

    /**
     * @return the pre-processor currently configured on this factory, possibly {@code null}
     */
    @Override
    public TokenPreProcess getTokenPreProcessor() {
        return tokenPreProcess;
    }

    /**
     * @return the lower-case flag that is passed to created tokenizers
     */
    public boolean isLowerCaseOnly() {
        return lowerCaseOnly;
    }

    /**
     * Sets the lower-case flag passed to tokenizers created from now on.
     *
     * @param lowerCaseOnly the new flag value
     */
    public void setLowerCaseOnly(boolean lowerCaseOnly) {
        this.lowerCaseOnly = lowerCaseOnly;
    }

    /**
     * @return a read-only view of the vocabulary; attempts to modify it throw
     *         {@link UnsupportedOperationException}
     */
    public Map<String, Integer> getVocab() {
        return Collections.unmodifiableMap(vocab);
    }

    /**
     * Reads a UTF-8 encoded vocabulary from a stream, one token per line.
     *
     * <p>Equivalent to {@link #loadVocab(InputStream, Charset)} with
     * {@link StandardCharsets#UTF_8}.
     *
     * @param is stream providing the vocabulary; it is closed before this method returns
     * @return the vocabulary, ordered by reverse natural string order
     * @throws IOException if the stream cannot be read
     */
    public static NavigableMap<String, Integer> loadVocab(InputStream is) throws IOException {
        return loadVocab(is, StandardCharsets.UTF_8);
    }

    /**
     * Reads a vocabulary from a stream, one token per line, decoding with the given charset.
     *
     * <p>Each line is used verbatim as a token (no trimming) and is mapped to its zero-based
     * line number. If the same line occurs more than once, the last occurrence's index is kept.
     *
     * @param is      stream providing the vocabulary; it is closed before this method returns
     * @param charset charset used to decode the stream
     * @return the vocabulary, ordered by reverse natural string order
     * @throws IOException if the stream cannot be read
     */
    public static NavigableMap<String, Integer> loadVocab(InputStream is, Charset charset)
            throws IOException {
        NavigableMap<String, Integer> tokenToIndex = new TreeMap<>(Collections.<String>reverseOrder());
        // An explicit charset keeps decoding independent of the JVM's platform default.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, charset))) {
            int index = 0;
            String token;
            while ((token = reader.readLine()) != null) {
                tokenToIndex.put(token, index++);
            }
        }
        return tokenToIndex;
    }

    /**
     * Reads a UTF-8 encoded vocabulary from a file, one token per line.
     *
     * @param vocabFile vocabulary file
     * @return the vocabulary, ordered by reverse natural string order
     * @throws IOException if the file cannot be opened or read
     */
    public static NavigableMap<String, Integer> loadVocab(File vocabFile) throws IOException {
        return loadVocab(vocabFile, StandardCharsets.UTF_8);
    }

    /**
     * Reads a vocabulary from a file, one token per line, decoding with the given charset.
     *
     * @param vocabFile vocabulary file
     * @param charset   charset used to decode the file
     * @return the vocabulary, ordered by reverse natural string order
     * @throws IOException if the file cannot be opened or read
     */
    public static NavigableMap<String, Integer> loadVocab(File vocabFile, Charset charset)
            throws IOException {
        // Own the file stream here so it is closed regardless of how the stream-based loader exits.
        try (InputStream in = new FileInputStream(vocabFile)) {
            return loadVocab(in, charset);
        }
    }
}

