/*
 * Decompiled with CFR 0.152.
 */
package com.worksap.nlp.sudachi.dictionary;

import com.worksap.nlp.dartsclone.DoubleArray;
import com.worksap.nlp.sudachi.dictionary.CSVParser;
import com.worksap.nlp.sudachi.dictionary.DictionaryHeader;
import com.worksap.nlp.sudachi.dictionary.WordInfo;
import java.io.Console;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.LogManager;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DictionaryBuilder {
    /** Upper bound applied to string lengths (the largest value a {@code short} can hold). */
    static final int STRING_MAX_LENGTH = Short.MAX_VALUE;

    /** Upper bound applied to the number of elements in split-info / synonym arrays. */
    static final int ARRAY_MAX_LENGTH = 127;

    /** Minimum number of CSV columns a lexicon record must have. */
    static final int MIN_REQUIRED_NUMBER_OF_COLUMNS = 18;

    /** Capacity, in bytes, of the scratch output buffer (1 MiB). */
    static final int BUFFER_SIZE = 0x100000;

    /** Interns part-of-speech strings and hands out their numeric IDs. */
    POSTable posTable = new POSTable();

    /**
     * Trie keys (UTF-8 encoded headwords) mapped to the IDs of the words sharing
     * that headword. Keys are ordered byte-wise, comparing bytes as unsigned
     * values; a key that is a proper prefix of another sorts first.
     */
    SortedMap<byte[], List<Integer>> trieKeys = new TreeMap<>((byte[] left, byte[] right) -> {
        int common = Math.min(left.length, right.length);
        for (int i = 0; i < common; ++i) {
            int diff = (left[i] & 0xFF) - (right[i] & 0xFF);
            if (diff != 0) {
                return diff;
            }
        }
        return left.length - right.length;
    });

    /** All parsed lexicon entries; an entry's index in this list is its word ID. */
    List<WordEntry> entries = new ArrayList<>();

    /** When set, IDs written as {@code U<n>} get the bit {@code 0x10000000} added (see parseId). */
    boolean isUserDictionary = false;

    /** Little-endian scratch buffer used to stage bytes before they are written out. */
    ByteBuffer byteBuffer;

    /**
     * The same object as {@link #byteBuffer}, typed as {@link Buffer}; flip() and
     * clear() are always invoked through this reference.
     * NOTE(review): presumably done for binary compatibility across JDK versions
     * (covariant ByteBuffer overrides) -- confirm.
     */
    Buffer buffer;

    /** Logger named after the runtime class of this builder. */
    protected Logger logger = Logger.getLogger(this.getClass().getName());

    /** Matches the escapes {@code \}{@code uXXXX} (four hex digits) and {@code \}{@code u{X...}}. */
    static final Pattern unicodeLiteral = Pattern.compile("\\\\u([0-9a-fA-F]{4}|\\{[0-9a-fA-F]+\\})");

    /**
     * Creates a builder with a 1 MiB little-endian scratch buffer. The buffer is
     * stored under both {@code byteBuffer} and {@code buffer}.
     */
    DictionaryBuilder() {
        ByteBuffer scratch = ByteBuffer.allocate(0x100000);
        scratch.order(ByteOrder.LITTLE_ENDIAN);
        this.byteBuffer = scratch;
        this.buffer = scratch;
    }

    /**
     * Reads every lexicon file, then writes the grammar section followed by the
     * lexicon section to {@code output}.
     *
     * <p>The channel obtained from {@code output} is closed when this method
     * returns, whether normally or by exception; closing the channel also closes
     * the stream it came from.
     *
     * @param lexiconPaths paths of the lexicon CSV files, read in the given order
     * @param matrixInput  connection matrix source; may be {@code null}, in which
     *                     case an empty (0 x 0) matrix is written
     * @param output       destination stream
     * @throws IOException if reading a source or writing the output fails
     */
    void build(List<String> lexiconPaths, FileInputStream matrixInput, FileOutputStream output) throws IOException {
        this.logger.info("reading the source file...");
        for (String path : lexiconPaths) {
            try (FileInputStream lexiconInput = new FileInputStream(path)) {
                this.buildLexicon(path, lexiconInput);
            }
        }
        this.logger.info(() -> String.format(" %,d words%n", this.entries.size()));

        // try-with-resources so the channel is released on the failure path too.
        try (FileChannel outputChannel = output.getChannel()) {
            this.writeGrammar(matrixInput, outputChannel);
            this.writeLexicon(outputChannel);
        }
    }

    /**
     * Parses one lexicon CSV stream and appends its records to {@code entries}.
     * Entries that carry a headword are also registered in the trie under the
     * word ID they are about to receive (their index in {@code entries}).
     *
     * <p>The input is decoded as UTF-8 regardless of the platform default
     * charset, matching the UTF-8 byte lengths and trie keys computed elsewhere
     * in this class. The stream is closed when this method returns.
     *
     * @param filename     name reported in the error log; not used for I/O
     * @param lexiconInput stream positioned at the start of the CSV data
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if a record is malformed (see parseLine)
     */
    void buildLexicon(String filename, FileInputStream lexiconInput) throws IOException {
        int lineno = -1;
        try (InputStreamReader isr = new InputStreamReader(lexiconInput, StandardCharsets.UTF_8);
             LineNumberReader reader = new LineNumberReader(isr);
             CSVParser parser = new CSVParser(reader)) {
            for (List<String> columns = parser.getNextRecord(); columns != null; columns = parser.getNextRecord()) {
                lineno = reader.getLineNumber();
                WordEntry entry = this.parseLine(columns.toArray(new String[columns.size()]));
                if (entry.headword != null) {
                    this.addToTrie(entry.headword, this.entries.size());
                }
                this.entries.add(entry);
            }
        }
        catch (Exception e) {
            // Only mention a line number once at least one record has been read.
            if (lineno > 0) {
                this.logger.severe("Error: " + e.getMessage() + " at line " + lineno + " in " + filename + "\n");
            }
            throw e;
        }
    }

    /**
     * Converts one lexicon record into a {@link WordEntry}.
     *
     * <p>Columns 0-14 are unescaped in place with {@code decode}. Columns used:
     * 0 headword, 1-3 the three {@code short} parameters, 4 surface, 5-10 the six
     * part-of-speech fields, 11 reading form, 12 normalized form, 13 dictionary
     * form word ID ({@code *} means -1), 14 split mode, 15-17 A-unit split,
     * B-unit split and word structure, 18 (optional) synonym group IDs.
     *
     * @param cols the record's columns; the first 15 elements are overwritten
     *             with their decoded form
     * @return the parsed entry; {@code headword} stays {@code null} when column 1
     *         is {@code "-1"}, so such an entry is not added to the trie
     * @throws IllegalArgumentException if the record has fewer than 18 columns, a
     *         string is too long, the headword is empty, the POS ID is negative,
     *         a split column has too many units, an "A" mode word declares a
     *         split, or a numeric column cannot be parsed
     */
    WordEntry parseLine(String[] cols) {
        if (cols.length < 18) {
            throw new IllegalArgumentException("invalid format");
        }
        for (int col = 0; col < 15; ++col) {
            cols[col] = DictionaryBuilder.decode(cols[col]);
        }

        final int headwordBytes = cols[0].getBytes(StandardCharsets.UTF_8).length;
        boolean lengthsOk = headwordBytes <= Short.MAX_VALUE
                && DictionaryBuilder.isValidLength(cols[4])
                && DictionaryBuilder.isValidLength(cols[11])
                && DictionaryBuilder.isValidLength(cols[12]);
        if (!lengthsOk) {
            throw new IllegalArgumentException("string is too long");
        }
        if (cols[0].isEmpty()) {
            throw new IllegalArgumentException("headword is empty");
        }

        WordEntry entry = new WordEntry();
        if (!cols[1].equals("-1")) {
            entry.headword = cols[0];
        }
        short first = Short.parseShort(cols[1]);
        short second = Short.parseShort(cols[2]);
        short third = Short.parseShort(cols[3]);
        entry.parameters = new short[]{first, second, third};

        short posId = this.getPosId(cols[5], cols[6], cols[7], cols[8], cols[9], cols[10]);
        if (posId < 0) {
            throw new IllegalArgumentException("invalid part of speech");
        }

        entry.aUnitSplitString = cols[15];
        entry.bUnitSplitString = cols[16];
        entry.wordStructureString = cols[17];
        this.checkSplitInfoFormat(entry.aUnitSplitString);
        this.checkSplitInfoFormat(entry.bUnitSplitString);
        this.checkSplitInfoFormat(entry.wordStructureString);

        // A word in split mode "A" must not declare A- or B-unit splits.
        boolean declaresNoSplit = entry.aUnitSplitString.equals("*") && entry.bUnitSplitString.equals("*");
        if (cols[14].equals("A") && !declaresNoSplit) {
            throw new IllegalArgumentException("invalid splitting");
        }

        int[] synonymGids = cols.length > 18 ? this.parseSynonymGids(cols[18]) : new int[]{};
        int dictionaryFormWordId = cols[13].equals("*") ? -1 : Integer.parseInt(cols[13]);
        entry.wordInfo = new WordInfo(cols[4], (short)headwordBytes, posId, cols[12], dictionaryFormWordId, "", cols[11], null, null, null, synonymGids);
        return entry;
    }

    /**
     * Registers {@code wordId} under the UTF-8 encoding of {@code headword},
     * creating the ID list for that key on first use.
     *
     * @param headword the trie key text
     * @param wordId   the ID to append to the key's list
     */
    void addToTrie(String headword, int wordId) {
        byte[] key = headword.getBytes(StandardCharsets.UTF_8);
        List<Integer> wordIds = this.trieKeys.get(key);
        if (wordIds == null) {
            wordIds = new ArrayList<>();
            this.trieKeys.put(key, wordIds);
        }
        wordIds.add(wordId);
    }

    /**
     * Looks up (registering it if new) the ID of the part of speech formed by
     * joining {@code posStrings} with commas.
     *
     * @param posStrings the part-of-speech fields, in order
     * @return the ID reported by the POS table
     */
    short getPosId(String ... posStrings) {
        String joined = String.join(",", posStrings);
        return this.posTable.getId(joined);
    }

    /**
     * Writes the grammar section: the POS table followed by the connection matrix.
     *
     * <p>With no matrix input, the matrix is written as two zero {@code short}s
     * (a 0 x 0 matrix). Otherwise the two sizes staged by {@code convertMatrix}
     * are written first, followed by the matrix body.
     *
     * @param matrixInput matrix definition stream, or {@code null}
     * @param output      channel to write to
     * @throws IOException if reading the matrix or writing the output fails
     */
    void writeGrammar(FileInputStream matrixInput, FileChannel output) throws IOException {
        this.logger.info("writing the POS table...");
        this.convertPOSTable(this.posTable.getList());
        this.buffer.flip();
        output.write(this.byteBuffer);
        this.printSize(this.byteBuffer.limit());
        this.buffer.clear();

        this.logger.info("writing the connection matrix...");
        if (matrixInput != null) {
            // convertMatrix leaves the two size shorts in the scratch buffer.
            ByteBuffer matrix = this.convertMatrix(matrixInput);
            this.buffer.flip();
            output.write(this.byteBuffer);
            this.buffer.clear();
            output.write(matrix);
            this.printSize((long)matrix.limit() + 4L);
            return;
        }

        this.byteBuffer.putShort((short)0);
        this.byteBuffer.putShort((short)0);
        this.buffer.flip();
        output.write(this.byteBuffer);
        this.printSize(this.byteBuffer.limit());
        this.buffer.clear();
    }

    /**
     * Stages the POS table in the scratch buffer: the number of entries as a
     * {@code short}, then every comma-separated field of every entry as a
     * length-prefixed string.
     *
     * <p>Fields are split with a negative limit so that empty trailing fields are
     * kept. The output stores no per-entry field count, so every entry must emit
     * as many strings as were joined to form it.
     *
     * @param posList comma-joined part-of-speech strings, in ID order
     */
    void convertPOSTable(List<String> posList) {
        this.byteBuffer.putShort((short)posList.size());
        for (String pos : posList) {
            // limit -1: the default split() would silently drop trailing "" fields.
            for (String text : pos.split(",", -1)) {
                this.writeString(text);
            }
        }
    }

    /**
     * Reads a textual connection matrix and converts it to its binary form.
     *
     * <p>The first line holds the left and right sizes; both are staged in the
     * scratch buffer as {@code short}s (not yet written out). Every following
     * non-blank line holds {@code left right cost}; the cost is stored at byte
     * offset {@code 2 * (left + leftSize * right)}. Lines with fewer than three
     * fields are logged as a warning and skipped. Leading and trailing
     * whitespace on a line is ignored.
     *
     * <p>The reader is deliberately not closed: the stream belongs to the caller.
     *
     * @param matrixInput the matrix definition
     * @return a little-endian buffer of {@code 2 * leftSize * rightSize} bytes
     *         whose position is 0 and whose limit is its capacity
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if the header line is missing or has fewer
     *         than two fields, or if a number cannot be parsed
     */
    ByteBuffer convertMatrix(InputStream matrixInput) throws IOException {
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(matrixInput, StandardCharsets.UTF_8));
        // Compiled once instead of once per line via String.matches/split.
        Pattern blank = Pattern.compile("\\s*");
        Pattern separator = Pattern.compile("\\s+");

        String header = reader.readLine();
        if (header == null) {
            throw new IllegalArgumentException("invalid format at line " + reader.getLineNumber());
        }
        String[] lr = separator.split(header.trim());
        if (lr.length < 2) {
            // Previously surfaced as an ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException("invalid format at line " + reader.getLineNumber());
        }
        short leftSize = Short.parseShort(lr[0]);
        short rightSize = Short.parseShort(lr[1]);
        this.byteBuffer.putShort(leftSize);
        this.byteBuffer.putShort(rightSize);

        ByteBuffer matrix = ByteBuffer.allocate(2 * leftSize * rightSize);
        matrix.order(ByteOrder.LITTLE_ENDIAN);
        String line;
        while ((line = reader.readLine()) != null) {
            if (blank.matcher(line).matches()) {
                continue;
            }
            // trim() first so leading whitespace does not produce an empty first field.
            String[] cols = separator.split(line.trim());
            if (cols.length < 3) {
                this.logger.warning("invalid format at line " + reader.getLineNumber());
                continue;
            }
            short left = Short.parseShort(cols[0]);
            short right = Short.parseShort(cols[1]);
            short cost = Short.parseShort(cols[2]);
            // Absolute put: the buffer's position stays at 0.
            matrix.putShort(2 * (left + leftSize * right), cost);
        }
        return matrix;
    }

    /**
     * Writes the lexicon section, in this order:
     * <ol>
     * <li>the trie: its size as an {@code int}, then its raw bytes;</li>
     * <li>the word-ID table: its byte length as an {@code int}, then one record
     *     per trie key (a count byte followed by that many {@code int} word IDs);
     *     the trie value of a key is the offset of its record in this table;</li>
     * <li>the word parameters: the entry count as an {@code int}, then three
     *     {@code short}s per entry;</li>
     * <li>the word infos (see {@code writeWordInfo}).</li>
     * </ol>
     *
     * <p>The entry count of the word-parameter block is flushed on its own, so it
     * reaches the file even when there are no entries.
     *
     * @param output channel to write to
     * @throws IOException if writing fails
     */
    void writeLexicon(FileChannel output) throws IOException {
        DoubleArray trie = new DoubleArray();
        int size = this.trieKeys.size();
        byte[][] keys = new byte[size][];
        int[] values = new int[size];
        ByteBuffer wordIdTable = ByteBuffer.allocate(this.entries.size() * 6);
        wordIdTable.order(ByteOrder.LITTLE_ENDIAN);
        int i = 0;
        for (Map.Entry<byte[], List<Integer>> entry : this.trieKeys.entrySet()) {
            keys[i] = entry.getKey();
            values[i] = wordIdTable.position();
            ++i;
            List<Integer> wordIds = entry.getValue();
            // NOTE(review): the count is narrowed to one byte; more than 255 IDs
            // under one key would wrap silently -- confirm this cannot happen.
            wordIdTable.put((byte)wordIds.size());
            for (int wid : wordIds) {
                wordIdTable.putInt(wid);
            }
        }

        this.logger.info("building the trie");
        trie.build(keys, values, (n, s) -> {
            // Emits a progress dot roughly every tenth of the work.
            if (n % (s / 10 + 1) == 0) {
                this.logger.info(".");
            }
        });
        this.logger.info("done\n");

        this.logger.info("writing the trie...");
        this.buffer.clear();
        this.byteBuffer.putInt(trie.size());
        this.buffer.flip();
        output.write(this.byteBuffer);
        this.buffer.clear();
        output.write(trie.byteArray());
        // long arithmetic: multiplying as int could overflow before widening.
        this.printSize(trie.size() * 4L + 4L);
        trie = null;

        this.logger.info("writing the word-ID table...");
        this.byteBuffer.putInt(wordIdTable.position());
        this.buffer.flip();
        output.write(this.byteBuffer);
        this.buffer.clear();
        ((Buffer)wordIdTable).flip();
        output.write(wordIdTable);
        this.printSize((long)wordIdTable.position() + 4L);
        wordIdTable = null;

        this.logger.info("writing the word parameters...");
        // Flush the count by itself. Previously it was only written together with
        // the first entry, so an empty lexicon produced a file missing these 4 bytes.
        this.byteBuffer.putInt(this.entries.size());
        this.buffer.flip();
        output.write(this.byteBuffer);
        this.buffer.clear();
        for (WordEntry wordEntry : this.entries) {
            this.byteBuffer.putShort(wordEntry.parameters[0]);
            this.byteBuffer.putShort(wordEntry.parameters[1]);
            this.byteBuffer.putShort(wordEntry.parameters[2]);
            this.buffer.flip();
            output.write(this.byteBuffer);
            this.buffer.clear();
        }
        this.printSize(this.entries.size() * 6L + 4L);

        this.writeWordInfo(output);
    }

    /**
     * Writes the word-info block: a table of {@code int} file offsets (one per
     * entry) followed by the variable-length word-info records.
     *
     * <p>Space for the offset table is reserved first by moving the channel
     * position forward; the records are written while their start positions are
     * collected; finally the channel is moved back and the table is filled in.
     * The channel position is left just after the offset table.
     *
     * <p>Each record holds, in order: surface, headword length, POS ID,
     * normalized form, dictionary-form word ID, reading form, A-unit split,
     * B-unit split, word structure and synonym group IDs. A normalized or reading
     * form equal to the surface is stored as an empty string.
     *
     * @param output channel to write to
     * @throws IOException if writing fails
     * @throws IllegalArgumentException if a split-info string cannot be resolved
     */
    void writeWordInfo(FileChannel output) throws IOException {
        final int tableBytes = 4 * this.entries.size();
        long mark = output.position();
        output.position(mark + (long)tableBytes);
        ByteBuffer offsets = ByteBuffer.allocate(tableBytes);
        offsets.order(ByteOrder.LITTLE_ENDIAN);

        this.logger.info("writing the wordInfos...");
        long base = output.position();
        for (WordEntry entry : this.entries) {
            WordInfo wi = entry.wordInfo;
            String surface = wi.getSurface();

            offsets.putInt((int)output.position());
            this.writeString(surface);
            this.writeStringLength(wi.getLength());
            this.byteBuffer.putShort(wi.getPOSId());

            String normalized = wi.getNormalizedForm();
            this.writeString(normalized.equals(surface) ? "" : normalized);

            this.byteBuffer.putInt(wi.getDictionaryFormWordId());

            String reading = wi.getReadingForm();
            this.writeString(reading.equals(surface) ? "" : reading);

            this.writeIntArray(this.parseSplitInfo(entry.aUnitSplitString));
            this.writeIntArray(this.parseSplitInfo(entry.bUnitSplitString));
            this.writeIntArray(this.parseSplitInfo(entry.wordStructureString));
            this.writeIntArray(wi.getSynonymGoupIds());

            // One record per write keeps output.position() exact for the next offset.
            this.buffer.flip();
            output.write(this.byteBuffer);
            this.buffer.clear();
        }
        this.printSize(output.position() - base);

        this.logger.info("writing wordInfo offsets...");
        output.position(mark);
        ((Buffer)offsets).flip();
        output.write(offsets);
        this.printSize(offsets.position());
    }

    /**
     * Tells whether {@code text} is short enough for its length to fit in a
     * {@code short}.
     *
     * @param text the string to check
     * @return {@code true} if the length in UTF-16 code units is at most
     *         {@link Short#MAX_VALUE}
     */
    static boolean isValidLength(String text) {
        final int codeUnits = text.length();
        return !(codeUnits > Short.MAX_VALUE);
    }

    /**
     * Replaces Unicode escapes in {@code text} with the characters they denote.
     * Two forms are recognised: a backslash, {@code u} and exactly four hex
     * digits, or a backslash, {@code u} and any number of hex digits in braces.
     *
     * <p>The decoded character is inserted literally, so an escape that decodes
     * to {@code $} or to a backslash is safe.
     *
     * @param text the text to unescape
     * @return {@code text} itself when it contains no escape, otherwise a new
     *         string with every escape replaced
     * @throws IllegalArgumentException if an escape does not denote a valid code
     *         point (including hex values too large to parse)
     */
    static String decode(String text) {
        Matcher m = unicodeLiteral.matcher(text);
        if (!m.find()) {
            return text;
        }
        StringBuffer sb = new StringBuffer();
        do {
            String hex = m.group(1);
            if (hex.startsWith("{")) {
                hex = hex.substring(1, hex.length() - 1);
            }
            String decoded = new String(Character.toChars(Integer.parseInt(hex, 16)));
            // appendReplacement interprets '\' and '$' in the replacement; quote
            // it so the decoded character is never treated as a group reference.
            m.appendReplacement(sb, Matcher.quoteReplacement(decoded));
        } while (m.find());
        m.appendTail(sb);
        return sb.toString();
    }

    /**
     * Rejects a split-info string with more than 127 slash-separated units. The
     * unit count is taken to be the number of {@code '/'} characters plus one.
     *
     * @param info the split-info column value
     * @throws IllegalArgumentException if there are more than 127 units
     */
    void checkSplitInfoFormat(String info) {
        long units = 1L;
        for (int idx = 0; idx < info.length(); ++idx) {
            if (info.charAt(idx) == '/') {
                ++units;
            }
        }
        if (units > 127L) {
            throw new IllegalArgumentException("too many units");
        }
    }

    /**
     * Resolves a split-info string into word IDs. {@code "*"} means "no split".
     * Otherwise the string is a slash-separated list in which each unit is
     * either an ID literal (see {@code isId}) or a word description that is
     * looked up among the entries read so far (see {@code wordToId}).
     *
     * @param info the split-info column value
     * @return the word IDs, in order; empty for {@code "*"}
     * @throws IllegalArgumentException if there are more than 127 units, an ID is
     *         out of range, or a described word cannot be found
     */
    int[] parseSplitInfo(String info) {
        if (info.equals("*")) {
            return new int[0];
        }
        String[] units = info.split("/");
        if (units.length > 127) {
            throw new IllegalArgumentException("too many units");
        }
        int[] wordIds = new int[units.length];
        for (int idx = 0; idx < units.length; ++idx) {
            String unit = units[idx];
            if (this.isId(unit)) {
                wordIds[idx] = this.parseId(unit);
            } else {
                int found = this.wordToId(unit);
                if (found < 0) {
                    throw new IllegalArgumentException("not found such a word");
                }
                wordIds[idx] = found;
            }
        }
        return wordIds;
    }

    /**
     * Tells whether {@code text} is a word-ID literal: one or more digits,
     * optionally preceded by a capital {@code U}.
     *
     * @param text the unit to test
     * @return {@code true} if the whole string has that form
     */
    boolean isId(String text) {
        return Pattern.matches("U?\\d+", text);
    }

    /**
     * Parses a word-ID literal. A leading {@code U} is stripped; when this
     * builder is in user-dictionary mode, such an ID additionally gets the bit
     * {@code 0x10000000} set. The result is validated with {@code checkWordId}.
     *
     * @param text an ID literal accepted by {@code isId}
     * @return the numeric word ID
     * @throws IllegalArgumentException if the number cannot be parsed or the ID
     *         is rejected by {@code checkWordId}
     */
    int parseId(String text) {
        int id;
        if (!text.startsWith("U")) {
            id = Integer.parseInt(text);
        } else {
            id = Integer.parseInt(text.substring(1));
            if (this.isUserDictionary) {
                id |= 0x10000000;
            }
        }
        this.checkWordId(id);
        return id;
    }

    /**
     * Resolves a comma-separated word description to a word ID. Field 0 is the
     * headword, fields 1-6 the part of speech and field 7 the reading form;
     * headword and reading are unescaped with {@code decode}.
     *
     * @param text the word description
     * @return the ID of the first matching entry, or -1 if none matches
     * @throws IllegalArgumentException if there are fewer than 8 fields or the
     *         POS ID is negative
     */
    int wordToId(String text) {
        String[] fields = text.split(",");
        if (fields.length < 8) {
            throw new IllegalArgumentException("too few columns");
        }
        String surface = DictionaryBuilder.decode(fields[0]);
        short pos = this.getPosId(fields[1], fields[2], fields[3], fields[4], fields[5], fields[6]);
        if (pos < 0) {
            throw new IllegalArgumentException("invalid part of speech");
        }
        String readingForm = DictionaryBuilder.decode(fields[7]);
        return this.getWordId(surface, pos, readingForm);
    }

    /**
     * Scans the entries in order for one whose surface, POS ID and reading form
     * all match.
     *
     * @param headword    surface to match
     * @param posId       part-of-speech ID to match
     * @param readingForm reading form to match
     * @return the index of the first matching entry, or -1 if there is none
     */
    int getWordId(String headword, short posId, String readingForm) {
        for (int wid = 0; wid < this.entries.size(); ++wid) {
            WordInfo candidate = this.entries.get(wid).wordInfo;
            boolean matches = candidate.getSurface().equals(headword)
                    && candidate.getPOSId() == posId
                    && candidate.getReadingForm().equals(readingForm);
            if (matches) {
                return wid;
            }
        }
        return -1;
    }

    /**
     * Verifies that {@code wordId} indexes an entry that has already been read.
     *
     * @param wordId the ID to validate
     * @throws IllegalArgumentException if the ID is negative or not less than
     *         the current number of entries
     */
    void checkWordId(int wordId) {
        boolean inRange = 0 <= wordId && wordId < this.entries.size();
        if (!inRange) {
            throw new IllegalArgumentException("invalid word ID");
        }
    }

    /**
     * Parses the synonym-group column: {@code "*"} for none, otherwise decimal
     * integers separated by slashes.
     *
     * @param str the column value
     * @return the group IDs, in order; empty for {@code "*"}
     * @throws IllegalArgumentException if there are more than 127 IDs or one of
     *         them is not a valid integer
     */
    int[] parseSynonymGids(String str) {
        if (str.equals("*")) {
            return new int[0];
        }
        String[] tokens = str.split("/");
        if (tokens.length > 127) {
            throw new IllegalArgumentException("too many units");
        }
        return Arrays.stream(tokens).mapToInt(Integer::parseInt).toArray();
    }

    /**
     * Stages a string in the scratch buffer: its length in UTF-16 code units
     * (encoded by {@code writeStringLength}) followed by each code unit as a
     * two-byte {@code char} in the buffer's byte order.
     *
     * @param text the string to write
     */
    void writeString(String text) {
        this.writeStringLength((short)text.length());
        for (char unit : text.toCharArray()) {
            this.byteBuffer.putChar(unit);
        }
    }

    /**
     * Stages a length in one or two bytes. Values up to 127 take a single byte;
     * larger values take two bytes, the high byte first with its top bit set as
     * a marker, then the low byte.
     *
     * @param length the length to encode
     */
    void writeStringLength(short length) {
        if (length > 127) {
            this.byteBuffer.put((byte)((length >> 8) | 0x80));
            this.byteBuffer.put((byte)(length & 0xFF));
            return;
        }
        this.byteBuffer.put((byte)length);
    }

    /**
     * Stages an {@code int} array in the scratch buffer: the element count as a
     * single byte, then each element as an {@code int}.
     *
     * @param array the values to write
     */
    void writeIntArray(int[] array) {
        final int count = array.length;
        this.byteBuffer.put((byte)count);
        for (int idx = 0; idx < count; ++idx) {
            this.byteBuffer.putInt(array[idx]);
        }
    }

    /**
     * Logs a byte count at INFO level. The message is supplied lazily, so it is
     * only formatted if the logger actually emits it.
     *
     * @param size number of bytes to report
     */
    void printSize(long size) {
        this.logger.info(() -> {
            return String.format(" %,d bytes%n", size);
        });
    }

    /**
     * Prints the command-line usage. The text goes to the system console when
     * one is attached; otherwise (for example when the streams are redirected
     * and {@link System#console()} returns {@code null}) it goes to standard
     * error instead of failing with a {@code NullPointerException}.
     */
    static void printUsage() {
        String usage = "usage: DictionaryBuilder -o file -m file [-d description] files...\n"
                + "\t-o file\toutput to file\n"
                + "\t-m file\tmatrix file\n"
                + "\t-d description\tcomment\n";
        Console console = System.console();
        if (console != null) {
            // "%s" keeps the text from being interpreted as a format string.
            console.printf("%s", usage);
        } else {
            System.err.print(usage);
            System.err.flush();
        }
    }

    /**
     * Loads {@code /logger.properties} from the classpath into the
     * {@link LogManager}, if that resource exists; otherwise does nothing. The
     * resource stream is closed before returning.
     *
     * @throws IOException if the configuration cannot be read
     */
    static void readLoggerConfig() throws IOException {
        // A null resource is permitted in try-with-resources and is simply skipped on close.
        try (InputStream is = DictionaryBuilder.class.getResourceAsStream("/logger.properties")) {
            if (is != null) {
                LogManager.getLogManager().readConfiguration(is);
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Options are read from the front of {@code args}: {@code -o file}
     * (output, required), {@code -m file} (matrix, required),
     * {@code -d description} and {@code -h} (print usage and exit). Option
     * parsing stops at the first argument that is not one of these, or at an
     * option whose value is missing; everything from there on is a lexicon
     * path. Usage is printed, and nothing is built, when no lexicon path remains
     * or a required option is absent.
     *
     * @param args command-line arguments
     * @throws IOException if reading the inputs or writing the dictionary fails
     */
    public static void main(String[] args) throws IOException {
        DictionaryBuilder.readLoggerConfig();

        String description = "";
        String outputPath = null;
        String matrixPath = null;

        int next = 0;
        while (next < args.length) {
            String arg = args[next];
            boolean hasValue = next + 1 < args.length;
            if (hasValue && arg.equals("-o")) {
                outputPath = args[next + 1];
                next += 2;
            } else if (hasValue && arg.equals("-m")) {
                matrixPath = args[next + 1];
                next += 2;
            } else if (hasValue && arg.equals("-d")) {
                description = args[next + 1];
                next += 2;
            } else if (arg.equals("-h")) {
                DictionaryBuilder.printUsage();
                return;
            } else {
                break;
            }
        }

        boolean incomplete = args.length <= next || outputPath == null || matrixPath == null;
        if (incomplete) {
            DictionaryBuilder.printUsage();
            return;
        }

        List<String> lexiconPaths = Arrays.asList(args).subList(next, args.length);
        // NOTE(review): the first argument looks like a format/version identifier
        // for the header -- confirm against DictionaryHeader.
        DictionaryHeader header = new DictionaryHeader(-3558123966965398476L, Instant.now().getEpochSecond(), description);
        try (FileInputStream matrixInput = new FileInputStream(matrixPath);
             FileOutputStream output = new FileOutputStream(outputPath)) {
            output.write(header.toByte());
            DictionaryBuilder builder = new DictionaryBuilder();
            builder.build(lexiconPaths, matrixInput, output);
        }
    }

    /**
     * Assigns consecutive numeric IDs to part-of-speech strings in the order in
     * which they are first seen.
     */
    static class POSTable {
        /** Registered strings; a string's index is its ID. */
        private List<String> table = new ArrayList<>();

        /**
         * Returns the ID of {@code s}, registering it first if it is new.
         *
         * @param s the part-of-speech string
         * @return the index of {@code s} in the table, narrowed to {@code short}
         */
        short getId(String s) {
            int known = this.table.indexOf(s);
            if (known >= 0) {
                return (short)known;
            }
            this.table.add(s);
            return (short)(this.table.size() - 1);
        }

        /**
         * Returns the registered strings in ID order. This is the live backing
         * list, not a copy.
         */
        List<String> getList() {
            return this.table;
        }
    }

    /** Plain holder for one parsed lexicon record. */
    static class WordEntry {
        /** Trie key text; left {@code null} for entries that are not added to the trie. */
        String headword;

        /** The three {@code short} parameters parsed from columns 1-3. */
        short[] parameters;

        /** Surface, POS, reading and related data of the word. */
        WordInfo wordInfo;

        /** Raw A-unit split column; resolved to word IDs when the word info is written. */
        String aUnitSplitString;

        /** Raw B-unit split column; resolved to word IDs when the word info is written. */
        String bUnitSplitString;

        /** Raw word-structure column; resolved to word IDs when the word info is written. */
        String wordStructureString;
    }
}

