/*
 * Decompiled with CFR 0.152.
 */
package com.worksap.nlp.sudachi.dictionary.build;

import com.worksap.nlp.sudachi.WordId;
import com.worksap.nlp.sudachi.dictionary.POS;
import com.worksap.nlp.sudachi.dictionary.WordInfo;
import com.worksap.nlp.sudachi.dictionary.build.DicBuffer;
import com.worksap.nlp.sudachi.dictionary.build.ModelOutput;
import com.worksap.nlp.sudachi.dictionary.build.POSTable;
import com.worksap.nlp.sudachi.dictionary.build.Parameters;
import com.worksap.nlp.sudachi.dictionary.build.WordIdResolver;
import com.worksap.nlp.sudachi.dictionary.build.WordLookup;
import com.worksap.nlp.sudachi.dictionary.build.WriteDictionary;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collects word entries parsed from CSV columns and serializes them, together
 * with their connection parameters, into a {@link ModelOutput}.
 *
 * <p>Entries are created with {@link #parseLine(List)}, registered with
 * {@link #addEntry(WordEntry)} and written with {@link #writeTo(ModelOutput)}.
 * Split information is kept as raw strings until write time, when it is
 * resolved to word ids through the configured {@link WordIdResolver}.
 */
public class CsvLexicon implements WriteDictionary {
    /** Maximum number of elements accepted in any '/'-separated list. */
    static final int ARRAY_MAX_LENGTH = 127;
    /** Minimum number of columns a lexicon row must have. */
    static final int MIN_REQUIRED_NUMBER_OF_COLUMNS = 18;
    /**
     * Matches a backslash followed by 'u' and either exactly four hex digits
     * or a brace-enclosed run of hex digits.
     */
    static final Pattern unicodeLiteral = Pattern.compile("\\\\u([0-9a-fA-F]{4}|\\{[0-9a-fA-F]+})");
    /** Matches a numeric word id, optionally prefixed with 'U'. */
    private static final Pattern PATTERN_ID = Pattern.compile("U?\\d+");
    /** Number of leading columns of a row that get unicode-unescaped. */
    private static final int NUMBER_OF_UNESCAPED_COLUMNS = 15;
    /** Separator between the elements of split info and synonym id lists. */
    private static final String UNIT_SEPARATOR = "/";
    /** Marker used in a column to say "no value". */
    private static final String NO_VALUE = "*";

    private final Parameters parameters = new Parameters();
    private final POSTable posTable;
    private final List<WordEntry> entries = new ArrayList<>();
    private WordIdResolver widResolver = new WordLookup.Noop();

    /**
     * @param pos table used to map part-of-speech tuples to ids
     */
    public CsvLexicon(POSTable pos) {
        this.posTable = pos;
    }

    /**
     * Replaces the resolver used to turn split-info references into word ids.
     *
     * @param widResolver resolver to use from now on
     */
    public void setResolver(WordIdResolver widResolver) {
        this.widResolver = widResolver;
    }

    /**
     * Decodes every unicode literal (see {@link #unicodeLiteral}) in the text
     * into the character(s) of the code point it names.
     *
     * <p>The decoded characters are inserted literally: a literal that decodes
     * to '$' or to a backslash must not be interpreted as regex replacement
     * syntax, so the replacement is quoted before it is appended.
     *
     * @param text text possibly containing unicode literals
     * @return the same instance if there is nothing to decode, otherwise the decoded text
     * @throws IllegalArgumentException if a literal does not name a valid code point
     */
    public static String unescape(String text) {
        Matcher m = unicodeLiteral.matcher(text);
        if (!m.find()) {
            return text;
        }
        StringBuffer sb = new StringBuffer(text.length());
        do {
            String hex = m.group(1);
            if (hex.startsWith("{")) {
                // strip the surrounding braces of the long form
                hex = hex.substring(1, hex.length() - 1);
            }
            String decoded = new String(Character.toChars(Integer.parseInt(hex, 16)));
            m.appendReplacement(sb, Matcher.quoteReplacement(decoded));
        } while (m.find());
        m.appendTail(sb);
        return sb.toString();
    }

    /**
     * @return the live list of registered entries (not a copy)
     */
    public List<WordEntry> getEntries() {
        return this.entries;
    }

    /**
     * Builds a {@link WordEntry} from one row of the lexicon.
     *
     * <p>The first 15 columns are unescaped in place, so the passed list is
     * modified. Columns as used here:
     * <ul>
     * <li>0: headword; stored on the entry unless column 1 is "-1"</li>
     * <li>1-3: three shorts handed to {@link Parameters#add} (presumably
     * left id, right id and cost; confirm against Parameters)</li>
     * <li>4, 11, 12, 13: strings/id handed to the {@link WordInfo} constructor
     * (column 13 may be "*", which becomes -1)</li>
     * <li>5-10: the six part-of-speech components</li>
     * <li>14: when "A", columns 15 and 16 must both be "*"</li>
     * <li>15-17: A-unit split, B-unit split and word structure strings</li>
     * <li>18 (optional): synonym group ids</li>
     * </ul>
     *
     * @param cols columns of the row; must be modifiable
     * @return the parsed entry (not yet added to this lexicon)
     * @throws IllegalArgumentException if the row has too few columns, a string
     *         is too long, the headword is empty, a list has too many units, a
     *         number cannot be parsed or the splitting is inconsistent
     */
    WordEntry parseLine(List<String> cols) {
        if (cols.size() < MIN_REQUIRED_NUMBER_OF_COLUMNS) {
            throw new IllegalArgumentException("invalid format");
        }
        for (int i = 0; i < NUMBER_OF_UNESCAPED_COLUMNS; ++i) {
            cols.set(i, CsvLexicon.unescape(cols.get(i)));
        }

        String headword = cols.get(0);
        int headwordByteLength = headword.getBytes(StandardCharsets.UTF_8).length;
        boolean tooLong = headwordByteLength > Short.MAX_VALUE
                || !DicBuffer.isValidLength(cols.get(4))
                || !DicBuffer.isValidLength(cols.get(11))
                || !DicBuffer.isValidLength(cols.get(12));
        if (tooLong) {
            throw new IllegalArgumentException("string is too long");
        }
        if (headword.isEmpty()) {
            throw new IllegalArgumentException("headword is empty");
        }

        WordEntry entry = new WordEntry();
        if (!cols.get(1).equals("-1")) {
            // otherwise the headword is left null on the entry
            entry.headword = headword;
        }
        this.parameters.add(Short.parseShort(cols.get(1)), Short.parseShort(cols.get(2)),
                Short.parseShort(cols.get(3)));

        POS pos = new POS(cols.get(5), cols.get(6), cols.get(7), cols.get(8), cols.get(9), cols.get(10));
        short posId = this.posTable.getId(pos);

        entry.aUnitSplitString = cols.get(15);
        entry.bUnitSplitString = cols.get(16);
        entry.wordStructureString = cols.get(17);
        this.checkSplitInfoFormat(entry.aUnitSplitString);
        this.checkSplitInfoFormat(entry.bUnitSplitString);
        this.checkSplitInfoFormat(entry.wordStructureString);

        boolean hasSplits = !entry.aUnitSplitString.equals(NO_VALUE)
                || !entry.bUnitSplitString.equals(NO_VALUE);
        if (cols.get(14).equals("A") && hasSplits) {
            throw new IllegalArgumentException("invalid splitting");
        }

        int[] synonymGids = new int[0];
        if (cols.size() > MIN_REQUIRED_NUMBER_OF_COLUMNS) {
            synonymGids = this.parseSynonymGids(cols.get(18));
        }
        int dictionaryFormWordId = cols.get(13).equals(NO_VALUE) ? -1 : Integer.parseInt(cols.get(13));

        // The cast is safe: the byte length was checked against Short.MAX_VALUE above.
        entry.wordInfo = new WordInfo(cols.get(4), (short) headwordByteLength, posId, cols.get(12),
                dictionaryFormWordId, "", cols.get(11), null, null, null, synonymGids);
        return entry;
    }

    /**
     * Parses a '/'-separated list of synonym group ids.
     *
     * @param str the list, or "*" for none
     * @return the parsed ids; empty for "*"
     * @throws IllegalArgumentException if there are more than
     *         {@link #ARRAY_MAX_LENGTH} ids or one is not an integer
     */
    int[] parseSynonymGids(String str) {
        if (str.equals(NO_VALUE)) {
            return new int[0];
        }
        String[] ids = str.split(UNIT_SEPARATOR);
        if (ids.length > ARRAY_MAX_LENGTH) {
            throw new IllegalArgumentException("too many units");
        }
        int[] ret = new int[ids.length];
        for (int i = 0; i < ids.length; ++i) {
            ret[i] = Integer.parseInt(ids[i]);
        }
        return ret;
    }

    /**
     * Resolves a textual word reference to a word id using the current resolver.
     *
     * <p>The reference is split on ',' into at most eight parts: headword, six
     * part-of-speech components and reading. Headword and reading are
     * unescaped; any further commas stay part of the reading.
     *
     * @param text the word reference
     * @return whatever the resolver's lookup returns for the reference
     * @throws IllegalArgumentException if the reference has fewer than eight parts
     */
    int wordToId(String text) {
        String[] cols = text.split(",", 8);
        if (cols.length < 8) {
            throw new IllegalArgumentException("too few columns");
        }
        String headword = CsvLexicon.unescape(cols[0]);
        POS pos = new POS(Arrays.copyOfRange(cols, 1, 7));
        short posId = this.posTable.getId(pos);
        String reading = CsvLexicon.unescape(cols[7]);
        return this.widResolver.lookup(headword, posId, reading);
    }

    /**
     * Rejects split info that would contain more than {@link #ARRAY_MAX_LENGTH}
     * units, judged by counting '/' separators.
     *
     * @param info raw split info string
     * @throws IllegalArgumentException if there are too many units
     */
    void checkSplitInfoFormat(String info) {
        long units = info.chars().filter(c -> c == '/').count() + 1L;
        if (units > ARRAY_MAX_LENGTH) {
            throw new IllegalArgumentException("too many units");
        }
    }

    /**
     * @param text a single split info element
     * @return true if the whole element is digits with an optional 'U' prefix
     */
    boolean isId(String text) {
        return PATTERN_ID.matcher(text).matches();
    }

    /**
     * Converts raw split info into word ids.
     *
     * <p>Each '/'-separated element is either a numeric id (see
     * {@link #isId(String)}) or a word reference resolved through
     * {@link #wordToId(String)}.
     *
     * @param info raw split info, or "*" for none
     * @return the word ids; empty for "*"
     * @throws IllegalArgumentException if there are too many units, an element
     *         is malformed, or a word reference resolves to a negative id
     */
    int[] parseSplitInfo(String info) {
        if (info.equals(NO_VALUE)) {
            return new int[0];
        }
        String[] words = info.split(UNIT_SEPARATOR);
        if (words.length > ARRAY_MAX_LENGTH) {
            throw new IllegalArgumentException("too many units");
        }
        int[] ret = new int[words.length];
        for (int i = 0; i < words.length; ++i) {
            String ref = words[i];
            if (this.isId(ref)) {
                ret[i] = this.parseId(ref);
            } else {
                ret[i] = this.wordToId(ref);
                if (ret[i] < 0) {
                    throw new IllegalArgumentException("couldn't find " + ref + " in the dictionaries");
                }
            }
        }
        return ret;
    }

    /**
     * Parses a numeric word id and has the resolver validate it.
     *
     * <p>For a 'U'-prefixed id the digits after the prefix are parsed and, if
     * the resolver reports {@code isUser()}, the result is combined with
     * {@code WordId.make(1, id)} (presumably tagging it with dictionary
     * number 1; confirm against WordId).
     *
     * @param text id text accepted by {@link #isId(String)}
     * @return the resulting word id
     */
    int parseId(String text) {
        int id;
        if (text.startsWith("U")) {
            id = Integer.parseInt(text.substring(1));
            if (this.widResolver.isUser()) {
                id = WordId.make(1, id);
            }
        } else {
            id = Integer.parseInt(text);
        }
        this.widResolver.validate(id);
        return id;
    }

    /**
     * Writes the lexicon to the output.
     *
     * <p>Layout, in order: the number of entries as a little-endian int, the
     * connection parameters, one int offset per entry, then the entries
     * themselves. The offset table is only known after the entries have been
     * written, so space is reserved for it first and filled in afterwards; the
     * output position is finally restored to the end of the entries.
     *
     * @param output destination
     * @throws IOException if writing fails
     * @throws IllegalArgumentException if split info of an entry cannot be resolved
     */
    @Override
    public void writeTo(ModelOutput output) throws IOException {
        ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        buf.putInt(this.entries.size());
        buf.flip();
        output.write(buf);

        this.parameters.writeTo(output);

        // Skip over the offset table for now; it is written once all offsets are known.
        int offsetsSize = Integer.BYTES * this.entries.size();
        DicBuffer offsets = new DicBuffer(offsetsSize);
        long offsetsPosition = output.position();
        output.position(offsetsPosition + (long) offsetsSize);

        output.withPart("word entries", () -> {
            DicBuffer buffer = new DicBuffer(131072);
            // Output position corresponding to the start of the in-memory buffer.
            int offset = (int) output.position();
            int numEntries = this.entries.size();
            for (int i = 0; i < numEntries; ++i) {
                WordEntry entry = this.entries.get(i);
                if (buffer.wontFit(16384)) {
                    // Flush and advance by what consume reports
                    // (presumably the number of bytes written; confirm against DicBuffer).
                    offset += buffer.consume(output::write).intValue();
                }
                offsets.putInt(offset + buffer.position());

                WordInfo wi = entry.wordInfo;
                buffer.put(wi.getSurface());
                buffer.putLength(wi.getLength());
                buffer.putShort(wi.getPOSId());
                buffer.putEmptyIfEqual(wi.getNormalizedForm(), wi.getSurface());
                buffer.putInt(wi.getDictionaryFormWordId());
                buffer.putEmptyIfEqual(wi.getReadingForm(), wi.getSurface());
                buffer.putInts(this.parseSplitInfo(entry.aUnitSplitString));
                buffer.putInts(this.parseSplitInfo(entry.bUnitSplitString));
                buffer.putInts(this.parseSplitInfo(entry.wordStructureString));
                buffer.putInts(wi.getSynonymGoupIds());
                output.progress(i, numEntries);
            }
            buffer.consume(output::write);
        });

        // Go back, fill in the reserved offset table, then return to the end.
        long pos = output.position();
        output.position(offsetsPosition);
        output.withPart("WordInfo offsets", () -> offsets.consume(output::write));
        output.position(pos);
    }

    /**
     * Appends an entry to the lexicon.
     *
     * @param e entry to add
     * @return index of the entry in this lexicon
     */
    public int addEntry(WordEntry e) {
        int id = this.entries.size();
        this.entries.add(e);
        return id;
    }

    /**
     * Forwards the given limits to the connection parameters.
     *
     * @param left limit passed as the first argument of {@code Parameters.setLimits}
     * @param right limit passed as the second argument of {@code Parameters.setLimits}
     */
    public void setLimits(int left, int right) {
        this.parameters.setLimits(left, right);
    }

    /** One parsed lexicon row; split information is kept unresolved as text. */
    public static class WordEntry {
        /** Headword of the row, or null when {@link #parseLine(List)} did not set it. */
        String headword;
        WordInfo wordInfo;
        /** Raw A-unit split info, "*" for none. */
        String aUnitSplitString;
        /** Raw B-unit split info, "*" for none. */
        String bUnitSplitString;
        /** Raw word structure info, "*" for none. */
        String wordStructureString;
    }
}

