/*
 * Decompiled with CFR 0.152.
 */
package edu.umn.biomedicus.tokenization;

import edu.umn.biomedicus.tokenization.TokenResult;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;

/**
 * Character-driven tokenizer that accumulates non-separator characters into a
 * "word" and, when a separator is reached, splits that word into one or more
 * tokens using a cascade of regular expressions (mid-word breaks, leading
 * breaks, trailing breaks, trailing units on numbers, and {@code x}-joined
 * numbers).
 *
 * <p>Tokens are reported as {@link TokenResult} offsets relative to the
 * indices passed to {@link #advance(char, int)}.
 *
 * <p>Instances hold mutable state (the word being accumulated) and the shared
 * default units list is a plain unsynchronized list; nothing in this class
 * performs any synchronization.
 */
public class Tokenizer {
    /** Default units list shared by every tokenizer created with {@link #Tokenizer()}. */
    private static final List<String> UNITS = Tokenizer.loadUnitsList();
    /**
     * Breaks inside a word: math/modifier symbols and punctuation other than
     * {@code . , ' \u2019 - # $}, hyphens that follow a non-separator character or
     * precede a letter, and commas that are not between two numeric characters.
     */
    private static final Pattern MID_BREAKS = Pattern.compile("[\\p{Sm}\\p{Sk}\\p{P}&&[^.,'\u2019\\-#$]]|(?<=[^\\p{Z}])-|-(?=[\\p{L}])|(?<=[^\\p{N}]),(?=[^\\p{N}])|(?<=[^\\p{N}]),(?=[\\p{N}])|(?<=[\\p{N}]),(?=[^\\p{N}])");
    /** A single leading apostrophe, comma, or right single quotation mark. */
    private static final Pattern START_BREAKS = Pattern.compile("^[',\u2019]");
    /** The separator character used between {@code x}-joined numbers. */
    private static final Pattern X = Pattern.compile("[xX]");
    /** Trailing contractions/possessives, apostrophes, and commas; group 1 is the trailing piece. */
    private static final Pattern END_BREAKS = Pattern.compile("(?<=('[SsDdMm]|n't|N'T|'ll|'LL|'ve|'VE|'re|'RE|'|\u2019|,))$");
    /** A number immediately followed by an alphabetic suffix (group 1), optionally ending in a period. */
    private static final Pattern NUMBER_WORD = Pattern.compile("[-]?[0-9.xX]*[0-9.]++([\\p{Alpha}]++)[.]?$");
    /** Text ending in numbers joined by {@code x} or {@code X}. */
    private static final Pattern NUMBER_X = Pattern.compile(".*?[0-9.]*[0-9]++([xX][0-9.]*[0-9.]++)+$");

    /** Characters of the word currently being accumulated. */
    private final StringBuilder word = new StringBuilder();
    /** Units that are split off the end of numbers by this instance. */
    private final List<String> units;
    /** Index (as supplied to {@link #advance(char, int)}) of the first character of {@link #word}. */
    private int startIndex = -1;
    /** Tokens collected for the word currently being broken. */
    private List<TokenResult> results;

    /**
     * Tokenizes the whole text eagerly.
     *
     * @param text the text to tokenize
     * @return a new mutable list containing every token of {@code text}, in order
     * @throws IllegalArgumentException if {@code text} is null
     */
    @Nonnull
    public static List<TokenResult> allTokens(@Nonnull CharSequence text) {
        List<TokenResult> tokenResults = new ArrayList<>();
        for (TokenResult result : Tokenizer.tokenize(text)) {
            tokenResults.add(result);
        }
        return tokenResults;
    }

    /**
     * Lazily tokenizes the text. Every call to {@link Iterable#iterator()} on
     * the returned object starts a fresh pass over {@code text} with a new
     * default {@link Tokenizer}.
     *
     * @param text the text to tokenize
     * @return an iterable over the tokens of {@code text}
     * @throws IllegalArgumentException if {@code text} is null
     */
    @Nonnull
    public static Iterable<TokenResult> tokenize(final @Nonnull CharSequence text) {
        if (text == null) {
            throw new IllegalArgumentException("Null text");
        }
        return () -> new Iterator<TokenResult>() {
            private final Tokenizer tokenizer = new Tokenizer();
            /** Index of the next character to feed; {@code text.length() + 1} once finished. */
            private int index = 0;
            /** Tokens produced by the most recent word break that have not been handed out yet. */
            private Iterator<TokenResult> pending = null;
            /** The token the next call to {@link #next()} returns, or null when exhausted. */
            private TokenResult next = null;

            {
                this.advance();
            }

            /**
             * Feeds characters to the tokenizer until a token is available or
             * the text is exhausted. The loop is unbounded on purpose: an
             * iteration cap would leave {@code next} stale on long runs of
             * characters that yield no token. It always terminates because
             * every pass either returns or increments {@code index}.
             */
            private void advance() {
                while (true) {
                    if (this.pending != null && this.pending.hasNext()) {
                        this.next = this.pending.next();
                        return;
                    }
                    this.pending = null;
                    int length = text.length();
                    if (this.index > length) {
                        this.next = null;
                        return;
                    }
                    if (this.index == length) {
                        // End of input: flush whatever word is still buffered, exactly once.
                        this.pending = this.tokenizer.finish().iterator();
                        this.index++;
                        continue;
                    }
                    List<TokenResult> produced = this.tokenizer.advance(text.charAt(this.index), this.index);
                    this.index++;
                    this.pending = produced.isEmpty() ? null : produced.iterator();
                }
            }

            @Override
            public boolean hasNext() {
                return this.next != null;
            }

            @Override
            public TokenResult next() {
                if (this.next == null) {
                    throw new NoSuchElementException("No next token.");
                }
                TokenResult current = this.next;
                this.advance();
                return current;
            }
        };
    }

    /**
     * Adds a unit to the shared default units list if it is not already present.
     *
     * @param unit the unit to add
     */
    public static void addUnit(String unit) {
        if (!UNITS.contains(unit)) {
            UNITS.add(unit);
        }
    }

    /**
     * Removes a unit from the shared default units list.
     *
     * @param unit the unit to remove
     */
    public static void removeUnit(String unit) {
        UNITS.remove(unit);
    }

    /**
     * Replaces the contents of the shared default units list.
     *
     * @param newUnits the units that should make up the default list
     * @throws NullPointerException if {@code newUnits} is null; the existing list is left untouched
     */
    public static void replaceUnits(List<String> newUnits) {
        // Copy before clearing so a null argument, or the shared list itself
        // being passed in, cannot leave the default list empty.
        List<String> replacement = new ArrayList<>(newUnits);
        UNITS.clear();
        UNITS.addAll(replacement);
    }

    /**
     * Loads the default units list. If the system property
     * {@code biomedicus.tokenizer.unitsListPath} is set and the file it names
     * can be read, its lines are used; otherwise the {@code unitsList.txt}
     * class resource is read as UTF-8 (the same charset
     * {@link Files#readAllLines(java.nio.file.Path)} uses for the file case).
     *
     * @return a mutable list with one unit per line
     * @throws IllegalStateException if the bundled resource is missing or cannot be closed
     */
    private static List<String> loadUnitsList() {
        String unitListPath = System.getProperty("biomedicus.tokenizer.unitsListPath");
        if (unitListPath != null) {
            try {
                // Copy so the result is guaranteed mutable for addUnit/removeUnit/replaceUnits.
                return new ArrayList<>(Files.readAllLines(Paths.get(unitListPath)));
            } catch (IOException ignored) {
                // Best effort: an unreadable override falls back to the bundled list below.
            }
        }
        InputStream is = Tokenizer.class.getResourceAsStream("unitsList.txt");
        if (is == null) {
            throw new IllegalStateException("Resource unitsList.txt not found next to " + Tokenizer.class.getName());
        }
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            return reader.lines().collect(Collectors.toCollection(ArrayList::new));
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read resource unitsList.txt", e);
        }
    }

    /** Creates a tokenizer that uses the shared default units list. */
    public Tokenizer() {
        this.units = UNITS;
    }

    /**
     * Creates a tokenizer that uses the given units list (not copied).
     *
     * @param units the units to split off the end of numbers
     * @throws NullPointerException if {@code units} is null
     */
    public Tokenizer(List<String> units) {
        this.units = Objects.requireNonNull(units, "units");
    }

    /**
     * Creates a tokenizer whose units are a copy of the default list plus
     * {@code additionalUnits}, minus {@code ignoredUnits}.
     *
     * @param additionalUnits units to add to the defaults
     * @param ignoredUnits units to remove from the result
     */
    public Tokenizer(List<String> additionalUnits, List<String> ignoredUnits) {
        this.units = new ArrayList<>(UNITS);
        this.units.addAll(additionalUnits);
        this.units.removeAll(ignoredUnits);
    }

    /**
     * Feeds one character to the tokenizer.
     *
     * @param ch the character
     * @param index the index of {@code ch}; used as the offset base for reported tokens
     * @return the tokens of the word completed by {@code ch} if it is a
     *         separator, otherwise an empty list
     */
    @Nonnull
    public List<TokenResult> advance(char ch, int index) {
        if (Tokenizer.isSeparator(ch)) {
            return this.breakWord();
        }
        if (this.word.length() == 0) {
            this.startIndex = index;
        }
        this.word.append(ch);
        return Collections.emptyList();
    }

    /**
     * Flushes the word accumulated so far.
     *
     * @return the tokens of the buffered word, or an empty list if nothing is buffered
     */
    @Nonnull
    public List<TokenResult> finish() {
        return this.breakWord();
    }

    /**
     * Tells whether a character ends the current word: Unicode space, line and
     * paragraph separators, format characters, and newline, tab, carriage return.
     */
    private static boolean isSeparator(char ch) {
        int type = Character.getType(ch);
        return type == Character.SPACE_SEPARATOR
                || type == Character.LINE_SEPARATOR
                || type == Character.PARAGRAPH_SEPARATOR
                || type == Character.FORMAT
                || ch == '\n' || ch == '\t' || ch == '\r';
    }

    /**
     * Splits the buffered word into tokens and resets the buffer.
     *
     * @return the tokens found, or an empty list when the buffer is empty
     */
    private List<TokenResult> breakWord() {
        if (this.word.length() == 0) {
            return Collections.emptyList();
        }
        this.results = new ArrayList<>();
        Matcher midMatcher = MID_BREAKS.matcher(this.word);
        int start = 0;
        while (midMatcher.find()) {
            // Text before the break is processed further; the break itself
            // becomes a token only when it is not a zero-width match.
            if (start != midMatcher.start()) {
                this.breakStarts(start, midMatcher.start());
            }
            if (midMatcher.start() != midMatcher.end()) {
                this.addResult(midMatcher.start(), midMatcher.end());
            }
            start = midMatcher.end();
        }
        if (start != this.word.length()) {
            this.breakStarts(start, this.word.length());
        }
        this.startIndex = -1;
        this.word.setLength(0);
        return this.results;
    }

    /**
     * Peels leading {@link #START_BREAKS} matches off {@code [start, end)} as
     * individual tokens, then hands the remainder to {@link #breakEnds(int, int)}.
     */
    private void breakStarts(int start, int end) {
        while (true) {
            Matcher startMatcher = START_BREAKS.matcher(this.word.subSequence(start, end));
            if (!startMatcher.find() || startMatcher.end() == 0) {
                break;
            }
            this.addResult(start, start + startMatcher.end());
            start += startMatcher.end();
        }
        if (start != end) {
            this.breakEnds(start, end);
        }
    }

    /**
     * Splits a trailing {@link #END_BREAKS} piece off {@code [start, end)}.
     * The part before it is processed recursively first so tokens stay in
     * order; without a match the span goes to the unit/number splitting.
     */
    private void breakEnds(int start, int end) {
        Matcher matcher = END_BREAKS.matcher(this.word.subSequence(start, end));
        if (!matcher.find()) {
            this.breakUnitsOfTheEndsOfNumbers(start, end);
            return;
        }
        int suffixStart = matcher.start(1);
        int suffixEnd = matcher.end(1);
        if (suffixStart != 0) {
            this.breakEnds(start, start + suffixStart);
        }
        if (suffixStart != suffixEnd) {
            this.addResult(start + suffixStart, start + suffixEnd);
        }
    }

    /**
     * If {@code [start, end)} is a number followed by an alphabetic suffix
     * that is one of this tokenizer's units (compared in lower case), emits
     * the number part and the unit as separate tokens; otherwise processes
     * the span as a whole.
     */
    private void breakUnitsOfTheEndsOfNumbers(int start, int end) {
        Matcher matcher = NUMBER_WORD.matcher(this.word.subSequence(start, end));
        if (matcher.matches()) {
            String suffix = matcher.group(1);
            // Use the instance's units (not the static default) so the
            // constructor-supplied lists take effect; Locale.ROOT keeps the
            // comparison independent of the JVM default locale.
            if (suffix != null && this.units.contains(suffix.toLowerCase(Locale.ROOT))) {
                int unitStart = start + matcher.start(1);
                this.splitNumbersByX(start, unitStart);
                this.addResult(unitStart, end);
                return;
            }
        }
        this.splitNumbersByX(start, end);
    }

    /**
     * If {@code [start, end)} matches {@link #NUMBER_X}, emits the pieces
     * between every {@code x}/{@code X} and each {@code x}/{@code X} itself as
     * separate tokens; otherwise emits the span as a single token.
     */
    private void splitNumbersByX(int start, int end) {
        CharSequence tokenText = this.word.subSequence(start, end);
        if (!NUMBER_X.matcher(tokenText).matches()) {
            this.addResult(start, end);
            return;
        }
        int prev = start;
        Matcher xMatcher = X.matcher(tokenText);
        while (xMatcher.find()) {
            int xStart = start + xMatcher.start();
            int xEnd = start + xMatcher.end();
            this.addResult(prev, xStart);
            this.addResult(xStart, xEnd);
            prev = xEnd;
        }
        if (prev != end) {
            this.addResult(prev, end);
        }
    }

    /**
     * Records a token for the non-empty word-relative span {@code [start, end)},
     * shifted by the index at which the current word started.
     */
    private void addResult(int start, int end) {
        if (start != end) {
            this.results.add(new StandardTokenResult(this.startIndex + start, this.startIndex + end));
        }
    }

    /** Immutable {@link TokenResult} holding a start and an end index. */
    static class StandardTokenResult
    implements TokenResult {
        private final int startIndex;
        private final int endIndex;

        StandardTokenResult(int startIndex, int endIndex) {
            this.startIndex = startIndex;
            this.endIndex = endIndex;
        }

        @Override
        public int getStartIndex() {
            return this.startIndex;
        }

        @Override
        public int getEndIndex() {
            return this.endIndex;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            StandardTokenResult other = (StandardTokenResult)o;
            return this.startIndex == other.startIndex && this.endIndex == other.endIndex;
        }

        @Override
        public int hashCode() {
            return Objects.hash(this.startIndex, this.endIndex);
        }

        @Override
        public String toString() {
            return "StandardTokenResult[" + this.startIndex + ", " + this.endIndex + ")";
        }
    }
}

