/*
 * Decompiled with CFR 0.152.
 */
package org.predict4all.nlp.parser.matcher;

import gnu.trove.map.hash.TObjectIntHashMap;
import gnu.trove.procedure.TObjectIntProcedure;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.predict4all.nlp.EquivalenceClass;
import org.predict4all.nlp.io.TokenFileInputStream;
import org.predict4all.nlp.io.TokenFileOutputStream;
import org.predict4all.nlp.parser.TokenAppender;
import org.predict4all.nlp.parser.TokenListAppender;
import org.predict4all.nlp.parser.TokenListProvider;
import org.predict4all.nlp.parser.TokenProvider;
import org.predict4all.nlp.parser.matcher.PatternMatched;
import org.predict4all.nlp.parser.matcher.TokenMatcher;
import org.predict4all.nlp.parser.token.EquivalenceClassToken;
import org.predict4all.nlp.parser.token.Token;
import org.predict4all.nlp.parser.token.WordToken;
import org.predict4all.nlp.trainer.TrainerTask;
import org.predict4all.nlp.trainer.corpus.AbstractTrainingDocument;
import org.predict4all.nlp.trainer.corpus.TrainingCorpus;
import org.predict4all.nlp.trainer.step.TrainingStep;
import org.predict4all.nlp.utils.progressindicator.LoggingProgressIndicator;
import org.predict4all.nlp.utils.progressindicator.NoOpProgressIndicator;
import org.predict4all.nlp.utils.progressindicator.ProgressIndicator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs a fixed, ordered set of {@link TokenMatcher}s over a token sequence and
 * replaces each matched span with a single token built from the match.
 * Tokens that no matcher recognises are copied to the output unchanged.
 */
public class TokenConverter {
    private static final Logger LOGGER = LoggerFactory.getLogger(TokenConverter.class);

    /** Matchers tried, in array order, on every token; the first one that matches wins. */
    private final TokenMatcher[] tokenMatchers;

    /**
     * @param termMatchers matchers to apply, in priority order (the array is stored as-is, not copied)
     */
    public TokenConverter(TokenMatcher[] termMatchers) {
        this.tokenMatchers = termMatchers;
    }

    /**
     * Converts an in-memory token list.
     *
     * @param inputTokenList tokens to convert
     * @return a new list holding the converted tokens
     * @throws IOException propagated from the underlying conversion
     */
    public List<Token> executeTermDetection(List<Token> inputTokenList) throws IOException {
        // Conversion never produces more tokens than it reads from this list, so the input size is a safe capacity.
        List<Token> outputTokenList = new ArrayList<>(inputTokenList.size());
        this.executeTermPatternMatching(
                new TokenListProvider(inputTokenList),
                new TokenListAppender(outputTokenList),
                NoOpProgressIndicator.INSTANCE);
        return outputTokenList;
    }

    /**
     * Prepares the {@link TrainingStep#TOKEN_CONVERT} step on the corpus and builds one
     * conversion task per document of that step. The tasks are only created here, not run.
     *
     * @param corpus corpus whose documents should be converted
     * @return one task per document, all sharing a single logging progress indicator
     * @throws IOException declared for callers; propagated from corpus access if it occurs
     */
    public List<TrainerTask> executeTokenPatternMatching(TrainingCorpus corpus) throws IOException {
        corpus.initStep(TrainingStep.TOKEN_CONVERT);
        LoggingProgressIndicator progressIndicator = new LoggingProgressIndicator(
                "Token conversion", corpus.getTotalCountFor(TrainingStep.TOKEN_CONVERT));
        return corpus.getDocuments(TrainingStep.TOKEN_CONVERT).stream()
                .map(d -> new TokenPatternMatchingTask(progressIndicator, (AbstractTrainingDocument) d))
                .collect(Collectors.toList());
    }

    /**
     * Core conversion loop: reads tokens from the provider, applies the matchers and writes
     * the result to the appender.
     *
     * @param tokenProvider source of tokens
     * @param tokenAppender destination for converted tokens
     * @param progressIndicator incremented once per loop iteration (i.e. once per written token)
     * @return the number of tokens appended to {@code tokenAppender}
     * @throws IOException if reading or writing tokens fails
     */
    private int executeTermPatternMatching(TokenProvider tokenProvider, TokenAppender tokenAppender, ProgressIndicator progressIndicator) throws IOException {
        LOGGER.debug("Start token conversion");
        long start = System.currentTimeMillis();
        int transformCount = 0;
        int tokenWritten = 0;
        // Per-type match statistics; only filled (and reported) when debug logging is enabled.
        TObjectIntHashMap<EquivalenceClass> ecCounts = new TObjectIntHashMap<>(EquivalenceClass.values().length);

        Token current = tokenProvider.getNext();
        while (current != null) {
            progressIndicator.increment();
            PatternMatched match = this.findFirstMatch(current, tokenProvider);
            if (match == null) {
                // No matcher recognised this token: copy it through untouched.
                tokenAppender.append(current);
                current = current.getNext(tokenProvider);
            } else {
                ++transformCount;
                EquivalenceClass type = match.getType();
                if (LOGGER.isDebugEnabled()) {
                    ecCounts.adjustOrPutValue(type, 1, 1);
                }
                // A typed match becomes an equivalence-class token, an untyped one a plain word token.
                if (type != null) {
                    tokenAppender.append(EquivalenceClassToken.create(match.getFormattedText(), type));
                } else {
                    tokenAppender.append(WordToken.create(match.getFormattedText()));
                }
                // Resume after the token the match reports as its end, or after the current token if none is reported.
                Token matchEnd = match.getPreviousEndToken();
                current = (matchEnd != null ? matchEnd : current).getNext(tokenProvider);
            }
            ++tokenWritten;
        }

        if (LOGGER.isDebugEnabled()) {
            // Logged at DEBUG like the entries it introduces (it is already guarded by isDebugEnabled).
            LOGGER.debug("Token detection result for each entry : ");
            ecCounts.forEachEntry((type, count) -> {
                LOGGER.debug("\t{} = {}", type, count);
                return true;
            });
            LOGGER.debug("{} total term found in {} s", transformCount, (double) (System.currentTimeMillis() - start) / 1000.0);
        }
        return tokenWritten;
    }

    /**
     * Tries each matcher in order on the given token and stops at the first match.
     *
     * @param current token to test
     * @param tokenProvider provider handed to the matchers alongside the token
     * @return the first non-null match, or {@code null} when no matcher matches
     * @throws IOException if a matcher fails while reading tokens
     */
    private PatternMatched findFirstMatch(Token current, TokenProvider tokenProvider) throws IOException {
        for (TokenMatcher matcher : this.tokenMatchers) {
            PatternMatched match = matcher.match(current, tokenProvider);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /**
     * Trainer task converting a single document: reads its input token file, writes the
     * converted tokens to its output file and records the written token count on the document.
     */
    private class TokenPatternMatchingTask
    extends TrainerTask {
        public TokenPatternMatchingTask(ProgressIndicator progressIndicator, AbstractTrainingDocument document) {
            super(progressIndicator, document);
        }

        @Override
        public void run() throws Exception {
            // Both streams are closed by try-with-resources before run() returns.
            try (TokenFileInputStream tokenFis = new TokenFileInputStream(this.document.getInputFile());
                 TokenFileOutputStream tokenFos = new TokenFileOutputStream(this.document.getOutputFile());){
                int tokenCount = TokenConverter.this.executeTermPatternMatching(tokenFis, tokenFos, this.progressIndicator);
                this.document.writeInformations(tokenCount);
            }
        }
    }
}

