/*
 * Decompiled with CFR 0.152.
 */
package com.itextpdf.pdfocr.tesseract4;

import com.itextpdf.io.util.MessageFormatUtil;
import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
import com.itextpdf.pdfocr.tesseract4.ImagePreprocessingUtil;
import com.itextpdf.pdfocr.tesseract4.OutputFormat;
import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
import com.itextpdf.pdfocr.tesseract4.TesseractHelper;
import com.itextpdf.pdfocr.tesseract4.TesseractOcrUtil;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.TesseractException;
import org.slf4j.LoggerFactory;

public class Tesseract4LibOcrEngine
extends AbstractTesseract4OcrEngine {
    private ITesseract tesseractInstance = TesseractOcrUtil.initializeTesseractInstance(this.isWindows(), null, null, null);
    private static final Pattern ASCII_STRING_PATTERN = Pattern.compile("^[\\u0000-\\u007F]*$");

    public Tesseract4LibOcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
        super(tesseract4OcrEngineProperties);
    }

    public ITesseract getTesseractInstance() {
        return this.tesseractInstance;
    }

    public void initializeTesseract(OutputFormat outputFormat) {
        if (this.getTesseractInstance() == null || TesseractOcrUtil.isTesseractInstanceDisposed(this.getTesseractInstance())) {
            this.tesseractInstance = TesseractOcrUtil.initializeTesseractInstance(this.isWindows(), this.getTessData(), this.getLanguagesAsString(), this.getTesseract4OcrEngineProperties().getPathToUserWordsFile());
        }
        this.getTesseractInstance().setTessVariable("tessedit_create_hocr", outputFormat.equals((Object)OutputFormat.HOCR) ? "1" : "0");
        if (this.getTesseract4OcrEngineProperties().isUseTxtToImproveHocrParsing()) {
            this.getTesseractInstance().setTessVariable("preserve_interword_spaces", "1");
        }
        this.getTesseractInstance().setTessVariable("user_defined_dpi", "300");
        if (this.getTesseract4OcrEngineProperties().getPathToUserWordsFile() != null) {
            this.getTesseractInstance().setTessVariable("load_system_dawg", "0");
            this.getTesseractInstance().setTessVariable("load_freq_dawg", "0");
            this.getTesseractInstance().setTessVariable("user_words_suffix", this.getTesseract4OcrEngineProperties().getDefaultUserWordsSuffix());
            this.getTesseractInstance().setTessVariable("user_words_file", this.getTesseract4OcrEngineProperties().getPathToUserWordsFile());
        }
        TesseractOcrUtil.setTesseractProperties(this.getTesseractInstance(), this.getTessData(), this.getLanguagesAsString(), this.getTesseract4OcrEngineProperties().getPageSegMode(), this.getTesseract4OcrEngineProperties().getPathToUserWordsFile());
    }

    @Override
    void doTesseractOcr(File inputImage, List<File> outputFiles, OutputFormat outputFormat, int pageNumber, boolean dispatchEvent) {
        this.scheduledCheck();
        try {
            Tesseract4LibOcrEngine.validateTessDataPath(this.getTessData());
            this.validateLanguages(this.getTesseract4OcrEngineProperties().getLanguages());
            this.initializeTesseract(outputFormat);
            if (dispatchEvent) {
                this.onEvent();
            }
            List<Object> resultList = new ArrayList<String>();
            if (!this.getTesseract4OcrEngineProperties().isPreprocessingImages() && ImagePreprocessingUtil.isTiffImage(inputImage)) {
                resultList = this.getOcrResultForMultiPage(inputImage, outputFormat);
            } else {
                resultList.add(this.getOcrResultForSinglePage(inputImage, outputFormat, pageNumber));
            }
            for (int i = 0; i < resultList.size(); ++i) {
                File outputFile;
                String result = (String)resultList.get(i);
                File file = outputFile = i >= outputFiles.size() ? null : outputFiles.get(i);
                if (result == null || outputFile == null) continue;
                try (OutputStreamWriter writer = new OutputStreamWriter((OutputStream)new FileOutputStream(outputFile.getAbsolutePath()), StandardCharsets.UTF_8);){
                    writer.write(result);
                    continue;
                }
                catch (IOException e) {
                    LoggerFactory.getLogger(this.getClass()).error(MessageFormatUtil.format((String)"Cannot write to file {0}: {1}", (Object[])new Object[]{e.getMessage()}));
                    throw new Tesseract4OcrException("Tesseract failed. Please check provided parameters");
                }
            }
        }
        catch (Tesseract4OcrException e) {
            LoggerFactory.getLogger(this.getClass()).error(e.getMessage());
            throw new Tesseract4OcrException(e.getMessage(), (Throwable)((Object)e));
        }
        finally {
            if (this.tesseractInstance != null) {
                TesseractOcrUtil.disposeTesseractInstance(this.tesseractInstance);
            }
            if (this.getTesseract4OcrEngineProperties().getPathToUserWordsFile() != null && this.getTesseract4OcrEngineProperties().isUserWordsFileTemporary()) {
                TesseractHelper.deleteFile(this.getTesseract4OcrEngineProperties().getPathToUserWordsFile());
            }
        }
    }

    private static void validateTessDataPath(String tessDataPath) {
        Matcher asciiStringMatcher = ASCII_STRING_PATTERN.matcher(tessDataPath);
        if (!asciiStringMatcher.matches()) {
            throw new Tesseract4OcrException("Path to tess data should contain only ASCII characters");
        }
    }

    private List<String> getOcrResultForMultiPage(File inputImage, OutputFormat outputFormat) {
        ArrayList<String> resultList = new ArrayList<String>();
        try {
            this.initializeTesseract(outputFormat);
            TesseractOcrUtil util = new TesseractOcrUtil();
            util.initializeImagesListFromTiff(inputImage);
            int numOfPages = util.getListOfPages().size();
            for (int i = 0; i < numOfPages; ++i) {
                String result = util.getOcrResultAsString(this.getTesseractInstance(), util.getListOfPages().get(i), outputFormat);
                resultList.add(result);
            }
        }
        catch (TesseractException e) {
            String msg = MessageFormatUtil.format((String)"Tesseract failed: {0}", (Object[])new Object[]{e.getMessage()});
            LoggerFactory.getLogger(this.getClass()).error(msg);
            throw new Tesseract4OcrException("Tesseract failed. Please check provided parameters");
        }
        finally {
            TesseractOcrUtil.disposeTesseractInstance(this.getTesseractInstance());
        }
        return resultList;
    }

    private String getOcrResultForSinglePage(File inputImage, OutputFormat outputFormat, int pageNumber) {
        String result;
        block7: {
            result = null;
            try {
                if (this.getTesseract4OcrEngineProperties().isPreprocessingImages()) {
                    result = new TesseractOcrUtil().getOcrResultAsString(this.getTesseractInstance(), ImagePreprocessingUtil.preprocessImage(inputImage, pageNumber, this.getTesseract4OcrEngineProperties().getImagePreprocessingOptions()), outputFormat);
                }
                if (result != null) break block7;
                BufferedImage bufferedImage = ImagePreprocessingUtil.readImage(inputImage);
                if (bufferedImage != null) {
                    try {
                        result = new TesseractOcrUtil().getOcrResultAsString(this.getTesseractInstance(), bufferedImage, outputFormat);
                    }
                    catch (Exception e) {
                        LoggerFactory.getLogger(this.getClass()).info(MessageFormatUtil.format((String)"Cannot process image: {0}", (Object[])new Object[]{e.getMessage()}));
                    }
                }
                if (result == null) {
                    result = new TesseractOcrUtil().getOcrResultAsString(this.getTesseractInstance(), inputImage, outputFormat);
                }
            }
            catch (Exception e) {
                LoggerFactory.getLogger(this.getClass()).error(MessageFormatUtil.format((String)"Tesseract failed: {0}", (Object[])new Object[]{e.getMessage()}));
                throw new Tesseract4OcrException("Tesseract failed. Please check provided parameters");
            }
        }
        return result;
    }
}

