/*
 * Decompiled with CFR 0.152.
 */
package org.dromara.pdf.pdfbox.core.ext.analyzer;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.util.regex.Pattern;
import org.apache.pdfbox.text.PDFTextStripper;
import org.dromara.pdf.pdfbox.core.base.Document;
import org.dromara.pdf.pdfbox.core.ext.analyzer.AbstractTextAnalyzer;

/**
 * Text analyzer built on {@link AbstractTextAnalyzer}.
 *
 * <p>It offers two operations on a single page of the wrapped document:
 * collecting the text info produced by a {@code DefaultTextStripper}, and
 * counting the characters on the page once blank characters are removed.
 */
public class TextAnalyzer extends AbstractTextAnalyzer {
    /**
     * Characters stripped from page text before counting: carriage return,
     * line feed, tab, the backspace character (the Java string escape
     * {@code "\b"} is U+0008, not a regex word boundary) and any regex
     * whitespace ({@code \s}). Compiled once instead of on every call.
     */
    private static final Pattern BLANK_CHARACTERS = Pattern.compile("\r|\n|\t|\b|\\s");

    /**
     * Creates an analyzer for the given document.
     *
     * @param document the document handed to the superclass constructor
     */
    public TextAnalyzer(Document document) {
        super(document);
    }

    /**
     * Runs a {@code DefaultTextStripper} over the document for the given page
     * index and appends everything in the stripper's info set to
     * {@code infoSet}.
     *
     * <p>No exception is caught here, so a failure inside the stripper
     * propagates to the caller and {@code infoSet} is left untouched.
     *
     * @param pageIndex page index passed to the stripper
     */
    @Override
    public void processText(int pageIndex) {
        AbstractTextAnalyzer.DefaultTextStripper textStripper = new AbstractTextAnalyzer.DefaultTextStripper(pageIndex, this.log);
        // The writer targets an in-memory buffer that is never read back: only the
        // stripper's info set is used afterwards.
        // NOTE(review): presumably the info set is filled as a side effect of writeText - confirm.
        try (OutputStreamWriter writer = new OutputStreamWriter(new BufferedOutputStream(new ByteArrayOutputStream()))) {
            textStripper.writeText(this.getDocument(), writer);
        }
        this.infoSet.addAll(textStripper.getInfoSet());
    }

    /**
     * Counts the characters on one page after removing every match of
     * {@link #BLANK_CHARACTERS} from the extracted text.
     *
     * @param pageIndex page index; the stripper is restricted to page number
     *                  {@code pageIndex + 1} for both start and end
     * @return length of the extracted page text with blank characters removed
     */
    @Override
    public int getCharacterCount(int pageIndex) {
        // The stripper is given pageIndex + 1 as both start and end page, so exactly
        // one page is extracted.
        int pageNumber = pageIndex + 1;
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(pageNumber);
        stripper.setEndPage(pageNumber);
        String pageText = stripper.getText(this.getDocument());
        return BLANK_CHARACTERS.matcher(pageText).replaceAll("").length();
    }
}

