/*
 * Decompiled with CFR 0.152.
 */
package com.adobe.internal.pdftoolkit.services.readingorder;

import com.adobe.internal.pdftoolkit.core.exceptions.PDFFontException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFIOException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidDocumentException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFSecurityException;
import com.adobe.internal.pdftoolkit.core.fontset.PDFFontSet;
import com.adobe.internal.pdftoolkit.core.types.ASCoordinate;
import com.adobe.internal.pdftoolkit.core.types.ASQuad;
import com.adobe.internal.pdftoolkit.pdf.document.PDFDocument;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPage;
import com.adobe.internal.pdftoolkit.services.readingorder.impl.FindHVBreaks;
import com.adobe.internal.pdftoolkit.services.readingorder.impl.SortedWord;
import com.adobe.internal.pdftoolkit.services.textextraction.ParagraphIterator;
import com.adobe.internal.pdftoolkit.services.textextraction.SentenceIterator;
import com.adobe.internal.pdftoolkit.services.textextraction.TextExtractionOptions;
import com.adobe.internal.pdftoolkit.services.textextraction.TextExtractor;
import com.adobe.internal.pdftoolkit.services.textextraction.Word;
import com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator;
import com.adobe.internal.pdftoolkit.services.textextraction.impl.Base14FontSetUtil;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

/**
 * Extracts the words of a PDF page or document and arranges them in reading
 * order, exposing the result as word, sentence and paragraph iterators.
 *
 * <p>Words are collected into horizontal and vertical projections and then
 * handed to {@link FindHVBreaks}, which determines the blocks and emits the
 * words in reading order. The projection fields below are mutable working
 * state; the page-level helper methods of this class run each extraction on a
 * freshly created instance.
 */
public class ReadingOrderTextExtractor {
    // Document this extractor operates on.
    private PDFDocument pdfDoc;
    // Font set produced by Base14FontSetUtil.buildBase14FontSet from the client font set.
    private PDFFontSet fontSet;
    // Whether structure information should be used; forwarded to per-page extractor instances.
    private boolean useStructureInfo;
    // Options passed to TextExtractor.newInstance; null unless the options-based constructor was used.
    private TextExtractionOptions teOptions;
    // Number of words seen per starting x-coordinate (rounded to two decimals).
    private HashMap<Double, Integer> sFreq = new HashMap();
    // Words keyed by the x-coordinate of their first quad's p1 corner; colliding keys are nudged upward.
    TreeMap<Double, SortedWord> vLine = new TreeMap();
    // Words keyed by the y-coordinate of their first quad's p1 corner; colliding keys are nudged upward.
    TreeMap<Double, SortedWord> hLine = new TreeMap();
    // Trace level: debug() prints only messages whose level is at or below -1.
    private static final int trace = -1;
    // One entry per processed page (FindHVBreaks.getWordsInPage()); consumed in this class
    // as page -> paragraph -> line -> word.
    List<List<List<List<Word>>>> wordsInDocument = new ArrayList<List<List<List<Word>>>>();
    // Hyphenation flag; within this class it is only stored and returned by its accessors.
    private boolean resolveHyphenation = false;

    /**
     * Creates an extractor for the given document without structure information
     * and without extraction options.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     * @return a new extractor instance
     */
    public static ReadingOrderTextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        ReadingOrderTextExtractor extractor = new ReadingOrderTextExtractor(pdfDoc, clientFontSet);
        return extractor;
    }

    /**
     * Creates an extractor for the given document with an explicit
     * structure-information flag.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     * @param useStructure  initial value of the structure-information flag
     * @return a new extractor instance
     */
    public static ReadingOrderTextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet, boolean useStructure) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        ReadingOrderTextExtractor extractor = new ReadingOrderTextExtractor(pdfDoc, clientFontSet, useStructure);
        return extractor;
    }

    /**
     * Creates an extractor for the given document configured by text
     * extraction options.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     * @param options       extraction options handed to the underlying text extractor
     * @return a new extractor instance
     */
    public static ReadingOrderTextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet, TextExtractionOptions options) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        ReadingOrderTextExtractor extractor = new ReadingOrderTextExtractor(pdfDoc, clientFontSet, options);
        return extractor;
    }

    /**
     * Builds an extractor with the structure-information flag off and no
     * extraction options.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     */
    private ReadingOrderTextExtractor(PDFDocument pdfDoc, PDFFontSet clientFontSet) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        // Same state as assigning pdfDoc and fontSet directly: the flag defaults to false.
        this(pdfDoc, clientFontSet, false);
    }

    /**
     * Builds an extractor with an explicit structure-information flag and no
     * extraction options.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     * @param useStructure  initial value of the structure-information flag
     */
    private ReadingOrderTextExtractor(PDFDocument pdfDoc, PDFFontSet clientFontSet, boolean useStructure) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        PDFFontSet combinedFonts = Base14FontSetUtil.buildBase14FontSet(clientFontSet, pdfDoc);
        this.pdfDoc = pdfDoc;
        this.fontSet = combinedFonts;
        this.useStructureInfo = useStructure;
    }

    /**
     * Builds an extractor configured by text extraction options.
     *
     * <p>A {@code null} {@code options} is accepted: the extractor then keeps no
     * options and leaves the structure-information flag off, which is the same
     * state the two-argument constructor produces.
     *
     * @param pdfDoc        document to extract text from
     * @param clientFontSet client font set, combined with the Base 14 fonts
     * @param options       extraction options, or {@code null} for none
     */
    private ReadingOrderTextExtractor(PDFDocument pdfDoc, PDFFontSet clientFontSet, TextExtractionOptions options) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        this.pdfDoc = pdfDoc;
        this.fontSet = Base14FontSetUtil.buildBase14FontSet(clientFontSet, pdfDoc);
        this.teOptions = options;
        // The rest of this class already treats a null teOptions as "no options",
        // so do not dereference it unconditionally here.
        this.useStructureInfo = options != null && options.isUseStructure();
    }

    /**
     * Records that one more word starts at the x-coordinate of the first
     * quad's {@code p1} corner, bucketed to two decimal places.
     *
     * @param word  the word being recorded (not used by this method)
     * @param quads bounding quads of the word; nothing is recorded when this is
     *              {@code null} or empty
     */
    private void startingFrequency(Word word, List<ASQuad> quads) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        if (quads == null || quads.isEmpty()) {
            return;
        }
        double startX = quads.get(0).p1().x();
        // Rounding goes through float, exactly as the bucket keys have always been computed.
        double roundedX = (double)Math.round((float)(startX * 100.0)) / 100.0;
        Double bucket = Double.valueOf(roundedX);
        Integer seen = this.sFreq.get(bucket);
        this.sFreq.put(bucket, seen == null ? 1 : seen + 1);
    }

    /**
     * Copies every starting x-coordinate bucket whose word count is greater
     * than 10 into the supplied map.
     *
     * @param highsFreq destination map; existing entries are kept, matching keys are overwritten
     */
    private void pickHighStartingFreqs(HashMap<Double, Integer> highsFreq) {
        for (Map.Entry<Double, Integer> bucket : this.sFreq.entrySet()) {
            Integer count = bucket.getValue();
            if (count > 10) {
                highsFreq.put(bucket.getKey(), count);
            }
        }
    }

    /**
     * Adds a word to the vertical and horizontal projections, keyed by the x-
     * and y-coordinate of its first quad's {@code p1} corner.
     *
     * <p>When a coordinate is already taken, the key is nudged upward in steps
     * of 1e-5 until it is free. If a coordinate cannot be nudged at all (NaN or
     * positive infinity), the word is left out of both projections instead of
     * looping forever.
     *
     * @param word  the word to add
     * @param order position of the word in extraction order
     * @param quads bounding quads of the word; nothing is added when this is
     *              {@code null} or empty
     */
    private void columnBreaks(Word word, int order, List<ASQuad> quads) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        SortedWord nw = new SortedWord(word, order);
        if (quads == null || quads.isEmpty()) {
            return;
        }
        ASCoordinate corner = quads.get(0).p1();
        Double vkey = ReadingOrderTextExtractor.firstFreeKey(this.vLine, corner.x());
        Double hkey = ReadingOrderTextExtractor.firstFreeKey(this.hLine, corner.y());
        if (vkey == null || hkey == null) {
            // Keep both projections in step: a word is in both or in neither.
            return;
        }
        this.vLine.put(vkey, nw);
        this.hLine.put(hkey, nw);
    }

    /**
     * Returns the first key at or above {@code start} that is not yet present
     * in {@code line}, stepping by 1e-5, or {@code null} if no larger key can
     * be produced.
     */
    private static Double firstFreeKey(TreeMap<Double, SortedWord> line, double start) {
        final double delta = 1.0E-5;
        double candidate = start;
        while (line.containsKey(candidate)) {
            double next = candidate + delta;
            if (!(next > candidate)) {
                // The step was absorbed by rounding (very large magnitude):
                // fall back to the next representable double.
                next = Math.nextUp(candidate);
            }
            if (!(next > candidate)) {
                // NaN or +Infinity: the key cannot advance.
                return null;
            }
            candidate = next;
        }
        return candidate;
    }

    /**
     * Prints a trace message to standard output when its level does not exceed
     * the class trace level.
     *
     * @param tr          level of the message
     * @param debugString text to print, without an added line break
     */
    private static void debug(int tr, String debugString) {
        if (tr > trace) {
            return;
        }
        System.out.print(debugString);
    }

    /**
     * @return the current x-keyed word projection (the live map, not a copy)
     */
    private TreeMap<Double, SortedWord> getVLine() {
        return vLine;
    }

    /**
     * @return the current y-keyed word projection (the live map, not a copy)
     */
    private TreeMap<Double, SortedWord> getHLine() {
        return hLine;
    }

    /**
     * Returns the words gathered so far, one outer entry per processed page.
     *
     * @return the internal list itself, not a copy
     */
    public List<List<List<List<Word>>>> getWordsInDocument() {
        return wordsInDocument;
    }

    /**
     * Sets the hyphenation-resolution flag.
     *
     * @param val new value of the flag
     */
    public void setResolveHyphenation(boolean val) {
        resolveHyphenation = val;
    }

    /**
     * @return the current value of the hyphenation-resolution flag
     */
    public boolean isResolveHyphenation() {
        return resolveHyphenation;
    }

    /**
     * Extracts the words of a page and returns them in reading order.
     *
     * <p>Every word delivered by the text extractor is added to the starting
     * x-coordinate histogram and to the horizontal/vertical projections. The
     * collected words are then run through {@link FindHVBreaks}; the ordered
     * words are returned and the page's paragraph structure is appended to
     * {@link #getWordsInDocument()}.
     *
     * <p>The projection state is cleared before returning, so calling this
     * method again on the same instance starts from an empty page rather than
     * mixing in the words of the previous call.
     *
     * @param pdfDoc        document handed to the underlying text extractor
     * @param clientFontSet font set handed to the underlying text extractor
     * @param page          page to extract
     * @param pageIndex     page number passed to the extractor and compared with each word's page number
     * @return the words in reading order; empty if no word had bounding quads
     */
    public List<Word> getReadingOrderedTextFromPDF(PDFDocument pdfDoc, PDFFontSet clientFontSet, PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
        TextExtractor extractor = TextExtractor.newInstance(pdfDoc, clientFontSet, this.teOptions);
        WordsIterator wordsIter = extractor.getROTEWordsIterator(page, pageIndex);
        ArrayList<Word> wordList = new ArrayList<Word>();
        HashMap<Double, Integer> sHighFreqs = new HashMap<Double, Integer>();
        int order = 0;
        int prevPageNum = 0;
        while (wordsIter.hasNext()) {
            Word word = wordsIter.next();
            List<ASQuad> quads = word.getBoundingQuads();
            int pageNum = word.getPageNumber();
            if (prevPageNum != 0 && pageNum != prevPageNum && pageNum == pageIndex) {
                // The iterator moved from another page onto the requested one:
                // flush what was collected for the previous page and start over.
                // NOTE(review): the previous page's words are written into the
                // fresh result list, as before - confirm that this is intended.
                wordList = new ArrayList<Word>();
                this.emitCollectedPage(prevPageNum, sHighFreqs, wordList);
                this.resetCollectedPage();
                sHighFreqs.clear();
                order = 0;
            }
            this.startingFrequency(word, quads);
            this.columnBreaks(word, order, quads);
            ++order;
            prevPageNum = pageNum;
        }
        this.emitCollectedPage(prevPageNum, sHighFreqs, wordList);
        // Drop the working state so a later call does not reuse this call's words.
        this.resetCollectedPage();
        return wordList;
    }

    /**
     * Runs the break detection over the collected projections, appends the words
     * in reading order to {@code wordList} and records the page structure in
     * {@code wordsInDocument}. Does nothing when no word has been collected.
     */
    private void emitCollectedPage(int pageNum, HashMap<Double, Integer> sHighFreqs, ArrayList<Word> wordList) throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
        if (this.getVLine().size() <= 0) {
            return;
        }
        ReadingOrderTextExtractor.debug(1, "For Page" + pageNum + ", start vertical/horizontal projections ==> ");
        this.pickHighStartingFreqs(sHighFreqs);
        FindHVBreaks dcb = new FindHVBreaks();
        dcb.setStartingFrequency(sHighFreqs);
        dcb.processDetermineBreaks(this.getVLine(), this.getHLine());
        ReadingOrderTextExtractor.debug(1, "blocks determined ==> ");
        dcb.breakWithStartingFreqs(dcb.allGroupsWithPrior);
        ReadingOrderTextExtractor.debug(1, "write reading-order text ==> ");
        dcb.printReadingOrderText(dcb.allGroupsWithPrior, wordList);
        this.wordsInDocument.add(dcb.getWordsInPage());
        ReadingOrderTextExtractor.debug(1, "Finished.\n");
    }

    /**
     * Clears the per-page working state. The projections are replaced rather
     * than cleared so that maps already handed to a FindHVBreaks instance are
     * left untouched.
     */
    private void resetCollectedPage() {
        this.vLine = new TreeMap<Double, SortedWord>();
        this.hLine = new TreeMap<Double, SortedWord>();
        this.sFreq.clear();
    }

    /**
     * Splits a paragraph into sentences.
     *
     * <p>The {@code toString()} text of every word of every line is concatenated
     * in order, and the result is cut at the boundaries reported by a sentence
     * {@link BreakIterator} for the default locale.
     *
     * @param para the paragraph as a list of lines, each a list of words
     * @return the sentences in order; empty when the paragraph has no text
     */
    public List<String> buildSentences(List<List<Word>> para) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        StringBuilder text = new StringBuilder();
        for (List<Word> line : para) {
            for (Word w : line) {
                text.append(w.toString());
            }
        }
        String unicode = text.toString();
        List<String> sentences = new ArrayList<String>();
        if (this.isWordEmpty(unicode)) {
            return sentences;
        }
        BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.getDefault());
        boundaries.setText(unicode);
        int length = unicode.length();
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
            int from = start;
            int to = end;
            if (end > length) {
                // Defensive clamp: yields an empty sentence for an out-of-range boundary.
                from = length;
                to = length;
            }
            sentences.add(unicode.substring(from, to));
            start = end;
        }
        return sentences;
    }

    /**
     * Tells whether a string is present but has no characters.
     *
     * @param strVal string to test
     * @return {@code true} only for a non-null string of length zero;
     *         {@code false} for {@code null}
     */
    private boolean isWordEmpty(String strVal) {
        if (strVal == null) {
            return false;
        }
        return strVal.isEmpty();
    }

    /**
     * Returns an iterator over the paragraphs of the whole document, page by page.
     *
     * @return a new document-wide paragraph iterator
     */
    public ParagraphIterator getParagraphIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException, IOException {
        ParagraphIterator documentParagraphs = new DocumentParagraphIterator();
        return documentParagraphs;
    }

    /**
     * Returns an iterator over the paragraphs of a single page.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return an iterator over the page's paragraphs, each a list of lines of words
     */
    public ParagraphIterator getParagraphIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new ParagraphListIterator(this.extractParagraphs(page, pageIndex));
    }

    /**
     * Returns an iterator over the sentences of the whole document, page by page.
     *
     * @return a new document-wide sentence iterator
     */
    public SentenceIterator getSentenceIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException, IOException {
        SentenceIterator documentSentences = new DocumentSentenceIterator();
        return documentSentences;
    }

    /**
     * Returns an iterator over the sentences of a single page.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return an iterator over the page's sentences
     */
    public SentenceIterator getSentenceIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new SentenceListIterator(this.extractSentences(page, pageIndex));
    }

    /**
     * Extracts a page with a fresh extractor instance and splits each of its
     * paragraphs into sentences.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return all sentences of the page, in paragraph order
     */
    private List<String> extractSentences(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        // Use a separate instance so this extractor's own working state is untouched.
        ReadingOrderTextExtractor toe;
        if (this.teOptions == null) {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.useStructureInfo);
        } else {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.teOptions);
        }
        toe.getReadingOrderedTextFromPDF(this.pdfDoc, this.fontSet, page, pageIndex);
        List<String> sentences = new ArrayList<String>();
        for (List<List<List<Word>>> pageParagraphs : toe.getWordsInDocument()) {
            for (List<List<Word>> paragraph : pageParagraphs) {
                sentences.addAll(toe.buildSentences(paragraph));
            }
        }
        return sentences;
    }

    /**
     * Extracts a page with a fresh extractor instance and returns the
     * paragraphs of the first page entry it produced.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return a new list of paragraphs; empty when the extraction produced no page entry
     */
    private List<List<List<Word>>> extractParagraphs(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        // Use a separate instance so this extractor's own working state is untouched.
        ReadingOrderTextExtractor toe;
        if (this.teOptions == null) {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.useStructureInfo);
        } else {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.teOptions);
        }
        toe.getReadingOrderedTextFromPDF(this.pdfDoc, this.fontSet, page, pageIndex);
        List<List<List<List<Word>>>> extractedPages = toe.getWordsInDocument();
        List<List<List<Word>>> paragraphs = new ArrayList<List<List<Word>>>();
        if (!extractedPages.isEmpty()) {
            paragraphs.addAll(extractedPages.get(0));
        }
        return paragraphs;
    }

    /**
     * Returns an iterator over the words of the whole document, page by page.
     *
     * @return a new document-wide words iterator
     */
    public WordsIterator getWordsIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException, IOException {
        WordsIterator documentWords = new DocumentWordsIterator();
        return documentWords;
    }

    /**
     * Returns an iterator over the words of a single page in reading order.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return an iterator over the page's words
     */
    public WordsIterator getWordsIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new WordListIterator(this.extractWords(page, pageIndex));
    }

    /**
     * Extracts a page with a fresh extractor instance and returns its words in
     * reading order.
     *
     * @param page      page to extract
     * @param pageIndex page number of {@code page}
     * @return a new list holding the page's words
     */
    private List<Word> extractWords(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        // Use a separate instance so this extractor's own working state is untouched.
        ReadingOrderTextExtractor toe;
        if (this.teOptions == null) {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.useStructureInfo);
        } else {
            toe = new ReadingOrderTextExtractor(this.pdfDoc, this.fontSet, this.teOptions);
        }
        List<Word> ordered = toe.getReadingOrderedTextFromPDF(this.pdfDoc, this.fontSet, page, pageIndex);
        return new ArrayList<Word>(ordered);
    }

    /**
     * @return the current value of the structure-information flag
     */
    public boolean isUseStructureInfo() {
        return useStructureInfo;
    }

    /**
     * Sets the structure-information flag used for per-page extractor
     * instances created when no extraction options are set.
     *
     * @param useStructureInfo new value of the flag
     */
    public void setUseStructureInfo(boolean useStructureInfo) {
        ReadingOrderTextExtractor.this.useStructureInfo = useStructureInfo;
    }

    /**
     * {@link SentenceIterator} backed by an in-memory list of sentences.
     */
    static class SentenceListIterator implements SentenceIterator {
        // Position within the wrapped sentence list.
        Iterator<String> wordsIter;

        /**
         * @param wordList sentences to iterate over, in order
         */
        SentenceListIterator(List<String> wordList) {
            wordsIter = wordList.iterator();
        }

        /** @return whether another sentence is available */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            return wordsIter.hasNext();
        }

        /** @return the next sentence of the wrapped list */
        @Override
        public String next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            return wordsIter.next();
        }
    }

    /**
     * {@link ParagraphIterator} backed by an in-memory list of paragraphs, each
     * paragraph being a list of lines of words.
     */
    static class ParagraphListIterator implements ParagraphIterator {
        // Position within the wrapped paragraph list.
        Iterator<List<List<Word>>> wordsIter;

        /**
         * @param wordList paragraphs to iterate over, in order
         */
        ParagraphListIterator(List<List<List<Word>>> wordList) {
            wordsIter = wordList.iterator();
        }

        /** @return whether another paragraph is available */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            return wordsIter.hasNext();
        }

        /** @return the next paragraph of the wrapped list */
        @Override
        public List<List<Word>> next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            return wordsIter.next();
        }
    }

    /**
     * {@link WordsIterator} backed by an in-memory list of words.
     */
    static class WordListIterator implements WordsIterator {
        // Position within the wrapped word list.
        Iterator<Word> wordsIter;

        /**
         * @param wordList words to iterate over, in order
         */
        WordListIterator(List<Word> wordList) {
            wordsIter = wordList.iterator();
        }

        /** @return whether another word is available */
        @Override
        public boolean hasNext() {
            return wordsIter.hasNext();
        }

        /** @return the next word of the wrapped list */
        @Override
        public Word next() {
            return wordsIter.next();
        }
    }

    /**
     * {@link WordsIterator} over all pages of the enclosing extractor's
     * document. Pages are extracted one at a time, on demand, and pages that
     * yield no words are skipped.
     */
    class DocumentWordsIterator implements WordsIterator {
        // Number of the page most recently extracted; the first page is 1.
        private int pageIndex = 0;
        // Remaining pages of the document.
        Iterator<PDFPage> pagesIter;
        // Words of the current page; null when the document yielded no pages.
        WordsIterator wordsIter;

        /**
         * Positions the iterator on the first page of the document, if there is one.
         */
        DocumentWordsIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = ReadingOrderTextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getWordsIterator(page, this.pageIndex);
            }
        }

        /**
         * Advances over exhausted or empty pages until a word is available.
         *
         * @return {@code true} if a word is available, {@code false} once every
         *         page has been consumed or the document has no pages
         */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check covers a document without pages, where the
            // constructor never created a per-page iterator.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getWordsIterator(page, this.pageIndex);
            }
            return true;
        }

        /**
         * @return the next word, or {@code null} when no word is left
         */
        @Override
        public Word next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }

    /**
     * {@link SentenceIterator} over all pages of the enclosing extractor's
     * document. Pages are extracted one at a time, on demand, and pages that
     * yield no sentences are skipped.
     */
    class DocumentSentenceIterator implements SentenceIterator {
        // Number of the page most recently extracted; the first page is 1.
        private int pageIndex = 0;
        // Remaining pages of the document.
        Iterator<PDFPage> pagesIter;
        // Sentences of the current page; null when the document yielded no pages.
        SentenceIterator wordsIter;

        /**
         * Positions the iterator on the first page of the document, if there is one.
         */
        DocumentSentenceIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = ReadingOrderTextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getSentenceIterator(page, this.pageIndex);
            }
        }

        /**
         * Advances over exhausted or empty pages until a sentence is available.
         *
         * @return {@code true} if a sentence is available, {@code false} once
         *         every page has been consumed or the document has no pages
         */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check covers a document without pages, where the
            // constructor never created a per-page iterator.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getSentenceIterator(page, this.pageIndex);
            }
            return true;
        }

        /**
         * @return the next sentence, or {@code null} when no sentence is left
         */
        @Override
        public String next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }

    /**
     * {@link ParagraphIterator} over all pages of the enclosing extractor's
     * document. Pages are extracted one at a time, on demand, and pages that
     * yield no paragraphs are skipped.
     */
    class DocumentParagraphIterator implements ParagraphIterator {
        // Number of the page most recently extracted; the first page is 1.
        private int pageIndex = 0;
        // Remaining pages of the document.
        Iterator<PDFPage> pagesIter;
        // Paragraphs of the current page; null when the document yielded no pages.
        ParagraphIterator wordsIter;

        /**
         * Positions the iterator on the first page of the document, if there is one.
         */
        DocumentParagraphIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = ReadingOrderTextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getParagraphIterator(page, this.pageIndex);
            }
        }

        /**
         * Advances over exhausted or empty pages until a paragraph is available.
         *
         * @return {@code true} if a paragraph is available, {@code false} once
         *         every page has been consumed or the document has no pages
         */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check covers a document without pages, where the
            // constructor never created a per-page iterator.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = ReadingOrderTextExtractor.this.getParagraphIterator(page, this.pageIndex);
            }
            return true;
        }

        /**
         * @return the next paragraph as a list of lines of words, or
         *         {@code null} when no paragraph is left
         */
        @Override
        public List<List<Word>> next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }
}

