/*
 * Decompiled with CFR 0.152.
 */
package wiki.xsx.core.pdf.doc;

import java.awt.Rectangle;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import wiki.xsx.core.pdf.doc.XEasyPdfDocument;

/**
 * Fluent helper that extracts text, images and AcroForm values from the {@link PDDocument}
 * built by an {@link XEasyPdfDocument}.
 *
 * <p>Every {@code extract*} method appends its results to a caller-supplied collection and
 * returns {@code this} so calls can be chained; {@link #finish()} hands control back to the
 * owning {@link XEasyPdfDocument}.
 *
 * <p>NOTE(review): several PDFBox calls used here ({@code new PDFTextStripper()},
 * {@code processPage}, {@code getText}, {@code getImage}) declare {@link IOException}, yet the
 * enclosing methods neither catch nor declare it. This looks like a decompilation artifact
 * (for example an annotation-generated "sneaky throw" in the original sources) - confirm
 * against the original sources before changing the exception contract.
 */
public class XEasyPdfDocumentExtractor implements Serializable {
    private static final long serialVersionUID = 8464226596006053066L;

    /**
     * Matches one table row: a non-whitespace character followed by at least one more
     * character that is not a line break.
     * NOTE(review): a row consisting of a single character does not match - confirm intended.
     */
    private static final Pattern TABLE_PATTERN = Pattern.compile("(\\S[^\\n\\r]+)");

    /** Name of the single temporary region used by the simple-table extraction. */
    private static final String TABLE_REGION_NAME = "table";

    /** Word separator injected between words of a table row so the row can be split into cells. */
    private static final String TABLE_WORD_SEPARATOR = "X-EasyPdf-Separator";

    /** PDFBox document that all extraction methods read from. */
    private final PDDocument document;

    /** Owning document wrapper; returned (after {@code reset()}) by {@link #finish()}. */
    private final XEasyPdfDocument pdfDocument;

    /** Stripper used for whole-page text extraction. */
    private final SimpleExtractor simpleExtractor;

    /** Stripper used for text extraction restricted to the regions added via {@link #addRegion}. */
    private final RegionExtractor regionExtractor;

    /**
     * Creates an extractor for the given document wrapper.
     *
     * @param pdfDocument wrapper whose {@code build(true)} result is used as the PDFBox document
     */
    XEasyPdfDocumentExtractor(XEasyPdfDocument pdfDocument) {
        this.pdfDocument = pdfDocument;
        this.document = this.pdfDocument.build(true);
        this.simpleExtractor = new SimpleExtractor(this.document);
        this.regionExtractor = new RegionExtractor();
    }

    /**
     * Registers (or replaces) a named region for {@link #extractTextByRegions}.
     *
     * @param regionName key under which the region's text is reported
     * @param rectangle  area of the page whose text belongs to the region
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor addRegion(String regionName, Rectangle rectangle) {
        this.regionExtractor.addRegion(regionName, rectangle);
        return this;
    }

    /**
     * Removes every region previously registered with {@link #addRegion}.
     *
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor clearRegion() {
        this.regionExtractor.clearRegion();
        return this;
    }

    /**
     * Extracts plain text without any regular-expression filtering.
     *
     * @param textList  list the extracted text is appended to
     * @param pageIndex zero-based page indexes; when omitted the whole document is extracted
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractText(List<String> textList, int ... pageIndex) {
        this.extractText(textList, null, pageIndex);
        return this;
    }

    /**
     * Extracts plain text, optionally keeping only the parts that match a regular expression.
     *
     * @param textList  list the extracted text (or each regex match) is appended to
     * @param regex     regular expression; {@code null} or blank means "no filtering"
     * @param pageIndex zero-based page indexes; when omitted the whole document is extracted
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractText(List<String> textList, String regex, int ... pageIndex) {
        this.simpleExtractor.extract(textList, regex, pageIndex);
        return this;
    }

    /**
     * Extracts the text of every registered region, one map (region name to text) per page.
     * Pages that yield an empty map (no regions registered) add nothing to {@code dataList}.
     *
     * @param dataList  list the per-page maps are appended to
     * @param pageIndex zero-based page indexes; when omitted every page of the document is read
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractTextByRegions(List<Map<String, String>> dataList, int ... pageIndex) {
        if (pageIndex != null && pageIndex.length > 0) {
            for (int index : pageIndex) {
                this.addText(dataList, index);
            }
        } else {
            // Iterate over ALL pages; the previous bound (page count - 1 with '<') skipped the last page.
            int pageCount = this.document.getNumberOfPages();
            for (int index = 0; index < pageCount; ++index) {
                this.addText(dataList, index);
            }
        }
        return this;
    }

    /**
     * Extracts a simple table from a whole page.
     *
     * <p>The region used is a square anchored at the origin whose side is the larger of the
     * media box width and height (truncated to {@code int}) plus one.
     *
     * @param textList  list the rows (each a list of cell texts) are appended to
     * @param pageIndex zero-based page index
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractTextForSimpleTable(List<List<String>> textList, int pageIndex) {
        PDRectangle rectangle = this.document.getPage(pageIndex).getMediaBox();
        int maxSize = Math.max((int)rectangle.getWidth() + 1, (int)rectangle.getHeight() + 1);
        this.extractTextByRegionsForSimpleTable(textList, new Rectangle(maxSize, maxSize), pageIndex);
        return this;
    }

    /**
     * Extracts a simple table from a rectangular area of a page.
     *
     * <p>The area's text is extracted with a marker string as word separator; every line
     * matched by {@link #TABLE_PATTERN} becomes one row, split into cells at the marker.
     * Rows are fixed-size lists (created by {@link Arrays#asList}).
     *
     * @param textList  list the rows (each a list of cell texts) are appended to
     * @param rectangle area of the page containing the table
     * @param pageIndex zero-based page index
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractTextByRegionsForSimpleTable(List<List<String>> textList, Rectangle rectangle, int pageIndex) {
        // A dedicated stripper is used so the regions registered on this extractor are not disturbed.
        RegionExtractor tableExtractor = new RegionExtractor();
        tableExtractor.addRegion(TABLE_REGION_NAME, rectangle);
        String text = tableExtractor.extract(this.document.getPage(pageIndex), TABLE_WORD_SEPARATOR).get(TABLE_REGION_NAME);
        if (text != null && !text.isEmpty()) {
            Matcher matcher = TABLE_PATTERN.matcher(text);
            while (matcher.find()) {
                textList.add(Arrays.asList(matcher.group().split(TABLE_WORD_SEPARATOR)));
            }
        }
        return this;
    }

    /**
     * Extracts the images of every page, including images nested inside form XObjects.
     *
     * @param imageList list the images are appended to
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractImage(List<BufferedImage> imageList) {
        for (PDPage page : this.document.getPages()) {
            this.addImage(imageList, page.getResources());
        }
        return this;
    }

    /**
     * Extracts the images of the given pages, including images nested inside form XObjects.
     *
     * <p>Negative indexes are skipped. When {@code pageIndex} is {@code null} or empty nothing
     * is extracted.
     *
     * @param imageList list the images are appended to
     * @param pageIndex zero-based page indexes
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractImage(List<BufferedImage> imageList, int ... pageIndex) {
        if (pageIndex != null && pageIndex.length > 0) {
            PDPageTree pages = this.document.getPages();
            for (int index : pageIndex) {
                if (index < 0) {
                    continue;
                }
                this.addImage(imageList, pages.get(index).getResources());
            }
        }
        return this;
    }

    /**
     * Copies the fields returned by the document's AcroForm into a map keyed by fully
     * qualified field name. Nothing is added when the document has no AcroForm.
     *
     * @param formMap map that receives field name to field value (as string)
     * @return this extractor
     */
    public XEasyPdfDocumentExtractor extractForm(Map<String, String> formMap) {
        PDAcroForm form = this.document.getDocumentCatalog().getAcroForm();
        if (form != null) {
            List<PDField> fields = form.getFields();
            for (PDField field : fields) {
                formMap.put(field.getFullyQualifiedName(), field.getValueAsString());
            }
        }
        return this;
    }

    /**
     * Ends the extraction.
     *
     * @return the result of calling {@code reset()} on the owning document wrapper
     */
    public XEasyPdfDocument finish() {
        return this.pdfDocument.reset();
    }

    /**
     * Extracts the registered regions of one page and appends the result when it is not empty.
     *
     * @param dataList  list the region map is appended to
     * @param pageIndex zero-based page index
     */
    private void addText(List<Map<String, String>> dataList, int pageIndex) {
        Map<String, String> data = this.regionExtractor.extract(this.document.getPage(pageIndex));
        if (!data.isEmpty()) {
            dataList.add(data);
        }
    }

    /**
     * Appends every image reachable from the given resources, descending into form XObjects.
     *
     * @param imageList list the images are appended to
     * @param resources resources to scan; {@code null} (no resource dictionary) is ignored
     */
    private void addImage(List<BufferedImage> imageList, PDResources resources) {
        // Pages and form XObjects without a resource dictionary report null resources.
        if (resources == null) {
            return;
        }
        Iterable<COSName> objectNames = resources.getXObjectNames();
        for (COSName objectName : objectNames) {
            PDXObject xObject = resources.getXObject(objectName);
            if (xObject instanceof PDImage) {
                imageList.add(((PDImage)xObject).getImage());
            } else if (xObject instanceof PDFormXObject) {
                this.addImage(imageList, ((PDFormXObject)xObject).getResources());
            }
        }
    }

    /**
     * Text stripper that collects text separately for each registered rectangular region.
     */
    private static class RegionExtractor extends PDFTextStripper {
        /** Per region: the character lists the stripper fills while processing a page. */
        private final Map<String, ArrayList<List<TextPosition>>> regionCharacterList = new HashMap<String, ArrayList<List<TextPosition>>>(32);
        /** Per region: the writer that receives the region's text in {@link #writePage()}. */
        private final Map<String, StringWriter> regionText = new HashMap<String, StringWriter>(32);
        /** Per region: the area of the page belonging to the region. */
        private final Map<String, Rectangle> regionArea = new HashMap<String, Rectangle>(32);

        /**
         * Creates a stripper that sorts text by position and ignores article beads.
         *
         * @throws IOException if the underlying stripper cannot be initialised
         */
        RegionExtractor() throws IOException {
            super.setSortByPosition(true);
            // Each region owns exactly one article list, so bead-based article separation
            // (which would index further lists) must be off - same as PDFTextStripperByArea.
            super.setShouldSeparateByBeads(false);
        }

        /**
         * Registers (or replaces) a region.
         *
         * @param regionName key of the region
         * @param rectangle  area of the region
         */
        void addRegion(String regionName, Rectangle rectangle) {
            this.regionArea.put(regionName, rectangle);
        }

        /** Forgets all regions together with their collected characters and text. */
        void clearRegion() {
            this.regionCharacterList.clear();
            this.regionText.clear();
            this.regionArea.clear();
        }

        /**
         * Extracts the text of all regions from a page using a single space as word separator.
         *
         * @param page page to process
         * @return region name to extracted text; empty when no region is registered
         */
        Map<String, String> extract(PDPage page) {
            return this.extract(page, " ");
        }

        /**
         * Extracts the text of all regions from a page.
         *
         * @param page          page to process; a page without contents yields empty texts
         * @param wordSeparator separator the stripper puts between words
         * @return region name to extracted text; empty when no region is registered
         */
        Map<String, String> extract(PDPage page, String wordSeparator) {
            if (this.regionArea.isEmpty()) {
                return new HashMap<String, String>(0);
            }
            Set<String> regionNames = this.regionArea.keySet();
            // processPage() is called directly, so the page range is pinned to the stripper's
            // current page number to let the page pass its range check.
            this.setStartPage(this.getCurrentPageNo());
            this.setEndPage(this.getCurrentPageNo());
            this.setWordSeparator(wordSeparator);
            // Start every region with fresh buffers so results of earlier pages do not leak in.
            for (String region : regionNames) {
                ArrayList<List<TextPosition>> regionCharactersByArticle = new ArrayList<List<TextPosition>>();
                regionCharactersByArticle.add(new ArrayList<TextPosition>(256));
                this.regionCharacterList.put(region, regionCharactersByArticle);
                this.regionText.put(region, new StringWriter());
            }
            if (page.hasContents()) {
                this.processPage(page);
            }
            Map<String, String> data = new HashMap<String, String>(regionNames.size());
            for (String region : regionNames) {
                data.put(region, this.regionText.get(region).toString());
            }
            return data;
        }

        /**
         * Routes a text position into the character list of every region containing it.
         *
         * @param text text position reported by the stripper
         */
        @Override
        protected void processTextPosition(TextPosition text) {
            for (Map.Entry<String, Rectangle> regionAreaEntry : this.regionArea.entrySet()) {
                Rectangle2D rect = regionAreaEntry.getValue();
                if (rect.contains(text.getX(), text.getY())) {
                    // Point the stripper at this region's list before delegating.
                    this.charactersByArticle = this.regionCharacterList.get(regionAreaEntry.getKey());
                    super.processTextPosition(text);
                }
            }
        }

        /**
         * Writes each region's collected characters into that region's own writer.
         *
         * @throws IOException if writing the text fails
         */
        @Override
        protected void writePage() throws IOException {
            for (String region : this.regionArea.keySet()) {
                this.charactersByArticle = this.regionCharacterList.get(region);
                this.output = this.regionText.get(region);
                super.writePage();
            }
        }
    }

    /**
     * Text stripper for whole-page extraction with optional regular-expression filtering.
     */
    private static class SimpleExtractor extends PDFTextStripper {
        /** Document the text is read from. */
        private final PDDocument document;

        /**
         * Creates a stripper bound to a document.
         *
         * @param document document the text is read from
         * @throws IOException if the underlying stripper cannot be initialised
         */
        public SimpleExtractor(PDDocument document) throws IOException {
            this.document = document;
        }

        /**
         * Extracts text page by page, or from the whole document when no page is given.
         *
         * @param textList  list the text (or each regex match) is appended to
         * @param regex     regular expression; {@code null} or blank means "no filtering"
         * @param pageIndex zero-based page indexes; when omitted the whole document is extracted
         */
        void extract(List<String> textList, String regex, int ... pageIndex) {
            // Compile once per call instead of once per page.
            Pattern pattern = regex != null && regex.trim().length() > 0 ? Pattern.compile(regex) : null;
            if (pageIndex != null && pageIndex.length > 0) {
                for (int index : pageIndex) {
                    // The stripper counts pages from 1.
                    this.setStartPage(index + 1);
                    this.setEndPage(index + 1);
                    this.collect(textList, pattern);
                }
            } else {
                // Restore the full range; an earlier page-specific call would otherwise
                // leave the stripper restricted to its last page.
                this.setStartPage(1);
                this.setEndPage(Integer.MAX_VALUE);
                this.collect(textList, pattern);
            }
        }

        /**
         * Reads the text of the currently configured page range and appends it.
         *
         * @param textList list the text (or each match) is appended to
         * @param pattern  compiled filter, or {@code null} to append the text unfiltered
         */
        private void collect(List<String> textList, Pattern pattern) {
            String text = this.getText(this.document);
            if (pattern == null) {
                textList.add(text);
                return;
            }
            Matcher matcher = pattern.matcher(text);
            while (matcher.find()) {
                textList.add(matcher.group());
            }
        }
    }
}

