/*
 * Decompiled with CFR 0.152.
 */
package org.springframework.ai.reader.pdf;

import java.awt.Rectangle;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.pdf.config.ParagraphManager;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.pdf.layout.PDFLayoutTextStripperByArea;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/**
 * {@link DocumentReader} that splits a PDF into {@link Document}s along the
 * paragraphs reported by {@link ParagraphManager#flatten()}.
 *
 * <p>Each emitted document carries the text found between two consecutive
 * paragraphs and the metadata keys {@code title}, {@code page_number},
 * {@code end_page_number}, {@code level} and {@code file_name}.
 *
 * <p>NOTE(review): the parsed {@link PDDocument} is kept open for the lifetime
 * of the reader and this class offers no way to close it; callers that need
 * deterministic cleanup currently have to close {@link #document} themselves
 * from a subclass.
 */
public class ParagraphPdfDocumentReader implements DocumentReader {
    private static final String METADATA_START_PAGE = "page_number";
    private static final String METADATA_END_PAGE = "end_page_number";
    private static final String METADATA_TITLE = "title";
    private static final String METADATA_LEVEL = "level";
    private static final String METADATA_FILE_NAME = "file_name";

    /** Name under which the per-page extraction rectangle is registered with the stripper. */
    private static final String PAGE_REGION = "pdfPageRegion";

    /** The parsed PDF all text is extracted from. */
    protected final PDDocument document;
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    private final ParagraphManager paragraphTextExtractor;
    /** File name of the source resource, stored in the {@code file_name} metadata entry. */
    protected String resourceFileName;
    private final PdfDocumentReaderConfig config;

    /**
     * Creates a reader for the resource at {@code resourceUrl} using the default configuration.
     *
     * @param resourceUrl location resolved through a {@link DefaultResourceLoader}
     */
    public ParagraphPdfDocumentReader(String resourceUrl) {
        this(new DefaultResourceLoader().getResource(resourceUrl));
    }

    /**
     * Creates a reader for {@code pdfResource} using the default configuration.
     *
     * @param pdfResource the PDF to read
     */
    public ParagraphPdfDocumentReader(Resource pdfResource) {
        this(pdfResource, PdfDocumentReaderConfig.defaultConfig());
    }

    /**
     * Creates a reader for the resource at {@code resourceUrl}.
     *
     * @param resourceUrl location resolved through a {@link DefaultResourceLoader}
     * @param config      extraction settings (margins, text formatter, position orientation)
     */
    public ParagraphPdfDocumentReader(String resourceUrl, PdfDocumentReaderConfig config) {
        this(new DefaultResourceLoader().getResource(resourceUrl), config);
    }

    /**
     * Parses {@code pdfResource} and prepares the paragraph index.
     *
     * @param pdfResource the PDF to read
     * @param config      extraction settings (margins, text formatter, position orientation)
     * @throws IllegalArgumentException rethrown unchanged when raised during set-up
     * @throws RuntimeException         wrapping any other failure while reading or parsing
     */
    public ParagraphPdfDocumentReader(Resource pdfResource, PdfDocumentReaderConfig config) {
        PDDocument parsed = null;
        // The stream is only needed while RandomAccessReadBuffer copies it, so it is
        // released as soon as set-up finishes (successfully or not).
        try (InputStream in = pdfResource.getInputStream()) {
            PDFParser pdfParser = new PDFParser(new RandomAccessReadBuffer(in));
            parsed = pdfParser.parse();
            this.document = parsed;
            this.config = config;
            this.paragraphTextExtractor = new ParagraphManager(parsed);
            this.resourceFileName = pdfResource.getFilename();
        }
        catch (IllegalArgumentException iae) {
            closeAfterFailure(parsed, iae);
            throw iae;
        }
        catch (Exception e) {
            closeAfterFailure(parsed, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes a document whose owning reader failed to construct, so it is not leaked.
     * A failure to close is attached to {@code failure} instead of replacing it.
     */
    private static void closeAfterFailure(PDDocument parsed, Throwable failure) {
        if (parsed == null) {
            return;
        }
        try {
            parsed.close();
        }
        catch (Exception closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }

    /**
     * Builds one document per pair of consecutive paragraphs (or a single document
     * when there is exactly one paragraph), skipping pairs that yield no text.
     *
     * @return the extracted documents; never {@code null} and never containing {@code null}
     */
    public List<Document> get() {
        List<ParagraphManager.Paragraph> paragraphs = this.paragraphTextExtractor.flatten();
        List<Document> documents = new ArrayList<>(paragraphs == null ? 0 : paragraphs.size());
        if (!CollectionUtils.isEmpty(paragraphs)) {
            this.logger.info("Start processing paragraphs from PDF");
            Iterator<ParagraphManager.Paragraph> itr = paragraphs.iterator();
            ParagraphManager.Paragraph current = itr.next();
            if (!itr.hasNext()) {
                this.addIfHasText(documents, this.toDocument(current, current));
            } else {
                while (itr.hasNext()) {
                    ParagraphManager.Paragraph next = itr.next();
                    // Extract once and reuse: text extraction walks the PDF pages.
                    this.addIfHasText(documents, this.toDocument(current, next));
                    current = next;
                }
            }
        }
        this.logger.info("End processing paragraphs from PDF");
        return documents;
    }

    /** Appends {@code candidate} to {@code documents} unless it is {@code null} or has no text. */
    private void addIfHasText(List<Document> documents, Document candidate) {
        if (candidate != null && StringUtils.hasText(candidate.getContent())) {
            documents.add(candidate);
        }
    }

    /**
     * Creates a document from the text between two paragraphs.
     *
     * @param from paragraph where extraction starts; also the source of title/level metadata
     * @param to   paragraph where extraction ends
     * @return the document, or {@code null} when no text was found between the paragraphs
     */
    protected Document toDocument(ParagraphManager.Paragraph from, ParagraphManager.Paragraph to) {
        String docText = this.getTextBetweenParagraphs(from, to);
        if (!StringUtils.hasText(docText)) {
            return null;
        }
        Document result = new Document(docText);
        this.addMetadata(from, to, result);
        return result;
    }

    /**
     * Stores title, start page, end page, level and file name in the document's metadata.
     *
     * @param from     paragraph providing title, start page and level
     * @param to       paragraph whose start page is recorded as the end page
     * @param document document whose metadata map is updated
     */
    protected void addMetadata(ParagraphManager.Paragraph from, ParagraphManager.Paragraph to, Document document) {
        document.getMetadata().put(METADATA_TITLE, from.title());
        document.getMetadata().put(METADATA_START_PAGE, from.startPageNumber());
        document.getMetadata().put(METADATA_END_PAGE, to.startPageNumber());
        document.getMetadata().put(METADATA_LEVEL, from.level());
        document.getMetadata().put(METADATA_FILE_NAME, this.resourceFileName);
    }

    /**
     * Extracts the text located between the start of {@code fromParagraph} and the
     * start of {@code toParagraph}, walking every page in between.
     *
     * @param fromParagraph paragraph marking where extraction begins
     * @param toParagraph   paragraph marking where extraction stops
     * @return the concatenated text, passed through the configured formatter when it
     *         contains any non-whitespace characters; otherwise the raw (possibly empty) text
     * @throws RuntimeException wrapping any failure raised during extraction
     */
    public String getTextBetweenParagraphs(ParagraphManager.Paragraph fromParagraph, ParagraphManager.Paragraph toParagraph) {
        // Paragraph page numbers are used minus one as indexes into the document's pages.
        int startPage = fromParagraph.startPageNumber() - 1;
        int endPage = toParagraph.startPageNumber() - 1;
        try {
            StringBuilder collected = new StringBuilder();
            PDFLayoutTextStripperByArea pdfTextStripper = new PDFLayoutTextStripperByArea();
            pdfTextStripper.setSortByPosition(true);
            for (int pageNumber = startPage; pageNumber <= endPage; ++pageNumber) {
                PDPage page = this.document.getPage(pageNumber);
                Rectangle region = this.regionForPage(page, pageNumber, startPage, endPage, fromParagraph, toParagraph);
                pdfTextStripper.addRegion(PAGE_REGION, region);
                pdfTextStripper.extractRegions(page);
                String pageText = pdfTextStripper.getTextForRegion(PAGE_REGION);
                if (StringUtils.hasText(pageText)) {
                    collected.append(pageText);
                }
                // The same region name is reused for every page, so drop it before the next one.
                pdfTextStripper.removeRegion(PAGE_REGION);
            }
            String text = collected.toString();
            if (StringUtils.hasText(text)) {
                text = this.config.pageExtractedTextFormatter.format(text, startPage);
            }
            return text;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes the rectangle to extract on one page: the full media box, narrowed to
     * start at {@code fromParagraph} on the first page and to stop at {@code toParagraph}
     * on the last page, with the configured margins removed where the region touches
     * the page edge.
     */
    private Rectangle regionForPage(PDPage page, int pageNumber, int startPage, int endPage,
            ParagraphManager.Paragraph fromParagraph, ParagraphManager.Paragraph toParagraph) {
        float mediaBoxHeight = page.getMediaBox().getHeight();
        int pageHeight = (int) mediaBoxHeight;

        int fromPosition = fromParagraph.position();
        int toPosition = toParagraph.position();
        if (this.config.reversedParagraphPosition) {
            // Flip the positions so they are measured from the opposite page edge.
            fromPosition = (int) (mediaBoxHeight - (float) fromPosition);
            toPosition = (int) (mediaBoxHeight - (float) toPosition);
        }

        int x0 = (int) page.getMediaBox().getLowerLeftX();
        int xW = (int) page.getMediaBox().getWidth();
        int y0 = (int) page.getMediaBox().getLowerLeftY();
        int yW = pageHeight;
        if (pageNumber == startPage) {
            y0 = fromPosition;
            yW = pageHeight - y0;
        }
        if (pageNumber == endPage) {
            yW = toPosition - y0;
        }
        // Region reaches the end of the page: cut off the bottom margin.
        if (y0 + yW == pageHeight) {
            yW -= this.config.pageBottomMargin;
        }
        // Region starts at the very beginning of the page: skip the top margin.
        if (y0 == 0) {
            y0 += this.config.pageTopMargin;
            yW -= this.config.pageTopMargin;
        }
        return new Rectangle(x0, y0, xW, yW);
    }
}

