/*
 * Decompiled with CFR 0.152.
 */
package com.alibaba.cloud.ai.parser.tika;

import com.alibaba.cloud.ai.document.DocumentParser;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Supplier;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.ExtractedTextFormatter;
import org.xml.sax.ContentHandler;

/**
 * {@link DocumentParser} implementation that extracts text from an input stream with
 * Apache Tika and returns it as a single {@link Document}.
 *
 * <p>The Tika collaborators ({@link Parser}, {@link ContentHandler}, {@link Metadata} and
 * {@link ParseContext}) are obtained from suppliers on every {@link #parse(InputStream)}
 * call, so each invocation works with whatever instances the suppliers hand out. Any
 * constructor argument passed as {@code null} is replaced by the corresponding default.
 */
public class TikaDocumentParser
implements DocumentParser {
    /** Write-limit argument handed to {@link BodyContentHandler} by the default content handler supplier. */
    private static final int NO_WRITE_LIMIT = -1;

    /** Default parser supplier: creates a new {@link AutoDetectParser} per call. */
    public static final Supplier<Parser> DEFAULT_PARSER_SUPPLIER = AutoDetectParser::new;

    /** Default metadata supplier: creates a new, empty {@link Metadata} per call. */
    public static final Supplier<Metadata> DEFAULT_METADATA_SUPPLIER = Metadata::new;

    /** Default parse-context supplier: creates a new {@link ParseContext} per call. */
    public static final Supplier<ParseContext> DEFAULT_PARSE_CONTEXT_SUPPLIER = ParseContext::new;

    /** Default content handler supplier: creates a new {@link BodyContentHandler} built with {@link #NO_WRITE_LIMIT}. */
    public static final Supplier<ContentHandler> DEFAULT_CONTENT_HANDLER_SUPPLIER = () -> new BodyContentHandler(NO_WRITE_LIMIT);

    private final Supplier<Parser> parserSupplier;
    private final Supplier<ContentHandler> contentHandlerSupplier;
    private final Supplier<Metadata> metadataSupplier;
    private final Supplier<ParseContext> parseContextSupplier;
    private final ExtractedTextFormatter textFormatter;

    /**
     * Creates a parser that uses every default supplier and
     * {@link ExtractedTextFormatter#defaults()}.
     */
    public TikaDocumentParser() {
        this(null, null, null, null, ExtractedTextFormatter.defaults());
    }

    /**
     * Creates a parser that uses every default supplier and the given formatter.
     *
     * @param textFormatter formatter applied to the extracted text; {@code null} selects
     *                      {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(ExtractedTextFormatter textFormatter) {
        this(null, null, null, null, textFormatter);
    }

    /**
     * Creates a parser with a custom content handler supplier and formatter; the remaining
     * suppliers use their defaults.
     *
     * @param contentHandlerSupplier supplies the handler that collects the extracted text;
     *                               {@code null} selects {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param textFormatter          formatter applied to the extracted text; {@code null}
     *                               selects {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(Supplier<ContentHandler> contentHandlerSupplier, ExtractedTextFormatter textFormatter) {
        this(null, contentHandlerSupplier, null, null, textFormatter);
    }

    /**
     * Creates a parser with custom Tika suppliers and
     * {@link ExtractedTextFormatter#defaults()}.
     *
     * @param parserSupplier         {@code null} selects {@link #DEFAULT_PARSER_SUPPLIER}
     * @param contentHandlerSupplier {@code null} selects {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param metadataSupplier       {@code null} selects {@link #DEFAULT_METADATA_SUPPLIER}
     * @param parseContextSupplier   {@code null} selects {@link #DEFAULT_PARSE_CONTEXT_SUPPLIER}
     */
    public TikaDocumentParser(Supplier<Parser> parserSupplier, Supplier<ContentHandler> contentHandlerSupplier, Supplier<Metadata> metadataSupplier, Supplier<ParseContext> parseContextSupplier) {
        this(parserSupplier, contentHandlerSupplier, metadataSupplier, parseContextSupplier, ExtractedTextFormatter.defaults());
    }

    /**
     * Canonical constructor. Every argument may be {@code null}, in which case the
     * corresponding default is used.
     *
     * @param parserSupplier         {@code null} selects {@link #DEFAULT_PARSER_SUPPLIER}
     * @param contentHandlerSupplier {@code null} selects {@link #DEFAULT_CONTENT_HANDLER_SUPPLIER}
     * @param metadataSupplier       {@code null} selects {@link #DEFAULT_METADATA_SUPPLIER}
     * @param parseContextSupplier   {@code null} selects {@link #DEFAULT_PARSE_CONTEXT_SUPPLIER}
     * @param textFormatter          {@code null} selects {@link ExtractedTextFormatter#defaults()}
     */
    public TikaDocumentParser(Supplier<Parser> parserSupplier, Supplier<ContentHandler> contentHandlerSupplier, Supplier<Metadata> metadataSupplier, Supplier<ParseContext> parseContextSupplier, ExtractedTextFormatter textFormatter) {
        this.parserSupplier = Objects.requireNonNullElse(parserSupplier, DEFAULT_PARSER_SUPPLIER);
        this.contentHandlerSupplier = Objects.requireNonNullElse(contentHandlerSupplier, DEFAULT_CONTENT_HANDLER_SUPPLIER);
        this.metadataSupplier = Objects.requireNonNullElse(metadataSupplier, DEFAULT_METADATA_SUPPLIER);
        this.parseContextSupplier = Objects.requireNonNullElse(parseContextSupplier, DEFAULT_PARSE_CONTEXT_SUPPLIER);
        // Treat a null formatter like the null suppliers above instead of deferring the
        // failure to the first parse() call.
        this.textFormatter = Objects.requireNonNullElseGet(textFormatter, ExtractedTextFormatter::defaults);
    }

    /**
     * Parses the stream with the supplied Tika parser and returns the formatted text as a
     * one-element list.
     *
     * <p>This method does not itself close {@code inputStream}.
     *
     * @param inputStream the content to parse
     * @return a singleton list holding one {@link Document} built from the extracted text
     * @throws RuntimeException wrapping any exception raised while obtaining the
     *                          collaborators, parsing, or building the document
     */
    public List<Document> parse(InputStream inputStream) {
        try {
            Parser parser = this.parserSupplier.get();
            ContentHandler contentHandler = this.contentHandlerSupplier.get();
            Metadata metadata = this.metadataSupplier.get();
            ParseContext parseContext = this.parseContextSupplier.get();
            parser.parse(inputStream, contentHandler, metadata, parseContext);
            // The handler's string form is taken as the extracted text.
            String text = contentHandler.toString();
            // NOTE(review): the message says "blank" but only null is rejected here;
            // empty or whitespace-only text is passed through. Confirm this is intended.
            if (text == null) {
                throw new ZeroByteFileException("The content is blank!");
            }
            return Collections.singletonList(this.toDocument(text));
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs the text through the configured formatter and wraps the result in a
     * {@link Document}.
     *
     * @param docText extracted text; never {@code null} because {@link #parse(InputStream)}
     *                rejects null text before calling this method
     * @return a document holding the formatted text
     */
    private Document toDocument(String docText) {
        return new Document(this.textFormatter.format(docText));
    }
}

