/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.mime;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import org.apache.any23.extractor.csv.CSVReaderBuilder;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.purifier.Purifier;
import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;

/**
 * {@link MIMETypeDetector} implementation that combines Tika detection with
 * lightweight content sniffing for N3, N-Quads, Turtle and CSV.
 *
 * <p>The Tika configuration, the {@link Tika} facade and the MIME type
 * repository are held in static fields and created lazily by the first
 * constructor call; that initialization is not synchronized.
 */
public class TikaMIMETypeDetector implements MIMETypeDetector {

    /** MIME type reported when the content is recognised as CSV. */
    public static final String CSV_MIMETYPE = "text/csv";

    /** Absolute class-path location of the Tika configuration file. */
    public static final String RESOURCE_NAME = "/org/apache/any23/mime/tika-config.xml";

    private static final String OCTET_STREAM = "application/octet-stream";
    private static final String TEXT_PLAIN = "text/plain";
    private static final String APPLICATION_XML = "application/xml";
    private static final String CONTENT_TYPE_KEY = "Content-Type";
    private static final String RESOURCE_NAME_KEY = "resourceName";

    /** Maximum number of characters collected by {@link #extractDataSample}. */
    private static final int MAX_SAMPLE_SIZE = 2048;

    /** Statement shapes (URI, blank node, language-tagged and typed literal objects) accepted as N3. */
    private static final Pattern[] N3_PATTERNS = new Pattern[]{
        Pattern.compile("^\\S+\\s*<\\S+>\\s*<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*_:\\S+\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(@\\S+)?\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(\\^\\^\\S+)?\\s*\\.")
    };

    /** Same statement shapes as {@link #N3_PATTERNS}, followed by a {@code <...>} fourth term. */
    private static final Pattern[] NQUADS_PATTERNS = new Pattern[]{
        Pattern.compile("^\\S+\\s*<\\S+>\\s*<\\S+>\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*_:\\S+\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(@\\S+)?\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(\\^\\^\\S+)?\\s*\\<\\S+>\\s*\\.")
    };

    private static TikaConfig config = null;
    private static Tika tika;
    private static MimeTypes types;

    private final Purifier purifier;

    /**
     * Creates a detector that cleans every input stream with the given purifier
     * before detection. The shared Tika state is loaded on first use.
     *
     * @param purifier purifier applied to non-null input streams
     * @throws RuntimeException if the Tika configuration cannot be loaded
     */
    public TikaMIMETypeDetector(Purifier purifier) {
        this.purifier = purifier;
        if (config == null) {
            // Open the resource only when it is actually needed and always close it;
            // a null resource is tolerated by try-with-resources and reported via the catch below.
            try (InputStream is = getResourceAsStream()) {
                config = new TikaConfig(is);
            } catch (Exception e) {
                throw new RuntimeException("Error while loading Tika configuration.", e);
            }
        }
        if (types == null) {
            types = config.getMimeRepository();
        }
        if (tika == null) {
            tika = new Tika(config);
        }
    }

    /** Creates a detector that uses a {@link WhiteSpacesPurifier}. */
    public TikaMIMETypeDetector() {
        this(new WhiteSpacesPurifier());
    }

    /**
     * Tests whether the first statement of the stream matches one of the N3 patterns.
     * The stream is reset (to a mark previously set by the caller) before returning.
     *
     * @param is stream to probe; must have been marked by the caller
     * @return {@code true} if the extracted sample matches an N3 pattern
     * @throws IOException if reading or resetting the stream fails
     */
    public static boolean checkN3Format(InputStream is) throws IOException {
        return findPattern(N3_PATTERNS, '.', is);
    }

    /**
     * Tests whether the first statement of the stream matches one of the N-Quads patterns.
     * The stream is reset (to a mark previously set by the caller) before returning.
     *
     * @param is stream to probe; must have been marked by the caller
     * @return {@code true} if the extracted sample matches an N-Quads pattern
     * @throws IOException if reading or resetting the stream fails
     */
    public static boolean checkNQuadsFormat(InputStream is) throws IOException {
        return findPattern(NQUADS_PATTERNS, '.', is);
    }

    /**
     * Tests whether the first statement of the stream parses as Turtle.
     * The sample is parsed with a strict RDF4J Turtle parser and an empty base URI;
     * any parser failure is interpreted as "not Turtle".
     *
     * @param is stream to probe; must have been marked by the caller
     * @return {@code true} if the sample parses without error
     * @throws IOException if reading or resetting the stream fails
     */
    public static boolean checkTurtleFormat(InputStream is) throws IOException {
        String sample = extractDataSample(is, '.');
        RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE);
        turtleParser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY);
        turtleParser.setStopAtFirstError(true);
        turtleParser.setVerifyData(true);
        // Encode with the same charset used to decode the sample so non-ASCII text round-trips.
        ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes(StandardCharsets.UTF_8));
        try {
            turtleParser.parse(bais, "");
            return true;
        } catch (Exception e) {
            // Deliberately broad: this is a best-effort probe, every failure means "no".
            return false;
        }
    }

    /**
     * Tests whether the stream content looks like CSV by delegating to
     * {@link CSVReaderBuilder#isCSV(InputStream)}.
     *
     * @param is stream to probe
     * @return the result of the delegate
     * @throws IOException if the delegate fails to read the stream
     */
    public static boolean checkCSVFormat(InputStream is) throws IOException {
        return CSVReaderBuilder.isCSV(is);
    }

    /**
     * Guesses the MIME type of a resource from its content, its declared type and its name.
     *
     * <p>If the metadata-based guess is {@code application/octet-stream} and an input stream is
     * available, the content is probed for N3, N-Quads, Turtle and CSV, in that order.
     *
     * @param fileName             resource name, may be {@code null}
     * @param input                resource content, may be {@code null}
     * @param mimeTypeFromMetadata declared MIME type, may be {@code null}
     * @return the parsed guessed MIME type
     * @throws RuntimeException if purifying or reading the input fails
     */
    public MIMEType guessMIMEType(String fileName, InputStream input, MIMEType mimeTypeFromMetadata) {
        if (input != null) {
            try {
                this.purifier.purify(input);
            } catch (IOException e) {
                throw new RuntimeException("Error while purifying the provided input", e);
            }
        }

        Metadata meta = new Metadata();
        if (mimeTypeFromMetadata != null) {
            meta.set(CONTENT_TYPE_KEY, mimeTypeFromMetadata.getFullType());
        }
        if (fileName != null) {
            meta.set(RESOURCE_NAME_KEY, fileName);
        }

        String type;
        try {
            String guessed = this.guessMimeTypeByInputAndMeta(input, meta);
            if (OCTET_STREAM.equals(guessed) && input != null) {
                type = sniffContentFormat(input);
            } else {
                // Either a specific type was found, or there is no content to sniff
                // (probing a null stream would fail with a NullPointerException).
                type = guessed;
            }
        } catch (IOException ioe) {
            throw new RuntimeException("Error while retrieving mime type.", ioe);
        }
        return MIMEType.parse(type);
    }

    /** Probes the content for N3, N-Quads, Turtle and CSV in that order; falls back to octet-stream. */
    private static String sniffContentFormat(InputStream input) throws IOException {
        if (checkN3Format(input)) {
            return RDFFormat.N3.getDefaultMIMEType();
        }
        if (checkNQuadsFormat(input)) {
            return RDFFormat.NQUADS.getDefaultMIMEType();
        }
        if (checkTurtleFormat(input)) {
            return RDFFormat.TURTLE.getDefaultMIMEType();
        }
        if (checkCSVFormat(input)) {
            return CSV_MIMETYPE;
        }
        return OCTET_STREAM;
    }

    /** Returns whether any of the patterns is found in the data sample extracted from the stream. */
    private static boolean findPattern(Pattern[] patterns, char delimiterChar, InputStream is) throws IOException {
        String sample = extractDataSample(is, delimiterChar);
        for (Pattern pattern : patterns) {
            if (pattern.matcher(sample).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads characters from the stream until {@code breakChar} is met outside a
     * {@code <...>} or {@code "..."} block, the stream ends, or
     * {@link #MAX_SAMPLE_SIZE} characters have been collected. The break character,
     * when found, is included in the sample.
     *
     * <p>The stream is always reset afterwards. This method does not set a mark itself,
     * so it relies on a mark previously placed on {@code is} by the caller.
     */
    private static String extractDataSample(InputStream is, char breakChar) throws IOException {
        // The reader is intentionally not closed: closing it would close the caller's stream.
        // No mark/reset is needed on the reader either, since it is discarded on return.
        BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
        StringBuilder sb = new StringBuilder();
        boolean insideBlock = false;
        try {
            int c;
            while (sb.length() < MAX_SAMPLE_SIZE && (c = br.read()) != -1) {
                if ('<' == c) {
                    insideBlock = true;
                } else if ('>' == c) {
                    insideBlock = false;
                } else if ('"' == c) {
                    insideBlock = !insideBlock;
                }
                sb.append((char) c);
                if (!insideBlock && breakChar == c) {
                    break;
                }
            }
        } finally {
            is.reset();
        }
        return sb.toString();
    }

    /**
     * Locates the Tika configuration, first relative to this class, then through this
     * class' loader and finally through the system class loader.
     *
     * @return the opened resource, or {@code null} if it cannot be found
     */
    private static InputStream getResourceAsStream() {
        InputStream result = TikaMIMETypeDetector.class.getResourceAsStream(RESOURCE_NAME);
        if (result != null) {
            return result;
        }
        // ClassLoader lookups expect a name without the leading '/', unlike Class lookups.
        String loaderName = RESOURCE_NAME.startsWith("/") ? RESOURCE_NAME.substring(1) : RESOURCE_NAME;
        ClassLoader loader = TikaMIMETypeDetector.class.getClassLoader();
        if (loader != null) {
            result = loader.getResourceAsStream(loaderName);
        }
        if (result == null) {
            result = ClassLoader.getSystemResourceAsStream(loaderName);
        }
        return result;
    }

    /**
     * Guesses a MIME type name, trying in order: Tika detection on the stream content
     * (accepted only when not generic), the declared content type (accepted immediately
     * when not plain), the resource name, and finally the plain declared content type.
     *
     * @return the guessed type name, {@code application/octet-stream} when nothing matched
     */
    private String guessMimeTypeByInputAndMeta(InputStream stream, Metadata metadata) throws IOException {
        if (stream != null) {
            String detected = tika.detect(stream);
            if (detected != null && !isGenericMIMEType(detected)) {
                return detected;
            }
        }

        String candidateMIMEType = null;
        String contentType = metadata.get(CONTENT_TYPE_KEY);
        if (contentType != null) {
            try {
                MimeType declared = types.forName(contentType);
                if (declared != null) {
                    if (!isPlainMIMEType(declared.getName())) {
                        return declared.getName();
                    }
                    candidateMIMEType = declared.getName();
                }
            } catch (MimeTypeException ignored) {
                // An unparsable declared content type is not fatal: fall through to the resource name.
            }
        }

        String resourceName = metadata.get(RESOURCE_NAME_KEY);
        if (resourceName != null) {
            MimeType byName = types.getMimeType(resourceName);
            if (byName != null) {
                return byName.getName();
            }
        }

        return candidateMIMEType != null ? candidateMIMEType : OCTET_STREAM;
    }

    /** Returns whether the type is {@code application/octet-stream} or {@code text/plain}. */
    private static boolean isPlainMIMEType(String type) {
        return type.equals(OCTET_STREAM) || type.equals(TEXT_PLAIN);
    }

    /** Returns whether the type is plain or {@code application/xml}. */
    private static boolean isGenericMIMEType(String type) {
        return isPlainMIMEType(type) || type.equals(APPLICATION_XML);
    }
}

