/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.openxml;

import java.io.BufferedInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ontoware.rdf2go.exception.ModelException;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Node;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.SimpleSAXAdapter;
import org.semanticdesktop.aperture.util.SimpleSAXListener;
import org.semanticdesktop.aperture.util.SimpleSAXParser;
import org.semanticdesktop.aperture.util.UriUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

public class OpenXmlExtractor
implements Extractor {
    /** Name of the zip entry that is parsed into the {@link ContentTypes} table. */
    private static final String CONTENT_TYPES_FILE = "[Content_Types].xml";

    /** Buffer size / mark read-ahead limit in bytes (4 MiB). */
    private static final int BUFFER_SIZE = 0x400000;

    /** Line separator appended to the collected text; falls back to "\n" if the system property is unset. */
    private static final String END_OF_LINE = System.getProperty("line.separator", "\n");

    /** Content types whose parts are parsed with a {@code TextCollector}. Filled by the static initializer. */
    private static final HashSet<String> TEXT_ELEMENT_TYPES = new HashSet<String>();

    /**
     * Maps a content type to the tag name whose {@code name} attribute is collected by an
     * {@code AttributeTextCollector}. Filled by the static initializer.
     */
    private static final HashMap<String, String> TEXT_ATTRIBUTE_TYPES = new HashMap<String, String>();

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** Content type table of the package being processed; {@code null} until [Content_Types].xml is parsed. */
    private ContentTypes contentTypes;

    /** Accumulates the plain text gathered from all text-bearing parts. */
    private final StringBuilder fullText = new StringBuilder(262144);

    /**
     * Extracts plain text and metadata from an OpenXML package (a zip archive) into the given container.
     *
     * <p>The stream is read twice: a first pass locates and parses {@code [Content_Types].xml}, then the
     * stream is reset and a second pass dispatches every entry according to its content type. Text
     * collected from all parts is added as {@code NIE.plainTextContent} if it is non-empty.
     *
     * <p>The parameters {@code uRI}, {@code charset} and {@code string} are not used by this implementation.
     *
     * @param inputStream the package data; wrapped in a {@link BufferedInputStream} when it does not support
     *        mark/reset. It is not closed by this method.
     * @param rDFContainer receives the extracted statements
     * @throws ExtractorException if the archive cannot be read, contains no {@code [Content_Types].xml},
     *         or the stream cannot be reset for the second pass (e.g. more than {@code BUFFER_SIZE} bytes
     *         were consumed during the first pass)
     */
    public void extract(URI uRI, InputStream inputStream, Charset charset, String string, RDFContainer rDFContainer) throws ExtractorException {
        // Start from a clean slate: without this, reusing an extractor instance would append the new
        // document's text to the previous one and could reuse a stale content type table.
        this.contentTypes = null;
        this.fullText.setLength(0);

        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream, BUFFER_SIZE);
        }
        inputStream.mark(BUFFER_SIZE);

        // Pass 1: find the content type table. The zip streams are intentionally not closed,
        // because closing them would close the caller's stream.
        try {
            ZipInputStream firstPass = new ZipInputStream(inputStream);
            ZipEntry entry;
            while ((entry = firstPass.getNextEntry()) != null) {
                if (CONTENT_TYPES_FILE.equals(entry.getName())) {
                    this.parseContentTypes(firstPass);
                    break;
                }
                firstPass.closeEntry();
            }
        }
        catch (IOException iOException) {
            throw new ExtractorException(iOException);
        }
        if (this.contentTypes == null) {
            throw new ExtractorException("missing [Content_Types].xml file");
        }

        try {
            inputStream.reset();
        }
        catch (IOException iOException) {
            throw new ExtractorException("Unable to reset stream", iOException);
        }

        // Pass 2: process every part whose content type is known.
        try {
            ZipInputStream secondPass = new ZipInputStream(inputStream);
            ZipEntry entry;
            while ((entry = secondPass.getNextEntry()) != null) {
                String partName = this.toAbsoluteName(entry.getName());
                String contentType = this.contentTypes.getType(partName);
                if (contentType != null) {
                    if (TEXT_ELEMENT_TYPES.contains(contentType)) {
                        this.process(secondPass, new TextCollector());
                    } else if (TEXT_ATTRIBUTE_TYPES.containsKey(contentType)) {
                        this.process(secondPass, new AttributeTextCollector(TEXT_ATTRIBUTE_TYPES.get(contentType)));
                    } else if ("application/vnd.openxmlformats-package.core-properties+xml".equals(contentType) || "application/vnd.openxmlformats-officedocument.extended-properties+xml".equals(contentType)) {
                        this.extractMetadata(secondPass, rDFContainer);
                    }
                }
                secondPass.closeEntry();
            }
        }
        catch (IOException iOException) {
            throw new ExtractorException(iOException);
        }

        if (this.fullText.length() > 0) {
            rDFContainer.add(NIE.plainTextContent, this.fullText.toString());
        }
    }

    /**
     * Parses the {@code [Content_Types].xml} part and stores the result in {@code this.contentTypes}.
     *
     * <p>Each {@code Default} child registers an extension-to-content-type mapping and each
     * {@code Override} child registers a part-name-to-content-type mapping. Elements lacking either
     * the key attribute or the {@code ContentType} attribute are skipped.
     *
     * @param inputStream stream positioned at the start of the part; it is not closed
     * @throws ExtractorException if the XML cannot be read or parsed
     */
    private void parseContentTypes(InputStream inputStream) throws ExtractorException {
        Document document = this.getDocument(inputStream, false);
        this.contentTypes = new ContentTypes();
        NodeList children = document.getDocumentElement().getChildNodes();
        int count = children.getLength();
        for (int i = 0; i < count; ++i) {
            org.w3c.dom.Node child = children.item(i);
            if (child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
                continue;
            }
            Element entry = (Element)child;
            String tagName = entry.getTagName();
            boolean isDefault = "Default".equals(tagName);
            if (!isDefault && !"Override".equals(tagName)) {
                continue;
            }
            // Element.getAttribute returns an empty string (not null) for an absent attribute,
            // so emptiness has to be checked as well to actually skip incomplete entries.
            String key = entry.getAttribute(isDefault ? "Extension" : "PartName");
            String contentType = entry.getAttribute("ContentType");
            if (key == null || key.length() == 0 || contentType == null || contentType.length() == 0) {
                continue;
            }
            if (isDefault) {
                this.contentTypes.addDefault(key, contentType);
            } else {
                this.contentTypes.addOverride(key, contentType);
            }
        }
    }

    /**
     * Parses the given stream into a DOM document without closing the stream.
     *
     * <p>The parser is hardened against XML external entity (XXE) attacks because package content is
     * untrusted: DOCTYPE declarations are rejected where the parser supports that feature, otherwise
     * external entities and external DTD loading are switched off individually.
     *
     * @param inputStream the XML data; shielded by a {@code NonCloseableStream} so the parser cannot close it
     * @param bl whether the parser should be namespace aware
     * @return the parsed document
     * @throws ExtractorException if reading or parsing fails (including a rejected DOCTYPE)
     * @throws RuntimeException if no {@link DocumentBuilder} can be created
     */
    private Document getDocument(InputStream inputStream, boolean bl) throws ExtractorException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(bl);
        factory.setValidating(false);
        factory.setExpandEntityReferences(false);

        this.trySetFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        if (!this.trySetFeature(factory, "http://apache.org/xml/features/disallow-doctype-decl", true)) {
            // Fallback for parsers that cannot forbid DOCTYPE outright.
            this.trySetFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
            this.trySetFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
            this.trySetFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        }

        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        }
        catch (ParserConfigurationException parserConfigurationException) {
            throw new RuntimeException("unable to instantiate DocumentBuilder", parserConfigurationException);
        }
        try {
            return builder.parse(new NonCloseableStream(inputStream));
        }
        catch (SAXException sAXException) {
            throw new ExtractorException(sAXException);
        }
        catch (IOException iOException) {
            throw new ExtractorException(iOException);
        }
    }

    /**
     * Sets a parser feature, tolerating parsers that do not support it.
     *
     * @return {@code true} if the feature was applied, {@code false} if the parser rejected it
     */
    private boolean trySetFeature(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
            return true;
        }
        catch (ParserConfigurationException parserConfigurationException) {
            this.logger.warn("XML parser does not support feature " + feature, (Throwable)parserConfigurationException);
            return false;
        }
    }

    /**
     * Turns a zip entry name into the absolute form used for content type lookups.
     *
     * @param string the entry name
     * @return {@code string} unchanged if it already starts with "/", otherwise "/" + {@code string}
     */
    private String toAbsoluteName(String string) {
        return string.startsWith("/") ? string : "/" + string;
    }

    /**
     * Runs a SAX parse over the given stream, reporting events to the supplied listener.
     *
     * <p>Parse failures are logged as warnings and otherwise ignored. The stream is wrapped in a
     * {@code NonCloseableStream}, so the parser cannot close it.
     *
     * @param inputStream the XML data to parse
     * @param simpleSAXListener receives the parse events
     * @throws RuntimeException if the parser cannot be instantiated
     */
    private void process(InputStream inputStream, SimpleSAXListener simpleSAXListener) {
        SimpleSAXParser parser = this.createSaxParser();
        parser.setListener(simpleSAXListener);
        // Whitespace is kept as-is.
        parser.setTrimWhiteSpace(false);
        InputStream shielded = new NonCloseableStream(inputStream);
        try {
            parser.parse(shielded);
        }
        catch (Exception parseFailure) {
            this.logger.warn("Exception while parsing XML", (Throwable)parseFailure);
        }
    }

    /** Creates a new SAX parser, converting any failure into a {@link RuntimeException}. */
    private SimpleSAXParser createSaxParser() {
        try {
            return new SimpleSAXParser();
        }
        catch (Exception creationFailure) {
            throw new RuntimeException("unable to instantiate SAXParser", creationFailure);
        }
    }

    /**
     * Reads a properties part and records its entries in the container.
     *
     * <p>The container is typed as {@code NFO.Document}. Every child element of the root that starts
     * with a text node is added twice: once under a predicate built from the element's namespace URI
     * and local name (only when the element has a namespace), and once through
     * {@link #mapToApertureProperty}.
     *
     * @param inputStream stream positioned at the start of the properties part; it is not closed
     * @param rDFContainer receives the statements
     * @throws ExtractorException if the part cannot be read or parsed
     */
    private void extractMetadata(InputStream inputStream, RDFContainer rDFContainer) throws ExtractorException {
        Element root = this.getDocument(inputStream, true).getDocumentElement();
        rDFContainer.add(RDF.type, (Node)NFO.Document);

        NodeList children = root.getChildNodes();
        for (int index = 0, count = children.getLength(); index < count; ++index) {
            org.w3c.dom.Node child = children.item(index);
            if (child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
                continue;
            }
            Element property = (Element)child;
            String value = this.getText(property);
            if (value == null) {
                continue;
            }

            String namespace = property.getNamespaceURI();
            if (namespace != null) {
                // The namespace always ends in "/" before the local name is appended.
                String prefix = namespace.endsWith("/") ? namespace : namespace + "/";
                try {
                    String predicateUri = prefix + property.getLocalName();
                    URI predicate = rDFContainer.getValueFactory().createURI(predicateUri);
                    rDFContainer.add(predicate, value);
                }
                catch (ModelException modelException) {
                    this.logger.error("ModelException while adding statement, ignoring", (Throwable)modelException);
                }
            }

            this.mapToApertureProperty(property, value, rDFContainer);
        }
    }

    /**
     * Maps a well-known properties element onto the matching NIE/NCO/NFO property.
     *
     * <p>The decision is based on the element's local name only; unrecognised names are ignored.
     * Date values that cannot be parsed and page counts that are not integers are skipped and logged.
     *
     * @param element the properties element
     * @param string the element's text value
     * @param rDFContainer receives the statements
     */
    private void mapToApertureProperty(Element element, String string, RDFContainer rDFContainer) {
        String localName = element.getLocalName();
        if ("title".equals(localName)) {
            rDFContainer.add(NIE.title, string);
        } else if ("subject".equals(localName)) {
            rDFContainer.add(NIE.subject, string);
        } else if ("created".equals(localName)) {
            this.addDateStatement(NIE.contentCreated, string, rDFContainer);
        } else if ("creator".equals(localName)) {
            this.addContactStatement(NCO.creator, string, rDFContainer);
        } else if ("description".equals(localName)) {
            rDFContainer.add(NIE.description, string);
        } else if ("lastModifiedBy".equals(localName)) {
            this.addContactStatement(NCO.contributor, string, rDFContainer);
        } else if ("modified".equals(localName)) {
            this.addDateStatement(NIE.contentLastModified, string, rDFContainer);
        } else if ("Application".equals(localName)) {
            rDFContainer.add(NIE.generator, string);
        } else if ("Pages".equals(localName)) {
            // The type is asserted even when the count itself turns out to be unusable.
            rDFContainer.add(RDF.type, (Node)NFO.PaginatedTextDocument);
            try {
                rDFContainer.add(NFO.pageCount, Integer.parseInt(string.trim()));
            }
            catch (NumberFormatException numberFormatException) {
                this.logger.debug("Ignoring non-numeric page count '" + string + "'");
            }
        } else if ("keywords".equals(localName)) {
            StringTokenizer tokens = new StringTokenizer(string, " \t.,;|/\\", false);
            while (tokens.hasMoreTokens()) {
                rDFContainer.add(NIE.keyword, tokens.nextToken());
            }
        }
    }

    /**
     * Adds a date-valued statement, skipping values that cannot be parsed.
     * A {@code null} date must never reach the container — NOTE(review): the container's handling of
     * a null {@code Date} is not visible here, so it is avoided outright.
     */
    private void addDateStatement(URI uRI, String string, RDFContainer rDFContainer) {
        Date date = this.convertStringToDate(string);
        if (date == null) {
            this.logger.warn("Ignoring unparsable date '" + string + "' for property " + uRI);
            return;
        }
        rDFContainer.add(uRI, date);
    }

    /**
     * Parses a timestamp of the form {@code yyyy-MM-dd'T'HH:mm:ss} followed by a zone designator.
     *
     * <p>Accepted zone designators are a literal {@code Z} (interpreted as UTC), anything
     * {@link SimpleDateFormat}'s {@code Z} pattern accepts (e.g. {@code +0100}), and an ISO-8601
     * offset written with a colon (e.g. {@code +01:00}). Leading and trailing whitespace is ignored.
     *
     * @param string the text to parse; may be {@code null}
     * @return the parsed date, or {@code null} if the text is {@code null} or matches none of the formats
     */
    private Date convertStringToDate(String string) {
        if (string == null) {
            return null;
        }
        String text = string.trim();

        // In this pattern 'Z' is a quoted literal, not a zone field, so the formatter would fall back
        // to the JVM default zone. Pin it to UTC, which is what a trailing "Z" denotes.
        SimpleDateFormat utcFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        utcFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        Date parsed = OpenXmlExtractor.parseOrNull(utcFormat, text);
        if (parsed != null) {
            return parsed;
        }

        SimpleDateFormat offsetFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
        parsed = OpenXmlExtractor.parseOrNull(offsetFormat, text);
        if (parsed != null) {
            return parsed;
        }

        // Last resort: rewrite a trailing "+hh:mm" / "-hh:mm" offset as "+hhmm" / "-hhmm",
        // the form understood by the Z pattern.
        int length = text.length();
        if (length >= 6 && text.charAt(length - 3) == ':') {
            char sign = text.charAt(length - 6);
            if (sign == '+' || sign == '-') {
                String withoutColon = text.substring(0, length - 3) + text.substring(length - 2);
                return OpenXmlExtractor.parseOrNull(offsetFormat, withoutColon);
            }
        }
        return null;
    }

    /** Parses {@code text} with {@code format}, returning {@code null} instead of throwing on failure. */
    private static Date parseOrNull(SimpleDateFormat format, String text) {
        try {
            return format.parse(text);
        }
        catch (ParseException parseException) {
            return null;
        }
    }

    /**
     * Records a person as a freshly generated {@code NCO.Contact} resource and links it to the container.
     *
     * @param uRI the property that links the described resource to the contact
     * @param string the contact's full name, stored as {@code NCO.fullname}
     * @param rDFContainer the container whose model receives the statements
     */
    private void addContactStatement(URI uRI, String string, RDFContainer rDFContainer) {
        Model targetModel = rDFContainer.getModel();
        Resource contact = UriUtil.generateRandomResource(targetModel);

        // Describe the contact itself ...
        targetModel.addStatement(contact, RDF.type, (Node)NCO.Contact);
        targetModel.addStatement(contact, NCO.fullname, string);

        // ... then attach it to the described resource.
        rDFContainer.add(uRI, (Node)contact);
    }

    /**
     * Returns the text an element starts with.
     *
     * @param element the element to inspect
     * @return the whole text of the first child if that child is a {@link Text} node, otherwise {@code null}
     *         (this includes elements without children)
     */
    private String getText(Element element) {
        org.w3c.dom.Node firstChild = element.getFirstChild();
        return firstChild instanceof Text ? ((Text)firstChild).getWholeText() : null;
    }

    static {
        // Content types whose text lives in element content.
        String[] elementTypes = {
            "application/vnd.ms-word.document.macroEnabled.main+xml",
            "application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
        };
        for (String contentType : elementTypes) {
            TEXT_ELEMENT_TYPES.add(contentType);
        }

        // Content types whose text lives in the "name" attribute of a particular tag:
        // { content type, tag name }.
        // NOTE: the notesSlide type is also listed in elementTypes above; extract() tests
        // TEXT_ELEMENT_TYPES first, so its attribute mapping is never reached.
        String[][] attributeTypes = {
            {"application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml", "p:cmAuthor"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.main+xml", "sheet"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml", "sheet"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml", "tableColumn"},
            {"application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml", "p:cSld"},
        };
        for (String[] mapping : attributeTypes) {
            TEXT_ATTRIBUTE_TYPES.put(mapping[0], mapping[1]);
        }
    }

    /**
     * SAX listener that appends the {@code name} attribute of every occurrence of one particular tag
     * to the extractor's full text, each followed by a line separator.
     */
    private class AttributeTextCollector
    extends SimpleSAXAdapter {
        /** Name of the tag whose {@code name} attribute is collected. */
        private String fullTextTag;

        public AttributeTextCollector(String string) {
            this.fullTextTag = string;
        }

        /**
         * Collects the {@code name} attribute when the tag matches and the attribute value is a String.
         */
        public void startTag(String string, Map map, String string2) throws SAXException {
            if (!this.fullTextTag.equals(string)) {
                return;
            }
            Object nameValue = map.get("name");
            if (nameValue instanceof String) {
                StringBuilder sink = OpenXmlExtractor.this.fullText;
                sink.append(nameValue);
                sink.append(END_OF_LINE);
            }
        }
    }

    /**
     * SAX listener that appends the content of text-bearing elements to the extractor's full text.
     *
     * <p>{@code w:t} text is appended verbatim; text of {@code t}, {@code p:text}, {@code a:t},
     * {@code st:t} and {@code v} is followed by a space. A {@code w:tab} outside of a {@code w:tabs}
     * element becomes a tab character, the end of a {@code w:p} becomes a line separator, and a final
     * line separator is added at the end of a document that contributed any text.
     */
    private class TextCollector
    extends SimpleSAXAdapter {
        /** Length of the full text when this document started; used to detect whether anything was added. */
        private int initialLength;
        /** True while inside a {@code w:tabs} element, where {@code w:tab} must not emit a tab. */
        private boolean insideTabs = false;

        private TextCollector() {
        }

        public void startDocument() throws SAXException {
            this.initialLength = OpenXmlExtractor.this.fullText.length();
        }

        public void startTag(String string, Map map, String string2) throws SAXException {
            StringBuilder sink = OpenXmlExtractor.this.fullText;
            if ("w:t".equals(string)) {
                sink.append(string2);
                return;
            }
            if (this.isSpaceDelimitedTextTag(string)) {
                sink.append(string2);
                sink.append(' ');
                return;
            }
            if ("w:tab".equals(string)) {
                if (!this.insideTabs) {
                    sink.append('\t');
                }
                return;
            }
            if ("w:tabs".equals(string)) {
                this.insideTabs = true;
            }
        }

        public void endTag(String string) throws SAXException {
            if ("w:tabs".equals(string)) {
                this.insideTabs = false;
                return;
            }
            if ("w:p".equals(string)) {
                OpenXmlExtractor.this.fullText.append(END_OF_LINE);
            }
        }

        public void endDocument() throws SAXException {
            boolean addedText = OpenXmlExtractor.this.fullText.length() > this.initialLength;
            if (addedText) {
                OpenXmlExtractor.this.fullText.append(END_OF_LINE);
            }
        }

        /** Tells whether the tag's text is appended with a trailing space. */
        private boolean isSpaceDelimitedTextTag(String tagName) {
            return "t".equals(tagName)
                || "p:text".equals(tagName)
                || "a:t".equals(tagName)
                || "st:t".equals(tagName)
                || "v".equals(tagName);
        }
    }

    /**
     * Stream wrapper whose {@link #close()} does nothing, so that code receiving it cannot close the
     * wrapped stream. All other operations are inherited from {@link FilterInputStream}.
     */
    private static class NonCloseableStream
    extends FilterInputStream {
        public NonCloseableStream(InputStream delegate) {
            super(delegate);
        }

        /** Intentionally a no-op: the wrapped stream stays open. */
        @Override
        public void close() throws IOException {
            // deliberately empty
        }
    }

    /**
     * Lookup table built from {@code [Content_Types].xml}: per-part overrides plus per-extension defaults.
     *
     * <p>Part names and extensions are matched case-insensitively, as the Open Packaging Conventions
     * require; keys are stored lower-cased.
     */
    private static class ContentTypes {
        private final HashMap<String, String> defaults = new HashMap<String, String>();
        private final HashMap<String, String> overrides = new HashMap<String, String>();

        /** Registers the content type for all parts with the given extension (without the dot). */
        public void addDefault(String string, String string2) {
            this.defaults.put(ContentTypes.normalize(string), string2);
        }

        /** Registers the content type for one specific part name. */
        public void addOverride(String string, String string2) {
            this.overrides.put(ContentTypes.normalize(string), string2);
        }

        /** Returns the default content type for an extension, or {@code null} if none is registered. */
        public String getDefault(String string) {
            return this.defaults.get(ContentTypes.normalize(string));
        }

        /** Returns the override content type for a part name, or {@code null} if none is registered. */
        public String getOverride(String string) {
            return this.overrides.get(ContentTypes.normalize(string));
        }

        /**
         * Resolves the content type of a part: an override for the exact part name wins, otherwise
         * the default registered for the text after the last '.' is used.
         *
         * @param string the part name
         * @return the content type, or {@code null} if there is no override and the name has no
         *         non-empty extension or no default is registered for it
         */
        public String getType(String string) {
            String override = this.getOverride(string);
            if (override != null) {
                return override;
            }
            int dot = string.lastIndexOf('.');
            if (dot < 0 || dot >= string.length() - 1) {
                return null;
            }
            return this.getDefault(string.substring(dot + 1));
        }

        @Override
        public String toString() {
            return "ContentTypes[default=" + this.defaults + ",overrides=" + this.overrides + "]";
        }

        /** Lower-cases a key independently of the default locale; {@code null} stays {@code null}. */
        private static String normalize(String key) {
            return key == null ? null : key.toLowerCase(Locale.ROOT);
        }
    }
}

