/*
 * Copyright 1997-2011 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.dam.handler.standard.msoffice;

import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.Property;
import org.apache.poi.hpsf.CustomProperty;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.slf4j.LoggerFactory;

/**
 * Apache POI (POIFS) reader listener for MSOffice documents.
 * <p>
 * For every property set stream it is notified about, the listener copies
 * the contained properties into the {@link MSOfficeMetadata} instance set via
 * {@link #setMetadata(MSOfficeMetadata)}. Built-in properties are only copied
 * if their mapped name is a valid XML name.
 */
public class MetaDataReaderListener implements POIFSReaderListener {

    private static final org.slf4j.Logger log = LoggerFactory.getLogger(MetaDataReaderListener.class);

    /**
     * size of the lookup tables, covering the chars 0x00-0xFF
     */
    private static final int CHAR_TABLE_SIZE = 0x0100;

    /**
     * table of XML name start chars (<= 0xFF)
     */
    private static final boolean[] XML_NAME_START_CHARS = new boolean[CHAR_TABLE_SIZE];

    /**
     * table of XML name chars (<= 0xFF)
     */
    private static final boolean[] XML_NAME_CHARS = new boolean[CHAR_TABLE_SIZE];

    static {
        // the tables are constant, so they are built once for all instances
        initCharTables();
    }

    private MSOfficeMetadata metadata;

    public MetaDataReaderListener() {
        // nothing to initialize per instance; the character tables are static
    }

    /**
     * Reads the property set from the event's stream and copies its
     * properties into the metadata object.
     * <p>
     * A stream that is not a property set is logged and ignored. Any other
     * failure while creating the property set is rethrown wrapped in a
     * {@link RuntimeException}. Failures while reading the summary
     * information are logged and swallowed.
     *
     * @param event the POIFS reader event providing the stream
     */
    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
        PropertySet ps;

        try {
            ps = PropertySetFactory.create(event.getStream());
        } catch (NoPropertySetStreamException e) {
            log.warn("Failed to read metadata: {}", e.getMessage());
            log.debug("Failed to read metadata", e);
            return;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        if (ps.isDocumentSummaryInformation()) {
            readDocumentSummary((DocumentSummaryInformation) ps);
        } else if (ps.isSummaryInformation()) {
            try {
                readSummary((SummaryInformation) ps);
            } catch (Exception e) {
                // best effort: a broken summary must not abort the extraction
                log.warn("Unable to get SummaryInformation: {}", e.getMessage());
                log.debug("Unable to get SummaryInformation", e);
            }
        }
    }

    public void setMetadata(MSOfficeMetadata metadata) {
        this.metadata = metadata;
    }

    /**
     * Copies the built-in and the custom properties of the document summary
     * information into the metadata object.
     *
     * @param info the document summary information
     */
    private void readDocumentSummary(DocumentSummaryInformation info) {
        for (Property prop : info.getProperties()) {
            String name = IdToNameMap.getDocumentPropertyName(prop.getID());
            log.debug("Setting document metadata property {} = {}", name, prop.getValue());
            if (isXMLName(name)) {
                metadata.setProperty(name, prop.getValue(), prop.getType());
            }
        }
        // add all custom properties
        // NOTE(review): custom property names are not checked with isXMLName,
        // unlike the built-in ones above - confirm this is intended.
        if (null != info.getCustomProperties()) {
            for (Object obj : info.getCustomProperties().values()) {
                CustomProperty prop = (CustomProperty) obj;
                log.debug("Setting custom metadata property {} = {}",
                        prop.getName(), prop.getValue());
                metadata.setProperty(prop.getName(), prop.getValue(),
                        prop.getType());
            }
        }
    }

    /**
     * Copies the properties and the thumbnail (if any) of the summary
     * information into the metadata object.
     *
     * @param info the summary information
     */
    private void readSummary(SummaryInformation info) {
        for (Property prop : info.getProperties()) {
            String name = IdToNameMap.getSummaryPropertyName(prop.getID());
            log.debug("Setting summary metadata property {} = {}", name, prop.getValue());
            if (isXMLName(name)) {
                metadata.setProperty(name, prop.getValue(), prop.getType());
            }
        }
        // get Thumbnail
        byte[] picture = info.getThumbnail();
        if (picture != null) {
            metadata.setProperty("picture", picture);
        }
    }

    /**
     * Checks whether a string is a valid XML name: the first character must
     * be a name start char and all following characters must be name chars.
     * The name is checked by Unicode code point, so characters outside the
     * Basic Multilingual Plane are handled as well. A <code>null</code> or
     * empty string is not a valid name.
     *
     * @param name the name to check, may be <code>null</code>
     * @return Return <code>true</code> if the name is correct.
     */
    private static boolean isXMLName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        int first = name.codePointAt(0);
        if (!isNameStartChar(first)) {
            return false;
        }
        int i = Character.charCount(first);
        while (i < name.length()) {
            int cp = name.codePointAt(i);
            if (!isNameChar(cp)) {
                return false;
            }
            // a supplementary character occupies two chars in the string
            i += Character.charCount(cp);
        }
        return true;
    }

    /**
     * Simple check if a character is a valid XML start name char.
     * All characters according to the XML Spec 1.1 are accepted:
     * http://www.w3.org/TR/xml11/#NT-NameStartChar
     *
     * @param ch a Unicode code point
     * @return Returns true if the character is a valid first char of an XML name.
     */
    private static boolean isNameStartChar(int ch) {
        if (ch >= 0 && ch < CHAR_TABLE_SIZE) {
            return XML_NAME_START_CHARS[ch];
        }
        return (ch >= 0x100 && ch <= 0x2FF) ||
                (ch >= 0x370 && ch <= 0x37D) ||
                (ch >= 0x37F && ch <= 0x1FFF) ||
                (ch >= 0x200C && ch <= 0x200D) ||
                (ch >= 0x2070 && ch <= 0x218F) ||
                (ch >= 0x2C00 && ch <= 0x2FEF) ||
                (ch >= 0x3001 && ch <= 0xD7FF) ||
                (ch >= 0xF900 && ch <= 0xFDCF) ||
                (ch >= 0xFDF0 && ch <= 0xFFFD) ||
                (ch >= 0x10000 && ch <= 0xEFFFF);
    }

    /**
     * Simple check if a character is a valid XML name char
     * (every char except the first one), according to the XML Spec 1.1:
     * http://www.w3.org/TR/xml11/#NT-NameChar
     *
     * @param ch a Unicode code point
     * @return Returns true if the character is a valid char of an XML name.
     */
    private static boolean isNameChar(int ch) {
        if (ch >= 0 && ch < CHAR_TABLE_SIZE) {
            // the table already includes all name start chars below 0x100
            return XML_NAME_CHARS[ch];
        }
        return isNameStartChar(ch) ||
                (ch >= 0x300 && ch <= 0x36F) ||
                (ch >= 0x203F && ch <= 0x2040);
    }

    /**
     * Initializes the char tables for the chars 0x00-0xFF for later use,
     * according to the XML 1.1 specification
     * http://www.w3.org/TR/xml11
     */
    private static void initCharTables() {
        for (int ch = 0; ch < CHAR_TABLE_SIZE; ch++) {
            XML_NAME_START_CHARS[ch] = ch == ':' ||
                    ('A' <= ch && ch <= 'Z') ||
                    ch == '_' ||
                    ('a' <= ch && ch <= 'z') ||
                    (0xC0 <= ch && ch <= 0xD6) ||
                    (0xD8 <= ch && ch <= 0xF6) ||
                    (0xF8 <= ch && ch <= 0xFF);

            XML_NAME_CHARS[ch] = XML_NAME_START_CHARS[ch] ||
                    ch == '-' ||
                    ch == '.' ||
                    ('0' <= ch && ch <= '9') ||
                    ch == 0xB7;
        }
    }

}
