/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/
package com.day.cq.dam.word.process;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.jcr.RepositoryException;

import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.osgi.framework.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.metadata.MetaDataMap;

/**
 * A {@link WorkflowProcess} for extracting images from a Word document.
 * <p>
 * The process resolves the workflow payload to an asset and, depending on the
 * asset's MIME type, reads its original rendition either as an OOXML
 * ({@code .docx}) or as a binary ({@code .doc}) Word document. Every picture
 * found in the document is added to the asset as a sub-asset and the session
 * is saved once all pictures have been added. Payloads with any other MIME
 * type are left untouched.
 *
 * @see WorkflowProcess
 */
@Component
@Service
@Properties({
	@Property(name = Constants.SERVICE_DESCRIPTION, value = "Extracts images from a Word document and adds them to the DAM as sub-assets."),
	@Property(name = Constants.SERVICE_VENDOR, value = "Adobe"),
	@Property(name="process.label", value = "Extract Images From Word")})
public class ExtractImagesProcess extends AbstractAssetWorkflowProcess {
	
	private static final Logger log = LoggerFactory.getLogger(ExtractImagesProcess.class);
	
	/** MIME type that selects the OOXML ({@code .docx}) extraction path. */
	private static final String DOCX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
	
	/** Regular expression for the MIME types that select the binary ({@code .doc}) extraction path. */
	private static final String DOC_MIME_PATTERN = "application.*msword";
	
	/** MIME type used when a {@code .docx} picture type has no known mapping. */
	private static final String DEFAULT_MIME_TYPE = "application/octet-stream";
	
	private static final String BMP_MIME_TYPE = "image/bmp";
	private static final String DIB_MIME_TYPE = "image/dib";
	private static final String EMF_MIME_TYPE = "image/x-emf";
	private static final String EPS_MIME_TYPE = "image/eps";
	private static final String GIF_MIME_TYPE = "image/gif";
	private static final String JPG_MIME_TYPE = "image/jpeg";
	private static final String PICT_MIME_TYPE = "image/pict";
	private static final String PNG_MIME_TYPE = "image/png";
	private static final String WMF_MIME_TYPE = "image/wmf";
	private static final String WPG_MIME_TYPE = "image/wpg";
	
	/**
	 * Maps the {@code Document.PICTURE_TYPE_*} constants to MIME types.
	 * Built once at class initialization and never modified afterwards, so it
	 * can be read from concurrently running workflow steps.
	 */
	private static final Map<Integer, String> MIME_TYPES;
	
	static {
		Map<Integer, String> types = new HashMap<Integer, String>();
		types.put(Integer.valueOf(Document.PICTURE_TYPE_BMP), BMP_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_DIB), DIB_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_EMF), EMF_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_EPS), EPS_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_GIF), GIF_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_JPEG), JPG_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_PICT), PICT_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_PNG), PNG_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_WMF), WMF_MIME_TYPE);
		types.put(Integer.valueOf(Document.PICTURE_TYPE_WPG), WPG_MIME_TYPE);
		MIME_TYPES = Collections.unmodifiableMap(types);
	}
	
	/**
	 * Extracts the images of the payload asset if it is a Word document.
	 *
	 * @param item    the work item whose payload identifies the asset
	 * @param session the workflow session; its underlying session is used to
	 *                resolve the asset and to save the added sub-assets
	 * @param args    process arguments (not used)
	 * @throws WorkflowException if reading the document or storing the
	 *                           sub-assets fails
	 */
    public void execute(WorkItem item,WorkflowSession session,MetaDataMap args)
			throws WorkflowException {
		
		Asset asset = getAssetFromPayload(item, session.getSession());
		if (asset == null) {
			log.warn("Workflow payload does not resolve to an asset; no images extracted.");
			return;
		}
		
		final String assetMime = asset.getMimeType();
		if (assetMime == null) {
			log.debug("Asset {} has no MIME type; no images extracted.", asset.getPath());
			return;
		}
		
		if (assetMime.equals(DOCX_MIME_TYPE)) {
			extractImages(asset, session, true);
		} else if (assetMime.matches(DOC_MIME_PATTERN)) {
			extractImages(asset, session, false);
		}
	}
	
	/**
	 * Reads the asset's original rendition as a Word document, adds all of its
	 * pictures as sub-assets while the asset is in batch mode, and saves the
	 * session. On any failure the pending session changes are discarded and
	 * the failure is rethrown as a {@link WorkflowException}. The asset's
	 * previous batch mode is restored in every case.
	 *
	 * @param asset   the Word document asset
	 * @param session the workflow session used to save or discard the changes
	 * @param docx    {@code true} to parse the original as {@code .docx},
	 *                {@code false} to parse it as {@code .doc}
	 * @throws WorkflowException if the extraction fails for any reason
	 */
	private void extractImages(Asset asset, WorkflowSession session, boolean docx) throws WorkflowException {
		final String path = asset.getPath();
		log.info("Extracting images from: {}", path);
		
		// Remember the mode before anything can fail, so the finally block
		// restores the real previous value rather than a default.
		final boolean oldBatchMode = asset.isBatchMode();
		InputStream original = null;
		
		try {
			original = openOriginal(asset);
			asset.setBatchMode(true);
			
			int count = docx ? addDocxPictures(asset, original) : addDocPictures(asset, original);
			session.getSession().save();
			
			log.debug("Added {} images as sub-assets.", Integer.valueOf(count));
			log.info("Done extracting images from: {}", path);
		} catch (Throwable t) {
			// Throwable is caught on purpose: linkage errors raised by the
			// document parser should fail the step like any other problem.
			discardPendingChanges(session);
			throw new WorkflowException("Failed to extract images from " + path + ": " + t.getMessage(), t);
		} finally {
			asset.setBatchMode(oldBatchMode);
			closeQuietly(original);
		}
	}
	
	/**
	 * Opens the binary stream of the asset's original rendition.
	 *
	 * @param asset the asset to read
	 * @return the stream of the original rendition, never {@code null}
	 * @throws IllegalStateException if the asset has no original rendition or
	 *                               the rendition provides no stream
	 */
	private static InputStream openOriginal(Asset asset) {
		if (asset.getOriginal() == null) {
			throw new IllegalStateException("Asset has no original rendition: " + asset.getPath());
		}
		InputStream stream = asset.getOriginal().getStream();
		if (stream == null) {
			throw new IllegalStateException("Original rendition has no content: " + asset.getPath());
		}
		return stream;
	}
	
	/**
	 * Parses the stream as a binary {@code .doc} document and adds every
	 * picture of its pictures table to the asset as a sub-asset.
	 *
	 * @param asset  the asset receiving the sub-assets
	 * @param source the document content
	 * @return the number of pictures added
	 * @throws IOException if the document cannot be read
	 */
	private int addDocPictures(Asset asset, InputStream source) throws IOException {
		HWPFDocument doc = new HWPFDocument(source);
		List<Picture> pics = doc.getPicturesTable().getAllPictures();
		log.debug("Found {} images to extract.", Integer.valueOf(pics.size()));
		
		for (Picture pic : pics) {
			asset.addSubAsset(pic.suggestFullFileName(), pic.getMimeType(),
					new ByteArrayInputStream(pic.getRawContent()));
		}
		return pics.size();
	}
	
	/**
	 * Parses the stream as an OOXML {@code .docx} document and adds every
	 * picture it contains to the asset as a sub-asset.
	 *
	 * @param asset  the asset receiving the sub-assets
	 * @param source the document content
	 * @return the number of pictures added
	 * @throws IOException if the document cannot be read
	 */
	private int addDocxPictures(Asset asset, InputStream source) throws IOException {
		XWPFDocument doc = new XWPFDocument(source);
		List<XWPFPictureData> pics = doc.getAllPictures();
		log.debug("Found {} images to extract.", Integer.valueOf(pics.size()));
		
		for (XWPFPictureData pic : pics) {
			asset.addSubAsset(pic.getFileName(), getMimeType(pic.getPictureType()),
					new ByteArrayInputStream(pic.getData()));
		}
		return pics.size();
	}
	
	/**
	 * Translates a {@code Document.PICTURE_TYPE_*} constant into a MIME type.
	 *
	 * @param picType the picture type reported for a {@code .docx} picture
	 * @return the mapped MIME type, or {@code application/octet-stream} when
	 *         the picture type is not one of the mapped constants
	 */
	private static String getMimeType(int picType) {
		String mimeType = MIME_TYPES.get(Integer.valueOf(picType));
		// An unmapped type is not expected; fall back to a generic MIME type.
		return mimeType != null ? mimeType : DEFAULT_MIME_TYPE;
	}
	
	/**
	 * Discards the unsaved changes of the underlying session. A failure to do
	 * so is logged rather than thrown so that it does not mask the error that
	 * triggered the rollback.
	 *
	 * @param session the workflow session whose pending changes are dropped
	 */
	private static void discardPendingChanges(WorkflowSession session) {
		try {
			session.getSession().refresh(false);
		} catch (RepositoryException e) {
			log.warn("Unable to discard pending changes after failed image extraction.", e);
		}
	}
	
	/**
	 * Closes the stream if it was opened, logging instead of propagating a
	 * failure to close.
	 *
	 * @param stream the stream to close, may be {@code null}
	 */
	private static void closeQuietly(InputStream stream) {
		if (stream == null) {
			return;
		}
		try {
			stream.close();
		} catch (IOException e) {
			log.debug("Unable to close the original rendition stream.", e);
		}
	}
}
