// ExtractImagesProcess.java 7.2 KB
/*
 * Decompiled with CFR 0_118.
 * 
 * Could not load the following classes:
 *  com.day.cq.dam.api.Asset
 *  com.day.cq.dam.api.Rendition
 *  com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess
 *  com.day.cq.workflow.WorkflowException
 *  com.day.cq.workflow.WorkflowSession
 *  com.day.cq.workflow.exec.WorkItem
 *  com.day.cq.workflow.metadata.MetaDataMap
 *  javax.jcr.RepositoryException
 *  javax.jcr.Session
 *  org.apache.felix.scr.annotations.Component
 *  org.apache.felix.scr.annotations.Properties
 *  org.apache.felix.scr.annotations.Property
 *  org.apache.felix.scr.annotations.Service
 *  org.apache.poi.hwpf.HWPFDocument
 *  org.apache.poi.hwpf.model.PicturesTable
 *  org.apache.poi.hwpf.usermodel.Picture
 *  org.apache.poi.xwpf.usermodel.XWPFDocument
 *  org.apache.poi.xwpf.usermodel.XWPFPictureData
 *  org.slf4j.Logger
 *  org.slf4j.LoggerFactory
 */
package com.day.cq.dam.word.process;

import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.metadata.MetaDataMap;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Workflow process that extracts the pictures embedded in a Word document
 * and stores each of them as a sub-asset of the document's DAM asset.
 *
 * <p>Two formats are handled, selected by the asset's MIME type: OOXML
 * documents ({@code .docx}, read with {@link XWPFDocument}) and binary Word
 * documents ({@code .doc}, read with {@link HWPFDocument}). Assets with any
 * other MIME type are left untouched.
 */
@Component
@Service
@Properties(value={@Property(name="service.description", value={"Extracts images from a Word document and adds them to the DAM as sub-assets."}), @Property(name="service.vendor", value={"Adobe"}), @Property(name="process.label", value={"Extract Images From Word"})})
public class ExtractImagesProcess
extends AbstractAssetWorkflowProcess {
    private static final Logger log = LoggerFactory.getLogger(ExtractImagesProcess.class);

    /** MIME type that selects the OOXML (.docx) extraction path. */
    private static final String DOCX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    /** Regular expression a MIME type must fully match to select the binary (.doc) extraction path. */
    private static final String DOC_MIME_PATTERN = "application.*msword";
    /** MIME type used for pictures whose picture-type code is not in {@link #MIME_TYPES}. */
    private static final String DEFAULT_MIME_TYPE = "application/octet-stream";

    private static final String BMP_MIME_TYPE = "image/bmp";
    private static final String DIB_MIME_TYPE = "image/dib";
    private static final String EMF_MIME_TYPE = "image/x-emf";
    private static final String EPS_MIME_TYPE = "image/eps";
    private static final String GIF_MIME_TYPE = "image/gif";
    private static final String JPG_MIME_TYPE = "image/jpeg";
    private static final String PICT_MIME_TYPE = "image/pict";
    private static final String PNG_MIME_TYPE = "image/png";
    private static final String WMF_MIME_TYPE = "image/wmf";
    private static final String WPG_MIME_TYPE = "image/wpg";

    /**
     * Immutable lookup from the integer code returned by
     * {@link XWPFPictureData#getPictureType()} to a MIME type. Built once at
     * class-initialisation time so concurrent workflow executions never
     * observe a partially populated map.
     */
    private static final Map<Integer, String> MIME_TYPES = buildMimeTypeMap();

    /**
     * Creates the picture-type to MIME-type table.
     *
     * @return an unmodifiable map keyed by picture-type code
     */
    private static Map<Integer, String> buildMimeTypeMap() {
        // NOTE(review): the numeric keys presumably mirror POI's
        // org.apache.poi.xwpf.usermodel.Document.PICTURE_TYPE_* constants - confirm.
        Map<Integer, String> types = new HashMap<Integer, String>();
        types.put(Integer.valueOf(2), EMF_MIME_TYPE);
        types.put(Integer.valueOf(3), WMF_MIME_TYPE);
        types.put(Integer.valueOf(4), PICT_MIME_TYPE);
        types.put(Integer.valueOf(5), JPG_MIME_TYPE);
        types.put(Integer.valueOf(6), PNG_MIME_TYPE);
        types.put(Integer.valueOf(7), DIB_MIME_TYPE);
        types.put(Integer.valueOf(8), GIF_MIME_TYPE);
        types.put(Integer.valueOf(10), EPS_MIME_TYPE);
        types.put(Integer.valueOf(11), BMP_MIME_TYPE);
        types.put(Integer.valueOf(12), WPG_MIME_TYPE);
        return Collections.unmodifiableMap(types);
    }

    /**
     * Extracts every picture from a binary Word (.doc) asset and adds it as a
     * sub-asset, then saves the session.
     *
     * @param asset   the Word document asset to read pictures from
     * @param session the workflow session whose JCR session is saved on
     *                success and refreshed (pending changes discarded) on failure
     * @throws WorkflowException if reading the document, adding a sub-asset
     *                           or saving the session fails
     */
    private void extractFromDoc(Asset asset, WorkflowSession session) throws WorkflowException {
        log.info("Extracting images from: {}", asset.getPath());
        boolean oldBatchMode = false;
        boolean batchModeChanged = false;
        InputStream original = null;
        try {
            original = openOriginal(asset);
            HWPFDocument doc = new HWPFDocument(original);
            oldBatchMode = asset.isBatchMode();
            asset.setBatchMode(true);
            batchModeChanged = true;
            List<Picture> pics = doc.getPicturesTable().getAllPictures();
            log.debug("Found {} images to extract.", Integer.valueOf(pics.size()));
            for (Picture pic : pics) {
                String filename = pic.suggestFullFileName();
                String mimeType = pic.getMimeType();
                // The picture bytes are already in memory, so no extra buffering is needed.
                InputStream content = new ByteArrayInputStream(pic.getRawContent());
                asset.addSubAsset(filename, mimeType, content);
            }
            session.getSession().save();
            log.info("Done extracting images from: {}", asset.getPath());
        }
        catch (Throwable t) {
            // Deliberately broad: whatever went wrong, pending changes must be discarded.
            throw this.rollback(asset, session, t);
        }
        finally {
            closeQuietly(original);
            // Only restore when we changed it; otherwise a failure while parsing
            // would force batch mode to false regardless of its prior value.
            if (batchModeChanged) {
                asset.setBatchMode(oldBatchMode);
            }
        }
    }

    /**
     * Extracts every picture from an OOXML Word (.docx) asset and adds it as
     * a sub-asset, then saves the session.
     *
     * @param asset   the Word document asset to read pictures from
     * @param session the workflow session whose JCR session is saved on
     *                success and refreshed (pending changes discarded) on failure
     * @throws WorkflowException if reading the document, adding a sub-asset
     *                           or saving the session fails
     */
    private void extractFromDocx(Asset asset, WorkflowSession session) throws WorkflowException {
        log.info("Extracting images from: {}", asset.getPath());
        boolean oldBatchMode = false;
        boolean batchModeChanged = false;
        InputStream original = null;
        try {
            original = openOriginal(asset);
            XWPFDocument doc = new XWPFDocument(original);
            oldBatchMode = asset.isBatchMode();
            asset.setBatchMode(true);
            batchModeChanged = true;
            List<XWPFPictureData> pics = doc.getAllPictures();
            log.debug("Found {} images to extract.", Integer.valueOf(pics.size()));
            for (XWPFPictureData pic : pics) {
                String filename = pic.getFileName();
                String mimeType = this.getMimeType(pic.getPictureType());
                // The picture bytes are already in memory, so no extra buffering is needed.
                InputStream content = new ByteArrayInputStream(pic.getData());
                asset.addSubAsset(filename, mimeType, content);
            }
            session.getSession().save();
            log.info("Done extracting images from: {}", asset.getPath());
        }
        catch (Throwable t) {
            // Deliberately broad: whatever went wrong, pending changes must be discarded.
            throw this.rollback(asset, session, t);
        }
        finally {
            closeQuietly(original);
            // Only restore when we changed it; otherwise a failure while parsing
            // would force batch mode to false regardless of its prior value.
            if (batchModeChanged) {
                asset.setBatchMode(oldBatchMode);
            }
        }
    }

    /**
     * Opens the binary stream of the asset's original rendition.
     *
     * @param asset the asset whose original rendition is read
     * @return the stream returned by the original rendition
     * @throws IllegalStateException if the asset has no original rendition
     */
    private static InputStream openOriginal(Asset asset) {
        Rendition original = asset.getOriginal();
        if (original == null) {
            throw new IllegalStateException("Asset has no original rendition: " + asset.getPath());
        }
        return original.getStream();
    }

    /**
     * Discards the session's pending changes and wraps the failure for the caller to throw.
     *
     * @param asset   the asset that was being processed (used for messages only)
     * @param session the workflow session whose JCR session is refreshed
     * @param cause   the failure that aborted the extraction
     * @return a {@link WorkflowException} carrying {@code cause}
     */
    private WorkflowException rollback(Asset asset, WorkflowSession session, Throwable cause) {
        try {
            session.getSession().refresh(false);
        }
        catch (RepositoryException e) {
            // Best effort: the original failure is still reported to the caller.
            log.warn("Unable to discard pending changes after failed image extraction from: " + asset.getPath(), e);
        }
        return new WorkflowException("Failed to extract images from " + asset.getPath() + ": " + cause.getMessage(), cause);
    }

    /**
     * Closes the given stream, logging instead of propagating any I/O failure.
     *
     * @param stream the stream to close; may be {@code null}
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException e) {
            log.debug("Failed to close document stream.", e);
        }
    }

    /**
     * Maps a picture-type code to a MIME type.
     *
     * @param picType the code returned by {@link XWPFPictureData#getPictureType()}
     * @return the matching MIME type, or {@code application/octet-stream}
     *         when the code is not known
     */
    private String getMimeType(int picType) {
        String mimeType = MIME_TYPES.get(Integer.valueOf(picType));
        return mimeType != null ? mimeType : DEFAULT_MIME_TYPE;
    }

    /**
     * Resolves the asset from the workflow payload and extracts its embedded
     * images when the asset's MIME type identifies a Word document.
     *
     * @param item    the work item whose payload identifies the asset
     * @param session the workflow session providing the JCR session
     * @param args    process arguments; not used by this process
     * @throws WorkflowException if the extraction fails
     */
    public void execute(WorkItem item, WorkflowSession session, MetaDataMap args) throws WorkflowException {
        Asset asset = this.getAssetFromPayload(item, session.getSession());
        // NOTE(review): getAssetFromPayload presumably returns null when the
        // payload is not an asset - confirm against AbstractAssetWorkflowProcess.
        if (asset == null) {
            log.warn("No asset could be resolved from the workflow payload; skipping image extraction.");
            return;
        }
        String assetMime = asset.getMimeType();
        if (assetMime == null) {
            log.debug("Asset {} has no MIME type; skipping image extraction.", asset.getPath());
            return;
        }
        if (DOCX_MIME_TYPE.equals(assetMime)) {
            this.extractFromDocx(asset, session);
        } else if (assetMime.matches(DOC_MIME_PATTERN)) {
            this.extractFromDoc(asset, session);
        }
    }
}