// ExtractPlainProcess.java 6.93 KB
/*
 * Decompiled with CFR 0_118.
 * 
 * Could not load the following classes:
 *  com.adobe.granite.workflow.WorkflowException
 *  com.adobe.granite.workflow.WorkflowSession
 *  com.adobe.granite.workflow.exec.WorkItem
 *  com.adobe.granite.workflow.exec.WorkflowData
 *  com.adobe.granite.workflow.exec.WorkflowProcess
 *  com.adobe.granite.workflow.metadata.MetaDataMap
 *  com.day.cq.dam.api.Asset
 *  com.day.cq.dam.api.Rendition
 *  com.day.cq.dam.commons.util.DamUtil
 *  javax.jcr.Session
 *  org.apache.felix.scr.annotations.Component
 *  org.apache.felix.scr.annotations.Properties
 *  org.apache.felix.scr.annotations.Property
 *  org.apache.felix.scr.annotations.Reference
 *  org.apache.felix.scr.annotations.Service
 *  org.apache.poi.hwpf.HWPFDocument
 *  org.apache.poi.hwpf.usermodel.Paragraph
 *  org.apache.poi.hwpf.usermodel.Range
 *  org.apache.poi.xwpf.usermodel.XWPFDocument
 *  org.apache.poi.xwpf.usermodel.XWPFParagraph
 *  org.apache.sling.api.resource.LoginException
 *  org.apache.sling.api.resource.ModifiableValueMap
 *  org.apache.sling.api.resource.Resource
 *  org.apache.sling.api.resource.ResourceResolver
 *  org.apache.sling.api.resource.ResourceResolverFactory
 *  org.slf4j.Logger
 *  org.slf4j.LoggerFactory
 */
package com.day.cq.dam.word.process;

import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.commons.util.DamUtil;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.jcr.Session;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Component
@Service(value={WorkflowProcess.class})
@Properties(value={@Property(name="service.description", value={"Extracts plain text from a Word document and adds it as a rendition."}), @Property(name="service.vendor", value={"Adobe"}), @Property(name="process.label", value={"Extract Plain Text From Word"})})
public class ExtractPlainProcess
implements WorkflowProcess {
    private static final Logger log = LoggerFactory.getLogger(ExtractPlainProcess.class);
    private static final String JCR_PATH = "JCR_PATH";
    @Reference
    ResourceResolverFactory resourceResolverFactory;

    public void execute(WorkItem item, WorkflowSession session, MetaDataMap args) throws WorkflowException {
        try {
            String doc;
            Session jcrSession = (Session)session.adaptTo(Session.class);
            HashMap<String, Session> params = new HashMap<String, Session>();
            params.put("user.jcr.session", jcrSession);
            ResourceResolver resourceResolver = this.resourceResolverFactory.getResourceResolver(params);
            Asset asset = this.getPayloadAsset(item, resourceResolver);
            String assetMime = asset.getMimeType();
            if (assetMime.matches("application.*msword")) {
                doc = this.extractFromDoc(asset);
            } else if (assetMime.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
                doc = this.extractFromDocx(asset);
            } else {
                String msg = "No appropriate extractor found for: " + asset.getName();
                throw new IOException(msg);
            }
            Rendition rendition = asset.addRendition("plain", (InputStream)new ByteArrayInputStream(doc.getBytes("UTF-8")), "text/plain");
            Resource resource = rendition.getChild("jcr:content");
            ModifiableValueMap contentProps = (ModifiableValueMap)resource.adaptTo(ModifiableValueMap.class);
            contentProps.put((Object)"jcr:encoding", (Object)"UTF-8");
            ResourceResolver resResolver = resource.getResourceResolver();
            resResolver.commit();
        }
        catch (IOException ex) {
            log.error("Could not generate plain text rendition: " + ex);
        }
        catch (LoginException ex) {
            log.error("Could not generate plain text rendition: " + (Object)ex);
        }
    }

    private String extractFromDoc(Asset asset) throws IOException {
        Paragraph[] paragraphs;
        String result = "";
        HWPFDocument doc = new HWPFDocument(asset.getOriginal().getStream());
        for (Paragraph paragraph : paragraphs = this.getParagraphs(doc.getRange())) {
            result = result + paragraph.text() + "\n\n";
        }
        return result;
    }

    private String extractFromDocx(Asset asset) throws IOException {
        String result = "";
        XWPFDocument doc = new XWPFDocument(asset.getOriginal().getStream());
        Iterator paraIter = doc.getParagraphsIterator();
        while (paraIter.hasNext()) {
            result = result + ((XWPFParagraph)paraIter.next()).getText() + "\n\n";
        }
        return result;
    }

    private Asset getPayloadAsset(WorkItem item, ResourceResolver resourceResolver) {
        Asset asset = null;
        if (item.getWorkflowData().getPayloadType().equals("JCR_PATH")) {
            Resource resource;
            String path = item.getWorkflowData().getPayload().toString();
            if (resourceResolver != null && null != (resource = resourceResolver.getResource(path))) {
                asset = DamUtil.resolveToAsset((Resource)resource);
            }
        }
        return asset;
    }

    private Paragraph[] getParagraphs(Range range) {
        Paragraph[] paragraphs = new Paragraph[range.numParagraphs()];
        for (int i = 0; i < paragraphs.length; ++i) {
            paragraphs[i] = range.getParagraph(i);
        }
        return paragraphs;
    }

    protected void bindResourceResolverFactory(ResourceResolverFactory resourceResolverFactory) {
        this.resourceResolverFactory = resourceResolverFactory;
    }

    protected void unbindResourceResolverFactory(ResourceResolverFactory resourceResolverFactory) {
        if (this.resourceResolverFactory == resourceResolverFactory) {
            this.resourceResolverFactory = null;
        }
    }
}