// DocxImporter.java (7.25 KB)
/*
 * Decompiled with CFR 0_118.
 * 
 * Could not load the following classes:
 *  org.apache.poi.POIXMLException
 *  org.apache.poi.xwpf.usermodel.UnderlinePatterns
 *  org.apache.poi.xwpf.usermodel.VerticalAlign
 *  org.apache.poi.xwpf.usermodel.XWPFDocument
 *  org.apache.poi.xwpf.usermodel.XWPFParagraph
 *  org.apache.poi.xwpf.usermodel.XWPFPicture
 *  org.apache.poi.xwpf.usermodel.XWPFPictureData
 *  org.apache.poi.xwpf.usermodel.XWPFRun
 *  org.slf4j.Logger
 *  org.slf4j.LoggerFactory
 */
package com.day.cq.wcm.offline;

import com.day.cq.wcm.offline.HtmlUtil;
import com.day.cq.wcm.offline.Paragraph;
import com.day.cq.wcm.offline.Picture;
import com.day.cq.wcm.offline.TextDocumentImporter;
import com.day.cq.wcm.offline.TextImportException;
import com.day.cq.wcm.offline.WordStyleSupport;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.poi.POIXMLException;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link TextDocumentImporter} backed by an Apache POI {@link XWPFDocument}
 * (Word ".docx" file).
 *
 * <p>On construction the document is parsed and every paragraph that is
 * neither empty nor a page break is retained, in document order. Those
 * retained paragraphs are what {@link #getNumberOfParagraphs()} counts and
 * what {@link #getParagraph(int)} indexes into.
 */
public class DocxImporter implements TextDocumentImporter {
    private static final Logger log = LoggerFactory.getLogger(DocxImporter.class);

    private final XWPFDocument document;
    private final List<XWPFParagraph> paragraphs;

    /**
     * Parses the given stream as a docx document.
     *
     * <p>The stream is not closed by this constructor.
     *
     * @param stream the raw docx content
     * @throws TextImportException if POI rejects the content with a
     *         {@link POIXMLException}; the original exception is kept as cause
     * @throws IOException if reading the stream fails
     */
    public DocxImporter(InputStream stream) throws TextImportException, IOException {
        try {
            this.document = new XWPFDocument(stream);
            List<XWPFParagraph> kept = new ArrayList<XWPFParagraph>();
            for (XWPFParagraph candidate : this.document.getParagraphs()) {
                // Empty paragraphs and page breaks carry no importable content.
                if (candidate.isEmpty() || candidate.isPageBreak()) {
                    continue;
                }
                kept.add(candidate);
            }
            this.paragraphs = Collections.unmodifiableList(kept);
        }
        catch (POIXMLException ex) {
            throw new TextImportException("not a docx file", ex);
        }
    }

    /**
     * Returns the trimmed text of the first retained paragraph whose text is
     * not blank after trimming.
     *
     * @return the title text, or {@code null} if no paragraph has any text
     */
    public String getTitle() {
        for (XWPFParagraph candidate : this.paragraphs) {
            String trimmed = candidate.getText().trim();
            if (trimmed.length() > 0) {
                return trimmed;
            }
        }
        return null;
    }

    /**
     * @return the number of retained (non-empty, non-page-break) paragraphs
     */
    public int getNumberOfParagraphs() {
        return this.paragraphs.size();
    }

    /**
     * Builds a {@link Paragraph} view of the retained paragraph at the given
     * position. A new view is created on every call.
     *
     * @param index zero-based position, in the range
     *        {@code 0 .. getNumberOfParagraphs() - 1}
     * @return the converted paragraph
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public Paragraph getParagraph(int index) {
        return new DocxParagraph(this.paragraphs.get(index));
    }

    /**
     * {@link Picture} view of a picture embedded in a docx run.
     *
     * <p>Static because it needs no state from the enclosing importer.
     */
    private static final class DocxPicture implements Picture {
        private final XWPFPicture picture;

        public DocxPicture(XWPFPicture picture) {
            this.picture = picture;
        }

        /**
         * Maps the POI picture type code to a media type.
         *
         * @return {@code "image/gif"}, {@code "image/jpeg"} or
         *         {@code "image/png"}; {@code null} (after logging an error)
         *         for any other picture type
         */
        public String getMediaType() {
            int type = this.picture.getPictureData().getPictureType();
            switch (type) {
                case 8: {
                    // POI Document.PICTURE_TYPE_GIF
                    return "image/gif";
                }
                case 5: {
                    // POI Document.PICTURE_TYPE_JPEG
                    return "image/jpeg";
                }
                case 6: {
                    // POI Document.PICTURE_TYPE_PNG
                    return "image/png";
                }
                default: {
                    log.error("Unknown picture type {} - need to define media type mapping", type);
                    return null;
                }
            }
        }

        /**
         * @return the raw picture bytes as stored in the document
         */
        public byte[] getBytes() {
            return this.picture.getPictureData().getData();
        }

        @Override
        public String toString() {
            return this.getMediaType() + " (" + this.getBytes().length + " bytes)";
        }
    }

    /**
     * {@link Paragraph} view of a single docx paragraph. Plain text, HTML and
     * embedded pictures are all computed once, in the constructor.
     *
     * <p>Static because it needs no state from the enclosing importer.
     */
    private static final class DocxParagraph implements Paragraph {
        private final XWPFParagraph p;
        private final String text;
        private final String textHTML;
        private final List<Picture> pictures;

        /**
         * Converts the given paragraph.
         *
         * <p>The HTML container element defaults to {@code p} with a
         * {@code class} attribute derived from the paragraph style via
         * {@link WordStyleSupport#makeClassName}. If
         * {@link WordStyleSupport#toHtmlElement} returns an element name for
         * that class name, that element is used instead and no {@code class}
         * attribute is written.
         *
         * @param p the POI paragraph to convert
         */
        public DocxParagraph(XWPFParagraph p) {
            this.p = p;

            String classname = WordStyleSupport.makeClassName(p.getStyle());
            String container = "p";
            String elemname = WordStyleSupport.toHtmlElement(classname);
            if (elemname != null) {
                container = elemname;
                classname = null;
            }

            StringBuilder plain = new StringBuilder();
            StringBuilder html = new StringBuilder();
            List<Picture> pics = new ArrayList<Picture>();
            for (XWPFRun run : this.p.getRuns()) {
                appendRun(run, plain, html);
                for (XWPFPicture pic : run.getEmbeddedPictures()) {
                    pics.add(new DocxPicture(pic));
                }
            }

            this.text = plain.toString().trim();
            this.textHTML = wrap(html.toString().trim(), container, classname);
            this.pictures = Collections.unmodifiableList(pics);
        }

        /**
         * Appends one run's text to {@code plain} and its formatted,
         * HTML-escaped form to {@code html}.
         *
         * <p>Formatting flags are read once so that every opened tag is closed;
         * tags are closed in the reverse of the order they were opened.
         */
        private static void appendRun(XWPFRun run, StringBuilder plain, StringBuilder html) {
            boolean bold = run.isBold();
            boolean italic = run.isItalic();
            boolean strike = run.isStrike();
            boolean underline = run.getUnderline() != UnderlinePatterns.NONE;
            VerticalAlign align = run.getSubscript();
            boolean sub = align == VerticalAlign.SUBSCRIPT;
            boolean sup = align == VerticalAlign.SUPERSCRIPT;

            if (bold) {
                html.append("<b>");
            }
            if (italic) {
                html.append("<i>");
            }
            if (strike) {
                html.append("<del>");
            }
            if (underline) {
                html.append("<u>");
            }
            if (sub) {
                html.append("<sub>");
            }
            if (sup) {
                html.append("<sup>");
            }

            // Only the run's text at position 0 is read; it may be absent.
            String textdata = run.getText(0);
            if (textdata != null) {
                plain.append(textdata);
                html.append(HtmlUtil.escapeHtmlText(textdata));
            }

            if (sup) {
                html.append("</sup>");
            }
            if (sub) {
                html.append("</sub>");
            }
            if (underline) {
                html.append("</u>");
            }
            if (strike) {
                html.append("</del>");
            }
            if (italic) {
                html.append("</i>");
            }
            if (bold) {
                html.append("</b>");
            }
        }

        /**
         * Wraps non-empty inner HTML in the container element, adding a
         * {@code class} attribute when {@code classname} is not {@code null}.
         * Empty inner HTML is returned unchanged, so no empty element is produced.
         */
        private static String wrap(String inner, String container, String classname) {
            if (inner.length() == 0) {
                return inner;
            }
            StringBuilder out = new StringBuilder();
            out.append("<").append(container);
            if (classname != null) {
                out.append(" class='").append(HtmlUtil.escapeHtmlAttr(classname)).append("'");
            }
            out.append(">");
            out.append(inner);
            out.append("</").append(container).append(">");
            return out.toString();
        }

        /**
         * @return the trimmed, unformatted text of the paragraph
         */
        @Override
        public String getText() {
            return this.text;
        }

        /**
         * @return the paragraph as an HTML fragment, or an empty string if the
         *         paragraph produced no markup
         */
        @Override
        public String getHTML() {
            return this.textHTML;
        }

        /**
         * @return an unmodifiable list of the pictures embedded in the
         *         paragraph's runs, in run order
         */
        @Override
        public List<Picture> getPictures() {
            return this.pictures;
        }
    }

}