// DocImporter.java 7.54 KB
/*
 * Decompiled with CFR 0_118.
 * 
 * Could not load the following classes:
 *  org.apache.poi.hwpf.HWPFDocument
 *  org.apache.poi.hwpf.model.PicturesTable
 *  org.apache.poi.hwpf.model.StyleDescription
 *  org.apache.poi.hwpf.model.StyleSheet
 *  org.apache.poi.hwpf.usermodel.CharacterRun
 *  org.apache.poi.hwpf.usermodel.Paragraph
 *  org.apache.poi.hwpf.usermodel.Picture
 *  org.apache.poi.hwpf.usermodel.Range
 *  org.apache.poi.poifs.filesystem.OfficeXmlFileException
 */
package com.day.cq.wcm.offline;

import com.day.cq.wcm.offline.HtmlUtil;
import com.day.cq.wcm.offline.Paragraph;
import com.day.cq.wcm.offline.Picture;
import com.day.cq.wcm.offline.TextDocumentImporter;
import com.day.cq.wcm.offline.TextImportException;
import com.day.cq.wcm.offline.WordStyleSupport;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;

public class DocImporter
implements TextDocumentImporter {
    // Parsed Word document; assigned exactly once, in the constructor.
    private final HWPFDocument document;
    // Result of document.getRange(), captured in the constructor; all paragraph
    // lookups in this class go through it.
    private final Range range;

    public DocImporter(InputStream stream) throws TextImportException {
        try {
            this.document = new HWPFDocument(stream);
            this.range = this.document.getRange();
        }
        catch (OfficeXmlFileException ex) {
            throw new TextImportException("this is a docx file", (Throwable)ex);
        }
        catch (IOException ex) {
            throw new TextImportException(ex.getMessage(), ex);
        }
    }

    /**
     * Derives a title by scanning the document's paragraphs in order.
     *
     * @return the trimmed text of the first paragraph that is non-empty after
     *         trimming, or {@code null} when every paragraph trims to the empty string
     */
    public String getTitle() {
        int index = 0;
        while (index < this.range.numParagraphs()) {
            String candidate = this.range.getParagraph(index).text().trim();
            if (candidate.length() != 0) {
                return candidate;
            }
            index++;
        }
        return null;
    }

    /**
     * Reports how many paragraphs the document's range contains.
     *
     * @return the value of {@code numParagraphs()} on the cached range
     */
    public int getNumberOfParagraphs() {
        final int paragraphCount = this.range.numParagraphs();
        return paragraphCount;
    }

    /**
     * Wraps the paragraph at the given position of the cached range.
     *
     * @param index position passed straight to the range's {@code getParagraph}
     * @return a new {@code DocParagraph} built from that paragraph
     */
    public Paragraph getParagraph(int index) {
        org.apache.poi.hwpf.usermodel.Paragraph source = this.range.getParagraph(index);
        return new DocParagraph(source);
    }

    private class DocPicture
    implements Picture {
        private final org.apache.poi.hwpf.usermodel.Picture picture;

        public DocPicture(org.apache.poi.hwpf.usermodel.Picture picture) {
            this.picture = picture;
        }

        public String getMediaType() {
            return this.picture.getMimeType();
        }

        public byte[] getBytes() {
            return this.picture.getContent();
        }

        public String toString() {
            return this.getMediaType() + " (" + this.getBytes().length + " bytes)";
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Adapts one POI paragraph to the importer's {@code Paragraph} interface.
     * The plain text, the HTML fragment and the picture list are all computed
     * once in the constructor and served unchanged by the getters.
     */
    private class DocParagraph
    implements Paragraph {
        private final org.apache.poi.hwpf.usermodel.Paragraph p;
        private final String text;
        private final String textHTML;
        private final List<Picture> pictures;

        /**
         * Converts the given paragraph.
         *
         * <p>The paragraph's style name is turned into a class name by
         * {@code WordStyleSupport.makeClassName}; when
         * {@code WordStyleSupport.toHtmlElement} yields an element name for it,
         * that element replaces the default {@code p} container and no class
         * attribute is written. Character runs that the pictures table reports
         * as pictures are collected as pictures and contribute no text.
         *
         * @param paragraph the POI paragraph to convert
         */
        public DocParagraph(org.apache.poi.hwpf.usermodel.Paragraph paragraph) {
            this.p = paragraph;

            String container = "p";
            String classname = null;
            StyleDescription sd = DocImporter.this.document.getStyleSheet().getStyleDescription((int)paragraph.getStyleIndex());
            // A missing style description used to end in a NullPointerException;
            // such paragraphs now fall back to a plain <p> without a class.
            if (sd != null) {
                classname = WordStyleSupport.makeClassName(sd.getName());
                String elemname = WordStyleSupport.toHtmlElement(classname);
                if (elemname != null) {
                    container = elemname;
                    classname = null;
                }
            }

            // Loop-invariant lookups, hoisted out of the run loop.
            PicturesTable picturesTable = DocImporter.this.document.getPicturesTable();
            int runCount = paragraph.numCharacterRuns();

            StringBuilder plain = new StringBuilder();
            StringBuilder html = new StringBuilder();
            // Typed as List<Picture> so that unmodifiableList() needs no
            // target-type inference to produce a List<Picture>.
            List<Picture> pics = new ArrayList<Picture>();
            for (int i = 0; i < runCount; ++i) {
                CharacterRun run = paragraph.getCharacterRun(i);
                if (picturesTable.hasPicture(run)) {
                    pics.add(new DocPicture(picturesTable.extractPicture(run, true)));
                    continue;
                }
                String contents = run.text();
                // Whitespace-only runs are copied through but never wrapped in
                // formatting tags.
                boolean hasVisibleText = contents.trim().length() != 0;
                if (hasVisibleText) {
                    this.appendOpeningTags(run, html);
                }
                plain.append(contents);
                html.append(HtmlUtil.escapeHtmlText(contents));
                if (hasVisibleText) {
                    this.appendClosingTags(run, html);
                }
            }

            String result = html.toString().trim();
            if (result.length() > 0) {
                result = this.wrapInContainer(result, container, classname, paragraph.getJustification());
            }
            this.text = plain.toString().trim();
            this.textHTML = result;
            this.pictures = Collections.unmodifiableList(pics);
        }

        /**
         * Appends the opening inline tags for a run's formatting, outermost
         * first: bold, italic, strike-through, underline, superscript, subscript.
         */
        private void appendOpeningTags(CharacterRun run, StringBuilder out) {
            if (run.isBold()) {
                out.append("<b>");
            }
            if (run.isItalic()) {
                out.append("<i>");
            }
            if (run.isStrikeThrough()) {
                out.append("<del>");
            }
            if (run.getUnderlineCode() != 0) {
                out.append("<u>");
            }
            if (run.getSubSuperScriptIndex() == 1) {
                out.append("<sup>");
            }
            if (run.getSubSuperScriptIndex() == 2) {
                out.append("<sub>");
            }
        }

        /**
         * Appends the closing inline tags for a run's formatting in the exact
         * reverse of the opening order, so the tags nest properly.
         */
        private void appendClosingTags(CharacterRun run, StringBuilder out) {
            if (run.getSubSuperScriptIndex() == 2) {
                out.append("</sub>");
            }
            if (run.getSubSuperScriptIndex() == 1) {
                out.append("</sup>");
            }
            if (run.getUnderlineCode() != 0) {
                out.append("</u>");
            }
            if (run.isStrikeThrough()) {
                out.append("</del>");
            }
            if (run.isItalic()) {
                out.append("</i>");
            }
            if (run.isBold()) {
                out.append("</b>");
            }
        }

        /**
         * Wraps an HTML fragment in the container element. Justification 2
         * adds a right-align style and 1 a center style; any other value adds
         * no style attribute. A non-null class name is written as an escaped
         * class attribute.
         */
        private String wrapInContainer(String inner, String container, String classname, int justification) {
            StringBuilder wrapped = new StringBuilder();
            wrapped.append("<");
            wrapped.append(container);
            String style = null;
            if (justification == 2) {
                style = "text-align: right;";
            } else if (justification == 1) {
                style = "text-align: center;";
            }
            if (style != null) {
                wrapped.append(" style='" + style + "'");
            }
            if (classname != null) {
                wrapped.append(" class='" + HtmlUtil.escapeHtmlAttr(classname) + "'");
            }
            wrapped.append(">");
            wrapped.append(inner);
            wrapped.append("</");
            wrapped.append(container);
            wrapped.append(">");
            return wrapped.toString();
        }

        /**
         * @return the concatenated text of all non-picture runs, trimmed
         */
        @Override
        public String getText() {
            return this.text;
        }

        /**
         * @return the paragraph as an HTML fragment wrapped in its container
         *         element, or the empty string when the runs produced no
         *         non-whitespace markup
         */
        @Override
        public String getHTML() {
            return this.textHTML;
        }

        /**
         * @return an unmodifiable list of the pictures found in this paragraph's runs
         */
        @Override
        public List<Picture> getPictures() {
            return this.pictures;
        }
    }

}