Android word/Excel文档转PDF

首先在 Gradle 文件(build.gradle)中导入以下依赖包:

    implementation 'org.apache.poi:poi:3.17'
    // Excel XLS
    implementation 'org.apache.poi:poi-ooxml:3.17'
    // Excel XLSX DOCX
    implementation 'org.apache.poi:poi-scratchpad:3.17'
    // Word DOC
    implementation 'org.apache.poi:poi-ooxml-schemas:3.17'
    implementation files('libs/xdocreport-2.0.1.jar')
    implementation files('libs/rt.jar')
    //项目要使用iText
    implementation 'com.itextpdf:itextpdf:5.5.13.1'
    implementation 'com.itextpdf.tool:xmlworker:5.5.11'
    implementation 'org.apache.xmlbeans:xmlbeans:3.1.0'
    implementation 'org.apache.commons:commons-collections4:4.4'
    //输出中文,还要引入下面itext-asian.jar包
    implementation 'com.itextpdf:itext-asian:5.2.0'
    implementation 'org.jsoup:jsoup:1.11.3'

运行项目时可能会出现重复的包或文件,需要在 build.gradle 的 android 块中添加:

packagingOptions {
        // iText and itext-asian both ship this resource; keep only one copy.
        exclude 'com/itextpdf/text/pdf/fonts/cmap_info.txt'
        // Duplicate license files live under META-INF (the previous "MATE-INF" path matched nothing).
        exclude 'META-INF/LICENSE'
    }

解决重复引入包的问题。
核心代码:

import android.util.Log;

import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.VerticalAlignment;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;


/**
 * Converts Excel workbooks ({@code .xls} / {@code .xlsx}) into HTML text.
 *
 * <p>{@link #readExcelToHtml(String)} is the usual entry point: it opens the file, renders every
 * sheet as an HTML {@code <table>} (merged regions become {@code rowspan}/{@code colspan}, rows
 * flagged as zero-height are skipped) and releases the workbook afterwards.
 */
public class ExcelToHtml {

    private static final String TAG = "ExcelToHtml";

    /**
     * Opening of every rendered cell up to and including its style attribute. The trailing space
     * keeps the following attribute separated from the closing quote.
     */
    private static final String CELL_OPEN = "<td style='border-width: 0.1px;" +
            "        border-style: solid;" +
            "        border-color: #666666;" +
            "        background-color: #ffffff;' ";

    /**
     * Reads the Excel file at {@code xlsPath} and renders all of its sheets as HTML.
     *
     * <p>Both {@code .xls} and {@code .xlsx} workbooks go through {@link #excel07ToHtml(Workbook)}.
     *
     * @param xlsPath path of the workbook file
     * @return the HTML text; an empty string when the file cannot be opened or has an unsupported
     * extension; {@code null} when opening or closing the workbook throws unexpectedly
     */
    public static String readExcelToHtml(String xlsPath) {
        // try-with-resources tolerates a null resource, so a failed open simply renders nothing.
        try (Workbook wb = readExcel(xlsPath)) {
            return excel07ToHtml(wb);
        } catch (Exception e) {
            Log.e(TAG, "Failed to convert " + xlsPath + " to HTML", e);
        }
        return null;
    }

    /**
     * Opens a workbook, choosing the implementation from the file extension (case-insensitive).
     *
     * @param fileName path of the workbook file, may be {@code null}
     * @return the workbook, or {@code null} when the name is {@code null}, has no supported
     * extension, or the file cannot be read
     */
    private static Workbook readExcel(String fileName) {
        if (fileName == null) {
            return null;
        }
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) {
            // No extension at all: substring(-1) used to throw here.
            return null;
        }
        String extension = fileName.substring(dot);
        boolean isXls = ".xls".equalsIgnoreCase(extension);
        boolean isXlsx = ".xlsx".equalsIgnoreCase(extension);
        if (!isXls && !isXlsx) {
            return null;
        }
        // The stream is only needed while the workbook is constructed, so it is closed right after.
        try (InputStream is = new FileInputStream(fileName)) {
            if (isXls) {
                return new HSSFWorkbook(is);
            }
            return new XSSFWorkbook(is);
        } catch (IOException e) {
            Log.e(TAG, "Unable to read " + fileName, e);
            return null;
        }
    }

    /**
     * Renders an {@code .xls} workbook with POI's {@link ExcelToHtmlConverter} and serializes the
     * resulting DOM as UTF-8 HTML.
     *
     * @param excelBook the workbook to convert
     * @return the serialized HTML document
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException         if the DOM cannot be serialized
     * @throws IOException                  if the in-memory buffer cannot be decoded or closed
     */
    public static String excel03ToHtml(HSSFWorkbook excelBook) throws ParserConfigurationException, TransformerException, IOException {
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        excelToHtmlConverter.processWorkbook(excelBook);
        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);
        // Serialize the DOM produced by the converter into the byte buffer.
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        outStream.close();
        return outStream.toString("UTF-8");
    }

    /**
     * Renders every sheet of {@code workbook} as a titled HTML table.
     *
     * <p>Rendering is best effort: if a sheet fails, the failure is logged and whatever was
     * produced up to that point is returned. The workbook is not closed. Note that reading a cell
     * converts it to a string cell (see {@link #getCellValue(Cell)}), so the workbook is modified
     * in memory.
     *
     * @param workbook the workbook to render, may be {@code null}
     * @return the HTML text, empty when {@code workbook} is {@code null}
     */
    public static String excel07ToHtml(Workbook workbook) {
        StringBuilder html = new StringBuilder();
        if (workbook == null) {
            return html.toString();
        }
        try {
            for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
                Sheet sheet = workbook.getSheetAt(numSheet);
                if (sheet == null) {
                    continue;
                }
                appendSheet(html, sheet);
            }
        } catch (Exception e) {
            Log.e(TAG, "Sheet rendering aborted, returning partial HTML", e);
        }
        return html.toString();
    }

    /** Appends the title line and the {@code <table>} for one sheet to {@code html}. */
    private static void appendSheet(StringBuilder html, Sheet sheet) {
        html.append("=======================").append(escapeHtml(sheet.getSheetName())).append("=========================<br><br>");
        html.append("<table style='" +
                "        font-size:11px;" +
                "        color:#333333;" +
                "        border-width: 0.1px;" +
                "        border-color: #666666;" +
                "        border-collapse: collapse;width:100%;' align='left'>");

        MergeInfo merges = analyzeMergedRegions(sheet);
        int firstRowIndex = sheet.getFirstRowNum();
        int lastRowIndex = sheet.getLastRowNum();
        for (int rowIndex = firstRowIndex; rowIndex <= lastRowIndex; rowIndex++) {
            Row currentRow = sheet.getRow(rowIndex);
            if (currentRow == null) {
                // Row that was never created: keep the vertical position with an empty line.
                html.append("<tr><td >  </td></tr>");
                continue;
            }
            if (currentRow.getZeroHeight()) {
                continue; // hidden row
            }
            html.append("<tr>");
            // getFirstCellNum() is -1 for a row without cells and XSSFRow.getCell() rejects negative
            // indexes; getLastCellNum() is already "last index + 1", hence the exclusive bound.
            int firstColumnIndex = Math.max(currentRow.getFirstCellNum(), 0);
            int lastColumnIndex = currentRow.getLastCellNum();
            for (int columnIndex = firstColumnIndex; columnIndex < lastColumnIndex; columnIndex++) {
                Cell currentCell = currentRow.getCell(columnIndex);
                if (currentCell == null) {
                    continue;
                }
                appendCell(html, sheet, merges, currentCell, rowIndex, columnIndex);
            }
            html.append("</tr>");
        }
        html.append("</table>");
    }

    /**
     * Appends one {@code <td>} for {@code cell}, or nothing when the cell is covered by a merged
     * region that starts elsewhere.
     */
    private static void appendCell(StringBuilder html, Sheet sheet, MergeInfo merges, Cell cell, int rowIndex, int columnIndex) {
        String cellKey = key(rowIndex, columnIndex);
        String cellValue = getCellValue(cell);
        int[] bottomRight = merges.spans.get(cellKey);
        if (bottomRight != null) {
            int rowSpan = bottomRight[0] - rowIndex + 1;
            int colSpan = bottomRight[1] - columnIndex + 1;
            Integer hiddenRows = merges.hiddenRows.get(cellKey);
            if (hiddenRows != null) {
                rowSpan -= hiddenRows; // hidden rows are not rendered, so they must not be spanned
            }
            html.append(CELL_OPEN)
                    .append("rowspan= '")
                    .append(rowSpan)
                    .append("' colspan= '")
                    .append(colSpan)
                    .append("' ");
            if (merges.shiftedTops.containsKey(cellKey)) {
                // The region's real first row is hidden, so this cell is empty; take the value
                // from the region's original top-left cell instead.
                cellValue = getMergedRegionValue(sheet, rowIndex, columnIndex);
            }
        } else if (merges.covered.containsKey(cellKey)) {
            return;
        } else {
            html.append(CELL_OPEN);
        }
        CellStyle cellStyle = cell.getCellStyle();
        if (cellStyle != null) {
            html.append("align='").append(getHAlignByExcel(cellStyle.getAlignmentEnum())).append("' ");
            html.append("valign='").append(getVAlignByExcel(cellStyle.getVerticalAlignmentEnum())).append("' ");
        }
        html.append(">");
        if (cellValue != null && !"".equals(cellValue)) {
            // Non-breaking spaces become plain spaces; markup characters are escaped so that cell
            // text cannot break the surrounding table.
            html.append(escapeHtml(cellValue.replace((char) 160, ' ')));
        }
        html.append("</td>");
    }

    /** Merged-region bookkeeping for one sheet; all keys have the form {@code "row,col"}. */
    private static final class MergeInfo {
        /** Rendered top-left cell of a region -> {bottomRow, bottomCol}. */
        final Map<String, int[]> spans = new HashMap<String, int[]>();
        /** Cells swallowed by a region -> key of the region's rendered top-left cell. */
        final Map<String, String> covered = new HashMap<String, String>();
        /** Rendered top-left cell -> number of hidden rows below it inside the region. */
        final Map<String, Integer> hiddenRows = new HashMap<String, Integer>();
        /** Rendered top-left cell -> original top-left cell, when leading rows are hidden. */
        final Map<String, String> shiftedTops = new HashMap<String, String>();
    }

    /** Builds the {@code "row,col"} key used by {@link MergeInfo}. */
    private static String key(int row, int col) {
        return row + "," + col;
    }

    /** A row counts as hidden when it exists and is flagged zero-height or has height 0. */
    private static boolean isHidden(Row row) {
        return row != null && (row.getZeroHeight() || row.getHeight() == 0);
    }

    /**
     * Analyses the merged regions of {@code sheet} so that the renderer can emit
     * {@code rowspan}/{@code colspan} and skip covered cells. Only hidden rows are compensated for.
     *
     * @param sheet the sheet to analyse
     * @return the merge bookkeeping for this sheet
     */
    private static MergeInfo analyzeMergedRegions(Sheet sheet) {
        MergeInfo info = new MergeInfo();
        int mergedNum = sheet.getNumMergedRegions();
        for (int i = 0; i < mergedNum; i++) {
            CellRangeAddress range = sheet.getMergedRegion(i);
            int topRow = range.getFirstRow();
            int topCol = range.getFirstColumn();
            int bottomRow = range.getLastRow();
            int bottomCol = range.getLastColumn();
            if (topRow != bottomRow) {
                int hiddenBelowTop = 0;
                int visibleTop = topRow;
                for (int j = topRow; j <= bottomRow; j++) {
                    // Rows that were never created are treated as visible (getRow returns null).
                    if (isHidden(sheet.getRow(j))) {
                        if (j == visibleTop) {
                            // Leading hidden row: move the rendered top down instead of counting
                            // it, because the span is later computed from the new top.
                            visibleTop++;
                            continue;
                        }
                        hiddenBelowTop++;
                    }
                }
                if (visibleTop > bottomRow) {
                    continue; // every row of the region is hidden, nothing of it is rendered
                }
                if (visibleTop != topRow) {
                    info.shiftedTops.put(key(visibleTop, topCol), key(topRow, topCol));
                    topRow = visibleTop;
                }
                if (hiddenBelowTop != 0) {
                    info.hiddenRows.put(key(topRow, topCol), hiddenBelowTop);
                }
            }
            String topLeft = key(topRow, topCol);
            info.spans.put(topLeft, new int[]{bottomRow, bottomCol});
            for (int r = topRow; r <= bottomRow; r++) {
                for (int c = topCol; c <= bottomCol; c++) {
                    info.covered.put(key(r, c), topLeft);
                }
            }
            info.covered.remove(topLeft);
        }
        return info;
    }

    /**
     * Returns the value of the top-left cell of the merged region that contains the given cell.
     *
     * @param sheet  the sheet to search
     * @param row    row index of the cell
     * @param column column index of the cell
     * @return the region's value ("" when its top-left row or cell does not exist), or
     * {@code null} when the cell is not part of any merged region
     */
    public static String getMergedRegionValue(Sheet sheet, int row, int column) {
        int sheetMergeCount = sheet.getNumMergedRegions();
        for (int i = 0; i < sheetMergeCount; i++) {
            CellRangeAddress ca = sheet.getMergedRegion(i);
            boolean rowInside = row >= ca.getFirstRow() && row <= ca.getLastRow();
            boolean columnInside = column >= ca.getFirstColumn() && column <= ca.getLastColumn();
            if (rowInside && columnInside) {
                Row fRow = sheet.getRow(ca.getFirstRow());
                if (fRow == null) {
                    return ""; // the region's first row was never created
                }
                return getCellValue(fRow.getCell(ca.getFirstColumn()));
            }
        }
        return null;
    }

    /**
     * Reads a cell as text.
     *
     * <p>The cell is converted to a string cell first, which changes the in-memory workbook.
     *
     * @param cell the cell, may be {@code null}
     * @return the cell text, "" for a {@code null} cell
     */
    private static String getCellValue(Cell cell) {
        if (cell == null) {
            return "";
        }
        cell.setCellType(CellType.STRING);
        return cell.getStringCellValue();
    }

    /**
     * Maps an Excel vertical alignment to a value for the HTML {@code valign} attribute.
     * {@code CENTER} maps to "middle" because "center" is not a valid {@code valign} value;
     * unknown alignments also fall back to "middle".
     */
    private static String getVAlignByExcel(VerticalAlignment align) {
        if (align == VerticalAlignment.TOP) {
            return "top";
        }
        if (align == VerticalAlignment.BOTTOM) {
            return "bottom";
        }
        if (align == VerticalAlignment.JUSTIFY) {
            return "justify";
        }
        return "middle";
    }

    /**
     * Maps an Excel horizontal alignment to a value for the HTML {@code align} attribute;
     * anything other than right, justify or center is rendered as "left".
     */
    protected static String getHAlignByExcel(HorizontalAlignment align) {
        if (align == HorizontalAlignment.RIGHT) {
            return "right";
        }
        if (align == HorizontalAlignment.JUSTIFY) {
            return "justify";
        }
        if (align == HorizontalAlignment.CENTER) {
            return "center";
        }
        return "left";
    }

    /** Left-pads a one-character (or empty) string with "0"; other values are returned as is. */
    private static String fillWithZero(String str) {
        if (str != null && str.length() < 2) {
            return "0" + str;
        }
        return str;
    }

    /** Escapes {@code &}, {@code <} and {@code >}; {@code null} becomes "". */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

}
import android.util.Log;
import android.util.Xml;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.xmlpull.v1.XmlPullParser;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * Created by WYJ on 2020/4/17 22:07.
 *
 * <p>Converts a Word document ({@code .doc} via POI HWPF, {@code .docx} by pull-parsing
 * {@code word/document.xml}) into a simple HTML file. The HTML file and the extracted pictures are
 * created through {@code WordFileUtil.createFile} below the directory passed to the constructor.
 * All text is written as UTF-8, matching the {@code <meta charset>} of the generated page.
 *
 * <p>Instances keep conversion state in fields and are not designed for concurrent use.
 */
public class WordToHtml {
    private final static String TAG = "WordToHtml";
    /** Path of the generated HTML file. */
    public String htmlPath;
    private String docPath;
    /** Base directory handed to {@code WordFileUtil.createFile} for the HTML file and pictures. */
    private String outputDir;
    private String picturePath;
    private List<Picture> pictures;
    private TableIterator tableIterator;
    /** Index of the next picture to write; also used to number the picture files. */
    private int presentPicture = 0;
    private FileOutputStream output;

    private String htmlBegin = "<html><meta charset=\"utf-8\"><body>";
    private String htmlEnd = "</body></html>";
    private String tableBegin = "<table  align=\"center\" style=\"border-collapse:collapse\" border=1 bordercolor=\"black\">";
    private String tableEnd = "</table>";
    private String rowBegin = "<tr>", rowEnd = "</tr>";
    private String columnBegin = "<td>", columnEnd = "</td>";
    private String lineBegin = "<p>", lineEnd = "</p>";
    private String centerBegin = "<center>",
            centerEnd = "</center>";
    private String boldBegin = "<b>", boldEnd = "</b>";
    private String underlineBegin = "<u>", underlineEnd = "</u>";
    private String italicBegin = "<i>", italicEnd = "</i>";
    private String fontSizeTag = "<font size=\"%d\">";
    private String fontColorTag = "<font color=\"%s\">";
    private String fontEnd = "</font>";
    private String spanColor = "<span style=\"color:%s;\">", spanEnd = "</span>";
    private String divRight = "<div align=\"right\">", divEnd = "</div>";
    private String imgBegin = "<img src=\"file://%s\" />";

    /**
     * @param doc_name path of the Word document to convert
     * @param pdfpath  base directory for the generated HTML file and pictures
     */
    public WordToHtml(String doc_name, File pdfpath) {
        docPath = doc_name;
        outputDir = pdfpath.getAbsolutePath();
        htmlPath = WordFileUtil.createFile(outputDir, WordFileUtil.getFileName(docPath) + ".html");
        Log.d(TAG, "htmlPath=" + htmlPath);
    }

    /**
     * Converts the document, writes the result to {@link #htmlPath} and returns it as a string.
     *
     * @return the generated HTML, or {@code null} when the HTML file cannot be written
     */
    public String getContent() {
        try (FileOutputStream out = new FileOutputStream(new File(htmlPath))) {
            output = out;
            presentPicture = 0;
            write(htmlBegin);
            if (hasExtension(docPath, ".doc")) {
                readDOC();
            } else if (hasExtension(docPath, ".docx")) {
                readDOCX();
            }
            write(htmlEnd);
        } catch (Exception e) {
            Log.e(TAG, "Failed to convert " + docPath, e);
            return null;
        }
        // Read back only after the stream has been closed (and therefore flushed).
        return getString(htmlPath);
    }

    /**
     * Reads a UTF-8 text file line by line, terminating every line with {@code "\n"}.
     *
     * @param htmlPath path of the file to read
     * @return the file content; whatever was read so far when an I/O error occurs
     */
    public static String getString(String htmlPath) {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(htmlPath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
                sb.append("\n");
            }
        } catch (IOException e) {
            Log.e(TAG, "Failed to read " + htmlPath, e);
        }
        return sb.toString();
    }

    /** Reads a binary {@code .doc} file and writes its paragraphs, tables and pictures as HTML. */
    private void readDOC() {
        try (FileInputStream in = new FileInputStream(docPath)) {
            POIFSFileSystem pfs = new POIFSFileSystem(in);
            HWPFDocument hwpf = new HWPFDocument(pfs);
            Range range = hwpf.getRange();
            pictures = hwpf.getPicturesTable().getAllPictures();
            tableIterator = new TableIterator(range);
            int numParagraphs = range.numParagraphs();
            for (int i = 0; i < numParagraphs; i++) {
                Paragraph p = range.getParagraph(i);
                if (p.isInTable()) {
                    // temp walks the document-level paragraph index through the whole table.
                    int temp = i;
                    if (tableIterator.hasNext()) {
                        Table table = tableIterator.next();
                        write(tableBegin);
                        int rows = table.numRows();
                        for (int r = 0; r < rows; r++) {
                            write(rowBegin);
                            TableRow row = table.getRow(r);
                            int cols = row.numCells();
                            int rowNumParagraphs = row.numParagraphs();
                            int colsNumParagraphs = 0;
                            for (int c = 0; c < cols; c++) {
                                write(columnBegin);
                                TableCell cell = row.getCell(c);
                                int max = temp + cell.numParagraphs();
                                colsNumParagraphs = colsNumParagraphs + cell.numParagraphs();
                                for (int cp = temp; cp < max; cp++) {
                                    Paragraph p1 = range.getParagraph(cp);
                                    write(lineBegin);
                                    writeParagraphContent(p1);
                                    write(lineEnd);
                                    temp++;
                                }
                                write(columnEnd);
                            }
                            // Skip paragraphs that belong to the row but to none of its cells
                            // (presumably the row-end mark).
                            temp += Math.max(0, rowNumParagraphs - colsNumParagraphs);
                            write(rowEnd);
                        }
                        write(tableEnd);
                        if (temp > i) {
                            // temp is the first paragraph after the table; the loop's i++ moves on
                            // to it, so step back by one or that paragraph would be skipped.
                            i = temp - 1;
                        }
                    }
                } else {
                    write(lineBegin);
                    writeParagraphContent(p);
                    write(lineEnd);
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "Failed to read " + docPath, e);
        }
    }

    /**
     * Reads a {@code .docx} file by pull-parsing {@code word/document.xml} and translating the
     * elements it recognises (by local name) into HTML tags.
     */
    private void readDOCX() {
        try (ZipFile docxFile = new ZipFile(new File(docPath))) {
            ZipEntry documentXml = docxFile.getEntry("word/document.xml");
            if (documentXml == null) {
                Log.e(TAG, "word/document.xml is missing from " + docPath);
                return;
            }
            try (InputStream inputStream = docxFile.getInputStream(documentXml)) {
                XmlPullParser xmlParser = Xml.newPullParser();
                xmlParser.setInput(inputStream, "utf-8");
                boolean isTable = false; // inside a table
                boolean isSize = false; // a <font size> tag is open
                boolean isColor = false; // a colour <span> is open
                boolean isCenter = false; // a <center> tag is open
                boolean isRight = false; // a right-aligned <div> is open
                boolean isItalic = false; // next text is italic
                boolean isUnderline = false; // next text is underlined
                boolean isBold = false; // next text is bold
                boolean isRegion = false; // inside a run ("r") element
                int pic_ndex = 1; // picture entries are named image1, image2, ...
                int event_type = xmlParser.getEventType();
                while (event_type != XmlPullParser.END_DOCUMENT) {
                    switch (event_type) {
                        case XmlPullParser.START_TAG:
                            String tagBegin = xmlParser.getName();
                            if (tagBegin.equalsIgnoreCase("r")) {
                                isRegion = true;
                            }
                            if (tagBegin.equalsIgnoreCase("jc")) { // alignment
                                String align = valAttribute(xmlParser);
                                if ("center".equals(align) && !isCenter) {
                                    write(centerBegin);
                                    isCenter = true;
                                }
                                if ("right".equals(align) && !isRight) {
                                    write(divRight);
                                    isRight = true;
                                }
                            }
                            if (tagBegin.equalsIgnoreCase("color")) { // text colour
                                String color = valAttribute(xmlParser);
                                if (color != null) {
                                    write(String.format(spanColor, cssColor(color)));
                                    isColor = true;
                                }
                            }
                            if (tagBegin.equalsIgnoreCase("sz") && isRegion) { // font size
                                String sizeValue = valAttribute(xmlParser);
                                if (sizeValue != null) {
                                    int size = getSize(Integer.parseInt(sizeValue));
                                    write(String.format(fontSizeTag, size));
                                    isSize = true;
                                }
                            }
                            if (tagBegin.equalsIgnoreCase("tbl")) {
                                write(tableBegin);
                                isTable = true;
                            } else if (tagBegin.equalsIgnoreCase("tr")) {
                                write(rowBegin);
                            } else if (tagBegin.equalsIgnoreCase("tc")) {
                                write(columnBegin);
                            }
                            if (tagBegin.equalsIgnoreCase("pic")) {
                                ZipEntry pic_entry = WordFileUtil.getPicEntry(docxFile, pic_ndex);
                                if (pic_entry != null) {
                                    byte[] pictureBytes = WordFileUtil.getPictureBytes(docxFile, pic_entry);
                                    writeDocumentPicture(pictureBytes);
                                }
                                pic_ndex++;
                            }
                            if (tagBegin.equalsIgnoreCase("p") && !isTable) { // paragraphs inside tables get no <p>
                                write(lineBegin);
                            }
                            // b / u / i may carry an explicit "off" value, which must not enable the style.
                            if (tagBegin.equalsIgnoreCase("b")) {
                                isBold = isToggleOn(xmlParser);
                            }
                            if (tagBegin.equalsIgnoreCase("u")) {
                                isUnderline = isToggleOn(xmlParser);
                            }
                            if (tagBegin.equalsIgnoreCase("i")) {
                                isItalic = isToggleOn(xmlParser);
                            }
                            if (tagBegin.equalsIgnoreCase("t")) { // text
                                if (isBold) {
                                    write(boldBegin);
                                }
                                if (isUnderline) {
                                    write(underlineBegin);
                                }
                                if (isItalic) {
                                    write(italicBegin);
                                }
                                // nextText() returns decoded text, so markup characters are re-escaped.
                                write(escapeHtml(xmlParser.nextText()));
                                // Close in reverse order of opening.
                                if (isItalic) {
                                    write(italicEnd);
                                    isItalic = false;
                                }
                                if (isUnderline) {
                                    write(underlineEnd);
                                    isUnderline = false;
                                }
                                if (isBold) {
                                    write(boldEnd);
                                    isBold = false;
                                }
                                if (isSize) {
                                    write(fontEnd);
                                    isSize = false;
                                }
                                if (isColor) {
                                    write(spanEnd);
                                    isColor = false;
                                }
                                if (isCenter) { // closed before the paragraph ends because <center> forces a line break
                                    write(centerEnd);
                                    isCenter = false;
                                }
                                if (isRight) {
                                    write(divEnd);
                                    isRight = false;
                                }
                            }
                            break;
                        case XmlPullParser.END_TAG:
                            String tagEnd = xmlParser.getName();
                            if (tagEnd.equalsIgnoreCase("tbl")) {
                                write(tableEnd);
                                isTable = false;
                            }
                            if (tagEnd.equalsIgnoreCase("tr")) {
                                write(rowEnd);
                            }
                            if (tagEnd.equalsIgnoreCase("tc")) {
                                write(columnEnd);
                            }
                            if (tagEnd.equalsIgnoreCase("p")) {
                                // A paragraph without text never reaches the closing code above,
                                // so balance any alignment tag that is still open.
                                if (isCenter) {
                                    write(centerEnd);
                                    isCenter = false;
                                }
                                if (isRight) {
                                    write(divEnd);
                                    isRight = false;
                                }
                                if (!isTable) {
                                    write(lineEnd);
                                }
                            }
                            if (tagEnd.equalsIgnoreCase("r")) {
                                isRegion = false;
                            }
                            break;
                        default:
                            break;
                    }
                    event_type = xmlParser.next();
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "Failed to read " + docPath, e);
        }
    }

    /** Maps a Word font size value to an HTML {@code <font size>} value between 1 and 7. */
    private int getSize(int sizeType) {
        if (sizeType >= 1 && sizeType <= 8) {
            return 1;
        } else if (sizeType >= 9 && sizeType <= 11) {
            return 2;
        } else if (sizeType >= 12 && sizeType <= 14) {
            return 3;
        } else if (sizeType >= 15 && sizeType <= 19) {
            return 4;
        } else if (sizeType >= 20 && sizeType <= 29) {
            return 5;
        } else if (sizeType >= 30 && sizeType <= 39) {
            return 6;
        } else if (sizeType >= 40) {
            return 7;
        } else {
            return 3;
        }
    }

    /** Maps a colour index of a {@code .doc} character run to a hex colour; unknown values are black. */
    private String getColor(int colorType) {
        if (colorType == 1) {
            return "#000000";
        } else if (colorType == 2) {
            return "#0000FF";
        } else if (colorType == 3 || colorType == 4) {
            return "#00FF00";
        } else if (colorType == 5 || colorType == 6) {
            return "#FF0000";
        } else if (colorType == 7) {
            return "#FFFF00";
        } else if (colorType == 8) {
            return "#FFFFFF";
        } else if (colorType == 9 || colorType == 15) {
            return "#CCCCCC";
        } else if (colorType == 10 || colorType == 11) {
            return "#00FF00";
        } else if (colorType == 12 || colorType == 16) {
            return "#080808";
        } else if (colorType == 13 || colorType == 14) {
            return "#FFFF00";
        } else {
            return "#000000";
        }
    }

    /**
     * Stores {@code pictureBytes} as the next numbered {@code .jpg} file next to the HTML file and
     * writes an {@code <img>} tag that points to it.
     *
     * @param pictureBytes raw picture data
     */
    public void writeDocumentPicture(byte[] pictureBytes) {
        String fileName = WordFileUtil.getFileName(docPath) + presentPicture + ".jpg";
        // Use the same base directory as the HTML file; a relative directory is not writable
        // from an Android process and would leave the <img> pointing at a file that was never created.
        picturePath = WordFileUtil.createFile(outputDir, fileName);
        WordFileUtil.writePicture(picturePath, pictureBytes);
        presentPicture++;
        String imageString = String.format(imgBegin, picturePath);
        try {
            write(imageString);
        } catch (Exception e) {
            Log.e(TAG, "Failed to write image tag for " + picturePath, e);
        }
    }

    /**
     * Writes the character runs of a {@code .doc} paragraph: runs that look like picture
     * placeholders emit the next picture, all other runs emit their (escaped) text with font
     * size, colour, bold and italic markup.
     *
     * @param paragraph the paragraph to write
     */
    public void writeParagraphContent(Paragraph paragraph) {
        int runCount = paragraph.numCharacterRuns();
        for (int j = 0; j < runCount; j++) {
            CharacterRun run = paragraph.getCharacterRun(j);
            if (run.getPicOffset() == 0 || run.getPicOffset() >= 1000) {
                if (pictures != null && presentPicture < pictures.size()) {
                    writeDocumentPicture(pictures.get(presentPicture).getContent());
                }
            } else {
                try {
                    String rawText = run.text();
                    String text = escapeHtml(rawText);
                    if (rawText.length() >= 2 && runCount < 2) {
                        // A paragraph made of a single run is written without any styling.
                        write(text);
                    } else {
                        write(String.format(fontSizeTag, getSize(run.getFontSize())));
                        write(String.format(fontColorTag, getColor(run.getColor())));
                        if (run.isBold()) {
                            write(boldBegin);
                        }
                        if (run.isItalic()) {
                            write(italicBegin);
                        }
                        write(text);
                        // Close in reverse order so that the tags nest properly.
                        if (run.isItalic()) {
                            write(italicEnd);
                        }
                        if (run.isBold()) {
                            write(boldEnd);
                        }
                        write(fontEnd);
                        write(fontEnd);
                    }
                } catch (Exception e) {
                    Log.e(TAG, "Failed to write character run", e);
                }
            }
        }
    }

    /** Writes {@code text} to the HTML file as UTF-8, the charset declared in the page header. */
    private void write(String text) throws IOException {
        output.write(text.getBytes(StandardCharsets.UTF_8));
    }

    /** Case-insensitive check whether {@code path} ends with {@code extension}. */
    private static boolean hasExtension(String path, String extension) {
        return path != null
                && path.regionMatches(true, path.length() - extension.length(), extension, 0, extension.length());
    }

    /**
     * Returns the value of the attribute whose name is {@code val}; when there is none, falls back
     * to the first attribute, and to {@code null} when the element has no attributes.
     */
    private static String valAttribute(XmlPullParser parser) {
        int count = parser.getAttributeCount();
        for (int i = 0; i < count; i++) {
            if ("val".equals(parser.getAttributeName(i))) {
                return parser.getAttributeValue(i);
            }
        }
        return count > 0 ? parser.getAttributeValue(0) : null;
    }

    /** An on/off element is on unless its value is one of "0", "false", "off" or "none". */
    private static boolean isToggleOn(XmlPullParser parser) {
        String val = valAttribute(parser);
        return !("0".equals(val) || "false".equals(val) || "off".equals(val) || "none".equals(val));
    }

    /** Prefixes a bare six-digit hex colour with '#'; other values are returned unchanged. */
    private static String cssColor(String color) {
        return color.matches("[0-9A-Fa-f]{6}") ? "#" + color : color;
    }

    /** Escapes {@code &}, {@code <} and {@code >}; {@code null} becomes "". */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}
/**
 * Static file helpers: deriving a base name from a path, creating a cache
 * file, and extracting embedded pictures from a .docx (zip) archive.
 */
public class WordFileUtil {

    private final static String TAG = "FileUtil";

    /** Image extensions probed under {@code word/media/}, in lookup order. */
    private static final String[] PICTURE_EXTENSIONS = {".jpeg", ".png", ".gif", ".wmf"};

    /**
     * Returns the file name between the last {@code '/'} and the last
     * {@code '.'} of a path.
     *
     * @param pathandname full path including the file name
     * @return the name without directory and extension, or {@code ""} when the
     *         path is {@code null}, contains no {@code '/'}, or has no
     *         {@code '.'} after the last {@code '/'}
     */
    public static String getFileName(String pathandname) {
        if (pathandname == null) {
            return "";
        }
        int start = pathandname.lastIndexOf("/");
        int end = pathandname.lastIndexOf(".");
        // end > start also rejects end == -1 and a dot that belongs to a
        // directory name (e.g. "/a.b/file"), which used to make substring() throw.
        if (start != -1 && end > start) {
            return pathandname.substring(start + 1, end);
        }
        return "";
    }

    /**
     * Creates {@code <dir_path1>/pdfCache/<file_name>}, creating the
     * directory (and any missing parents) when needed.
     *
     * <p>Errors are printed and not propagated; the path is returned either way.
     *
     * @param dir_path1 base directory under which {@code pdfCache} is placed
     * @param file_name name of the file to create inside {@code pdfCache}
     * @return the full path of the (possibly not created) file
     */
    public static String createFile(String dir_path1, String file_name) {
        String dir_path = dir_path1 + "/pdfCache";
        String file_path = String.format("%s/%s", dir_path, file_name);
        try {
            File dirFile = new File(dir_path);
            if (!dirFile.exists()) {
                // mkdirs() rather than mkdir(): the base directory itself may not exist yet.
                dirFile.mkdirs();
            }
            File myFile = new File(file_path);
            myFile.createNewFile();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return file_path;
    }

    /**
     * Looks up the picture entry {@code word/media/image<pic_index>.<ext>} in
     * a docx archive, trying jpeg, png, gif and wmf in that order.
     *
     * @param docxFile  the opened docx archive
     * @param pic_index numeric suffix of the image entry name
     * @return the first matching entry, or {@code null} when none exists
     */
    public static ZipEntry getPicEntry(ZipFile docxFile, int pic_index) {
        String entryPrefix = "word/media/image" + pic_index;
        for (String extension : PICTURE_EXTENSIONS) {
            ZipEntry pic_entry = docxFile.getEntry(entryPrefix + extension);
            if (pic_entry != null) {
                return pic_entry;
            }
        }
        return null;
    }

    /**
     * Reads the full content of a zip entry into a byte array.
     *
     * @param docxFile  the opened docx archive
     * @param pic_entry the entry to read
     * @return the entry's bytes, or {@code null} when an argument is
     *         {@code null} or reading fails
     */
    public static byte[] getPictureBytes(ZipFile docxFile, ZipEntry pic_entry) {
        if (docxFile == null || pic_entry == null) {
            return null;
        }
        // try-with-resources closes both streams exactly once, also on failure.
        try (InputStream pictIS = docxFile.getInputStream(pic_entry);
             ByteArrayOutputStream pOut = new ByteArrayOutputStream()) {
            if (pictIS == null) {
                return null;
            }
            byte[] buffer = new byte[8192];
            int len;
            while ((len = pictIS.read(buffer)) != -1) {
                pOut.write(buffer, 0, len);
            }
            byte[] pictureBytes = pOut.toByteArray();
            Log.d(TAG, "pictureBytes.length=" + pictureBytes.length);
            return pictureBytes;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Writes picture bytes to the given path, replacing any existing file.
     *
     * <p>Nothing is written when {@code pictureBytes} is {@code null}. I/O
     * errors are printed and not propagated.
     *
     * @param pic_path     destination file path
     * @param pictureBytes picture content to write
     */
    public static void writePicture(String pic_path, byte[] pictureBytes) {
        File myPicture = new File(pic_path);
        if (pictureBytes == null) {
            // Avoid leaving an empty file behind when there is nothing to write.
            return;
        }
        try (FileOutputStream outputPicture = new FileOutputStream(myPicture)) {
            outputPicture.write(pictureBytes);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 201,924评论 5 474
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 84,781评论 2 378
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 148,813评论 0 335
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,264评论 1 272
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,273评论 5 363
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,383评论 1 281
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,800评论 3 393
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,482评论 0 256
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,673评论 1 295
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,497评论 2 318
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,545评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,240评论 4 318
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,802评论 3 304
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,866评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,101评论 1 258
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,673评论 2 348
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,245评论 2 341

推荐阅读更多精彩内容