Java使用poi将word转换为html

使用 Apache POI 将 Word 文档转换为 HTML，支持 doc 和 docx 两种格式，转换后可以保留文档中的图片和样式。

1. 添加 Maven 依赖

<dependency> 
 <groupId>org.apache.poi</groupId> 
 <artifactId>poi</artifactId> 
 <version>3.14</version> 
</dependency> 
<dependency> 
 <groupId>org.apache.poi</groupId> 
 <artifactId>poi-scratchpad</artifactId> 
 <version>3.14</version> 
</dependency> 
<dependency> 
 <groupId>org.apache.poi</groupId> 
 <artifactId>poi-ooxml</artifactId> 
 <version>3.14</version> 
</dependency> 
<dependency> 
 <groupId>fr.opensagres.xdocreport</groupId> 
 <artifactId>xdocreport</artifactId> 
 <version>1.0.6</version> 
</dependency> 
<dependency> 
 <groupId>org.apache.poi</groupId> 
 <artifactId>poi-ooxml-schemas</artifactId> 
 <version>3.14</version> 
</dependency> 
<dependency> 
 <groupId>org.apache.poi</groupId> 
 <artifactId>ooxml-schemas</artifactId> 
 <version>1.3</version> 
</dependency> 

2.转换代码

import org.apache.poi.hwpf.HWPFDocument; 
import org.apache.poi.hwpf.converter.WordToHtmlConverter; 
import org.apache.poi.xwpf.converter.core.BasicURIResolver; 
import org.apache.poi.xwpf.converter.core.FileImageExtractor; 
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; 
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; 
import org.apache.poi.xwpf.usermodel.XWPFDocument; 
import org.w3c.dom.Document; 
 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.transform.OutputKeys; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.OutputStreamWriter; 
 
public class Test { 
  /**
   * Converts a binary Word document (.doc) to an HTML file.
   *
   * <p>Pictures embedded in the document are written to the image directory and
   * referenced from the generated HTML through the relative path
   * {@code image/<name>}. The source, target and image locations are the
   * hard-coded paths declared at the top of the method.
   *
   * @throws Exception if the source cannot be read, the image directory cannot
   *     be created, a picture cannot be saved, or the HTML cannot be written
   */
  void docToHtml() throws Exception {
    String sourceFileName = "C:\\doc\\test.doc";
    String targetFileName = "C:\\html\\test.html";
    String imagePathStr = "C:\\html\\image\\";

    File imageDir = new File(imagePathStr);
    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);

    // Save each picture and return the relative path to use in the HTML.
    wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
      try (FileOutputStream out = new FileOutputStream(new File(imageDir, name))) {
        out.write(content);
      } catch (java.io.IOException e) {
        // Fail loudly: otherwise the HTML would reference an image that was never written.
        throw new IllegalStateException("Failed to save image " + name + " to " + imageDir, e);
      }
      return "image/" + name;
    });

    // Keep the input stream open only while the document is loaded and processed.
    try (FileInputStream in = new FileInputStream(sourceFileName)) {
      HWPFDocument wordDocument = new HWPFDocument(in);
      // FileOutputStream does not create missing parent directories.
      if (!imageDir.isDirectory() && !imageDir.mkdirs()) {
        throw new java.io.IOException("Cannot create image directory: " + imageDir);
      }
      wordToHtmlConverter.processDocument(wordDocument);
    }

    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");

    // Write through an explicitly managed stream so the file handle is closed
    // here rather than left to the transformer implementation.
    try (FileOutputStream htmlOut = new FileOutputStream(targetFileName)) {
      DOMSource domSource = new DOMSource(wordToHtmlConverter.getDocument());
      serializer.transform(domSource, new StreamResult(htmlOut));
    }
  }
  /**
   * Converts an Office Open XML Word document (.docx) to an HTML file.
   *
   * <p>Pictures are extracted into the image directory and referenced from the
   * generated HTML under the {@code image} path. The source, target and image
   * locations are the hard-coded paths declared at the top of the method.
   *
   * @throws Exception if the source cannot be read or the conversion or the
   *     writing of the HTML file fails
   */
  public void docxToHtml() throws Exception {
    String sourceFileName = "D:\\ac\\00.docx";
    String targetFileName = "D:\\ac\\test.html";
    String imagePathStr = "D:\\ac\\image\\";

    XHTMLOptions options = XHTMLOptions.create();
    // Folder in which the extracted pictures are stored.
    options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
    // Path used for the pictures inside the generated HTML.
    options.URIResolver(new BasicURIResolver("image"));

    try (FileInputStream in = new FileInputStream(sourceFileName)) {
      // Load the document before opening the target, so a failed load does not
      // create or truncate the output file.
      XWPFDocument document = new XWPFDocument(in);
      try (FileOutputStream fileOut = new FileOutputStream(targetFileName);
          OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOut, "utf-8")) {
        XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
        xhtmlConverter.convert(document, outputStreamWriter, options);
      }
    }
  }

演示地址: https://www.xiaoyun.studio/app/preview.html

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持呐喊教程。

声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:notice#nhooo.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。