vlambda博客
学习文章列表

JAVA实现海量凭证数据导进Excel处理

这个功能最大的难点(barrier)

由于PDF是非结构化的格式,解析PDF单据获取字段数据时,使用表单域获取并不现实。PdfReaderContentParser获取的文档内容,也只是拼接成没有规律的String。这个需求必须将数据精确到字段才可靠,靠截取String来解析显然行不通。底层解析是逐字符进行的,字符顺序并无规律,所以只能通过字段所在的坐标范围来获取字段内容数据。

土豆的三种思路

一.

/*** Author 土豆张*/@Overrideprotected ModelAndView onSubmit(HttpServletRequest request, HttpServletResponse response, Object command, BindException errors) throws Exception {FileUploadForm form = (FileUploadForm) command;SimpleResult result = SimpleResult.create(false);if (form.getFile() == null || form.getFile().getSize() == 0) {result.setMessage("上传凭证或票据pdf文件");return new ModelAndView(new JsonView(result));}if (!"pdf".equals(FilenameUtils.getExtension(form.getFile().getOriginalFilename().toLowerCase()))) {result.setMessage("上传文件需要为PDF格式");return new ModelAndView(new JsonView(result));}InputStream inputStream = form.getFile().getInputStream();List<List<Map<StringString>>> listAll = exportPdfList(inputStream);final String path = "/excel模板/xls/fillbls.xls";Workbook workbook = ExcelLoader.loadXls(this.getClass().getResourceAsStream(path));List<ExcelRow> sheet = new ArrayList<>();for (int i = 0; i < listAll.size(); i++) {List<Map<String, String>> listdata = listAll.get(i);for (Map<String, String> map : listdata) {String orderNum = map.get("orderNum");String trackNum = map.get("trackNum");String serviceType = map.get("serviceType");String actualWeight = map.get("actualWeight");String actualWeightUnits = map.get("actualWeightUnits");String ratedWeight = map.get("ratedWeight");String ratedWeightUnits = map.get("ratedWeightUnits");String amount = map.get("amount"); String chargeDesion1 = map.get("chargeDesion1");String chargeDesionCash1 = map.get("chargeDesionCash1");ExcelRow itemRow = new ExcelRow();itemRow.add(orderNum);itemRow.add(trackNum);itemRow.add(serviceType);itemRow.add(actualWeight);itemRow.add(actualWeightUnits);itemRow.add(ratedWeight);itemRow.add(ratedWeightUnits);itemRow.add(amount);itemRow.add(chargeDesion1);itemRow.add(chargeDesionCash1);sheet.add(itemRow);}}ExcelWriter.write(workbook, sheet, 0, 1);InputStream outStream = ExcelWriter.close(workbook);String fileName = "原始凭证.xls";fileName = java.net.URLEncoder.encode(fileName, "UTF-8");return new 
ModelAndView(new DownloadView(outStream, fileName));//读取pdf数据  这里代码还有待优化。public List<List<Map<StringString>>> exportPdfList(InputStream inputStream) {List<List<Map<String, String>>> listAll = new ArrayList<>();try {Map<String, byte[]> pdfData = LabelSpliter.byPageNum(inputStream);List<Map.Entry<String, byte[]>> list = new ArrayList<>(pdfData.entrySet());Collections.sort(list, new Comparator<Map.Entry<String, byte[]>>() {public int compare(Map.Entry<String, byte[]> o1, Map.Entry<String, byte[]> o2) {return (new Integer(o1.getKey())).compareTo(new Integer(o2.getKey()));}});for (Map.Entry<String, byte[]> entry : list) {System.out.println(entry.getKey() + "-------------------------------------" + entry.getValue()+"-------------------------------------");byte[] pdfBypage = entry.getValue();InputStream inputfjsb = new ByteArrayInputStream(pdfBypage);PDDocument document = PDDocument.load(inputfjsb);if (!document.isEncrypted()) {PDFTextStripperByArea stripper = new PDFTextStripperByArea();stripper.setSortByPosition(true);PDFTextStripper tStripper = new PDFTextStripper();String pdfFileInText = tStripper.getText(document);listMap.add(map);listAll.add(listMap);return listAll;}}

二.

土豆第二种思路:先将PDF票据凭证存入数据库,按甲方需求设计好表结构后,再将所有凭证数据一次性导出到Excel。

实现代码如下

  1. 将PDF导入数据库

    注意:这里我数据库端口号是3306。

package com.server;
import java.io.*;
import java.io.*;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.sql.*;
import java.sql.*;
import java.text.*;
import java.util.*;
import java.util.regex.*;
import javax.servlet.*;
import javax.servlet.ServletInputStream.*;
import javax.servlet.http.*;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.*;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.mortbay.jetty.Response;
public class XmlServlet extends HttpServlet {undefined
public void doPost(HttpServletRequest req,HttpServletResponse res)
{undefined
File uploadedFile;
System.out.print("on server");
try{undefined
Class.forName("com.mysql.jdbc.Driver");
Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/image","root","root1");
PrintWriter out=res.getWriter();
//out.println("Content type is :: " +contentType);
//to get the content type information from JSP Request Header
String contentType = req.getContentType();
int flag=0;
FileInputStream fis=null;
FileOutputStream fileOut=null;
//here we are checking the content type is not equal to Null and as well as the passed data from mulitpart/form-data is greater than or equal to 0
if ((contentType != null) && (contentType.indexOf("multipart/form-data") >= 0))
{undefined
DataInputStream in = new DataInputStream(req.getInputStream());
//we are taking the length of Content type data
int formDataLength = req.getContentLength();
byte dataBytes[] = new byte[formDataLength];
int byteRead = 0;
int totalBytesRead = 0;
//this loop converting the uploaded file into byte code
while (totalBytesRead < formDataLength) {undefined
byteRead = in.read(dataBytes, totalBytesRead,formDataLength);
totalBytesRead += byteRead;
}
String file = new String(dataBytes);
//for saving the file name
String saveFile = file.substring(file.indexOf("filename=\"") + 10);
saveFile = saveFile.substring(0, saveFile.indexOf("\n"));
out.println("savefiledddd"+saveFile);
int extension_save=saveFile.lastIndexOf("\"");
String extension_saveName=saveFile.substring(extension_save);
//Here we are invoking the absolute path out of the encrypted data
saveFile = saveFile.substring(saveFile.lastIndexOf("\\")+ 1,saveFile.indexOf("\""));
int lastIndex = contentType.lastIndexOf("=");
String boundary = contentType.substring(lastIndex + 1,contentType.length());
int pos;
//extracting the index of file
pos = file.indexOf("filename=\"");
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
int boundaryLocation = file.indexOf(boundary, pos) - 4;
int startPos = ((file.substring(0, pos)).getBytes()).length;
int endPos = ((file.substring(0, boundaryLocation)).getBytes()).length;
out.println("savefile"+saveFile);
int file_No=22;
uploadedFile=new File("./war/img");
uploadedFile.mkdir();
String kk=uploadedFile.getAbsolutePath();
String pathname_dir=kk+"/"+saveFile;
//String pathname_dir="C:\\Program Files\\Apache Software Foundation\\Tomcat 6.0\\jk\\"+saveFile;
File filepath=new File(pathname_dir);
out.println("filepath_ "+filepath);
fileOut = new FileOutputStream(filepath);
fileOut.write(dataBytes, startPos, (endPos - startPos));
fileOut.flush();

2.导出到EXCEL

import org.apache.log4j.LogManager;import org.apache.log4j.Logger;import org.apache.poi.hssf.usermodel.HSSFCell;import org.apache.poi.hssf.usermodel.HSSFCellStyle;import org.apache.poi.hssf.usermodel.HSSFFont;import org.apache.poi.hssf.usermodel.HSSFRow;import org.apache.poi.hssf.usermodel.HSSFSheet;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hssf.util.CellRangeAddress;import org.apache.poi.hssf.util.HSSFColor;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.web.bind.annotation.GetMapping;import org.springframework.web.bind.annotation.RequestMapping;import org.springframework.web.bind.annotation.RestController; import java.io.FileOutputStream;import java.util.List; /** * ClassName:FactoryCreateTransactionController * Description: */@RestController@RequestMapping("/factory/create/transaction")public class FactoryCreateTransactionController { private static final Logger logger = LogManager.getLogger(FactoryCreateTransactionController.class);  @Autowired private FactoryCreateTransactionService factoryCreateTransactionService;   // 调用数据库数据生成excel表格 @GetMapping("/Get/gasPrice") public String get() throws Exception { List<FactoryCreateTransaction> all = factoryCreateTransactionService.findAll();/* List listTimestamp = new ArrayList(); List listGasPrice = new ArrayList(); for(int i=0; i<all.size(); i++){ String timestamp = all.get(i).getTimeStamp(); String gasprice = all.get(i).getGasPrice(); listTimestamp.add(timestamp); listGasPrice.add(gasprice); logger.info("timestamp ========== " + timestamp); logger.info("gasPrice ========== " + gasprice); }*/ String sheetName = "gasPrice统计表单"; String titleName = "gasPrice统计表"; String fileName = "gasPriceAll"; int columnNumber = 3; int[] columnWidth = { 10, 20, 30 }; String[][] dataList = { {String.valueOf(all.get(0).getId()), all.get(0).getTimeStamp(), all.get(0).getGasPrice()}, {String.valueOf(all.get(1).getId()), all.get(1).getTimeStamp(), 
all.get(1).getGasPrice()}, {String.valueOf(all.get(2).getId()), all.get(2).getTimeStamp(), all.get(2).getGasPrice()}, {String.valueOf(all.get(3).getId()), all.get(3).getTimeStamp(), all.get(3).getGasPrice()}, {String.valueOf(all.get(4).getId()), all.get(4).getTimeStamp(), all.get(4).getGasPrice()}, {String.valueOf(all.get(5).getId()), all.get(5).getTimeStamp(), all.get(5).getGasPrice()}, {String.valueOf(all.get(6).getId()), all.get(6).getTimeStamp(), all.get(6).getGasPrice()}, {String.valueOf(all.get(7).getId()), all.get(7).getTimeStamp(), all.get(7).getGasPrice()}, {String.valueOf(all.get(8).getId()), all.get(8).getTimeStamp(), all.get(8).getGasPrice()}, {String.valueOf(all.get(9).getId()), all.get(9).getTimeStamp(), all.get(9).getGasPrice()}, {String.valueOf(all.get(10).getId()), all.get(10).getTimeStamp(), all.get(10).getGasPrice()}, {String.valueOf(all.get(11).getId()), all.get(11).getTimeStamp(), all.get(11).getGasPrice()}, {String.valueOf(all.get(12).getId()), all.get(12).getTimeStamp(), all.get(12).getGasPrice()}, {String.valueOf(all.get(13).getId()), all.get(13).getTimeStamp(), all.get(13).getGasPrice()} }; String[] columnName = { "id", "timeStamp", "gasPrice" }; ExportNoResponse(sheetName, titleName, fileName, columnNumber, columnWidth, columnName, dataList);  return "success"; }  public static void ExportNoResponse(String sheetName, String titleName, String fileName, int columnNumber, int[] columnWidth, String[] columnName, String[][] dataList) throws Exception { if (columnNumber == columnWidth.length&& columnWidth.length == columnName.length) { // 第一步,创建一个webbook,对应一个Excel文件 HSSFWorkbook wb = new HSSFWorkbook(); // 第二步,在webbook中添加一个sheet,对应Excel文件中的sheet HSSFSheet sheet = wb.createSheet(sheetName); // sheet.setDefaultColumnWidth(15); //统一设置列宽 for (int i = 0; i < columnNumber; i++) { for (int j = 0; j <= i; j++) { if (i == j) { sheet.setColumnWidth(i, columnWidth[j] * 256); // 单独设置每列的宽 } } } // 创建第0行 也就是标题 HSSFRow row1 = sheet.createRow((int) 0); 
row1.setHeightInPoints(50);// 设备标题的高度 // 第三步创建标题的单元格样式style2以及字体样式headerFont1 HSSFCellStyle style2 = wb.createCellStyle(); style2.setAlignment(HSSFCellStyle.ALIGN_CENTER); style2.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); style2.setFillForegroundColor(HSSFColor.LIGHT_TURQUOISE.index); style2.setFillPattern(HSSFCellStyle.SOLID_FOREGROUND); HSSFFont headerFont1 = (HSSFFont) wb.createFont(); // 创建字体样式 headerFont1.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD); // 字体加粗 headerFont1.setFontName("黑体"); // 设置字体类型 headerFont1.setFontHeightInPoints((short) 15); // 设置字体大小 style2.setFont(headerFont1); // 为标题样式设置字体样式  HSSFCell cell1 = row1.createCell(0);// 创建标题第一列 sheet.addMergedRegion(new CellRangeAddress(0, 0, 0, columnNumber - 1)); // 合并第0到第17列 cell1.setCellValue(titleName); // 设置值标题 cell1.setCellStyle(style2); // 设置标题样式  // 创建第1行 也就是表头 HSSFRow row = sheet.createRow((int) 1); row.setHeightInPoints(37);// 设置表头高度  // 第四步,创建表头单元格样式 以及表头的字体样式 HSSFCellStyle style = wb.createCellStyle(); style.setWrapText(true);// 设置自动换行 style.setAlignment(HSSFCellStyle.ALIGN_CENTER); style.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); // 创建一个居中格式  style.setBottomBorderColor(HSSFColor.BLACK.index); style.setBorderBottom(HSSFCellStyle.BORDER_THIN); style.setBorderLeft(HSSFCellStyle.BORDER_THIN); style.setBorderRight(HSSFCellStyle.BORDER_THIN); style.setBorderTop(HSSFCellStyle.BORDER_THIN);  HSSFFont headerFont = (HSSFFont) wb.createFont(); // 创建字体样式 headerFont.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD); // 字体加粗 headerFont.setFontName("黑体"); // 设置字体类型 headerFont.setFontHeightInPoints((short) 10); // 设置字体大小 style.setFont(headerFont); // 为标题样式设置字体样式  // 第四.一步,创建表头的列 for (int i = 0; i < columnNumber; i++) { HSSFCell cell = row.createCell(i); cell.setCellValue(columnName[i]); cell.setCellStyle(style); }  // 第五步,创建单元格,并设置值 for (int i = 0; i < dataList.length; i++) { row = sheet.createRow((int) i + 2); // 为数据内容设置特点新单元格样式1 自动换行 上下居中 HSSFCellStyle zidonghuanhang = wb.createCellStyle(); 
zidonghuanhang.setWrapText(true);// 设置自动换行 zidonghuanhang .setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); // 创建一个居中格式  // 设置边框 zidonghuanhang.setBottomBorderColor(HSSFColor.BLACK.index); zidonghuanhang.setBorderBottom(HSSFCellStyle.BORDER_THIN); zidonghuanhang.setBorderLeft(HSSFCellStyle.BORDER_THIN); zidonghuanhang.setBorderRight(HSSFCellStyle.BORDER_THIN);                zidonghuanhang.setBorderTop(HSSFCellStyle.BORDER_THIN); // 为数据内容设置特点新单元格样式2 自动换行 上下居中左右也居中 HSSFCellStyle zidonghuanhang2 = wb.createCellStyle(); zidonghuanhang2.setWrapText(true);// 设置自动换行 zidonghuanhang2 .setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); // 创建一个上下居中格式                zidonghuanhang2.setAlignment(HSSFCellStyle.ALIGN_CENTER);// 左右居中 // 设置边框 zidonghuanhang2.setBottomBorderColor(HSSFColor.BLACK.index); zidonghuanhang2.setBorderBottom(HSSFCellStyle.BORDER_THIN); zidonghuanhang2.setBorderLeft(HSSFCellStyle.BORDER_THIN); zidonghuanhang2.setBorderRight(HSSFCellStyle.BORDER_THIN); zidonghuanhang2.setBorderTop(HSSFCellStyle.BORDER_THIN); HSSFCell datacell = null; for (int j = 0; j < columnNumber; j++) { datacell = row.createCell(j); datacell.setCellValue(dataList[i][j]); datacell.setCellStyle(zidonghuanhang2); }            } // 第六步,将文件存到指定位置 try { FileOutputStream fout = new FileOutputStream("D://"+fileName+".xls");  wb.write(fout); String str = "导出" + fileName + "成功!"; System.out.println(str); fout.close(); } catch (Exception e) { e.printStackTrace(); String str1 = "导出" + fileName + "失败!"; System.out.println(str1); } } else { System.out.println("列数目长度名称三个数组长度要一致");        } }}

三.

import com.spire.pdf.FileFormat;import com.spire.pdf.PdfDocument;
public class ToXLS { public static void main(String[] args) { //创建PdfDocument实例 PdfDocument pdf = new PdfDocument(); //加载示例PDF文档 pdf.loadFromFile("C:\\Users\\Test1\\Desktop\\Sample.pdf"); //保存为Excel pdf.saveToFile("output/ToXLS.xlsx", FileFormat.XLSX); }}

最后,可以看到第二种方法代码量很大,因为它连接了数据库,能够对需求做更为“多态”的细分;相比之下,第三种方法虽然十几行代码就能搞定,但没有很好的延展性。

(我就知道你会点赞关注加“在看”)

这里将持续更新互联网技术和一些故事,同时也会分享一些土豆认为有价值的财会知识和趣事(土豆本科学的财会)

如果文章对你的生活有帮助,可点在看或分享,感谢帅哥美女对土豆的支持。