JAVA实现海量凭证数据导进Excel处理
这个功能最大的障碍(barrier)
由于PDF是非结构化格式,解析PDF单据获取字段数据时,使用表单域来获取并不现实。通过PdfReaderContentParser获取的文档内容,也只是把内容拼接成没有规律的String。这个需求要求数据精确到字段才可靠,靠截取String来解析显然行不通。底层解析是逐字符进行的,且字符顺序没有规律,所以只能通过字段所在的坐标范围来获取字段内容数据。
土豆的三种思路
一.
/*** Author 土豆张*/@Overrideprotected ModelAndView onSubmit(HttpServletRequest request, HttpServletResponse response, Object command, BindException errors) throws Exception {FileUploadForm form = (FileUploadForm) command;SimpleResult result = SimpleResult.create(false);if (form.getFile() == null || form.getFile().getSize() == 0) {result.setMessage("上传凭证或票据pdf文件");return new ModelAndView(new JsonView(result));}if (!"pdf".equals(FilenameUtils.getExtension(form.getFile().getOriginalFilename().toLowerCase()))) {result.setMessage("上传文件需要为PDF格式");return new ModelAndView(new JsonView(result));}InputStream inputStream = form.getFile().getInputStream();List<List<Map<String, String>>> listAll = exportPdfList(inputStream);final String path = "/excel模板/xls/fillbls.xls";Workbook workbook = ExcelLoader.loadXls(this.getClass().getResourceAsStream(path));List<ExcelRow> sheet = new ArrayList<>();for (int i = 0; i < listAll.size(); i++) {List<Map<String, String>> listdata = listAll.get(i);for (Map<String, String> map : listdata) {String orderNum = map.get("orderNum");String trackNum = map.get("trackNum");String serviceType = map.get("serviceType");String actualWeight = map.get("actualWeight");String actualWeightUnits = map.get("actualWeightUnits");String ratedWeight = map.get("ratedWeight");String ratedWeightUnits = map.get("ratedWeightUnits");String amount = map.get("amount");String chargeDesion1 = map.get("chargeDesion1");String chargeDesionCash1 = map.get("chargeDesionCash1");ExcelRow itemRow = new ExcelRow();itemRow.add(orderNum);itemRow.add(trackNum);itemRow.add(serviceType);itemRow.add(actualWeight);itemRow.add(actualWeightUnits);itemRow.add(ratedWeight);itemRow.add(ratedWeightUnits);itemRow.add(amount);itemRow.add(chargeDesion1);itemRow.add(chargeDesionCash1);sheet.add(itemRow);}}ExcelWriter.write(workbook, sheet, 0, 1);InputStream outStream = ExcelWriter.close(workbook);String fileName = "原始凭证.xls";fileName = java.net.URLEncoder.encode(fileName, "UTF-8");return new 
ModelAndView(new DownloadView(outStream, fileName));}//读取pdf数据 这里代码还有待优化。public List<List<Map<String, String>>> exportPdfList(InputStream inputStream) {List<List<Map<String, String>>> listAll = new ArrayList<>();try {Map<String, byte[]> pdfData = LabelSpliter.byPageNum(inputStream);List<Map.Entry<String, byte[]>> list = new ArrayList<>(pdfData.entrySet());Collections.sort(list, new Comparator<Map.Entry<String, byte[]>>() {public int compare(Map.Entry<String, byte[]> o1, Map.Entry<String, byte[]> o2) {return (new Integer(o1.getKey())).compareTo(new Integer(o2.getKey()));}});for (Map.Entry<String, byte[]> entry : list) {System.out.println(entry.getKey() + "-------------------------------------" + entry.getValue()+"-------------------------------------");byte[] pdfBypage = entry.getValue();InputStream inputfjsb = new ByteArrayInputStream(pdfBypage);PDDocument document = PDDocument.load(inputfjsb);if (!document.isEncrypted()) {PDFTextStripperByArea stripper = new PDFTextStripperByArea();stripper.setSortByPosition(true);PDFTextStripper tStripper = new PDFTextStripper();String pdfFileInText = tStripper.getText(document);listMap.add(map);listAll.add(listMap);return listAll;}}
二.
土豆第二种思路:将PDF票据凭证先存到数据库,通过数据库表结构设计后更符合甲方需求的传到Excel,将所有凭证数据一次性导入。
实现代码如下
1.将PDF导入数据库
注意:这里我数据库端口号是3306。
package com.server;import java.io.*;import java.sql.*;import java.util.*;import java.text.*;import java.util.regex.*;import org.apache.commons.fileupload.servlet.ServletFileUpload;import org.apache.commons.fileupload.disk.DiskFileItemFactory;import org.apache.commons.fileupload.*;import org.mortbay.jetty.Response;import javax.servlet.*;import javax.servlet.http.*;import java.io.*;import java.sql.*;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import javax.servlet.ServletInputStream.*;import java.io.PrintWriter;public class XmlServlet extends HttpServlet {undefinedpublic void doPost(HttpServletRequest req,HttpServletResponse res){undefinedFile uploadedFile;System.out.print("on server");try{undefinedClass.forName("com.mysql.jdbc.Driver");Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/image","root","root1");PrintWriter out=res.getWriter();//out.println("Content type is :: " +contentType);//to get the content type information from JSP Request HeaderString contentType = req.getContentType();int flag=0;FileInputStream fis=null;FileOutputStream fileOut=null;//here we are checking the content type is not equal to Null and as well as the passed data from mulitpart/form-data is greater than or equal to 0if ((contentType != null) && (contentType.indexOf("multipart/form-data") >= 0)){undefinedDataInputStream in = new DataInputStream(req.getInputStream());//we are taking the length of Content type dataint formDataLength = req.getContentLength();byte dataBytes[] = new byte[formDataLength];int byteRead = 0;int totalBytesRead = 0;//this loop converting the uploaded file into byte codewhile (totalBytesRead < formDataLength) {undefinedbyteRead = in.read(dataBytes, totalBytesRead,formDataLength);totalBytesRead += byteRead;}String file = new String(dataBytes);//for saving the file nameString saveFile = file.substring(file.indexOf("filename=\"") + 10);saveFile = 
saveFile.substring(0, saveFile.indexOf("\n"));out.println("savefiledddd"+saveFile);int extension_save=saveFile.lastIndexOf("\"");String extension_saveName=saveFile.substring(extension_save);//Here we are invoking the absolute path out of the encrypted datasaveFile = saveFile.substring(saveFile.lastIndexOf("\\")+ 1,saveFile.indexOf("\""));int lastIndex = contentType.lastIndexOf("=");String boundary = contentType.substring(lastIndex + 1,contentType.length());int pos;//extracting the index of filepos = file.indexOf("filename=\"");pos = file.indexOf("\n", pos) + 1;pos = file.indexOf("\n", pos) + 1;pos = file.indexOf("\n", pos) + 1;int boundaryLocation = file.indexOf(boundary, pos) - 4;int startPos = ((file.substring(0, pos)).getBytes()).length;int endPos = ((file.substring(0, boundaryLocation)).getBytes()).length;out.println("savefile"+saveFile);int file_No=22;uploadedFile=new File("./war/img");uploadedFile.mkdir();String kk=uploadedFile.getAbsolutePath();String pathname_dir=kk+"/"+saveFile;//String pathname_dir="C:\\Program Files\\Apache Software Foundation\\Tomcat 6.0\\jk\\"+saveFile;File filepath=new File(pathname_dir);out.println("filepath_ "+filepath);fileOut = new FileOutputStream(filepath);fileOut.write(dataBytes, startPos, (endPos - startPos));fileOut.flush();
2.导出到EXCEL
import org.apache.log4j.LogManager;import org.apache.log4j.Logger;import org.apache.poi.hssf.usermodel.HSSFCell;import org.apache.poi.hssf.usermodel.HSSFCellStyle;import org.apache.poi.hssf.usermodel.HSSFFont;import org.apache.poi.hssf.usermodel.HSSFRow;import org.apache.poi.hssf.usermodel.HSSFSheet;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hssf.util.CellRangeAddress;import org.apache.poi.hssf.util.HSSFColor;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.web.bind.annotation.GetMapping;import org.springframework.web.bind.annotation.RequestMapping;import org.springframework.web.bind.annotation.RestController;import java.io.FileOutputStream;import java.util.List;/*** ClassName:FactoryCreateTransactionController* Description:*/@RestController@RequestMapping("/factory/create/transaction")public class FactoryCreateTransactionController {private static final Logger logger = LogManager.getLogger(FactoryCreateTransactionController.class);@Autowiredprivate FactoryCreateTransactionService factoryCreateTransactionService;// 调用数据库数据生成excel表格@GetMapping("/Get/gasPrice")public String get() throws Exception {List<FactoryCreateTransaction> all = factoryCreateTransactionService.findAll();/* List listTimestamp = new ArrayList();List listGasPrice = new ArrayList();for(int i=0; i<all.size(); i++){String timestamp = all.get(i).getTimeStamp();String gasprice = all.get(i).getGasPrice();listTimestamp.add(timestamp);listGasPrice.add(gasprice);logger.info("timestamp ========== " + timestamp);logger.info("gasPrice ========== " + gasprice);}*/String sheetName = "gasPrice统计表单";String titleName = "gasPrice统计表";String fileName = "gasPriceAll";int columnNumber = 3;int[] columnWidth = { 10, 20, 30 };String[][] dataList = { {String.valueOf(all.get(0).getId()), all.get(0).getTimeStamp(), all.get(0).getGasPrice()},{String.valueOf(all.get(1).getId()), all.get(1).getTimeStamp(), 
all.get(1).getGasPrice()},{String.valueOf(all.get(2).getId()), all.get(2).getTimeStamp(), all.get(2).getGasPrice()},{String.valueOf(all.get(3).getId()), all.get(3).getTimeStamp(), all.get(3).getGasPrice()},{String.valueOf(all.get(4).getId()), all.get(4).getTimeStamp(), all.get(4).getGasPrice()},{String.valueOf(all.get(5).getId()), all.get(5).getTimeStamp(), all.get(5).getGasPrice()},{String.valueOf(all.get(6).getId()), all.get(6).getTimeStamp(), all.get(6).getGasPrice()},{String.valueOf(all.get(7).getId()), all.get(7).getTimeStamp(), all.get(7).getGasPrice()},{String.valueOf(all.get(8).getId()), all.get(8).getTimeStamp(), all.get(8).getGasPrice()},{String.valueOf(all.get(9).getId()), all.get(9).getTimeStamp(), all.get(9).getGasPrice()},{String.valueOf(all.get(10).getId()), all.get(10).getTimeStamp(), all.get(10).getGasPrice()},{String.valueOf(all.get(11).getId()), all.get(11).getTimeStamp(), all.get(11).getGasPrice()},{String.valueOf(all.get(12).getId()), all.get(12).getTimeStamp(), all.get(12).getGasPrice()},{String.valueOf(all.get(13).getId()), all.get(13).getTimeStamp(), all.get(13).getGasPrice()}};String[] columnName = { "id", "timeStamp", "gasPrice" };ExportNoResponse(sheetName, titleName, fileName,columnNumber, columnWidth, columnName, dataList);return "success";}public static void ExportNoResponse(String sheetName, String titleName,String fileName, int columnNumber, int[] columnWidth,String[] columnName, String[][] dataList) throws Exception {if (columnNumber == columnWidth.length&& columnWidth.length == columnName.length) {// 第一步,创建一个webbook,对应一个Excel文件HSSFWorkbook wb = new HSSFWorkbook();// 第二步,在webbook中添加一个sheet,对应Excel文件中的sheetHSSFSheet sheet = wb.createSheet(sheetName);// sheet.setDefaultColumnWidth(15); //统一设置列宽for (int i = 0; i < columnNumber; i++){for (int j = 0; j <= i; j++){if (i == j){sheet.setColumnWidth(i, columnWidth[j] * 256); // 单独设置每列的宽}}}// 创建第0行 也就是标题HSSFRow row1 = sheet.createRow((int) 0);row1.setHeightInPoints(50);// 设备标题的高度// 
第三步创建标题的单元格样式style2以及字体样式headerFont1HSSFCellStyle style2 = wb.createCellStyle();style2.setAlignment(HSSFCellStyle.ALIGN_CENTER);style2.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER);style2.setFillForegroundColor(HSSFColor.LIGHT_TURQUOISE.index);style2.setFillPattern(HSSFCellStyle.SOLID_FOREGROUND);HSSFFont headerFont1 = (HSSFFont) wb.createFont(); // 创建字体样式headerFont1.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD); // 字体加粗headerFont1.setFontName("黑体"); // 设置字体类型headerFont1.setFontHeightInPoints((short) 15); // 设置字体大小style2.setFont(headerFont1); // 为标题样式设置字体样式HSSFCell cell1 = row1.createCell(0);// 创建标题第一列sheet.addMergedRegion(new CellRangeAddress(0, 0, 0,columnNumber - 1)); // 合并第0到第17列cell1.setCellValue(titleName); // 设置值标题cell1.setCellStyle(style2); // 设置标题样式// 创建第1行 也就是表头HSSFRow row = sheet.createRow((int) 1);row.setHeightInPoints(37);// 设置表头高度// 第四步,创建表头单元格样式 以及表头的字体样式HSSFCellStyle style = wb.createCellStyle();style.setWrapText(true);// 设置自动换行style.setAlignment(HSSFCellStyle.ALIGN_CENTER);style.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); // 创建一个居中格式style.setBottomBorderColor(HSSFColor.BLACK.index);style.setBorderBottom(HSSFCellStyle.BORDER_THIN);style.setBorderLeft(HSSFCellStyle.BORDER_THIN);style.setBorderRight(HSSFCellStyle.BORDER_THIN);style.setBorderTop(HSSFCellStyle.BORDER_THIN);HSSFFont headerFont = (HSSFFont) wb.createFont(); // 创建字体样式headerFont.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD); // 字体加粗headerFont.setFontName("黑体"); // 设置字体类型headerFont.setFontHeightInPoints((short) 10); // 设置字体大小style.setFont(headerFont); // 为标题样式设置字体样式// 第四.一步,创建表头的列for (int i = 0; i < columnNumber; i++){HSSFCell cell = row.createCell(i);cell.setCellValue(columnName[i]);cell.setCellStyle(style);}// 第五步,创建单元格,并设置值for (int i = 0; i < dataList.length; i++){row = sheet.createRow((int) i + 2);// 为数据内容设置特点新单元格样式1 自动换行 上下居中HSSFCellStyle zidonghuanhang = wb.createCellStyle();zidonghuanhang.setWrapText(true);// 设置自动换行zidonghuanhang.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); 
// 创建一个居中格式// 设置边框zidonghuanhang.setBottomBorderColor(HSSFColor.BLACK.index);zidonghuanhang.setBorderBottom(HSSFCellStyle.BORDER_THIN);zidonghuanhang.setBorderLeft(HSSFCellStyle.BORDER_THIN);zidonghuanhang.setBorderRight(HSSFCellStyle.BORDER_THIN);zidonghuanhang.setBorderTop(HSSFCellStyle.BORDER_THIN);// 为数据内容设置特点新单元格样式2 自动换行 上下居中左右也居中HSSFCellStyle zidonghuanhang2 = wb.createCellStyle();zidonghuanhang2.setWrapText(true);// 设置自动换行zidonghuanhang2.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER); // 创建一个上下居中格式zidonghuanhang2.setAlignment(HSSFCellStyle.ALIGN_CENTER);// 左右居中// 设置边框zidonghuanhang2.setBottomBorderColor(HSSFColor.BLACK.index);zidonghuanhang2.setBorderBottom(HSSFCellStyle.BORDER_THIN);zidonghuanhang2.setBorderLeft(HSSFCellStyle.BORDER_THIN);zidonghuanhang2.setBorderRight(HSSFCellStyle.BORDER_THIN);zidonghuanhang2.setBorderTop(HSSFCellStyle.BORDER_THIN);HSSFCell datacell = null;for (int j = 0; j < columnNumber; j++){datacell = row.createCell(j);datacell.setCellValue(dataList[i][j]);datacell.setCellStyle(zidonghuanhang2);}}// 第六步,将文件存到指定位置try {FileOutputStream fout = new FileOutputStream("D://"+fileName+".xls");wb.write(fout);String str = "导出" + fileName + "成功!";System.out.println(str);fout.close();} catch (Exception e) {e.printStackTrace();String str1 = "导出" + fileName + "失败!";System.out.println(str1);}} else {System.out.println("列数目长度名称三个数组长度要一致");}}}
三.
import com.spire.pdf.FileFormat;import com.spire.pdf.PdfDocument;public class ToXLS {public static void main(String[] args) {//创建PdfDocument实例PdfDocument pdf = new PdfDocument();//加载示例PDF文档pdf.loadFromFile("C:\\Users\\Test1\\Desktop\\Sample.pdf");//保存为Excelpdf.saveToFile("output/ToXLS.xlsx", FileFormat.XLSX);}}
最后,可以看到第二种方法代码量很大,因为它连接了数据库,对需求做出了更为“多态”的细分;相比之下,第三种方法比较水,虽然十几行代码就能搞定,但没有很好的延展性。
这里将持续更新互联网技术和一些故事,同时也会分享一些土豆认为有价值的财会知识和趣事(土豆本科学的财会)
如果文章对你的生活有帮助,可点在看或分享,感谢帅哥美女对土豆的支持。
