利用Tornado搭建文档预览系统
本项目现在已支持8种文档格式的预览,分别为:
- text/html: 如html文件等;
- text/plain: 如txt/log文件等;
- text/csv: csv文件;
- application/json: json文件;
- application/pdf: pdf文件;
- text/x-python: Python脚本文件;
- image/*: 各种图片文件,比如jpg, png等;
- markdown文件
准备工作
项目代码
# Python 3: serve the current directory over HTTP on port 8081.
python -m http.server 8081
# Python 2 equivalent of the command above.
python -m SimpleHTTPServer 8081
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>文件上传</title>
</head>
<body>
<div align="center">
    <br><br>
    <h1>文件上传</h1>
    <!-- Submits the chosen file as multipart/form-data via POST to the relative URL "file". -->
    <form action='file' enctype="multipart/form-data" method='post'>
        <div class="am-form-group am-form-file">
            <!-- The upload is sent under the form field name "file". -->
            <input id="doc-form-file" type="file" name="file" multiple>
        </div>
        <div id="file-list"></div>
        <p><button type="submit" class="am-btn am-btn-default">提交</button></p>
    </form>
</div>
</body>
</html>
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Markdown文件展示</title>
    <!-- showdown must be loaded before convert() runs; without it `showdown` is undefined. -->
    <script src="https://cdn.jsdelivr.net/npm/showdown@1.9.1/dist/showdown.min.js"></script>
    <script>
        // Convert the Markdown text injected by the template into HTML and show it in #result.
        function convert() {
            var converter = new showdown.Converter();
            var text = "{{ md_content }}";
            // The injected text carries the literal token "newline" in place of line breaks
            // (a raw line break would terminate the JS string literal); restore them here.
            var html = converter.makeHtml(text.replace(/newline/g, "\n"));
            document.getElementById("result").innerHTML = html;
        }
    </script>
</head>
<body onload="convert()">
<div id="result" ></div>
</body>
</html>
# -*- coding: utf-8 -*-import osimport loggingimport tracebackimport tornado.ioloopimport tornado.webfrom tornado import optionsfrom parse_file import *# 文档上传与解析class UploadFileHandler(tornado.web.RequestHandler):# get函数def get(self):self.render('upload.html')def post(self):# 文件的存放路径upload_path = os.path.join(os.path.dirname(__file__), 'pdfjs/web/files')# 提取表单中‘name’为‘file’的文件元数据# 暂时只支持单文档的上传file_meta = self.request.files['file'][0]filename = file_meta['filename']# 保存文件with open(os.path.join(upload_path, filename), 'wb') as up:up.write(file_meta['body'])text = file_meta["body"]# 解析文件的内容mtype = file_meta["content_type"]logging.info('POST "%s" "%s" %d bytes', filename, mtype, len(text))if mtype in ["text/x-python", "text/x-python-script"]:self.write(parse_python(str(text, encoding="utf-8")))elif mtype in ["text/plain", "text/csv"]:self.write(parse_text_plain(str(text, encoding="utf-8")))elif mtype == "text/html":self.write(str(text, encoding="utf-8"))elif mtype.startswith("image"):self.write(parse_image(mtype, text))elif mtype == "application/json":self.write(parse_application_json(str(text, encoding="utf-8")))elif mtype == "application/pdf":self.redirect("http://127.0.0.1:8081/web/viewer.html?file=files/%s" % filename)elif mtype == "application/octet-stream" and filename.endswith(".md"):self.render("markdown.html", md_content=r"%s" % str(text, encoding="utf-8").replace("\n", "newline"))else: # 其余文件格式try:self.write(str(text, encoding="utf-8").replace("\n", "<br>"))except Exception:logging.error(traceback.format_exc())self.write('<font color=red>系统不支持的文件解析格式!</font>')def make_app():return tornado.web.Application([(r"/file", UploadFileHandler)],template_path=os.path.join(os.path.dirname(__file__), "templates")) # 模板路径if __name__ == "__main__":# Tornado configures logging.options.parse_command_line()app = make_app()app.listen(8888)tornado.ioloop.IOLoop.current().start()parse_file.py用于解析各种格式的文档,并返回HTML展示的格式,完整代码如下:
# -*- coding: utf-8 -*-
# author: Jclian91
# place: Pudong Shanghai
# time: 2020/6/5 1:05 PM
# filename: parse_file.py
# Parses the content of various file types into HTML for preview.
import json
import base64
import logging
import traceback
from html import escape
from json import JSONDecodeError


def parse_text_plain(text):
    """Return *text* (text/plain or text/csv content) as an HTML page.

    HTML metacharacters are escaped so the content is displayed literally,
    and line breaks become ``<br>``.
    """
    return "<html><head></head><body>%s</body></html>" % escape(text, quote=False).replace("\n", "<br>")


def _render_json_as_html(data):
    """Pretty-print *data* as JSON, keeping line breaks and indentation visible in HTML."""
    pretty = json.dumps(data, ensure_ascii=False, indent=2)
    # Escape first so the <br>/&nbsp; markup added afterwards is not escaped itself.
    return escape(pretty, quote=False).replace("\n", "<br>").replace(" ", "&nbsp;")


def parse_application_json(text):
    """Return *text* (a JSON document, or one JSON value per line) as indented HTML.

    Returns an error message string instead of raising when the text cannot
    be parsed as JSON.
    """
    try:
        return _render_json_as_html(json.loads(text))
    except JSONDecodeError:
        # Not a single JSON document: fall back to one JSON value per non-empty line.
        try:
            return _render_json_as_html([json.loads(_) for _ in text.split("\n") if _])
        except JSONDecodeError:
            logging.error(traceback.format_exc())
            return "JSON文件格式解析错误"
    except Exception as err:
        logging.error(traceback.format_exc())
        return "未知错误: %s" % err


def parse_image(mtype, text):
    """Return an HTML page embedding the image bytes *text* as a base64 data URI.

    *mtype* is the image MIME type (e.g. ``image/png``) placed in the data URI.
    """
    return '<html><head></head><body><img src="data:%s;base64,%s"></body></html>' % \
           (mtype, str(base64.b64encode(text), "utf-8"))


def parse_python(text):
    """Return Python source *text* as an HTML page with naive keyword colouring.

    Colouring is plain substring replacement, not tokenisation.
    """
    # Escape first so source such as "a < b" is shown literally.
    text = escape(text, quote=False)
    # Preserve line breaks and indentation.
    text = text.replace("\n", "<br>").replace(" ", "&nbsp;").replace("\t", "&nbsp;" * 4)
    # Keyword colours; the inserted tags use unquoted attributes because '"' is itself coloured.
    color_list = ["gray", "red", "green", "blue", "orange", "purple", "pink", "brown", "wheat", "seagreen",
                  "orchid", "olive"]
    key_words = ["self", "from", "import", "def", ":", "return", "open", "class", "try", "except", '"', "print"]
    for word, color in zip(key_words, color_list):
        text = text.replace(word, '<font color=%s>%s</font>' % (color, word))
    # Brackets and '#' all share one colour.
    for punctuation in "[](){}#":
        text = text.replace(punctuation, '<font color=peru>%s</font>' % punctuation)
    return "<html><head></head><body>%s</body></html>" % text
实现方式
text/html: 如html文件等
self.write(str(text, encoding="utf-8"))
text/plain: txt/log文件等
def parse_text_plain(text):
    """Return *text* (text/plain or text/csv content) as an HTML page.

    HTML metacharacters are escaped so the content is displayed literally,
    and line breaks become ``<br>``.
    """
    from html import escape  # local import keeps this function self-contained

    return "<html><head></head><body>%s</body></html>" % escape(text, quote=False).replace("\n", "<br>")
text/csv: csv文件
application/json: json文件
def _render_json_as_html(data):
    """Pretty-print *data* as JSON, keeping line breaks and indentation visible in HTML."""
    from html import escape  # local import keeps this helper self-contained

    pretty = json.dumps(data, ensure_ascii=False, indent=2)
    # Escape first so the <br>/&nbsp; markup added afterwards is not escaped itself.
    return escape(pretty, quote=False).replace("\n", "<br>").replace(" ", "&nbsp;")


def parse_application_json(text):
    """Return *text* (a JSON document, or one JSON value per line) as indented HTML.

    Returns an error message string instead of raising when the text cannot
    be parsed as JSON.
    """
    try:
        return _render_json_as_html(json.loads(text))
    except JSONDecodeError:
        # Not a single JSON document: fall back to one JSON value per non-empty line.
        try:
            return _render_json_as_html([json.loads(_) for _ in text.split("\n") if _])
        except JSONDecodeError:
            logging.error(traceback.format_exc())
            return "JSON文件格式解析错误"
    except Exception as err:
        logging.error(traceback.format_exc())
        return "未知错误: %s" % err
application/pdf: pdf文件
elif mtype == "application/pdf":self.redirect("http://127.0.0.1:8081/web/viewer.html?file=files/%s" % filename)
text/x-python: Python脚本文件
def parse_python(text):
    """Return Python source *text* as an HTML page with naive keyword colouring.

    Colouring is plain substring replacement, not tokenisation.
    """
    from html import escape  # local import keeps this function self-contained

    # Escape first so source such as "a < b" is shown literally.
    text = escape(text, quote=False)
    # Preserve line breaks and indentation.
    text = text.replace("\n", "<br>").replace(" ", "&nbsp;").replace("\t", "&nbsp;" * 4)
    # Keyword colours; the inserted tags use unquoted attributes because '"' is itself coloured.
    color_list = ["gray", "red", "green", "blue", "orange", "purple", "pink", "brown", "wheat", "seagreen",
                  "orchid", "olive"]
    key_words = ["self", "from", "import", "def", ":", "return", "open", "class", "try", "except", '"', "print"]
    for word, color in zip(key_words, color_list):
        text = text.replace(word, '<font color=%s>%s</font>' % (color, word))
    # Brackets and '#' all share one colour.
    for punctuation in "[](){}#":
        text = text.replace(punctuation, '<font color=peru>%s</font>' % punctuation)
    return "<html><head></head><body>%s</body></html>" % text
image/*: 各种图片文件,比如jpg, png等
<img src="data:image/png;base64,(图片内容的base64编码)">
import base64


def parse_image(mtype, text):
    """Return an HTML page embedding the image bytes *text* as a base64 data URI.

    *mtype* is the image MIME type (e.g. ``image/png``) placed in the data URI.
    """
    return '<html><head></head><body><img src="data:%s;base64,%s"></body></html>' % \
           (mtype, str(base64.b64encode(text), "utf-8"))
markdown文件
elif mtype == "application/octet-stream" and filename.endswith(".md"):self.render("markdown.html", md_content=r"%s" % str(text, encoding="utf-8").replace("\n", "newline"))
<script>
    // Convert the Markdown text injected by the template into HTML and show it in #result.
    function convert() {
        var converter = new showdown.Converter();
        var text = "{{ md_content }}";
        // The injected text uses the literal token "newline" in place of line breaks;
        // restore them before handing the text to showdown.
        var html = converter.makeHtml(text.replace(/newline/g, "\n"));
        document.getElementById("result").innerHTML = html;
    }
</script>
效果demo
下面将给出上述8种文档格式在本系统中的预览效果。
text/html: 如html文件等
text/plain: txt/log文件等
text/csv: csv文件
application/json: json文件
application/pdf: pdf文件
text/x-python: Python脚本文件
image/*: 各种图片文件,比如jpg, png等
markdown文件
上传文件为Scrapy爬取动态网页.md,预览效果如下:
文件上传记录
总结
https://github.com/percent4/document_reviewer
参考文档
PDF.js官方网址:http://mozilla.github.io/pdf.js/
showdown.js官方网址:https://github.com/showdownjs/showdown
-
让你的站点也支持Markdown——showdown.js使用教程:https://www.jianshu.com/p/747d6f8dddb0
-END-
已有50万人
领取Python案例实战课世界正在奖励坚持学习的人!
