利用Tornado搭建文档预览系统
本项目现在已支持8种文档格式的预览,分别为:
- text/html: 如html文件等;
- text/plain: 如txt/log文件等;
- text/csv: csv文件;
- application/json: json文件;
- application/pdf: pdf文件;
- text/x-python: Python脚本文件;
- image/*: 各种图片文件,比如jpg, png等;
- markdown文件
准备工作
项目代码
python -m http.server 8081
python -m SimpleHTTPServer 8081
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>文件上传</title>
</head>
<body>
<!-- Upload form: posts one document as multipart/form-data to the "file" route. -->
<div align="center">
    <br><br>
    <h1>文件上传</h1>
    <form action='file' enctype="multipart/form-data" method='post'>
        <div class="am-form-group am-form-file">
            <!-- Single-file input: the server-side handler only reads the first
                 uploaded part, so allowing a multi-selection would silently
                 drop every file after the first. -->
            <input id="doc-form-file" type="file" name="file">
        </div>
        <div id="file-list"></div>
        <p>
            <button type="submit" class="am-btn am-btn-default">提交</button>
        </p>
    </form>
</div>
</body>
</html>
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Markdown文件展示</title>
    <!-- showdown.js defines the global `showdown` object used by convert().
         The tag needs a src; without it nothing is loaded and convert() throws. -->
    <script src="https://cdn.jsdelivr.net/npm/showdown@2.1.0/dist/showdown.min.js"></script>
    <script>
        // Turn the Markdown text injected by the server into HTML and show it.
        function convert(){
            var converter = new showdown.Converter();
            // md_content is a single line: the handler replaced every line break
            // with the token "newline" so the text fits inside a JS string literal.
            var text = "{{ md_content }}";
            // Restore the real line breaks before handing the text to showdown.
            var html = converter.makeHtml(text.replace(/newline/g, "\n"));
            document.getElementById("result").innerHTML = html;
        }
    </script>
</head>
<body onload="convert()">
<div id="result" ></div>
</body>
</html>
# -*- coding: utf-8 -*-
import os
import logging
import traceback
import tornado.ioloop
import tornado.web
from tornado import options
from parse_file import *
# Document upload and preview
class UploadFileHandler(tornado.web.RequestHandler):
    """Accept one uploaded document and answer with an HTML preview of it.

    ``GET`` renders the upload form. ``POST`` stores the uploaded file under
    ``pdfjs/web/files`` next to this module and then dispatches on the MIME
    type reported by the client to build the preview.
    """

    def get(self):
        """Render the upload form."""
        self.render('upload.html')

    def post(self):
        """Save the uploaded file and write (or redirect to) its preview.

        Responds with HTTP 400 when no file part named ``file`` was sent or
        when the file name is unusable. Content that cannot be decoded as
        UTF-8 yields the "unsupported format" message instead of an error.
        """
        from urllib.parse import quote

        # Only single-document upload is supported: use the first 'file' part.
        uploads = self.request.files.get('file')
        if not uploads:
            self.set_status(400)
            self.write('<font color=red>未选择上传文件!</font>')
            return
        file_meta = uploads[0]

        # The file name is chosen by the client. Keep only its last path
        # component so names such as "../../x" cannot escape the upload folder.
        filename = os.path.basename(file_meta['filename'].replace('\\', '/'))
        if filename in ('', '.', '..'):
            self.set_status(400)
            self.write('<font color=red>文件名不合法!</font>')
            return

        # Directory the uploaded files are stored in
        upload_path = os.path.join(os.path.dirname(__file__), 'pdfjs/web/files')
        os.makedirs(upload_path, exist_ok=True)

        # Save the file
        text = file_meta["body"]
        with open(os.path.join(upload_path, filename), 'wb') as up:
            up.write(text)

        # Build the preview according to the reported content type
        mtype = file_meta["content_type"]
        logging.info('POST "%s" "%s" %d bytes', filename, mtype, len(text))
        try:
            if mtype in ["text/x-python", "text/x-python-script"]:
                self.write(parse_python(str(text, encoding="utf-8")))
            elif mtype in ["text/plain", "text/csv"]:
                self.write(parse_text_plain(str(text, encoding="utf-8")))
            elif mtype == "text/html":
                self.write(str(text, encoding="utf-8"))
            elif mtype.startswith("image"):
                self.write(parse_image(mtype, text))
            elif mtype == "application/json":
                self.write(parse_application_json(str(text, encoding="utf-8")))
            elif mtype == "application/pdf":
                # Percent-encode the name so spaces, '#', '&', non-ASCII
                # characters etc. survive inside the query string.
                self.redirect("http://127.0.0.1:8081/web/viewer.html?file=files/%s" % quote(filename))
            elif mtype == "application/octet-stream" and filename.endswith(".md"):
                markdown = str(text, encoding="utf-8")
                # The template embeds this text in a one-line JavaScript string
                # literal, which may not contain raw line terminators: unify
                # CRLF/CR to LF, then swap each LF for the "newline" token the
                # template converts back.
                markdown = markdown.replace("\r\n", "\n").replace("\r", "\n")
                self.render("markdown.html", md_content=markdown.replace("\n", "newline"))
            else:  # any other file format: show it as plain text if possible
                self.write(str(text, encoding="utf-8").replace("\n", "<br>"))
        except UnicodeDecodeError:
            # Every decode above happens before anything is written, so it is
            # still safe to emit the error message here.
            logging.error(traceback.format_exc())
            self.write('<font color=red>系统不支持的文件解析格式!</font>')
def make_app():
    """Create the Tornado application with the upload route and template folder."""
    routes = [(r"/file", UploadFileHandler)]
    # Templates live in the "templates" folder next to this module
    template_dir = os.path.join(os.path.dirname(__file__), "templates")
    return tornado.web.Application(routes, template_path=template_dir)
if __name__ == "__main__":
    # Parsing the command line is what makes Tornado configure logging.
    options.parse_command_line()
    # Serve the application on port 8888 until the process is stopped.
    make_app().listen(8888)
    tornado.ioloop.IOLoop.current().start()
parse_file.py用于解析各种格式的文档,并返回HTML展示的格式,完整代码如下:
# -*- coding: utf-8 -*-
# author: Jclian91
# place: Pudong Shanghai
# time: 2020/6/5 1:05 下午
# filename: parse_file.py
# 用于解析各种文件类型的数据
import json
import base64
import logging
import traceback
from json import JSONDecodeError
# Render text/plain or text/csv content
def parse_text_plain(text):
    """Return *text* wrapped in a minimal HTML page, one ``<br>`` per line break.

    The text comes from an uploaded file, so ``&``, ``<`` and ``>`` are escaped
    first; otherwise content such as ``a<b`` would be interpreted as markup.
    """
    from html import escape

    body = escape(text, quote=False).replace("\n", "<br>")
    return "<html><head></head><body>%s</body></html>" % body
# Render application/json content
def parse_application_json(text):
    """Return *text* pretty-printed as an HTML fragment.

    The text is parsed as one JSON document first; if that fails, every
    non-empty line is parsed as its own JSON document and the results are
    shown as a list. Returns an error message string when neither works.
    """
    from html import escape

    def to_html(obj):
        # Pretty-print, escape markup characters, then keep the layout visible
        # in HTML: "<br>" for line breaks and the explicit "&nbsp;" entity for
        # spaces (plain spaces would be collapsed by the browser).
        dumped = json.dumps(obj, ensure_ascii=False, indent=2)
        return escape(dumped, quote=False).replace("\n", "<br>").replace(" ", "&nbsp;")

    try:
        return to_html(json.loads(text))
    except JSONDecodeError:
        try:
            # Fallback: one JSON document per line
            return to_html([json.loads(line) for line in text.split("\n") if line])
        except JSONDecodeError:
            logging.error(traceback.format_exc())
            return "JSON文件格式解析错误"
    except Exception as err:
        logging.error(traceback.format_exc())
        return "未知错误: %s" % err
# Render image/* content
def parse_image(mtype, text):
    """Return an HTML page that shows the image inline as a base64 data URI.

    ``mtype`` is the image MIME type (used as the data URI media type) and
    ``text`` is the raw image content as bytes.
    """
    encoded = base64.b64encode(text).decode("ascii")
    # The two values fill the media type and the payload of the data URI.
    return '<html><head></head><body><img src="data:%s;base64,%s"></body></html>' % \
        (mtype, encoded)
# Render a Python source file
def parse_python(text):
    """Return Python source *text* as an HTML page with simple colouring.

    Colouring is plain substring replacement: each listed keyword or
    punctuation character is wrapped in a ``<font>`` tag wherever it occurs,
    including inside longer identifiers.
    """
    from html import escape

    # Escape markup characters first so code such as ``a < b`` is shown
    # literally instead of being parsed as HTML.
    text = escape(text, quote=False)
    # Line breaks and indentation: use the explicit "&nbsp;" entity, because
    # ordinary spaces are collapsed when the browser renders the page.
    text = text.replace("\n", "<br>").replace(" ", "&nbsp;").replace("\t", "&nbsp;" * 4)
    # Keyword colours. The tags are written without quotes so the later '"'
    # replacement does not touch them.
    keyword_colors = [
        ("self", "gray"), ("from", "red"), ("import", "green"), ("def", "blue"),
        (":", "orange"), ("return", "purple"), ("open", "pink"), ("class", "brown"),
        ("try", "wheat"), ("except", "seagreen"), ('"', "orchid"), ("print", "olive"),
    ]
    for word, color in keyword_colors:
        text = text.replace(word, '<font color=%s>%s</font>' % (color, word))
    # Brackets and the comment marker all share one colour
    for punctuation in "[](){}#":
        text = text.replace(punctuation, '<font color=peru>%s</font>' % punctuation)
    return "<html><head></head><body>%s</body></html>" % text
实现方式
text/html: 如html文件等
self.write(str(text, encoding="utf-8"))
text/plain: txt/log文件等
# Render text/plain or text/csv content
def parse_text_plain(text):
    """Return *text* wrapped in a minimal HTML page, one ``<br>`` per line break.

    The text comes from an uploaded file, so ``&``, ``<`` and ``>`` are escaped
    first; otherwise content such as ``a<b`` would be interpreted as markup.
    """
    from html import escape

    body = escape(text, quote=False).replace("\n", "<br>")
    return "<html><head></head><body>%s</body></html>" % body
text/csv: csv文件
application/json: json文件
# Render application/json content
def parse_application_json(text):
    """Return *text* pretty-printed as an HTML fragment.

    The text is parsed as one JSON document first; if that fails, every
    non-empty line is parsed as its own JSON document and the results are
    shown as a list. Returns an error message string when neither works.
    """
    from html import escape

    def to_html(obj):
        # Pretty-print, escape markup characters, then keep the layout visible
        # in HTML: "<br>" for line breaks and the explicit "&nbsp;" entity for
        # spaces (plain spaces would be collapsed by the browser).
        dumped = json.dumps(obj, ensure_ascii=False, indent=2)
        return escape(dumped, quote=False).replace("\n", "<br>").replace(" ", "&nbsp;")

    try:
        return to_html(json.loads(text))
    except JSONDecodeError:
        try:
            # Fallback: one JSON document per line
            return to_html([json.loads(line) for line in text.split("\n") if line])
        except JSONDecodeError:
            logging.error(traceback.format_exc())
            return "JSON文件格式解析错误"
    except Exception as err:
        logging.error(traceback.format_exc())
        return "未知错误: %s" % err
application/pdf: pdf文件
elif mtype == "application/pdf":
self.redirect("http://127.0.0.1:8081/web/viewer.html?file=files/%s" % filename)
text/x-python: Python脚本文件
# Render a Python source file
def parse_python(text):
    """Return Python source *text* as an HTML page with simple colouring.

    Colouring is plain substring replacement: each listed keyword or
    punctuation character is wrapped in a ``<font>`` tag wherever it occurs,
    including inside longer identifiers.
    """
    from html import escape

    # Escape markup characters first so code such as ``a < b`` is shown
    # literally instead of being parsed as HTML.
    text = escape(text, quote=False)
    # Line breaks and indentation: use the explicit "&nbsp;" entity, because
    # ordinary spaces are collapsed when the browser renders the page.
    text = text.replace("\n", "<br>").replace(" ", "&nbsp;").replace("\t", "&nbsp;" * 4)
    # Keyword colours. The tags are written without quotes so the later '"'
    # replacement does not touch them.
    keyword_colors = [
        ("self", "gray"), ("from", "red"), ("import", "green"), ("def", "blue"),
        (":", "orange"), ("return", "purple"), ("open", "pink"), ("class", "brown"),
        ("try", "wheat"), ("except", "seagreen"), ('"', "orchid"), ("print", "olive"),
    ]
    for word, color in keyword_colors:
        text = text.replace(word, '<font color=%s>%s</font>' % (color, word))
    # Brackets and the comment marker all share one colour
    for punctuation in "[](){}#":
        text = text.replace(punctuation, '<font color=peru>%s</font>' % punctuation)
    return "<html><head></head><body>%s</body></html>" % text
image/*: 各种图片文件,比如jpg, png等
<img src="data:image/png;base64,BASE64_DATA">
import base64
# Render image/* content
def parse_image(mtype, text):
    """Return an HTML page that shows the image inline as a base64 data URI.

    ``mtype`` is the image MIME type (used as the data URI media type) and
    ``text`` is the raw image content as bytes.
    """
    encoded = base64.b64encode(text).decode("ascii")
    # The two values fill the media type and the payload of the data URI.
    return '<html><head></head><body><img src="data:%s;base64,%s"></body></html>' % \
        (mtype, encoded)
markdown文件
elif mtype == "application/octet-stream" and filename.endswith(".md"):
self.render("markdown.html", md_content=r"%s" % str(text, encoding="utf-8").replace("\n", "newline"))
<script>
// Convert the Markdown text injected by the server into HTML and display it.
function convert() {
    var showdownConverter = new showdown.Converter();
    // The server sends the document as one line in which every line break
    // was replaced by the token "newline"; put the real line breaks back.
    var markdown = "{{ md_content }}".replace(/newline/g, "\n");
    var target = document.getElementById("result");
    target.innerHTML = showdownConverter.makeHtml(markdown);
}
</script>
效果demo
下面将给出上述8种文档格式在本系统中的预览效果。
text/html: 如html文件等
text/plain: txt/log文件等
text/csv: csv文件
application/json: json文件
application/pdf: pdf文件
text/x-python: Python脚本文件
image/*: 各种图片文件,比如jpg, png等
markdown文件
上传文件为Scrapy爬取动态网页.md,预览效果如下:
文件上传记录
总结
https://github.com/percent4/document_reviewer
参考文档
- PDF.js官方网址:http://mozilla.github.io/pdf.js/
- showdown.js官方网址:https://github.com/showdownjs/showdown
- 让你的站点也支持Markdown——showdown.js使用教程:https://www.jianshu.com/p/747d6f8dddb0
-END-
已有50万人
领取Python案例实战课世界正在奖励坚持学习的人!