改进一下对XML数据库操作
-----聊摄影 谈技术 不老的攻城狮-----
#-*- coding: GBK -*-
from lxml import etree as ET
from mailmerge import MailMerge
import base64
import time
import docx
import re
# Module-level accumulator: parsexml() appends one dict per XML record here,
# and main() iterates over it afterwards.
gbs=[]
# Word field code kept for reference (presumably what the mail-merge template
# uses so that the "zp" merge field supplies the picture path -- confirm
# against aa.docx):
# INCLUDEPICTURE "{ MERGEFIELD "zp" }" \* MERGEFORMAT
def parsexml():
    """Load every ``Table`` record of xmldata/A01.xml into the global ``gbs``.

    For each record one dict is appended to ``gbs`` with these keys:

    * ``xm``   -- text of ``A0101``
    * ``zy``   -- text of ``A0130`` ('' when absent or empty)
    * ``sr``   -- first 10 characters of ``A0107`` ('' when absent or empty)
    * ``xrzw`` -- ``getDw()`` lookup of the code in ``BJXA0102B`` ('' when the
      code is absent or nothing is found)
    * ``zp``   -- picture path built from ``BJXA0106`` ('' when the record has
      no picture name)
    * ``jtgx`` -- ``getjtgx()`` result for the record's ``A00`` id
    * ``grll`` -- ``getlvli()`` result for the record's ``A00`` id

    Missing elements and elements without text are read as '' instead of
    raising AttributeError/TypeError, so one incomplete record no longer
    aborts the whole run.  Progress is printed for every record.
    """
    tree = ET.parse("xmldata/A01.xml")
    # To process only some records, narrow the XPath on A0101, e.g.
    #   //NewDataSet/Table/A0101[contains(text(),"<name>")]/parent::Table
    for i, box in enumerate(tree.xpath('//NewDataSet/Table'), start=1):
        # findtext() yields the default for a missing element and '' for an
        # element that exists but has no text.
        pid = box.findtext('A00', default='')
        xm = box.findtext('A0101', default='')
        zy = box.findtext('A0130', default='')
        sr = box.findtext('A0107', default='')[0:10]

        dw_code = box.findtext('BJXA0102B', default='')
        # "or ''" keeps the value a string even if the lookup yields None.
        xrzw = (getDw(dw_code) or '') if dw_code else ''

        pic_name = box.findtext('BJXA0106', default='')
        # Only build a path when there is a file name; a bare directory path
        # would look like a valid picture to later length checks.
        zp = 'E:/mypy/xmltest/xmldata/pic/' + pic_name if pic_name else ''

        print(i, xm, xrzw)
        gb = {
            'xm': xm,
            'zy': zy,
            'sr': sr,
            'xrzw': xrzw,
            'zp': zp,
            'jtgx': getjtgx(pid),
            'grll': getlvli(pid),
        }
        gbs.append(gb)
        print("")
        # NOTE(review): the original indentation was lost; this pause is
        # assumed to sit inside the loop (it delays every record after the
        # 1940th by 11 seconds) -- confirm it is still wanted.
        if i > 1940:
            time.sleep(11)
def getjtgx(cpid):
    """Collect the xmldata/A36.xml rows whose ``A00`` text equals *cpid*.

    Args:
        cpid: Record id to look up; it is compared as a string.

    Returns:
        One ``"<A3601>\\t<A3611A>\\n"`` line per matching ``Table`` row,
        concatenated into a single string ('' when nothing matches).  Missing
        or empty elements contribute ''.

    Each matching row is also printed together with the first 10 characters
    of ``A3607``.
    """
    # This function is called once per person, so parse the file only on the
    # first call and reuse the tree afterwards.
    tree1 = getattr(getjtgx, '_tree', None)
    if tree1 is None:
        tree1 = getjtgx._tree = ET.parse("xmldata/A36.xml")

    # Pass the id as an XPath variable instead of interpolating it into the
    # expression, so ids containing quotes cannot break the query.
    ctj = '//NewDataSet/Table/A00[text()=$pid]/parent::Table'
    lines = []
    for box in tree1.xpath(ctj, pid=str(cpid)):
        xm = box.findtext('A3601', default='')
        nl = box.findtext('A3607', default='')[0:10]
        dw = box.findtext('A3611A', default='')
        print("==>", xm, nl, dw)
        lines.append(xm + '\t' + dw + '\n')
    return ''.join(lines)
def getlvli(cpid):
    """Collect the xmldata/A16.xml rows whose ``A00`` text equals *cpid*.

    Args:
        cpid: Record id to look up; it is compared as a string.

    Returns:
        One ``"<A1601[:10]>-<A1604[:10]>\\t<A1607>\\n"`` line per matching
        ``Table`` row, concatenated into a single string ('' when nothing
        matches).  Missing or empty elements contribute ''.

    Each matching row is also printed.
    """
    # This function is called once per person, so parse the file only on the
    # first call and reuse the tree afterwards.
    tree1 = getattr(getlvli, '_tree', None)
    if tree1 is None:
        tree1 = getlvli._tree = ET.parse("xmldata/A16.xml")

    # Pass the id as an XPath variable instead of interpolating it into the
    # expression, so ids containing quotes cannot break the query.
    ctj = '//NewDataSet/Table/A00[text()=$pid]/parent::Table'
    lines = []
    for box in tree1.xpath(ctj, pid=str(cpid)):
        sjq = box.findtext('A1601', default='')[0:10]
        sjz = box.findtext('A1604', default='')[0:10]
        dw = box.findtext('A1607', default='')
        print(" ", sjq, "~", sjz, dw)
        lines.append(sjq + '-' + sjz + '\t' + dw + '\n')
    return ''.join(lines)
# Look up a unit ("dw") by its code.
def getDw(cpid):
    """Return the ``B0101`` text of the xmldata/B01.xml row with ``B00`` == *cpid*.

    Args:
        cpid: Code to look up; it is compared as a string.

    Returns:
        The ``B0101`` text of the last matching ``Table`` row, or '' when no
        row matches or the element is missing/empty (never None).
    """
    # Called once per person by parsexml(), so parse the file only on the
    # first call and reuse the tree afterwards.
    tree1 = getattr(getDw, '_tree', None)
    if tree1 is None:
        tree1 = getDw._tree = ET.parse("xmldata/B01.xml")

    # Pass the code as an XPath variable instead of interpolating it into the
    # expression, so codes containing quotes cannot break the query.
    ctj = '//NewDataSet/Table/B00[text()=$code]/parent::Table'
    dw = ''
    for box in tree1.xpath(ctj, code=str(cpid)):
        # Later matches overwrite earlier ones, as in the original loop.
        dw = box.findtext('B0101', default='')
    return dw
def saveword(filename,picname,gbt):
    """Write a Word file from the aa.docx template with a picture and three text lines.

    Args:
        filename: Path the resulting document is saved to.
        picname: Path of the picture inserted into the first paragraph.
        gbt: Mapping providing the ``'xm'``, ``'xrzw'`` and ``'zy'`` strings,
            each appended as its own paragraph.
    """
    document = docx.Document('E:/mypy/xmltest/aa.docx')
    # A template without any paragraph would make paragraphs[0] raise
    # IndexError; fall back to a fresh paragraph in that case.
    paragraphs = document.paragraphs
    first = paragraphs[0] if paragraphs else document.add_paragraph()
    run = first.add_run()
    run.add_picture(picname, width=None, height=None)
    # NOTE(review): 6 is passed through unchanged; presumably the numeric
    # value of python-docx's WD_BREAK.LINE -- confirm.
    run.add_break(break_type=6)
    for key in ('xm', 'xrzw', 'zy'):
        document.add_paragraph(gbt[key])
    document.save(filename)
def mergeWord(lst):
    """Fill the aa.docx mail-merge template from *lst* and save it per record.

    Args:
        lst: Mapping with the keys ``xm``, ``zy``, ``sr``, ``xrzw``, ``zp``,
            ``jtgx`` and ``grll``; each value is merged into the field of the
            same name.

    The result is written to ``E:/mypy/ry/<xm>.docx``.
    """
    temple='aa.docx'
    # MailMerge keeps the template file open; the with-block closes it again.
    # This matters because main() calls this function once per matching record.
    with MailMerge(temple) as doc:
        doc.merge(
            xm=lst['xm'],
            zy=lst['zy'],
            sr=lst['sr'],
            xrzw=lst['xrzw'],
            zp=lst['zp'],
            jtgx=lst['jtgx'],
            grll=lst['grll'],
        )
        doc.write('E:/mypy/ry/%s.docx'%lst['xm'])
def regWord(cstr,keyWord):
    """Count the non-overlapping matches of the regex *keyWord* in *cstr*.

    Args:
        cstr: Text to search; ``None`` is treated as "no text".
        keyWord: Regular-expression pattern (e.g. alternatives joined by ``|``).

    Returns:
        Number of matches found, 0 when there are none.
    """
    if cstr is None:
        # Nothing to search; avoid the TypeError re.findall would raise.
        return 0
    return len(re.findall(keyWord, cstr))
def main():
    """Parse the XML data, then mail-merge every record that matches the keywords.

    parsexml() fills the global ``gbs``; for each entry whose ``'zp'`` string
    is longer than 10 characters and whose ``'jtgx'`` text contains at least
    one of the keywords, mergeWord() is called.
    """
    parsexml()
    keys='研发|博士|教授|高级|美国|英国|瑞士'
    for gb in gbs:
        # NOTE(review): the length test on 'zp' looks like a "record has a
        # picture" check, but parsexml() stores the value with a directory
        # prefix, so it may always be true -- confirm the intended threshold.
        if len(gb['zp'])>10 and regWord(gb['jtgx'],keys):
            mergeWord(gb)
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
身与JAVA同行 心与Python同梦
怀中却拥抱着佳能5DV入眠