Python实现批量将Word转HTML并将HTML内容发布至网站的方法

本文实例讲述了Python实现批量将Word文档转换为HTML并将HTML内容发布至网站的方法。分享给大家供大家参考。具体实现方法如下:

#coding=utf-8
__author__ = ‘zhm’
from win32com import client as wc
import os
import time
import random
import mysqldb
import re
def _to_text(value, encoding="gbk"):
    """Return *value* as text, decoding byte strings with *encoding*."""
    # os.walk yields byte strings for a byte-string root and text for a text
    # root; decoding only byte strings avoids the TypeError that
    # unicode(text, "gbk") raises on names that are already decoded.
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def wordstohtml(dir):
    """Convert every Word document below *dir* into an HTML file.

    Walks *dir* recursively and, for each ``.doc``/``.docx`` file, opens it
    through the Kingsoft WPS COM automation server and saves it as
    ``<name>.html`` in the same folder as the source document. Progress is
    printed to stdout.

    Args:
        dir: Root folder to scan. File and folder names given as byte
            strings are decoded as GBK.
    """
    # Kingsoft WPS automation: preview builds use the 'kwps' ProgID, release
    # builds use 'wps'.
    word = wc.Dispatch('kwps.application')
    try:
        for path, _subdirs, files in os.walk(dir):
            for wordfile in files:
                wordfile2 = _to_text(wordfile)
                dotindex = wordfile2.rfind(".")
                if dotindex == -1:
                    print('********************error: 未取得后缀名!')
                    # Without a suffix the slices below would treat the whole
                    # name as the extension, so skip the file.
                    continue
                filesuffix = wordfile2[dotindex + 1:]
                # Check the extension before opening anything so that
                # unrelated files are never loaded into the word processor.
                if filesuffix.lower() not in ("doc", "docx"):
                    continue
                filename = wordfile2[:dotindex]
                htmlname = filename + ".html"
                textpath = _to_text(path)
                wordfullname = os.path.join(textpath, wordfile2)
                htmlfullname = os.path.join(textpath, htmlname)
                doc = word.Documents.Open(wordfullname)
                try:
                    print(u'生成了html文件:' + htmlfullname)
                    # 8 is wdFormatHTML in the Word object model's
                    # WdSaveFormat enumeration.
                    doc.SaveAs(htmlfullname, 8)
                finally:
                    # Close the document even when saving fails.
                    doc.Close()
    finally:
        # Always shut the automation server down so no process is left behind.
        word.Quit()
    print("")
    print("finished!")
def html_add_to_db(dir):
#将转换成功的html文件批量插入数据库中。
conn = mysqldb.connect(
host=’localhost’,
port=3306,
user=’root’,
passwd=’root’,
db=’test’,
charset=’utf8′
)
cur = conn.cursor()
for path, subdirs, files in os.walk(dir):
for htmlfile in files:
htmlfullname = os.path.join(path, htmlfile)
title = os.path.splitext(htmlfile)[0]
targetdir = ‘d:/files/htmls/’
#d:/files为web服务器配置的静态目录
sconds = time.time()
msconds = sconds * 1000
targetfile = os.path.join(targetdir, str(int(msconds))+str(random.randint(100, 10000)) +’.html’)
htmlfile2 = unicode(htmlfile, “gbk”)
dotindex = htmlfile2.rfind(“.”)
if(dotindex == -1):
print ‘********************error: 未取得后缀名!’
filesuffix = htmlfile2[(dotindex + 1) : ]
if(filesuffix == “htm” or filesuffix == “html”):
if not os.path.exists(targetdir):
os.makedirs(targetdir)
htmlfullname = os.path.join(unicode(path, “gbk”), htmlfullname)
htfile = open(htmlfullname,’rb’)
#获取网页内容
htmstrcotent = htfile.read()
#找出里面的图片
img=re.compile(r”””

Posted in 未分类

发表评论