mdb格式文件可以通过mdbtools工具将内中包含的每张表导出到csv格式文件。由于access数据库和postgresql数据库格式上会存在不通性,所以使用python的文件处理,将所得csv文件修改成正确、能识别的格式。
导入脚本说明(此脚本运行于linux):
1.apt-get install mdbtools,安装mdbtools工具
2.将mdb 文件拷贝到linux虚拟机中,修改脚本中mdb文件目录‘dir’
3.修改服务器及数据库配置
4.执行脚本
代码如下:
# -*- encoding: utf-8 -*-import osimport reimport psycopg2import csv
#mdb文件目录dir = r’/home/kotaimen/mdb_file/’mdb_tbl_dic = {}
def make_create_sql(): if os.path.isfile(dir + ‘create.sql’): os.remove(dir + ‘create.sql’)
for mdb_file in os.walk(dir): if len(mdb_file[2]) >0: for file_p in mdb_file[2]: if file_p[-3:] == ‘mdb’: print file_p cmd = ‘mdb-schema %s >>/home/kotaimen/mdb_file/create.sql’ cmd = cmd % (dir + file_p) print cmd os.system(cmd) cmd = ‘mdb-tables -1 %s ‘ % (dir + file_p) val = os.popen(cmd).read() mdb_tbl_dic[file_p] = val.split(‘\n’) print mdb_tbl_dic
def modefy_create_sql(): sql_file_name = dir + ‘create.sql’ sql_file_name_des = sql_file_name + ‘_new’ fobj = open(sql_file_name, ‘r’) fobj_des = open(sql_file_name_des, ‘w’) for eachline in fobj: #判断表名中是否含有空格 if eachline.find(‘table ‘) >= 0: if eachline.find(‘;’) >= 0: start_loc = eachline.find(‘table ‘) + 6 end_loc = eachline.find(‘;’) tbl_name = eachline[start_loc:end_loc] eachline = eachline.replace(tbl_name, ‘”‘ + tbl_name + ‘”‘) else: start_loc = eachline.find(‘table ‘) + 6 end_loc = eachline.find(‘\n’) tbl_name = eachline[start_loc:end_loc] eachline = eachline.replace(tbl_name, ‘”‘ + tbl_name + ‘”‘)
if eachline.find(‘drop table’) >= 0 : eachline = eachline.replace(‘drop table’, ‘drop table if exists’) if eachline.find(‘table’) >= 0 : eachline = eachline.replace(‘table’, ‘”table”‘) #create 语句,最后一行没有逗号 if eachline.find(‘text ‘) >= 0 and eachline.find(‘,’) >0: loc = eachline.find(‘text ‘) eachline = eachline[0:loc] + ‘ text,\n’ elif eachline.find(‘text ‘) >= 0 and eachline.find(‘,’) < 0: loc = eachline.find('text ') eachline = eachline[0:loc] + ' text \n' fobj_des.writelines(eachline) fobj.close() fobj_des.close() os.remove(sql_file_name) os.rename(sql_file_name_des, sql_file_name)
def make_insert_csv(): for file_p in mdb_tbl_dic.keys(): for tbl in mdb_tbl_dic[file_p]: if len(tbl) >0: cmd = ‘mdb-export %s %s >%s.csv’ % (dir + file_p, ‘”‘ + tbl + ‘”‘, dir + ‘”‘ + tbl + ‘”‘)# tbl.replace(‘ ‘, ‘_’).replace(‘&’, ‘_’)) os.system(cmd)
def modefy_insert_csv(): for sql_file in os.walk(dir): if len(sql_file[2]) >0: for file_p in sql_file[2]: if file_p[-3:] == ‘csv’ : sql_file_name = dir + file_p sql_file_name_des = sql_file_name + ‘_new’ fobj = open(sql_file_name, ‘r’) fobj_des = open(sql_file_name_des, ‘w’) for (num, val) in enumerate(fobj): eachline = val if num == 0: col_list = eachline.split(‘,’) stat = ‘copy ‘ + ‘”‘ + (file_p[0:-4]) + ‘”‘ + ‘ (‘ #+ (‘%s,’*len(line))[:-1]+’)’ for col in col_list: if col == ‘table’: col = ‘”‘ + ‘table’ + ‘”‘ if col.find(‘\n’) >= 0: col.replace(‘\n’, ”) stat = stat + col + ‘,’ stat = stat[:-2] + ‘)’ + ‘ from stdin with csv ;\n’ eachline = stat
fobj_des.writelines(eachline) fobj.close() fobj_des.close() os.remove(sql_file_name) os.rename(sql_file_name_des, sql_file_name)
def insert_into_database():
cmd = ‘psql -h 172.26.11.205 -d ap_mapmyindia_full_sample -u postgres -f %s 2>>log.txt’ % (dir + ‘create.sql’) os.system(cmd)
for sql_file in os.walk(dir): if len(sql_file[2]) >0: for file_p in sql_file[2]: print file_p if file_p[-3:] == ‘csv’ : cmd = ‘psql -h 172.26.11.205 -d ap_mapmyindia_full_sample -u postgres -f %s 2>>log.txt’ % (dir + ‘”‘ + file_p + ‘”‘) os.system(cmd)
if __name__ == “__main__”: #1.制作mdb文件中所包含table的create脚本 make_create_sql() #2.修改掉create脚本中的不合法字符 modefy_create_sql() #3.将mdb中各表导出到csv文件中 make_insert_csv() #4.修改csv脚本首行,改成copy形式 modefy_insert_csv()
insert_into_database()