本文实例讲述了 Python 实现从订阅源下载图片的方法,分享给大家供大家参考。具体如下:
这段代码基于 Python 3.4 实现,与 Python 2.x 相比有不少差异。
这是一个练习,数据源来自网易订阅。代码如下:
# Module author metadata (plain ASCII quotes; typographic quotes are a SyntaxError).
__author__ = "saint"
import json
import os
import urllib.error
import urllib.parse
import urllib.request
from html.parser import HTMLParser
class myhtmlparser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    After ``feed()`` the image addresses are available, in document order,
    in the ``links`` list of the instance.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list: a class-level list would be shared by every
        # parser, so links from earlier pages would leak into later ones.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs of the tag.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # A bare ``src`` attribute has value None; there is nothing to
            # download in that case, so it is skipped.
            if name == "src" and value:
                self.links.append(value)
class down(object):
    """Download the images referenced by a set of dy.163.com subscription feeds.

    For each feed URL in ``collect_links`` the article list is fetched as
    JSON, every listed article page is fetched and scanned for ``<img>``
    tags, and each image is saved under ``img_path/<last feed URL segment>``.
    """

    # Root directory; one sub-directory per feed is created below it.
    img_path = "e:/saint"
    # Directory the current feed's images are written to (set by isdirexists).
    dir = ""
    # Feed (article list) URLs to collect from.
    collect_links = [
        "http://dy.163.com/v2/media/articlelist/t1374483113516-1",
        "http://dy.163.com/v2/media/articlelist/t1420776257254-1",
        "http://dy.163.com/v2/media/articlelist/t1376641060407-1",
    ]
    # Base URL of an article page; "/<tid>/<docid>" is appended to it.
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the terminal using the command that exists on the host OS."""
        os.system("cls" if os.name == "nt" else "clear")

    def handlecollect(self):
        """Walk every feed in ``collect_links`` and download its images.

        The method is interactive: it reads answers from standard input.
        When a feed cannot be fetched, "y" moves on to the next feed and any
        other answer stops the run. After a feed has been processed, "n"
        stops the run and any other answer continues with the next feed.
        """
        for collect_link in self.collect_links:
            print("开始从[" + collect_link + "]采集图片")
            # The last URL segment names the feed's download directory.
            self.isdirexists(collect_link.split("/")[-1])
            pages = self.getlistfromsubscribe(collect_link)
            if pages is False:
                print("数据采集失败,是否继续(y/n)")
                answer = input()
                if answer == "y":
                    self._clear_screen()
                    continue
                if answer == "n":
                    print("停止采集")
                else:
                    self._clear_screen()
                    print("非法输入")
                break
            for page in pages:
                page_uri = self.img_links + "/" + page["tid"] + "/" + page["docid"]
                self.getimgfromuri(page_uri)
            # NOTE(review): the original nesting of this prompt was lost when
            # the source's indentation was flattened. It is asked once per
            # feed here so that "n" ends the whole run - confirm this is the
            # intended place.
            print("是否继续(y/n)")
            if input() == "n":
                self._clear_screen()
                print("采集完毕")
                break
        print("ok")

    def getlistfromsubscribe(self, uri):
        """Fetch the article list of one feed.

        Args:
            uri: URL of the feed's JSON article list.

        Returns:
            The value of the payload's ``data`` field, or ``False`` when the
            request fails, the HTTP status is outside 2xx, or the payload's
            ``code`` field is not 1.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                status = res.code
                raw = res.read()
        except urllib.error.URLError as err:
            # The caller tests for False, so report the failure that way
            # instead of letting the exception end the program.
            print(err)
            return False
        # Non-HTTP handlers (file:, data:) carry no status code at all.
        if status is not None and not 200 <= status < 300:
            self._clear_screen()
            return False
        # read() returns bytes; they are decoded as GBK before JSON parsing.
        payload = json.loads(raw.decode("gbk"))
        if payload["code"] != 1:
            print(payload["msg"])
            return False
        return payload["data"]

    def getimgfromuri(self, uri):
        """Fetch one article page and download every image it references.

        Args:
            uri: URL of the article page.
        """
        with urllib.request.urlopen(uri) as res:
            html_code = res.read().decode("gbk")
        hp = myhtmlparser()
        # Give this parser its own result list so that links collected from
        # previously parsed pages can never be downloaded again.
        hp.links = []
        hp.feed(html_code)
        hp.close()
        for link in hp.links:
            # Resolve relative and protocol-relative src values against the
            # page address; absolute URLs pass through unchanged.
            self.writetodisk(urllib.parse.urljoin(uri, link))

    def isdirexists(self, dir_name):
        """Point ``self.dir`` at the feed's directory, creating it if missing.

        Args:
            dir_name: Name of the sub-directory below ``img_path``.

        Returns:
            Always ``True``.
        """
        # os.path.join inserts the separator that plain string concatenation
        # of img_path and dir_name leaves out.
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    def writetodisk(self, url):
        """Download ``url`` and store it in the current feed directory.

        The file is named after the last path segment of the URL; query
        string and fragment are not part of the name.

        Args:
            url: Absolute address of the file to download.

        Returns:
            ``True`` once the file has been written, ``False`` when the URL
            has no file name to save under.
        """
        file_name = urllib.parse.urlsplit(url).path.rsplit("/", 1)[-1]
        if not file_name:
            return False
        with urllib.request.urlopen(url) as res:
            content = res.read()
        # Write via an explicit path instead of os.chdir(), which would
        # change the working directory of the whole process.
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(content)
        return True
# Script entry point: run the interactive collection when executed directly.
if __name__ == "__main__":
    # Use a separate name for the instance so the class `down` stays
    # reachable instead of being rebound to one of its own objects.
    downloader = down()
    downloader.handlecollect()
希望本文所述对大家的 Python 程序设计有所帮助。