Python 获取糗百图片代码实例

代码如下:

from sgmllib import SGMLParser
import urllib2


class sgm(SGMLParser):
    """Collect the ``src`` URL of every ``<img>`` tag fed to the parser.

    Images that occur between a ``<p>`` start tag carrying an attribute whose
    value is ``"author"`` and the next ``</p>`` are ignored.

    Requires Python 2: ``sgmllib`` and ``urllib2`` do not exist in Python 3.
    """

    def reset(self):
        """Reset the parser and the collected state.

        ``SGMLParser`` calls ``reset()`` implicitly at instantiation time, so
        this also serves as the initialiser for ``srcs`` and ``istrue``.
        """
        SGMLParser.reset(self)
        self.srcs = []       # image URLs collected since the last download()
        self.istrue = True   # False while inside an "author" paragraph

    def start_p(self, artts):
        """Stop collecting images if any attribute of this ``<p>`` is "author".

        ``artts`` is the list of ``(name, value)`` attribute pairs supplied by
        ``SGMLParser``.
        """
        for k, v in artts:
            if v == "author":
                self.istrue = False

    def end_p(self):
        """Resume collecting images once the paragraph is closed."""
        self.istrue = True

    def start_img(self, artts):
        """Record the ``src`` attribute of an ``<img>`` unless it is suppressed."""
        for k, v in artts:
            if k == "src" and self.istrue:
                self.srcs.append(v)

    def download(self):
        """Save every collected image to the current working directory.

        Each file is named after the last 12 characters of its URL.  The list
        of collected URLs is emptied once all of them have been saved, so that
        a later call does not fetch the same images again.
        """
        for src in self.srcs:
            print(src)
            # Fetch before opening the output file so that a failed request
            # does not leave an empty file (or an open handle) behind.
            img = urllib2.urlopen(src)
            try:
                content = img.read()
            finally:
                img.close()
            with open(src[-12:], "wb") as f:
                f.write(content)
        del self.srcs[:]


# NOTE: this rebinds the name ``sgm`` from the class to its only instance.
sgm = sgm()
for page in range(1, 500):
    url = "http://www.qiushibaike.com/late/page/%s?s=4622726" % page
    data = urllib2.urlopen(url).read()
    sgm.feed(data)
    # Download page by page; download() clears the collected URLs, so each
    # image is fetched exactly once.
    sgm.download()

Posted in 未分类

发表评论