采集内容时,常常需要获取网页返回的 HTTP 状态码,以便做进一步处理。
下面的代码是用 Python 编写的、用于获取网页 HTTP 状态码的脚本:
#!/usr/bin/python
# -*- coding: utf-8 -*-
#encoding=utf-8
#filename:states_code.py
import urllib2
# Probe a single page and report why the request failed, if it did.
# Nothing is printed when the request succeeds.
url = 'http://www.bitscn.com/'

# Stays None when urlopen() raises, so the cleanup in `finally` can tell
# whether there is an open response to close.
response = None
try:
    # Give up after 5 seconds rather than blocking on an unresponsive server.
    response = urllib2.urlopen(url, timeout=5)
except urllib2.URLError as e:
    # urllib2.HTTPError (a URLError subclass) carries the HTTP status in
    # `code`; a plain URLError only has `reason`. `code` is checked first so
    # the status code is reported whenever one is available.
    if hasattr(e, 'code'):
        # Single formatted argument: prints identically under the Python 2
        # print statement and the Python 3 print function.
        print('error code: %s' % (e.code,))
    elif hasattr(e, 'reason'):
        # Wrapped in a 1-tuple so a tuple-valued reason is not unpacked by %.
        print('reason: %s' % (e.reason,))
finally:
    # Release the connection only if the request actually produced a response.
    if response is not None:
        response.close()