Python 爬虫

抓取超级课程表话题数据。

#!/usr/local/bin/python2.7
# -*- coding: utf8 -*-
"""
Topic scraper for the 超级课程表 (Super Class Schedule) app.
"""
import json
import urllib2
from cookielib import CookieJar
def fetch_data(json_data):
    """Extract the paging timestamp and the topic posts from one response.

    Args:
        json_data: Decoded JSON response (a dict). It must contain a
            ``data`` entry holding ``timestamplong`` and a ``messagebos``
            list of message dicts.

    Returns:
        A ``(timestamplong, topiclist)`` tuple. ``topiclist`` holds one dict
        per message that has non-empty ``content``, with the keys
        ``content``, ``schoolname``, ``messageid``, ``gender`` and ``time``.

    Raises:
        KeyError: If the response, or a message that has content, lacks one
            of the keys read below.
    """
    # NOTE(review): every key below is lower-case in this copy of the
    # script; confirm the casing against a real API response.
    data = json_data['data']
    timestamplong = data['timestamplong']
    topiclist = []
    for each in data['messagebos']:
        # Messages with missing or empty text are skipped.
        # NOTE(review): the pasted source had lost its indentation; the
        # append is assumed to belong inside this content check.
        if not each.get('content', False):
            continue
        topicdict = {
            'content': each['content'],
            'schoolname': each['schoolname'],
            'messageid': each['messageid'],
            'gender': each['studentbo']['gender'],
            'time': each['issuetime'],
        }
        # A single pre-formatted argument keeps the output identical whether
        # print is a statement (Python 2) or a function.
        print('%s %s' % (each['schoolname'], each['content']))
        topiclist.append(topicdict)
    return timestamplong, topiclist
def load(timestamp, headers, url):
    """Request further pages of the topic until the server stops serving them.

    Each successful response is passed to ``fetch_data`` and the timestamp it
    returns becomes the cursor for the next request. Requests go through the
    module-level ``opener``.

    Args:
        timestamp: Paging cursor taken from the previous response.
        headers: Dict of HTTP headers to send. It is copied, not modified.
        url: Endpoint the form data is posted to.

    Returns:
        False, once a response whose ``status`` is not 1 has been received
        or the cursor stops advancing. No truthy value is ever returned.
    """
    # Work on a copy so the caller's headers keep their own content-length.
    request_headers = dict(headers)
    # A loop replaces the original self-recursion, which added one stack
    # frame per page and could exhaust the recursion limit on long topics.
    while True:
        # NOTE(review): this literal is truncated after '&topic' in the
        # pasted source; the remaining form parameters must be restored
        # before the request can work.
        loaddata = (
            'timestamp=%s&phonebrand=meizu&platform=1&gendertype=-1&topic'
            % timestamp
        )
        # Derive the length from the body instead of hard-coding it.
        request_headers['content-length'] = str(len(loaddata))
        req = urllib2.Request(url, loaddata, request_headers)
        loadresult = opener.open(req).read()
        loadjson = json.loads(loadresult)
        if loadjson.get('status', False) != 1:
            print('load fail')
            print(loadresult)
            return False
        print('load successful!')
        next_timestamp, _ = fetch_data(loadjson)
        if next_timestamp == timestamp:
            # The same cursor would repeat the identical request forever.
            return False
        timestamp = next_timestamp
# Endpoints used by the script: one for logging in, one for reading a topic.
# NOTE(review): the paths, like the rest of this copy of the script, are all
# lower-case; confirm the casing the server actually expects.
loginurl = 'http://120.55.151.61/v2/studentskip/logincheckv4.action'
topicurl = 'http://120.55.151.61/v2/treehole/message/getmessagebytopicidv3.action'

# Headers sent with every request. 'content-length' is overwritten before
# each later request because every form body has a different length.
# NOTE(review): 'accept-encoding: gzip' is advertised, but the responses are
# handed to json.loads without decompression; confirm the server replies
# uncompressed.
headers = {
    'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
    'user-agent': 'dalvik/1.6.0 (linux; u; android 4.1.1; m040 build/jro03h)',
    'host': '120.55.151.61',
    'connection': 'keep-alive',
    'accept-encoding': 'gzip',
    'content-length': '207',
}
# --- Login ---
# NOTE(review): the account and password values are embedded in this form
# body; consider loading them from configuration instead of source.
logindata = 'phonebrand=meizu&platform=1&devicecode=868033014919494&account=fcf030e1f2f6341c1c93be5bbc422a3d&phoneversion=16&password=a55b48bb75c79200379d82a18c5f47d6&channel=mxmarket&phonemodel=m040&versionnumber=7.2.1&'
# The opener keeps the cookies set by the login response, so the later
# requests made through it carry them.
cookiejar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
# Derive the length from the body instead of relying on a hard-coded value.
headers['content-length'] = str(len(logindata))
req = urllib2.Request(loginurl, logindata, headers)
loginresult = opener.open(req).read()
loginstatus = json.loads(loginresult).get('data', False)
# Test the parsed 'data' entry. The raw response text is truthy for any
# non-empty reply, so testing it could never report a failed login.
if loginstatus:
    print('login successful!')
else:
    print('login fail')
    print(loginresult)
# --- Fetch the topic ---
# NOTE(review): this literal is truncated after '&topic' in the pasted
# source; the remaining form parameters must be restored before the request
# can work.
topicdata = 'timestamp=0&phonebrand=meizu&platform=1&gendertype=-1&topic'
# Derive the length from the body instead of relying on a hard-coded value.
headers['content-length'] = str(len(topicdata))
topicrequest = urllib2.Request(topicurl, topicdata, headers)
topichtml = opener.open(topicrequest).read()
topicjson = json.loads(topichtml)
topicstatus = topicjson.get('status', False)
print(topicjson)
if topicstatus == 1:
    print('fetch topic success!')
    # The first page yields the cursor that load() uses for further pages.
    timestamp, topiclist = fetch_data(topicjson)
    data = load(timestamp, headers, topicurl)
    # load() only ever returns a falsy value, so this branch is not expected
    # to run; it is kept in case load() later returns a parsed response.
    if data:
        timestamp, topiclist = fetch_data(data)

Posted in 未分类

发表评论