代码如下:
import re
import socket
# Progress lookup for passport / travel-permit applications on the Guangdong
# Provincial Public Security Department exit-entry e-government site.
# Observed URL format:
#   http://www.gdcrj.com/wsyw/tcustomer/tcustomer.do?&method=find&apply...
# NOTE(review): the query string is cut off after "&apply" in the text this
# file was recovered from; "applyid={0}" below is a reconstruction inferred
# from the .format(identityid) call -- confirm against the live site.
def gethtmlbyidentityid(identityid):
    """Fetch the raw HTTP response for an application-progress query.

    Opens a plain TCP connection to www.gdcrj.com on port 80, sends a GET
    request whose query string carries *identityid*, and reads until the
    server closes the connection.

    Args:
        identityid: Value substituted into the query string. It is inserted
            with ``str.format``, so anything with a string form works.

    Returns:
        The complete response (status line, headers and body) decoded as
        UTF-8; byte sequences that are not valid UTF-8 are dropped.

    Raises:
        OSError: If name resolution, connecting, sending or receiving fails.
            The socket is closed in every case.
    """
    host = 'www.gdcrj.com'
    suburl = '/wsyw/tcustomer/tcustomer.do?&method=find&applyid={0}'
    port = 80

    # The context manager closes the socket even when connect/send/recv raise.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        remote_ip = socket.gethostbyname(host)
        s.connect((remote_ip, port))
        print('【info】:socket连接成功')

        # "Connection: close" asks the server to close the connection after
        # the response, so the read-until-EOF loop below does not sit waiting
        # on a kept-alive HTTP/1.1 connection.
        request_lines = [
            'GET ' + suburl.format(identityid) + ' HTTP/1.1',
            'Host: ' + host,
            'Connection: close',
            '',
            '',
        ]
        message = '\r\n'.join(request_lines)
        s.sendall(message.encode('utf-8'))
        print('【info】:远程下载中…')

        # Collect raw bytes and decode once at the end: decoding each chunk
        # separately with errors='ignore' would silently drop any multi-byte
        # character that happens to straddle a 4096-byte boundary.
        chunks = []
        while True:
            recev = s.recv(4096)
            if not recev:  # empty read means the peer closed the connection
                break
            chunks.append(recev)

    print('【info】:远程下载网页完成')
    return b''.join(chunks).decode(encoding='utf-8', errors='ignore')
”’利用正则表达式从上步获取的网页html内容里找出查询结果”’def getresultfromhtml(htmlstr): linebreaks = re.compile(r’\n\s*’) space = re.compile(‘( )+’) resultreg = re.compile(r’\([^