python实现提取谷歌音乐搜索结果的方法

本文实例讲述了python实现提取谷歌音乐搜索结果的方法。分享给大家供大家参考。具体如下:

Python的简单脚本,用于提取谷歌音乐搜索页面中的歌曲信息,包括歌曲名、作者、专辑名、下载链接等,最多只提取10页结果。

#! /usr/bin/env python
#coding=utf-8
'''
created on 2011-8-19
@author: yaoboyuan
'''
from urllib import request,parse
import re,sys
def extractsongrawdata(text):
# Purpose, per the docstring on the next line: grab the raw data of every song.
‘抓取每一首歌的原始数据’
# Remove every run of newline characters from the text.
text = re.sub(‘\n+’,”,text)
# NOTE(review): the line below is cut off inside its regex literal. The rest of
# this function, and the beginning of whatever helper the trailing
# `return td[0]` / `else: return song` lines belonged to, are not present here.
# Presumably the HTML-like pattern text was stripped when the post was
# published; restore it from the original script before relying on this.
songlist = re.findall(‘\0:
return td[0]
else:
return song
def extractlink(song):
# Purpose, per the docstring on the next line: extract the song's download link.
‘提取歌曲下载链接’
# NOTE(review): the pattern below looks truncated (only a backslash remains
# between the quote marks); the real expression cannot be recovered from here.
td = re.findall(”’\”’,song)
# No match at all: return the placeholder string.
if len(td) == 0:
return ‘null’
s = str(td[0])
# Lazily match from "http" up to, but not including, the first "?" after it.
rawlink = re.findall(‘http.*?(?=\?)’,s)
# No such "http...?" span: return the stringified first match as is.
if len(rawlink) == 0:
return s
link = rawlink[0]
# Replace each literal "%3d" with "=".
link = re.sub(‘%3d’,’=’,link)
# NOTE(review): extractid is not defined in the visible part of the file.
# Also, `id` shadows the builtin of the same name inside this function.
id = extractid(song)
return link + ‘?id=’ + id
def extractpagenums(text):
    """Return how many times the marker ``page_link`` occurs in *text*.

    The original (Chinese) docstring describes this as the number of result
    pages, of which at most 10 are wanted. This function applies no upper
    bound itself; it only counts non-overlapping occurrences of the literal
    substring ``page_link``.

    Args:
        text: The page text to scan.

    Returns:
        The occurrence count as an ``int``; ``0`` when the marker is absent.
    """
    # A literal substring count gives the same result as
    # len(re.findall('page_link', text)) without building a throwaway list.
    return text.count("page_link")
def extractsonginfo(song):
# Purpose, per the docstring on the next line: extract song information and
# return a list of songs.
‘提取歌曲信息,返回歌曲列表’
songlist = []
# One [songname, authorname, albumname, link] entry is appended per element of `song`.
for i in range(len(song)):
songname = extractsongname(song[i])
authorname = extractauthorname(song[i])
# NOTE(review): spelled "extrack..." unlike the sibling "extract..." helpers, and
# its definition is not visible here. Confirm the name matches a real function.
albumname = extrackalbumname(song[i])
link = extractlink(song[i])
songitem = [songname,authorname,albumname,link]
songlist.append(songitem)
index = ”
# NOTE(review): the next line is damaged. It appears to be the start of one
# `if i...` statement fused with the end of a later `if ... 0:` statement; the
# text in between (including the end of this function) is missing from this
# copy. Restore it from the original script.
if i0:
# NOTE(review): this is a fragment of the top-level script. The statements that
# define `num`, `url` and the initial `songlist` are not present in this copy.
for i in range(num):
# Result offset for this iteration: 20, 40, 60, ...
start = (i+1)*20
next_page = ‘&cat=song&start=%d’%(start)
#next_page = parse.quote(next_page) # encode uniformly as utf-8
# NOTE(review): `url` is extended in place, so each later iteration also carries
# the "&cat=song&start=..." suffixes of all earlier ones. Presumably the suffix
# should be appended to an unchanged base URL instead; verify.
url += next_page
mf = request.urlopen(url)
# NOTE(review): confirm `readall()` exists on the response object for the
# targeted Python version; `read()` is the documented way to get the body.
c = mf.readall()
# Decode the response bytes as UTF-8 text.
c = str(c,encoding = ‘utf-8’)
song = extractsongrawdata(c)
songlist += extractsonginfo(song) #find all results
for i in range(len(songlist)): #print the result
index = ”
# NOTE(review): the statement below is cut off; the rest of the print loop is
# missing from this copy.
if i

Posted in 未分类

发表评论