Python 去除 HTML 标签

Python 去除 HTML 标签,自己写的,若有不足请指正:

#! /usr/bin/env python
#coding=utf-8
# blueel 2013-01-19
try:
    from htmlparser import htmlparser
except ImportError:
    # No module of that exact name ships with Python; fall back to the
    # standard-library parser, bound to the name the code below expects.
    try:
        from html.parser import HTMLParser as htmlparser  # Python 3
    except ImportError:
        from HTMLParser import HTMLParser as htmlparser  # Python 2
class mlstripper(htmlparser):
    """HTML parser that ignores markup and collects only the text data.

    Feed markup with ``feed()``; the text chunks reported through
    ``handle_data()`` accumulate in ``self.fed`` and can be read back as a
    single string with ``get_data()``.
    """

    def __init__(self):
        # Run the base-class initialiser instead of calling reset() alone:
        # the standard-library HTMLParser.__init__ calls reset() itself and,
        # on Python 3, also sets attributes (e.g. convert_charrefs) that
        # feed() relies on.  The explicit base-class call is used rather than
        # super() so it also works with an old-style Python 2 base class.
        htmlparser.__init__(self)
        # Text chunks in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record one chunk of text data reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return every chunk collected so far, joined into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with the markup tags removed.

    Args:
        html: Markup string to clean.

    Returns:
        The concatenation of all text data the parser reported.
    """
    s = mlstripper()
    s.feed(html)
    # feed() may keep the tail of its input buffered while waiting for more
    # data (for instance after an unterminated "&"); close() forces that
    # remainder to be processed so trailing text is not lost.
    s.close()
    return s.get_data()

调用:

# Example: run a sample string through strip_tags() and print the result.
html = 'ou 12x de r$ 116,58 sem juros'

# Parenthesised single-argument print works on both Python 2 and Python 3.
print(strip_tags(html))

Posted in 未分类

发表评论