您可以使用 lxml 解析器和 XPath 表达式来获取所需的内容。例如,要提取 YouTube 视频的标题(title),可以这样写:
import lxml
from lxml import etree
# Fetch a YouTube watch page and print the video title found in it.
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Enter your YouTube URL here.
video_url = "http://www.youtube.com/watch?v=KQEOBZLx-Z8"

# closing() guarantees the HTTP response is released, including on Python 2
# where the urlopen() result is not a context manager.
with closing(urlopen(video_url)) as response:
    youtube = etree.HTML(response.read())

# XPath obtained with the FirePath Firefox add-on; it selects the title
# attribute of the element whose id is "eow-title".
video_title = youtube.xpath("//span[@id='eow-title']/@title")

# The XPath result is joined into one string before printing.
print(''.join(video_title))
输出:“圣诞节的12天-圣诞颂歌”