并不是我一直在寻找的东西,但是我发现自己现在是一个不错的解决方案(如果我可以做得更好的话,我可以对其进行编辑)。我像在Google中一样搜索(仅返回URL)和Beautiful Soup软件包结合在一起来解析HTML页面:
from google import search
import urllib
from bs4 import BeautifulSoup
def google_scrape(url):
thepage = urllib.urlopen(url)
soup = BeautifulSoup(thepage, "html.parser")
return soup.title.text
i = 1
query = 'search this'
for url in search(query, stop=10):
a = google_scrape(url)
print str(i) + ". " + a
print url
print " "
i += 1
from google import search
import requests
for url in search(ip, stop=10):
r = requests.get(url)
title = everything_between(r.text, '<title>', '</title>')