这不是要与之交互的琐碎页面,需要使用“ 显式等待”来等待“加载”指示器的隐形。
这是可以用作起点的完整且可行的实现:
# -*- coding: utf-8 -*-
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import webdriverwait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import time
url = "http://data.eastmoney.com/xg/xg/"
driver = webdriver.PhantomJS()
driver.get(url)
def get_table_results(driver):
for row in driver.find_elements_by_css_selector("table#dt_1 tr[class]"):
print [cell.text for cell in row.find_elements_by_tag_name("td")]
# initial wait for results
webdriverwait(driver, 10).until(EC.invisibility_of_element_located((By.XPATH, u"//th[. = '加载中......']")))
while True:
# print current page number
page_number = driver.find_element_by_id("gopage").get_attribute("value")
print "Page #" + page_number
get_table_results(driver)
next_link = driver.find_element_by_link_text("下一页")
if "nolink" in next_link.get_attribute("class"):
break
next_link.click()
time.sleep(2) # TODO: fix?
# wait for results to load
webdriverwait(driver, 10).until(EC.invisibility_of_element_located((By.XPATH, u"//img[contains(@src, 'loading')]")))
print "------"
想法是要有一个无限循环,只有当“下一页”链接被禁用(没有更多可用页面)时,我们才会退出。在每次迭代中,获取表结果(为示例起见,在控制台上打印),单击下一个链接,然后等待出现在网格顶部的“正在加载”旋转圆的隐形性。