您好, 欢迎来到 !    登录 | 注册 | | 设为首页 | 收藏本站

python爬虫 双色球数据更新

5b51 2022/1/14 8:23:21 python 字数 11347 阅读 627 来源 www.jb51.cc/python

python爬虫双色球数据更新解析网站并获取数据#彩票数据所在的urlurl='http://datachart.500.com/ssq/'#提取数据response=requests.get(url,headers={"User-Agent":UserAgent().chrome})#通过xpath去解析e=etree.HTML(response.text)date_times=e.xpa

概述

Python爬虫 双色球数据更新

解析网站并获取数据

# 彩票数据所在的url
url = 'http://datachart.500.com/ssq/'
# 提取数据
response = requests.get(url, headers={
  "User-Agent": UserAgent().chrome})
# 通过xpath去解析
e = etree.HTML(response.text)
date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')

链接数据库

# 链接数据库
client = pyMysqL.connect(host='localhost', port=3306, user='root', password='123456', charset='utf8', db='bangumi')
cursor = client.cursor()

普通获取数据

# 插入数据的sql
sql = 'insert into doubleballs values(0,%s,%s,%s)'
for data_time, tr in zip(date_times, trs):
    red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
    blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
    print("第" + data_time + "红球是:" + red_ball + " 蓝球:" + blue_ball)
    cursor.execute(sql, [data_time, red_ball, blue_ball])
    client.commit()

更新数据这部分从数据库获取数据,然后反转顺序,index作为计数器,循环遍历假如有新数据要更新,那result返回值为0,index+1。

# 查看数据是否存在
select_new_sql = "select * from doubleballs where date_time = %s"
date_times.reverse()
# 记录有多少条新数据
index = 0
for data_time in date_times:
    result = cursor.execute(select_new_sql, [data_time])
    # 判断数据是否存在
    if result == 1:
        break
    index+=1

数据顺序反转,按照网站上xpath写法获取第1个数据,即最新的数据放到数据库中。

# 数据从新到旧排序
trs.reverse()
for i in range(index):
    # 提取红球
    red_ball = '-'.join(trs[i].xpath('./td[@class="chartBall01"]/text()'))
    # 提取蓝球
    blue_ball = trs[i].xpath('./td[@class="chartBall02"]/text()')[0]
    print("第" + date_times[i] + "红球是:" + red_ball + " 蓝球:" + blue_ball)
    cursor.execute(sql, [date_times[i], red_ball, blue_ball])
    client.commit()

完整代码

import requests
from fake_useragent import UserAgent
from lxml import html
import pyMysqL

etree = html.etree
# 彩票数据所在的url
url = 'http://datachart.500.com/ssq/'
# 提取数据
response = requests.get(url, headers={
  "User-Agent": UserAgent().chrome})
# 通过xpath去解析
e = etree.HTML(response.text)
date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')
# 链接数据库
client = pyMysqL.connect(host='localhost', port=3306, user='root', password='123456', charset='utf8', db='bangumi')
cursor = client.cursor()
# 插入数据的sql
sql = 'insert into doubleballs values(0,%s,%s,%s)'
# 查看数据是否存在
select_new_sql = "select * from doubleballs where date_time = %s"
date_times.reverse()
# 记录有多少条新数据
index = 0
for data_time in date_times:
    reslut = cursor.execute(select_new_sql, [data_time])
    # 判断数据是否存在
    if reslut == 1:
        break
    index += 1
# 数据从新到旧排序
trs.reverse()
for i in range(index):
    # 提取红球
    red_ball = '-'.join(trs[i].xpath('./td[@class="chartBall01"]/text()'))
    # 提取蓝球
    blue_ball = trs[i].xpath('./td[@class="chartBall02"]/text()')[0]
    print("第" + date_times[i] + "红球是:" + red_ball + " 蓝球:" + blue_ball)
    cursor.execute(sql, [date_times[i], red_ball, blue_ball])
    client.commit()
# for data_time, tr in zip(date_times, trs):
#     red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
#     blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
#     print("第" + data_time + "红球是:" + red_ball + " 蓝球:" + blue_ball)
#     cursor.execute(sql, [data_time, red_ball, blue_ball])
#     client.commit()
cursor.close()
client.close()

这样排序会让更新之后最新的在数据最后,但是一开始排序的时候不会出现问题,是从新到旧的排序。于是因为有点强迫症,最后还是改成从旧到新排序。只需要改动几行代码即可,反转数据在提取红球蓝球数据前,保证数据最新,判断时每次存在就让index+1,之后加入index = count - index其中count = data_time.__len__()。接着将循环中的i变成index-i-1。最终代码如下,

import requests
from fake_useragent import UserAgent
from lxml import html
import pyMysqL

etree = html.etree
# 彩票数据所在的url
url = 'http://datachart.500.com/ssq/'
# 提取数据
response = requests.get(url, headers={
  "User-Agent": UserAgent().chrome})
# 通过xpath去解析
e = etree.HTML(response.text)
date_times = e.xpath('//tbody[@id="tdata"]/tr/td[1]/text()')
trs = e.xpath('//tbody[@id="tdata"]/tr[not(@class)]')
count =date_times.__len__()
# 链接数据库
client = pyMysqL.connect(host='localhost', port=3306, user='root', password='123456', charset='utf8', db='bangumi')
cursor = client.cursor()
# 插入数据的sql
sql = 'insert into doubleballs values(0,%s,%s,%s)'
# 查看数据是否存在
select_new_sql = "select * from doubleballs where date_time = %s"
# date_times.reverse()
# 记录有多少条新数据
index = 0
for data_time in date_times:
    reslut = cursor.execute(select_new_sql, [data_time])
    # 判断数据是否存在
    if reslut == 1:
        index += 1
index = count - index
# 数据从新到旧排序
# trs.reverse()
date_times.reverse()
trs.reverse()
for i in range(index):
    # 提取红球
    red_ball = '-'.join(trs[index-i-1].xpath('./td[@class="chartBall01"]/text()'))
    # 提取蓝球
    blue_ball = trs[index-i-1].xpath('./td[@class="chartBall02"]/text()')[0]
    print("第" + date_times[index-i-1] + "红球是:" + red_ball + " 蓝球:" + blue_ball)
    cursor.execute(sql, [date_times[index-i-1], red_ball, blue_ball])
    client.commit()
# for data_time, tr in zip(date_times, trs):
#     red_ball = '-'.join(tr.xpath('./td[@class="chartBall01"]/text()'))
#     blue_ball = tr.xpath('./td[@class="chartBall02"]/text()')[0]
#     print("第" + data_time + "红球是:" + red_ball + " 蓝球:" + blue_ball)
#     cursor.execute(sql, [data_time, red_ball, blue_ball])
#     client.commit()
cursor.close()
client.close()

总结

以上是编程之家为你收集整理的python爬虫 双色球数据更新全部内容,希望文章能够帮你解决python爬虫 双色球数据更新所遇到的程序开发问题。


如果您也喜欢它,动动您的小指点个赞吧

除非注明,文章均由 laddyq.com 整理发布,欢迎转载。

转载请注明:
链接:http://laddyq.com
来源:laddyq.com
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。


联系我
置顶