暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

python爬虫获取中国天气网天气数据 requests BeautifulSoup re

捷创源科技 2022-02-22
1566
点击上方蓝字 ● 关注捷创源科技



python获取中国天气网天气数据:http://www.weather.com.cn/textFC/henan.shtml


main.py

    # -*- coding: utf-8 -*-
    import requests
    from xpinyin import Pinyin
    from bs4 import BeautifulSoup
    import re


    """
    Entry point: ask for a province name, download that province's forecast
    page from weather.com.cn and print the per-county weather for each day.
    """
    # Home page of the China Weather site; province pages live under /textFC/.
    URL_MAIN_PAGE = "http://www.weather.com.cn"

    # Browser-like User-Agent, sent to get past the site's basic anti-scraping check.
    REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                                     "Chrome/93.0.4577.82 Safari/537.36 Edg/93.0.961.52"}

    # Seconds to wait for the server before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 10

    # Data cells read from each county row: name, day weather, day wind,
    # max temperature, night weather, night wind, min temperature.
    COUNTY_FIELD_COUNT = 7


    def _fetch_page(url):
        """Download ``url`` and return its body decoded as UTF-8.

        Raises:
            requests.RequestException: on connection failure, timeout or a
                non-2xx HTTP status.
        """
        # The context manager releases the connection even when an error is raised.
        with requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT) as resp:
            resp.raise_for_status()
            resp.encoding = 'utf-8'
            return resp.text


    def _parse_dates(hanml):
        """Return one date label per ``conMidtab5`` table found under ``hanml``.

        The label is the text between the first pair of parentheses in the
        table's third cell. When the cell or the parentheses are missing, the
        stripped cell text (possibly empty) is used so the list stays aligned
        with the day tables.
        """
        dates = []
        for table in hanml.find_all("div", attrs={"class": "conMidtab5"}):
            cells = table.find_all("td")
            text = cells[2].text if len(cells) > 2 else ""
            match = re.search(r'[(](.*?)[)]', text)
            dates.append(match.group(1) if match else text.strip())
        return dates


    def _wind_text(cell):
        """Return the wind direction and force, i.e. the first two <span> texts joined."""
        return "".join(span.text for span in cell.find_all("span")[:2])


    def _parse_county_row(cells, offset):
        """Build the seven-field weather record for one county row.

        Args:
            cells: the row's <td> tags.
            offset: index of the county-name cell (1 for a city's first row,
                which starts with the city-name cell, otherwise 0).
        """
        name_cell = cells[offset]
        link = name_cell.find("a")
        # Fall back to the cell text when the county name is not wrapped in a link.
        county_name = link.text if link is not None else name_cell.text.strip()
        return [
            county_name,                     # county / district name
            cells[offset + 1].text,          # daytime weather
            _wind_text(cells[offset + 2]),   # daytime wind direction and force
            cells[offset + 3].text,          # daytime maximum temperature
            cells[offset + 4].text,          # night weather
            _wind_text(cells[offset + 5]),   # night wind direction and force
            cells[offset + 6].text,          # night minimum temperature
        ]


    def _print_city_table(table_city):
        """Print the city name(s) of one ``conMidtab3`` table, then one record per county row."""
        for name_cell in table_city.find_all("td", attrs={"class": "rowsPan"}):
            print(name_cell.text)  # city name

        for row_index, row in enumerate(table_city.find_all("tr")):
            cells = row.find_all("td")
            # Only the first row carries the leading city-name cell.
            offset = 1 if row_index == 0 else 0
            if len(cells) < offset + COUNTY_FIELD_COUNT:
                continue  # not a complete data row; skip instead of crashing
            print(_parse_county_row(cells, offset))


    def main():
        """Prompt for a province, fetch its forecast page and print every day's data."""
        strProvinceName = input("中国天气网可以查询天气预报,请输入一个省份的名称,例:河南、山东等:").strip()
        if not strProvinceName:
            raise SystemExit("未输入省份名称")

        # Convert the Chinese name to pinyin with no separator, e.g. 河南 -> henan.
        province = Pinyin().get_pinyin(strProvinceName, '')

        # URL of the province's forecast sub-page
        urlChildPage = URL_MAIN_PAGE + "/textFC/" + province + ".shtml"
        print(urlChildPage)

        bsPage = BeautifulSoup(_fetch_page(urlChildPage), "html.parser")
        hanml = bsPage.find("div", attrs={"class": "hanml"})
        if hanml is None:
            raise SystemExit("未找到天气数据,请检查省份名称: " + strProvinceName)

        listDate = _parse_dates(hanml)
        tableDays = hanml.find_all("div", attrs={"class": "conMidtab"})

        # Pair days with dates by position. Looking a day up with list.index()
        # compares tags by content, so two identical day tables would both
        # resolve to the first one's date.
        for day_index, tableDay in enumerate(tableDays):
            print(listDate[day_index] if day_index < len(listDate) else "")
            for tableCity in tableDay.find_all("div", attrs={"class": "conMidtab3"}):
                _print_city_table(tableCity)

        print("结束")


    if __name__ == '__main__':
        main()

    运行测试:输入“河南”

    可以获取一个星期7天的天气数据

      http://www.weather.com.cn/textFC/henan.shtml
      925
      郑州
      ['郑州', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      ['巩义', '小雨', '东风4-5级', '21', '小雨', '东风4-5级', '19']
      ['荥阳', '中雨', '东北风4-5级', '22', '小雨', '东北风3-4级', '19']
      ['登封', '小雨', '东北风3-4级', '22', '小雨', '东北风<3级', '17']
      ['新密', '小雨', '东北风4-5级', '22', '小雨', '东北风3-4级', '18']
      ['新郑', '小雨', '北风4-5级', '22', '小雨', '北风3-4级', '19']
      ['中牟', '中雨', '北风4-5级', '22', '小雨', '东北风3-4级', '19']
      ['上街', '中雨', '东北风4-5级', '22', '小雨', '东北风3-4级', '19']
      ['中原', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      ['二七', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      ['管城', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      ['金水', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      ['惠济', '中雨', '东北风4-5级', '23', '小雨', '东北风3-4级', '19']
      安阳
      ['安阳', '小雨', '北风4-5级', '19', '大雨', '北风3-4级', '18']
      ['汤阴', '小雨', '北风4-5级', '20', '大雨', '北风3-4级', '18']
      ['滑县', '小雨', '北风4-5级', '20', '大雨', '北风4-5级', '18']
      ['内黄', '中雨', '北风3-4级', '20', '大雨', '北风3-4级', '18']
      ['林州', '中雨', '北风3-4级', '18', '大雨', '北风<3级', '17']
      ['文峰', '小雨', '北风4-5级', '19', '大雨', '北风3-4级', '18']
      ['北关', '小雨', '北风4-5级', '19', '大雨', '北风3-4级', '18']
      ['殷都', '小雨', '北风4-5级', '19', '大雨', '北风3-4级', '18']
      ['龙安', '小雨', '北风4-5级', '19', '大雨', '北风3-4级', '18']


      ......


      驻马店
      ['驻马店', '晴', '东南风<3级', '28', '晴', '南风<3级', '17']
      ['西平', '晴', '南风<3级', '28', '晴', '南风<3级', '17']
      ['遂平', '晴', '南风<3级', '28', '晴', '南风<3级', '16']
      ['上蔡', '晴', '南风<3级', '28', '晴', '南风<3级', '18']
      ['汝南', '晴', '东南风<3级', '28', '晴', '南风<3级', '17']
      ['泌阳', '晴', '东南风<3级', '28', '晴', '南风<3级', '16']
      ['平舆', '晴', '东南风<3级', '28', '晴', '南风<3级', '17']
      ['新蔡', '晴', '东南风<3级', '28', '晴', '南风<3级', '17']
      ['确山', '晴', '东南风<3级', '27', '晴', '西南风<3级', '17']
      ['正阳', '晴', '东南风<3级', '28', '晴', '南风<3级', '16']
      ['驿城', '晴', '东南风<3级', '28', '晴', '南风<3级', '17']
      三门峡
      ['三门峡', '晴', '东南风<3级', '28', '晴', '东风<3级', '16']
      ['灵宝', '晴', '南风3-4级', '28', '晴', '南风<3级', '16']
      ['渑池', '晴', '东风3-4级', '24', '晴', '东风<3级', '15']
      ['卢氏', '晴', '南风3-4级', '24', '晴', '西南风<3级', '14']
      ['义马', '晴', '东风3-4级', '24', '晴', '东风<3级', '15']
      ['湖滨', '晴', '东南风<3级', '28', '晴', '东风<3级', '16']
      ['陕州', '晴', '东南风<3级', '28', '晴', '东风<3级', '16']
      济源
      ['济源', '晴', '东风<3级', '26', '晴', '西北风<3级', '16']
      结束



      关注上面微信公众号“捷创源科技”,每天获取技术干货,让我们一起成长!

      文章转载自捷创源科技,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

      评论