3D立体图:使用ax.plot_trisurf和ax.grid方法绘制月最高温和最低温3D曲面图。(说明:这里本来打算抓取12个月的数据,为每个月份生成一张对应的可视化图,再将这些可视化图合成一个GIF动态图,达到类似轮播图的效果。直到我刚好看见一个动态图,有了新的想法……)
import csv
from collections import defaultdict
from datetime import datetime

import requests
from lxml import etree
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib import rcParams
from mpl_toolkits.mplot3d import Axes3D  # used for creating 3D figures
import imageio.v2 as imageio
from IPython.display import Image, display
from wordcloud import WordCloud
# NOTE: this import rebinds `Image`; from here on `Image` is PIL's module,
# not the IPython display class imported two lines above.
from PIL import Image

# Select SimHei as the sans-serif font (a font that can also render the
# minus sign alongside the Chinese labels).
rcParams['font.sans-serif'] = ['SimHei']
# Replace this path with the location of a font file on your machine.
font = font_manager.FontProperties(fname="C:/Windows/Fonts/simsun.ttc", size=14)
# Avoid the minus sign '-' being rendered as a box in saved images.
plt.rcParams['axes.unicode_minus'] = False
def getWeather(url):
    """Download one history page and return its per-day weather records.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with one dict per ``<li>`` under ``ul.thrui``. Each dict holds
        the raw page text under the keys "日期", "最高气温", "最低气温",
        "天气情况", "风向" and "风速". List items that lack any of the
        expected cells are skipped.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (including
            a timeout after 10 seconds).
    """
    # A browser-like User-Agent is mandatory: without it the site returns no data.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36'
    }
    # The timeout keeps a stalled connection from blocking the script forever.
    resp = requests.get(url, headers=headers, timeout=10)

    # Only `etree` is imported from lxml in this file, so parse with
    # etree.HTML rather than the (unimported) lxml.html module.
    tree = etree.HTML(resp.text)
    if tree is None:
        return []

    day_weather_info = []
    for li in tree.xpath("//ul[@class='thrui']/li"):
        dates = li.xpath('.//div[@class="th200"]/text()')            # date
        max_temperatures = li.xpath('.//div[@class="th140"]/text()')  # first th140 text = daily high
        min_temperatures = li.xpath('.//div[@class="th140"][2]/text()')    # daily low
        weather_conditions = li.xpath('.//div[@class="th140"][3]/text()')  # conditions
        wind = li.xpath('.//div[@class="th140"][4]/text()')                # "<direction> <speed>"

        # Skip malformed rows instead of failing on an empty XPath result.
        if not (dates and max_temperatures and min_temperatures
                and weather_conditions and wind):
            continue

        # The wind cell is split on a single space: direction first, speed second.
        wind_parts = wind[0].split(' ')
        wind_direction = wind_parts[0]
        wind_speed = wind_parts[1] if len(wind_parts) > 1 else ''

        day_weather_info.append({
            "日期": dates[0],
            "最高气温": max_temperatures[0],
            "最低气温": min_temperatures[0],
            "天气情况": weather_conditions[0],
            "风向": wind_direction,
            "风速": wind_speed
        })
    return day_weather_info
# Build the twelve monthly page addresses for 2023 (month zero-padded to two
# digits), then scrape each one in order.
month_urls = [
    'https://lishi.tianqi.com/dongguan/2023{:02d}.html'.format(month_number)
    for month_number in range(1, 13)
]
weathers = list(map(getWeather, month_urls))
print(weathers)
# Write the scraped data to disk in a single pass.
with open("test01_weather.csv", "w",newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Column names go first.
    writer.writerow(["日期", "最高气温", "最低气温", '天气',"风向","风速"])
    # Flatten months -> days lazily; each day's dict values form one CSV row.
    day_rows = (
        list(day_record.values())
        for month_records in weathers
        for day_record in month_records
    )
    writer.writerows(day_rows)
print("写入成功!")
# Read weather.csv into a list of rows. The header row is not skipped here.
with open("weather.csv", "r", newline='') as csvfile:
    reader = csv.reader(csvfile)
    read = []
    for row in reader:
        # Drop rows in which every cell is blank.
        if not any(cell.strip() for cell in row):
            continue
        read.append(row)
print("读取成功!")
print(read)
# Normalise the raw CSV rows into dicts with descriptive keys.
original_data = read
# Start at index 1 because the first row is the table header; only the first
# four columns of each row are carried over.
formatted_data = [
    {
        '日期时间': row[0],
        '最高温度': row[1],
        '最低温度': row[2],
        '天气状况': row[3],
    }
    for row in original_data[1:]
]
# Print the first converted record as a quick sanity check.
print(formatted_data[0])
def show(date_time,high_temp,low_temp,weather):
    """Draw, save and display five charts for one month of weather data.

    The charts are a line chart, a scatter chart, a pie chart, a heat map and
    a 3D surface. Each one is saved under ``./show/`` (created if missing) as
    ``<kind>_<month>.jpg`` and then shown.

    The month number used in titles and file names is NOT a parameter: it is
    read from the module-level variable ``month``, which the caller must set
    before calling this function.

    Args:
        date_time: Per-day x-axis labels; must be convertible to float for
            the 3D surface (the caller passes two-character day strings).
        high_temp: Daily maximum temperatures (numeric).
        low_temp: Daily minimum temperatures (numeric), aligned with
            ``high_temp``.
        weather: Weather-condition label for each day.

    Returns:
        None. Nothing is drawn when ``date_time`` is empty.
    """
    # Function-scope imports: the file's import block does not provide these.
    import os
    from collections import Counter

    # An empty month would only yield blank or failing plots.
    if len(date_time) == 0:
        return

    # savefig does not create missing directories.
    os.makedirs('./show', exist_ok=True)

    # --- Line chart ---
    plt.plot(date_time, high_temp, label='最高温度')
    plt.plot(date_time, low_temp, label='最低温度')
    plt.xlabel('日期时间')
    plt.ylabel('温度')
    plt.title(str(month)+'月最高温度和最低温度折线图')
    plt.legend()
    plt.savefig('./show/plot_'+str(month)+'.jpg')
    plt.show()

    # --- Scatter chart ---
    plt.scatter(date_time, high_temp, label='最高温度')
    plt.scatter(date_time, low_temp, label='最低温度')
    # NOTE(review): neither scatter call passes a colour array, so this
    # colorbar is presumably not tied to the plotted data - confirm intent.
    cbar = plt.colorbar()
    cbar.set_label('Color Intensity')
    plt.title(str(month)+'月最高温度和最低温度散点图')
    plt.xlabel('日期时间')
    plt.ylabel('温度')
    plt.legend()
    plt.savefig('./show/scatter_'+str(month)+'.jpg')
    plt.show()

    # --- Pie chart: share of each weather condition ---
    # Counter tallies the labels in one pass.
    weather_counts = Counter(weather)
    plt.pie(list(weather_counts.values()), labels=list(weather_counts.keys()), autopct='%1.1f%%')
    plt.title(str(month)+'月天气状况分布饼状图')
    plt.savefig('./show/pie_'+str(month)+'.jpg')
    plt.show()

    # --- Heat map of the daily temperature spread (high - low) ---
    temp_range = np.array([high - low for high, low in zip(high_temp, low_temp)])
    plt.figure()
    # Reshape to a single column so each day becomes one row of the image.
    plt.imshow(temp_range.reshape(-1, 1), cmap='hot', aspect='auto')
    plt.xlabel('日期时间')
    plt.ylabel('温度范围')
    plt.colorbar(label='温度差')
    plt.title(str(month)+'月最高温度和最低温度热力图')
    plt.savefig('./show/hot_'+str(month)+'.jpg')
    plt.show()

    # --- 3D surface: x = day, y = daily high, z = daily low ---
    fig = plt.figure(figsize=(8,5),dpi=100)
    ax = fig.add_subplot(111, projection='3d')
    # The triangulation needs numeric coordinates, so convert the day labels.
    X = [float(day) for day in date_time]
    Y = list(high_temp)
    Z = list(low_temp)
    ax.plot_trisurf(X, Y, Z, cmap='viridis', edgecolor='none')
    ax.grid(True)
    plt.title(str(month)+'月最高温度和最低温度3D曲面图')
    ax.set_xlabel('日期时间')
    ax.set_ylabel('最高温度')
    ax.set_zlabel('最低温度')
    plt.savefig('./show/3D_'+str(month)+'.jpg')
    plt.show()
# Group the formatted records by calendar month (1..12).
monthly_data = defaultdict(list)
for item in formatted_data:
    # strptime validates the 'YYYY-MM-DD' text and always yields a month in
    # 1..12, so no extra range check is needed.
    record_month = datetime.strptime(item['日期时间'], '%Y-%m-%d').month
    monthly_data[record_month].append(item)

# Chart every month, January through December. The stop value is 13 because
# range() excludes it; a stop of 12 would silently drop December.
# The loop variable must be the module-level name `month`: show() reads it
# for chart titles and output file names.
for month in range(1, 13):
    month_records = monthly_data.get(month)
    if not month_records:
        # Nothing recorded for this month - skip it.
        continue
    date_time = [item['日期时间'][-2:] for item in month_records]   # day of month, e.g. '07'
    high_temp = [int(item['最高温度']) for item in month_records]
    low_temp = [int(item['最低温度']) for item in month_records]
    weather = [item['天气状况'] for item in month_records]
    # Draw the line, scatter, pie, heat-map and 3D-surface charts.
    show(date_time, high_temp, low_temp, weather)
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt


def generate_wordcloud(image_path, font_path, weather_data, save_path=None):
    """Draw a static word cloud of weather labels shaped by a mask image.

    Args:
        image_path: Image file whose pixel array is used as the cloud mask.
        font_path: Font file passed to WordCloud for rendering the labels.
        weather_data: Iterable of weather-condition strings; a label that
            occurs more often gets a larger weight.
        save_path: Optional output file. When given, the current matplotlib
            figure is saved there and a confirmation is printed.

    Returns:
        None. The cloud is drawn on the current matplotlib figure; the figure
        is not shown by this function.

    Raises:
        ValueError: If ``weather_data`` contains no labels.
    """
    # Function-scope import: Counter is not imported at file level.
    from collections import Counter

    # The mask image defines the outline of the cloud.
    mask_array = np.array(Image.open(image_path))

    # Count each label explicitly. Concatenating the labels with '' would
    # fuse them into a single unbroken string and lose per-label frequency.
    label_counts = Counter(weather_data)
    if not label_counts:
        raise ValueError("weather_data must contain at least one label")

    wordcloud = WordCloud(
        background_color='white',
        font_path=font_path,
        mask=mask_array,
        max_words=len(label_counts),  # allow every distinct label to appear
    ).generate_from_frequencies(label_counts)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')

    # Save only when an output path was supplied.
    if save_path:
        plt.savefig(save_path)
        print("词云图已保存至"+save_path)
# Example usage: build the static word cloud.
image_path = "./sucai/basketball.jpg"
font_path = 'simhei.ttf'
# Passing a save path makes generate_wordcloud write the figure to this file.
save_path = "./sucai/basketball2.jpg"

# Collect every weather-condition label, then list the whole sequence twice.
condition_labels = [item['天气状况'] for item in formatted_data]
weather_data = condition_labels + condition_labels

generate_wordcloud(image_path, font_path, weather_data, save_path)

# Display the image at image_path, resized to 100x100.
thumbnail = Image.open(image_path).resize((100, 100))
display(thumbnail)
# Split a GIF into individual frames and save each one.
import cv2


def process_gif_to_jpg(gif_path, output_folder):
    """Save every frame of a GIF as ``<output_folder>/frame_<i>.jpg``.

    Args:
        gif_path: Path of the GIF file to read.
        output_folder: Directory that receives the frames; created if it
            does not exist.

    Returns:
        None. Prints a message and returns early if the file cannot be
        opened; stops at the first frame that cannot be read.
    """
    # Function-scope import: `os` is not imported at file level.
    import os

    gif = cv2.VideoCapture(gif_path)

    # Bail out when the file could not be opened.
    if not gif.isOpened():
        print("无法打开GIF文件")
        return

    try:
        # Make sure the destination directory exists before writing frames.
        os.makedirs(output_folder, exist_ok=True)

        # Number of frames reported by the capture.
        frame_count = int(gif.get(cv2.CAP_PROP_FRAME_COUNT))

        for i in range(frame_count):
            ret, frame = gif.read()
            # Stop at the first frame that fails to decode.
            if not ret:
                print("无法读取帧")
                break

            output_file = f"{output_folder}/frame_{i}.jpg"
            print(output_file)
            cv2.imwrite(output_file, frame)
    finally:
        # Always release the capture, even if a write raises.
        gif.release()


# Example usage
gif_path = r'./sucai/basketball.gif'
output_folder = r'./sucai/tmp'
process_gif_to_jpg(gif_path, output_folder)
# Build the animated word cloud.
import imageio
from IPython.display import Image, display
from imageio import imread, mimsave


def create_gif(image_files, output_file, fps=10):
    """Combine image files, in the given order, into one GIF and display it.

    Args:
        image_files: Paths of the frame images.
        output_file: Path of the GIF to write.
        fps: Frame-rate value forwarded to ``mimsave``.
    """
    frames = [imread(frame_path) for frame_path in image_files]
    mimsave(output_file, frames, fps=fps)
    # Show the freshly written GIF inline.
    display(Image(filename=output_file))


# Create the GIF by calling the function on 160 numbered frames.
output_path='./sucai/dance.gif'
images_path = ['./sucai/tmp2/jitu_{}.jpg'.format(frame_no) for frame_no in range(160)]
create_gif(images_path, output_path)
# Scrape six years of weather data (2018 through 2023) into one CSV file.
with open("6years_weather.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Header: one name per value in each getWeather() record. The records
    # carry six fields (including wind direction and speed), so the header
    # needs six names too; a shorter header leaves the last columns unnamed.
    writer.writerow(["日期", "最高气温", "最低气温", '天气', "风向", "风速"])

    for year in range(2018, 2024):
        # A fresh list per year collects that year's rows.
        yearly_weather = []
        for month in range(1, 13):
            # Year followed by the zero-padded month, e.g. 201803.
            weather_time = f"{year}{month:02d}"
            print(weather_time)
            url = f'https://lishi.tianqi.com/dongguan/{weather_time}.html'
            # Scrape this month's page.
            weather = getWeather(url)
            # Each day's dict values become one CSV row.
            yearly_weather.extend(list(day_weather_dict.values()) for day_weather_dict in weather)
        # Write one whole year at a time.
        writer.writerows(yearly_weather)
        print(str(year)+"年数据存入完成。")

print("6年数据写入成功!")
# Read 6years_weather.csv into a list of rows. The header row is not skipped.
with open("6years_weather.csv", "r", newline='') as csvfile:
    reader = csv.reader(csvfile)
    read = []
    for row in reader:
        # Ignore rows whose cells are all blank.
        if not any(cell.strip() for cell in row):
            continue
        read.append(row)
print("6年数据读取成功!")
print(read)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt

# `read` holds the CSV content as nested lists, header row first.
data = read
header = data[0]
# Trim every row to the header's width. Rows may carry more cells than there
# are column names (e.g. trailing wind columns), and DataFrame rejects rows
# wider than `columns`.
rows = [row[:len(header)] for row in data[1:]]
df = pd.DataFrame(rows, columns=header)

# Feature: daily high. Target: daily low. CSV cells are text, so cast to float.
X = df[['最高气温']].astype(float)
y = df['最低气温'].astype(float)

# Hold out half of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# Fit an ordinary linear regression on the training half.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the daily low for the held-out rows.
y_pred = model.predict(X_test)

# Scatter the actual test points and overlay the fitted line.
plt.scatter(X_test, y_test, color='blue')
plt.plot(X_test, y_pred, color='red')
plt.title('最高气温与最低气温的线性回归模型')
plt.xlabel('最高气温')
plt.ylabel('最低气温')
plt.show()