| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- import os
- import glob
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from matplotlib.font_manager import FontProperties
# ===================== Configuration =====================
# Build the path with os.path.join so the separator is correct on every
# platform (a literal backslash is just a filename character on POSIX).
data_dir = os.path.join("/datasets/UF_longting_data/processed", "segments")
target_col = "cycle_long_r2"

# ===================== CJK font =====================
# SimHei can render the Chinese labels used in the figure. The file only
# exists on Windows, so _load_font() falls back to a family lookup elsewhere.
font_path = r"C:\Windows\Fonts\simhei.ttf"

# ===================== Bin definition =====================
# Bin edges for pd.cut. Bins are right-closed, so a value of exactly 0.0
# is counted in the "<0" bin and exactly 1.0 in the "0.9 – 1.0" bin.
bins = [
    -np.inf,
    0.0,
    0.5,
    0.6,
    0.7,
    0.8,
    0.9,
    1.0,
]
labels = [
    "<0",
    "0 – 0.5",
    "0.5 – 0.6",
    "0.6 – 0.7",
    "0.7 – 0.8",
    "0.8 – 0.9",
    "0.9 – 1.0",
]


def load_column_values(directory, column):
    """Collect the non-missing numeric values of *column* from every CSV in *directory*.

    Files that cannot be read are reported on stdout and skipped; files that
    lack the column are skipped silently. Cells that cannot be parsed as
    numbers are treated as missing and dropped.

    Args:
        directory: Directory that is searched (non-recursively) for ``*.csv``.
        column: Name of the column to extract.

    Returns:
        A 1-D numpy array with the values of all files concatenated.

    Raises:
        ValueError: If no file contributed at least one valid value.
    """
    collected = []
    for file in sorted(glob.glob(os.path.join(directory, "*.csv"))):
        try:
            df = pd.read_csv(file)
        except Exception as e:
            # Best-effort loading: one unreadable file must not abort the run.
            print(f"读取失败: {file}, 错误: {e}")
            continue
        if column in df.columns:
            numeric = pd.to_numeric(df[column], errors="coerce")
            collected.append(numeric.dropna().to_numpy())

    # Also catches the case where the column exists but holds only NaN,
    # which would otherwise lead to a division by zero further down.
    if not collected or sum(len(chunk) for chunk in collected) == 0:
        raise ValueError(f"未在任何 CSV 中找到有效的 {column} 数据")
    return np.concatenate(collected)


def compute_distribution(data, bin_edges=None, bin_labels=None):
    """Count how many samples fall into each bin and their share of all samples.

    Args:
        data: 1-D array-like of numeric samples.
        bin_edges: Monotonically increasing edges for ``pd.cut``; defaults to
            the module-level ``bins``.
        bin_labels: One label per bin; defaults to the module-level ``labels``.

    Returns:
        A tuple ``(counts, ratios, outside)`` where ``counts`` is the number
        of samples per bin, ``ratios`` is the percentage of *all* samples per
        bin (not rounded), and ``outside`` is the number of samples that did
        not fall into any bin (for example values above the last edge).

    Raises:
        ValueError: If *data* is empty.
    """
    if bin_edges is None:
        bin_edges = bins
    if bin_labels is None:
        bin_labels = labels

    total_count = len(data)
    if total_count == 0:
        raise ValueError("样本数据为空,无法统计分布")

    binned = pd.cut(
        data,
        bins=bin_edges,
        labels=bin_labels,
        right=True,
        include_lowest=True,
    )
    counts = binned.value_counts().sort_index()
    ratios = counts / total_count * 100
    # pd.cut marks samples that match no bin with code -1.
    outside = int((np.asarray(binned.codes) == -1).sum())
    return counts, ratios, outside


def _load_font():
    """Return the font used for all Chinese text in the figure."""
    if os.path.exists(font_path):
        return FontProperties(fname=font_path, size=12)
    # No SimHei file on this machine: let matplotlib pick an installed CJK font.
    return FontProperties(
        family=["SimHei", "Microsoft YaHei", "Noto Sans CJK SC", "sans-serif"],
        size=12,
    )


def plot_distribution(ratios, bin_labels):
    """Draw and show a bar chart of the per-bin percentages."""
    font = _load_font()
    plt.figure(figsize=(10, 6))
    plt.bar(bin_labels, ratios, color='skyblue', edgecolor='black')
    plt.title("cycle_long_R2 数据分布柱状图", fontproperties=font)
    plt.xlabel("区间", fontproperties=font)
    plt.ylabel("占比 (%)", fontproperties=font)
    plt.ylim(0, 100)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    # Print the percentage just above each bar.
    for i, v in enumerate(ratios):
        plt.text(i, v + 1, f"{v:.1f}%", ha='center', va='bottom', fontsize=10, fontproperties=font)
    plt.tight_layout()
    plt.show()


def main():
    """Load the data, print the distribution table and show the bar chart."""
    data = load_column_values(data_dir, target_col)
    total_count = len(data)
    counts, ratios, outside = compute_distribution(data, bins, labels)

    result = pd.DataFrame({
        "样本数": counts,
        "占比 (%)": ratios.round(2),
    })
    print(f"\n总样本数: {total_count}\n")
    print(result)
    if outside:
        # Explain why the percentages above do not add up to 100.
        print(f"\n注意: 有 {outside} 个样本超出区间范围 (> {bins[-1]}),未计入任何区间")

    plot_distribution(ratios, labels)


if __name__ == "__main__":
    main()
|