|
|
@@ -1,93 +0,0 @@
|
|
|
-import os
|
|
|
-import glob
|
|
|
-import pandas as pd
|
|
|
-import numpy as np
|
|
|
-import matplotlib.pyplot as plt
|
|
|
-from matplotlib.font_manager import FontProperties
|
|
|
-
|
|
|
# ===================== Configuration =====================
# Directory holding the per-segment CSV files, and the column whose
# distribution is analysed.
data_dir = r"E:\Greentech\models\uf-rl\datasets\processed\segments"
target_col = "cycle_long_r2"

# ===================== Chinese font setup =====================
# SimHei is used because it can render the Chinese plot labels.
font_path = r"C:\Windows\Fonts\simhei.ttf"

# ===================== Bin definition =====================
# Edges are passed to pd.cut with right=True, so every bin is closed on its
# right edge: a value of exactly 0.0 is counted under "<0", exactly 0.5 under
# "0 – 0.5", and so on. Values above 1.0 fall outside every bin.
bins = [-np.inf, 0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

labels = [
    "<0",
    "0 – 0.5",
    "0.5 – 0.6",
    "0.6 – 0.7",
    "0.7 – 0.8",
    "0.8 – 0.9",
    "0.9 – 1.0",
]


def load_values(directory, column):
    """Collect the non-null values of ``column`` from every CSV in ``directory``.

    Files that cannot be parsed are reported on stdout and skipped; files
    without the column are ignored.

    Args:
        directory: Folder that is searched (non-recursively) for ``*.csv``.
        column: Name of the column to extract.

    Returns:
        A 1-D numpy array with the concatenated non-null values.

    Raises:
        ValueError: If no file contributed at least one non-null value.
    """
    chunks = []
    for path in glob.glob(os.path.join(directory, "*.csv")):
        # Only the read itself is best-effort; one unreadable file must not
        # abort the whole scan.
        try:
            df = pd.read_csv(path)
        except Exception as e:
            print(f"读取失败: {path}, 错误: {e}")
            continue
        if column in df.columns:
            chunks.append(df[column].dropna().values)

    # Check the number of samples, not the number of files: a column that is
    # present but entirely NaN would otherwise lead to a 0/0 division later.
    if not chunks or sum(len(chunk) for chunk in chunks) == 0:
        raise ValueError(f"未在任何 CSV 中找到有效的 {column} 数据")

    return np.concatenate(chunks)


def bin_distribution(data):
    """Count how many samples of ``data`` fall into each configured bin.

    Args:
        data: 1-D array-like of numeric values.

    Returns:
        A ``(counts, ratios)`` pair of pandas Series indexed by the bin labels
        in bin order. ``ratios`` is the percentage of ``len(data)`` in each
        bin; samples outside all bins are not counted in any bin but still
        contribute to the denominator.
    """
    binned = pd.cut(
        data,
        bins=bins,
        labels=labels,
        right=True,
        include_lowest=True,
    )
    counts = binned.value_counts().sort_index()
    ratios = counts / len(data) * 100
    return counts, ratios


def plot_distribution(ratios):
    """Draw a bar chart of the per-bin percentages and show it.

    Args:
        ratios: Percentages per bin, in the same order as ``labels``.
    """
    font = FontProperties(fname=font_path, size=12)

    plt.figure(figsize=(10, 6))
    plt.bar(labels, ratios, color='skyblue', edgecolor='black')
    plt.title("cycle_long_R2 数据分布柱状图", fontproperties=font)
    plt.xlabel("区间", fontproperties=font)
    plt.ylabel("占比 (%)", fontproperties=font)
    plt.ylim(0, 100)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with its percentage, slightly above the bar top.
    for i, v in enumerate(ratios):
        plt.text(i, v + 1, f"{v:.1f}%", ha='center', va='bottom', fontsize=10, fontproperties=font)

    plt.tight_layout()
    plt.show()


def main():
    """Load the data, print the distribution table and show the bar chart."""
    data = load_values(data_dir, target_col)
    total_count = len(data)

    counts, ratios = bin_distribution(data)
    result = pd.DataFrame({
        "样本数": counts,
        "占比 (%)": ratios.round(2),
    })

    print(f"\n总样本数: {total_count}\n")
    print(result)

    # Samples that pd.cut could not place in any bin are missing from the
    # table; say so instead of letting the percentages silently not add up.
    unbinned = total_count - int(counts.sum())
    if unbinned:
        print(f"\n警告: {unbinned} 个样本不在任何区间内,未计入上表")

    plot_distribution(ratios)


if __name__ == "__main__":
    main()
|