import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# ===================== Configuration =====================
# Directory that is scanned (non-recursively) for segment CSV files.
data_dir = r"E:\Greentech\models\uf-rl\datasets\processed\segments"
# Name of the CSV column whose value distribution is analysed.
target_col = "cycle_long_r2"

# ===================== CJK font setup =====================
# NOTE: the SimHei font is loaded explicitly from its Windows font path so
# that the Chinese plot title and axis labels can be rendered.
font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=12)

# ===================== Load every CSV =====================
# Gather the ``target_col`` values from each CSV directly inside ``data_dir``.
# Files that fail to load are reported and skipped; files without the column
# are ignored. The result is one flat float array ``data`` plus its length
# ``total_count``, which is guaranteed to be greater than zero.
all_files = glob.glob(os.path.join(data_dir, "*.csv"))

values = []

for file in all_files:
    try:
        df = pd.read_csv(file)
        if target_col not in df.columns:
            continue
        # Coerce to numbers first: stray text cells become NaN and are dropped
        # together with genuinely missing values, so the merged array is
        # always numeric and can be binned.
        vals = (
            pd.to_numeric(df[target_col], errors="coerce")
            .dropna()
            .to_numpy(dtype=float)
        )
        # Skip files that contribute nothing, so that an all-NaN data set is
        # caught by the emptiness check below instead of producing a zero
        # sample count (and NaN percentages) further down.
        if vals.size:
            values.append(vals)
    except Exception as e:  # best effort: one bad file must not abort the run
        print(f"读取失败: {file}, 错误: {e}")

# Merge the per-file arrays.
if not values:
    # Report the column name that was actually searched for (column lookup is
    # case-sensitive), not a hard-coded spelling.
    raise ValueError(f"未在任何 CSV 中找到有效的 {target_col} 数据")

data = np.concatenate(values)
total_count = len(data)

# ===================== Interval definition =====================
# Bin edges for the distribution. They are consumed with right-closed
# intervals, i.e. (-inf, 0], (0, 0.5], (0.5, 0.6], ..., (0.9, 1.0].
bins = [
    -np.inf,
    0.0,
    0.5,
    0.6,
    0.7,
    0.8,
    0.9,
    1.0,
]

# One display label per interval (len(labels) == len(bins) - 1).
# The first interval is closed on the right and therefore contains 0 itself,
# so it is labelled "≤0" rather than "<0".
labels = [
    "≤0",
    "0 – 0.5",
    "0.5 – 0.6",
    "0.6 – 0.7",
    "0.7 – 0.8",
    "0.8 – 0.9",
    "0.9 – 1.0",
]

# ===================== Distribution statistics =====================
# Assign every sample to one of the labelled, right-closed intervals.
binned = pd.cut(data, bins=bins, labels=labels, right=True, include_lowest=True)

# Number of samples per interval, ordered by interval.
counts = binned.value_counts()
counts = counts.sort_index()

# Share of each interval relative to the total sample count, in percent.
ratios = counts.div(total_count).mul(100)

# ===================== Report =====================
# Tabulate counts next to percentages (rounded to two decimals) and print them.
result = pd.DataFrame({"样本数": counts, "占比 (%)": ratios.round(2)})

print(f"\n总样本数: {total_count}\n")
print(result)

# ===================== Bar chart =====================
# Draw one bar per interval showing its percentage share, on a fixed 0-100 axis.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, ratios, color='skyblue', edgecolor='black')
ax.set_title("cycle_long_R2 数据分布柱状图", fontproperties=font)
ax.set_xlabel("区间", fontproperties=font)
ax.set_ylabel("占比 (%)", fontproperties=font)
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Annotate each bar with its percentage, placed slightly above the bar top.
for bar_index, percent in enumerate(ratios):
    ax.text(
        bar_index,
        percent + 1,
        f"{percent:.1f}%",
        ha='center',
        va='bottom',
        fontsize=10,
        fontproperties=font,
    )

fig.tight_layout()
plt.show()