"""Compute per-column summary statistics of the chem-cycle state and save them as CSV."""

from pathlib import Path

import pandas as pd
import yaml

from loader import load_all_units_cycles, resolve_path
from state_construction import build_chem_cycle_state, STATE_COLUMNS


def load_config(path: Path) -> dict:
    """Parse the YAML file at *path* and return its contents.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``.
    """
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def _summarize_column(series: pd.Series, percentiles) -> dict:
    """Return min/max/mean/std and the requested percentiles of *series*.

    Args:
        series: Column to summarise.
        percentiles: Percentiles on the 0-100 scale, e.g. ``[1, 50, 99]``.

    Returns:
        A dict with keys ``min``, ``max``, ``mean``, ``std`` followed by one
        ``p{p}`` key per entry of *percentiles*, in the given order.
    """
    desc = series.describe()
    summary = {name: desc[name] for name in ("min", "max", "mean", "std")}

    # Ask for the quantiles directly instead of reading them back from
    # ``describe()`` by its "<p>%" labels: those labels are formatted by
    # pandas (e.g. 5.0 becomes "5%"), so rebuilding them with an f-string
    # raises KeyError for some perfectly valid configured values.
    quantiles = series.quantile([p / 100 for p in percentiles])
    summary.update(
        (f"p{p}", value) for p, value in zip(percentiles, quantiles.to_numpy())
    )
    return summary


def extract_state_space_bounds(config_path: Path) -> None:
    """Write summary statistics for every state column to ``state_space_bounds.csv``.

    Loads the YAML config at *config_path*, loads all unit cycles, keeps the
    rows whose ``chem_cycle_valid`` equals ``True``, builds the chem-cycle
    state frame and, for each column in ``STATE_COLUMNS``, records min, max,
    mean, std and the percentiles listed under
    ``StateSpaceExtraction.statistics.percentiles``. The result (one row per
    state column) is written to ``<output_dir>/state_space_bounds.csv``, where
    ``output_dir`` is resolved from ``Paths.data_to_rl.output_dir`` relative
    to ``Paths.project_root`` and created if missing.

    Args:
        config_path: Path to the YAML configuration file.

    Raises:
        KeyError: If a required configuration key or data column is missing.
    """
    cfg = load_config(config_path)

    project_root = Path(cfg["Paths"]["project_root"])
    output_dir = resolve_path(
        project_root, cfg["Paths"]["data_to_rl"]["output_dir"]
    )
    output_dir.mkdir(parents=True, exist_ok=True)

    df = load_all_units_cycles(cfg)
    # Element-wise comparison on purpose: it builds the boolean row mask and
    # behaves the same even if the column is not strictly of bool dtype.
    df = df[df["chem_cycle_valid"] == True]  # noqa: E712
    state_df = build_chem_cycle_state(df)

    percentiles = cfg["StateSpaceExtraction"]["statistics"]["percentiles"]
    stats = {
        col: _summarize_column(state_df[col], percentiles)
        for col in STATE_COLUMNS
    }

    # ``stats`` maps column -> statistics; transpose so each state column
    # becomes one CSV row.
    pd.DataFrame(stats).T.to_csv(output_dir / "state_space_bounds.csv")