
Initial commit

wmy committed 5 months ago
commit 2ededf55d4
50 files changed, 5371 insertions(+), 0 deletions(-)
  1. .idea/.gitignore (+8, -0)
  2. README.md (+88, -0)
  3. api/__init__.py (+2, -0)
  4. api/routers/__init__.py (+2, -0)
  5. api/routers/health.py (+108, -0)
  6. api/routers/models.py (+146, -0)
  7. api/routers/users.py (+127, -0)
  8. ci/model_build.yml (+79, -0)
  9. ci/model_deploy.yml (+97, -0)
  10. ci/scripts/deploy_model.sh (+77, -0)
  11. common/__init__.py (+2, -0)
  12. common/data/__init__.py (+2, -0)
  13. common/data/loader.py (+142, -0)
  14. common/metrics/__init__.py (+2, -0)
  15. common/metrics/classification.py (+179, -0)
  16. common/metrics/regression.py (+157, -0)
  17. common/preprocessing/__init__.py (+2, -0)
  18. common/preprocessing/image.py (+166, -0)
  19. common/preprocessing/text.py (+146, -0)
  20. common/utils/__init__.py (+2, -0)
  21. common/utils/config.py (+191, -0)
  22. common/utils/logger.py (+151, -0)
  23. env.example (+33, -0)
  24. models/pressure_prediction_model/20分钟TMP预测模型源码/args.py (+55, -0)
  25. models/pressure_prediction_model/20分钟TMP预测模型源码/data_export.py (+85, -0)
  26. models/pressure_prediction_model/20分钟TMP预测模型源码/data_preprocessor.py (+276, -0)
  27. models/pressure_prediction_model/20分钟TMP预测模型源码/data_trainer.py (+256, -0)
  28. models/pressure_prediction_model/20分钟TMP预测模型源码/edge_index.pt (BIN)
  29. models/pressure_prediction_model/20分钟TMP预测模型源码/gat_lstm.py (+99, -0)
  30. models/pressure_prediction_model/20分钟TMP预测模型源码/main.py (+70, -0)
  31. models/pressure_prediction_model/20分钟TMP预测模型源码/model.pth (BIN)
  32. models/pressure_prediction_model/20分钟TMP预测模型源码/predict.py (+314, -0)
  33. models/pressure_prediction_model/20分钟TMP预测模型源码/scaler.pkl (BIN)
  34. models/pressure_prediction_model/90天TMP预测模型源码/args.py (+55, -0)
  35. models/pressure_prediction_model/90天TMP预测模型源码/data_preprocessor.py (+255, -0)
  36. models/pressure_prediction_model/90天TMP预测模型源码/data_trainer.py (+247, -0)
  37. models/pressure_prediction_model/90天TMP预测模型源码/gat_lstm.py (+94, -0)
  38. models/pressure_prediction_model/90天TMP预测模型源码/main.py (+70, -0)
  39. models/pressure_prediction_model/90天TMP预测模型源码/model.pth (BIN)
  40. models/pressure_prediction_model/90天TMP预测模型源码/predict.py (+254, -0)
  41. models/pressure_prediction_model/90天TMP预测模型源码/scaler.pkl (BIN)
  42. models/uf-rl/DQN_decide.py (+246, -0)
  43. models/uf-rl/DQN_env.py (+340, -0)
  44. models/uf-rl/DQN_train.py (+244, -0)
  45. models/uf-rl/UF_decide.py (+405, -0)
  46. models/uf-rl/UF_models.py (+33, -0)
  47. models/uf-rl/uf_bw.pth (BIN)
  48. models/uf-rl/uf_fp.pth (BIN)
  49. requirements-dev.txt (+18, -0)
  50. requirements.txt (+46, -0)

+ 8 - 0
.idea/.gitignore

@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

+ 88 - 0
README.md

@@ -0,0 +1,88 @@
+# DualFlow - Multi-Model Collaboration Platform
+
+## Overview
+
+DualFlow is a lightweight multi-model collaboration platform: it hosts multiple independent machine learning models and makes team development easier.
+
+## Project Structure
+
+```
+DualFlow/
+├── models/                # Independent model directories
+│   ├── nlp_bert/          # NLP BERT model
+│   ├── vision_resnet/     # Computer-vision ResNet model
+│   └── recommender_v2/    # Recommender-system model
+├── common/                # Shared logic (common functions)
+│   ├── data/              # Data handling
+│   ├── preprocessing/     # Data preprocessing
+│   ├── metrics/           # Evaluation metrics
+│   └── utils/             # Utility functions
+├── ci/                    # CI/CD pipeline templates
+│   ├── model_build.yml    # Model build pipeline
+│   ├── model_deploy.yml   # Model deployment pipeline
+│   └── scripts/           # Deployment scripts
+├── configs/               # Global configuration
+│   ├── registry.yaml      # Model registry
+│   └── model_matrix.yaml  # Model ownership matrix
+├── tests/                 # Project tests
+├── scripts/               # Project scripts
+│   └── run_all_models.py  # Run all models
+└── README.md
+```
+
+## Quick Start
+
+### Requirements
+
+- Python 3.9+
+- Git
+
+### Installation and Usage
+
+1. Clone the project
+```bash
+git clone <repository-url>
+cd DualFlow
+```
+
+2. Install dependencies
+```bash
+pip install -r requirements.txt
+```
+
+3. Run a specific model
+```bash
+cd models/nlp_bert
+python train.py
+```
+
+4. Run all models
+```bash
+python scripts/run_all_models.py
+```
+
+## Development Guide
+
+### Adding a New Model
+
+1. Create a new model directory under `models/`
+2. Create the required files, following the existing model layout
+3. Register the new model in `configs/registry.yaml`
+4. Add the corresponding tests
+
+### Code Style
+
+- Format code with Black
+- Sort imports with isort
+- Test with pytest
+
+## Contributing
+
+1. Fork the project
+2. Create a feature branch
+3. Commit your changes
+4. Open a Pull Request
+
+## License
+
+MIT License
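
Step 3 under "Adding a New Model" points at `configs/registry.yaml`, which is not part of this commit. As a hedged sketch only — both the registry schema and the field names below are assumptions — the committed `common.utils.config.Config` helper could read such a file like this:

```python
from common.utils.config import Config

# Hypothetical registry schema:
#   models:
#     nlp_bert:
#       path: models/nlp_bert
#       entrypoint: train.py
registry = Config("configs/registry.yaml")
for name, spec in (registry.get("models") or {}).items():
    print(f"{name}: {spec['path']} -> {spec['entrypoint']}")
```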

+ 2 - 0
api/__init__.py

@@ -0,0 +1,2 @@
+# API Gateway Package
+

+ 2 - 0
api/routers/__init__.py

@@ -0,0 +1,2 @@
+# API Routers Package
+

+ 108 - 0
api/routers/health.py

@@ -0,0 +1,108 @@
+"""
+Health Check Router
+"""
+from fastapi import APIRouter, Depends
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+from shared.database import get_db
+from shared.models.health import HealthStatus
+import time
+
+router = APIRouter()
+
+
+@router.get("/health", response_model=HealthStatus)
+async def health_check(db: Session = Depends(get_db)):
+    """Health check endpoint"""
+    start_time = time.time()
+    
+    # Check database connection
+    try:
+        db.execute(text("SELECT 1"))
+        db_status = "healthy"
+    except Exception as e:
+        db_status = f"unhealthy: {str(e)}"
+    
+    # Check external services
+    services_status = await check_external_services()
+    
+    response_time = time.time() - start_time
+    
+    return HealthStatus(
+        status="healthy" if db_status == "healthy" else "unhealthy",
+        timestamp=time.time(),
+        response_time=response_time,
+        database=db_status,
+        services=services_status
+    )
+
+
+@router.get("/health/ready")
+async def readiness_check():
+    """Readiness check for Kubernetes"""
+    return {"status": "ready"}
+
+
+@router.get("/health/live")
+async def liveness_check():
+    """Liveness check for Kubernetes"""
+    return {"status": "alive"}
+
+
+async def check_external_services():
+    """Check external services status"""
+    services = {
+        "redis": await check_redis(),
+        "rabbitmq": await check_rabbitmq(),
+        "model_service": await check_model_service(),
+        "user_service": await check_user_service(),
+        "task_service": await check_task_service()
+    }
+    return services
+
+
+async def check_redis():
+    """Check Redis connection"""
+    try:
+        # Implementation would check Redis connection
+        return "healthy"
+    except Exception:
+        return "unhealthy"
+
+
+async def check_rabbitmq():
+    """Check RabbitMQ connection"""
+    try:
+        # Implementation would check RabbitMQ connection
+        return "healthy"
+    except Exception:
+        return "unhealthy"
+
+
+async def check_model_service():
+    """Check model service"""
+    try:
+        # Implementation would check model service
+        return "healthy"
+    except Exception:
+        return "unhealthy"
+
+
+async def check_user_service():
+    """Check user service"""
+    try:
+        # Implementation would check user service
+        return "healthy"
+    except Exception:
+        return "unhealthy"
+
+
+async def check_task_service():
+    """Check task service"""
+    try:
+        # Implementation would check task service
+        return "healthy"
+    except Exception:
+        return "unhealthy"
+
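
The service probes above are stubs that unconditionally report "healthy". As a minimal sketch of what a real probe could look like — assuming the redis-py asyncio client (redis>=5) and a Redis URL, neither of which is part of this commit — `check_redis` might become:

```python
import redis.asyncio as aioredis  # assumed dependency, not in this commit's requirements

async def check_redis(redis_url: str = "redis://localhost:6379/0") -> str:
    """Ping Redis and report health; the default URL is hypothetical."""
    try:
        client = aioredis.from_url(redis_url)
        try:
            await client.ping()  # raises on connection/auth failure
            return "healthy"
        finally:
            await client.aclose()  # aclose() is the redis>=5 name
    except Exception:
        return "unhealthy"
```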

+ 146 - 0
api/routers/models.py

@@ -0,0 +1,146 @@
+"""
+Models Router - Proxy to Model Service
+"""
+from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
+from typing import List
+import httpx
+from shared.auth import get_current_user
+from shared.models.user import User
+from shared.models.model import ModelRequest, ModelResponse, ModelList
+
+router = APIRouter()
+
+# Model service URL
+MODEL_SERVICE_URL = "http://model-service:8001"
+
+
+@router.get("/models", response_model=List[ModelList])
+async def list_models(
+    current_user: User = Depends(get_current_user),
+    skip: int = 0,
+    limit: int = 100
+):
+    """List available models"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{MODEL_SERVICE_URL}/models",
+                params={"skip": skip, "limit": limit},
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"Model service error: {str(e)}")
+
+
+@router.post("/models/{model_id}/predict", response_model=ModelResponse)
+async def predict(
+    model_id: str,
+    request: ModelRequest,
+    background_tasks: BackgroundTasks,
+    current_user: User = Depends(get_current_user)
+):
+    """Make prediction with specified model"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{MODEL_SERVICE_URL}/models/{model_id}/predict",
+                json=request.dict(),
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            
+            # Log prediction request
+            background_tasks.add_task(
+                log_prediction_request,
+                model_id,
+                current_user.id,
+                request
+            )
+            
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"Model service error: {str(e)}")
+
+
+@router.get("/models/{model_id}", response_model=ModelList)
+async def get_model(
+    model_id: str,
+    current_user: User = Depends(get_current_user)
+):
+    """Get model details"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{MODEL_SERVICE_URL}/models/{model_id}",
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"Model service error: {str(e)}")
+
+
+@router.post("/models/{model_id}/train")
+async def train_model(
+    model_id: str,
+    training_data: dict,
+    background_tasks: BackgroundTasks,
+    current_user: User = Depends(get_current_user)
+):
+    """Start model training"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{MODEL_SERVICE_URL}/models/{model_id}/train",
+                json=training_data,
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            
+            # Log training request
+            background_tasks.add_task(
+                log_training_request,
+                model_id,
+                current_user.id,
+                training_data
+            )
+            
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"Model service error: {str(e)}")
+
+
+@router.get("/models/{model_id}/status")
+async def get_model_status(
+    model_id: str,
+    current_user: User = Depends(get_current_user)
+):
+    """Get model training status"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{MODEL_SERVICE_URL}/models/{model_id}/status",
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"Model service error: {str(e)}")
+
+
+async def log_prediction_request(model_id: str, user_id: str, request: ModelRequest):
+    """Log prediction request for analytics"""
+    # Implementation would log to database or analytics service
+    pass
+
+
+async def log_training_request(model_id: str, user_id: str, training_data: dict):
+    """Log training request for analytics"""
+    # Implementation would log to database or analytics service
+    pass
+
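
For reference, a client-side sketch of calling the prediction proxy above. The gateway host, token, and payload shape are assumptions — the `ModelRequest` schema lives in the `shared` package, which is not in this commit:

```python
import httpx

GATEWAY_URL = "http://localhost:8000"  # hypothetical gateway address

def predict(model_id: str, payload: dict, token: str) -> dict:
    """POST a prediction request through the gateway proxy."""
    response = httpx.post(
        f"{GATEWAY_URL}/models/{model_id}/predict",
        json=payload,  # must match the ModelRequest schema
        headers={"Authorization": f"Bearer {token}"},
        timeout=30.0,
    )
    response.raise_for_status()
    return response.json()
```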

+ 127 - 0
api/routers/users.py

@@ -0,0 +1,127 @@
+"""
+Users Router - Proxy to User Service
+"""
+from fastapi import APIRouter, HTTPException, Depends
+from typing import List
+import httpx
+from shared.auth import get_current_user, get_current_admin
+from shared.models.user import User, UserUpdate, UserResponse
+
+router = APIRouter()
+
+# User service URL
+USER_SERVICE_URL = "http://user-service:8002"
+
+
+@router.get("/users/me", response_model=UserResponse)
+async def get_current_user_info(
+    current_user: User = Depends(get_current_user)
+):
+    """Get current user information"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{USER_SERVICE_URL}/users/me",
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+
+
+@router.put("/users/me", response_model=UserResponse)
+async def update_current_user(
+    user_update: UserUpdate,
+    current_user: User = Depends(get_current_user)
+):
+    """Update current user information"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.put(
+                f"{USER_SERVICE_URL}/users/me",
+                json=user_update.dict(exclude_unset=True),
+                headers={"Authorization": f"Bearer {current_user.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+
+
+@router.get("/users", response_model=List[UserResponse])
+async def list_users(
+    skip: int = 0,
+    limit: int = 100,
+    current_admin: User = Depends(get_current_admin)
+):
+    """List all users (admin only)"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{USER_SERVICE_URL}/users",
+                params={"skip": skip, "limit": limit},
+                headers={"Authorization": f"Bearer {current_admin.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+
+
+@router.get("/users/{user_id}", response_model=UserResponse)
+async def get_user(
+    user_id: str,
+    current_admin: User = Depends(get_current_admin)
+):
+    """Get user by ID (admin only)"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{USER_SERVICE_URL}/users/{user_id}",
+                headers={"Authorization": f"Bearer {current_admin.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+
+
+@router.put("/users/{user_id}", response_model=UserResponse)
+async def update_user(
+    user_id: str,
+    user_update: UserUpdate,
+    current_admin: User = Depends(get_current_admin)
+):
+    """Update user by ID (admin only)"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.put(
+                f"{USER_SERVICE_URL}/users/{user_id}",
+                json=user_update.dict(exclude_unset=True),
+                headers={"Authorization": f"Bearer {current_admin.token}"}
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+
+
+@router.delete("/users/{user_id}")
+async def delete_user(
+    user_id: str,
+    current_admin: User = Depends(get_current_admin)
+):
+    """Delete user by ID (admin only)"""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.delete(
+                f"{USER_SERVICE_URL}/users/{user_id}",
+                headers={"Authorization": f"Bearer {current_admin.token}"}
+            )
+            response.raise_for_status()
+            return {"message": "User deleted successfully"}
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"User service error: {str(e)}")
+

+ 79 - 0
ci/model_build.yml

@@ -0,0 +1,79 @@
+# Model build CI/CD pipeline
+name: Model Build
+
+on:
+  push:
+    branches: [ main, develop ]
+    paths:
+      - 'models/**'
+  pull_request:
+    branches: [ main, develop ]
+    paths:
+      - 'models/**'
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    
+    strategy:
+      matrix:
+        model: [nlp_bert, vision_resnet, recommender_v2]
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+    
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.9'
+    
+    - name: Cache pip dependencies
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ hashFiles(format('models/{0}/requirements.txt', matrix.model)) }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
+    
+    - name: Install dependencies
+      run: |
+        cd models/${{ matrix.model }}
+        pip install -r requirements.txt
+    
+    - name: Lint code
+      run: |
+        cd models/${{ matrix.model }}
+        pip install flake8 black isort
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        black --check .
+        isort --check-only .
+    
+    - name: Run tests
+      run: |
+        cd models/${{ matrix.model }}
+        python -m pytest tests/ -v --cov=. --cov-report=xml
+    
+    - name: Upload coverage reports
+      uses: codecov/codecov-action@v3
+      with:
+        file: models/${{ matrix.model }}/coverage.xml
+        flags: ${{ matrix.model }}
+    
+    - name: Build Docker image
+      run: |
+        cd models/${{ matrix.model }}
+        docker build -t dualflow-${{ matrix.model }}:${{ github.sha }} .
+    
+    - name: Test Docker image
+      run: |
+        cd models/${{ matrix.model }}
+        docker run --rm dualflow-${{ matrix.model }}:${{ github.sha }} python -c "import torch; print('Docker image works!')"
+    
+    - name: Upload model artifacts
+      uses: actions/upload-artifact@v3
+      with:
+        name: ${{ matrix.model }}-artifacts
+        path: models/${{ matrix.model }}/outputs/
+        if-no-files-found: warn
+

+ 97 - 0
ci/model_deploy.yml

@@ -0,0 +1,97 @@
+# Model deployment CI/CD pipeline
+name: Model Deploy
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - 'models/**'
+  workflow_dispatch:
+    inputs:
+      model_name:
+        description: 'Model to deploy'
+        required: true
+        default: 'nlp_bert'
+        type: choice
+        options:
+          - nlp_bert
+          - vision_resnet
+          - recommender_v2
+      environment:
+        description: 'Deployment environment'
+        required: true
+        default: 'staging'
+        type: choice
+        options:
+          - staging
+          - production
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment: ${{ github.event.inputs.environment || 'staging' }}
+    
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+    
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v2
+    
+    - name: Login to Container Registry
+      uses: docker/login-action@v2
+      with:
+        registry: ghcr.io
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+    
+    - name: Extract metadata
+      id: meta
+      uses: docker/metadata-action@v4
+      with:
+        images: ghcr.io/${{ github.repository }}/${{ github.event.inputs.model_name || 'nlp_bert' }}
+        tags: |
+          type=ref,event=branch
+          type=ref,event=pr
+          type=sha,prefix={{branch}}-
+          type=raw,value=latest,enable={{is_default_branch}}
+    
+    - name: Build and push Docker image
+      uses: docker/build-push-action@v4
+      with:
+        context: models/${{ github.event.inputs.model_name || 'nlp_bert' }}
+        push: true
+        tags: ${{ steps.meta.outputs.tags }}
+        labels: ${{ steps.meta.outputs.labels }}
+        cache-from: type=gha
+        cache-to: type=gha,mode=max
+    
+    - name: Deploy to Kubernetes
+      if: github.event.inputs.environment == 'production'
+      run: |
+        echo "Deploying to Kubernetes production environment"
+        # Add the Kubernetes deployment commands here
+        # kubectl apply -f k8s/${{ github.event.inputs.model_name || 'nlp_bert' }}/
+    
+    - name: Deploy to Docker Compose
+      if: github.event.inputs.environment == 'staging'
+      run: |
+        echo "Deploying to Docker Compose staging environment"
+        # Add the Docker Compose deployment commands here
+        # docker-compose -f docker-compose.staging.yml up -d
+    
+    - name: Run health checks
+      run: |
+        echo "Running health checks for ${{ github.event.inputs.model_name || 'nlp_bert' }}"
+        # Add the health-check commands here
+        # curl -f http://localhost:8000/health || exit 1
+    
+    - name: Notify deployment status
+      if: always()
+      run: |
+        if [ "${{ job.status }}" == "success" ]; then
+          echo "✅ Deployment successful"
+        else
+          echo "❌ Deployment failed"
+        fi
+

+ 77 - 0
ci/scripts/deploy_model.sh

@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Model deployment script
+# Usage: ./deploy_model.sh <model_name> <environment>
+
+set -e
+
+MODEL_NAME=${1:-"nlp_bert"}
+ENVIRONMENT=${2:-"staging"}
+DOCKER_IMAGE="dualflow-${MODEL_NAME}:latest"
+
+echo "🚀 Deploying model: ${MODEL_NAME} to environment: ${ENVIRONMENT}"
+
+# Make sure Docker is running
+if ! docker info > /dev/null 2>&1; then
+    echo "❌ Docker is not running; please start Docker"
+    exit 1
+fi
+
+# Make sure the model directory exists
+if [ ! -d "models/${MODEL_NAME}" ]; then
+    echo "❌ Model directory does not exist: models/${MODEL_NAME}"
+    exit 1
+fi
+
+# Build the Docker image
+echo "📦 Building Docker image..."
+cd "models/${MODEL_NAME}"
+docker build -t "${DOCKER_IMAGE}" .
+
+# Stop any existing container
+echo "🛑 Stopping existing container..."
+docker stop "${MODEL_NAME}-container" 2>/dev/null || true
+docker rm "${MODEL_NAME}-container" 2>/dev/null || true
+
+# Start the new container
+echo "🏃 Starting new container..."
+if [ "${ENVIRONMENT}" = "production" ]; then
+    # Production configuration
+    docker run -d \
+        --name "${MODEL_NAME}-container" \
+        --restart unless-stopped \
+        -p 8000:8000 \
+        -e ENVIRONMENT=production \
+        "${DOCKER_IMAGE}"
+else
+    # Development/staging configuration
+    docker run -d \
+        --name "${MODEL_NAME}-container" \
+        -p 8000:8000 \
+        -e ENVIRONMENT="${ENVIRONMENT}" \
+        "${DOCKER_IMAGE}"
+fi
+
+# Wait for the service to come up
+echo "⏳ Waiting for the service to start..."
+sleep 10
+
+# Health check
+echo "🔍 Running health check..."
+if curl -f http://localhost:8000/health > /dev/null 2>&1; then
+    echo "✅ Model deployed successfully!"
+    echo "🌐 Service URL: http://localhost:8000"
+    echo "📊 Health check: http://localhost:8000/health"
+else
+    echo "❌ Health check failed"
+    echo "📋 Container logs:"
+    docker logs "${MODEL_NAME}-container"
+    exit 1
+fi
+
+# Show container status
+echo "📊 Container status:"
+docker ps | grep "${MODEL_NAME}-container"
+
+echo "🎉 Deployment finished!"
+

+ 2 - 0
common/__init__.py

@@ -0,0 +1,2 @@
+# Shared logic package
+

+ 2 - 0
common/data/__init__.py

@@ -0,0 +1,2 @@
+# Data handling module
+

+ 142 - 0
common/data/loader.py

@@ -0,0 +1,142 @@
+"""
+通用数据加载器
+"""
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from typing import Union, Dict, Any, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DataLoader:
+    """通用数据加载器"""
+    
+    def __init__(self, data_path: Union[str, Path]):
+        """初始化数据加载器"""
+        self.data_path = Path(data_path)
+        
+    def load_csv(self, **kwargs) -> pd.DataFrame:
+        """加载CSV文件"""
+        try:
+            df = pd.read_csv(self.data_path, **kwargs)
+            logger.info(f"成功加载CSV文件: {self.data_path}")
+            return df
+        except Exception as e:
+            logger.error(f"加载CSV文件失败: {e}")
+            raise
+    
+    def load_json(self, **kwargs) -> pd.DataFrame:
+        """加载JSON文件"""
+        try:
+            df = pd.read_json(self.data_path, **kwargs)
+            logger.info(f"成功加载JSON文件: {self.data_path}")
+            return df
+        except Exception as e:
+            logger.error(f"加载JSON文件失败: {e}")
+            raise
+    
+    def load_parquet(self, **kwargs) -> pd.DataFrame:
+        """加载Parquet文件"""
+        try:
+            df = pd.read_parquet(self.data_path, **kwargs)
+            logger.info(f"成功加载Parquet文件: {self.data_path}")
+            return df
+        except Exception as e:
+            logger.error(f"加载Parquet文件失败: {e}")
+            raise
+    
+    def save_csv(self, df: pd.DataFrame, **kwargs) -> None:
+        """保存为CSV文件"""
+        try:
+            df.to_csv(self.data_path, **kwargs)
+            logger.info(f"成功保存CSV文件: {self.data_path}")
+        except Exception as e:
+            logger.error(f"保存CSV文件失败: {e}")
+            raise
+    
+    def save_json(self, df: pd.DataFrame, **kwargs) -> None:
+        """保存为JSON文件"""
+        try:
+            df.to_json(self.data_path, **kwargs)
+            logger.info(f"成功保存JSON文件: {self.data_path}")
+        except Exception as e:
+            logger.error(f"保存JSON文件失败: {e}")
+            raise
+    
+    def save_parquet(self, df: pd.DataFrame, **kwargs) -> None:
+        """保存为Parquet文件"""
+        try:
+            df.to_parquet(self.data_path, **kwargs)
+            logger.info(f"成功保存Parquet文件: {self.data_path}")
+        except Exception as e:
+            logger.error(f"保存Parquet文件失败: {e}")
+            raise
+
+
+class ImageDataLoader:
+    """图像数据加载器"""
+    
+    def __init__(self, data_dir: Union[str, Path]):
+        """初始化图像数据加载器"""
+        self.data_dir = Path(data_dir)
+    
+    def load_images(self, extensions: list = ['.jpg', '.jpeg', '.png', '.bmp']) -> list:
+        """加载图像文件路径"""
+        image_paths = []
+        for ext in extensions:
+            image_paths.extend(self.data_dir.glob(f'**/*{ext}'))
+            image_paths.extend(self.data_dir.glob(f'**/*{ext.upper()}'))
+        
+        logger.info(f"找到 {len(image_paths)} 个图像文件")
+        return sorted(image_paths)
+    
+    def create_dataset_info(self, image_paths: list, label_func: callable = None) -> pd.DataFrame:
+        """创建数据集信息DataFrame"""
+        data = []
+        for img_path in image_paths:
+            if label_func:
+                label = label_func(img_path)
+            else:
+                # 默认从文件夹名获取标签
+                label = img_path.parent.name
+            
+            data.append({
+                'image_path': str(img_path),
+                'label': label,
+                'filename': img_path.name
+            })
+        
+        return pd.DataFrame(data)
+
+
+class TextDataLoader:
+    """文本数据加载器"""
+    
+    def __init__(self, data_path: Union[str, Path]):
+        """初始化文本数据加载器"""
+        self.data_path = Path(data_path)
+    
+    def load_text(self, encoding: str = 'utf-8') -> str:
+        """加载文本文件"""
+        try:
+            with open(self.data_path, 'r', encoding=encoding) as f:
+                text = f.read()
+            logger.info(f"成功加载文本文件: {self.data_path}")
+            return text
+        except Exception as e:
+            logger.error(f"加载文本文件失败: {e}")
+            raise
+    
+    def load_lines(self, encoding: str = 'utf-8') -> list:
+        """按行加载文本文件"""
+        try:
+            with open(self.data_path, 'r', encoding=encoding) as f:
+                lines = f.readlines()
+            logger.info(f"成功加载文本文件,共 {len(lines)} 行: {self.data_path}")
+            return lines
+        except Exception as e:
+            logger.error(f"加载文本文件失败: {e}")
+            raise
+
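
A short usage sketch for the loaders above; the data paths are hypothetical:

```python
from common.data.loader import DataLoader, ImageDataLoader

# Hypothetical files -- substitute your own data.
df = DataLoader("data/sensors.csv").load_csv(parse_dates=["timestamp"])

images = ImageDataLoader("data/images")
info = images.create_dataset_info(images.load_images())  # labels from folder names
print(df.shape, len(info))
```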

+ 2 - 0
common/metrics/__init__.py

@@ -0,0 +1,2 @@
+# Evaluation metrics module
+

+ 179 - 0
common/metrics/classification.py

@@ -0,0 +1,179 @@
+"""
+分类任务评估指标
+"""
+import numpy as np
+from sklearn.metrics import (
+    accuracy_score, precision_score, recall_score, f1_score,
+    confusion_matrix, classification_report, roc_auc_score
+)
+from typing import List, Dict, Any, Optional, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ClassificationMetrics:
+    """分类任务评估指标"""
+    
+    def __init__(self):
+        """初始化分类指标"""
+        pass
+    
+    def accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """准确率"""
+        return accuracy_score(y_true, y_pred)
+    
+    def precision(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                  average: str = 'weighted') -> float:
+        """精确率"""
+        return precision_score(y_true, y_pred, average=average, zero_division=0)
+    
+    def recall(self, y_true: np.ndarray, y_pred: np.ndarray, 
+               average: str = 'weighted') -> float:
+        """召回率"""
+        return recall_score(y_true, y_pred, average=average, zero_division=0)
+    
+    def f1_score(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                 average: str = 'weighted') -> float:
+        """F1分数"""
+        return f1_score(y_true, y_pred, average=average, zero_division=0)
+    
+    def confusion_matrix(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
+        """混淆矩阵"""
+        return confusion_matrix(y_true, y_pred)
+    
+    def classification_report(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                            target_names: Optional[List[str]] = None) -> str:
+        """分类报告"""
+        return classification_report(y_true, y_pred, target_names=target_names)
+    
+    def roc_auc(self, y_true: np.ndarray, y_pred_proba: np.ndarray, 
+                average: str = 'weighted') -> float:
+        """ROC AUC分数"""
+        try:
+            return roc_auc_score(y_true, y_pred_proba, average=average)
+        except ValueError as e:
+            logger.warning(f"无法计算ROC AUC: {e}")
+            return 0.0
+    
+    def compute_all_metrics(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                           y_pred_proba: Optional[np.ndarray] = None) -> Dict[str, float]:
+        """计算所有指标"""
+        metrics = {
+            'accuracy': self.accuracy(y_true, y_pred),
+            'precision': self.precision(y_true, y_pred),
+            'recall': self.recall(y_true, y_pred),
+            'f1_score': self.f1_score(y_true, y_pred)
+        }
+        
+        if y_pred_proba is not None:
+            metrics['roc_auc'] = self.roc_auc(y_true, y_pred_proba)
+        
+        return metrics
+
+
+class MultiClassMetrics:
+    """多分类任务评估指标"""
+    
+    def __init__(self):
+        """初始化多分类指标"""
+        pass
+    
+    def macro_precision(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """宏平均精确率"""
+        return precision_score(y_true, y_pred, average='macro', zero_division=0)
+    
+    def macro_recall(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """宏平均召回率"""
+        return recall_score(y_true, y_pred, average='macro', zero_division=0)
+    
+    def macro_f1(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """宏平均F1分数"""
+        return f1_score(y_true, y_pred, average='macro', zero_division=0)
+    
+    def micro_precision(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """微平均精确率"""
+        return precision_score(y_true, y_pred, average='micro', zero_division=0)
+    
+    def micro_recall(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """微平均召回率"""
+        return recall_score(y_true, y_pred, average='micro', zero_division=0)
+    
+    def micro_f1(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """微平均F1分数"""
+        return f1_score(y_true, y_pred, average='micro', zero_division=0)
+    
+    def per_class_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, Dict[str, float]]:
+        """每个类别的指标"""
+        unique_labels = np.unique(np.concatenate([y_true, y_pred]))
+        per_class = {}
+        
+        for label in unique_labels:
+            # 二分类指标
+            y_true_binary = (y_true == label).astype(int)
+            y_pred_binary = (y_pred == label).astype(int)
+            
+            tp = np.sum((y_true_binary == 1) & (y_pred_binary == 1))
+            fp = np.sum((y_true_binary == 0) & (y_pred_binary == 1))
+            fn = np.sum((y_true_binary == 1) & (y_pred_binary == 0))
+            
+            precision = tp / (tp + fp) if (tp + fp) > 0 else 0
+            recall = tp / (tp + fn) if (tp + fn) > 0 else 0
+            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
+            
+            per_class[str(label)] = {
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        
+        return per_class
+
+
+class BinaryClassificationMetrics:
+    """二分类任务评估指标"""
+    
+    def __init__(self):
+        """初始化二分类指标"""
+        pass
+    
+    def true_positive(self, y_true: np.ndarray, y_pred: np.ndarray) -> int:
+        """真正例"""
+        return np.sum((y_true == 1) & (y_pred == 1))
+    
+    def false_positive(self, y_true: np.ndarray, y_pred: np.ndarray) -> int:
+        """假正例"""
+        return np.sum((y_true == 0) & (y_pred == 1))
+    
+    def true_negative(self, y_true: np.ndarray, y_pred: np.ndarray) -> int:
+        """真负例"""
+        return np.sum((y_true == 0) & (y_pred == 0))
+    
+    def false_negative(self, y_true: np.ndarray, y_pred: np.ndarray) -> int:
+        """假负例"""
+        return np.sum((y_true == 1) & (y_pred == 0))
+    
+    def sensitivity(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """敏感性 (召回率)"""
+        tp = self.true_positive(y_true, y_pred)
+        fn = self.false_negative(y_true, y_pred)
+        return tp / (tp + fn) if (tp + fn) > 0 else 0
+    
+    def specificity(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """特异性"""
+        tn = self.true_negative(y_true, y_pred)
+        fp = self.false_positive(y_true, y_pred)
+        return tn / (tn + fp) if (tn + fp) > 0 else 0
+    
+    def positive_predictive_value(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """阳性预测值 (精确率)"""
+        tp = self.true_positive(y_true, y_pred)
+        fp = self.false_positive(y_true, y_pred)
+        return tp / (tp + fp) if (tp + fp) > 0 else 0
+    
+    def negative_predictive_value(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """阴性预测值"""
+        tn = self.true_negative(y_true, y_pred)
+        fn = self.false_negative(y_true, y_pred)
+        return tn / (tn + fn) if (tn + fn) > 0 else 0
+
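
Example use of `compute_all_metrics` on toy binary labels (values are illustrative only):

```python
import numpy as np
from common.metrics.classification import ClassificationMetrics

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
y_proba = np.array([0.2, 0.9, 0.4, 0.1, 0.8])  # predicted P(class=1)

print(ClassificationMetrics().compute_all_metrics(y_true, y_pred, y_proba))
# accuracy 0.8, plus weighted precision/recall/F1 and ROC AUC
```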

+ 157 - 0
common/metrics/regression.py

@@ -0,0 +1,157 @@
+"""
+回归任务评估指标
+"""
+import numpy as np
+from sklearn.metrics import (
+    mean_squared_error, mean_absolute_error, r2_score,
+    mean_absolute_percentage_error, median_absolute_error
+)
+from typing import Dict, Any
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class RegressionMetrics:
+    """回归任务评估指标"""
+    
+    def __init__(self):
+        """初始化回归指标"""
+        pass
+    
+    def mse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """均方误差 (Mean Squared Error)"""
+        return mean_squared_error(y_true, y_pred)
+    
+    def rmse(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """均方根误差 (Root Mean Squared Error)"""
+        return np.sqrt(mean_squared_error(y_true, y_pred))
+    
+    def mae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """平均绝对误差 (Mean Absolute Error)"""
+        return mean_absolute_error(y_true, y_pred)
+    
+    def mape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """平均绝对百分比误差 (Mean Absolute Percentage Error)"""
+        return mean_absolute_percentage_error(y_true, y_pred)
+    
+    def r2_score(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """决定系数 (R-squared)"""
+        return r2_score(y_true, y_pred)
+    
+    def median_ae(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """中位数绝对误差 (Median Absolute Error)"""
+        return median_absolute_error(y_true, y_pred)
+    
+    def mape_robust(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                   epsilon: float = 1e-8) -> float:
+        """鲁棒的平均绝对百分比误差"""
+        return np.mean(np.abs((y_true - y_pred) / (np.abs(y_true) + epsilon))) * 100
+    
+    def smape(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """对称平均绝对百分比误差 (Symmetric Mean Absolute Percentage Error)"""
+        return np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred))) * 100
+    
+    def mase(self, y_true: np.ndarray, y_pred: np.ndarray, 
+             y_naive: np.ndarray) -> float:
+        """平均绝对标度误差 (Mean Absolute Scaled Error)"""
+        mae = self.mae(y_true, y_pred)
+        mae_naive = self.mae(y_true, y_naive)
+        return mae / mae_naive if mae_naive != 0 else 0
+    
+    def compute_all_metrics(self, y_true: np.ndarray, y_pred: np.ndarray, 
+                           y_naive: Optional[np.ndarray] = None) -> Dict[str, float]:
+        """计算所有回归指标"""
+        metrics = {
+            'mse': self.mse(y_true, y_pred),
+            'rmse': self.rmse(y_true, y_pred),
+            'mae': self.mae(y_true, y_pred),
+            'mape': self.mape(y_true, y_pred),
+            'r2_score': self.r2_score(y_true, y_pred),
+            'median_ae': self.median_ae(y_true, y_pred),
+            'mape_robust': self.mape_robust(y_true, y_pred),
+            'smape': self.smape(y_true, y_pred)
+        }
+        
+        if y_naive is not None:
+            metrics['mase'] = self.mase(y_true, y_pred, y_naive)
+        
+        return metrics
+
+
+class TimeSeriesMetrics:
+    """时间序列评估指标"""
+    
+    def __init__(self):
+        """初始化时间序列指标"""
+        pass
+    
+    def directional_accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """方向准确率"""
+        true_direction = np.diff(y_true)
+        pred_direction = np.diff(y_pred)
+        return np.mean((true_direction * pred_direction) > 0)
+    
+    def theil_u(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """Theil's U统计量"""
+        mse = np.mean((y_true - y_pred) ** 2)
+        mse_naive = np.mean((y_true[1:] - y_true[:-1]) ** 2)
+        return np.sqrt(mse / mse_naive) if mse_naive != 0 else 0
+    
+    def mean_absolute_scaled_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """平均绝对标度误差"""
+        mae = np.mean(np.abs(y_true - y_pred))
+        mae_naive = np.mean(np.abs(y_true[1:] - y_true[:-1]))
+        return mae / mae_naive if mae_naive != 0 else 0
+    
+    def mean_absolute_percentage_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+        """平均绝对百分比误差"""
+        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
+
+
+class RankingMetrics:
+    """排序任务评估指标"""
+    
+    def __init__(self):
+        """初始化排序指标"""
+        pass
+    
+    def ndcg(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = None) -> float:
+        """归一化折扣累积增益 (Normalized Discounted Cumulative Gain)"""
+        if k is None:
+            k = len(y_true)
+        
+        # 按预测分数排序
+        sorted_indices = np.argsort(y_pred)[::-1]
+        sorted_true = y_true[sorted_indices]
+        
+        # 计算DCG
+        dcg = 0
+        for i in range(min(k, len(sorted_true))):
+            dcg += sorted_true[i] / np.log2(i + 2)
+        
+        # 计算IDCG
+        sorted_true_ideal = np.sort(y_true)[::-1]
+        idcg = 0
+        for i in range(min(k, len(sorted_true_ideal))):
+            idcg += sorted_true_ideal[i] / np.log2(i + 2)
+        
+        return dcg / idcg if idcg > 0 else 0
+    
+    def hit_rate(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
+        """命中率 (Hit Rate)"""
+        # 获取前k个预测
+        top_k_indices = np.argsort(y_pred)[::-1][:k]
+        return np.sum(y_true[top_k_indices] > 0) / k
+    
+    def precision_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
+        """K位置精确率"""
+        top_k_indices = np.argsort(y_pred)[::-1][:k]
+        return np.sum(y_true[top_k_indices] > 0) / k
+    
+    def recall_at_k(self, y_true: np.ndarray, y_pred: np.ndarray, k: int = 10) -> float:
+        """K位置召回率"""
+        top_k_indices = np.argsort(y_pred)[::-1][:k]
+        relevant_items = np.sum(y_true > 0)
+        return np.sum(y_true[top_k_indices] > 0) / relevant_items if relevant_items > 0 else 0
+
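
Example use with a naive last-value baseline so that MASE is included (toy numbers):

```python
import numpy as np
from common.metrics.regression import RegressionMetrics

y_true = np.array([1.0, 1.2, 1.1, 1.4, 1.3])
y_pred = np.array([1.1, 1.1, 1.2, 1.3, 1.3])
y_naive = np.array([1.0, 1.0, 1.2, 1.1, 1.4])  # previous observation as forecast

metrics = RegressionMetrics().compute_all_metrics(y_true, y_pred, y_naive=y_naive)
print({name: round(value, 4) for name, value in metrics.items()})
```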

+ 2 - 0
common/preprocessing/__init__.py

@@ -0,0 +1,2 @@
+# Data preprocessing module
+

+ 166 - 0
common/preprocessing/image.py

@@ -0,0 +1,166 @@
+"""
+图像预处理工具
+"""
+import cv2
+import numpy as np
+from PIL import Image, ImageEnhance, ImageFilter
+from typing import Tuple, Optional, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ImagePreprocessor:
+    """图像预处理器"""
+    
+    def __init__(self):
+        """初始化图像预处理器"""
+        pass
+    
+    def resize(self, image: np.ndarray, size: Tuple[int, int], 
+               method: str = 'bilinear') -> np.ndarray:
+        """调整图像大小"""
+        if method == 'bilinear':
+            return cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
+        elif method == 'nearest':
+            return cv2.resize(image, size, interpolation=cv2.INTER_NEAREST)
+        elif method == 'cubic':
+            return cv2.resize(image, size, interpolation=cv2.INTER_CUBIC)
+        else:
+            raise ValueError(f"不支持的插值方法: {method}")
+    
+    def crop(self, image: np.ndarray, x: int, y: int, 
+             width: int, height: int) -> np.ndarray:
+        """裁剪图像"""
+        return image[y:y+height, x:x+width]
+    
+    def center_crop(self, image: np.ndarray, size: Tuple[int, int]) -> np.ndarray:
+        """中心裁剪"""
+        h, w = image.shape[:2]
+        crop_h, crop_w = size
+        
+        start_h = (h - crop_h) // 2
+        start_w = (w - crop_w) // 2
+        
+        return image[start_h:start_h+crop_h, start_w:start_w+crop_w]
+    
+    def normalize(self, image: np.ndarray, 
+                 mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
+                 std: Tuple[float, float, float] = (0.229, 0.224, 0.225)) -> np.ndarray:
+        """标准化图像"""
+        image = image.astype(np.float32) / 255.0
+        
+        if len(image.shape) == 3:
+            for i in range(3):
+                image[:, :, i] = (image[:, :, i] - mean[i]) / std[i]
+        else:
+            image = (image - mean[0]) / std[0]
+        
+        return image
+    
+    def to_tensor(self, image: np.ndarray) -> np.ndarray:
+        """转换为张量格式 (H, W, C) -> (C, H, W)"""
+        if len(image.shape) == 3:
+            return np.transpose(image, (2, 0, 1))
+        return image
+
+
+class ImageAugmenter:
+    """图像增强器"""
+    
+    def __init__(self):
+        """初始化图像增强器"""
+        pass
+    
+    def random_horizontal_flip(self, image: np.ndarray, p: float = 0.5) -> np.ndarray:
+        """随机水平翻转"""
+        if np.random.random() < p:
+            return cv2.flip(image, 1)
+        return image
+    
+    def random_vertical_flip(self, image: np.ndarray, p: float = 0.5) -> np.ndarray:
+        """随机垂直翻转"""
+        if np.random.random() < p:
+            return cv2.flip(image, 0)
+        return image
+    
+    def random_rotation(self, image: np.ndarray, max_angle: float = 15) -> np.ndarray:
+        """随机旋转"""
+        angle = np.random.uniform(-max_angle, max_angle)
+        h, w = image.shape[:2]
+        center = (w // 2, h // 2)
+        matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+        return cv2.warpAffine(image, matrix, (w, h))
+    
+    def random_brightness(self, image: np.ndarray, factor_range: Tuple[float, float] = (0.8, 1.2)) -> np.ndarray:
+        """随机亮度调整"""
+        factor = np.random.uniform(factor_range[0], factor_range[1])
+        return np.clip(image * factor, 0, 255).astype(np.uint8)
+    
+    def random_contrast(self, image: np.ndarray, factor_range: Tuple[float, float] = (0.8, 1.2)) -> np.ndarray:
+        """随机对比度调整"""
+        factor = np.random.uniform(factor_range[0], factor_range[1])
+        mean = np.mean(image)
+        return np.clip((image - mean) * factor + mean, 0, 255).astype(np.uint8)
+    
+    def random_saturation(self, image: np.ndarray, factor_range: Tuple[float, float] = (0.8, 1.2)) -> np.ndarray:
+        """随机饱和度调整"""
+        if len(image.shape) == 3:
+            hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+            factor = np.random.uniform(factor_range[0], factor_range[1])
+            hsv[:, :, 1] = np.clip(hsv[:, :, 1] * factor, 0, 255)
+            return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
+        return image
+    
+    def random_noise(self, image: np.ndarray, noise_factor: float = 0.1) -> np.ndarray:
+        """随机噪声"""
+        noise = np.random.normal(0, noise_factor * 255, image.shape)
+        return np.clip(image + noise, 0, 255).astype(np.uint8)
+    
+    def random_crop(self, image: np.ndarray, crop_size: Tuple[int, int]) -> np.ndarray:
+        """随机裁剪"""
+        h, w = image.shape[:2]
+        crop_h, crop_w = crop_size
+        
+        if h < crop_h or w < crop_w:
+            return cv2.resize(image, crop_size)
+        
+        start_h = np.random.randint(0, h - crop_h + 1)
+        start_w = np.random.randint(0, w - crop_w + 1)
+        
+        return image[start_h:start_h+crop_h, start_w:start_w+crop_w]
+
+
+class ImageTransforms:
+    """图像变换组合"""
+    
+    def __init__(self, transforms: list):
+        """初始化变换组合"""
+        self.transforms = transforms
+    
+    def __call__(self, image: np.ndarray) -> np.ndarray:
+        """应用所有变换"""
+        for transform in self.transforms:
+            image = transform(image)
+        return image
+    
+    @staticmethod
+    def get_train_transforms(image_size: Tuple[int, int] = (224, 224)):
+        """获取训练时的变换"""
+        return ImageTransforms([
+            lambda img: ImageAugmenter().random_horizontal_flip(img, p=0.5),
+            lambda img: ImageAugmenter().random_rotation(img, max_angle=15),
+            lambda img: ImageAugmenter().random_brightness(img),
+            lambda img: ImageAugmenter().random_contrast(img),
+            lambda img: ImagePreprocessor().resize(img, image_size),
+            lambda img: ImagePreprocessor().normalize(img)
+        ])
+    
+    @staticmethod
+    def get_test_transforms(image_size: Tuple[int, int] = (224, 224)):
+        """获取测试时的变换"""
+        return ImageTransforms([
+            lambda img: ImagePreprocessor().resize(img, image_size),
+            lambda img: ImagePreprocessor().normalize(img)
+        ])
+
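
A quick sketch of the transform pipeline on a random stand-in image:

```python
import numpy as np
from common.preprocessing.image import ImageTransforms

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # fake RGB frame

train_tf = ImageTransforms.get_train_transforms(image_size=(224, 224))
out = train_tf(image)        # flip/rotate/brightness/contrast, then resize+normalize
print(out.shape, out.dtype)  # (224, 224, 3) float32
```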

+ 146 - 0
common/preprocessing/text.py

@@ -0,0 +1,146 @@
+"""
+文本预处理工具
+"""
+import re
+import string
+from typing import List, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class TextPreprocessor:
+    """文本预处理器"""
+    
+    def __init__(self):
+        """初始化文本预处理器"""
+        self.stop_words = set()  # 可以加载停用词表
+    
+    def clean_text(self, text: str, 
+                   remove_punctuation: bool = True,
+                   remove_numbers: bool = False,
+                   remove_extra_spaces: bool = True,
+                   to_lower: bool = True) -> str:
+        """清理文本"""
+        if not isinstance(text, str):
+            return ""
+        
+        # 转换为小写
+        if to_lower:
+            text = text.lower()
+        
+        # 移除标点符号
+        if remove_punctuation:
+            text = text.translate(str.maketrans('', '', string.punctuation))
+        
+        # 移除数字
+        if remove_numbers:
+            text = re.sub(r'\d+', '', text)
+        
+        # 移除多余空格
+        if remove_extra_spaces:
+            text = re.sub(r'\s+', ' ', text).strip()
+        
+        return text
+    
+    def tokenize(self, text: str, method: str = 'split') -> List[str]:
+        """分词"""
+        if method == 'split':
+            return text.split()
+        elif method == 'regex':
+            # 使用正则表达式分词
+            return re.findall(r'\b\w+\b', text)
+        else:
+            raise ValueError(f"不支持的分词方法: {method}")
+    
+    def remove_stopwords(self, tokens: List[str], custom_stopwords: Optional[List[str]] = None) -> List[str]:
+        """移除停用词"""
+        if custom_stopwords:
+            stop_words = set(custom_stopwords)
+        else:
+            stop_words = self.stop_words
+        
+        return [token for token in tokens if token not in stop_words]
+    
+    def preprocess(self, text: str, 
+                   clean: bool = True,
+                   tokenize: bool = True,
+                   remove_stopwords: bool = False,
+                   **kwargs) -> List[str]:
+        """完整的文本预处理流程"""
+        if clean:
+            text = self.clean_text(text, **kwargs)
+        
+        if tokenize:
+            tokens = self.tokenize(text)
+        else:
+            tokens = [text]
+        
+        if remove_stopwords:
+            tokens = self.remove_stopwords(tokens)
+        
+        return tokens
+
+
+class TextNormalizer:
+    """文本标准化器"""
+    
+    @staticmethod
+    def normalize_whitespace(text: str) -> str:
+        """标准化空白字符"""
+        return re.sub(r'\s+', ' ', text).strip()
+    
+    @staticmethod
+    def normalize_unicode(text: str) -> str:
+        """标准化Unicode字符"""
+        import unicodedata
+        return unicodedata.normalize('NFKD', text)
+    
+    @staticmethod
+    def remove_html_tags(text: str) -> str:
+        """移除HTML标签"""
+        import re
+        clean = re.compile('<.*?>')
+        return re.sub(clean, '', text)
+    
+    @staticmethod
+    def remove_urls(text: str) -> str:
+        """移除URL"""
+        import re
+        return re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
+    
+    @staticmethod
+    def remove_emails(text: str) -> str:
+        """移除邮箱地址"""
+        import re
+        return re.sub(r'\S+@\S+', '', text)
+
+
+class TextAugmenter:
+    """文本增强器"""
+    
+    @staticmethod
+    def synonym_replacement(text: str, replacement_ratio: float = 0.1) -> str:
+        """同义词替换"""
+        # 这里可以实现同义词替换逻辑
+        # 需要同义词词典或使用NLP库
+        return text
+    
+    @staticmethod
+    def random_insertion(text: str, insertion_ratio: float = 0.1) -> str:
+        """随机插入"""
+        # 实现随机插入逻辑
+        return text
+    
+    @staticmethod
+    def random_swap(text: str, swap_ratio: float = 0.1) -> str:
+        """随机交换"""
+        # 实现随机交换逻辑
+        return text
+    
+    @staticmethod
+    def random_deletion(text: str, deletion_ratio: float = 0.1) -> str:
+        """随机删除"""
+        # 实现随机删除逻辑
+        return text
+
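
Example of chaining the normalizer and preprocessor:

```python
from common.preprocessing.text import TextPreprocessor, TextNormalizer

raw = "Visit <b>https://example.com</b> NOW!!  Offer ends 2024."
clean = TextNormalizer.remove_urls(TextNormalizer.remove_html_tags(raw))
tokens = TextPreprocessor().preprocess(clean, remove_numbers=True)
print(tokens)  # ['visit', 'now', 'offer', 'ends']
```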

+ 2 - 0
common/utils/__init__.py

@@ -0,0 +1,2 @@
+# Utility functions module
+

+ 191 - 0
common/utils/config.py

@@ -0,0 +1,191 @@
+"""
+配置管理工具
+"""
+import yaml
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class Config:
+    """配置管理类"""
+    
+    def __init__(self, config_path: Optional[Union[str, Path]] = None):
+        """初始化配置"""
+        self.config = {}
+        if config_path:
+            self.load_config(config_path)
+    
+    def load_config(self, config_path: Union[str, Path]) -> None:
+        """加载配置文件"""
+        config_path = Path(config_path)
+        
+        if not config_path.exists():
+            raise FileNotFoundError(f"配置文件不存在: {config_path}")
+        
+        if config_path.suffix.lower() == '.yaml' or config_path.suffix.lower() == '.yml':
+            self.load_yaml(config_path)
+        elif config_path.suffix.lower() == '.json':
+            self.load_json(config_path)
+        else:
+            raise ValueError(f"不支持的配置文件格式: {config_path.suffix}")
+    
+    def load_yaml(self, config_path: Path) -> None:
+        """加载YAML配置文件"""
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                self.config = yaml.safe_load(f)
+            logger.info(f"成功加载YAML配置文件: {config_path}")
+        except Exception as e:
+            logger.error(f"加载YAML配置文件失败: {e}")
+            raise
+    
+    def load_json(self, config_path: Path) -> None:
+        """加载JSON配置文件"""
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                self.config = json.load(f)
+            logger.info(f"成功加载JSON配置文件: {config_path}")
+        except Exception as e:
+            logger.error(f"加载JSON配置文件失败: {e}")
+            raise
+    
+    def get(self, key: str, default: Any = None) -> Any:
+        """获取配置值"""
+        keys = key.split('.')
+        value = self.config
+        
+        for k in keys:
+            if isinstance(value, dict) and k in value:
+                value = value[k]
+            else:
+                return default
+        
+        return value
+    
+    def set(self, key: str, value: Any) -> None:
+        """设置配置值"""
+        keys = key.split('.')
+        config = self.config
+        
+        for k in keys[:-1]:
+            if k not in config:
+                config[k] = {}
+            config = config[k]
+        
+        config[keys[-1]] = value
+    
+    def update(self, other_config: Dict[str, Any]) -> None:
+        """更新配置"""
+        self.config.update(other_config)
+    
+    def save_yaml(self, config_path: Union[str, Path]) -> None:
+        """保存为YAML文件"""
+        config_path = Path(config_path)
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+        
+        try:
+            with open(config_path, 'w', encoding='utf-8') as f:
+                yaml.dump(self.config, f, default_flow_style=False, allow_unicode=True)
+            logger.info(f"成功保存YAML配置文件: {config_path}")
+        except Exception as e:
+            logger.error(f"保存YAML配置文件失败: {e}")
+            raise
+    
+    def save_json(self, config_path: Union[str, Path]) -> None:
+        """保存为JSON文件"""
+        config_path = Path(config_path)
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+        
+        try:
+            with open(config_path, 'w', encoding='utf-8') as f:
+                json.dump(self.config, f, indent=2, ensure_ascii=False)
+            logger.info(f"成功保存JSON配置文件: {config_path}")
+        except Exception as e:
+            logger.error(f"保存JSON配置文件失败: {e}")
+            raise
+    
+    def to_dict(self) -> Dict[str, Any]:
+        """转换为字典"""
+        return self.config.copy()
+    
+    def __getitem__(self, key: str) -> Any:
+        """支持字典式访问"""
+        return self.get(key)
+    
+    def __setitem__(self, key: str, value: Any) -> None:
+        """支持字典式设置"""
+        self.set(key, value)
+    
+    def __contains__(self, key: str) -> bool:
+        """支持in操作符"""
+        return self.get(key) is not None
+
+
+class EnvironmentConfig:
+    """环境变量配置"""
+    
+    def __init__(self, prefix: str = ""):
+        """初始化环境配置"""
+        self.prefix = prefix.upper()
+    
+    def get(self, key: str, default: Any = None, type_func: type = str) -> Any:
+        """获取环境变量"""
+        env_key = f"{self.prefix}_{key.upper()}" if self.prefix else key.upper()
+        value = os.getenv(env_key, default)
+        
+        if value is None:
+            return default
+        
+        try:
+            return type_func(value)
+        except (ValueError, TypeError):
+            logger.warning(f"无法转换环境变量 {env_key} 为 {type_func.__name__}, 使用默认值: {default}")
+            return default
+    
+    def get_bool(self, key: str, default: bool = False) -> bool:
+        """获取布尔环境变量"""
+        value = self.get(key, str(default))
+        return value.lower() in ('true', '1', 'yes', 'on')
+    
+    def get_int(self, key: str, default: int = 0) -> int:
+        """获取整数环境变量"""
+        return self.get(key, default, int)
+    
+    def get_float(self, key: str, default: float = 0.0) -> float:
+        """获取浮点数环境变量"""
+        return self.get(key, default, float)
+    
+    def get_list(self, key: str, default: list = None, separator: str = ',') -> list:
+        """获取列表环境变量"""
+        if default is None:
+            default = []
+        
+        value = self.get(key, "")
+        if not value:
+            return default
+        
+        return [item.strip() for item in value.split(separator)]
+
+
+def load_config_from_env(config_class: type, env_prefix: str = "") -> Any:
+    """Instantiate a config class and override its attributes from environment variables."""
+    env_config = EnvironmentConfig(env_prefix)
+    
+    # Walk the instance's public, non-callable attributes
+    config_instance = config_class()
+    
+    for attr_name in dir(config_instance):
+        if not attr_name.startswith('_'):
+            attr_value = getattr(config_instance, attr_name)
+            if not callable(attr_value):
+                # Read from the environment, falling back to the current attribute value
+                env_value = env_config.get(attr_name, attr_value)
+                setattr(config_instance, attr_name, env_value)
+    
+    return config_instance
+
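For reference, a minimal usage sketch of the environment helpers above (the `ModelConfig` class and `APP_*` variable names here are hypothetical, not part of the repo):

```python
import os

from common.utils.config import EnvironmentConfig, load_config_from_env

class ModelConfig:
    """Hypothetical config class; class attributes double as defaults."""
    batch_size = 32
    debug = False

os.environ["APP_BATCH_SIZE"] = "64"   # would normally come from the shell

env = EnvironmentConfig(prefix="APP")
print(env.get_int("batch_size"))      # 64
print(env.get_bool("debug"))          # False (unset, falls back to the default)

cfg = load_config_from_env(ModelConfig, env_prefix="APP")
print(cfg.batch_size)  # "64" -- note: values come back as strings unless a type_func is given
```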

+ 151 - 0
common/utils/logger.py

@@ -0,0 +1,151 @@
+"""
+日志工具
+"""
+import logging
+import sys
+from functools import wraps
+from pathlib import Path
+from typing import Optional
+import json
+from datetime import datetime
+
+
+class ColoredFormatter(logging.Formatter):
+    """Formatter that colorizes the level name."""
+    
+    COLORS = {
+        'DEBUG': '\033[36m',    # cyan
+        'INFO': '\033[32m',     # green
+        'WARNING': '\033[33m',  # yellow
+        'ERROR': '\033[31m',    # red
+        'CRITICAL': '\033[35m', # magenta
+    }
+    RESET = '\033[0m'
+    
+    def format(self, record):
+        log_color = self.COLORS.get(record.levelname, '')
+        record.levelname = f"{log_color}{record.levelname}{self.RESET}"
+        return super().format(record)
+
+
+class JSONFormatter(logging.Formatter):
+    """Formatter that emits one JSON object per record."""
+    
+    def format(self, record):
+        log_entry = {
+            'timestamp': datetime.fromtimestamp(record.created).isoformat(),
+            'level': record.levelname,
+            'logger': record.name,
+            'message': record.getMessage(),
+            'module': record.module,
+            'function': record.funcName,
+            'line': record.lineno
+        }
+        
+        if record.exc_info:
+            log_entry['exception'] = self.formatException(record.exc_info)
+        
+        return json.dumps(log_entry, ensure_ascii=False)
+
+
+def setup_logger(name: str, 
+                 level: str = 'INFO',
+                 log_file: Optional[str] = None,
+                 format_type: str = 'colored',
+                 max_bytes: int = 10 * 1024 * 1024,  # 10MB
+                 backup_count: int = 5) -> logging.Logger:
+    """设置日志器"""
+    
+    logger = logging.getLogger(name)
+    logger.setLevel(getattr(logging, level.upper()))
+    
+    # Remove any existing handlers
+    logger.handlers.clear()
+    
+    # Console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(getattr(logging, level.upper()))
+    
+    if format_type == 'colored':
+        formatter = ColoredFormatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+    elif format_type == 'json':
+        formatter = JSONFormatter()
+    else:
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+    
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    
+    # Optional rotating file handler
+    if log_file:
+        from logging.handlers import RotatingFileHandler
+        
+        # Ensure the log directory exists
+        log_path = Path(log_file)
+        log_path.parent.mkdir(parents=True, exist_ok=True)
+        
+        file_handler = RotatingFileHandler(
+            log_file, maxBytes=max_bytes, backupCount=backup_count
+        )
+        file_handler.setLevel(getattr(logging, level.upper()))
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+    
+    return logger
+
+
+def get_logger(name: str) -> logging.Logger:
+    """Return a logger by name."""
+    return logging.getLogger(name)
+
+
+class LoggerMixin:
+    """Mixin that provides a logger named after the class."""
+    
+    @property
+    def logger(self) -> logging.Logger:
+        """Return (and cache) the class-level logger."""
+        if not hasattr(self, '_logger'):
+            self._logger = get_logger(self.__class__.__name__)
+        return self._logger
+
+
+def log_function_call(func):
+    """Decorator that logs each call, its arguments, and its outcome."""
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        logger = get_logger(func.__module__)
+        logger.debug(f"Calling function: {func.__name__}, args={args}, kwargs={kwargs}")
+        try:
+            result = func(*args, **kwargs)
+            logger.debug(f"Function {func.__name__} completed successfully")
+            return result
+        except Exception as e:
+            logger.error(f"Function {func.__name__} failed: {e}")
+            raise
+    return wrapper
+
+
+def log_execution_time(func):
+    """Decorator that logs how long each call takes."""
+    import time
+    
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        logger = get_logger(func.__module__)
+        start_time = time.time()
+        logger.info(f"Starting: {func.__name__}")
+        
+        try:
+            result = func(*args, **kwargs)
+            execution_time = time.time() - start_time
+            logger.info(f"Finished: {func.__name__}, elapsed: {execution_time:.2f}s")
+            return result
+        except Exception as e:
+            execution_time = time.time() - start_time
+            logger.error(f"Failed: {func.__name__}, elapsed: {execution_time:.2f}s, error: {e}")
+            raise
+    
+    return wrapper
+
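A short usage sketch of the helpers above (logger name and file path are arbitrary):

```python
from common.utils.logger import setup_logger, log_execution_time

# Configure this module's logger; the decorator logs through the same name
logger = setup_logger(__name__, level="DEBUG", log_file="logs/demo.log")

@log_execution_time
def train_step():
    logger.info("one training step")

train_step()  # emits "Starting"/"Finished" lines with the elapsed seconds
```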

+ 33 - 0
env.example

@@ -0,0 +1,33 @@
+# Database Configuration
+DATABASE_URL=postgresql://postgres:password@localhost:5432/dualflow
+REDIS_URL=redis://localhost:6379
+
+# Message Queue
+RABBITMQ_URL=amqp://guest:guest@localhost:5672
+
+# API Configuration
+API_HOST=0.0.0.0
+API_PORT=8000
+API_DEBUG=True
+
+# Security
+SECRET_KEY=your-secret-key-here
+ALGORITHM=HS256
+ACCESS_TOKEN_EXPIRE_MINUTES=30
+
+# External Services
+OPENAI_API_KEY=your-openai-api-key
+HUGGINGFACE_API_KEY=your-huggingface-api-key
+
+# Monitoring
+PROMETHEUS_ENDPOINT=http://localhost:9090
+GRAFANA_ENDPOINT=http://localhost:3000
+
+# Logging
+LOG_LEVEL=INFO
+LOG_FORMAT=json
+
+# Development
+DEBUG=True
+RELOAD=True
+
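One way to consume these variables from Python, assuming the file is copied to `.env` and loaded with the third-party python-dotenv package (an assumption; the repo itself does not show this step):

```python
from dotenv import load_dotenv  # pip install python-dotenv

from common.utils.config import EnvironmentConfig

load_dotenv()  # reads .env into os.environ

env = EnvironmentConfig()  # no prefix: keys match the names above
api_port = env.get_int("API_PORT", 8000)   # 8000
debug = env.get_bool("API_DEBUG")          # True
log_level = env.get("LOG_LEVEL", "INFO")   # "INFO"
print(api_port, debug, log_level)
```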

+ 55 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/args.py

@@ -0,0 +1,55 @@
+# args.py
+import argparse
+
+def lstm_args_parser():
+    parser = argparse.ArgumentParser(description="LSTM模型训练参数")
+    
+    # 数据集划分
+    parser.add_argument('--train_start_date', type=str, default='2024-02-23', help='训练集开始日期')
+    parser.add_argument('--train_end_date', type=str, default='2025-08-13', help='训练集结束日期')
+    parser.add_argument('--val_start_date', type=str, default='2025-08-01', help='验证集开始日期')
+    parser.add_argument('--val_end_date', type=str, default='2025-08-13', help='验证集结束日期')
+    parser.add_argument('--test_start_date', type=str, default='2025-08-01', help='测试集开始日期')
+    parser.add_argument('--test_end_date', type=str, default='2025-08-13', help='测试集结束日期')
+
+    # 模型相关参数
+    parser.add_argument('--seq_len', type=int, default=60, help='输入序列的长度(输入步长)')
+    parser.add_argument('--output_size', type=int, default=5, help='输出数据的维度(预测步长)')
+    parser.add_argument('--step_size', type=int, default=5, help='输入数据间隔')
+    parser.add_argument('--resolution', type=int, default=60, help='输入数据分辨率(每多少个数据取一次)')
+    parser.add_argument('--epochs', type=int, default=1000, help='训练轮数')
+    parser.add_argument('--feature_num', type=int, default=79, help='特征维度')
+    parser.add_argument('--labels_num', type=int, default=16, help='标签维度(子模型数量)')
+    parser.add_argument('--hidden_size', type=int, default=64, help='隐藏层大小')
+    parser.add_argument('--num_layers', type=int, default=1, help='LSTM层数')
+    parser.add_argument('--dropout', type=float, default=0, help='dropout的概率')
+    parser.add_argument('--lr', type=float, default=0.01, help='学习率')
+    parser.add_argument('--batch_size', type=int, default=1024, help='批次大小')
+    
+    # 学习率调度器
+    parser.add_argument('--scheduler_step_size', type=int, default=100, help='学习率调整步长')
+    parser.add_argument('--scheduler_gamma', type=float, default=0.9, help='学习率衰减率')
+    
+    # 早停
+    parser.add_argument('--patience', type=int, default=500, help='早停耐心值')
+    parser.add_argument('--min_delta', type=float, default=1e-10, help='最小改善阈值')
+    
+    # 设备选择
+    parser.add_argument('--device', type=int, default=0, help='选择使用的GPU设备')
+
+    # 数据处理相关参数
+    parser.add_argument('--start_files', type=int, default=1, help='开始文件索引')
+    parser.add_argument('--end_files', type=int, default=51, help='结束文件索引')
+    parser.add_argument('--data_dir', type=str, default='datasets_xishan', help='数据文件夹路径')
+    parser.add_argument('--file_pattern', type=str, default='data_process_{}.csv', help='数据文件命名模式')
+    
+    # 模型保存路径
+    parser.add_argument('--model_path', type=str, default='model.pth', help='模型保存路径')
+    parser.add_argument('--output_csv_path', type=str, default='predictions.csv', help='预测文件保存路径')
+    
+    # 随机种子
+    parser.add_argument('--random_seed', type=int, default=1314, help='随机种子')
+
+    args = parser.parse_args()
+    
+    return args
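For reference, a typical invocation overriding a few of these defaults (simulated argv; in practice the flags come from the shell):

```python
import sys

from args import lstm_args_parser

# Hypothetical command line: python main.py --seq_len 60 --batch_size 512
sys.argv = ["main.py", "--seq_len", "60", "--batch_size", "512"]
args = lstm_args_parser()
print(args.seq_len, args.batch_size, args.lr)  # 60 512 0.01
```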

+ 85 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/data_export.py

@@ -0,0 +1,85 @@
+from sqlalchemy import create_engine
+import pandas as pd
+import os
+
+username = os.getenv('DB_USERNAME', 'whu')
+password = os.getenv('DB_PASSWORD', '09093f4e6b33ddd')
+host = os.getenv('DB_HOST', '222.130.26.206')
+database = os.getenv('DB_DATABASE', 'ws_data')
+port = int(os.getenv('DB_PORT', '4000'))
+database_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}?charset=utf8mb4'
+engine = create_engine(database_url)
+
+start_time = "2025-08-01 00:01:00"
+end_time = "2025-09-10 00:00:00"
+
+query = """
+SELECT * 
+FROM dc_item_history_data_92
+WHERE item_name in (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+    AND h_time >= %s
+    AND h_time <= %s
+"""
+
+params = (
+        "C.M.RO1_FT_JS@out",     # RO1 reverse-osmosis feed flow
+        "C.M.RO2_FT_JS@out",     # RO2 reverse-osmosis feed flow
+        "C.M.RO3_FT_JS@out",     # RO3 reverse-osmosis feed flow
+        "C.M.RO4_FT_JS@out",     # RO4 reverse-osmosis feed flow
+        "C.M.RO_TT_ZJS@out",     # RO total feed temperature
+        'C.M.RO_Cond_ZJS@out',   # RO total feed conductivity
+        'C.M.RO_Cond_ZCS@out',   # RO total permeate conductivity
+        'C.M.RO1_DB@DPT_1',      # RO1 stage-1 differential pressure
+        'C.M.RO1_DB@DPT_2',      # RO1 stage-2 differential pressure
+        'C.M.RO2_DB@DPT_1',      # RO2 stage-1 differential pressure
+        'C.M.RO2_DB@DPT_2',      # RO2 stage-2 differential pressure
+        'C.M.RO3_DB@DPT_1',      # RO3 stage-1 differential pressure
+        'C.M.RO3_DB@DPT_2',      # RO3 stage-2 differential pressure
+        'C.M.RO4_DB@DPT_1',      # RO4 stage-1 differential pressure
+        'C.M.RO4_DB@DPT_2',      # RO4 stage-2 differential pressure
+
+        start_time,
+        end_time)
+
+data_origin = pd.read_sql(query, engine, params=params)
+
+engine.dispose()
+data = data_origin.pivot_table(index='h_time', columns='item_name', values='val')
+data = data.reset_index()
+
+# Define the reference time series (one timestamp per second)
+standard_times = pd.date_range(start=start_time, end=end_time, freq='s')
+
+# Align to the reference time series
+data_aligned = data.set_index('h_time').reindex(standard_times).reset_index()
+
+# Forward-fill missing values
+data_filled = data_aligned.ffill()
+
+# Keep every 60th record
+data_sampled = data_filled.iloc[::60, :]
+            
+# Export the data
+current_directory = os.getcwd()
+parent_directory = os.path.dirname(current_directory)
+folder_name = 'datasets_xishan'
+folder_path = os.path.join(parent_directory, folder_name)
+# Make sure the folder exists
+if not os.path.exists(folder_path):
+    os.makedirs(folder_path)
+
+# Split the export across multiple files
+chunk_size = 500000
+
+file_count = 8  # initialize the file counter
+for start in range(0, len(data_sampled), chunk_size):
+    end = min(start + chunk_size, len(data_sampled))
+    chunk = data_sampled.iloc[start:end]
+    
+    # Build a unique file name
+    file_name = f'data_{file_count+1}.csv'
+    file_path = os.path.join(folder_path, file_name)
+    chunk.to_csv(file_path, index=False, encoding='utf_8_sig')
+    
+    file_count += 1
+
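The pivot/align/fill steps above do the heavy lifting; a toy reproduction with synthetic tags and timestamps:

```python
import pandas as pd

raw = pd.DataFrame({
    "h_time": pd.to_datetime(["2025-08-01 00:00:01", "2025-08-01 00:00:01",
                              "2025-08-01 00:00:03"]),
    "item_name": ["tagA", "tagB", "tagA"],
    "val": [1.0, 2.0, 3.0],
})

# Long -> wide: one column per tag
wide = raw.pivot_table(index="h_time", columns="item_name", values="val").reset_index()

# Align to a 1-second grid, then forward-fill the gaps
grid = pd.date_range("2025-08-01 00:00:01", "2025-08-01 00:00:04", freq="s")
aligned = wide.set_index("h_time").reindex(grid).ffill().reset_index()
print(aligned)  # tagA/tagB values carried forward over the missing seconds
```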

+ 276 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/data_preprocessor.py

@@ -0,0 +1,276 @@
+# data_preprocessor.py
+import os
+import torch
+import joblib
+import numpy as np
+import pandas as pd
+from tqdm import tqdm    # progress bars
+from sklearn.preprocessing import MinMaxScaler    # data normalization
+from torch.utils.data import DataLoader, TensorDataset    # PyTorch data loading
+from concurrent.futures import ThreadPoolExecutor    # multithreaded file reading
+
+class DataPreprocessor:
+    """数据预处理类,负责数据加载、划分、转换为模型可输入的格式"""
+    
+    @staticmethod
+    def load_and_process_data(args, data):
+        
+        """
+        加载并处理数据,划分训练/验证/测试集,创建数据加载器
+        参数:
+            args: 配置参数(包含数据集划分日期、序列长度等)
+            data: 预处理后的完整数据(含日期列)
+        返回:
+            train_loader: 训练集数据加载器
+            val_loader: 验证集数据加载器
+            test_loader: 测试集数据加载器
+            data: 原始数据(用于后续处理)
+        """
+        
+        # Parse the date column
+        data['date'] = pd.to_datetime(data['date'])
+        time_interval = pd.Timedelta(minutes=(4 * args.resolution / 60))
+        window_time_span = time_interval * (args.seq_len + 1)
+
+        # Split train/val/test (start dates adjusted for the sliding window)
+        val_start_date = pd.to_datetime(args.val_start_date)
+        test_start_date = pd.to_datetime(args.test_start_date)
+        
+        # Shift the val/test starts back by one window so enough history exists to build input sequences
+        adjusted_val_start = val_start_date - window_time_span
+        adjusted_test_start = test_start_date - window_time_span
+        
+        # Build dataset masks (filter by date)
+        train_mask = (data['date'] >= pd.to_datetime(args.train_start_date)) & \
+                     (data['date'] <= pd.to_datetime(args.train_end_date))
+
+        val_mask = (data['date'] >= adjusted_val_start) & \
+                   (data['date'] <= pd.to_datetime(args.val_end_date))
+
+        test_mask = (data['date'] >= adjusted_test_start) & \
+                    (data['date'] <= pd.to_datetime(args.test_end_date))
+
+        # Apply the masks and reset indices
+        train_data = data[train_mask].reset_index(drop=True)
+        val_data = data[val_mask].reset_index(drop=True)
+        test_data = data[test_mask].reset_index(drop=True)
+        
+        # Drop the date column for modeling
+        train_data = train_data.drop(columns=['date'])
+        val_data = val_data.drop(columns=['date'])
+        test_data = test_data.drop(columns=['date'])
+    
+        # Build supervised datasets (input sequence + target sequence)
+        train_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            train_data,
+            1
+        )
+        
+        val_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            val_data,
+            1
+        )
+        
+        test_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            test_data,
+            args.step_size
+        )
+        
+        # Wrap in DataLoaders
+        train_loader = DataPreprocessor.load_data(
+            args, 
+            train_supervised,
+            shuffle=True
+        )
+        
+        val_loader = DataPreprocessor.load_data(
+            args, 
+            val_supervised,
+            shuffle=False
+        )
+        
+        test_loader = DataPreprocessor.load_data(
+            args, 
+            test_supervised,
+            shuffle=False
+        )
+        
+        return train_loader, val_loader, test_loader, data  # also return the raw data for downstream use
+    
+    @staticmethod
+    def read_and_combine_csv_files(args):
+        """
+        多线程读取并合并多个CSV文件,进行下采样、日期处理和归一化
+        参数:
+            args: 配置参数(包含数据路径、文件范围等)
+        返回:
+            chunk: 预处理后的合并数据(含日期和归一化特征)
+        """
+        current_dir = os.path.dirname(__file__)
+        parent_dir = os.path.dirname(current_dir)
+        args.data_dir = os.path.join(parent_dir, args.data_dir)
+        
+        def read_file(file_count):
+            """读取单个CSV文件的函数(供多线程调用)"""
+            file_name = args.file_pattern.format(file_count)
+            file_path = os.path.join(args.data_dir, file_name)
+            return pd.read_csv(file_path)
+        
+        # Indices of the files to read
+        file_indices = list(range(args.start_files, args.end_files + 1))
+        
+        # Read files in parallel (speeds up large reads)
+        max_workers = os.cpu_count()
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            results = list(tqdm(executor.map(read_file, file_indices),
+                                total=len(file_indices),
+                                desc="Reading files"))
+        
+        all_data = pd.concat(results, ignore_index=True)
+        # Downsample by resolution
+        chunk = all_data.iloc[::args.resolution, :].reset_index(drop=True)
+        
+        # Process date and time features
+        chunk = DataPreprocessor.process_date(chunk)
+        # Normalize
+        chunk = DataPreprocessor.scaler_data(chunk)
+        
+        return chunk
+    
+    @staticmethod
+    def process_date(data):
+        """
+        处理日期列,生成周期性时间特征(与Predictor中的方法一致,保证一致性)
+        参数:
+            data: 含'index'列(原始日期)的DataFrame
+        返回:
+            data: 处理后的DataFrame(含日期列和时间特征)
+        """
+        data = data.rename(columns={'index': 'date'})
+        data['date'] = pd.to_datetime(data['date'])
+
+        # Derive cyclic time features
+        data['minute_of_day'] = data['date'].dt.hour * 60 + data['date'].dt.minute
+        data['day_of_year'] = data['date'].dt.dayofyear
+
+        # Cyclic encoding (sine/cosine keeps the features continuous across period boundaries)
+        data['minute_sin'] = np.sin(2 * np.pi * data['minute_of_day'] / 1440)
+        data['minute_cos'] = np.cos(2 * np.pi * data['minute_of_day'] / 1440)
+        data['day_year_sin'] = np.sin(2 * np.pi * data['day_of_year'] / 366)
+        data['day_year_cos'] = np.cos(2 * np.pi * data['day_of_year'] / 366)
+
+        # Drop the raw time columns, keep the encoded features
+        data.drop(columns=['minute_of_day', 'day_of_year'], inplace=True)
+
+        # Reorder columns (date + time features + everything else)
+        time_features = ['minute_sin', 'minute_cos', 'day_year_sin', 'day_year_cos']
+        other_columns = [col for col in data.columns if col not in ['date'] and col not in time_features]
+        data = data[['date'] + time_features + other_columns]
+
+        return data
+    
+    @staticmethod
+    def scaler_data(data):
+        """
+        对数据进行归一化(0-1缩放),并保存归一化器(供预测时反归一化)
+        参数:
+            data: 含'date'列和特征列的DataFrame
+        返回:
+            scaled_data: 归一化后的DataFrame(含日期列)
+        """
+        date_col = data[['date']]
+        data_to_scale = data.drop(columns=['date'])
+
+        scaler = MinMaxScaler(feature_range=(0, 1))
+        scaled_data = scaler.fit_transform(data_to_scale)
+        joblib.dump(scaler, 'scaler.pkl')  # persist the scaler
+
+        # Back to DataFrame; reattach the date column
+        scaled_data = pd.DataFrame(scaled_data, columns=data_to_scale.columns)
+        scaled_data = pd.concat([date_col.reset_index(drop=True), scaled_data], axis=1)
+        
+        return scaled_data
+    
+    @staticmethod
+    def create_supervised_dataset(args, data, step_size):
+        """
+        创建监督学习数据集(输入序列+目标序列)
+        输入序列:历史seq_len个时间步的所有特征
+        目标序列:未来output_size个时间步的标签特征(最后labels_num列)
+        参数:
+            args: 配置参数(含seq_len、output_size等)
+            data: 输入数据(不含日期列的特征数据)
+            step_size: 采样步长(每隔step_size取一个样本)
+        返回:
+            dataset: 监督学习数据集(DataFrame)
+        """
+        data = pd.DataFrame(data)
+        cols = []
+        col_names = []
+        
+        feature_columns = data.columns.tolist()
+
+        # Input sequence (t-0 back to t-(seq_len-1))
+        for col in feature_columns:
+            for i in range(args.seq_len - 1, -1, -1):
+                cols.append(data[[col]].shift(i))
+                col_names.append(f"{col}(t-{i})")
+        
+        # Target sequence (only the last labels_num columns are predicted)
+        target_columns = feature_columns[-args.labels_num:]
+        for i in range(1, args.output_size + 1):
+            for col in target_columns:
+                cols.append(data[[col]].shift(-i))
+                col_names.append(f"{col}(t+{i})")
+
+        # Concatenate and clean
+        dataset = pd.concat(cols, axis=1)
+        dataset.columns = col_names
+        dataset = dataset.iloc[::step_size, :]  # sample by stride
+        dataset.dropna(inplace=True)  # drop rows made NaN by the shifts
+        
+        return dataset
+
+    @staticmethod
+    def load_data(args, dataset, shuffle):
+        """
+        将监督学习数据集转换为PyTorch张量,并创建DataLoader
+        参数:
+            args: 配置参数(含特征数、批大小等)
+            dataset: 监督学习数据集(DataFrame)
+            shuffle: 是否打乱数据(训练集True,验证/测试集False)
+        返回:
+            data_loader: PyTorch DataLoader
+        """
+        input_length = args.seq_len
+        n_features = args.feature_num
+        labels_num = args.labels_num
+    
+        n_features_total = n_features * input_length  # total input feature width
+        n_labels_total = args.output_size * labels_num  # total target width
+
+        # Split inputs and targets
+        X = dataset.values[:, :n_features_total]
+        y = dataset.values[:, n_features_total:n_features_total + n_labels_total]
+    
+        # Reshape inputs to [samples, seq_len, features]
+        X = X.reshape(X.shape[0], input_length, n_features)
+        X = torch.tensor(X, dtype=torch.float32).to(args.device)
+        y = torch.tensor(y, dtype=torch.float32).to(args.device)
+
+        # Build the dataset and loader
+        dataset_tensor = TensorDataset(X, y)
+        generator = torch.Generator()
+        generator.manual_seed(args.random_seed)  # fix the seed for reproducibility
+        
+        data_loader = DataLoader(
+            dataset_tensor, 
+            batch_size=args.batch_size, 
+            shuffle=shuffle,
+            generator=generator
+        )
+    
+        return data_loader
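To make the windowing concrete, here is a tiny run of `create_supervised_dataset` with hypothetical, scaled-down settings (two columns, of which the last one is the only label):

```python
import pandas as pd
from types import SimpleNamespace

from data_preprocessor import DataPreprocessor

args = SimpleNamespace(seq_len=3, output_size=2, labels_num=1)
data = pd.DataFrame({"f1": range(10), "label": range(100, 110)})

ds = DataPreprocessor.create_supervised_dataset(args, data, step_size=1)
print(ds.columns.tolist())  # f1(t-2)..f1(t-0), label(t-2)..label(t-0), label(t+1), label(t+2)
print(ds.shape)             # (6, 8): the shifted edges containing NaNs are dropped
```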

+ 256 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/data_trainer.py

@@ -0,0 +1,256 @@
+# data_trainer.py
+import torch
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.metrics import r2_score
+from datetime import datetime, timedelta
+from sklearn.preprocessing import MinMaxScaler
+
+class Trainer:
+    def __init__(self, model, args, data):
+        """
+        模型训练器类,负责模型训练、验证、保存和评估
+        参数:
+            model: 待训练的模型实例
+            args: 配置参数
+            data: 原始数据(用于生成评估的时间戳)
+        """
+        self.args = args
+        self.model = model
+        self.data = data
+        
+        # Early-stopping state
+        self.patience = args.patience
+        self.min_delta = args.min_delta
+        self.counter = 0
+        self.early_stop = False
+        self.best_val_loss = float('inf')
+        self.best_model_state = None
+        self.best_epoch = 0
+
+    def train_full_model(self, train_loader, val_loader, optimizer, criterion, scheduler):
+        """
+        联合训练所有16个子模型(端到端训练)
+        参数:
+            train_loader: 训练集数据加载器
+            val_loader: 验证集数据加载器
+            optimizer: 优化器(如Adam)
+            criterion: 损失函数(如MSE)
+            scheduler: 学习率调度器
+        返回:
+            训练好的模型(加载最佳权重)
+        """
+        self.counter = 0
+        self.best_val_loss = float('inf')
+        self.early_stop = False
+        self.best_model_state = None
+        self.best_epoch = 0
+        max_epochs = self.args.epochs
+
+        for epoch in range(max_epochs):
+            self.model.train()
+            running_loss = 0.0
+            
+            for inputs, targets in train_loader:
+                inputs = inputs.to(self.args.device)
+                targets = targets.to(self.args.device)  # joint targets (all 16 dependent variables)
+                
+                optimizer.zero_grad()
+                outputs = self.model(inputs)  # joint model output
+                
+                loss = criterion(outputs, targets)  # joint loss
+                loss.backward()
+                optimizer.step()
+                running_loss += loss.item()
+            
+            train_loss = running_loss / len(train_loader)
+            val_loss = self.validate_full(val_loader, criterion) if val_loader else 0.0
+
+            print(f'Epoch {epoch+1}/{max_epochs}, '
+                  f'Train Loss: {train_loss:.6f}, '
+                  f'Val Loss: {val_loss:.6f}, '
+                  f'LR: {optimizer.param_groups[0]["lr"]:.6f}')
+
+            # Early stopping (based on the joint validation loss)
+            if val_loader:
+                improved = val_loss < (self.best_val_loss - self.min_delta)
+                if improved:
+                    self.best_val_loss = val_loss
+                    self.counter = 0
+                    self.best_model_state = self.model.state_dict()
+                    self.best_epoch = epoch
+                else:
+                    self.counter += 1
+                    if self.counter >= self.patience:
+                        self.early_stop = True
+                        print("Early stopping triggered")
+                        
+            scheduler.step()
+            torch.cuda.empty_cache()
+            if self.early_stop:
+                break
+
+        # Restore the best state
+        if self.best_model_state is not None:
+            self.model.load_state_dict(self.best_model_state)
+        print(f"Best epoch: {self.best_epoch+1}, best validation loss: {self.best_val_loss:.6f}")
+        return self.model
+
+    def validate_full(self, val_loader, criterion):
+        """
+        验证整个模型(计算验证集损失)
+        参数:
+            val_loader: 验证集数据加载器
+            criterion: 损失函数
+        返回:
+            平均验证损失
+        """
+        self.model.eval()
+        total_loss = 0.0
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                inputs = inputs.to(self.args.device)
+                targets = targets.to(self.args.device)  # joint targets
+                
+                outputs = self.model(inputs)  # joint model output
+                loss = criterion(outputs, targets)  # joint loss
+                total_loss += loss.item()
+        return total_loss / len(val_loader)
+
+    def save_model(self):
+        """Save the model's best weights to the configured path."""
+        torch.save(self.model.state_dict(), self.args.model_path)
+        print(f"Model saved to: {self.args.model_path}")
+            
+    def evaluate_model(self, test_loader, criterion):
+        """
+        评估模型在测试集上的性能,计算R方、RMSE、MAPE等指标,并保存结果
+        参数:
+            test_loader: 测试集数据加载器
+            criterion: 损失函数(用于计算测试损失)
+        返回:
+            各指标的字典(R方、RMSE、MAPE)
+        """
+        self.model.eval()
+        scaler_path = 'scaler.pkl'
+        scaler = joblib.load(scaler_path)
+        predictions = []
+        true_values = []
+        device = self.args.device
+        
+        with torch.no_grad():
+            for inputs, targets in test_loader:
+                inputs = inputs.to(device)
+                targets = targets.to(device)
+                outputs = self.model(inputs)
+                predictions.append(outputs.cpu().numpy())
+                true_values.append(targets.cpu().numpy())
+    
+        predictions = np.concatenate(predictions, axis=0)
+        true_values = np.concatenate(true_values, axis=0)
+    
+        # Reshape predictions and targets to match the inverse-scaling layout
+        reshaped_predictions = predictions.reshape(predictions.shape[0], 
+                                                   self.args.output_size, 
+                                                   self.args.labels_num)
+        predictions = reshaped_predictions.reshape(-1, self.args.labels_num)
+        
+        reshaped_true_values = true_values.reshape(true_values.shape[0], 
+                                                   self.args.output_size, 
+                                                   self.args.labels_num)
+        true_values = reshaped_true_values.reshape(-1, self.args.labels_num)
+    
+        # Inverse-scale (label columns only)
+        column_scaler = MinMaxScaler(feature_range=(0, 1))
+        column_scaler.min_ = scaler.min_[-self.args.labels_num:] 
+        column_scaler.scale_ = scaler.scale_[-self.args.labels_num:] 
+        
+        true_values = column_scaler.inverse_transform(true_values)
+        predictions = column_scaler.inverse_transform(predictions)
+    
+        # Column names (the 16 dependent variables)
+        column_names = [
+            'C.M.UF1_DB@press_PV', 'C.M.UF2_DB@press_PV', 'C.M.UF3_DB@press_PV', 'C.M.UF4_DB@press_PV',
+            'C.M.RO1_DB@DPT_1', 'C.M.RO2_DB@DPT_1', 'C.M.RO3_DB@DPT_1', 'C.M.RO4_DB@DPT_1',
+            'C.M.RO1_DB@DPT_2', 'C.M.RO2_DB@DPT_2', 'C.M.RO3_DB@DPT_2', 'C.M.RO4_DB@DPT_2',
+            'RO1_CSFlow', 'RO2_CSFlow', 'RO3_CSFlow', 'RO4_CSFlow'
+        ]
+    
+        # Build the timestamp series
+        start_datetime = datetime.strptime(self.args.test_start_date, "%Y-%m-%d")
+        time_interval = timedelta(minutes=(4 * self.args.resolution / 60))
+        total_points = len(predictions)
+        date_times = [start_datetime + i * time_interval for i in range(total_points)]
+        
+        # Collect results in a DataFrame
+        results = pd.DataFrame({'date': date_times})
+
+        # Compute evaluation metrics
+        r2_scores = {}
+        rmse_scores = {}
+        mape_scores = {}
+        metrics_details = []
+        
+        for i, col_name in enumerate(column_names):
+            results[f'{col_name}_True'] = true_values[:, i]
+            results[f'{col_name}_Predicted'] = predictions[:, i]
+
+            var_true = true_values[:, i]
+            var_pred = predictions[:, i]
+
+            # Filter out zeros (avoid division by zero in MAPE)
+            non_zero_mask = var_true != 0
+            var_true_nonzero = var_true[non_zero_mask]
+            var_pred_nonzero = var_pred[non_zero_mask]
+
+            r2 = float('nan')
+            rmse = float('nan')
+            mape = float('nan')
+            
+            if len(var_true_nonzero) > 0:
+                r2 = r2_score(var_true_nonzero, var_pred_nonzero)
+                rmse = np.sqrt(np.mean((var_true_nonzero - var_pred_nonzero) ** 2))
+                mape = np.mean(np.abs((var_true_nonzero - var_pred_nonzero) / np.abs(var_true_nonzero))) * 100
+                
+                r2_scores[col_name] = r2
+                rmse_scores[col_name] = rmse
+                mape_scores[col_name] = mape
+                
+                detail = f"{col_name}:\n  R方 = {r2:.6f}\n  RMSE = {rmse:.6f}\n  MAPE = {mape:.6f}%"
+                metrics_details.append(detail)
+                print(f"{col_name} R方: {r2:.6f}")
+            else:
+                metrics_details.append(f"{col_name}: 没有有效数据用于计算指标")
+                print(f"{col_name} 没有有效数据用于计算R方")
+
+        # Average metrics
+        valid_r2 = [score for score in r2_scores.values() if not np.isnan(score)]
+        valid_rmse = [score for score in rmse_scores.values() if not np.isnan(score)]
+        valid_mape = [score for score in mape_scores.values() if not np.isnan(score)]
+        
+        avg_r2 = np.mean(valid_r2) if valid_r2 else float('nan')
+        avg_rmse = np.mean(valid_rmse) if valid_rmse else float('nan')
+        avg_mape = np.mean(valid_mape) if valid_mape else float('nan')
+
+        avg_detail = f"\n平均指标:\n  R方 = {avg_r2:.6f}\n  RMSE = {avg_rmse:.6f}\n  MAPE = {avg_mape:.6f}%"
+        if np.isnan(avg_r2):
+            avg_detail = "\n平均指标: 没有有效的指标可用于计算平均值"
+        
+        metrics_details.append(avg_detail)
+        print(avg_detail)
+
+        # Save results
+        results.to_csv(self.args.output_csv_path, index=False)
+        print(f"Predictions saved to: {self.args.output_csv_path}")
+
+        txt_path = self.args.output_csv_path.replace('.csv', '_metrics_results.txt')
+        with open(txt_path, 'w') as f:
+            f.write("各变量预测指标结果:\n")
+            f.write("===================\n\n")
+            for detail in metrics_details:
+                f.write(detail + '\n')
+        
+        print(f"预测指标结果已保存到:{txt_path}")
+        
+        return r2_scores, rmse_scores, mape_scores
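The label-only inverse scaling above works by grafting slices of the fitted scaler's `min_` and `scale_` onto a fresh `MinMaxScaler`; a self-contained check of the trick (shapes here are hypothetical):

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

labels_num = 2
full = np.random.rand(100, 5)  # say, 3 features plus 2 label columns
scaler = MinMaxScaler().fit(full)

# Rebuild a scaler restricted to the last `labels_num` columns
label_scaler = MinMaxScaler(feature_range=(0, 1))
label_scaler.min_ = scaler.min_[-labels_num:]
label_scaler.scale_ = scaler.scale_[-labels_num:]

scaled_labels = scaler.transform(full)[:, -labels_num:]
restored = label_scaler.inverse_transform(scaled_labels)
print(np.allclose(restored, full[:, -labels_num:]))  # True
```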

BIN
models/pressure_prediction_model/20分钟TMP预测模型源码/edge_index.pt


+ 99 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/gat_lstm.py

@@ -0,0 +1,99 @@
+# gat_lstm.py
+import torch
+import torch.nn as nn    # PyTorch neural-network modules
+
+# A single independent model (one dependent variable)
+class SingleGATLSTM(nn.Module):
+    def __init__(self, args):
+        """
+        One sub-model: an LSTM backbone plus an output head, predicting one target metric.
+        Args:
+            args: configuration (feature count, hidden size, etc.)
+        """
+        super(SingleGATLSTM, self).__init__()
+        self.args = args
+        
+        # Per-target LSTM layer
+        self.lstm = nn.LSTM(
+            input_size=args.feature_num,
+            hidden_size=args.hidden_size,
+            num_layers=args.num_layers,
+            batch_first=True
+        )
+        
+        # Per-target output head
+        self.final_linear = nn.Sequential(
+            nn.Linear(args.hidden_size, args.hidden_size),
+            nn.LeakyReLU(0.01),
+            nn.Dropout(args.dropout * 0.4),
+            nn.Linear(args.hidden_size, args.output_size)
+        )
+        
+        self._init_weights()
+        
+    def _init_weights(self):
+        """初始化网络权重,加速模型收敛"""
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.BatchNorm1d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        # Initialize LSTM weights
+        for name, param in self.lstm.named_parameters():
+            if 'weight_ih' in name:
+                nn.init.xavier_uniform_(param.data)
+            elif 'weight_hh' in name:
+                nn.init.orthogonal_(param.data)
+            elif 'bias' in name:
+                param.data.fill_(0)
+                n = param.size(0)
+                start, end = n // 4, n // 2
+                param.data[start:end].fill_(1)  # forget-gate bias = 1 (PyTorch gate order: i, f, g, o)
+        
+    def forward(self, x):
+        """
+        前向传播:输入序列经过LSTM和输出层,得到预测结果
+        参数:
+            x: 输入序列,形状为[batch_size, seq_len, feature_num]
+        返回:
+            output: 预测结果,形状为[batch_size, output_size]
+        """
+        batch_size, seq_len, feature_num = x.size()
+        lstm_out, _ = self.lstm(x)
+        # Take the output of the last time step
+        last_out = lstm_out[:, -1, :]
+        
+        # Output head
+        output = self.final_linear(last_out)
+        return output  # [batch_size, output_size]
+
+
+# Container for the 16 independent sub-models (the full model)
+class GAT_LSTM(nn.Module):
+    def __init__(self, args):
+        """
+        总模型:包含多个SingleGATLSTM子模型,分别预测不同的目标
+        参数:
+            args: 配置参数(含labels_num,即子模型数量)
+        """
+        super(GAT_LSTM, self).__init__()
+        self.args = args
+        # Create the independent sub-models (count set by labels_num)
+        self.models = nn.ModuleList([SingleGATLSTM(args) for _ in range(args.labels_num)])
+    
+    def forward(self, x):
+        """
+        前向传播:所有子模型并行处理输入,拼接预测结果
+        参数:
+            x: 输入序列,形状为[batch_size, seq_len, feature_num]
+        返回:
+            拼接后的预测结果,形状为[batch_size, output_size * labels_num]
+        """
+        outputs = []
+        for model in self.models:
+            outputs.append(model(x))  # 每个输出为[batch, output_size]
+        return torch.cat(outputs, dim=1)  # 拼接后[batch, output_size * labels_num]
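A quick shape check of the container model with hypothetical, scaled-down hyperparameters:

```python
import torch
from types import SimpleNamespace

from gat_lstm import GAT_LSTM

args = SimpleNamespace(feature_num=8, hidden_size=16, num_layers=1,
                       dropout=0.0, output_size=5, labels_num=3)

model = GAT_LSTM(args)
x = torch.randn(4, 12, args.feature_num)  # [batch, seq_len, features]
y = model(x)
print(y.shape)  # torch.Size([4, 15]) == [batch, output_size * labels_num]
```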

+ 70 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/main.py

@@ -0,0 +1,70 @@
+# main.py
+import os
+import torch
+import numpy as np
+import random
+from gat_lstm import GAT_LSTM
+from data_trainer import Trainer
+from args import lstm_args_parser
+from torch.nn import MSELoss
+from data_preprocessor import DataPreprocessor
+
+def set_seed(seed):
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def main():
+    args = lstm_args_parser()
+    set_seed(args.random_seed)
+    
+    device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
+    args.device = device  # 将device存入args,方便后续使用
+
+    # 数据预处理
+    data = DataPreprocessor.read_and_combine_csv_files(args)
+    train_loader, val_loader, test_loader, _ = DataPreprocessor.load_and_process_data(args, data)
+    
+    # 初始化包含16个子模型的整体模型
+    model = GAT_LSTM(args).to(device)
+
+    # 初始化训练器和MSE损失函数
+    trainer = Trainer(model, args, data)
+    criterion = MSELoss()
+
+    # Optimizer over all sub-model parameters (joint training)
+    optimizer = torch.optim.Adam(
+        model.parameters(),  # full model parameters
+        lr=args.lr
+    )
+    scheduler = torch.optim.lr_scheduler.StepLR(
+        optimizer,
+        step_size=args.scheduler_step_size,
+        gamma=args.scheduler_gamma
+    )
+
+    # Train the full model (all 16 sub-models jointly)
+    print("=== Training the full model (16 sub-models) ===")
+    trainer.train_full_model(
+        train_loader,
+        val_loader,
+        optimizer,
+        criterion,
+        scheduler
+    )
+
+    # Save the full model (weights of all 16 sub-models)
+    trainer.save_model()
+    print("\n=== Training complete; full model saved ===")
+
+    # Evaluate on the test set
+    trainer.evaluate_model(test_loader, MSELoss())
+
+if __name__ == "__main__":
+    main()

BIN
models/pressure_prediction_model/20分钟TMP预测模型源码/model.pth


+ 314 - 0
models/pressure_prediction_model/20分钟TMP预测模型源码/predict.py

@@ -0,0 +1,314 @@
+import os
+import torch
+import pandas as pd
+import numpy as np
+import joblib
+import pywt
+from datetime import datetime, timedelta
+from torch.utils.data import DataLoader, TensorDataset
+from gat_lstm import GAT_LSTM    # custom GAT-LSTM model
+from tqdm import tqdm
+
+def set_seed(seed):
+    """设置随机种子,保证实验可重复性"""
+    import random
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+class Predictor:
+    """预测器类,用于加载数据、模型并执行预测流程"""
+    def __init__(self):
+        self.seq_len = 10    # input sequence length (history steps)
+        self.output_size = 5    # prediction horizon (future time steps)
+        self.labels_num = 16    # number of prediction targets (16 metrics)
+        self.feature_num = 79   # total input feature width
+        self.step_size = 5      # sampling stride (keep every step_size-th sample)
+        self.dropout = 0        # dropout probability (regularization)
+        self.lr = 0.01          # learning rate (training only; kept for the record)
+        self.num_heads = 8      # attention heads (model-structure parameter)
+        self.hidden_size = 32   # hidden size
+        self.batch_size = 512   # batch size
+        self.num_layers = 1     # number of LSTM layers
+        self.resolution = 60    # data resolution (keep every 60th raw record)
+        self.test_start_date = '2025-07-01'  # test start date (initial value; updated dynamically)
+        self.wavelet = 'db4'    # wavelet type (reserved, currently unused)
+        self.level = 3          # wavelet decomposition levels (reserved)
+        self.level_after = 4    # post-processing wavelet levels (reserved)
+        self.mode = 'soft'      # wavelet thresholding mode (reserved)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # compute device (GPU preferred)
+        self.model_path = 'model.pth'   # model weights path
+        self.output_csv_path = 'predictions.csv'   # prediction output path
+        self.random_seed = 1314    # random seed
+        self.uf_threshold = 0.001   # UF threshold (reserved)
+        self.ro_threshold = 0.01    # RO threshold (reserved)
+        self.flow_threshold = 1.0   # flow threshold (reserved)
+
+        set_seed(self.random_seed)    # seed all RNGs
+        self.scaler = joblib.load('scaler.pkl')    # scaler saved at training time
+        self.model = None         # model instance (loaded later)
+        self.edge_index = None    # graph edge index (for graph models)
+        self.test_loader = None   # test DataLoader (built later)
+        
+    def reorder_columns(self, df):
+        """
+        调整数据列顺序,确保与训练时的特征顺序一致
+        避免因列顺序不一致导致模型输入特征错位
+        """
+        desired_order = [
+            'index',
+            'C.M.FT_ZGJJY1@out','C.M.RO1_FT_JS@out','C.M.RO2_FT_JS@out','C.M.RO3_FT_JS@out',
+            'C.M.RO4_FT_JS@out','C.M.UF1_FT_JS@out','C.M.UF2_FT_JS@out','C.M.UF3_FT_JS@out',
+            'C.M.UF4_FT_JS@out','C.M.UF_FT_ZCS@out','C.M.FT_ZGJJY2@out','C.M.FT_ZGJJY3@out',
+            'C.M.FT_ZGJJY4@out','C.M.RO1_PT_JS@out','C.M.RO2_PT_JS@out','C.M.RO3_PT_JS@out',
+            'C.M.UF1_PT_JS@out','C.M.UF2_PT_JS@out','C.M.UF3_PT_JS@out','C.M.UF4_PT_JS@out',
+            'C.M.LT_JSC@out','C.M.RO1_PT_CS@out','C.M.RO1_PT_DJ2@out','C.M.RO2_PT_CS@out',
+            'C.M.RO2_PT_DJ2@out','C.M.RO3_PT_CS@out','C.M.RO3_PT_DJ2@out','C.M.RO4_PT_CS@out',
+            'C.M.RO4_PT_DJ2@out','C.M.RO4_PT_JS@out','C.M.LT_HCl@out','C.M.LT_NaClO@out',
+            'C.M.LT_PAC@out','C.M.LT_QSC@out','C.M.RO_Cond_ZCS@out','C.M.RO_TT_ZJS@out',
+            'C.M.UF1_JSF_kd@out','C.M.UF2_JSF_kd@out','C.M.UF_GSB4_fre@out','C.M.UF_ORP_ZCS@out',
+            'C.M.JYB2_ZGJ1_fre@out','C.M.JYB2_ZGJ2_fre@out','C.M.JYB2_ZGJ3_fre@out','C.M.JYB2_ZGJ4_fre@out',
+            'C.M.RO1_GYB_fre@out','C.M.RO2_GYB_fre@out','C.M.RO3_GYB_fre@out','C.M.RO4_GYB_fre@out',
+            'C.M.UF3_JSF_kd@out','C.M.UF4_JSF_kd@out','C.M.UF_FXB2_fre@out','C.M.RO1_DJB_fre@out',
+            'C.M.RO1_GYBF_kd@out','C.M.RO2_DJB_fre@out','C.M.RO2_GYBF_kd@out','C.M.RO3_DJB_fre@out',
+            'C.M.RO3_GYBF_kd@out','C.M.RO4_DJB_fre@out','C.M.RO4_GYBF_kd@out',
+            'C.M.UF1_DB@press_PV','C.M.UF2_DB@press_PV','C.M.UF3_DB@press_PV','C.M.UF4_DB@press_PV',
+            'C.M.RO1_DB@DPT_1','C.M.RO2_DB@DPT_1','C.M.RO3_DB@DPT_1','C.M.RO4_DB@DPT_1',
+            'C.M.RO1_DB@DPT_2','C.M.RO2_DB@DPT_2','C.M.RO3_DB@DPT_2','C.M.RO4_DB@DPT_2',
+            'RO1_CSFlow','RO2_CSFlow','RO3_CSFlow','RO4_CSFlow'
+        ]
+        return df.loc[:, desired_order]
+
+    def process_date(self, data):
+        """
+        处理日期列,生成周期性时间特征(捕捉时间周期性模式)
+        包括:分钟级正弦/余弦特征(每日周期)、年中日正弦/余弦特征(年度周期)
+        """
+        if 'index' in data.columns:
+            data = data.rename(columns={'index': 'date'})
+        data['date'] = pd.to_datetime(data['date'])
+        data['minute_of_day'] = data['date'].dt.hour * 60 + data['date'].dt.minute
+        data['day_of_year'] = data['date'].dt.dayofyear
+        
+        # Cyclic encoding (sine/cosine keeps the periodicity continuous)
+        data['minute_sin'] = np.sin(2 * np.pi * data['minute_of_day'] / 1440)  # minute-of-day sine
+        data['minute_cos'] = np.cos(2 * np.pi * data['minute_of_day'] / 1440)  # minute-of-day cosine
+        data['day_year_sin'] = np.sin(2 * np.pi * data['day_of_year'] / 366)   # day-of-year sine
+        data['day_year_cos'] = np.cos(2 * np.pi * data['day_of_year'] / 366)   # day-of-year cosine
+        # Drop the raw time columns (keep only the encoded features)
+        data.drop(columns=['minute_of_day', 'day_of_year'], inplace=True)
+        
+        # Column order: date + time features + everything else
+        time_features = ['minute_sin', 'minute_cos', 'day_year_sin', 'day_year_cos']
+        other_columns = [col for col in data.columns if col not in ['date'] + time_features]
+        return data[['date'] + time_features + other_columns]
+
+    def scaler_data(self, data):
+        """
+        对数据进行归一化(使用训练时保存的scaler)
+        保持与训练数据的归一化方式一致(0-1缩放)
+        """
+        date_col = data[['date']]
+        data_to_scale = data.drop(columns=['date'])
+        scaled = self.scaler.transform(data_to_scale)
+        scaled_df = pd.DataFrame(scaled, columns=data_to_scale.columns)
+        # Reattach the date column to the scaled features
+        return pd.concat([date_col.reset_index(drop=True), scaled_df], axis=1)
+    
+    def remove_outliers(self, predictions):
+        """
+        用四分位法处理预测结果中的异常值
+        异常值定义:小于Q1-1.5*IQR或大于Q3+1.5*IQR的值
+        异常值替换为正常值的平均值(避免极端值影响)
+        """
+        cleaned = predictions.copy()
+        # Iterate over the feature columns (16 labels)
+        for col in range(cleaned.shape[1]):
+            values = cleaned[:, col]
+            # Quartiles
+            q1 = np.percentile(values, 25)
+            q3 = np.percentile(values, 75)
+            iqr = q3 - q1
+            # Outlier bounds
+            lower_bound = q1 - 1.5 * iqr
+            upper_bound = q3 + 1.5 * iqr
+            # In-range values
+            normal_values = values[(values >= lower_bound) & (values <= upper_bound)]
+            # Replace outliers with the mean of the in-range values
+            if len(normal_values) > 0:
+                mean_normal = np.mean(normal_values)
+                cleaned[(values < lower_bound) | (values > upper_bound), col] = mean_normal
+        return cleaned
+    
+    def smooth_predictions(self, predictions):
+        """
+        对预测结果进行加权平滑处理,减少预测波动
+        采用滑动窗口加权平均:中间值权重为2,前后邻居权重为1(边缘值特殊处理)
+        """
+        smoothed = predictions.copy()
+        n_timesteps = predictions.shape[0]
+        if n_timesteps <= 1:
+            return smoothed
+        
+        # Iterate over the feature columns
+        for col in range(predictions.shape[1]):
+            values = predictions[:, col]
+            # First value: weight the first two (avoid over-smoothing the edge)
+            smoothed[0, col] = (2 * values[0] + values[1]) / 3
+            # Interior values: weight the neighbors (core smoothing)
+            for i in range(1, n_timesteps - 1):
+                smoothed[i, col] = (values[i-1] + 2 * values[i] + values[i+1]) / 4
+            # Last value: weight the last two (avoid over-smoothing the edge)
+            smoothed[-1, col] = (values[-2] + 2 * values[-1]) / 3
+        return smoothed
+
+    def create_test_loader(self, df):
+        """
+        构建测试数据加载器(将原始数据转换为模型输入格式)
+        输入:预处理后的DataFrame
+        输出:PyTorch DataLoader(批量加载模型输入)
+        """
+        df['date'] = pd.to_datetime(df['date'])
+        # Time between samples (per the resolution, in minutes)
+        time_interval = pd.Timedelta(minutes=(4 * self.resolution / 60))
+        # Window span (must cover the input sequence length plus the horizon)
+        window_time_span = time_interval * (self.seq_len + 2)
+        # Move the test start back so there is enough history for the input sequence
+        adjusted_test_start = pd.to_datetime(self.test_start_date) - window_time_span
+        # Keep only the history we need
+        test_df = df[df['date'] >= adjusted_test_start].reset_index(drop=True)
+
+        test_df = test_df.drop(columns=['date'])
+
+        # Build the supervised dataset (input sequence + target placeholders)
+        feature_columns = test_df.columns.tolist()
+        cols = []
+        
+        # Input sequence (features over the past seq_len steps)
+        for col in feature_columns:
+            for i in range(self.seq_len - 1, -1, -1):
+                cols.append(test_df[[col]].shift(i))   # lag-i features (t-0 back to t-(seq_len-1))
+                
+        # Target placeholders (future output_size label steps; true values unused at prediction time)
+        for i in range(1, self.output_size + 1):
+            for col in feature_columns[-self.labels_num:]:
+                cols.append(test_df[[col]].shift(-i))    # lead-i labels (t+1 to t+output_size)
+                
+        # Concatenate, sample by stride, and keep the last row as the prediction input (latest history)
+        dataset = pd.concat(cols, axis=1).iloc[::self.step_size]
+        dataset = dataset.iloc[[-1]]
+    
+        # Input features (first n_features_total columns)
+        n_features_total = self.feature_num * self.seq_len
+        supervised_data = dataset.iloc[:, :n_features_total]
+
+        # Model input format: [samples, seq_len, features]
+        X = supervised_data.values.reshape(-1, self.seq_len, self.feature_num)
+        X = torch.tensor(X, dtype=torch.float32).to(self.device)
+        tensor_dataset = TensorDataset(X)
+        loader = DataLoader(tensor_dataset, batch_size=self.batch_size, shuffle=False)
+        return loader
+
+    def load_data(self, df):
+        """
+        数据加载主流程:重排列、下采样、日期处理、归一化、创建测试加载器
+        确保输入数据格式与训练时一致
+        """
+        df = self.reorder_columns(df)
+        df = df.iloc[::self.resolution, :].reset_index(drop=True)
+        df = self.process_date(df)
+        df = self.scaler_data(df)
+        self.test_loader = self.create_test_loader(df)
+        self.edge_index = torch.load('edge_index.pt', weights_only=True)
+
+    def load_model(self):
+        """Build the model, load the pretrained weights, and switch to eval mode."""
+        self.model = GAT_LSTM(self).to(self.device)
+        # Guarded call: the GAT_LSTM defined in gat_lstm.py has no set_edge_index,
+        # so only pass the graph edge index to model variants that expose it
+        if self.edge_index is not None and hasattr(self.model, 'set_edge_index'):
+            self.model.set_edge_index(self.edge_index.to(self.device))
+        self.model.load_state_dict(torch.load(self.model_path, map_location=self.device, weights_only=True))
+        self.model.eval()
+
+    def predict(self, df):
+        """
+        执行预测主流程:更新测试起始时间、加载数据、加载模型、执行预测、反归一化
+        输入:原始数据DataFrame
+        输出:反归一化后的预测结果(numpy数组)
+        """
+        
+        # 更新测试起始时间为输入数据最新时间+4分钟(预测起始点)
+        self.test_start_date = (pd.to_datetime(df['index']).max() + timedelta(minutes=4)).strftime("%Y-%m-%d %H:%M:%S")
+        self.load_data(df)
+        self.load_model()
+
+        all_predictions = []
+        with torch.no_grad():
+            for batch in self.test_loader:
+                inputs = batch[0].to(self.device)
+                outputs = self.model(inputs)
+                all_predictions.append(outputs.cpu().numpy())
+        
+        # Concatenate all batches and reshape to [time steps, labels]
+        predictions = np.concatenate(all_predictions, axis=0).reshape(-1, self.labels_num)
+        
+        # Inverse-scale (label columns only, using the training scaler's parameters)
+        from sklearn.preprocessing import MinMaxScaler
+        inverse_scaler = MinMaxScaler()
+        inverse_scaler.min_ = self.scaler.min_[-self.labels_num:]
+        inverse_scaler.scale_ = self.scaler.scale_[-self.labels_num:]
+        predictions = inverse_scaler.inverse_transform(predictions)
+        
+        # Optional: outlier removal and smoothing (commented out; enable as needed)
+        # predictions = self.remove_outliers(predictions)  # clean outliers
+        # predictions = self.smooth_predictions(predictions)  # smooth
+        return predictions
+
+    def save_predictions(self, predictions):
+        """
+        将预测结果保存为CSV文件,包含时间戳和各指标的预测值
+        输入:反归一化后的预测结果(numpy数组)
+        """
+        start_time = datetime.strptime(self.test_start_date, "%Y-%m-%d %H:%M:%S")
+        time_interval = timedelta(minutes=(4 * self.resolution / 60))
+        timestamps = [start_time + i * time_interval for i in range(len(predictions))]
+
+        # Original column names of the 16 prediction targets
+        base_columns = [
+            'C.M.UF1_DB@press_PV', 'C.M.UF2_DB@press_PV', 'C.M.UF3_DB@press_PV', 'C.M.UF4_DB@press_PV',
+            'C.M.RO1_DB@DPT_1', 'C.M.RO2_DB@DPT_1', 'C.M.RO3_DB@DPT_1', 'C.M.RO4_DB@DPT_1',
+            'C.M.RO1_DB@DPT_2', 'C.M.RO2_DB@DPT_2', 'C.M.RO3_DB@DPT_2', 'C.M.RO4_DB@DPT_2',
+            'RO1_CSFlow', 'RO2_CSFlow', 'RO3_CSFlow', 'RO4_CSFlow'
+        ]
+        pred_columns = [f'{col}_pred' for col in base_columns]
+        df_result = pd.DataFrame(predictions, columns=pred_columns)
+        df_result.insert(0, 'date', timestamps)
+        df_result.to_csv(self.output_csv_path, index=False)
+        print(f"预测结果保存至:{self.output_csv_path}")
+
+if __name__ == '__main__':
+    """主函数:初始化预测器、加载数据、执行预测并保存结果"""
+    predictor = Predictor()
+    
+    base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
+    data_dir = os.path.join(base_dir, 'datasets_xishan')
+    file_pattern = 'data_process_{}.csv'
+    file_indices = range(46, 50)
+
+    dfs = []
+    for i in file_indices:
+        file_path = os.path.join(data_dir, file_pattern.format(i))
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+        print(f"Reading file: {file_path}")
+        df = pd.read_csv(file_path)
+        dfs.append(df)
+
+    df = pd.concat(dfs, ignore_index=True)
+    
+    predictions = predictor.predict(df)
+    predictor.save_predictions(predictions)
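The weighted smoothing in `smooth_predictions` is a [1, 2, 1]/4 moving average; an equivalent vectorized form for a single column (a sketch for reference, not used by the class):

```python
import numpy as np

def smooth_column(values: np.ndarray) -> np.ndarray:
    """[1, 2, 1]/4 moving average with the same edge weighting as smooth_predictions."""
    if len(values) <= 1:
        return values.copy()
    out = np.convolve(values, [1, 2, 1], mode="same") / 4.0
    # Edges: renormalize to the two-term weights used in the loop version
    out[0] = (2 * values[0] + values[1]) / 3
    out[-1] = (values[-2] + 2 * values[-1]) / 3
    return out

v = np.array([1.0, 2.0, 4.0, 8.0])
print(smooth_column(v))  # [1.333..., 2.25, 4.5, 6.666...]
```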

BIN
models/pressure_prediction_model/20分钟TMP预测模型源码/scaler.pkl


+ 55 - 0
models/pressure_prediction_model/90天TMP预测模型源码/args.py

@@ -0,0 +1,55 @@
+# args.py
+import argparse
+
+def lstm_args_parser():
+    parser = argparse.ArgumentParser(description="LSTM模型训练参数")
+    
+    # 数据集划分
+    parser.add_argument('--train_start_date', type=str, default='2024-02-23', help='训练集开始日期')
+    parser.add_argument('--train_end_date', type=str, default='2025-09-10', help='训练集结束日期')
+    parser.add_argument('--val_start_date', type=str, default='2025-01-01', help='验证集开始日期')
+    parser.add_argument('--val_end_date', type=str, default='2025-09-10', help='验证集结束日期')
+    parser.add_argument('--test_start_date', type=str, default='2025-01-01', help='测试集开始日期')
+    parser.add_argument('--test_end_date', type=str, default='2025-09-10', help='测试集结束日期')
+
+    # 模型相关参数
+    parser.add_argument('--seq_len', type=int, default=4320, help='输入序列的长度(输入步长)')
+    parser.add_argument('--output_size', type=int, default=2160, help='输出数据的维度(预测步长)')
+    parser.add_argument('--step_size', type=int, default=2160, help='输入数据间隔')
+    parser.add_argument('--resolution', type=int, default=60, help='输入数据分辨率(每多少个数据取一次)')
+    parser.add_argument('--epochs', type=int, default=1000, help='训练轮数')
+    parser.add_argument('--feature_num', type=int, default=16, help='特征维度')
+    parser.add_argument('--labels_num', type=int, default=8, help='标签维度(子模型数量)')
+    parser.add_argument('--hidden_size', type=int, default=64, help='隐藏层大小')
+    parser.add_argument('--num_layers', type=int, default=1, help='LSTM层数')
+    parser.add_argument('--dropout', type=float, default=0, help='dropout的概率')
+    parser.add_argument('--lr', type=float, default=0.01, help='学习率')
+    parser.add_argument('--batch_size', type=int, default=128, help='批次大小')
+    
+    # 学习率调度器
+    parser.add_argument('--scheduler_step_size', type=int, default=100, help='学习率调整步长')
+    parser.add_argument('--scheduler_gamma', type=float, default=0.9, help='学习率衰减率')
+    
+    # 早停
+    parser.add_argument('--patience', type=int, default=500, help='早停耐心值')
+    parser.add_argument('--min_delta', type=float, default=1e-10, help='最小改善阈值')
+    
+    # 设备选择
+    parser.add_argument('--device', type=int, default=0, help='选择使用的GPU设备')
+
+    # 数据处理相关参数
+    parser.add_argument('--start_files', type=int, default=1, help='开始文件索引')
+    parser.add_argument('--end_files', type=int, default=9, help='结束文件索引')
+    parser.add_argument('--data_dir', type=str, default='datasets_xishan', help='数据文件夹路径')
+    parser.add_argument('--file_pattern', type=str, default='data_process_{}.csv', help='数据文件命名模式')
+    
+    # 模型保存路径
+    parser.add_argument('--model_path', type=str, default='model.pth', help='模型保存路径')
+    parser.add_argument('--output_csv_path', type=str, default='predictions.csv', help='预测文件保存路径')
+    
+    # 随机种子
+    parser.add_argument('--random_seed', type=int, default=1314, help='随机种子')
+
+    args = parser.parse_args()
+    
+    return args

+ 255 - 0
models/pressure_prediction_model/90天TMP预测模型源码/data_preprocessor.py

@@ -0,0 +1,255 @@
+# data_preprocessor.py
+import os
+import torch
+import joblib
+import numpy as np
+import pandas as pd
+from tqdm import tqdm  # progress bars
+from sklearn.preprocessing import MinMaxScaler  # data normalization
+from torch.utils.data import DataLoader, TensorDataset  # PyTorch data loading
+from concurrent.futures import ThreadPoolExecutor  # multithreaded file reading
+
+class DataPreprocessor:
+    
+    @staticmethod
+    def load_and_process_data(args, data):
+        """
+        加载并处理数据,划分训练/验证/测试集,生成数据加载器
+        :param args: 配置参数(包含日期范围、序列长度等)
+        :param data: 原始数据(DataFrame格式)
+        :return: 训练/验证/测试数据加载器、原始数据
+        """
+        # Parse the date column
+        data['date'] = pd.to_datetime(data['date'])
+        time_interval = pd.Timedelta(hours=(args.resolution / 60))
+        window_time_span = time_interval * (args.seq_len + 314)
+
+        # Split train/val/test (start dates adjusted for the sliding window)
+        val_start_date = pd.to_datetime(args.val_start_date)
+        test_start_date = pd.to_datetime(args.test_start_date)
+        
+        # Move the val/test starts back by the window span so full input sequences can be built
+        adjusted_val_start = val_start_date - window_time_span
+        adjusted_test_start = test_start_date - window_time_span
+
+        # Train/val/test masks (boolean indexing)
+        train_mask = (data['date'] >= pd.to_datetime(args.train_start_date)) & \
+                     (data['date'] <= pd.to_datetime(args.train_end_date))
+
+        val_mask = (data['date'] >= adjusted_val_start) & \
+                   (data['date'] <= pd.to_datetime(args.val_end_date))
+
+        test_mask = (data['date'] >= adjusted_test_start) & \
+                    (data['date'] <= pd.to_datetime(args.test_end_date))
+
+        # Apply the masks and reset indices
+        train_data = data[train_mask].reset_index(drop=True)
+        val_data = data[val_mask].reset_index(drop=True)
+        test_data = data[test_mask].reset_index(drop=True)
+        
+        # Drop the date column for modeling
+        train_data = train_data.drop(columns=['date'])
+        val_data = val_data.drop(columns=['date'])
+        test_data = test_data.drop(columns=['date'])
+    
+        # Build supervised datasets (input sequence + target sequence)
+        train_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            train_data,
+            1
+        )
+        
+        val_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            val_data,
+            1
+        )
+        
+        test_supervised = DataPreprocessor.create_supervised_dataset(
+            args,
+            test_data,
+            args.step_size
+        )
+        
+        # Wrap in DataLoaders
+        train_loader = DataPreprocessor.load_data(
+            args, 
+            train_supervised,
+            shuffle=True
+        )
+        
+        val_loader = DataPreprocessor.load_data(
+            args, 
+            val_supervised,
+            shuffle=False
+        )
+        
+        test_loader = DataPreprocessor.load_data(
+            args, 
+            test_supervised,
+            shuffle=False
+        )
+        
+        return train_loader, val_loader, test_loader, data  # also return the raw data for downstream use
+    
+    @staticmethod
+    def read_and_combine_csv_files(args):
+        """
+        读取并合并多个CSV文件(支持多线程加速)
+        :param args: 配置参数(包含数据目录、文件命名模式等)
+        :return: 合并并预处理后的DataFrame
+        """
+        current_dir = os.path.dirname(__file__)
+        parent_dir = os.path.dirname(current_dir)
+        args.data_dir = os.path.join(parent_dir, args.data_dir)
+        
+        def read_file(file_count):
+            """内部函数:读取单个CSV文件"""
+            file_name = args.file_pattern.format(file_count)
+            file_path = os.path.join(args.data_dir, file_name)
+            return pd.read_csv(file_path)
+        
+        # File indices to read (start_files through end_files)
+        file_indices = list(range(args.start_files, args.end_files + 1))
+        
+        # 多线程读取文件(加速大文件读取)
+        max_workers = os.cpu_count()    # 按CPU核心数设置线程数
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            results = list(tqdm(executor.map(read_file, file_indices),
+                                total=len(file_indices),
+                                desc="Reading files"))
+        
+        # Concatenate all chunks and reset the index
+        all_data = pd.concat(results, ignore_index=True)
+        
+        # Downsample according to the resolution
+        chunk = all_data.iloc[::args.resolution, :].reset_index(drop=True)
+        
+        # Process date and time features
+        chunk = DataPreprocessor.process_date(chunk)
+        # Normalize
+        chunk = DataPreprocessor.scaler_data(chunk)
+        
+        return chunk
+    
+    @staticmethod
+    def process_date(data):
+        """
+        Process the date column and generate periodic (annual) time features
+        :param data: DataFrame containing an 'index' column of date strings
+        :return: DataFrame with the added time features
+        """
+        data = data.rename(columns={'index': 'date'})
+        data['date'] = pd.to_datetime(data['date'])
+
+        # Periodic time features
+        data['day_of_year'] = data['date'].dt.dayofyear
+
+        data['day_year_sin'] = np.sin(2 * np.pi * data['day_of_year'] / 366)
+        data['day_year_cos'] = np.cos(2 * np.pi * data['day_of_year'] / 366)
+
+        # Drop the raw day-of-year column, keeping the encoded features
+        data.drop(columns=['day_of_year'], inplace=True)
+
+        # Reorder columns (date + time features + remaining features)
+        time_features = ['day_year_sin', 'day_year_cos']
+        other_columns = [col for col in data.columns if col not in ['date'] and col not in time_features]
+        data = data[['date'] + time_features + other_columns]
+
+        return data
+    
+    @staticmethod
+    def scaler_data(data):
+        """
+        Normalize the data (all columns except the date) and save the scaler
+        :param data: DataFrame containing a 'date' column
+        :return: normalized DataFrame
+        """
+        date_col = data[['date']]
+        data_to_scale = data.drop(columns=['date'])
+
+        scaler = MinMaxScaler(feature_range=(0, 1))
+        scaled_data = scaler.fit_transform(data_to_scale)
+        joblib.dump(scaler, 'scaler.pkl')  # persist the fitted scaler
+
+        scaled_data = pd.DataFrame(scaled_data, columns=data_to_scale.columns)
+        
+        # Rejoin the date column with the scaled data
+        scaled_data = pd.concat([date_col.reset_index(drop=True), scaled_data], axis=1)
+        
+        return scaled_data
+    
+    @staticmethod
+    def create_supervised_dataset(args, data, step_size):
+        """
+        Convert time-series data into supervised-learning format (input + target sequences)
+        :param args: configuration (sequence length, output length, etc.)
+        :param data: input data (DataFrame, without the date column)
+        :param step_size: sliding-window stride
+        :return: supervised dataset (DataFrame)
+        """
+        data = pd.DataFrame(data)
+        cols = []
+        col_names = []
+        
+        feature_columns = data.columns.tolist()
+
+        # Input sequence: t-(seq_len-1) through t-0
+        for col in feature_columns:
+            for i in range(args.seq_len - 1, -1, -1):
+                cols.append(data[[col]].shift(i))
+                col_names.append(f"{col}(t-{i})")
+        
+        # Target sequence (only the last labels_num columns serve as prediction targets)
+        target_columns = feature_columns[-args.labels_num:]
+        for i in range(1, args.output_size + 1):
+            for col in target_columns:
+                cols.append(data[[col]].shift(-i))
+                col_names.append(f"{col}(t+{i})")
+
+        # Merge and clean
+        dataset = pd.concat(cols, axis=1)
+        dataset.columns = col_names
+        dataset = dataset.iloc[::step_size, :]  # subsample by stride
+        dataset.dropna(inplace=True)  # drop rows with NaNs introduced by shifting
+        
+        return dataset
+
+    @staticmethod
+    def load_data(args, dataset, shuffle):
+        """
+        Convert the supervised dataset into PyTorch tensors and build a DataLoader
+        :param args: configuration
+        :param dataset: supervised dataset (DataFrame)
+        :param shuffle: whether to shuffle the data
+        :return: DataLoader object
+        """
+        input_length = args.seq_len
+        n_features = args.feature_num
+        labels_num = args.labels_num
+    
+        n_features_total = n_features * input_length  # total flattened input dimension
+        n_labels_total = args.output_size * labels_num  # total target dimension
+
+        # Split inputs and targets
+        X = dataset.values[:, :n_features_total]
+        y = dataset.values[:, n_features_total:n_features_total + n_labels_total]
+    
+        # Reshape inputs to [num_samples, seq_len, num_features]
+        X = X.reshape(X.shape[0], input_length, n_features)
+        X = torch.tensor(X, dtype=torch.float32).to(args.device)
+        y = torch.tensor(y, dtype=torch.float32).to(args.device)
+
+        # Build the dataset and DataLoader
+        dataset_tensor = TensorDataset(X, y)
+        generator = torch.Generator()
+        generator.manual_seed(args.random_seed)  # fix the seed for reproducibility
+        
+        data_loader = DataLoader(
+            dataset_tensor, 
+            batch_size=args.batch_size, 
+            shuffle=shuffle,
+            generator=generator
+        )
+    
+        return data_loader

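The shift-based windowing in `create_supervised_dataset` above is easiest to verify on a toy frame. Below is a minimal standalone sketch of the same technique (the two-column frame and the small `seq_len`/`output_size`/`labels_num` values are invented for illustration; the real values come from `args.py`):

```python
import pandas as pd

def make_supervised(data: pd.DataFrame, seq_len: int, output_size: int,
                    labels_num: int, step_size: int) -> pd.DataFrame:
    """Minimal re-implementation of the shift-based windowing above."""
    cols, names = [], []
    features = data.columns.tolist()
    # Input sequence: t-(seq_len-1) ... t-0 for every feature
    for col in features:
        for i in range(seq_len - 1, -1, -1):
            cols.append(data[[col]].shift(i))
            names.append(f"{col}(t-{i})")
    # Targets: the last labels_num columns, t+1 ... t+output_size
    for i in range(1, output_size + 1):
        for col in features[-labels_num:]:
            cols.append(data[[col]].shift(-i))
            names.append(f"{col}(t+{i})")
    out = pd.concat(cols, axis=1)
    out.columns = names
    return out.iloc[::step_size].dropna()

toy = pd.DataFrame({"x": range(10), "y": range(10, 20)})
print(make_supervised(toy, seq_len=3, output_size=2, labels_num=1, step_size=1))
```

Each surviving row pairs a flattened input window with its future targets, which is exactly the layout `load_data` later reshapes to `[num_samples, seq_len, feature_num]`.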
+ 247 - 0
models/pressure_prediction_model/90天TMP预测模型源码/data_trainer.py

@@ -0,0 +1,247 @@
+# data_trainer.py
+import torch
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.metrics import r2_score
+from datetime import datetime, timedelta
+from sklearn.preprocessing import MinMaxScaler
+
+class Trainer:
+    def __init__(self, model, args, data):
+        """
+        Initialize the trainer
+        :param model: model to train
+        :param args: configuration (number of epochs, early-stopping settings, etc.)
+        :param data: raw data (used to generate timestamps during evaluation)
+        """
+        self.args = args
+        self.model = model
+        self.data = data
+        
+        # Early-stopping state
+        self.patience = args.patience
+        self.min_delta = args.min_delta
+        self.counter = 0
+        self.early_stop = False
+        self.best_val_loss = float('inf')
+        self.best_model_state = None
+        self.best_epoch = 0
+
+    def train_full_model(self, train_loader, val_loader, optimizer, criterion, scheduler):
+        """
+        Jointly train all sub-models (end-to-end)
+        :param train_loader: training data loader
+        :param val_loader: validation data loader
+        :param optimizer: optimizer (e.g. Adam)
+        :param criterion: loss function (e.g. MSE)
+        :param scheduler: learning-rate scheduler
+        :return: trained model (with the best weights loaded)
+        """
+        self.counter = 0
+        self.best_val_loss = float('inf')
+        self.early_stop = False
+        self.best_model_state = None
+        self.best_epoch = 0
+        max_epochs = self.args.epochs
+
+        for epoch in range(max_epochs):
+            self.model.train()
+            running_loss = 0.0
+            
+            for inputs, targets in train_loader:
+                inputs = inputs.to(self.args.device)
+                targets = targets.to(self.args.device)  # full target vector (all labels_num dependent variables)
+                
+                optimizer.zero_grad()
+                outputs = self.model(inputs)  # full model output
+                
+                loss = criterion(outputs, targets)  # joint loss over all targets
+                loss.backward()
+                optimizer.step()
+                running_loss += loss.item()
+            
+            train_loss = running_loss / len(train_loader)
+            val_loss = self.validate_full(val_loader, criterion) if val_loader else 0.0
+
+            print(f'Epoch {epoch+1}/{max_epochs}, '
+                  f'Train Loss: {train_loss:.6f}, '
+                  f'Val Loss: {val_loss:.6f}, '
+                  f'LR: {optimizer.param_groups[0]["lr"]:.6f}')
+
+            # Early stopping based on the overall validation loss
+            if val_loader:
+                improved = val_loss < (self.best_val_loss - self.min_delta)
+                if improved:
+                    self.best_val_loss = val_loss
+                    self.counter = 0
+                    self.best_model_state = self.model.state_dict()
+                    self.best_epoch = epoch
+                else:
+                    self.counter += 1
+                    if self.counter >= self.patience:
+                        self.early_stop = True
+                        print(f"早停触发")
+                        
+            scheduler.step()
+            torch.cuda.empty_cache()
+            if self.early_stop:
+                break
+
+        # Restore the best weights
+        if self.best_model_state is not None:
+            self.model.load_state_dict(self.best_model_state)
+        print(f"最佳迭代: {self.best_epoch+1}, 最佳验证损失: {self.best_val_loss:.6f}")
+        return self.model
+
+    def validate_full(self, val_loader, criterion):
+        """
+        Validate the full model (compute the validation loss)
+        :param val_loader: validation data loader
+        :param criterion: loss function
+        :return: average validation loss
+        """
+        self.model.eval()
+        total_loss = 0.0
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                inputs = inputs.to(self.args.device)
+                targets = targets.to(self.args.device)  # full target vector
+                
+                outputs = self.model(inputs)  # full model output
+                loss = criterion(outputs, targets)  # joint loss
+                total_loss += loss.item()
+        return total_loss / len(val_loader)
+
+    def save_model(self):
+        """保存模型权重到指定路径"""
+        torch.save(self.model.state_dict(), self.args.model_path)
+        print(f"模型已保存到:{self.args.model_path}")
+            
+    def evaluate_model(self, test_loader, criterion):
+        """
+        Evaluate the model on the test set, computing R², RMSE and MAPE, and save the results
+        :param test_loader: test data loader
+        :param criterion: loss function
+        :return: metric dictionaries (R², RMSE, MAPE)
+        """
+        self.model.eval()
+        scaler_path = 'scaler.pkl'
+        scaler = joblib.load(scaler_path)
+        predictions = []
+        true_values = []
+        device = self.args.device
+        
+        with torch.no_grad():
+            for inputs, targets in test_loader:
+                inputs = inputs.to(device)
+                targets = targets.to(device)
+                outputs = self.model(inputs)
+                predictions.append(outputs.cpu().numpy())
+                true_values.append(targets.cpu().numpy())
+    
+        predictions = np.concatenate(predictions, axis=0)
+        true_values = np.concatenate(true_values, axis=0)
+    
+        # Reshape predictions and ground truth to match the inverse-transform layout
+        reshaped_predictions = predictions.reshape(predictions.shape[0], 
+                                                   self.args.output_size, 
+                                                   self.args.labels_num)
+        predictions = reshaped_predictions.reshape(-1, self.args.labels_num)
+        
+        reshaped_true_values = true_values.reshape(true_values.shape[0], 
+                                                   self.args.output_size, 
+                                                   self.args.labels_num)
+        true_values = reshaped_true_values.reshape(-1, self.args.labels_num)
+    
+        # Inverse-transform (label columns only)
+        column_scaler = MinMaxScaler(feature_range=(0, 1))
+        column_scaler.min_ = scaler.min_[-self.args.labels_num:] 
+        column_scaler.scale_ = scaler.scale_[-self.args.labels_num:] 
+        
+        true_values = column_scaler.inverse_transform(true_values)
+        predictions = column_scaler.inverse_transform(predictions)
+    
+        # Target column names (the 8 dependent variables)
+        column_names = [
+            'C.M.RO1_DB@DPT_1', 'C.M.RO2_DB@DPT_1', 'C.M.RO3_DB@DPT_1', 'C.M.RO4_DB@DPT_1',
+            'C.M.RO1_DB@DPT_2', 'C.M.RO2_DB@DPT_2', 'C.M.RO3_DB@DPT_2', 'C.M.RO4_DB@DPT_2',
+        ]
+    
+        # Generate the timestamp series
+        start_datetime = datetime.strptime(self.args.test_start_date, "%Y-%m-%d")
+        time_interval = timedelta(minutes=(4 * self.args.resolution / 60))
+        total_points = len(predictions)
+        date_times = [start_datetime + i * time_interval for i in range(total_points)]
+        
+        # Collect results in a DataFrame
+        results = pd.DataFrame({'date': date_times})
+
+        # Compute evaluation metrics
+        r2_scores = {}
+        rmse_scores = {}
+        mape_scores = {}
+        metrics_details = []
+        
+        for i, col_name in enumerate(column_names):
+            results[f'{col_name}_True'] = true_values[:, i]
+            results[f'{col_name}_Predicted'] = predictions[:, i]
+
+            var_true = true_values[:, i]
+            var_pred = predictions[:, i]
+
+            # Filter out zeros (avoids division by zero in MAPE)
+            non_zero_mask = var_true != 0
+            var_true_nonzero = var_true[non_zero_mask]
+            var_pred_nonzero = var_pred[non_zero_mask]
+
+            r2 = float('nan')
+            rmse = float('nan')
+            mape = float('nan')
+            
+            if len(var_true_nonzero) > 0:
+                r2 = r2_score(var_true_nonzero, var_pred_nonzero)
+                rmse = np.sqrt(np.mean((var_true_nonzero - var_pred_nonzero) ** 2))
+                mape = np.mean(np.abs((var_true_nonzero - var_pred_nonzero) / np.abs(var_true_nonzero))) * 100
+                
+                r2_scores[col_name] = r2
+                rmse_scores[col_name] = rmse
+                mape_scores[col_name] = mape
+                
+                detail = f"{col_name}:\n  R方 = {r2:.6f}\n  RMSE = {rmse:.6f}\n  MAPE = {mape:.6f}%"
+                metrics_details.append(detail)
+                print(f"{col_name} R方: {r2:.6f}")
+            else:
+                metrics_details.append(f"{col_name}: 没有有效数据用于计算指标")
+                print(f"{col_name} 没有有效数据用于计算R方")
+
+        # Average metrics
+        valid_r2 = [score for score in r2_scores.values() if not np.isnan(score)]
+        valid_rmse = [score for score in rmse_scores.values() if not np.isnan(score)]
+        valid_mape = [score for score in mape_scores.values() if not np.isnan(score)]
+        
+        avg_r2 = np.mean(valid_r2) if valid_r2 else float('nan')
+        avg_rmse = np.mean(valid_rmse) if valid_rmse else float('nan')
+        avg_mape = np.mean(valid_mape) if valid_mape else float('nan')
+
+        avg_detail = f"\n平均指标:\n  R方 = {avg_r2:.6f}\n  RMSE = {avg_rmse:.6f}\n  MAPE = {avg_mape:.6f}%"
+        if np.isnan(avg_r2):
+            avg_detail = "\n平均指标: 没有有效的指标可用于计算平均值"
+        
+        metrics_details.append(avg_detail)
+        print(avg_detail)
+
+        # Persist the results
+        results.to_csv(self.args.output_csv_path, index=False)
+        print(f"预测结果已保存到:{self.args.output_csv_path}")
+
+        txt_path = self.args.output_csv_path.replace('.csv', '_metrics_results.txt')
+        with open(txt_path, 'w') as f:
+            f.write("各变量预测指标结果:\n")
+            f.write("===================\n\n")
+            for detail in metrics_details:
+                f.write(detail + '\n')
+        
+        print(f"预测指标结果已保存到:{txt_path}")
+        
+        return r2_scores, rmse_scores, mape_scores

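The label-only inverse transform in `evaluate_model` works by copying the tail of the fitted scaler's per-column parameters into a fresh scaler. A minimal standalone sketch of the same trick (the 4-feature/2-label split and random data are invented for illustration):

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(0)
full = rng.uniform(0, 10, size=(100, 4))   # 2 input features + 2 label columns
labels_num = 2

scaler = MinMaxScaler(feature_range=(0, 1)).fit(full)
scaled_labels = scaler.transform(full)[:, -labels_num:]

# Build a scaler that only knows about the label columns by slicing
# the fitted per-column parameters, exactly as evaluate_model does
label_scaler = MinMaxScaler(feature_range=(0, 1))
label_scaler.min_ = scaler.min_[-labels_num:]
label_scaler.scale_ = scaler.scale_[-labels_num:]

restored = label_scaler.inverse_transform(scaled_labels)
assert np.allclose(restored, full[:, -labels_num:])
print("label-only inverse transform matches the original columns")
```

This works because `MinMaxScaler.inverse_transform` only uses the per-column `min_` and `scale_` arrays, so a sliced copy behaves like a scaler fitted on the label columns alone.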
+ 94 - 0
models/pressure_prediction_model/90天TMP预测模型源码/gat_lstm.py

@@ -0,0 +1,94 @@
+# gat_lstm.py
+import torch
+import torch.nn as nn
+
+# A single independent sub-model (one per dependent variable)
+class SingleGATLSTM(nn.Module):
+    def __init__(self, args):
+        """
+        Initialize a single prediction sub-model
+        :param args: configuration object (input feature count, hidden size, etc.)
+        """
+        super(SingleGATLSTM, self).__init__()
+        self.args = args
+        
+        # Dedicated LSTM layer
+        self.lstm = nn.LSTM(
+            input_size=args.feature_num,
+            hidden_size=args.hidden_size,
+            num_layers=args.num_layers,
+            batch_first=True
+        )
+        
+        # Dedicated output head: maps the LSTM output to the prediction (length output_size)
+        self.final_linear = nn.Sequential(
+            nn.Linear(args.hidden_size, args.hidden_size),
+            nn.LeakyReLU(0.01),
+            nn.Dropout(args.dropout * 0.4),
+            nn.Linear(args.hidden_size, args.output_size)
+        )
+        
+        self._init_weights()
+        
+    def _init_weights(self):
+        """初始化模型权重,提升训练稳定性和收敛速度"""
+        # 初始化线性层权重
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.BatchNorm1d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        # Initialize LSTM weights
+        for name, param in self.lstm.named_parameters():
+            if 'weight_ih' in name:
+                nn.init.xavier_uniform_(param.data)
+            elif 'weight_hh' in name:
+                nn.init.orthogonal_(param.data)
+            elif 'bias' in name:
+                param.data.fill_(0)
+                # PyTorch packs LSTM biases as (input, forget, cell, output) gates;
+                # the second quarter is the forget gate, initialized to 1
+                n = param.size(0)
+                start, end = n // 4, n // 2
+                param.data[start:end].fill_(1)
+        
+    def forward(self, x):
+        """
+        Forward pass: input sequence -> LSTM -> output head
+        :param x: input tensor of shape [batch_size, seq_len, feature_num]
+        :return: predictions of shape [batch_size, output_size]
+        """
+        batch_size, seq_len, feature_num = x.size()
+        lstm_out, _ = self.lstm(x)
+        # Take the output of the last time step
+        last_out = lstm_out[:, -1, :]
+        
+        # Output head
+        output = self.final_linear(last_out)
+        return output  # [batch_size, output_size]
+
+
+# Container holding the independent sub-models (the full model)
+class GAT_LSTM(nn.Module):
+    def __init__(self, args):
+        """
+        Initialize the multi-output model container
+        :param args: configuration; labels_num sets the number of targets (i.e. sub-models)
+        """
+        super(GAT_LSTM, self).__init__()
+        self.args = args
+        # Create one independent sub-model per target (count set by labels_num)
+        self.models = nn.ModuleList([SingleGATLSTM(args) for _ in range(args.labels_num)])
+    
+    def forward(self, x):
+        """
+        Forward pass: each sub-model processes the input and the results are concatenated
+        :param x: input tensor of shape [batch_size, seq_len, feature_num]
+        :return: concatenated predictions of shape [batch_size, output_size * labels_num]
+        """
+        outputs = []
+        for model in self.models:
+            outputs.append(model(x))  # each output: [batch, output_size]
+        return torch.cat(outputs, dim=1)  # concatenated: [batch, output_size * labels_num]

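A quick shape check helps confirm the container's output layout. The sketch below drives `GAT_LSTM` with a dummy argument object (the small field values are invented purely for the test; real values come from `args.py`):

```python
import torch
from types import SimpleNamespace
from gat_lstm import GAT_LSTM

# Invented miniature configuration, just to exercise the shapes
args = SimpleNamespace(feature_num=16, hidden_size=32, num_layers=1,
                       dropout=0.1, output_size=5, labels_num=8)

model = GAT_LSTM(args)
x = torch.randn(4, 10, args.feature_num)          # [batch, seq_len, feature_num]
y = model(x)
assert y.shape == (4, args.output_size * args.labels_num)
print(y.shape)  # torch.Size([4, 40])
```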
+ 70 - 0
models/pressure_prediction_model/90天TMP预测模型源码/main.py

@@ -0,0 +1,70 @@
+# main.py
+import os
+import torch
+import numpy as np
+import random
+from gat_lstm import GAT_LSTM
+from data_trainer import Trainer
+from args import lstm_args_parser
+from torch.nn import MSELoss
+from data_preprocessor import DataPreprocessor
+
+def set_seed(seed):
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def main():
+    args = lstm_args_parser()
+    set_seed(args.random_seed)
+    
+    device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
+    args.device = device  # stash the device on args for downstream use
+
+    # Data preprocessing
+    data = DataPreprocessor.read_and_combine_csv_files(args)
+    train_loader, val_loader, test_loader, _ = DataPreprocessor.load_and_process_data(args, data)
+    
+    # Build the full model containing all sub-models (one per target)
+    model = GAT_LSTM(args).to(device)
+
+    # Trainer and MSE loss
+    trainer = Trainer(model, args, data)
+    criterion = MSELoss()
+
+    # Optimizer over all sub-model parameters (joint training)
+    optimizer = torch.optim.Adam(
+        model.parameters(),  # full model parameters
+        lr=args.lr
+    )
+    scheduler = torch.optim.lr_scheduler.StepLR(
+        optimizer,
+        step_size=args.scheduler_step_size,
+        gamma=args.scheduler_gamma
+    )
+
+    # End-to-end training of the full model (all sub-models)
+    print("=== Training the full model (all sub-models) ===")
+    trainer.train_full_model(
+        train_loader,
+        val_loader,
+        optimizer,
+        criterion,
+        scheduler
+    )
+
+    # Save the full model, including every sub-model's parameters
+    trainer.save_model()
+    print("\n=== Training complete; full model saved ===")
+
+    # Evaluate the model
+    trainer.evaluate_model(test_loader, MSELoss())
+
+if __name__ == "__main__":
+    main()

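The StepLR schedule wired up above multiplies the learning rate by `scheduler_gamma` every `scheduler_step_size` epochs. A small standalone sketch of the decay (the step size and gamma here are invented; the real values live in `args.py`):

```python
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=0.01)
# Hypothetical schedule: decay by 0.5 every 10 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(30):
    optimizer.step()      # placeholder for a real training epoch
    scheduler.step()
    if (epoch + 1) % 10 == 0:
        print(f"epoch {epoch + 1}: lr = {optimizer.param_groups[0]['lr']:.5f}")
# epoch 10: lr = 0.00500 / epoch 20: lr = 0.00250 / epoch 30: lr = 0.00125
```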
BIN
models/pressure_prediction_model/90天TMP预测模型源码/model.pth


+ 254 - 0
models/pressure_prediction_model/90天TMP预测模型源码/predict.py

@@ -0,0 +1,254 @@
+import os
+import torch
+import pandas as pd
+import numpy as np
+import joblib
+from datetime import datetime, timedelta
+from torch.utils.data import DataLoader, TensorDataset
+from gat_lstm import GAT_LSTM    # custom GAT-LSTM model
+from scipy.signal import savgol_filter    # Savitzky-Golay filter
+from sklearn.preprocessing import MinMaxScaler    # data normalization
+
+def set_seed(seed):
+    """设置随机种子,保证实验可复现性"""
+    import random
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True 
+    torch.backends.cudnn.benchmark = False
+
+class Predictor:
+    """预测器类,封装了数据处理、模型加载、预测和结果保存的完整流程"""
+    def __init__(self):
+        # Model and data parameters
+        self.seq_len = 4320  # input sequence length
+        self.output_size = 2160  # prediction horizon length
+        self.labels_num = 8  # number of prediction target features
+        self.feature_num = 16  # total number of input features
+        self.step_size = 2160  # sliding-window stride
+        self.dropout = 0  # model dropout rate
+        self.lr = 0.01  # learning rate
+        self.hidden_size = 64  # LSTM hidden size
+        self.batch_size = 128  # batch size
+        self.num_layers = 1  # number of LSTM layers
+        self.resolution = 60  # data time resolution in minutes (converted to hours via resolution / 60)
+        self.test_start_date = '2025-09-24'  # prediction start date (updated dynamically)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.model_path = 'model.pth'  # model weights path (overridable)
+        self.output_csv_path = 'predictions.csv'  # results path (overridable)
+        self.random_seed = 1314  # random seed
+
+        # Prediction smoothing parameters
+        self.smooth_window = 30    # moving-average window size
+        self.ema_alpha = 0.1    # exponential-moving-average coefficient (weight)
+        self.use_savitzky = True    # whether to apply Savitzky-Golay filtering
+        self.sg_window = 25    # Savitzky-Golay window size
+        self.sg_polyorder = 2    # Savitzky-Golay polynomial order
+
+        # Initial setup
+        set_seed(self.random_seed)    # set the random seed
+        self.scaler = joblib.load('scaler.pkl')  # load the scaler (the file must exist)
+        self.model = None
+        self.edge_index = None
+        self.test_loader = None
+        
+    def reorder_columns(self, df):
+        """
+        Reorder DataFrame columns to match the feature order used during training
+        (feature order is critical to the model input and must match training exactly)
+        """
+        desired_order = [
+            'index',  # time index column
+            'C.M.RO1_FT_JS@out','C.M.RO2_FT_JS@out','C.M.RO3_FT_JS@out','C.M.RO4_FT_JS@out',
+            'C.M.RO_TT_ZJS@out','C.M.RO_Cond_ZJS@out',
+            'C.M.RO1_DB@DPT_1','C.M.RO1_DB@DPT_2',
+            'C.M.RO2_DB@DPT_1','C.M.RO2_DB@DPT_2',
+            'C.M.RO3_DB@DPT_1','C.M.RO3_DB@DPT_2',
+            'C.M.RO4_DB@DPT_1','C.M.RO4_DB@DPT_2',
+        ]
+        return df.loc[:, desired_order]
+
+    def process_date(self, data):
+        """
+        Process date features and generate periodic (annual) time encodings
+        Sine/cosine encodings capture cyclical patterns such as seasonal variation
+        """
+        if 'index' in data.columns:
+            data = data.rename(columns={'index': 'date'})
+        data['date'] = pd.to_datetime(data['date'])
+        data['day_of_year'] = data['date'].dt.dayofyear
+        # Sine/cosine encodings (366-day period to accommodate leap years)
+        data['day_year_sin'] = np.sin(2 * np.pi * data['day_of_year'] / 366)
+        data['day_year_cos'] = np.cos(2 * np.pi * data['day_of_year'] / 366)
+        data.drop(columns=['day_of_year'], inplace=True)
+        
+        # Reorder columns: date + time features + remaining features
+        time_features = ['day_year_sin', 'day_year_cos']
+        other_columns = [col for col in data.columns if col not in ['date'] + time_features]
+        return data[['date'] + time_features + other_columns]
+
+    def scaler_data(self, data):
+        """
+        Normalize the data with the pretrained scaler (the date column is left untouched)
+        Normalization keeps features of different magnitudes on an equal footing in the model
+        """
+        date_col = data[['date']]    # extract the date column (not scaled)
+        data_to_scale = data.drop(columns=['date'])
+        scaled = self.scaler.transform(data_to_scale)
+        scaled_df = pd.DataFrame(scaled, columns=data_to_scale.columns)
+        return pd.concat([date_col.reset_index(drop=True), scaled_df], axis=1)   # rejoin the date column with the scaled data
+
+    def create_test_loader(self, df):
+        """
+        Convert the preprocessed DataFrame into a test DataLoader for the model
+        Produces tensors in the shape the model expects ([num_samples, seq_len, num_features])
+        """
+        if 'date' in df.columns:
+            test_data = df.drop(columns=['date']).values
+        else:
+            test_data = df.values
+
+        # Reshape to the LSTM input format: [num_samples, seq_len, num_features]
+        X = test_data.reshape(-1, self.seq_len, self.feature_num)
+        X = torch.tensor(X, dtype=torch.float32).to(self.device)
+        tensor_dataset = TensorDataset(X)  # dataset with inputs only, no labels
+        
+        # Build the loader (no shuffling, batched)
+        return DataLoader(tensor_dataset, batch_size=self.batch_size, shuffle=False)
+    
+    def load_data(self, df):
+        """数据加载与预处理统一接口,依次执行列重排、日期处理、标准化和生成数据加载器"""
+        df = self.reorder_columns(df)    # reorder columns
+        df = self.process_date(df)    # process date features
+        df = self.scaler_data(df)    # normalize
+        self.test_loader = self.create_test_loader(df)
+
+    def load_model(self):
+        """加载预训练模型并设置为评估模式(关闭dropout等训练特有层)"""
+        self.model = GAT_LSTM(self).to(self.device)
+        # Load weights (map_location pins the device; weights_only=True is safer)
+        self.model.load_state_dict(torch.load(self.model_path, map_location=self.device, weights_only=True))
+        self.model.eval()
+
+    def moving_average_smooth(self, data):
+        """
+        Moving-average smoothing: each feature is averaged independently to damp high-frequency noise
+        Edge padding is used to avoid boundary artifacts
+        """
+        smoothed = []
+        for i in range(data.shape[1]):
+            feature = data[:, i]
+            
+            # Edge padding: extend with the border values so boundary points are not distorted
+            padded = np.pad(feature, (self.smooth_window//2, self.smooth_window//2), mode='edge')
+            window = np.ones(self.smooth_window) / self.smooth_window    # uniform window weights
+            smoothed_feature = np.convolve(padded, window, mode='valid')    # moving average via convolution
+            smoothed.append(smoothed_feature.reshape(-1, 1))    # keep 2-D shape and collect
+        return np.concatenate(smoothed, axis=1)    # stack all features
+
+    def exponential_smooth(self, data):
+        """
+        Exponential moving average: exponentially weighted per feature, with recent data weighted more heavily
+        Tracks recent trends more closely than a plain moving average
+        """
+        smoothed = []
+        for i in range(data.shape[1]):   # iterate over features
+            feature = data[:, i]
+            smoothed_feature = np.zeros_like(feature)
+            smoothed_feature[0] = feature[0]
+            for t in range(1, len(feature)):
+                smoothed_feature[t] = self.ema_alpha * feature[t] + (1 - self.ema_alpha) * smoothed_feature[t-1]
+            smoothed.append(smoothed_feature.reshape(-1, 1))
+        return np.concatenate(smoothed, axis=1)
+
+    def savitzky_golay_smooth(self, data):
+        """
+        Savitzky-Golay filtering: polynomial-fit sliding window that denoises while preserving trends
+        The window must be odd; it is shrunk when the data is too short
+        """
+        smoothed = []
+        for i in range(data.shape[1]):
+            feature = data[:, i]
+            # Keep the window odd and no larger than the data length
+            window = min(self.sg_window, len(feature) if len(feature) % 2 == 1 else len(feature)-1)
+            if window < 3:    # too small to filter (a 2nd-order polynomial needs at least 3 points)
+                smoothed.append(feature.reshape(-1, 1))
+                continue
+            # Apply the Savitzky-Golay filter
+            smoothed_feature = savgol_filter(feature, window_length=window, polyorder=self.sg_polyorder)
+            smoothed.append(smoothed_feature.reshape(-1, 1))
+        return np.concatenate(smoothed, axis=1)
+
+    def smooth_predictions(self, predictions):
+        """
+        Combined multi-stage smoothing: moving average first, then exponential smoothing, then optional Savitzky-Golay
+        The layered passes further suppress noise and yield a smoother prediction curve
+        """
+        smoothed = self.moving_average_smooth(predictions)
+        smoothed = self.exponential_smooth(smoothed)
+        if self.use_savitzky and len(predictions) >= self.sg_window:
+            smoothed = self.savitzky_golay_smooth(smoothed)
+        return smoothed
+
+    def predict(self, df):
+        """
+        Core prediction entry point: takes raw data, returns the processed predictions
+        Pipeline: update start time -> preprocess -> load model -> batch predict -> inverse-transform -> smooth
+        """
+        # The prediction start time is the input's latest timestamp plus 3 hours (a business-rule offset)
+        self.test_start_date = (pd.to_datetime(df['index']).max() + timedelta(hours=3)).strftime("%Y-%m-%d %H:%M:%S")
+        self.load_data(df)
+        self.load_model()
+
+        all_predictions = []
+        with torch.no_grad():
+            for batch in self.test_loader:
+                inputs = batch[0].to(self.device)
+                outputs = self.model(inputs)
+                all_predictions.append(outputs.cpu().numpy())   # move back to CPU and convert to numpy
+        
+        # Concatenate all batches and reshape to [num_samples, labels_num]
+        predictions = np.concatenate(all_predictions, axis=0).reshape(-1, self.labels_num)
+        
+        # Inverse-transform
+        inverse_scaler = MinMaxScaler()
+        
+        # Reuse the training-time scaler parameters (target-feature slice only)
+        inverse_scaler.min_ = self.scaler.min_[-self.labels_num:]
+        inverse_scaler.scale_ = self.scaler.scale_[-self.labels_num:]
+        predictions = inverse_scaler.inverse_transform(predictions)
+        predictions = np.clip(predictions, 0, None)
+        
+        # Smooth
+        predictions = self.smooth_predictions(predictions)
+        
+        return predictions
+
+    def save_predictions(self, predictions):
+        """
+        Save predictions to CSV with timestamps and each target feature's predicted values
+        Timestamps are generated from the start time and the data resolution
+        """
+        # Parse the prediction start time
+        start_time = datetime.strptime(self.test_start_date, "%Y-%m-%d %H:%M:%S")
+        # Time interval (resolution converted to hours)
+        time_interval = pd.Timedelta(hours=(self.resolution / 60))
+        # Generate all prediction timestamps
+        timestamps = [start_time + i * time_interval for i in range(len(predictions))]
+        
+        # Target column names (same as at training time)
+        base_columns = [
+            'C.M.RO1_DB@DPT_1', 'C.M.RO2_DB@DPT_1', 'C.M.RO3_DB@DPT_1', 'C.M.RO4_DB@DPT_1',
+            'C.M.RO1_DB@DPT_2', 'C.M.RO2_DB@DPT_2', 'C.M.RO3_DB@DPT_2', 'C.M.RO4_DB@DPT_2',
+        ]
+        pred_columns = [f'{col}_pred' for col in base_columns]
+        df_result = pd.DataFrame(predictions, columns=pred_columns)
+        df_result.insert(0, 'date', timestamps)
+        df_result.to_csv(self.output_csv_path, index=False)
+        print(f"预测结果保存至:{self.output_csv_path}")
+        
+        

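End to end, the class above is meant to be driven roughly as follows. A minimal sketch, assuming a CSV whose columns match `reorder_columns` (the `history.csv` filename is invented; `model.pth` and `scaler.pkl` must sit in the working directory):

```python
import pandas as pd
from predict import Predictor

# Hypothetical input file with an 'index' timestamp column plus the
# 14 sensor columns listed in reorder_columns
df = pd.read_csv("history.csv")

predictor = Predictor()
predictions = predictor.predict(df)      # array of shape [n_steps, labels_num]
predictor.save_predictions(predictions)  # writes predictions.csv
```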
BIN
models/pressure_prediction_model/90天TMP预测模型源码/scaler.pkl


+ 246 - 0
models/uf-rl/DQN_decide.py

@@ -0,0 +1,246 @@
+import numpy as np
+from stable_baselines3 import DQN
+from DQN_env import UFSuperCycleEnv, UFParams
+
+# Model path
+MODEL_PATH = "dqn_model.zip"
+
+# Load the model once at module import for efficiency
+model = DQN.load(MODEL_PATH)
+
+def run_uf_DQN_decide(uf_params, TMP0_value: float):
+    """
+    Single-step decision function: takes a raw TMP0, predicts and executes an action
+
+    Args:
+        TMP0_value (float): current TMP0 (same unit as the environment)
+
+    Returns:
+        dict: the model's chosen action, action parameters, next state, reward, etc.
+    """
+    # 1. Instantiate the environment
+    base_params = uf_params
+    env = UFSuperCycleEnv(base_params)
+
+    # 2. Write the supplied TMP0 into the environment
+    env.current_params.TMP0 = TMP0_value
+
+    # 3. Get the normalized state
+    obs = env._get_obs().reshape(1, -1)
+
+    # 4. Model predicts the action
+    action, _ = model.predict(obs, deterministic=True)
+
+    # 5. Decode the action into L_s and t_bw_s
+    L_s, t_bw_s = env._get_action_values(action[0])
+
+    # 6. Execute the action in the environment
+    next_obs, reward, terminated, truncated, info = env.step(action[0])
+
+    # 7. Assemble the result
+    result = {
+        "action": int(action[0]),
+        "L_s": float(L_s),
+        "t_bw_s": float(t_bw_s),
+        "next_obs": next_obs,
+        "reward": reward,
+        "terminated": terminated,
+        "truncated": truncated,
+        "info": info
+    }
+
+    # 8. Close the environment
+    env.close()
+
+    return result
+
+def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
+    """
+    Generate PLC instructions from the plant's current values, the model's previous
+    decision and the model's current decision.
+
+    Additional handling:
+    1. None values: if the model's previous value is None, fall back to the plant's
+       current value; if that is also None, return None and report the error.
+    """
+    # Parameter configuration (kept as-is)
+    params = UFParams(
+        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
+        t_bw_min_s=40.0, t_bw_max_s=60.0, t_bw_step_s=5.0,
+    )
+
+    # Unpack parameters
+    L_step_s = params.L_step_s
+    t_bw_step_s = params.t_bw_step_s
+    L_min_s = params.L_min_s
+    L_max_s = params.L_max_s
+    t_bw_min_s = params.t_bw_min_s
+    t_bw_max_s = params.t_bw_max_s
+    adjustment_threshold = 1.0
+
+    # Handle None values
+    if model_prev_L_s is None:
+        if current_L_s is None:
+            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
+            return None, None
+        else:
+            # Use the plant's current value as the baseline
+            effective_current_L = current_L_s
+            source_L = "plant current value (model's previous value is None)"
+    else:
+        # The model's previous value exists; check the plant's current value next
+        if current_L_s is None:
+            effective_current_L = model_prev_L_s
+            source_L = "model's previous value (plant current value is None)"
+        else:
+            effective_current_L = model_prev_L_s
+            source_L = "model's previous value"
+
+    # Same handling for the backwash duration
+    if model_prev_t_bw_s is None:
+        if current_t_bw_s is None:
+            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
+            return None, None
+        else:
+            effective_current_t_bw = current_t_bw_s
+            source_t_bw = "工厂当前值(模型上一轮值为None)"
+    else:
+        if current_t_bw_s is None:
+            effective_current_t_bw = model_prev_t_bw_s
+            source_t_bw = "模型上一轮值(工厂当前值为None)"
+        else:
+            effective_current_t_bw = model_prev_t_bw_s
+            source_t_bw = "模型上一轮值"
+
+    # Check all inputs against the allowed ranges (non-None values only)
+    # Plant current values (warnings)
+    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
+        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
+        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    # Model's previous decision values (warnings)
+    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
+        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
+        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    # Model's current decision values (errors)
+    if model_L_s is None:
+        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
+    elif not (L_min_s <= model_L_s <= L_max_s):
+        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+
+    if model_t_bw_s is None:
+        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
+    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
+        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
+    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
+
+    # Compute the adjustment against the chosen baseline
+    L_diff = model_L_s - effective_current_L
+    L_adjustment = 0
+    if abs(L_diff) >= adjustment_threshold * L_step_s:
+        if L_diff >= 0:
+            L_adjustment = L_step_s
+        else:
+            L_adjustment = -L_step_s
+    next_L_s = effective_current_L + L_adjustment
+
+    t_bw_diff = model_t_bw_s - effective_current_t_bw
+    t_bw_adjustment = 0
+    if abs(t_bw_diff) >= adjustment_threshold * t_bw_step_s:
+        if t_bw_diff >= 0:
+            t_bw_adjustment = t_bw_step_s
+        else:
+            t_bw_adjustment = -t_bw_step_s
+    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
+
+    return next_L_s, next_t_bw_s
+
+
+from DQN_env import simulate_one_supercycle
+def calc_uf_cycle_metrics(p, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s: float, t_bw_s: float):
+    """
+    Compute the core performance metrics of the UF ultrafiltration system
+
+    Args:
+        p (UFParams): UF system parameters
+        L_s (float): single filtration duration (s)
+        t_bw_s (float): single backwash duration (s)
+
+    Returns:
+        dict: {
+            "k_bw_per_ceb": number of small cycles,
+            "ton_water_energy_kWh_per_m3": energy per ton of water,
+            "recovery": recovery rate,
+            "net_delivery_rate_m3ph": net delivery rate (m³/h),
+            "daily_prod_time_h": average daily production time (h/day),
+            "max_permeability": peak permeability over the cycle (LMH/bar)
+        }
+    """
+    # Write the transmembrane pressure into the parameters
+    p.TMP0 = TMP0
+
+    # Simulate a supercycle under these parameters
+    feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
+
+    # Pull the simulated-cycle metrics
+    k_bw_per_ceb = info["k_bw_per_ceb"]
+    ton_water_energy_kWh_per_m3 = info["ton_water_energy_kWh_per_m3"]
+    recovery = info["recovery"]
+    net_delivery_rate_m3ph = info["net_delivery_rate_m3ph"]
+    daily_prod_time_h = info["daily_prod_time_h"]
+
+    # Max/min transmembrane pressure within the simulated cycle
+    if max_tmp_during_filtration is None:
+        max_tmp_during_filtration = info["max_TMP_during_filtration"]
+    if min_tmp_during_filtration is None:
+        min_tmp_during_filtration = info["min_TMP_during_filtration"]
+
+    # Peak permeability: q_UF (m³/h -> L/h) over the membrane area (128 modules x 40 m², assumed),
+    # divided by the minimum TMP (MPa -> bar); the factor 100 folds these unit conversions together
+    max_permeability = 100 * p.q_UF / (128*40) / min_tmp_during_filtration
+
+
+    return {
+        "k_bw_per_ceb": k_bw_per_ceb,
+        "ton_water_energy_kWh_per_m3": ton_water_energy_kWh_per_m3,
+        "recovery": recovery,
+        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
+        "daily_prod_time_h": daily_prod_time_h,
+        "max_permeability": max_permeability
+    }
+
+
+# ==============================
+# Example usage
+# ==============================
+if __name__ == "__main__":
+    uf_params = UFParams()
+    TMP0 = 0.03 # raw TMP0
+    model_decide_result = run_uf_DQN_decide(uf_params, TMP0) # query the model for an action
+    model_L_s = model_decide_result['L_s'] # model's chosen production duration
+    model_t_bw_s = model_decide_result['t_bw_s'] # model's chosen backwash duration
+
+    current_L_s = 3800
+    current_t_bw_s = 40
+    model_prev_L_s = 4040
+    model_prev_t_bw_s = 60
+    L_s, t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s) # get the instruction to dispatch
+
+    L_s = 4100
+    t_bw_s = 96
+    max_tmp_during_filtration = 0.050176 # new plant-data hook: cycle max/min TMP; pass None when no plant data is connected and calc_uf_cycle_metrics() falls back to the simulated cycle's extremes
+    min_tmp_during_filtration = 0.012496
+    execution_result = calc_uf_cycle_metrics(uf_params, TMP0, max_tmp_during_filtration, min_tmp_during_filtration, L_s, t_bw_s)
+    print("\n===== 单步决策结果 =====")
+    print(f"模型选择的动作: {model_decide_result['action']}")
+    print(f"模型选择的L_s: {model_L_s} 秒, 模型选择的t_bw_s: {model_t_bw_s} 秒")
+    print(f"指令下发的L_s: {L_s} 秒, 指令下发的t_bw_s: {t_bw_s} 秒")
+    print(f"指令对应的反洗次数: {execution_result['k_bw_per_ceb']}")
+    print(f"指令对应的吨水电耗: {execution_result['ton_water_energy_kWh_per_m3']}")
+    print(f"指令对应的回收率: {execution_result['recovery']}")
+    print(f"指令对应的日均产水时间: {execution_result['daily_prod_time_h']}")
+    print(f"指令对应的最高渗透率: {execution_result['max_permeability']}")

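The flat action index the environment returns encodes the (L_s, t_bw_s) grid pair via integer division and modulo. A minimal sketch of the decoding, using the grid bounds from the `UFParams` in `generate_plc_instructions` above:

```python
import numpy as np

# Grid from the UFParams used in generate_plc_instructions
L_values = np.arange(3600.0, 6000.0 + 60.0, 60.0)   # 41 filtration durations
t_bw_values = np.arange(40.0, 60.0 + 5.0, 5.0)      # 5 backwash durations
num_bw = len(t_bw_values)

def decode(action: int) -> tuple[float, float]:
    """Mirror of UFSuperCycleEnv._get_action_values."""
    return L_values[action // num_bw], t_bw_values[action % num_bw]

print(decode(0))    # (3600.0, 40.0)
print(decode(7))    # (3660.0, 50.0)
```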
+ 340 - 0
models/uf-rl/DQN_env.py

@@ -0,0 +1,340 @@
+import os
+import time
+import random
+import numpy as np
+import gymnasium as gym
+from gymnasium import spaces
+from stable_baselines3 import DQN
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.callbacks import BaseCallback
+from typing import Dict, Tuple, Optional
+import torch
+import torch.nn as nn
+from dataclasses import dataclass, asdict
+from UF_models import TMPIncreaseModel, TMPDecreaseModel  # 导入模型类
+import copy
+
+
+# ==== Base membrane operating parameters ====
+@dataclass
+class UFParams:
+    # -- Membrane and operating parameters --
+    q_UF: float = 360.0  # filtration feed flow (m^3/h)
+    TMP0: float = 0.03  # initial TMP (MPa)
+    TMP_max: float = 0.06  # hard TMP ceiling (MPa)
+
+    # -- Membrane fouling dynamics --
+    alpha: float = 1e-6  # TMP growth coefficient
+    belta: float = 1.1  # power exponent
+
+    # -- Backwash parameters (fixed) --
+    q_bw_m3ph: float = 1000.0  # physical backwash flow (m^3/h)
+
+    # -- CEB parameters (fixed) --
+    T_ceb_interval_h: float = 48.0  # fixed CEB every k hours
+    v_ceb_m3: float = 30.0  # CEB water volume (m^3)
+    t_ceb_s: float = 40 * 60.0  # CEB duration (s)
+    phi_ceb: float = 1.0  # CEB removal fraction (simplified: full recovery to TMP0)
+
+    # -- Constraints and convergence --
+    dTMP: float = 0.001  # max TMP rise relative to TMP0 at the end of one production run (MPa)
+
+    # -- Search ranges (seconds) --
+    L_min_s: float = 3800.0  # filtration duration lower bound (s)
+    L_max_s: float = 6000.0  # filtration duration upper bound (s)
+    t_bw_min_s: float = 40.0  # backwash duration lower bound (s)
+    t_bw_max_s: float = 60.0  # backwash duration upper bound (s)
+
+    # -- Physical backwash recovery function parameters --
+    phi_bw_min: float = 0.7  # minimum backwash removal fraction
+    phi_bw_max: float = 1.0  # maximum backwash removal fraction
+    L_ref_s: float = 4000.0  # time scale of the filtration-duration effect
+    tau_bw_s: float = 20.0  # time scale of the backwash-duration effect
+    gamma_t: float = 1.0  # backwash-duration exponent
+
+    # -- Grid --
+    L_step_s: float = 60.0  # filtration duration step (s)
+    t_bw_step_s: float = 5.0  # backwash duration step (s)
+
+    # Multi-objective weights and high-TMP penalty
+    w_rec: float = 0.8  # recovery weight
+    w_rate: float = 0.2  # net delivery rate weight
+    w_headroom: float = 0.2  # headroom (margin) penalty weight
+    r_headroom: float = 2.0  # headroom penalty exponent
+    headroom_hardcap: float = 0.98  # ratios beyond this are rejected outright
+
+# ==== Load the simulation-environment models ====
+# Initialize the models
+model_fp = TMPIncreaseModel()
+model_bw = TMPDecreaseModel()
+
+# Load the weights
+model_fp.load_state_dict(torch.load("uf_fp.pth"))
+model_bw.load_state_dict(torch.load("uf_bw.pth"))
+
+# Switch to inference mode
+model_fp.eval()
+model_bw.eval()
+
+
+def _delta_tmp(p, L_h: float) -> float:
+    """
+    TMP rise during filtration: delegates to the uf_fp.pth model
+    """
+    return model_fp(p, L_h)
+
+def phi_bw_of(p, L_s: float, t_bw_s: float) -> float:
+    """
+    Backwash removal fraction: delegates to the uf_bw.pth model
+    """
+    return model_bw(p, L_s, t_bw_s)
+
+def _tmp_after_ceb(p, L_s: float, t_bw_s: float) -> float:
+    """
+    TMP after chemical cleaning (CEB); currently restores the initial transmembrane pressure
+    """
+    return p.TMP0
+
+def _v_bw_m3(p, t_bw_s: float) -> float:
+    """
+    Physical backwash water consumption
+    """
+    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
+
+def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
+    """
+    Returns (feasible, metrics dict)
+    - Supports a dynamic CEB count: fixed 48 h interval
+    - Adds average daily production time and energy per ton of water
+    - Also records the minimum TMP
+    """
+    L_h = float(L_s) / 3600.0  # filtration time of one small cycle (h)
+
+    tmp = p.TMP0
+    max_tmp_during_filtration = tmp
+    min_tmp_during_filtration = tmp  # also track the minimum TMP
+    max_residual_increase = 0.0
+
+    # Total duration of one small cycle (h)
+    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
+
+    # Number of small (backwash) cycles per supercycle
+    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
+    if k_bw_per_ceb < 1:
+        k_bw_per_ceb = 1  # at least one small cycle
+
+    # Energy-per-ton lookup table (kWh/m^3), keyed by filtration duration (s)
+    energy_lookup = {
+        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
+        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
+        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
+    }
+
+    for _ in range(k_bw_per_ceb):
+        tmp_run_start = tmp
+
+        # TMP growth during filtration
+        dtmp = _delta_tmp(p, L_h)
+        tmp_peak = tmp_run_start + dtmp
+
+        # Constraint 1: the peak must not exceed the hard ceiling
+        if tmp_peak > p.TMP_max + 1e-12:
+            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
+
+        # Update the max and min TMP
+        if tmp_peak > max_tmp_during_filtration:
+            max_tmp_during_filtration = tmp_peak
+        if tmp_run_start < min_tmp_during_filtration:  # record the minimum TMP at run start
+            min_tmp_during_filtration = tmp_run_start
+
+        # Physical backwash
+        phi = phi_bw_of(p, L_s, t_bw_s)
+        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
+
+        # Constraint 2: cap on the residual increase per run
+        residual_inc = tmp_after_bw - tmp_run_start
+        if residual_inc > p.dTMP + 1e-12:
+            return False, {
+                "reason": "residual TMP increase after BW exceeded dTMP",
+                "residual_increase": residual_inc,
+                "limit_dTMP": p.dTMP
+            }
+        if residual_inc > max_residual_increase:
+            max_residual_increase = residual_inc
+
+        tmp = tmp_after_bw
+
+    # CEB
+    tmp_after_ceb = p.TMP0
+
+    # Volumes and recovery
+    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
+    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
+    V_net = max(0.0, V_feed_super - V_loss_super)
+    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
+
+    # Timing and net delivery rate
+    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
+    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
+
+    # Headroom ratio and hard cap
+    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
+    if headroom_ratio > p.headroom_hardcap + 1e-12:
+        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
+
+    # New metric 1: average daily production time (h/d)
+    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
+
+    # New metric 2: energy per ton of water (kWh/m³)
+    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
+    ton_water_energy = energy_lookup[closest_L]
+
+    info = {
+        "recovery": recovery,
+        "V_feed_super_m3": V_feed_super,
+        "V_loss_super_m3": V_loss_super,
+        "V_net_super_m3": V_net,
+        "supercycle_time_h": T_super_h,
+        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
+        "max_TMP_during_filtration": max_tmp_during_filtration,
+        "min_TMP_during_filtration": min_tmp_during_filtration,  # 新增:最小TMP
+        "max_residual_increase_per_run": max_residual_increase,
+        "phi_bw_effective": phi,
+        "TMP_after_ceb": tmp_after_ceb,
+        "headroom_ratio": headroom_ratio,
+        "daily_prod_time_h": daily_prod_time_h,
+        "ton_water_energy_kWh_per_m3": ton_water_energy,
+        "k_bw_per_ceb": k_bw_per_ceb
+    }
+
+    return True, info
+
+def _score(p: UFParams, rec: dict) -> float:
+    """综合评分:越大越好。通过非线性放大奖励差异,强化区分好坏动作"""
+
+    # Dimensionless net delivery rate
+    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
+
+    # TMP soft penalty (sigmoid)
+    tmp_ratio = rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)
+    k = 10.0
+    headroom_penalty = 1.0 / (1.0 + np.exp(-k * (tmp_ratio - 1.0)))
+
+    # Base reward (roughly 0.6-0.9)
+    base_reward = (
+        p.w_rec * rec["recovery"]
+        + p.w_rate * rate_norm
+        - p.w_headroom * headroom_penalty
+    )
+
+    # Non-linear amplification: squared mapping + scaling
+    # Widens the gap between good and bad actions while capping the magnitude to avoid large TD errors
+    amplified_reward = (base_reward - 0.5) ** 2 * 5.0
+
+    # Optional: keep the sign to distinguish negative rewards
+    if base_reward < 0.5:
+        amplified_reward = -amplified_reward
+
+    return amplified_reward
+
+
+class UFSuperCycleEnv(gym.Env):
+    """超滤系统环境(超级周期级别决策)"""
+
+    metadata = {"render_modes": ["human"]}
+
+    def __init__(self, base_params, max_episode_steps: int = 20):
+        super(UFSuperCycleEnv, self).__init__()
+
+        self.base_params = base_params
+        self.current_params = copy.deepcopy(base_params)
+        self.max_episode_steps = max_episode_steps
+        self.current_step = 0
+
+        # Discrete action grid
+        self.L_values = np.arange(
+            self.base_params.L_min_s,
+            self.base_params.L_max_s + self.base_params.L_step_s,
+            self.base_params.L_step_s
+        )
+        self.t_bw_values = np.arange(
+            self.base_params.t_bw_min_s,
+            self.base_params.t_bw_max_s + self.base_params.t_bw_step_s,
+            self.base_params.t_bw_step_s
+        )
+
+        self.num_L = len(self.L_values)
+        self.num_bw = len(self.t_bw_values)
+
+        # Single flat discrete action space
+        self.action_space = spaces.Discrete(self.num_L * self.num_bw)
+
+        # The state adds TMP0, the previous action (L_s, t_bw_s) and the cycle's max TMP
+        # All state normalization happens inside _get_obs
+        self.observation_space = spaces.Box(
+            low=np.zeros(4, dtype=np.float32),
+            high=np.ones(4, dtype=np.float32),
+            dtype=np.float32
+        )
+
+        # Initialize state
+        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
+        self.max_TMP_during_filtration = self.current_params.TMP0
+        self.reset(seed=None)
+
+    def _get_obs(self):
+        TMP0 = self.current_params.TMP0
+        TMP0_norm = (TMP0 - 0.01) / (0.05 - 0.01)
+
+        L_s, t_bw_s = self.last_action
+        L_norm = (L_s - self.base_params.L_min_s) / (self.base_params.L_max_s - self.base_params.L_min_s)
+        t_bw_norm = (t_bw_s - self.base_params.t_bw_min_s) / (self.base_params.t_bw_max_s - self.base_params.t_bw_min_s)
+
+        max_TMP_norm = (self.max_TMP_during_filtration - 0.01) / (0.05 - 0.01)
+
+        return np.array([TMP0_norm, L_norm, t_bw_norm, max_TMP_norm], dtype=np.float32)
+
+    def _get_action_values(self, action):
+        L_idx = action // self.num_bw
+        t_bw_idx = action % self.num_bw
+        return self.L_values[L_idx], self.t_bw_values[t_bw_idx]
+
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+        self.current_params.TMP0 = np.random.uniform(0.01, 0.03)
+        self.current_step = 0
+        self.last_action = (self.base_params.L_min_s, self.base_params.t_bw_min_s)
+        self.max_TMP_during_filtration = self.current_params.TMP0
+        return self._get_obs(), {}
+
+    def step(self, action):
+        self.current_step += 1
+        L_s, t_bw_s = self._get_action_values(action)
+        L_s = np.clip(L_s, self.base_params.L_min_s, self.base_params.L_max_s)
+        t_bw_s = np.clip(t_bw_s, self.base_params.t_bw_min_s, self.base_params.t_bw_max_s)
+
+        # Simulate one supercycle
+        feasible, info = simulate_one_supercycle(self.current_params, L_s, t_bw_s)
+
+        if feasible:
+            reward = _score(self.current_params, info)
+            self.current_params.TMP0 = info["TMP_after_ceb"]
+            self.max_TMP_during_filtration = info["max_TMP_during_filtration"]
+            terminated = False
+        else:
+            reward = -20
+            terminated = True
+
+        truncated = self.current_step >= self.max_episode_steps
+        self.last_action = (L_s, t_bw_s)
+        next_obs = self._get_obs()
+
+        info["feasible"] = feasible
+        info["step"] = self.current_step
+
+        return next_obs, reward, terminated, truncated, info
+
+
+
+

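A short random rollout is a useful sanity check on the environment's reward and termination logic. A minimal sketch (it needs `uf_fp.pth` and `uf_bw.pth` on disk, since the module loads them at import time):

```python
from DQN_env import UFParams, UFSuperCycleEnv

env = UFSuperCycleEnv(UFParams())
obs, _ = env.reset(seed=0)
for step in range(5):
    action = env.action_space.sample()      # random (L_s, t_bw_s) grid index
    obs, reward, terminated, truncated, info = env.step(action)
    print(step, f"reward={reward:.3f}", f"feasible={info['feasible']}")
    if terminated or truncated:
        obs, _ = env.reset()
```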
+ 244 - 0
models/uf-rl/DQN_train.py

@@ -0,0 +1,244 @@
+import os
+import time
+import random
+import numpy as np
+import torch
+
+import gymnasium as gym
+from gymnasium import spaces
+from stable_baselines3 import DQN
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.callbacks import BaseCallback
+
+from DQN_env import UFParams, UFSuperCycleEnv
+
+
+# ==== Reinforcement-learning hyperparameters ====
+class DQNParams:
+    """
+    DQN hyperparameter definition class,
+    used to manage model-training parameters in one place
+    """
+    # Learning rate, controls the network update step size
+    learning_rate: float = 1e-4
+
+    # Replay buffer size (steps)
+    buffer_size: int = 10000
+
+    # Steps to collect before learning starts
+    learning_starts: int = 200
+
+    # Samples drawn from the replay buffer per update
+    batch_size: int = 32
+
+    # Discount factor; closer to 1 weights long-term reward more heavily
+    gamma: float = 0.95
+
+    # Train once every this many steps
+    train_freq: int = 4
+
+    # Target-network update interval
+    target_update_interval: int = 2000
+
+    # Initial exploration rate ε
+    exploration_initial_eps: float = 1.0
+
+    # Fraction of training over which ε decays from initial to final
+    exploration_fraction: float = 0.3
+
+    # Final exploration rate ε
+    exploration_final_eps: float = 0.02
+
+    # Log remark (distinguishes experiments)
+    remark: str = "default"
+
+class UFEpisodeRecorder:
+    """记录episode中的决策和结果"""
+
+    def __init__(self):
+        self.episode_data = []
+        self.current_episode = []
+
+    def record_step(self, obs, action, reward, done, info):
+        """记录单步信息"""
+        step_data = {
+            "obs": obs.copy(),
+            "action": action.copy(),
+            "reward": reward,
+            "done": done,
+            "info": info.copy() if info else {}
+        }
+        self.current_episode.append(step_data)
+
+        if done:
+            self.episode_data.append(self.current_episode)
+            self.current_episode = []
+
+    def get_episode_stats(self, episode_idx=-1):
+        """获取episode统计信息"""
+        if not self.episode_data:
+            return {}
+
+        episode = self.episode_data[episode_idx]
+        total_reward = sum(step["reward"] for step in episode)
+        avg_recovery = np.mean([step["info"].get("recovery", 0) for step in episode if "recovery" in step["info"]])
+        feasible_steps = sum(1 for step in episode if step["info"].get("feasible", False))
+
+        return {
+            "total_reward": total_reward,
+            "avg_recovery": avg_recovery,
+            "feasible_steps": feasible_steps,
+            "total_steps": len(episode)
+        }
+
+
+# ==== Training callback ====
+class UFTrainingCallback(BaseCallback):
+    """
+    Training callback that logs every step to the recorder.
+    1. Does not depend on the environment's internal last_* attributes
+    2. Uses the obs, actions, rewards, dones and infos exposed by the env interface
+    3. Handles end-of-episode statistics automatically
+    """
+
+    def __init__(self, recorder, verbose=0):
+        super(UFTrainingCallback, self).__init__(verbose)
+        self.recorder = recorder
+
+    def _on_step(self) -> bool:
+        try:
+            new_obs = self.locals.get("new_obs")
+            actions = self.locals.get("actions")
+            rewards = self.locals.get("rewards")
+            dones = self.locals.get("dones")
+            infos = self.locals.get("infos")
+
+            if len(new_obs) > 0:
+                step_obs = new_obs[0]
+                step_action = actions[0] if actions is not None else None
+                step_reward = rewards[0] if rewards is not None else 0.0
+                step_done = dones[0] if dones is not None else False
+                step_info = infos[0] if infos is not None else {}
+
+                # Print the current step's info
+                if self.verbose:
+                    print(f"[Step {self.num_timesteps}] action={step_action}, reward={step_reward:.3f}, done={step_done}")
+
+                # Record the step
+                self.recorder.record_step(
+                    obs=step_obs,
+                    action=step_action,
+                    reward=step_reward,
+                    done=step_done,
+                    info=step_info,
+                )
+
+        except Exception as e:
+            if self.verbose:
+                print(f"[Callback Error] {e}")
+
+        return True
+
+
+
+
+class DQNTrainer:
+    def __init__(self, env, params, callback=None):
+        self.env = env
+        self.params = params
+        self.callback = callback
+        self.log_dir = self._create_log_dir()
+        self.model = self._create_model()
+
+    def _create_log_dir(self):
+        # Create the training log directory
+        timestamp = time.strftime("%Y%m%d-%H%M%S")
+        log_name = (
+            f"DQN_lr{self.params.learning_rate}_buf{self.params.buffer_size}_bs{self.params.batch_size}"
+            f"_gamma{self.params.gamma}_exp{self.params.exploration_fraction}"
+            f"_{self.params.remark}_{timestamp}"
+        )
+        log_dir = os.path.join("./uf_dqn_tensorboard", log_name)
+        os.makedirs(log_dir, exist_ok=True)
+        return log_dir
+
+    def _create_model(self):
+        return DQN(
+            policy="MlpPolicy",
+            env=self.env,
+            learning_rate=self.params.learning_rate,
+            buffer_size=self.params.buffer_size,
+            learning_starts=self.params.learning_starts,
+            batch_size=self.params.batch_size,
+            gamma=self.params.gamma,
+            train_freq=self.params.train_freq,
+            target_update_interval=1,  # note: with tau=0.005 this uses soft updates every step,
+            tau=0.005,                 # overriding params.target_update_interval above
+            exploration_initial_eps=self.params.exploration_initial_eps,
+            exploration_fraction=self.params.exploration_fraction,
+            exploration_final_eps=self.params.exploration_final_eps,
+            verbose=1,
+            tensorboard_log=self.log_dir
+        )
+
+    def train(self, total_timesteps: int):
+        if self.callback:
+            self.model.learn(total_timesteps=total_timesteps, callback=self.callback)
+        else:
+            self.model.learn(total_timesteps=total_timesteps)
+        print(f"模型训练完成,日志保存在:{self.log_dir}")
+
+    def save(self, path=None):
+        if path is None:
+            path = os.path.join(self.log_dir, "dqn_model.zip")
+        self.model.save(path)
+        print(f"模型已保存到:{path}")
+
+    def load(self, path):
+        self.model = DQN.load(path, env=self.env)
+        print(f"模型已从 {path} 加载")
+
+
+def set_global_seed(seed: int):
+    """固定全局随机种子,保证训练可复现"""
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)  # in case a GPU is used
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def train_uf_rl_agent(params: UFParams, total_timesteps: int = 10000, seed: int = 2025):
+    set_global_seed(seed)
+    recorder = UFEpisodeRecorder()
+    callback = UFTrainingCallback(recorder, verbose=1)
+
+    def make_env():
+        env = UFSuperCycleEnv(params)
+        env = Monitor(env)
+        return env
+
+    env = DummyVecEnv([make_env])
+
+    dqn_params = DQNParams()
+    trainer = DQNTrainer(env, dqn_params, callback=callback)
+    trainer.train(total_timesteps)
+    trainer.save()
+
+    stats = callback.recorder.get_episode_stats()
+    print(f"训练完成 - 总奖励: {stats.get('total_reward', 0):.2f}, 平均回收率: {stats.get('avg_recovery', 0):.3f}")
+
+    return trainer.model
+
+
+# Training entry point
+if __name__ == "__main__":
+    # Initialize parameters
+    params = UFParams()
+
+    # Train the RL agent
+    print("Training the RL agent...")
+    train_uf_rl_agent(params, total_timesteps=50000)
+

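Once training has produced `dqn_model.zip` in the run's log directory, it can be reloaded for deterministic evaluation. A minimal sketch (the path is illustrative; the actual file lands under `./uf_dqn_tensorboard/<run>/`):

```python
from stable_baselines3 import DQN
from DQN_env import UFParams, UFSuperCycleEnv

env = UFSuperCycleEnv(UFParams())
model = DQN.load("dqn_model.zip", env=env)  # illustrative path

obs, _ = env.reset(seed=0)
total_reward = 0.0
for _ in range(20):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(int(action))
    total_reward += reward
    if terminated or truncated:
        break
print(f"episode reward: {total_reward:.3f}")
```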
+ 405 - 0
models/uf-rl/UF_decide.py

@@ -0,0 +1,405 @@
+# UF_decide.py
+from dataclasses import dataclass
+import numpy as np
+
+@dataclass
+class UFParams:
+    # -- Membrane and operating parameters --
+    q_UF: float = 360.0           # filtration feed flow (m^3/h)
+    TMP0: float = 0.03            # initial TMP (MPa)
+    TMP_max: float = 0.06         # hard TMP ceiling (MPa)
+
+    # -- Membrane fouling dynamics --
+    alpha: float = 1e-6           # TMP growth coefficient
+    belta: float = 1.1            # power exponent
+
+    # -- Backwash parameters (fixed) --
+    q_bw_m3ph: float = 1000.0     # physical backwash flow (m^3/h)
+
+    # -- CEB parameters (fixed) --
+    T_ceb_interval_h: float = 48.0  # fixed CEB every k hours
+    v_ceb_m3: float = 30.0        # CEB water volume (m^3)
+    t_ceb_s: float = 40 * 60.0    # CEB duration (s)
+    phi_ceb: float = 1.0          # CEB removal fraction (simplified: full recovery to TMP0)
+
+    # -- Constraints and convergence --
+    dTMP: float = 0.0005          # max TMP rise relative to TMP0 at the end of one production run (MPa)
+
+    # -- Search ranges (seconds) --
+    L_min_s: float = 3600.0       # filtration duration lower bound (s)
+    L_max_s: float = 4200.0       # filtration duration upper bound (s)
+    t_bw_min_s: float = 40.0      # backwash duration lower bound (s)
+    t_bw_max_s: float = 60.0      # backwash duration upper bound (s)
+
+    # -- Physical backwash recovery function parameters --
+    phi_bw_min: float = 0.7       # minimum backwash removal fraction
+    phi_bw_max: float = 1.0       # maximum backwash removal fraction
+    L_ref_s: float = 4000.0       # time scale of the filtration-duration effect
+    tau_bw_s: float = 30.0        # time scale of the backwash-duration effect
+    gamma_t: float = 1.0          # backwash-duration exponent
+
+    # -- Grid --
+    L_step_s: float = 60.0        # filtration duration step (s)
+    t_bw_step_s: float = 5.0      # backwash duration step (s)
+
+    # Multi-objective weights and high-TMP penalty
+    w_rec: float = 0.8            # recovery weight
+    w_rate: float = 0.2           # net delivery rate weight
+    w_headroom: float = 0.3       # headroom (margin) penalty weight
+    r_headroom: float = 2.0       # headroom penalty exponent
+    headroom_hardcap: float = 0.98 # ratios beyond this are rejected outright
+
+def _delta_tmp(p: UFParams, L_h: float) -> float:
+    # TMP rise during the filtration phase
+    return float(p.alpha * (p.q_UF ** p.belta) * L_h)
+
+def _v_bw_m3(p: UFParams, t_bw_s: float) -> float:
+    # water consumed by one physical backwash (m^3)
+    return float(p.q_bw_m3ph * (float(t_bw_s) / 3600.0))
+
+def phi_bw_of(p: UFParams, L_s: float, t_bw_s: float) -> float:
+    # Backwash removal fraction: its ceiling shrinks as filtration length grows, and saturates as backwash length grows
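+    # In closed form:
+    #   phi(L, t_bw) = [phi_bw_min + (phi_bw_max - phi_bw_min) * exp(-L / L_ref_s)]
+    #                * [1 - exp(-(t_bw / tau_bw_s) ** gamma_t)]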
+    L = max(float(L_s), 1.0)
+    t = max(float(t_bw_s), 1e-6)
+    upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
+    time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
+    phi = upper_L * time_gain
+    return float(np.clip(phi, 0.0, 0.999))
+
+def simulate_one_supercycle(p: UFParams, L_s: float, t_bw_s: float):
+    """
+    Return (feasible, metrics_dict).
+    - Supports a dynamic number of small cycles per CEB (fixed 48 h interval)
+    - Also reports daily production time and per-ton-water energy consumption
+    """
+    L_h = float(L_s) / 3600.0  # filtration time per small cycle (h)
+
+    tmp = p.TMP0
+    max_tmp_during_filtration = tmp
+    max_residual_increase = 0.0
+
+    # total duration of one small cycle (h)
+    t_small_cycle_h = (L_s + t_bw_s) / 3600.0
+
+    # Number of small (filtration + backwash) cycles between consecutive CEBs
+    k_bw_per_ceb = int(np.floor(p.T_ceb_interval_h / t_small_cycle_h))
+    if k_bw_per_ceb < 1:
+        k_bw_per_ceb = 1  # at least one small cycle
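+    # e.g. L_s = 3600 s and t_bw_s = 40 s give a ~1.011 h small cycle, so k_bw_per_ceb = floor(48 / 1.011) = 47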
+
+    # ton水电耗查表
+    energy_lookup = {
+        3600: 0.1034, 3660: 0.1031, 3720: 0.1029, 3780: 0.1026,
+        3840: 0.1023, 3900: 0.1021, 3960: 0.1019, 4020: 0.1017,
+        4080: 0.1015, 4140: 0.1012, 4200: 0.1011
+    }
+
+    for _ in range(k_bw_per_ceb):
+        tmp_run_start = tmp
+
+        # TMP growth during filtration
+        dtmp = _delta_tmp(p, L_h)
+        tmp_peak = tmp_run_start + dtmp
+
+        # Constraint 1: the peak must not exceed the hard ceiling
+        if tmp_peak > p.TMP_max + 1e-12:
+            return False, {"reason": "TMP_max violated during filtration", "TMP_peak": tmp_peak}
+
+        if tmp_peak > max_tmp_during_filtration:
+            max_tmp_during_filtration = tmp_peak
+
+        # Physical backwash
+        phi = phi_bw_of(p, L_s, t_bw_s)
+        tmp_after_bw = tmp_peak - phi * (tmp_peak - tmp_run_start)
+
+        # Constraint 2: cap the residual TMP increase per run
+        residual_inc = tmp_after_bw - tmp_run_start
+        if residual_inc > p.dTMP + 1e-12:
+            return False, {
+                "reason": "residual TMP increase after BW exceeded dTMP",
+                "residual_increase": residual_inc,
+                "limit_dTMP": p.dTMP
+            }
+        if residual_inc > max_residual_increase:
+            max_residual_increase = residual_inc
+
+        tmp = tmp_after_bw
+
+    # CEB
+    tmp_after_ceb = p.TMP0
+
+    # Volumes and recovery
+    V_feed_super = k_bw_per_ceb * p.q_UF * L_h
+    V_loss_super = k_bw_per_ceb * _v_bw_m3(p, t_bw_s) + p.v_ceb_m3
+    V_net = max(0.0, V_feed_super - V_loss_super)
+    recovery = max(0.0, V_net / max(V_feed_super, 1e-12))
+
+    # Time and net delivery rate
+    T_super_h = k_bw_per_ceb * (L_s + t_bw_s) / 3600.0 + p.t_ceb_s / 3600.0
+    net_delivery_rate_m3ph = V_net / max(T_super_h, 1e-12)
+
+    # Headroom ratio and hard cap
+    headroom_ratio = max_tmp_during_filtration / max(p.TMP_max, 1e-12)
+    if headroom_ratio > p.headroom_hardcap + 1e-12:
+        return False, {"reason": "headroom hardcap exceeded", "headroom_ratio": headroom_ratio}
+
+    # -- Added metric 1: daily production time (h/d) --
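+    # (fraction of super-cycle time spent filtering, scaled to 24 h/day)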
+    daily_prod_time_h = k_bw_per_ceb * L_h / T_super_h * 24.0
+
+    # -- Added metric 2: per-ton-water energy (kWh/m^3), via nearest lookup key --
+    closest_L = min(energy_lookup.keys(), key=lambda x: abs(x - L_s))
+    ton_water_energy = energy_lookup[closest_L]
+
+    info = {
+        "recovery": recovery,
+        "V_feed_super_m3": V_feed_super,
+        "V_loss_super_m3": V_loss_super,
+        "V_net_super_m3": V_net,
+        "supercycle_time_h": T_super_h,
+        "net_delivery_rate_m3ph": net_delivery_rate_m3ph,
+        "max_TMP_during_filtration": max_tmp_during_filtration,
+        "max_residual_increase_per_run": max_residual_increase,
+        "phi_bw_effective": phi,
+        "TMP_after_ceb": tmp_after_ceb,
+        "headroom_ratio": headroom_ratio,
+        "daily_prod_time_h": daily_prod_time_h,
+        "ton_water_energy_kWh_per_m3": ton_water_energy,
+        "k_bw_per_ceb": k_bw_per_ceb
+    }
+
+    return True, info
+
+def _score(p: UFParams, rec: dict) -> float:
+    """综合评分:越大越好。不同TMP0会改变max_TMP→改变惩罚→得到不同解。"""
+    # 无量纲化净供水率
+    rate_norm = rec["net_delivery_rate_m3ph"] / max(p.q_UF, 1e-12)
+    headroom_penalty = (rec["max_TMP_during_filtration"] / max(p.TMP_max, 1e-12)) ** p.r_headroom
+    return (p.w_rec * rec["recovery"]
+            + p.w_rate * rate_norm
+            - p.w_headroom * headroom_penalty)
+
+def optimize_2d(p: UFParams,
+                L_min_s=None, L_max_s=None, L_step_s=None,
+                t_bw_min_s=None, t_bw_max_s=None, t_bw_step_s=None):
+    # Build the search grid
+    L_lo = p.L_min_s if L_min_s is None else float(L_min_s)
+    L_hi = p.L_max_s if L_max_s is None else float(L_max_s)
+    L_st = p.L_step_s if L_step_s is None else float(L_step_s)
+
+    t_lo = p.t_bw_min_s if t_bw_min_s is None else float(t_bw_min_s)
+    t_hi = p.t_bw_max_s if t_bw_max_s is None else float(t_bw_max_s)
+    t_st = p.t_bw_step_s if t_bw_step_s is None else float(t_bw_step_s)
+
+    L_vals = np.arange(L_lo, L_hi + 1e-9, L_st)
+    t_vals = np.arange(t_lo, t_hi + 1e-9, t_st)
+
+    best = None
+    best_score = -np.inf
+
+    for L_s in L_vals:
+        for t_bw_s in t_vals:
+            feasible, info = simulate_one_supercycle(p, L_s, t_bw_s)
+            if not feasible:
+                continue
+
+            rec = {"L_s": float(L_s), "t_bw_s": float(t_bw_s)}
+            rec.update(info)
+
+            score = _score(p, rec)
+
+            if score > best_score + 1e-14:
+                best_score = score
+                best = rec.copy()
+                best["score"] = float(score)
+            # On a tie, prefer higher recovery, then higher net delivery rate
+            elif abs(score - best_score) <= 1e-14:
+                if (rec["recovery"] > best["recovery"] + 1e-12) or (
+                    abs(rec["recovery"] - best["recovery"]) <= 1e-12 and
+                    rec["net_delivery_rate_m3ph"] > best["net_delivery_rate_m3ph"] + 1e-12
+                ):
+                    best = rec.copy()
+                    best["score"] = float(score)
+
+    if best is None:
+        return {"status": "no-feasible-solution"}
+    best["status"] = "feasible"
+    return best
+
+def run_uf_decision(TMP0: float = None) -> dict:
+    if TMP0 is None:
+        rng = np.random.default_rng()
+        TMP0 = rng.uniform(0.03, 0.04)  # randomized initial TMP
+
+    params = UFParams(
+        q_UF=360.0,
+        TMP_max=0.05,
+        alpha=1.2e-6,
+        belta=1.0,
+        q_bw_m3ph=1000.0,
+        T_ceb_interval_h=48,
+        v_ceb_m3=30.0,
+        t_ceb_s=40*60.0,
+        phi_ceb=1.0,
+        dTMP=0.001,
+
+        L_min_s=3600.0, L_max_s=4200.0, L_step_s=30.0,
+        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
+
+        phi_bw_min=0.70, phi_bw_max=1.00,
+        L_ref_s=500.0, tau_bw_s=40.0, gamma_t=1.0,
+
+        TMP0=TMP0,
+
+        w_rec=0.7, w_rate=0.3, w_headroom=0.3, r_headroom=2.0, headroom_hardcap=0.9
+    )
+
+    result = optimize_2d(params)
+    if result.get("status") == "feasible":
+        return {
+            "L_s": result["L_s"],
+            "t_bw_s": result["t_bw_s"],
+            "recovery": result["recovery"],
+            "k_bw_per_ceb": result["k_bw_per_ceb"],
+            "daily_prod_time_h": result["daily_prod_time_h"],
+            "ton_water_energy_kWh_per_m3": result["ton_water_energy_kWh_per_m3"]
+        }
+
+    # No feasible solution: fall back to the minimum filtration time and default metrics
+    return {
+        "L_s": params.L_min_s,
+        "t_bw_s": params.t_bw_min_s,
+        "recovery": 0.0,
+        "k_bw_per_ceb": 1,
+        "daily_prod_time_h": 0.0,
+        "ton_water_energy_kWh_per_m3": 0.0
+    }
+
+
+def generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s):
+    """
+    Generate PLC instructions from the plant's current values, the model's
+    previous-round decision, and the model's current-round decision.
+
+    None handling: if the model's previous value is None, fall back to the
+    plant's current value; if that is also None, return (None, None) and report an error.
+    """
+    # Parameter configuration; the backwash range follows run_uf_decision so
+    # that its suggested values pass the range checks below
+    params = UFParams(
+        L_min_s=3600.0, L_max_s=6000.0, L_step_s=60.0,
+        t_bw_min_s=90.0, t_bw_max_s=100.0, t_bw_step_s=2.0,
+    )
+
+    # Unpack parameters
+    L_step_s = params.L_step_s
+    t_bw_step_s = params.t_bw_step_s
+    L_min_s = params.L_min_s
+    L_max_s = params.L_max_s
+    t_bw_min_s = params.t_bw_min_s
+    t_bw_max_s = params.t_bw_max_s
+    adjustment_threshold = 1.0
+
+    # Handle None values
+    if model_prev_L_s is None:
+        if current_L_s is None:
+            print("错误: 过滤时长的工厂当前值和模型上一轮值均为None")
+            return None, None
+        else:
+            # Use the plant's current value as the baseline
+            effective_current_L = current_L_s
+            source_L = "plant current value (model's previous value is None)"
+    else:
+        # The model's previous value exists; now check the plant's current value
+        if current_L_s is None:
+            effective_current_L = model_prev_L_s
+            source_L = "模型上一轮值(工厂当前值为None)"
+        else:
+            # Both values exist: use whichever is closer to the model's current suggestion
+            current_to_model_diff = abs(current_L_s - model_L_s)
+            prev_to_model_diff = abs(model_prev_L_s - model_L_s)
+
+            if current_to_model_diff <= prev_to_model_diff:
+                effective_current_L = current_L_s
+                source_L = "工厂当前值"
+            else:
+                effective_current_L = model_prev_L_s
+                source_L = "模型上一轮值"
+
+    # Apply the same handling to the backwash duration
+    if model_prev_t_bw_s is None:
+        if current_t_bw_s is None:
+            print("错误: 反洗时长的工厂当前值和模型上一轮值均为None")
+            return None, None
+        else:
+            effective_current_t_bw = current_t_bw_s
+            source_t_bw = "工厂当前值(模型上一轮值为None)"
+    else:
+        if current_t_bw_s is None:
+            effective_current_t_bw = model_prev_t_bw_s
+            source_t_bw = "模型上一轮值(工厂当前值为None)"
+        else:
+            current_to_model_t_bw_diff = abs(current_t_bw_s - model_t_bw_s)
+            prev_to_model_t_bw_diff = abs(model_prev_t_bw_s - model_t_bw_s)
+
+            if current_to_model_t_bw_diff <= prev_to_model_t_bw_diff:
+                effective_current_t_bw = current_t_bw_s
+                source_t_bw = "工厂当前值"
+            else:
+                effective_current_t_bw = model_prev_t_bw_s
+                source_t_bw = "模型上一轮值"
+
+    # Validate inputs against the allowed ranges (non-None values only)
+    # Plant current values (warnings)
+    if current_L_s is not None and not (L_min_s <= current_L_s <= L_max_s):
+        print(f"警告: 当前过滤时长 {current_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+    if current_t_bw_s is not None and not (t_bw_min_s <= current_t_bw_s <= t_bw_max_s):
+        print(f"警告: 当前反洗时长 {current_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    # Model previous-round values (warnings)
+    if model_prev_L_s is not None and not (L_min_s <= model_prev_L_s <= L_max_s):
+        print(f"警告: 模型上一轮过滤时长 {model_prev_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+    if model_prev_t_bw_s is not None and not (t_bw_min_s <= model_prev_t_bw_s <= t_bw_max_s):
+        print(f"警告: 模型上一轮反洗时长 {model_prev_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    # Model current-round values (errors)
+    if model_L_s is None:
+        raise ValueError("错误: 决策模型建议的过滤时长不能为None")
+    elif not (L_min_s <= model_L_s <= L_max_s):
+        raise ValueError(f"错误: 决策模型建议的过滤时长 {model_L_s} 秒不在允许范围内 [{L_min_s}, {L_max_s}]")
+
+    if model_t_bw_s is None:
+        raise ValueError("错误: 决策模型建议的反洗时长不能为None")
+    elif not (t_bw_min_s <= model_t_bw_s <= t_bw_max_s):
+        raise ValueError(f"错误: 决策模型建议的反洗时长 {model_t_bw_s} 秒不在允许范围内 [{t_bw_min_s}, {t_bw_max_s}]")
+
+    print(f"过滤时长基准: {source_L}, 值: {effective_current_L}")
+    print(f"反洗时长基准: {source_t_bw}, 值: {effective_current_t_bw}")
+
+    # Step from the chosen baseline toward the model's suggestion (at most one grid step per call)
+    L_diff = model_L_s - effective_current_L
+    L_adjustment = 0
+    if abs(L_diff) > adjustment_threshold * L_step_s:
+        if L_diff > 0:
+            L_adjustment = L_step_s
+        else:
+            L_adjustment = -L_step_s
+    next_L_s = effective_current_L + L_adjustment
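+    # e.g. baseline 3920 s, suggestion 4160 s, step 60 s: |240| > 60, so next_L_s = 3980 s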
+
+    t_bw_diff = model_t_bw_s - effective_current_t_bw
+    t_bw_adjustment = 0
+    if abs(t_bw_diff) > adjustment_threshold * t_bw_step_s:
+        if t_bw_diff > 0:
+            t_bw_adjustment = t_bw_step_s
+        else:
+            t_bw_adjustment = -t_bw_step_s
+    next_t_bw_s = effective_current_t_bw + t_bw_adjustment
+
+    return next_L_s, next_t_bw_s
+
+
+# Demo: a single PLC adjustment step toward the model's suggestion
+if __name__ == "__main__":
+    current_L_s = 3920
+    current_t_bw_s = 98
+    model_prev_L_s = None
+    model_prev_t_bw_s = None
+    model_L_s = 4160
+    model_t_bw_s = 96
+    next_L_s, next_t_bw_s = generate_plc_instructions(current_L_s, current_t_bw_s, model_prev_L_s, model_prev_t_bw_s, model_L_s, model_t_bw_s)
+    print(f"next_L_s={next_L_s}, next_t_bw_s={next_t_bw_s}")

+ 33 - 0
models/uf-rl/UF_models.py

@@ -0,0 +1,33 @@
+import torch
+import numpy as np
+
+# TMP increase model (filtration fouling)
+class TMPIncreaseModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+    def forward(self, p, L_h):
+        return float(p.alpha * (p.q_UF ** p.belta) * L_h)
+
+# Backwash TMP removal model
+class TMPDecreaseModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+    def forward(self, p, L_s, t_bw_s):
+        L = max(float(L_s), 1.0)
+        t = max(float(t_bw_s), 1e-6)
+        upper_L = p.phi_bw_min + (p.phi_bw_max - p.phi_bw_min) * np.exp(- L / p.L_ref_s)
+        time_gain = 1.0 - np.exp(- (t / p.tau_bw_s) ** p.gamma_t)
+        phi = upper_L * time_gain
+        return float(np.clip(phi, 0.0, 0.999))
+
+
+if __name__ == "__main__":
+    model_fp = TMPIncreaseModel()
+    model_bw = TMPDecreaseModel()
+
+    torch.save(model_fp.state_dict(), "uf_fp.pth")
+    torch.save(model_bw.state_dict(), "uf_bw.pth")
+
+    print("Models saved as uf_fp.pth and uf_bw.pth")

BIN
models/uf-rl/uf_bw.pth


BIN
models/uf-rl/uf_fp.pth


+ 18 - 0
requirements-dev.txt

@@ -0,0 +1,18 @@
+# Development Dependencies
+-r requirements.txt
+
+# Additional Development Tools
+jupyter==1.0.0
+notebook==7.0.6
+ipython==8.18.1
+
+# API Testing
+newman==0.0.1
+
+# Documentation
+mkdocs==1.5.3
+mkdocs-material==9.4.8
+
+# Performance Testing
+locust==2.17.0
+

+ 46 - 0
requirements.txt

@@ -0,0 +1,46 @@
+# Core Framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.5.0
+pydantic-settings==2.1.0
+
+# Database
+sqlalchemy==2.0.23
+alembic==1.13.1
+psycopg2-binary==2.9.9
+redis==5.0.1
+
+# HTTP Client
+httpx==0.25.2
+aiohttp==3.9.1
+
+# Authentication & Security
+python-jose[cryptography]==3.3.0
+passlib[bcrypt]==1.7.4
+python-multipart==0.0.6
+
+# Message Queue
+celery==5.3.4
+kombu==5.3.4
+
+# Monitoring & Logging
+prometheus-client==0.19.0
+structlog==23.2.0
+
+# Testing
+pytest==7.4.3
+pytest-asyncio==0.21.1
+pytest-cov==4.1.0
+
+# Code Quality
+black==23.11.0
+isort==5.12.0
+mypy==1.7.1
+flake8==6.1.0
+pre-commit==3.6.0
+
+# Development
+python-dotenv==1.0.0
+watchfiles==0.21.0
+