From 13f27e12e8e6a0879ba32370d24c64f7cfe0d7aa Mon Sep 17 00:00:00 2001
From: Nils Reiners
Date: Thu, 23 Oct 2025 13:21:28 +0200
Subject: [PATCH] first version of load forecaster implemented - not yet running

---
 forecaster/load_forecaster.py | 349 ++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)
 create mode 100644 forecaster/load_forecaster.py

diff --git a/forecaster/load_forecaster.py b/forecaster/load_forecaster.py
new file mode 100644
index 0000000..59e352b
--- /dev/null
+++ b/forecaster/load_forecaster.py
@@ -0,0 +1,349 @@
+# load_forecaster.py
+# -*- coding: utf-8 -*-
+"""
+LoadForecaster: builds a 36-hour forecast at 15-min resolution from InfluxDB data.
+
+- Data source: InfluxDB (Flux query provided by user)
+- Target: House load = M_AC_real - I_AC_real
+- Frequency: 15 minutes (configurable via __init__)
+- Model: Keras (LSTM by default, pluggable)
+- Persistence: saves model (H5) and scaler (joblib)
+
+Usage (example):
+
+    from load_forecaster import LoadForecaster
+
+    lf = LoadForecaster(
+        url="http://localhost:8086",
+        token="",
+        org="",
+        bucket="allmende_db",
+        agg_every="15m",
+        input_hours=72,
+        output_hours=36,
+        model_path="model/load_forecaster.h5",
+        scaler_path="model/scaler.joblib",
+    )
+
+    # Train or retrain
+    lf.train_and_save(train_days=90, epochs=60)
+
+    # Load model and forecast
+    model = lf.load_model()
+    forecast_df = lf.get_15min_forecast(model)
+    print(forecast_df.head())
+
+"""
+from __future__ import annotations
+import os
+import math
+import warnings
+from dataclasses import dataclass
+from typing import Optional, Tuple
+
+import joblib
+import numpy as np
+import pandas as pd
+from influxdb_client import InfluxDBClient
+from influxdb_client.client.warnings import MissingPivotFunction
+from sklearn.preprocessing import StandardScaler
+from sklearn.exceptions import NotFittedError
+
+# TensorFlow / Keras
+import tensorflow as tf
+from tensorflow.keras.models import Sequential, load_model
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from tensorflow.keras.callbacks import EarlyStopping
+
+warnings.filterwarnings("ignore", category=MissingPivotFunction)
+
+
+@dataclass
+class InfluxParams:
+    url: str
+    token: str
+    org: str
+    bucket: str = "allmende_db"
+
+
+class LoadForecaster:
+    def __init__(
+        self,
+        url: str,
+        token: str,
+        org: str,
+        bucket: str = "allmende_db",
+        agg_every: str = "15m",
+        input_hours: int = 72,
+        output_hours: int = 36,
+        model_path: str = "model/load_forecaster.h5",
+        scaler_path: str = "model/scaler.joblib",
+        feature_config: Optional[dict] = None,
+    ) -> None:
+        self.influx = InfluxParams(url=url, token=token, org=org, bucket=bucket)
+        self.agg_every = agg_every
+        self.input_steps = int((input_hours * 60) / self._freq_minutes(agg_every))
+        self.output_steps = int((output_hours * 60) / self._freq_minutes(agg_every))
+        self.model_path = model_path
+        self.scaler_path = scaler_path
+        self.feature_config = feature_config or {"use_temp": True, "use_time_cyc": True}
+        self._scaler: Optional[StandardScaler] = None
+
+        # Ensure the model dir exists ("or '.'" guards against bare filenames)
+        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
+
+    # ---------------------------- Public API ---------------------------- #
+    def get_15min_forecast(self, model: tf.keras.Model) -> pd.DataFrame:
+        """Create a 36-hour forecast at 15-min resolution using the latest data.
+
+        Assumes a StandardScaler has been fitted during training and saved.
+        The method uses the most recent input window from InfluxDB.
+        """
+        # Pull just enough history for one input window
+        history_hours = math.ceil(self.input_steps * self._freq_minutes(self.agg_every) / 60)
+        df = self._query_and_prepare(range_hours=history_hours)
+        if len(df) < self.input_steps:
+            raise RuntimeError(f"Not enough data: need {self.input_steps} steps, got {len(df)}")
+
+        # Build features for the latest window
+        feats = self._build_features(df)
+        X_window = feats[-self.input_steps:]
+        n_features = X_window.shape[1]
+
+        # The scaler was fitted on flattened windows of length
+        # input_steps * n_features, so flatten the window before transforming,
+        # then reshape to 3D for the LSTM.
+        scaler = self._load_or_get_scaler()
+        X_scaled = scaler.transform(X_window.reshape(1, -1))
+        X_model = X_scaled.reshape(1, self.input_steps, n_features)
+
+        # Predict. train_and_save never scales the targets, so the model
+        # output is already in real units and needs no inverse transform.
+        y_pred = model.predict(X_model, verbose=0)[0]
+
+        # Build the forecast index. Influx's "15m" is translated into the
+        # unambiguous pandas alias "15min" before creating the offset.
+        last_ts = df.index[-1]
+        freq = pd.tseries.frequencies.to_offset(f"{self._freq_minutes(self.agg_every)}min")
+        idx = pd.date_range(last_ts + freq, periods=self.output_steps, freq=freq)
+        out = pd.DataFrame({"Forecast_Load": y_pred}, index=idx)
+        out.index.name = "timestamp"
+        return out
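+
+    # Illustrative shape walk-through for get_15min_forecast (a sketch, not
+    # executed code), assuming the defaults input_hours=72, output_hours=36,
+    # agg_every="15m" and all features enabled (n_features = 6):
+    #
+    #   input_steps  = 72 * 60 / 15 = 288
+    #   output_steps = 36 * 60 / 15 = 144
+    #   feats                 -> (T, 6)
+    #   X_window              -> (288, 6)
+    #   scaler.transform(...) -> (1, 288 * 6) = (1, 1728)  # scaler sees flat windows
+    #   X_model               -> (1, 288, 6)
+    #   model.predict(...)[0] -> (144,)  # one load value per future 15-min step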
+
+    def train_and_save(
+        self,
+        train_days: int = 90,
+        epochs: int = 80,
+        batch_size: int = 128,
+        validation_split: float = 0.2,
+        learning_rate: float = 1e-3,
+        fine_tune: bool = False,
+    ) -> tf.keras.Model:
+        """Train (or fine-tune) a model from recent history and persist model + scaler."""
+        df = self._query_and_prepare(range_hours=24 * train_days)
+        feats = self._build_features(df)
+
+        # Prepare windows
+        X, y = self._make_windows(feats)
+        if len(X) < 10:
+            raise RuntimeError("Not enough windowed samples to train.")
+
+        # Fit the scaler on the flattened windows. The targets y stay in raw
+        # units; get_15min_forecast relies on this and does not inverse-transform.
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(X)
+        self._scaler = scaler
+        joblib.dump(scaler, self.scaler_path)
+
+        # Build model (or load existing for fine-tune)
+        if fine_tune and os.path.exists(self.model_path):
+            model = load_model(self.model_path)
+        else:
+            model = self._build_default_model(input_dim=X.shape[1], output_dim=self.output_steps, lr=learning_rate)
+
+        # Train. The flat windows are reshaped back to (N, input_steps,
+        # n_features); row-major flattening in _make_windows makes this exact.
+        es = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
+        model.fit(
+            X_scaled.reshape((-1, self.input_steps, X.shape[1] // self.input_steps)),
+            y,
+            epochs=epochs,
+            batch_size=batch_size,
+            validation_split=validation_split,
+            callbacks=[es],
+            verbose=1,
+        )
+
+        model.save(self.model_path)
+        return model
+
+    # A convenience wrapper to be called from an external script once per day
+    def retrain_daily(self, train_days: int = 90, epochs: int = 40, fine_tune: bool = True) -> None:
+        self.train_and_save(train_days=train_days, epochs=epochs, fine_tune=fine_tune)
+
+    def load_model(self) -> tf.keras.Model:
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Model not found at {self.model_path}")
+        return load_model(self.model_path)
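+
+    # Scheduling sketch (illustrative; path, log file and time are assumptions,
+    # not project settings): a crontab entry that runs this module's __main__
+    # block, and with it retrain_daily(), every night at 02:00:
+    #
+    #   0 2 * * * cd /opt/forecaster && python3 load_forecaster.py >> retrain.log 2>&1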
+
+    # ------------------------- Internals: Data ------------------------- #
+    def _query_and_prepare(self, range_hours: int) -> pd.DataFrame:
+        """Query InfluxDB for the last `range_hours` and construct the Load series.
+
+        Expected fields (exactly as in DB):
+          - "40206 - M_AC_Power"
+          - "40210 - M_AC_Power_SF"
+          - "40083 - I_AC_Power"
+          - "40084 - I_AC_Power_SF"
+          - "300 - Aussentemperatur"  (outside temperature)
+        """
+        start_str = f"-{range_hours}h"
+        flux = f'''
+from(bucket: "{self.influx.bucket}")
+  |> range(start: {start_str})
+  |> filter(fn: (r) => r["_measurement"] == "solaredge_meter" or r["_measurement"] == "solaredge_master" or r["_measurement"] == "hp_master")
+  |> filter(fn: (r) => r["_field"] == "40206 - M_AC_Power" or r["_field"] == "40210 - M_AC_Power_SF" or r["_field"] == "40083 - I_AC_Power" or r["_field"] == "40084 - I_AC_Power_SF" or r["_field"] == "300 - Aussentemperatur")
+  |> aggregateWindow(every: {self.agg_every}, fn: mean, createEmpty: false)
+  |> yield(name: "mean")
+'''
+        with InfluxDBClient(url=self.influx.url, token=self.influx.token, org=self.influx.org) as client:
+            tables = client.query_api().query_data_frame(flux)
+
+        # Concatenate if a list of frames is returned
+        if isinstance(tables, list):
+            df = pd.concat(tables, ignore_index=True)
+        else:
+            df = tables
+
+        # Keep relevant columns and pivot
+        df = df[["_time", "_field", "_value"]]
+        df = df.pivot(index="_time", columns="_field", values="_value").reset_index()
+        df = df.rename(
+            columns={
+                "_time": "timestamp",
+                "40206 - M_AC_Power": "M_AC",
+                "40210 - M_AC_Power_SF": "M_SF",
+                "40083 - I_AC_Power": "I_AC",
+                "40084 - I_AC_Power_SF": "I_SF",
+                "300 - Aussentemperatur": "Temp",
+            }
+        )
+        df = df.sort_values("timestamp").set_index("timestamp")
+
+        # Forward-fill reasonable gaps (e.g., scale factors and temperature)
+        df[["M_SF", "I_SF", "Temp"]] = df[["M_SF", "I_SF", "Temp"]].ffill()
+
+        # Apply scaling: real = value * 10^sf
+        df["I_AC_real"] = df["I_AC"] * np.power(10.0, df["I_SF"]).astype(float)
+        df["M_AC_real"] = df["M_AC"] * np.power(10.0, df["M_SF"]).astype(float)
+
+        # Compute load
+        df["Load"] = df["M_AC_real"] - df["I_AC_real"]
+
+        # Ensure a regular 15-min grid ("15m" -> pandas alias "15min")
+        df = df.asfreq(f"{self._freq_minutes(self.agg_every)}min")
+        df[["Load", "Temp"]] = df[["Load", "Temp"]].interpolate(limit_direction="both")
+
+        return df[["Load", "Temp"]]
+
+    def _build_features(self, df: pd.DataFrame) -> np.ndarray:
+        """Create feature matrix: [Load, Temp?, sin/cos day, sin/cos dow]."""
+        feats = [df["Load"].values.reshape(-1, 1)]
+
+        if self.feature_config.get("use_temp", True):
+            feats.append(df["Temp"].values.reshape(-1, 1))
+
+        if self.feature_config.get("use_time_cyc", True):
+            idx = df.index
+            minute_of_day = (idx.hour * 60 + idx.minute).values.astype(float)
+            sod = 2 * np.pi * minute_of_day / (24 * 60)
+            dow = 2 * np.pi * idx.dayofweek.values.astype(float) / 7.0
+            feats.append(np.sin(sod).reshape(-1, 1))
+            feats.append(np.cos(sod).reshape(-1, 1))
+            feats.append(np.sin(dow).reshape(-1, 1))
+            feats.append(np.cos(dow).reshape(-1, 1))
+
+        # Shape (T, n_features); windowing and flattening happen in _make_windows
+        X = np.hstack(feats)
+        return X
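+
+    # Worked example for the cyclic encoding above (illustrative): at 18:00 on
+    # a Wednesday (dayofweek=2), minute_of_day = 1080, so
+    #   sod = 2*pi * 1080/1440 = 1.5*pi -> sin(sod) = -1.0,    cos(sod) =  0.0
+    #   dow = 2*pi * 2/7      ~= 1.795  -> sin(dow) ~= 0.975,  cos(dow) ~= -0.223
+    # Mapping time onto the unit circle keeps 23:45 next to 00:00, which a raw
+    # hour-of-day integer would not.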
+
+    def _make_windows(self, X_2d: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        """Create sliding windows: returns (X_flat, y) where X_flat stacks the windowed features.
+
+        For Keras, X_flat is later reshaped to (N, input_steps, n_features).
+        """
+        n = X_2d.shape[0]
+        X_list, y_list = [], []
+        # "+ 1" so the last complete input/output window is included
+        for i in range(n - self.input_steps - self.output_steps + 1):
+            xw = X_2d[i : i + self.input_steps, :]
+            yw = X_2d[i + self.input_steps : i + self.input_steps + self.output_steps, 0]  # target: Load
+            X_list.append(xw.reshape(-1))  # flatten row-major: t0f0, t0f1, ..., t1f0, ...
+            y_list.append(yw)
+        X_flat = np.stack(X_list)
+        y = np.stack(y_list)
+        return X_flat, y
+
+    # ----------------------- Internals: Modeling ----------------------- #
+    def _build_default_model(self, input_dim: int, output_dim: int, lr: float = 1e-3) -> tf.keras.Model:
+        n_features = input_dim // self.input_steps
+        model = Sequential([
+            LSTM(96, input_shape=(self.input_steps, n_features), return_sequences=False),
+            Dropout(0.1),
+            Dense(output_dim),
+        ])
+        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse")
+        return model
+
+    def _load_or_get_scaler(self) -> StandardScaler:
+        if self._scaler is not None:
+            return self._scaler
+        if not os.path.exists(self.scaler_path):
+            raise NotFittedError("Scaler not found. Train the model first to create it.")
+        self._scaler = joblib.load(self.scaler_path)
+        return self._scaler
+
+    @staticmethod
+    def _freq_minutes(spec: str) -> int:
+        # Supports Influx-style specs such as "15m" and "1h"
+        if spec.endswith("m"):
+            return int(spec[:-1])
+        if spec.endswith("h"):
+            return int(spec[:-1]) * 60
+        raise ValueError(f"Unsupported frequency spec: {spec}")
+
+
+# ----------------------------- retrain_daily.py -----------------------------
+# A tiny script you can run once per day (e.g., via cron/systemd) to retrain
+# the model. It delegates the work to LoadForecaster.retrain_daily().
+
+if __name__ == "__main__":
+    # Read credentials/config from env vars or fill in here
+    URL = os.getenv("INFLUX_URL", "http://localhost:8086")
+    TOKEN = os.getenv("INFLUX_TOKEN", "")
+    ORG = os.getenv("INFLUX_ORG", "")
+    BUCKET = os.getenv("INFLUX_BUCKET", "allmende_db")
+
+    lf = LoadForecaster(
+        url=URL,
+        token=TOKEN,
+        org=ORG,
+        bucket=BUCKET,
+        agg_every="15m",
+        input_hours=72,
+        output_hours=36,
+        model_path=os.getenv("FORECASTER_MODEL", "model/load_forecaster.h5"),
+        scaler_path=os.getenv("FORECASTER_SCALER", "model/scaler.joblib"),
+    )
+
+    # One call per day is enough; decrease epochs for faster daily updates
+    lf.retrain_daily(train_days=int(os.getenv("TRAIN_DAYS", "120")), epochs=int(os.getenv("EPOCHS", "30")), fine_tune=True)
+
+    # Optionally, produce a fresh forecast right after training
+    try:
+        model = lf.load_model()
+        fc = lf.get_15min_forecast(model)
+        # Save the latest forecast to CSV for dashboards/consumers
+        out_path = os.getenv("FORECAST_OUT", "model/latest_forecast_15min.csv")
+        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
+        fc.to_csv(out_path)
+        print(f"Saved forecast: {out_path}")
+    except Exception as e:
+        print(f"Forecast generation failed: {e}")
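+
+# Offline sanity check (an illustrative sketch, no InfluxDB required): it
+# exercises feature building and windowing on synthetic data. All values
+# below are assumptions for the example, not project defaults.
+#
+#     import numpy as np, pandas as pd
+#     lf = LoadForecaster(url="", token="", org="")
+#     idx = pd.date_range("2025-01-01", periods=4 * 24 * 7, freq="15min")  # 7 days
+#     df = pd.DataFrame({"Load": np.random.rand(len(idx)),
+#                        "Temp": np.random.rand(len(idx))}, index=idx)
+#     X = lf._build_features(df)        # (672, 6)
+#     X_flat, y = lf._make_windows(X)   # (241, 1728), (241, 144)
+#     assert X_flat.shape[1] == lf.input_steps * X.shape[1]
+#     assert y.shape[1] == lf.output_steps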