Compare commits

1 Commits

lüftungsan ... load_forec

| Author | SHA1 | Date |
|---|---|---|
|  | 13f27e12e8 |  |

forecaster/load_forecaster.py (349 lines, Normal file)

@@ -0,0 +1,349 @@

# load_forecaster.py
# -*- coding: utf-8 -*-
"""
LoadForecaster: builds a 36-hour forecast at 15-min resolution from InfluxDB data.

- Data source: InfluxDB (Flux query provided by user)
- Target: House load = M_AC_real - I_AC_real
- Frequency: 15 minutes (changeable via init)
- Model: Keras (LSTM by default, pluggable)
- Persistence: Saves model (H5) and scaler (joblib)

Usage (example):

    from load_forecaster import LoadForecaster
    import tensorflow as tf

    lf = LoadForecaster(
        url="http://localhost:8086",
        token="<YOUR_TOKEN>",
        org="<YOUR_ORG>",
        bucket="allmende_db",
        agg_every="15m",
        input_hours=72,
        output_hours=36,
        model_path="model/load_forecaster.h5",
        scaler_path="model/scaler.joblib",
    )

    # Train or retrain
    lf.train_and_save(train_days=90, epochs=60)

    # Load model and forecast
    model = lf.load_model()
    forecast_df = lf.get_15min_forecast(model)
    print(forecast_df.head())
"""

from __future__ import annotations

import os
import math
import warnings
from dataclasses import dataclass
from typing import Optional, Tuple

import numpy as np
import pandas as pd
from influxdb_client import InfluxDBClient
from influxdb_client.client.warnings import MissingPivotFunction
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import NotFittedError
import joblib

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Suppress influxdb_client's "missing pivot()" warning; we pivot in pandas instead.
warnings.filterwarnings("ignore", category=MissingPivotFunction)


@dataclass
class InfluxParams:
    url: str
    token: str
    org: str
    bucket: str = "allmende_db"


class LoadForecaster:
    def __init__(
        self,
        url: str,
        token: str,
        org: str,
        bucket: str = "allmende_db",
        agg_every: str = "15m",
        input_hours: int = 72,
        output_hours: int = 36,
        model_path: str = "model/load_forecaster.h5",
        scaler_path: str = "model/scaler.joblib",
        feature_config: Optional[dict] = None,
    ) -> None:
        self.influx = InfluxParams(url=url, token=token, org=org, bucket=bucket)
        self.agg_every = agg_every
        self.input_steps = int((input_hours * 60) / self._freq_minutes(agg_every))
        self.output_steps = int((output_hours * 60) / self._freq_minutes(agg_every))
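        # Illustrative step arithmetic: with agg_every="15m", input_hours=72
        # gives input_steps = 72 * 60 / 15 = 288, and output_hours=36 gives
        # output_steps = 36 * 60 / 15 = 144.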
        self.model_path = model_path
        self.scaler_path = scaler_path
        self.feature_config = feature_config or {"use_temp": True, "use_time_cyc": True}
        self._scaler: Optional[StandardScaler] = None

        # Ensure model/scaler dirs exist (skip when a path has no directory part)
        for path in (model_path, scaler_path):
            dirname = os.path.dirname(path)
            if dirname:
                os.makedirs(dirname, exist_ok=True)

    # ---------------------------- Public API ---------------------------- #
    def get_15min_forecast(self, model: tf.keras.Model) -> pd.DataFrame:
        """Create a 36-hour forecast at 15-min resolution using the latest data.

        Assumes a StandardScaler has been fitted during training and saved.
        The method uses the most recent input window from InfluxDB.
        """
        # Pull just enough history for one input window
        history_hours = math.ceil(self.input_steps * self._freq_minutes(self.agg_every) / 60)
        df = self._query_and_prepare(range_hours=history_hours)
        if len(df) < self.input_steps:
            raise RuntimeError(f"Not enough data: need {self.input_steps} steps, got {len(df)}")

        # Build features for the latest window
        feats = self._build_features(df)
        X_window = feats[-self.input_steps :]

        # Load scaler and scale the window per feature
        scaler = self._load_or_get_scaler()
        X_scaled = scaler.transform(X_window)

        # Predict (model output is the scaled Load series, output_steps long)
        pred_scaled = model.predict(X_scaled[np.newaxis, ...], verbose=0)[0]

        # Inverse-transform only the target column: StandardScaler works on the
        # full feature matrix, so embed the prediction in column 0 (Load) of a
        # zero array, inverse-transform, and read column 0 back out.
        inv = np.zeros((self.output_steps, X_scaled.shape[1]))
        inv[:, 0] = pred_scaled
        inv_full = scaler.inverse_transform(inv)
        y_pred = inv_full[:, 0]
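
        # Illustrative numbers: if the scaler learned mean 500 W and std 200 W
        # for the Load column, a scaled prediction of 0.5 maps back to
        # 500 + 0.5 * 200 = 600 W.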

        # Build the forecast index; convert the Influx spec (e.g. "15m") into a
        # pandas offset via minutes, since pandas expects "min" for minutes.
        last_ts = df.index[-1]
        freq = pd.tseries.frequencies.to_offset(f"{self._freq_minutes(self.agg_every)}min")
        idx = pd.date_range(last_ts + freq, periods=self.output_steps, freq=freq)
        out = pd.DataFrame({"Forecast_Load": y_pred}, index=idx)
        out.index.name = "timestamp"
        return out

    def train_and_save(
        self,
        train_days: int = 90,
        epochs: int = 80,
        batch_size: int = 128,
        validation_split: float = 0.2,
        learning_rate: float = 1e-3,
        fine_tune: bool = False,
    ) -> tf.keras.Model:
        """Train (or fine-tune) a model from recent history and persist model + scaler."""
        df = self._query_and_prepare(range_hours=24 * train_days)
        feats = self._build_features(df)

        # Fit the per-feature scaler on the 2-D feature matrix *before* windowing,
        # so the same scaler can transform the single (input_steps, n_features)
        # window used at prediction time, and targets live in scaled space.
        scaler = StandardScaler()
        feats_scaled = scaler.fit_transform(feats)
        self._scaler = scaler
        joblib.dump(scaler, self.scaler_path)

        # Prepare sliding windows from the scaled features
        X, y = self._make_windows(feats_scaled)
        if len(X) < 10:
            raise RuntimeError("Not enough windowed samples to train.")

        # Build model (or load existing for fine-tune)
        if fine_tune and os.path.exists(self.model_path):
            model = load_model(self.model_path)
        else:
            model = self._build_default_model(input_dim=X.shape[1], output_dim=self.output_steps, lr=learning_rate)

        # Train; reshape flat windows back to (N, input_steps, n_features)
        es = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
        model.fit(
            X.reshape((-1, self.input_steps, X.shape[1] // self.input_steps)),
            y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[es],
            verbose=1,
        )

        model.save(self.model_path)
        return model
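
    # Tensor-shape sketch (defaults, all features enabled): feats is (T, 6),
    # X is (N, 288 * 6) and is reshaped to (N, 288, 6) for the LSTM, y is
    # (N, 144); the Dense head emits all 144 forecast steps in one shot.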

    # A convenience wrapper to be called from an external script once per day
    def retrain_daily(self, train_days: int = 90, epochs: int = 40, fine_tune: bool = True) -> None:
        self.train_and_save(train_days=train_days, epochs=epochs, fine_tune=fine_tune)

    def load_model(self) -> tf.keras.Model:
        if not os.path.exists(self.model_path):
            raise FileNotFoundError(f"Model not found at {self.model_path}")
        return load_model(self.model_path)

    # ------------------------- Internals: Data ------------------------- #
    def _query_and_prepare(self, range_hours: int) -> pd.DataFrame:
        """Query InfluxDB for the last `range_hours` and construct the Load series.

        Expected fields (exactly as in DB):
        - "40206 - M_AC_Power"
        - "40210 - M_AC_Power_SF"
        - "40083 - I_AC_Power"
        - "40084 - I_AC_Power_SF"
        - "300 - Aussentemperatur" (outside temperature)
        """
        start_str = f"-{range_hours}h"
        flux = f'''
            from(bucket: "{self.influx.bucket}")
              |> range(start: {start_str})
              |> filter(fn: (r) => r["_measurement"] == "solaredge_meter" or r["_measurement"] == "solaredge_master" or r["_measurement"] == "hp_master")
              |> filter(fn: (r) => r["_field"] == "40206 - M_AC_Power" or r["_field"] == "40210 - M_AC_Power_SF" or r["_field"] == "40083 - I_AC_Power" or r["_field"] == "40084 - I_AC_Power_SF" or r["_field"] == "300 - Aussentemperatur")
              |> aggregateWindow(every: {self.agg_every}, fn: mean, createEmpty: false)
              |> yield(name: "mean")
        '''
        with InfluxDBClient(url=self.influx.url, token=self.influx.token, org=self.influx.org) as client:
            tables = client.query_api().query_data_frame(flux)

        # Concatenate if a list of frames is returned
        if isinstance(tables, list):
            df = pd.concat(tables, ignore_index=True)
        else:
            df = tables

        # Keep relevant columns and pivot; pivot_table averages any duplicate
        # (_time, _field) pairs instead of raising like pivot() would.
        df = df[["_time", "_field", "_value"]]
        df = df.pivot_table(index="_time", columns="_field", values="_value", aggfunc="mean").reset_index()
        df = df.rename(
            columns={
                "_time": "timestamp",
                "40206 - M_AC_Power": "M_AC",
                "40210 - M_AC_Power_SF": "M_SF",
                "40083 - I_AC_Power": "I_AC",
                "40084 - I_AC_Power_SF": "I_SF",
                "300 - Aussentemperatur": "Temp",
            }
        )
        df = df.sort_values("timestamp").set_index("timestamp")
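
        # Illustrative frame at this point (values invented):
        #   timestamp                   I_AC    I_SF    M_AC    M_SF   Temp
        #   2024-01-01 00:00:00+00:00   1234.0  -1.0   -567.0   -1.0    3.2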

        # Forward-fill reasonable gaps (e.g., scale factors and temperature)
        df[["M_SF", "I_SF", "Temp"]] = df[["M_SF", "I_SF", "Temp"]].ffill()

        # Apply scaling: real = value * 10^sf
        df["I_AC_real"] = df["I_AC"] * np.power(10.0, df["I_SF"]).astype(float)
        df["M_AC_real"] = df["M_AC"] * np.power(10.0, df["M_SF"]).astype(float)

        # Compute load
        df["Load"] = df["M_AC_real"] - df["I_AC_real"]

        # Ensure a regular grid; express the frequency in minutes so pandas
        # accepts it (Influx "15m" is not a valid pandas frequency alias).
        pd_freq = f"{self._freq_minutes(self.agg_every)}min"
        df = df.asfreq(pd_freq)
        df[["Load", "Temp"]] = df[["Load", "Temp"]].interpolate(limit_direction="both")

        return df[["Load", "Temp"]]

    def _build_features(self, df: pd.DataFrame) -> np.ndarray:
        """Create feature matrix: [Load, Temp?, sin/cos day, sin/cos dow].

        Load must stay in column 0: the target extraction in `_make_windows`
        and the inverse transform in `get_15min_forecast` both rely on it.
        """
        feats = [df["Load"].values.reshape(-1, 1)]

        if self.feature_config.get("use_temp", True):
            feats.append(df["Temp"].values.reshape(-1, 1))

        if self.feature_config.get("use_time_cyc", True):
            # Encode time of day and day of week as sin/cos pairs so that
            # 23:45 and midnight (or Sunday and Monday) end up close together.
            idx = df.index
            minute_of_day = (idx.hour * 60 + idx.minute).values.astype(float)
            sod = 2 * np.pi * minute_of_day / (24 * 60)
            dow = 2 * np.pi * idx.dayofweek.values.astype(float) / 7.0
            feats.append(np.sin(sod).reshape(-1, 1))
            feats.append(np.cos(sod).reshape(-1, 1))
            feats.append(np.sin(dow).reshape(-1, 1))
            feats.append(np.cos(dow).reshape(-1, 1))
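            # Illustrative: 06:00 has minute_of_day = 360, so
            # sod = 2*pi*360/1440 = pi/2, i.e. sin(sod) = 1.0 and cos(sod) = 0.0.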

        X = np.hstack(feats)  # shape: (T, n_features)

        # Returned as 2-D (T, n_features): the scaler fits on this matrix, and
        # `_make_windows` flattens each window, which is later reshaped to 3-D
        # for Keras.
        return X

    def _make_windows(self, X_2d: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Create sliding windows: returns (X_flat, y), where X_flat stacks the flattened windows.

        Expects the (already scaled) feature matrix; for Keras we later reshape
        X_flat -> (N, input_steps, n_features).
        """
        n = X_2d.shape[0]
        n_features = X_2d.shape[1]
        X_list, y_list = [], []
        for i in range(n - self.input_steps - self.output_steps):
            xw = X_2d[i : i + self.input_steps, :]
            yw = X_2d[i + self.input_steps : i + self.input_steps + self.output_steps, 0]  # target: Load
            X_list.append(xw.reshape(-1))  # flatten to 1-D
            y_list.append(yw)
        X_flat = np.stack(X_list)
        y = np.stack(y_list)
        return X_flat, y
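
    # Window-count sketch: 90 days of 15-min data gives T = 8640 rows, so with
    # input_steps = 288 and output_steps = 144 there are 8640 - 288 - 144 = 8208 samples.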

    # ----------------------- Internals: Modeling ----------------------- #
    def _build_default_model(self, input_dim: int, output_dim: int, lr: float = 1e-3) -> tf.keras.Model:
        n_features = input_dim // self.input_steps
        model = Sequential([
            LSTM(96, input_shape=(self.input_steps, n_features), return_sequences=False),
            Dropout(0.1),
            Dense(output_dim),
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse")
        return model
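
    # The model is pluggable: any Keras model that consumes (input_steps,
    # n_features) sequences and emits output_steps values can replace the
    # default, e.g. a hypothetical GRU variant:
    #   Sequential([GRU(96, input_shape=(self.input_steps, n_features)), Dense(output_dim)])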

    def _load_or_get_scaler(self) -> StandardScaler:
        if self._scaler is not None:
            return self._scaler
        if not os.path.exists(self.scaler_path):
            raise NotFittedError("Scaler not found. Train the model first to create the scaler.")
        self._scaler = joblib.load(self.scaler_path)
        return self._scaler

    @staticmethod
    def _freq_minutes(spec: str) -> int:
        # Supports Influx-style specs such as "15m" (minutes) and "1h" (hours),
        # e.g. _freq_minutes("15m") == 15 and _freq_minutes("1h") == 60.
        if spec.endswith("m"):
            return int(spec[:-1])
        if spec.endswith("h"):
            return int(spec[:-1]) * 60
        raise ValueError(f"Unsupported frequency spec: {spec}")


# ----------------------------- retrain_daily.py -----------------------------
# A tiny script you can run once per day (e.g., via cron/systemd) to retrain the
# model. It delegates the work to LoadForecaster.retrain_daily().

if __name__ == "__main__":
    # Read credentials/config from env vars or fill them in here
    URL = os.getenv("INFLUX_URL", "http://localhost:8086")
    TOKEN = os.getenv("INFLUX_TOKEN", "<YOUR_TOKEN>")
    ORG = os.getenv("INFLUX_ORG", "<YOUR_ORG>")
    BUCKET = os.getenv("INFLUX_BUCKET", "allmende_db")

    lf = LoadForecaster(
        url=URL,
        token=TOKEN,
        org=ORG,
        bucket=BUCKET,
        agg_every="15m",
        input_hours=72,
        output_hours=36,
        model_path=os.getenv("FORECASTER_MODEL", "model/load_forecaster.h5"),
        scaler_path=os.getenv("FORECASTER_SCALER", "model/scaler.joblib"),
    )

    # One call per day is enough; decrease epochs for faster daily updates
    lf.retrain_daily(train_days=int(os.getenv("TRAIN_DAYS", "120")), epochs=int(os.getenv("EPOCHS", "30")), fine_tune=True)

    # Optionally, produce a fresh forecast right after training
    try:
        model = lf.load_model()
        fc = lf.get_15min_forecast(model)
        # Save the latest forecast to CSV for dashboards/consumers
        out_path = os.getenv("FORECAST_OUT", "model/latest_forecast_15min.csv")
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fc.to_csv(out_path)
        print(f"Saved forecast: {out_path}")
    except Exception as e:
        print(f"Forecast generation failed: {e}")