Coverage for rivapy/marketdata_tools/pfc_shaper.py: 92%
89 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-01 15:21 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-01 15:21 +0000
1import abc
2import holidays
3import numpy as np
4import pandas as pd
5import datetime as dt
6import rivapy.tools.interfaces as interfaces
7import rivapy.tools._validators as validator
8from rivapy.tools.scheduler import SimpleSchedule
9from typing import List, Dict, Literal, Optional
class PFCShaper(interfaces.FactoryObject):
    """Common interface for energy price forward curve (PFC) shaping models.

    Concrete shaping models derive from this class and implement ``calibrate``, ``apply``
    and ``_set_regression_parameters``. The shared shape normalization lives here.

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model. Its index is checked by
            ``validator._check_pandas_index_for_datetime``.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): Dictionary
            configuring the shape normalization periods.
            ``D`` is the number of days at the beginning of the shape whose individual mean is normalized to one.
            ``W`` is the number of weeks at the beginning of the shape whose individual mean is normalized to one.
            ``ME`` is the number of months at the beginning of the shape whose individual mean is normalized to one.
            Whatever remains of the shape is normalized per calendar year. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__()
        validator._check_pandas_index_for_datetime(spot_prices)

        self.spot_prices = spot_prices
        self.holiday_calendar = holiday_calendar
        self.normalization_config = normalization_config

        # Order in which the configured periods are processed. Each entry pairs the pandas resample
        # frequency with the strftime pattern that labels a timestamp with its period.
        self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

    @abc.abstractmethod
    def calibrate(self) -> np.ndarray:
        """Calibrate the shaping model.

        Returns:
            np.ndarray: Numpy array containing the fit.
        """

    @abc.abstractmethod
    def apply(self, apply_schedule: SimpleSchedule):
        """Apply the model to a schedule in order to generate a shape for future dates.

        Args:
            apply_schedule (SimpleSchedule): Schedule object describing the dates the shape is generated for.
        """

    @abc.abstractmethod
    def _set_regression_parameters(self, params: np.ndarray):
        # Abstract, yet with a body: subclasses must override it and can delegate here via super().
        self._regression_parameters = params

    def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:
        """Normalize ``shape`` according to ``normalization_config``.

        Without a configuration the whole shape is divided by its calendar-year means. With a
        configuration, the periods are processed in the order days, weeks, months:

        - ``D``: number of leading days whose individual mean is normalized to one.
        - ``W``: number of leading weeks (of the part not yet normalized) whose individual mean is normalized to one.
        - ``ME``: number of leading months (of the part not yet normalized) whose individual mean is normalized to one.

        Whatever has not been covered afterwards is normalized per calendar year.

        Example:
            ``D`` is 2, ``W`` is 2 and ``ME`` is 1 and the shape starts on 03.03.2025 (a Monday).
            The days 03.03.2025 and 04.03.2025 are normalized individually.
            The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.
            The month is normalized from 17.03.2025 to 31.03.2025.
            Everything from 01.04.2025 onwards is normalized on a yearly level.

        Args:
            shape (pd.DataFrame): Shape which should be normalized.

        Returns:
            pd.DataFrame: Normalized shape.
        """

        def _scale_by_calendar_year(frame: pd.DataFrame, original_index: List[dt.datetime]) -> pd.DataFrame:
            # Label rows and yearly means with the year string so that the division aligns on the year.
            def _year_label(timestamp):
                return timestamp.strftime("%Y")

            yearly_mean = frame.resample("YE").mean()
            scaled = frame.rename(index=_year_label).divide(yearly_mean.rename(index=_year_label), axis="index")

            # The division leaves year labels as index; put the original timestamps back.
            scaled = scaled.reset_index(drop=True)
            scaled.index = original_index
            return scaled

        config = self.normalization_config
        if config is None:
            return _scale_by_calendar_year(shape, list(shape.index.copy()))

        # The configured normalization is assembled piece by piece.
        done_timestamps: List[dt.datetime] = []
        pieces: List[pd.DataFrame] = []

        for freq, label_format in self.__normalization_order:
            period_count = config.get(freq, None)
            if period_count is None:
                continue

            # Nothing left to normalize: earlier periods already covered the whole shape.
            if len(done_timestamps) == len(shape):
                return pd.concat(pieces, axis=0).sort_index(ascending=True)

            def _period_label(timestamp, _pattern=label_format):
                return timestamp.strftime(_pattern)

            # Rows that no earlier period has normalized yet.
            pending = shape.loc[~shape.index.isin(done_timestamps), :]

            # Means of the first ``period_count`` periods of the pending part.
            period_means = pending.resample(freq).mean()
            period_means = period_means.iloc[:period_count, :]

            piece = pending.rename(index=_period_label).divide(period_means.rename(index=_period_label), axis="index")

            # After the division the index holds period labels. Restore the exact timestamps as index;
            # rows outside the selected periods are NaN and are dropped.
            piece["datetimes"] = list(pending.index)
            piece = piece.reset_index(drop=True).set_index("datetimes").dropna()

            done_timestamps.extend(list(piece.index))
            pieces.append(piece)

        if len(done_timestamps) != len(shape):
            # Whatever is left over is normalized on a yearly basis.
            remainder = shape.loc[~shape.index.isin(done_timestamps), :]
            pieces = pieces + [_scale_by_calendar_year(remainder, list(remainder.index))]

        return pd.concat(pieces, axis=0).sort_index(ascending=True)

    def _to_dict(self):
        # Constructor arguments, keyed by their parameter names.
        return {
            "spot_prices": self.spot_prices,
            "holiday_calendar": self.holiday_calendar,
            "normalization_config": self.normalization_config,
        }
class CategoricalRegression(PFCShaper):
    """Linear regression model using categorical predictor variables to construct a PFC shape.

    .. math::

        S(t) = S_0 + \\sum^{23}_{i=1}\\beta^h_i\\cdot\\mathbb{I}_{h(t)=i} + \\beta^d\\cdot\\mathbb{I}_{d(t)=1} + \\beta^H\\cdot\\mathbb{I}_{H(t)=1} + \\sum^{12}_{i=2}\\beta^m_i\\cdot\\mathbb{I}_{m(t)=i}

    Where:

    - :math:`S_0`: Spot price level
    - :math:`\\mathbb{I}_x = \\begin{cases} 1, & \\text{if the } x \\text{ expression renders true} \\\\ 0, & \\text{if the } x \\text{ expression renders false} \\end{cases}`
    - :math:`h(t)`: Hour of t
    - :math:`d(t) = \\begin{cases} 1, & \\text{if t is a weekday} \\\\ 0, & \\text{if t is a day on a weekend} \\end{cases}`
    - :math:`H(t) = \\begin{cases} 1, & \\text{if t public holiday} \\\\ 0, & \\text{if t is not a public holiday} \\end{cases}`
    - :math:`m(t)`: Month of t

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model. Only the first column is used as regression target.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)

    def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:
        """Transforms a list of datetimes into the design matrix used for the linear regression.

        Column layout: offset (ones), weekday indicator, holiday indicator, hour dummies (only if the
        input contains more than one distinct hour), month dummies for February to December.

        Args:
            datetimes_list (List[dt.datetime]): List of datetimes

        Returns:
            np.ndarray: Numpy array containing the transformed datetimes list
        """
        _datetime_series = pd.Series(datetimes_list)

        offset = np.ones(shape=(len(_datetime_series), 1))
        weekday = _datetime_series.dt.weekday.isin([0, 1, 2, 3, 4]).astype(int).to_numpy().reshape(-1, 1)

        # Compare calendar dates rather than full timestamps, so that every observation of a holiday
        # is flagged and not only the one at 00:00.
        # NOTE(review): only dates already present in ``holiday_calendar.keys()`` are considered; a calendar
        # that fills itself lazily may need its years specified up front - confirm against the holidays package.
        holiday_dates = set(pd.to_datetime(list(self.holiday_calendar.keys())).date)
        holiday = _datetime_series.dt.date.isin(holiday_dates).astype(int).to_numpy().reshape(-1, 1)

        predictors = [offset, weekday, holiday]

        # Hour dummies are only meaningful for intraday data; the smallest hour is the reference category.
        # NOTE: the number of hour columns follows the distinct hours of the input, hence the datetimes used
        # for calibration and application must contain the same set of hours.
        hour_of_day = _datetime_series.dt.hour
        distinct_hours = len(hour_of_day.unique())
        if distinct_hours > 1:
            hours = pd.get_dummies(hour_of_day, prefix="hour", drop_first=True).astype(int).to_numpy().reshape(-1, distinct_hours - 1)
            predictors.append(hours)

        # Month dummies against the fixed categories 2..12 (January is the reference category). This always
        # yields 11 columns, even if the input does not cover every month of the year.
        month_of_year = _datetime_series.dt.month.to_numpy().reshape(-1, 1)
        month = (month_of_year == np.arange(2, 13)).astype(int)
        predictors.append(month)

        return np.concatenate(predictors, axis=1)

    def _set_regression_parameters(self, params: np.ndarray):
        super()._set_regression_parameters(params=params)

    def calibrate(self) -> np.ndarray:
        """Fits the regression on the first column of ``spot_prices`` by ordinary least squares.

        Returns:
            np.ndarray: Fitted values for the calibration datetimes as a column vector.
        """
        data_array = self._transform(datetimes_list=self.spot_prices.index)
        target = self.spot_prices.iloc[:, 0].to_numpy().reshape(-1, 1)

        # Least-squares solve instead of explicitly inverting X^T X: same solution for a full-rank design
        # matrix, and still defined if a predictor column is constant or empty.
        params, *_ = np.linalg.lstsq(data_array, target, rcond=None)
        self._set_regression_parameters(params)
        return data_array @ self._regression_parameters

    def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:
        """Generates the normalized shape for the datetimes of ``apply_schedule``.

        Requires ``calibrate`` (or ``_set_regression_parameters``) to have been called before.

        Args:
            apply_schedule (SimpleSchedule): Schedule whose ``get_schedule()`` datetimes the shape is generated for.

        Returns:
            pd.DataFrame: Normalized shape with a single column ``shape`` indexed by the schedule datetimes.
        """
        apply_schedule_datetime_list = apply_schedule.get_schedule()
        data_array = self._transform(datetimes_list=apply_schedule_datetime_list)
        shape = data_array @ self._regression_parameters

        # Take the single result column explicitly; this stays one-dimensional for a one-entry schedule.
        shape_df = pd.DataFrame({"shape": shape[:, 0]}, index=apply_schedule_datetime_list)
        return self.normalize_shape(shape=shape_df)

    def _to_dict(self):
        return super()._to_dict()