Coverage for rivapy/marketdata_tools/pfc_shaper.py: 92%
89 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-05 14:27 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-05 14:27 +0000
1import abc
2import holidays
3import numpy as np
4import pandas as pd
5import datetime as dt
6import rivapy.tools.interfaces as interfaces
7import rivapy.tools._validators as validator
8from rivapy.tools.scheduler import SimpleSchedule
9from typing import List, Dict, Literal, Optional
class PFCShaper(interfaces.FactoryObject):
    """Interface for shaping models of energy price forward curves (PFC).

    Each shaping model for energy price forward curves must inherit from this base class.

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__()
        validator._check_pandas_index_for_datetime(spot_prices)
        self.spot_prices = spot_prices
        self.holiday_calendar = holiday_calendar
        self.normalization_config = normalization_config

        # Normalization order (days, then weeks, then months). Each entry pairs the pandas
        # resample frequency with the strftime pattern that identifies one such period.
        self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

    @abc.abstractmethod
    def calibrate(self) -> np.ndarray:
        """Calibration of the shaping model.

        Returns:
            np.ndarray: Numpy array containing the fit.
        """
        pass

    @abc.abstractmethod
    def apply(self, apply_schedule: SimpleSchedule):
        """Applies the model on a schedule in order to generate a shape for future dates.

        Args:
            apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.
        """
        pass

    @abc.abstractmethod
    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the regression parameters on the instance.

        The method is declared abstract but carries an implementation, so subclasses
        override it and may delegate to it via ``super()``.

        Args:
            params (np.ndarray): Regression parameters to store in ``_regression_parameters``.
        """
        self._regression_parameters = params

    @staticmethod
    def _divide_by_period_mean(shape: pd.DataFrame, period_means: pd.DataFrame, period_format: str) -> pd.DataFrame:
        """Divides every row of ``shape`` by the mean of the period the row belongs to.

        Args:
            shape (pd.DataFrame): Frame to normalize.
            period_means (pd.DataFrame): Resampled means, one row per period.
            period_format (str): strftime pattern mapping a timestamp onto its period key.

        Returns:
            pd.DataFrame: Frame with the same index and row order as ``shape``. Rows whose period
            has no entry in ``period_means`` contain NaN.
        """
        keyed_means = period_means.copy()
        keyed_means.index = period_means.index.strftime(period_format)

        # Look up the divisor for each row by its period key. This keeps the result row-for-row
        # aligned with ``shape``, even if the resampling produced periods without any data
        # (gaps in the schedule) or if ``shape`` is not sorted chronologically.
        divisors = keyed_means.reindex(shape.index.strftime(period_format))
        divisors.index = shape.index
        return shape.divide(divisors)

    @staticmethod
    def _normalize_year(shape: pd.DataFrame) -> pd.DataFrame:
        """Normalizes ``shape`` such that the mean over each individual year equals one.

        Args:
            shape (pd.DataFrame): Shape which should be normalized.

        Returns:
            pd.DataFrame: Yearly normalized shape carrying the timestamps of ``shape`` as index.
        """
        yearly_means = shape.resample("YE").mean()
        normalized = PFCShaper._divide_by_period_mean(shape, yearly_means, "%Y")
        normalized.index = list(shape.index)
        return normalized

    def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:
        """Normalizes the shape based on ``normalization_config``.

        ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.

        ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.

        ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
        The remaining shape is then normalized over the individual years.

        Example:
            ``D`` is 2, ``W`` is 2 and ``ME`` is 1. The shape starts at 03.03.2025 (monday).
            Since ``D`` is 2, the shape is normalized for 03.03.2025 and 04.03.2025 individually.

            The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.

            The month is then normalized from 17.03.2025 to 31.03.2025.
            The remaining shape (starting from 01.04.2025) is normalized on a yearly level.

        Args:
            shape (pd.DataFrame): Shape which should be normalized

        Returns:
            pd.DataFrame: Normalized shape
        """
        if self.normalization_config is None:
            return self._normalize_year(shape)

        # The normalization through the normalization_config is done in different parts.
        normalized_datetimes = []
        normalized_shapes = []

        # Iterate over the correct normalization order.
        for resample_freq, resample_format in self.__normalization_order:
            n_periods = self.normalization_config.get(resample_freq, None)
            if n_periods is None:
                continue

            # If the whole shape is already normalized by the previous steps, stop here.
            if len(normalized_datetimes) == len(shape):
                return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

            # Part of the shape which was not covered by any previous normalization.
            remaining_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]

            # Means of the first configured number of days, weeks or months.
            period_means = remaining_shape.resample(resample_freq).mean().iloc[:n_periods, :]

            partially_normalized_shape = self._divide_by_period_mean(remaining_shape, period_means, resample_format)

            # Rows outside the configured periods are NaN and are dropped here; they are picked up
            # by the next normalization step. The index is named "datetimes" as before.
            partially_normalized_shape.index = pd.DatetimeIndex(list(remaining_shape.index), name="datetimes")
            partially_normalized_shape = partially_normalized_shape.dropna()

            normalized_datetimes += list(partially_normalized_shape.index)
            normalized_shapes.append(partially_normalized_shape)

        if len(normalized_datetimes) == len(shape):
            return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

        # The remaining shape is normalized on a yearly basis.
        leftover_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]
        yearly_normalized_shape = self._normalize_year(leftover_shape)

        return pd.concat(normalized_shapes + [yearly_normalized_shape], axis=0).sort_index(ascending=True)

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        return {"spot_prices": self.spot_prices, "holiday_calendar": self.holiday_calendar, "normalization_config": self.normalization_config}
class CategoricalRegression(PFCShaper):
    r"""Linear regression model using categorical predictor variables to construct a PFC shape.

    .. math::

        S(t) = S_0 + \sum^{23}_{i=1}\beta^h_i\cdot\mathbb{I}_{h(t)=i} + \beta^d\cdot\mathbb{I}_{d(t)=1} + \beta^H\cdot\mathbb{I}_{H(t)=1} + \sum^{12}_{i=2}\beta^m_i\cdot\mathbb{I}_{m(t)=i}

    where:

    :math:`S_0`: Spot price level

    :math:`\mathbb{I}_x = \begin{cases} 1, & \text{if the } x \text{ expression renders true} \\ 0, & \text{if the } x \text{ expression renders false} \end{cases}`

    :math:`h(t)`: Hour of t

    :math:`d(t) = \begin{cases} 1, & \text{if t is a weekday} \\ 0, & \text{if t is a day on a weekend} \end{cases}`

    :math:`H(t) = \begin{cases} 1, & \text{if t is a public holiday} \\ 0, & \text{if t is not a public holiday} \end{cases}`

    :math:`m(t)`: Month of t

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)

    def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:
        """Transforms a list of datetimes in a numpy array which can then be used for the linear regression.

        The columns are, in this order: offset, weekday indicator, holiday indicator,
        hour indicators (only if more than one distinct hour is present; the first observed
        hour is the reference category) and the indicators for the months 2 to 12.

        Args:
            datetimes_list (List[dt.datetime]): List of datetimes

        Returns:
            np.ndarray: Numpy array containing the transformed datetimes list
        """
        _datetime_series = pd.Series(datetimes_list)

        offset = np.ones(shape=(len(_datetime_series), 1))
        weekday = _datetime_series.dt.weekday.isin([0, 1, 2, 3, 4]).astype(int).to_numpy().reshape(-1, 1)
        # NOTE(review): this matches the full timestamps against the calendar keys. If the keys are
        # plain dates, only timestamps at midnight are flagged as holiday -- confirm this is intended.
        holiday = _datetime_series.isin(pd.to_datetime(list(self.holiday_calendar.keys()))).astype(int).to_numpy().reshape(-1, 1)

        predictors = [offset, weekday, holiday]

        # Hour dummies are only added for intraday data. For data with a single distinct hour
        # (e.g. daily data) they are left out entirely.
        n_unique_hours = len(_datetime_series.dt.hour.unique())
        if n_unique_hours > 1:
            hours = (
                pd.get_dummies(_datetime_series.dt.hour, prefix="hour", drop_first=True)
                .astype(int)
                .to_numpy()
                .reshape(-1, n_unique_hours - 1)
            )
            predictors.append(hours)

        # One indicator column for each of the months 2..12 (January is the reference category).
        # Fixed categories keep the number of columns at 11 even if the datetimes do not
        # cover all twelve months, e.g. when applying the model to a sub-year schedule.
        month_of_year = _datetime_series.dt.month.to_numpy().reshape(-1, 1)
        month = (month_of_year == np.arange(2, 13)).astype(int)
        predictors.append(month)

        return np.concatenate(predictors, axis=1)

    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the regression parameters by delegating to the base class implementation.

        Args:
            params (np.ndarray): Regression parameters.
        """
        super()._set_regression_parameters(params=params)

    def calibrate(self) -> np.ndarray:
        """Fits the regression on the first column of ``spot_prices`` via the normal equations.

        Returns:
            np.ndarray: Fitted values for the timestamps of ``spot_prices`` as a column vector.
        """
        data_array = self._transform(datetimes_list=self.spot_prices.index)
        target = self.spot_prices.iloc[:, 0].to_numpy().reshape(-1, 1)
        self._set_regression_parameters(np.linalg.inv(data_array.T @ data_array) @ data_array.T @ target)
        return data_array @ self._regression_parameters

    def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:
        """Applies the calibrated model on a schedule and returns the normalized shape.

        Args:
            apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.

        Returns:
            pd.DataFrame: Normalized shape with a single column ``shape``, indexed by the schedule datetimes.
        """
        apply_schedule_datetime_list = apply_schedule.get_schedule()
        data_array = self._transform(datetimes_list=apply_schedule_datetime_list)
        shape = data_array @ self._regression_parameters

        # Flatten the column vector; reshape keeps a 1-d array even for a single-entry schedule.
        shape_df = pd.DataFrame({"shape": shape.reshape(-1)}, index=apply_schedule_datetime_list)
        return self.normalize_shape(shape=shape_df)

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        return super()._to_dict()