Coverage for rivapy/marketdata_tools/pfc_shaper.py: 92%

89 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-08-01 15:21 +0000

1import abc 

2import holidays 

3import numpy as np 

4import pandas as pd 

5import datetime as dt 

6import rivapy.tools.interfaces as interfaces 

7import rivapy.tools._validators as validator 

8from rivapy.tools.scheduler import SimpleSchedule 

9from typing import List, Dict, Literal, Optional 

10 

11 

class PFCShaper(interfaces.FactoryObject):
    """Base interface for shaping models of energy price forward curves (PFC).

    Every concrete shaping model has to derive from this class and implement
    :meth:`calibrate`, :meth:`apply` and :meth:`_set_regression_parameters`.

    Args:
        spot_prices (pd.DataFrame): Data the shaping model is calibrated on. Its index is checked by
            ``validator._check_pandas_index_for_datetime`` on construction.
        holiday_calendar (holidays.HolidayBase): Calendar object providing the country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): Dictionary
            configuring the periods used to normalize a shape.
            ``D`` is the number of days at the start of the shape whose individual means are normalized to one.
            ``W`` is the number of weeks at the start of the shape whose individual means are normalized to one.
            ``ME`` is the number of months at the start of the shape whose individual means are normalized to one.
            Whatever is left afterwards is normalized per calendar year. Defaults to None, in which case the
            whole shape is normalized per calendar year.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__()
        validator._check_pandas_index_for_datetime(spot_prices)

        self.spot_prices = spot_prices
        self.holiday_calendar = holiday_calendar
        self.normalization_config = normalization_config

        # Order in which the configured periods are processed: (pandas resample frequency,
        # strftime pattern that maps a timestamp onto the period it belongs to).
        self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

    @abc.abstractmethod
    def calibrate(self) -> np.ndarray:
        """Calibrates the shaping model.

        Returns:
            np.ndarray: Numpy array containing the fit.
        """

    @abc.abstractmethod
    def apply(self, apply_schedule: SimpleSchedule):
        """Evaluates the model on a schedule to produce a shape for future dates.

        Args:
            apply_schedule (SimpleSchedule): Schedule object defining the dates the shape is generated for.
        """

    @abc.abstractmethod
    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the given parameters on the instance as ``_regression_parameters``.

        The method is declared abstract but carries this default behaviour, so overriding
        classes can delegate to it via ``super()``.

        Args:
            params (np.ndarray): Parameters to store.
        """
        self._regression_parameters = params

    def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:
        """Normalizes a shape according to ``normalization_config``.

        The configured periods are processed in the order days (``D``), weeks (``W``), months (``ME``).
        For each configured key, the first ``n`` periods of the not yet normalized part of the shape are
        divided by their own period mean. Whatever remains afterwards is divided by the mean of its
        calendar year. Without a configuration the whole shape is normalized per calendar year.

        Example:
            ``D`` is 2, ``W`` is 2 and ``ME`` is 1. The shape starts at 03.03.2025 (monday).
            Since ``D`` is 2, 03.03.2025 and 04.03.2025 are normalized individually.
            The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.
            The month is then normalized from 17.03.2025 to 31.03.2025.
            The remaining shape (starting from 01.04.2025) is normalized on a yearly level.

        Args:
            shape (pd.DataFrame): Shape which should be normalized.

        Returns:
            pd.DataFrame: Normalized shape.
        """
        original_datetimes: List[dt.datetime] = list(shape.index.copy())

        def _normalize_year(part: pd.DataFrame, part_datetimes: List[dt.datetime]) -> pd.DataFrame:
            # Both frames are re-indexed by their year string so that the division aligns
            # every row with the mean of its own year.
            yearly_mean = part.resample("YE").mean()
            keyed_part = part.rename(index=lambda ts: ts.strftime("%Y"))
            keyed_mean = yearly_mean.rename(index=lambda ts: ts.strftime("%Y"))
            scaled = keyed_part.divide(keyed_mean, axis="index")

            # restore the original timestamps as index
            scaled = scaled.reset_index(drop=True)
            scaled.index = part_datetimes
            return scaled

        config = self.normalization_config
        if config is None:
            return _normalize_year(shape, original_datetimes)

        # The configured normalization is assembled piece by piece.
        handled_datetimes = []
        normalized_parts = []

        for freq, key_format in self.__normalization_order:
            n_periods = config.get(freq, None)
            if n_periods is None:
                continue

            # Stop as soon as the earlier periods already covered the whole shape.
            if len(handled_datetimes) == len(shape):
                return pd.concat(normalized_parts, axis=0).sort_index(ascending=True)

            # part of the shape that no earlier period has normalized yet
            open_part = shape.loc[~shape.index.isin(handled_datetimes), :]

            # means of the first ``n_periods`` days, weeks or months of the open part
            period_means = open_part.resample(freq).mean()
            period_means = period_means.iloc[:n_periods, :]

            keyed_part = open_part.rename(index=lambda ts: ts.strftime(key_format))
            keyed_means = period_means.rename(index=lambda ts: ts.strftime(key_format))
            scaled_part = keyed_part.divide(keyed_means, axis="index")

            # After the division the index only holds the period key. The exact timestamps are
            # attached again and used as index, which makes the later concatenation straightforward.
            # Rows outside the selected periods have no matching mean, are NaN and get dropped.
            scaled_part["datetimes"] = list(open_part.index)
            scaled_part = scaled_part.reset_index(drop=True)
            scaled_part = scaled_part.set_index("datetimes").dropna()

            handled_datetimes.extend(scaled_part.index)
            normalized_parts.append(scaled_part)

        if len(handled_datetimes) == len(shape):
            return pd.concat(normalized_parts, axis=0).sort_index(ascending=True)

        # everything not covered so far is normalized per calendar year
        rest = shape.loc[~shape.index.isin(handled_datetimes), :]
        yearly_part = _normalize_year(rest, list(rest.index))

        return pd.concat([*normalized_parts, yearly_part], axis=0).sort_index(ascending=True)

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        as_dict = {
            "spot_prices": self.spot_prices,
            "holiday_calendar": self.holiday_calendar,
            "normalization_config": self.normalization_config,
        }
        return as_dict

141 

142 

class CategoricalRegression(PFCShaper):
    """Linear regression model using categorical predictor variables to construct a PFC shape.

    .. math::

        S(t) = S_0 + \\sum^{23}_{i=1}\\beta^h_i\\cdot\\mathbb{I}_{h(t)=i} + \\beta^d\\cdot\\mathbb{I}_{d(t)=1} + \\beta^H\\cdot\\mathbb{I}_{H(t)=1} + \\sum^{12}_{i=2}\\beta^m_i\\cdot\\mathbb{I}_{m(t)=i}

    Where:

    - :math:`S_0`: Spot price level
    - :math:`\\mathbb{I}_x = \\begin{cases} 1, & \\text{if the } x \\text{ expression renders true} \\\\ 0, & \\text{if the } x \\text{ expression renders false} \\end{cases}`
    - :math:`h(t)`: Hour of t
    - :math:`d(t) = \\begin{cases} 1, & \\text{if t is a weekday} \\\\ 0, & \\text{if t is a day on a weekend} \\end{cases}`
    - :math:`H(t) = \\begin{cases} 1, & \\text{if t is a public holiday} \\\\ 0, & \\text{if t is not a public holiday} \\end{cases}`
    - :math:`m(t)`: Month of t

    The hour terms are only part of the model if the datetimes contain more than one distinct hour.

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model. Only the first column is used for the regression.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)

    def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:
        """Transforms a list of datetimes into the design matrix of the linear regression.

        The columns are, in this order: a constant offset, the weekday indicator, the holiday indicator,
        the hour dummies (only if more than one distinct hour occurs, first occurring hour dropped)
        and the month dummies for the months 2 to 12.

        Args:
            datetimes_list (List[dt.datetime]): List of datetimes

        Returns:
            np.ndarray: Numpy array containing the transformed datetimes list
        """
        _datetime_series = pd.Series(datetimes_list)

        # Monday (0) to Friday (4) count as weekdays
        weekday = (_datetime_series.dt.weekday < 5).astype(int).to_numpy().reshape(-1, 1)

        # Holidays are matched on the calendar date, so every timestamp of a holiday is flagged,
        # not only the one at midnight. The ``in`` lookup is done once per distinct date.
        dates = _datetime_series.dt.date
        holiday_dates = [day for day in dates.unique() if day in self.holiday_calendar]
        holiday = dates.isin(holiday_dates).astype(int).to_numpy().reshape(-1, 1)

        predictors = [weekday, holiday]

        # Hour dummies only make sense if the data has an intraday resolution.
        hour_of_day = _datetime_series.dt.hour
        if len(hour_of_day.unique()) > 1:
            hours = pd.get_dummies(hour_of_day, prefix="hour", drop_first=True).astype(int).to_numpy()
            predictors.append(hours)

        # One indicator column for each of the months 2..12 (January is the reference month).
        # Comparing against the fixed month range keeps the column count at 11 even if
        # the datetimes do not cover every month of the year.
        month = (_datetime_series.dt.month.to_numpy().reshape(-1, 1) == np.arange(2, 13)).astype(int)
        predictors.append(month)

        offset = np.ones(shape=(len(_datetime_series), 1))
        return np.concatenate([offset, *predictors], axis=1)

    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the regression parameters by delegating to the base class implementation.

        Args:
            params (np.ndarray): Regression parameters to store.
        """
        super()._set_regression_parameters(params=params)

    def calibrate(self) -> np.ndarray:
        """Fits the regression to the first column of ``spot_prices`` by least squares.

        The fitted parameters are stored via :meth:`_set_regression_parameters`.

        Returns:
            np.ndarray: Numpy array containing the fit, i.e. the design matrix times the fitted parameters.
        """
        data_array = self._transform(datetimes_list=self.spot_prices.index)
        target = self.spot_prices.iloc[:, 0].to_numpy().reshape(-1, 1)

        # A least-squares solver is used instead of explicitly inverting ``X.T @ X``. It yields the same
        # solution for a full-rank design matrix, but also copes with all-zero or collinear columns
        # (e.g. a month that does not occur in the calibration data).
        params, *_ = np.linalg.lstsq(data_array, target, rcond=None)
        self._set_regression_parameters(params)
        return data_array @ self._regression_parameters

    def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:
        """Applies the calibrated model on a schedule and normalizes the resulting shape.

        Requires that the regression parameters have been set before, e.g. by :meth:`calibrate`.

        Args:
            apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.

        Returns:
            pd.DataFrame: Normalized shape with a single column ``shape``, indexed by the schedule datetimes.
        """
        apply_schedule_datetime_list = apply_schedule.get_schedule()
        data_array = self._transform(datetimes_list=apply_schedule_datetime_list)
        shape = data_array @ self._regression_parameters

        # ``ravel`` keeps a one dimensional array even for a schedule with a single datetime
        shape_df = pd.DataFrame({"shape": shape.ravel()}, index=apply_schedule_datetime_list)
        shape_df = self.normalize_shape(shape=shape_df)
        return shape_df

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        return super()._to_dict()