Coverage for rivapy/marketdata_tools/pfc_shaper.py: 92%

89 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-05 14:27 +0000

1import abc 

2import holidays 

3import numpy as np 

4import pandas as pd 

5import datetime as dt 

6import rivapy.tools.interfaces as interfaces 

7import rivapy.tools._validators as validator 

8from rivapy.tools.scheduler import SimpleSchedule 

9from typing import List, Dict, Literal, Optional 

10 

11 

class PFCShaper(interfaces.FactoryObject):
    """PFCShaper interface. Each shaping model for energy price forward curves must inherit from this base class.

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model. The frame is passed through
            ``validator._check_pandas_index_for_datetime`` before it is stored.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__()
        validator._check_pandas_index_for_datetime(spot_prices)
        self.spot_prices = spot_prices
        self.holiday_calendar = holiday_calendar
        self.normalization_config = normalization_config

        # Normalization order (days, then weeks, then months). Each entry pairs the pandas resample
        # frequency with the strftime pattern used to label the timestamps belonging to one period.
        self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

    @abc.abstractmethod
    def calibrate(self) -> np.ndarray:
        """Calibration of the shaping model

        Returns:
            np.ndarray: Numpy array containing the fit.
        """

    @abc.abstractmethod
    def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:
        """Applies the model on a schedule in order to generate a shape for future dates.

        Args:
            apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.

        Returns:
            pd.DataFrame: Shape for the dates of the schedule.
        """

    @abc.abstractmethod
    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the fitted parameters on the instance as ``_regression_parameters``.

        The method is abstract, so subclasses have to override it; they can delegate to this
        implementation via ``super()``.

        Args:
            params (np.ndarray): Fitted model parameters.
        """
        self._regression_parameters = params

    def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:
        """Normalizes the shape based on ``normalization_config``.

        ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.

        ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.

        ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
        The remaining shape is then normalized over the individual years.

        If ``normalization_config`` is None, the whole shape is normalized over the individual years.

        Example:
            ``D`` is 2, ``W`` is 2 and ``ME`` is 1. The shape starts at 03.03.2025 (monday).
            Since ``D`` is 2, the shape is normalized for 03.03.2025 and 04.03.2025 individually.

            The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.

            The month is then normalized from 17.03.2025 to 31.03.2025.
            The remaining shape (starting from 01.04.2025) is normalized on a yearly level.

        Args:
            shape (pd.DataFrame): Shape which should be normalized

        Returns:
            pd.DataFrame: Normalized shape
        """

        def _normalize_year(shape: pd.DataFrame, datetime_list: List[dt.datetime]) -> pd.DataFrame:
            """Divides each value by the mean of its calendar year and restores ``datetime_list`` as index."""
            base_y = shape.resample("YE").mean()
            # both frames are relabelled with the year string so that the division aligns on the year
            _shape = shape.rename(index=lambda x: x.strftime("%Y")).divide(base_y.rename(index=lambda x: x.strftime("%Y")), axis="index")

            shape_df = _shape.reset_index(drop=True)
            shape_df.index = datetime_list
            return shape_df

        if self.normalization_config is None:
            return _normalize_year(shape=shape, datetime_list=list(shape.index))

        # the normalization through the normalization_config is done in different parts
        normalized_datetimes = []
        normalized_shapes = []

        # iterate over the correct normalization order
        for resample_freq, resample_format in self.__normalization_order:
            n_periods = self.normalization_config.get(resample_freq, None)
            if n_periods is None:
                continue

            # get the part of the shape which was not part of any previous normalizations
            temp_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]

            # mean per day, week or month, restricted to the configured number of leading periods
            resampled_shape = temp_shape.resample(resample_freq).mean()
            resampled_shape = resampled_shape.iloc[:n_periods, :]

            partially_normalized_shape = temp_shape.rename(index=lambda x: x.strftime(resample_format)).divide(
                resampled_shape.rename(index=lambda x: x.strftime(resample_format)), axis="index"
            )

            # Due to the operations done in the previous lines, the partially_normalized_shape does not contain the exact datetime but rather
            # a label corresponding to the resampled frequency. Hence, the correct datetimes are added to the DataFrame and set as an index.
            # Rows outside of the configured periods have no matching mean, become NaN and are dropped here.
            # NOTE(review): the timestamps are re-attached by position, which presumes that the division keeps the row order of
            # temp_shape - confirm for shapes that are not sorted chronologically.
            partially_normalized_shape["datetimes"] = list(temp_shape.index)
            partially_normalized_shape = partially_normalized_shape.reset_index(drop=True).set_index("datetimes").dropna()
            normalized_datetimes += list(partially_normalized_shape.index)
            normalized_shapes.append(partially_normalized_shape)

            # if the whole shape is already normalized, the remaining normalization steps are skipped
            if len(normalized_datetimes) == len(shape):
                return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

        # the remaining shape is normalized on a yearly basis
        leftover_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]
        leftover_datetime = list(leftover_shape.index)
        yearly_normalized_shape = _normalize_year(shape=leftover_shape, datetime_list=leftover_datetime)

        return pd.concat(normalized_shapes + [yearly_normalized_shape], axis=0).sort_index(ascending=True)

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        return {"spot_prices": self.spot_prices, "holiday_calendar": self.holiday_calendar, "normalization_config": self.normalization_config}

141 

142 

class CategoricalRegression(PFCShaper):
    r"""Linear regression model using categorical predictor variables to construct a PFC shape.

    .. math::

        S(t) = S_0 + \sum^{23}_{i=1}\beta^h_i\cdot\mathbb{I}_{h(t)=i} + \beta^d\cdot\mathbb{I}_{d(t)=1} + \beta^H\cdot\mathbb{I}_{H(t)=1} + \sum^{12}_{i=2}\beta^m_i\cdot\mathbb{I}_{m(t)=i}

    where:

    :math:`S_0`: Spot price level

    :math:`\mathbb{I}_x = \begin{cases} 1, & \text{if the } x \text{ expression renders true} \\ 0, & \text{if the } x \text{ expression renders false} \end{cases}`

    :math:`h(t)`: Hour of t

    :math:`d(t) = \begin{cases} 1, & \text{if t is a weekday} \\ 0, & \text{if t is a day on a weekend} \end{cases}`

    :math:`H(t) = \begin{cases} 1, & \text{if t is a public holiday} \\ 0, & \text{if t is not a public holiday} \end{cases}`

    :math:`m(t)`: Month of t

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model. Only the first column is used as regression target.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)

    def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:
        """Transforms a list of datetimes in a numpy array which can then be used for the linear regression.

        The columns are, in this order: a constant offset, a weekday indicator (Monday to Friday), a holiday
        indicator, hour dummies (only if more than one distinct hour occurs; the first occurring hour is the
        reference level) and eleven month dummies for February to December (January is the reference level).

        Args:
            datetimes_list (List[dt.datetime]): List of datetimes

        Returns:
            np.ndarray: Numpy array containing the transformed datetimes list
        """
        _datetime_series = pd.Series(datetimes_list)

        offset = np.ones(shape=(len(_datetime_series), 1))
        weekday = _datetime_series.dt.weekday.isin([0, 1, 2, 3, 4]).astype(int).to_numpy().reshape(-1, 1)

        # A holiday applies to every timestamp of that calendar day, hence the membership test is done on the
        # date part. Testing membership (instead of reading the calendar's keys) also works for calendars
        # which only fill in their years once a date is looked up.
        dates = _datetime_series.dt.date
        is_holiday = {day: day in self.holiday_calendar for day in dates.unique()}
        holiday = dates.map(is_holiday).astype(int).to_numpy().reshape(-1, 1)

        predictors = [offset, weekday, holiday]

        # data with a single hour per day (e.g. daily data) carries no intraday information
        hour_of_day = _datetime_series.dt.hour
        if hour_of_day.nunique() > 1:
            hours = pd.get_dummies(hour_of_day, prefix="hour", drop_first=True).astype(int).to_numpy()
            predictors.append(hours)

        # fixed categories guarantee eleven month columns even if the datetimes do not cover all months
        month_of_year = pd.Series(pd.Categorical(_datetime_series.dt.month, categories=list(range(1, 13))))
        month = pd.get_dummies(month_of_year, prefix="month", drop_first=True).astype(int).to_numpy()
        predictors.append(month)

        return np.concatenate(predictors, axis=1)

    def _set_regression_parameters(self, params: np.ndarray):
        """Stores the fitted regression coefficients by delegating to the base class implementation.

        Args:
            params (np.ndarray): Fitted regression coefficients.
        """
        super()._set_regression_parameters(params=params)

    def calibrate(self) -> np.ndarray:
        """Fits the regression coefficients to the first column of ``spot_prices`` by ordinary least squares.

        Returns:
            np.ndarray: Column vector containing the fitted values for the index of ``spot_prices``.
        """
        data_array = self._transform(datetimes_list=self.spot_prices.index)
        target = self.spot_prices.iloc[:, 0].to_numpy().reshape(-1, 1)

        # lstsq solves the least squares problem without explicitly inverting X^T X and returns the
        # minimum-norm solution if the design matrix is rank deficient (e.g. no holiday in the data)
        params, *_ = np.linalg.lstsq(data_array, target, rcond=None)
        self._set_regression_parameters(params)
        return data_array @ self._regression_parameters

    def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:
        """Applies the calibrated model on a schedule and normalizes the resulting shape.

        ``calibrate`` must have been called before, since the stored regression coefficients are used. The
        schedule has to yield the same number of hour columns in ``_transform`` as the calibration data.

        Args:
            apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.

        Returns:
            pd.DataFrame: Normalized shape with a single column ``shape``, indexed by the schedule's datetimes.
        """
        apply_schedule_datetime_list = apply_schedule.get_schedule()
        data_array = self._transform(datetimes_list=apply_schedule_datetime_list)
        shape = data_array @ self._regression_parameters

        shape_df = pd.DataFrame({"shape": shape.squeeze()}, index=apply_schedule_datetime_list)
        return self.normalize_shape(shape=shape_df)

    def _to_dict(self):
        """Returns the constructor arguments of this object as a dictionary."""
        return super()._to_dict()