Coverage for rivapy/marketdata_tools/pfc

1import abc

2import holidays

3import numpy as np

4import pandas as pd

5import datetime as dt

6import rivapy.tools.interfaces as interfaces

7import rivapy.tools._validators as validator

8from rivapy.tools.scheduler import SimpleSchedule

9from typing import List, Dict, Literal, Optional

12class PFCShaper(interfaces.FactoryObject):

13 """PFCShaper interface. Each shaping model for energy price forward curves must inherit from this base class.

15 Args:

16 spot_prices (pd.DataFrame): Data used to calibrate the shaping model.

17 holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.

18 normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configurating the shape normalization periods.

19 Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.

20 ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.

21 ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one. The remaining shape is then normalized over the individual years.Defaults to None.

22 """

24 def __init__(

25 self,

26 spot_prices: pd.DataFrame,

27 holiday_calendar: holidays.HolidayBase,

28 normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,

29 ):

30 super().__init__()

31 validator._check_pandas_index_for_datetime(spot_prices)

32 self.spot_prices = spot_prices

33 self.holiday_calendar = holiday_calendar

34 self.normalization_config = normalization_config

36 # normalization order containing also the resampling string pattern for pandas resample method

37 self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

39 @abc.abstractmethod

40 def calibrate(self) -> np.ndarray:

41 """Calibration of the shaping model

43 Returns:

44 np.ndarray: Numpy array containing the fit.

45 """

46 pass

48 @abc.abstractmethod

49 def apply(self, apply_schedule: SimpleSchedule):

50 """Applies the model on a schedule in order to generate a shape for future dates.

52 Args:

53 apply_schedule (SimpleSchedule): Schedule object in order to generate a shape for future dates.

54 """

55 pass

57 @abc.abstractmethod

58 def _set_regression_parameters(self, params: np.ndarray):

59 self._regression_parameters = params

61 def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:

62 """Normalizes the shape based on ``normalization_config``.\n

63 ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.\n

64 ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.\n

65 ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.

66 The remaining shape is then normalized over the individual years.\n

68 Example:

69 ``D`` is 2, ``W`` is 2 and ``ME`` is 1. The shape starts at 03.03.2025 (monday).

70 Since ``D`` is 2, the shape is normalized for 03.03.2025 and 04.03.2025 individually.\n

71 The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.\n

72 The month is then normalized from 17.03.2025 to 31.03.2025.

73 The remaining shape (starting from 01.04.2025) is normalized on a yearly level.

75 Args:

76 shape (pd.DataFrame): Shape which should be normalized

78 Returns:

79 pd.DataFrame: Normalized shape

80 """

82 datetime_list: List[dt.datetime] = list(shape.index.copy())

84 # yearly normalization

85 def _normalize_year(shape: pd.DataFrame, datetime_list: List[dt.datetime]) -> pd.DataFrame:

86 base_y = shape.resample("YE").mean()

87 _shape = shape.rename(index=lambda x: x.strftime("%Y")).divide(base_y.rename(index=lambda x: x.strftime("%Y")), axis="index")

89 shape_df = _shape.reset_index(drop=True)

90 shape_df.index = datetime_list

91 return shape_df

93 if self.normalization_config is None:

94 shape_df = _normalize_year(shape=shape, datetime_list=datetime_list)

95 return shape_df

96 else:

97 # the normalization through the normalization_config is done in different parts

98 normalized_datetimes = []

99 normalized_shapes = []

100

101 # iterate over the correct normalization order

102 for resample_freq, resample_format in self.__normalization_order:

103 if self.normalization_config.get(resample_freq, None) is None:

104 continue

105 else:

106 # if the whole shape is already normalized by the previous normalization processes, the loop is stopped

107 if len(normalized_datetimes) == len(shape):

108 return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

109

110 # get the part of the shape which was not part of any previous normalizations

111 temp_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]

112

113 # normalize shape by the cofigured amount of days, weeks or months

114 resampled_shape = temp_shape.resample(resample_freq).mean()

115 resampled_shape = resampled_shape.iloc[: self.normalization_config[resample_freq], :]

116

117 partially_normalized_shape = temp_shape.rename(index=lambda x: x.strftime(resample_format)).divide(

118 resampled_shape.rename(index=lambda x: x.strftime(resample_format)), axis="index"

119 )

120

121 # Due to the operations done in the previous lines, the partially_normalized_shape does not contain the exact datetime but rather

122 # a datetime corresponding to the resampled frequency. Hence, the correct datetimes are added to the DataFrame and set as an index.

123 # This allows to concatenate the partially normalized shapes more easily at a later stage

124 partially_normalized_shape["datetimes"] = list(temp_shape.index)

125 partially_normalized_shape = partially_normalized_shape.reset_index(drop=True).set_index("datetimes").dropna()

126 normalized_datetimes += list(partially_normalized_shape.index)

127 normalized_shapes.append(partially_normalized_shape)

128

129 if len(normalized_datetimes) == len(shape):

130 return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

131

132 # the remaining shape is normalized on a yearly basis

133 leftover_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]

134 leftover_datetime = list(leftover_shape.index)

135 yearly_normalized_shape = _normalize_year(shape=leftover_shape, datetime_list=leftover_datetime)

136

137 return pd.concat(normalized_shapes + [yearly_normalized_shape], axis=0).sort_index(ascending=True)

138

139 def _to_dict(self):

140 return {"spot_prices": self.spot_prices, "holiday_calendar": self.holiday_calendar, "normalization_config": self.normalization_config}

141

142

143class CategoricalRegression(PFCShaper):

144 """Linear regression model using categorical predictor variables to construct a PFC shape.

145

146 .. math::

147

148 S(t) = S_0 + \sum^{23}_{i=1}\\beta^h_i\cdot\mathbb{I}_{h(t)=i} + \\beta^d\cdot\mathbb{I}_{d(t)=1} + \\beta^H\cdot\mathbb{I}_{H(t)=1} + \sum^{12}_{i=2}\\beta^m_i\cdot\mathbb{I}_{m(t)=i}

149

150 where:\n

151 :math:`S_0`: Spot price level\n

152 :math:`\mathbb{I}_x = \\begin{cases} 1, & \\text{if the } x \\text{ expression renders true} \\\\ 0, & \\text{if the } x \\text{ expression renders false} \\end{cases}` \n

153 :math:`h(t)`: Hour of t\n

154 :math:`d(t) = \\begin{cases} 1, & \\text{if t is a weekday} \\\\ 0, & \\text{if t is a day on a weekend} \\end{cases}` \n

155 :math:`H(t) = \\begin{cases} 1, & \\text{if t public holidy} \\\\ 0, & \\text{if t is not a public holiday} \\end{cases}` \n

156 :math:`m(t)`: Month of t\n

157

158 Args:

159 spot_prices (pd.DataFrame): Data used to calibrate the shaping model.

160 holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.

161 normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configurating the shape normalization periods.

162 Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.

163 ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.

164 ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one. The remaining shape is then normalized over the individual years.Defaults to None.

165 """

166

167 def __init__(

168 self,

169 spot_prices: pd.DataFrame,

170 holiday_calendar: holidays.HolidayBase,

171 normalization_config: Optional[Dict[Literal["D", "W", "M"], Optional[int]]] = None,

172 ):

173 super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)

174

175 def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:

176 """Transforms a list of datetimes in a numpy array which can then be used for the linear regression.

177

178 Args:

179 datetimes_list (List[dt.datetime]): List of datetimes

180

181 Returns:

182 np.ndarray: Numpy array containing the transformed datetimes list

183 """

184 _datetime_series = pd.Series(datetimes_list)

185

186 weekday = _datetime_series.dt.weekday.isin([0, 1, 2, 3, 4]).astype(int).to_numpy().reshape(-1, 1)

187 holiday = _datetime_series.isin(pd.to_datetime(list(self.holiday_calendar.keys()))).astype(int).to_numpy().reshape(-1, 1)

188

189 predictors = [weekday, holiday]

190

191 if len(_datetime_series.dt.hour.unique()) > 1:

192 hours = (

193 pd.get_dummies(_datetime_series.dt.hour, prefix="hour", drop_first=True)

194 .astype(int)

195 .to_numpy()

196 .reshape(-1, len(_datetime_series.dt.hour.unique()) - 1)

197 )

198 predictors.append(hours)

199

200 month = pd.get_dummies(_datetime_series.dt.month, prefix="month", drop_first=True).astype(int).to_numpy().reshape(-1, 11)

201

202 offset = np.ones(shape=(len(_datetime_series), 1))

203 return np.concatenate([offset, weekday, holiday, hours, month], axis=1)

204

205 def _set_regression_parameters(self, params: np.ndarray):

206 super()._set_regression_parameters(params=params)

207

208 def calibrate(self) -> np.ndarray:

209 data_array = self._transform(datetimes_list=self.spot_prices.index)

210 self._set_regression_parameters(

211 np.linalg.inv(data_array.T @ data_array) @ data_array.T @ self.spot_prices.iloc[:, 0].to_numpy().reshape(-1, 1)

212 )

213 return data_array @ self._regression_parameters

214

215 def apply(self, apply_schedule: SimpleSchedule) -> pd.DataFrame:

216 apply_schedule_datetime_list = apply_schedule.get_schedule()

217 data_array = self._transform(datetimes_list=apply_schedule_datetime_list)

218 shape = data_array @ self._regression_parameters

219

220 shape_df = pd.DataFrame({"shape": shape.squeeze()}, index=apply_schedule_datetime_list)

221 shape_df = self.normalize_shape(shape=shape_df)

222 return shape_df

223

224 def _to_dict(self):

225 return super()._to_dict()

Coverage for rivapy/marketdata_tools/pfc_shaper.py: 92%

89 statements