Source code for rivapy.marketdata_tools.pfc_shaper

import abc
import holidays
import numpy as np
import pandas as pd
import datetime as dt
import rivapy.tools.interfaces as interfaces
import rivapy.tools._validators as validator
from rivapy.tools.scheduler import SimpleSchedule
from typing import List, Dict, Literal, Optional


class PFCShaper(interfaces.FactoryObject):
    """PFCShaper interface. Each shaping model for energy price forward curves must inherit from this base class.

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
    ):
        super().__init__()
        validator._check_pandas_index_for_datetime(spot_prices)
        self.spot_prices = spot_prices
        self.holiday_calendar = holiday_calendar
        self.normalization_config = normalization_config
        # normalization order, including the resampling string pattern for the pandas resample method
        self.__normalization_order = [("D", "%Y-%m-%d"), ("W", "%G-%V"), ("ME", "%Y-%m")]

    @abc.abstractmethod
    def calibrate(self):
        """Calibration of the shaping model."""
        pass

    @abc.abstractmethod
    def apply(self, apply_schedule: List[dt.datetime]):
        """Applies the model to a schedule in order to generate a shape for future dates.

        Args:
            apply_schedule (List[dt.datetime]): List of datetimes for which a shape should be generated.
        """
        pass

    def _preprocess(self, spot: pd.DataFrame, remove_outlier: bool, lower_quantile: float, upper_quantile: float) -> pd.DataFrame:
        """Preprocess spot price data by ensuring hourly continuity, interpolating missing values,
        and optionally removing outliers based on normalized yearly values.

        This method performs the following steps:

        1. Aggregates duplicate timestamps by taking the mean of their values.
        2. Reindexes the time series to ensure a continuous hourly frequency and linearly interpolates missing values.
        3. If ``remove_outlier=True``, normalizes the time series on a yearly basis and removes data points outside the specified quantile range.

        Args:
            spot (pd.DataFrame): Raw spot price data indexed by datetime. The first column is assumed to contain the price values.
            remove_outlier (bool): Whether to remove outliers after normalization.
            lower_quantile (float): Lower quantile threshold (e.g., 0.01) used for outlier removal.
            upper_quantile (float): Upper quantile threshold (e.g., 0.99) used for outlier removal.

        Returns:
            pd.DataFrame: A cleaned and time-continuous spot price time series, with outliers optionally removed.
        """
        # remove duplicate hours by replacing them with their mean
        spot = spot.groupby(level=0).mean()
        # include missing hours
        full_idx = pd.date_range(start=spot.index.min(), end=spot.index.max(), freq="h")
        spot = spot.reindex(full_idx)
        spot.index.name = "date"
        spot.iloc[:, 0] = spot.iloc[:, 0].interpolate(method="linear")

        if remove_outlier:
            yearly_normalized = self._normalize_year(df=spot)
            q_lower = np.quantile(yearly_normalized.iloc[:, 0].to_numpy(), lower_quantile)
            q_upper = np.quantile(yearly_normalized.iloc[:, 0].to_numpy(), upper_quantile)
            remove_ids = yearly_normalized.loc[(yearly_normalized.iloc[:, 0] < q_lower) | (yearly_normalized.iloc[:, 0] > q_upper), :].index
            spot = spot.loc[~spot.index.isin(remove_ids), :]
        return spot

    def _normalize_year(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalize time series values by their yearly mean.

        This method computes the mean value for each calendar year and divides all data points within
        that year by the corresponding annual mean. The result is a year-normalized time series where
        each year has an average value of 1.

        Args:
            df (pd.DataFrame): A DataFrame indexed by datetime, containing one or more numeric columns to be normalized.

        Returns:
            pd.DataFrame: A DataFrame where the values of each year have been normalized relative to their annual mean.
        """
        yearly_mean = df.resample("YE").transform("mean")
        normalized = df / yearly_mean
        return normalized

    def normalize_shape(self, shape: pd.DataFrame) -> pd.DataFrame:
        """Normalizes the shape based on ``normalization_config``.

        ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
        ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
        ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
        The remaining shape is then normalized over the individual years.

        Example: ``D`` is 2, ``W`` is 2 and ``ME`` is 1. The shape starts at 03.03.2025 (a Monday). Since ``D`` is 2, the shape is normalized for 03.03.2025 and 04.03.2025 individually.
        The weeks are normalized from 05.03.2025 to 09.03.2025 and from 10.03.2025 to 16.03.2025.
        The month is then normalized from 17.03.2025 to 31.03.2025. The remaining shape (starting from 01.04.2025) is normalized on a yearly level.
        A runnable sketch of this example follows the class definition below.

        Args:
            shape (pd.DataFrame): Shape which should be normalized.

        Returns:
            pd.DataFrame: Normalized shape.
        """
        # yearly normalization
        if self.normalization_config is None:
            shape_df = self._normalize_year(df=shape)
            return shape_df
        else:
            # the normalization through the normalization_config is done in different parts
            normalized_datetimes = []
            normalized_shapes = []

            # iterate over the correct normalization order
            for resample_freq, resample_format in self.__normalization_order:
                if self.normalization_config.get(resample_freq, None) is None:
                    continue
                else:
                    # if the whole shape has already been normalized by the previous steps, stop the loop
                    if len(normalized_datetimes) == len(shape):
                        return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

                    # get the part of the shape which was not part of any previous normalizations
                    temp_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]

                    # normalize the shape by the configured number of days, weeks or months
                    resampled_shape = temp_shape.resample(resample_freq).mean()
                    resampled_shape = resampled_shape.iloc[: self.normalization_config[resample_freq], :]

                    partially_normalized_shape = temp_shape.rename(index=lambda x: x.strftime(resample_format)).divide(
                        resampled_shape.rename(index=lambda x: x.strftime(resample_format)), axis="index"
                    )
                    # Due to the operations in the previous lines, partially_normalized_shape does not contain the exact datetimes but rather
                    # datetimes corresponding to the resampled frequency. Hence, the correct datetimes are added to the DataFrame and set as the index.
                    # This allows the partially normalized shapes to be concatenated more easily at a later stage.
                    partially_normalized_shape["datetimes"] = list(temp_shape.index)
                    partially_normalized_shape = partially_normalized_shape.reset_index(drop=True).set_index("datetimes").dropna()

                    normalized_datetimes += list(partially_normalized_shape.index)
                    normalized_shapes.append(partially_normalized_shape)

            if len(normalized_datetimes) == len(shape):
                return pd.concat(normalized_shapes, axis=0).sort_index(ascending=True)

            # the remaining shape is normalized on a yearly basis
            leftover_shape = shape.loc[~shape.index.isin(normalized_datetimes), :]
            yearly_normalized_shape = self._normalize_year(df=leftover_shape)
            return pd.concat(normalized_shapes + [yearly_normalized_shape], axis=0).sort_index(ascending=True)

    def _to_dict(self):
        return {"spot_prices": self.spot_prices, "holiday_calendar": self.holiday_calendar, "normalization_config": self.normalization_config}
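

# The two helpers below are illustrative sketches, not part of the original module. The first
# shows, on four synthetic data points, what the ``_preprocess`` steps do; the second runs the
# ``normalization_config`` example from the ``normalize_shape`` docstring. They use
# ``SimpleCategoricalRegression`` (defined further below, since ``PFCShaper`` is abstract) and
# an arbitrarily chosen German holiday calendar.
def _example_preprocessing_sketch() -> pd.DataFrame:
    """Sketch: duplicate hours are averaged, missing hours are re-inserted and interpolated."""
    raw = pd.DataFrame(
        {"price": [50.0, 60.0, 40.0, 44.0]},
        index=pd.to_datetime(
            [
                "2025-01-01 00:00",
                "2025-01-01 00:00",  # duplicate hour (e.g. from a DST switch)
                "2025-01-01 01:00",
                "2025-01-01 03:00",  # 02:00 is missing
            ]
        ),
    )
    clean = raw.groupby(level=0).mean()  # 00:00 becomes 55.0
    full_idx = pd.date_range(start=clean.index.min(), end=clean.index.max(), freq="h")
    clean = clean.reindex(full_idx)
    clean["price"] = clean["price"].interpolate(method="linear")  # 02:00 becomes 42.0
    return clean


def _example_normalize_shape_sketch() -> pd.DataFrame:
    """Sketch of the docstring example: D=2, W=2, ME=1 on a shape starting Monday, 2025-03-03."""
    idx = pd.date_range(start="2025-03-03", end="2025-12-31 23:00", freq="h")
    rng = np.random.default_rng(42)
    shape = pd.DataFrame({"shape": 1.0 + 0.1 * rng.standard_normal(len(idx))}, index=idx)
    shaper = SimpleCategoricalRegression(
        spot_prices=shape,  # dummy data, only needed to construct the object
        holiday_calendar=holidays.Germany(years=[2025]),
        normalization_config={"D": 2, "W": 2, "ME": 1},
    )
    normalized = shaper.normalize_shape(shape=shape)
    # each configured period now averages to one, e.g. the first day:
    assert abs(normalized.loc["2025-03-03"].mean().item() - 1.0) < 1e-8
    return normalized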

class SimpleCategoricalRegression(PFCShaper):
    r"""Linear regression model using categorical predictor variables to construct a PFC shape.

    .. math::

        s(t) = s_0 + \sum^{23}_{i=1}\beta^h_i\cdot\mathbb{I}_{h(t)=i} + \beta^d\cdot\mathbb{I}_{d(t)=1} + \beta^H\cdot\mathbb{I}_{H(t)=1} + \sum^{12}_{i=2}\beta^m_i\cdot\mathbb{I}_{m(t)=i}

    where:

    :math:`s_0`: Shape level

    :math:`\mathbb{I}_x = \begin{cases} 1, & \text{if the } x \text{ expression renders true} \\ 0, & \text{if the } x \text{ expression renders false} \end{cases}`

    :math:`h(t)`: Hour of t

    :math:`d(t) = \begin{cases} 1, & \text{if t is a weekday} \\ 0, & \text{if t is a day on a weekend} \end{cases}`

    :math:`H(t) = \begin{cases} 1, & \text{if t is a public holiday} \\ 0, & \text{if t is not a public holiday} \end{cases}`

    :math:`m(t)`: Month of t

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
        remove_outlier (bool): Whether to remove outliers for the seasonality shape regression. Defaults to False.
        lower_quantile (float): Lower quantile for outlier detection. Defaults to 0.005.
        upper_quantile (float): Upper quantile for outlier detection. Defaults to 0.995.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
        remove_outlier: bool = False,
        lower_quantile: float = 0.005,
        upper_quantile: float = 0.995,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)
        self.remove_outlier = remove_outlier
        self.lower_quantile = lower_quantile
        self.upper_quantile = upper_quantile

    def _transform(self, datetimes_list: List[dt.datetime]) -> np.ndarray:
        """Transforms a list of datetimes into a numpy array which can then be used for the linear regression.

        Args:
            datetimes_list (List[dt.datetime]): List of datetimes.

        Returns:
            np.ndarray: Numpy array containing the transformed datetimes list.
        """
        _datetime_series = pd.Series(datetimes_list)
        weekday = _datetime_series.dt.weekday.isin([0, 1, 2, 3, 4]).astype(int).to_numpy().reshape(-1, 1)
        # compare on day level so that hours other than midnight are also flagged as holidays
        holiday = _datetime_series.dt.normalize().isin(pd.to_datetime(list(self.holiday_calendar.keys()))).astype(int).to_numpy().reshape(-1, 1)
        # assumes all 12 months are present in the data
        month = pd.get_dummies(_datetime_series.dt.month, prefix="month", drop_first=True).astype(int).to_numpy().reshape(-1, 11)
        offset = np.ones(shape=(len(_datetime_series), 1))
        predictors = [offset, weekday, holiday, month]
        # hourly dummies are only added if the data has an intraday resolution
        if len(_datetime_series.dt.hour.unique()) > 1:
            hours = (
                pd.get_dummies(_datetime_series.dt.hour, prefix="hour", drop_first=True)
                .astype(int)
                .to_numpy()
                .reshape(-1, len(_datetime_series.dt.hour.unique()) - 1)
            )
            predictors.append(hours)
        return np.concatenate(predictors, axis=1)

    def calibrate(self):
        spot = self.spot_prices.copy()
        spot = self._preprocess(spot=spot, remove_outlier=self.remove_outlier, lower_quantile=self.lower_quantile, upper_quantile=self.upper_quantile)
        spot_normalized = self._normalize_year(spot)
        # use the preprocessed index here: preprocessing may add or remove timestamps,
        # so self.spot_prices.index would no longer match the target vector
        data_array = self._transform(datetimes_list=spot_normalized.index)
        self._regression_parameters = np.linalg.inv(data_array.T @ data_array) @ data_array.T @ spot_normalized.iloc[:, 0].to_numpy().reshape(-1, 1)
        # fit = data_array @ self._regression_parameters
        # df = pd.DataFrame(fit.squeeze(), index=spot.index, columns=["shape"])
        # return self._normalize_year(df)

    def apply(self, apply_schedule: List[dt.datetime]) -> pd.DataFrame:
        data_array = self._transform(datetimes_list=apply_schedule)
        shape = data_array @ self._regression_parameters
        shape_df = pd.DataFrame({"shape": shape.squeeze()}, index=apply_schedule)
        shape_df = self.normalize_shape(shape=shape_df)
        return shape_df

    def _to_dict(self):
        return super()._to_dict()
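

# Hypothetical end-to-end usage of SimpleCategoricalRegression (illustration only): synthetic
# hourly prices with a daily sine pattern and an arbitrarily chosen German holiday calendar.
# Note that the month dummy encoding in ``_transform`` expects all 12 months to be present in
# both the calibration data and the apply schedule.
def _example_simple_categorical_regression() -> pd.DataFrame:
    hist_idx = pd.date_range(start="2023-01-01", end="2024-12-31 23:00", freq="h")
    rng = np.random.default_rng(0)
    prices = 50.0 + 10.0 * np.sin(2.0 * np.pi * hist_idx.hour / 24.0) + rng.normal(0.0, 2.0, len(hist_idx))
    spot = pd.DataFrame({"price": prices}, index=hist_idx)

    shaper = SimpleCategoricalRegression(
        spot_prices=spot,
        holiday_calendar=holidays.Germany(years=[2023, 2024, 2025]),
        remove_outlier=True,
    )
    shaper.calibrate()
    # hourly shape for the following year; with normalization_config=None the result is normalized per year
    apply_schedule = list(pd.date_range(start="2025-01-01", end="2025-12-31 23:00", freq="h"))
    return shaper.apply(apply_schedule=apply_schedule)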

class CategoricalRegression(PFCShaper):
    r"""Linear regression model using categorical predictor variables to construct a PFC shape.

    We follow the methodology in:

    https://cem-a.org/wp-content/uploads/2019/10/A-Structureal-Model-for-Electricity-Forward-Prices.pdf

    https://ieeexplore.ieee.org/document/6607349

    https://www.researchgate.net/publication/229051446_Robust_Calculation_and_Parameter_Estimation_of_the_Hourly_Price_Forward_Curve

    We create one regression model for the seasonality shape and one for the intra day shape. For the regression model of the seasonality shape,
    the days are split into weekdays, Saturdays and Sundays. Public holidays are treated as Sundays, while bridge days are expected to behave like Saturdays.
    Afterwards, weekdays are split into clusters representing the month they are in, while Saturdays and Sundays are assigned to clusters reaching over three months.
    For the regression model of the intra day shape we keep the seasonality clusters but add an hourly cluster for each individual hour, such that the total number
    of intra day clusters becomes #Season Clusters * 24.

    .. math::

        \begin{aligned}
            y_\text{season}(d) &= \frac{\frac{1}{24}\sum_{i=1}^{24}h_i^d}{\frac{1}{N_y}\sum_{i=1}^{N_y} h_i^y} \\
            \hat{y}_\text{season}(d) &= \beta^{0}_\text{season} + \sum_{c \in C^\text{season}}\beta^c_{\text{season}}\cdot\mathbb{I}_{\text{Cluster}(d)=c} \\
            y_\text{id}(h_i,d) &= \frac{h_i^d}{\frac{1}{24}\sum_{i=1}^{24}h_i^d} \\
            \hat{y}_\text{id}(h_i,d) &= \beta^{0}_\text{id} + \sum_{c \in C^\text{id}}\beta^c\cdot\mathbb{I}_{\text{Cluster}(h_i^d)=c} \\
            s(h_i,d) &= \hat{y}_\text{id}(h_i,d)\cdot\hat{y}_\text{season}(d)
        \end{aligned}

    where:

    :math:`h_i^d`: i-th hour of the d-th day

    :math:`h_i^y`: i-th hour of the year :math:`y`

    :math:`N_y`: number of hours in the year :math:`y`

    :math:`C^\text{season}`: set of all clusters for the seasonality shape

    :math:`C^\text{id}`: set of all clusters for the intra day shape

    :math:`\text{Cluster}(X)`: returns the cluster of X

    :math:`\mathbb{I}_x = \begin{cases} 1, & \text{if the } x \text{ expression renders true}\\ 0, & \text{if the } x \text{ expression renders false} \end{cases}`

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
        remove_outlier_season (bool): Whether to remove outliers for the seasonality shape regression. Defaults to False.
        remove_outlier_id (bool): Whether to remove outliers for the intra day shape regression. Defaults to False.
        lower_quantile_season (float): Lower quantile for outlier detection on the seasonality shape. Defaults to 0.005.
        upper_quantile_season (float): Upper quantile for outlier detection on the seasonality shape. Defaults to 0.995.
        lower_quantile_id (float): Lower quantile for outlier detection on the intra day shape. Defaults to 0.005.
        upper_quantile_id (float): Upper quantile for outlier detection on the intra day shape. Defaults to 0.995.
""" def __init__( self, spot_prices: pd.DataFrame, holiday_calendar: holidays.HolidayBase, normalization_config: Optional[Dict[Literal["D", "W", "M"], Optional[int]]] = None, remove_outlier_season: bool = False, remove_outlier_id: bool = False, lower_quantile_season: float = 0.005, upper_quantile_season: float = 0.995, lower_quantile_id: float = 0.005, upper_quantile_id: float = 0.995, ): super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config) self.remove_outlier_season = remove_outlier_season self.remove_outlier_id = remove_outlier_id self.lower_quantile_season = lower_quantile_season self.upper_quantile_season = upper_quantile_season self.lower_quantile_id = lower_quantile_id self.upper_quantile_id = upper_quantile_id def _create_cluster_df(self, day_list: List[dt.datetime], use_hours: bool = False): """Create a DataFrame containing the clusters for the regression models. Args: day_list (List[dt.datetime]): List of datetimes for which a clustering should be performed use_hours (bool, optional): Wether to extend the clustering to include hours. Defaults to False. Returns: None """ holidays_list = pd.to_datetime(list(self.holiday_calendar.keys())) cluster_df = pd.DataFrame(index=day_list) cluster_df["year"] = cluster_df.index.year cluster_df["month"] = cluster_df.index.month cluster_df["day"] = cluster_df.index.day cluster_df["weekday"] = cluster_df.index.weekday if use_hours: cluster_df["hour"] = cluster_df.index.hour # get holidays and bridge days temp_cluster_df = cluster_df[["year", "month", "day", "weekday"]].drop_duplicates().sort_index() temp_cluster_df["holiday"] = 0 temp_cluster_df["bridge"] = 0 temp_cluster_df.loc[temp_cluster_df.index.isin(holidays_list), "holiday"] = 1 is_monday_bridge = (temp_cluster_df.index + pd.Timedelta(days=1)).isin(holidays_list) & (temp_cluster_df.index.weekday == 0) is_friday_bridge = (temp_cluster_df.index - pd.Timedelta(days=1)).isin(holidays_list) & (temp_cluster_df.index.weekday == 4) temp_cluster_df.loc[is_friday_bridge | is_monday_bridge, "bridge"] = 1 cluster_df = pd.merge(cluster_df, temp_cluster_df, on=["year", "month", "day", "weekday"]) # cluster_df.set_index(day_list, inplace=True) cluster_df.index = day_list cluster_df["day_indicator"] = 0 cluster_df.loc[cluster_df.index.weekday < 5, "day_indicator"] = 1 cluster_df.loc[cluster_df.index.weekday == 5, "day_indicator"] = 2 cluster_df.loc[cluster_df.index.weekday == 6, "day_indicator"] = 3 cluster_df.loc[cluster_df["holiday"] == 1, "day_indicator"] = 3 cluster_df.loc[cluster_df["bridge"] == 1, "day_indicator"] = 2 cluster_df.loc[(cluster_df.index.month == 12) & (cluster_df.index.day.isin([24, 31])) & (cluster_df.index.weekday < 5), "day_indicator"] = 2 cluster_df.loc[:, "cluster"] = 0 cluster_df.loc[cluster_df["day_indicator"] == 1, "cluster"] = cluster_df.loc[cluster_df["day_indicator"] == 1, "month"] weekend_cluster_month = [[1, 2, 12], [3, 4, 5], [6, 7, 8], [9, 10, 11]] # weekend_cluster_month = [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12]] count = cluster_df["month"].max() for day_indicator in [2, 3]: for month_lst in weekend_cluster_month: if not len(cluster_df.loc[(cluster_df["day_indicator"] == day_indicator) & (cluster_df["month"].isin(month_lst)), "cluster"]) == 0: count += 1 cluster_df.loc[(cluster_df["day_indicator"] == day_indicator) & (cluster_df["month"].isin(month_lst)), "cluster"] = count if use_hours: self.__add_hours_cluster( df=cluster_df, clusters_clmn="cluster", hours_clmn="hour", 
unique_clusters=cluster_df["cluster"].unique(), unique_hours=cluster_df["hour"].unique(), ) return cluster_df def __add_hours_cluster(self, df: pd.DataFrame, clusters_clmn: str, hours_clmn: str, unique_clusters: List[int], unique_hours: List[int]): """Add hourly clustering in the `cluster_hours` column of the provided DataFrame Args: df (pd.DataFrame): DataFrame containing the infos needed for an hourly clustering. clusters_clmn (str): Column containing the seasonality clusters hours_clmn (str): Columns containing the hours unique_clusters (List[int]): List of all clusters unique_hours (List[int]): List of all hourly clusters """ df["cluster_hours"] = 0 count = 1 for cluster in unique_clusters: for hour in unique_hours: df.loc[(df[clusters_clmn] == cluster) & (df[hours_clmn] == hour), "cluster_hours"] = count count += 1 def _create_one_hot_matrix(self, rows: int, clusters: pd.Series, max_clusters: int, adjust_clusters: bool, offset_col: bool): """Create a matrix for a one hot encoding of a clusters pandas Series. Args: rows (int): Number of data points clusters (pd.Series): Series containing the cluster for each data point max_clusters (int): Total number of individual clusters adjust_clusters (bool): Wether to adjust cluster by subtracting each cluster by one. offset_col (bool): Wether to use the last column as an intercept for the regression model. Returns: _type_: _description_ """ one_hot = np.zeros(shape=(rows, max_clusters)) if adjust_clusters: cluster_series = clusters - 1 else: cluster_series = clusters one_hot[np.arange(rows), cluster_series] = 1 if offset_col: one_hot[:, -1] = 1 else: one_hot = one_hot[:, :-1] return one_hot def _preprocess(self, spot: pd.DataFrame) -> pd.DataFrame: # remove duplicate hours by replacing these with their mean spot = spot.groupby(level=0).mean() # include missing hours full_idx = pd.date_range(start=spot.index.min(), end=spot.index.max(), freq="h") spot = spot.reindex(full_idx) spot.index.name = "date" spot.iloc[:, 0] = spot.iloc[:, 0].interpolate(method="linear") return spot @staticmethod def _remove_outliers(df: pd.DataFrame, value_clmn: str, grouping_clmn: str, lower_quantile: float, upper_quantile: float): """ Remove outliers from a DataFrame based on quantile thresholds within groups. This function applies a quantile-based filter to the values in `value_clmn` for each unique category defined in `grouping_clmn`. For each group, values below the `lower_quantile` or above the `upper_quantile` are considered outliers and removed. The filtered rows are then returned as a cleaned DataFrame. Args: df (pd.DataFrame): Input DataFrame containing the data. value_clmn (str): Name of the column containing the numerical values to evaluate for outliers. grouping_clmn (str): Name of the column used to group the data before applying the quantile filter. lower_quantile (float): Lower quantile threshold (e.g., 0.05). Values below this quantile are removed. upper_quantile (float): Upper quantile threshold (e.g., 0.95). Values above this quantile are removed. Returns: pd.DataFrame: A DataFrame containing only the data points within the specified quantile bounds for each group. 
""" def remove_outliers(series, lower_quantile=lower_quantile, upper_quantile=upper_quantile): lower_bound = series.quantile(lower_quantile) upper_bound = series.quantile(upper_quantile) return series[(series >= lower_bound) & (series <= upper_bound)] keep_ids = df.groupby(grouping_clmn, group_keys=False)[value_clmn].apply(remove_outliers).index df_clean = df.loc[df.index.isin(keep_ids), :] return df_clean

    def calibrate(self):
        spot = self.spot_prices.copy()
        spot = self._preprocess(spot=spot)
        cluster_df = self._create_cluster_df(spot.index, use_hours=True)

        season_shape = self._normalize_year(spot)
        season_shape = season_shape.resample("D").mean().dropna()
        cluster_df_daily = cluster_df[["year", "month", "day", "weekday", "day_indicator", "cluster"]].drop_duplicates().sort_index()
        calib_season_df = pd.merge(season_shape, cluster_df_daily, left_index=True, right_index=True)
        if self.remove_outlier_season:
            value_clmn = calib_season_df.columns[0]
            calib_season_df = self._remove_outliers(
                df=calib_season_df,
                value_clmn=value_clmn,
                grouping_clmn="cluster",
                lower_quantile=self.lower_quantile_season,
                upper_quantile=self.upper_quantile_season,
            )

        self.__max_cluster = calib_season_df["cluster"].max()
        season_one_hot = self._create_one_hot_matrix(
            rows=len(calib_season_df),
            clusters=calib_season_df["cluster"],
            max_clusters=self.__max_cluster,
            adjust_clusters=True,  # since clusters do not start at 0
            offset_col=True,  # since we would ignore the last column because it is obsolete due to our categorical variables,
            # we actually set it all to 1 to account for the offset in our regression model
        )
        self._season_regression_params = (
            np.linalg.inv(season_one_hot.T @ season_one_hot) @ season_one_hot.T @ calib_season_df.iloc[:, 0].to_numpy().reshape(-1, 1)
        )

        id_shape = spot / spot.resample("D").transform("mean").dropna()
        calib_id_df = pd.merge(id_shape, cluster_df, left_index=True, right_index=True)
        if self.remove_outlier_id:
            value_clmn = calib_id_df.columns[0]
            calib_id_df = self._remove_outliers(
                df=calib_id_df,
                grouping_clmn="cluster_hours",
                value_clmn=value_clmn,
                lower_quantile=self.lower_quantile_id,
                upper_quantile=self.upper_quantile_id,
            )

        self.__max_hour_clusters = calib_id_df["cluster_hours"].max()
        id_one_hot = self._create_one_hot_matrix(
            rows=len(calib_id_df),
            clusters=calib_id_df["cluster_hours"],
            max_clusters=self.__max_hour_clusters,
            adjust_clusters=True,
            offset_col=True,
        )
        self._id_regression_params = np.linalg.inv(id_one_hot.T @ id_one_hot) @ id_one_hot.T @ calib_id_df.iloc[:, 0].to_numpy().reshape(-1, 1)

    def apply(self, apply_schedule: List[dt.datetime]) -> pd.DataFrame:
        cluster_df = self._create_cluster_df(apply_schedule, use_hours=True)
        season_one_hot = self._create_one_hot_matrix(
            rows=len(cluster_df),
            clusters=cluster_df["cluster"],
            max_clusters=self.__max_cluster,
            adjust_clusters=True,
            offset_col=True,
        )
        id_one_hot = self._create_one_hot_matrix(
            rows=len(cluster_df),
            clusters=cluster_df["cluster_hours"],
            max_clusters=self.__max_hour_clusters,
            adjust_clusters=True,
            offset_col=True,
        )
        season_fit = season_one_hot @ self._season_regression_params
        id_fit = id_one_hot @ self._id_regression_params
        cluster_df["shape"] = (season_fit * id_fit).squeeze()
        cluster_df["shape"] = self._normalize_year(df=cluster_df.loc[:, "shape"])
        shape_df = pd.DataFrame(cluster_df.loc[:, "shape"])
        shape_df = self.normalize_shape(shape=shape_df)
        return shape_df

    def _to_dict(self):
        return super()._to_dict()
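

# Two illustrative sketches, not part of the original module: a tiny demonstration of the
# group-wise quantile filter in ``_remove_outliers`` and a hypothetical end-to-end usage of
# CategoricalRegression on synthetic hourly prices (holiday calendar chosen arbitrarily).
def _example_remove_outliers_sketch() -> pd.DataFrame:
    """Within each cluster, values outside the group's quantile band are dropped."""
    df = pd.DataFrame(
        {
            "value": [1.0, 1.1, 0.9, 9.0, 2.0, 2.1, 1.9, -5.0],
            "cluster": [1, 1, 1, 1, 2, 2, 2, 2],
        }
    )
    # with quantiles 0.1/0.9 on only four points per group, the extremes on both ends are dropped
    return CategoricalRegression._remove_outliers(
        df=df, value_clmn="value", grouping_clmn="cluster", lower_quantile=0.1, upper_quantile=0.9
    )


def _example_categorical_regression() -> pd.DataFrame:
    hist_idx = pd.date_range(start="2023-01-01", end="2024-12-31 23:00", freq="h")
    rng = np.random.default_rng(1)
    prices = 50.0 + 10.0 * np.sin(2.0 * np.pi * hist_idx.hour / 24.0) + rng.normal(0.0, 2.0, len(hist_idx))
    spot = pd.DataFrame({"price": prices}, index=hist_idx)

    shaper = CategoricalRegression(
        spot_prices=spot,
        holiday_calendar=holidays.Germany(years=[2023, 2024, 2025]),
        normalization_config={"D": 5, "W": 4, "ME": 6},
        remove_outlier_season=True,
        remove_outlier_id=True,
    )
    shaper.calibrate()
    # the resulting shape is the product of the seasonality fit and the intra day fit
    apply_schedule = list(pd.date_range(start="2025-01-01", end="2025-12-31 23:00", freq="h"))
    return shaper.apply(apply_schedule=apply_schedule)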

class CategoricalFourierShaper(PFCShaper):
    r"""Linear regression model using categorical predictor variables to construct a PFC shape.

    We follow the methodology in:

    https://cem-a.org/wp-content/uploads/2019/10/A-Structureal-Model-for-Electricity-Forward-Prices.pdf

    https://ieeexplore.ieee.org/document/6607349

    https://www.researchgate.net/publication/229051446_Robust_Calculation_and_Parameter_Estimation_of_the_Hourly_Price_Forward_Curve

    We create one regression model for the seasonality shape and one for the intra day shape. For the regression model of the seasonality shape,
    the days are split into weekdays, Saturdays and Sundays. Public holidays are treated as Sundays, while bridge days are expected to behave like Saturdays.
    Afterwards, weekdays are split into clusters representing the month they are in, while Saturdays and Sundays are assigned to clusters reaching over three months.
    For the regression model of the intra day shape we use a Fourier series to model periodicities over each hour in a year. This way we can model the solar dip
    over the year more reliably.

    .. math::

        \begin{aligned}
            y_\text{season}(d) &= \frac{\frac{1}{24}\sum_{i=1}^{24}h_i^d}{\frac{1}{N_y}\sum_{i=1}^{N_y} h_i^y} \\
            \hat{y}_\text{season}(d) &= \beta^{0}_\text{season} + \sum_{c \in C^\text{season}}\beta^c_{\text{season}}\cdot\mathbb{I}_{\text{Cluster}(d)=c} \\
            y_\text{id}(h_i,d) &= \frac{h_i^d}{\frac{1}{N_y}\sum_{i=1}^{N_y} h_i^y} \\
            \hat{y}_\text{id}(h_i,d) &= \beta^{0,H(h_i^d)}_\text{id} + \sum_{k=1}^{K}\left(\beta^{k,H(h_i^d)}_{\text{id},\sin}\cdot\sin(2\pi k\cdot t(h_i^d)) + \beta^{k,H(h_i^d)}_{\text{id},\cos}\cdot\cos(2\pi k\cdot t(h_i^d))\right)\\
            s(h_i,d) &= \hat{y}_\text{id}(h_i,d)\cdot\hat{y}_\text{season}(d)
        \end{aligned}

    where:

    :math:`h_i^d`: i-th hour of the d-th day

    :math:`h_i^y`: i-th hour of the year :math:`y`

    :math:`N_y`: number of hours in the year :math:`y`

    :math:`H(h_i^d)`: hour (0-23) of :math:`h_i^d` independent of the day

    :math:`t(h_i^d)`: function which returns a number in [0,1) depending on the position of :math:`h_i^d` in the respective year

    :math:`C^\text{season}`: set of all clusters for the seasonality shape

    :math:`K`: number of Fourier partials

    :math:`\text{Cluster}(X)`: returns the cluster of X

    :math:`\mathbb{I}_x = \begin{cases} 1, & \text{if the } x \text{ expression renders true}\\ 0, & \text{if the } x \text{ expression renders false} \end{cases}`

    Args:
        spot_prices (pd.DataFrame): Data used to calibrate the shaping model.
        holiday_calendar (holidays.HolidayBase): Calendar object to obtain country specific holidays.
        normalization_config (Optional[Dict[Literal["D", "W", "ME"], Optional[int]]], optional): A dictionary configuring the shape normalization periods.
            Here ``D`` defines the number of days at the beginning of the shape over which the individual mean is normalized to one.
            ``W`` defines the number of weeks at the beginning of the shape over which the individual mean is normalized to one.
            ``ME`` defines the number of months at the beginning of the shape over which the individual mean is normalized to one.
            The remaining shape is then normalized over the individual years. Defaults to None.
        k_fourier (int): Number of partial sums for the Fourier series. Defaults to 2.
        remove_outlier_season (bool): Whether to remove outliers for the seasonality shape regression. Defaults to True.
        remove_outlier_id (bool): Whether to remove outliers for the intra day shape regression. Defaults to True.
        lower_quantile_season (float): Lower quantile for outlier detection on the seasonality shape. Defaults to 0.005.
        upper_quantile_season (float): Upper quantile for outlier detection on the seasonality shape. Defaults to 0.995.
        lower_quantile_id (float): Lower quantile for outlier detection on the intra day shape. Defaults to 0.005.
        upper_quantile_id (float): Upper quantile for outlier detection on the intra day shape. Defaults to 0.995.
    """

    def __init__(
        self,
        spot_prices: pd.DataFrame,
        holiday_calendar: holidays.HolidayBase,
        normalization_config: Optional[Dict[Literal["D", "W", "ME"], Optional[int]]] = None,
        k_fourier: int = 2,
        remove_outlier_season: bool = True,
        remove_outlier_id: bool = True,
        lower_quantile_season: float = 0.005,
        upper_quantile_season: float = 0.995,
        lower_quantile_id: float = 0.005,
        upper_quantile_id: float = 0.995,
    ):
        super().__init__(spot_prices=spot_prices, holiday_calendar=holiday_calendar, normalization_config=normalization_config)
        self.k_fourier = k_fourier
        self.remove_outlier_season = remove_outlier_season
        self.remove_outlier_id = remove_outlier_id
        self.lower_quantile_season = lower_quantile_season
        self.upper_quantile_season = upper_quantile_season
        self.lower_quantile_id = lower_quantile_id
        self.upper_quantile_id = upper_quantile_id

    def _create_cluster_df(self, day_list: List[dt.datetime], use_hours: bool = False):
        holidays_list = pd.to_datetime(list(self.holiday_calendar.keys()))
        cluster_df = pd.DataFrame(index=day_list)
        cluster_df["year"] = cluster_df.index.year
        cluster_df["month"] = cluster_df.index.month
        cluster_df["day"] = cluster_df.index.day
        cluster_df["weekday"] = cluster_df.index.weekday
        if use_hours:
            cluster_df["hour"] = cluster_df.index.hour

        # get holidays and bridge days
        temp_cluster_df = cluster_df[["year", "month", "day", "weekday"]].drop_duplicates().sort_index()
        temp_cluster_df["holiday"] = 0
        temp_cluster_df["bridge"] = 0
        temp_cluster_df.loc[temp_cluster_df.index.isin(holidays_list), "holiday"] = 1
        is_monday_bridge = (temp_cluster_df.index + pd.Timedelta(days=1)).isin(holidays_list) & (temp_cluster_df.index.weekday == 0)
        is_friday_bridge = (temp_cluster_df.index - pd.Timedelta(days=1)).isin(holidays_list) & (temp_cluster_df.index.weekday == 4)
        temp_cluster_df.loc[is_friday_bridge | is_monday_bridge, "bridge"] = 1

        cluster_df = pd.merge(cluster_df, temp_cluster_df, on=["year", "month", "day", "weekday"])
        # cluster_df.set_index(day_list, inplace=True)
        cluster_df.index = day_list

        cluster_df["day_indicator"] = 0
        cluster_df.loc[cluster_df.index.weekday < 5, "day_indicator"] = 1
        cluster_df.loc[cluster_df.index.weekday == 5, "day_indicator"] = 2
        cluster_df.loc[cluster_df.index.weekday == 6, "day_indicator"] = 3
        cluster_df.loc[cluster_df["holiday"] == 1, "day_indicator"] = 3
        cluster_df.loc[cluster_df["bridge"] == 1, "day_indicator"] = 2
        cluster_df.loc[(cluster_df.index.month == 12) & (cluster_df.index.day.isin([24, 31])) & (cluster_df.index.weekday < 5), "day_indicator"] = 2

        cluster_df.loc[:, "cluster"] = 0
        cluster_df.loc[cluster_df["day_indicator"] == 1, "cluster"] = cluster_df.loc[cluster_df["day_indicator"] == 1, "month"]

        weekend_cluster_month = [[1, 2, 12], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
        count = cluster_df["month"].max()
        for day_indicator in [2, 3]:
            for month_lst in weekend_cluster_month:
                if not len(cluster_df.loc[(cluster_df["day_indicator"] == day_indicator) & (cluster_df["month"].isin(month_lst)), "cluster"]) == 0:
                    count += 1
                    cluster_df.loc[(cluster_df["day_indicator"] == day_indicator) & (cluster_df["month"].isin(month_lst)), "cluster"] = count

        if use_hours:
            self.__add_hours_cluster(
                df=cluster_df,
                clusters_clmn="cluster",
                hours_clmn="hour",
                unique_clusters=cluster_df["cluster"].unique(),
                unique_hours=cluster_df["hour"].unique(),
            )
        return cluster_df

    def __add_hours_cluster(self, df: pd.DataFrame, clusters_clmn: str, hours_clmn: str, unique_clusters: List[int], unique_hours: List[int]):
        df["cluster_hours"] = 0
        count = 1
        for cluster in unique_clusters:
            for hour in unique_hours:
                df.loc[(df[clusters_clmn] == cluster) & (df[hours_clmn] == hour), "cluster_hours"] = count
                count += 1

    def _create_one_hot_matrix(self, rows: int, clusters: pd.Series, max_clusters: int, adjust_clusters: bool, offset_col: bool):
        one_hot = np.zeros(shape=(rows, max_clusters))
        if adjust_clusters:
            cluster_series = clusters - 1
        else:
            cluster_series = clusters
        one_hot[np.arange(rows), cluster_series] = 1
        if offset_col:
            one_hot[:, -1] = 1
        else:
            one_hot = one_hot[:, :-1]
        return one_hot

    def times_to_zero_one(self, h: pd.DatetimeIndex):
        """Convert each time point into its fractional position within the year it belongs to.

        The 1st of January, 00:00, always maps to zero, while the last hour of a year maps close to one
        (e.g., the 31st of December, 23:00, maps to 1 - 1/(365*24) or 1 - 1/(366*24) depending on the year).
        The idea is to map any year to [0, 1), on which the seasonality curve, periodic on [0, 1], is fitted.
        """
        if not isinstance(h, pd.DatetimeIndex):
            raise TypeError("index must be of type pd.DatetimeIndex")
        if len(h) < 1:
            raise ValueError("index must contain at least one value!")
        # start of the year w.r.t. each date in h
        start_of_years = pd.to_datetime(h.year.astype(str) + "-01-01 00:00:00").tz_localize(h.tz)
        # start of the following year w.r.t. each date in h
        end_of_years = pd.to_datetime((h.year + 1).astype(str) + "-01-01 00:00:00").tz_localize(h.tz)
        # compute the fractions, using pandas vectorization
        result = np.array((h - start_of_years) / (end_of_years - start_of_years))
        # internal sanity check: all points must lie in [0, 1)
        assert all(result < 1.0) and all(result >= 0.0)
        return result

    def _preprocess(self, spot: pd.DataFrame) -> pd.DataFrame:
        # remove duplicate hours by replacing them with their mean
        spot = spot.groupby(level=0).mean()
        # include missing hours
        full_idx = pd.date_range(start=spot.index.min(), end=spot.index.max(), freq="h")
        spot = spot.reindex(full_idx)
        spot.index.name = "date"
        spot.iloc[:, 0] = spot.iloc[:, 0].interpolate(method="linear")
        return spot

    @staticmethod
    def _remove_outliers(df: pd.DataFrame, value_clmn: str, grouping_clmn: str, lower_quantile: float, upper_quantile: float):
        def remove_outliers(series, lower_quantile=lower_quantile, upper_quantile=upper_quantile):
            lower_bound = series.quantile(lower_quantile)
            upper_bound = series.quantile(upper_quantile)
            return series[(series >= lower_bound) & (series <= upper_bound)]

        keep_ids = df.groupby(grouping_clmn, group_keys=False)[value_clmn].apply(remove_outliers).index
        df_clean = df.loc[df.index.isin(keep_ids), :]
        return df_clean

    def calibrate(self):
        spot = self.spot_prices.copy()
        spot = self._preprocess(spot=spot)
        cluster_df = self._create_cluster_df(spot.index, use_hours=True)

        season_shape = self._normalize_year(spot)
        season_shape = season_shape.resample("D").mean().dropna()
        cluster_df_daily = cluster_df[["year", "month", "day", "weekday", "day_indicator", "cluster"]].drop_duplicates().sort_index()
        calib_season_df = pd.merge(season_shape, cluster_df_daily, left_index=True, right_index=True)
        if self.remove_outlier_season:
            value_clmn = calib_season_df.columns[0]
            calib_season_df = self._remove_outliers(
                df=calib_season_df,
                value_clmn=value_clmn,
                grouping_clmn="cluster",
                lower_quantile=self.lower_quantile_season,
                upper_quantile=self.upper_quantile_season,
            )

        self.__max_cluster = calib_season_df["cluster"].max()
        season_one_hot = self._create_one_hot_matrix(
            rows=len(calib_season_df),
            clusters=calib_season_df["cluster"],
            max_clusters=self.__max_cluster,
            adjust_clusters=True,  # since clusters do not start at 0
            offset_col=True,  # since we would ignore the last column because it is obsolete due to our categorical variables,
            # we actually set it all to 1 to account for the offset in our regression model
        )
        self._season_regression_params = (
            np.linalg.inv(season_one_hot.T @ season_one_hot) @ season_one_hot.T @ calib_season_df.iloc[:, 0].to_numpy().reshape(-1, 1)
        )

        id_shape = self._normalize_year(spot)
        calib_id_df = pd.merge(id_shape, cluster_df, left_index=True, right_index=True)
        calib_id_df["t"] = self.times_to_zero_one(calib_id_df.index)
        if self.remove_outlier_id:
            value_clmn = calib_id_df.columns[0]
            calib_id_df = self._remove_outliers(
                df=calib_id_df,
                grouping_clmn="hour",
                value_clmn=value_clmn,
                lower_quantile=self.lower_quantile_id,
                upper_quantile=self.upper_quantile_id,
            )

        self._id_params = {}
        for h in calib_id_df["hour"].unique():
            x = calib_id_df.loc[calib_id_df["hour"] == h, "t"].to_numpy().reshape(-1, 1)
            y = calib_id_df.loc[calib_id_df["hour"] == h, :].iloc[:, 0].to_numpy().reshape(-1, 1)
            m = self.k_fourier * 2 + 1
            x_fit = np.zeros((x.shape[0], m))
            fourier_parts = []
            for k in np.arange(start=1, stop=self.k_fourier + 1):
                fourier_parts.append(np.sin(2 * k * np.pi * x))
                fourier_parts.append(np.cos(2 * k * np.pi * x))
            x_fit[:, :-1] = np.concatenate(fourier_parts, axis=1)
            x_fit[:, -1] = 1.0
            self._id_params[h] = np.linalg.pinv(x_fit) @ y

    def apply(self, apply_schedule: List[dt.datetime]) -> pd.DataFrame:
        cluster_df = self._create_cluster_df(apply_schedule, use_hours=True)
        cluster_df["t"] = self.times_to_zero_one(cluster_df.index)
        season_one_hot = self._create_one_hot_matrix(
            rows=len(cluster_df),
            clusters=cluster_df["cluster"],
            max_clusters=self.__max_cluster,
            adjust_clusters=True,
            offset_col=True,
        )
        season_fit = season_one_hot @ self._season_regression_params
        self._season_fit = season_fit
        cluster_df["id_fit"] = 0.0
        for h in cluster_df["hour"].unique():
            x = cluster_df.loc[cluster_df["hour"] == h, "t"].to_numpy().reshape(-1, 1)
            m = self.k_fourier * 2 + 1
            x_fit = np.zeros((x.shape[0], m))
            fourier_parts = []
            for k in np.arange(start=1, stop=self.k_fourier + 1):
                fourier_parts.append(np.sin(2 * k * np.pi * x))
                fourier_parts.append(np.cos(2 * k * np.pi * x))
            x_fit[:, :-1] = np.concatenate(fourier_parts, axis=1)
            x_fit[:, -1] = 1.0
            cluster_df.loc[cluster_df["hour"] == h, "id_fit"] = (x_fit @ self._id_params[h]).squeeze()
        cluster_df["shape"] = season_fit.squeeze() * cluster_df["id_fit"].to_numpy()
        shape_df = pd.DataFrame(cluster_df.loc[:, "shape"])
        shape_df = self.normalize_shape(shape=shape_df)
        return shape_df

    def _to_dict(self):
        return super()._to_dict()
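

# Hypothetical usage of CategoricalFourierShaper (illustration only; synthetic data as in the
# sketches above, holiday calendar chosen arbitrarily). ``k_fourier`` controls how many
# sine/cosine pairs model the yearly periodicity of each hour-of-day profile; the year
# fractions produced by ``times_to_zero_one`` drive the Fourier terms.
def _example_categorical_fourier_shaper() -> pd.DataFrame:
    hist_idx = pd.date_range(start="2023-01-01", end="2024-12-31 23:00", freq="h")
    rng = np.random.default_rng(2)
    prices = 50.0 + 10.0 * np.sin(2.0 * np.pi * hist_idx.hour / 24.0) + rng.normal(0.0, 2.0, len(hist_idx))
    spot = pd.DataFrame({"price": prices}, index=hist_idx)

    shaper = CategoricalFourierShaper(
        spot_prices=spot,
        holiday_calendar=holidays.Germany(years=[2023, 2024, 2025]),
        k_fourier=2,
    )
    # year fractions: 2025-01-01 00:00 -> 0.0 and, in a non-leap year, 2025-07-02 12:00 -> exactly 0.5
    fractions = shaper.times_to_zero_one(pd.DatetimeIndex(["2025-01-01 00:00", "2025-07-02 12:00"]))
    assert abs(fractions[1] - 0.5) < 1e-12

    shaper.calibrate()
    apply_schedule = list(pd.date_range(start="2025-01-01", end="2025-12-31 23:00", freq="h"))
    return shaper.apply(apply_schedule=apply_schedule)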