Skip to content

EPM

EPM

The EPM (Empirical Performance Model) class is a wrapper for machine learning models that includes preprocessing, normalization, and optional inverse transformation of predictions.

Attributes:

Name Type Description
predictor_class Type[AbstractPredictor] | Type[RegressorMixin]

The class of the predictor to use.

normalization_class Type[AbstractNormalization]

The normalization class to apply to the target variable.

transform_back bool

Whether to apply inverse transformation to predictions.

features_preprocessing Union[str, TransformerMixin]

Preprocessing pipeline for features.

predictor_config Optional[dict]

Configuration for the predictor.

predictor_kwargs Optional[dict]

Additional keyword arguments for the predictor.

Source code in asf/epm/epm.py
class EPM:
    """
    Wrap a predictor with feature preprocessing and target normalization.

    The EPM (Empirical Performance Model) fits a feature preprocessor, fits a
    normalization on the target, trains the predictor on the normalized target
    and, optionally, maps predictions back through the inverse normalization.

    Attributes:
        predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
        model_class: ``predictor_class`` wrapped in ``SklearnWrapper`` when it is a
            ``RegressorMixin`` subclass, otherwise ``predictor_class`` itself.
        normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
        transform_back (bool): Whether to apply inverse transformation to predictions.
        features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
        predictor_config (Optional[dict]): Configuration for the predictor.
        predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
        numpy (bool): True when the most recent ``fit`` call received NumPy arrays
            for both ``X`` and ``y``.
    """

    def __init__(
        self,
        predictor_class: Union[Type[AbstractPredictor], Type[RegressorMixin]],
        normalization_class: Type[AbstractNormalization] = LogNormalization,
        transform_back: bool = True,
        features_preprocessing: Union[str, TransformerMixin] = "default",
        categorical_features: Optional[list] = None,
        numerical_features: Optional[list] = None,
        predictor_config: Optional[dict] = None,
        predictor_kwargs: Optional[dict] = None,
    ):
        """
        Initialize the EPM model.

        Parameters:
            predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
            normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
            transform_back (bool): Whether to apply inverse transformation to predictions.
            features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
                The string "default" builds one via ``get_default_preprocessor``;
                ``None`` disables preprocessing in ``fit``/``predict``.
            categorical_features (Optional[list]): List of categorical feature names.
                Only used when the default preprocessor is built.
            numerical_features (Optional[list]): List of numerical feature names.
                Only used when the default preprocessor is built.
            predictor_config (Optional[dict]): Configuration for the predictor.
            predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
                ``None`` is stored as an empty dict.
        """
        if isinstance(predictor_class, type) and issubclass(
            predictor_class, RegressorMixin
        ):
            self.model_class = partial(SklearnWrapper, predictor_class)
        else:
            self.model_class = predictor_class
        # NOTE(review): model_class is stored here but fit() instantiates
        # predictor_class directly -- confirm which one is intended.

        self.predictor_class = predictor_class
        self.normalization_class = normalization_class
        self.transform_back = transform_back
        self.predictor_config = predictor_config
        self.predictor_kwargs = predictor_kwargs or {}
        self.numpy = False

        if features_preprocessing == "default":
            self.features_preprocessing = get_default_preprocessor(
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
        else:
            self.features_preprocessing = features_preprocessing

    def fit(
        self,
        X: Union[pd.DataFrame, pd.Series, list, np.ndarray],
        y: Union[pd.Series, list, np.ndarray],
        sample_weight: Optional[list] = None,
    ) -> "EPM":
        """
        Fit the EPM model to the data.

        When both ``X`` and ``y`` are NumPy arrays they are converted to a
        DataFrame with columns ``f_0 .. f_{n-1}`` and a Series, respectively.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list, np.ndarray]): Features.
            y (Union[pd.Series, list, np.ndarray]): Target variable.
            sample_weight (Optional[list]): Sample weights (optional).

        Returns:
            EPM: The fitted EPM model.
        """
        # Recompute on every call: a refit on DataFrame input must not inherit
        # the flag from an earlier ndarray fit, otherwise predict() would
        # rename columns to f_i that the refitted preprocessor never saw.
        self.numpy = isinstance(X, np.ndarray) and isinstance(y, np.ndarray)
        if self.numpy:
            X = pd.DataFrame(
                X,
                index=range(len(X)),
                columns=[f"f_{i}" for i in range(X.shape[1])],
            )
            y = pd.Series(
                y,
                index=range(len(y)),
            )

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.fit_transform(X)

        # A fresh normalization is created per fit so refits do not reuse state.
        self.normalization = self.normalization_class()
        self.normalization.fit(y)
        y = self.normalization.transform(y)

        if self.predictor_config is None:
            # NOTE(review): predictor_kwargs are not forwarded on this path --
            # confirm whether that is intended.
            self.predictor = self.predictor_class()
        else:
            # get_from_configuration returns a factory, which is then called.
            self.predictor = self.predictor_class.get_from_configuration(
                self.predictor_config, **self.predictor_kwargs
            )()

        self.predictor.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X: Union[pd.DataFrame, pd.Series, list, np.ndarray]) -> list:
        """
        Predict using the fitted EPM model.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list, np.ndarray]): Features.

        Returns:
            list: Predicted values as returned by the predictor, passed through
            the inverse normalization when ``transform_back`` is true.
        """
        # Mirror the column naming used in fit() when the model was trained
        # on NumPy input.
        if self.numpy and isinstance(X, np.ndarray):
            X = pd.DataFrame(
                X,
                index=range(len(X)),
                columns=[f"f_{i}" for i in range(X.shape[1])],
            )

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.transform(X)

        y_pred = self.predictor.predict(X)

        if self.transform_back:
            y_pred = self.normalization.inverse_transform(y_pred)

        return y_pred

__init__(predictor_class, normalization_class=LogNormalization, transform_back=True, features_preprocessing='default', categorical_features=None, numerical_features=None, predictor_config=None, predictor_kwargs=None)

Initialize the EPM model.

Parameters:

Name Type Description Default
predictor_class Type[AbstractPredictor] | Type[RegressorMixin]

The class of the predictor to use.

required
normalization_class Type[AbstractNormalization]

The normalization class to apply to the target variable.

LogNormalization
transform_back bool

Whether to apply inverse transformation to predictions.

True
features_preprocessing Union[str, TransformerMixin]

Preprocessing pipeline for features.

'default'
categorical_features Optional[list]

List of categorical feature names.

None
numerical_features Optional[list]

List of numerical feature names.

None
predictor_config Optional[dict]

Configuration for the predictor.

None
predictor_kwargs Optional[dict]

Additional keyword arguments for the predictor.

None
Source code in asf/epm/epm.py
def __init__(
    self,
    predictor_class: Union[Type[AbstractPredictor], Type[RegressorMixin]],
    normalization_class: Type[AbstractNormalization] = LogNormalization,
    transform_back: bool = True,
    features_preprocessing: Union[str, TransformerMixin] = "default",
    categorical_features: Optional[list] = None,
    numerical_features: Optional[list] = None,
    predictor_config: Optional[dict] = None,
    predictor_kwargs: Optional[dict] = None,
):
    """
    Set up an EPM around the given predictor class.

    Parameters:
        predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): Predictor class to train.
        normalization_class (Type[AbstractNormalization]): Normalization applied to the target variable.
        transform_back (bool): If true, predictions are inverse-transformed.
        features_preprocessing (Union[str, TransformerMixin]): Feature preprocessing pipeline;
            the string "default" requests the pipeline from ``get_default_preprocessor``.
        categorical_features (Optional[list]): Categorical feature names for the default pipeline.
        numerical_features (Optional[list]): Numerical feature names for the default pipeline.
        predictor_config (Optional[dict]): Configuration for the predictor.
        predictor_kwargs (Optional[dict]): Extra keyword arguments for the predictor;
            a falsy value is stored as an empty dict.
    """
    # Plain settings are stored first; derived attributes follow.
    self.predictor_class = predictor_class
    self.normalization_class = normalization_class
    self.transform_back = transform_back
    self.predictor_config = predictor_config
    self.predictor_kwargs = predictor_kwargs or {}
    self.numpy = False

    # scikit-learn regressor classes get wrapped; anything else is kept as is.
    is_sklearn_regressor = isinstance(predictor_class, type) and issubclass(
        predictor_class, RegressorMixin
    )
    self.model_class = (
        partial(SklearnWrapper, predictor_class)
        if is_sklearn_regressor
        else predictor_class
    )

    preprocessing = features_preprocessing
    if features_preprocessing == "default":
        preprocessing = get_default_preprocessor(
            categorical_features=categorical_features,
            numerical_features=numerical_features,
        )
    self.features_preprocessing = preprocessing

fit(X, y, sample_weight=None)

Fit the EPM model to the data.

Parameters:

Name Type Description Default
X Union[DataFrame, Series, list]

Features.

required
y Union[Series, list]

Target variable.

required
sample_weight Optional[list]

Sample weights (optional).

None

Returns:

Name Type Description
EPM EPM

The fitted EPM model.

Source code in asf/epm/epm.py
def fit(
    self,
    X: Union[pd.DataFrame, pd.Series, list, np.ndarray],
    y: Union[pd.Series, list, np.ndarray],
    sample_weight: Optional[list] = None,
) -> "EPM":
    """
    Fit the EPM model to the data.

    When both ``X`` and ``y`` are NumPy arrays they are converted to a
    DataFrame with columns ``f_0 .. f_{n-1}`` and a Series, respectively.

    Parameters:
        X (Union[pd.DataFrame, pd.Series, list, np.ndarray]): Features.
        y (Union[pd.Series, list, np.ndarray]): Target variable.
        sample_weight (Optional[list]): Sample weights (optional).

    Returns:
        EPM: The fitted EPM model.
    """
    # Recompute on every call: a refit on DataFrame input must not inherit
    # the flag from an earlier ndarray fit, otherwise predict() would
    # rename columns to f_i that the refitted preprocessor never saw.
    self.numpy = isinstance(X, np.ndarray) and isinstance(y, np.ndarray)
    if self.numpy:
        X = pd.DataFrame(
            X,
            index=range(len(X)),
            columns=[f"f_{i}" for i in range(X.shape[1])],
        )
        y = pd.Series(
            y,
            index=range(len(y)),
        )

    if self.features_preprocessing is not None:
        X = self.features_preprocessing.fit_transform(X)

    # A fresh normalization is created per fit so refits do not reuse state.
    self.normalization = self.normalization_class()
    self.normalization.fit(y)
    y = self.normalization.transform(y)

    if self.predictor_config is None:
        # NOTE(review): predictor_kwargs are not forwarded on this path --
        # confirm whether that is intended.
        self.predictor = self.predictor_class()
    else:
        # get_from_configuration returns a factory, which is then called.
        self.predictor = self.predictor_class.get_from_configuration(
            self.predictor_config, **self.predictor_kwargs
        )()

    self.predictor.fit(X, y, sample_weight=sample_weight)
    return self

predict(X)

Predict using the fitted EPM model.

Parameters:

Name Type Description Default
X Union[DataFrame, Series, list]

Features.

required

Returns:

Name Type Description
list list

Predicted values.

Source code in asf/epm/epm.py
def predict(self, X: Union[pd.DataFrame, pd.Series, list]) -> list:
    """
    Produce predictions from the fitted EPM.

    NumPy input is given ``f_<i>`` column names when the model was fitted on
    NumPy arrays, then the features go through the preprocessing pipeline (if
    any), the predictor, and finally the inverse normalization when
    ``transform_back`` is set.

    Parameters:
        X (Union[pd.DataFrame, pd.Series, list]): Features.

    Returns:
        list: Predicted values.
    """
    if self.numpy and isinstance(X, np.ndarray):
        row_index = range(len(X))
        column_names = [f"f_{i}" for i in range(X.shape[1])]
        X = pd.DataFrame(X, index=row_index, columns=column_names)

    preprocessor = self.features_preprocessing
    features = X if preprocessor is None else preprocessor.transform(X)

    predictions = self.predictor.predict(features)

    if not self.transform_back:
        return predictions
    return self.normalization.inverse_transform(predictions)

tune_epm(X, y, model_class, normalization_class=LogNormalization, features_preprocessing='default', categorical_features=None, numerical_features=None, groups=None, cv=5, timeout=3600, runcount_limit=100, output_dir='./smac_output', seed=0, smac_metric=mean_squared_error, smac_scenario_kwargs={}, smac_kwargs={}, predictor_kwargs={})

Tune the Empirical Performance Model (EPM) using SMAC (Sequential Model-based Algorithm Configuration).

Parameters:

X : np.ndarray Feature matrix for training and validation. y : np.ndarray Target values corresponding to the feature matrix. model_class : Type[AbstractPredictor] The predictor class to be tuned. normalization_class : Type[AbstractNormalization], optional The normalization class to be applied to the data. Defaults to LogNormalization. features_preprocessing : Union[str, TransformerMixin], optional Preprocessing method for features. Defaults to "default". categorical_features : Optional[list], optional List of categorical feature names. Defaults to None. numerical_features : Optional[list], optional List of numerical feature names. Defaults to None. groups : Optional[np.ndarray], optional Group labels for cross-validation. Defaults to None. cv : int, optional Number of cross-validation folds. Defaults to 5. timeout : int, optional Time limit for the tuning process in seconds. Defaults to 3600. runcount_limit : int, optional Maximum number of configurations to evaluate. Defaults to 100. output_dir : str, optional Directory to store SMAC output. Defaults to "./smac_output". seed : int, optional Random seed for reproducibility. Defaults to 0. smac_metric : callable, optional Metric function to evaluate model performance. Defaults to mean_squared_error. smac_scenario_kwargs : Optional[dict], optional Additional keyword arguments for the SMAC scenario. Defaults to {}. smac_kwargs : Optional[dict], optional Additional keyword arguments for SMAC optimization. Defaults to {}. predictor_kwargs : Optional[dict], optional Additional keyword arguments for the predictor. Defaults to {}.

Returns:

EPM The tuned Empirical Performance Model instance.

Source code in asf/epm/epm_tuner.py
def tune_epm(
    X: np.ndarray,
    y: np.ndarray,
    model_class: Type[AbstractPredictor],
    normalization_class: Type[AbstractNormalization] = LogNormalization,
    features_preprocessing: Union[str, TransformerMixin] = "default",
    categorical_features: Optional[list] = None,
    numerical_features: Optional[list] = None,
    groups: Optional[np.ndarray] = None,
    cv: int = 5,
    timeout: int = 3600,
    runcount_limit: int = 100,
    output_dir: str = "./smac_output",
    seed: int = 0,
    smac_metric: callable = mean_squared_error,
    smac_scenario_kwargs: Optional[dict] = {},
    smac_kwargs: Optional[dict] = {},
    predictor_kwargs: Optional[dict] = {},
) -> EPM:
    """
    Tune the Empirical Performance Model (EPM) using SMAC (Sequential Model-based Algorithm Configuration).

    Each candidate configuration is scored by cross-validation: an EPM is
    fitted on every training fold and ``smac_metric`` is averaged over the
    held-out folds.

    Parameters:
    ----------
    X : np.ndarray
        Feature matrix for training and validation. A NumPy array is converted
        to a DataFrame with columns ``f_0 .. f_{n-1}``.
    y : np.ndarray
        Target values corresponding to the feature matrix. A NumPy array is
        converted to a Series.
    model_class : Type[AbstractPredictor]
        The predictor class to be tuned.
    normalization_class : Type[AbstractNormalization], optional
        The normalization class to be applied to the data. Defaults to LogNormalization.
    features_preprocessing : Union[str, TransformerMixin], optional
        Preprocessing method for features. Defaults to "default".
    categorical_features : Optional[list], optional
        List of categorical feature names. Defaults to None.
    numerical_features : Optional[list], optional
        List of numerical feature names. Defaults to None.
    groups : Optional[np.ndarray], optional
        Group labels for cross-validation. When given, grouped shuffled folds
        are used instead of plain K-fold. Defaults to None.
    cv : int, optional
        Number of cross-validation folds. Defaults to 5.
    timeout : int, optional
        Time limit for the tuning process in seconds. Defaults to 3600.
    runcount_limit : int, optional
        Maximum number of configurations to evaluate. Defaults to 100.
    output_dir : str, optional
        Directory to store SMAC output. Defaults to "./smac_output".
    seed : int, optional
        Random seed for reproducibility. Defaults to 0.
    smac_metric : callable, optional
        Metric function to evaluate model performance, called as
        ``smac_metric(y_true, y_pred)``. Defaults to mean_squared_error.
    smac_scenario_kwargs : Optional[dict], optional
        Additional keyword arguments for the SMAC scenario. Defaults to an
        empty dict; None is treated the same way.
    smac_kwargs : Optional[dict], optional
        Additional keyword arguments for SMAC optimization. Defaults to an
        empty dict; None is treated the same way.
    predictor_kwargs : Optional[dict], optional
        Additional keyword arguments for the predictor. Defaults to an empty
        dict; None is treated the same way.

    Returns:
    -------
    EPM
        A new EPM configured with the best configuration found. It is not
        fitted; call ``fit`` on it before predicting.
    """
    # The parameters are annotated Optional, so accept None, and copy so that
    # neither the shared default dicts nor caller-owned dicts are aliased.
    smac_scenario_kwargs = dict(smac_scenario_kwargs or {})
    smac_kwargs = dict(smac_kwargs or {})
    predictor_kwargs = dict(predictor_kwargs or {})

    # Convert each input on its own: the folds below index with .iloc, which
    # plain ndarrays do not provide.
    if isinstance(X, np.ndarray):
        X = pd.DataFrame(
            X,
            index=range(len(X)),
            columns=[f"f_{i}" for i in range(X.shape[1])],
        )
    if isinstance(y, np.ndarray):
        y = pd.Series(
            y,
            index=range(len(y)),
        )

    scenario = Scenario(
        configspace=model_class.get_configuration_space(),
        n_trials=runcount_limit,
        walltime_limit=timeout,
        deterministic=True,
        output_directory=output_dir,
        seed=seed,
        **smac_scenario_kwargs,
    )

    def target_function(config, seed):
        """Return the mean cross-validated ``smac_metric`` for ``config``."""
        if groups is not None:
            kfold = GroupKFoldShuffle(n_splits=cv, shuffle=True, random_state=seed)
        else:
            kfold = KFold(n_splits=cv, shuffle=True, random_state=seed)

        scores = []
        for train_idx, test_idx in kfold.split(X, y, groups):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            epm = EPM(
                predictor_class=model_class,
                normalization_class=normalization_class,
                transform_back=True,
                predictor_config=config,
                predictor_kwargs=predictor_kwargs,
                features_preprocessing=features_preprocessing,
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
            epm.fit(X_train, y_train)

            y_pred = epm.predict(X_test)
            scores.append(smac_metric(y_test, y_pred))

        return np.mean(scores)

    smac = HyperparameterOptimizationFacade(scenario, target_function, **smac_kwargs)
    best_config = smac.optimize()

    # Build the result exactly like the models scored during tuning,
    # including predictor_kwargs, so the tuned model matches what was evaluated.
    return EPM(
        predictor_class=model_class,
        normalization_class=normalization_class,
        transform_back=True,
        predictor_config=best_config,
        predictor_kwargs=predictor_kwargs,
        features_preprocessing=features_preprocessing,
        categorical_features=categorical_features,
        numerical_features=numerical_features,
    )