
Selectors

AbstractFeatureGenerator

AbstractFeatureGenerator is a base class for generating additional features based on a set of base features. Subclasses should implement the methods to define specific feature generation logic.

Source code in asf/selectors/feature_generator.py
class AbstractFeatureGenerator:
    """
    AbstractFeatureGenerator is a base class for generating additional features
    based on a set of base features. Subclasses should implement the methods
    to define specific feature generation logic.
    """

    def __init__(self) -> None:
        """
        Initialize the AbstractFeatureGenerator.
        """
        pass

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate additional features based on the provided base features.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input DataFrame containing the base features.

        Returns
        -------
        pd.DataFrame
            A DataFrame containing the generated features.

        Raises
        ------
        NotImplementedError
            If the method is not implemented in a subclass.
        """
        raise NotImplementedError(
            "generate_features() must be implemented in a subclass"
        )
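
For illustration, a concrete generator only needs to override generate_features(). The following minimal sketch (the SquaredFeatureGenerator name and the squaring transform are hypothetical, not part of asf) appends one squared column per base feature:

import pandas as pd

from asf.selectors.feature_generator import AbstractFeatureGenerator


class SquaredFeatureGenerator(AbstractFeatureGenerator):
    """Appends the square of every numeric base feature."""

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        # One generated column per base column; the suffix avoids name
        # collisions when the result is concatenated with the base features.
        return (base_features ** 2).add_suffix("_squared")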

__init__()

Initialize the AbstractFeatureGenerator.

Source code in asf/selectors/feature_generator.py
def __init__(self) -> None:
    """
    Initialize the AbstractFeatureGenerator.
    """
    pass

generate_features(base_features)

Generate additional features based on the provided base features.

Parameters

base_features : pd.DataFrame
    The input DataFrame containing the base features.

Returns

pd.DataFrame
    A DataFrame containing the generated features.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/feature_generator.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate additional features based on the provided base features.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input DataFrame containing the base features.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the generated features.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "generate_features() must be implemented in a subclass"
    )

AbstractModelBasedSelector

Bases: AbstractSelector

An abstract base class for selectors that utilize a machine learning model for selection purposes. This class provides functionality to initialize with a model class, save the selector to a file, and load it back.

Attributes:

model_class (Callable): A callable that represents the model class to be used. If the provided model_class is a subclass of ClassifierMixin or RegressorMixin, it is wrapped using SklearnWrapper.

Methods:

save(path: Union[str, Path]) -> None: Saves the current instance of the selector to the specified file path.

load(path: Union[str, Path]) -> "AbstractModelBasedSelector": Loads a previously saved instance of the selector from the specified file path.

Source code in asf/selectors/abstract_model_based_selector.py
class AbstractModelBasedSelector(AbstractSelector):
    """
    An abstract base class for selectors that utilize a machine learning model
    for selection purposes. This class provides functionality to initialize
    with a model class, save the selector to a file, and load it back.

    Attributes:
        model_class (Callable): A callable that represents the model class to
            be used. If the provided model_class is a subclass of
            `ClassifierMixin` or `RegressorMixin`, it is wrapped using
            `SklearnWrapper`.

    Methods:
        save(path: Union[str, Path]) -> None:
            Saves the current instance of the selector to the specified file path.
        load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
            Loads a previously saved instance of the selector from the specified file path.
    """

    def __init__(self, model_class: Type[AbstractPredictor], **kwargs: Any) -> None:
        """
        Initializes the AbstractModelBasedSelector.

        Args:
            model_class (Union[Type, Callable]): The model class or a callable
                that returns a model instance. If a scikit-learn compatible
                class is provided, it's wrapped with SklearnWrapper.
            **kwargs (Any): Additional keyword arguments passed to the
                parent class initializer.
        """
        super().__init__(**kwargs)

        if isinstance(model_class, type) and issubclass(
            model_class, (ClassifierMixin, RegressorMixin)
        ):
            self.model_class: Callable = partial(SklearnWrapper, model_class)
        else:
            self.model_class: Callable = model_class

    def save(self, path: Union[str, Path]) -> None:
        """
        Saves the selector instance to the specified file path.

        Args:
            path (Union[str, Path]): The file path to save the selector.
        """
        joblib.dump(self, path)

    @staticmethod
    def load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
        """
        Loads a selector instance from the specified file path.

        Args:
            path (Union[str, Path]): The file path to load the selector from.

        Returns:
            AbstractModelBasedSelector: The loaded selector instance.
        """
        return joblib.load(path)
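
A hedged sketch of the save/load round-trip, using the MultiClassClassifier documented below (the asf.selectors import path and the toy data are assumptions):

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from asf.selectors import MultiClassClassifier  # import path is an assumption

# Toy data: 4 instances, 2 features, 2 algorithms (runtimes; lower is better).
features = pd.DataFrame({"f_0": [0.1, 0.9, 0.4, 0.7], "f_1": [1.0, 0.2, 0.8, 0.3]})
performance = pd.DataFrame({"algo_0": [1.0, 5.0, 2.0, 4.0], "algo_1": [3.0, 1.0, 3.0, 1.5]})

# A ClassifierMixin subclass is wrapped with SklearnWrapper automatically.
selector = MultiClassClassifier(RandomForestClassifier)
selector.fit(features, performance)

selector.save("selector.joblib")
restored = MultiClassClassifier.load("selector.joblib")  # load() is a staticmethod
print(restored.predict(features))  # {instance: [(algorithm, budget)]}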

__init__(model_class, **kwargs)

Initializes the AbstractModelBasedSelector.

Parameters:

model_class (Union[Type, Callable], required): The model class or a callable that returns a model instance. If a scikit-learn compatible class is provided, it's wrapped with SklearnWrapper.

**kwargs (Any, default {}): Additional keyword arguments passed to the parent class initializer.
Source code in asf/selectors/abstract_model_based_selector.py
def __init__(self, model_class: Type[AbstractPredictor], **kwargs: Any) -> None:
    """
    Initializes the AbstractModelBasedSelector.

    Args:
        model_class (Union[Type, Callable]): The model class or a callable
            that returns a model instance. If a scikit-learn compatible
            class is provided, it's wrapped with SklearnWrapper.
        **kwargs (Any): Additional keyword arguments passed to the
            parent class initializer.
    """
    super().__init__(**kwargs)

    if isinstance(model_class, type) and issubclass(
        model_class, (ClassifierMixin, RegressorMixin)
    ):
        self.model_class: Callable = partial(SklearnWrapper, model_class)
    else:
        self.model_class: Callable = model_class

load(path) staticmethod

Loads a selector instance from the specified file path.

Parameters:

path (Union[str, Path], required): The file path to load the selector from.

Returns:

AbstractModelBasedSelector: The loaded selector instance.

Source code in asf/selectors/abstract_model_based_selector.py
@staticmethod
def load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
    """
    Loads a selector instance from the specified file path.

    Args:
        path (Union[str, Path]): The file path to load the selector from.

    Returns:
        AbstractModelBasedSelector: The loaded selector instance.
    """
    return joblib.load(path)

save(path)

Saves the selector instance to the specified file path.

Parameters:

path (Union[str, Path], required): The file path to save the selector.
Source code in asf/selectors/abstract_model_based_selector.py
def save(self, path: Union[str, Path]) -> None:
    """
    Saves the selector instance to the specified file path.

    Args:
        path (Union[str, Path]): The file path to save the selector.
    """
    joblib.dump(self, path)

AbstractSelector

AbstractSelector is a base class for implementing algorithm selection methods. It provides a framework for fitting, predicting, and managing hierarchical feature generators and configuration spaces.

Attributes

maximize : bool
    Indicates whether the objective is to maximize or minimize the performance metric.
budget : int or None
    The budget for the selector, if applicable.
feature_groups : list[str] or None
    Groups of features to be considered during selection.
hierarchical_generator : AbstractFeatureGenerator or None
    A generator for hierarchical features, if applicable.
algorithm_features : pd.DataFrame or None
    Additional features related to algorithms, if provided.

Source code in asf/selectors/abstract_selector.py
class AbstractSelector:
    """
    AbstractSelector is a base class for implementing algorithm selection methods.
    It provides a framework for fitting, predicting, and managing hierarchical feature
    generators and configuration spaces.

    Attributes
    ----------
    maximize : bool
        Indicates whether the objective is to maximize or minimize the performance metric.
    budget : int or None
        The budget for the selector, if applicable.
    feature_groups : list[str] or None
        Groups of features to be considered during selection.
    hierarchical_generator : AbstractFeatureGenerator or None
        A generator for hierarchical features, if applicable.
    algorithm_features : pd.DataFrame or None
        Additional features related to algorithms, if provided.
    """

    def __init__(
        self,
        budget: int | None = None,
        maximize: bool = False,
        feature_groups: list[str] | None = None,
        hierarchical_generator: AbstractFeatureGenerator | None = None,
    ):
        """
        Initialize the AbstractSelector.

        Parameters
        ----------
        budget : int or None, optional
            The budget for the selector, if applicable. Defaults to None.
        maximize : bool, optional
            Indicates whether to maximize the performance metric. Defaults to False.
        feature_groups : list[str] or None, optional
            Groups of features to be considered during selection. Defaults to None.
        hierarchical_generator : AbstractFeatureGenerator or None, optional
            A generator for hierarchical features, if applicable. Defaults to None.
        """
        self.maximize = maximize
        self.budget = budget
        self.feature_groups = feature_groups
        self.hierarchical_generator = hierarchical_generator
        self.algorithm_features: pd.DataFrame | None = None

    def fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        algorithm_features: pd.DataFrame | None = None,
        **kwargs,
    ) -> None:
        """
        Fit the selector to the given features and performance data.

        Parameters
        ----------
        features : pd.DataFrame
            The input features for the selector.
        performance : pd.DataFrame
            The performance data corresponding to the features.
        algorithm_features : pd.DataFrame or None, optional
            Additional features related to algorithms, if provided. Defaults to None.
        **kwargs : dict
            Additional keyword arguments for fitting.
        """
        if isinstance(features, np.ndarray) and isinstance(performance, np.ndarray):
            features = pd.DataFrame(
                features,
                index=range(len(features)),
                columns=[f"f_{i}" for i in range(features.shape[1])],
            )
            performance = pd.DataFrame(
                performance,
                index=range(len(performance)),
                columns=[f"algo_{i}" for i in range(performance.shape[1])],
            )
        elif isinstance(features, pd.DataFrame) and isinstance(
            performance, pd.DataFrame
        ):
            pass
        else:
            raise ValueError(
                "features and performance must be either numpy arrays or pandas DataFrames"
            )

        if self.hierarchical_generator is not None:
            self.hierarchical_generator.fit(features, performance, algorithm_features)
            features = pd.concat(
                [features, self.hierarchical_generator.generate_features(features)],
                axis=1,
            )
        self.algorithms: list[str] = performance.columns.to_list()
        self.features: list[str] = features.columns.to_list()
        self.algorithm_features = algorithm_features
        self._fit(features, performance, **kwargs)

    def predict(self, features: pd.DataFrame) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm(s) for the given input features.

        Parameters
        ----------
        features : pd.DataFrame
            The input features for prediction.

        Returns
        -------
        dict[str, list[tuple[str, float]]]
            A dictionary where keys are instance names and values are lists of tuples
            containing algorithm names and their associated budget or score.
        """
        if self.hierarchical_generator is not None:
            features = pd.concat(
                [features, self.hierarchical_generator.generate_features(features)],
                axis=1,
            )
        return self._predict(features)

    def save(self, path: str) -> None:
        """
        Save the selector's state to the specified path.

        Parameters
        ----------
        path : str
            The file path where the selector's state will be saved.
        """
        pass

    def load(self, path: str) -> None:
        """
        Load the selector's state from the specified path.

        Parameters
        ----------
        path : str
            The file path from which the selector's state will be loaded.
        """
        pass

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: ConfigurationSpace | None = None, **kwargs
        ) -> ConfigurationSpace:
            """
            Get the configuration space for the selector.

            Parameters
            ----------
            cs : ConfigurationSpace or None, optional
                The configuration space to use. If None, a new one will be created.
            **kwargs : dict
                Additional keyword arguments for configuration space creation.

            Returns
            -------
            ConfigurationSpace
                The configuration space for the selector.

            Raises
            ------
            NotImplementedError
                If the method is not implemented in a subclass.
            """
            raise NotImplementedError(
                "get_configuration_space() is not implemented for this selector"
            )

        @staticmethod
        def get_from_configuration(configuration: Configuration) -> "AbstractSelector":
            """
            Create a selector instance from a configuration.

            Parameters
            ----------
            configuration : Configuration
                The configuration object.

            Returns
            -------
            AbstractSelector
                The selector instance.

            Raises
            ------
            NotImplementedError
                If the method is not implemented in a subclass.
            """
            raise NotImplementedError(
                "get_from_configuration() is not implemented for this selector"
            )

        @staticmethod
        def _add_hierarchical_generator_space(
            cs: ConfigurationSpace,
            hierarchical_generator: list[AbstractFeatureGenerator] | None = None,
            **kwargs,
        ) -> ConfigurationSpace:
            """
            Add the hierarchical generator space to the configuration space.

            Parameters
            ----------
            cs : ConfigurationSpace
                The configuration space to use.
            hierarchical_generator : list[AbstractFeatureGenerator] or None, optional
                The list of hierarchical generators to add. Defaults to None.
            **kwargs : dict
                Additional keyword arguments to pass to the model class.

            Returns
            -------
            ConfigurationSpace
                The updated configuration space.
            """
            if hierarchical_generator is not None:
                if "hierarchical_generator" in cs:
                    return cs

                cs.add(
                    Categorical(
                        name="hierarchical_generator",
                        items=hierarchical_generator,
                    )
                )

                for generator in hierarchical_generator:
                    generator.get_configuration_space(cs=cs, **kwargs)

            return cs
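
fit() also accepts numpy arrays and coerces them to DataFrames; a sketch of the equivalent naming convention it applies (f_0..f_{n-1} for features, algo_0..algo_{m-1} for algorithms):

import numpy as np
import pandas as pd

X = np.random.rand(5, 3)  # 5 instances, 3 features
Y = np.random.rand(5, 2)  # performance of 2 algorithms on those instances

# Equivalent to the coercion performed inside AbstractSelector.fit():
features = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])
performance = pd.DataFrame(Y, columns=[f"algo_{i}" for i in range(Y.shape[1])])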

__init__(budget=None, maximize=False, feature_groups=None, hierarchical_generator=None)

Initialize the AbstractSelector.

Parameters

budget : int or None, optional
    The budget for the selector, if applicable. Defaults to None.
maximize : bool, optional
    Indicates whether to maximize the performance metric. Defaults to False.
feature_groups : list[str] or None, optional
    Groups of features to be considered during selection. Defaults to None.
hierarchical_generator : AbstractFeatureGenerator or None, optional
    A generator for hierarchical features, if applicable. Defaults to None.

Source code in asf/selectors/abstract_selector.py
def __init__(
    self,
    budget: int | None = None,
    maximize: bool = False,
    feature_groups: list[str] | None = None,
    hierarchical_generator: AbstractFeatureGenerator | None = None,
):
    """
    Initialize the AbstractSelector.

    Parameters
    ----------
    budget : int or None, optional
        The budget for the selector, if applicable. Defaults to None.
    maximize : bool, optional
        Indicates whether to maximize the performance metric. Defaults to False.
    feature_groups : list[str] or None, optional
        Groups of features to be considered during selection. Defaults to None.
    hierarchical_generator : AbstractFeatureGenerator or None, optional
        A generator for hierarchical features, if applicable. Defaults to None.
    """
    self.maximize = maximize
    self.budget = budget
    self.feature_groups = feature_groups
    self.hierarchical_generator = hierarchical_generator
    self.algorithm_features: pd.DataFrame | None = None

fit(features, performance, algorithm_features=None, **kwargs)

Fit the selector to the given features and performance data.

Parameters

features : pd.DataFrame
    The input features for the selector.
performance : pd.DataFrame
    The performance data corresponding to the features.
algorithm_features : pd.DataFrame or None, optional
    Additional features related to algorithms, if provided. Defaults to None.
**kwargs : dict
    Additional keyword arguments for fitting.

Source code in asf/selectors/abstract_selector.py
def fit(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame,
    algorithm_features: pd.DataFrame | None = None,
    **kwargs,
) -> None:
    """
    Fit the selector to the given features and performance data.

    Parameters
    ----------
    features : pd.DataFrame
        The input features for the selector.
    performance : pd.DataFrame
        The performance data corresponding to the features.
    algorithm_features : pd.DataFrame or None, optional
        Additional features related to algorithms, if provided. Defaults to None.
    **kwargs : dict
        Additional keyword arguments for fitting.
    """
    if isinstance(features, np.ndarray) and isinstance(performance, np.ndarray):
        features = pd.DataFrame(
            features,
            index=range(len(features)),
            columns=[f"f_{i}" for i in range(features.shape[1])],
        )
        performance = pd.DataFrame(
            performance,
            index=range(len(performance)),
            columns=[f"algo_{i}" for i in range(performance.shape[1])],
        )
    elif isinstance(features, pd.DataFrame) and isinstance(
        performance, pd.DataFrame
    ):
        pass
    else:
        raise ValueError(
            "features and performance must be either numpy arrays or pandas DataFrames"
        )

    if self.hierarchical_generator is not None:
        self.hierarchical_generator.fit(features, performance, algorithm_features)
        features = pd.concat(
            [features, self.hierarchical_generator.generate_features(features)],
            axis=1,
        )
    self.algorithms: list[str] = performance.columns.to_list()
    self.features: list[str] = features.columns.to_list()
    self.algorithm_features = algorithm_features
    self._fit(features, performance, **kwargs)

get_configuration_space(cs=None, **kwargs) staticmethod

Get the configuration space for the selector.

Parameters

cs : ConfigurationSpace or None, optional
    The configuration space to use. If None, a new one will be created.
**kwargs : dict
    Additional keyword arguments for configuration space creation.

Returns

ConfigurationSpace
    The configuration space for the selector.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_configuration_space(
    cs: ConfigurationSpace | None = None, **kwargs
) -> ConfigurationSpace:
    """
    Get the configuration space for the selector.

    Parameters
    ----------
    cs : ConfigurationSpace or None, optional
        The configuration space to use. If None, a new one will be created.
    **kwargs : dict
        Additional keyword arguments for configuration space creation.

    Returns
    -------
    ConfigurationSpace
        The configuration space for the selector.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "get_configuration_space() is not implemented for this selector"
    )

get_from_configuration(configuration) staticmethod

Create a selector instance from a configuration.

Parameters

configuration : Configuration
    The configuration object.

Returns

AbstractSelector
    The selector instance.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_from_configuration(configuration: Configuration) -> "AbstractSelector":
    """
    Create a selector instance from a configuration.

    Parameters
    ----------
    configuration : Configuration
        The configuration object.

    Returns
    -------
    AbstractSelector
        The selector instance.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "get_from_configuration() is not implemented for this selector"
    )

load(path)

Load the selector's state from the specified path.

Parameters

path : str
    The file path from which the selector's state will be loaded.

Source code in asf/selectors/abstract_selector.py
def load(self, path: str) -> None:
    """
    Load the selector's state from the specified path.

    Parameters
    ----------
    path : str
        The file path from which the selector's state will be loaded.
    """
    pass

predict(features)

Predict the best algorithm(s) for the given input features.

Parameters

features : pd.DataFrame
    The input features for prediction.

Returns

dict[str, list[tuple[str, float]]]
    A dictionary where keys are instance names and values are lists of tuples
    containing algorithm names and their associated budget or score.

Source code in asf/selectors/abstract_selector.py
def predict(self, features: pd.DataFrame) -> dict[str, list[tuple[str, float]]]:
    """
    Predict the best algorithm(s) for the given input features.

    Parameters
    ----------
    features : pd.DataFrame
        The input features for prediction.

    Returns
    -------
    dict[str, list[tuple[str, float]]]
        A dictionary where keys are instance names and values are lists of tuples
        containing algorithm names and their associated budget or score.
    """
    if self.hierarchical_generator is not None:
        features = pd.concat(
            [features, self.hierarchical_generator.generate_features(features)],
            axis=1,
        )
    return self._predict(features)

save(path)

Save the selector's state to the specified path.

Parameters

path : str
    The file path where the selector's state will be saved.

Source code in asf/selectors/abstract_selector.py
def save(self, path: str) -> None:
    """
    Save the selector's state to the specified path.

    Parameters
    ----------
    path : str
        The file path where the selector's state will be saved.
    """
    pass

JointRanking

Bases: AbstractSelector, AbstractFeatureGenerator

JointRanking implements a ranking-based approach for selecting the best-performing algorithms for a given set of features. It combines feature generation and model-based selection to predict algorithm performance.

Reference

Ortuzk et al. (2022)

Source code in asf/selectors/joint_ranking.py
class JointRanking(AbstractSelector, AbstractFeatureGenerator):
    """
    JointRanking implements a ranking-based approach for selecting the best-performing
    algorithms for a given set of features. It combines feature generation and model-based
    selection to predict algorithm performance.

    Reference:
        Ortuzk et al. (2022)
    """

    def __init__(
        self,
        model: RankingMLP = None,
        **kwargs,
    ) -> None:
        """
        Initializes the JointRanking selector with the given parameters.

        Args:
            model (RankingMLP, optional): The regression model to be used for ranking.
            **kwargs: Additional arguments passed to the AbstractSelector.
        """
        AbstractSelector.__init__(self, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.model: RankingMLP = model

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the regression models to the given features and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
            performance (pd.DataFrame): DataFrame containing the performance data.
        """
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=self.algorithms,
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],
            )

        if self.model is None:
            self.model = RankingMLP(
                input_size=len(self.features) + len(self.algorithms)
            )

        self.model.fit(features[self.features], performance, self.algorithm_features)

    def _predict(self, features: pd.DataFrame) -> dict:
        """
        Predicts the performance of algorithms for the given features.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            dict: A dictionary mapping instance names to the predicted best algorithm
                  and the associated budget.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.algorithms[
                        np.argmax(predictions.loc[i])
                        if self.maximize
                        else np.argmin(predictions.loc[i])
                    ],
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates predictions for the given features using the trained models.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            pd.DataFrame: DataFrame containing the predictions for each algorithm.
        """
        predictions = np.zeros((features.shape[0], len(self.algorithms)))

        features = features[self.features]
        for i, algorithm in enumerate(self.algorithms):
            data = features.assign(**self.algorithm_features.loc[algorithm])
            data = data[self.algorithm_features.columns.to_list() + self.features]
            prediction = self.model.predict(data)
            predictions[:, i] = prediction.flatten()

        return pd.DataFrame(predictions, columns=self.algorithms)
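
A hedged usage sketch (the asf.selectors import path and the toy data are assumptions; when no model is passed, a RankingMLP sized to the data is built lazily during fitting):

import pandas as pd

from asf.selectors import JointRanking  # import path is an assumption

features = pd.DataFrame({"f_0": [0.1, 0.9, 0.4], "f_1": [1.0, 0.2, 0.8]})
performance = pd.DataFrame({"algo_0": [1.0, 5.0, 2.0], "algo_1": [3.0, 1.0, 3.0]})

selector = JointRanking()           # _fit() constructs the RankingMLP
selector.fit(features, performance)
print(selector.predict(features))   # {instance: [(algorithm, budget)]}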

__init__(model=None, **kwargs)

Initializes the JointRanking selector with the given parameters.

Parameters:

model (RankingMLP, default None): The regression model to be used for ranking.

**kwargs (default {}): Additional arguments passed to the AbstractSelector.
Source code in asf/selectors/joint_ranking.py
def __init__(
    self,
    model: RankingMLP = None,
    **kwargs,
) -> None:
    """
    Initializes the JointRanking selector with the given parameters.

    Args:
        model (RankingMLP, optional): The regression model to be used for ranking.
        **kwargs: Additional arguments passed to the AbstractSelector.
    """
    AbstractSelector.__init__(self, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.model: RankingMLP = model

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

features (DataFrame, required): DataFrame containing the feature data.

Returns:

pd.DataFrame: DataFrame containing the predictions for each algorithm.

Source code in asf/selectors/joint_ranking.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates predictions for the given features using the trained models.

    Args:
        features (pd.DataFrame): DataFrame containing the feature data.

    Returns:
        pd.DataFrame: DataFrame containing the predictions for each algorithm.
    """
    predictions = np.zeros((features.shape[0], len(self.algorithms)))

    features = features[self.features]
    for i, algorithm in enumerate(self.algorithms):
        data = features.assign(**self.algorithm_features.loc[algorithm])
        data = data[self.algorithm_features.columns.to_list() + self.features]
        prediction = self.model.predict(data)
        predictions[:, i] = prediction.flatten()

    return pd.DataFrame(predictions, columns=self.algorithms)

MultiClassClassifier

Bases: AbstractModelBasedSelector

A selector that uses a multi-class classification model to predict the best algorithm for a given set of features and performance data.

Source code in asf/selectors/mutli_class.py
class MultiClassClassifier(AbstractModelBasedSelector):
    """
    A selector that uses a multi-class classification model to predict the best algorithm
    for a given set of features and performance data.
    """

    def __init__(self, model_class: Type[AbstractPredictor], **kwargs):
        """
        Initializes the MultiClassClassifier.

        Args:
            model_class: The class of the model to be used for classification.
            **kwargs: Additional keyword arguments to be passed to the parent class.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier: object = None

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the classification model to the given feature and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
                Each row corresponds to an instance, and each column corresponds to a feature.
            performance (pd.DataFrame): DataFrame containing the performance data.
                Each row corresponds to an instance, and each column corresponds to an algorithm.
        """
        assert self.algorithm_features is None, (
            "MultiClassClassifier does not use algorithm features."
        )
        self.classifier = self.model_class()
        # Use the index of the algorithm with the best performance (lowest value) as the target
        self.classifier.fit(features, np.argmin(performance.values, axis=1))

    def _predict(self, features: pd.DataFrame) -> dict:
        """
        Predicts the best algorithm for each instance in the given feature data using simple multi-class classification.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
                Each row corresponds to an instance, and each column corresponds to a feature.

        Returns:
            dict: A dictionary mapping instance names (index of the features DataFrame)
                  to a list containing a tuple of the predicted best algorithm and the budget.
                  Example: {instance_name: [(algorithm_name, budget)]}
        """
        predictions = self.classifier.predict(features)

        return {
            instance_name: [(self.algorithms[predictions[i]], self.budget)]
            for i, instance_name in enumerate(features.index)
        }
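
The fitting target is simply the column index of the best-performing algorithm per instance (lowest value, since lower is better here); a sketch of the equivalent computation:

import numpy as np
import pandas as pd

performance = pd.DataFrame({"algo_0": [1.0, 5.0, 2.0], "algo_1": [3.0, 1.0, 3.0]})

labels = np.argmin(performance.values, axis=1)
print(labels)  # [0 1 0] -> algo_0 is best for instances 0 and 2, algo_1 for instance 1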

__init__(model_class, **kwargs)

Initializes the MultiClassClassifier.

Parameters:

model_class (Type[AbstractPredictor], required): The class of the model to be used for classification.

**kwargs (default {}): Additional keyword arguments to be passed to the parent class.
Source code in asf/selectors/mutli_class.py
def __init__(self, model_class: Type[AbstractPredictor], **kwargs):
    """
    Initializes the MultiClassClassifier.

    Args:
        model_class: The class of the model to be used for classification.
        **kwargs: Additional keyword arguments to be passed to the parent class.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    self.classifier: object = None

PairwiseClassifier

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

Source code in asf/selectors/pairwise_classifier.py
class PairwiseClassifier(AbstractModelBasedSelector, AbstractFeatureGenerator):
    PREFIX = "pairwise_classifier"
    """
    PairwiseClassifier is a selector that uses pairwise comparison of algorithms
    to predict the best algorithm for a given instance.

    Attributes:
        PREFIX (str): Prefix used for configuration space parameters.
        classifiers (List[AbstractPredictor]): List of trained classifiers for pairwise comparisons.
        use_weights (bool): Whether to use weights based on performance differences.
    """

    def __init__(
        self, model_class: type[AbstractPredictor], use_weights: bool = True, **kwargs
    ):
        """
        Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

        Args:
            model_class (type[AbstractPredictor]): The classifier model to be used for pairwise comparisons.
            use_weights (bool): Whether to use weights based on performance differences. Defaults to True.
            **kwargs: Additional keyword arguments for the parent class.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.classifiers: List[AbstractPredictor] = []
        self.use_weights: bool = use_weights

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the pairwise classifiers using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseClassifier does not use algorithm features."
        )
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                if self.maximize:
                    diffs = algo1_times > algo2_times
                else:
                    diffs = algo1_times < algo2_times

                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None
                    if not self.use_weights
                    else np.abs(algo1_times - algo2_times),
                )
                self.classifiers.append(cur_model)

    def _predict(
        self, features: pd.DataFrame
    ) -> Dict[str, List[Tuple[str, Union[int, float]]]]:
        """
        Predicts the best algorithm for each instance using the trained pairwise classifiers.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            Dict[str, List[Tuple[str, Union[int, float]]]]: A dictionary mapping instance names to the predicted best algorithm and budget.
        """
        predictions_sum = self.generate_features(features)

        return {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmax(),
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates features for the pairwise classifiers.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.
        """
        cnt = 0
        predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
        for i, algorithm in enumerate(self.algorithms):
            for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
                prediction = self.classifiers[cnt].predict(features)
                predictions_sum.loc[prediction, algorithm] += 1
                predictions_sum.loc[~prediction, other_algorithm] += 1
                cnt += 1

        return predictions_sum

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: Optional[ConfigurationSpace] = None,
            cs_transform: Optional[Dict[str, dict]] = None,
            model_class: List[type[AbstractPredictor]] = [
                RandomForestClassifierWrapper,
                XGBoostClassifierWrapper,
            ],
            pre_prefix: str = "",
            parent_param: Optional[Hyperparameter] = None,
            parent_value: Optional[str] = None,
            **kwargs,
        ) -> Tuple[ConfigurationSpace, Dict[str, dict]]:
            """
            Get the configuration space for the predictor.

            Args:
                cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
                cs_transform (Optional[Dict[str, dict]]): A dictionary for transforming configuration space parameters.
                model_class (List[type[AbstractPredictor]]): The list of model classes to use. Defaults to [RandomForestClassifierWrapper, XGBoostClassifierWrapper].
                hierarchical_generator (Optional[List[AbstractFeatureGenerator]]): List of hierarchical feature generators.
                **kwargs: Additional keyword arguments to pass to the model class.

            Returns:
                Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.
            """
            if cs is None:
                cs = ConfigurationSpace()

            if cs_transform is None:
                cs_transform = dict()

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
            else:
                prefix = PairwiseClassifier.PREFIX

            model_class_param = Categorical(
                name=f"{prefix}:model_class",
                items=[str(c.__name__) for c in model_class],
            )

            cs_transform[f"{prefix}:model_class"] = {
                str(c.__name__): c for c in model_class
            }

            use_weights_param = Categorical(
                name=f"{prefix}:use_weights",
                items=[True, False],
            )

            params = [model_class_param, use_weights_param]

            if parent_param is not None:
                conditions = [
                    EqualsCondition(
                        child=param,
                        parent=parent_param,
                        value=parent_value,
                    )
                    for param in params
                ]
            else:
                conditions = []

            cs.add(params + conditions)

            for model in model_class:
                model.get_configuration_space(
                    cs=cs,
                    pre_prefix=f"{prefix}:model_class",
                    parent_param=model_class_param,
                    parent_value=str(model.__name__),
                    **kwargs,
                )

            return cs, cs_transform

        @staticmethod
        def get_from_configuration(
            configuration: Configuration,
            cs_transform: Dict[str, dict],
            pre_prefix: str = "",
            **kwargs,
        ) -> partial:
            """
            Get the predictor from a given configuration.

            Args:
                configuration (Configuration): The configuration object.
                cs_transform (Dict[str, dict]): The transformation dictionary for the configuration space.

            Returns:
                partial: A partial function to initialize the PairwiseClassifier with the given configuration.
            """

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
            else:
                prefix = PairwiseClassifier.PREFIX

            model_class = cs_transform[f"{prefix}:model_class"][
                configuration[f"{prefix}:model_class"]
            ]
            use_weights = configuration[f"{prefix}:use_weights"]

            model = model_class.get_from_configuration(
                configuration, pre_prefix=f"{prefix}:model_class"
            )

            return PairwiseClassifier(
                model_class=model,
                use_weights=use_weights,
                hierarchical_generator=None,
                **kwargs,
            )

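A hedged usage sketch of the pairwise voting scheme (the import paths and toy data are assumptions): one classifier is trained per algorithm pair, each casts a vote per instance, and the most-voted algorithm wins via idxmax().

import pandas as pd

from asf.predictors import RandomForestClassifierWrapper  # import path is an assumption
from asf.selectors import PairwiseClassifier              # import path is an assumption

features = pd.DataFrame({"f_0": [0.1, 0.9, 0.4], "f_1": [1.0, 0.2, 0.8]})
performance = pd.DataFrame({"algo_0": [1.0, 5.0, 2.0], "algo_1": [3.0, 1.0, 3.0]})

# With n algorithms, n*(n-1)/2 classifiers are fitted; when use_weights=True
# each training point is weighted by the absolute performance difference.
selector = PairwiseClassifier(RandomForestClassifierWrapper, use_weights=True)
selector.fit(features, performance)
print(selector.predict(features))  # {instance: [(algorithm, budget)]}
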
PREFIX = 'pairwise_classifier' class-attribute instance-attribute

PairwiseClassifier is a selector that uses pairwise comparison of algorithms to predict the best algorithm for a given instance.

Attributes:

PREFIX (str): Prefix used for configuration space parameters.

classifiers (List[AbstractPredictor]): List of trained classifiers for pairwise comparisons.

use_weights (bool): Whether to use weights based on performance differences.

__init__(model_class, use_weights=True, **kwargs)

Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

Parameters:

model_class (type[AbstractPredictor], required): The classifier model to be used for pairwise comparisons.

use_weights (bool, default True): Whether to use weights based on performance differences.

**kwargs (default {}): Additional keyword arguments for the parent class.
Source code in asf/selectors/pairwise_classifier.py
def __init__(
    self, model_class: type[AbstractPredictor], use_weights: bool = True, **kwargs
):
    """
    Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

    Args:
        model_class (type[AbstractPredictor]): The classifier model to be used for pairwise comparisons.
        use_weights (bool): Whether to use weights based on performance differences. Defaults to True.
        **kwargs: Additional keyword arguments for the parent class.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.classifiers: List[AbstractPredictor] = []
    self.use_weights: bool = use_weights

generate_features(features)

Generates features for the pairwise classifiers.

Parameters:

features (DataFrame, required): The feature data for the instances.

Returns:

pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.

Source code in asf/selectors/pairwise_classifier.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates features for the pairwise classifiers.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.
    """
    cnt = 0
    predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
    for i, algorithm in enumerate(self.algorithms):
        for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
            prediction = self.classifiers[cnt].predict(features)
            predictions_sum.loc[prediction, algorithm] += 1
            predictions_sum.loc[~prediction, other_algorithm] += 1
            cnt += 1

    return predictions_sum

get_configuration_space(cs=None, cs_transform=None, model_class=[RandomForestClassifierWrapper, XGBoostClassifierWrapper], pre_prefix='', parent_param=None, parent_value=None, **kwargs) staticmethod

Get the configuration space for the predictor.

Parameters:

cs (Optional[ConfigurationSpace], default None): The configuration space to use. If None, a new one will be created.

cs_transform (Optional[Dict[str, dict]], default None): A dictionary for transforming configuration space parameters.

model_class (List[type[AbstractPredictor]], default [RandomForestClassifierWrapper, XGBoostClassifierWrapper]): The list of model classes to use.

hierarchical_generator (Optional[List[AbstractFeatureGenerator]]): List of hierarchical feature generators.

**kwargs (default {}): Additional keyword arguments to pass to the model class.

Returns:

Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.

Source code in asf/selectors/pairwise_classifier.py
@staticmethod
def get_configuration_space(
    cs: Optional[ConfigurationSpace] = None,
    cs_transform: Optional[Dict[str, dict]] = None,
    model_class: List[type[AbstractPredictor]] = [
        RandomForestClassifierWrapper,
        XGBoostClassifierWrapper,
    ],
    pre_prefix: str = "",
    parent_param: Optional[Hyperparameter] = None,
    parent_value: Optional[str] = None,
    **kwargs,
) -> Tuple[ConfigurationSpace, Dict[str, dict]]:
    """
    Get the configuration space for the predictor.

    Args:
        cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
        cs_transform (Optional[Dict[str, dict]]): A dictionary for transforming configuration space parameters.
        model_class (List[type[AbstractPredictor]]): The list of model classes to use. Defaults to [RandomForestClassifierWrapper, XGBoostClassifierWrapper].
        hierarchical_generator (Optional[List[AbstractFeatureGenerator]]): List of hierarchical feature generators.
        **kwargs: Additional keyword arguments to pass to the model class.

    Returns:
        Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.
    """
    if cs is None:
        cs = ConfigurationSpace()

    if cs_transform is None:
        cs_transform = dict()

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
    else:
        prefix = PairwiseClassifier.PREFIX

    model_class_param = Categorical(
        name=f"{prefix}:model_class",
        items=[str(c.__name__) for c in model_class],
    )

    cs_transform[f"{prefix}:model_class"] = {
        str(c.__name__): c for c in model_class
    }

    use_weights_param = Categorical(
        name=f"{prefix}:use_weights",
        items=[True, False],
    )

    params = [model_class_param, use_weights_param]

    if parent_param is not None:
        conditions = [
            EqualsCondition(
                child=param,
                parent=parent_param,
                value=parent_value,
            )
            for param in params
        ]
    else:
        conditions = []

    cs.add(params + conditions)

    for model in model_class:
        model.get_configuration_space(
            cs=cs,
            pre_prefix=f"{prefix}:model_class",
            parent_param=model_class_param,
            parent_value=str(model.__name__),
            **kwargs,
        )

    return cs, cs_transform

get_from_configuration(configuration, cs_transform, pre_prefix='', **kwargs) staticmethod

Get the predictor from a given configuration.

Parameters:

configuration (Configuration, required): The configuration object.

cs_transform (Dict[str, dict], required): The transformation dictionary for the configuration space.

Returns:

partial: A partial function to initialize the PairwiseClassifier with the given configuration.

Source code in asf/selectors/pairwise_classifier.py
@staticmethod
def get_from_configuration(
    configuration: Configuration,
    cs_transform: Dict[str, dict],
    pre_prefix: str = "",
    **kwargs,
) -> partial:
    """
    Get the predictor from a given configuration.

    Args:
        configuration (Configuration): The configuration object.
        cs_transform (Dict[str, dict]): The transformation dictionary for the configuration space.

    Returns:
        partial: A partial function to initialize the PairwiseClassifier with the given configuration.
    """

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
    else:
        prefix = PairwiseClassifier.PREFIX

    model_class = cs_transform[f"{prefix}:model_class"][
        configuration[f"{prefix}:model_class"]
    ]
    use_weights = configuration[f"{prefix}:use_weights"]

    model = model_class.get_from_configuration(
        configuration, pre_prefix=f"{prefix}:model_class"
    )

    return PairwiseClassifier(
        model_class=model,
        use_weights=use_weights,
        hierarchical_generator=None,
        **kwargs,
    )
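
A hedged sketch of the configuration-space round-trip (requires ConfigSpace to be installed so that CONFIGSPACE_AVAILABLE is True; note that despite the partial return annotation, the body shown above returns an initialised PairwiseClassifier):

from asf.selectors import PairwiseClassifier  # import path is an assumption

# Build the space; also returns the name-to-class transform dictionary.
cs, cs_transform = PairwiseClassifier.get_configuration_space()

# Sample a configuration and turn it back into a selector.
config = cs.sample_configuration()
selector = PairwiseClassifier.get_from_configuration(config, cs_transform)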

PairwiseRegressor

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

Source code in asf/selectors/pairwise_regressor.py
class PairwiseRegressor(AbstractModelBasedSelector, AbstractFeatureGenerator):
    PREFIX = "pairwise_regressor"
    """
    PairwiseRegressor is a selector that uses pairwise regression of algorithms
    to predict the best algorithm for a given instance.

    Attributes:
        model_class (type): The regression model class to be used for pairwise comparisons.
        regressors (List[AbstractPredictor]): List of trained regressors for pairwise comparisons.
    """

    def __init__(self, model_class: type, **kwargs):
        """
        Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

        Args:
            model_class (type): The regression model class to be used for pairwise comparisons.
            kwargs: Additional keyword arguments for the parent classes.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: List[AbstractPredictor] = []

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the pairwise regressors using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseRegressor does not use algorithm features."
        )
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                diffs = algo1_times - algo2_times
                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None,
                )
                self.regressors.append(cur_model)

    def _predict(self, features: pd.DataFrame) -> Dict[str, List[Tuple[str, float]]]:
        """
        Predicts the best algorithm for each instance using the trained pairwise regressors.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            Dict[str, List[Tuple[str, float]]]: A dictionary mapping instance names to the predicted best algorithm
            and the associated budget.
        """
        predictions_sum = self.generate_features(features)
        return {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmax()
                    if self.maximize
                    else predictions_sum.loc[instance_name].idxmin(),
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates features for the pairwise regressors.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.
        """
        cnt = 0
        predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
        for i, algorithm in enumerate(self.algorithms):
            for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
                prediction = self.regressors[cnt].predict(features)
                predictions_sum[algorithm] += prediction
                predictions_sum[other_algorithm] -= prediction
                cnt += 1

        return predictions_sum

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: Optional[ConfigurationSpace] = None,
            cs_transform: Optional[Dict[str, Dict[str, type]]] = None,
            model_class: List[type[AbstractPredictor]] = [
                RandomForestRegressorWrapper,
                XGBoostRegressorWrapper,
            ],
            pre_prefix: str = "",
            parent_param: Optional[Hyperparameter] = None,
            parent_value: Optional[str] = None,
            **kwargs,
        ) -> Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]:
            """
            Get the configuration space for the predictor.

            Args:
                cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
                cs_transform (Optional[Dict[str, Dict[str, type]]]): A dictionary for transforming configuration space values.
                model_class (List[type]): The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].
                hierarchical_generator (Optional[List[AbstractFeatureGenerator]]): List of hierarchical feature generators.
                kwargs: Additional keyword arguments to pass to the model class.

            Returns:
                Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.
            """
            if cs is None:
                cs = ConfigurationSpace()

            if cs_transform is None:
                cs_transform = dict()

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
            else:
                prefix = PairwiseRegressor.PREFIX

            model_class_param = Categorical(
                name=f"{prefix}:model_class",
                items=[str(c.__name__) for c in model_class],
            )

            cs_transform[f"{prefix}:model_class"] = {
                str(c.__name__): c for c in model_class
            }

            params = [model_class_param]

            if parent_param is not None:
                conditions = [
                    EqualsCondition(
                        child=param,
                        parent=parent_param,
                        value=parent_value,
                    )
                    for param in params
                ]
            else:
                conditions = []

            cs.add(params + conditions)

            for model in model_class:
                model.get_configuration_space(
                    cs=cs,
                    pre_prefix=f"{prefix}:model_class",
                    parent_param=model_class_param,
                    parent_value=str(model.__name__),
                    **kwargs,
                )

            return cs, cs_transform

        @staticmethod
        def get_from_configuration(
            configuration: Configuration,
            cs_transform: Dict[str, Dict[str, type]],
            pre_prefix: str = "",
            **kwargs,
        ) -> "PairwiseRegressor":
            """
            Instantiate a PairwiseRegressor from a sampled configuration.

            Args:
                configuration (Configuration): The configuration object.
                cs_transform (Dict[str, Dict[str, type]]): The transformation dictionary for the configuration space.

            Returns:
                PairwiseRegressor: A PairwiseRegressor initialized with the model class and hyperparameters encoded in the configuration.
            """
            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
            else:
                prefix = PairwiseRegressor.PREFIX

            model_class = cs_transform[f"{prefix}:model_class"][
                configuration[f"{prefix}:model_class"]
            ]

            model = model_class.get_from_configuration(
                configuration, pre_prefix=f"{prefix}:model_class"
            )

            return PairwiseRegressor(
                model_class=model,
                hierarchical_generator=None,
                **kwargs,
            )
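
A minimal usage sketch follows. The import paths and the toy data are assumptions, and budget is assumed to be accepted by the selector base class via **kwargs; none of this is verbatim library documentation.

import pandas as pd

# Hypothetical import paths; adjust to your installation of asf.
from asf.selectors import PairwiseRegressor
from asf.predictors import RandomForestRegressorWrapper

# Rows are instances; one performance column per algorithm (e.g., runtimes).
features = pd.DataFrame(
    {"f1": [0.1, 0.5, 0.9], "f2": [1.2, 0.3, 2.1]},
    index=["inst1", "inst2", "inst3"],
)
performance = pd.DataFrame(
    {"algo_a": [10.0, 3.0, 7.0], "algo_b": [5.0, 8.0, 2.0]},
    index=features.index,
)

# budget is assumed to be forwarded to the AbstractSelector base via **kwargs.
selector = PairwiseRegressor(model_class=RandomForestRegressorWrapper, budget=5000)
selector.fit(features, performance)

# Maps each instance to a single-entry schedule [(algorithm, budget)].
print(selector.predict(features))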

PREFIX = 'pairwise_regressor' class-attribute instance-attribute

PairwiseRegressor is a selector that uses pairwise regression of algorithms to predict the best algorithm for a given instance.

Attributes:

Name Type Description
model_class type

The regression model class to be used for pairwise comparisons.

regressors List[AbstractPredictor]

List of trained regressors for pairwise comparisons.

__init__(model_class, **kwargs)

Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

Parameters:

Name Type Description Default
model_class type

The regression model class to be used for pairwise comparisons.

required
kwargs

Additional keyword arguments for the parent classes.

{}
Source code in asf/selectors/pairwise_regressor.py
def __init__(self, model_class: type, **kwargs):
    """
    Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

    Args:
        model_class (type): The regression model class to be used for pairwise comparisons.
        kwargs: Additional keyword arguments for the parent classes.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.regressors: List[AbstractPredictor] = []

generate_features(features)

Generates features for the pairwise regressors.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Type Description
DataFrame

pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.

Source code in asf/selectors/pairwise_regressor.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates features for the pairwise regressors.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.
    """
    cnt = 0
    predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
    # Each regressor corresponds to one (algorithm, other_algorithm) pair, in
    # order; its prediction is credited to the first algorithm of the pair and
    # debited from the second, accumulating a pairwise score per algorithm.
    for i, algorithm in enumerate(self.algorithms):
        for other_algorithm in self.algorithms[i + 1 :]:
            prediction = self.regressors[cnt].predict(features)
            predictions_sum[algorithm] += prediction
            predictions_sum[other_algorithm] -= prediction
            cnt += 1

    return predictions_sum
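
To see what this aggregation computes, consider three algorithms and fixed pairwise margins; the sketch below reproduces the credit/debit bookkeeping by hand (a hand-computed toy example, not library output):

import pandas as pd

algorithms = ["A", "B", "C"]
# The regressors are ordered (A, B), (A, C), (B, C); margin d(x, y) is the
# predicted performance of x minus that of y for one instance.
margins = {("A", "B"): 2.0, ("A", "C"): -1.0, ("B", "C"): 0.5}

scores = pd.Series(0.0, index=algorithms)
for (x, y), d in margins.items():
    scores[x] += d  # credit the first algorithm of the pair
    scores[y] -= d  # debit the second

print(scores)  # A: 1.0, B: -1.5, C: 0.5; idxmax/idxmin then picks the winner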

get_configuration_space(cs=None, cs_transform=None, model_class=[RandomForestRegressorWrapper, XGBoostRegressorWrapper], pre_prefix='', parent_param=None, parent_value=None, **kwargs) staticmethod

Get the configuration space for the predictor.

Parameters:

Name Type Description Default
cs Optional[ConfigurationSpace]

The configuration space to use. If None, a new one will be created.

None
cs_transform Optional[Dict[str, Dict[str, type]]]

A dictionary for transforming configuration space values.

None
model_class List[type]

The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].

[RandomForestRegressorWrapper, XGBoostRegressorWrapper]
pre_prefix str

Prefix prepended to all hyperparameter names.

''
parent_param Optional[Hyperparameter]

Parent hyperparameter that conditions the added parameters.

None
parent_value Optional[str]

Value of the parent hyperparameter under which the added parameters are active.

None
kwargs

Additional keyword arguments to pass to the model class.

{}

Returns:

Type Description
Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]

Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.

Source code in asf/selectors/pairwise_regressor.py
@staticmethod
def get_configuration_space(
    cs: Optional[ConfigurationSpace] = None,
    cs_transform: Optional[Dict[str, Dict[str, type]]] = None,
    model_class: List[type[AbstractPredictor]] = [
        RandomForestRegressorWrapper,
        XGBoostRegressorWrapper,
    ],
    pre_prefix: str = "",
    parent_param: Optional[Hyperparameter] = None,
    parent_value: Optional[str] = None,
    **kwargs,
) -> Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]:
    """
    Get the configuration space for the predictor.

    Args:
        cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
        cs_transform (Optional[Dict[str, Dict[str, type]]]): A dictionary for transforming configuration space values.
        model_class (List[type]): The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].
        pre_prefix (str): Prefix prepended to all hyperparameter names.
        parent_param (Optional[Hyperparameter]): Parent hyperparameter that conditions the added parameters.
        parent_value (Optional[str]): Value of the parent hyperparameter under which the added parameters are active.
        kwargs: Additional keyword arguments to pass to the model classes.

    Returns:
        Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.
    """
    if cs is None:
        cs = ConfigurationSpace()

    if cs_transform is None:
        cs_transform = {}

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
    else:
        prefix = PairwiseRegressor.PREFIX

    model_class_param = Categorical(
        name=f"{prefix}:model_class",
        items=[str(c.__name__) for c in model_class],
    )

    cs_transform[f"{prefix}:model_class"] = {
        str(c.__name__): c for c in model_class
    }

    params = [model_class_param]

    if parent_param is not None:
        conditions = [
            EqualsCondition(
                child=param,
                parent=parent_param,
                value=parent_value,
            )
            for param in params
        ]
    else:
        conditions = []

    cs.add(params + conditions)

    for model in model_class:
        model.get_configuration_space(
            cs=cs,
            pre_prefix=f"{prefix}:model_class",
            parent_param=model_class_param,
            parent_value=str(model.__name__),
            **kwargs,
        )

    return cs, cs_transform
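
A sketch of assembling the space (ConfigSpace usage is standard; the wrapper import path is an assumption):

from ConfigSpace import ConfigurationSpace

# Hypothetical import path; adjust to your installation of asf.
from asf.predictors import RandomForestRegressorWrapper, XGBoostRegressorWrapper

cs, cs_transform = PairwiseRegressor.get_configuration_space(
    cs=ConfigurationSpace(),
    cs_transform={},
    model_class=[RandomForestRegressorWrapper, XGBoostRegressorWrapper],
)

# One categorical hyperparameter selects the model class; each model class
# attaches its own hyperparameters, conditioned on being selected.
print(cs)
print(cs_transform["pairwise_regressor:model_class"])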

get_from_configuration(configuration, cs_transform, pre_prefix='', **kwargs) staticmethod

Instantiates a PairwiseRegressor from a sampled configuration.

Parameters:

Name Type Description Default
configuration Configuration

The configuration object.

required
cs_transform Dict[str, Dict[str, type]]

The transformation dictionary for the configuration space.

required

Returns:

Name Type Description
PairwiseRegressor PairwiseRegressor

A PairwiseRegressor instance initialized from the given configuration.

Source code in asf/selectors/pairwise_regressor.py
@staticmethod
def get_from_configuration(
    configuration: Configuration,
    cs_transform: Dict[str, Dict[str, type]],
    pre_prefix: str = "",
    **kwargs,
) -> "PairwiseRegressor":
    """
    Instantiate a PairwiseRegressor from a sampled configuration.

    Args:
        configuration (Configuration): The configuration object.
        cs_transform (Dict[str, Dict[str, type]]): The transformation dictionary for the configuration space.

    Returns:
        PairwiseRegressor: A PairwiseRegressor initialized with the model class and hyperparameters encoded in the configuration.
    """
    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
    else:
        prefix = PairwiseRegressor.PREFIX

    model_class = cs_transform[f"{prefix}:model_class"][
        configuration[f"{prefix}:model_class"]
    ]

    model = model_class.get_from_configuration(
        configuration, pre_prefix=f"{prefix}:model_class"
    )

    return PairwiseRegressor(
        model_class=model,
        hierarchical_generator=None,
        **kwargs,
    )
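
Continuing the configuration-space sketch above (and reusing the toy frames from the earlier PairwiseRegressor sketch), a sampled configuration can be turned back into a ready-to-fit selector:

# Sample any valid configuration and rebuild the selector it describes.
config = cs.sample_configuration()
selector = PairwiseRegressor.get_from_configuration(config, cs_transform)
selector.fit(features, performance)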

PerformanceModel

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

PerformanceModel is a class that predicts the performance of algorithms based on given features. It can handle both single-target and multi-target regression models.

Attributes:

Name Type Description
model_class Type

The class of the regression model to be used.

use_multi_target bool

Indicates whether to use multi-target regression.

normalize str

Method to normalize the performance data. Default is "log".

regressors Union[List, object]

List of trained regression models or a single model for multi-target regression.

algorithm_features Optional[DataFrame]

Features specific to each algorithm, if applicable.

algorithms List[str]

List of algorithm names.

maximize bool

Whether to maximize or minimize the performance metric.

budget float

Budget associated with the predictions.

Source code in asf/selectors/performance_model.py
class PerformanceModel(AbstractModelBasedSelector, AbstractFeatureGenerator):
    """
    PerformanceModel is a class that predicts the performance of algorithms
    based on given features. It can handle both single-target and multi-target
    regression models.

    Attributes:
        model_class (Type): The class of the regression model to be used.
        use_multi_target (bool): Indicates whether to use multi-target regression.
        normalize (str): Method to normalize the performance data. Default is "log".
        regressors (Union[List, object]): List of trained regression models or a single model for multi-target regression.
        algorithm_features (Optional[pd.DataFrame]): Features specific to each algorithm, if applicable.
        algorithms (List[str]): List of algorithm names.
        maximize (bool): Whether to maximize or minimize the performance metric.
        budget (float): Budget associated with the predictions.
    """

    def __init__(
        self,
        model_class: Type,
        use_multi_target: bool = False,
        normalize: str = "log",
        **kwargs,
    ):
        """
        Initializes the PerformanceModel with the given parameters.

        Args:
            model_class (Type): The class of the regression model to be used.
            use_multi_target (bool): Indicates whether to use multi-target regression.
            normalize (str): Method to normalize the performance data. Default is "log".
            **kwargs: Additional arguments for the parent classes.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: Union[List, object] = []
        self.use_multi_target: bool = use_multi_target
        self.normalize: str = normalize

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the regression models to the given features and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
            performance (pd.DataFrame): DataFrame containing the performance data.
        """
        if self.normalize == "log":
            performance = np.log10(performance + 1e-6)

        regressor_init_args = {}
        if "input_size" in inspect.signature(self.model_class).parameters.keys():
            regressor_init_args["input_size"] = features.shape[1]

        if self.use_multi_target:
            assert self.algorithm_features is None, (
                "PerformanceModel does not use algorithm features for multi-target regression."
            )
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(features, performance)
        else:
            if self.algorithm_features is None:
                for i, algorithm in enumerate(self.algorithms):
                    algo_times = performance.iloc[:, i]

                    cur_model = self.model_class(**regressor_init_args)
                    cur_model.fit(features, algo_times)
                    self.regressors.append(cur_model)
            else:
                train_data = []
                for i, algorithm in enumerate(self.algorithms):
                    # Broadcast this algorithm's (constant) feature values to
                    # every instance, then attach the matching performance
                    # column as the regression target.
                    data = features.copy()
                    data[self.algorithm_features.columns] = (
                        self.algorithm_features.loc[algorithm].values
                    )
                    data = pd.merge(
                        data, performance.iloc[:, i], left_index=True, right_index=True
                    )
                    train_data.append(data)
                train_data = pd.concat(train_data)
                self.regressors = self.model_class(**regressor_init_args)
                self.regressors.fit(train_data.iloc[:, :-1], train_data.iloc[:, -1])

    def _predict(self, features: pd.DataFrame) -> Dict[str, List[tuple]]:
        """
        Predicts the best algorithm for each instance from the predicted performances.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            Dict[str, List[tuple]]: A dictionary mapping instance names to the predicted best algorithm
            and the associated budget.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.algorithms[
                        np.argmax(predictions[i])
                        if self.maximize
                        else np.argmin(predictions[i])
                    ],
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> np.ndarray:
        """
        Generates predictions for the given features using the trained models.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            np.ndarray: Array containing the predictions for each algorithm.
        """
        if self.use_multi_target:
            predictions = self.regressors.predict(features)
        else:
            if self.algorithm_features is None:
                predictions = np.zeros((features.shape[0], len(self.algorithms)))
                for i, algorithm in enumerate(self.algorithms):
                    prediction = self.regressors[i].predict(features)
                    predictions[:, i] = prediction
            else:
                predictions = np.zeros((features.shape[0], len(self.algorithms)))
                for i, algorithm in enumerate(self.algorithms):
                    # Broadcast this algorithm's (constant) feature values to
                    # every instance before predicting with the shared model.
                    data = features.copy()
                    data[self.algorithm_features.columns] = (
                        self.algorithm_features.loc[algorithm].values
                    )
                    prediction = self.regressors.predict(data)
                    predictions[:, i] = prediction

        return predictions
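
A minimal usage sketch (import paths and toy data are assumptions; budget is again assumed to be forwarded via **kwargs):

import pandas as pd

# Hypothetical import paths; adjust to your installation of asf.
from asf.selectors import PerformanceModel
from asf.predictors import RandomForestRegressorWrapper

features = pd.DataFrame(
    {"f1": [0.1, 0.5, 0.9], "f2": [1.2, 0.3, 2.1]},
    index=["inst1", "inst2", "inst3"],
)
performance = pd.DataFrame(
    {"algo_a": [10.0, 3.0, 7.0], "algo_b": [5.0, 8.0, 2.0]},
    index=features.index,
)

# One regressor per algorithm; performances are log10-normalized before fitting.
model = PerformanceModel(
    model_class=RandomForestRegressorWrapper, normalize="log", budget=5000
)
model.fit(features, performance)
print(model.predict(features))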

__init__(model_class, use_multi_target=False, normalize='log', **kwargs)

Initializes the PerformanceModel with the given parameters.

Parameters:

Name Type Description Default
model_class Type

The class of the regression model to be used.

required
use_multi_target bool

Indicates whether to use multi-target regression.

False
normalize str

Method to normalize the performance data. Default is "log".

'log'
**kwargs

Additional arguments for the parent classes.

{}
Source code in asf/selectors/performance_model.py
def __init__(
    self,
    model_class: Type,
    use_multi_target: bool = False,
    normalize: str = "log",
    **kwargs,
):
    """
    Initializes the PerformanceModel with the given parameters.

    Args:
        model_class (Type): The class of the regression model to be used.
        use_multi_target (bool): Indicates whether to use multi-target regression.
        normalize (str): Method to normalize the performance data. Default is "log".
        **kwargs: Additional arguments for the parent classes.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.regressors: Union[List, object] = []
    self.use_multi_target: bool = use_multi_target
    self.normalize: str = normalize

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description
ndarray

np.ndarray: Array containing the predictions for each algorithm.

Source code in asf/selectors/performance_model.py
def generate_features(self, features: pd.DataFrame) -> np.ndarray:
    """
    Generates predictions for the given features using the trained models.

    Args:
        features (pd.DataFrame): DataFrame containing the feature data.

    Returns:
        np.ndarray: Array containing the predictions for each algorithm.
    """
    if self.use_multi_target:
        predictions = self.regressors.predict(features)
    else:
        if self.algorithm_features is None:
            predictions = np.zeros((features.shape[0], len(self.algorithms)))
            for i, algorithm in enumerate(self.algorithms):
                prediction = self.regressors[i].predict(features)
                predictions[:, i] = prediction
        else:
            predictions = np.zeros((features.shape[0], len(self.algorithms)))
            for i, algorithm in enumerate(self.algorithms):
                # Broadcast this algorithm's (constant) feature values to
                # every instance before predicting with the shared model.
                data = features.copy()
                data[self.algorithm_features.columns] = (
                    self.algorithm_features.loc[algorithm].values
                )
                prediction = self.regressors.predict(data)
                predictions[:, i] = prediction

    return predictions

SelectorPipeline

A pipeline for applying a sequence of preprocessing, feature selection, and algorithm selection steps before fitting a final selector model.

Attributes:

Name Type Description
selector AbstractSelector

The main selector model to be used.

preprocessor Optional[Callable]

A callable for preprocessing the input data.

pre_solving Optional[Callable]

A callable for pre-solving steps.

feature_selector Optional[Callable]

A callable for feature selection.

algorithm_pre_selector Optional[Callable]

A callable for algorithm pre-selection.

budget Optional[Any]

The budget constraint for the selector.

maximize bool

Whether to maximize the objective function.

feature_groups Optional[Any]

Feature groups to be used by the selector.

Source code in asf/selectors/selector_pipeline.py
class SelectorPipeline:
    """
    A pipeline for applying a sequence of preprocessing, feature selection, and algorithm selection
    steps before fitting a final selector model.

    Attributes:
        selector (AbstractSelector): The main selector model to be used.
        preprocessor (Optional[Callable]): A callable for preprocessing the input data.
        pre_solving (Optional[Callable]): A callable for pre-solving steps.
        feature_selector (Optional[Callable]): A callable for feature selection.
        algorithm_pre_selector (Optional[Callable]): A callable for algorithm pre-selection.
        budget (Optional[Any]): The budget constraint for the selector.
        maximize (bool): Whether to maximize the objective function.
        feature_groups (Optional[Any]): Feature groups to be used by the selector.
    """

    def __init__(
        self,
        selector: AbstractSelector,
        preprocessor: Optional[Callable] = None,
        pre_solving: Optional[Callable] = None,
        feature_selector: Optional[Callable] = None,
        algorithm_pre_selector: Optional[Callable] = None,
        budget: Optional[Any] = None,
        maximize: bool = False,
        feature_groups: Optional[Any] = None,
    ) -> None:
        """
        Initializes the SelectorPipeline.

        Args:
            selector (AbstractSelector): The main selector model to be used.
            preprocessor (Optional[Callable], optional): A callable for preprocessing the input data. Defaults to None.
            pre_solving (Optional[Callable], optional): A callable for pre-solving steps. Defaults to None.
            feature_selector (Optional[Callable], optional): A callable for feature selection. Defaults to None.
            algorithm_pre_selector (Optional[Callable], optional): A callable for algorithm pre-selection. Defaults to None.
            budget (Optional[Any], optional): The budget constraint for the selector. Defaults to None.
            maximize (bool, optional): Whether to maximize the objective function. Defaults to False.
            feature_groups (Optional[Any], optional): Feature groups to be used by the selector. Defaults to None.
        """
        self.selector = selector
        self.preprocessor = preprocessor
        self.pre_solving = pre_solving
        self.feature_selector = feature_selector
        self.algorithm_pre_selector = algorithm_pre_selector
        self.budget = budget
        self.maximize = maximize
        self.feature_groups = feature_groups

    def fit(self, X: Any, y: Any) -> None:
        """
        Fits the pipeline to the input data.

        Args:
            X (Any): The input features.
            y (Any): The target labels.
        """
        if self.preprocessor:
            X = self.preprocessor.fit_transform(X)

        if self.algorithm_pre_selector:
            y = self.algorithm_pre_selector.fit_transform(y)

        if self.feature_selector:
            X, y = self.feature_selector.fit_transform(X, y)

        if self.pre_solving:
            self.pre_solving.fit(X, y)

        self.selector.fit(X, y)

    def predict(self, X: Any) -> Any:
        """
        Makes predictions using the fitted pipeline.

        Args:
            X (Any): The input features.

        Returns:
            Any: The predictions made by the selector.
        """
        if self.preprocessor:
            X = self.preprocessor.transform(X)

        if self.pre_solving:
            X = self.pre_solving.transform(X)

        if self.feature_selector:
            X = self.feature_selector.transform(X)

        return self.selector.predict(X)

    def save(self, path: str) -> None:
        """
        Saves the pipeline to a file.

        Args:
            path (str): The file path where the pipeline will be saved.
        """
        import joblib

        joblib.dump(self, path)

    @staticmethod
    def load(path: str) -> "SelectorPipeline":
        """
        Loads a pipeline from a file.

        Args:
            path (str): The file path from which the pipeline will be loaded.

        Returns:
            SelectorPipeline: The loaded pipeline.
        """
        import joblib

        return joblib.load(path)
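
A minimal pipeline sketch. PerformanceModel, the wrapper, and the import paths are assumptions carried over from the sketches above; the scaler is an arbitrary sklearn choice, not a library requirement.

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Hypothetical import paths; adjust to your installation of asf.
from asf.selectors import PerformanceModel, SelectorPipeline
from asf.predictors import RandomForestRegressorWrapper

features = pd.DataFrame(
    {"f1": [0.1, 0.5, 0.9], "f2": [1.2, 0.3, 2.1]},
    index=["inst1", "inst2", "inst3"],
)
performance = pd.DataFrame(
    {"algo_a": [10.0, 3.0, 7.0], "algo_b": [5.0, 8.0, 2.0]},
    index=features.index,
)

pipeline = SelectorPipeline(
    selector=PerformanceModel(model_class=RandomForestRegressorWrapper, budget=5000),
    # set_output keeps pandas DataFrames (and the instance index) flowing
    # through the scaler instead of bare numpy arrays.
    preprocessor=StandardScaler().set_output(transform="pandas"),
)
pipeline.fit(features, performance)
schedules = pipeline.predict(features)

pipeline.save("selector_pipeline.joblib")
restored = SelectorPipeline.load("selector_pipeline.joblib")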

__init__(selector, preprocessor=None, pre_solving=None, feature_selector=None, algorithm_pre_selector=None, budget=None, maximize=False, feature_groups=None)

Initializes the SelectorPipeline.

Parameters:

Name Type Description Default
selector AbstractSelector

The main selector model to be used.

required
preprocessor Optional[Callable]

A callable for preprocessing the input data. Defaults to None.

None
pre_solving Optional[Callable]

A callable for pre-solving steps. Defaults to None.

None
feature_selector Optional[Callable]

A callable for feature selection. Defaults to None.

None
algorithm_pre_selector Optional[Callable]

A callable for algorithm pre-selection. Defaults to None.

None
budget Optional[Any]

The budget constraint for the selector. Defaults to None.

None
maximize bool

Whether to maximize the objective function. Defaults to False.

False
feature_groups Optional[Any]

Feature groups to be used by the selector. Defaults to None.

None
Source code in asf/selectors/selector_pipeline.py
def __init__(
    self,
    selector: AbstractSelector,
    preprocessor: Optional[Callable] = None,
    pre_solving: Optional[Callable] = None,
    feature_selector: Optional[Callable] = None,
    algorithm_pre_selector: Optional[Callable] = None,
    budget: Optional[Any] = None,
    maximize: bool = False,
    feature_groups: Optional[Any] = None,
) -> None:
    """
    Initializes the SelectorPipeline.

    Args:
        selector (AbstractSelector): The main selector model to be used.
        preprocessor (Optional[Callable], optional): A callable for preprocessing the input data. Defaults to None.
        pre_solving (Optional[Callable], optional): A callable for pre-solving steps. Defaults to None.
        feature_selector (Optional[Callable], optional): A callable for feature selection. Defaults to None.
        algorithm_pre_selector (Optional[Callable], optional): A callable for algorithm pre-selection. Defaults to None.
        budget (Optional[Any], optional): The budget constraint for the selector. Defaults to None.
        maximize (bool, optional): Whether to maximize the objective function. Defaults to False.
        feature_groups (Optional[Any], optional): Feature groups to be used by the selector. Defaults to None.
    """
    self.selector = selector
    self.preprocessor = preprocessor
    self.pre_solving = pre_solving
    self.feature_selector = feature_selector
    self.algorithm_pre_selector = algorithm_pre_selector
    self.budget = budget
    self.maximize = maximize
    self.feature_groups = feature_groups

fit(X, y)

Fits the pipeline to the input data.

Parameters:

Name Type Description Default
X Any

The input features.

required
y Any

The target labels.

required
Source code in asf/selectors/selector_pipeline.py
def fit(self, X: Any, y: Any) -> None:
    """
    Fits the pipeline to the input data.

    Args:
        X (Any): The input features.
        y (Any): The target labels.
    """
    if self.preprocessor:
        X = self.preprocessor.fit_transform(X)

    if self.algorithm_pre_selector:
        y = self.algorithm_pre_selector.fit_transform(y)

    if self.feature_selector:
        X, y = self.feature_selector.fit_transform(X, y)

    if self.pre_solving:
        self.pre_solving.fit(X, y)

    self.selector.fit(X, y)

load(path) staticmethod

Loads a pipeline from a file.

Parameters:

Name Type Description Default
path str

The file path from which the pipeline will be loaded.

required

Returns:

Name Type Description
SelectorPipeline SelectorPipeline

The loaded pipeline.

Source code in asf/selectors/selector_pipeline.py
@staticmethod
def load(path: str) -> "SelectorPipeline":
    """
    Loads a pipeline from a file.

    Args:
        path (str): The file path from which the pipeline will be loaded.

    Returns:
        SelectorPipeline: The loaded pipeline.
    """
    import joblib

    return joblib.load(path)

predict(X)

Makes predictions using the fitted pipeline.

Parameters:

Name Type Description Default
X Any

The input features.

required

Returns:

Name Type Description
Any Any

The predictions made by the selector.

Source code in asf/selectors/selector_pipeline.py
def predict(self, X: Any) -> Any:
    """
    Makes predictions using the fitted pipeline.

    Args:
        X (Any): The input features.

    Returns:
        Any: The predictions made by the selector.
    """
    if self.preprocessor:
        X = self.preprocessor.transform(X)

    if self.pre_solving:
        X = self.pre_solving.transform(X)

    if self.feature_selector:
        X = self.feature_selector.transform(X)

    return self.selector.predict(X)

save(path)

Saves the pipeline to a file.

Parameters:

Name Type Description Default
path str

The file path where the pipeline will be saved.

required
Source code in asf/selectors/selector_pipeline.py
def save(self, path: str) -> None:
    """
    Saves the pipeline to a file.

    Args:
        path (str): The file path where the pipeline will be saved.
    """
    import joblib

    joblib.dump(self, path)

SimpleRanking

Bases: AbstractModelBasedSelector

Algorithm Selection via Ranking (Oentaryo et al.), optionally with algorithm features.

Attributes:

Name Type Description
model_class

The class of the ranking model to be used.

metadata

Metadata containing information about the algorithms.

classifier

The trained ranking model.

Source code in asf/selectors/simple_ranking.py
class SimpleRanking(AbstractModelBasedSelector):
    """
    Algorithm Selection via Ranking (Oentaryo et al.) + algo features (optional).
    Attributes:
        model_class: The class of the classification model to be used.
        metadata: Metadata containing information about the algorithms.
        classifier: The trained classification model.
    """

    def __init__(self, model_class: AbstractPredictor, **kwargs):
        """
        Initializes the SimpleRanking selector with the given parameters.

        Args:
            model_class: The class of the ranking model to be used. Assumes an XGBoost-style ranking API (fit accepts a qid argument).
            **kwargs: Additional keyword arguments forwarded to AbstractModelBasedSelector.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
    ):
        """
        Fits the classification model to the given feature and performance data.

        Args:
            features: DataFrame containing the feature data.
            performance: DataFrame containing the performance data.
        """
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=self.algorithms,
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],
            )

        performance = performance[self.algorithms]
        features = features[self.features]
        features.index.name = "INSTANCE_ID"

        self.algorithm_features.index.name = "ALGORITHM"

        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        stacked_performance = performance.stack().reset_index()
        stacked_performance.columns = [
            "INSTANCE_ID",
            "ALGORITHM",
            "PERFORMANCE",
        ]
        merged = total_features.merge(
            stacked_performance,
            right_on=["INSTANCE_ID", "ALGORITHM"],
            left_on=["INSTANCE_ID", "ALGORITHM"],
            how="left",
        )

        gdfs = []
        for _, gdf in merged.groupby("INSTANCE_ID"):
            gdf["rank"] = gdf["PERFORMANCE"].rank(
                ascending=True, method="max" if self.maximize else "min"
            )
            gdfs.append(gdf)
        merged = pd.concat(gdfs)

        total_features = merged.drop(
            columns=[
                "INSTANCE_ID",
                "ALGORITHM",
                "PERFORMANCE",
                "rank",
            ]
        )
        qid = merged["INSTANCE_ID"].values
        encoder = OrdinalEncoder()
        qid = encoder.fit_transform(qid.reshape(-1, 1)).flatten()

        self.classifier = self.model_class()
        self.classifier.fit(
            total_features,
            merged["rank"],
            qid=qid,
        )

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance in the given feature data.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping instance names to the predicted best algorithm.
        """

        features = features[self.features]

        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        predictions = self.classifier.predict(
            total_features[list(self.features) + list(self.algorithm_features.columns)]
        )

        scheds = {}
        for instance_name in features.index.unique():
            ids = total_features[features.index.name] == instance_name
            chosen = predictions[ids].argmin()
            scheds[instance_name] = [
                (
                    total_features.loc[ids].iloc[chosen]["ALGORITHM"],
                    self.budget,
                )
            ]

        return scheds
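
A minimal usage sketch. xgboost's XGBRanker matches the assumed fit(..., qid=...) API; the import path, the toy data, and the budget kwarg are assumptions.

import pandas as pd
from xgboost import XGBRanker

# Hypothetical import path; adjust to your installation of asf.
from asf.selectors import SimpleRanking

# _predict looks the query column up by the index name, so name it explicitly.
features = pd.DataFrame(
    {"f1": [0.1, 0.5, 0.9], "f2": [1.2, 0.3, 2.1]},
    index=pd.Index(["inst1", "inst2", "inst3"], name="INSTANCE_ID"),
)
performance = pd.DataFrame(
    {"algo_a": [10.0, 3.0, 7.0], "algo_b": [5.0, 8.0, 2.0]},
    index=features.index,
)

# With no explicit algorithm features, algorithm identity is one-hot encoded
# and a single ranker is trained with one query group (qid) per instance.
ranker = SimpleRanking(model_class=XGBRanker, budget=5000)
ranker.fit(features, performance)
print(ranker.predict(features))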

__init__(model_class, **kwargs)

Initializes the SimpleRanking selector with the given parameters.

Parameters:

Name Type Description Default
model_class AbstractPredictor

The class of the ranking model to be used. Assumes an XGBoost-style ranking API (fit accepts a qid argument).

required
kwargs

Additional keyword arguments forwarded to AbstractModelBasedSelector.

{}
Source code in asf/selectors/simple_ranking.py
def __init__(self, model_class: AbstractPredictor, **kwargs):
    """
    Initializes the SimpleRanking selector with the given parameters.

    Args:
        model_class: The class of the ranking model to be used. Assumes an XGBoost-style ranking API (fit accepts a qid argument).
        **kwargs: Additional keyword arguments forwarded to AbstractModelBasedSelector.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    self.classifier = None

tune_selector(X, y, selector_class, selector_kwargs={}, preprocessing_class=None, pre_solving=None, feature_selector=None, algorithm_pre_selector=None, budget=None, maximize=False, feature_groups=None, output_dir='./smac_output', smac_metric=running_time_selector_performance, smac_kwargs={}, smac_scenario_kwargs={}, runcount_limit=100, timeout=np.inf, seed=0, cv=10, groups=None)

Tunes a selector model using SMAC for hyperparameter optimization.

Parameters:

Name Type Description Default
X DataFrame

Feature matrix for training and testing.

required
y DataFrame

Target matrix for training and testing.

required
selector_class list[AbstractSelector] | AbstractSelector | list[tuple[AbstractSelector, dict]]

A selector class, a list of selector classes, or a list of (selector class, configuration-space kwargs) tuples to tune over.

required
selector_kwargs dict

Additional arguments for the selector's instantiation.

{}
preprocessing_class AbstractPreprocessor

Preprocessing class to apply before selector. Defaults to None.

None
pre_solving object

Pre-solving strategy to use. Defaults to None.

None
feature_selector object

Feature selector to use. Defaults to None.

None
algorithm_pre_selector object

Algorithm pre-selector to use. Defaults to None.

None
budget float

Budget for the selector. Defaults to None.

None
maximize bool

Whether to maximize the metric. Defaults to False.

False
feature_groups list

Feature groups to consider. Defaults to None.

None
output_dir str

Directory to store SMAC output. Defaults to "./smac_output".

'./smac_output'
smac_metric callable

Metric function to evaluate the selector's performance. Defaults to running_time_selector_performance.

running_time_selector_performance
smac_kwargs dict

Additional arguments for SMAC's optimization facade.

{}
smac_scenario_kwargs dict

Additional arguments for SMAC's scenario configuration.

{}
runcount_limit int

Maximum number of function evaluations. Defaults to 100.

100
timeout float

Maximum wall-clock time for optimization. Defaults to np.inf.

inf
seed int

Random seed for reproducibility. Defaults to 0.

0
cv int

Number of cross-validation splits. Defaults to 10.

10
groups ndarray

Group labels for cross-validation. Defaults to None.

None

Returns:

Name Type Description
SelectorPipeline SelectorPipeline

A pipeline with the best-tuned selector and preprocessing steps.

Source code in asf/selectors/selector_tuner.py
def tune_selector(
    X: pd.DataFrame,
    y: pd.DataFrame,
    selector_class: list[AbstractSelector]
    | AbstractSelector
    | list[tuple[AbstractSelector, dict]],
    selector_kwargs: dict = {},
    preprocessing_class: TransformerMixin = None,
    pre_solving: object = None,
    feature_selector: object = None,
    algorithm_pre_selector: object = None,
    budget: float = None,
    maximize: bool = False,
    feature_groups: list = None,
    output_dir: str = "./smac_output",
    smac_metric: callable = running_time_selector_performance,
    smac_kwargs: dict = {},
    smac_scenario_kwargs: dict = {},
    runcount_limit: int = 100,
    timeout: float = np.inf,
    seed: int = 0,
    cv: int = 10,
    groups: np.ndarray = None,
) -> SelectorPipeline:
    """
    Tunes a selector model using SMAC for hyperparameter optimization.

    Parameters:
        X (pd.DataFrame): Feature matrix for training and testing.
        y (pd.DataFrame): Target matrix for training and testing.
        selector_class (AbstractSelector | list[AbstractSelector] | list[tuple[AbstractSelector, dict]]): A selector class, a list of selector classes, or a list of (selector class, configuration-space kwargs) tuples to tune over.
        selector_kwargs (dict): Additional arguments for the selector's instantiation.
        preprocessing_class (AbstractPreprocessor, optional): Preprocessing class to apply before selector. Defaults to None.
        pre_solving (object, optional): Pre-solving strategy to use. Defaults to None.
        feature_selector (object, optional): Feature selector to use. Defaults to None.
        algorithm_pre_selector (object, optional): Algorithm pre-selector to use. Defaults to None.
        budget (float, optional): Budget for the selector. Defaults to None.
        maximize (bool): Whether to maximize the metric. Defaults to False.
        feature_groups (list, optional): Feature groups to consider. Defaults to None.
        output_dir (str): Directory to store SMAC output. Defaults to "./smac_output".
        smac_metric (callable): Metric function to evaluate the selector's performance. Defaults to `running_time_selector_performance`.
        smac_kwargs (dict): Additional arguments for SMAC's optimization facade.
        smac_scenario_kwargs (dict): Additional arguments for SMAC's scenario configuration.
        runcount_limit (int): Maximum number of function evaluations. Defaults to 100.
        timeout (float): Maximum wall-clock time for optimization. Defaults to np.inf.
        seed (int): Random seed for reproducibility. Defaults to 0.
        cv (int): Number of cross-validation splits. Defaults to 10.
        groups (np.ndarray, optional): Group labels for cross-validation. Defaults to None.

    Returns:
        SelectorPipeline: A pipeline with the best-tuned selector and preprocessing steps.
    """
    assert CONFIGSPACE_AVAILABLE, (
        "SMAC is not installed. Please install it to use this function via pip install asf-lib[tune]."
    )
    if type(selector_class) is not list:
        selector_class = [selector_class]

    cs = ConfigurationSpace()
    cs_transform = {}

    if type(selector_class[0]) is tuple:
        selector_param = Categorical(
            name="selector",
            items=[str(c[0].__name__) for c in selector_class],
        )
        cs_transform["selector"] = {str(c[0].__name__): c[0] for c in selector_class}
    else:
        selector_param = Categorical(
            name="selector",
            items=[str(c.__name__) for c in selector_class],
        )
        cs_transform["selector"] = {str(c.__name__): c for c in selector_class}
    cs.add(selector_param)

    for selector in selector_class:
        if type(selector) is tuple:
            selector_space_kwargs = selector[1]
            selector = selector[0]
        else:
            selector_space_kwargs = {}

        cs, cs_transform = selector.get_configuration_space(
            cs=cs,
            cs_transform=cs_transform,
            parent_param=selector_param,
            parent_value=str(selector.__name__),
            **selector_space_kwargs,
        )

    scenario = Scenario(
        configspace=cs,
        n_trials=runcount_limit,
        walltime_limit=timeout,
        deterministic=True,
        output_directory=output_dir,
        seed=seed,
        **smac_scenario_kwargs,
    )

    def target_function(config, seed):
        if groups is not None:
            kfold = GroupKFoldShuffle(n_splits=cv, shuffle=True, random_state=seed)
        else:
            kfold = KFold(n_splits=cv, shuffle=True, random_state=seed)

        scores = []
        for train_idx, test_idx in kfold.split(X, y, groups):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            selector = SelectorPipeline(
                selector=cs_transform["selector"][
                    config["selector"]
                ].get_from_configuration(config, cs_transform, **selector_kwargs),
                preprocessor=preprocessing_class,
                pre_solving=pre_solving,
                feature_selector=feature_selector,
                algorithm_pre_selector=algorithm_pre_selector,
                budget=budget,
                maximize=maximize,
                feature_groups=feature_groups,
            )
            selector.fit(X_train, y_train)

            y_pred = selector.predict(X_test)
            score = smac_metric(y_pred, y_test)
            scores.append(score)

        return np.mean(scores)

    smac = HyperparameterOptimizationFacade(scenario, target_function, **smac_kwargs)
    best_config = smac.optimize()

    del smac  # clean up SMAC to free memory and delete dask client
    return SelectorPipeline(
        selector=cs_transform["selector"][
            best_config["selector"]
        ].get_from_configuration(best_config, cs_transform, **selector_kwargs),
        preprocessor=preprocessing_class,
        pre_solving=pre_solving,
        feature_selector=feature_selector,
        algorithm_pre_selector=algorithm_pre_selector,
        budget=budget,
        maximize=maximize,
        feature_groups=feature_groups,
    )
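
A minimal end-to-end tuning sketch (toy random data; the import paths and the asf-lib[tune] extra providing SMAC are assumptions):

import numpy as np
import pandas as pd

# Hypothetical import paths; adjust to your installation of asf.
from asf.selectors import PairwiseRegressor, PerformanceModel, tune_selector

rng = np.random.default_rng(0)
features = pd.DataFrame(
    rng.random((20, 3)),
    columns=["f1", "f2", "f3"],
    index=[f"inst{i}" for i in range(20)],
)
performance = pd.DataFrame(
    rng.random((20, 2)) * 100,
    columns=["algo_a", "algo_b"],
    index=features.index,
)

# SMAC searches jointly over the selector type and, conditionally, over each
# selector's own model classes and hyperparameters.
best_pipeline = tune_selector(
    X=features,
    y=performance,
    selector_class=[PairwiseRegressor, PerformanceModel],
    runcount_limit=10,
    cv=5,
    seed=0,
)

# The returned pipeline is configured with the best settings but not yet
# fitted on the full data.
best_pipeline.fit(features, performance)
print(best_pipeline.predict(features))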