Selectors

AbstractFeatureGenerator

AbstractFeatureGenerator is a base class for generating additional features based on a set of base features. Subclasses should implement the methods to define specific feature generation logic.

Source code in asf/selectors/feature_generator.py
class AbstractFeatureGenerator:
    """
    AbstractFeatureGenerator is a base class for generating additional features
    based on a set of base features. Subclasses should implement the methods
    to define specific feature generation logic.
    """

    def __init__(self) -> None:
        """
        Initialize the AbstractFeatureGenerator.
        """
        pass

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate additional features based on the provided base features.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input DataFrame containing the base features.

        Returns
        -------
        pd.DataFrame
            A DataFrame containing the generated features.

        Raises
        ------
        NotImplementedError
            If the method is not implemented in a subclass.
        """
        raise NotImplementedError(
            "generate_features() must be implemented in a subclass"
        )
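As an illustration (not part of the library), a minimal subclass might append the square of every base feature; the class name and column-naming scheme below are hypothetical:

import pandas as pd

from asf.selectors.feature_generator import AbstractFeatureGenerator


class SquaredFeatureGenerator(AbstractFeatureGenerator):
    """Hypothetical generator that appends the square of each base feature."""

    def fit(self, features, performance, algorithm_features=None) -> None:
        # Selectors call fit() on hierarchical generators before
        # generate_features(); this generator has nothing to learn.
        pass

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        squared = base_features.pow(2)
        squared.columns = [f"{col}__squared" for col in base_features.columns]
        return squared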

__init__()

Initialize the AbstractFeatureGenerator.

Source code in asf/selectors/feature_generator.py
def __init__(self) -> None:
    """
    Initialize the AbstractFeatureGenerator.
    """
    pass

generate_features(base_features)

Generate additional features based on the provided base features.

Parameters

base_features : pd.DataFrame
    The input DataFrame containing the base features.

Returns

pd.DataFrame
    A DataFrame containing the generated features.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/feature_generator.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate additional features based on the provided base features.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input DataFrame containing the base features.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the generated features.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "generate_features() must be implemented in a subclass"
    )

AbstractModelBasedSelector

Bases: AbstractSelector

An abstract base class for selectors that utilize a machine learning model for selection purposes. This class provides functionality to initialize with a model class, save the selector to a file, and load it back.

Attributes:

model_class : Callable
    A callable that represents the model class to be used. If the provided model_class is a subclass of ClassifierMixin or RegressorMixin, it is wrapped using SklearnWrapper.

Methods:

save(path: Union[str, Path]) -> None
    Saves the current instance of the selector to the specified file path.

load(path: Union[str, Path]) -> "AbstractModelBasedSelector"
    Loads a previously saved instance of the selector from the specified file path.

Source code in asf/selectors/abstract_model_based_selector.py
class AbstractModelBasedSelector(AbstractSelector):
    """
    An abstract base class for selectors that utilize a machine learning model
    for selection purposes. This class provides functionality to initialize
    with a model class, save the selector to a file, and load it back.

    Attributes:
        model_class (Callable): A callable that represents the model class to
            be used. If the provided model_class is a subclass of
            `ClassifierMixin` or `RegressorMixin`, it is wrapped using
            `SklearnWrapper`.

    Methods:
        save(path: Union[str, Path]) -> None:
            Saves the current instance of the selector to the specified file path.
        load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
            Loads a previously saved instance of the selector from the specified file path.
    """

    def __init__(self, model_class: Type[AbstractPredictor], **kwargs: Any) -> None:
        """
        Initializes the AbstractModelBasedSelector.

        Args:
            model_class (Union[Type, Callable]): The model class or a callable
                that returns a model instance. If a scikit-learn compatible
                class is provided, it's wrapped with SklearnWrapper.
            **kwargs (Any): Additional keyword arguments passed to the
                parent class initializer.
        """
        super().__init__(**kwargs)

        if isinstance(model_class, type) and issubclass(
            model_class, (ClassifierMixin, RegressorMixin)
        ):
            self.model_class: Callable = partial(SklearnWrapper, model_class)
        else:
            self.model_class: Callable = model_class

    def save(self, path: Union[str, Path]) -> None:
        """
        Saves the selector instance to the specified file path.

        Args:
            path (Union[str, Path]): The file path to save the selector.
        """
        joblib.dump(self, path)

    @staticmethod
    def load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
        """
        Loads a selector instance from the specified file path.

        Args:
            path (Union[str, Path]): The file path to load the selector from.

        Returns:
            AbstractModelBasedSelector: The loaded selector instance.
        """
        return joblib.load(path)
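Because save relies on joblib.dump of the whole instance, persisting and restoring a fitted selector takes one call each way. A sketch, with the import path assumed (MultiClassClassifier is documented further below):

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from asf.selectors import MultiClassClassifier  # import path assumed

rng = np.random.default_rng(0)
features = pd.DataFrame(rng.normal(size=(20, 3)), columns=["f_0", "f_1", "f_2"])
performance = pd.DataFrame(rng.random(size=(20, 2)), columns=["algo_a", "algo_b"])

# A scikit-learn class is wrapped with SklearnWrapper automatically.
selector = MultiClassClassifier(model_class=RandomForestClassifier)
selector.fit(features, performance)

selector.save("selector.joblib")                      # joblib.dump under the hood
restored = MultiClassClassifier.load("selector.joblib")
print(restored.predict(features.head()))              # {instance: [(algorithm, budget)]}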

__init__(model_class, **kwargs)

Initializes the AbstractModelBasedSelector.

Parameters:

model_class : Union[Type, Callable], required
    The model class or a callable that returns a model instance. If a scikit-learn compatible class is provided, it's wrapped with SklearnWrapper.
**kwargs : Any, default {}
    Additional keyword arguments passed to the parent class initializer.
Source code in asf/selectors/abstract_model_based_selector.py
def __init__(self, model_class: Type[AbstractPredictor], **kwargs: Any) -> None:
    """
    Initializes the AbstractModelBasedSelector.

    Args:
        model_class (Union[Type, Callable]): The model class or a callable
            that returns a model instance. If a scikit-learn compatible
            class is provided, it's wrapped with SklearnWrapper.
        **kwargs (Any): Additional keyword arguments passed to the
            parent class initializer.
    """
    super().__init__(**kwargs)

    if isinstance(model_class, type) and issubclass(
        model_class, (ClassifierMixin, RegressorMixin)
    ):
        self.model_class: Callable = partial(SklearnWrapper, model_class)
    else:
        self.model_class: Callable = model_class

load(path) staticmethod

Loads a selector instance from the specified file path.

Parameters:

path : Union[str, Path], required
    The file path to load the selector from.

Returns:

AbstractModelBasedSelector
    The loaded selector instance.

Source code in asf/selectors/abstract_model_based_selector.py
@staticmethod
def load(path: Union[str, Path]) -> "AbstractModelBasedSelector":
    """
    Loads a selector instance from the specified file path.

    Args:
        path (Union[str, Path]): The file path to load the selector from.

    Returns:
        AbstractModelBasedSelector: The loaded selector instance.
    """
    return joblib.load(path)

save(path)

Saves the selector instance to the specified file path.

Parameters:

path : Union[str, Path], required
    The file path to save the selector.
Source code in asf/selectors/abstract_model_based_selector.py
def save(self, path: Union[str, Path]) -> None:
    """
    Saves the selector instance to the specified file path.

    Args:
        path (Union[str, Path]): The file path to save the selector.
    """
    joblib.dump(self, path)

AbstractSelector

AbstractSelector is a base class for implementing algorithm selection approaches. It provides a framework for fitting, predicting, and managing hierarchical feature generators and configuration spaces.

Attributes

maximize : bool
    Indicates whether the objective is to maximize or minimize the performance metric.
budget : int or None
    The budget for the selector, if applicable.
feature_groups : list[str] or None
    Groups of features to be considered during selection.
hierarchical_generator : AbstractFeatureGenerator or None
    A generator for hierarchical features, if applicable.
algorithm_features : pd.DataFrame or None
    Additional features related to algorithms, if provided.

Source code in asf/selectors/abstract_selector.py
class AbstractSelector:
    """
    AbstractSelector is a base class for implementing feature selection algorithms.
    It provides a framework for fitting, predicting, and managing hierarchical feature
    generators and configuration spaces.

    Attributes
    ----------
    maximize : bool
        Indicates whether the objective is to maximize or minimize the performance metric.
    budget : int or None
        The budget for the selector, if applicable.
    feature_groups : list[str] or None
        Groups of features to be considered during selection.
    hierarchical_generator : AbstractFeatureGenerator or None
        A generator for hierarchical features, if applicable.
    algorithm_features : pd.DataFrame or None
        Additional features related to algorithms, if provided.
    """

    def __init__(
        self,
        budget: int | None = None,
        maximize: bool = False,
        feature_groups: list[str] | None = None,
        hierarchical_generator: AbstractFeatureGenerator | None = None,
    ):
        """
        Initialize the AbstractSelector.

        Parameters
        ----------
        budget : int or None, optional
            The budget for the selector, if applicable. Defaults to None.
        maximize : bool, optional
            Indicates whether to maximize the performance metric. Defaults to False.
        feature_groups : list[str] or None, optional
            Groups of features to be considered during selection. Defaults to None.
        hierarchical_generator : AbstractFeatureGenerator or None, optional
            A generator for hierarchical features, if applicable. Defaults to None.
        """
        self.maximize = maximize
        self.budget = budget
        self.feature_groups = feature_groups
        self.hierarchical_generator = hierarchical_generator
        self.algorithm_features: pd.DataFrame | None = None

    def fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        algorithm_features: pd.DataFrame | None = None,
        **kwargs,
    ) -> None:
        """
        Fit the selector to the given features and performance data.

        Parameters
        ----------
        features : pd.DataFrame
            The input features for the selector.
        performance : pd.DataFrame
            The performance data corresponding to the features.
        algorithm_features : pd.DataFrame or None, optional
            Additional features related to algorithms, if provided. Defaults to None.
        **kwargs : dict
            Additional keyword arguments for fitting.
        """
        if isinstance(features, np.ndarray) and isinstance(performance, np.ndarray):
            features = pd.DataFrame(
                features,
                index=range(len(features)),
                columns=[f"f_{i}" for i in range(features.shape[1])],
            )
            performance = pd.DataFrame(
                performance,
                index=range(len(performance)),
                columns=[f"algo_{i}" for i in range(performance.shape[1])],
            )
        elif isinstance(features, pd.DataFrame) and isinstance(
            performance, pd.DataFrame
        ):
            pass
        else:
            raise ValueError(
                "features and performance must be either numpy arrays or pandas DataFrames"
            )

        if self.hierarchical_generator is not None:
            self.hierarchical_generator.fit(features, performance, algorithm_features)
            features = pd.concat(
                [features, self.hierarchical_generator.generate_features(features)],
                axis=1,
            )
        self.algorithms: list[str] = performance.columns.to_list()
        self.features: list[str] = features.columns.to_list()
        self.algorithm_features = algorithm_features
        self._fit(features, performance, **kwargs)

    def predict(
        self, features: pd.DataFrame, performance: Optional[pd.DataFrame] = None
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the ranking or selection of features for the given input features.

        Parameters
        ----------
        features : pd.DataFrame
            The input features for prediction.
        performance : pd.DataFrame or None, optional
            The (partial) performance data corresponding to the features, if applicable. Defaults to None.

        Returns
        -------
        dict[str, list[tuple[str, float]]]
            A dictionary where keys are algorithm names and values are lists of tuples
            containing feature names and their corresponding scores.
        """
        if self.hierarchical_generator is not None:
            features = pd.concat(
                [features, self.hierarchical_generator.generate_features(features)],
                axis=1,
            )
        if performance is None:
            return self._predict(features)
        else:
            return self._predict(features, performance)

    def save(self, path: str) -> None:
        """
        Save the selector's state to the specified path.

        Parameters
        ----------
        path : str
            The file path where the selector's state will be saved.
        """
        pass

    def load(self, path: str) -> None:
        """
        Load the selector's state from the specified path.

        Parameters
        ----------
        path : str
            The file path from which the selector's state will be loaded.
        """
        pass

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: ConfigurationSpace | None = None, **kwargs
        ) -> ConfigurationSpace:
            """
            Get the configuration space for the selector.

            Parameters
            ----------
            cs : ConfigurationSpace or None, optional
                The configuration space to use. If None, a new one will be created.
            **kwargs : dict
                Additional keyword arguments for configuration space creation.

            Returns
            -------
            ConfigurationSpace
                The configuration space for the selector.

            Raises
            ------
            NotImplementedError
                If the method is not implemented in a subclass.
            """
            raise NotImplementedError(
                "get_configuration_space() is not implemented for this selector"
            )

        @staticmethod
        def get_from_configuration(configuration: Configuration) -> "AbstractSelector":
            """
            Create a selector instance from a configuration.

            Parameters
            ----------
            configuration : Configuration
                The configuration object.

            Returns
            -------
            AbstractSelector
                The selector instance.

            Raises
            ------
            NotImplementedError
                If the method is not implemented in a subclass.
            """
            raise NotImplementedError(
                "get_from_configuration() is not implemented for this selector"
            )

        @staticmethod
        def _add_hierarchical_generator_space(
            cs: ConfigurationSpace,
            hierarchical_generator: list[AbstractFeatureGenerator] | None = None,
            **kwargs,
        ) -> ConfigurationSpace:
            """
            Add the hierarchical generator space to the configuration space.

            Parameters
            ----------
            cs : ConfigurationSpace
                The configuration space to use.
            hierarchical_generator : list[AbstractFeatureGenerator] or None, optional
                The list of hierarchical generators to add. Defaults to None.
            **kwargs : dict
                Additional keyword arguments to pass to the model class.

            Returns
            -------
            ConfigurationSpace
                The updated configuration space.
            """
            if hierarchical_generator is not None:
                if "hierarchical_generator" in cs:
                    return

                cs.add(
                    Categorical(
                        name="hierarchical_generator",
                        items=hierarchical_generator,
                    )
                )

                for generator in hierarchical_generator:
                    generator.get_configuration_space(cs=cs, **kwargs)

            return cs
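Concrete selectors implement _fit and _predict. As a minimal sketch (not a library class), a single-best selector that always recommends the algorithm with the best mean training performance:

import pandas as pd

from asf.selectors.abstract_selector import AbstractSelector


class SingleBestSelector(AbstractSelector):
    """Hypothetical selector: always pick the algorithm with the best mean performance."""

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        means = performance.mean(axis=0)
        # self.maximize is set by AbstractSelector.__init__.
        self.best_algorithm = means.idxmax() if self.maximize else means.idxmin()

    def _predict(self, features: pd.DataFrame) -> dict:
        # Same output contract as the library's selectors:
        # {instance_name: [(algorithm_name, budget)]}
        return {
            instance: [(self.best_algorithm, self.budget)]
            for instance in features.index
        }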

__init__(budget=None, maximize=False, feature_groups=None, hierarchical_generator=None)

Initialize the AbstractSelector.

Parameters

budget : int or None, optional
    The budget for the selector, if applicable. Defaults to None.
maximize : bool, optional
    Indicates whether to maximize the performance metric. Defaults to False.
feature_groups : list[str] or None, optional
    Groups of features to be considered during selection. Defaults to None.
hierarchical_generator : AbstractFeatureGenerator or None, optional
    A generator for hierarchical features, if applicable. Defaults to None.

Source code in asf/selectors/abstract_selector.py
def __init__(
    self,
    budget: int | None = None,
    maximize: bool = False,
    feature_groups: list[str] | None = None,
    hierarchical_generator: AbstractFeatureGenerator | None = None,
):
    """
    Initialize the AbstractSelector.

    Parameters
    ----------
    budget : int or None, optional
        The budget for the selector, if applicable. Defaults to None.
    maximize : bool, optional
        Indicates whether to maximize the performance metric. Defaults to False.
    feature_groups : list[str] or None, optional
        Groups of features to be considered during selection. Defaults to None.
    hierarchical_generator : AbstractFeatureGenerator or None, optional
        A generator for hierarchical features, if applicable. Defaults to None.
    """
    self.maximize = maximize
    self.budget = budget
    self.feature_groups = feature_groups
    self.hierarchical_generator = hierarchical_generator
    self.algorithm_features: pd.DataFrame | None = None

fit(features, performance, algorithm_features=None, **kwargs)

Fit the selector to the given features and performance data.

Parameters

features : pd.DataFrame
    The input features for the selector.
performance : pd.DataFrame
    The performance data corresponding to the features.
algorithm_features : pd.DataFrame or None, optional
    Additional features related to algorithms, if provided. Defaults to None.
**kwargs : dict
    Additional keyword arguments for fitting.

Source code in asf/selectors/abstract_selector.py
def fit(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame,
    algorithm_features: pd.DataFrame | None = None,
    **kwargs,
) -> None:
    """
    Fit the selector to the given features and performance data.

    Parameters
    ----------
    features : pd.DataFrame
        The input features for the selector.
    performance : pd.DataFrame
        The performance data corresponding to the features.
    algorithm_features : pd.DataFrame or None, optional
        Additional features related to algorithms, if provided. Defaults to None.
    **kwargs : dict
        Additional keyword arguments for fitting.
    """
    if isinstance(features, np.ndarray) and isinstance(performance, np.ndarray):
        features = pd.DataFrame(
            features,
            index=range(len(features)),
            columns=[f"f_{i}" for i in range(features.shape[1])],
        )
        performance = pd.DataFrame(
            performance,
            index=range(len(performance)),
            columns=[f"algo_{i}" for i in range(performance.shape[1])],
        )
    elif isinstance(features, pd.DataFrame) and isinstance(
        performance, pd.DataFrame
    ):
        pass
    else:
        raise ValueError(
            "features and performance must be either numpy arrays or pandas DataFrames"
        )

    if self.hierarchical_generator is not None:
        self.hierarchical_generator.fit(features, performance, algorithm_features)
        features = pd.concat(
            [features, self.hierarchical_generator.generate_features(features)],
            axis=1,
        )
    self.algorithms: list[str] = performance.columns.to_list()
    self.features: list[str] = features.columns.to_list()
    self.algorithm_features = algorithm_features
    self._fit(features, performance, **kwargs)
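As the conversion branch above shows, fit also accepts NumPy arrays and turns them into DataFrames with generated column names, e.g.:

import numpy as np

X = np.random.rand(50, 4)   # 50 instances, 4 features   -> columns f_0 .. f_3
y = np.random.rand(50, 3)   # 50 instances, 3 algorithms -> columns algo_0 .. algo_2

selector.fit(X, y)          # selector: any concrete AbstractSelector subclass (assumed)
print(selector.features)    # ['f_0', 'f_1', 'f_2', 'f_3']
print(selector.algorithms)  # ['algo_0', 'algo_1', 'algo_2']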

get_configuration_space(cs=None, **kwargs) staticmethod

Get the configuration space for the selector.

Parameters

cs : ConfigurationSpace or None, optional
    The configuration space to use. If None, a new one will be created.
**kwargs : dict
    Additional keyword arguments for configuration space creation.

Returns

ConfigurationSpace
    The configuration space for the selector.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_configuration_space(
    cs: ConfigurationSpace | None = None, **kwargs
) -> ConfigurationSpace:
    """
    Get the configuration space for the selector.

    Parameters
    ----------
    cs : ConfigurationSpace or None, optional
        The configuration space to use. If None, a new one will be created.
    **kwargs : dict
        Additional keyword arguments for configuration space creation.

    Returns
    -------
    ConfigurationSpace
        The configuration space for the selector.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "get_configuration_space() is not implemented for this selector"
    )

get_from_configuration(configuration) staticmethod

Create a selector instance from a configuration.

Parameters

configuration : Configuration
    The configuration object.

Returns

AbstractSelector
    The selector instance.

Raises

NotImplementedError
    If the method is not implemented in a subclass.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_from_configuration(configuration: Configuration) -> "AbstractSelector":
    """
    Create a selector instance from a configuration.

    Parameters
    ----------
    configuration : Configuration
        The configuration object.

    Returns
    -------
    AbstractSelector
        The selector instance.

    Raises
    ------
    NotImplementedError
        If the method is not implemented in a subclass.
    """
    raise NotImplementedError(
        "get_from_configuration() is not implemented for this selector"
    )

load(path)

Load the selector's state from the specified path.

Parameters

path : str
    The file path from which the selector's state will be loaded.

Source code in asf/selectors/abstract_selector.py
def load(self, path: str) -> None:
    """
    Load the selector's state from the specified path.

    Parameters
    ----------
    path : str
        The file path from which the selector's state will be loaded.
    """
    pass

predict(features, performance=None)

Predict the best algorithm for each instance in the given input features.

Parameters

features : pd.DataFrame
    The input features for prediction.
performance : pd.DataFrame or None, optional
    The (partial) performance data corresponding to the features, if applicable. Defaults to None.

Returns

dict[str, list[tuple[str, float]]]
    A dictionary mapping instance names to lists of tuples, each containing a selected algorithm and its associated budget or score.

Source code in asf/selectors/abstract_selector.py
def predict(
    self, features: pd.DataFrame, performance: Optional[pd.DataFrame] = None
) -> dict[str, list[tuple[str, float]]]:
    """
    Predict the ranking or selection of features for the given input features.

    Parameters
    ----------
    features : pd.DataFrame
        The input features for prediction.
    performance : pd.DataFrame or None, optional
        The (partial) performance data corresponding to the features, if applicable. Defaults to None.

    Returns
    -------
    dict[str, list[tuple[str, float]]]
        A dictionary where keys are algorithm names and values are lists of tuples
        containing feature names and their corresponding scores.
    """
    if self.hierarchical_generator is not None:
        features = pd.concat(
            [features, self.hierarchical_generator.generate_features(features)],
            axis=1,
        )
    if performance is None:
        return self._predict(features)
    else:
        return self._predict(features, performance)
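When a hierarchical generator is supplied, both fit and predict append the generated columns before delegating to _fit/_predict. A sketch reusing the hypothetical SquaredFeatureGenerator and SingleBestSelector from the earlier sketches:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
features = pd.DataFrame(rng.normal(size=(10, 2)), columns=["f_0", "f_1"])
performance = pd.DataFrame(rng.random(size=(10, 2)), columns=["a", "b"])

selector = SingleBestSelector(
    hierarchical_generator=SquaredFeatureGenerator(), budget=3600
)
selector.fit(features, performance)
print(selector.features)  # ['f_0', 'f_1', 'f_0__squared', 'f_1__squared']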

save(path)

Save the selector's state to the specified path.

Parameters

path : str
    The file path where the selector's state will be saved.

Source code in asf/selectors/abstract_selector.py
def save(self, path: str) -> None:
    """
    Save the selector's state to the specified path.

    Parameters
    ----------
    path : str
        The file path where the selector's state will be saved.
    """
    pass

CollaborativeFilteringSelector

Bases: AbstractModelBasedSelector

Collaborative filtering selector using SGD matrix factorization (ALORS-style).

Source code in asf/selectors/collaborative_filtering_selector.py
class CollaborativeFilteringSelector(AbstractModelBasedSelector):
    """
    Collaborative filtering selector using SGD matrix factorization (ALORS-style).
    """

    def __init__(
        self,
        model_class=RidgeRegressorWrapper,
        n_components: int = 10,
        n_iter: int = 100,
        lr: float = 0.001,
        reg: float = 0.1,
        random_state: int = 42,
        **kwargs,
    ):
        """
        Initializes the CollaborativeFilteringSelector.

        Args:
            n_components (int): Number of latent factors.
            n_iter (int): Number of iterations for SGD.
            lr (float): Learning rate for SGD.
            reg (float): Regularization strength.
            random_state (int): Random seed for initialization.
            **kwargs: Additional arguments for the parent classes.
        """
        super().__init__(model_class=model_class, **kwargs)
        self.n_components = n_components
        self.n_iter = n_iter
        self.lr = lr
        self.reg = reg
        self.random_state = random_state
        self.U = None  # Instance latent factors
        self.V = None  # Algorithm latent factors
        self.performance_matrix = None
        self.model = None

        # Bias terms
        self.mu = None  # Global mean
        self.b_U = None  # Instance biases
        self.b_V = None  # Algorithm biases

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the collaborative filtering model to the given data.

        Args:
            features (pd.DataFrame): DataFrame containing problem instance features.
            performance (pd.DataFrame): DataFrame where columns are algorithms and rows are instances.
        """
        self.algorithms = list(performance.columns)
        self.performance_matrix = performance.copy()
        np.random.seed(self.random_state)

        n_instances, n_algorithms = performance.shape
        # Initialize latent factors
        self.U = np.random.normal(scale=0.1, size=(n_instances, self.n_components))
        self.V = np.random.normal(scale=0.1, size=(n_algorithms, self.n_components))

        # Get observed entries
        observed = ~performance.isna()
        rows, cols = np.where(observed.values)

        # --- Bias initialization ---
        # Global mean from observed entries
        self.mu = np.nanmean(performance.values)
        # Instance and algorithm biases
        self.b_U = np.zeros(n_instances)
        self.b_V = np.zeros(n_algorithms)

        # SGD optimization with bias terms
        for it in range(self.n_iter):
            for i, j in zip(rows, cols):
                r_ij = performance.values[i, j]
                pred = (
                    self.mu + self.b_U[i] + self.b_V[j] + np.dot(self.U[i], self.V[j])
                )
                if np.isnan(r_ij) or np.isnan(pred):
                    continue
                err = r_ij - pred
                err = np.clip(err, -10, 10)
                # Update latent factors
                self.U[i] += self.lr * (err * self.V[j] - self.reg * self.U[i])
                self.V[j] += self.lr * (err * self.U[i] - self.reg * self.V[j])
                # Update biases with L2 regularization
                self.b_U[i] += self.lr * (err - self.reg * self.b_U[i])
                self.b_V[j] += self.lr * (err - self.reg * self.b_V[j])

        self.model = self.model_class()
        self.model.fit(features.values, self.U)

    def _predict_cold_start(
        self, instance_features: pd.Series, instance_name: str
    ) -> Tuple[str, float]:
        """
        Predict the best algorithm for a single instance using only its features (cold-start).
        """
        # Align and scale features
        X = instance_features[self.features].values.reshape(1, -1)
        U_new = self.model.predict(X)
        # Compute scores with global and algorithm bias
        scores = self.mu + self.b_V + np.dot(U_new, self.V.T).flatten()
        scores = np.asarray(scores).flatten()
        best_idx = np.argmin(scores)
        best_algo = self.algorithms[best_idx]
        best_score = scores[best_idx]
        return best_algo, best_score

    def _predict(
        self,
        features: Optional[pd.DataFrame] = None,
        performance: Optional[pd.DataFrame] = None,
    ) -> Dict[str, List[Tuple[str, float]]]:
        """
        Predicts the best algorithm for instances according to the scenario described.
        """
        if self.U is None or self.V is None or self.performance_matrix is None:
            raise ValueError("Model has not been fitted yet. Call fit() first.")

        predictions = {}

        # Case 1: Return best algorithm for training instances
        if features is None and performance is None:
            pred_matrix = (
                self.mu + self.b_U[:, None] + self.b_V[None, :] + (self.U @ self.V.T)
            )
            for idx, instance in enumerate(self.performance_matrix.index):
                scores = np.asarray(pred_matrix[idx]).flatten()
                best_idx = np.argmin(scores)
                best_algo = self.algorithms[best_idx]
                predictions[instance] = [(best_algo, self.budget)]
            return predictions

        # Case 2: Performance is not None (ALORS-style prediction for new instances)
        if performance is not None:
            rng = np.random.RandomState(self.random_state)
            for i, instance in enumerate(performance.index):
                perf_row = performance.loc[instance]
                if not perf_row.isnull().all():
                    # Infer latent factors for this instance using observed entries
                    u = rng.normal(scale=0.1, size=(self.n_components,))
                    for _ in range(20):  # few SGD steps
                        for j, algo in enumerate(self.algorithms):
                            if not pd.isna(perf_row[algo]):
                                r_ij = perf_row[algo]
                                pred = self.mu + self.b_V[j] + np.dot(u, self.V[j])
                                err = r_ij - pred
                                u += self.lr * (err * self.V[j] - self.reg * u)
                    scores = self.mu + self.b_V[None, :] + np.dot(u, self.V.T)
                    scores = np.asarray(scores).flatten()
                    best_idx = np.argmin(scores)
                    best_algo = self.algorithms[best_idx]
                    predictions[instance] = [(best_algo, self.budget)]
                else:
                    # True cold-start within the warm-start batch: use features if available
                    if features is None:
                        # Fallback to average if no features are available
                        avg_scores = self.performance_matrix.mean()
                        scores = np.asarray(avg_scores.values).flatten()
                        best_idx = np.argmin(scores)
                        best_algo = self.algorithms[best_idx]
                        predictions[instance] = [(best_algo, self.budget)]
                    else:
                        instance_features = features.loc[instance]
                        best_algo, _ = self._predict_cold_start(
                            instance_features, instance
                        )
                        predictions[instance] = [(best_algo, self.budget)]
                    continue
            return predictions

        # Case 3: Features is not None, Performance is None (cold start)
        if features is not None and performance is None:
            for instance in features.index:
                instance_features = features.loc[instance]
                best_algo, _ = self._predict_cold_start(instance_features, instance)
                predictions[instance] = [(best_algo, self.budget)]
            return predictions

        return predictions
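A usage sketch covering the cold-start and warm-start prediction paths; the import path is assumed, and the default RidgeRegressorWrapper maps instance features to the learned latent factors:

import numpy as np
import pandas as pd

from asf.selectors import CollaborativeFilteringSelector  # import path assumed

rng = np.random.default_rng(0)
features = pd.DataFrame(rng.normal(size=(30, 5)), columns=[f"f_{i}" for i in range(5)])
performance = pd.DataFrame(rng.random(size=(30, 3)), columns=["a1", "a2", "a3"])
performance[performance > 0.8] = np.nan   # simulate unobserved runs

selector = CollaborativeFilteringSelector(n_components=5, budget=3600)
selector.fit(features, performance)

# Cold start: only the features of the new instances are known.
new_features = pd.DataFrame(rng.normal(size=(2, 5)), columns=features.columns)
print(selector.predict(new_features))

# Warm start: some runs of a new instance were already observed.
partial = pd.DataFrame(
    [[0.4, np.nan, np.nan]], columns=["a1", "a2", "a3"], index=[new_features.index[0]]
)
print(selector.predict(new_features.iloc[:1], partial))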

__init__(model_class=RidgeRegressorWrapper, n_components=10, n_iter=100, lr=0.001, reg=0.1, random_state=42, **kwargs)

Initializes the CollaborativeFilteringSelector.

Parameters:

model_class : default RidgeRegressorWrapper
    The model class used to predict instance latent factors from features (for cold-start prediction).
n_components : int, default 10
    Number of latent factors.
n_iter : int, default 100
    Number of iterations for SGD.
lr : float, default 0.001
    Learning rate for SGD.
reg : float, default 0.1
    Regularization strength.
random_state : int, default 42
    Random seed for initialization.
**kwargs, default {}
    Additional arguments for the parent classes.
Source code in asf/selectors/collaborative_filtering_selector.py
def __init__(
    self,
    model_class=RidgeRegressorWrapper,
    n_components: int = 10,
    n_iter: int = 100,
    lr: float = 0.001,
    reg: float = 0.1,
    random_state: int = 42,
    **kwargs,
):
    """
    Initializes the CollaborativeFilteringSelector.

    Args:
        n_components (int): Number of latent factors.
        n_iter (int): Number of iterations for SGD.
        lr (float): Learning rate for SGD.
        reg (float): Regularization strength.
        random_state (int): Random seed for initialization.
        **kwargs: Additional arguments for the parent classes.
    """
    super().__init__(model_class=model_class, **kwargs)
    self.n_components = n_components
    self.n_iter = n_iter
    self.lr = lr
    self.reg = reg
    self.random_state = random_state
    self.U = None  # Instance latent factors
    self.V = None  # Algorithm latent factors
    self.performance_matrix = None
    self.model = None

    # Bias terms
    self.mu = None  # Global mean
    self.b_U = None  # Instance biases
    self.b_V = None  # Algorithm biases

ISAC

Bases: AbstractSelector

ISAC (Instance-Specific Algorithm Configuration) selector.

Clusters instances in feature space using a user-provided clusterer (default: GMeans) and assigns to each cluster the algorithm with the best mean performance. For a new instance, it predicts the cluster and recommends that cluster's best algorithm.

Parameters:

clusterer : object, default GMeans
    An object with fit(X) and predict(X) methods (e.g., GMeans, KMeans), or a class to be instantiated with clusterer_kwargs. If None, uses GMeans by default.
clusterer_kwargs : dict, default None
    Optional keyword arguments used to instantiate the clusterer when a class is provided.
**kwargs, default {}
    Additional arguments for the parent class.

Note

It is recommended to scale features before using ISAC.

Source code in asf/selectors/isac.py
class ISAC(AbstractSelector):
    """
    ISAC (Instance-Specific Algorithm Configuration) selector.

    Clusters instances in feature space using a user-provided clusterer (default: GMeans) and assigns to each cluster the best algorithm
    (by mean or median performance). For a new instance, predicts the cluster and
    recommends the cluster's best algorithm.

    Args:
        clusterer (object): An object with fit(X) and predict(X) methods (e.g., GMeans, KMeans).
            If None, uses GMeans by default.
        clusterer_kwargs (dict): Optional keyword arguments to instantiate the clusterer if not provided.
        **kwargs: Additional arguments for the parent class.

    Note:
        It is recommended to scale features before using ISAC.
    """

    def __init__(
        self,
        clusterer: Optional[Any] = GMeans,
        clusterer_kwargs: Optional[dict] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.clusterer = clusterer
        self.clusterer_kwargs = clusterer_kwargs or {}
        self.clusterer_instance = None
        self.cluster_to_best_algo = {}

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fit the ISAC selector.

        Args:
            features (pd.DataFrame): Feature matrix (instances x features).
            performance (pd.DataFrame): Performance matrix (instances x algorithms).
        """
        if isinstance(self.clusterer, type):
            # A clusterer class was provided: instantiate it with the given kwargs.
            self.clusterer_instance = self.clusterer(**self.clusterer_kwargs)
        elif hasattr(self.clusterer, "fit") and hasattr(self.clusterer, "predict"):
            # An already-instantiated clusterer was provided: use it as-is.
            self.clusterer_instance = self.clusterer
        else:
            raise ValueError(
                "clusterer must be a class or an instance with fit/predict"
            )

        self.clusterer_instance.fit(features.values)
        cluster_labels = self.clusterer_instance.predict(features.values)

        # For each cluster, find the best algorithm (lowest mean performance)
        n_clusters = len(np.unique(cluster_labels))
        for cluster_id in range(n_clusters):
            idxs = np.where(cluster_labels == cluster_id)[0]
            if len(idxs) == 0:
                continue
            cluster_perf = performance.iloc[idxs]
            algo_means = cluster_perf.mean(axis=0)
            best_algo = algo_means.idxmin()
            self.cluster_to_best_algo[cluster_id] = best_algo

    def _predict(
        self,
        features: Optional[pd.DataFrame] = None,
    ) -> Dict[str, List[Tuple[str, float]]]:
        """
        Predict the best algorithm for each instance based on its cluster.

        Args:
            features (pd.DataFrame): Feature matrix for test instances.

        Returns:
            Dict[str, List[Tuple[str, float]]]: Mapping from instance name to [(algorithm, budget)].
        """
        if features is None:
            raise ValueError("Features must be provided for prediction.")
        if self.clusterer_instance is None:
            raise RuntimeError("ISAC must be fitted before prediction.")

        cluster_labels = self.clusterer_instance.predict(features.values)
        predictions = {}
        for idx, instance in enumerate(features.index):
            cluster_id = cluster_labels[idx]
            best_algo = self.cluster_to_best_algo.get(cluster_id, None)
            predictions[instance] = (
                [(best_algo, self.budget)] if best_algo else [(None, self.budget)]
            )
        return predictions
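Because any object exposing fit(X) and predict(X) qualifies, a scikit-learn KMeans instance can stand in for the default GMeans. A sketch (import path for ISAC assumed):

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

from asf.selectors import ISAC  # import path assumed

rng = np.random.default_rng(0)
features = pd.DataFrame(rng.normal(size=(40, 4)))
performance = pd.DataFrame(rng.random(size=(40, 2)), columns=["algo_a", "algo_b"])

# Scaling the features first is recommended for ISAC.
scaled = pd.DataFrame(StandardScaler().fit_transform(features), index=features.index)

selector = ISAC(clusterer=KMeans(n_clusters=4), budget=3600)
selector.fit(scaled, performance)
print(selector.predict(scaled.head()))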

JointRanking

Bases: AbstractSelector, AbstractFeatureGenerator

JointRanking implements a ranking-based approach for selecting the best-performing algorithms for a given set of features. It combines feature generation and model-based selection to predict algorithm performance.

Reference

Ortuzk et al. (2022)

Source code in asf/selectors/joint_ranking.py
class JointRanking(AbstractSelector, AbstractFeatureGenerator):
    """
    JointRanking implements a ranking-based approach for selecting the best-performing
    algorithms for a given set of features. It combines feature generation and model-based
    selection to predict algorithm performance.

    Reference:
        Ortuzk et al. (2022)
    """

    def __init__(
        self,
        model: RankingMLP = None,
        **kwargs,
    ) -> None:
        """
        Initializes the JointRanking selector with the given parameters.

        Args:
            model (RankingMLP, optional): The regression model to be used for ranking.
            **kwargs: Additional arguments passed to the AbstractSelector.
        """
        AbstractSelector.__init__(self, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.model: RankingMLP = model

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the regression models to the given features and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
            performance (pd.DataFrame): DataFrame containing the performance data.
        """
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=self.algorithms,
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],
            )

        if self.model is None:
            self.model = RankingMLP(
                input_size=len(self.features) + len(self.algorithms)
            )

        self.model.fit(features[self.features], performance, self.algorithm_features)

    def _predict(self, features: pd.DataFrame) -> dict:
        """
        Predicts the performance of algorithms for the given features.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            dict: A dictionary mapping instance names to the predicted best algorithm
                  and the associated budget.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.algorithms[
                        np.argmax(predictions.loc[i])
                        if self.maximize
                        else np.argmin(predictions.loc[i])
                    ],
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates predictions for the given features using the trained models.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            pd.DataFrame: DataFrame containing the predictions for each algorithm.
        """
        predictions = np.zeros((features.shape[0], len(self.algorithms)))

        features = features[self.features]
        for i, algorithm in enumerate(self.algorithms):
            data = features.assign(**self.algorithm_features.loc[algorithm])
            data = data[self.algorithm_features.columns.to_list() + self.features]
            prediction = self.model.predict(data)
            predictions[:, i] = prediction.flatten()

        return pd.DataFrame(predictions, columns=self.algorithms)

__init__(model=None, **kwargs)

Initializes the JointRanking selector with the given parameters.

Parameters:

model : RankingMLP, optional, default None
    The regression model to be used for ranking.
**kwargs, default {}
    Additional arguments passed to the AbstractSelector.
Source code in asf/selectors/joint_ranking.py
def __init__(
    self,
    model: RankingMLP = None,
    **kwargs,
) -> None:
    """
    Initializes the JointRanking selector with the given parameters.

    Args:
        model (RankingMLP, optional): The regression model to be used for ranking.
        **kwargs: Additional arguments passed to the AbstractSelector.
    """
    AbstractSelector.__init__(self, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.model: RankingMLP = model

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

features : pd.DataFrame, required
    DataFrame containing the feature data.

Returns:

pd.DataFrame
    DataFrame containing the predictions for each algorithm.

Source code in asf/selectors/joint_ranking.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates predictions for the given features using the trained models.

    Args:
        features (pd.DataFrame): DataFrame containing the feature data.

    Returns:
        pd.DataFrame: DataFrame containing the predictions for each algorithm.
    """
    predictions = np.zeros((features.shape[0], len(self.algorithms)))

    features = features[self.features]
    for i, algorithm in enumerate(self.algorithms):
        data = features.assign(**self.algorithm_features.loc[algorithm])
        data = data[self.algorithm_features.columns.to_list() + self.features]
        prediction = self.model.predict(data)
        predictions[:, i] = prediction.flatten()

    return pd.DataFrame(predictions, columns=self.algorithms)

MultiClassClassifier

Bases: AbstractModelBasedSelector

A selector that uses a multi-class classification model to predict the best algorithm for a given set of features and performance data.

Source code in asf/selectors/mutli_class.py
class MultiClassClassifier(AbstractModelBasedSelector):
    """
    A selector that uses a multi-class classification model to predict the best algorithm
    for a given set of features and performance data.
    """

    def __init__(self, model_class: Type[AbstractPredictor], **kwargs):
        """
        Initializes the MultiClassClassifier.

        Args:
            model_class: The class of the model to be used for classification.
            **kwargs: Additional keyword arguments to be passed to the parent class.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier: object = None

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the classification model to the given feature and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
                Each row corresponds to an instance, and each column corresponds to a feature.
            performance (pd.DataFrame): DataFrame containing the performance data.
                Each row corresponds to an instance, and each column corresponds to an algorithm.
        """
        assert self.algorithm_features is None, (
            "MultiClassClassifier does not use algorithm features."
        )
        self.classifier = self.model_class()
        # Use the index of the algorithm with the best performance (lowest value) as the target
        self.classifier.fit(features, np.argmin(performance.values, axis=1))

    def _predict(self, features: pd.DataFrame) -> dict:
        """
        Predicts the best algorithm for each instance in the given feature data using simple multi-class classification.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
                Each row corresponds to an instance, and each column corresponds to a feature.

        Returns:
            dict: A dictionary mapping instance names (index of the features DataFrame)
                  to a list containing a tuple of the predicted best algorithm and the budget.
                  Example: {instance_name: [(algorithm_name, budget)]}
        """
        predictions = self.classifier.predict(features)

        return {
            instance_name: [(self.algorithms[predictions[i]], self.budget)]
            for i, instance_name in enumerate(features.index)
        }
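The training target is just the column index of the best (lowest-cost) algorithm per instance; a small worked example of the label construction used in _fit:

import numpy as np
import pandas as pd

performance = pd.DataFrame(
    {"algo_a": [1.0, 5.0], "algo_b": [2.0, 0.5]},
    index=["inst_1", "inst_2"],
)

labels = np.argmin(performance.values, axis=1)    # array([0, 1])
print([performance.columns[k] for k in labels])   # ['algo_a', 'algo_b']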

__init__(model_class, **kwargs)

Initializes the MultiClassClassifier.

Parameters:

model_class : Type[AbstractPredictor], required
    The class of the model to be used for classification.
**kwargs, default {}
    Additional keyword arguments to be passed to the parent class.
Source code in asf/selectors/mutli_class.py
def __init__(self, model_class: Type[AbstractPredictor], **kwargs):
    """
    Initializes the MultiClassClassifier.

    Args:
        model_class: The class of the model to be used for classification.
        **kwargs: Additional keyword arguments to be passed to the parent class.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    self.classifier: object = None

PairwiseClassifier

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

Source code in asf/selectors/pairwise_classifier.py
class PairwiseClassifier(AbstractModelBasedSelector, AbstractFeatureGenerator):
    PREFIX = "pairwise_classifier"
    """
    PairwiseClassifier is a selector that uses pairwise comparison of algorithms
    to predict the best algorithm for a given instance.

    Attributes:
        PREFIX (str): Prefix used for configuration space parameters.
        classifiers (List[AbstractPredictor]): List of trained classifiers for pairwise comparisons.
        use_weights (bool): Whether to use weights based on performance differences.
    """

    def __init__(
        self, model_class: type[AbstractPredictor], use_weights: bool = True, **kwargs
    ):
        """
        Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

        Args:
            model_class (type[AbstractPredictor]): The classifier model to be used for pairwise comparisons.
            use_weights (bool): Whether to use weights based on performance differences. Defaults to True.
            **kwargs: Additional keyword arguments for the parent class.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.classifiers: List[AbstractPredictor] = []
        self.use_weights: bool = use_weights

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the pairwise classifiers using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseClassifier does not use algorithm features."
        )
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                if self.maximize:
                    diffs = algo1_times > algo2_times
                else:
                    diffs = algo1_times < algo2_times

                # Ensure diffs are integers (0/1), not boolean
                diffs = diffs.astype(int)

                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None
                    if not self.use_weights
                    else np.abs(algo1_times - algo2_times),
                )
                self.classifiers.append(cur_model)

    def _predict(
        self, features: pd.DataFrame
    ) -> Dict[str, List[Tuple[str, Union[int, float]]]]:
        """
        Predicts the best algorithm for each instance using the trained pairwise classifiers.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            Dict[str, List[Tuple[str, Union[int, float]]]]: A dictionary mapping instance names to the predicted best algorithm and budget.
            Example: {instance_name: [(algorithm_name, budget)]}
        """
        predictions_sum = self.generate_features(features)
        result = {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmax(),
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }
        return result

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates features for the pairwise classifiers.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: A DataFrame of predictions for each instance and algorithm pair.
        """
        cnt = 0
        predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                # Each classifier outputs 1 if `algorithm` beats `other_algorithm`.
                prediction = np.asarray(
                    self.classifiers[cnt].predict(features)
                ).astype(bool)
                # Boolean masks credit one vote to the winner of each pairwise duel.
                predictions_sum.loc[prediction, algorithm] += 1
                predictions_sum.loc[~prediction, other_algorithm] += 1
                cnt += 1

        return predictions_sum

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: Optional[ConfigurationSpace] = None,
            cs_transform: Optional[Dict[str, dict]] = None,
            model_class: List[type[AbstractPredictor]] = [
                RandomForestClassifierWrapper,
                XGBoostClassifierWrapper,
            ],
            pre_prefix: str = "",
            parent_param: Optional[Hyperparameter] = None,
            parent_value: Optional[str] = None,
            **kwargs,
        ) -> Tuple[ConfigurationSpace, Dict[str, dict]]:
            """
            Get the configuration space for the predictor.

            Args:
                cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
                cs_transform (Optional[Dict[str, dict]]): A dictionary for transforming configuration space parameters.
                model_class (List[type[AbstractPredictor]]): The list of model classes to use. Defaults to [RandomForestClassifierWrapper, XGBoostClassifierWrapper].
                hierarchical_generator (Optional[List[AbstractFeatureGenerator]]): List of hierarchical feature generators.
                **kwargs: Additional keyword arguments to pass to the model class.

            Returns:
                Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.
            """
            if cs is None:
                cs = ConfigurationSpace()

            if cs_transform is None:
                cs_transform = dict()

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
            else:
                prefix = PairwiseClassifier.PREFIX

            model_class_param = Categorical(
                name=f"{prefix}:model_class",
                items=[str(c.__name__) for c in model_class],
            )

            cs_transform[f"{prefix}:model_class"] = {
                str(c.__name__): c for c in model_class
            }

            use_weights_param = Categorical(
                name=f"{prefix}:use_weights",
                items=[True, False],
            )

            params = [model_class_param, use_weights_param]

            if parent_param is not None:
                conditions = [
                    EqualsCondition(
                        child=param,
                        parent=parent_param,
                        value=parent_value,
                    )
                    for param in params
                ]
            else:
                conditions = []

            cs.add(params + conditions)

            for model in model_class:
                model.get_configuration_space(
                    cs=cs,
                    pre_prefix=f"{prefix}:model_class",
                    parent_param=model_class_param,
                    parent_value=str(model.__name__),
                    **kwargs,
                )

            return cs, cs_transform

        @staticmethod
        def get_from_configuration(
            configuration: Configuration,
            cs_transform: Dict[str, dict],
            pre_prefix: str = "",
            **kwargs,
        ) -> partial:
            """
            Get the predictor from a given configuration.

            Args:
                configuration (Configuration): The configuration object.
                cs_transform (Dict[str, dict]): The transformation dictionary for the configuration space.

            Returns:
                partial: A partial function to initialize the PairwiseClassifier with the given configuration.
            """

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
            else:
                prefix = PairwiseClassifier.PREFIX

            model_class = cs_transform[f"{prefix}:model_class"][
                configuration[f"{prefix}:model_class"]
            ]
            use_weights = configuration[f"{prefix}:use_weights"]

            model = model_class.get_from_configuration(
                configuration, pre_prefix=f"{prefix}:model_class"
            )

            return PairwiseClassifier(
                model_class=model,
                use_weights=use_weights,
                hierarchical_generator=None,
                **kwargs,
            )

PREFIX = 'pairwise_classifier' class-attribute instance-attribute

PairwiseClassifier is a selector that uses pairwise comparison of algorithms to predict the best algorithm for a given instance.

Attributes:

Name Type Description
PREFIX str

Prefix used for configuration space parameters.

classifiers List[AbstractPredictor]

List of trained classifiers for pairwise comparisons.

use_weights bool

Whether to use weights based on performance differences.

__init__(model_class, use_weights=True, **kwargs)

Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

Parameters:

Name Type Description Default
model_class type[AbstractPredictor]

The classifier model to be used for pairwise comparisons.

required
use_weights bool

Whether to use weights based on performance differences. Defaults to True.

True
**kwargs

Additional keyword arguments for the parent class.

{}
Source code in asf/selectors/pairwise_classifier.py
def __init__(
    self, model_class: type[AbstractPredictor], use_weights: bool = True, **kwargs
):
    """
    Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

    Args:
        model_class (type[AbstractPredictor]): The classifier model to be used for pairwise comparisons.
        use_weights (bool): Whether to use weights based on performance differences. Defaults to True.
        **kwargs: Additional keyword arguments for the parent class.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.classifiers: List[AbstractPredictor] = []
    self.use_weights: bool = use_weights

generate_features(features)

Generates features for the pairwise classifiers.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Type Description
DataFrame

pd.DataFrame: A DataFrame of aggregated pairwise votes, with one row per instance and one column per algorithm.

Source code in asf/selectors/pairwise_classifier.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates features for the pairwise classifiers.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: A DataFrame of aggregated pairwise votes, with one row per instance and one column per algorithm.
    """
    cnt = 0
    predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
    for i, algorithm in enumerate(self.algorithms):
        for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
            prediction = self.classifiers[cnt].predict(features)
            predictions_sum.loc[prediction, algorithm] += 1
            predictions_sum.loc[1 - prediction, other_algorithm] += 1
            cnt += 1

    return predictions_sum
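
To make the vote aggregation concrete (assuming, as the loop update suggests, that a prediction of 1 means the first algorithm of the pair wins): with three algorithms A, B, C there are three classifiers, one per pair (A, B), (A, C), (B, C). If they predict A beats B, A beats C, and B beats C, the instance's row becomes A=2, B=1, C=0 and A is selected; if instead they predict A beats B, C beats A, and B beats C, the row is A=1, B=1, C=1, a three-way tie. Each row always sums to the number of pairs, n(n-1)/2.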

get_configuration_space(cs=None, cs_transform=None, model_class=[RandomForestClassifierWrapper, XGBoostClassifierWrapper], pre_prefix='', parent_param=None, parent_value=None, **kwargs) staticmethod

Get the configuration space for the predictor.

Parameters:

Name Type Description Default
cs Optional[ConfigurationSpace]

The configuration space to use. If None, a new one will be created.

None
cs_transform Optional[Dict[str, dict]]

A dictionary for transforming configuration space parameters.

None
model_class List[type[AbstractPredictor]]

The list of model classes to use. Defaults to [RandomForestClassifierWrapper, XGBoostClassifierWrapper].

[RandomForestClassifierWrapper, XGBoostClassifierWrapper]
pre_prefix str

Prefix to prepend to the parameter names.

''
parent_param Optional[Hyperparameter]

Parent hyperparameter to condition the added parameters on.

None
parent_value Optional[str]

Value of the parent hyperparameter that activates the added parameters.

None
**kwargs

Additional keyword arguments to pass to the model class.

{}

Returns:

Type Description
Tuple[ConfigurationSpace, Dict[str, dict]]

Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.

Source code in asf/selectors/pairwise_classifier.py
@staticmethod
def get_configuration_space(
    cs: Optional[ConfigurationSpace] = None,
    cs_transform: Optional[Dict[str, dict]] = None,
    model_class: List[type[AbstractPredictor]] = [
        RandomForestClassifierWrapper,
        XGBoostClassifierWrapper,
    ],
    pre_prefix: str = "",
    parent_param: Optional[Hyperparameter] = None,
    parent_value: Optional[str] = None,
    **kwargs,
) -> Tuple[ConfigurationSpace, Dict[str, dict]]:
    """
    Get the configuration space for the predictor.

    Args:
        cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
        cs_transform (Optional[Dict[str, dict]]): A dictionary for transforming configuration space parameters.
        model_class (List[type[AbstractPredictor]]): The list of model classes to use. Defaults to [RandomForestClassifierWrapper, XGBoostClassifierWrapper].
        pre_prefix (str): Prefix to prepend to the parameter names.
        parent_param (Optional[Hyperparameter]): Parent hyperparameter to condition the added parameters on.
        parent_value (Optional[str]): Value of the parent hyperparameter that activates the added parameters.
        **kwargs: Additional keyword arguments to pass to the model class.

    Returns:
        Tuple[ConfigurationSpace, Dict[str, dict]]: The configuration space and its transformation dictionary.
    """
    if cs is None:
        cs = ConfigurationSpace()

    if cs_transform is None:
        cs_transform = dict()

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
    else:
        prefix = PairwiseClassifier.PREFIX

    model_class_param = Categorical(
        name=f"{prefix}:model_class",
        items=[str(c.__name__) for c in model_class],
    )

    cs_transform[f"{prefix}:model_class"] = {
        str(c.__name__): c for c in model_class
    }

    use_weights_param = Categorical(
        name=f"{prefix}:use_weights",
        items=[True, False],
    )

    params = [model_class_param, use_weights_param]

    if parent_param is not None:
        conditions = [
            EqualsCondition(
                child=param,
                parent=parent_param,
                value=parent_value,
            )
            for param in params
        ]
    else:
        conditions = []

    cs.add(params + conditions)

    for model in model_class:
        model.get_configuration_space(
            cs=cs,
            pre_prefix=f"{prefix}:model_class",
            parent_param=model_class_param,
            parent_value=str(model.__name__),
            **kwargs,
        )

    return cs, cs_transform

get_from_configuration(configuration, cs_transform, pre_prefix='', **kwargs) staticmethod

Get the predictor from a given configuration.

Parameters:

Name Type Description Default
configuration Configuration

The configuration object.

required
cs_transform Dict[str, dict]

The transformation dictionary for the configuration space.

required

Returns:

Name Type Description
partial partial

A partial function to initialize the PairwiseClassifier with the given configuration.

Source code in asf/selectors/pairwise_classifier.py
@staticmethod
def get_from_configuration(
    configuration: Configuration,
    cs_transform: Dict[str, dict],
    pre_prefix: str = "",
    **kwargs,
) -> partial:
    """
    Get the predictor from a given configuration.

    Args:
        configuration (Configuration): The configuration object.
        cs_transform (Dict[str, dict]): The transformation dictionary for the configuration space.

    Returns:
        partial: A partial function to initialize the PairwiseClassifier with the given configuration.
    """

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseClassifier.PREFIX}"
    else:
        prefix = PairwiseClassifier.PREFIX

    model_class = cs_transform[f"{prefix}:model_class"][
        configuration[f"{prefix}:model_class"]
    ]
    use_weights = configuration[f"{prefix}:use_weights"]

    model = model_class.get_from_configuration(
        configuration, pre_prefix=f"{prefix}:model_class"
    )

    return PairwiseClassifier(
        model_class=model,
        use_weights=use_weights,
        hierarchical_generator=None,
        **kwargs,
    )
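
A sketch of round-tripping through ConfigSpace, assuming the optional ConfigSpace dependency is installed and reusing the toy data from the sketch above. Note that, although the method is annotated as returning a partial, the body shown here constructs the selector directly.

cs, cs_transform = PairwiseClassifier.get_configuration_space()
config = cs.sample_configuration()
selector = PairwiseClassifier.get_from_configuration(config, cs_transform)
selector.fit(features, performance)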

PairwiseRegressor

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

Source code in asf/selectors/pairwise_regressor.py
class PairwiseRegressor(AbstractModelBasedSelector, AbstractFeatureGenerator):
    PREFIX = "pairwise_regressor"
    """
    PairwiseRegressor is a selector that uses pairwise regression of algorithms
    to predict the best algorithm for a given instance.

    Attributes:
        model_class (type): The regression model class to be used for pairwise comparisons.
        regressors (List[AbstractPredictor]): List of trained regressors for pairwise comparisons.
    """

    def __init__(self, model_class: type, **kwargs):
        """
        Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

        Args:
            model_class (type): The regression model class to be used for pairwise comparisons.
            kwargs: Additional keyword arguments for the parent classes.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: List[AbstractPredictor] = []

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the pairwise regressors using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseRegressor does not use algorithm features."
        )
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                diffs = algo1_times - algo2_times
                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None,
                )
                self.regressors.append(cur_model)

    def _predict(self, features: pd.DataFrame) -> Dict[str, List[Tuple[str, float]]]:
        """
        Predicts the best algorithm for each instance using the trained pairwise regressors.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            Dict[str, List[Tuple[str, float]]]: A dictionary mapping instance names to the predicted best algorithm
            and the associated budget.
            Example: {instance_name: [(algorithm_name, budget)]}
        """
        predictions_sum = self.generate_features(features)
        return {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmax()
                    if self.maximize
                    else predictions_sum.loc[instance_name].idxmin(),
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates features for the pairwise regressors.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: A DataFrame of accumulated predicted performance differences, with one row per instance and one column per algorithm.
        """
        cnt = 0
        predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
        for i, algorithm in enumerate(self.algorithms):
            for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
                prediction = self.regressors[cnt].predict(features)
                predictions_sum[algorithm] += prediction
                predictions_sum[other_algorithm] -= prediction
                cnt += 1

        return predictions_sum

    if CONFIGSPACE_AVAILABLE:

        @staticmethod
        def get_configuration_space(
            cs: Optional[ConfigurationSpace] = None,
            cs_transform: Optional[Dict[str, Dict[str, type]]] = None,
            model_class: List[type[AbstractPredictor]] = [
                RandomForestRegressorWrapper,
                XGBoostRegressorWrapper,
            ],
            pre_prefix: str = "",
            parent_param: Optional[Hyperparameter] = None,
            parent_value: Optional[str] = None,
            **kwargs,
        ) -> Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]:
            """
            Get the configuration space for the predictor.

            Args:
                cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
                cs_transform (Optional[Dict[str, Dict[str, type]]]): A dictionary for transforming configuration space values.
                model_class (List[type]): The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].
                pre_prefix (str): Prefix to prepend to the parameter names.
                parent_param (Optional[Hyperparameter]): Parent hyperparameter to condition the added parameters on.
                parent_value (Optional[str]): Value of the parent hyperparameter that activates the added parameters.
                kwargs: Additional keyword arguments to pass to the model class.

            Returns:
                Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.
            """
            if cs is None:
                cs = ConfigurationSpace()

            # cs_transform defaults to None but is written to below; initialize it.
            if cs_transform is None:
                cs_transform = dict()

            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
            else:
                prefix = PairwiseRegressor.PREFIX

            model_class_param = Categorical(
                name=f"{prefix}:model_class",
                items=[str(c.__name__) for c in model_class],
            )

            cs_transform[f"{prefix}:model_class"] = {
                str(c.__name__): c for c in model_class
            }

            params = [model_class_param]

            if parent_param is not None:
                conditions = [
                    EqualsCondition(
                        child=param,
                        parent=parent_param,
                        value=parent_value,
                    )
                    for param in params
                ]
            else:
                conditions = []

            cs.add(params + conditions)

            for model in model_class:
                model.get_configuration_space(
                    cs=cs,
                    pre_prefix=f"{prefix}:model_class",
                    parent_param=model_class_param,
                    parent_value=str(model.__name__),
                    **kwargs,
                )

            return cs, cs_transform

        @staticmethod
        def get_from_configuration(
            configuration: Configuration,
            cs_transform: Dict[str, Dict[str, type]],
            pre_prefix: str = "",
            **kwargs,
        ) -> partial:
            """
            Get the predictor from a given configuration.

            Args:
                configuration (Configuration): The configuration object.
                cs_transform (Dict[str, Dict[str, type]]): The transformation dictionary for the configuration space.

            Returns:
                partial: A partial function to initialize the PairwiseRegressor with the given configuration.
            """
            if pre_prefix != "":
                prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
            else:
                prefix = PairwiseRegressor.PREFIX

            model_class = cs_transform[f"{prefix}:model_class"][
                configuration[f"{prefix}:model_class"]
            ]

            model = model_class.get_from_configuration(
                configuration, pre_prefix=f"{prefix}:model_class"
            )

            return PairwiseRegressor(
                model_class=model,
                hierarchical_generator=None,
                **kwargs,
            )

PREFIX = 'pairwise_regressor' class-attribute instance-attribute

PairwiseRegressor is a selector that uses pairwise regression of algorithms to predict the best algorithm for a given instance.

Attributes:

Name Type Description
model_class type

The regression model class to be used for pairwise comparisons.

regressors List[AbstractPredictor]

List of trained regressors for pairwise comparisons.
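
The fit/predict flow mirrors PairwiseClassifier; a minimal sketch, reusing the toy data from the PairwiseClassifier example above (import paths are assumptions):

from asf.selectors import PairwiseRegressor               # assumed import path
from asf.predictors import RandomForestRegressorWrapper   # assumed import path

selector = PairwiseRegressor(model_class=RandomForestRegressorWrapper)
selector.fit(features, performance)
print(selector.predict(features))  # {instance: [(algorithm, budget)]}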

__init__(model_class, **kwargs)

Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

Parameters:

Name Type Description Default
model_class type

The regression model class to be used for pairwise comparisons.

required
kwargs

Additional keyword arguments for the parent classes.

{}
Source code in asf/selectors/pairwise_regressor.py
def __init__(self, model_class: type, **kwargs):
    """
    Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

    Args:
        model_class (type): The regression model class to be used for pairwise comparisons.
        kwargs: Additional keyword arguments for the parent classes.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.regressors: List[AbstractPredictor] = []

generate_features(features)

Generates features for the pairwise regressors.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Type Description
DataFrame

pd.DataFrame: A DataFrame of accumulated predicted performance differences, with one row per instance and one column per algorithm.

Source code in asf/selectors/pairwise_regressor.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Generates features for the pairwise regressors.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: A DataFrame of accumulated predicted performance differences, with one row per instance and one column per algorithm.
    """
    cnt = 0
    predictions_sum = pd.DataFrame(0, index=features.index, columns=self.algorithms)
    for i, algorithm in enumerate(self.algorithms):
        for j, other_algorithm in enumerate(self.algorithms[i + 1 :]):
            prediction = self.regressors[cnt].predict(features)
            predictions_sum[algorithm] += prediction
            predictions_sum[other_algorithm] -= prediction
            cnt += 1

    return predictions_sum
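
To make the accumulation concrete: the regressor for the pair (A, B) is trained on the difference A − B, so for runtimes (lower is better) a predicted value of −2 means A is expected to be 2 units faster than B. The loop then adds −2 to A's column and subtracts it from B's (adding +2). After all pairs are processed, the algorithm with the smallest accumulated sum is chosen when minimizing, and the largest when maximizing.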

get_configuration_space(cs=None, cs_transform=None, model_class=[RandomForestRegressorWrapper, XGBoostRegressorWrapper], pre_prefix='', parent_param=None, parent_value=None, **kwargs) staticmethod

Get the configuration space for the predictor.

Parameters:

Name Type Description Default
cs Optional[ConfigurationSpace]

The configuration space to use. If None, a new one will be created.

None
cs_transform Optional[Dict[str, Dict[str, type]]]

A dictionary for transforming configuration space values.

None
model_class List[type]

The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].

[RandomForestRegressorWrapper, XGBoostRegressorWrapper]
pre_prefix str

Prefix to prepend to the parameter names.

''
parent_param Optional[Hyperparameter]

Parent hyperparameter to condition the added parameters on.

None
parent_value Optional[str]

Value of the parent hyperparameter that activates the added parameters.

None
kwargs

Additional keyword arguments to pass to the model class.

{}

Returns:

Type Description
Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]

Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.

Source code in asf/selectors/pairwise_regressor.py
@staticmethod
def get_configuration_space(
    cs: Optional[ConfigurationSpace] = None,
    cs_transform: Optional[Dict[str, Dict[str, type]]] = None,
    model_class: List[type[AbstractPredictor]] = [
        RandomForestRegressorWrapper,
        XGBoostRegressorWrapper,
    ],
    pre_prefix: str = "",
    parent_param: Optional[Hyperparameter] = None,
    parent_value: Optional[str] = None,
    **kwargs,
) -> Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]:
    """
    Get the configuration space for the predictor.

    Args:
        cs (Optional[ConfigurationSpace]): The configuration space to use. If None, a new one will be created.
        cs_transform (Optional[Dict[str, Dict[str, type]]]): A dictionary for transforming configuration space values.
        model_class (List[type]): The list of model classes to use. Defaults to [RandomForestRegressorWrapper, XGBoostRegressorWrapper].
        pre_prefix (str): Prefix to prepend to the parameter names.
        parent_param (Optional[Hyperparameter]): Parent hyperparameter to condition the added parameters on.
        parent_value (Optional[str]): Value of the parent hyperparameter that activates the added parameters.
        kwargs: Additional keyword arguments to pass to the model class.

    Returns:
        Tuple[ConfigurationSpace, Dict[str, Dict[str, type]]]: The configuration space and its transformation dictionary.
    """
    if cs is None:
        cs = ConfigurationSpace()

    # cs_transform defaults to None but is written to below; initialize it.
    if cs_transform is None:
        cs_transform = dict()

    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
    else:
        prefix = PairwiseRegressor.PREFIX

    model_class_param = Categorical(
        name=f"{prefix}:model_class",
        items=[str(c.__name__) for c in model_class],
    )

    cs_transform[f"{prefix}:model_class"] = {
        str(c.__name__): c for c in model_class
    }

    params = [model_class_param]

    if parent_param is not None:
        conditions = [
            EqualsCondition(
                child=param,
                parent=parent_param,
                value=parent_value,
            )
            for param in params
        ]
    else:
        conditions = []

    cs.add(params + conditions)

    for model in model_class:
        model.get_configuration_space(
            cs=cs,
            pre_prefix=f"{prefix}:model_class",
            parent_param=model_class_param,
            parent_value=str(model.__name__),
            **kwargs,
        )

    return cs, cs_transform

get_from_configuration(configuration, cs_transform, pre_prefix='', **kwargs) staticmethod

Get the predictor from a given configuration.

Parameters:

Name Type Description Default
configuration Configuration

The configuration object.

required
cs_transform Dict[str, Dict[str, type]]

The transformation dictionary for the configuration space.

required

Returns:

Name Type Description
partial partial

A partial function to initialize the PairwiseRegressor with the given configuration.

Source code in asf/selectors/pairwise_regressor.py
@staticmethod
def get_from_configuration(
    configuration: Configuration,
    cs_transform: Dict[str, Dict[str, type]],
    pre_prefix: str = "",
    **kwargs,
) -> partial:
    """
    Get the predictor from a given configuration.

    Args:
        configuration (Configuration): The configuration object.
        cs_transform (Dict[str, Dict[str, type]]): The transformation dictionary for the configuration space.

    Returns:
        partial: A partial function to initialize the PairwiseRegressor with the given configuration.
    """
    if pre_prefix != "":
        prefix = f"{pre_prefix}:{PairwiseRegressor.PREFIX}"
    else:
        prefix = PairwiseRegressor.PREFIX

    model_class = cs_transform[f"{prefix}:model_class"][
        configuration[f"{prefix}:model_class"]
    ]

    model = model_class.get_from_configuration(
        configuration, pre_prefix=f"{prefix}:model_class"
    )

    return PairwiseRegressor(
        model_class=model,
        hierarchical_generator=None,
        **kwargs,
    )

PerformanceModel

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

PerformanceModel is a class that predicts the performance of algorithms based on given features. It can handle both single-target and multi-target regression models.

Attributes:

Name Type Description
model_class Type

The class of the regression model to be used.

use_multi_target bool

Indicates whether to use multi-target regression.

normalize str

Method to normalize the performance data. Default is "log".

regressors Union[List, object]

List of trained regression models or a single model for multi-target regression.

algorithm_features Optional[DataFrame]

Features specific to each algorithm, if applicable.

algorithms List[str]

List of algorithm names.

maximize bool

Whether to maximize or minimize the performance metric.

budget float

Budget associated with the predictions.

Source code in asf/selectors/performance_model.py
class PerformanceModel(AbstractModelBasedSelector, AbstractFeatureGenerator):
    """
    PerformanceModel is a class that predicts the performance of algorithms
    based on given features. It can handle both single-target and multi-target
    regression models.

    Attributes:
        model_class (Type): The class of the regression model to be used.
        use_multi_target (bool): Indicates whether to use multi-target regression.
        normalize (str): Method to normalize the performance data. Default is "log".
        regressors (Union[List, object]): List of trained regression models or a single model for multi-target regression.
        algorithm_features (Optional[pd.DataFrame]): Features specific to each algorithm, if applicable.
        algorithms (List[str]): List of algorithm names.
        maximize (bool): Whether to maximize or minimize the performance metric.
        budget (float): Budget associated with the predictions.
    """

    def __init__(
        self,
        model_class: Type,
        use_multi_target: bool = False,
        normalize: str = "log",
        **kwargs,
    ):
        """
        Initializes the PerformanceModel with the given parameters.

        Args:
            model_class (Type): The class of the regression model to be used.
            use_multi_target (bool): Indicates whether to use multi-target regression.
            normalize (str): Method to normalize the performance data. Default is "log".
            **kwargs: Additional arguments for the parent classes.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: Union[List, object] = []
        self.use_multi_target: bool = use_multi_target
        self.normalize: str = normalize

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fits the regression models to the given features and performance data.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.
            performance (pd.DataFrame): DataFrame containing the performance data.
        """
        assert self.algorithm_features is None, (
            "PerformanceModel does not use algorithm features."
        )
        if self.normalize == "log":
            performance = np.log10(performance + 1e-6)

        regressor_init_args = {}
        if "input_size" in inspect.signature(self.model_class).parameters.keys():
            regressor_init_args["input_size"] = features.shape[1]

        if self.use_multi_target:
            assert self.algorithm_features is None, (
                "PerformanceModel does not use algorithm features for multi-target regression."
            )
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(features, performance)
        else:
            if self.algorithm_features is None:
                for i, algorithm in enumerate(self.algorithms):
                    algo_times = performance.iloc[:, i]

                    cur_model = self.model_class(**regressor_init_args)
                    cur_model.fit(features, algo_times)
                    self.regressors.append(cur_model)
            else:
                train_data = []
                for i, algorithm in enumerate(self.algorithms):
                    data = pd.merge(
                        features,
                        self.algorithm_features.loc[algorithm],
                        left_index=True,
                        right_index=True,
                    )
                    data = pd.merge(
                        data, performance.iloc[:, i], left_index=True, right_index=True
                    )
                    train_data.append(data)
                train_data = pd.concat(train_data)
                self.regressors = self.model_class(**regressor_init_args)
                self.regressors.fit(train_data.iloc[:, :-1], train_data.iloc[:, -1])

    def _predict(self, features: pd.DataFrame) -> Dict[str, List[tuple]]:
        """
        Predicts the performance of algorithms for the given features.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            Dict[str, List[tuple]]: A dictionary mapping instance names to the predicted best algorithm
            and the associated budget.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.algorithms[
                        np.argmax(predictions[i])
                        if self.maximize
                        else np.argmin(predictions[i])
                    ],
                    self.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> np.ndarray:
        """
        Generates predictions for the given features using the trained models.

        Args:
            features (pd.DataFrame): DataFrame containing the feature data.

        Returns:
            np.ndarray: Array containing the predictions for each algorithm.
        """
        if self.use_multi_target:
            predictions = self.regressors.predict(features)
        else:
            if self.algorithm_features is None:
                predictions = np.zeros((features.shape[0], len(self.algorithms)))
                for i, algorithm in enumerate(self.algorithms):
                    prediction = self.regressors[i].predict(features)
                    predictions[:, i] = prediction
            else:
                predictions = np.zeros((features.shape[0], len(self.algorithms)))
                for i, algorithm in enumerate(self.algorithms):
                    data = pd.merge(
                        features,
                        self.algorithm_features.loc[algorithm],
                        left_index=True,
                        right_index=True,
                    )
                    prediction = self.regressors.predict(data)
                    predictions[:, i] = prediction

        return predictions

    @classmethod
    def get_configuration_space(
        cls, cs, cs_transform, parent_param, parent_value, **kwargs
    ):
        """
        Adds the configuration space for the PerformanceModel using RandomForestRegressorWrapper.
        """
        cs = RandomForestRegressorWrapper.get_configuration_space(
            cs=cs,
            pre_prefix=cls.__name__,
            parent_param=parent_param,
            parent_value=parent_value,
        )

        def constructor(config, cs_transform, **init_kwargs):
            # Make sure that the random forests get random state from the init_kwargs for reproducibility
            model_init_args = {
                k: init_kwargs[k] for k in ["random_state"] if k in init_kwargs
            }

            # Build model constructor with model-related kwargs
            model_constructor = RandomForestRegressorWrapper.get_from_configuration(
                config, pre_prefix=cls.__name__, **model_init_args
            )

            # Only pass the kwargs intended for PerformanceModel init (not model-specific)
            model_related_keys = ["random_state"]
            selector_kwargs = {
                k: v for k, v in init_kwargs.items() if k not in model_related_keys
            }

            return cls(model_class=model_constructor, **selector_kwargs)

        cs_transform[parent_value] = constructor
        return cs, cs_transform

    @classmethod
    def get_from_configuration(cls, config, cs_transform, **kwargs):
        """
        Instantiates the PerformanceModel from a ConfigSpace configuration.
        """
        constructor = cs_transform[str(cls.__name__)]
        return constructor(config, cs_transform, **kwargs)
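
A minimal usage sketch, reusing the toy data from the earlier examples (import paths are assumptions). With normalize="log", _fit applies log10 to the performance values before fitting, as shown above.

from asf.selectors import PerformanceModel                 # assumed import path
from asf.predictors import RandomForestRegressorWrapper    # assumed import path

selector = PerformanceModel(
    model_class=RandomForestRegressorWrapper,
    use_multi_target=False,  # one regressor per algorithm
    normalize="log",
)
selector.fit(features, performance)
print(selector.predict(features))  # {instance: [(algorithm, budget)]}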

__init__(model_class, use_multi_target=False, normalize='log', **kwargs)

Initializes the PerformanceModel with the given parameters.

Parameters:

Name Type Description Default
model_class Type

The class of the regression model to be used.

required
use_multi_target bool

Indicates whether to use multi-target regression.

False
normalize str

Method to normalize the performance data. Default is "log".

'log'
**kwargs

Additional arguments for the parent classes.

{}
Source code in asf/selectors/performance_model.py
def __init__(
    self,
    model_class: Type,
    use_multi_target: bool = False,
    normalize: str = "log",
    **kwargs,
):
    """
    Initializes the PerformanceModel with the given parameters.

    Args:
        model_class (Type): The class of the regression model to be used.
        use_multi_target (bool): Indicates whether to use multi-target regression.
        normalize (str): Method to normalize the performance data. Default is "log".
        **kwargs: Additional arguments for the parent classes.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.regressors: Union[List, object] = []
    self.use_multi_target: bool = use_multi_target
    self.normalize: str = normalize

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description
ndarray

np.ndarray: Array containing the predictions for each algorithm.

Source code in asf/selectors/performance_model.py
def generate_features(self, features: pd.DataFrame) -> np.ndarray:
    """
    Generates predictions for the given features using the trained models.

    Args:
        features (pd.DataFrame): DataFrame containing the feature data.

    Returns:
        np.ndarray: Array containing the predictions for each algorithm.
    """
    if self.use_multi_target:
        predictions = self.regressors.predict(features)
    else:
        if self.algorithm_features is None:
            predictions = np.zeros((features.shape[0], len(self.algorithms)))
            for i, algorithm in enumerate(self.algorithms):
                prediction = self.regressors[i].predict(features)
                predictions[:, i] = prediction
        else:
            predictions = np.zeros((features.shape[0], len(self.algorithms)))
            for i, algorithm in enumerate(self.algorithms):
                data = pd.merge(
                    features,
                    self.algorithm_features.loc[algorithm],
                    left_index=True,
                    right_index=True,
                )
                prediction = self.regressors.predict(data)
                predictions[:, i] = prediction

    return predictions

get_configuration_space(cs, cs_transform, parent_param, parent_value, **kwargs) classmethod

Adds the configuration space for the PerformanceModel using RandomForestRegressorWrapper.

Source code in asf/selectors/performance_model.py
@classmethod
def get_configuration_space(
    cls, cs, cs_transform, parent_param, parent_value, **kwargs
):
    """
    Adds the configuration space for the PerformanceModel using RandomForestRegressorWrapper.
    """
    cs = RandomForestRegressorWrapper.get_configuration_space(
        cs=cs,
        pre_prefix=cls.__name__,
        parent_param=parent_param,
        parent_value=parent_value,
    )

    def constructor(config, cs_transform, **init_kwargs):
        # Make sure that the random forests get random state from the init_kwargs for reproducibility
        model_init_args = {
            k: init_kwargs[k] for k in ["random_state"] if k in init_kwargs
        }

        # Build model constructor with model-related kwargs
        model_constructor = RandomForestRegressorWrapper.get_from_configuration(
            config, pre_prefix=cls.__name__, **model_init_args
        )

        # Only pass the kwargs intended for PerformanceModel init (not model-specific)
        model_related_keys = ["random_state"]
        selector_kwargs = {
            k: v for k, v in init_kwargs.items() if k not in model_related_keys
        }

        return cls(model_class=model_constructor, **selector_kwargs)

    cs_transform[parent_value] = constructor
    return cs, cs_transform

get_from_configuration(config, cs_transform, **kwargs) classmethod

Instantiates the PerformanceModel from a ConfigSpace configuration.

Source code in asf/selectors/performance_model.py
@classmethod
def get_from_configuration(cls, config, cs_transform, **kwargs):
    """
    Instantiates the PerformanceModel from a ConfigSpace configuration.
    """
    constructor = cs_transform[str(cls.__name__)]
    return constructor(config, cs_transform, **kwargs)

SNNAP

Bases: AbstractSelector

SNNAP (Simple Nearest Neighbor Algorithm Portfolio) selector.

Parameters:

Name Type Description Default
k int

number of neighbors to use (default 5).

5
metric str

distance metric for NearestNeighbors (default 'euclidean').

'euclidean'
random_state Optional[int]

Random seed for reproducibility.

None
Source code in asf/selectors/snnap.py
class SNNAP(AbstractSelector):
    """
    SNNAP (Simple Nearest Neighbor Algorithm Portfolio) selector.

    Args:
      k (int): number of neighbors to use (default 5).
      metric (str): distance metric for NearestNeighbors (default 'euclidean').
      random_state (Optional[int]): Random seed for reproducibility.
    """

    def __init__(
        self,
        k: int = 5,
        metric: str = "euclidean",
        random_state: Optional[int] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.k = k
        self.metric = metric
        self.random_state = random_state

        self.features: Optional[pd.DataFrame] = None
        self.performance: Optional[pd.DataFrame] = None
        self.nn_model: Optional[NearestNeighbors] = None

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Store training data and fit the NearestNeighbors model.

        Args:
            features: DataFrame (instances x features)
            performance: DataFrame (instances x algorithms)
        """
        self.features = features.copy()
        self.performance = performance.copy()

        n_neighbors = min(self.k, len(self.features))
        self.nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric=self.metric)
        self.nn_model.fit(self.features.values)

    def _predict(
        self,
        features: Optional[pd.DataFrame] = None,
    ) -> Dict[str, List[Tuple[Optional[str], float]]]:
        """
        Predict the single best algorithm for each instance using majority vote among k neighbors.

        Returns:
            dict: instance_id -> [(algorithm_name or None, budget)]
        """
        if features is None:
            raise ValueError("Features must be provided for prediction.")
        if self.nn_model is None or self.features is None or self.performance is None:
            raise RuntimeError("SNNAPSelector must be fitted before prediction.")

        predictions: Dict[str, List[Tuple[Optional[str], float]]] = {}
        for idx, instance in enumerate(features.index):
            x = features.loc[instance].values.reshape(1, -1)
            n_neighbors = min(self.k, len(self.features))
            dists, neighbor_idxs = self.nn_model.kneighbors(x, n_neighbors=n_neighbors)
            neighbor_idxs = neighbor_idxs.flatten()

            votes: Dict[str, int] = {}
            runtimes_for_candidates: Dict[str, List[float]] = {}

            for ni in neighbor_idxs:
                neighbor_perf = self.performance.iloc[ni]
                valid = neighbor_perf.dropna()
                if valid.empty:
                    continue
                best_algo = valid.idxmin()
                votes[best_algo] = votes.get(best_algo, 0) + 1
                runtimes_for_candidates.setdefault(best_algo, []).append(
                    valid.loc[best_algo]
                )

            if not votes:
                predictions[instance] = [(None, self.budget)]
                continue

            max_votes = max(votes.values())
            candidates = [a for a, c in votes.items() if c == max_votes]

            if len(candidates) == 1:
                chosen = candidates[0]
            else:
                # tie-break: choose candidate with smallest mean runtime across recorded neighbor runtimes
                mean_runtimes = {
                    a: np.mean(runtimes_for_candidates[a])
                    for a in candidates
                    if a in runtimes_for_candidates
                    and len(runtimes_for_candidates[a]) > 0
                }
                if not mean_runtimes:
                    # No candidates have recorded runtimes; fallback to None
                    chosen = None
                else:
                    chosen = min(mean_runtimes.items(), key=lambda x: x[1])[0]
            if chosen is None:
                predictions[instance] = [(None, self.budget)]
            else:
                predictions[instance] = [(chosen, self.budget)]

        return predictions
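
A minimal usage sketch (import path is an assumption). Each of the k nearest training instances votes for its best recorded algorithm; ties are broken by the smaller mean recorded runtime, as implemented in _predict above.

from asf.selectors import SNNAP  # assumed import path

selector = SNNAP(k=3, metric="euclidean")
selector.fit(features, performance)  # performance: lower is better
print(selector.predict(features))    # {instance: [(algorithm, budget)]}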

SelectorPipeline

A pipeline for applying a sequence of preprocessing, feature selection, and algorithm selection steps before fitting a final selector model.

Attributes:

Name Type Description
selector AbstractSelector

The main selector model to be used.

preprocessor Optional[Callable]

A callable for preprocessing the input data.

pre_solving Optional[Callable]

A callable for pre-solving steps.

feature_selector Optional[Callable]

A callable for feature selection.

algorithm_pre_selector Optional[Callable]

A callable for algorithm pre-selection.

budget Optional[Any]

The budget constraint for the selector.

maximize bool

Whether to maximize the objective function.

feature_groups Optional[Any]

Feature groups to be used by the selector.

Source code in asf/selectors/selector_pipeline.py
class SelectorPipeline:
    """
    A pipeline for applying a sequence of preprocessing, feature selection, and algorithm selection
    steps before fitting a final selector model.

    Attributes:
        selector (AbstractSelector): The main selector model to be used.
        preprocessor (Optional[Callable]): A callable for preprocessing the input data.
        pre_solving (Optional[Callable]): A callable for pre-solving steps.
        feature_selector (Optional[Callable]): A callable for feature selection.
        algorithm_pre_selector (Optional[Callable]): A callable for algorithm pre-selection.
        budget (Optional[Any]): The budget constraint for the selector.
        maximize (bool): Whether to maximize the objective function.
        feature_groups (Optional[Any]): Feature groups to be used by the selector.
    """

    def __init__(
        self,
        selector: AbstractSelector,
        preprocessor: Optional[Any] = None,
        pre_solving: AbstractPresolver = None,
        feature_selector: Optional[Callable] = None,
        algorithm_pre_selector: Optional[Callable] = None,
        budget: Optional[Any] = None,
        maximize: bool = False,
        feature_groups: Optional[Any] = None,
    ) -> None:
        """
        Initializes the SelectorPipeline.

        Args:
            selector (AbstractSelector): The main selector model to be used.
            preprocessor (Optional[Callable], optional): A callable for preprocessing the input data. Defaults to None.
            pre_solving (Optional[Callable], optional): A callable for pre-solving steps. Defaults to None.
            feature_selector (Optional[Callable], optional): A callable for feature selection. Defaults to None.
            algorithm_pre_selector (Optional[Callable], optional): A callable for algorithm pre-selection. Defaults to None.
            budget (Optional[Any], optional): The budget constraint for the selector. Defaults to None.
            maximize (bool, optional): Whether to maximize the objective function. Defaults to False.
            feature_groups (Optional[Any], optional): Feature groups to be used by the selector. Defaults to None.
        """
        self.selector = selector
        self.pre_solving = pre_solving
        self.feature_selector = feature_selector
        self.algorithm_pre_selector = algorithm_pre_selector
        self.budget = budget
        self.maximize = maximize

        # Always include SimpleImputer as the first step in the preprocessing pipeline
        if preprocessor is None:
            preprocessor = []
        elif not isinstance(preprocessor, list):
            preprocessor = [preprocessor]
        preprocessor = [SimpleImputer(strategy="mean")] + preprocessor
        steps = [(type(p).__name__, p) for p in preprocessor]
        self.preprocessor = Pipeline(steps)
        self.preprocessor.set_output(transform="pandas")

        self._orig_columns = None
        self._orig_index = None

    def fit(self, X: Any, y: Any) -> None:
        """
        Fits the pipeline to the input data.

        Args:
            X (Any): The input features.
            y (Any): The target labels.
        """
        if isinstance(X, pd.DataFrame):
            self._orig_columns = X.columns
            self._orig_index = X.index

        if self.preprocessor:
            X = self.preprocessor.fit_transform(X)

        if self.pre_solving:
            self.pre_solving.fit(X, y)

        if self.algorithm_pre_selector:
            y = self.algorithm_pre_selector.fit_transform(y)

        if self.feature_selector:
            X, y = self.feature_selector.fit_transform(X, y)

        self.selector.fit(X, y)

    def predict(self, X: Any) -> dict:
        """
        Makes predictions using the fitted pipeline.

        Args:
            X (Any): The input features.

        Returns:
        dict: The predictions made by the selector, keyed by instance.
        """
        if self.preprocessor:
            X = self.preprocessor.transform(X)

        scheds = None
        if self.pre_solving:
            scheds = self.pre_solving.predict(X)

        if self.feature_selector:
            X = self.feature_selector.transform(X)

        predictions = self.selector.predict(X)

        # Ensure predictions use the same index as X
        predictions = pd.Series(predictions, index=X.index)
        if scheds is not None:
            for instance_id, pre_schedule in scheds.items():
                if instance_id in predictions:
                    predictions[instance_id] = pre_schedule + predictions[instance_id]
        return predictions.to_dict()

    def save(self, path: str) -> None:
        """
        Saves the pipeline to a file.

        Args:
            path (str): The file path where the pipeline will be saved.
        """
        import joblib

        joblib.dump(self, path)

    @staticmethod
    def load(path: str) -> "SelectorPipeline":
        """
        Loads a pipeline from a file.

        Args:
            path (str): The file path from which the pipeline will be loaded.

        Returns:
            SelectorPipeline: The loaded pipeline.
        """
        import joblib

        return joblib.load(path)

    def get_config(self) -> dict:
        """
        Returns a dictionary with the configuration of the pipeline.

        Returns:
            dict: Configuration details of the pipeline.
        """

        def get_model_class_name(selector):
            if hasattr(selector, "model_class"):
                mc = selector.model_class
                # Handle functools.partial
                if hasattr(mc, "func"):
                    return mc.func.__name__
                elif hasattr(mc, "__name__"):
                    return mc.__name__
                else:
                    return str(type(mc))
            return None

        config = {
            "budget": self.budget,
            "selector": type(self.selector).__name__,
            "selector_model": get_model_class_name(self.selector),
            "pre_solving": type(self.pre_solving).__name__
            if self.pre_solving
            else None,
            "presolving_budget": getattr(self.pre_solving, "budget", None)
            if self.pre_solving
            else None,
            "preprocessor": type(self.preprocessor).__name__
            if self.preprocessor
            else None,
            "preprocessor_steps": [
                type(step[1]).__name__ for step in self.preprocessor.steps
            ]
            if hasattr(self.preprocessor, "steps")
            else None,
            "feature_selector": type(self.feature_selector).__name__
            if self.feature_selector
            else None,
            "algorithm_pre_selector": type(self.algorithm_pre_selector).__name__
            if self.algorithm_pre_selector
            else None,
        }
        return config
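
A minimal composition sketch (selector and wrapper import paths are assumptions; a SimpleImputer is always prepended to whatever preprocessor you pass, per __init__ above):

from sklearn.preprocessing import StandardScaler

from asf.selectors import SelectorPipeline, PairwiseClassifier  # assumed import path
from asf.predictors import RandomForestClassifierWrapper        # assumed import path

pipeline = SelectorPipeline(
    selector=PairwiseClassifier(model_class=RandomForestClassifierWrapper),
    preprocessor=StandardScaler(),
)
pipeline.fit(features, performance)     # toy data as in the earlier sketches
schedules = pipeline.predict(features)  # {instance: [(algorithm, budget), ...]}

pipeline.save("pipeline.joblib")
restored = SelectorPipeline.load("pipeline.joblib")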

__init__(selector, preprocessor=None, pre_solving=None, feature_selector=None, algorithm_pre_selector=None, budget=None, maximize=False, feature_groups=None)

Initializes the SelectorPipeline.

Parameters:

Name Type Description Default
selector AbstractSelector

The main selector model to be used.

required
preprocessor Optional[Callable]

A callable for preprocessing the input data. Defaults to None.

None
pre_solving Optional[Callable]

A callable for pre-solving steps. Defaults to None.

None
feature_selector Optional[Callable]

A callable for feature selection. Defaults to None.

None
algorithm_pre_selector Optional[Callable]

A callable for algorithm pre-selection. Defaults to None.

None
budget Optional[Any]

The budget constraint for the selector. Defaults to None.

None
maximize bool

Whether to maximize the objective function. Defaults to False.

False
feature_groups Optional[Any]

Feature groups to be used by the selector. Defaults to None.

None
Source code in asf/selectors/selector_pipeline.py
def __init__(
    self,
    selector: AbstractSelector,
    preprocessor: Optional[Any] = None,
    pre_solving: AbstractPresolver = None,
    feature_selector: Optional[Callable] = None,
    algorithm_pre_selector: Optional[Callable] = None,
    budget: Optional[Any] = None,
    maximize: bool = False,
    feature_groups: Optional[Any] = None,
) -> None:
    """
    Initializes the SelectorPipeline.

    Args:
        selector (AbstractSelector): The main selector model to be used.
        preprocessor (Optional[Callable], optional): A callable for preprocessing the input data. Defaults to None.
        pre_solving (Optional[Callable], optional): A callable for pre-solving steps. Defaults to None.
        feature_selector (Optional[Callable], optional): A callable for feature selection. Defaults to None.
        algorithm_pre_selector (Optional[Callable], optional): A callable for algorithm pre-selection. Defaults to None.
        budget (Optional[Any], optional): The budget constraint for the selector. Defaults to None.
        maximize (bool, optional): Whether to maximize the objective function. Defaults to False.
        feature_groups (Optional[Any], optional): Feature groups to be used by the selector. Defaults to None.
    """
    self.selector = selector
    self.pre_solving = pre_solving
    self.feature_selector = feature_selector
    self.algorithm_pre_selector = algorithm_pre_selector
    self.budget = budget
    self.maximize = maximize

    # Always include SimpleImputer as the first step in the preprocessing pipeline
    if preprocessor is None:
        preprocessor = []
    elif not isinstance(preprocessor, list):
        preprocessor = [preprocessor]
    preprocessor = [SimpleImputer(strategy="mean")] + preprocessor
    steps = [(type(p).__name__, p) for p in preprocessor]
    self.preprocessor = Pipeline(steps)
    self.preprocessor.set_output(transform="pandas")

    self._orig_columns = None
    self._orig_index = None

fit(X, y)

Fits the pipeline to the input data.

Parameters:

Name Type Description Default
X Any

The input features.

required
y Any

The target labels.

required
Source code in asf/selectors/selector_pipeline.py
def fit(self, X: Any, y: Any) -> None:
    """
    Fits the pipeline to the input data.

    Args:
        X (Any): The input features.
        y (Any): The target labels.
    """
    if isinstance(X, pd.DataFrame):
        self._orig_columns = X.columns
        self._orig_index = X.index

    if self.preprocessor:
        X = self.preprocessor.fit_transform(X)

    if self.pre_solving:
        self.pre_solving.fit(X, y)

    if self.algorithm_pre_selector:
        y = self.algorithm_pre_selector.fit_transform(y)

    if self.feature_selector:
        X, y = self.feature_selector.fit_transform(X, y)

    self.selector.fit(X, y)

get_config()

Returns a dictionary with the configuration of the pipeline.

Returns:

Name Type Description
dict dict

Configuration details of the pipeline.

Source code in asf/selectors/selector_pipeline.py
def get_config(self) -> dict:
    """
    Returns a dictionary with the configuration of the pipeline.

    Returns:
        dict: Configuration details of the pipeline.
    """

    def get_model_class_name(selector):
        if hasattr(selector, "model_class"):
            mc = selector.model_class
            # Handle functools.partial
            if hasattr(mc, "func"):
                return mc.func.__name__
            elif hasattr(mc, "__name__"):
                return mc.__name__
            else:
                return str(type(mc))
        return None

    config = {
        "budget": self.budget,
        "selector": type(self.selector).__name__,
        "selector_model": get_model_class_name(self.selector),
        "pre_solving": type(self.pre_solving).__name__
        if self.pre_solving
        else None,
        "presolving_budget": getattr(self.pre_solving, "budget", None)
        if self.pre_solving
        else None,
        "preprocessor": type(self.preprocessor).__name__
        if self.preprocessor
        else None,
        "preprocessor_steps": [
            type(step[1]).__name__ for step in self.preprocessor.steps
        ]
        if hasattr(self.preprocessor, "steps")
        else None,
        "feature_selector": type(self.feature_selector).__name__
        if self.feature_selector
        else None,
        "algorithm_pre_selector": type(self.algorithm_pre_selector).__name__
        if self.algorithm_pre_selector
        else None,
    }
    return config

load(path) staticmethod

Loads a pipeline from a file.

Parameters:

Name Type Description Default
path str

The file path from which the pipeline will be loaded.

required

Returns:

Name Type Description
SelectorPipeline SelectorPipeline

The loaded pipeline.

Source code in asf/selectors/selector_pipeline.py
@staticmethod
def load(path: str) -> "SelectorPipeline":
    """
    Loads a pipeline from a file.

    Args:
        path (str): The file path from which the pipeline will be loaded.

    Returns:
        SelectorPipeline: The loaded pipeline.
    """
    import joblib

    return joblib.load(path)

predict(X)

Makes predictions using the fitted pipeline.

Parameters:

    X (Any): The input features. Required.

Returns:

    dict: The predictions made by the selector, keyed by instance.

Source code in asf/selectors/selector_pipeline.py
def predict(self, X: Any) -> dict:
    """
    Makes predictions using the fitted pipeline.

    Args:
        X (Any): The input features.

    Returns:
        dict: The predictions made by the selector, keyed by instance.
    """
    if self.preprocessor:
        X = self.preprocessor.transform(X)

    scheds = None
    if self.pre_solving:
        scheds = self.pre_solving.predict(X)

    if self.feature_selector:
        X = self.feature_selector.transform(X)

    predictions = self.selector.predict(X)

    # Ensure predictions use the same index as X
    predictions = pd.Series(predictions, index=X.index)
    if scheds is not None:
        for instance_id, pre_schedule in scheds.items():
            if instance_id in predictions:
                predictions[instance_id] = pre_schedule + predictions[instance_id]
    return predictions.to_dict()
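
The returned dictionary maps each instance to a schedule, i.e. a list of (algorithm, time) pairs, and a configured pre-solver's schedule is prepended by plain list concatenation. A hand-worked illustration of that last step:

# Selector prediction for instance "i1": run algo_a for the remaining budget.
selector_pred = [("algo_a", 90.0)]
# Pre-solving schedule for the same instance: try algo_b briefly first.
pre_schedule = [("algo_b", 10.0)]

combined = pre_schedule + selector_pred  # pre-solving runs first
assert combined == [("algo_b", 10.0), ("algo_a", 90.0)]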

save(path)

Saves the pipeline to a file.

Parameters:

    path (str): The file path where the pipeline will be saved. Required.
Source code in asf/selectors/selector_pipeline.py
def save(self, path: str) -> None:
    """
    Saves the pipeline to a file.

    Args:
        path (str): The file path where the pipeline will be saved.
    """
    import joblib

    joblib.dump(self, path)
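
Since both methods delegate to joblib, persisting and restoring a fitted pipeline is a single call each (continuing the fit sketch above; the file name is illustrative):

pipeline.save("selector_pipeline.joblib")                     # writes the fitted pipeline
restored = SelectorPipeline.load("selector_pipeline.joblib")  # joblib round-trip
schedules = restored.predict(X)                               # usable without re-fitting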

SimpleRanking

Bases: AbstractModelBasedSelector

Algorithm Selection via Ranking (Oentaryo et al.), optionally extended with algorithm features.

Attributes:

    model_class: The class of the classification model to be used.
    metadata: Metadata containing information about the algorithms.
    classifier: The trained classification model.

Source code in asf/selectors/simple_ranking.py
class SimpleRanking(AbstractModelBasedSelector):
    """
    Algorithm Selection via Ranking (Oentaryo et al.) + algo features (optional).
    Attributes:
        model_class: The class of the classification model to be used.
        metadata: Metadata containing information about the algorithms.
        classifier: The trained classification model.
    """

    def __init__(self, model_class: AbstractPredictor, **kwargs):
        """
        Initializes the SimpleRanking selector with the given parameters.

        Args:
            model_class: The class of the classification model to be used. Assumes XGBoost API.
            metadata: Metadata containing information about the algorithms.
            hierarchical_generator: Feature generator to be used.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
    ):
        """
        Fits the classification model to the given feature and performance data.

        Args:
            features: DataFrame containing the feature data.
            performance: DataFrame containing the performance data.
        """
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=self.algorithms,
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],
            )

        performance = performance[self.algorithms]
        features = features[self.features]
        features.index.name = "INSTANCE_ID"

        self.algorithm_features.index.name = "ALGORITHM"

        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        stacked_performance = performance.stack().reset_index()
        stacked_performance.columns = [
            "INSTANCE_ID",
            "ALGORITHM",
            "PERFORMANCE",
        ]
        merged = total_features.merge(
            stacked_performance,
            right_on=["INSTANCE_ID", "ALGORITHM"],
            left_on=["INSTANCE_ID", "ALGORITHM"],
            how="left",
        )

        gdfs = []
        for group, gdf in merged.groupby("INSTANCE_ID"):
            gdf["rank"] = gdf["PERFORMANCE"].rank(
                ascending=True, method="max" if self.maximize else "min"
            )
            gdfs.append(gdf)
        merged = pd.concat(gdfs)

        total_features = merged.drop(
            columns=[
                "INSTANCE_ID",
                "ALGORITHM",
                "PERFORMANCE",
                "rank",
                self.algorithm_features.index.name,
            ]
        )
        qid = merged["INSTANCE_ID"].values
        encoder = OrdinalEncoder()
        qid = encoder.fit_transform(qid.reshape(-1, 1)).flatten()

        self.classifier = self.model_class()
        self.classifier.fit(
            total_features,
            merged["rank"],
            qid=qid,
        )

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance in the given feature data.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping instance names to the predicted best algorithm.
        """

        features = features[self.features]

        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        predictions = self.classifier.predict(
            total_features[list(self.features) + list(self.algorithm_features.columns)]
        )

        scheds = {}
        for instance_name in features.index.unique():
            ids = total_features[features.index.name] == instance_name
            chosen = predictions[ids].argmin()
            scheds[instance_name] = [
                (
                    total_features.loc[ids].iloc[chosen]["ALGORITHM"],
                    self.budget,
                )
            ]

        return scheds
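
The heart of _fit is reshaping the instances x algorithms performance matrix into one row per (instance, algorithm) pair, with per-instance ranks as targets and INSTANCE_ID as the query id. A self-contained sketch of that transformation (toy data; column names follow the source above):

import pandas as pd

features = pd.DataFrame({"f1": [0.1, 0.9]}, index=["i1", "i2"])
features.index.name = "INSTANCE_ID"
# One-hot algorithm features, as built when algorithm_features is None.
algo_features = pd.DataFrame({"algo_0": [1.0, 0.0], "algo_1": [0.0, 1.0]},
                             index=["a", "b"])
algo_features.index.name = "ALGORITHM"

# Cross join: every instance paired with every algorithm (2 x 2 = 4 rows).
total = pd.merge(features.reset_index(), algo_features.reset_index(), how="cross")

performance = pd.DataFrame({"a": [3.0, 10.0], "b": [8.0, 2.0]}, index=["i1", "i2"])
stacked = performance.stack().reset_index()
stacked.columns = ["INSTANCE_ID", "ALGORITHM", "PERFORMANCE"]

merged = total.merge(stacked, on=["INSTANCE_ID", "ALGORITHM"], how="left")
merged["rank"] = merged.groupby("INSTANCE_ID")["PERFORMANCE"].rank(ascending=True)
# "rank" is the learning-to-rank target; INSTANCE_ID (ordinal-encoded) is the qid.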

__init__(model_class, **kwargs)

Initializes the SimpleRanking selector with the given parameters.

Parameters:

    model_class (AbstractPredictor): The class of the classification model to be used. Assumes an XGBoost-style ranking API (fit accepts a qid argument). Required.
    metadata: Metadata containing information about the algorithms (forwarded via **kwargs).
    hierarchical_generator: Feature generator to be used (forwarded via **kwargs).
Source code in asf/selectors/simple_ranking.py
def __init__(self, model_class: AbstractPredictor, **kwargs):
    """
    Initializes the SimpleRanking selector with the given parameters.

    Args:
        model_class: The class of the classification model to be used. Assumes XGBoost API.
        metadata: Metadata containing information about the algorithms.
        hierarchical_generator: Feature generator to be used.
    """
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    self.classifier = None

SunnySelector

Bases: AbstractSelector

SUNNY/SUNNY-AS2 algorithm selector.

This selector uses k-nearest neighbors (k-NN) in feature space to construct a per-instance schedule: a greedy set cover over the neighborhood selects a small solver portfolio, each solver receives a time slice proportional to the number of neighbors it solves, and leftover time goes to a backup solver. When SUNNY-AS2 is enabled (use_v2=True), k is tuned via internal cross-validation.

Source code in asf/selectors/sunny_selector.py
class SunnySelector(AbstractSelector):
    """
    SUNNY/SUNNY-AS2 algorithm selector.

    This selector uses k-nearest neighbors (k-NN) in feature space to construct a schedule. When SUNNY-AS2 is enabled (use_v2=True), k is tuned via internal cross-validation.
    """

    def __init__(
        self,
        k: int = 10,
        use_v2: bool = False,
        random_state: int = 42,
        n_folds: int = 5,
        k_candidates: list[int] = [3, 5, 7, 10, 20, 50],
        **kwargs,
    ):
        """
        Initialize the SUNNY selector.

        Args:
            k (int): Number of neighbors for k-NN.
            use_v2 (bool): Whether to tune k using cross-validation (SUNNY-AS2).
            random_state (int): Random seed.
            n_folds (int): Number of cross-validation folds used to tune k.
            k_candidates (list[int]): Candidate values of k evaluated when tuning.
            **kwargs: Additional arguments for the parent class (e.g. budget, the total time budget for the schedule).
        """
        super().__init__(**kwargs)
        self.k = k
        self.use_v2 = use_v2
        self.random_state = random_state
        self.features = None
        self.performance = None
        self.knn = None
        self.n_folds = n_folds
        self.k_candidates = k_candidates

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame) -> None:
        """
        Fit the SUNNY selector on the training data.

        Caps all performance values above the budget as unsolved (NaN).
        If use_v2 is True, tunes k using internal cross-validation.

        Args:
            features (pd.DataFrame): Training features (instances x features).
            performance (pd.DataFrame): Training performance matrix (instances x algorithms).
        """
        self.features = features.copy()
        perf = performance.copy()
        perf[perf > self.budget] = np.nan
        self.performance = perf

        # SUNNY-AS2: tune k using cross-validation if requested
        if self.use_v2:
            best_k = self.k
            best_score = float("inf")
            kf = KFold(
                n_splits=self.n_folds, shuffle=True, random_state=self.random_state
            )
            instance_indices = np.arange(len(self.features))

            for candidate_k in self.k_candidates:
                fold_scores = []
                for train_idx, val_idx in kf.split(instance_indices):
                    train_features = self.features.iloc[train_idx]
                    train_perf = self.performance.iloc[train_idx]
                    val_features = self.features.iloc[val_idx]
                    val_perf = self.performance.iloc[val_idx]

                    knn = NearestNeighbors(
                        n_neighbors=min(candidate_k, len(train_features)),
                        metric="euclidean",
                    )
                    knn.fit(train_features.values)

                    # For each validation instance, get schedule and compute achieved runtime
                    total_runtime = 0.0
                    n_instances = 0
                    for idx, instance in enumerate(val_features.index):
                        x = val_features.loc[instance].values.reshape(1, -1)
                        dists, neighbor_idxs = knn.kneighbors(
                            x, n_neighbors=min(candidate_k, len(train_features))
                        )
                        neighbor_idxs = neighbor_idxs.flatten()
                        neighbor_perf = train_perf.iloc[neighbor_idxs]
                        schedule = self._construct_sunny_schedule(neighbor_perf)

                        # Evaluate: take the first algorithm in the schedule that solves the instance, or assign budget if none solve it
                        instance_perf = val_perf.loc[instance]
                        solved = False
                        for algo, _ in schedule:
                            runtime = instance_perf[algo]
                            if not np.isnan(runtime) and runtime <= self.budget:
                                total_runtime += runtime
                                solved = True
                                break
                        if not solved:
                            total_runtime += self.budget  # Penalize unsolved
                        n_instances += 1

                    avg_runtime = (
                        total_runtime / n_instances if n_instances > 0 else float("inf")
                    )
                    fold_scores.append(avg_runtime)

                mean_score = np.mean(fold_scores)
                if mean_score < best_score:
                    best_score = mean_score
                    best_k = candidate_k

            self.k = best_k

        # Fit final model with optimal k
        self.knn = NearestNeighbors(
            n_neighbors=min(self.k, len(self.features)), metric="euclidean"
        )
        self.knn.fit(self.features.values)

    def _mine_solvers(
        self,
        neighbor_perf: pd.DataFrame,
        cutoff: int,
        already_selected: Optional[List[str]] = None,
        already_covered: Optional[set] = None,
    ) -> List[str]:
        """
        Recursive greedy set cover to identify a portfolio of solvers.
        Tie-break by minimum total runtime on solved instances.

        Args:
            neighbor_perf (pd.DataFrame): Performance matrix for the k nearest neighbors.
            cutoff (int): Maximum number of solvers to select.
            already_selected (Optional[List[str]]): Solvers already selected (for recursion).
            already_covered (Optional[set]): Instances already covered (for recursion).

        Returns:
            List[str]: List of selected solver names.
        """
        if already_selected is None:
            already_selected = []
        if already_covered is None:
            already_covered = set()

        remaining_instances = set(neighbor_perf.index) - already_covered
        if len(already_selected) >= cutoff or not remaining_instances:
            return already_selected

        # For each solver, count how many new instances it can solve
        best_solver = None
        best_cover = set()
        best_runtime = None
        for algo in self.algorithms:
            if algo in already_selected:
                continue
            # Instances this solver solves and are not yet covered
            covers = (
                set(neighbor_perf.index[neighbor_perf[algo].notna()])
                & remaining_instances
            )
            if not best_solver or len(covers) > len(best_cover):
                best_solver = algo
                best_cover = covers
                # For tie-breaking, sum runtime on these instances
                best_runtime = (
                    neighbor_perf.loc[list(covers), algo].sum() if covers else np.inf
                )
            elif len(covers) == len(best_cover):
                runtime = (
                    neighbor_perf.loc[list(covers), algo].sum() if covers else np.inf
                )
                if runtime < best_runtime:
                    best_solver = algo
                    best_cover = covers
                    best_runtime = runtime

        if not best_cover:
            return already_selected

        already_selected.append(best_solver)
        already_covered |= best_cover
        return self._mine_solvers(
            neighbor_perf, cutoff, already_selected, already_covered
        )

    def _construct_sunny_schedule(
        self, neighbor_perf: pd.DataFrame
    ) -> List[Tuple[str, float]]:
        """
        Construct a SUNNY schedule for a given neighborhood.

        Uses recursive greedy set cover to select a portfolio, allocates time slices
        proportionally to solved counts, and (if needed) adds a backup solver.

        Args:
            neighbor_perf (pd.DataFrame): Performance matrix for the k nearest neighbors.

        Returns:
            List[Tuple[str, float]]: List of (algorithm, allocated_time) tuples, sorted by average runtime.
        """
        # 1. H_sel: Select portfolio using recursive greedy set cover
        cutoff = min(self.k, len(self.algorithms))
        best_pfolio = self._mine_solvers(neighbor_perf, cutoff)

        # Count solved/unsolved instances for each selected solver
        solved_mask = neighbor_perf.notna()
        slots = {algo: solved_mask[algo].sum() for algo in best_pfolio}

        covered = set()
        for algo in best_pfolio:
            covered |= set(neighbor_perf.index[solved_mask[algo]])
        n_unsolved = len(set(neighbor_perf.index) - covered)

        # Total time slots = sum of solved counts + unsolved
        total_slots = sum(slots.values()) + n_unsolved
        if total_slots == 0:
            # fallback: equal allocation
            slots = {algo: 1 for algo in best_pfolio}
            total_slots = len(best_pfolio)

        # 2. H_all: Allocate time slices proportionally
        schedule = []
        for algo in best_pfolio:
            t = self.budget * (slots[algo] / total_slots)
            schedule.append((algo, t))

        # If there are unsolved instances, allocate remaining time to backup solver
        time_used = sum(t for _, t in schedule)
        if n_unsolved > 0:
            backup_time = self.budget - time_used
            if backup_time > 0:
                backup_algo = solved_mask.sum(axis=0).idxmax()
                schedule.append((backup_algo, backup_time))

        # 3. H_sch: Sort by average runtime (ascending) among neighbors
        avg_times = neighbor_perf[[algo for algo, _ in schedule]].mean(axis=0).to_dict()
        schedule.sort(key=lambda x: avg_times.get(x[0], float("inf")))

        return schedule

    def _predict(
        self,
        features: Optional[pd.DataFrame] = None,
    ) -> Dict[str, List[Tuple[str, float]]]:
        """
        Predict a SUNNY schedule for each instance in the provided features.

        Args:
            features (pd.DataFrame): Feature matrix for the test instances.

        Returns:
            Dict[str, List[Tuple[str, float]]]: Mapping from instance name to schedule (list of (algorithm, time) tuples).
        """
        if features is None:
            raise ValueError("Features must be provided for prediction.")

        predictions = {}
        for idx, instance in enumerate(features.index):
            x = features.loc[instance].values.reshape(1, -1)
            dists, neighbor_idxs = self.knn.kneighbors(x, n_neighbors=self.k)
            neighbor_idxs = neighbor_idxs.flatten()
            neighbor_perf = self.performance.iloc[neighbor_idxs]

            schedule = self._construct_sunny_schedule(neighbor_perf)
            predictions[instance] = schedule

        return predictions
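
A worked example of the schedule construction on a toy neighborhood of k = 4 instances and three solvers (NaN marks an instance not solved within the budget, as produced by the cap in _fit):

import numpy as np
import pandas as pd

neighbor_perf = pd.DataFrame(
    {"s1": [5.0, 7.0, np.nan, np.nan],   # solves n1, n2 (total runtime 12)
     "s2": [np.nan, 4.0, 6.0, np.nan],   # solves n2, n3 (total runtime 10)
     "s3": [np.nan] * 4},                # solves nothing
    index=["n1", "n2", "n3", "n4"],
)
# Greedy set cover: s1 and s2 both cover 2 instances; the tie-break on lower
# total runtime picks s2 first, then s1 adds n1. n4 stays uncovered.
B = 100.0
portfolio = ["s2", "s1"]
slots = {a: int(neighbor_perf[a].notna().sum()) for a in portfolio}  # both 2
total_slots = sum(slots.values()) + 1    # +1 slot for the unsolved n4 -> 5
schedule = [(a, B * slots[a] / total_slots) for a in portfolio]
# -> [("s2", 40.0), ("s1", 40.0)]; the backup solver gets the remaining 20.0,
# and the slices are finally ordered by mean runtime among the neighbors.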

__init__(k=10, use_v2=False, random_state=42, n_folds=5, k_candidates=[3, 5, 7, 10, 20, 50], **kwargs)

Initialize the SUNNY selector.

Parameters:

    k (int): Number of neighbors for k-NN. Default: 10
    use_v2 (bool): Whether to tune k using cross-validation (SUNNY-AS2). Default: False
    random_state (int): Random seed. Default: 42
    n_folds (int): Number of cross-validation folds used to tune k. Default: 5
    k_candidates (list[int]): Candidate values of k evaluated when tuning. Default: [3, 5, 7, 10, 20, 50]
    **kwargs: Additional arguments for the parent class (e.g. budget, the total time budget for the schedule). Default: {}
Source code in asf/selectors/sunny_selector.py
def __init__(
    self,
    k: int = 10,
    use_v2: bool = False,
    random_state: int = 42,
    n_folds: int = 5,
    k_candidates: list[int] = [3, 5, 7, 10, 20, 50],
    **kwargs,
):
    """
    Initialize the SUNNY selector.

    Args:
        k (int): Number of neighbors for k-NN.
        use_v2 (bool): Whether to tune k using cross-validation (SUNNY-AS2).
        random_state (int): Random seed.
        n_folds (int): Number of cross-validation folds used to tune k.
        k_candidates (list[int]): Candidate values of k evaluated when tuning.
        **kwargs: Additional arguments for the parent class (e.g. budget, the total time budget for the schedule).
    """
    super().__init__(**kwargs)
    self.k = k
    self.use_v2 = use_v2
    self.random_state = random_state
    self.features = None
    self.performance = None
    self.knn = None
    self.n_folds = n_folds
    self.k_candidates = k_candidates

tune_selector(X, y, selector_class, selector_kwargs={}, preprocessing_class=None, pre_solving_class=None, feature_selector=None, algorithm_pre_selector=None, budget=None, maximize=False, feature_groups=None, output_dir='./smac_output', smac_metric=running_time_selector_performance, smac_kwargs={}, smac_scenario_kwargs={}, runcount_limit=100, timeout=np.inf, seed=0, cv=10, groups=None)

Tunes a selector model using SMAC for hyperparameter optimization.

Parameters:

    X (pd.DataFrame): Feature matrix for training and testing. Required.
    y (pd.DataFrame): Target matrix for training and testing. Required.
    selector_class (list[AbstractSelector] | AbstractSelector | list[tuple[AbstractSelector, dict]]): Selector class(es) to tune; per-selector configuration-space kwargs can be supplied as the second element of a (selector, dict) tuple. Required.
    selector_kwargs (dict): Additional arguments for the selector's instantiation. Default: {}
    preprocessing_class (list[TransformerMixin], optional): Preprocessing steps to apply before the selector. Default: None
    pre_solving_class (list[object], optional): Pre-solving strategies to use. Default: None
    feature_selector (object, optional): Feature selector to use. Default: None
    algorithm_pre_selector (object, optional): Algorithm pre-selector to use. Default: None
    budget (float, optional): Budget for the selector; required if pre_solving_class is given. Default: None
    maximize (bool): Whether to maximize the metric. Default: False
    feature_groups (list, optional): Feature groups to consider. Default: None
    output_dir (str): Directory to store SMAC output. Default: './smac_output'
    smac_metric (callable): Metric function to evaluate the selector's performance. Default: running_time_selector_performance
    smac_kwargs (dict): Additional arguments for SMAC's optimization facade. Default: {}
    smac_scenario_kwargs (dict): Additional arguments for SMAC's scenario configuration. Default: {}
    runcount_limit (int): Maximum number of function evaluations. Default: 100
    timeout (float): Maximum wall-clock time for optimization. Default: np.inf
    seed (int): Random seed for reproducibility. Default: 0
    cv (int): Number of cross-validation splits. Default: 10
    groups (np.ndarray, optional): Group labels for cross-validation. Default: None

Returns:

    SelectorPipeline: A pipeline with the best-tuned selector and preprocessing steps.

Source code in asf/selectors/selector_tuner.py
def tune_selector(
    X: pd.DataFrame,
    y: pd.DataFrame,
    selector_class: list[AbstractSelector]
    | AbstractSelector
    | list[tuple[AbstractSelector, dict]],
    selector_kwargs: dict = {},
    preprocessing_class: list[TransformerMixin] = None,
    pre_solving_class: list[object] = None,
    feature_selector: object = None,
    algorithm_pre_selector: object = None,
    budget: float = None,
    maximize: bool = False,
    feature_groups: list = None,
    output_dir: str = "./smac_output",
    smac_metric: callable = running_time_selector_performance,
    smac_kwargs: dict = {},
    smac_scenario_kwargs: dict = {},
    runcount_limit: int = 100,
    timeout: float = np.inf,
    seed: int = 0,
    cv: int = 10,
    groups: np.ndarray = None,
) -> SelectorPipeline:
    """
    Tunes a selector model using SMAC for hyperparameter optimization.

    Parameters:
        X (pd.DataFrame): Feature matrix for training and testing.
        y (pd.DataFrame): Target matrix for training and testing.
        selector_class: Selector class(es) to tune; a single class, a list of classes,
            or a list of (class, config-space kwargs) tuples.
        selector_kwargs (dict): Additional arguments for the selector's instantiation.
        preprocessing_class (AbstractPreprocessor, optional): Preprocessing class to apply before selector. Defaults to None.
        pre_solving_class (object, optional): Pre-solving strategies to use. Defaults to None.
        feature_selector (object, optional): Feature selector to use. Defaults to None.
        algorithm_pre_selector (object, optional): Algorithm pre-selector to use. Defaults to None.
        budget (float, optional): Budget for the selector. Defaults to None.
        maximize (bool): Whether to maximize the metric. Defaults to False.
        feature_groups (list, optional): Feature groups to consider. Defaults to None.
        output_dir (str): Directory to store SMAC output. Defaults to "./smac_output".
        smac_metric (callable): Metric function to evaluate the selector's performance. Defaults to `running_time_selector_performance`.
        smac_kwargs (dict): Additional arguments for SMAC's optimization facade.
        smac_scenario_kwargs (dict): Additional arguments for SMAC's scenario configuration.
        runcount_limit (int): Maximum number of function evaluations. Defaults to 100.
        timeout (float): Maximum wall-clock time for optimization. Defaults to np.inf.
        seed (int): Random seed for reproducibility. Defaults to 0.
        cv (int): Number of cross-validation splits. Defaults to 10.
        groups (np.ndarray, optional): Group labels for cross-validation. Defaults to None.

    Returns:
        SelectorPipeline: A pipeline with the best-tuned selector and preprocessing steps.
    """
    assert CONFIGSPACE_AVAILABLE, (
        "SMAC is not installed. Please install it to use this function via pip install asf-lib[tune]."
    )

    if pre_solving_class is not None and len(pre_solving_class) > 0 and budget is None:
        raise ValueError(
            "If pre_solving_class is provided, you must also provide a budget."
        )

    if type(selector_class) is not list:
        selector_class = [selector_class]

    cs = ConfigurationSpace()
    cs_transform = {}

    # Add selectors to configuration space
    if type(selector_class[0]) is tuple:
        selector_param = Categorical(
            name="selector",
            items=[str(c[0].__name__) for c in selector_class],
        )
        cs_transform["selector"] = {str(c[0].__name__): c[0] for c in selector_class}
    else:
        selector_param = Categorical(
            name="selector",
            items=[str(c.__name__) for c in selector_class],
        )
        cs_transform["selector"] = {str(c.__name__): c for c in selector_class}
    cs.add(selector_param)

    for selector in selector_class:
        if type(selector) is tuple:
            selector_space_kwargs = selector[1]
            selector = selector[0]
        else:
            selector_space_kwargs = {}

        cs, cs_transform = selector.get_configuration_space(
            cs=cs,
            cs_transform=cs_transform,
            parent_param=selector_param,
            parent_value=str(selector.__name__),
            **selector_space_kwargs,
        )

    # Add pre-solving and budget to configuration space
    if pre_solving_class is not None and len(pre_solving_class) > 0:
        presolver_param = Categorical(
            name="presolver",
            items=[str(type(p).__name__) for p in pre_solving_class],
        )
        cs_transform["presolver"] = {
            str(type(p).__name__): p for p in pre_solving_class
        }
        cs.add(presolver_param)
        # Budget for presolver (fraction of total budget)
        presolver_budget_param = UniformFloatHyperparameter(
            name="presolver_budget",
            lower=0.0,
            upper=1.0,
            default_value=0.2,
        )
        cs.add(presolver_budget_param)

    # Add preprocessors to configuration spaces
    if preprocessing_class is not None and len(preprocessing_class) > 0:
        # Use a multi-categorical: for each preprocessor, a boolean flag
        for i, preproc in enumerate(preprocessing_class):
            preproc_param = Categorical(
                name=f"preprocessor_{i}",
                items=["off", "on"],
            )
            cs.add(preproc_param)
        cs_transform["preprocessors"] = preprocessing_class

    scenario = Scenario(
        configspace=cs,
        n_trials=runcount_limit,
        walltime_limit=timeout,
        deterministic=True,
        output_directory=output_dir,
        seed=seed,
        **smac_scenario_kwargs,
    )

    def target_function(config, seed):
        if groups is not None:
            kfold = GroupKFoldShuffle(n_splits=cv, shuffle=True, random_state=seed)
        else:
            kfold = KFold(n_splits=cv, shuffle=True, random_state=seed)

        scores = []
        for train_idx, test_idx in kfold.split(X, y, groups):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            # Preprocessor selection
            preprocessors = None
            if "preprocessors" in cs_transform:
                preprocessors = []
                for i, preproc in enumerate(cs_transform["preprocessors"]):
                    if config.get(f"preprocessor_{i}", "off") == "on":
                        preprocessors.append(preproc)
                if len(preprocessors) == 0:
                    preprocessors = None

            # Presolver selection and budget
            presolver = None
            presolver_budget = None
            if "presolver" in cs_transform:
                presolver = cs_transform["presolver"][config["presolver"]]
                presolver_budget = (
                    config["presolver_budget"] * budget if budget is not None else None
                )
                if presolver is not None and presolver_budget is not None:
                    setattr(presolver, "budget", presolver_budget)

            selector = SelectorPipeline(
                selector=cs_transform["selector"][
                    config["selector"]
                ].get_from_configuration(
                    config,
                    cs_transform,
                    budget=(budget - presolver_budget)
                    if presolver_budget is not None
                    else budget,
                    maximize=maximize,
                    feature_groups=feature_groups,
                    **selector_kwargs,
                ),
                preprocessor=preprocessors,
                pre_solving=presolver,
                feature_selector=feature_selector,
                algorithm_pre_selector=algorithm_pre_selector,
                budget=budget,
                maximize=maximize,
                feature_groups=feature_groups,
            )
            selector.fit(X_train, y_train)

            y_pred = selector.predict(X_test)
            score = smac_metric(y_pred, y_test)
            scores.append(score)

        return np.mean(scores)

    smac = HyperparameterOptimizationFacade(scenario, target_function, **smac_kwargs)
    best_config = smac.optimize()

    del smac  # clean up SMAC to free memory and delete dask client

    # Final pipeline construction
    preprocessors = None
    if "preprocessors" in cs_transform:
        preprocessors = []
        for i, preproc in enumerate(cs_transform["preprocessors"]):
            if best_config.get(f"preprocessor_{i}", "off") == "on":
                preprocessors.append(preproc)
        if len(preprocessors) == 0:
            preprocessors = None

    presolver = None
    presolver_budget = None
    if "presolver" in cs_transform:
        presolver = cs_transform["presolver"][best_config["presolver"]]
        presolver_budget = (
            best_config["presolver_budget"] * budget if budget is not None else None
        )
        setattr(presolver, "budget", presolver_budget)

    return SelectorPipeline(
        selector=cs_transform["selector"][
            best_config["selector"]
        ].get_from_configuration(
            best_config,
            cs_transform,
            budget=(budget - presolver_budget)
            if presolver_budget is not None
            else budget,
            maximize=maximize,
            feature_groups=feature_groups,
            **selector_kwargs,
        ),
        preprocessor=preprocessors,
        pre_solving=presolver,
        feature_selector=feature_selector,
        algorithm_pre_selector=algorithm_pre_selector,
        budget=budget,
        maximize=maximize,
        feature_groups=feature_groups,
    )
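
A minimal invocation sketch. PairwiseClassifier and PairwiseRegressor are the selectors named in the docstring above; the import paths and file names are assumptions for illustration:

import pandas as pd
from asf.selectors import PairwiseClassifier, PairwiseRegressor, tune_selector

X = pd.read_csv("features.csv", index_col=0)     # instances x features
y = pd.read_csv("performance.csv", index_col=0)  # instances x algorithms (runtimes)

best_pipeline = tune_selector(
    X, y,
    selector_class=[PairwiseClassifier, PairwiseRegressor],
    budget=5000.0,        # per-instance runtime budget
    runcount_limit=50,    # SMAC trials
    cv=5,
    seed=0,
)
best_pipeline.save("best_selector.joblib")
schedules = best_pipeline.predict(X)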