Skip to content

Selectors

AbstractFeatureGenerator

Abstract base class for generating additional features.

Subclasses should implement the methods to define specific feature generation logic based on a set of base features.

Source code in asf/selectors/feature_generator.py
class AbstractFeatureGenerator:
    """
    Abstract base class for generating additional features.

    Subclasses should implement the methods to define specific feature
    generation logic based on a set of base features.
    """

    def __init__(self, **kwargs: Any) -> None:
        """
        Initialize the AbstractFeatureGenerator.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments.
        """
        pass

    def fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        algorithm_features: pd.DataFrame | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Fit the generator to the data.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data.
        algorithm_features : pd.DataFrame or None, optional
            Additional features related to algorithms.
        **kwargs : Any
            Additional keyword arguments.
        """
        pass

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
                Generate additional features based on the provided base features.

                Parameters
                ----------
                base_features : pd.DataFrame
                    The input DataFrame containing the base features.

                Returns
        -------
                pd.DataFrame
                    A DataFrame containing the generated features.
        """
        raise NotImplementedError("Subclasses must implement generate_features.")

    @staticmethod
    def get_configuration_space(**kwargs: Any) -> Any:
        """
        Get the configuration space.
        """
        return None

__init__(**kwargs)

Initialize the AbstractFeatureGenerator.

Parameters

**kwargs : Any Additional keyword arguments.

Source code in asf/selectors/feature_generator.py
def __init__(self, **kwargs: Any) -> None:
    """
    Create the generator.

    Parameters
    ----------
    **kwargs : Any
        Accepted for interface compatibility; ignored here.
    """
    return None

fit(features, performance, algorithm_features=None, **kwargs)

Fit the generator to the data.

Parameters

features : pd.DataFrame The input features. performance : pd.DataFrame The algorithm performance data. algorithm_features : pd.DataFrame or None, optional Additional features related to algorithms. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/feature_generator.py
def fit(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame,
    algorithm_features: pd.DataFrame | None = None,
    **kwargs: Any,
) -> None:
    """
    Fit the generator to the data.

    Parameters
    ----------
    features : pd.DataFrame
        The input features.
    performance : pd.DataFrame
        The algorithm performance data.
    algorithm_features : pd.DataFrame or None, optional
        Additional features related to algorithms.
    **kwargs : Any
        Additional keyword arguments.
    """
    pass

generate_features(base_features)

Generate additional features based on the provided base features.

Parameters

base_features : pd.DataFrame The input DataFrame containing the base features.

Returns

pd.DataFrame A DataFrame containing the generated features.

Source code in asf/selectors/feature_generator.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate additional features from the given base features.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input DataFrame containing the base features.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the generated features.

    Raises
    ------
    NotImplementedError
        Always, in this implementation; subclasses must override it.
    """
    message = "Subclasses must implement generate_features."
    raise NotImplementedError(message)

get_configuration_space(**kwargs) staticmethod

Get the configuration space.

Source code in asf/selectors/feature_generator.py
@staticmethod
def get_configuration_space(**kwargs: Any) -> Any:
    """
    Get the configuration space.

    Parameters
    ----------
    **kwargs : Any
        Accepted and ignored by this implementation.

    Returns
    -------
    Any
        ``None`` in this implementation.
    """
    no_space = None
    return no_space

AbstractModelBasedSelector

Bases: AbstractSelector

Abstract base class for selectors that utilize a machine learning model.

This class provides functionality to initialize with a model class, save the selector to a file, and load it back.

Attributes

model_class : Callable A callable that represents the model class to be used.

Source code in asf/selectors/abstract_model_based_selector.py
class AbstractModelBasedSelector(AbstractSelector):
    """
    Base class for selectors that are driven by a machine learning model.

    Stores the model factory on ``model_class`` and provides joblib-based
    ``save`` and ``load`` helpers.

    Attributes
    ----------
    model_class : Callable
        A callable that represents the model class to be used.
    """

    def __init__(
        self,
        model_class: type[AbstractPredictor] | Callable[..., Any],
        **kwargs: Any,
    ) -> None:
        """
        Set up the selector and remember how to build its model.

        Parameters
        ----------
        model_class : type[AbstractPredictor] or Callable
            The model class or a callable that returns a model instance.
            A class deriving from ``ClassifierMixin`` or ``RegressorMixin``
            is wrapped with ``SklearnWrapper``.
        **kwargs : Any
            Forwarded to the parent class initializer.
        """
        super().__init__(**kwargs)

        is_sklearn_estimator = isinstance(model_class, type) and issubclass(
            model_class, (ClassifierMixin, RegressorMixin)
        )
        # Estimator classes are bound into a SklearnWrapper factory; anything
        # else is stored untouched.
        self.model_class: Callable[..., Any] = (
            partial(SklearnWrapper, model_class)
            if is_sklearn_estimator
            else model_class
        )

    def save(self, path: str | Path) -> None:
        """
        Persist this selector instance with ``joblib.dump``.

        Parameters
        ----------
        path : str or Path
            The file path to save the selector.
        """
        joblib.dump(self, path)
        return None

    @classmethod
    def load(cls, path: str | Path) -> AbstractModelBasedSelector:
        """
        Restore a selector instance with ``joblib.load``.

        NOTE(review): joblib persistence is pickle-based, so only load files
        that come from a trusted source. The loaded object is returned as-is;
        its type is not checked against ``cls``.

        Parameters
        ----------
        path : str or Path
            The file path to load the selector from.

        Returns
        -------
        AbstractModelBasedSelector
            The loaded selector instance.
        """
        selector = joblib.load(path)
        return selector

__init__(model_class, **kwargs)

Initialize the AbstractModelBasedSelector.

Parameters

model_class : type[AbstractPredictor] or Callable The model class or a callable that returns a model instance. If a scikit-learn compatible class is provided, it's wrapped with SklearnWrapper. **kwargs : Any Additional keyword arguments passed to the parent class initializer.

Source code in asf/selectors/abstract_model_based_selector.py
def __init__(
    self,
    model_class: type[AbstractPredictor] | Callable[..., Any],
    **kwargs: Any,
) -> None:
    """
    Set up the selector and remember how to build its model.

    Parameters
    ----------
    model_class : type[AbstractPredictor] or Callable
        The model class or a callable that returns a model instance.
        A class deriving from ``ClassifierMixin`` or ``RegressorMixin`` is
        wrapped with ``SklearnWrapper``.
    **kwargs : Any
        Forwarded to the parent class initializer.
    """
    super().__init__(**kwargs)

    is_sklearn_estimator = isinstance(model_class, type) and issubclass(
        model_class, (ClassifierMixin, RegressorMixin)
    )
    # Estimator classes are bound into a SklearnWrapper factory; anything
    # else is stored untouched.
    self.model_class: Callable[..., Any] = (
        partial(SklearnWrapper, model_class)
        if is_sklearn_estimator
        else model_class
    )

load(path) classmethod

Load a selector instance from the specified file path.

Parameters

path : str or Path The file path to load the selector from.

Returns

AbstractModelBasedSelector The loaded selector instance.

Source code in asf/selectors/abstract_model_based_selector.py
@classmethod
def load(cls, path: str | Path) -> AbstractModelBasedSelector:
    """
    Restore a selector instance with ``joblib.load``.

    NOTE(review): joblib persistence is pickle-based, so only load files
    that come from a trusted source. The loaded object is returned as-is;
    its type is not checked against ``cls``.

    Parameters
    ----------
    path : str or Path
        The file path to load the selector from.

    Returns
    -------
    AbstractModelBasedSelector
        The loaded selector instance.
    """
    selector = joblib.load(path)
    return selector

save(path)

Save the selector instance to the specified file path.

Parameters

path : str or Path The file path to save the selector.

Source code in asf/selectors/abstract_model_based_selector.py
def save(self, path: str | Path) -> None:
    """
    Persist this selector instance with ``joblib.dump``.

    Parameters
    ----------
    path : str or Path
        The file path to save the selector.
    """
    joblib.dump(self, path)
    return None

AbstractSelector

Bases: ABC

Abstract base class for algorithm selectors.

Provides a framework for fitting, predicting, and managing hierarchical feature generators and configuration spaces.

Attributes

maximize : bool Indicates whether the objective is to maximize or minimize the performance metric. budget : float or None The budget for the selector, if applicable. feature_groups : list[str] or None Groups of features to be considered during selection. hierarchical_generator : AbstractFeatureGenerator or None A generator for hierarchical features, if applicable. algorithm_features : pd.DataFrame or None Additional features related to algorithms, if provided. prediction_mode : str Mode for predictions ('aslib', 'pandas', 'numpy'). algorithms : list[str] List of algorithm names seen during fitting. features : list[str] List of feature names seen during fitting.

Source code in asf/selectors/abstract_selector.py
class AbstractSelector(ABC):
    """
    Abstract base class for algorithm selectors.

    Provides a framework for fitting, predicting, and managing hierarchical feature
    generators and configuration spaces. Subclasses supply the actual logic
    through ``_fit`` and ``_predict``.

    Attributes
    ----------
    maximize : bool
        Indicates whether the objective is to maximize or minimize the performance metric.
    budget : float or None
        The budget for the selector, if applicable.
    feature_groups : list[str] or None
        Groups of features to be considered during selection.
    hierarchical_generator : AbstractFeatureGenerator or None
        A generator for hierarchical features, if applicable.
    algorithm_features : pd.DataFrame or None
        Additional features related to algorithms, if provided.
    prediction_mode : str
        Mode for predictions ('aslib', 'pandas', 'numpy').
    algorithms : list[str]
        List of algorithm names seen during fitting.
    features : list[str]
        List of feature names seen during fitting. When a hierarchical
        generator is configured this also contains the generated columns,
        appended after the base columns.
    """

    def __init__(
        self,
        budget: float | None = None,
        maximize: bool = False,
        feature_groups: list[str] | None = None,
        hierarchical_generator: AbstractFeatureGenerator | None = None,
        prediction_mode: str = "aslib",
        **kwargs: Any,
    ) -> None:
        """
        Initialize the AbstractSelector.

        Parameters
        ----------
        budget : float or None, default=None
            The budget for the selector, if applicable.
        maximize : bool, default=False
            Indicates whether to maximize the performance metric.
        feature_groups : list[str] or None, default=None
            Groups of features to be considered during selection.
        hierarchical_generator : AbstractFeatureGenerator or None, default=None
            A generator for hierarchical features, if applicable.
        prediction_mode : str, default="aslib"
            Mode for predictions ('aslib', 'pandas', 'numpy').
        **kwargs : Any
            Additional keyword arguments; ignored by this base class.
        """
        self.maximize = bool(maximize)
        self.budget = float(budget) if budget is not None else None
        self.feature_groups = feature_groups
        self.hierarchical_generator = hierarchical_generator
        self.algorithm_features: pd.DataFrame | None = None
        self.prediction_mode = str(prediction_mode)
        self.algorithms: list[str] = []
        self.features: list[str] = []

    def fit(
        self,
        features: pd.DataFrame | np.ndarray,
        performance: pd.DataFrame | np.ndarray,
        algorithm_features: pd.DataFrame | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Fit the selector.

        Numpy inputs are converted to DataFrames with generated column names
        (``f_<i>`` for features, ``algo_<i>`` for performance). If a
        hierarchical generator is configured it is fitted first and its
        output is appended to the features before ``_fit`` is called.

        Parameters
        ----------
        features : pd.DataFrame or np.ndarray
            The input features.
        performance : pd.DataFrame or np.ndarray
            The algorithm performance data.
        algorithm_features : pd.DataFrame or None, optional
            Additional features related to algorithms.
        **kwargs : Any
            Additional keyword arguments forwarded to ``_fit``.

        Raises
        ------
        ValueError
            If one input is a DataFrame and the other a numpy array, or if
            either input is neither a DataFrame nor a numpy array.
        """
        if (
            isinstance(features, pd.DataFrame) and isinstance(performance, np.ndarray)
        ) or (
            isinstance(features, np.ndarray) and isinstance(performance, pd.DataFrame)
        ):
            raise ValueError(
                "Mixed input types (DataFrame and numpy array) are not allowed."
            )
        if isinstance(features, np.ndarray):
            features = pd.DataFrame(
                features,
                columns=pd.Index([f"f_{i}" for i in range(features.shape[1])]),
            )
        if isinstance(performance, np.ndarray):
            performance = pd.DataFrame(
                performance,
                columns=pd.Index([f"algo_{i}" for i in range(performance.shape[1])]),
            )

        if not isinstance(features, pd.DataFrame) or not isinstance(
            performance, pd.DataFrame
        ):
            raise ValueError(
                "Features and performance must be pandas DataFrames or numpy arrays."
            )

        if self.hierarchical_generator is not None:
            self.hierarchical_generator.fit(features, performance, algorithm_features)
            features = pd.concat(
                [features, self.hierarchical_generator.generate_features(features)],
                axis=1,
            )

        # Recorded after the concat above, so generated columns are included.
        self.algorithms = [str(a) for a in performance.columns]
        self.features = [str(f) for f in features.columns]
        self.algorithm_features = algorithm_features

        self._fit(features, performance, **kwargs)

    def predict(
        self,
        features: pd.DataFrame | np.ndarray | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]] | pd.Series | np.ndarray:
        """
        Predict algorithm selections/rankings.

        Parameters
        ----------
        features : pd.DataFrame or np.ndarray or None
            The input features for prediction. A numpy array is labelled with
            the column names recorded by ``fit`` (or ``f_<i>`` if none were
            recorded).
        performance : pd.DataFrame or None, default=None
            Partial performance data if available (e.g., for oracle selectors).

        Returns
        -------
        dict or pd.Series or np.ndarray
            Predicted selections in the specified prediction_mode.

        Raises
        ------
        ValueError
            If ``features`` has an unsupported type, if 'pandas' or 'numpy'
            mode is used without features, or if ``prediction_mode`` is
            unknown.
        """
        if features is None:
            df_features: pd.DataFrame | None = None
        elif isinstance(features, np.ndarray):
            cols = self._array_column_names(features.shape[1])
            df_features = pd.DataFrame(features, columns=pd.Index(cols))
        elif isinstance(features, pd.DataFrame):
            df_features = features
        else:
            raise ValueError(
                "Features must be a numpy array, pandas DataFrame, or None."
            )

        if self.hierarchical_generator is not None and df_features is not None:
            df_features = pd.concat(
                [
                    df_features,
                    self.hierarchical_generator.generate_features(df_features),
                ],
                axis=1,
            )

        # Call the internal _predict
        scheds = self._predict(df_features, performance=performance)

        if self.prediction_mode == "aslib":
            if self.feature_groups is None:
                return scheds

            # Prepend the feature groups to every instance's schedule.
            fg_steps = list(self.feature_groups)
            return {
                str(instance): fg_steps + list(scheds.get(str(instance), []))
                for instance in (
                    df_features.index if df_features is not None else scheds.keys()
                )
            }
        elif self.prediction_mode == "pandas":
            if df_features is None:
                raise ValueError("Pandas mode requires features.")
            return pd.Series(
                {
                    instance: self._top_algorithm(scheds, instance)
                    for instance in df_features.index
                }
            )
        elif self.prediction_mode == "numpy":
            if df_features is None:
                raise ValueError("Numpy mode requires features.")
            labels = [
                self._top_algorithm(scheds, instance)
                for instance in df_features.index
            ]
            encoder = OneHotEncoder(sparse_output=False, categories=[self.algorithms])
            return encoder.fit_transform(np.array(labels).reshape(-1, 1))
        else:
            raise ValueError(f"Unknown prediction_mode: {self.prediction_mode}")

    def save(self, path: str) -> None:
        """
        Save the selector instance.

        The base implementation does nothing.

        Parameters
        ----------
        path : str
            File path to save to.
        """
        pass

    @classmethod
    def load(cls, path: str) -> "AbstractSelector":
        """
        Load a selector instance.

        Parameters
        ----------
        path : str
            File path to load from.

        Returns
        -------
        AbstractSelector
            The loaded selector instance.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError(f"{cls.__name__} does not support loading from file.")

    @staticmethod
    def get_configuration_space(
        cs: ConfigurationSpace | None = None, **kwargs: Any
    ) -> ConfigurationSpace:
        """
        Get the configuration space.

        Parameters
        ----------
        cs : ConfigurationSpace or None, optional
            Base configuration space.
        **kwargs : Any
            Additional options.

        Returns
        -------
        ConfigurationSpace
            The configuration space.

        Raises
        ------
        RuntimeError
            If ``CONFIGSPACE_AVAILABLE`` is false.
        NotImplementedError
            Otherwise; subclasses must provide the implementation.
        """
        if not CONFIGSPACE_AVAILABLE:
            raise RuntimeError("ConfigSpace is not available.")
        raise NotImplementedError("Subclasses must implement get_configuration_space.")

    @staticmethod
    def get_from_configuration(configuration: Configuration) -> AbstractSelector:
        """
        Create an instance from a configuration.

        Parameters
        ----------
        configuration : Configuration
            The configuration object.

        Returns
        -------
        AbstractSelector
            The initialized selector.

        Raises
        ------
        RuntimeError
            If ``CONFIGSPACE_AVAILABLE`` is false.
        NotImplementedError
            Otherwise; subclasses must provide the implementation.
        """
        if not CONFIGSPACE_AVAILABLE:
            raise RuntimeError("ConfigSpace is not available.")
        raise NotImplementedError("Subclasses must implement get_from_configuration.")

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Internal fit implementation; must be overridden by subclasses.
        """
        raise NotImplementedError("Subclasses must implement _fit.")

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Internal predict implementation; must be overridden by subclasses.
        """
        raise NotImplementedError("Subclasses must implement _predict.")

    def _array_column_names(self, n_columns: int) -> list[str]:
        """
        Return the column labels for a raw array passed to ``predict``.
        """
        names = self.features
        if names and self.hierarchical_generator is not None:
            # ``fit`` records base columns followed by generated ones, while a
            # raw array only carries the base columns. Label it with the
            # leading names; the generator re-creates the rest in ``predict``.
            names = names[:n_columns]
        if names:
            return names
        return [f"f_{i}" for i in range(n_columns)]

    @staticmethod
    def _top_algorithm(
        scheds: dict[str, list[tuple[str, float]]], instance: Any
    ) -> str | None:
        """
        Return the first algorithm scheduled for ``instance``, or ``None``.
        """
        # A missing entry and an empty schedule are treated alike.
        schedule = scheds.get(str(instance))
        return schedule[0][0] if schedule else None

    @staticmethod
    def _add_hierarchical_generator_space(
        cs: ConfigurationSpace,
        hierarchical_generator: list[AbstractFeatureGenerator] | None = None,
        **kwargs: Any,
    ) -> ConfigurationSpace:
        """
        Add hierarchical generator options to the configuration space.

        Parameters
        ----------
        cs : ConfigurationSpace
            The configuration space to extend.
        hierarchical_generator : list[AbstractFeatureGenerator] or None, optional
            Candidate generators. When non-empty, a categorical
            ``hierarchical_generator`` choice is added (unless already
            present) and each generator's ``get_configuration_space`` is
            called with ``cs``.
        **kwargs : Any
            Forwarded to each generator's ``get_configuration_space``.

        Returns
        -------
        ConfigurationSpace
            The same ``cs`` object.

        Raises
        ------
        RuntimeError
            If ``CONFIGSPACE_AVAILABLE`` is false.
        """
        if not CONFIGSPACE_AVAILABLE:
            raise RuntimeError("ConfigSpace is not available.")
        if hierarchical_generator:
            if "hierarchical_generator" not in cs:
                cs.add(
                    Categorical(
                        name="hierarchical_generator", choices=hierarchical_generator
                    )
                )
            for g in hierarchical_generator:
                g.get_configuration_space(cs=cs, **kwargs)
        return cs

__init__(budget=None, maximize=False, feature_groups=None, hierarchical_generator=None, prediction_mode='aslib', **kwargs)

Initialize the AbstractSelector.

Parameters

budget : float or None, default=None The budget for the selector, if applicable. maximize : bool, default=False Indicates whether to maximize the performance metric. feature_groups : list[str] or None, default=None Groups of features to be considered during selection. hierarchical_generator : AbstractFeatureGenerator or None, default=None A generator for hierarchical features, if applicable. prediction_mode : str, default="aslib" Mode for predictions ('aslib', 'pandas', 'numpy'). **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/abstract_selector.py
def __init__(
    self,
    budget: float | None = None,
    maximize: bool = False,
    feature_groups: list[str] | None = None,
    hierarchical_generator: AbstractFeatureGenerator | None = None,
    prediction_mode: str = "aslib",
    **kwargs: Any,
) -> None:
    """
    Set up the selector's configuration and empty fit-time state.

    Parameters
    ----------
    budget : float or None, default=None
        The budget for the selector, if applicable; stored as ``float``.
    maximize : bool, default=False
        Indicates whether to maximize the performance metric.
    feature_groups : list[str] or None, default=None
        Groups of features to be considered during selection.
    hierarchical_generator : AbstractFeatureGenerator or None, default=None
        A generator for hierarchical features, if applicable.
    prediction_mode : str, default="aslib"
        Mode for predictions ('aslib', 'pandas', 'numpy').
    **kwargs : Any
        Additional keyword arguments; not used here.
    """
    # Coerced configuration values.
    self.maximize = bool(maximize)
    self.budget = None if budget is None else float(budget)
    self.prediction_mode = str(prediction_mode)

    # Values stored as given.
    self.feature_groups = feature_groups
    self.hierarchical_generator = hierarchical_generator

    # State that starts empty and is filled in by ``fit``.
    self.algorithm_features: pd.DataFrame | None = None
    self.algorithms: list[str] = []
    self.features: list[str] = []

fit(features, performance, algorithm_features=None, **kwargs)

Fit the selector.

Parameters

features : pd.DataFrame or np.ndarray The input features. performance : pd.DataFrame or np.ndarray The algorithm performance data. algorithm_features : pd.DataFrame or None, optional Additional features related to algorithms. **kwargs : Any Additional keyword arguments for fitting.

Source code in asf/selectors/abstract_selector.py
def fit(
    self,
    features: pd.DataFrame | np.ndarray,
    performance: pd.DataFrame | np.ndarray,
    algorithm_features: pd.DataFrame | None = None,
    **kwargs: Any,
) -> None:
    """
    Fit the selector.

    Parameters
    ----------
    features : pd.DataFrame or np.ndarray
        The input features.
    performance : pd.DataFrame or np.ndarray
        The algorithm performance data.
    algorithm_features : pd.DataFrame or None, optional
        Additional features related to algorithms.
    **kwargs : Any
        Additional keyword arguments for fitting.
    """
    if (
        isinstance(features, pd.DataFrame) and isinstance(performance, np.ndarray)
    ) or (
        isinstance(features, np.ndarray) and isinstance(performance, pd.DataFrame)
    ):
        raise ValueError(
            "Mixed input types (DataFrame and numpy array) are not allowed."
        )
    if isinstance(features, np.ndarray):
        features = pd.DataFrame(
            features,
            columns=pd.Index([f"f_{i}" for i in range(features.shape[1])]),
        )
    if isinstance(performance, np.ndarray):
        performance = pd.DataFrame(
            performance,
            columns=pd.Index([f"algo_{i}" for i in range(performance.shape[1])]),
        )

    if not isinstance(features, pd.DataFrame) or not isinstance(
        performance, pd.DataFrame
    ):
        raise ValueError(
            "Features and performance must be pandas DataFrames or numpy arrays."
        )

    if self.hierarchical_generator is not None:
        self.hierarchical_generator.fit(features, performance, algorithm_features)
        features = pd.concat(
            [features, self.hierarchical_generator.generate_features(features)],
            axis=1,
        )

    self.algorithms = [str(a) for a in performance.columns]
    self.features = [str(f) for f in features.columns]
    self.algorithm_features = algorithm_features

    self._fit(features, performance, **kwargs)

get_configuration_space(cs=None, **kwargs) staticmethod

Get the configuration space.

Parameters

cs : ConfigurationSpace or None, optional Base configuration space. **kwargs : Any Additional options.

Returns

ConfigurationSpace The configuration space.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_configuration_space(
    cs: ConfigurationSpace | None = None, **kwargs: Any
) -> ConfigurationSpace:
    """
    Get the configuration space.

    This implementation never returns; it only reports why no space is
    available.

    Parameters
    ----------
    cs : ConfigurationSpace or None, optional
        Base configuration space.
    **kwargs : Any
        Additional options.

    Returns
    -------
    ConfigurationSpace
        The configuration space.

    Raises
    ------
    RuntimeError
        If ``CONFIGSPACE_AVAILABLE`` is false.
    NotImplementedError
        Otherwise; subclasses must provide the implementation.
    """
    if CONFIGSPACE_AVAILABLE:
        raise NotImplementedError("Subclasses must implement get_configuration_space.")
    raise RuntimeError("ConfigSpace is not available.")

get_from_configuration(configuration) staticmethod

Create an instance from a configuration.

Parameters

configuration : Configuration The configuration object.

Returns

AbstractSelector The initialized selector.

Source code in asf/selectors/abstract_selector.py
@staticmethod
def get_from_configuration(configuration: Configuration) -> AbstractSelector:
    """
    Create an instance from a configuration.

    This implementation never returns; it only reports why no instance can
    be created.

    Parameters
    ----------
    configuration : Configuration
        The configuration object.

    Returns
    -------
    AbstractSelector
        The initialized selector.

    Raises
    ------
    RuntimeError
        If ``CONFIGSPACE_AVAILABLE`` is false.
    NotImplementedError
        Otherwise; subclasses must provide the implementation.
    """
    if CONFIGSPACE_AVAILABLE:
        raise NotImplementedError("Subclasses must implement get_from_configuration.")
    raise RuntimeError("ConfigSpace is not available.")

load(path) classmethod

Load a selector instance.

Parameters

path : str File path to load from.

Returns

AbstractSelector The loaded selector instance.

Source code in asf/selectors/abstract_selector.py
@classmethod
def load(cls, path: str) -> "AbstractSelector":
    """
    Load a selector instance.

    Parameters
    ----------
    path : str
        File path to load from.

    Returns
    -------
    AbstractSelector
        The loaded selector instance.

    Raises
    ------
    NotImplementedError
        Always, in this implementation; the message names the calling class.
    """
    message = f"{cls.__name__} does not support loading from file."
    raise NotImplementedError(message)

predict(features, performance=None)

Predict algorithm selections/rankings.

Parameters

features : pd.DataFrame or np.ndarray or None The input features for prediction. performance : pd.DataFrame or None, default=None Partial performance data if available (e.g., for oracle selectors).

Returns

dict or pd.Series or np.ndarray Predicted selections in the specified prediction_mode.

Source code in asf/selectors/abstract_selector.py
def predict(
    self,
    features: pd.DataFrame | np.ndarray | None,
    performance: pd.DataFrame | None = None,
) -> dict[str, list[tuple[str, float]]] | pd.Series | np.ndarray:
    """
    Predict algorithm selections/rankings.

    Parameters
    ----------
    features : pd.DataFrame or np.ndarray or None
        The input features for prediction.
    performance : pd.DataFrame or None, default=None
        Partial performance data if available (e.g., for oracle selectors).

    Returns
    -------
    dict or pd.Series or np.ndarray
        Predicted selections in the specified prediction_mode.
    """
    if features is None:
        df_features: pd.DataFrame | None = None
    elif isinstance(features, np.ndarray):
        cols = (
            self.features
            if self.features
            else [f"f_{i}" for i in range(features.shape[1])]
        )
        df_features = pd.DataFrame(features, columns=pd.Index(cols))
    elif isinstance(features, pd.DataFrame):
        df_features = features
    else:
        raise ValueError(
            "Features must be a numpy array, pandas DataFrame, or None."
        )

    if self.hierarchical_generator is not None and df_features is not None:
        df_features = pd.concat(
            [
                df_features,
                self.hierarchical_generator.generate_features(df_features),
            ],
            axis=1,
        )

    # Call the internal _predict
    scheds = self._predict(df_features, performance=performance)

    if self.prediction_mode == "aslib":
        if self.feature_groups is None:
            return scheds

        fg_steps = list(self.feature_groups)
        return {
            str(instance): fg_steps + list(scheds.get(str(instance), []))
            for instance in (
                df_features.index if df_features is not None else scheds.keys()
            )
        }
    elif self.prediction_mode == "pandas":
        if df_features is None:
            raise ValueError("Pandas mode requires features.")
        return pd.Series(
            {
                instance: scheds.get(str(instance), [(None, 0.0)])[0][0]
                for instance in df_features.index
            }
        )
    elif self.prediction_mode == "numpy":
        if df_features is None:
            raise ValueError("Numpy mode requires features.")
        labels = [
            scheds.get(str(instance), [(None, 0.0)])[0][0]
            for instance in df_features.index
        ]
        encoder = OneHotEncoder(sparse_output=False, categories=[self.algorithms])
        return encoder.fit_transform(np.array(labels).reshape(-1, 1))
    else:
        raise ValueError(f"Unknown prediction_mode: {self.prediction_mode}")

save(path)

Save the selector instance.

Parameters

path : str File path to save to.

Source code in asf/selectors/abstract_selector.py
def save(self, path: str) -> None:
    """
    Save the selector instance.

    This implementation does nothing and returns ``None``.

    Parameters
    ----------
    path : str
        File path to save to.
    """
    return None

CSHCSelector

Bases: ConfigurableMixin, AbstractSelector

Confidence-Switching Hybrid Selector.

A meta-selector that uses a primary selector along with guardian models to predict the success probability of the primary's choice.

Attributes

primary_selector : AbstractSelector The primary selector model. backup_selector : AbstractSelector or None The backup selector model. n_folds : int Number of folds for cross-validation to find the optimal threshold. random_state : int Random seed for reproducibility. guardian_kwargs : dict[str, Any] Keyword arguments for the guardian models (RandomForestClassifier). threshold_grid : np.ndarray Grid of thresholds to evaluate. guardians : dict[str, RandomForestClassifier] Trained guardian models for each algorithm. threshold : float The learned optimal threshold for switching.

Source code in asf/selectors/cshc.py
class CSHCSelector(ConfigurableMixin, AbstractSelector):
    """
    Confidence-Switching Hybrid Selector.

    A meta-selector that uses a primary selector along with guardian models to
    predict the success probability of the primary's choice.

    Attributes
    ----------
    primary_selector : AbstractSelector
        The primary selector model.
    backup_selector : AbstractSelector or None
        The backup selector model.
    n_folds : int
        Number of folds for cross-validation to find the optimal threshold.
    random_state : int
        Random seed for reproducibility.
    guardian_kwargs : dict[str, Any]
        Keyword arguments for the guardian models (RandomForestClassifier).
    threshold_grid : np.ndarray
        Grid of thresholds to evaluate.
    guardians : dict[str, RandomForestClassifier]
        Trained guardian models for each algorithm.
    threshold : float
        The learned optimal threshold for switching.
    """

    PREFIX = "cshc"
    RETURN_TYPE = "single"

    def __init__(
        self,
        primary_selector: AbstractSelector | Callable[[], AbstractSelector],
        backup_selector: AbstractSelector
        | Callable[[], AbstractSelector]
        | None = None,
        n_estimators: int = 100,
        guardian_kwargs: dict[str, Any] | None = None,
        n_folds: int = 5,
        threshold_grid: np.ndarray | None = None,
        random_state: int = 42,
        **kwargs: Any,
    ) -> None:
        """
        Set up the hybrid selector and its guardian configuration.

        Parameters
        ----------
        primary_selector : AbstractSelector or Callable
            Selector whose choice is checked by the guardians. A callable is
            invoked once (without arguments) and its result is stored.
        backup_selector : AbstractSelector or Callable or None, default=None
            Optional selector stored for fallback use. A callable is invoked
            once (without arguments) and its result is stored.
        n_estimators : int, default=100
            Default ``n_estimators`` for the guardian models; only applied
            when ``guardian_kwargs`` does not already contain that key.
        guardian_kwargs : dict or None, default=None
            Extra keyword arguments for the guardian models. The mapping is
            copied, so the caller's dict is never modified.
        n_folds : int, default=5
            Number of cross-validation folds.
        threshold_grid : np.ndarray or None, default=None
            Candidate thresholds. ``None`` selects 99 evenly spaced values
            from 0.01 to 0.99.
        random_state : int, default=42
            Random seed; also used as the guardians' default ``random_state``.
        **kwargs : Any
            Forwarded to the parent initializer.
        """
        super().__init__(**kwargs)

        # Factories are resolved here so the attributes always hold the
        # resulting objects rather than the callables themselves.
        self.primary_selector = (
            primary_selector() if callable(primary_selector) else primary_selector
        )
        self.backup_selector = (
            backup_selector() if callable(backup_selector) else backup_selector
        )

        self.n_folds = int(n_folds)
        self.random_state = int(random_state)

        # Explicit entries in guardian_kwargs take precedence over the
        # n_estimators / random_state arguments.
        forest_options = dict(guardian_kwargs) if guardian_kwargs else {}
        for option, fallback in (
            ("n_estimators", n_estimators),
            ("random_state", random_state),
        ):
            forest_options.setdefault(option, int(fallback))
        self.guardian_kwargs = forest_options

        if threshold_grid is None:
            threshold_grid = np.linspace(0.01, 0.99, 99)
        self.threshold_grid = threshold_grid

        # Learned state, populated by fitting.
        self.guardians: dict[str, RandomForestClassifier] = {}
        self.threshold: float = 0.5

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Train one guardian model per algorithm and find the optimal threshold.

        The threshold is tuned on out-of-fold data. For every fold, a deep
        copy of the primary selector and a fresh set of guardians are trained
        on the training part; for each validation instance the guardian's
        success probability for the primary's choice is paired with whether
        that choice actually met ``self.budget``. The grid value with the
        lowest false-negative / true-negative ratio becomes
        ``self.threshold`` (0.5 if no grid value yields a finite ratio).
        Afterwards the guardians, the primary selector and the optional
        backup selector are fitted on all data.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The performance data.
        **kwargs : Any
            Additional keyword arguments.

        Raises
        ------
        TypeError
            If the primary selector's ``predict`` does not return a dict.
        """
        self.algorithms = [str(a) for a in performance.columns]
        n_instances = len(features)

        oof_probs: list[float] = []
        oof_true_success: list[int] = []

        # KFold needs at least two samples to form a split. With fewer
        # instances the out-of-fold estimation is skipped and the threshold
        # search below falls back to its default of 0.5.
        if n_instances >= 2:
            kf = KFold(
                n_splits=min(self.n_folds, n_instances),
                shuffle=True,
                random_state=self.random_state,
            )
            fold_splits = kf.split(features)
        else:
            fold_splits = []

        for train_idx, val_idx in fold_splits:
            X_train, X_val = features.iloc[train_idx], features.iloc[val_idx]
            Y_train, Y_val = performance.iloc[train_idx], performance.iloc[val_idx]

            # Work on a copy so the fold training never touches the selector
            # that is fitted on the full data at the end.
            sel_copy = copy.deepcopy(self.primary_selector)
            sel_copy.fit(X_train, Y_train)
            primary_preds = sel_copy.predict(X_val)
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if not isinstance(primary_preds, dict):
                raise TypeError(
                    "CSHCSelector expects the primary selector to return a dict "
                    f"of schedules, got {type(primary_preds).__name__}."
                )

            fold_guardians = {}
            for algo in self.algorithms:
                y_algo_train = (Y_train[algo] <= self.budget).astype(int)
                clf = RandomForestClassifier(**self.guardian_kwargs)
                clf.fit(X_train, y_algo_train)
                fold_guardians[algo] = clf

            for inst_name, pred_list in primary_preds.items():
                if not pred_list:
                    continue

                chosen_algo = pred_list[0][0]
                if Y_val.index.dtype != object:
                    # Convert back to original index type if necessary
                    orig_inst_name = Y_val.index.dtype.type(inst_name)
                else:
                    orig_inst_name = inst_name

                runtime = Y_val.at[orig_inst_name, chosen_algo]
                inst_feature_df = X_val.loc[[orig_inst_name]]
                # Guardians are keyed by str(algorithm), as in _predict.
                guardian_for_choice = fold_guardians.get(str(chosen_algo))

                # ``is None`` rather than truthiness: an ensemble's truth
                # value is derived from its length, not from its presence.
                if guardian_for_choice is None:
                    continue

                if len(guardian_for_choice.classes_) == 1:
                    # Only one class was seen in training, so predict_proba
                    # has a single column; the probability is degenerate.
                    class_val = guardian_for_choice.classes_[0]
                    prob = 1.0 if class_val == 1 else 0.0
                else:
                    prob = guardian_for_choice.predict_proba(inst_feature_df)[0, 1]

                oof_probs.append(float(prob))
                oof_true_success.append(
                    1 if pd.notna(runtime) and runtime <= self.budget else 0
                )

        oof_probs_arr = np.array(oof_probs)
        oof_true_success_arr = np.array(oof_true_success)

        # Pick the first grid value minimising FN / TN among the instances
        # the guardian would reject (predicted 0).
        best_t, best_ratio = 0.5, float("inf")
        for t in self.threshold_grid:
            preds = (oof_probs_arr >= t).astype(int)
            fn = ((oof_true_success_arr == 1) & (preds == 0)).sum()
            tn = ((oof_true_success_arr == 0) & (preds == 0)).sum()
            ratio = fn / tn if tn > 0 else float("inf")
            if ratio < best_ratio:
                best_ratio, best_t = float(ratio), float(t)
        self.threshold = best_t

        # Build a fresh mapping so guardians of algorithms that are no longer
        # present do not survive a refit.
        guardians: dict[str, RandomForestClassifier] = {}
        for algo in self.algorithms:
            y_algo = (performance[algo] <= self.budget).astype(int)
            guardians[str(algo)] = RandomForestClassifier(
                **self.guardian_kwargs
            ).fit(features, y_algo)
        self.guardians = guardians

        self.primary_selector.fit(features, performance)
        if self.backup_selector is not None:
            self.backup_selector.fit(features, performance)

    def _predict(
        self, features: pd.DataFrame | None, performance: pd.DataFrame | None = None
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict using the guardian/backup logic.

        For every instance the primary selector's first choice is kept when
        its guardian's success probability reaches ``self.threshold``.
        Otherwise the backup selector's first choice is used; without a
        backup selector, the algorithm whose guardian reports the highest
        success probability is chosen. If the primary selector returns no
        schedule for an instance, the backup selector's first choice is used
        when available, else an empty schedule.

        Parameters
        ----------
        features : pd.DataFrame or None
            The query instance features.
        performance : pd.DataFrame or None, default=None
            The performance data (not used by this method).

        Returns
        -------
        dict
            Mapping from instance names to algorithm schedules.

        Raises
        ------
        ValueError
            If ``features`` is None.
        RuntimeError
            If no guardians have been trained yet.
        """
        if features is None:
            raise ValueError("CSHCSelector requires features for prediction.")
        if not self.guardians:
            raise RuntimeError("The selector has not been fitted yet.")

        budget = float(self.budget or 0)
        # Explicit None test: a selector's truth value is not a reliable
        # indicator of whether one was configured.
        has_backup = self.backup_selector is not None

        def success_probability(guardian: Any, row_df: pd.DataFrame) -> float:
            """Return the guardian's probability that its algorithm succeeds."""
            if len(guardian.classes_) == 1:
                # Single-class guardian: predict_proba has one column only.
                return 1.0 if guardian.classes_[0] == 1 else 0.0
            return float(guardian.predict_proba(row_df)[0, 1])

        def backup_schedule(name: str, row_df: pd.DataFrame) -> list[tuple[str, float]]:
            """Return the backup selector's first choice for one instance."""
            backup_pred = cast(dict[str, Any], self.backup_selector.predict(row_df))
            ranked = backup_pred.get(name, [])
            return [(str(ranked[0][0]), budget)] if ranked else []

        primary_preds = cast(dict[str, Any], self.primary_selector.predict(features))
        final_preds: dict[str, list[tuple[str, float]]] = {}

        for inst_name in features.index:
            key = str(inst_name)
            preds = primary_preds.get(key, [])
            if not isinstance(preds, list):
                preds = []

            # Normalise entries to (algorithm, value) tuples.
            pred_list: list[tuple[str, float]] = [
                (str(p[0]), float(p[1])) if isinstance(p, tuple) else (str(p), budget)
                for p in preds
            ]

            inst_feature_df = features.loc[[inst_name]]

            if not pred_list:
                final_preds[key] = (
                    backup_schedule(key, inst_feature_df) if has_backup else []
                )
                continue

            chosen_algo_primary = pred_list[0][0]
            guardian_for_choice = self.guardians.get(chosen_algo_primary)
            # An algorithm without a guardian is treated as certain to fail.
            prob_success = (
                success_probability(guardian_for_choice, inst_feature_df)
                if guardian_for_choice is not None
                else 0.0
            )

            if prob_success >= self.threshold:
                final_preds[key] = [(chosen_algo_primary, budget)]
            elif has_backup:
                final_preds[key] = backup_schedule(key, inst_feature_df)
            else:
                # No backup: take the algorithm the guardians trust most,
                # keeping the primary's choice if nothing beats -1.0.
                best_algo = chosen_algo_primary
                max_prob = -1.0
                for algo, guardian in self.guardians.items():
                    prob = success_probability(guardian, inst_feature_df)
                    if prob > max_prob:
                        max_prob = prob
                        best_algo = algo
                final_preds[key] = [(str(best_algo), budget)]

        return final_preds

    @staticmethod
    def _define_hyperparameters(
        candidate_selectors: list[type] | None = None, **kwargs: Any
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for CSHCSelector.

        The space contains the primary selector choice, a switch for using a
        backup selector, the backup selector choice (only active when the
        switch is True), ``n_estimators`` and ``n_folds``.

        Parameters
        ----------
        candidate_selectors : list[type] or None, default=None
            List of selector classes to choose from. ``None`` or an empty
            list yields an empty space.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens).
        """
        # An empty list is treated like None: there is no first element to
        # use as the default, so no space can be built.
        if not CONFIGSPACE_AVAILABLE or not candidate_selectors:
            return [], [], []

        default_selector = candidate_selectors[0]

        primary_selector_param = ClassChoice(
            name="primary_selector",
            choices=candidate_selectors,
            default=default_selector,
        )
        use_backup = Categorical(
            name="use_backup_selector",
            items=[True, False],
            default=False,
        )
        backup_selector_param = ClassChoice(
            name="backup_selector",
            choices=candidate_selectors,
            default=default_selector,
        )
        n_estimators_param = Integer(
            name="n_estimators",
            bounds=(10, 200),
            default=100,
        )
        n_folds_param = Integer(
            name="n_folds",
            bounds=(2, 10),
            default=5,
        )

        params = [
            primary_selector_param,
            use_backup,
            backup_selector_param,
            n_estimators_param,
            n_folds_param,
        ]

        # The backup selector choice is only active when a backup is requested.
        conditions = [
            EqualsCondition(backup_selector_param, use_backup, True),  # type: ignore[arg-type]
        ]

        return params, conditions, []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[CSHCSelector]:
        """
        Build a CSHCSelector factory from a clean configuration.

        The ``use_backup_selector`` switch is consumed here: it is removed
        from the configuration, and when it is absent or falsy the
        ``backup_selector`` entry is forced to ``None``.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Extra constructor arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for CSHCSelector.
        """
        config = clean_config.copy()

        backup_requested = config.pop("use_backup_selector", False)
        if not backup_requested:
            config["backup_selector"] = None

        config.update(kwargs)
        return partial(CSHCSelector, **config)

__init__(primary_selector, backup_selector=None, n_estimators=100, guardian_kwargs=None, n_folds=5, threshold_grid=None, random_state=42, **kwargs)

Initialize the CSHCSelector.

Parameters

- primary_selector : AbstractSelector or Callable — The primary selector model.
- backup_selector : AbstractSelector or Callable or None, default=None — The backup selector model.
- n_estimators : int, default=100 — Number of estimators for the guardian models.
- guardian_kwargs : dict or None, default=None — Additional keyword arguments for guardian models.
- n_folds : int, default=5 — Number of folds for cross-validation.
- threshold_grid : np.ndarray or None, default=None — Grid of thresholds to evaluate.
- random_state : int, default=42 — Random seed.
- **kwargs : Any — Additional keyword arguments.

Source code in asf/selectors/cshc.py
def __init__(
    self,
    primary_selector: AbstractSelector | Callable[[], AbstractSelector],
    backup_selector: AbstractSelector
    | Callable[[], AbstractSelector]
    | None = None,
    n_estimators: int = 100,
    guardian_kwargs: dict[str, Any] | None = None,
    n_folds: int = 5,
    threshold_grid: np.ndarray | None = None,
    random_state: int = 42,
    **kwargs: Any,
) -> None:
    """
    Set up the hybrid selector and its guardian configuration.

    Parameters
    ----------
    primary_selector : AbstractSelector or Callable
        Selector whose choice is checked by the guardians. A callable is
        invoked once (without arguments) and its result is stored.
    backup_selector : AbstractSelector or Callable or None, default=None
        Optional selector stored for fallback use. A callable is invoked
        once (without arguments) and its result is stored.
    n_estimators : int, default=100
        Default ``n_estimators`` for the guardian models; only applied
        when ``guardian_kwargs`` does not already contain that key.
    guardian_kwargs : dict or None, default=None
        Extra keyword arguments for the guardian models. The mapping is
        copied, so the caller's dict is never modified.
    n_folds : int, default=5
        Number of cross-validation folds.
    threshold_grid : np.ndarray or None, default=None
        Candidate thresholds. ``None`` selects 99 evenly spaced values
        from 0.01 to 0.99.
    random_state : int, default=42
        Random seed; also used as the guardians' default ``random_state``.
    **kwargs : Any
        Forwarded to the parent initializer.
    """
    super().__init__(**kwargs)

    # Factories are resolved here so the attributes always hold the
    # resulting objects rather than the callables themselves.
    self.primary_selector = (
        primary_selector() if callable(primary_selector) else primary_selector
    )
    self.backup_selector = (
        backup_selector() if callable(backup_selector) else backup_selector
    )

    self.n_folds = int(n_folds)
    self.random_state = int(random_state)

    # Explicit entries in guardian_kwargs take precedence over the
    # n_estimators / random_state arguments.
    forest_options = dict(guardian_kwargs) if guardian_kwargs else {}
    for option, fallback in (
        ("n_estimators", n_estimators),
        ("random_state", random_state),
    ):
        forest_options.setdefault(option, int(fallback))
    self.guardian_kwargs = forest_options

    if threshold_grid is None:
        threshold_grid = np.linspace(0.01, 0.99, 99)
    self.threshold_grid = threshold_grid

    # Learned state, populated by fitting.
    self.guardians: dict[str, RandomForestClassifier] = {}
    self.threshold: float = 0.5

CollaborativeFilteringSelector

Bases: ConfigurableMixin, AbstractModelBasedSelector

Collaborative filtering selector using SGD matrix factorization (ALORS-style).

Attributes

- n_components : int — Number of latent factors.
- n_iter : int — Number of iterations for SGD.
- lr : float — Learning rate for SGD.
- reg : float — Regularization strength.
- random_state : int — Random seed for initialization.
- U : np.ndarray or None — Instance latent factors.
- V : np.ndarray or None — Algorithm latent factors.
- performance_matrix : pd.DataFrame or None — The performance data used for training.
- model : Any or None — The regressor model to predict latent factors from features.
- mu : float or None — Global mean of observed performance entries.
- b_U : np.ndarray or None — Instance biases.
- b_V : np.ndarray or None — Algorithm biases.

Source code in asf/selectors/collaborative_filtering_selector.py
class CollaborativeFilteringSelector(ConfigurableMixin, AbstractModelBasedSelector):
    """
    Collaborative filtering selector using SGD matrix factorization (ALORS-style).

    Attributes
    ----------
    n_components : int
        Number of latent factors.
    n_iter : int
        Number of iterations for SGD.
    lr : float
        Learning rate for SGD.
    reg : float
        Regularization strength.
    random_state : int
        Random seed for initialization.
    U : np.ndarray or None
        Instance latent factors.
    V : np.ndarray or None
        Algorithm latent factors.
    performance_matrix : pd.DataFrame or None
        The performance data used for training.
    model : Any or None
        The regressor model to predict latent factors from features.
    mu : float or None
        Global mean of observed performance entries.
    b_U : np.ndarray or None
        Instance biases.
    b_V : np.ndarray or None
        Algorithm biases.
    """

    PREFIX = "collaborative_filtering"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type | Callable[..., Any] = RidgeRegressorWrapper,
        n_components: int = 10,
        n_iter: int = 100,
        lr: float = 0.001,
        reg: float = 0.1,
        random_state: int = 42,
        **kwargs: Any,
    ) -> None:
        """
        Store the factorization hyperparameters and reset all learned state.

        Parameters
        ----------
        model_class : type or Callable, default=RidgeRegressorWrapper
            Regressor wrapper used to map instance features to latent factors.
        n_components : int, default=10
            Dimensionality of the latent factor vectors.
        n_iter : int, default=100
            Number of SGD passes.
        lr : float, default=0.001
            SGD learning rate.
        reg : float, default=0.1
            Regularization strength.
        random_state : int, default=42
            Seed for the random initialization.
        **kwargs : Any
            Passed through to the parent initializer.
        """
        super().__init__(model_class=model_class, **kwargs)

        # Hyperparameters, coerced to plain Python numbers.
        self.n_components, self.n_iter = int(n_components), int(n_iter)
        self.lr, self.reg = float(lr), float(reg)
        self.random_state = int(random_state)

        # Latent factors and training data; None until fitted.
        self.U: np.ndarray | None = None
        self.V: np.ndarray | None = None
        self.performance_matrix: pd.DataFrame | None = None
        self.model: Any | None = None

        # Global mean and bias terms; None until fitted.
        self.mu: float | None = None
        self.b_U: np.ndarray | None = None
        self.b_V: np.ndarray | None = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Fit the collaborative filtering model.

        Observed performance entries are approximated as
        ``mu + b_U[i] + b_V[j] + U[i] . V[j]`` and the factors and biases are
        learned with per-entry SGD for ``n_iter`` passes. A regressor built
        from ``self.model_class`` is then fitted to map instance features to
        the learned instance factors ``U``.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data; NaN marks unobserved entries.
        **kwargs : Any
            Additional keyword arguments.

        Raises
        ------
        RuntimeError
            If ``self.model_class()`` returns None.
        """
        self.algorithms = [str(a) for a in performance.columns]
        self.performance_matrix = performance.copy()
        rng = np.random.RandomState(self.random_state)

        n_instances, n_algorithms = performance.shape
        self.U = rng.normal(scale=0.1, size=(n_instances, self.n_components)).astype(
            float
        )
        self.V = rng.normal(scale=0.1, size=(n_algorithms, self.n_components)).astype(
            float
        )

        observed = ~performance.isna()
        rows, cols = np.where(observed.values)
        vals = performance.values

        # With no observed entry at all, nanmean would yield NaN (and a
        # warning) and poison every later score; use a neutral mean instead.
        self.mu = float(np.nanmean(vals)) if rows.size else 0.0
        self.b_U = np.zeros(n_instances, dtype=float)
        self.b_V = np.zeros(n_algorithms, dtype=float)

        # Bind loop invariants once; the arrays are updated in place, so the
        # attributes see every change.
        U, V, b_U, b_V = self.U, self.V, self.b_U, self.b_V
        mu, lr, reg = self.mu, self.lr, self.reg

        for _ in range(self.n_iter):
            for i, j in zip(rows, cols):
                pred = mu + b_U[i] + b_V[j] + float(np.dot(U[i], V[j]))

                # Clip the residual so a single extreme entry cannot blow up
                # the step.
                err = np.clip(float(vals[i, j]) - pred, -10.0, 10.0)

                # Both factor gradients are evaluated at the pre-step values:
                # the V[j] update must not see the already-updated U[i].
                u_prev = U[i].copy()
                U[i] += lr * (err * V[j] - reg * u_prev)
                V[j] += lr * (err * u_prev - reg * V[j])
                b_U[i] += lr * (err - reg * b_U[i])
                b_V[j] += lr * (err - reg * b_V[j])

        self.model = self.model_class()
        if self.model is None:
            raise RuntimeError("Model could not be initialized.")

        # The regressor maps raw features to the learned instance factors.
        self.model.fit(features.values, self.U)

    def _predict_cold_start(
        self, instance_features: pd.Series, instance_name: str
    ) -> tuple[str, float]:
        """
        Score all algorithms for one instance from its features alone.

        The fitted regressor predicts the instance's latent factors, which
        are combined with the algorithm factors, the algorithm biases and the
        global mean. The algorithm with the lowest score is returned.

        Parameters
        ----------
        instance_features : pd.Series
            Feature values of the instance; the columns in ``self.features``
            are selected from it.
        instance_name : str
            Name of the instance (not used in the computation).

        Returns
        -------
        tuple
            Tuple of (best_algorithm, score).

        Raises
        ------
        RuntimeError
            If the model, ``V`` or ``b_V`` is not available yet.
        """
        required = (self.model, self.V, self.b_V)
        if any(part is None for part in required):
            raise RuntimeError("Model has not been fitted.")

        # A single row, shaped (1, n_features), for the regressor.
        row = instance_features[self.features].values.reshape(1, -1)
        latent = self.model.predict(row)

        baseline = float(self.mu or 0)
        interaction = np.dot(latent, self.V.T).flatten()
        scores = np.asarray(baseline + self.b_V + interaction, dtype=float).flatten()

        winner = int(np.argmin(scores))
        return str(self.algorithms[winner]), float(scores[winner])

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict best algorithm for instances.

        Three situations are handled:

        1. Neither ``features`` nor ``performance`` is given: the training
           instances are scored from the learned factors.
        2. ``performance`` is given (warm start): for each row with at least
           one observed value, instance factors are refined with a short SGD
           run against the fixed algorithm factors. Rows without any
           observation use the feature-based cold start when ``features`` is
           available, otherwise the algorithm with the best mean training
           performance.
        3. Only ``features`` is given: feature-based cold start.

        In every case the algorithm with the lowest score is selected.
        Warm-start rows are read by position, so ``performance`` columns are
        expected in the same order as the training performance columns.

        Parameters
        ----------
        features : pd.DataFrame or None, default=None
            The input features.
        performance : pd.DataFrame or None, default=None
            The performance data.

        Returns
        -------
        dict
            Mapping from instance names to algorithm schedules.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if (
            self.U is None
            or self.V is None
            or self.performance_matrix is None
            or self.b_U is None
            or self.b_V is None
        ):
            raise ValueError("Model has not been fitted.")

        mu = float(self.mu or 0)
        budget = float(self.budget or 0)
        predictions: dict[str, list[tuple[str, float]]] = {}

        # Case 1: Return best algorithm for training instances
        if features is None and performance is None:
            pred_matrix = (
                mu + self.b_U[:, None] + self.b_V[None, :] + (self.U @ self.V.T)
            )
            for idx, instance in enumerate(self.performance_matrix.index):
                scores = np.asarray(pred_matrix[idx], dtype=float).flatten()
                best_idx = int(np.argmin(scores))
                predictions[str(instance)] = [(str(self.algorithms[best_idx]), budget)]
            return predictions

        # Case 3: Cold-start prediction using only features
        if performance is None:
            for instance in features.index:
                best_algo, _ = self._predict_cold_start(
                    features.loc[instance], str(instance)
                )
                predictions[str(instance)] = [(best_algo, budget)]
            return predictions

        # Case 2: Warm-start prediction with some observed performance
        rng = np.random.RandomState(self.random_state)
        fallback_algo: str | None = None  # computed lazily, at most once

        for instance in performance.index:
            perf_row = performance.loc[instance]

            if not perf_row.isnull().all():
                u = rng.normal(scale=0.1, size=(self.n_components,)).astype(float)
                # SGD refinement for instance factors
                for _ in range(20):
                    for j in range(len(self.algorithms)):
                        r_ij = perf_row.iloc[j]
                        if pd.isna(r_ij):
                            continue
                        pred = mu + self.b_V[j] + float(np.dot(u, self.V[j]))
                        err = float(r_ij) - pred
                        u += self.lr * (err * self.V[j] - self.reg * u)

                scores = mu + self.b_V + np.dot(u, self.V.T).flatten()
                scores = np.asarray(scores, dtype=float).flatten()
                best_idx = int(np.argmin(scores))
                predictions[str(instance)] = [(str(self.algorithms[best_idx]), budget)]
            elif features is not None:
                best_algo, _ = self._predict_cold_start(
                    features.loc[instance], str(instance)
                )
                predictions[str(instance)] = [(best_algo, budget)]
            else:
                # Fallback to global average if nothing else available
                if fallback_algo is None:
                    avg_scores = np.asarray(
                        self.performance_matrix.mean(axis=0).values, dtype=float
                    )
                    # Plain argmin would pick a NaN mean (an algorithm with no
                    # observations) as the "best"; skip those columns. With no
                    # usable column at all, fall back to the first algorithm.
                    if np.isnan(avg_scores).all():
                        best_idx = 0
                    else:
                        best_idx = int(np.nanargmin(avg_scores))
                    fallback_algo = str(self.algorithms[best_idx])
                predictions[str(instance)] = [(fallback_algo, budget)]

        return predictions

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type] | None = None, **kwargs: Any
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for CollaborativeFilteringSelector.

        Parameters
        ----------
        model_class : list[type] or None, default=None
            List of model classes. ``None`` or an empty list falls back to
            ``[RidgeRegressorWrapper]``.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens).
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        # An empty list is treated like None; otherwise there would be no
        # first element to use as the default choice.
        if not model_class:
            model_class = [RidgeRegressorWrapper]

        model_class_param = ClassChoice(
            name="model_class",
            choices=model_class,
            default=model_class[0],
        )
        n_components_param = Integer(
            name="n_components",
            bounds=(5, 50),
            default=10,
        )
        n_iter_param = Integer(
            name="n_iter",
            bounds=(50, 500),
            default=100,
        )
        # Learning rate and regularization are sampled on a log scale.
        lr_param = Float(
            name="lr",
            bounds=(1e-5, 1e-1),
            log=True,
            default=0.001,
        )
        reg_param = Float(
            name="reg",
            bounds=(1e-4, 1.0),
            log=True,
            default=0.1,
        )

        params = [
            model_class_param,
            n_components_param,
            n_iter_param,
            lr_param,
            reg_param,
        ]

        return params, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[CollaborativeFilteringSelector]:
        """
        Build a CollaborativeFilteringSelector factory from a configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is not modified.
        **kwargs : Any
            Extra constructor arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for CollaborativeFilteringSelector.
        """
        # Later entries win, so kwargs take precedence over clean_config.
        merged = {**clean_config, **kwargs}
        return partial(CollaborativeFilteringSelector, **merged)

__init__(model_class=RidgeRegressorWrapper, n_components=10, n_iter=100, lr=0.001, reg=0.1, random_state=42, **kwargs)

Initialize the CollaborativeFilteringSelector.

Parameters

- model_class : type or Callable, default=RidgeRegressorWrapper — The regressor wrapper to predict latent factors from features.
- n_components : int, default=10 — Number of latent factors.
- n_iter : int, default=100 — Number of iterations for SGD.
- lr : float, default=0.001 — Learning rate for SGD.
- reg : float, default=0.1 — Regularization strength.
- random_state : int, default=42 — Random seed for initialization.
- **kwargs : Any — Additional keyword arguments for the parent classes.

Source code in asf/selectors/collaborative_filtering_selector.py
def __init__(
    self,
    model_class: type | Callable[..., Any] = RidgeRegressorWrapper,
    n_components: int = 10,
    n_iter: int = 100,
    lr: float = 0.001,
    reg: float = 0.1,
    random_state: int = 42,
    **kwargs: Any,
) -> None:
    """
    Set up a CollaborativeFilteringSelector with its hyperparameters.

    Parameters
    ----------
    model_class : type or Callable, default=RidgeRegressorWrapper
        Regressor wrapper used to predict latent factors from features.
        Forwarded to the parent initializer.
    n_components : int, default=10
        Number of latent factors.
    n_iter : int, default=100
        Number of SGD iterations.
    lr : float, default=0.001
        SGD learning rate.
    reg : float, default=0.1
        Regularization strength.
    random_state : int, default=42
        Seed used for initialization.
    **kwargs : Any
        Extra keyword arguments forwarded to the parent classes.
    """
    super().__init__(model_class=model_class, **kwargs)

    # Hyperparameters are coerced to built-in numeric types.
    self.n_components, self.n_iter = int(n_components), int(n_iter)
    self.lr, self.reg = float(lr), float(reg)
    self.random_state = int(random_state)

    # Everything below starts out as None in the constructor.
    # NOTE(review): U/V look like the latent factor matrices and mu/b_U/b_V
    # like the global mean and bias terms -- confirm against the fit logic.
    self.U: np.ndarray | None = None
    self.V: np.ndarray | None = None
    self.b_U: np.ndarray | None = None
    self.b_V: np.ndarray | None = None
    self.mu: float | None = None

    self.performance_matrix: pd.DataFrame | None = None
    self.model: Any | None = None

CosineSelector

Bases: ConfigurableMixin, AbstractSelector

Algorithm selector based on the AS-LLM architecture.

Uses cosine similarity in a learned latent space to match instances to algorithms. This implementation follows the architecture of the AS-LLM paper (arXiv:2311.13184):

- Instance features → MLP → instance embedding
- Algorithm index → Embedding → LSTM → fused with algorithm_features → algorithm embedding
- Cosine similarity + MLP for final compatibility prediction

Attributes

normalize_features : bool
    If True, standardize instance features.
embed_size : int
    Dimensionality of algorithm embeddings.
num_hiddens : int
    Hidden units in LSTM.
num_layers : int
    Number of LSTM layers.
alpha : float
    Weight for learned LSTM features in fusion.
beta : float
    Weight for algorithm_features in fusion.
lr : float
    Learning rate for training.
num_epochs : int
    Number of training epochs.
batch_size : int
    Batch size for training.
device : str
    Device for PyTorch ('cuda' or 'cpu'); stored internally as `_device`, a `torch.device`.

Source code in asf/selectors/cosine_selector.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
class CosineSelector(ConfigurableMixin, AbstractSelector):
    """
    Algorithm selector based on the AS-LLM architecture.

    Uses cosine similarity in a learned latent space to match instances to algorithms.
    This implementation follows the AS-LLM paper (arXiv:2311.13184) architecture:
    - Instance features → MLP → instance embedding
    - Algorithm index → Embedding → LSTM → fused with algorithm_features → algorithm embedding
    - Cosine similarity + MLP for final compatibility prediction

    ``fit`` requires ``selector.algorithm_features`` to be a ``pd.DataFrame``
    indexed by algorithm name; its rows are matched to the performance columns
    after a light string normalization (see ``_norm``).

    Attributes
    ----------
    normalize_features : bool
        If True, standardize instance features.
    embed_size : int
        Dimensionality of algorithm embeddings.
    num_hiddens : int
        Hidden units in LSTM.
    num_layers : int
        Number of LSTM layers.
    alpha : float
        Weight for learned LSTM features in fusion.
    beta : float
        Weight for algorithm_features in fusion.
    lr : float
        Learning rate for training.
    num_epochs : int
        Number of training epochs.
    batch_size : int
        Batch size for training.
    device : str
        Device for PyTorch ('cuda' or 'cpu'); stored internally as
        ``_device``, a ``torch.device``.
    random_state : int
        Seed applied to torch and NumPy at the start of fitting.
    algorithms : list[str]
        Algorithm names (stringified performance columns), set during fitting.
    """

    PREFIX = "cosine"
    RETURN_TYPE = "single"

    # Type hints for attributes set in __init__
    normalize_features: bool
    embed_size: int
    num_hiddens: int
    num_layers: int
    alpha: float
    beta: float
    lr: float
    num_epochs: int
    batch_size: int
    random_state: int
    _device: torch.device
    _model: nn.Module | None
    _scaler: StandardScaler | None
    _imputer: SimpleImputer | None
    _alg_feats: pd.DataFrame | None
    algorithms: list[str]

    def __init__(
        self,
        normalize_features: bool = True,
        embed_size: int = 50,
        num_hiddens: int = 50,
        num_layers: int = 2,
        alpha: float = 0.9,
        beta: float = 0.1,
        lr: float = 0.001,
        num_epochs: int = 100,
        batch_size: int = 128,
        device: str | None = None,
        random_state: int = 42,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the CosineSelector (AS-LLM architecture).

        Parameters
        ----------
        normalize_features : bool, default=True
            If True, standardize instance features.
        embed_size : int, default=50
            Dimensionality of algorithm embeddings.
        num_hiddens : int, default=50
            Hidden units in LSTM.
        num_layers : int, default=2
            Number of LSTM layers.
        alpha : float, default=0.9
            Weight for learned LSTM features in fusion.
        beta : float, default=0.1
            Weight for algorithm_features in fusion.
        lr : float, default=0.001
            Learning rate for training.
        num_epochs : int, default=100
            Number of training epochs.
        batch_size : int, default=128
            Batch size for training.
        device : str, default=None
            Device for PyTorch ('cuda', 'cpu'). If None, auto-detect.
        random_state : int, default=42
            Random seed for reproducibility.
        **kwargs : Any
            Additional keyword arguments.

        Raises
        ------
        ImportError
            If PyTorch is not available.
        """
        if not TORCH_AVAILABLE:
            raise ImportError(
                "CosineSelector requires PyTorch. Install with: pip install torch"
            )

        super().__init__(**kwargs)
        self.normalize_features = bool(normalize_features)
        self.embed_size = int(embed_size)
        self.num_hiddens = int(num_hiddens)
        self.num_layers = int(num_layers)
        self.alpha = float(alpha)
        self.beta = float(beta)
        self.lr = float(lr)
        self.num_epochs = int(num_epochs)
        self.batch_size = int(batch_size)
        self.random_state = int(random_state)

        if device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self._device = torch.device(device)

        self._model: nn.Module | None = None
        self._scaler: StandardScaler | None = None
        self._imputer: SimpleImputer | None = None
        self._alg_feats: pd.DataFrame | None = None

    def _norm(self, s: str) -> str:
        """Minimal string normalization for matching algorithm identifiers.

        Lower-cases and strips the value, then removes every run of
        non-word characters and underscores.
        """
        s = str(s).lower().strip()
        return re.sub(r"[\W_]+", "", s)

    def _generate_training_data(
        self,
        features: np.ndarray,
        performance: np.ndarray,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Generate pairwise training data.

        For each instance, create (instance, algorithm) pairs with label 1 if
        the algorithm is best for that instance (lowest performance value,
        via ``argmin``), 0 otherwise.

        Parameters
        ----------
        features : np.ndarray
            Instance feature matrix, one row per instance.
        performance : np.ndarray
            Performance matrix, one row per instance and one column per
            algorithm.

        Returns
        -------
        tuple[np.ndarray, np.ndarray]
            ``(data, labels)``. Each data row is the concatenation of the
            instance features, a one-hot encoding of the algorithm and the
            algorithm index; rows are ordered instance-major, algorithm-minor.
        """
        num_instances = features.shape[0]
        num_algorithms = performance.shape[1]

        # Index of the best (minimum) algorithm per instance.
        best_algo = np.argmin(performance[:num_instances], axis=1)

        # Algorithm index of every pair: 0..m-1 repeated for each instance.
        algo_idx = np.tile(np.arange(num_algorithms), num_instances)

        # Built in one shot instead of appending row by row in Python loops.
        training_data = np.hstack(
            [
                np.repeat(features, num_algorithms, axis=0),
                np.tile(np.eye(num_algorithms), (num_instances, 1)),
                algo_idx[:, None],
            ]
        )
        training_labels = (algo_idx == np.repeat(best_algo, num_algorithms)).astype(
            int
        )

        return training_data, training_labels

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit the AS-LLM selector.

        Parameters
        ----------
        features : pd.DataFrame
            Instance feature matrix (rows = instances).
        performance : pd.DataFrame
            Performance matrix (rows = instances, columns = algorithms).
        **kwargs : Any
            Additional keyword arguments.

        Raises
        ------
        ValueError
            If ``algorithm_features`` is not set to a DataFrame, or if some
            performance column has no matching algorithm-feature row.
        """
        torch.manual_seed(self.random_state)
        np.random.seed(self.random_state)

        # Get algorithm features
        alg_df = getattr(self, "algorithm_features", None)
        if alg_df is None or not isinstance(alg_df, pd.DataFrame):
            raise ValueError(
                "Set selector.algorithm_features (pd.DataFrame indexed by algorithm names) before fit()"
            )

        self.algorithms = [str(a) for a in performance.columns]
        # Work on a copy so the caller's DataFrame index is not rewritten.
        alg_df = alg_df.copy()
        alg_df.index = alg_df.index.astype(str)

        # Map algorithm features to performance columns
        norm_to_orig = {self._norm(n): n for n in alg_df.index}
        mapped = []
        missing = []
        for a in self.algorithms:
            na = self._norm(a)
            orig = norm_to_orig.get(na)
            if orig is None:
                missing.append(a)
            else:
                mapped.append(orig)
        if missing:
            avail = list(alg_df.index)[:10]
            raise ValueError(
                f"Algorithm feature rows do not match performance columns. Missing: {missing}. Available sample: {avail}"
            )

        alg_df = alg_df.loc[mapped].copy()
        alg_df.index = [str(a) for a in self.algorithms]
        alg_df = alg_df.select_dtypes(include=[np.number]).astype(float)
        self._alg_feats = alg_df

        # Preprocess features
        X = features.fillna(0.0).to_numpy(dtype=float)
        self._imputer = SimpleImputer()
        X = self._imputer.fit_transform(X)

        if self.normalize_features:
            self._scaler = StandardScaler()
            X = self._scaler.fit_transform(X)

        # Performance matrix, rows aligned to the feature index. All columns
        # are kept in their original order (matching self.algorithms), so the
        # lookup also works when column labels are not strings.
        Y_perf = performance.loc[features.index].to_numpy(dtype=float)
        # Replace missing performance values with the column mean.
        col_mean = np.nanmean(Y_perf, axis=0)
        inds = np.where(np.isnan(Y_perf))
        if inds[0].size:
            Y_perf[inds] = np.take(col_mean, inds[1])

        # Generate training data
        training_data, training_labels = self._generate_training_data(X, Y_perf)

        # Convert to tensors
        X_train = torch.tensor(training_data, dtype=torch.float32)
        y_train = torch.tensor(training_labels, dtype=torch.long)
        alg_features_tensor = torch.tensor(alg_df.values, dtype=torch.float32)

        # Create data loader
        train_set = Data.TensorDataset(X_train, y_train)
        train_loader = Data.DataLoader(
            train_set, batch_size=self.batch_size, shuffle=True
        )

        # Create model
        num_user_features = X.shape[1]
        num_algorithms = len(self.algorithms)

        self._model = _ASLLMRecommendationModel(  # type: ignore[attr-defined]
            num_algorithms=num_algorithms,
            num_user_features=num_user_features,
            algorithm_features=alg_features_tensor,
            embed_size=self.embed_size,
            num_hiddens=self.num_hiddens,
            num_layers=self.num_layers,
            alpha=self.alpha,
            beta=self.beta,
        ).to(self._device)

        # Training
        optimizer = torch.optim.Adam(self._model.parameters(), lr=self.lr)
        loss_fn = nn.CrossEntropyLoss()

        self._model.train()
        for _ in range(self.num_epochs):
            for batch_X, batch_y in train_loader:
                batch_X = batch_X.to(self._device)
                batch_y = batch_y.to(self._device)

                optimizer.zero_grad()
                outputs = self._model(batch_X)
                loss = loss_fn(outputs, batch_y)
                loss.backward()
                optimizer.step()

        self._num_user_features = num_user_features

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each query instance.

        Every (instance, algorithm) pair is scored with the trained model and
        the algorithm with the highest class-1 ("match") probability is chosen.

        Parameters
        ----------
        features : pd.DataFrame or None
            Query instance features (rows = instances). Required.
        performance : pd.DataFrame or None, default=None
            Unused by this selector.

        Returns
        -------
        dict
            Mapping from instance name (as ``str``) to a single-element list
            ``[(algorithm, budget)]``, where budget is ``self.budget`` or 0.

        Raises
        ------
        ValueError
            If the selector has not been fitted or ``features`` is None.
        """
        if self._model is None:
            raise ValueError("fit() must be called before predict()")

        if features is None:
            raise ValueError("CosineSelector requires features for prediction.")

        # Preprocess features
        X = features.fillna(0.0).to_numpy(dtype=float)
        if self._imputer is not None:
            X = self._imputer.transform(X)
        if self.normalize_features and self._scaler is not None:
            X = self._scaler.transform(X)

        self._model.eval()
        num_algorithms = len(self.algorithms)
        # One-hot rows are identical for every instance; build them once.
        one_hot = np.eye(num_algorithms)

        out: dict[str, list[tuple[str, float]]] = {}

        with torch.no_grad():
            for i, inst in enumerate(features.index):
                inst_features = X[i]
                best_score = -float("inf")
                best_algo = None

                for j in range(num_algorithms):
                    # Create input: instance features + one-hot + algo index
                    sample = np.concatenate((inst_features, one_hot[j], [j]))
                    sample_tensor = (
                        torch.tensor(sample, dtype=torch.float32)
                        .unsqueeze(0)
                        .to(self._device)
                    )

                    output = self._model(sample_tensor)
                    # Get probability of being a match (class 1)
                    probs = torch.softmax(output, dim=1)
                    score = probs[0, 1].item()

                    if score > best_score:
                        best_score = score
                        best_algo = self.algorithms[j]

                out[str(inst)] = [(best_algo, float(self.budget or 0))]  # type: ignore[assignment]

        return out

    # save and load are inherited from AbstractSelector/ConfigurableMixin

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for CosineSelector.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments (unused).

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available; conditions and forbiddens
            are always empty.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        normalize_features_param = Categorical(
            name="normalize_features",
            items=[True, False],
            default=True,
        )

        embed_size_param = Integer(
            name="embed_size",
            bounds=(10, 100),
            default=50,
        )

        num_hiddens_param = Integer(
            name="num_hiddens",
            bounds=(10, 100),
            default=50,
        )

        num_layers_param = Integer(
            name="num_layers",
            bounds=(1, 4),
            default=2,
        )

        alpha_param = Float(
            name="alpha",
            bounds=(0.0, 1.0),
            default=0.9,
        )

        beta_param = Float(
            name="beta",
            bounds=(0.0, 1.0),
            default=0.1,
        )

        lr_param = Float(
            name="lr",
            bounds=(1e-5, 1e-2),
            log=True,
            default=0.001,
        )

        num_epochs_param = Integer(
            name="num_epochs",
            bounds=(10, 200),
            default=100,
        )

        batch_size_param = Integer(
            name="batch_size",
            bounds=(32, 256),
            default=128,
        )

        params = [
            normalize_features_param,
            embed_size_param,
            num_hiddens_param,
            num_layers_param,
            alpha_param,
            beta_param,
            lr_param,
            num_epochs_param,
            batch_size_param,
        ]

        return params, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[CosineSelector]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration.
        **kwargs : Any
            Additional keyword arguments; they override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for CosineSelector.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(CosineSelector, **config)

__init__(normalize_features=True, embed_size=50, num_hiddens=50, num_layers=2, alpha=0.9, beta=0.1, lr=0.001, num_epochs=100, batch_size=128, device=None, random_state=42, **kwargs)

Initialize the CosineSelector (AS-LLM architecture).

Parameters

normalize_features : bool, default=True
    If True, standardize instance features.
embed_size : int, default=50
    Dimensionality of algorithm embeddings.
num_hiddens : int, default=50
    Hidden units in LSTM.
num_layers : int, default=2
    Number of LSTM layers.
alpha : float, default=0.9
    Weight for learned LSTM features in fusion.
beta : float, default=0.1
    Weight for algorithm_features in fusion.
lr : float, default=0.001
    Learning rate for training.
num_epochs : int, default=100
    Number of training epochs.
batch_size : int, default=128
    Batch size for training.
device : str, default=None
    Device for PyTorch ('cuda', 'cpu'). If None, auto-detect.
random_state : int, default=42
    Random seed for reproducibility.
**kwargs : Any
    Additional keyword arguments.

Source code in asf/selectors/cosine_selector.py
def __init__(
    self,
    normalize_features: bool = True,
    embed_size: int = 50,
    num_hiddens: int = 50,
    num_layers: int = 2,
    alpha: float = 0.9,
    beta: float = 0.1,
    lr: float = 0.001,
    num_epochs: int = 100,
    batch_size: int = 128,
    device: str | None = None,
    random_state: int = 42,
    **kwargs: Any,
) -> None:
    """
    Set up a CosineSelector (AS-LLM architecture).

    Parameters
    ----------
    normalize_features : bool, default=True
        Whether instance features are standardized.
    embed_size : int, default=50
        Dimensionality of the algorithm embeddings.
    num_hiddens : int, default=50
        Number of hidden units in the LSTM.
    num_layers : int, default=2
        Number of LSTM layers.
    alpha : float, default=0.9
        Fusion weight of the learned LSTM features.
    beta : float, default=0.1
        Fusion weight of the algorithm_features.
    lr : float, default=0.001
        Training learning rate.
    num_epochs : int, default=100
        Number of training epochs.
    batch_size : int, default=128
        Training batch size.
    device : str, default=None
        PyTorch device ('cuda', 'cpu'). When None, 'cuda' is used if it is
        available and 'cpu' otherwise.
    random_state : int, default=42
        Random seed for reproducibility.
    **kwargs : Any
        Extra keyword arguments forwarded to the parent initializer.

    Raises
    ------
    ImportError
        If PyTorch is not available.
    """
    if not TORCH_AVAILABLE:
        raise ImportError(
            "CosineSelector requires PyTorch. Install with: pip install torch"
        )

    super().__init__(**kwargs)

    # Hyperparameters, coerced to built-in types.
    self.normalize_features = bool(normalize_features)
    self.embed_size, self.num_hiddens = int(embed_size), int(num_hiddens)
    self.num_layers = int(num_layers)
    self.alpha, self.beta, self.lr = float(alpha), float(beta), float(lr)
    self.num_epochs, self.batch_size = int(num_epochs), int(batch_size)
    self.random_state = int(random_state)

    # CUDA availability is only probed when no device was requested.
    device_spec = (
        device
        if device is not None
        else ("cuda" if torch.cuda.is_available() else "cpu")
    )
    self._device = torch.device(device_spec)

    # Model and preprocessing objects start out unset.
    self._alg_feats: pd.DataFrame | None = None
    self._imputer: SimpleImputer | None = None
    self._scaler: StandardScaler | None = None
    self._model: nn.Module | None = None

DummyFeatureGenerator

Bases: AbstractFeatureGenerator

Feature generator that does nothing.

Source code in asf/selectors/feature_generator.py
class DummyFeatureGenerator(AbstractFeatureGenerator):
    """
    No-op feature generator: it never produces any feature columns.
    """

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Return an empty feature frame aligned with the input.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input DataFrame containing the base features.

        Returns
        -------
        pd.DataFrame
            A DataFrame with the same index as ``base_features`` and no
            columns.
        """
        instance_index = base_features.index
        no_features = pd.DataFrame(index=instance_index)
        return no_features

ISA

Bases: ConfigurableMixin, AbstractSelector

ISA (Instance-Specific Aspeed) selector.

Attributes

k : int
    Number of neighbors for k-NN.
use_k_tuning : bool
    Whether to tune k using cross-validation.
n_folds : int
    Number of folds for cross-validation when tuning k.
k_candidates : list[int]
    Candidate k values to consider when tuning.
aspeed_cutoff : int
    Time limit for the internal aspeed solver.
cores : int
    Number of cores for the internal aspeed solver.
random_state : int
    Random seed for reproducibility.
reduced_features : pd.DataFrame or None
    Training features after set reduction.
reduced_performance : pd.DataFrame or None
    Training performance after set reduction.
knn : NearestNeighbors or None
    k-NN model.

Source code in asf/selectors/isa.py
class ISA(ConfigurableMixin, AbstractSelector):
    """
    ISA (Instance-Specific Aspeed) selector.

    Training instances that are solved within the budget by all algorithms or
    by none are dropped; a k-NN model is fitted on the rest. For each query
    instance an aspeed schedule is computed on the performance data of its
    nearest training neighbors.

    Attributes
    ----------
    k : int
        Number of neighbors for k-NN.
    use_k_tuning : bool
        Whether to tune k using cross-validation.
    n_folds : int
        Number of folds for cross-validation when tuning k.
    k_candidates : list[int]
        Candidate k values to consider when tuning.
    aspeed_cutoff : int
        Time limit for the internal aspeed solver.
    cores : int
        Number of cores for the internal aspeed solver.
    random_state : int
        Random seed for reproducibility.
    reduced_features : pd.DataFrame or None
        Training features after set reduction.
    reduced_performance : pd.DataFrame or None
        Training performance after set reduction.
    knn : NearestNeighbors or None
        k-NN model.
    """

    PREFIX = "isa"
    RETURN_TYPE = "schedule"

    def __init__(
        self,
        k: int = 10,
        use_k_tuning: bool = True,
        n_folds: int = 5,
        k_candidates: list[int] | None = None,
        aspeed_cutoff: int = 30,
        cores: int = 1,
        random_state: int = 42,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the ISA selector.

        Parameters
        ----------
        k : int, default=10
            Number of neighbors for k-NN.
        use_k_tuning : bool, default=True
            Whether to tune k using cross-validation.
        n_folds : int, default=5
            Number of folds for cross-validation when tuning k.
        k_candidates : list[int] or None, default=None
            Candidate k values to consider when tuning. ``None`` selects
            ``[3, 5, 10, 15, 20]``.
        aspeed_cutoff : int, default=30
            Time limit for the internal aspeed solver.
        cores : int, default=1
            Number of cores for the internal aspeed solver.
        random_state : int, default=42
            Random seed for reproducibility.
        **kwargs : Any
            Additional keyword arguments for the parent class.

        Raises
        ------
        ImportError
            If clingo is not installed.
        """
        if not CLINGO_AVAIL:
            raise ImportError("clingo is not installed. Please install it to use ISA.")
        super().__init__(**kwargs)
        self.k = int(k)
        self.use_k_tuning = bool(use_k_tuning)
        self.n_folds = int(n_folds)
        self.k_candidates = [3, 5, 10, 15, 20] if k_candidates is None else k_candidates
        self.aspeed_cutoff = int(aspeed_cutoff)
        self.cores = int(cores)
        self.random_state = int(random_state)

        self.reduced_features: pd.DataFrame | None = None
        self.reduced_performance: pd.DataFrame | None = None
        self.knn: NearestNeighbors | None = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Fit the ISA selector.

        Instances solved by every algorithm or by none (runtime below the
        budget) are removed before the k-NN model is built. If nothing is
        left, no k-NN model is created and ``_predict`` returns empty
        schedules.

        Parameters
        ----------
        features : pd.DataFrame
            Training features (instances x features).
        performance : pd.DataFrame
            Training performance (instances x algorithms).
        **kwargs : Any
            Additional keyword arguments (unused).
        """
        # Drop any model from a previous fit so that an empty reduced set
        # cannot leave a stale k-NN paired with new performance data.
        self.knn = None

        is_solved = performance < self.budget
        solved_by_all = is_solved.all(axis=1)
        solved_by_none = ~is_solved.any(axis=1)
        trivial_mask = solved_by_all | solved_by_none

        self.reduced_features = features[~trivial_mask].copy()
        self.reduced_performance = performance[~trivial_mask].copy()

        if self.reduced_features.empty:
            return

        if self.use_k_tuning:
            self.k = self._tune_k()

        self.knn = NearestNeighbors(
            n_neighbors=min(self.k, len(self.reduced_features)), metric="euclidean"
        )
        self.knn.fit(self.reduced_features.values)

    def _tune_k(self) -> int:
        """
        Tune the neighborhood size k via cross-validation.

        Each candidate k is scored by the mean runtime over the validation
        folds of the schedule built from the k nearest training neighbors;
        unsolved instances are charged the budget. Folds with fewer training
        instances than the candidate k are skipped.

        Returns
        -------
        int
            The best k value found, or the current ``k`` when tuning is not
            possible (no reduced data, or fewer than two instances).
        """
        reduced_features = self.reduced_features
        reduced_performance = self.reduced_performance
        if reduced_features is None or reduced_performance is None:
            return self.k

        n_samples = len(reduced_features)
        if n_samples < 2:
            # Cross-validation needs at least two instances.
            return self.k

        # KFold rejects n_splits > n_samples, which can happen after the
        # trivial instances were removed; clamp instead of crashing.
        kf = KFold(
            n_splits=min(self.n_folds, n_samples),
            shuffle=True,
            random_state=self.random_state,
        )
        # The seeded splits are identical for every candidate; compute once.
        splits = list(kf.split(np.arange(n_samples)))

        best_k = self.k
        best_score = float("inf")

        for candidate_k in self.k_candidates:
            fold_scores = []
            for train_idx, val_idx in splits:
                if len(train_idx) < candidate_k:
                    continue

                train_features = reduced_features.iloc[train_idx]
                train_perf = reduced_performance.iloc[train_idx]
                val_features = reduced_features.iloc[val_idx]
                val_perf = reduced_performance.iloc[val_idx]

                knn = NearestNeighbors(n_neighbors=candidate_k, metric="euclidean")
                knn.fit(train_features.values)

                total_runtime = 0.0
                for _, instance_row in val_features.iterrows():
                    x = instance_row.values.reshape(1, -1)
                    _, neighbor_idxs = knn.kneighbors(x)
                    neighbor_perf = train_perf.iloc[neighbor_idxs.flatten()]

                    schedule = self._get_aspeed_schedule(neighbor_perf)

                    instance_actual_perf = val_perf.loc[instance_row.name]
                    solved = False
                    # The first scheduled algorithm that solves the instance
                    # within the budget determines the charged runtime.
                    for algo, _ in schedule:
                        runtime = float(instance_actual_perf.get(algo, self.budget))
                        if self.budget is not None and runtime < self.budget:
                            total_runtime += runtime
                            solved = True
                            break
                    if not solved:
                        total_runtime += float(self.budget or 0)

                avg_runtime = total_runtime / len(val_features)
                fold_scores.append(avg_runtime)

            mean_score = np.mean(fold_scores) if fold_scores else float("inf")
            if mean_score < best_score:
                best_score = float(mean_score)
                best_k = candidate_k

        return best_k

    def _get_aspeed_schedule(
        self, performance_subset: pd.DataFrame
    ) -> list[tuple[str, float]]:
        """
        Run aspeed on performance data to get a schedule.

        The schedule is sorted by ascending time slice. If the slices do not
        use up the budget, the leftover time is added to the largest slice.

        Parameters
        ----------
        performance_subset : pd.DataFrame
            Performance matrix for the neighborhood.

        Returns
        -------
        list[tuple[str, float]]
            List of (algorithm, time) tuples.
        """
        aspeed_presolver = Aspeed(
            budget=float(self.budget or 0),
            aspeed_cutoff=self.aspeed_cutoff,
            cores=self.cores,
        )

        aspeed_presolver.fit(features=None, performance=performance_subset)
        schedule = cast(list[tuple[str, float]], aspeed_presolver.predict())
        schedule.sort(key=lambda x: x[1])

        total_time = sum(time for _, time in schedule)
        remaining_time = float(self.budget or 0) - total_time

        # An empty schedule has no slice to extend and is returned as is.
        if remaining_time > 0 and schedule:
            max_idx = max(range(len(schedule)), key=lambda i: schedule[i][1])
            algo, time = schedule[max_idx]
            schedule[max_idx] = (str(algo), float(time + remaining_time))

        return [(str(a), float(t)) for a, t in schedule]

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict algorithm schedules for each instance.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features.
        performance : pd.DataFrame or None, default=None
            Partial performance data (unused here).

        Returns
        -------
        dict
            Mapping from instance name to algorithm schedules. Every schedule
            is empty when no k-NN model is available.

        Raises
        ------
        ValueError
            If ``features`` is None.
        """
        if features is None:
            raise ValueError("ISA requires features for prediction.")
        if self.knn is None:
            return {str(instance): [] for instance in features.index}

        predictions: dict[str, list[tuple[str, float]]] = {}
        for instance_name in features.index:
            x = features.loc[[instance_name]].values
            _, neighbor_idxs = self.knn.kneighbors(x)
            assert self.reduced_performance is not None
            neighbor_perf = self.reduced_performance.iloc[neighbor_idxs.flatten()]

            schedule = self._get_aspeed_schedule(neighbor_perf)
            predictions[str(instance_name)] = schedule

        return predictions

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for ISA.

        ``n_folds`` and ``k_candidates`` are only active when
        ``use_k_tuning`` is True.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens).
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        k_param = Integer(
            name="k",
            bounds=(1, 50),
            default=10,
        )

        use_k_tuning_param = Categorical(
            name="use_k_tuning",
            items=[True, False],
            default=True,
        )

        n_folds_param = Integer(
            name="n_folds",
            bounds=(2, 10),
            default=5,
        )

        k_candidates_param = Categorical(
            name="k_candidates",
            items=["small", "medium", "broad"],
            default="medium",
        )

        aspeed_cutoff_param = Integer(
            name="aspeed_cutoff",
            bounds=(1, 300),
            default=30,
        )

        cores_param = Integer(
            name="cores",
            bounds=(1, 8),
            default=1,
        )

        params = [
            k_param,
            use_k_tuning_param,
            n_folds_param,
            k_candidates_param,
            aspeed_cutoff_param,
            cores_param,
        ]

        conditions = [
            EqualsCondition(n_folds_param, use_k_tuning_param, True),
            EqualsCondition(k_candidates_param, use_k_tuning_param, True),
        ]

        return params, conditions, []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[ISA]:
        """
        Create a partial function from a clean configuration.

        The categorical ``k_candidates`` value ("small", "medium", "broad")
        is translated into the corresponding list of candidate k values. A
        list or tuple of candidates is passed through unchanged.

        Parameters
        ----------
        clean_config : dict
            The clean configuration.
        **kwargs : Any
            Additional keyword arguments; they override entries of the
            configuration with the same key.

        Returns
        -------
        partial
            Partial function for ISA.
        """
        config = clean_config.copy()

        k_candidates_map = {
            "small": [3, 5, 10],
            "medium": [3, 5, 10, 15, 20],
            "broad": [3, 5, 10, 15, 20, 30, 50],
        }

        use_k = config.get("use_k_tuning", True)

        if use_k:
            k_candidates = config.get("k_candidates", "medium")
            if isinstance(k_candidates, (list, tuple)):
                # Already an explicit candidate list; a dict lookup with it
                # would fail as unhashable.
                config["k_candidates"] = list(k_candidates)
            else:
                config["k_candidates"] = k_candidates_map[k_candidates]
        else:
            # Inactive hyperparameters are absent from the configuration;
            # fill in the defaults.
            config["k_candidates"] = [3, 5, 10, 15, 20]
            if "n_folds" not in config:
                config["n_folds"] = 5

        config.update(kwargs)
        return partial(ISA, **config)

__init__(k=10, use_k_tuning=True, n_folds=5, k_candidates=None, aspeed_cutoff=30, cores=1, random_state=42, **kwargs)

Initialize the ISA selector.

Parameters

k : int, default=10
    Number of neighbors for k-NN.
use_k_tuning : bool, default=True
    Whether to tune k using cross-validation.
n_folds : int, default=5
    Number of folds for cross-validation when tuning k.
k_candidates : list[int] or None, default=None
    Candidate k values to consider when tuning.
aspeed_cutoff : int, default=30
    Time limit for the internal aspeed solver.
cores : int, default=1
    Number of cores for the internal aspeed solver.
random_state : int, default=42
    Random seed for reproducibility.
**kwargs : Any
    Additional keyword arguments for the parent class.

Source code in asf/selectors/isa.py
def __init__(
    self,
    k: int = 10,
    use_k_tuning: bool = True,
    n_folds: int = 5,
    k_candidates: list[int] | None = None,
    aspeed_cutoff: int = 30,
    cores: int = 1,
    random_state: int = 42,
    **kwargs: Any,
) -> None:
    """
    Set up an ISA selector.

    Parameters
    ----------
    k : int, default=10
        Number of neighbors for k-NN.
    use_k_tuning : bool, default=True
        Whether to tune k using cross-validation.
    n_folds : int, default=5
        Number of folds for cross-validation when tuning k.
    k_candidates : list[int] or None, default=None
        Candidate k values to consider when tuning. ``None`` selects
        ``[3, 5, 10, 15, 20]``.
    aspeed_cutoff : int, default=30
        Time limit for the internal aspeed solver.
    cores : int, default=1
        Number of cores for the internal aspeed solver.
    random_state : int, default=42
        Random seed for reproducibility.
    **kwargs : Any
        Additional keyword arguments for the parent class.

    Raises
    ------
    ImportError
        If clingo is not available.
    """
    # ISA cannot work without clingo, so fail before any state is created.
    if not CLINGO_AVAIL:
        raise ImportError("clingo is not installed. Please install it to use ISA.")

    super().__init__(**kwargs)

    # Hyperparameters are coerced to plain Python scalars.
    self.k = int(k)
    self.use_k_tuning = bool(use_k_tuning)
    self.n_folds = int(n_folds)
    if k_candidates is None:
        self.k_candidates = [3, 5, 10, 15, 20]
    else:
        self.k_candidates = k_candidates
    self.aspeed_cutoff = int(aspeed_cutoff)
    self.cores = int(cores)
    self.random_state = int(random_state)

    # Placeholders, all None until assigned elsewhere.
    self.reduced_features: pd.DataFrame | None = None
    self.reduced_performance: pd.DataFrame | None = None
    self.knn: NearestNeighbors | None = None

ISAC

Bases: ConfigurableMixin, AbstractSelector

ISAC (Instance-Specific Algorithm Configuration) selector.

Clusters instances in feature space and assigns to each cluster the best algorithm (by mean performance).

Attributes

clusterer : type or Callable or Any The clusterer class, partial, or instance. clusterer_kwargs : dict[str, Any] Arguments for clusterer instantiation. clusterer_instance : Any or None The trained clusterer instance. cluster_to_best_algo : dict[int, str] Mapping from cluster ID to best algorithm name.

Source code in asf/selectors/isac.py
class ISAC(ConfigurableMixin, AbstractSelector):
    """
    ISAC (Instance-Specific Algorithm Configuration) selector.

    Clusters instances in feature space and assigns to each cluster the best
    algorithm (by mean performance).

    Attributes
    ----------
    clusterer : type or Callable or Any
        The clusterer class, partial, or instance.
    clusterer_kwargs : dict[str, Any]
        Arguments for clusterer instantiation.
    clusterer_instance : Any or None
        The trained clusterer instance.
    cluster_to_best_algo : dict[int, str]
        Mapping from cluster ID to best algorithm name.
    """

    PREFIX = "isac"
    RETURN_TYPE = "single"

    def __init__(
        self,
        clusterer: type | Callable[..., Any] | Any = GMeansWrapper,
        clusterer_kwargs: dict[str, Any] | None = None,
        random_state: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the ISAC selector.

        Parameters
        ----------
        clusterer : type or Callable or Any, default=GMeansWrapper
            The clusterer class, partial, or instance.
        clusterer_kwargs : dict[str, Any] or None, default=None
            Arguments for clusterer instantiation.
        random_state : int or None, default=None
            Random state for the clusterer.
        **kwargs : Any
            Additional keyword arguments.
        """
        super().__init__(**kwargs)
        self.clusterer = clusterer
        self.clusterer_kwargs = clusterer_kwargs or {}
        self.random_state = random_state
        self.clusterer_instance: Any | None = None
        self.cluster_to_best_algo: dict[int, str] = {}

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit the ISAC selector.

        The clusterer is instantiated (if a class or partial was given) or
        reused (if an instance with ``fit``/``predict`` was given), fitted on
        the feature values, and each resulting cluster is mapped to the
        algorithm with the lowest mean performance value on its members.

        Parameters
        ----------
        features : pd.DataFrame
            Feature matrix (instances x features).
        performance : pd.DataFrame
            Performance matrix (instances x algorithms).
        **kwargs : Any
            Additional keyword arguments (unused).

        Raises
        ------
        ValueError
            If ``clusterer`` is neither a class, a partial, nor an object
            exposing ``fit`` and ``predict``.
        """
        if isinstance(self.clusterer, (type, partial)):
            self.clusterer_instance = self.clusterer(
                random_state=self.random_state, **self.clusterer_kwargs
            )
        elif hasattr(self.clusterer, "fit") and hasattr(self.clusterer, "predict"):
            self.clusterer_instance = self.clusterer
        else:
            raise ValueError(
                "clusterer must be a class, partial, or an instance with fit/predict"
            )

        self.clusterer_instance.fit(features.values)  # type: ignore[attr-defined]
        cluster_labels = np.asarray(
            self.clusterer_instance.predict(features.values)  # type: ignore[attr-defined]
        )

        # Build the mapping from scratch so a refit never keeps stale clusters.
        cluster_to_best_algo: dict[int, str] = {}
        # Iterate over the labels that actually occur: cluster ids are not
        # guaranteed to be the contiguous range 0..n-1 (for example a noise
        # label of -1, or ids skipped by the clusterer).
        for cluster_id in np.unique(cluster_labels):
            idxs = np.where(cluster_labels == cluster_id)[0]
            algo_means = performance.iloc[idxs].mean(axis=0)
            cluster_to_best_algo[int(cluster_id)] = str(algo_means.idxmin())
        self.cluster_to_best_algo = cluster_to_best_algo

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            Feature matrix for test instances.
        performance : pd.DataFrame or None, default=None
            Unused by this selector.

        Returns
        -------
        dict
            Mapping from instance name to algorithm schedules. An instance
            whose predicted cluster has no recorded best algorithm gets an
            empty schedule.

        Raises
        ------
        ValueError
            If ``features`` is None.
        RuntimeError
            If the selector has not been fitted.
        """
        if features is None:
            raise ValueError("ISAC require features for prediction.")
        if self.clusterer_instance is None:
            raise RuntimeError("ISAC has not been fitted.")

        cluster_labels = self.clusterer_instance.predict(features.values)  # type: ignore[attr-defined]
        predictions: dict[str, list[tuple[str, float]]] = {}
        for idx, instance in enumerate(features.index):
            cluster_id = int(cluster_labels[idx])
            best_algo = self.cluster_to_best_algo.get(cluster_id)
            if best_algo:
                predictions[str(instance)] = [(str(best_algo), float(self.budget or 0))]
            else:
                predictions[str(instance)] = []
        return predictions

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for ISAC.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        clusterer_param = ClassChoice(
            name="clusterer",
            choices=[
                GMeansWrapper,
                KMeansWrapper,
                AgglomerativeClusteringWrapper,
                DBSCANWrapper,
            ],
            default=GMeansWrapper,
        )

        return [clusterer_param], [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[ISAC]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for ISAC.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(ISAC, **config)

__init__(clusterer=GMeansWrapper, clusterer_kwargs=None, random_state=None, **kwargs)

Initialize the ISAC selector.

Parameters

clusterer : type or Callable or Any, default=GMeansWrapper The clusterer class, partial, or instance. clusterer_kwargs : dict[str, Any] or None, default=None Arguments for clusterer instantiation. random_state : int or None, default=None Random state for the clusterer. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/isac.py
def __init__(
    self,
    clusterer: type | Callable[..., Any] | Any = GMeansWrapper,
    clusterer_kwargs: dict[str, Any] | None = None,
    random_state: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Set up an ISAC selector.

    Parameters
    ----------
    clusterer : type or Callable or Any, default=GMeansWrapper
        The clusterer class, partial, or instance.
    clusterer_kwargs : dict[str, Any] or None, default=None
        Arguments for clusterer instantiation. A missing or empty value is
        stored as an empty dict.
    random_state : int or None, default=None
        Random state for the clusterer.
    **kwargs : Any
        Additional keyword arguments, forwarded to the parent initializer.
    """
    super().__init__(**kwargs)

    # Clusterer specification, stored as given.
    self.clusterer = clusterer
    self.clusterer_kwargs = clusterer_kwargs if clusterer_kwargs else {}
    self.random_state = random_state

    # Initial values for the trained clusterer and the cluster mapping.
    self.clusterer_instance: Any | None = None
    self.cluster_to_best_algo: dict[int, str] = {}

JointRanking

Bases: ConfigurableMixin, AbstractSelector, AbstractFeatureGenerator

Ranking-based algorithm selector.

Combines feature generation and model-based selection to predict algorithm performance.

Reference

Ortuzk et al. (2022)

Attributes

model : RankingMLP or Callable or None The model used for ranking.

Source code in asf/selectors/joint_ranking.py
class JointRanking(ConfigurableMixin, AbstractSelector, AbstractFeatureGenerator):
    """
    Ranking-based algorithm selector.

    Combines feature generation and model-based selection to predict algorithm
    performance.

    Reference:
        Ortuzk et al. (2022)

    Attributes
    ----------
    model : RankingMLP or Callable or None
        The model used for ranking.
    """

    PREFIX = "joint_ranking"
    RETURN_TYPE = "single"

    def save(self, path: str) -> None:
        """
        Pickle this selector to ``path``.

        Parameters
        ----------
        path : str
            Destination file path.
        """
        with open(path, "wb") as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, path: str) -> "AbstractSelector":
        """
        Load a pickled selector from ``path``.

        Parameters
        ----------
        path : str
            Path of a file previously written by ``save``.

        Returns
        -------
        AbstractSelector
            The unpickled object.
        """
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load files that come from a trusted source.
        with open(path, "rb") as f:
            return pickle.load(f)

    def __init__(
        self,
        model: RankingMLP | Callable[..., RankingMLP] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the JointRanking selector.

        Parameters
        ----------
        model : RankingMLP or Callable or None, default=None
            The model to be used for ranking algorithms.
        **kwargs : Any
            Additional keyword arguments.
        """
        AbstractSelector.__init__(self, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.model = model

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit the ranking model.

        When no algorithm features are available, a one-hot encoding of the
        algorithm names is used. When ``model`` is None or a factory, the
        model is created with an input size equal to the number of instance
        features plus the number of algorithm-feature columns.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The performance data.
        **kwargs : Any
            Additional keyword arguments (unused).

        Raises
        ------
        RuntimeError
            If the model factory returned None.
        """
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=list(self.algorithms),  # type: ignore[arg-type]
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],  # type: ignore[arg-type]
            )

        # The model sees the instance features concatenated with one row of
        # algorithm features, so its width depends on the number of
        # algorithm-feature columns, which only equals the number of
        # algorithms for the one-hot fallback above.
        input_size = len(self.features) + self.algorithm_features.shape[1]

        if self.model is None:
            self.model = RankingMLP(input_size=input_size)
        elif callable(self.model) and not isinstance(self.model, RankingMLP):
            self.model = self.model(input_size=input_size)

        if self.model is None:
            raise RuntimeError("Model could not be initialized.")

        self.model.fit(
            X=features[self.features],
            Y=performance,
            algorithm_features=self.algorithm_features,
        )

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            The query instance features.
        performance : pd.DataFrame or None, default=None
            Unused by this selector.

        Returns
        -------
        dict
            Mapping from instance name to algorithm schedules.

        Raises
        ------
        ValueError
            If ``features`` is None.
        """
        if features is None:
            raise ValueError("JointRanking require features for prediction.")
        predictions = self.generate_features(features)

        results: dict[str, list[tuple[str, float]]] = {}
        for i, instance_name in enumerate(features.index):
            # Positional access: row i of the predictions belongs to instance i.
            idx = (
                int(np.argmax(predictions.iloc[i]))
                if self.maximize
                else int(np.argmin(predictions.iloc[i]))
            )
            results[str(instance_name)] = [
                (str(self.algorithms[idx]), float(self.budget or 0))
            ]
        return results

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate predictions for each algorithm.

        Parameters
        ----------
        base_features : pd.DataFrame
            Input feature matrix.

        Returns
        -------
        pd.DataFrame
            DataFrame containing the predictions for each algorithm, with one
            column per algorithm and the same index as ``base_features``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted or algorithm features are missing.
        """
        if self.model is None:
            raise RuntimeError("Model has not been fitted.")
        if self.algorithm_features is None:
            raise RuntimeError("Algorithm features are missing.")

        predictions = np.zeros((base_features.shape[0], len(self.algorithms)))
        selected_features = base_features[self.features]
        # Column order must match the one used during training.
        column_order = self.algorithm_features.columns.to_list() + self.features

        for i, algorithm in enumerate(self.algorithms):
            # Attach this algorithm's feature row to every instance.
            data = selected_features.assign(**self.algorithm_features.loc[algorithm])
            data = data[column_order]
            prediction = self.model.predict(data)  # type: ignore[attr-defined]
            predictions[:, i] = prediction.flatten()

        # Keep the instance index so rows can be aligned with the input.
        return pd.DataFrame(
            predictions,
            index=base_features.index,
            columns=list(self.algorithms),  # type: ignore[arg-type]
        )

    @staticmethod
    def _define_hyperparameters(
        model: list[type] | None = None, **kwargs: Any
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for JointRanking.

        Parameters
        ----------
        model : list[type] or None, default=None
            List of model classes to choose from. ``None`` selects
            ``[RankingMLP]``.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        if model is None:
            model = [RankingMLP]

        model_param = ClassChoice(
            name="model",
            choices=model,
            default=model[0],
        )

        return [model_param], [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[JointRanking]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for JointRanking.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(JointRanking, **config)

__init__(model=None, **kwargs)

Initialize the JointRanking selector.

Parameters

model : RankingMLP or Callable or None, default=None The model to be used for ranking algorithms. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/joint_ranking.py
def __init__(
    self,
    model: RankingMLP | Callable[..., RankingMLP] | None = None,
    **kwargs: Any,
) -> None:
    """
    Initialize the JointRanking selector.

    Both parent initializers are called explicitly rather than through
    ``super()``.

    Parameters
    ----------
    model : RankingMLP or Callable or None, default=None
        The model to be used for ranking algorithms. It is stored as given.
    **kwargs : Any
        Additional keyword arguments, forwarded to
        ``AbstractSelector.__init__``.
    """
    # Only the selector base receives the keyword arguments; the feature
    # generator base is initialized without any.
    AbstractSelector.__init__(self, **kwargs)
    AbstractFeatureGenerator.__init__(self)
    self.model = model

generate_features(base_features)

Generate predictions for each algorithm.

Parameters

base_features : pd.DataFrame Input feature matrix.

Returns

pd.DataFrame DataFrame containing the predictions for each algorithm.

Source code in asf/selectors/joint_ranking.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate predictions for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        Input feature matrix.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the predictions for each algorithm, with one
        column per algorithm and the same index as ``base_features``.

    Raises
    ------
    RuntimeError
        If the model has not been fitted or algorithm features are missing.
    """
    if self.model is None:
        raise RuntimeError("Model has not been fitted.")
    if self.algorithm_features is None:
        raise RuntimeError("Algorithm features are missing.")

    predictions = np.zeros((base_features.shape[0], len(self.algorithms)))
    selected_features = base_features[self.features]
    # Column order must match the one used during training.
    column_order = self.algorithm_features.columns.to_list() + self.features

    for i, algorithm in enumerate(self.algorithms):
        # Attach this algorithm's feature row to every instance.
        data = selected_features.assign(**self.algorithm_features.loc[algorithm])
        data = data[column_order]
        prediction = self.model.predict(data)  # type: ignore[attr-defined]
        predictions[:, i] = prediction.flatten()

    # Keep the instance index so rows can be aligned with the input.
    return pd.DataFrame(
        predictions,
        index=base_features.index,
        columns=list(self.algorithms),  # type: ignore[arg-type]
    )

MultiClassClassifier

Bases: ConfigurableMixin, AbstractModelBasedSelector

Multi-class classification algorithm selector.

Attributes

classifier : AbstractPredictor or None The trained classification model.

Source code in asf/selectors/multi_class.py
class MultiClassClassifier(ConfigurableMixin, AbstractModelBasedSelector):
    """
    Algorithm selector that treats selection as multi-class classification.

    Each instance is labelled with the position of its best (lowest-valued)
    algorithm, and a classifier is trained to predict that label from the
    instance features.

    Attributes
    ----------
    classifier : AbstractPredictor or None
        The trained classification model.
    """

    PREFIX = "multi_class_classifier"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[AbstractPredictor] = RandomForestClassifierWrapper,
        **kwargs: Any,
    ) -> None:
        """
        Set up the MultiClassClassifier.

        Parameters
        ----------
        model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper
            The class of the model to be used for classification.
        **kwargs : Any
            Additional keyword arguments, forwarded to the parent initializer.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier: AbstractPredictor | None = None

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Train the classifier on the per-instance best algorithm.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data.
        **kwargs : Any
            Additional keyword arguments (unused).

        Raises
        ------
        ValueError
            If algorithm features are set; this selector does not use them.
        RuntimeError
            If the model class produced no classifier.
        """
        if self.algorithm_features is not None:
            raise ValueError("MultiClassClassifier does not use algorithm features.")

        model = self.model_class()
        self.classifier = model
        if model is None:
            raise RuntimeError("Classifier could not be initialized.")

        # Label of an instance = column position of its lowest performance value.
        winners = performance.values.argmin(axis=1)
        model.fit(features, winners)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Select one algorithm per instance from the classifier's output.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame or None, default=None
            Unused by this selector.

        Returns
        -------
        dict
            Mapping from instance names to algorithm schedules.

        Raises
        ------
        RuntimeError
            If the classifier has not been fitted.
        ValueError
            If ``features`` is None.
        """
        if self.classifier is None:
            raise RuntimeError("Classifier has not been fitted.")

        if features is None:
            raise ValueError("MultiClassClassifier require features for prediction.")

        labels = self.classifier.predict(features)

        # Each predicted label is a position into self.algorithms.
        return {
            str(name): [
                (str(self.algorithms[int(labels[pos])]), float(self.budget or 0))
            ]
            for pos, name in enumerate(features.index)
        }

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type[AbstractPredictor]] | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Describe the configuration space of MultiClassClassifier.

        Parameters
        ----------
        model_class : list[type[AbstractPredictor]] or None, default=None
            List of model classes to include in the configuration space.
            ``None`` selects the random-forest and XGBoost classifier wrappers.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        candidates = (
            [RandomForestClassifierWrapper, XGBoostClassifierWrapper]
            if model_class is None
            else model_class
        )
        # The first candidate doubles as the default choice.
        model_choice = ClassChoice(
            "model_class", choices=candidates, default=candidates[0]
        )
        return [model_choice], [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[MultiClassClassifier]:
        """
        Build a partially-applied constructor from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is not modified.
        **kwargs : Any
            Additional keyword arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for MultiClassClassifier.
        """
        merged = {**clean_config, **kwargs}
        return partial(MultiClassClassifier, **merged)

__init__(model_class=RandomForestClassifierWrapper, **kwargs)

Initialize the MultiClassClassifier.

Parameters

model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper The class of the model to be used for classification. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/multi_class.py
def __init__(
    self,
    model_class: type[AbstractPredictor] = RandomForestClassifierWrapper,
    **kwargs: Any,
) -> None:
    """
    Initialize the MultiClassClassifier.

    Parameters
    ----------
    model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper
        The class of the model to be used for classification. It is passed
        on to ``AbstractModelBasedSelector.__init__``.
    **kwargs : Any
        Additional keyword arguments, forwarded to
        ``AbstractModelBasedSelector.__init__``.
    """
    # The parent initializer is called explicitly rather than via super().
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    # Starts as None; nothing is trained at construction time.
    self.classifier: AbstractPredictor | None = None

OSLLinearSelector

Bases: ConfigurableMixin, AbstractSelector

Selector using Optimistic Superset Loss (OSL) to predict runtimes.

Attributes

reg : float L2 regularization strength. optimizer_method : str Method for scipy.optimize.minimize. maxiter : int Maximum number of optimizer iterations. tol : float or None Tolerance for the optimizer. thetas : dict[str, np.ndarray] Learned parameters for each algorithm.

Source code in asf/selectors/osl_linear.py
class OSLLinearSelector(ConfigurableMixin, AbstractSelector):
    """
    Selector using Optimistic Superset Loss (OSL) to predict runtimes.

    One linear model (feature coefficients followed by an intercept) is fitted
    per algorithm; the algorithm with the lowest predicted value is selected.

    Attributes
    ----------
    reg : float
        L2 regularization strength.
    optimizer_method : str
        Method for scipy.optimize.minimize.
    maxiter : int
        Maximum number of optimizer iterations.
    tol : float or None
        Tolerance for the optimizer.
    thetas : dict[str, np.ndarray]
        Learned parameters for each algorithm.
    """

    PREFIX = "osl_linear"
    RETURN_TYPE = "single"

    def __init__(
        self,
        budget: float,
        reg: float = 0.0,
        optimizer_method: str = "L-BFGS-B",
        maxiter: int = 1000,
        tol: float | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the OSLLinearSelector.

        Parameters
        ----------
        budget : float
            Global cutoff time.
        reg : float, default=0.0
            L2 regularization strength.
        optimizer_method : str, default="L-BFGS-B"
            Optimization algorithm name for scipy.optimize.minimize.
        maxiter : int, default=1000
            Maximum number of optimizer iterations.
        tol : float or None, default=None
            Tolerance for the optimizer.
        **kwargs : Any
            Additional keyword arguments.
        """
        super().__init__(**kwargs)
        self.budget = float(budget)
        self.reg = float(reg)
        self.optimizer_method = optimizer_method
        self.maxiter = int(maxiter)
        self.tol = None if tol is None else float(tol)
        self.thetas: dict[str, np.ndarray] = {}

    def _osl_obj_grad(
        self,
        theta: np.ndarray,
        X: np.ndarray,
        y: np.ndarray,
        censored_mask: np.ndarray,
        C: float,
    ) -> tuple[float, np.ndarray]:
        """
        Compute loss and gradient for parameters theta.

        Uncensored, non-NaN observations contribute the squared error between
        observation and prediction. Censored observations contribute the
        squared distance to the cutoff, but only while the prediction is still
        below the cutoff. An L2 penalty ``0.5 * reg * ||theta||^2`` is added
        when ``reg`` is non-zero.

        Parameters
        ----------
        theta : np.ndarray
            Parameter vector.
        X : np.ndarray
            Design matrix.
        y : np.ndarray
            Observed runtimes.
        censored_mask : np.ndarray
            Boolean mask of censored observations.
        C : float
            Cutoff time.

        Returns
        -------
        tuple
            Tuple of (loss, gradient).
        """
        preds = X.dot(theta)
        precise_mask = ~censored_mask & ~np.isnan(y)
        # A censored run is only penalized while predicted below the cutoff.
        cens_pred_mask = censored_mask & (preds < C)

        # Empty selections yield empty residuals, i.e. zero loss and gradient.
        resid_precise = y[precise_mask] - preds[precise_mask]
        resid_cens = C - preds[cens_pred_mask]

        loss = float((resid_precise**2).sum()) + float((resid_cens**2).sum())
        grad = -2.0 * X[precise_mask].T.dot(resid_precise)
        grad += -2.0 * X[cens_pred_mask].T.dot(resid_cens)

        if self.reg:
            loss += 0.5 * self.reg * float(np.sum(theta**2))
            grad += self.reg * theta

        return float(loss), grad

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Fit linear models for each algorithm by minimizing OSL.

        Observations at or above the budget, and missing observations, are
        treated as censored. Each optimization starts from the least-squares
        solution on the uncensored observations when there are at least as
        many of them as parameters, and from zeros otherwise.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data.
        **kwargs : Any
            Additional keyword arguments.
        """
        X_base = np.asarray(features, dtype=float)
        n, d = X_base.shape
        # Trailing column of ones provides the intercept term.
        X = np.hstack([X_base, np.ones((n, 1), dtype=float)])

        self.algorithms = [str(a) for a in performance.columns]
        thetas: dict[str, np.ndarray] = {}

        cutoff = float(self.budget or 0)
        censored = (performance >= self.budget) | performance.isna()

        # Index the frames with the original column labels: looking columns up
        # by their stringified name fails when the labels are not strings.
        for column, algo in zip(performance.columns, self.algorithms):
            y_col = performance[column].to_numpy(dtype=float)
            cens_mask = censored[column].to_numpy(dtype=bool)

            theta0 = np.zeros(d + 1, dtype=float)
            unc_idx = (~cens_mask) & (~np.isnan(y_col))
            if unc_idx.sum() >= d + 1:
                try:
                    solution, *_ = np.linalg.lstsq(
                        X[unc_idx], y_col[unc_idx], rcond=None
                    )
                    theta0 = np.asarray(solution, dtype=float)
                except (np.linalg.LinAlgError, ValueError):
                    # Warm start is best-effort; keep the zero vector.
                    pass

            # jac=True: the objective returns (loss, gradient) together, so
            # each optimizer step evaluates it once instead of twice.
            res = minimize(
                fun=self._osl_obj_grad,
                x0=theta0,
                args=(X, y_col, cens_mask, cutoff),
                jac=True,
                method=self.optimizer_method,
                tol=self.tol,
                options={"maxiter": self.maxiter, "disp": False},
            )
            thetas[algo] = np.asarray(res.x, dtype=float)

        self.thetas = thetas

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict best algorithm per instance.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features.
        performance : pd.DataFrame or None, default=None
            Partial performance data (unused).

        Returns
        -------
        dict
            Mapping from instance name to algorithm schedules. The algorithm
            with the strictly lowest prediction is chosen; if no prediction is
            below +inf (for example all are NaN), the first algorithm is used.

        Raises
        ------
        ValueError
            If ``features`` is None.
        RuntimeError
            If the selector has not been fitted.
        """
        if features is None:
            raise ValueError("OSLLinearSelector requires features for prediction.")

        algs = [str(a) for a in self.algorithms]
        if not self.thetas or not algs:
            raise RuntimeError("OSLLinearSelector has not been fitted.")

        X_base = np.asarray(features, dtype=float)
        n = X_base.shape[0]
        X = np.hstack([X_base, np.ones((n, 1), dtype=float)])

        preds_per_algo = {
            algo: np.asarray(X.dot(theta), dtype=float)
            for algo, theta in self.thetas.items()
        }

        out: dict[str, list[tuple[str, float]]] = {}
        for i, idx in enumerate(features.index):
            # Fall back to the first algorithm so that a valid name is always
            # returned, even when every prediction is NaN or infinite.
            best_algo = algs[0]
            best_val = float("inf")
            for algo in algs:
                val = float(preds_per_algo[algo][i])
                if val < best_val:
                    best_val = val
                    best_algo = algo
            out[str(idx)] = [(best_algo, float(self.budget or 0))]
        return out

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for OSLLinearSelector.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        reg_param = Float(
            name="reg",
            bounds=(0.0, 10.0),
            default=0.0,
        )

        optimizer_method_param = Categorical(
            name="optimizer_method",
            items=["L-BFGS-B", "CG", "BFGS", "TNC", "SLSQP"],
            default="L-BFGS-B",
        )

        maxiter_param = Integer(
            name="maxiter",
            bounds=(100, 5000),
            default=1000,
        )

        tol_param = Float(
            name="tol",
            bounds=(1e-6, 1e-2),
            log=True,
            default=1e-5,
        )

        params = [
            reg_param,
            optimizer_method_param,
            maxiter_param,
            tol_param,
        ]

        return params, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[OSLLinearSelector]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override configuration entries
            with the same name.

        Returns
        -------
        partial
            Partial function for OSLLinearSelector.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(OSLLinearSelector, **config)

__init__(budget, reg=0.0, optimizer_method='L-BFGS-B', maxiter=1000, tol=None, **kwargs)

Initialize the OSLLinearSelector.

Parameters

budget : float Global cutoff time. reg : float, default=0.0 L2 regularization strength. optimizer_method : str, default="L-BFGS-B" Optimization algorithm name for scipy.optimize.minimize. maxiter : int, default=1000 Maximum number of optimizer iterations. tol : float or None, default=None Tolerance for the optimizer. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/osl_linear.py
def __init__(
    self,
    budget: float,
    reg: float = 0.0,
    optimizer_method: str = "L-BFGS-B",
    maxiter: int = 1000,
    tol: float | None = None,
    **kwargs: Any,
) -> None:
    """
    Set up an OSLLinearSelector and store its optimisation settings.

    Parameters
    ----------
    budget : float
        Global cutoff time.
    reg : float, default=0.0
        L2 regularization strength.
    optimizer_method : str, default="L-BFGS-B"
        Optimization algorithm name for scipy.optimize.minimize.
    maxiter : int, default=1000
        Maximum number of optimizer iterations.
    tol : float or None, default=None
        Tolerance for the optimizer. ``None`` is stored unchanged.
    **kwargs : Any
        Additional keyword arguments, forwarded to the parent initializer.
    """
    super().__init__(**kwargs)

    # Numeric settings are coerced to plain Python numbers on storage.
    self.budget = float(budget)
    self.reg = float(reg)
    self.optimizer_method = optimizer_method
    self.maxiter = int(maxiter)

    # Only a provided tolerance is converted; None stays None.
    if tol is not None:
        tol = float(tol)
    self.tol = tol

    # Starts empty; nothing in this initializer populates it.
    self.thetas: dict[str, np.ndarray] = dict()

PairwiseClassifier

Bases: ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator

Selector using pairwise comparison of algorithms.

Attributes

classifiers : list[AbstractPredictor] Trained classifiers for pairwise comparisons. use_weights : bool Whether to use weights based on performance differences.

Source code in asf/selectors/pairwise_classifier.py
class PairwiseClassifier(
    ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator
):
    """
    Selector using pairwise comparison of algorithms.

    One binary classifier is trained for every unordered pair of algorithms.
    At prediction time each classifier casts one vote per instance and the
    algorithm with the most votes is selected.

    Attributes
    ----------
    classifiers : list[AbstractPredictor]
        Trained classifiers for pairwise comparisons, stored in the order of
        the pairs ``(i, j)`` with ``i < j`` over ``self.algorithms``.
    use_weights : bool
        Whether to use weights based on performance differences.
    """

    PREFIX = "pairwise_classifier"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[AbstractPredictor] = RandomForestClassifierWrapper,
        use_weights: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the PairwiseClassifier.

        Parameters
        ----------
        model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper
            The classifier model class used for pairwise comparisons.
        use_weights : bool, default=True
            Whether to use weights based on performance differences.
        **kwargs : Any
            Additional keyword arguments, forwarded to
            AbstractModelBasedSelector.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.classifiers: list[AbstractPredictor] = []
        self.use_weights = bool(use_weights)

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit one classifier per unordered pair of algorithms.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data, one column per algorithm.
        **kwargs : Any
            Accepted for interface compatibility; not used here.

        Raises
        ------
        ValueError
            If algorithm features were provided.
        RuntimeError
            If ``model_class()`` returns ``None``.
        """
        if self.algorithm_features is not None:
            raise ValueError("PairwiseClassifier does not use algorithm features.")

        self.classifiers = []
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                val1 = performance[algorithm].to_numpy(dtype=float)
                val2 = performance[other_algorithm].to_numpy(dtype=float)

                # Label 1 means `algorithm` strictly beats `other_algorithm`
                # under the configured optimisation direction.
                if self.maximize:
                    diffs = (val1 > val2).astype(int)
                else:
                    diffs = (val1 < val2).astype(int)

                cur_model = self.model_class()
                if cur_model is None:
                    raise RuntimeError("Classifier could not be initialized.")

                cur_model.fit(
                    features,
                    diffs,
                    # Weight each instance by how far apart the two algorithms are.
                    sample_weight=None if not self.use_weights else np.abs(val1 - val2),
                )
                self.classifiers.append(cur_model)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            The query instance features.
        performance : pd.DataFrame or None, optional
            Not used by this selector.

        Returns
        -------
        dict
            Mapping from instance name to a one-element schedule
            ``[(algorithm, budget)]``.

        Raises
        ------
        ValueError
            If ``features`` is ``None``.
        """
        if features is None:
            raise ValueError("PairwiseClassifier require features for prediction.")
        votes = self.generate_features(features)
        result: dict[str, list[tuple[str, float]]] = {}
        # Rows are read by position: a label lookup would return several rows
        # when the feature index contains duplicate labels.
        for row, instance in enumerate(features.index):
            best_algo = votes.iloc[row].idxmax()
            result[str(instance)] = [(str(best_algo), float(self.budget or 0))]
        return result

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate vote counts for each algorithm.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input features. Any other 2-D array-like is wrapped in a
            DataFrame, using ``self.features`` as column names when available
            and ``f_0, f_1, ...`` otherwise.

        Returns
        -------
        pd.DataFrame
            DataFrame of vote counts for each algorithm, with one row per
            input row (same index) and one column per algorithm.
        """
        # Ensure input is a DataFrame
        if isinstance(base_features, pd.DataFrame):
            features_df = base_features
        else:
            cols = (
                self.features
                if self.features
                else [f"f_{i}" for i in range(base_features.shape[1])]
            )
            features_df = pd.DataFrame(base_features, columns=list(cols))  # type: ignore[arg-type]

        algorithms = list(self.algorithms)
        # Votes are accumulated positionally so that duplicate index labels
        # cannot cause a single prediction to be counted for several rows.
        counts = np.zeros((len(features_df), len(algorithms)), dtype=np.int64)
        model_idx = 0
        for i in range(len(algorithms)):
            for j in range(i + 1, len(algorithms)):
                pred = np.asarray(
                    self.classifiers[model_idx].predict(features_df)
                ).reshape(-1)
                # 1 means algorithm i is better, 0 means algorithm j is better
                counts[pred == 1, i] += 1
                counts[pred == 0, j] += 1
                model_idx += 1
        return pd.DataFrame(counts, index=features_df.index, columns=algorithms)  # type: ignore[arg-type]

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type[AbstractPredictor]] | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for PairwiseClassifier.

        Parameters
        ----------
        model_class : list[type[AbstractPredictor]] or None, default=None
            List of model classes. Defaults to the random forest and XGBoost
            classifier wrappers.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        if model_class is None:
            model_class = [RandomForestClassifierWrapper, XGBoostClassifierWrapper]

        hyperparameters = [
            ClassChoice("model_class", choices=model_class),
            Categorical("use_weights", items=[True, False], default=True),
        ]
        return hyperparameters, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[PairwiseClassifier]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration.
        **kwargs : Any
            Additional keyword arguments; these override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for PairwiseClassifier.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(PairwiseClassifier, **config)

__init__(model_class=RandomForestClassifierWrapper, use_weights=True, **kwargs)

Initialize the PairwiseClassifier.

Parameters

model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper The classifier model class used for pairwise comparisons. use_weights : bool, default=True Whether to use weights based on performance differences. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/pairwise_classifier.py
def __init__(
    self,
    model_class: type[AbstractPredictor] = RandomForestClassifierWrapper,
    use_weights: bool = True,
    **kwargs: Any,
) -> None:
    """
    Set up a PairwiseClassifier with no trained classifiers yet.

    Parameters
    ----------
    model_class : type[AbstractPredictor], default=RandomForestClassifierWrapper
        The classifier model class used for pairwise comparisons.
    use_weights : bool, default=True
        Whether to use weights based on performance differences. Stored as
        a plain ``bool``.
    **kwargs : Any
        Additional keyword arguments, forwarded to
        AbstractModelBasedSelector.
    """
    # Each base initializer is invoked explicitly; only the selector base
    # receives the model class and the extra keyword arguments.
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)

    self.classifiers: list[AbstractPredictor] = list()
    self.use_weights = bool(use_weights)

generate_features(base_features)

    Generate vote counts for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input features.

    Returns

    pd.DataFrame
        DataFrame of vote counts for each algorithm.
Source code in asf/selectors/pairwise_classifier.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate vote counts for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input features. Any other 2-D array-like is wrapped in a
        DataFrame, using ``self.features`` as column names when available
        and ``f_0, f_1, ...`` otherwise.

    Returns
    -------
    pd.DataFrame
        DataFrame of vote counts for each algorithm, with one row per input
        row (same index) and one column per algorithm.
    """
    # Ensure input is a DataFrame
    if isinstance(base_features, pd.DataFrame):
        features_df = base_features
    else:
        cols = (
            self.features
            if self.features
            else [f"f_{i}" for i in range(base_features.shape[1])]
        )
        features_df = pd.DataFrame(base_features, columns=list(cols))  # type: ignore[arg-type]

    algorithms = list(self.algorithms)
    # Votes are accumulated positionally so that duplicate index labels
    # cannot cause a single prediction to be counted for several rows.
    counts = np.zeros((len(features_df), len(algorithms)), dtype=np.int64)
    model_idx = 0
    for i in range(len(algorithms)):
        for j in range(i + 1, len(algorithms)):
            pred = np.asarray(
                self.classifiers[model_idx].predict(features_df)
            ).reshape(-1)
            # 1 means algorithm i is better, 0 means algorithm j is better
            counts[pred == 1, i] += 1
            counts[pred == 0, j] += 1
            model_idx += 1
    return pd.DataFrame(counts, index=features_df.index, columns=algorithms)  # type: ignore[arg-type]

PairwiseRegressor

Bases: ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator

Selector using pairwise regression of algorithms.

Attributes

regressors : list[AbstractPredictor] Trained regressors for pairwise comparisons.

Source code in asf/selectors/pairwise_regressor.py
class PairwiseRegressor(
    ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator
):
    """
    Selector using pairwise regression of algorithms.

    One regressor is trained for every unordered pair of algorithms to
    predict the performance difference ``first - second``. The predicted
    differences are aggregated into one score per algorithm.

    Attributes
    ----------
    regressors : list[AbstractPredictor]
        Trained regressors for pairwise comparisons, stored in the order of
        the pairs ``(i, j)`` with ``i < j`` over ``self.algorithms``.
    """

    PREFIX = "pairwise_regressor"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[AbstractPredictor] = RandomForestRegressorWrapper,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the PairwiseRegressor.

        Parameters
        ----------
        model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper
            The regression model class used for pairwise comparisons.
        **kwargs : Any
            Additional keyword arguments, forwarded to
            AbstractModelBasedSelector.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: list[AbstractPredictor] = []

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit one regressor per unordered pair of algorithms.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data, one column per algorithm.
        **kwargs : Any
            Accepted for interface compatibility; not used here.

        Raises
        ------
        ValueError
            If algorithm features were provided.
        RuntimeError
            If ``model_class()`` returns ``None``.
        """
        if self.algorithm_features is not None:
            raise ValueError("PairwiseRegressor does not use algorithm features.")

        self.regressors = []
        for i, algorithm in enumerate(self.algorithms):
            for other_algorithm in self.algorithms[i + 1 :]:
                val1 = performance[algorithm].to_numpy(dtype=float)
                val2 = performance[other_algorithm].to_numpy(dtype=float)

                # Regression target: performance gap of the pair.
                diffs = val1 - val2
                cur_model = self.model_class()
                if cur_model is None:
                    raise RuntimeError("Regressor could not be initialized.")

                cur_model.fit(features, diffs)
                self.regressors.append(cur_model)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            The query instance features.
        performance : pd.DataFrame or None, optional
            Not used by this selector.

        Returns
        -------
        dict
            Mapping from instance name to a one-element schedule
            ``[(algorithm, budget)]``.

        Raises
        ------
        ValueError
            If ``features`` is ``None``.
        """
        if features is None:
            raise ValueError("PairwiseRegressor require features for prediction.")
        scores = self.generate_features(features)
        result: dict[str, list[tuple[str, float]]] = {}
        # Rows are read by position: a label lookup would return several rows
        # when the feature index contains duplicate labels.
        for row, instance in enumerate(features.index):
            row_scores = scores.iloc[row]
            # If maximizing, we want the highest combined score (algo1 - algo2 > 0)
            # If minimizing, we want the lowest combined score
            if self.maximize:
                best_algo = row_scores.idxmax()
            else:
                best_algo = row_scores.idxmin()
            result[str(instance)] = [(str(best_algo), float(self.budget or 0))]
        return result

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate aggregated pairwise scores for each algorithm.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input features. Any other 2-D array-like is wrapped in a
            DataFrame, using ``self.features`` as column names when available
            and ``f_0, f_1, ...`` otherwise.

        Returns
        -------
        pd.DataFrame
            DataFrame of aggregated regression values for each algorithm,
            with one row per input row (same index) and one column per
            algorithm.
        """
        # Imported locally so this method does not rely on a module-level
        # numpy import.
        import numpy as np

        # Ensure input is a DataFrame
        if isinstance(base_features, pd.DataFrame):
            features_df = base_features
        else:
            cols = (
                self.features
                if self.features
                else [f"f_{i}" for i in range(base_features.shape[1])]
            )
            features_df = pd.DataFrame(base_features, columns=list(cols))  # type: ignore[arg-type]

        algorithms = list(self.algorithms)
        # Scores are accumulated positionally so that duplicate index labels
        # (or a labelled prediction object) cannot misalign the updates.
        totals = np.zeros((len(features_df), len(algorithms)), dtype=float)
        model_idx = 0
        for i in range(len(algorithms)):
            for j in range(i + 1, len(algorithms)):
                pred = np.asarray(
                    self.regressors[model_idx].predict(features_df), dtype=float
                ).reshape(-1)
                # pred is algo_i_perf - algo_j_perf
                totals[:, i] += pred
                totals[:, j] -= pred
                model_idx += 1
        return pd.DataFrame(
            totals,
            index=features_df.index,
            columns=pd.Index(algorithms),
        )

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type[AbstractPredictor]] | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for PairwiseRegressor.

        Parameters
        ----------
        model_class : list[type[AbstractPredictor]] or None, default=None
            List of model classes. Defaults to the random forest and XGBoost
            regressor wrappers.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        if model_class is None:
            model_class = [RandomForestRegressorWrapper, XGBoostRegressorWrapper]

        hyperparameters = [
            ClassChoice("model_class", choices=model_class),
        ]
        return hyperparameters, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[PairwiseRegressor]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration.
        **kwargs : Any
            Additional keyword arguments; these override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for PairwiseRegressor.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(PairwiseRegressor, **config)

__init__(model_class=RandomForestRegressorWrapper, **kwargs)

Initialize the PairwiseRegressor.

Parameters

model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper The regression model class used for pairwise comparisons. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/pairwise_regressor.py
def __init__(
    self,
    model_class: type[AbstractPredictor] = RandomForestRegressorWrapper,
    **kwargs: Any,
) -> None:
    """
    Set up a PairwiseRegressor with no trained regressors yet.

    Parameters
    ----------
    model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper
        The regression model class used for pairwise comparisons.
    **kwargs : Any
        Additional keyword arguments, forwarded to
        AbstractModelBasedSelector.
    """
    # Each base initializer is invoked explicitly; only the selector base
    # receives the model class and the extra keyword arguments.
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)

    self.regressors: list[AbstractPredictor] = list()

generate_features(base_features)

    Generate pairwise comparisons for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input features.

    Returns

    pd.DataFrame
        DataFrame of aggregated regression values for each algorithm.
Source code in asf/selectors/pairwise_regressor.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate aggregated pairwise scores for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input features. Any other 2-D array-like is wrapped in a
        DataFrame, using ``self.features`` as column names when available
        and ``f_0, f_1, ...`` otherwise.

    Returns
    -------
    pd.DataFrame
        DataFrame of aggregated regression values for each algorithm, with
        one row per input row (same index) and one column per algorithm.
    """
    # Imported locally so this method does not rely on a module-level
    # numpy import.
    import numpy as np

    # Ensure input is a DataFrame
    if isinstance(base_features, pd.DataFrame):
        features_df = base_features
    else:
        cols = (
            self.features
            if self.features
            else [f"f_{i}" for i in range(base_features.shape[1])]
        )
        features_df = pd.DataFrame(base_features, columns=list(cols))  # type: ignore[arg-type]

    algorithms = list(self.algorithms)
    # Scores are accumulated positionally so that duplicate index labels
    # (or a labelled prediction object) cannot misalign the updates.
    totals = np.zeros((len(features_df), len(algorithms)), dtype=float)
    model_idx = 0
    for i in range(len(algorithms)):
        for j in range(i + 1, len(algorithms)):
            pred = np.asarray(
                self.regressors[model_idx].predict(features_df), dtype=float
            ).reshape(-1)
            # pred is algo_i_perf - algo_j_perf
            totals[:, i] += pred
            totals[:, j] -= pred
            model_idx += 1
    return pd.DataFrame(
        totals,
        index=features_df.index,
        columns=pd.Index(algorithms),
    )

PerformanceModel

Bases: ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator

PerformanceModel predicts algorithm performance based on instance features.

It can handle both single-target (one model per algorithm) and multi-target regression models.

Attributes

model_class : type The class of the regression model to be used. use_multi_target : bool Whether to use multi-target regression. normalize : AbstractNormalization Method to normalize the performance data. regressors : list or object Trained regression models.

Source code in asf/selectors/performance_model.py
class PerformanceModel(
    ConfigurableMixin, AbstractModelBasedSelector, AbstractFeatureGenerator
):
    """
    PerformanceModel predicts algorithm performance based on instance features.

    It can handle both single-target (one model per algorithm) and multi-target
    regression models. When algorithm features are available in single-target
    mode, one joint model is trained on instance features concatenated with
    the features of each algorithm.

    Attributes
    ----------
    model_class : type
        The class of the regression model to be used.
    use_multi_target : bool
        Whether to use multi-target regression.
    normalize : AbstractNormalization
        Method to normalize the performance data.
    regressors : list or object
        Trained regression models; ``None`` until ``_fit`` has run.
    """

    PREFIX = "performance_model"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[AbstractPredictor] = RandomForestRegressorWrapper,
        use_multi_target: bool = False,
        normalize: AbstractNormalization | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the PerformanceModel.

        Parameters
        ----------
        model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper
            The class of the regression model to be used.
        use_multi_target : bool, default=False
            Indicates whether to use multi-target regression.
        normalize : AbstractNormalization or None, default=None
            Method to normalize performance data. If None, defaults to LogNormalization().
        **kwargs : Any
            Additional arguments for the parent classes.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        AbstractFeatureGenerator.__init__(self)
        self.regressors: list[AbstractPredictor] | AbstractPredictor | None = None
        self.use_multi_target = bool(use_multi_target)
        self.normalize = normalize if normalize is not None else LogNormalization()

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit the regression models.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The performance data, one column per algorithm in the order of
            ``self.algorithms``. It is normalized with ``self.normalize``
            before fitting.
        **kwargs : Any
            Accepted for interface compatibility; not used here.

        Raises
        ------
        ValueError
            If multi-target regression is requested together with algorithm
            features.
        """
        if self.normalize is not None:
            performance = self.normalize.fit_transform(performance)

        regressor_init_args: dict[str, Any] = {}
        # Safely check for input_size if it's a type (standard wrapper classes usually have it)
        try:
            sig = inspect.signature(self.model_class)
            if "input_size" in sig.parameters:
                regressor_init_args["input_size"] = features.shape[1]
        except (ValueError, TypeError):
            pass

        if self.use_multi_target:
            if self.algorithm_features is not None:
                raise ValueError(
                    "PerformanceModel does not use algorithm features for multi-target regression."
                )
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(features, performance)
        elif self.algorithm_features is None:
            # One independent model per algorithm.
            self.regressors = []
            for i, _ in enumerate(self.algorithms):
                algo_times = performance.iloc[:, i]
                cur_model = self.model_class(**regressor_init_args)
                cur_model.fit(features, algo_times)
                self.regressors.append(cur_model)
        else:
            # One joint model over (instance features + algorithm features).
            feature_parts = []
            target_parts = []
            for i, algorithm in enumerate(self.algorithms):
                # Align algorithm features with instance features
                data = pd.merge(
                    features,
                    self.algorithm_features.loc[[algorithm]]
                    .reindex([algorithm] * len(features))
                    .set_index(features.index),
                    left_index=True,
                    right_index=True,
                )
                data = pd.merge(
                    data,
                    performance.iloc[:, [i]],
                    left_index=True,
                    right_index=True,
                )
                # Split inputs and target per algorithm. The target column is
                # named after the algorithm, so concatenating the merged
                # frames directly would create one target column per
                # algorithm (mostly NaN) instead of a single stacked target.
                feature_parts.append(data.iloc[:, :-1])
                target_parts.append(data.iloc[:, -1])
            train_inputs = pd.concat(feature_parts)
            train_target = pd.concat(target_parts)
            if "input_size" in regressor_init_args:
                # The joint model also sees the algorithm-feature columns.
                regressor_init_args["input_size"] = train_inputs.shape[1]
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(train_inputs, train_target)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame or None, optional
            Not used by this selector.

        Returns
        -------
        dict
            Mapping from instance name to a one-element schedule
            ``[(algorithm, budget)]``.

        Raises
        ------
        ValueError
            If ``features`` is ``None``.
        """
        if features is None:
            raise ValueError("PerformanceModel require features for prediction.")
        predictions = self.generate_features(features)

        results: dict[str, list[tuple[str, float]]] = {}
        for i, instance_name in enumerate(features.index):
            idx = (
                int(np.argmax(predictions.iloc[i]))
                if self.maximize
                else int(np.argmin(predictions.iloc[i]))
            )
            results[str(instance_name)] = [
                (str(self.algorithms[idx]), float(self.budget or 0))
            ]
        return results

    def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
        """
        Generate predictions for each algorithm.

        The model outputs are returned as produced; the normalization applied
        to the performance data in ``_fit`` is not inverted here.

        Parameters
        ----------
        base_features : pd.DataFrame
            The input features.

        Returns
        -------
        pd.DataFrame
            Predicted performance for each algorithm (n_instances x
            n_algorithms), indexed like ``base_features`` with one column per
            algorithm.

        Raises
        ------
        RuntimeError
            If the model has not been fitted or the stored regressors do not
            match the configured mode.
        """
        if self.regressors is None:
            raise RuntimeError("Model has not been fitted.")

        predictions = np.zeros((base_features.shape[0], len(self.algorithms)))

        if self.use_multi_target:
            if not isinstance(self.regressors, AbstractPredictor):
                raise RuntimeError("Multi-target regressor missing.")
            predictions = self.regressors.predict(base_features)
            if isinstance(predictions, pd.DataFrame):
                predictions = predictions.values
        elif self.algorithm_features is None:
            if not isinstance(self.regressors, list):
                raise RuntimeError("Individual regressors missing.")
            for i, _ in enumerate(self.algorithms):
                predictions[:, i] = np.asarray(
                    self.regressors[i].predict(base_features)
                ).flatten()
        else:
            if not isinstance(self.regressors, AbstractPredictor):
                raise RuntimeError("Joint regressor missing.")
            for i, algorithm in enumerate(self.algorithms):
                # Same input layout as used for training the joint model.
                data = pd.merge(
                    base_features,
                    self.algorithm_features.loc[[algorithm]]
                    .reindex([algorithm] * len(base_features))
                    .set_index(base_features.index),
                    left_index=True,
                    right_index=True,
                )
                # Flattened like the per-algorithm branch so that column
                # vectors of shape (n, 1) are accepted as well.
                predictions[:, i] = np.asarray(
                    self.regressors.predict(data)
                ).flatten()

        return pd.DataFrame(
            predictions,
            index=base_features.index,
            columns=pd.Index(list(self.algorithms)),
        )

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type[AbstractPredictor]] | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for PerformanceModel.

        Parameters
        ----------
        model_class : list[type[AbstractPredictor]] or None, default=None
            List of model classes to include in the configuration space. The
            first entry is used as the default choice.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three are
            empty when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        if model_class is None:
            model_class = [RandomForestRegressorWrapper, XGBoostRegressorWrapper]

        hyperparameters = [
            ClassChoice("model_class", choices=model_class, default=model_class[0]),
        ]
        return hyperparameters, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[PerformanceModel]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration.
        **kwargs : Any
            Additional keyword arguments; these override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for PerformanceModel.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(PerformanceModel, **config)

__init__(model_class=RandomForestRegressorWrapper, use_multi_target=False, normalize=None, **kwargs)

Initialize the PerformanceModel.

Parameters

model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper The class of the regression model to be used. use_multi_target : bool, default=False Indicates whether to use multi-target regression. normalize : AbstractNormalization or None, default=None Method to normalize performance data. If None, defaults to LogNormalization(). **kwargs : Any Additional arguments for the parent classes.

Source code in asf/selectors/performance_model.py
def __init__(
    self,
    model_class: type[AbstractPredictor] = RandomForestRegressorWrapper,
    use_multi_target: bool = False,
    normalize: AbstractNormalization | None = None,
    **kwargs: Any,
) -> None:
    """
    Set up a PerformanceModel in its unfitted state.

    Parameters
    ----------
    model_class : type[AbstractPredictor], default=RandomForestRegressorWrapper
        The class of the regression model to be used.
    use_multi_target : bool, default=False
        Indicates whether to use multi-target regression. Stored as a plain
        ``bool``.
    normalize : AbstractNormalization or None, default=None
        Method to normalize performance data. If None, defaults to LogNormalization().
    **kwargs : Any
        Additional arguments for the parent classes.
    """
    # Each base initializer is invoked explicitly; only the selector base
    # receives the model class and the extra keyword arguments.
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    AbstractFeatureGenerator.__init__(self)

    # None marks the model as not fitted yet.
    self.regressors: list[AbstractPredictor] | AbstractPredictor | None = None
    self.use_multi_target = bool(use_multi_target)

    # A fresh LogNormalization is created only when no normalizer is given.
    if normalize is None:
        normalize = LogNormalization()
    self.normalize = normalize

generate_features(base_features)

Generate predictions for each algorithm.

Parameters

base_features : pd.DataFrame The input features.

Returns

pd.DataFrame Predicted performance for each algorithm (n_instances x n_algorithms).

Source code in asf/selectors/performance_model.py
def generate_features(self, base_features: pd.DataFrame) -> pd.DataFrame:
    """
    Generate predicted performance for each algorithm.

    Parameters
    ----------
    base_features : pd.DataFrame
        The input features, one row per instance.

    Returns
    -------
    pd.DataFrame
        Predicted performance (n_instances x n_algorithms), indexed like
        ``base_features`` with one column per entry of ``self.algorithms``.

    Raises
    ------
    RuntimeError
        If the model has not been fitted, or if the stored regressor(s) do
        not match the active mode (multi-target, per-algorithm or joint).
    """
    if self.regressors is None:
        raise RuntimeError("Model has not been fitted.")

    if self.use_multi_target:
        # A single regressor predicts all algorithms at once.
        if not isinstance(self.regressors, AbstractPredictor):
            raise RuntimeError("Multi-target regressor missing.")
        predictions = self.regressors.predict(base_features)
        if isinstance(predictions, pd.DataFrame):
            predictions = predictions.values
    elif self.algorithm_features is None:
        # One regressor per algorithm, addressed by position.
        if not isinstance(self.regressors, list):
            raise RuntimeError("Individual regressors missing.")
        predictions = np.zeros((base_features.shape[0], len(self.algorithms)))
        for i, _ in enumerate(self.algorithms):
            predictions[:, i] = np.asarray(
                self.regressors[i].predict(base_features)
            ).flatten()
    else:
        # One joint regressor fed with instance features plus the features
        # of the algorithm being evaluated.
        if not isinstance(self.regressors, AbstractPredictor):
            raise RuntimeError("Joint regressor missing.")
        predictions = np.zeros((base_features.shape[0], len(self.algorithms)))
        n_rows = len(base_features)
        for i, algorithm in enumerate(self.algorithms):
            # Repeat the algorithm's feature row once per instance and align
            # it with the instance index so the merge is row-for-row.
            algo_row = (
                self.algorithm_features.loc[[algorithm]]
                .reindex([algorithm] * n_rows)
                .set_index(base_features.index)
            )
            data = pd.merge(
                base_features,
                algo_row,
                left_index=True,
                right_index=True,
            )
            # Flatten like the per-algorithm branch: an (n, 1) prediction
            # cannot be assigned into a 1-D column slice otherwise.
            predictions[:, i] = np.asarray(self.regressors.predict(data)).flatten()

    return pd.DataFrame(
        predictions,
        index=base_features.index,
        columns=pd.Index(list(self.algorithms)),
    )

SATzilla

Bases: ConfigurableMixin, AbstractEPMBasedSelector, AbstractModelBasedSelector

SATzilla-like selector using iterative imputation for censored runtimes.

Uses per-algorithm ridge models on expanded features.

Attributes

- epms : dict[str, dict[str, EPM]] — Mapping from algorithm name to another mapping of label to EPM.
- label_classifier : AbstractPredictor or None — Model trained to predict instance labels (e.g., SAT/UNSAT).
- labels : list[str] — Unique labels used for conditioning EPMs.

Source code in asf/selectors/satzilla.py
class SATzilla(ConfigurableMixin, AbstractEPMBasedSelector, AbstractModelBasedSelector):
    """
    SATzilla-like selector using iterative imputation for censored runtimes.

    Uses per-algorithm ridge models on expanded features.

    Attributes
    ----------
    epms : dict[str, dict[str, EPM]]
        Mapping from algorithm name to another mapping of label to EPM.
    label_classifier : AbstractPredictor or None
        Model trained to predict instance labels (e.g., SAT/UNSAT).
    labels : list[str]
        Unique labels used for conditioning EPMs.
    """

    PREFIX = "satzilla"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[Any] = RandomForestClassifierWrapper,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the SATzilla selector.

        Parameters
        ----------
        model_class : type, default=RandomForestClassifierWrapper
            The class of the model used for label classification.
        **kwargs : Any
            Additional keyword arguments, forwarded to the parent classes.
        """
        super().__init__(model_class=model_class, **kwargs)
        self.epms: dict[str, dict[str, EPM]] = {}
        self.label_classifier: Any = None
        self.labels: list[str] = []

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        labels: pd.DataFrame | pd.Series | list[str] | np.ndarray | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Fit one EPM per (algorithm, label) pair and, if labels are given,
        a label classifier.

        Parameters
        ----------
        features : pd.DataFrame
            Training features (instances x features).
        performance : pd.DataFrame
            Training performance matrix (instances x algorithms).
        labels : pd.DataFrame, pd.Series, list, or np.ndarray, optional
            Optional labels for training conditioned EPMs. When omitted,
            every instance gets the single label ``"default"`` and no
            classifier is trained.
        **kwargs : Any
            Accepted for interface compatibility; not used here.
        """
        if labels is None:
            labels_series = pd.Series(["default"] * len(features), index=features.index)
            self.label_classifier = None
            self.labels = ["default"]
        else:
            if isinstance(labels, pd.DataFrame):
                labels_series = labels.squeeze(axis=1)
            elif isinstance(labels, pd.Series):
                labels_series = labels
            else:
                labels_series = pd.Series(labels, index=features.index)

            # Align labels with the feature rows when the indexes differ.
            if not labels_series.index.equals(features.index):
                labels_series = labels_series.reindex(features.index)

            self.label_classifier = self.model_class()
            self.label_classifier.fit(features.values, labels_series.values)

            # Extract unique labels; they are always stored as strings.
            if hasattr(self.label_classifier, "model_class") and hasattr(
                self.label_classifier.model_class, "classes_"
            ):
                self.labels = [
                    str(c) for c in self.label_classifier.model_class.classes_
                ]
            else:
                self.labels = [str(c) for c in np.unique(labels_series.values)]

        for algo in self.algorithms:
            self.epms[str(algo)] = {}
            for label in self.labels:
                idx = labels_series.astype(str) == str(label)
                if idx.sum() == 0:
                    # No training instance carries this label.
                    continue
                self.epms[str(algo)][str(label)] = EPM(**self.epm_kwargs)
                X_sub = features.loc[idx]
                y_sub = performance.loc[idx, algo]
                self.epms[str(algo)][str(label)].fit(X_sub, y_sub)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        The score of an algorithm is the sum over labels of the label
        weight times the prediction of that label's EPM; the algorithm
        with the smallest score is selected.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features.
        performance : pd.DataFrame or None, default=None
            Partial performance data. Not used by this method.

        Returns
        -------
        dict
            Mapping from instance name to a one-element schedule
            ``[(algorithm, budget)]``.

        Raises
        ------
        ValueError
            If ``features`` is None.
        """
        if features is None:
            raise ValueError("SATzilla requires features for prediction.")
        n_instances = features.shape[0]
        n_algorithms = len(self.algorithms)
        preds = np.zeros((n_instances, n_algorithms), dtype=float)

        if self.label_classifier is None:
            label_probs = np.ones((n_instances, 1), dtype=float)
        elif hasattr(self.label_classifier, "model_class") and hasattr(
            self.label_classifier.model_class, "predict_proba"
        ):
            label_probs = self.label_classifier.model_class.predict_proba(
                features.values
            )
        else:
            # Fallback: turn hard predictions into one-hot weights.
            # self.labels holds str(label), so the predictions are
            # stringified the same way before comparing; comparing raw
            # non-string predictions with string labels never matches.
            raw_preds = np.asarray(
                self.label_classifier.predict(features.values)
            ).ravel()
            hard_preds = np.asarray([str(p) for p in raw_preds], dtype=object)
            classes = np.asarray(self.labels, dtype=object)
            label_probs = (hard_preds[:, None] == classes[None, :]).astype(float)

        for j, algo in enumerate(self.algorithms):
            for k, label in enumerate(self.labels):
                if str(algo) not in self.epms or str(label) not in self.epms[str(algo)]:
                    continue
                pred_time = np.asarray(
                    self.epms[str(algo)][str(label)].predict(features)
                )
                preds[:, j] += label_probs[:, k] * pred_time

        best_idx = np.argmin(preds, axis=1)
        results: dict[str, list[tuple[str, float]]] = {}
        for i, inst in enumerate(features.index):
            j = int(best_idx[i])
            algo = str(self.algorithms[j])
            results[str(inst)] = [(algo, float(self.budget or 0))]
        return results

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type] | None = None, **kwargs: Any
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for SATzilla.

        Parameters
        ----------
        model_class : list[type] or None, default=None
            List of model classes to choose from. Defaults to
            ``[RidgeRegressorWrapper]``.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three
            are empty lists when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        # NOTE(review): the default choice here is a regressor wrapper while
        # __init__ defaults to a classifier wrapper for label classification
        # -- confirm which one is intended.
        if model_class is None:
            model_class = [RidgeRegressorWrapper]

        model_class_param = ClassChoice(
            name="model_class",
            choices=model_class,
            default=model_class[0],
        )

        use_log10_param = Categorical(
            name="use_log10",
            items=[True, False],
            default=True,
        )

        em_max_iter_param = Integer(
            name="em_max_iter",
            bounds=(5, 50),
            default=20,
        )

        em_tol_param = Float(
            name="em_tol",
            bounds=(1e-6, 1e-2),
            log=True,
            default=1e-3,
        )

        em_min_sigma_param = Float(
            name="em_min_sigma",
            bounds=(1e-8, 1e-1),
            log=True,
            default=1e-6,
        )

        params = [
            model_class_param,
            use_log10_param,
            em_max_iter_param,
            em_tol_param,
            em_min_sigma_param,
        ]

        return params, [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[SATzilla]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for SATzilla.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(SATzilla, **config)

__init__(model_class=RandomForestClassifierWrapper, **kwargs)

Initialize the SATzilla selector.

Parameters

- model_class : type, default=RandomForestClassifierWrapper — The class of the model used for label classification.
- **kwargs : Any — Additional keyword arguments.

Source code in asf/selectors/satzilla.py
def __init__(
    self,
    model_class: type[Any] = RandomForestClassifierWrapper,
    **kwargs: Any,
) -> None:
    """
    Create a SATzilla selector with no trained models.

    Parameters
    ----------
    model_class : type, default=RandomForestClassifierWrapper
        Model class used for label classification.
    **kwargs : Any
        Extra keyword arguments, passed through to the parent initializer.
    """
    super().__init__(model_class=model_class, **kwargs)

    # Start with empty state: no known labels, no label classifier and no
    # per-algorithm / per-label EPMs.
    self.labels: list[str] = []
    self.label_classifier: Any = None
    self.epms: dict[str, dict[str, EPM]] = {}

SNNAP

Bases: ConfigurableMixin, AbstractSelector

SNNAP (Simple Nearest Neighbor Algorithm Portfolio) selector.

Attributes

- k : int — Number of neighbors to use.
- metric : str — Distance metric for NearestNeighbors.
- random_state : int or None — Random seed for reproducibility.
- nn_model : NearestNeighbors or None — Trained NearestNeighbors model.
- features_df : pd.DataFrame or None — Training features.
- performance_df : pd.DataFrame or None — Training performance.

Source code in asf/selectors/snnap.py
class SNNAP(ConfigurableMixin, AbstractSelector):
    """
    SNNAP (Simple Nearest Neighbor Algorithm Portfolio) selector.

    Attributes
    ----------
    k : int
        Number of neighbors to use.
    metric : str
        Distance metric for NearestNeighbors.
    random_state : int or None
        Random seed for reproducibility. Stored on the instance; the
        methods of this class do not read it.
    nn_model : NearestNeighbors or None
        Trained NearestNeighbors model.
    features_df : pd.DataFrame or None
        Training features.
    performance_df : pd.DataFrame or None
        Training performance.
    """

    PREFIX = "snnap"
    RETURN_TYPE = "single"

    def __init__(
        self,
        k: int = 5,
        metric: str = "euclidean",
        random_state: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the SNNAP selector.

        Parameters
        ----------
        k : int, default=5
            Number of neighbors to use.
        metric : str, default='euclidean'
            Distance metric for NearestNeighbors.
        random_state : int or None, default=None
            Random seed for reproducibility.
        **kwargs : Any
            Additional keyword arguments, forwarded to the parent class.
        """
        super().__init__(**kwargs)
        self.k = int(k)
        self.metric = str(metric)
        self.random_state = random_state

        self.features_df: pd.DataFrame | None = None
        self.performance_df: pd.DataFrame | None = None
        self.nn_model: NearestNeighbors | None = None

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Fit the NearestNeighbors model.

        Copies of both frames are kept on the instance for use at
        prediction time.

        Parameters
        ----------
        features : pd.DataFrame
            The training features.
        performance : pd.DataFrame
            The training performance data.
        **kwargs : Any
            Accepted for interface compatibility; not used here.
        """
        self.features_df = features.copy()
        self.performance_df = performance.copy()

        # Never ask for more neighbors than there are training instances.
        n_neighbors = min(self.k, len(self.features_df))
        self.nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric=self.metric)
        self.nn_model.fit(self.features_df.values)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Every neighbor votes for its own best algorithm; the algorithm with
        the most votes wins. Ties are broken by the mean of the winning
        performance values recorded for each tied candidate.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features.
        performance : pd.DataFrame or None, default=None
            Not used by this method.

        Returns
        -------
        dict
            Mapping from instance name to algorithm schedules. An instance
            whose neighbors have no valid performance value maps to an
            empty list.

        Raises
        ------
        ValueError
            If ``features`` is None.
        RuntimeError
            If the selector has not been fitted.
        """
        if features is None:
            raise ValueError("SNNAP requires features for prediction.")
        if (
            self.nn_model is None
            or self.features_df is None
            or self.performance_df is None
        ):
            raise RuntimeError("SNNAP must be fitted before prediction.")

        predictions: dict[str, list[tuple[str, float]]] = {}
        if len(features) == 0:
            return predictions

        # One batched query for all rows instead of one query per instance.
        n_neighbors = min(self.k, len(self.features_df))
        _, all_neighbor_idxs = self.nn_model.kneighbors(
            features.values, n_neighbors=n_neighbors
        )
        schedule_budget = float(self.budget or 0)

        for instance_name, neighbor_idxs in zip(features.index, all_neighbor_idxs):
            votes: dict[str, int] = {}
            runtimes_for_candidates: dict[str, list[float]] = {}

            for ni in neighbor_idxs:
                valid = self.performance_df.iloc[ni].dropna()
                if valid.empty:
                    continue
                # Best algorithm for this neighbor. The raw column label is
                # kept for the lookup: str(label) would raise KeyError when
                # the columns are not strings.
                best_label = valid.idxmax() if self.maximize else valid.idxmin()
                best_algo = str(best_label)
                votes[best_algo] = votes.get(best_algo, 0) + 1
                runtimes_for_candidates.setdefault(best_algo, []).append(
                    float(valid.loc[best_label])
                )

            if not votes:
                predictions[str(instance_name)] = []
                continue

            # Identify candidate(s) with max votes
            max_votes = max(votes.values())
            candidates = [a for a, c in votes.items() if c == max_votes]

            if len(candidates) == 1:
                chosen = candidates[0]
            else:
                # Tie-break: smallest mean runtime (or largest mean performance)
                mean_perfs = {
                    algo: float(np.mean(runtimes_for_candidates[algo]))
                    for algo in candidates
                }
                pick = max if self.maximize else min
                chosen = pick(mean_perfs, key=mean_perfs.get)

            predictions[str(instance_name)] = [(str(chosen), schedule_budget)]

        return predictions

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for SNNAP.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three
            are empty lists when ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        k_param = Integer(
            name="k",
            bounds=(1, 50),
            default=5,
        )

        metric_param = Categorical(
            name="metric",
            items=["euclidean", "manhattan", "minkowski", "cosine"],
            default="euclidean",
        )

        return [k_param, metric_param], [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[SNNAP]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for SNNAP.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(SNNAP, **config)

__init__(k=5, metric='euclidean', random_state=None, **kwargs)

Initialize the SNNAP selector.

Parameters

- k : int, default=5 — Number of neighbors to use.
- metric : str, default='euclidean' — Distance metric for NearestNeighbors.
- random_state : int or None, default=None — Random seed for reproducibility.
- **kwargs : Any — Additional keyword arguments.

Source code in asf/selectors/snnap.py
def __init__(
    self,
    k: int = 5,
    metric: str = "euclidean",
    random_state: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create an unfitted SNNAP selector.

    Parameters
    ----------
    k : int, default=5
        Number of neighbors to use; coerced with ``int()``.
    metric : str, default='euclidean'
        Distance metric for NearestNeighbors; coerced with ``str()``.
    random_state : int or None, default=None
        Random seed for reproducibility; stored unchanged.
    **kwargs : Any
        Extra keyword arguments, passed through to the parent initializer.
    """
    super().__init__(**kwargs)

    # Configuration.
    self.k = int(k)
    self.metric = str(metric)
    self.random_state = random_state

    # Fitted state; everything stays None until the selector is fitted.
    self.nn_model: NearestNeighbors | None = None
    self.performance_df: pd.DataFrame | None = None
    self.features_df: pd.DataFrame | None = None

SUNNY

Bases: ConfigurableMixin, AbstractSelector

SUNNY/SUNNY-AS2 algorithm selector.

This selector uses k-nearest neighbors (k-NN) in feature space to construct a schedule. When SUNNY-AS2 is enabled, k is optimized.

Attributes

- k : int — Number of neighbors for k-NN.
- use_v2 : bool — Whether to tune k using cross-validation.
- n_folds : int — Number of folds for cross-validation when tuning.
- k_candidates : list[int] — Candidate k values for tuning.
- random_state : int — Random seed for reproducibility.
- use_tsunny : bool — Whether to tune the maximum number of algorithms.
- algorithm_limit : int or None — Manual cap on the number of algorithms in each schedule.
- tuned_algorithm_limit : int or None — Tuned cap on the number of algorithms.
- features_df : pd.DataFrame or None — Training features.
- performance_df : pd.DataFrame or None — Training performance.
- knn : NearestNeighbors or None — Trained k-NN model.

Source code in asf/selectors/sunny.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
class SUNNY(ConfigurableMixin, AbstractSelector):
    """
    SUNNY/SUNNY-AS2 algorithm selector.

    This selector uses k-nearest neighbors (k-NN) in feature space to construct
    a schedule. When SUNNY-AS2 is enabled, k is optimized.

    Attributes
    ----------
    k : int
        Number of neighbors for k-NN.
    use_v2 : bool
        Whether to tune k using cross-validation.
    n_folds : int
        Number of folds for cross-validation when tuning.
    k_candidates : list[int]
        Candidate k values for tuning.
    random_state : int
        Random seed for reproducibility.
    use_tsunny : bool
        Whether to tune the maximum number of algorithms.
    algorithm_limit : int or None
        Manual cap on the number of algorithms in each schedule.
    tuned_algorithm_limit : int or None
        Tuned cap on the number of algorithms.
    features_df : pd.DataFrame or None
        Training features.
    performance_df : pd.DataFrame or None
        Training performance.
    knn : NearestNeighbors or None
        Trained k-NN model.
    """

    PREFIX = "sunny"
    RETURN_TYPE = "schedule"

    def __init__(
        self,
        k: int = 10,
        use_v2: bool = False,
        n_folds: int = 5,
        k_candidates: list[int] | None = None,
        random_state: int = 42,
        use_tsunny: bool = False,
        algorithm_limit: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create an unfitted SUNNY selector.

        Parameters
        ----------
        k : int, default=10
            Number of neighbors for k-NN.
        use_v2 : bool, default=False
            Tune k with cross-validation (SUNNY-AS2) when true.
        n_folds : int, default=5
            Number of cross-validation folds used while tuning.
        k_candidates : list[int] or None, default=None
            Candidate k values for tuning. ``None`` or an empty list selects
            ``[3, 5, 7, 10, 20, 50]``.
        random_state : int, default=42
            Random seed for reproducibility.
        use_tsunny : bool, default=False
            Tune the maximum number of algorithms with cross-validation.
        algorithm_limit : int or None, default=None
            If set, cap on the number of algorithms in each schedule.
        **kwargs : Any
            Extra keyword arguments, passed through to the parent initializer.
        """
        super().__init__(**kwargs)

        # Configuration, coerced to plain Python types where applicable.
        self.k = int(k)
        self.use_v2 = bool(use_v2)
        self.n_folds = int(n_folds)
        if not k_candidates:
            k_candidates = [3, 5, 7, 10, 20, 50]
        self.k_candidates = k_candidates
        self.random_state = int(random_state)
        self.use_tsunny = bool(use_tsunny)
        self.algorithm_limit = algorithm_limit

        # State produced by fitting; None until then.
        self.tuned_algorithm_limit: int | None = None
        self.knn: NearestNeighbors | None = None
        self.performance_df: pd.DataFrame | None = None
        self.features_df: pd.DataFrame | None = None

    def _fit(
        self, features: pd.DataFrame, performance: pd.DataFrame, **kwargs: Any
    ) -> None:
        """
        Store the training data, run the optional tuning steps and fit k-NN.

        Performance values above the budget are replaced by NaN in the
        stored copy; the caller's frames are left untouched.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data.
        **kwargs : Any
            Accepted for interface compatibility; not used here.
        """
        self.features_df = features.copy()

        capped = performance.copy()
        limit = float(self.budget or 1e10)  # 1e10 stands in for "no budget"
        over_budget = capped > limit
        capped[over_budget] = np.nan
        self.performance_df = capped

        # SUNNY-AS2: replace k by the cross-validated choice.
        if self.use_v2:
            self.k = self._tune_k()

        # Tune the schedule length only when no manual cap was given.
        if self.algorithm_limit is None and self.use_tsunny:
            self.tuned_algorithm_limit = self._tune_algorithm_limit()

        n_neighbors = min(self.k, len(self.features_df))
        self.knn = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean")
        self.knn.fit(self.features_df.values)

    def _tune_k(self) -> int:
        """
        Tune the neighborhood size k via cross-validation.

        Returns
        -------
        int
            The best k value found.
        """
        if self.features_df is None or self.performance_df is None:
            return self.k

        best_k = self.k
        best_score = float("inf")
        n_splits = min(self.n_folds, max(2, len(self.features_df)))

        if n_splits < 2:
            return best_k

        kf = KFold(n_splits=n_splits, shuffle=True, random_state=self.random_state)
        indices = np.arange(len(self.features_df))

        for candidate_k in self.k_candidates:
            fold_scores = []
            for train_idx, val_idx in kf.split(indices):
                train_feat = self.features_df.iloc[train_idx]
                train_perf = self.performance_df.iloc[train_idx]
                val_feat = self.features_df.iloc[val_idx]
                val_perf = self.performance_df.iloc[val_idx]

                if len(train_feat) == 0:
                    continue

                knn = NearestNeighbors(
                    n_neighbors=min(candidate_k, len(train_feat)),
                    metric="euclidean",
                )
                knn.fit(train_feat.values)

                total_cost = 0.0
                for instance in val_feat.index:
                    x = val_feat.loc[[instance]].values
                    _, n_idx = knn.kneighbors(
                        x, n_neighbors=min(candidate_k, len(train_feat))
                    )
                    n_perf = train_perf.iloc[n_idx.flatten()]
                    schedule = self._construct_sunny_schedule(n_perf)

                    inst_perf = val_perf.loc[instance]
                    solved = False
                    for algo, _ in schedule:
                        runtime = inst_perf[algo]
                        if not pd.isna(runtime) and runtime <= float(
                            self.budget or 1e10
                        ):
                            total_cost += float(runtime)
                            solved = True
                            break
                    if not solved:
                        total_cost += float(self.budget or 1e10)

                fold_scores.append(total_cost / len(val_feat))

            mean_score = float(np.mean(fold_scores)) if fold_scores else float("inf")
            if mean_score < best_score:
                best_score = mean_score
                best_k = int(candidate_k)

        return best_k

    def _tune_algorithm_limit(self) -> int:
        """
        Tune the maximum number of algorithms via cross-validation.

        Returns
        -------
        int
            Best limit found.
        """
        if self.features_df is None or self.performance_df is None:
            return len(self.algorithms)

        n_solvers = len(self.performance_df.columns)
        if n_solvers <= 1:
            return n_solvers

        n_splits = min(self.n_folds, max(2, len(self.features_df)))
        if n_splits < 2:
            return n_solvers

        best_lam = n_solvers
        best_score = float("inf")
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=self.random_state)
        indices = np.arange(len(self.features_df))

        for lam in range(1, n_solvers + 1):
            fold_scores = []
            for train_idx, val_idx in kf.split(indices):
                train_feat = self.features_df.iloc[train_idx]
                train_perf = self.performance_df.iloc[train_idx]
                val_feat = self.features_df.iloc[val_idx]
                val_perf = self.performance_df.iloc[val_idx]

                knn = NearestNeighbors(
                    n_neighbors=min(self.k, len(train_feat)), metric="euclidean"
                )
                knn.fit(train_feat.values)

                total_cost = 0.0
                for instance in val_feat.index:
                    x = val_feat.loc[[instance]].values
                    _, n_idx = knn.kneighbors(
                        x, n_neighbors=min(self.k, len(train_feat))
                    )
                    n_perf = train_perf.iloc[n_idx.flatten()]
                    schedule = self._construct_sunny_schedule(n_perf, lam_limit=lam)

                    inst_perf = val_perf.loc[instance]
                    solved = False
                    for algo, _ in schedule:
                        runtime = inst_perf[algo]
                        if not pd.isna(runtime) and runtime <= float(
                            self.budget or 1e10
                        ):
                            total_cost += float(runtime)
                            solved = True
                            break
                    if not solved:
                        total_cost += float(self.budget or 1e10)
                fold_scores.append(total_cost / len(val_feat))

            mean_score = float(np.mean(fold_scores)) if fold_scores else float("inf")
            if mean_score < best_score:
                best_score = mean_score
                best_lam = lam

        return best_lam

    def _mine_solvers(
        self,
        neighbor_perf: pd.DataFrame,
        cutoff: int,
        already_selected: list[str] | None = None,
        already_covered: set[str] | None = None,
    ) -> list[str]:
        """
        Recursive greedy set cover to identify a portfolio.
        """
        if already_selected is None:
            already_selected = []
        if already_covered is None:
            already_covered = set()

        remaining_instances = set(neighbor_perf.index) - already_covered
        if len(already_selected) >= cutoff or not remaining_instances:
            return already_selected

        best_solver: str | None = None
        best_cover: set[str] = set()
        best_runtime = float("inf")

        for algo in self.algorithms:
            if algo in already_selected:
                continue
            covers = (
                set(neighbor_perf.index[neighbor_perf[algo].notna()])
                & remaining_instances
            )

            if not best_solver or len(covers) > len(best_cover):
                best_solver = algo
                best_cover = covers
                best_runtime = (
                    float(neighbor_perf.loc[list(covers), algo].sum())
                    if covers
                    else float("inf")
                )
            elif len(covers) == len(best_cover) and len(covers) > 0:
                runtime = float(neighbor_perf.loc[list(covers), algo].sum())
                if runtime < best_runtime:
                    best_solver = algo
                    best_cover = covers
                    best_runtime = runtime

        if not best_solver or not best_cover:
            return already_selected

        already_selected.append(str(best_solver))
        already_covered |= best_cover
        return self._mine_solvers(
            neighbor_perf, cutoff, already_selected, already_covered
        )

    def _construct_sunny_schedule(
        self, neighbor_perf: pd.DataFrame, lam_limit: int | None = None
    ) -> list[tuple[str, float]]:
        """
        Construct a SUNNY schedule.
        """
        lam = (
            lam_limit
            or self.algorithm_limit
            or self.tuned_algorithm_limit
            or len(self.algorithms)
        )
        lam = max(1, min(int(lam), len(self.algorithms)))

        cutoff = min(self.k, lam, len(self.algorithms))
        best_pfolio = self._mine_solvers(neighbor_perf, cutoff)

        valid_perf = neighbor_perf.notna()
        slots = {algo: int(valid_perf[algo].sum()) for algo in best_pfolio}
        n_unsolved = len(
            set(neighbor_perf.index)
            - set().union(
                *(set(neighbor_perf.index[valid_perf[a]]) for a in best_pfolio)
            )
        )

        total_slots = sum(slots.values()) + n_unsolved
        if total_slots == 0:
            slots = {algo: 1 for algo in best_pfolio}
            total_slots = len(best_pfolio)

        budget = float(self.budget or 1e10)
        schedule: list[tuple[str, float]] = []
        for algo in best_pfolio:
            allocated = budget * (slots[algo] / total_slots)
            schedule.append((str(algo), float(allocated)))

        # Sort by mean running time in neighborhood
        avg_times = neighbor_perf[[a for a, _ in schedule]].mean(axis=0).to_dict()
        schedule.sort(key=lambda x: avg_times.get(x[0], float("inf")))

        # Allocate remaining budget
        used = sum(t for _, t in schedule)
        remaining = max(0.0, budget - used)
        if n_unsolved > 0 and remaining > 0:
            # Add to the globally best solver in the neighborhood
            best_global = str(valid_perf.sum(axis=0).idxmax())
            for i, (a, t) in enumerate(schedule):
                if a == best_global:
                    schedule[i] = (a, t + remaining)
                    break
            else:
                if len(schedule) < lam:
                    schedule.append((best_global, remaining))
                    # Resort
                    avg_t = (
                        float(neighbor_perf[best_global].mean())
                        if best_global in neighbor_perf.columns
                        else float("inf")
                    )
                    avg_times[best_global] = avg_t
                    schedule.sort(key=lambda x: avg_times.get(x[0], float("inf")))
                else:
                    a_last, t_last = schedule[-1]
                    schedule[-1] = (a_last, t_last + remaining)

        return schedule

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict schedules for each instance.
        """
        if self.knn is None or self.performance_df is None:
            raise RuntimeError("SUNNY must be fitted.")

        if features is None:
            raise ValueError("Sunny require features for prediction.")
        predictions: dict[str, list[tuple[str, float]]] = {}
        for instance in features.index:
            x = features.loc[[instance]].values
            _, n_idx = self.knn.kneighbors(x, n_neighbors=self.k)
            n_perf = self.performance_df.iloc[n_idx.flatten()]
            predictions[str(instance)] = self._construct_sunny_schedule(n_perf)
        return predictions

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Describe the tunable hyperparameters of SUNNY.

        Returns
        -------
        tuple
            ``(hyperparameters, conditions, forbiddens)``. All three lists are
            empty when ConfigSpace is not available; the forbidden list is
            always empty.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        v2_switch = Categorical(name="use_v2", items=[True, False], default=False)
        neighbors = Integer(name="k", bounds=(1, 50), default=10)
        folds = Integer(name="n_folds", bounds=(3, 10), default=5)
        candidate_grid = Categorical(
            name="k_candidates", items=["small", "medium", "broad"], default="medium"
        )

        # The cross-validation settings are only active when use_v2 is True.
        conditions = [
            EqualsCondition(child, v2_switch, True)
            for child in (folds, candidate_grid)
        ]
        return [v2_switch, neighbors, folds, candidate_grid], conditions, []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[SUNNY]:
        """
        Create a partial function from a clean configuration.
        """
        config = clean_config.copy()
        k_map = {
            "small": [3, 5, 7],
            "medium": [3, 5, 7, 10, 20],
            "broad": [3, 5, 7, 10, 20, 50],
        }
        if config.get("use_v2"):
            config["k_candidates"] = k_map[config.get("k_candidates", "medium")]
        else:
            config["n_folds"] = 5
            config["k_candidates"] = [3, 5, 7, 10, 20, 50]

        config.update(kwargs)
        return partial(SUNNY, **config)

__init__(k=10, use_v2=False, n_folds=5, k_candidates=None, random_state=42, use_tsunny=False, algorithm_limit=None, **kwargs)

Initialize the SUNNY selector.

Parameters

- k : int, default=10 — Number of neighbors for k-NN.
- use_v2 : bool, default=False — Whether to tune k using cross-validation (SUNNY-AS2).
- n_folds : int, default=5 — Number of folds for cross-validation when tuning.
- k_candidates : list[int] or None, default=None — Candidate k values to consider when tuning.
- random_state : int, default=42 — Random seed for reproducibility.
- use_tsunny : bool, default=False — Whether to tune the max number of algorithms via cross-validation.
- algorithm_limit : int or None, default=None — If set, cap the number of algorithms in each schedule.
- **kwargs : Any — Additional keyword arguments.

Source code in asf/selectors/sunny.py
def __init__(
    self,
    k: int = 10,
    use_v2: bool = False,
    n_folds: int = 5,
    k_candidates: list[int] | None = None,
    random_state: int = 42,
    use_tsunny: bool = False,
    algorithm_limit: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Set up an unfitted SUNNY selector.

    Parameters
    ----------
    k : int, default=10
        Neighborhood size for the k-NN lookup.
    use_v2 : bool, default=False
        Tune ``k`` with cross-validation (SUNNY-AS2).
    n_folds : int, default=5
        Cross-validation folds used while tuning.
    k_candidates : list[int] or None, default=None
        Values of ``k`` tried while tuning; ``[3, 5, 7, 10, 20, 50]`` is used
        when None or empty.
    random_state : int, default=42
        Seed for reproducibility.
    use_tsunny : bool, default=False
        Tune the maximum number of algorithms with cross-validation.
    algorithm_limit : int or None, default=None
        Optional cap on the number of algorithms per schedule.
    **kwargs : Any
        Forwarded to the parent initializer.
    """
    super().__init__(**kwargs)

    # Coerce the user-facing settings to plain Python types.
    self.k = int(k)
    self.use_v2 = bool(use_v2)
    self.n_folds = int(n_folds)
    if k_candidates:
        self.k_candidates = k_candidates
    else:
        self.k_candidates = [3, 5, 7, 10, 20, 50]
    self.random_state = int(random_state)
    self.use_tsunny = bool(use_tsunny)
    self.algorithm_limit = algorithm_limit

    # State that stays None until it is populated elsewhere.
    self.tuned_algorithm_limit: int | None = None
    self.features_df: pd.DataFrame | None = None
    self.performance_df: pd.DataFrame | None = None
    self.knn: NearestNeighbors | None = None

SelectorPipeline

Bases: ConfigurableMixin

Sequence of preprocessing, feature selection, and algorithm selection steps.

Attributes

- selector : AbstractSelector — The main selector model to be used.
- pre_solving : AbstractPresolver or None — A presolver for selecting initial algorithms.
- feature_selector : Any or None — A component for feature selection.
- algorithm_pre_selector : Any or None — A component for algorithm pre-selection.
- feature_groups : Any or None — Feature groups to be used by the selector.
- max_feature_time : float or None — Budget (seconds) to allocate per feature group in predictions.
- preprocessor : Pipeline — The preprocessing pipeline (including SimpleImputer).

Source code in asf/selectors/selector_pipeline.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
class SelectorPipeline(ConfigurableMixin):
    """
    Sequence of preprocessing, feature selection, and algorithm selection steps.

    Attributes
    ----------
    selector : AbstractSelector
        The main selector model to be used.
    pre_solving : AbstractPresolver or None
        A presolver for selecting initial algorithms.
    feature_selector : Any or None
        A component for feature selection.
    algorithm_pre_selector : Any or None
        A component for algorithm pre-selection.
    feature_groups : Any or None
        Feature groups to be used by the selector.
    max_feature_time : float or None
        Budget (seconds) to allocate per feature group in predictions.
    preprocessor : Pipeline
        The preprocessing pipeline (including SimpleImputer).
    """

    PREFIX = "pipeline"

    def __init__(
        self,
        selector: AbstractSelector,
        preprocessor: Any | list[Any] | None = None,
        pre_solving: AbstractPresolver | None = None,
        feature_selector: Any | None = None,
        algorithm_pre_selector: Any | None = None,
        feature_groups: dict[str, Any] | list[str] | None = None,
        max_feature_time: float | None = None,
    ) -> None:
        """
        Initialize the SelectorPipeline.

        Parameters
        ----------
        selector : AbstractSelector
            The main selector model to be used.
        preprocessor : Any or list or None, default=None
            Preprocessing steps. SimpleImputer(strategy="mean") is always added first.
            Steps are named after their type; a numeric suffix (``_2``,
            ``_3``, ...) is appended when a name is already taken.
        pre_solving : AbstractPresolver or None, default=None
            Presolver for initial algorithm selection.
        feature_selector : Any or None, default=None
            Component for feature selection.
        algorithm_pre_selector : Any or None, default=None
            Component for algorithm pre-selection.
        feature_groups : dict or list or None, default=None
            Feature groups configuration.
        max_feature_time : float or None, default=None
            Budget (seconds) per feature group.
        """
        self.selector = selector
        self.pre_solving = pre_solving
        self.feature_selector = feature_selector
        self.algorithm_pre_selector = algorithm_pre_selector
        self.feature_groups = feature_groups
        self.max_feature_time = (
            float(max_feature_time) if max_feature_time is not None else None
        )

        if preprocessor is None:
            preproc_list = []
        elif not isinstance(preprocessor, list):
            preproc_list = [preprocessor]
        else:
            preproc_list = list(preprocessor)

        # Always include SimpleImputer as the first step
        steps: list[tuple[str, Any]] = [
            ("SimpleImputer", SimpleImputer(strategy="mean"))
        ]
        # Pipeline step names must be unique, so two steps of the same type
        # (or a user-supplied SimpleImputer) get a numeric suffix.
        used_names = {"SimpleImputer"}
        for p in preproc_list:
            base_name = type(p).__name__
            name = base_name
            suffix = 2
            while name in used_names:
                name = f"{base_name}_{suffix}"
                suffix += 1
            used_names.add(name)
            steps.append((name, p))

        self.preprocessor = Pipeline(steps)
        self.preprocessor.set_output(transform="pandas")

        self._logger = logging.getLogger(__name__)

    def _filter_features(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Filter features based on selected feature groups.

        Only applies when ``feature_groups`` is a non-empty dict; the columns
        listed under each group's ``"provides"`` key are kept (in first-seen
        order, without duplicates). If none of them is present in ``X``, the
        input is returned unchanged.

        Parameters
        ----------
        X : pd.DataFrame
            The input features.

        Returns
        -------
        pd.DataFrame
            Filtered features.
        """
        if self.feature_groups and isinstance(self.feature_groups, dict):
            selected_features = []
            for fg_info in self.feature_groups.values():
                if isinstance(fg_info, dict) and "provides" in fg_info:
                    selected_features.extend(fg_info["provides"])

            # dict.fromkeys drops features listed by several groups, which
            # would otherwise produce duplicated columns.
            available_features = [
                f for f in dict.fromkeys(selected_features) if f in X.columns
            ]
            if available_features:
                return X[available_features]
        return X

    def fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        algorithm_features: pd.DataFrame | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Fit the pipeline.

        Runs, in order: preprocessing, algorithm pre-selection, presolver
        fitting (on the raw ``features`` / ``performance``), feature
        selection, feature-group filtering and finally the selector.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The performance data.
        algorithm_features : pd.DataFrame or None, default=None
            Optional algorithm features.
        **kwargs : Any
            Additional keyword arguments, forwarded to ``selector.fit``.
        """
        start = time.time()
        self._logger.debug("Starting fit process")

        X = self.preprocessor.fit_transform(features, performance)
        self._logger.debug(
            "Preprocessing completed in %.2f seconds", time.time() - start
        )
        start = time.time()

        # Update y (performance) alias for local usage
        y = performance

        if self.algorithm_pre_selector:
            if hasattr(self.algorithm_pre_selector, "fit_transform"):
                y = self.algorithm_pre_selector.fit_transform(X, y)  # type: ignore
            else:
                self.algorithm_pre_selector.fit(X, y)  # type: ignore
                # Some pre-selectors might not have transform for y, checking usage
                if hasattr(self.algorithm_pre_selector, "transform"):
                    y = self.algorithm_pre_selector.transform(y)  # type: ignore

        self._logger.debug(
            "Algorithm pre-selection completed in %.2f seconds", time.time() - start
        )
        start = time.time()

        if self.pre_solving:
            self.pre_solving.fit(features, performance)

        self._logger.debug(
            "Pre-solving completed in %.2f seconds", time.time() - start
        )
        start = time.time()

        if self.feature_selector:
            if hasattr(self.feature_selector, "fit_transform"):
                X, y = self.feature_selector.fit_transform(X, y)  # type: ignore
            else:
                self.feature_selector.fit(X, y)  # type: ignore
                X = self.feature_selector.transform(X)  # type: ignore

        self._logger.debug(
            "Feature selection completed in %.2f seconds", time.time() - start
        )
        start = time.time()

        X = self._filter_features(X)
        self.selector.fit(X, y, algorithm_features=algorithm_features, **kwargs)

        self._logger.debug(
            "Selector fitting completed in %.2f seconds", time.time() - start
        )

    def predict(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame | None = None,
        **kwargs: Any,
    ) -> dict[str, list[tuple[str, float] | tuple[str, float, float]]]:
        """
        Make predictions.

        Each instance's schedule is the concatenation of the presolver
        schedule, the feature steps and the selector's prediction for that
        instance (an empty list when the selector returned none).

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame or None, default=None
            Performance data for oracle selectors.
        **kwargs : Any
            Additional keyword arguments (currently not used).

        Returns
        -------
        dict
            Predictions mapping instance IDs to schedules.
        """
        X = self.preprocessor.transform(features)

        scheds: list[Any] = []
        if self.pre_solving:
            # Presolver prediction returns a single schedule applied to all test instances
            scheds = list(self.pre_solving.predict())

        if self.feature_selector:
            X = self.feature_selector.transform(X)  # type: ignore

        X = self._filter_features(X)

        # Pass performance to selector (needed for oracle selectors like VBS)
        predictions = self.selector.predict(X, performance=performance)

        feature_steps: list[Any] = []
        if self.feature_groups is not None:
            if isinstance(self.feature_groups, dict):
                feature_steps = list(self.feature_groups.keys())
            elif isinstance(self.feature_groups, list):
                feature_steps = list(self.feature_groups)

        # With a time budget, each feature step becomes a (name, budget)
        # pair; without one the bare group names are kept.
        if self.max_feature_time is not None and feature_steps:
            feature_steps = [
                (str(fg), float(self.max_feature_time)) for fg in feature_steps
            ]

        final_preds: dict[str, list[tuple[str, float] | tuple[str, float, float]]] = {}
        for instance_id in X.index:
            prediction = predictions.get(str(instance_id), [])  # type: ignore
            final_preds[str(instance_id)] = scheds + feature_steps + list(prediction)

        return final_preds

    def save(self, path: str | Path) -> None:
        """
        Save the pipeline to a file.

        Parameters
        ----------
        path : str or Path
            File path to save the pipeline.
        """
        import joblib

        joblib.dump(self, path)

    @staticmethod
    def load(path: str | Path) -> SelectorPipeline:
        """
        Load a pipeline from a file.

        Parameters
        ----------
        path : str or Path
            File path to load from.

        Returns
        -------
        SelectorPipeline
            The loaded pipeline.
        """
        import joblib

        # SECURITY: joblib.load unpickles the file and can therefore execute
        # arbitrary code. Only load files from trusted sources.
        return joblib.load(path)

    def get_config(self) -> dict[str, Any]:
        """
        Return configuration details.

        Returns
        -------
        dict
            Configuration metadata: class names of the components, the
            ``budget`` attributes of selector and presolver (None when
            absent) and the type names of the preprocessing steps.
        """

        def get_model_name(obj: Any) -> str | None:
            # Name of the wrapped model class; unwraps functools.partial-like
            # objects through their ``func`` attribute.
            if obj is None:
                return None
            if hasattr(obj, "model_class"):
                mc = obj.model_class
                if hasattr(mc, "func"):
                    return str(mc.func.__name__)
                return str(getattr(mc, "__name__", type(mc).__name__))
            return type(obj).__name__

        return {
            "selector": type(self.selector).__name__,
            "selector_model": get_model_name(self.selector),
            "pre_solving": type(self.pre_solving).__name__
            if self.pre_solving
            else None,
            "selector_budget": getattr(self.selector, "budget", None),
            "presolving_budget": getattr(self.pre_solving, "budget", None),
            "preprocessor_steps": [
                type(step[1]).__name__ for step in self.preprocessor.steps
            ],
            "feature_selector": type(self.feature_selector).__name__
            if self.feature_selector
            else None,
            "algorithm_pre_selector": type(self.algorithm_pre_selector).__name__
            if self.algorithm_pre_selector
            else None,
        }

    @staticmethod
    def _define_hyperparameters(
        selector_class: list[type] | None = None,
        preprocessing_class: list[type] | None = None,
        pre_solving_class: list[type] | None = None,
        feature_groups: dict[str, Any] | None = None,
        algorithm_pre_selector: type | tuple[type, dict[str, Any]] | None = None,
        max_feature_time: float | None | bool = False,
        budget: float | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for SelectorPipeline.

        Parameters
        ----------
        selector_class : list[type] or None, default=None
            List of selector classes. Entries may also be tuples whose first
            element is the class.
        preprocessing_class : list[type] or None, default=None
            List of preprocessor classes.
        pre_solving_class : list[type] or None, default=None
            List of presolver classes.
        feature_groups : dict or None, default=None
            Feature groups definition.
        algorithm_pre_selector : type or tuple or None, default=None
            Algorithm pre-selector class.
        max_feature_time : float or None or bool, default=False
            Maximum feature computation time. Passing None adds a tunable
            ``max_feature_time`` hyperparameter in ``[0, budget or 3600]``;
            any other value adds none.
        budget : float or None, default=None
            Total budget.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens).
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        hyperparameters = []
        conditions = []
        forbiddens = []

        if selector_class:
            selector_entries = (
                selector_class
                if isinstance(selector_class, list)
                else [selector_class]
            )
            # Entries given as (class, ...) tuples contribute their class.
            selector_choices = [
                c[0] if isinstance(c, tuple) else c  # type: ignore[index]
                for c in selector_entries
            ]
            hyperparameters.append(ClassChoice("selector", choices=selector_choices))

        if pre_solving_class:
            ps_choices = (
                pre_solving_class
                if isinstance(pre_solving_class, list)
                else [pre_solving_class]
            )
            use_presolver = Categorical(
                "use_presolver", items=[True, False], default=False
            )
            hyperparameters.append(use_presolver)
            presolver_choice = ClassChoice("presolver", choices=ps_choices)
            hyperparameters.append(presolver_choice)
            conditions.append(EqualsCondition(presolver_choice, use_presolver, True))  # type: ignore

        if preprocessing_class:
            for preproc_cls in preprocessing_class:
                hyperparameters.append(
                    ClassChoice(
                        f"preprocessor:{preproc_cls.__name__}",
                        choices=[preproc_cls, False],
                        default=False,
                    )
                )

        fg_params = {}
        if feature_groups and len(feature_groups) > 1:
            for fg_name in feature_groups:
                fg_param = Categorical(
                    f"feature_group:{fg_name}", [True, False], default=True
                )
                hyperparameters.append(fg_param)
                fg_params[fg_name] = fg_param

            # A group may not be enabled while one of its requirements is off.
            for fg_name, fg_info in feature_groups.items():
                requires = (
                    fg_info.get("requires", []) if isinstance(fg_info, dict) else []
                )
                for req in requires:
                    if req in fg_params:
                        forbiddens.append(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(fg_params[fg_name], True),
                                ForbiddenEqualsClause(fg_params[req], False),
                            )
                        )
            # Disabling every feature group at once is forbidden.
            forbiddens.append(
                ForbiddenAndConjunction(
                    *[ForbiddenEqualsClause(p, False) for p in fg_params.values()]
                )
            )

        if algorithm_pre_selector:
            aps_cls = (
                algorithm_pre_selector[0]
                if isinstance(algorithm_pre_selector, tuple)
                else algorithm_pre_selector
            )
            hyperparameters.append(
                ClassChoice("algorithm_pre_selector", choices=[aps_cls])
            )

        if max_feature_time is None:
            upper = float(budget or 3600.0)
            hyperparameters.append(
                UniformFloatHyperparameter(
                    "max_feature_time",
                    lower=0.0,
                    upper=upper,
                    default_value=min(60.0, upper),
                )
            )

        return hyperparameters, conditions, forbiddens

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        configuration: Configuration | dict[str, Any] | None = None,
        pre_prefix: str = "",
        feature_groups: dict[str, Any] | None = None,
        max_feature_time: float | None = None,
        **kwargs: Any,
    ) -> partial[SelectorPipeline]:
        """
        Create a SelectorPipeline from a clean configuration.

        Callable entries of ``clean_config`` (selector, presolver,
        preprocessors, algorithm pre-selector) are called without arguments
        to obtain the component; non-callable entries are used as they are.

        Parameters
        ----------
        clean_config : dict
            Clean configuration mapping.
        configuration : Configuration or dict or None, default=None
            Original configuration.
        pre_prefix : str, default=""
            Prefix for nested lookups.
        feature_groups : dict or None, default=None
            Feature groups definition.
        max_feature_time : float or None, default=None
            Maximum feature computation time; used when ``clean_config`` has
            no ``"max_feature_time"`` entry.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        partial
            Partial function for SelectorPipeline.
        """
        init_kwargs: dict[str, Any] = {}
        prefix = f"{pre_prefix}:{cls.PREFIX}:" if pre_prefix else f"{cls.PREFIX}:"

        if "selector" in clean_config:
            val = clean_config["selector"]
            init_kwargs["selector"] = val() if callable(val) else val

        if clean_config.get("use_presolver") and "presolver" in clean_config:
            val = clean_config["presolver"]
            init_kwargs["pre_solving"] = val() if callable(val) else val

        preprocs = []
        for k, v in clean_config.items():
            if k.startswith("preprocessor:") and v and v != "False":
                preprocs.append(v() if callable(v) else v)
        if preprocs:
            init_kwargs["preprocessor"] = preprocs

        if feature_groups and configuration is not None:
            from asf.preprocessing.feature_group_selector import FeatureGroupSelector

            init_kwargs["feature_groups"] = (
                FeatureGroupSelector.get_selected_groups_from_config(
                    feature_groups, configuration, prefix=f"{prefix}feature_group:"
                )
            )

        if "algorithm_pre_selector" in clean_config:
            val = clean_config["algorithm_pre_selector"]
            init_kwargs["algorithm_pre_selector"] = val() if callable(val) else val

        init_kwargs["max_feature_time"] = clean_config.get(
            "max_feature_time", max_feature_time
        )

        return partial(cls, **init_kwargs)

    @classmethod
    def get_from_configuration(
        cls,
        configuration: Configuration | dict[str, Any],
        pre_prefix: str = "",
        feature_groups: dict[str, Any] | None = None,
        budget: float | None = None,
        max_feature_time: float | None = None,
        **kwargs: Any,
    ) -> partial[SelectorPipeline]:
        """
        Create a SelectorPipeline from a configuration.

        Resolves the selector, presolver, preprocessors and algorithm
        pre-selector named in ``configuration`` into a clean configuration
        and delegates to ``_get_from_clean_configuration``.

        Parameters
        ----------
        configuration : Configuration or dict
            Configuration object.
        pre_prefix : str, default=""
            Prefix for nested lookups.
        feature_groups : dict or None, default=None
            Feature groups definition.
        budget : float or None, default=None
            Total budget.
        max_feature_time : float or None, default=None
            Maximum feature computation time.
        **kwargs : Any
            Additional keyword arguments.

        Returns
        -------
        partial
            Partial function for SelectorPipeline.

        Raises
        ------
        RuntimeError
            If ConfigSpace is not available.
        """
        if not CONFIGSPACE_AVAILABLE:
            raise RuntimeError("ConfigSpace is not available.")

        prefix = f"{pre_prefix}:{cls.PREFIX}:" if pre_prefix else f"{cls.PREFIX}:"
        clean_config: dict[str, Any] = {}
        # Plain dicts have no config_space attribute; class resolution via
        # hyperparameters is then skipped.
        cs = getattr(configuration, "config_space", None)

        def resolve(hp_name: str, val: Any) -> Any:
            if cs and val:
                hp = cs.get(hp_name)
                if hp:
                    return cls._resolve_class_from_hp(hp, str(val))
            return None

        # 1. Selector
        s_val = configuration.get(f"{prefix}selector")
        s_cls = resolve(f"{prefix}selector", s_val)
        if s_cls is None and callable(s_val):
            s_cls = s_val

        if s_cls:
            if hasattr(s_cls, "get_from_configuration"):
                clean_config["selector"] = s_cls.get_from_configuration(
                    configuration,
                    pre_prefix=f"{prefix}selector",
                    budget=budget,
                    **kwargs,
                )
            else:
                clean_config["selector"] = s_cls(budget=budget) if budget else s_cls()

        # 2. Presolver
        use_ps = configuration.get(f"{prefix}use_presolver")
        clean_config["use_presolver"] = use_ps
        ps_val = configuration.get(f"{prefix}presolver")
        ps_cls = resolve(f"{prefix}presolver", ps_val)
        if use_ps and ps_cls:
            if hasattr(ps_cls, "get_from_configuration"):
                clean_config["presolver"] = ps_cls.get_from_configuration(
                    configuration, pre_prefix=f"{prefix}presolver", **kwargs
                )
            else:
                clean_config["presolver"] = ps_cls()

        # 3. Preprocessors
        if cs:
            for hp in list(cs.values()):
                if hp.name.startswith(f"{prefix}preprocessor:"):
                    val = configuration.get(hp.name)
                    if val and val != "False":
                        res = cls._resolve_class_from_hp(hp, str(val))
                        if res:
                            key = hp.name[len(prefix) :]
                            clean_config[key] = (
                                res.get_from_configuration(  # type: ignore
                                    configuration, pre_prefix=hp.name, **kwargs
                                )
                                if hasattr(res, "get_from_configuration")
                                else (res() if callable(res) else res)
                            )
                    else:
                        # Disabled preprocessors are recorded as False.
                        clean_config[hp.name[len(prefix) :]] = False

        # 4. Algorithm Pre-selector
        aps_val = configuration.get(f"{prefix}algorithm_pre_selector")
        aps_cls = resolve(f"{prefix}algorithm_pre_selector", aps_val)
        if aps_cls:
            if hasattr(aps_cls, "get_from_configuration"):
                clean_config["algorithm_pre_selector"] = aps_cls.get_from_configuration(
                    configuration,
                    pre_prefix=f"{prefix}algorithm_pre_selector",
                    **kwargs,
                )
            else:
                clean_config["algorithm_pre_selector"] = aps_cls()

        # 5. Max feature time
        mft = configuration.get(f"{prefix}max_feature_time")
        if mft is not None:
            clean_config["max_feature_time"] = mft

        return cls._get_from_clean_configuration(
            clean_config=clean_config,
            configuration=configuration,
            pre_prefix=pre_prefix,
            feature_groups=feature_groups,
            max_feature_time=max_feature_time,
            **kwargs,
        )

__init__(selector, preprocessor=None, pre_solving=None, feature_selector=None, algorithm_pre_selector=None, feature_groups=None, max_feature_time=None)

Initialize the SelectorPipeline.

Parameters

- selector : AbstractSelector — The main selector model to be used.
- preprocessor : Any or list or None, default=None — Preprocessing steps. SimpleImputer(strategy="mean") is always added first.
- pre_solving : AbstractPresolver or None, default=None — Presolver for initial algorithm selection.
- feature_selector : Any or None, default=None — Component for feature selection.
- algorithm_pre_selector : Any or None, default=None — Component for algorithm pre-selection.
- feature_groups : dict or list or None, default=None — Feature groups configuration.
- max_feature_time : float or None, default=None — Budget (seconds) per feature group.

Source code in asf/selectors/selector_pipeline.py
def __init__(
    self,
    selector: AbstractSelector,
    preprocessor: Any | list[Any] | None = None,
    pre_solving: AbstractPresolver | None = None,
    feature_selector: Any | None = None,
    algorithm_pre_selector: Any | None = None,
    feature_groups: dict[str, Any] | list[str] | None = None,
    max_feature_time: float | None = None,
) -> None:
    """
    Initialize the SelectorPipeline.

    Parameters
    ----------
    selector : AbstractSelector
        The main selector model to be used.
    preprocessor : Any or list or None, default=None
        Preprocessing steps. A single object is treated as a one-element
        list. SimpleImputer(strategy="mean") is always added first.
    pre_solving : AbstractPresolver or None, default=None
        Presolver for initial algorithm selection.
    feature_selector : Any or None, default=None
        Component for feature selection.
    algorithm_pre_selector : Any or None, default=None
        Component for algorithm pre-selection.
    feature_groups : dict or list or None, default=None
        Feature groups configuration.
    max_feature_time : float or None, default=None
        Budget (seconds) per feature group. Stored as ``float`` when given.

    Notes
    -----
    Each preprocessing step is named after its class. When that name is
    already taken (for example a user-supplied ``SimpleImputer`` or two
    steps of the same class) a numeric suffix (``_2``, ``_3``, ...) is
    appended, because a scikit-learn ``Pipeline`` requires unique step names.
    """
    self.selector = selector
    self.pre_solving = pre_solving
    self.feature_selector = feature_selector
    self.algorithm_pre_selector = algorithm_pre_selector
    self.feature_groups = feature_groups
    self.max_feature_time = (
        float(max_feature_time) if max_feature_time is not None else None
    )

    # Normalise the preprocessor argument to a list of step objects.
    if preprocessor is None:
        preproc_list: list[Any] = []
    elif isinstance(preprocessor, list):
        preproc_list = preprocessor
    else:
        preproc_list = [preprocessor]

    # Always include SimpleImputer as the first step
    steps: list[tuple[str, Any]] = [
        ("SimpleImputer", SimpleImputer(strategy="mean"))
    ]
    used_names = {"SimpleImputer"}
    for p in preproc_list:
        base_name = type(p).__name__
        step_name = base_name
        suffix = 2
        # Duplicate step names make the Pipeline invalid, so disambiguate.
        while step_name in used_names:
            step_name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(step_name)
        steps.append((step_name, p))

    self.preprocessor = Pipeline(steps)
    self.preprocessor.set_output(transform="pandas")

    self._logger = logging.getLogger(__name__)

fit(features, performance, algorithm_features=None, **kwargs)

Fit the pipeline.

Parameters

features : pd.DataFrame The input features. performance : pd.DataFrame The performance data. algorithm_features : pd.DataFrame or None, default=None Optional algorithm features. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/selector_pipeline.py
def fit(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame,
    algorithm_features: pd.DataFrame | None = None,
    **kwargs: Any,
) -> None:
    """
    Fit the pipeline.

    Parameters
    ----------
    features : pd.DataFrame
        The input features.
    performance : pd.DataFrame
        The performance data.
    algorithm_features : pd.DataFrame or None, default=None
        Optional algorithm features.
    **kwargs : Any
        Additional keyword arguments.
    """
    start = time.time()
    self._logger.debug("Starting fit process")

    X = self.preprocessor.fit_transform(features, performance)
    self._logger.debug(
        f"Preprocessing completed in {time.time() - start:.2f} seconds"
    )
    start = time.time()

    # Update y (performance) alias for local usage
    y = performance

    if self.algorithm_pre_selector:
        if hasattr(self.algorithm_pre_selector, "fit_transform"):
            y = self.algorithm_pre_selector.fit_transform(X, y)  # type: ignore
        else:
            self.algorithm_pre_selector.fit(X, y)  # type: ignore
            # Some pre-selectors might not have transform for y, checking usage
            if hasattr(self.algorithm_pre_selector, "transform"):
                y = self.algorithm_pre_selector.transform(y)  # type: ignore

    self._logger.debug(
        f"Algorithm pre-selection completed in {time.time() - start:.2f} seconds"
    )
    start = time.time()

    if self.pre_solving:
        self.pre_solving.fit(features, performance)

    self._logger.debug(
        f"Pre-solving completed in {time.time() - start:.2f} seconds"
    )
    start = time.time()

    if self.feature_selector:
        if hasattr(self.feature_selector, "fit_transform"):
            X, y = self.feature_selector.fit_transform(X, y)  # type: ignore
        else:
            self.feature_selector.fit(X, y)  # type: ignore
            X = self.feature_selector.transform(X)  # type: ignore

    self._logger.debug(
        f"Feature selection completed in {time.time() - start:.2f} seconds"
    )
    start = time.time()

    X = self._filter_features(X)
    self.selector.fit(X, y, algorithm_features=algorithm_features, **kwargs)

    self._logger.debug(
        f"Selector fitting completed in {time.time() - start:.2f} seconds"
    )

get_config()

Return configuration details.

Returns

dict Configuration metadata.

Source code in asf/selectors/selector_pipeline.py
def get_config(self) -> dict[str, Any]:
    """
    Return configuration details.

    Returns
    -------
    dict
        Configuration metadata.
    """

    def get_model_name(obj: Any) -> str | None:
        if obj is None:
            return None
        if hasattr(obj, "model_class"):
            mc = obj.model_class
            if hasattr(mc, "func"):
                return str(mc.func.__name__)
            return str(getattr(mc, "__name__", type(mc).__name__))
        return type(obj).__name__

    return {
        "selector": type(self.selector).__name__,
        "selector_model": get_model_name(self.selector),
        "pre_solving": type(self.pre_solving).__name__
        if self.pre_solving
        else None,
        "selector_budget": getattr(self.selector, "budget", None),
        "presolving_budget": getattr(self.pre_solving, "budget", None),
        "preprocessor_steps": [
            type(step[1]).__name__ for step in self.preprocessor.steps
        ],
        "feature_selector": type(self.feature_selector).__name__
        if self.feature_selector
        else None,
        "algorithm_pre_selector": type(self.algorithm_pre_selector).__name__
        if self.algorithm_pre_selector
        else None,
    }

get_from_configuration(configuration, pre_prefix='', feature_groups=None, budget=None, max_feature_time=None, **kwargs) classmethod

Create a SelectorPipeline from a configuration.

Parameters

- `configuration` : Configuration or dict. Configuration object.
- `pre_prefix` : str, default="". Prefix for nested lookups.
- `feature_groups` : dict or None, default=None. Feature groups definition.
- `budget` : float or None, default=None. Total budget.
- `max_feature_time` : float or None, default=None. Maximum feature computation time.
- `**kwargs` : Any. Additional keyword arguments.

Returns

partial Partial function for SelectorPipeline.

Source code in asf/selectors/selector_pipeline.py
@classmethod
def get_from_configuration(
    cls,
    configuration: Configuration | dict[str, Any],
    pre_prefix: str = "",
    feature_groups: dict[str, Any] | None = None,
    budget: float | None = None,
    max_feature_time: float | None = None,
    **kwargs: Any,
) -> partial[SelectorPipeline]:
    """
    Create a SelectorPipeline from a configuration.

    Reads the hyperparameters stored under this class's prefix, turns each
    component choice into a constructed component, collects them in a
    "clean" configuration dict, and hands that dict to
    ``cls._get_from_clean_configuration``.

    Parameters
    ----------
    configuration : Configuration or dict
        Configuration object. Class choices are only resolved when the
        object exposes a ``config_space`` attribute.
    pre_prefix : str, default=""
        Prefix for nested lookups.
    feature_groups : dict or None, default=None
        Feature groups definition. Forwarded unchanged.
    budget : float or None, default=None
        Total budget. Only used when building the selector; it is not
        forwarded to ``_get_from_clean_configuration``.
    max_feature_time : float or None, default=None
        Maximum feature computation time. Forwarded unchanged; a value
        found in ``configuration`` is stored separately in the clean
        configuration.
    **kwargs : Any
        Additional keyword arguments, forwarded to nested
        ``get_from_configuration`` calls and to
        ``_get_from_clean_configuration``.

    Returns
    -------
    partial
        Partial function for SelectorPipeline.

    Raises
    ------
    RuntimeError
        If ``CONFIGSPACE_AVAILABLE`` is falsy.
    """
    if not CONFIGSPACE_AVAILABLE:
        raise RuntimeError("ConfigSpace is not available.")

    # Hyperparameter names are "<pre_prefix>:<PREFIX>:<name>" when nested,
    # otherwise "<PREFIX>:<name>".
    prefix = f"{pre_prefix}:{cls.PREFIX}:" if pre_prefix else f"{cls.PREFIX}:"
    clean_config: dict[str, Any] = {}
    # Objects without a ``config_space`` attribute (e.g. a plain dict) give
    # None here, which disables class resolution below.
    cs = getattr(configuration, "config_space", None)

    def resolve(hp_name: str, val: Any) -> Any:
        # Map a configured value to a class via its hyperparameter; returns
        # None when there is no config space, no value, or no such
        # hyperparameter.
        if cs and val:
            hp = cs.get(hp_name)
            if hp:
                return cls._resolve_class_from_hp(hp, str(val))
        return None

    # 1. Selector
    s_val = configuration.get(f"{prefix}selector")
    s_cls = resolve(f"{prefix}selector", s_val)
    # Fall back to the raw value when it is itself callable.
    if s_cls is None and callable(s_val):
        s_cls = s_val

    if s_cls:
        if hasattr(s_cls, "get_from_configuration"):
            clean_config["selector"] = s_cls.get_from_configuration(
                configuration,
                pre_prefix=f"{prefix}selector",
                budget=budget,
                **kwargs,
            )
        else:
            # A falsy budget (None or 0) is not passed to the constructor.
            clean_config["selector"] = s_cls(budget=budget) if budget else s_cls()

    # 2. Presolver
    use_ps = configuration.get(f"{prefix}use_presolver")
    # The flag is recorded as read, even when no presolver is built.
    clean_config["use_presolver"] = use_ps
    ps_val = configuration.get(f"{prefix}presolver")
    ps_cls = resolve(f"{prefix}presolver", ps_val)
    if use_ps and ps_cls:
        if hasattr(ps_cls, "get_from_configuration"):
            clean_config["presolver"] = ps_cls.get_from_configuration(
                configuration, pre_prefix=f"{prefix}presolver", **kwargs
            )
        else:
            clean_config["presolver"] = ps_cls()

    # 3. Preprocessors
    # Needs the config space to enumerate hyperparameters, so this step is
    # skipped entirely when cs is falsy.
    if cs:
        for hp in list(cs.values()):
            if hp.name.startswith(f"{prefix}preprocessor:"):
                val = configuration.get(hp.name)
                if val and val != "False":
                    res = cls._resolve_class_from_hp(hp, str(val))
                    # An unresolved choice leaves no entry in clean_config.
                    if res:
                        # Key is the hyperparameter name with the prefix removed.
                        key = hp.name[len(prefix) :]
                        clean_config[key] = (
                            res.get_from_configuration(  # type: ignore
                                configuration, pre_prefix=hp.name, **kwargs
                            )
                            if hasattr(res, "get_from_configuration")
                            else (res() if callable(res) else res)
                        )
                else:
                    # Falsy values and the string "False" mark the step as off.
                    clean_config[hp.name[len(prefix) :]] = False

    # 4. Algorithm Pre-selector
    aps_val = configuration.get(f"{prefix}algorithm_pre_selector")
    aps_cls = resolve(f"{prefix}algorithm_pre_selector", aps_val)
    if aps_cls:
        if hasattr(aps_cls, "get_from_configuration"):
            clean_config["algorithm_pre_selector"] = aps_cls.get_from_configuration(
                configuration,
                pre_prefix=f"{prefix}algorithm_pre_selector",
                **kwargs,
            )
        else:
            clean_config["algorithm_pre_selector"] = aps_cls()

    # 5. Max feature time
    # The configured value goes into clean_config; the max_feature_time
    # argument is passed on separately below.
    mft = configuration.get(f"{prefix}max_feature_time")
    if mft is not None:
        clean_config["max_feature_time"] = mft

    return cls._get_from_clean_configuration(
        clean_config=clean_config,
        configuration=configuration,
        pre_prefix=pre_prefix,
        feature_groups=feature_groups,
        max_feature_time=max_feature_time,
        **kwargs,
    )

load(path) staticmethod

Load a pipeline from a file.

Parameters

path : str or Path File path to load from.

Returns

SelectorPipeline The loaded pipeline.

Source code in asf/selectors/selector_pipeline.py
@staticmethod
def load(path: str | Path) -> SelectorPipeline:
    """
    Restore a pipeline that was previously written to disk.

    Parameters
    ----------
    path : str or Path
        Location of the file to read.

    Returns
    -------
    SelectorPipeline
        The object stored in the file.

    Notes
    -----
    The file is read with ``joblib.load``, which unpickles its content and
    can therefore execute arbitrary code. Only load files from trusted
    sources.
    """
    from joblib import load as joblib_load

    restored = joblib_load(path)
    return restored

predict(features, performance=None, **kwargs)

Make predictions.

Parameters

features : pd.DataFrame The input features. performance : pd.DataFrame or None, default=None Performance data for oracle selectors. **kwargs : Any Additional keyword arguments.

Returns

dict Predictions mapping instance IDs to schedules.

Source code in asf/selectors/selector_pipeline.py
def predict(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame | None = None,
    **kwargs: Any,
) -> dict[str, list[tuple[str, float] | tuple[str, float, float]]]:
    """
    Make predictions.

    Parameters
    ----------
    features : pd.DataFrame
        The input features.
    performance : pd.DataFrame or None, default=None
        Performance data for oracle selectors.
    **kwargs : Any
        Additional keyword arguments.

    Returns
    -------
    dict
        Predictions mapping instance IDs to schedules.
    """
    X = self.preprocessor.transform(features)

    scheds: list[Any] = []
    if self.pre_solving:
        # Presolver prediction returns a single schedule applied to all test instances
        scheds = list(self.pre_solving.predict())

    if self.feature_selector:
        X = self.feature_selector.transform(X)  # type: ignore

    X = self._filter_features(X)

    # Pass performance to selector (needed for oracle selectors like VBS)
    predictions = self.selector.predict(X, performance=performance)

    feature_steps: list[Any] = []
    if self.feature_groups is not None:
        if isinstance(self.feature_groups, dict):
            feature_steps = list(self.feature_groups.keys())
        elif isinstance(self.feature_groups, list):
            feature_steps = self.feature_groups

    if self.max_feature_time is not None and feature_steps:
        feature_steps = [
            (str(fg), float(self.max_feature_time)) for fg in feature_steps
        ]

    final_preds: dict[str, list[tuple[str, float] | tuple[str, float, float]]] = {}
    for instance_id in X.index:
        prediction = predictions.get(str(instance_id), [])  # type: ignore
        final_preds[str(instance_id)] = scheds + feature_steps + list(prediction)

    return final_preds

save(path)

Save the pipeline to a file.

Parameters

path : str or Path File path to save the pipeline.

Source code in asf/selectors/selector_pipeline.py
def save(self, path: str | Path) -> None:
    """
    Write this pipeline object to disk with ``joblib.dump``.

    Parameters
    ----------
    path : str or Path
        Destination file for the serialized pipeline.
    """
    from joblib import dump as joblib_dump

    joblib_dump(self, path)

SimpleRanking

Bases: ConfigurableMixin, AbstractModelBasedSelector

Algorithm Selection via Ranking.

Attributes

classifier : AbstractPredictor or None The trained ranking model.

Source code in asf/selectors/simple_ranking.py
class SimpleRanking(ConfigurableMixin, AbstractModelBasedSelector):
    """
    Algorithm Selection via Ranking.

    A single ranking model is trained on the cross-product of instances and
    algorithms; at prediction time the algorithm with the lowest predicted
    score is chosen for each instance.

    Attributes
    ----------
    classifier : AbstractPredictor or None
        The trained ranking model.
    """

    PREFIX = "simple_ranking"
    RETURN_TYPE = "single"

    def __init__(
        self,
        model_class: type[AbstractPredictor] = XGBoostRankerWrapper,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the SimpleRanking.

        Parameters
        ----------
        model_class : type[AbstractPredictor], default=XGBoostRankerWrapper
            The class of the ranking model to be used.
        **kwargs : Any
            Additional keyword arguments, forwarded to
            ``AbstractModelBasedSelector.__init__``.
        """
        AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
        self.classifier: AbstractPredictor | None = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Fit the ranking model.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame
            The algorithm performance data.
        **kwargs : Any
            Additional keyword arguments. Not used by this method.

        Raises
        ------
        RuntimeError
            If instantiating ``model_class`` yields ``None``.
        """
        # Without user-supplied algorithm features, describe each algorithm
        # by a one-hot vector.
        if self.algorithm_features is None:
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(np.array(self.algorithms).reshape(-1, 1)),
                index=list(self.algorithms),  # type: ignore[arg-type]
                columns=[f"algo_{i}" for i in range(len(self.algorithms))],  # type: ignore[arg-type]
            )

        performance = performance[self.algorithms]
        features = features[list(self.features)]

        # Reset index to have instance names as a column for merging
        features_reset = features.reset_index().rename(
            columns={features.index.name or "index": "INSTANCE_ID"}
        )
        self.algorithm_features.index.name = "ALGORITHM"

        # Create cross-product of instances and algorithms
        total_features = pd.merge(
            features_reset, self.algorithm_features.reset_index(), how="cross"
        )

        stacked_perf = performance.stack().reset_index()
        stacked_perf.columns = ["INSTANCE_ID", "ALGORITHM", "PERFORMANCE"]

        merged = total_features.merge(
            stacked_perf, on=["INSTANCE_ID", "ALGORITHM"], how="left"
        )

        # Calculate ranks per instance. ``assign`` builds a new frame for
        # each group instead of writing into the frame groupby yields.
        # Concatenating the groups keeps each instance's rows contiguous.
        ranked_groups = [
            gdf.assign(
                rank=gdf["PERFORMANCE"].rank(
                    ascending=not self.maximize, method="min"
                )
            )
            for _name, gdf in merged.groupby("INSTANCE_ID")
        ]
        merged = pd.concat(ranked_groups)

        # Features for training
        X = merged.drop(columns=["INSTANCE_ID", "ALGORITHM", "PERFORMANCE", "rank"])
        y = merged["rank"]

        # One integer query id per instance.
        q_encoder = OrdinalEncoder()
        qid = q_encoder.fit_transform(
            merged["INSTANCE_ID"].to_numpy().reshape(-1, 1)
        ).flatten()

        self.classifier = self.model_class()
        if self.classifier is None:
            raise RuntimeError("Classifier could not be initialized.")

        self.classifier.fit(X, y, qid=qid)

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Predict the best algorithm for each instance.

        Parameters
        ----------
        features : pd.DataFrame
            The input features.
        performance : pd.DataFrame or None, default=None
            Not used by this selector.

        Returns
        -------
        dict
            Mapping from instance names to algorithm schedules. Each
            schedule holds one ``(algorithm, budget)`` pair, with the budget
            reported as ``0.0`` when no budget is set.

        Raises
        ------
        ValueError
            If ``features`` is None.
        RuntimeError
            If the model has not been fitted or the algorithm features are
            missing.
        """
        if features is None:
            raise ValueError("SimpleRanking require features for prediction.")
        if self.classifier is None:
            raise RuntimeError("Classifier has not been fitted.")
        if self.algorithm_features is None:
            raise RuntimeError("Algorithm features missing.")

        f_cols = list(self.features)
        inst_name = features.index.name or "index"
        features_reset = features.reset_index().rename(
            columns={inst_name: "INSTANCE_ID"}
        )

        # One row per (instance, algorithm) pair.
        total_features = pd.merge(
            features_reset, self.algorithm_features.reset_index(), how="cross"
        )

        X = total_features[f_cols + list(self.algorithm_features.columns)]
        scores = np.asarray(self.classifier.predict(X)).reshape(-1)

        # Group once instead of rescanning every row with a boolean mask
        # for each instance.
        scored = total_features[["INSTANCE_ID", "ALGORITHM"]].assign(SCORE=scores)

        results: dict[str, list[tuple[str, float]]] = {}
        # sort=False keeps the instances in their input order.
        for instance_name, group in scored.groupby("INSTANCE_ID", sort=False):
            # The lowest predicted score wins within the instance.
            best_idx = int(np.argmin(group["SCORE"].to_numpy()))
            best_algo = group["ALGORITHM"].iloc[best_idx]
            results[str(instance_name)] = [(str(best_algo), float(self.budget or 0))]

        return results

    @staticmethod
    def _define_hyperparameters(
        model_class: list[type[AbstractPredictor]] | None = None,
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Define hyperparameters for SimpleRanking.

        Parameters
        ----------
        model_class : list[type[AbstractPredictor]] or None, default=None
            List of model classes to choose from. Defaults to
            ``[XGBoostRankerWrapper]``; the first entry is the default choice.
        **kwargs : Any
            Additional keyword arguments. Not used by this method.

        Returns
        -------
        tuple
            Tuple of (hyperparameters, conditions, forbiddens). All three
            are empty when ConfigSpace is unavailable.
        """
        if not CONFIGSPACE_AVAILABLE:
            return [], [], []

        if model_class is None:
            model_class = [XGBoostRankerWrapper]

        model_class_param = ClassChoice(
            name="model_class",
            choices=model_class,
            default=model_class[0],
        )

        return [model_class_param], [], []

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[SimpleRanking]:
        """
        Create a partial function from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is copied, not modified.
        **kwargs : Any
            Additional keyword arguments; they override entries of
            ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for SimpleRanking.
        """
        config = clean_config.copy()
        config.update(kwargs)
        return partial(SimpleRanking, **config)

__init__(model_class=XGBoostRankerWrapper, **kwargs)

Initialize the SimpleRanking.

Parameters

model_class : type[AbstractPredictor], default=XGBoostRankerWrapper The class of the ranking model to be used. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/simple_ranking.py
def __init__(
    self,
    model_class: type[AbstractPredictor] = XGBoostRankerWrapper,
    **kwargs: Any,
) -> None:
    """
    Initialize the SimpleRanking.

    Parameters
    ----------
    model_class : type[AbstractPredictor], default=XGBoostRankerWrapper
        The class of the ranking model to be used.
    **kwargs : Any
        Additional keyword arguments, forwarded unchanged to
        ``AbstractModelBasedSelector.__init__``.
    """
    # The base initializer is called by class name rather than via super().
    AbstractModelBasedSelector.__init__(self, model_class, **kwargs)
    # No model exists at construction time.
    self.classifier: AbstractPredictor | None = None

SingleBestSolver

Bases: ConfigurableMixin, AbstractSelector

Single Best Solver (SBS) selector.

Always selects the algorithm with the best average performance across all training instances. This represents the baseline performance achievable without any instance-specific selection.

Attributes

best_algorithm : str or None The name of the algorithm with the best aggregate performance.

Source code in asf/selectors/baselines.py
class SingleBestSolver(ConfigurableMixin, AbstractSelector):
    """
    Single Best Solver (SBS) baseline selector.

    Picks the one algorithm whose summed performance over all training
    instances is best and returns it for every instance, i.e. the result
    obtainable without any per-instance selection.

    Attributes
    ----------
    best_algorithm : str or None
        Name of the algorithm with the best aggregate performance; ``None``
        until the selector has been fitted.
    """

    PREFIX = "sbs"

    def __init__(
        self,
        budget: int | None = None,
        maximize: bool = False,
        feature_groups: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Set up the selector; all arguments go to the parent initializer.

        Parameters
        ----------
        budget : int or None, default=None
            The budget for the selector.
        maximize : bool, default=False
            Whether larger performance values are better.
        feature_groups : list[str] or None, default=None
            Groups of features to be considered.
        **kwargs : Any
            Additional keyword arguments.
        """
        super().__init__(
            budget=budget,
            maximize=maximize,
            feature_groups=feature_groups,
            **kwargs,
        )
        self.best_algorithm: str | None = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Determine the algorithm with the best summed performance.

        Parameters
        ----------
        features : pd.DataFrame
            The input features. Not used by this selector.
        performance : pd.DataFrame
            The performance data, one column per algorithm.
        **kwargs : Any
            Additional keyword arguments. Not used by this method.
        """
        if self.budget is None:
            scores = performance.values
        else:
            # PAR10-style penalty: entries above the budget count as
            # ten times the budget.
            scores = np.where(
                performance <= self.budget, performance, self.budget * 10
            )

        # Column-wise totals, i.e. one aggregate value per algorithm.
        totals = np.sum(scores, axis=0)

        pick_best = np.argmax if self.maximize else np.argmin
        self.best_algorithm = performance.columns[pick_best(totals)]

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Return the single best algorithm for every instance.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features; only the index is used. When ``None``, a
            single entry keyed ``"0"`` is produced.
        performance : pd.DataFrame or None, default=None
            The performance data. Not used by this selector.

        Returns
        -------
        dict
            Dictionary mapping instance IDs to a one-element schedule with
            the single best algorithm and the budget (``0.0`` if unset).
        """
        instance_ids = [0] if features is None else features.index

        schedules: dict[str, list[tuple[str, float]]] = {}
        for instance in instance_ids:
            entry = (str(self.best_algorithm), float(self.budget or 0))
            schedules[str(instance)] = [entry]
        return schedules

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Report the (empty) hyperparameter space of SingleBestSolver.

        Returns
        -------
        tuple
            Empty hyperparameters, conditions, and forbiddens.
        """
        hyperparameters: list[Any] = []
        conditions: list[Any] = []
        forbiddens: list[Any] = []
        return hyperparameters, conditions, forbiddens

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[SingleBestSolver]:
        """
        Build a SingleBestSolver factory from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is not modified.
        **kwargs : Any
            Additional keyword arguments; they take precedence over
            entries of ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for SingleBestSolver.
        """
        merged_config = {**clean_config}
        merged_config.update(kwargs)
        return partial(SingleBestSolver, **merged_config)

__init__(budget=None, maximize=False, feature_groups=None, **kwargs)

Initialize the SingleBestSolver.

Parameters

budget : int or None, default=None The budget for the selector. maximize : bool, default=False Indicates whether to maximize the performance metric. feature_groups : list[str] or None, default=None Groups of features to be considered. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/baselines.py
def __init__(
    self,
    budget: int | None = None,
    maximize: bool = False,
    feature_groups: list[str] | None = None,
    **kwargs: Any,
) -> None:
    """
    Initialize the SingleBestSolver.

    Parameters
    ----------
    budget : int or None, default=None
        The budget for the selector.
    maximize : bool, default=False
        Indicates whether to maximize the performance metric.
    feature_groups : list[str] or None, default=None
        Groups of features to be considered.
    **kwargs : Any
        Additional keyword arguments, forwarded to the parent initializer.
    """
    # All arguments are handed to the parent class unchanged.
    super().__init__(
        budget=budget,
        maximize=maximize,
        feature_groups=feature_groups,
        **kwargs,
    )
    # No best algorithm is known at construction time.
    self.best_algorithm: str | None = None

VirtualBestSolver

Bases: ConfigurableMixin, AbstractSelector

Virtual Best Solver (VBS) / Oracle selector.

Always selects the best algorithm for each specific instance. This represents the upper bound of performance achievable by any algorithm selector (requires oracle knowledge of true performance).

Note: This selector "cheats" by using the test performance data.

Source code in asf/selectors/baselines.py
class VirtualBestSolver(ConfigurableMixin, AbstractSelector):
    """
    Virtual Best Solver (VBS) / oracle selector.

    For every instance it returns the algorithm that is actually best on
    that instance, which is the upper bound any algorithm selector can
    reach. Doing so requires knowing the true performance values.

    Note: This selector "cheats" by using the test performance data.
    """

    PREFIX = "vbs"

    def __init__(
        self,
        budget: int | None = None,
        maximize: bool = False,
        feature_groups: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Set up the selector; all arguments go to the parent initializer.

        Parameters
        ----------
        budget : int or None, default=None
            The budget for the selector.
        maximize : bool, default=False
            Whether larger performance values are better.
        feature_groups : list[str] or None, default=None
            Groups of features to be considered.
        **kwargs : Any
            Additional keyword arguments.
        """
        super().__init__(
            budget=budget,
            maximize=maximize,
            feature_groups=feature_groups,
            **kwargs,
        )
        self._performance: pd.DataFrame | None = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
        **kwargs: Any,
    ) -> None:
        """
        Remember the performance data for later oracle predictions.

        Parameters
        ----------
        features : pd.DataFrame
            The input features. Not used by this selector.
        performance : pd.DataFrame
            The performance data; kept by reference.
        **kwargs : Any
            Additional keyword arguments. Not used by this method.
        """
        self._performance = performance

    def _predict(
        self,
        features: pd.DataFrame | None,
        performance: pd.DataFrame | None = None,
    ) -> dict[str, list[tuple[str, float]]]:
        """
        Return the truly best algorithm for each instance (oracle).

        Performance passed at prediction time takes precedence; otherwise
        the data stored during fitting is used.

        Parameters
        ----------
        features : pd.DataFrame or None
            The input features; only the index is used. When ``None``, the
            index of the performance data is used instead.
        performance : pd.DataFrame or None, default=None
            The performance data.

        Returns
        -------
        dict
            Dictionary mapping instance IDs to a one-element schedule with
            the best algorithm and the budget (``0.0`` if unset).

        Raises
        ------
        ValueError
            If no performance data is available from either source.
        """
        perf = self._performance if performance is None else performance

        if perf is None:
            raise ValueError(
                "VirtualBestSolver requires performance data. "
                "Either provide it at fit time or pass it to predict."
            )

        indices = perf.index if features is None else features.index
        pick_best = np.argmax if self.maximize else np.argmin

        result: dict[str, list[tuple[str, float]]] = {}
        for instance in indices:
            if instance in perf.index:
                row = perf.loc[instance]
                if self.budget is None:
                    scores = row.values
                else:
                    # PAR10-style penalty: entries above the budget count
                    # as ten times the budget.
                    scores = np.where(row <= self.budget, row, self.budget * 10)
                chosen = str(perf.columns[int(pick_best(scores))])
            else:
                # Unknown instance: default to the first algorithm.
                chosen = str(self.algorithms[0])

            result[str(instance)] = [(chosen, float(self.budget or 0))]

        return result

    @staticmethod
    def _define_hyperparameters(
        **kwargs: Any,
    ) -> tuple[list[Any], list[Any], list[Any]]:
        """
        Report the (empty) hyperparameter space of VirtualBestSolver.

        Returns
        -------
        tuple
            Empty hyperparameters, conditions, and forbiddens.
        """
        hyperparameters: list[Any] = []
        conditions: list[Any] = []
        forbiddens: list[Any] = []
        return hyperparameters, conditions, forbiddens

    @classmethod
    def _get_from_clean_configuration(
        cls,
        clean_config: dict[str, Any],
        **kwargs: Any,
    ) -> partial[VirtualBestSolver]:
        """
        Build a VirtualBestSolver factory from a clean configuration.

        Parameters
        ----------
        clean_config : dict
            The clean configuration. It is not modified.
        **kwargs : Any
            Additional keyword arguments; they take precedence over
            entries of ``clean_config`` with the same key.

        Returns
        -------
        partial
            Partial function for VirtualBestSolver.
        """
        merged_config = {**clean_config}
        merged_config.update(kwargs)
        return partial(VirtualBestSolver, **merged_config)

__init__(budget=None, maximize=False, feature_groups=None, **kwargs)

Initialize the VirtualBestSolver.

Parameters

budget : int or None, default=None The budget for the selector. maximize : bool, default=False Indicates whether to maximize the performance metric. feature_groups : list[str] or None, default=None Groups of features to be considered. **kwargs : Any Additional keyword arguments.

Source code in asf/selectors/baselines.py
def __init__(
    self,
    budget: int | None = None,
    maximize: bool = False,
    feature_groups: list[str] | None = None,
    **kwargs: Any,
) -> None:
    """
    Set up a VirtualBestSolver.

    All arguments are forwarded unchanged to the parent initializer;
    afterwards the ``_performance`` attribute is set to ``None``.

    Parameters
    ----------
    budget : int or None, default=None
        The budget for the selector.
    maximize : bool, default=False
        Indicates whether to maximize the performance metric.
    feature_groups : list[str] or None, default=None
        Groups of features to be considered.
    **kwargs : Any
        Additional keyword arguments, passed on to the parent class.
    """
    parent_options: dict[str, Any] = {
        "budget": budget,
        "maximize": maximize,
        "feature_groups": feature_groups,
    }
    super().__init__(**parent_options, **kwargs)

    # Starts out empty; nothing is stored here at construction time.
    self._performance: pd.DataFrame | None = None

tune_selector(X, y, selector_class, features_running_time, algorithm_features=None, selector_kwargs=None, preprocessing_class=None, pre_solving_class=None, feature_selector=None, algorithm_pre_selector=None, max_algorithm_pre_selector=None, budget=None, maximize=False, feature_groups=None, output_dir='./smac_output', smac_metric=running_time_selector_performance, smac_kwargs=None, smac_scenario_kwargs=None, runcount_limit=100, timeout=float('inf'), seed=0, cv=10, groups=None, max_feature_time=None)

Tunes a selector model using SMAC.

Parameters

- X : pd.DataFrame — Instance feature matrix.
- y : pd.DataFrame — Algorithm performance matrix.
- selector_class : type or list — Selector classes to tune.
- features_running_time : pd.DataFrame — Running times for computing feature groups.
- algorithm_features : pd.DataFrame or None, optional — Features for algorithms.
- selector_kwargs : dict or None, optional — Arguments for selector instantiation.
- preprocessing_class : list or None, optional — List of preprocessor classes.
- pre_solving_class : list or None, optional — List of presolver classes.
- feature_selector : Any or None, optional — Feature selection component.
- algorithm_pre_selector : Any or None, optional — Algorithm pre-selection component.
- max_algorithm_pre_selector : int or None, optional — Constraint for pre-selection.
- budget : float or None, optional — Global cutoff time.
- maximize : bool, default=False — Whether to maximize the performance metric.
- feature_groups : dict or None, optional — Definition of feature groups.
- output_dir : str, default="./smac_output" — SMAC output directory.
- smac_metric : callable, default=running_time_selector_performance — Evaluation metric for SMAC.
- smac_kwargs : callable or None, optional — Additional arguments for SMAC facade.
- smac_scenario_kwargs : dict or None, optional — Additional arguments for SMAC scenario.
- runcount_limit : int, default=100 — Limit for trials.
- timeout : float, default=inf — Wall-clock time limit.
- seed : int, default=0 — Random seed.
- cv : int, default=10 — Number of cross-validation folds.
- groups : np.ndarray or None, optional — Group labels for CV.
- max_feature_time : float or None, optional — Budget per feature group.

Returns

SelectorPipeline Best pipeline found by SMAC.

Source code in asf/selectors/selector_tuner.py
def tune_selector(
    X: pd.DataFrame,
    y: pd.DataFrame,
    selector_class: type[AbstractSelector]
    | list[type[AbstractSelector]]
    | list[tuple[type[AbstractSelector], dict[str, Any]]],
    features_running_time: pd.DataFrame,
    algorithm_features: pd.DataFrame | None = None,
    selector_kwargs: dict[str, Any] | None = None,
    preprocessing_class: list[type[TransformerMixin]] | None = None,
    pre_solving_class: list[type[Any]] | None = None,
    feature_selector: Any | None = None,
    algorithm_pre_selector: Any | None = None,
    max_algorithm_pre_selector: int | None = None,
    budget: float | None = None,
    maximize: bool = False,
    feature_groups: dict[str, Any] | None = None,
    output_dir: str = "./smac_output",
    smac_metric: Callable[
        ..., float | dict[str, float]
    ] = running_time_selector_performance,
    smac_kwargs: Callable[[Scenario], dict[str, Any]] | None = None,
    smac_scenario_kwargs: dict[str, Any] | None = None,
    runcount_limit: int = 100,
    timeout: float = float("inf"),
    seed: int = 0,
    cv: int = 10,
    groups: np.ndarray | None = None,
    max_feature_time: float | None = None,
) -> SelectorPipeline:
    """
    Tunes a selector model using SMAC.

    A configuration space is built from the given components, each
    candidate configuration is scored by cross-validation, and a pipeline
    built from the best configuration is returned.

    Parameters
    ----------
    X : pd.DataFrame
        Instance feature matrix.
    y : pd.DataFrame
        Algorithm performance matrix.
    selector_class : type or list
        Selector class, list of selector classes, or list of
        ``(selector class, kwargs)`` tuples to tune. A single class is
        wrapped in a list.
    features_running_time : pd.DataFrame
        Running times for computing feature groups. Rows are selected
        positionally with the test indices of each fold.
    algorithm_features : pd.DataFrame or None, optional
        Features for algorithms, forwarded to ``pipeline.fit``.
    selector_kwargs : dict or None, optional
        Arguments for selector instantiation.
    preprocessing_class : list or None, optional
        List of preprocessor classes.
    pre_solving_class : list or None, optional
        List of presolver classes. Requires ``budget`` to be set.
    feature_selector : Any or None, optional
        Feature selection component. Currently not used by this function;
        a warning is logged when a value is supplied.
    algorithm_pre_selector : Any or None, optional
        Algorithm pre-selection component.
    max_algorithm_pre_selector : int or None, optional
        Constraint for pre-selection.
    budget : float or None, optional
        Global cutoff time.
    maximize : bool, default=False
        Whether to maximize the performance metric. When True the
        cross-validated score is negated before it is returned to SMAC.
    feature_groups : dict or None, optional
        Definition of feature groups.
    output_dir : str, default="./smac_output"
        SMAC output directory.
    smac_metric : callable, default=running_time_selector_performance
        Evaluation metric, called as
        ``smac_metric(y_pred, y_test, budget, rt_test)``. If it returns a
        dict, the mean of its values is used as the fold score.
    smac_kwargs : callable or None, optional
        Callable that receives the constructed ``Scenario`` and returns a
        dict of additional keyword arguments for the SMAC facade.
    smac_scenario_kwargs : dict or None, optional
        Additional arguments for SMAC scenario.
    runcount_limit : int, default=100
        Limit for trials.
    timeout : float, default=inf
        Wall-clock time limit.
    seed : int, default=0
        Random seed for the SMAC scenario.
    cv : int, default=10
        Number of cross-validation folds.
    groups : np.ndarray or None, optional
        Group labels for CV. When given, a grouped shuffled K-fold split
        is used instead of a plain shuffled K-fold.
    max_feature_time : float or None, optional
        Budget per feature group.

    Returns
    -------
    SelectorPipeline
        Pipeline built from the best configuration found by SMAC. This
        function does not fit the returned pipeline.

    Raises
    ------
    RuntimeError
        If SMAC or ConfigSpace is not installed.
    ValueError
        If ``pre_solving_class`` is given without a ``budget``.
    TypeError
        If a pipeline's ``predict`` does not return a dict during
        evaluation.
    """
    _logger = logging.getLogger(__name__)

    if not SMAC_AVAILABLE:
        raise RuntimeError("SMAC is not installed.")
    if not CONFIGSPACE_AVAILABLE:
        raise RuntimeError("ConfigSpace is not installed.")

    if pre_solving_class is not None and budget is None:
        raise ValueError("Budget must be provided if using pre-solving.")

    if feature_selector is not None:
        # The argument is accepted for API compatibility but nothing below
        # consumes it; tell the caller instead of dropping it silently.
        _logger.warning(
            "feature_selector=%r was provided but is not used by "
            "tune_selector; it will be ignored.",
            feature_selector,
        )

    sel_list = selector_class if isinstance(selector_class, list) else [selector_class]
    sel_kwargs = selector_kwargs or {}
    sc_kwargs = smac_scenario_kwargs or {}

    cs = SelectorPipeline.get_configuration_space(
        selector_class=sel_list,
        preprocessing_class=preprocessing_class,
        pre_solving_class=pre_solving_class,
        feature_groups=feature_groups,
        algorithm_pre_selector=algorithm_pre_selector,
        max_feature_time=max_feature_time,
        budget=budget,
        max_algorithm_pre_selector=max_algorithm_pre_selector,
        n_algorithms=y.shape[1] if hasattr(y, "shape") else None,
        **sel_kwargs,
    )

    cs = convert_class_choices_to_categorical(cs)

    scenario = Scenario(
        configspace=cs,
        n_trials=runcount_limit,
        walltime_limit=timeout,
        deterministic=True,
        output_directory=Path(output_dir),
        seed=seed,
        **sc_kwargs,
    )

    def target_function(config: Configuration, seed: int) -> float:
        """Return the mean cross-validated score of ``config`` as a cost."""
        # ``seed`` here is the per-call seed passed to the target function;
        # it intentionally shadows the outer ``seed`` and drives the split.
        if groups is not None:
            kfold = GroupKFoldShuffle(n_splits=cv, shuffle=True, random_state=seed)
        else:
            kfold = KFold(n_splits=cv, shuffle=True, random_state=seed)

        scores = []
        for train_idx, test_idx in kfold.split(X, y, groups):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
            rt_test = features_running_time.iloc[test_idx]

            # A fresh pipeline per fold so no state leaks between folds.
            pipeline = _create_pipeline(
                config,
                budget,
                maximize,
                sel_kwargs,
                feature_groups,
                max_feature_time=max_feature_time,
            )

            pipeline.fit(X_train, y_train, algorithm_features=algorithm_features)
            y_pred = pipeline.predict(X_test)
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if not isinstance(y_pred, dict):
                raise TypeError(
                    "pipeline.predict must return a dict, got "
                    f"{type(y_pred).__name__}."
                )

            score = smac_metric(y_pred, y_test, budget, rt_test)
            if isinstance(score, dict):
                # Collapse a multi-valued metric to a single scalar.
                score = float(np.mean(list(score.values())))
            scores.append(float(score))

        final_score = float(np.mean(scores))
        # SMAC minimizes the returned cost, so flip the sign when maximizing.
        return -final_score if maximize else final_score

    facade_kwargs = smac_kwargs(scenario) if smac_kwargs is not None else {}
    smac = HyperparameterOptimizationFacade(scenario, target_function, **facade_kwargs)
    best_config = smac.optimize()

    # ``optimize`` may hand back a list of configurations; use the first.
    if isinstance(best_config, list):
        best_config = best_config[0]
    return _create_pipeline(
        best_config,
        budget,
        maximize,
        sel_kwargs,
        feature_groups,
        max_feature_time=max_feature_time,
    )