Skip to content

Selectors

JointRanking

Bases: AbstractSelector, AbstractFeatureGenerator

Joint ranking (Ortuzk et al. 2022)

Attributes:

Name Type Description
metadata

Metadata containing information about the algorithms.

use_multi_target

Boolean indicating whether to use multi-target regression.

normalize

Method to normalize the performance data.

regressors

List of trained regression models.

Source code in asf/selectors/joint_ranking.py
class JointRanking(AbstractSelector, AbstractFeatureGenerator):
    """
    Joint ranking (Ortuzk et al. 2022)

    A single model scores every (instance, algorithm) pair from the instance
    features combined with a feature vector describing the algorithm. For each
    instance the algorithm with the lowest score is selected.

    Attributes:
        metadata: Metadata containing information about the algorithms.
        model: Model used to score (instance, algorithm) pairs. If ``None`` at
            fit time, a ``RankingMLP`` is created.
        algorithm_features: DataFrame with one row per algorithm. If ``None``
            at fit time, a one-hot encoding of the algorithms is used.
    """

    def __init__(
        self,
        metadata,
        model=None,
        hierarchical_generator=None,
    ):
        """
        Initializes the JointRanking selector with the given parameters.

        Args:
            metadata: Metadata containing information about the algorithms.
            model: Model used to score (instance, algorithm) pairs. If None,
                a RankingMLP is created when the selector is fitted.
            hierarchical_generator: Feature generator to be used.
        """
        AbstractSelector.__init__(self, metadata, hierarchical_generator)
        AbstractFeatureGenerator.__init__(self)
        self.model = model

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
        """
        Fits the ranking model to the given features and performance data.

        Args:
            features: DataFrame containing the feature data.
            performance: DataFrame containing the performance data.
        """
        if self.algorithm_features is None:
            # No algorithm descriptors supplied: identify each algorithm by a
            # one-hot vector instead.
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(
                    np.array(self.metadata.algorithms).reshape(-1, 1)
                ),
                index=self.metadata.algorithms,
                columns=[f"algo_{i}" for i in range(len(self.metadata.algorithms))],
            )

        if self.model is None:
            # The model input is the instance features plus the algorithm
            # feature vector, so size it from the actual algorithm-feature
            # width (equal to the number of algorithms only for one-hot).
            self.model = RankingMLP(
                input_size=len(self.metadata.features)
                + self.algorithm_features.shape[1]
            )

        self.model.fit(
            features[self.metadata.features], performance, self.algorithm_features
        )

    def _predict(self, features: pd.DataFrame):
        """
        Selects, for every instance, the algorithm with the lowest predicted score.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping instance names to a one-element list holding
            the ``(algorithm, budget)`` pair of the predicted best algorithm.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.metadata.algorithms[np.argmin(predictions[i])],
                    self.metadata.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> np.ndarray:
        """
        Generates predictions for the given features using the trained model.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            Array of shape (number of instances, number of algorithms); column
            ``i`` holds the model output for ``metadata.algorithms[i]``.
        """
        predictions = np.zeros((features.shape[0], len(self.metadata.algorithms)))

        features = features[self.metadata.features]
        # Column layout handed to the model: algorithm features first, then
        # the instance features.
        column_order = (
            self.algorithm_features.columns.to_list() + self.metadata.features
        )
        for i, algorithm in enumerate(self.metadata.algorithms):
            # Attach this algorithm's feature vector to every instance row.
            data = features.assign(**self.algorithm_features.loc[algorithm])
            data = data[column_order]
            prediction = self.model.predict(data)
            predictions[:, i] = prediction.flatten()

        return predictions

__init__(metadata, model=None, hierarchical_generator=None)

Initializes the JointRanking selector with the given parameters.

Parameters:

Name Type Description Default
metadata

Metadata containing information about the algorithms.

required
model

Model used to score (instance, algorithm) pairs. If None, a RankingMLP is created when the selector is fitted.

None
hierarchical_generator

Feature generator to be used.

None
Source code in asf/selectors/joint_ranking.py
def __init__(
    self,
    metadata,
    model=None,
    hierarchical_generator=None,
):
    """
    Initializes the JointRanking selector with the given parameters.

    Args:
        metadata: Metadata containing information about the algorithms.
        model: Model used to score (instance, algorithm) pairs; stored as
            ``self.model``. If None, ``_fit`` creates a RankingMLP.
        hierarchical_generator: Feature generator to be used; forwarded to
            ``AbstractSelector.__init__``.
    """
    AbstractSelector.__init__(self, metadata, hierarchical_generator)
    AbstractFeatureGenerator.__init__(self)
    self.model = model

_fit(features, performance)

Fits the regression models to the given features and performance data.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required
performance DataFrame

DataFrame containing the performance data.

required
Source code in asf/selectors/joint_ranking.py
def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
    """
    Fits the ranking model to the given features and performance data.

    Args:
        features: DataFrame containing the feature data.
        performance: DataFrame containing the performance data.
    """
    if self.algorithm_features is None:
        # No algorithm descriptors supplied: identify each algorithm by a
        # one-hot vector instead.
        encoder = OneHotEncoder(sparse_output=False)
        self.algorithm_features = pd.DataFrame(
            encoder.fit_transform(
                np.array(self.metadata.algorithms).reshape(-1, 1)
            ),
            index=self.metadata.algorithms,
            columns=[f"algo_{i}" for i in range(len(self.metadata.algorithms))],
        )

    if self.model is None:
        # The model input is the instance features plus the algorithm feature
        # vector, so size it from the actual algorithm-feature width (equal to
        # the number of algorithms only for the one-hot fallback).
        self.model = RankingMLP(
            input_size=len(self.metadata.features)
            + self.algorithm_features.shape[1]
        )

    self.model.fit(
        features[self.metadata.features], performance, self.algorithm_features
    )

_predict(features)

Predicts the performance of algorithms for the given features.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/joint_ranking.py
def _predict(self, features: pd.DataFrame):
    """
    Selects, for every instance, the algorithm with the lowest predicted score.

    Args:
        features: DataFrame containing the feature data.

    Returns:
        A dictionary mapping instance names to a one-element list holding the
        ``(algorithm, budget)`` pair of the predicted best algorithm.
    """
    scores = self.generate_features(features)

    selection = {}
    for row, instance_name in enumerate(features.index):
        # Row ``row`` of the score matrix belongs to this instance.
        best = self.metadata.algorithms[np.argmin(scores[row])]
        selection[instance_name] = [(best, self.metadata.budget)]
    return selection

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description
ndarray

Array of shape (number of instances, number of algorithms) containing the predictions for each algorithm.

Source code in asf/selectors/joint_ranking.py
def generate_features(self, features: pd.DataFrame) -> np.ndarray:
    """
    Generates predictions for the given features using the trained model.

    Args:
        features: DataFrame containing the feature data.

    Returns:
        Array of shape (number of instances, number of algorithms); column
        ``i`` holds the model output for ``metadata.algorithms[i]``.
    """
    predictions = np.zeros((features.shape[0], len(self.metadata.algorithms)))

    features = features[self.metadata.features]
    # Column layout handed to the model: algorithm features first, then the
    # instance features.
    column_order = self.algorithm_features.columns.to_list() + self.metadata.features
    for i, algorithm in enumerate(self.metadata.algorithms):
        # Attach this algorithm's feature vector to every instance row.
        data = features.assign(**self.algorithm_features.loc[algorithm])
        data = data[column_order]
        prediction = self.model.predict(data)
        predictions[:, i] = prediction.flatten()

    return predictions

MultiClassClassifier

Bases: AbstractModelBasedSelector

MultiClassClassifier is a class that predicts the best algorithm for a given instance using a multi-class classification model.

Attributes:

Name Type Description
model_class

The class of the classification model to be used.

metadata

Metadata containing information about the algorithms.

classifier

The trained classification model.

Source code in asf/selectors/mutli_class.py
class MultiClassClassifier(AbstractModelBasedSelector):
    """
    MultiClassClassifier is a class that predicts the best algorithm for a given instance
    using a multi-class classification model.

    Attributes:
        model_class: The class of the classification model to be used.
        metadata: Metadata containing information about the algorithms.
        classifier: The trained classification model (None until fitted).
    """

    def __init__(self, model_class, metadata, hierarchical_generator=None):
        """
        Initializes the MultiClassClassifier with the given parameters.

        Args:
            model_class: The class of the classification model to be used.
            metadata: Metadata containing information about the algorithms.
            hierarchical_generator: Feature generator to be used.
        """
        AbstractModelBasedSelector.__init__(
            self, model_class, metadata, hierarchical_generator
        )
        self.classifier = None

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
        """
        Fits the classification model to the given feature and performance data.

        The class label of an instance is the position, within
        ``metadata.algorithms``, of the algorithm with the lowest performance
        value on that instance.

        Args:
            features: DataFrame containing the feature data.
            performance: DataFrame containing the performance data, with one
                column per algorithm in ``metadata.algorithms``.
        """
        assert self.algorithm_features is None, (
            "MultiClassClassifier does not use algorithm features."
        )
        # _predict maps labels through metadata.algorithms, so the labels must
        # be computed in that order rather than in whatever order the
        # performance columns happen to arrive.
        ordered = performance[list(self.metadata.algorithms)]
        self.classifier = self.model_class()
        self.classifier.fit(features, np.argmin(ordered.values, axis=1))

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance in the given feature data using simple multi class classification.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping instance names to a one-element list holding
            the ``(algorithm, budget)`` pair of the predicted best algorithm.
        """
        predictions = self.classifier.predict(features)

        return {
            instance_name: [
                (self.metadata.algorithms[predictions[i]], self.metadata.budget)
            ]
            for i, instance_name in enumerate(features.index)
        }

__init__(model_class, metadata, hierarchical_generator=None)

Initializes the MultiClassClassifier with the given parameters.

Parameters:

Name Type Description Default
model_class

The class of the classification model to be used.

required
metadata

Metadata containing information about the algorithms.

required
hierarchical_generator

Feature generator to be used.

None
Source code in asf/selectors/mutli_class.py
def __init__(self, model_class, metadata, hierarchical_generator=None):
    """
    Initializes the MultiClassClassifier with the given parameters.

    Args:
        model_class: The class of the classification model to be used.
        metadata: Metadata containing information about the algorithms.
        hierarchical_generator: Feature generator to be used.
    """
    AbstractModelBasedSelector.__init__(
        self, model_class, metadata, hierarchical_generator
    )
    # No classifier exists until _fit builds one from model_class.
    self.classifier = None

_fit(features, performance)

Fits the classification model to the given feature and performance data.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required
performance DataFrame

DataFrame containing the performance data.

required
Source code in asf/selectors/mutli_class.py
def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
    """
    Fits the classification model to the given feature and performance data.

    The class label of an instance is the position, within
    ``metadata.algorithms``, of the algorithm with the lowest performance
    value on that instance.

    Args:
        features: DataFrame containing the feature data.
        performance: DataFrame containing the performance data, with one
            column per algorithm in ``metadata.algorithms``.
    """
    assert self.algorithm_features is None, (
        "MultiClassClassifier does not use algorithm features."
    )
    # Prediction maps labels through metadata.algorithms, so the labels must
    # be computed in that order rather than in whatever order the performance
    # columns happen to arrive.
    ordered = performance[list(self.metadata.algorithms)]
    self.classifier = self.model_class()
    self.classifier.fit(features, np.argmin(ordered.values, axis=1))

_predict(features)

Predicts the best algorithm for each instance in the given feature data using simple multi class classification.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/mutli_class.py
def _predict(self, features: pd.DataFrame):
    """
    Selects one algorithm per instance from the classifier's predicted labels.

    Args:
        features: DataFrame containing the feature data.

    Returns:
        A dictionary mapping instance names to a one-element list holding the
        ``(algorithm, budget)`` pair of the predicted best algorithm.
    """
    labels = self.classifier.predict(features)

    selection = {}
    for position, instance_name in enumerate(features.index):
        # Each label is an index into metadata.algorithms.
        algorithm = self.metadata.algorithms[labels[position]]
        selection[instance_name] = [(algorithm, self.metadata.budget)]
    return selection

PairwiseClassifier

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

PairwiseClassifier is a selector that uses pairwise comparison of algorithms to predict the best algorithm for a given instance.

Attributes:

Name Type Description
model_class ClassifierMixin

The classifier model to be used for pairwise comparisons.

classifiers list[ClassifierMixin]

List of trained classifiers for pairwise comparisons.

Source code in asf/selectors/pairwise_classifier.py
class PairwiseClassifier(AbstractModelBasedSelector, AbstractFeatureGenerator):
    """
    PairwiseClassifier is a selector that uses pairwise comparison of algorithms
    to predict the best algorithm for a given instance.

    One classifier is trained per unordered pair of algorithms. At prediction
    time every classifier casts one vote per instance and the algorithm with
    the most votes is selected.

    Attributes:
        model_class (ClassifierMixin): The classifier model to be used for pairwise comparisons.
        classifiers (list[ClassifierMixin]): List of trained classifiers for pairwise comparisons.
        use_weights (bool): Whether training instances are weighted by the
            absolute performance difference of the compared algorithms.
    """

    def __init__(
        self, model_class, metadata, hierarchical_generator=None, use_weights=True
    ):
        """
        Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

        Args:
            model_class (ClassifierMixin): The classifier model to be used for pairwise comparisons.
            metadata: Metadata containing information about the algorithms.
            hierarchical_generator (AbstractFeatureGenerator, optional): The feature generator to be used. Defaults to None.
            use_weights (bool, optional): If True, each training instance is
                weighted by the absolute performance difference between the
                two compared algorithms. Defaults to True.
        """
        AbstractModelBasedSelector.__init__(
            self, model_class, metadata, hierarchical_generator
        )
        AbstractFeatureGenerator.__init__(self)
        self.classifiers: list[AbstractPredictor] = []
        self.use_weights = use_weights

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
        """
        Fits the pairwise classifiers using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseClassifier does not use algorithm features."
        )
        # Start from an empty list: generate_features reads classifiers by
        # position from index 0, so models left over from an earlier fit
        # would otherwise be used instead of the new ones.
        self.classifiers = []
        for i, algorithm in enumerate(self.metadata.algorithms):
            for other_algorithm in self.metadata.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                # Label is True where the first algorithm of the pair is
                # strictly better (lower value).
                diffs = algo1_times < algo2_times
                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None
                    if not self.use_weights
                    else np.abs(algo1_times - algo2_times),
                )
                self.classifiers.append(cur_model)

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance using the trained pairwise classifiers.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            dict: A dictionary mapping instance names to a one-element list
            holding the ``(algorithm, budget)`` pair with the most votes.
        """
        predictions_sum = self.generate_features(features)

        return {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmax(),
                    self.metadata.budget,
                )
            ]
            for instance_name in features.index
        }

    def generate_features(self, features: pd.DataFrame):
        """
        Counts, per instance, the pairwise comparisons won by each algorithm.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: Vote counts indexed like ``features`` with one
            column per algorithm in ``metadata.algorithms``.
        """
        cnt = 0
        predictions_sum = pd.DataFrame(
            0, index=features.index, columns=self.metadata.algorithms
        )
        # Pairs are visited in the same order in which _fit trained them, so
        # classifier number ``cnt`` belongs to the current pair.
        for i, algorithm in enumerate(self.metadata.algorithms):
            for other_algorithm in self.metadata.algorithms[i + 1 :]:
                # Force a flat boolean mask: ``~`` on 0/1 integer predictions
                # would be a bitwise NOT, not a logical negation.
                prediction = (
                    np.asarray(self.classifiers[cnt].predict(features))
                    .astype(bool)
                    .ravel()
                )
                predictions_sum.loc[prediction, algorithm] += 1
                predictions_sum.loc[~prediction, other_algorithm] += 1
                cnt += 1

        return predictions_sum

__init__(model_class, metadata, hierarchical_generator=None, use_weights=True)

Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

Parameters:

Name Type Description Default
model_class ClassifierMixin

The classifier model to be used for pairwise comparisons.

required
hierarchical_generator AbstractFeatureGenerator

The feature generator to be used. Defaults to DummyFeatureGenerator.

None
Source code in asf/selectors/pairwise_classifier.py
def __init__(
    self, model_class, metadata, hierarchical_generator=None, use_weights=True
):
    """
    Initializes the PairwiseClassifier with a given model class and hierarchical feature generator.

    Args:
        model_class (ClassifierMixin): The classifier model to be used for pairwise comparisons.
        metadata: Metadata containing information about the algorithms.
        hierarchical_generator (AbstractFeatureGenerator, optional): The feature generator to be used. Defaults to None.
        use_weights (bool, optional): Stored as ``self.use_weights``; when
            True, fitting weights each instance by the absolute performance
            difference of the two compared algorithms. Defaults to True.
    """
    AbstractModelBasedSelector.__init__(
        self, model_class, metadata, hierarchical_generator
    )
    AbstractFeatureGenerator.__init__(self)
    # One trained model per algorithm pair is appended here during fitting.
    self.classifiers: list[AbstractPredictor] = []
    self.use_weights = use_weights

_fit(features, performance)

Fits the pairwise classifiers using the provided features and performance data.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required
performance DataFrame

The performance data for the algorithms.

required
Source code in asf/selectors/pairwise_classifier.py
def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
    """
    Fits the pairwise classifiers using the provided features and performance data.

    One classifier is trained for every unordered pair of algorithms, in the
    order the pairs are enumerated from ``metadata.algorithms``.

    Args:
        features (pd.DataFrame): The feature data for the instances.
        performance (pd.DataFrame): The performance data for the algorithms.
    """
    assert self.algorithm_features is None, (
        "PairwiseClassifier does not use algorithm features."
    )
    # Start from an empty list: classifiers are later read by position from
    # index 0, so models left over from an earlier fit would otherwise be
    # used instead of the new ones.
    self.classifiers = []
    for i, algorithm in enumerate(self.metadata.algorithms):
        for other_algorithm in self.metadata.algorithms[i + 1 :]:
            algo1_times = performance[algorithm]
            algo2_times = performance[other_algorithm]

            # Label is True where the first algorithm of the pair is strictly
            # better (lower value).
            diffs = algo1_times < algo2_times
            cur_model = self.model_class()
            cur_model.fit(
                features,
                diffs,
                sample_weight=None
                if not self.use_weights
                else np.abs(algo1_times - algo2_times),
            )
            self.classifiers.append(cur_model)

_predict(features)

Predicts the best algorithm for each instance using the trained pairwise classifiers.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Name Type Description
dict

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/pairwise_classifier.py
def _predict(self, features: pd.DataFrame):
    """
    Picks, for every instance, the algorithm that won the most pairwise comparisons.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        dict: A dictionary mapping instance names to a one-element list
        holding the ``(algorithm, budget)`` pair of the predicted best algorithm.
    """
    votes = self.generate_features(features)

    chosen = {}
    for instance_name in features.index:
        # The column label with the highest vote count names the winner.
        winner = votes.loc[instance_name].idxmax()
        chosen[instance_name] = [(winner, self.metadata.budget)]
    return chosen

generate_features(features)

Generates features for the pairwise classifiers.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Type Description

DataFrame: The number of pairwise comparisons won by each algorithm (columns) for each instance (rows).

Source code in asf/selectors/pairwise_classifier.py
def generate_features(self, features: pd.DataFrame):
    """
    Counts, per instance, the pairwise comparisons won by each algorithm.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: Vote counts indexed like ``features`` with one column
        per algorithm in ``metadata.algorithms``.
    """
    cnt = 0
    predictions_sum = pd.DataFrame(
        0, index=features.index, columns=self.metadata.algorithms
    )
    # Classifiers are stored in pair-enumeration order, so classifier number
    # ``cnt`` belongs to the current (algorithm, other_algorithm) pair.
    for i, algorithm in enumerate(self.metadata.algorithms):
        for other_algorithm in self.metadata.algorithms[i + 1 :]:
            # Force a flat boolean mask: ``~`` on 0/1 integer predictions
            # would be a bitwise NOT, not a logical negation.
            prediction = (
                np.asarray(self.classifiers[cnt].predict(features))
                .astype(bool)
                .ravel()
            )
            predictions_sum.loc[prediction, algorithm] += 1
            predictions_sum.loc[~prediction, other_algorithm] += 1
            cnt += 1

    return predictions_sum

PairwiseRegressor

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

PairwiseRegressor is a selector that uses pairwise regression of algorithms to predict the best algorithm for a given instance.

Attributes:

Name Type Description
model_class

The regression model to be used for pairwise comparisons.

regressors

List of trained regressors for pairwise comparisons.

Source code in asf/selectors/pairwise_regressor.py
class PairwiseRegressor(AbstractModelBasedSelector, AbstractFeatureGenerator):
    """
    PairwiseRegressor is a selector that uses pairwise regression of algorithms
    to predict the best algorithm for a given instance.

    One regressor is trained per unordered pair of algorithms to predict the
    performance difference between them. The predicted differences are summed
    per algorithm and the algorithm with the lowest sum is selected.

    Attributes:
        model_class: The regression model to be used for pairwise comparisons.
        regressors: List of trained regressors for pairwise comparisons.
    """

    def __init__(self, model_class, metadata, hierarchical_generator=None):
        """
        Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

        Args:
            model_class: The regression model to be used for pairwise comparisons.
            metadata: Metadata containing information about the algorithms.
            hierarchical_generator (AbstractFeatureGenerator, optional): The feature generator to be used. Defaults to None.
        """
        AbstractModelBasedSelector.__init__(
            self, model_class, metadata, hierarchical_generator
        )
        AbstractFeatureGenerator.__init__(self)
        self.regressors = []

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
        """
        Fits the pairwise regressors using the provided features and performance data.

        Args:
            features (pd.DataFrame): The feature data for the instances.
            performance (pd.DataFrame): The performance data for the algorithms.
        """
        assert self.algorithm_features is None, (
            "PairwiseRegressor does not use algorithm features."
        )
        # Start from an empty list: generate_features reads regressors by
        # position from index 0, so models left over from an earlier fit
        # would otherwise be used instead of the new ones.
        self.regressors = []
        for i, algorithm in enumerate(self.metadata.algorithms):
            for other_algorithm in self.metadata.algorithms[i + 1 :]:
                algo1_times = performance[algorithm]
                algo2_times = performance[other_algorithm]

                # Regression target: first algorithm's value minus the
                # second's (negative where the first one is better).
                diffs = algo1_times - algo2_times
                cur_model = self.model_class()
                cur_model.fit(
                    features,
                    diffs,
                    sample_weight=None,
                )
                self.regressors.append(cur_model)

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance using the trained pairwise regressors.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            dict: A dictionary mapping instance names to a one-element list
            holding the ``(algorithm, budget)`` pair with the lowest
            aggregated predicted difference.
        """
        predictions_sum = self.generate_features(features)
        return {
            instance_name: [
                (
                    predictions_sum.loc[instance_name].idxmin(),
                    self.metadata.budget,
                )
            ]
            for instance_name in features.index
        }

    def generate_features(self, features: pd.DataFrame):
        """
        Aggregates the predicted pairwise performance differences per algorithm.

        Args:
            features (pd.DataFrame): The feature data for the instances.

        Returns:
            pd.DataFrame: Indexed like ``features`` with one column per
            algorithm; each entry is the sum of the predicted differences
            between that algorithm and every other algorithm.
        """
        cnt = 0
        # Float accumulator: the regressors' outputs are added to it.
        predictions_sum = pd.DataFrame(
            0.0, index=features.index, columns=self.metadata.algorithms
        )
        # Pairs are visited in the same order in which _fit trained them, so
        # regressor number ``cnt`` belongs to the current pair.
        for i, algorithm in enumerate(self.metadata.algorithms):
            for other_algorithm in self.metadata.algorithms[i + 1 :]:
                # Flatten so column-vector outputs of shape (n, 1) are
                # accepted as well as flat ones.
                prediction = np.ravel(
                    np.asarray(self.regressors[cnt].predict(features))
                )
                # The prediction estimates (algorithm - other_algorithm), so
                # it counts against the first and in favour of the second.
                predictions_sum[algorithm] += prediction
                predictions_sum[other_algorithm] -= prediction
                cnt += 1

        return predictions_sum

__init__(model_class, metadata, hierarchical_generator=None)

Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

Parameters:

Name Type Description Default
model_class

The regression model to be used for pairwise comparisons.

required
hierarchical_generator AbstractFeatureGenerator

The feature generator to be used. Defaults to DummyFeatureGenerator.

None
Source code in asf/selectors/pairwise_regressor.py
def __init__(self, model_class, metadata, hierarchical_generator=None):
    """
    Initializes the PairwiseRegressor with a given model class and hierarchical feature generator.

    Args:
        model_class: The regression model to be used for pairwise comparisons.
        metadata: Metadata containing information about the algorithms.
        hierarchical_generator (AbstractFeatureGenerator, optional): The feature generator to be used. Defaults to None.
    """
    AbstractModelBasedSelector.__init__(
        self, model_class, metadata, hierarchical_generator
    )
    AbstractFeatureGenerator.__init__(self)
    # One trained model per algorithm pair is appended here during fitting.
    self.regressors = []

_fit(features, performance)

Fits the pairwise regressors using the provided features and performance data.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required
performance DataFrame

The performance data for the algorithms.

required
Source code in asf/selectors/pairwise_regressor.py
def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
    """
    Fits the pairwise regressors using the provided features and performance data.

    One regressor is trained for every unordered pair of algorithms, in the
    order the pairs are enumerated from ``metadata.algorithms``.

    Args:
        features (pd.DataFrame): The feature data for the instances.
        performance (pd.DataFrame): The performance data for the algorithms.
    """
    assert self.algorithm_features is None, (
        "PairwiseRegressor does not use algorithm features."
    )
    # Start from an empty list: regressors are later read by position from
    # index 0, so models left over from an earlier fit would otherwise be
    # used instead of the new ones.
    self.regressors = []
    for i, algorithm in enumerate(self.metadata.algorithms):
        for other_algorithm in self.metadata.algorithms[i + 1 :]:
            algo1_times = performance[algorithm]
            algo2_times = performance[other_algorithm]

            # Regression target: first algorithm's value minus the second's
            # (negative where the first one is better).
            diffs = algo1_times - algo2_times
            cur_model = self.model_class()
            cur_model.fit(
                features,
                diffs,
                sample_weight=None,
            )
            self.regressors.append(cur_model)

_predict(features)

Predicts the best algorithm for each instance using the trained pairwise regressors.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Name Type Description
dict

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/pairwise_regressor.py
def _predict(self, features: pd.DataFrame):
    """
    Picks, for every instance, the algorithm with the lowest aggregated predicted difference.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        dict: A dictionary mapping instance names to a one-element list
        holding the ``(algorithm, budget)`` pair of the predicted best algorithm.
    """
    totals = self.generate_features(features)

    chosen = {}
    for instance_name in features.index:
        # The column label with the smallest total names the winner.
        winner = totals.loc[instance_name].idxmin()
        chosen[instance_name] = [(winner, self.metadata.budget)]
    return chosen

generate_features(features)

Generates features for the pairwise regressors.

Parameters:

Name Type Description Default
features DataFrame

The feature data for the instances.

required

Returns:

Type Description

DataFrame: For each instance (rows) and algorithm (columns), the sum of the predicted performance differences between that algorithm and every other algorithm.

Source code in asf/selectors/pairwise_regressor.py
def generate_features(self, features: pd.DataFrame):
    """
    Aggregate the pairwise regressor predictions into per-algorithm scores.

    Every unordered pair of algorithms (first, second), with ``second`` listed
    after ``first`` in ``self.metadata.algorithms``, has one regressor. Its
    prediction is added to the score of ``first`` and subtracted from the
    score of ``second``.

    Args:
        features (pd.DataFrame): The feature data for the instances.

    Returns:
        pd.DataFrame: Scores indexed like ``features`` with one column per
            algorithm.
    """
    algorithms = self.metadata.algorithms
    scores = pd.DataFrame(0, index=features.index, columns=algorithms)

    # Enumerate the pairs in the nested order (i, j > i); the regressor at
    # position k is used for the k-th pair of this enumeration.
    pairs = [
        (first, second)
        for position, first in enumerate(algorithms)
        for second in algorithms[position + 1 :]
    ]
    for model_index, (first, second) in enumerate(pairs):
        delta = self.regressors[model_index].predict(features)
        scores[first] += delta
        scores[second] -= delta

    return scores

PerformanceModel

Bases: AbstractModelBasedSelector, AbstractFeatureGenerator

PerformanceModel is a selector that predicts the performance of algorithms based on given features and selects the algorithm with the lowest predicted value. It can handle both single-target and multi-target regression models.

Attributes:

Name Type Description
model_class

The class of the regression model to be used.

metadata

Metadata containing information about the algorithms.

use_multi_target

Boolean indicating whether to use multi-target regression.

normalize

Method to normalize the performance data.

regressors

List of trained regression models.

Source code in asf/selectors/performance_model.py
class PerformanceModel(AbstractModelBasedSelector, AbstractFeatureGenerator):
    """
    Selector that predicts the performance of every algorithm from instance
    features and selects the algorithm with the lowest predicted value. It can
    use one regressor per algorithm or a single multi-target regressor.

    Attributes:
        model_class: The class of the regression model to be used.
        metadata: Metadata containing information about the algorithms.
        use_multi_target: Boolean indicating whether to use multi-target regression.
        normalize: Method to normalize the performance data.
        regressors: List of trained regression models (one per algorithm), or
            a single fitted model in multi-target / algorithm-feature mode.
    """

    def __init__(
        self,
        model_class,
        metadata,
        use_multi_target=False,
        normalize="log",
        hierarchical_generator=None,
    ):
        """
        Initializes the PerformanceModel with the given parameters.

        Args:
            model_class: The class of the regression model to be used.
            metadata: Metadata containing information about the algorithms.
            use_multi_target: Boolean indicating whether to use multi-target regression.
            normalize: Method to normalize the performance data. ``"log"``
                applies a log10 transform during fitting; any other value
                leaves the performance data unchanged.
            hierarchical_generator: Feature generator to be used.
        """
        AbstractModelBasedSelector.__init__(
            self, model_class, metadata, hierarchical_generator
        )
        AbstractFeatureGenerator.__init__(self)
        self.regressors = []
        self.use_multi_target = use_multi_target
        self.normalize = normalize

    def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
        """
        Fits the regression models to the given features and performance data.

        Every call replaces the previously fitted model(s), so fitting the
        same selector again never leaves models from an earlier fit behind.

        Args:
            features: DataFrame containing the feature data.
            performance: DataFrame containing the performance data. Column
                ``i`` is used as the target for ``self.metadata.algorithms[i]``.
        """
        assert self.algorithm_features is None, (
            "PerformanceModel does not use algorithm features."
        )
        if self.normalize == "log":
            # Small offset keeps log10 finite for zero-valued performance.
            performance = np.log10(performance + 1e-6)

        # Some model classes need to know the number of input features.
        regressor_init_args = {}
        if "input_size" in inspect.signature(self.model_class).parameters.keys():
            regressor_init_args["input_size"] = features.shape[1]

        if self.use_multi_target:
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(features, performance)
        else:
            if self.algorithm_features is None:
                # Build a fresh list: appending to the list created in
                # __init__ would keep stale models from earlier fits at the
                # front, and prediction reads the models by position.
                fitted = []
                for i, algorithm in enumerate(self.metadata.algorithms):
                    algo_times = performance.iloc[:, i]

                    cur_model = self.model_class(**regressor_init_args)
                    cur_model.fit(features, algo_times)
                    fitted.append(cur_model)
                self.regressors = fitted
            else:
                # NOTE(review): only reachable when assertions are disabled,
                # because of the assert at the top of this method.
                train_data = []
                for i, algorithm in enumerate(self.metadata.algorithms):
                    data = pd.merge(
                        features,
                        self.algorithm_features.loc[algorithm],
                        left_index=True,
                        right_index=True,
                    )
                    data = pd.merge(
                        data, performance.iloc[:, i], left_index=True, right_index=True
                    )
                    train_data.append(data)
                train_data = pd.concat(train_data)
                self.regressors = self.model_class(**regressor_init_args)
                self.regressors.fit(train_data.iloc[:, :-1], train_data.iloc[:, -1])

    def _predict(self, features: pd.DataFrame):
        """
        Selects, for every instance, the algorithm with the lowest prediction.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping each instance name to a one-element list
            ``[(algorithm, budget)]`` with ``budget`` taken from
            ``self.metadata.budget``.
        """
        predictions = self.generate_features(features)

        return {
            instance_name: [
                (
                    self.metadata.algorithms[np.argmin(predictions[i])],
                    self.metadata.budget,
                )
            ]
            for i, instance_name in enumerate(features.index)
        }

    def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """
        Generates predictions for the given features using the trained models.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            In multi-target mode, whatever the fitted model's ``predict``
            returns. Otherwise a NumPy array of shape
            ``(n_instances, n_algorithms)`` whose column ``i`` holds the
            predictions for ``self.metadata.algorithms[i]``.
        """
        if self.use_multi_target:
            return self.regressors.predict(features)

        predictions = np.zeros((features.shape[0], len(self.metadata.algorithms)))
        if self.algorithm_features is None:
            for i, algorithm in enumerate(self.metadata.algorithms):
                predictions[:, i] = self.regressors[i].predict(features)
        else:
            for i, algorithm in enumerate(self.metadata.algorithms):
                data = pd.merge(
                    features,
                    self.algorithm_features.loc[algorithm],
                    left_index=True,
                    right_index=True,
                )
                predictions[:, i] = self.regressors.predict(data)

        return predictions

__init__(model_class, metadata, use_multi_target=False, normalize='log', hierarchical_generator=None)

Initializes the PerformanceModel with the given parameters.

Parameters:

Name Type Description Default
model_class

The class of the regression model to be used.

required
metadata

Metadata containing information about the algorithms.

required
use_multi_target

Boolean indicating whether to use multi-target regression.

False
normalize

Method to normalize the performance data.

'log'
hierarchical_generator

Feature generator to be used.

None
Source code in asf/selectors/performance_model.py
def __init__(
    self,
    model_class,
    metadata,
    use_multi_target=False,
    normalize="log",
    hierarchical_generator=None,
):
    """
    Set up a PerformanceModel selector.

    Args:
        model_class: Regression model class instantiated during fitting.
        metadata: Metadata containing information about the algorithms.
        use_multi_target: Whether a single multi-target regressor is used
            instead of one regressor per algorithm.
        normalize: Normalization applied to the performance data when
            fitting (``"log"`` applies a log10 transform).
        hierarchical_generator: Feature generator to be used.
    """
    # Initialise both bases explicitly, selector first.
    AbstractModelBasedSelector.__init__(
        self, model_class, metadata, hierarchical_generator
    )
    AbstractFeatureGenerator.__init__(self)

    # Configuration and the (initially empty) collection of fitted models.
    self.normalize = normalize
    self.use_multi_target = use_multi_target
    self.regressors = []

_fit(features, performance)

Fits the regression models to the given features and performance data.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required
performance DataFrame

DataFrame containing the performance data.

required
Source code in asf/selectors/performance_model.py
def _fit(self, features: pd.DataFrame, performance: pd.DataFrame):
    """
    Fits the regression models to the given features and performance data.

    Every call replaces the previously fitted model(s), so fitting the same
    selector again never leaves models from an earlier fit behind.

    Args:
        features: DataFrame containing the feature data.
        performance: DataFrame containing the performance data. Column ``i``
            is used as the target for ``self.metadata.algorithms[i]``.
    """
    assert self.algorithm_features is None, (
        "PerformanceModel does not use algorithm features."
    )
    if self.normalize == "log":
        # Small offset keeps log10 finite for zero-valued performance.
        performance = np.log10(performance + 1e-6)

    # Some model classes need to know the number of input features.
    regressor_init_args = {}
    if "input_size" in inspect.signature(self.model_class).parameters.keys():
        regressor_init_args["input_size"] = features.shape[1]

    if self.use_multi_target:
        self.regressors = self.model_class(**regressor_init_args)
        self.regressors.fit(features, performance)
    else:
        if self.algorithm_features is None:
            # Build a fresh list: appending to the existing list would keep
            # stale models from earlier fits at the front, and prediction
            # reads the models by position.
            fitted = []
            for i, algorithm in enumerate(self.metadata.algorithms):
                algo_times = performance.iloc[:, i]

                cur_model = self.model_class(**regressor_init_args)
                cur_model.fit(features, algo_times)
                fitted.append(cur_model)
            self.regressors = fitted
        else:
            # NOTE(review): only reachable when assertions are disabled,
            # because of the assert at the top of this method.
            train_data = []
            for i, algorithm in enumerate(self.metadata.algorithms):
                data = pd.merge(
                    features,
                    self.algorithm_features.loc[algorithm],
                    left_index=True,
                    right_index=True,
                )
                data = pd.merge(
                    data, performance.iloc[:, i], left_index=True, right_index=True
                )
                train_data.append(data)
            train_data = pd.concat(train_data)
            self.regressors = self.model_class(**regressor_init_args)
            self.regressors.fit(train_data.iloc[:, :-1], train_data.iloc[:, -1])

_predict(features)

Predicts the performance of algorithms for the given features.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/performance_model.py
def _predict(self, features: pd.DataFrame):
    """
    Pick, for every instance, the algorithm with the lowest predicted value.

    Args:
        features: DataFrame containing the feature data.

    Returns:
        A dictionary mapping each instance name to a one-element list
        ``[(algorithm, budget)]`` with ``budget`` taken from
        ``self.metadata.budget``.
    """
    scores = self.generate_features(features)

    schedules = {}
    for row, instance_name in enumerate(features.index):
        # Position of the smallest prediction in this instance's row.
        best = np.argmin(scores[row])
        schedules[instance_name] = [
            (self.metadata.algorithms[best], self.metadata.budget)
        ]
    return schedules

generate_features(features)

Generates predictions for the given features using the trained models.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description
DataFrame

DataFrame containing the predictions for each algorithm.

Source code in asf/selectors/performance_model.py
def generate_features(self, features: pd.DataFrame) -> pd.DataFrame:
    """
    Produce per-algorithm predictions for the given instances.

    Args:
        features: DataFrame containing the feature data.

    Returns:
        In multi-target mode, whatever the fitted model's ``predict``
        returns. Otherwise a NumPy array of shape
        ``(n_instances, n_algorithms)`` whose column ``i`` holds the
        predictions for ``self.metadata.algorithms[i]``.
    """
    if self.use_multi_target:
        # One model predicts all algorithms at once.
        return self.regressors.predict(features)

    algorithms = self.metadata.algorithms
    table = np.zeros((features.shape[0], len(algorithms)))

    if self.algorithm_features is None:
        # One regressor per algorithm, stored in algorithm order.
        for column, _ in enumerate(algorithms):
            table[:, column] = self.regressors[column].predict(features)
        return table

    # Single regressor fed with instance features joined to the features of
    # the algorithm under consideration.
    for column, algorithm in enumerate(algorithms):
        joined = pd.merge(
            features,
            self.algorithm_features.loc[algorithm],
            left_index=True,
            right_index=True,
        )
        table[:, column] = self.regressors.predict(joined)
    return table

SimpleRanking

Bases: AbstractModelBasedSelector

Algorithm Selection via Ranking (Oentaryo et al.) + algo features (optional).

Attributes:

- model_class: The class of the ranking model to be used.
- metadata: Metadata containing information about the algorithms.
- classifier: The trained ranking model.

Source code in asf/selectors/simple_ranking.py
class SimpleRanking(AbstractModelBasedSelector):
    """
    Algorithm Selection via Ranking (Oentaryo et al.) + algo features (optional).

    Attributes:
        model_class: The class of the ranking model to be used.
        metadata: Metadata containing information about the algorithms.
        classifier: The trained ranking model.
    """

    def __init__(self, model_class, metadata, hierarchical_generator=None):
        """
        Initializes the SimpleRanking selector with the given parameters.

        Args:
            model_class: The class of the ranking model to be used. Assumes XGBoost API.
            metadata: Metadata containing information about the algorithms.
            hierarchical_generator: Feature generator to be used.
        """
        AbstractModelBasedSelector.__init__(
            self, model_class, metadata, hierarchical_generator
        )
        self.classifier = None

    def _fit(
        self,
        features: pd.DataFrame,
        performance: pd.DataFrame,
    ):
        """
        Fits the ranking model to the given feature and performance data.

        Every (instance, algorithm) combination becomes one training row made
        of the instance features and the algorithm features. The target is the
        rank of the algorithm's performance within the instance (1 = lowest
        performance value), and rows are grouped per instance via ``qid``.

        Args:
            features: DataFrame containing the feature data. Its index name
                is used as the instance key column.
            performance: DataFrame containing the performance data. Its index
                and column axis names are used as key column names.
        """
        if self.algorithm_features is None:
            # Default to one-hot algorithm features. The index has to be
            # named: the merge below joins on the index name and `_predict`
            # reads the algorithm from the column reset_index() creates.
            encoder = OneHotEncoder(sparse_output=False)
            self.algorithm_features = pd.DataFrame(
                encoder.fit_transform(
                    np.array(self.metadata.algorithms).reshape(-1, 1)
                ),
                index=pd.Index(self.metadata.algorithms, name="algorithm"),
                columns=[f"algo_{i}" for i in range(len(self.metadata.algorithms))],
            )

        performance = performance[self.metadata.algorithms]
        features = features[self.metadata.features]

        # One row per (instance, algorithm) combination.
        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        stacked_performance = performance.stack().reset_index()
        stacked_performance.columns = [
            performance.index.name,
            performance.columns.name,
            "performance",
        ]
        merged = total_features.merge(
            stacked_performance,
            right_on=[performance.index.name, performance.columns.name],
            left_on=[features.index.name, self.algorithm_features.index.name],
            how="left",
        )

        # Rank algorithms within each instance. Concatenating the groups
        # keeps the rows of one instance contiguous for the qid grouping.
        # `assign` returns a new frame instead of writing into the chunk
        # handed out by groupby.
        merged = pd.concat(
            [
                group.assign(
                    rank=group["performance"].rank(ascending=True, method="min")
                )
                for _, group in merged.groupby(features.index.name)
            ]
        )

        total_features = merged.drop(
            columns=[
                performance.index.name,
                performance.columns.name,
                "performance",
                "rank",
                self.algorithm_features.index.name,
            ]
        )
        qid = merged[features.index.name].values
        encoder = OrdinalEncoder()
        qid = encoder.fit_transform(qid.reshape(-1, 1)).flatten()

        self.classifier = self.model_class()
        self.classifier.fit(
            total_features,
            merged["rank"],
            qid=qid,
        )

    def _predict(self, features: pd.DataFrame):
        """
        Predicts the best algorithm for each instance in the given feature data.

        Args:
            features: DataFrame containing the feature data.

        Returns:
            A dictionary mapping each instance name to a one-element list
            ``[(algorithm, budget)]`` holding the algorithm with the lowest
            predicted score and ``self.metadata.budget``.
        """

        features = features[self.metadata.features]

        # reset_index() stores a named index under its name and an unnamed
        # one under "index"; use the same column to read the algorithm back
        # instead of assuming it is called "algorithm".
        algorithm_key = self.algorithm_features.index.name
        if algorithm_key is None:
            algorithm_key = "index"

        total_features = pd.merge(
            features.reset_index(), self.algorithm_features.reset_index(), how="cross"
        )

        predictions = self.classifier.predict(
            total_features[
                list(self.metadata.features) + list(self.algorithm_features.columns)
            ]
        )

        scheds = {}
        for instance_name in features.index.unique():
            ids = total_features[features.index.name] == instance_name
            chosen = predictions[ids].argmin()
            scheds[instance_name] = [
                (
                    total_features.loc[ids].iloc[chosen][algorithm_key],
                    self.metadata.budget,
                )
            ]

        return scheds

__init__(model_class, metadata, hierarchical_generator=None)

Initializes the SimpleRanking selector with the given parameters.

Parameters:

Name Type Description Default
model_class

The class of the classification model to be used. Assumes XGBoost API.

required
metadata

Metadata containing information about the algorithms.

required
hierarchical_generator

Feature generator to be used.

None
Source code in asf/selectors/simple_ranking.py
def __init__(self, model_class, metadata, hierarchical_generator=None):
    """
    Set up a SimpleRanking selector.

    Args:
        model_class: Ranking model class instantiated during fitting.
            Assumes XGBoost API.
        metadata: Metadata containing information about the algorithms.
        hierarchical_generator: Feature generator to be used.
    """
    # Delegate the shared setup to the model-based selector base class.
    AbstractModelBasedSelector.__init__(self, model_class, metadata, hierarchical_generator)

    # No model exists until fitting has run.
    self.classifier = None

_fit(features, performance)

Fits the classification model to the given feature and performance data.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required
performance DataFrame

DataFrame containing the performance data.

required
Source code in asf/selectors/simple_ranking.py
def _fit(
    self,
    features: pd.DataFrame,
    performance: pd.DataFrame,
):
    """
    Fits the ranking model to the given feature and performance data.

    Every (instance, algorithm) combination becomes one training row made of
    the instance features and the algorithm features. The target is the rank
    of the algorithm's performance within the instance (1 = lowest
    performance value), and rows are grouped per instance via ``qid``.

    Args:
        features: DataFrame containing the feature data. Its index name is
            used as the instance key column.
        performance: DataFrame containing the performance data. Its index
            and column axis names are used as key column names.
    """
    if self.algorithm_features is None:
        # Default to one-hot algorithm features. The index has to be named:
        # the merge below joins on `self.algorithm_features.index.name`, and
        # an unnamed index would only surface as a generic "index" column
        # after reset_index().
        encoder = OneHotEncoder(sparse_output=False)
        self.algorithm_features = pd.DataFrame(
            encoder.fit_transform(
                np.array(self.metadata.algorithms).reshape(-1, 1)
            ),
            index=pd.Index(self.metadata.algorithms, name="algorithm"),
            columns=[f"algo_{i}" for i in range(len(self.metadata.algorithms))],
        )

    performance = performance[self.metadata.algorithms]
    features = features[self.metadata.features]

    # One row per (instance, algorithm) combination.
    total_features = pd.merge(
        features.reset_index(), self.algorithm_features.reset_index(), how="cross"
    )

    stacked_performance = performance.stack().reset_index()
    stacked_performance.columns = [
        performance.index.name,
        performance.columns.name,
        "performance",
    ]
    merged = total_features.merge(
        stacked_performance,
        right_on=[performance.index.name, performance.columns.name],
        left_on=[features.index.name, self.algorithm_features.index.name],
        how="left",
    )

    # Rank algorithms within each instance. Concatenating the groups keeps
    # the rows of one instance contiguous for the qid grouping. `assign`
    # returns a new frame instead of writing into the chunk handed out by
    # groupby.
    merged = pd.concat(
        [
            group.assign(
                rank=group["performance"].rank(ascending=True, method="min")
            )
            for _, group in merged.groupby(features.index.name)
        ]
    )

    total_features = merged.drop(
        columns=[
            performance.index.name,
            performance.columns.name,
            "performance",
            "rank",
            self.algorithm_features.index.name,
        ]
    )
    qid = merged[features.index.name].values
    encoder = OrdinalEncoder()
    qid = encoder.fit_transform(qid.reshape(-1, 1)).flatten()

    self.classifier = self.model_class()
    self.classifier.fit(
        total_features,
        merged["rank"],
        qid=qid,
    )

_predict(features)

Predicts the best algorithm for each instance in the given feature data.

Parameters:

Name Type Description Default
features DataFrame

DataFrame containing the feature data.

required

Returns:

Type Description

A dictionary mapping instance names to the predicted best algorithm.

Source code in asf/selectors/simple_ranking.py
def _predict(self, features: pd.DataFrame):
    """
    Predicts the best algorithm for each instance in the given feature data.

    Args:
        features: DataFrame containing the feature data. Its index name is
            used as the instance key column.

    Returns:
        A dictionary mapping each instance name to a one-element list
        ``[(algorithm, budget)]`` holding the algorithm with the lowest
        predicted score and ``self.metadata.budget``.
    """

    features = features[self.metadata.features]

    # reset_index() stores a named index under its name and an unnamed one
    # under "index"; use the same column to read the algorithm back instead
    # of assuming it is called "algorithm".
    algorithm_key = self.algorithm_features.index.name
    if algorithm_key is None:
        algorithm_key = "index"

    # One row per (instance, algorithm) combination.
    total_features = pd.merge(
        features.reset_index(), self.algorithm_features.reset_index(), how="cross"
    )

    predictions = self.classifier.predict(
        total_features[
            list(self.metadata.features) + list(self.algorithm_features.columns)
        ]
    )

    scheds = {}
    for instance_name in features.index.unique():
        ids = total_features[features.index.name] == instance_name
        # Lowest predicted score within this instance's rows wins.
        chosen = predictions[ids].argmin()
        scheds[instance_name] = [
            (
                total_features.loc[ids].iloc[chosen][algorithm_key],
                self.metadata.budget,
            )
        ]

    return scheds