diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py
index ec84d339158..9b64b7a6c81 100644
--- a/ax/service/tests/test_best_point.py
+++ b/ax/service/tests/test_best_point.py
@@ -10,13 +10,23 @@
 import pandas as pd
 from ax.adapter.registry import Generators
 from ax.core.arm import Arm
+from ax.core.auxiliary import AuxiliaryExperiment, AuxiliaryExperimentPurpose
 from ax.core.batch_trial import BatchTrial
 from ax.core.data import Data
-from ax.core.optimization_config import MultiObjectiveOptimizationConfig
+from ax.core.experiment import Experiment
+from ax.core.metric import Metric
+from ax.core.objective import MultiObjective, Objective
+from ax.core.optimization_config import (
+    MultiObjectiveOptimizationConfig,
+    PreferenceOptimizationConfig,
+)
+from ax.core.parameter import ParameterType, RangeParameter
+from ax.core.search_space import SearchSpace
 from ax.core.trial import Trial
-from ax.exceptions.core import DataRequiredError
+from ax.exceptions.core import DataRequiredError, UserInputError
 from ax.service.utils.best_point import get_trace
 from ax.service.utils.best_point_mixin import BestPointMixin
+from ax.utils.common.constants import Keys
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.core_stubs import (
     get_experiment_with_batch_trial,
@@ -367,3 +377,170 @@ def test_get_best_observed_value(self) -> None:
             minimize=True,
         )
         self.assertEqual(get_best(exp), 10)  # 5 and 9 are out of design
+
+    def _get_pe_search_space(self) -> SearchSpace:
+        """Create a standard PE_EXPERIMENT search space with m1 and m2 parameters."""
+        return SearchSpace(
+            parameters=[
+                RangeParameter(
+                    name="m1",
+                    parameter_type=ParameterType.FLOAT,
+                    lower=0.0,
+                    upper=10.0,
+                ),
+                RangeParameter(
+                    name="m2",
+                    parameter_type=ParameterType.FLOAT,
+                    lower=0.0,
+                    upper=10.0,
+                ),
+            ]
+        )
+
+    def _make_pref_opt_config(self, profile_name: str) -> PreferenceOptimizationConfig:
+        """Create a PreferenceOptimizationConfig with m1 and m2 objectives."""
+        return PreferenceOptimizationConfig(
+            objective=MultiObjective(
+                objectives=[
+                    Objective(metric=Metric(name="m1"), minimize=False),
+                    Objective(metric=Metric(name="m2"), minimize=False),
+                ]
+            ),
+            preference_profile_name=profile_name,
+        )
+
+    def _assert_valid_trace(self, trace: list[float], expected_len: int) -> None:
+        """Assert trace has expected length, contains floats, is non-decreasing and has
+        more than one unique value."""
+        self.assertEqual(len(trace), expected_len)
+        for value in trace:
+            self.assertIsInstance(value, float)
+        for i in range(1, len(trace)):
+            self.assertGreaterEqual(
+                trace[i],
+                trace[i - 1],
+                msg=f"Trace not monotonically increasing at index {i}: {trace}",
+            )
+        unique_values = set(trace)
+        self.assertGreater(
+            len(unique_values),
+            1,
+            msg=f"Trace has only trivial values (all same): {trace}",
+        )
+
+    def test_get_trace_preference_learning_config(self) -> None:
+        """Test that get_trace works correctly with PreferenceOptimizationConfig.
+
+        This test verifies various scenarios for BOPE experiments,
+        including cases with and without PE_EXPERIMENT data.
+        """
+        with self.subTest("without_pe_experiment_raises_error"):
+            # Setup: Create a multi-objective experiment WITHOUT PE_EXPERIMENT
+            exp = get_experiment_with_observations(
+                observations=[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
+            )
+            exp.name = "main_experiment"
+            pref_opt_config = self._make_pref_opt_config(
+                profile_name="nonexistent_profile"
+            )
+
+            # Execute & Assert: Should raise UserInputError without PE_EXPERIMENT
+            with self.assertRaisesRegex(
+                UserInputError,
+                "Preference profile 'nonexistent_profile' not found",
+            ):
+                get_trace(exp, pref_opt_config)
+
+        with self.subTest("with_pe_experiment_empty_data_raises_error"):
+            # Setup: Create main experiment
+            exp = get_experiment_with_observations(
+                observations=[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
+            )
+            exp.name = "main_experiment_empty"
+
+            # Setup: Create PE_EXPERIMENT with no preference comparisons
+            pe_experiment = Experiment(
+                name="test_profile_empty",
+                search_space=self._get_pe_search_space(),
+            )
+
+            # Setup: Attach PE_EXPERIMENT without any data
+            aux_exp = AuxiliaryExperiment(experiment=pe_experiment, data=None)
+            exp.add_auxiliary_experiment(
+                auxiliary_experiment=aux_exp,
+                purpose=AuxiliaryExperimentPurpose.PE_EXPERIMENT,
+            )
+            pref_opt_config = self._make_pref_opt_config(
+                profile_name="test_profile_empty"
+            )
+
+            # Execute & Assert: Should raise DataRequiredError due to empty data
+            with self.assertRaisesRegex(
+                DataRequiredError,
+                "No preference data found in preference profile",
+            ):
+                get_trace(exp, pref_opt_config)
+
+        with self.subTest("with_pe_experiment_valid_data_computes_utility"):
+            # This subtest verifies that when PE_EXPERIMENT exists with valid data,
+            # the code uses the preference model to compute utility-based traces.
+
+            # Setup: Create main experiment with tracking data
+            exp = get_experiment_with_observations(
+                observations=[[1.0, 1.0], [5.0, 5.0], [9.0, 9.0]],
+            )
+            exp.name = "main_experiment_with_pe"
+
+            # Setup: Create PE_EXPERIMENT with minimal but well-separated preference
+            # data
+            pe_experiment = Experiment(
+                name="test_profile_with_minimal_data",
+                search_space=self._get_pe_search_space(),
+            )
+
+            # Setup: Add one pairwise preference comparison (minimal data)
+            trial1 = pe_experiment.new_batch_trial()
+            trial1.add_arm(Arm(name="0_0", parameters={"m1": 1.0, "m2": 1.0}))
+            trial1.add_arm(Arm(name="0_1", parameters={"m1": 9.0, "m2": 9.0}))
+            trial1.mark_running(no_runner_required=True).mark_completed()
+
+            # Setup: Create minimal preference data
+            pe_data_records = [
+                {
+                    "trial_index": 0,
+                    "arm_name": "0_0",
+                    "metric_name": Keys.PAIRWISE_PREFERENCE_QUERY.value,
+                    "mean": 0.0,
+                    "sem": 0.0,
+                    "metric_signature": Keys.PAIRWISE_PREFERENCE_QUERY.value,
+                },
+                {
+                    "trial_index": 0,
+                    "arm_name": "0_1",
+                    "metric_name": Keys.PAIRWISE_PREFERENCE_QUERY.value,
+                    "mean": 1.0,
+                    "sem": 0.0,
+                    "metric_signature": Keys.PAIRWISE_PREFERENCE_QUERY.value,
+                },
+            ]
+            pe_data = Data(df=pd.DataFrame.from_records(pe_data_records))
+            pe_experiment.attach_data(pe_data)
+
+            # Setup: Attach PE_EXPERIMENT to main experiment
+            aux_exp = AuxiliaryExperiment(experiment=pe_experiment, data=pe_data)
+            exp.add_auxiliary_experiment(
+                auxiliary_experiment=aux_exp,
+                purpose=AuxiliaryExperimentPurpose.PE_EXPERIMENT,
+            )
+            pref_opt_config = self._make_pref_opt_config(
+                profile_name="test_profile_with_minimal_data"
+            )
+
+            # Execute: With valid data, model computes utility-based trace
+            trace = get_trace(exp, pref_opt_config)
+
+            # Assert: Verify trace is valid, monotonically increasing, and non-trivial
+            self._assert_valid_trace(
+                trace,
+                expected_len=3,
+            )
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py
index 68482897cfd..c8dd50f6616 100644
--- a/ax/service/utils/best_point.py
+++ b/ax/service/utils/best_point.py
@@ -26,23 +26,28 @@
 from ax.adapter.registry import Generators
 from ax.adapter.torch import TorchAdapter
 from ax.adapter.transforms.derelativize import Derelativize
+from ax.core.auxiliary import AuxiliaryExperimentPurpose
 from ax.core.base_trial import BaseTrial, TrialStatus
 from ax.core.batch_trial import BatchTrial
 from ax.core.data import Data
 from ax.core.experiment import Experiment
 from ax.core.generator_run import GeneratorRun
 from ax.core.objective import MultiObjective, Objective, ScalarizedObjective
+from ax.core.observation import ObservationFeatures
 from ax.core.optimization_config import (
     MultiObjectiveOptimizationConfig,
     OptimizationConfig,
+    PreferenceOptimizationConfig,
 )
 from ax.core.outcome_constraint import OutcomeConstraint
 from ax.core.trial import Trial
 from ax.core.types import ComparisonOp, TModelPredictArm, TParameterization
-from ax.exceptions.core import UnsupportedError, UserInputError
+from ax.exceptions.core import DataRequiredError, UnsupportedError, UserInputError
 from ax.generation_strategy.generation_strategy import GenerationStrategy
 from ax.plot.pareto_utils import get_tensor_converter_adapter
+from ax.utils.common.constants import Keys
 from ax.utils.common.logger import get_logger
+from ax.utils.preference.preference_utils import get_preference_adapter
 from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning
 from numpy import nan
 from numpy.typing import NDArray
@@ -160,6 +165,7 @@ def get_best_raw_objective_point_with_trial_index(
         df=in_design_df,
         optimization_config=optimization_config,
         use_cumulative_best=False,
+        experiment=experiment,
     )
 
     maximize = isinstance(optimization_config.objective, MultiObjective) or (
@@ -382,7 +388,7 @@ def get_best_by_raw_objective_with_trial_index(
             optimization_config=optimization_config,
             trial_indices=trial_indices,
         )
-    except ValueError as err:
+    except (ValueError, UserInputError, DataRequiredError) as err:
         logger.error(
             "Encountered error while trying to identify the best point: "
             f"'{err}'. Returning None."
         )
@@ -807,6 +813,91 @@ def get_hypervolume_trace_of_outcomes_multi_objective(
     )
 
 
+def _compute_utility_from_preference_model(
+    df_wide: pd.DataFrame,
+    experiment: Experiment,
+    optimization_config: PreferenceOptimizationConfig,
+) -> NDArray:
+    """Compute utility predictions for each arm using the learned preference model.
+
+    This function accesses the PE_EXPERIMENT auxiliary experiment, fits a PairwiseGP
+    model to the preference data, and uses it to predict utility values for each
+    arm's metric values.
+
+    Args:
+        df_wide: DataFrame with columns for trial_index, arm_name, feasible,
+            and metric values.
+        experiment: The main experiment containing the PE_EXPERIMENT auxiliary.
+        optimization_config: PreferenceOptimizationConfig specifying the preference
+            profile to use.
+
+    Returns:
+        Array of utility predictions, one for each row in df_wide. Infeasible
+        arms will have a utility of negative infinity.
+
+    Raises:
+        DataRequiredError: If PE_EXPERIMENT has no data.
+        UserInputError: If PE_EXPERIMENT is not found for the specified profile.
+    """
+    pref_profile_name = optimization_config.preference_profile_name
+
+    # Find the PE_EXPERIMENT auxiliary experiment
+    pe_aux_exp = experiment.find_auxiliary_experiment_by_name(
+        purpose=AuxiliaryExperimentPurpose.PE_EXPERIMENT,
+        auxiliary_experiment_name=pref_profile_name,
+        raise_if_not_found=False,
+    )
+
+    if pe_aux_exp is None:
+        raise UserInputError(
+            f"Preference profile '{pref_profile_name}' not found in experiment "
+            f"'{experiment.name}'. Cannot compute utility-based trace without "
+            "a valid preference profile."
+        )
+
+    pe_experiment = pe_aux_exp.experiment
+    pe_data = pe_experiment.lookup_data()
+
+    if pe_data.df.empty:
+        raise DataRequiredError(
+            f"No preference data found in preference profile '{pref_profile_name}'. "
+            "Update the preference profile or play the preference game before "
+            "computing utility-based trace."
+        )
+
+    # Create adapter with fitted preference model
+    adapter = get_preference_adapter(experiment=pe_experiment, data=pe_data)
+
+    # Create ObservationFeatures for each arm with metric values as parameters
+    observation_features = []
+    for _, row in df_wide.iterrows():
+        # Create parameters dict with metric names as keys and their values
+        parameters = {
+            metric_name: row[metric_name]
+            for metric_name in optimization_config.objective.metric_names
+        }
+        obs_feat = ObservationFeatures(parameters=parameters)
+        observation_features.append(obs_feat)
+
+    # Predict utilities using the fitted preference model
+    f_dict, _ = adapter.predict(
+        observation_features=observation_features,
+        use_posterior_predictive=False,
+    )
+
+    # Extract utility metric predictions
+    # PE_EXPERIMENT always has a single metric: "pairwise_pref_query"
+    utility_metric_name = Keys.PAIRWISE_PREFERENCE_QUERY.value
+    utilities = np.array(f_dict[utility_metric_name])
+
+    # Set infeasible arms to -inf (higher utility is better, so infeasible arms
+    # should have the worst possible utility)
+    infeasible_idx = np.where(~df_wide["feasible"])[0]
+    utilities[infeasible_idx] = float("-inf")
+
+    return utilities
+
+
 def _prepare_data_for_trace(
     df: pd.DataFrame,
     optimization_config: OptimizationConfig,
@@ -866,6 +957,7 @@ def get_trace_by_arm_pull_from_data(
     df: pd.DataFrame,
     optimization_config: OptimizationConfig,
     use_cumulative_best: bool = True,
+    experiment: Experiment | None = None,
 ) -> pd.DataFrame:
     """
     Get a trace of the objective value or hypervolume of outcomes.
@@ -874,6 +966,10 @@
     function returns a single value for each arm pull, even if there are multiple
     arms per trial or if an arm is repeated in multiple trials.
 
+    For BOPE experiments, this function computes
+    utility predictions using the learned preference model from the PE_EXPERIMENT
+    auxiliary experiment.
+
     Args:
         df: Data in the format returned by ``Data.df``, with a separate row
             for each trial index-arm name-metric.
@@ -881,10 +977,13 @@
             not be in relative form.
         use_cumulative_best: If True, the trace will be the cumulative best
            objective. Otherwise, the trace will be the value of each point.
+        experiment: Optional experiment object. Required for preference learning
+            experiments to access the PE_EXPERIMENT auxiliary experiment.
 
     Return:
        A DataFrame containing columns 'trial_index', 'arm_name', and "value",
-        where "value" is the value of the outcomes attained.
+        where "value" is the value of the outcomes attained (or predicted utility
+        for preference learning experiments).
""" if any(oc.relative for oc in optimization_config.all_constraints): raise ValueError( @@ -899,6 +998,21 @@ def get_trace_by_arm_pull_from_data( if len(df_wide) == 0: return empty_result + # Handle preference learning experiments + if experiment is not None and isinstance( + optimization_config, PreferenceOptimizationConfig + ): + logger.info( + f"Computing utility-based trace for preference learning experiment " + f"using PE_EXPERIMENT '{optimization_config.preference_profile_name}'." + ) + df_wide["value"] = _compute_utility_from_preference_model( + df_wide=df_wide, + experiment=experiment, + optimization_config=optimization_config, + ) + return df_wide[["trial_index", "arm_name", "value"]] + # MOO and *not* ScalarizedObjective if isinstance(optimization_config.objective, MultiObjective): optimization_config = assert_is_instance( @@ -933,6 +1047,11 @@ def get_trace( the hypervolume. For single objective, the performance is computed as the best observed objective value. + For BOPE experiments, the utility of each trial is computed using + the learned preference model from the PE_EXPERIMENT auxiliary experiment. The + preference model is used to predict the utility of each trial's metric values, + and the trace represents the best predicted utility over time. + Infeasible points (that violate constraints) do not contribute to improvements in the optimization trace. If the first trial(s) are infeasible, the trace can start at inf or -inf. @@ -957,6 +1076,7 @@ def get_trace( df = experiment.lookup_data().df if len(df) == 0: return [] + # Get the names of the metrics in optimization config. metric_names = set(optimization_config.objective.metric_names) for cons in optimization_config.outcome_constraints: @@ -992,6 +1112,7 @@ def get_trace( df=df, optimization_config=optimization_config, use_cumulative_best=True, + experiment=experiment, ) # Aggregate to trial level objective = optimization_config.objective diff --git a/ax/utils/preference/preference_utils.py b/ax/utils/preference/preference_utils.py index ba913d3be7a..e69c2fa6ea6 100644 --- a/ax/utils/preference/preference_utils.py +++ b/ax/utils/preference/preference_utils.py @@ -9,8 +9,13 @@ from ax.adapter.torch import TorchAdapter from ax.core.data import Data from ax.core.experiment import Experiment +from ax.core.metric import Metric +from ax.core.objective import Objective +from ax.core.optimization_config import OptimizationConfig +from ax.exceptions.core import DataRequiredError from ax.generators.torch.botorch_modular.generator import BoTorchGenerator from ax.generators.torch.botorch_modular.surrogate import ModelConfig, SurrogateSpec +from ax.utils.common.constants import Keys from botorch.models.pairwise_gp import PairwiseGP, PairwiseLaplaceMarginalLogLikelihood from botorch.models.transforms.input import Normalize @@ -21,7 +26,6 @@ def get_preference_adapter( ) -> TorchAdapter: """Obtain a TorchAdapter from a preference experiment and data. - Args: experiment: The preference experiment. The preference experiment consists of a list of parameters we wish to model the preference over and a single @@ -36,13 +40,37 @@ def get_preference_adapter( Returns: A PairwiseAdapter that wraps around a fitted BoTorch preference GP model, typically a PairwiseGP. + + Raises: + DataRequiredError: If the provided data is empty. Preference data with + at least one pairwise comparison is required to fit the preference model. 
""" + # Check for empty data before creating the adapter + if data.df.empty: + raise DataRequiredError( + "No preference data available. At least one pairwise comparison is " + "required to fit the preference model." + ) + + # Configure TorchAdapter for preference modeling: + # - fit_tracking_metrics=False: Only fit on preference labels, not all metrics + # in the data. Requires optimization_config to specify which metrics to use. + pref_metric = Metric(name=Keys.PAIRWISE_PREFERENCE_QUERY.value) + optimization_config = OptimizationConfig( + objective=Objective(metric=pref_metric, minimize=False) + ) + # Register the metric on the experiment if not already present. + # This is required for _extract_observation_data filtering in TorchAdapter. + if pref_metric.name not in experiment.metrics: + experiment.add_tracking_metric(pref_metric) # Setting up the preference adapter return TorchAdapter( experiment=experiment, search_space=experiment.search_space, data=data, + optimization_config=optimization_config, + fit_tracking_metrics=False, generator=BoTorchGenerator( # acqf doesn't matter. We only use the adapter for # data parsing and preference model construction diff --git a/ax/utils/preference/tests/test_preference_utils.py b/ax/utils/preference/tests/test_preference_utils.py new file mode 100644 index 00000000000..41142d9cb90 --- /dev/null +++ b/ax/utils/preference/tests/test_preference_utils.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +import pandas as pd +from ax.core.arm import Arm +from ax.core.data import Data +from ax.core.experiment import Experiment +from ax.core.parameter import ParameterType, RangeParameter +from ax.core.search_space import SearchSpace +from ax.exceptions.core import DataRequiredError +from ax.utils.common.constants import Keys +from ax.utils.common.testutils import TestCase +from ax.utils.preference.preference_utils import get_preference_adapter +from ax.utils.testing.preference_stubs import get_pbo_experiment + + +class TestGetPreferenceAdapter(TestCase): + def setUp(self) -> None: + super().setUp() + # Preference experiment with data for testing get_preference_adapter + self.pe_experiment = get_pbo_experiment( + parameter_names=["metric1", "metric2"], + num_preference_trials=2, + unbounded_search_space=True, + ) + + def test_get_preference_adapter(self) -> None: + """Verify get_preference_adapter behavior with empty and valid data.""" + with self.subTest("raises_on_empty_data"): + with self.assertRaisesRegex( + DataRequiredError, + "No preference data available", + ): + get_preference_adapter(experiment=self.pe_experiment, data=Data()) + + with self.subTest("succeeds_with_valid_data"): + data = self.pe_experiment.lookup_data() + self.assertFalse(data.df.empty) + + adapter = get_preference_adapter( + experiment=self.pe_experiment, data=self.pe_experiment.lookup_data() + ) + self.assertIsNotNone(adapter) + + def test_registers_metric_when_not_present(self) -> None: + """Verify get_preference_adapter registers the pref metric if not present. + + This tests the case when a PE experiment is loaded from storage without + the pairwise_pref_query metric already registered (e.g., when called from + best_point.py via find_auxiliary_experiment_by_name). 
+        """
+        # Setup: Create a minimal PE experiment WITHOUT the metric registered
+        pe_experiment = Experiment(
+            name="test_pe_experiment_no_metric",
+            search_space=SearchSpace(
+                parameters=[
+                    RangeParameter(
+                        name="m1",
+                        parameter_type=ParameterType.FLOAT,
+                        lower=0.0,
+                        upper=10.0,
+                    ),
+                    RangeParameter(
+                        name="m2",
+                        parameter_type=ParameterType.FLOAT,
+                        lower=0.0,
+                        upper=10.0,
+                    ),
+                ]
+            ),
+        )
+
+        # Verify the metric is NOT registered before calling get_preference_adapter
+        pref_metric_name = Keys.PAIRWISE_PREFERENCE_QUERY.value
+        self.assertNotIn(pref_metric_name, pe_experiment.metrics)
+
+        # Check that it raises with empty data
+        with self.assertRaisesRegex(
+            DataRequiredError,
+            "No preference data available",
+        ):
+            get_preference_adapter(
+                experiment=pe_experiment, data=pe_experiment.lookup_data()
+            )
+
+        # Setup: Add a trial with preference data
+        trial = pe_experiment.new_batch_trial()
+        trial.add_arm(Arm(name="0_0", parameters={"m1": 0.5, "m2": 1.0}))
+        trial.add_arm(Arm(name="0_1", parameters={"m1": 1.0, "m2": 1.5}))
+        trial.mark_running(no_runner_required=True).mark_completed()
+
+        # Setup: Create preference data
+        pe_data = Data(
+            df=pd.DataFrame.from_records(
+                [
+                    {
+                        "trial_index": 0,
+                        "arm_name": "0_0",
+                        "metric_name": pref_metric_name,
+                        "mean": 0.0,
+                        "sem": 0.0,
+                        "metric_signature": pref_metric_name,
+                    },
+                    {
+                        "trial_index": 0,
+                        "arm_name": "0_1",
+                        "metric_name": pref_metric_name,
+                        "mean": 1.0,
+                        "sem": 0.0,
+                        "metric_signature": pref_metric_name,
+                    },
+                ]
+            )
+        )
+
+        # Execute: Call get_preference_adapter
+        adapter = get_preference_adapter(experiment=pe_experiment, data=pe_data)
+
+        # Assert: The adapter was created successfully
+        self.assertIsNotNone(adapter)
+
+        # Assert: The metric is now registered on the experiment
+        self.assertIn(pref_metric_name, pe_experiment.metrics)