Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions baybe/kernels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from baybe.kernels.basic import (
IndexKernel,
LinearKernel,
MaternKernel,
PeriodicKernel,
Expand All @@ -18,6 +19,7 @@

__all__ = [
"AdditiveKernel",
"IndexKernel",
"LinearKernel",
"MaternKernel",
"PeriodicKernel",
Expand Down
10 changes: 7 additions & 3 deletions baybe/parameters/fidelity.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
validate_is_finite,
validate_unique_values,
)
from baybe.settings import active_settings
from baybe.utils.conversion import nonstring_to_tuple
from baybe.utils.numerical import DTypeFloatNumpy


def _convert_zeta(
Expand Down Expand Up @@ -107,7 +107,9 @@ def values(self) -> tuple[str | bool, ...]:
@cached_property
def comp_df(self) -> pd.DataFrame:
return pd.DataFrame(
range(len(self.values)), dtype=DTypeFloatNumpy, columns=[self.name]
range(len(self.values)),
dtype=active_settings.DTypeFloatNumpy,
columns=[self.name],
)


Expand Down Expand Up @@ -159,5 +161,7 @@ def values(self) -> tuple[float, ...]:
@cached_property
def comp_df(self) -> pd.DataFrame:
return pd.DataFrame(
{self.name: self.values}, index=self.values, dtype=DTypeFloatNumpy
{self.name: self.values},
index=self.values,
dtype=active_settings.DTypeFloatNumpy,
)
7 changes: 7 additions & 0 deletions baybe/recommenders/pure/bayesian/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ def _autoreplicate(surrogate: SurrogateProtocol, /) -> SurrogateProtocol:
class BayesianRecommender(PureRecommender, ABC):
"""An abstract class for Bayesian Recommenders."""

# TODO: Factory defaults the surrogate to a GaussianProcessesSurrogate always.
# Surrogate and kernel defaults should be different for searchspaces with
# CategoricalFidelityParameter or NumericalDiscreteFidelityParameter.
    # This can be achieved without the user having to specify the surrogate model,
# e.g., by
# * using a dispatcher factory which decides surrogate model on fit time
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the respective class is implemented, can you point out where the dispatching is done? or is this for another PR?

# * having a "_setup_surrogate" method similar to the acquisition function logic
_surrogate_model: SurrogateProtocol = field(
alias="surrogate_model",
factory=GaussianProcessSurrogate,
Expand Down
144 changes: 144 additions & 0 deletions baybe/searchspace/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
from baybe.constraints.base import Constraint
from baybe.parameters import TaskParameter
from baybe.parameters.base import Parameter
from baybe.parameters.fidelity import (
CategoricalFidelityParameter,
NumericalDiscreteFidelityParameter,
)
from baybe.searchspace.continuous import SubspaceContinuous
from baybe.searchspace.discrete import (
MemorySize,
Expand Down Expand Up @@ -48,6 +52,29 @@ class SearchSpaceType(Enum):
"""Flag for hybrid search spaces resp. compatibility with hybrid search spaces."""


class SearchSpaceTaskType(Enum):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really don't get why TL and MF things are mixed in here to have a single property; imo that doesn't make sense.

I would expect to have a SearchSpaceTaskType (with SINGLETASK and CATEGORICALTASK) and a SearchSpaceFidelityType (with SINGLEFIDELITY, CATEGORICALFIDELITY, NUMERICALFIDELITY, etc.) with their respective values, and not a mixed kind of thing.

Is there any reason you are enforcing such a mixture between unrelated concepts?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would then also make the TODO note about Distinguish between multiple task parameter and mixed task parameter types obsolete, as this would basically be controlled by the settings of the two enums, right?

"""Enum class for different types of task and/or fidelity subspaces."""

SINGLETASK = "SINGLETASK"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would prefer NOTASK over SINGLETASK as the latter one could also be read as "has a single task parameter"

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now will revise the docstring to saySINGLETASK means there are no task parameters and keep SINGLETASK.

"""Flag for search spaces with no task parameters."""

CATEGORICALTASK = "CATEGORICALTASK"
"""Flag for search spaces with a categorical task parameter."""

NUMERICALFIDELITY = "NUMERICALFIDELITY"
"""Flag for search spaces with a discrete numerical (ordered) fidelity parameter."""

CATEGORICALFIDELITY = "CATEGORICALFIDELITY"
"""Flag for search spaces with a categorical (unordered) fidelity parameter."""

# TODO: Distinguish between multiple task parameter and mixed task parameter types.
# In future versions, multiple task/fidelity parameters may be allowed. For now,
# they are disallowed, whether the task-like parameters are different or the same
# class.
MULTIPLETASKPARAMETER = "MULTIPLETASKPARAMETER"
"""Flag for search spaces with mixed task and fidelity parameters."""


@define
class SearchSpace(SerialMixin):
"""Class for managing the overall search space.
Expand Down Expand Up @@ -275,6 +302,24 @@ def task_idx(self) -> int | None:
# --> Fix this when refactoring the data
return cast(int, self.discrete.comp_rep.columns.get_loc(task_param.name))

@property
def fidelity_idx(self) -> int | None:
"""The column index of the task parameter in computational representation."""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""The column index of the task parameter in computational representation."""
"""The column index of the fidelity parameter in computational representation."""

try:
# See TODO [16932] and TODO [11611]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be stupid, but what do [16932] and [11611] refer to?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably to the generalization to more than 1 task parameter, numbers refer to old Azure items

imo remove

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, if this refers to old azure items this is already a historical piece of code :D But agree, let's remove then.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: delete both todos here and in task_idx.

fidelity_param = next(
p
for p in self.parameters
if isinstance(
p,
(CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
)
)
except StopIteration:
return None

return cast(int, self.discrete.comp_rep.columns.get_loc(fidelity_param.name))

@property
def n_tasks(self) -> int:
"""The number of tasks encoded in the search space."""
Expand All @@ -287,6 +332,105 @@ def n_tasks(self) -> int:
return 1
return len(task_param.values)

@property
def n_fidelities(self) -> int:
"""The number of tasks encoded in the search space."""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""The number of tasks encoded in the search space."""
"""The number of fidelities encoded in the search space."""

# See TODO [16932]
try:
fidelity_param = next(
p
for p in self.parameters
if isinstance(
p,
(CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
)
)
return len(fidelity_param.values)

# When there are no fidelity parameters, we effectively have a single fidelity
except StopIteration:
return 1

@property
def n_task_dimensions(self) -> int:
"""The number of task dimensions."""
try:
# See TODO [16932]
fidelity_param = next(
p for p in self.parameters if isinstance(p, (TaskParameter,))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why (TaskParameter,), shouldn't TaskParameter also do the job?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check this! fidelity_param should not be a TaskParameter.

)
except StopIteration:
fidelity_param = None

return 1 if fidelity_param is not None else 0

@property
def n_fidelity_dimensions(self) -> int:
"""The number of fidelity dimensions."""
try:
# See TODO [16932]
fidelity_param = next(
p
for p in self.parameters
if isinstance(
p,
(CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
)
)
except StopIteration:
fidelity_param = None

return 1 if fidelity_param is not None else 0

@property
def task_type(self) -> SearchSpaceTaskType:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imo mixing TL and MF into the task terminology is strange and I don't get at all why it's done (similar question as to why we have a searchspace task type that describes both task and fidelity)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree here. I see that the two features share a lot of similarities from the technical/implementation point of view, but for a user, these are still two unrelated features - quite literally, as we cannot even support a combination of TL and MF at the moment, so we should try to avoid potential confusion here.

"""Return the task type of the search space.

Raises:
ValueError: If searchspace contains more than one task/fidelity parameter.
ValueError: An unrecognised fidelity parameter type is in SearchSpace.
Comment on lines +387 to +391
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doctoring uses search space, searchspace and SearchSpace. Please use the same wording that is used in other places (and double-check which it is, I think it is search space but not entirely sure)

"""
task_like_parameters = (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the word task is reserved for TL, lets not mix it into the MF terminology

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: task_and_fidelity_parameters or non_design_parameters or possibly meta_parameters.

TaskParameter,
CategoricalFidelityParameter,
NumericalDiscreteFidelityParameter,
)

n_task_like_parameters = sum(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we re-use the properties here? I'd prefer that as it re-uses existing code

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

some parts of this PR also use TypeSelector which does the isinstance check and some dont, please unify

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, shouldn't we be able to fully get this by using the properties? There literally are different properties for checking the number task and fidelity dimensions, so I would assume that we do not need to recalculate this here - unless this would mean to change how those are calculated (as we currently technically only check if there is at least one).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, if we go through the self.parameters, couldn't we simply count how many different occurrences of the different parameter types we find? The code does multiple iterations over the full self.parameters, it would be fully sufficient to this once, count the occurrences of the individual parameters and then act based on those numbers.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: this.

isinstance(p, (task_like_parameters)) for p in self.parameters
)

if n_task_like_parameters == 0:
return SearchSpaceTaskType.SINGLETASK
elif n_task_like_parameters > 1:
# TODO: commute this validation further downstream.
# In case of user-defined custom models which allow for multiple task
# parameters, this should be later in recommender logic.
# * Should this be an IncompatibilityError?
raise ValueError(
"SearchSpace must not contain more than one task/fidelity parameter."
)
return SearchSpaceTaskType.MULTIPLETASKPARAMETER

if self.n_task_dimensions == 1:
return SearchSpaceTaskType.CATEGORICALTASK

if self.n_fidelity_dimensions == 1:
n_categorical_fidelity_dims = sum(
isinstance(p, CategoricalFidelityParameter) for p in self.parameters
)
if n_categorical_fidelity_dims == 1:
return SearchSpaceTaskType.CATEGORICALFIDELITY

n_numerical_disc_fidelity_dims = sum(
isinstance(p, NumericalDiscreteFidelityParameter)
for p in self.parameters
)
if n_numerical_disc_fidelity_dims == 1:
return SearchSpaceTaskType.NUMERICALFIDELITY

raise RuntimeError("This line should be impossible to reach.")

def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]:
"""Find a parameter's column indices in the computational representation.

Expand Down
3 changes: 3 additions & 0 deletions baybe/surrogates/bandit.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class BetaBernoulliMultiArmedBanditSurrogate(Surrogate):
supports_transfer_learning: ClassVar[bool] = False
# See base class.

supports_multi_fidelity: ClassVar[bool] = False
# See base class.

prior: BetaPrior = field(factory=lambda: BetaPrior(1, 1))
"""The beta prior for the win rates of the bandit arms. Uniform by default."""

Expand Down
17 changes: 17 additions & 0 deletions baybe/surrogates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class Surrogate(ABC, SurrogateProtocol, SerialMixin):
"""Class variable encoding whether or not the surrogate supports transfer
learning."""

supports_multi_fidelity: ClassVar[bool]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an inconsistent treatment of the supports_* properties now, where _multi_output is set in the base class but _transfer_learning and _fidelity are not.

The latter requires many explicit lines in the derived classes, which could be avoided if we explicitly set it to False here and require overwriting if it is actually compatible.

I would prefer that, but remember we had a discussion on it some time ago.

In order to unify this, can you add a note here so this is revisited / rediscussed? For this PR it's fine, I guess.

"""Class variable encoding whether or not the surrogate supports multi fidelity
Bayesian optimization."""

supports_multi_output: ClassVar[bool] = False
"""Class variable encoding whether or not the surrogate is multi-output
compatible."""
Expand Down Expand Up @@ -428,6 +432,14 @@ def fit(
f"support transfer learning."
)

# Check if multi fidelity capabilities are needed
if (searchspace.n_fidelities > 1) and (not self.supports_multi_fidelity):
raise ValueError(
f"The search space contains fidelity parameters but the selected "
f"surrogate model type ({self.__class__.__name__}) does not "
f"support multi fidelity Bayesian optimisation."
)

# Block partial measurements
handle_missing_values(measurements, [t.name for t in objective.targets])

Expand Down Expand Up @@ -472,6 +484,11 @@ def __str__(self) -> str:
self.supports_transfer_learning,
single_line=True,
),
to_string(
"Supports Multi Fidelity",
self.supports_multi_fidelity,
single_line=True,
),
]
return to_string(self.__class__.__name__, *fields)

Expand Down
3 changes: 3 additions & 0 deletions baybe/surrogates/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class CustomONNXSurrogate(IndependentGaussianSurrogate):
supports_transfer_learning: ClassVar[bool] = False
# See base class.

supports_multi_fidelity: ClassVar[bool] = False
# See base class.

onnx_input_name: str = field(validator=validators.instance_of(str))
"""The input name used for constructing the ONNX str."""

Expand Down
20 changes: 18 additions & 2 deletions baybe/surrogates/gaussian_process/components/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from baybe.kernels.base import Kernel
from baybe.kernels.composite import ProductKernel
from baybe.parameters.categorical import TaskParameter
from baybe.parameters.fidelity import CategoricalFidelityParameter
from baybe.parameters.selector import (
ParameterSelectorProtocol,
TypeSelector,
Expand Down Expand Up @@ -79,15 +80,30 @@ def _default_base_kernel_factory(self) -> KernelFactoryProtocol:
BayBENumericalKernelFactory,
)

return BayBENumericalKernelFactory(TypeSelector((TaskParameter,), exclude=True))
return BayBENumericalKernelFactory(
TypeSelector(
(
TaskParameter,
CategoricalFidelityParameter,
),
exclude=True,
)
)

@task_kernel_factory.default
def _default_task_kernel_factory(self) -> KernelFactoryProtocol:
from baybe.surrogates.gaussian_process.presets.baybe import (
BayBETaskKernelFactory,
)

return BayBETaskKernelFactory(TypeSelector((TaskParameter,)))
return BayBETaskKernelFactory(
TypeSelector(
(
TaskParameter,
CategoricalFidelityParameter,
)
)
)

@override
def __call__(
Expand Down
25 changes: 24 additions & 1 deletion baybe/surrogates/gaussian_process/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
from torch import Tensor


# TODO Jordan MHS: _ModelContext is used by fidelity surrogate models now so may deserve
# its own file.
@define
class _ModelContext:
"""Model context for :class:`GaussianProcessSurrogate`."""
Expand Down Expand Up @@ -80,6 +82,27 @@ def n_tasks(self) -> int:
"""The number of tasks."""
return self.searchspace.n_tasks

@property
def n_fidelity_dimensions(self) -> int:
"""The number of fidelity dimensions."""
# Possible TODO: Generalize to multiple fidelity dimensions
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This todo is here to spark discussion - will remove after review.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I would propose to keep everything focused around a single dimension, unless it is very clear and easy how to go on.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In analogy to the task, the fidelity is kept at max 1 fidelity parameter and everything else is out of scope for the MF branch.

return 1 if self.searchspace.fidelity_idx is not None else 0

@property
def is_multi_fidelity(self) -> bool:
"""Are there any fidelity dimensions?"""
return self.n_fidelity_dimensions > 0

    @property
    def fidelity_idx(self) -> int | None:
        """The computational column index of the fidelity parameter, if available."""
        return self.searchspace.fidelity_idx

    @property
    def n_fidelities(self) -> int:
        """The number of fidelity levels, as reported by the wrapped search space."""
        return self.searchspace.n_fidelities

@property
def parameter_bounds(self) -> Tensor:
"""Get the search space parameter bounds in BoTorch Format."""
Expand All @@ -93,7 +116,7 @@ def numerical_indices(self) -> list[int]:
return [
i
for i in range(len(self.searchspace.comp_rep_columns))
if i != self.task_idx
if i not in (self.task_idx, self.fidelity_idx)
]


Expand Down
Loading