From a9f8ed61d3ea6caea9c588411314b4145aaa769a Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Mon, 9 Dec 2024 10:53:33 +0100 Subject: [PATCH 1/5] fix typos in Bounce tests --- .../tests/solvers/bayesian_optimization/test_bounce.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/poli_baselines/tests/solvers/bayesian_optimization/test_bounce.py b/src/poli_baselines/tests/solvers/bayesian_optimization/test_bounce.py index 7d2ec60..934bd02 100644 --- a/src/poli_baselines/tests/solvers/bayesian_optimization/test_bounce.py +++ b/src/poli_baselines/tests/solvers/bayesian_optimization/test_bounce.py @@ -1,4 +1,4 @@ -"""Tests for our bridge with Probabilistic Reparametrization [1] +"""Tests for our bridge with Bounce TODO: add reference """ @@ -34,7 +34,7 @@ def test_bounce_runs(): """Tests that Bounce instantiates and runs.""" from poli import objective_factory - pytest.importorskip("bounce") # We check if we have PR installed + pytest.importorskip("bounce") # We check if we have Bounce installed from poli_baselines.solvers.bayesian_optimization.bounce import BounceSolver alphabet = load_alphabet() From ad8afddc5c1efa34b97bbd61666c745024f7f818 Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Mon, 9 Dec 2024 10:53:46 +0100 Subject: [PATCH 2/5] add test for running Boss solver --- .../bayesian_optimization/test_boss.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py diff --git a/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py b/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py new file mode 100644 index 0000000..1f1d5e7 --- /dev/null +++ b/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py @@ -0,0 +1,59 @@ +""" +Tests for the BOSS implementation + +""" +import json +import warnings +from pathlib import Path + +import pytest + +TEST_FILES_PATH = Path(__file__).parent.parent.parent / 
"test_files" + + +warnings.filterwarnings("ignore") + + +def load_alphabet() -> list[str]: + with open(TEST_FILES_PATH / "zinc250k_alphabet_stoi.json") as f: + alphabet = json.load(f) + + return list(alphabet.keys()) + + +def load_sequence_length() -> int: + with open(TEST_FILES_PATH / "zinc250k_metadata.json") as f: + metadata = json.load(f) + + return metadata["max_sequence_length"] + +@pytest.mark.slow() +def test_boss_runs(): + """ + Test BOSS instantiates and runs. + """ + from poli import objective_factory + + pytest.importorskip("boss") + from poli_baselines.solvers.bayesian_optimization.boss import BossSolver + + alphabet = load_alphabet() + sequence_length = load_sequence_length() + + problem = objective_factory.create( + name="rdkit_qed", string_representation="SMILES" + ) + black_box = problem.black_box + x0 = problem.x0 + y0 = black_box(x0) + + solver = BossSolver( + black_box=black_box, + x0=x0, + y0=y0, + n_initial_points=1, + ) + + assert solver is not None + + solver.solve(max_iter=1) \ No newline at end of file From 33f420eba0afa929a0f35420b1fa8dbd0963f7f2 Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Mon, 9 Dec 2024 10:54:04 +0100 Subject: [PATCH 3/5] add BOSS environments --- .../boss/environment.boss.small.yml | 12 ++++++++++++ .../boss/environment.boss.yml | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml create mode 100644 src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml new file mode 100644 index 0000000..3bdf6ba --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml @@ -0,0 +1,12 @@ +name: poli__boss +channels: + - defaults +dependencies: + - python=3.10 + - pip + - pip: + - numpy<2 + 
- emukit + - "git+https://github.com/MachineLearningLifeScience/poli.git@v0.2.1" + - "git+https://github.com/MachineLearningLifeScience/poli-baselines@main" + - "git+https://github.com/henrymoss/BOSS.git@master" diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml new file mode 100644 index 0000000..cdbb657 --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml @@ -0,0 +1,18 @@ +name: poli__boss +channels: + - defaults +dependencies: + - python=3.10 + - pip + - pip: + - numpy<2 + - emukit + - pandas>=1.5.3,<1.6.0 + - coverage>=7.2.5,<7.3.0 + - requests>=2.31.0,<2.32.0 + - black>=22.12.0,<22.13.0 + - memray>=1.6.0,<1.7.0 + - pytest>=7.3.1,<7.4.0 + - "git+https://github.com/MachineLearningLifeScience/poli.git@v0.2.1" + - "git+https://github.com/MachineLearningLifeScience/poli-baselines@main" + - "git+https://github.com/henrymoss/BOSS.git@master" From af98d913d0909dd23e3172beb877ea103c3a5564 Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Mon, 9 Dec 2024 10:54:51 +0100 Subject: [PATCH 4/5] add BOSS solver --- .../bayesian_optimization/boss/__init__.py | 3 + .../bayesian_optimization/boss/solver.py | 117 ++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py create mode 100644 src/poli_baselines/solvers/bayesian_optimization/boss/solver.py diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py b/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py new file mode 100644 index 0000000..d9f759c --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py @@ -0,0 +1,3 @@ +from .solver import BossSolver + +__all__ = ["BossSolver"] diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py b/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py new 
file mode 100644 index 0000000..ca543c6 --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py @@ -0,0 +1,117 @@ +""" +This has to be run inside the poli__boss environment. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Literal + +import numpy as np +import torch + +try: + from boss.code.emukit_models.emukit_ssk_model import SSK_model + from boss.code.parameters.candidate_parameter import CandidateStringParameter + from emukit.core import ParameterSpace + from emukit.core.loop import FixedIterationsStoppingCondition + from emukit.core.optimization import RandomSearchAcquisitionOptimizer + from emukit.bayesian_optimization.loops import BayesianOptimizationLoop + from emukit.bayesian_optimization.acquisitions import ExpectedImprovement + from emukit.core.initial_designs import RandomDesign +except ImportError as e: + raise ImportError( + "You are trying to use the BOSS solver. Install " + "the relevant optional dependencies with [boss]. 
\n" + "You can do this by running: \n" + "pip install 'poli-baselines[boss] @ git+https://github.com/MachineLearningLifeScience/poli-baselines.git'" + ) from e + +from poli.core.abstract_black_box import AbstractBlackBox +from poli.core.util.seeding import seed_python_numpy_and_torch + +from poli_baselines.core.abstract_solver import AbstractSolver + +ROOT_DIR = Path(__file__).parent.parent.parent.parent.parent.parent.resolve() + + +class BossSolver(AbstractSolver): + def __init__( + self, + black_box: AbstractBlackBox, + x0: np.ndarray = None, + y0: np.ndarray = None, + device: str | None = None, + dtype: Literal["float32", "float64"] = "float32", + batch_size: int = 1, + n_initial_points: int | None = None, + number_new_bins_on_split: int = 2, + results_dir: Path | None = None, + ): + super().__init__(black_box, None, None) + if device is None: + device = "cuda" if torch.cuda.is_available() else "cpu" + self.x0 = x0 + self.y0 = y0 + self.device = device + self.dtype = dtype + self.batch_size = batch_size + self.number_new_bins_on_split = number_new_bins_on_split + self.n_initial_points = n_initial_points + + self.objective = lambda x: -self.black_box(x) # BOSS minimizes + + # see SMILES examples + token_space = np.array([" ".join(list(ss)) for ss in self.x0]).reshape(-1,1) + + self.search_space = ParameterSpace([CandidateStringParameter("string", token_space)]) # x0 goes here with correct wrapper + self.model = SSK_model(self.search_space, self.x0, self.y0, max_subsequence_length=5, n_restarts=1) + self.acquisition = ExpectedImprovement(self.model) + self.optimizer = RandomSearchAcquisitionOptimizer(self.search_space, 100) + + self.bo_loop_ssk = BayesianOptimizationLoop( + model=self.model, + space=self.search_space, + acquisition=self.acquisition, + acquisition_optimizer=self.optimizer, + ) + + # Creating the results dir for boss + if results_dir is None: + results_dir = ROOT_DIR / "boss_results" + Path(results_dir).mkdir(parents=True, exist_ok=True) + + # 
Creating a gitignore file inside that dir + with open(results_dir / ".gitignore", "w") as fp: + fp.write("*\n!.gitignore") + + def solve( + self, + max_iter: int = 100, + n_initial_points: int | None = None, + seed: int | None = None, + ) -> None: + if seed is not None: + seed_python_numpy_and_torch(seed) + + if n_initial_points is None: + if self.n_initial_points is None: + raise ValueError( + "n_initial_points must be set, either in init or in solve" + ) + n_initial_points = self.n_initial_points + + stopping_condition = FixedIterationsStoppingCondition(i_max=max_iter) + + self.boss = BossSolver( + black_box=self.black_box, + x0=self.x0, + y0=self.y0, + n_initial_points=n_initial_points, + batch_size=self.batch_size, + results_dir=ROOT_DIR / "data" / "boss_results", + device=self.device, + dtype=self.dtype, + ) + self.boss.bo_loop_ssk.run_loop(self.objective, stopping_condition) + From dfa01e2a99964f1127e65da7449ec4eb0462d401 Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Mon, 9 Dec 2024 10:58:56 +0100 Subject: [PATCH 5/5] black it --- .../solvers/bayesian_optimization/boss/solver.py | 13 ++++++++----- .../solvers/bayesian_optimization/test_boss.py | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py b/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py index ca543c6..d21d9c3 100644 --- a/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py @@ -62,10 +62,14 @@ def __init__( self.objective = lambda x: -self.black_box(x) # BOSS minimizes # see SMILES examples - token_space = np.array([" ".join(list(ss)) for ss in self.x0]).reshape(-1,1) + token_space = np.array([" ".join(list(ss)) for ss in self.x0]).reshape(-1, 1) - self.search_space = ParameterSpace([CandidateStringParameter("string", token_space)]) # x0 goes here with correct wrapper - self.model = SSK_model(self.search_space, 
self.x0, self.y0, max_subsequence_length=5, n_restarts=1) + self.search_space = ParameterSpace( + [CandidateStringParameter("string", token_space)] + ) # x0 goes here with correct wrapper + self.model = SSK_model( + self.search_space, self.x0, self.y0, max_subsequence_length=5, n_restarts=1 + ) self.acquisition = ExpectedImprovement(self.model) self.optimizer = RandomSearchAcquisitionOptimizer(self.search_space, 100) @@ -84,7 +88,7 @@ def __init__( # Creating a gitignore file inside that dir with open(results_dir / ".gitignore", "w") as fp: fp.write("*\n!.gitignore") - + def solve( self, max_iter: int = 100, @@ -114,4 +118,3 @@ def solve( dtype=self.dtype, ) self.boss.bo_loop_ssk.run_loop(self.objective, stopping_condition) - diff --git a/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py b/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py index 1f1d5e7..e5f528c 100644 --- a/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py +++ b/src/poli_baselines/tests/solvers/bayesian_optimization/test_boss.py @@ -2,6 +2,7 @@ Tests for the BOSS implementation """ + import json import warnings from pathlib import Path @@ -27,6 +28,7 @@ def load_sequence_length() -> int: return metadata["max_sequence_length"] + @pytest.mark.slow() def test_boss_runs(): """ @@ -40,9 +42,7 @@ def test_boss_runs(): alphabet = load_alphabet() sequence_length = load_sequence_length() - problem = objective_factory.create( - name="rdkit_qed", string_representation="SMILES" - ) + problem = objective_factory.create(name="rdkit_qed", string_representation="SMILES") black_box = problem.black_box x0 = problem.x0 y0 = black_box(x0) @@ -56,4 +56,4 @@ def test_boss_runs(): assert solver is not None - solver.solve(max_iter=1) \ No newline at end of file + solver.solve(max_iter=1)