From 2b96e5ade6280e7fc29d65c0255d876572c04b84 Mon Sep 17 00:00:00 2001 From: m-a-blommaert <61234439+m-a-blommaert@users.noreply.github.com> Date: Thu, 3 Apr 2025 00:38:24 +0200 Subject: [PATCH 01/18] Add pymoo_minimize.py pymoo_minimize.py contains some skeleton code for the integration with Pymoo --- kernel_tuner/strategies/pymoo_minimize.py | 65 +++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 kernel_tuner/strategies/pymoo_minimize.py diff --git a/kernel_tuner/strategies/pymoo_minimize.py b/kernel_tuner/strategies/pymoo_minimize.py new file mode 100644 index 000000000..c5405ee15 --- /dev/null +++ b/kernel_tuner/strategies/pymoo_minimize.py @@ -0,0 +1,65 @@ +"""The Pymoo strategy that uses a minimizer method for searching through the parameter space.""" + +import pymoo.optimize +import pymoo.core + +from kernel_tuner import util +from kernel_tuner.searchspace import Searchspace +from kernel_tuner.strategies.common import ( + CostFunc, + get_options, + get_strategy_docstring, + setup_method_arguments, + setup_method_options, +) + +# TODO: Add the PyMOO algorithms +supported_methods = [] + +_options = dict(method=(f"Pymoo optimization algorithm to use, choose any from {supported_methods}", "")) + +def tune(searchspace: Searchspace, runner, tuning_options): + + # TODO: + # The idea is to create a Problem, Algorithm, and Termination + # then use to run `pymoo.optimize.minimize` + # so I basically need to write some adapter/integration code + + method = get_options(tuning_options.strategy_options, _options)[0] + + # scale variables in x to make 'eps' relevant for multiple variables + cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) + + bounds, x0, _ = cost_func.get_bounds_x0_eps() + kwargs = setup_method_arguments(method, bounds) + options = setup_method_options(method, tuning_options) + + # TODO: make a pymoo.core.problem.Problem + # * use `searchspace`, `runner`, and `cost_func` to define the problem + # * use etc to define the problem + problem = None # pymoo.core.problem.Problem() + + # TODO: make a pymoo.core.algorithm.Algorithm + # * use `method` to select the algorithm + # * use etc to define the algorithm + algorithm = None # pymoo.core.algorithm.Algorithm() + + # TODO: + termination = None # pymoo.core.termination.Termination() + + # TODO: change the rest of the code to work with `Pymoo` + + opt_result = None + try: + opt_result = pymoo.optimize.minimize(problem, algorithm, termination) + except util.StopCriterionReached as e: + if tuning_options.verbose: + print(e) + + if opt_result and tuning_options.verbose: + print(opt_result.message) + + return cost_func.results + + +tune.__doc__ = get_strategy_docstring("Pymoo minimize", _options) From d1d1540bb1d603335fdb3e0201a67359e0f4cdf9 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Fri, 9 May 2025 00:37:45 +0200 Subject: [PATCH 02/18] add "error" field to result dict and convert existing code to use it --- kernel_tuner/core.py | 14 +++++++++++--- kernel_tuner/file_utils.py | 20 ++++++++++++-------- kernel_tuner/interface.py | 11 ++++++++++- kernel_tuner/runners/sequential.py | 10 ++++++++-- kernel_tuner/runners/simulation.py | 4 ++++ kernel_tuner/strategies/common.py | 20 +++++++++++++++----- kernel_tuner/util.py | 7 +++++++ 7 files changed, 67 insertions(+), 19 deletions(-) diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 655779337..594ea3b78 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -480,11 +480,15 @@ def 
benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_sett print( f"skipping config {util.get_instance_string(instance.params)} reason: too many resources requested for launch" ) - result[objective] = util.RuntimeFailedConfig() + # result[objective] = util.RuntimeFailedConfig() + result['error'] = util.RuntimeFailedConfig() else: logging.debug("benchmark encountered runtime failure: " + str(e)) print("Error while benchmarking:", instance.name) raise e + + assert util.check_result_type(result), "The error in a result MUST be an actual error." + return result def check_kernel_output( @@ -571,7 +575,8 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose) if isinstance(instance, util.ErrorConfig): - result[to.objective] = util.InvalidConfig() + # result[to.objective] = util.InvalidConfig() + result['error'] = util.InvalidConfig() else: # Preprocess the argument list. This is required to deal with `MixedPrecisionArray`s gpu_args = _preprocess_gpu_arguments(gpu_args, params) @@ -581,7 +586,8 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, start_compilation = time.perf_counter() func = self.compile_kernel(instance, verbose) if not func: - result[to.objective] = util.CompilationFailedConfig() + # result[to.objective] = util.CompilationFailedConfig() + result['error'] = util.CompilationFailedConfig() else: # add shared memory arguments to compiled module if kernel_options.smem_args is not None: @@ -635,6 +641,8 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, result["verification_time"] = last_verification_time or 0 result["benchmark_time"] = last_benchmark_time or 0 + assert util.check_result_type(result), "The error in a result MUST be an actual error." 
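+        # Note: check_result_type() (added to util.py in this patch) only verifies
+        # that, when an 'error' entry is present, it holds a util.ErrorConfig
+        # instance; it does not validate any other field of the result dict.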
+ return result def compile_kernel(self, instance, verbose): diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index e5d3dcb90..fb53d5956 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -32,20 +32,20 @@ def output_file_schema(target): return current_version, json_string -def get_configuration_validity(objective) -> str: +def get_configuration_validity(error) -> str: """Convert internal Kernel Tuner error to string.""" errorstring: str - if not isinstance(objective, util.ErrorConfig): + if not isinstance(error, util.ErrorConfig): errorstring = "correct" else: - if isinstance(objective, util.CompilationFailedConfig): + if isinstance(error, util.CompilationFailedConfig): errorstring = "compile" - elif isinstance(objective, util.RuntimeFailedConfig): + elif isinstance(error, util.RuntimeFailedConfig): errorstring = "runtime" - elif isinstance(objective, util.InvalidConfig): + elif isinstance(error, util.InvalidConfig): errorstring = "constraints" else: - raise ValueError(f"Unkown objective type {type(objective)}, value {objective}") + raise ValueError(f"Unkown error type {type(error)}, value {error}") return errorstring @@ -110,7 +110,8 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim out["times"] = timings # encode the validity of the configuration - out["invalidity"] = get_configuration_validity(result[objective]) + # out["invalidity"] = get_configuration_validity(result[objective]) + out["invalidity"] = get_configuration_validity(result['error']) # Kernel Tuner does not support producing results of configs that fail the correctness check # therefore correctness is always 1 @@ -127,7 +128,10 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim # In Kernel Tuner we currently support only one objective at a time, this can be a user-defined # metric that combines scores from multiple different quantities into a single value to support # multi-objective tuning however. 
- out["objectives"] = [objective] + # NOTE(maric): With PyMOO integrated we do support multi-objective tuning without scalarization + objectives = [objective] if isinstance(objective, str) else list(objective) + assert isinstance(objectives, list) + out["objectives"] = objectives # append to output output_data.append(out) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 97ae22848..5d2891218 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -587,7 +587,16 @@ def tune_kernel( _check_user_input(kernel_name, kernelsource, arguments, block_size_names) # default objective if none is specified - objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better) + # if len(list(objective)) == 1: + # objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better) + + if isinstance(objective, str): + objective = list(objective) + + if isinstance(objective_higher_is_better, bool): + objective_higher_is_better = list(objective_higher_is_better) + + assert len(list(objective)) == len(list(objective_higher_is_better)) # check for forbidden names in tune parameters util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index aeebd5116..95bfff500 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -90,13 +90,17 @@ def run(self, parameter_space, tuning_options): result = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, self.kernel_options, tuning_options) + assert util.check_result_type(result) + params.update(result) - if tuning_options.objective in result and isinstance(result[tuning_options.objective], ErrorConfig): + # if tuning_options.objective in result and isinstance(result[tuning_options.objective], ErrorConfig): + if 'error' in result: logging.debug('kernel configuration was skipped silently due to compile or runtime failure') # only compute metrics on configs that have not errored - if tuning_options.metrics and not isinstance(params.get(tuning_options.objective), ErrorConfig): + # if tuning_options.metrics and not isinstance(params.get(tuning_options.objective), ErrorConfig): + if 'error' in params: params = process_metrics(params, tuning_options.metrics) # get the framework time by estimating based on other times @@ -113,6 +117,8 @@ def run(self, parameter_space, tuning_options): # add configuration to cache store_cache(x_int, params, tuning_options) + assert util.check_result_type(params) + # all visited configurations are added to results to provide a trace for optimization strategies results.append(params) diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py index 22c7c667c..5134f0fbd 100644 --- a/kernel_tuner/runners/simulation.py +++ b/kernel_tuner/runners/simulation.py @@ -90,6 +90,8 @@ def run(self, parameter_space, tuning_options): if tuning_options.cache and x_int in tuning_options.cache: result = tuning_options.cache[x_int].copy() + assert util.check_result_type(result) + # Simulate behavior of sequential runner that when a configuration is # served from the cache by the sequential runner, the compile_time, # verification_time, and benchmark_time are set to 0. 
@@ -124,6 +126,8 @@ def run(self, parameter_space, tuning_options): self.start_time = perf_counter() result['framework_time'] = total_time - self.last_strategy_time + assert util.check_result_type(result) + results.append(result) continue diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index d01eae937..d7acd4ed9 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -92,7 +92,10 @@ def __call__(self, x, check_restrictions=True): legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose) if not legal: result = params_dict - result[self.tuning_options.objective] = util.InvalidConfig() + # result[self.tuning_options.objective] = util.InvalidConfig() + result['error'] = util.InvalidConfig() + + assert legal == ('error' not in result), "A legal config MUST NOT have an error result." if legal: # compile and benchmark this instance @@ -109,10 +112,17 @@ def __call__(self, x, check_restrictions=True): self.runner.last_strategy_start_time = perf_counter() # get numerical return value, taking optimization direction into account - return_value = result[self.tuning_options.objective] or sys.float_info.max - return_value = return_value if not self.tuning_options.objective_higher_is_better else -return_value - - return return_value + return_values = [] + for obj, higher_is_better in zip(self.tuning_options.objective, self.tuning_options.objective_higher_is_better): + return_value = result[obj] if 'error' not in result else sys.float_info.max + return_value = return_value if not higher_is_better else -return_value + return_values.append(return_value) + + if len(return_values) == 1: + return return_values[0] + else: + # NOTE: MAYBE make this a tuple() + return return_values def get_bounds_x0_eps(self): """Compute bounds, x0 (the initial guess), and eps.""" diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 710b59e0d..054ef5453 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -79,6 +79,13 @@ def default(self, obj): return super(NpEncoder, self).default(obj) +def check_result_type(r): + "Check if the result has the right format." + if 'error' in r: + return isinstance(r['error'], ErrorConfig) + return True + + class TorchPlaceHolder: def __init__(self): self.Tensor = Exception # using Exception here as a type that will never be among kernel arguments From 133d35cc4913e77fb41376e2ba9f7ed1c5e2160d Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Fri, 9 May 2025 00:39:54 +0200 Subject: [PATCH 03/18] add rough support for multiple objectis through pymoo --- kernel_tuner/interface.py | 6 +- kernel_tuner/strategies/pymoo_minimize.py | 91 ++++++++++++++++++++--- 2 files changed, 83 insertions(+), 14 deletions(-) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 5d2891218..f7adef3e2 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -57,6 +57,7 @@ pso, random_sample, simulated_annealing, + pymoo_minimize, ) strategy_map = { @@ -75,6 +76,7 @@ "simulated_annealing": simulated_annealing, "firefly_algorithm": firefly_algorithm, "bayes_opt": bayes_opt, + "pymoo_minimize": pymoo_minimize, } @@ -425,7 +427,7 @@ def __deepcopy__(self, _): """Optimization objective to sort results on, consisting of a string that also occurs in results as a metric or observed quantity, default 'time'. 
Please see :ref:`objectives`.""", - "string", + "str | list[str]", ), ), ( @@ -433,7 +435,7 @@ def __deepcopy__(self, _): ( """boolean that specifies whether the objective should be maximized (True) or minimized (False), default False.""", - "bool", + "bool | list[bool]", ), ), ( diff --git a/kernel_tuner/strategies/pymoo_minimize.py b/kernel_tuner/strategies/pymoo_minimize.py index c5405ee15..c31ff0797 100644 --- a/kernel_tuner/strategies/pymoo_minimize.py +++ b/kernel_tuner/strategies/pymoo_minimize.py @@ -1,7 +1,6 @@ """The Pymoo strategy that uses a minimizer method for searching through the parameter space.""" -import pymoo.optimize -import pymoo.core +import numpy as np from kernel_tuner import util from kernel_tuner.searchspace import Searchspace @@ -12,6 +11,14 @@ setup_method_arguments, setup_method_options, ) +from kernel_tuner.strategies.genetic_algorithm import mutate + +from pymoo.optimize import minimize +from pymoo.core.problem import ElementwiseProblem +from pymoo.core.sampling import Sampling +from pymoo.core.mutation import Mutation +from pymoo.operators.crossover.ux import UX +from pymoo.algorithms.moo.nsga2 import NSGA2 # TODO: Add the PyMOO algorithms supported_methods = [] @@ -28,38 +35,98 @@ def tune(searchspace: Searchspace, runner, tuning_options): method = get_options(tuning_options.strategy_options, _options)[0] # scale variables in x to make 'eps' relevant for multiple variables - cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) + cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False) bounds, x0, _ = cost_func.get_bounds_x0_eps() kwargs = setup_method_arguments(method, bounds) options = setup_method_options(method, tuning_options) - # TODO: make a pymoo.core.problem.Problem - # * use `searchspace`, `runner`, and `cost_func` to define the problem - # * use etc to define the problem - problem = None # pymoo.core.problem.Problem() + problem = KernelTunerProblem( + f = cost_func, + n_var = len(tuning_options.tune_params), + n_obj = len(tuning_options.objective), + ) # TODO: make a pymoo.core.algorithm.Algorithm # * use `method` to select the algorithm # * use etc to define the algorithm - algorithm = None # pymoo.core.algorithm.Algorithm() + + # algorithm_type = get_algorithm + algorithm = NSGA2( + pop_size=100, + sampling=SearchspaceRandomSampling(searchspace), + crossover=UX(prob=0.6), + mutation=MutateToNeighbor(searchspace, prob=0.5), + ) # TODO: + # - CostFunc throws exception when done, so isn't really needed termination = None # pymoo.core.termination.Termination() - # TODO: change the rest of the code to work with `Pymoo` - opt_result = None try: - opt_result = pymoo.optimize.minimize(problem, algorithm, termination) + opt_result = minimize(problem, algorithm, termination) except util.StopCriterionReached as e: + print(f"Stopped because of {e}") if tuning_options.verbose: print(e) if opt_result and tuning_options.verbose: - print(opt_result.message) + print(f"{opt_result.message=}") + # print(f"{opt_result.message=}") + # print(f"{cost_func.results=}") return cost_func.results tune.__doc__ = get_strategy_docstring("Pymoo minimize", _options) + + +class KernelTunerProblem(ElementwiseProblem): + def __init__(self, f, n_var, n_obj): + super().__init__( + n_var = n_var, + n_obj = n_obj, + ) + self.f = f + + def _evaluate(self, x, out, *args, **kwargs): + F = self.f(x) + out["F"] = F + + +class SearchspaceRandomSampling(Sampling): + def __init__(self, searchspace): + super().__init__() + self.ss = searchspace + + def 
_do(self, problem, n_samples, **kwargs): + X = self.ss.get_random_sample(n_samples) + return X + + +class MutateToNeighbor(Mutation): + def __init__( + self, + searchspace : Searchspace, + prob=1.0, + prob_var=None, + **kwargs + ): + super().__init__( + prob=prob, + prob_var=prob_var, + **kwargs, + ) + self.ss = searchspace + + def _do(self, problem, X, **kwargs): + Xm = np.empty_like(X) + for i in range(X.shape[0]): + neighbors = self.ss.get_neighbors_indices_no_cache(tuple(X[i]), neighbor_method="Hamming") + # copy X[i] to result in case there are no neighbors + if len(neighbors) > 0: + Xm[i] = neighbors[np.random.choice(len(neighbors))] + else: + Xm[i] = X[i] + return Xm From 541972cb2c60a0dac395a2e1a4b6e78eee0e47d7 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 13 Aug 2025 21:42:26 +0200 Subject: [PATCH 04/18] pareto front algo --- kernel_tuner/util.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 054ef5453..297538b30 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -400,11 +400,43 @@ def get_best_config(results, objective, objective_higher_is_better=False): ignore_val = sys.float_info.max if not objective_higher_is_better else -sys.float_info.max best_config = func( results, - key=lambda x: x[objective] if isinstance(x[objective], float) else ignore_val, + key=lambda x: x[objective] if 'error' not in x and isinstance(x[objective], float) else ignore_val, ) return best_config +def get_pareto_front(results, objective, objective_higher_is_better): + assert isinstance(objective, list) + + nonerror_results = list(filter(lambda x: "error" not in x, results)) + front = [] + + # A point `p` in a finite set of points `S` is said to be maximal or non-dominated if there is no other point `q` in `S` whose `q(i)` are all >= `p(i)` + # So for all q there must be a q(i) such that q(i) < p(i) + for p in nonerror_results: + p_nondom = True + for q in nonerror_results: + if p is q: + continue + # \forall(i): q(i) >= p(i)? + flag = True + for i, higher_is_better in zip(objective, objective_higher_is_better): + p_i, q_i = p[i], q[i] + if not higher_is_better: + p_i, q_i = -p_i, -q_i + if q_i < p_i: + flag = False + break + if flag: + p_nondom = False + break + if p_nondom: + p["optimal"] = True + front.append(p) + + return front + + def get_config_string(params, keys=None, units=None): """Return a compact string representation of a measurement.""" From daca5900603d61f010f4590bf986d418f4f47a6a Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 5 Nov 2025 10:55:07 +0100 Subject: [PATCH 05/18] forgot to commit for a very long time... 
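The commit message above gives no usage example, so here is a minimal sketch of how the additions in this patch fit together: tune_cache() replays an existing cache file in simulation mode, the new objectives argument maps each objective to its direction, and the Pymoo strategy consumes them. The cache path and the "GFLOP/s" objective are hypothetical and assume both quantities are recorded in the cache:

    from kernel_tuner import tune_cache

    results, env = tune_cache(
        "convolution_A100.json",                      # hypothetical cache file
        strategy="pymoo_minimize",
        strategy_options={"pop_size": 100, "max_fevals": 2000},
        objectives={"time": False, "GFLOP/s": True},  # objective -> higher_is_better
    )
    # env["best_config"] holds the Pareto front when more than one objective is given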
--- .gitignore | 3 +- kernel_tuner/__init__.py | 2 +- kernel_tuner/interface.py | 65 ++++- kernel_tuner/runners/simulation.py | 3 +- kernel_tuner/strategies/common.py | 29 ++- kernel_tuner/strategies/pymoo_minimize.py | 284 ++++++++++++++++------ kernel_tuner/util.py | 140 ++++++++--- 7 files changed, 400 insertions(+), 126 deletions(-) diff --git a/.gitignore b/.gitignore index 43bd95c2b..5034957b4 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ deploy_key temp_*.* .python-version .nox +.venv ### Visual Studio Code ### !.vscode/settings.json @@ -37,4 +38,4 @@ temp_*.* .LSOverride .vscode -.idea \ No newline at end of file +.idea diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py index b64d69813..3f575faa0 100644 --- a/kernel_tuner/__init__.py +++ b/kernel_tuner/__init__.py @@ -1,5 +1,5 @@ from kernel_tuner.integration import store_results, create_device_targets -from kernel_tuner.interface import tune_kernel, run_kernel +from kernel_tuner.interface import tune_kernel, tune_cache, run_kernel from importlib.metadata import version diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index f7adef3e2..0c0282ad0 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -579,6 +579,7 @@ def tune_kernel( observers=None, objective=None, objective_higher_is_better=None, + objectives=None, ): start_overhead_time = perf_counter() if log: @@ -593,10 +594,17 @@ def tune_kernel( # objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better) if isinstance(objective, str): - objective = list(objective) + objective = [objective] if isinstance(objective_higher_is_better, bool): - objective_higher_is_better = list(objective_higher_is_better) + objective_higher_is_better = [objective_higher_is_better] + + if objectives: + if isinstance(objectives, dict): + objective = list(objectives.keys()) + objective_higher_is_better = list(objectives.values()) + else: + raise ValueError("objectives should be a dict of (objective, higher_is_better) pairs") assert len(list(objective)) == len(list(objective_higher_is_better)) @@ -693,13 +701,34 @@ def tune_kernel( # finished iterating over search space if results: # checks if results is not empty - best_config = util.get_best_config(results, objective, objective_higher_is_better) - # add the best configuration to env - env['best_config'] = best_config - if not device_options.quiet: - units = getattr(runner, "units", None) - print("best performing configuration:") - util.print_config_output(tune_params, best_config, device_options.quiet, metrics, units) + if len(list(objective)) == 1: + objective = objective[0] + objective_higher_is_better = objective_higher_is_better[0] + best_config = util.get_best_config(results, objective, objective_higher_is_better) + print(best_config) + # add the best configuration to env + env['best_config'] = best_config + if not device_options.quiet: + units = getattr(runner, "units", None) + print(f"\nBEST PERFORMING CONFIGURATION FOR OBJECTIVE {objective}:") + keys = list(tune_params.keys()) + keys += [objective] + if metrics: + keys += list(metrics.keys()) + print(util.get_config_string(best_config, keys, units)) + else: + pareto_front = util.get_pareto_results(results, objective, objective_higher_is_better) + # add the best configuration to env + env['best_config'] = pareto_front + if not device_options.quiet: + units = getattr(runner, "units", None) + keys = list(tune_params.keys()) + keys += list(objective) + if metrics: + keys += 
list(metrics.keys) + print(f"\nBEST PERFORMING CONFIGURATIONS FOR OBJECTIVES: {objective}:") + for best_config in pareto_front: + print(util.get_config_string(best_config, keys, units)) elif not device_options.quiet: print("no results to report") @@ -714,6 +743,24 @@ def tune_kernel( tune_kernel.__doc__ = _tune_kernel_docstring + +def tune_cache( + cache, + restrictions = None, + **kwargs, +): + tune_args = util.tune_args_from_cache_file(cache) + if restrictions: + new_restrictions = [tune_args['restrictions']] + if isinstance(restrictions, list): + new_restrictions.extend(restrictions) + else: + new_restrictions.append(restrictions) + tune_args['restrictions'] = new_restrictions + tune_args.update(kwargs) + return tune_kernel(simulation_mode=True, **tune_args) + + _run_kernel_docstring = """Compile and run a single kernel Compiles and runs a single kernel once, given a specific instance of the kernels tuning parameters. diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py index 5134f0fbd..1ede287dd 100644 --- a/kernel_tuner/runners/simulation.py +++ b/kernel_tuner/runners/simulation.py @@ -47,7 +47,8 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob :type iterations: int """ self.quiet = device_options.quiet - self.dev = SimulationDevice(1024, dict(device_name="Simulation"), self.quiet) + # NOTE(maric): had to increase max_threas so the default restraints would pass + self.dev = SimulationDevice(1_000_000_000, dict(device_name="Simulation"), self.quiet) self.kernel_source = kernel_source self.simulation_mode = True diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index d7acd4ed9..e47f1edb0 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -60,11 +60,15 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling= self.scaling = scaling self.searchspace = searchspace self.results = [] + self.total_config_count = 0 + self.illegal_config_count = 0 def __call__(self, x, check_restrictions=True): """Cost function used by almost all strategies.""" self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time) + self.total_config_count += 1 + # error value to return for numeric optimizers that need a numerical value logging.debug('_cost_func called') logging.debug('x: ' + str(x)) @@ -94,10 +98,12 @@ def __call__(self, x, check_restrictions=True): result = params_dict # result[self.tuning_options.objective] = util.InvalidConfig() result['error'] = util.InvalidConfig() - - assert legal == ('error' not in result), "A legal config MUST NOT have an error result." + self.illegal_config_count += 1 if legal: + assert ('error' not in result), "A legal config MUST NOT have an error result." 
+ if 'error' in result: exit() + # compile and benchmark this instance res = self.runner.run([params], self.tuning_options) result = res[0] @@ -111,18 +117,17 @@ def __call__(self, x, check_restrictions=True): # upon returning from this function control will be given back to the strategy, so reset the start time self.runner.last_strategy_start_time = perf_counter() - # get numerical return value, taking optimization direction into account - return_values = [] - for obj, higher_is_better in zip(self.tuning_options.objective, self.tuning_options.objective_higher_is_better): - return_value = result[obj] if 'error' not in result else sys.float_info.max - return_value = return_value if not higher_is_better else -return_value - return_values.append(return_value) + # get the cost of the result + cost_vec = util.get_result_cost( + result, + self.tuning_options.objective, + self.tuning_options.objective_higher_is_better + ) - if len(return_values) == 1: - return return_values[0] + if len(cost_vec) == 1: + return cost_vec[0] else: - # NOTE: MAYBE make this a tuple() - return return_values + return cost_vec def get_bounds_x0_eps(self): """Compute bounds, x0 (the initial guess), and eps.""" diff --git a/kernel_tuner/strategies/pymoo_minimize.py b/kernel_tuner/strategies/pymoo_minimize.py index c31ff0797..25060c04c 100644 --- a/kernel_tuner/strategies/pymoo_minimize.py +++ b/kernel_tuner/strategies/pymoo_minimize.py @@ -2,131 +2,277 @@ import numpy as np +from pymoo.algorithms.moo.nsga2 import NSGA2 +from pymoo.algorithms.moo.nsga3 import NSGA3 +from pymoo.core.mutation import Mutation +from pymoo.core.problem import ElementwiseProblem +from pymoo.core.sampling import Sampling +from pymoo.core.termination import NoTermination, Termination +from pymoo.core.repair import Repair +from pymoo.operators.crossover.ux import UniformCrossover +from pymoo.operators.crossover.pntx import TwoPointCrossover +from pymoo.optimize import minimize +from pymoo.util.ref_dirs import get_reference_directions +from pymoo.indicators.igd import IGD + from kernel_tuner import util from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import ( CostFunc, - get_options, get_strategy_docstring, setup_method_arguments, - setup_method_options, ) -from kernel_tuner.strategies.genetic_algorithm import mutate - -from pymoo.optimize import minimize -from pymoo.core.problem import ElementwiseProblem -from pymoo.core.sampling import Sampling -from pymoo.core.mutation import Mutation -from pymoo.operators.crossover.ux import UX -from pymoo.algorithms.moo.nsga2 import NSGA2 # TODO: Add the PyMOO algorithms -supported_methods = [] +supported_methods = [ + "NSGA2", + "NSGA3", +] -_options = dict(method=(f"Pymoo optimization algorithm to use, choose any from {supported_methods}", "")) +_options = { + "method": (f"Pymoo optimization algorithm to use, choose any from {supported_methods}", "NSGA2"), + "pop_size": ("Initial population size", 100), +} -def tune(searchspace: Searchspace, runner, tuning_options): - # TODO: - # The idea is to create a Problem, Algorithm, and Termination - # then use to run `pymoo.optimize.minimize` - # so I basically need to write some adapter/integration code +def tune( + searchspace: Searchspace, + runner, + tuning_options, +): + strategy_options = tuning_options.strategy_options + + if "method" in strategy_options: + method = strategy_options["method"] + else: + (_, method) = _options["method"] + print(f"{method=}") - method = get_options(tuning_options.strategy_options, _options)[0] 
+ if "pop_size" in strategy_options: + pop_size = strategy_options["pop_size"] + else: + (_, pop_size) = _options["pop_size"] + print(f"{pop_size=}") # scale variables in x to make 'eps' relevant for multiple variables cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False) bounds, x0, _ = cost_func.get_bounds_x0_eps() kwargs = setup_method_arguments(method, bounds) - options = setup_method_options(method, tuning_options) - problem = KernelTunerProblem( - f = cost_func, - n_var = len(tuning_options.tune_params), - n_obj = len(tuning_options.objective), + problem = TuningProblem( + cost_func=cost_func, + n_var=len(tuning_options.tune_params), + n_obj=len(tuning_options.objective), ) - # TODO: make a pymoo.core.algorithm.Algorithm - # * use `method` to select the algorithm - # * use etc to define the algorithm - - # algorithm_type = get_algorithm - algorithm = NSGA2( - pop_size=100, - sampling=SearchspaceRandomSampling(searchspace), - crossover=UX(prob=0.6), - mutation=MutateToNeighbor(searchspace, prob=0.5), - ) + # algorithm_type = get_algorithm(method) + algorithm = None + if method == "NSGA2": + algorithm = NSGA2( + pop_size = pop_size, + sampling = SearchspaceRandomSampling(searchspace), + crossover = TwoPointCrossover(), + mutation = MutateToNeighbor(searchspace, prob = 0.5), + repair = RepairConfig(), + # save_history = True, + ) + elif method == "NSGA3": + algorithm = NSGA3( + pop_size = pop_size, + ref_dirs = get_reference_directions("das-dennis", len(tuning_options.objective), n_partitions = 26), + sampling = SearchspaceRandomSampling(searchspace), + crossover = UniformCrossover(prob = 0.6), + mutation = MutateToNeighbor(searchspace, prob = 0.5), + # repair = MyRepair(), + # save_history = True, + ) # TODO: # - CostFunc throws exception when done, so isn't really needed - termination = None # pymoo.core.termination.Termination() + termination = None + if "max_fevals" in tuning_options.strategy_options or "time_limit" in tuning_options.strategy_options: + termination = NoTermination() + + pf = problem.pareto_front() + igd_ind = IGD(pf, zero_to_one=True) - opt_result = None try: - opt_result = minimize(problem, algorithm, termination) + _ = algorithm.setup( + problem, + # termination = termination, + termination=("n_gen", 20), + seed=1, + verbose=True, + ) + + while algorithm.has_next(): + algorithm.next() + + illegal_count = cost_func.illegal_config_count + total_count = cost_func.total_config_count + print(f"config valid: {total_count - illegal_count}/{total_count} ({100 * (1 - (illegal_count / total_count)):.4}%)") + + print("IGD: ", igd_ind(algorithm.opt.get("F"))) + except util.StopCriterionReached as e: - print(f"Stopped because of {e}") if tuning_options.verbose: - print(e) + print(f"Stopped because of {e}") + + opt_result = cost_func.results if opt_result and tuning_options.verbose: print(f"{opt_result.message=}") - # print(f"{opt_result.message=}") - # print(f"{cost_func.results=}") - return cost_func.results + return opt_result tune.__doc__ = get_strategy_docstring("Pymoo minimize", _options) -class KernelTunerProblem(ElementwiseProblem): - def __init__(self, f, n_var, n_obj): +class TuningProblem(ElementwiseProblem): + def __init__( + self, + cost_func: CostFunc, + n_var, + n_obj, + **kwargs, + ): super().__init__( n_var = n_var, n_obj = n_obj, + **kwargs, ) - self.f = f + self.cost_func = cost_func + self.searchspace = cost_func.searchspace + self.tuning_options = cost_func.tuning_options - def _evaluate(self, x, out, *args, **kwargs): - F = self.f(x) + 
def _evaluate( + self, + x, + out, + *args, + **kwargs, + ): + F = self.cost_func(x) out["F"] = F + def _calc_pareto_front( + self, + *args, + **kwargs + ) -> np.ndarray | None: + # Can only compute the pareto front if we are in simulation mode. + if not self.tuning_options.simulation_mode: + return None + + objectives = self.tuning_options.objective + higher_is_better = self.tuning_options.objective_higher_is_better + pareto_results = util.get_pareto_results( + list(self.tuning_options.cache.values()), + objectives, + higher_is_better, + ) + + pareto_front_list = list() + for res in pareto_results: + cost = util.get_result_cost(res, objectives, higher_is_better) + pareto_front_list.append(cost) + + return np.array(pareto_front_list) + + +class TuningTermination(Termination): + def __init__( + self, + tuning_options, + ): + super().__init__() + self.tuning_options = tuning_options + self.reason = None + + def _update( + self, + algorithm, + ): + try: + util.check_stop_criterion(self.tuning_options) + print(f"progress: {len(self.tuning_options.unique_results) / self.tuning_options.max_fevals}") + return 0.0 + except util.StopCriterionReached as e: + self.terminate() + self.reason = e + return 1.0 + class SearchspaceRandomSampling(Sampling): - def __init__(self, searchspace): + def __init__( + self, + searchspace, + ): super().__init__() - self.ss = searchspace + self.searchspace = searchspace - def _do(self, problem, n_samples, **kwargs): - X = self.ss.get_random_sample(n_samples) + def _do( + self, + problem, + n_samples: int, + **kwargs, + ): + X = self.searchspace.get_random_sample(n_samples) return X class MutateToNeighbor(Mutation): def __init__( - self, - searchspace : Searchspace, - prob=1.0, - prob_var=None, - **kwargs - ): + self, + searchspace: Searchspace, + prob=1.0, + prob_var=None, + **kwargs + ): super().__init__( prob=prob, prob_var=prob_var, **kwargs, ) - self.ss = searchspace - - def _do(self, problem, X, **kwargs): - Xm = np.empty_like(X) - for i in range(X.shape[0]): - neighbors = self.ss.get_neighbors_indices_no_cache(tuple(X[i]), neighbor_method="Hamming") - # copy X[i] to result in case there are no neighbors - if len(neighbors) > 0: - Xm[i] = neighbors[np.random.choice(len(neighbors))] - else: - Xm[i] = X[i] - return Xm + self.searchspace = searchspace + + def _do( + self, + problem: TuningProblem, + X: np.ndarray, + **kwargs, + ): + for ind_index in range(X.shape[0]): + params_config_tuple = tuple(X[ind_index]) + neighbors_indices = self.searchspace.get_neighbors_indices_no_cache(params_config_tuple, neighbor_method="Hamming") + if len(neighbors_indices) > 0: + neighbor_index = neighbors_indices[np.random.choice(len(neighbors_indices))] + neighbor = self.searchspace.get_param_configs_at_indices([neighbor_index])[0] + X[ind_index] = np.array(neighbor) + + return X + + +class RepairConfig(Repair): + + def _do( + self, + problem: TuningProblem, + X : np.ndarray, + **kwargs, + ) -> np.ndarray: + for ind_index in range(X.shape[0]): + params_config_tuple = tuple(X[ind_index]) + if problem.searchspace.is_param_config_valid(params_config_tuple): + continue + for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + neighbors_indices = problem.searchspace.get_neighbors_indices_no_cache(params_config_tuple, neighbor_method) + if len(neighbors_indices) > 0: + neighbor_index = neighbors_indices[np.random.choice(len(neighbors_indices))] + neighbor = problem.searchspace.get_param_configs_at_indices([neighbor_index])[0] + X[ind_index] = np.array(neighbor) + break + + 
return X diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 297538b30..3cced7390 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -79,8 +79,27 @@ def default(self, obj): return super(NpEncoder, self).default(obj) +def get_result_cost( + result: dict, + objectives: list[str], + objective_higher_is_better: list[bool] +) -> list[float]: + """Returns the cost of a result, taking the objective directions into account.""" + # return the highest cost for invalid results + if 'error' in result: + return [sys.float_info.max] * len(objectives) + + cost_vec = list() + for objective, is_maximizer in zip(objectives, objective_higher_is_better): + objective_value = result[objective] + cost = -objective_value if is_maximizer else objective_value + cost_vec.append(cost) + + return cost_vec + + def check_result_type(r): - "Check if the result has the right format." + """Check if the result has the right format.""" if 'error' in r: return isinstance(r['error'], ErrorConfig) return True @@ -198,10 +217,20 @@ def check_argument_list(kernel_name, kernel_string, args): def check_stop_criterion(to): """Checks if max_fevals is reached or time limit is exceeded.""" - if "max_fevals" in to and len(to.unique_results) >= to.max_fevals: - raise StopCriterionReached("max_fevals reached") - if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit): - raise StopCriterionReached("time limit exceeded") + if "max_fevals" in to: + if to.verbose: + print(f"Progress: {len(to.unique_results)/to.max_fevals}") + if len(to.unique_results) >= to.max_fevals: + raise StopCriterionReached("max_fevals reached") + if "time_limit" in to: + # if to.verbose: + # print(f"Progress: {((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) / to.time_limit}") + # if (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit): + # raise StopCriterionReached("time limit exceeded") + if to.verbose: + print(f"Progress: {((time.perf_counter() - to.start_time)) / to.time_limit}") + if (((time.perf_counter() - to.start_time)) > to.time_limit): + raise StopCriterionReached("time limit exceeded") def check_tune_params_list(tune_params, observers, simulation_mode=False): @@ -405,35 +434,37 @@ def get_best_config(results, objective, objective_higher_is_better=False): return best_config -def get_pareto_front(results, objective, objective_higher_is_better): - assert isinstance(objective, list) +def get_pareto_results( + results: list[dict], + objectives: list[str], + objective_higher_is_better: list[bool], + mark_optima=True +): + assert isinstance(results, list) + assert isinstance(objectives, list) - nonerror_results = list(filter(lambda x: "error" not in x, results)) - front = [] + cost_points = list() + for res in results: + cost_point = get_result_cost(res, objectives, objective_higher_is_better) + cost_points.append(cost_point) + + cost_points = np.asarray(cost_points, dtype=float) + is_efficient = np.ones(cost_points.shape[0], dtype=bool) + + # A point `p` in a finite set of points `S` is said to be non-dominated if there is no other point `q` in `S` where `q(i) <= p(i)` for all `i` + for idx, cost_point in enumerate(cost_points): + if not is_efficient[idx]: + continue + is_efficient[is_efficient] = np.any(cost_points[is_efficient] <= cost_point, axis=1) + + # select and mark the optimal points + front = list() + for idx in np.flatnonzero(is_efficient): + res = results[idx] + if mark_optima: + res['optimal'] = True + 
front.append(res) - # A point `p` in a finite set of points `S` is said to be maximal or non-dominated if there is no other point `q` in `S` whose `q(i)` are all >= `p(i)` - # So for all q there must be a q(i) such that q(i) < p(i) - for p in nonerror_results: - p_nondom = True - for q in nonerror_results: - if p is q: - continue - # \forall(i): q(i) >= p(i)? - flag = True - for i, higher_is_better in zip(objective, objective_higher_is_better): - p_i, q_i = p[i], q[i] - if not higher_is_better: - p_i, q_i = -p_i, -q_i - if q_i < p_i: - flag = False - break - if flag: - p_nondom = False - break - if p_nondom: - p["optimal"] = True - front.append(p) - return front @@ -1197,7 +1228,8 @@ def process_cache(cache, kernel_options, tuning_options, runner): # if file exists else: - cached_data = read_cache(cache) + # cached_data = read_cache(cache) + cached_data = read_cache(cache, open_cache=(not runner.simulation_mode)) # if in simulation mode, use the device name from the cache file as the runner device name if runner.simulation_mode: @@ -1327,3 +1359,45 @@ def cuda_error_check(error): if error != nvrtc.nvrtcResult.NVRTC_SUCCESS: _, desc = nvrtc.nvrtcGetErrorString(error) raise RuntimeError(f"NVRTC error: {desc.decode()}") + + +def restriction_from_cache(cache: dict): + param_config_string_set = set( + param_config_string + for param_config_string, result in cache['cache'].items() + if 'error' not in result + ) + + # print(f"WTH: {len(config_strings)}/{len(list(cache['cache'].keys()))}") + + def _restrictions_func(params_config: dict) -> bool: + nonlocal param_config_string_set + + param_config_string = ",".join(map(str, params_config.values())) + return param_config_string in param_config_string_set + + return _restrictions_func + + +def tune_args_from_cache_file(cache_file_path) -> dict: + with open(cache_file_path, mode="r") as cache_file: + cache = json.load(cache_file) + + tune_args = dict( + kernel_name=cache['kernel_name'], + kernel_source="", + problem_size=tuple(cache['problem_size']), + arguments=[], + tune_params=cache['tune_params'], + restrictions=restriction_from_cache(cache), + cache=cache_file_path, + ) + + return tune_args + + +def results_from_cache_file(cache_file_path) -> list[dict]: + with open(cache_file_path, mode="r") as cache_file: + cache = json.load(cache_file) + + return list(cache['cache'].values()) From ee08aaf76412123613f088757c6978ef10d15f03 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 Jan 2026 01:20:22 +0100 Subject: [PATCH 06/18] Replace the error key "error" with "__error__" to make collision with user defined params and metrics far less likely. 
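To make the motivation concrete (a hypothetical case, not from the codebase, assuming "from kernel_tuner import util"): a user-defined metric or tunable parameter that happens to be named "error" would have collided with the old sentinel key, whereas the dunder name stays out of the user's namespace:

    # user data: here "error" is a legitimate metric value, not a failure marker
    result = {"block_size_x": 128, "time": 1.84, "error": 0.0013}

    # tuner sentinel: a failed configuration is now marked unambiguously
    failed = {"block_size_x": 1024, "__error__": util.RuntimeFailedConfig()}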
--- kernel_tuner/core.py | 8 ++++---- kernel_tuner/file_utils.py | 2 +- kernel_tuner/runners/sequential.py | 4 ++-- kernel_tuner/strategies/common.py | 3 +-- kernel_tuner/util.py | 10 +++++----- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 594ea3b78..6fb478ce2 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -481,14 +481,14 @@ def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_sett f"skipping config {util.get_instance_string(instance.params)} reason: too many resources requested for launch" ) # result[objective] = util.RuntimeFailedConfig() - result['error'] = util.RuntimeFailedConfig() + result['__error__'] = util.RuntimeFailedConfig() else: logging.debug("benchmark encountered runtime failure: " + str(e)) print("Error while benchmarking:", instance.name) raise e assert util.check_result_type(result), "The error in a result MUST be an actual error." - + return result def check_kernel_output( @@ -576,7 +576,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose) if isinstance(instance, util.ErrorConfig): # result[to.objective] = util.InvalidConfig() - result['error'] = util.InvalidConfig() + result['__error__'] = util.InvalidConfig() else: # Preprocess the argument list. This is required to deal with `MixedPrecisionArray`s gpu_args = _preprocess_gpu_arguments(gpu_args, params) @@ -587,7 +587,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, func = self.compile_kernel(instance, verbose) if not func: # result[to.objective] = util.CompilationFailedConfig() - result['error'] = util.CompilationFailedConfig() + result['__error__'] = util.CompilationFailedConfig() else: # add shared memory arguments to compiled module if kernel_options.smem_args is not None: diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index fb53d5956..9d7b7042c 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -111,7 +111,7 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim # encode the validity of the configuration # out["invalidity"] = get_configuration_validity(result[objective]) - out["invalidity"] = get_configuration_validity(result['error']) + out["invalidity"] = get_configuration_validity(result['__error__']) # Kernel Tuner does not support producing results of configs that fail the correctness check # therefore correctness is always 1 diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index 95bfff500..5a6f1a5a3 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -95,12 +95,12 @@ def run(self, parameter_space, tuning_options): params.update(result) # if tuning_options.objective in result and isinstance(result[tuning_options.objective], ErrorConfig): - if 'error' in result: + if '__error__' in result: logging.debug('kernel configuration was skipped silently due to compile or runtime failure') # only compute metrics on configs that have not errored # if tuning_options.metrics and not isinstance(params.get(tuning_options.objective), ErrorConfig): - if 'error' in params: + if '__error__' in params: params = process_metrics(params, tuning_options.metrics) # get the framework time by estimating based on other times diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index 
e47f1edb0..4dc3a1bd4 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -96,8 +96,7 @@ def __call__(self, x, check_restrictions=True): legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose) if not legal: result = params_dict - # result[self.tuning_options.objective] = util.InvalidConfig() - result['error'] = util.InvalidConfig() + result['__error__'] = util.InvalidConfig() self.illegal_config_count += 1 if legal: diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 3cced7390..b1d5380fe 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -86,7 +86,7 @@ def get_result_cost( ) -> list[float]: """Returns the cost of a result, taking the objective directions into account.""" # return the highest cost for invalid results - if 'error' in result: + if '__error__' in result: return [sys.float_info.max] * len(objectives) cost_vec = list() @@ -100,8 +100,8 @@ def get_result_cost( def check_result_type(r): """Check if the result has the right format.""" - if 'error' in r: - return isinstance(r['error'], ErrorConfig) + if '__error__' in r: + return isinstance(r['__error__'], ErrorConfig) return True @@ -429,7 +429,7 @@ def get_best_config(results, objective, objective_higher_is_better=False): ignore_val = sys.float_info.max if not objective_higher_is_better else -sys.float_info.max best_config = func( results, - key=lambda x: x[objective] if 'error' not in x and isinstance(x[objective], float) else ignore_val, + key=lambda x: x[objective] if '__error__' not in x and isinstance(x[objective], float) else ignore_val, ) return best_config @@ -1365,7 +1365,7 @@ def restriction_from_cache(cache: dict): param_config_string_set = set( param_config_string for param_config_string, result in cache['cache'].items() - if 'error' not in result + if '__error__' not in result ) # print(f"WTH: {len(config_strings)}/{len(list(cache['cache'].keys()))}") From 5022750e5a1f0072d44246c0d6ebff1554a0580c Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 Jan 2026 01:21:32 +0100 Subject: [PATCH 07/18] New pymoo version needs at least Python 3.10 --- noxfile.py | 6 +++--- pyproject.toml | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index e32bbb588..fe26ef1d4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -15,7 +15,7 @@ # set the test parameters verbose = False -python_versions_to_test = ["3.9", "3.10", "3.11", "3.12"] +python_versions_to_test = ["3.10", "3.11", "3.12"] nox.options.stop_on_first_error = True nox.options.error_on_missing_interpreters = True nox.options.default_venv_backend = 'virtualenv' @@ -38,7 +38,7 @@ def create_settings(session: Session) -> None: venvbackend = nox.options.default_venv_backend envdir = "" # conversion from old notenv.txt - if noxenv_file_path.exists(): + if noxenv_file_path.exists(): venvbackend = noxenv_file_path.read_text().strip() noxenv_file_path.unlink() # write the settings @@ -91,7 +91,7 @@ def check_development_environment(session: Session) -> None: # packages = re.findall(r"• Installing .* | • Updating .*", output, flags=re.MULTILINE) # assert packages is not None session.warn(f""" - Your development environment is out of date ({installs} installs, {updates} updates). + Your development environment is out of date ({installs} installs, {updates} updates). Update with 'poetry install --sync', using '--with' and '-E' for optional dependencies, extras respectively. 
Note: {removals} packages are not in the specification (i.e. installed manually) and may be removed. To preview changes, run 'poetry install --sync --dry-run' (with optional dependencies and extras).""") diff --git a/pyproject.toml b/pyproject.toml index 48034bf15..2791b929c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ generate-setup-file = false # ATTENTION: if anything is changed here, run `poetry update` [tool.poetry.dependencies] -python = ">=3.9,<3.13" # NOTE when changing the supported Python versions, also change the test versions in the noxfile +python = ">=3.10,<3.13" # NOTE when changing the supported Python versions, also change the test versions in the noxfile numpy = "^1.26.0" # Python 3.12 requires numpy at least 1.26 scipy = ">=1.11.0" packaging = "*" # required by file_utils @@ -84,6 +84,7 @@ hip-python = { version = "*", optional = true } # Tutorial (for the notebooks used in the examples) jupyter = { version = "^1.0.0", optional = true } matplotlib = { version = "^3.5.0", optional = true } +pymoo = "^0.6.1.6" [tool.poetry.extras] cuda = ["pycuda", "nvidia-ml-py", "pynvml"] From cd3f16afde81ee913e22242e0ef56c0220f6ef1f Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 Jan 2026 01:20:40 +0100 Subject: [PATCH 08/18] Use pymoo's impl of non dom sorting --- kernel_tuner/util.py | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index b1d5380fe..06328b730 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -43,6 +43,8 @@ from kernel_tuner.observers.nvml import NVMLObserver +from pymoo.util.nds.find_non_dominated import find_non_dominated + # number of special values to insert when a configuration cannot be measured @@ -443,29 +445,24 @@ def get_pareto_results( assert isinstance(results, list) assert isinstance(objectives, list) - cost_points = list() - for res in results: - cost_point = get_result_cost(res, objectives, objective_higher_is_better) - cost_points.append(cost_point) - - cost_points = np.asarray(cost_points, dtype=float) - is_efficient = np.ones(cost_points.shape[0], dtype=bool) - - # A point `p` in a finite set of points `S` is said to be non-dominated if there is no other point `q` in `S` where `q(i) <= p(i)` for all `i` - for idx, cost_point in enumerate(cost_points): - if not is_efficient[idx]: + n_rows = len(results) + n_cols = len(objectives) + Y = np.empty((n_rows, n_cols), dtype=float) + for row_idx, result in enumerate(results): + if "__error__" in result: + Y[row_idx, :] = sys.float_info.max continue - is_efficient[is_efficient] = np.any(cost_points[is_efficient] <= cost_point, axis=1) - - # select and mark the optimal points - front = list() - for idx in np.flatnonzero(is_efficient): - res = results[idx] - if mark_optima: - res['optimal'] = True - front.append(res) - - return front + for col_idx, (objective_name, higher_is_better) in enumerate(zip(objectives, objective_higher_is_better)): + y = result[objective_name] + # negate for maximizers to optimize through minimization + Y[row_idx, col_idx] = -y if higher_is_better else y + + pf_indices = find_non_dominated(Y) + pf = [results[idx] for idx in pf_indices] + if mark_optima: + for p in pf: + p["optimal"] = True + return pf def get_config_string(params, keys=None, units=None): From e8cbcda6f83e1070a9e2014640fcd57531125484 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 
Jan 2026 01:20:45 +0100 Subject: [PATCH 09/18] - update tune_cache() interface - improve inferred restrictions - improve check_restriction() to make it less inefficient for `FunctionConstraint`s --- kernel_tuner/interface.py | 20 +++++++++------ kernel_tuner/util.py | 53 +++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 0c0282ad0..0a78bc051 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -744,21 +744,25 @@ def tune_kernel( tune_kernel.__doc__ = _tune_kernel_docstring -def tune_cache( - cache, +def tune_cache(*, + cache_path, restrictions = None, **kwargs, ): - tune_args = util.tune_args_from_cache_file(cache) + cache = util.read_cache(cache_path, open_cache=False) + tune_args = util.infer_args_from_cache(cache) + _restrictions = [util.infer_restrictions_from_cache(cache)] + + # Add the user provided restrictions if restrictions: - new_restrictions = [tune_args['restrictions']] if isinstance(restrictions, list): - new_restrictions.extend(restrictions) + _restrictions.extend(restrictions) else: - new_restrictions.append(restrictions) - tune_args['restrictions'] = new_restrictions + raise ValueError("The restrictions must be a list()") + tune_args.update(kwargs) - return tune_kernel(simulation_mode=True, **tune_args) + + return tune_kernel(**tune_args, cache=cache_path, restrictions=_restrictions, simulation_mode=True) _run_kernel_docstring = """Compile and run a single kernel diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 06328b730..5d872ce76 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -282,8 +282,11 @@ def check_block_size_params_names_list(block_size_names, tune_params): def check_restriction(restrict, params: dict) -> bool: """Check whether a configuration meets a search space restriction.""" + # if it's a function python-constraint it can be called directly + if isinstance(restrict, FunctionConstraint): + return restrict._func(*params.values()) # if it's a python-constraint, convert to function and execute - if isinstance(restrict, Constraint): + elif isinstance(restrict, Constraint): restrict = convert_constraint_restriction(restrict) return restrict(list(params.values())) # if it's a string, fill in the parameters and evaluate @@ -1358,43 +1361,33 @@ def cuda_error_check(error): raise RuntimeError(f"NVRTC error: {desc.decode()}") -def restriction_from_cache(cache: dict): - param_config_string_set = set( - param_config_string - for param_config_string, result in cache['cache'].items() +def infer_restrictions_from_cache(cache: dict): + param_names = cache["tune_params_keys"] + valid_param_config_set = set( + tuple(result[param_name] for param_name in param_names) + for result in cache['cache'].values() if '__error__' not in result ) - # print(f"WTH: {len(config_strings)}/{len(list(cache['cache'].keys()))}") - - def _restrictions_func(params_config: dict) -> bool: - nonlocal param_config_string_set - - param_config_string = ",".join(map(str, params_config.values())) - return param_config_string in param_config_string_set + def restrictions_func(*param_values) -> bool: + nonlocal valid_param_config_set + return param_values in valid_param_config_set - return _restrictions_func + return FunctionConstraint(restrictions_func) -def tune_args_from_cache_file(cache_file_path) -> dict: - with open(cache_file_path, mode="r") as cache_file: - cache = json.load(cache_file) - - tune_args = dict( - 
kernel_name=cache['kernel_name'], - kernel_source="", - problem_size=tuple(cache['problem_size']), - arguments=[], - tune_params=cache['tune_params'], - restrictions=restriction_from_cache(cache), - cache=cache_file_path, +def infer_args_from_cache(cache: dict) -> dict: + inferred_args = dict( + kernel_name = cache['kernel_name'], + kernel_source = "", + problem_size = tuple(cache['problem_size']), + arguments = [], + tune_params = cache['tune_params'], + # restrictions = infer_restrictions_from_cache(cache), ) - return tune_args - + return inferred_args -def results_from_cache_file(cache_file_path) -> list[dict]: - with open(cache_file_path, mode="r") as cache_file: - cache = json.load(cache_file) +def get_results_from_cache(cache) -> list[dict]: return list(cache['cache'].values()) From 1fa76b2368f1e9a2c6ba015afb9f23212db20d48 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 Jan 2026 01:24:38 +0100 Subject: [PATCH 10/18] fixed and improved version of the pymoo strat --- kernel_tuner/interface.py | 5 +- kernel_tuner/strategies/pymoo_minimize.py | 230 +++++++++++----------- 2 files changed, 117 insertions(+), 118 deletions(-) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 0a78bc051..42a6577e2 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -76,7 +76,8 @@ "simulated_annealing": simulated_annealing, "firefly_algorithm": firefly_algorithm, "bayes_opt": bayes_opt, - "pymoo_minimize": pymoo_minimize, + "nsga2": pymoo_minimize, + "nsga3": pymoo_minimize, } @@ -466,6 +467,7 @@ def __deepcopy__(self, _): ("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")), ("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")), ("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")), + ("seed", ("""The random seed.""", "int")), ] ) @@ -580,6 +582,7 @@ def tune_kernel( objective=None, objective_higher_is_better=None, objectives=None, + seed=None, ): start_overhead_time = perf_counter() if log: diff --git a/kernel_tuner/strategies/pymoo_minimize.py b/kernel_tuner/strategies/pymoo_minimize.py index 25060c04c..0ac530941 100644 --- a/kernel_tuner/strategies/pymoo_minimize.py +++ b/kernel_tuner/strategies/pymoo_minimize.py @@ -1,92 +1,112 @@ """The Pymoo strategy that uses a minimizer method for searching through the parameter space.""" +from typing import assert_never import numpy as np from pymoo.algorithms.moo.nsga2 import NSGA2 from pymoo.algorithms.moo.nsga3 import NSGA3 -from pymoo.core.mutation import Mutation +from pymoo.core.algorithm import Algorithm from pymoo.core.problem import ElementwiseProblem -from pymoo.core.sampling import Sampling +from pymoo.core.duplicate import ElementwiseDuplicateElimination from pymoo.core.termination import NoTermination, Termination +from pymoo.core.sampling import Sampling +from pymoo.core.mutation import Mutation from pymoo.core.repair import Repair -from pymoo.operators.crossover.ux import UniformCrossover from pymoo.operators.crossover.pntx import TwoPointCrossover -from pymoo.optimize import minimize -from pymoo.util.ref_dirs import get_reference_directions -from pymoo.indicators.igd import IGD from kernel_tuner import util +from kernel_tuner.runners.runner import Runner from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import ( CostFunc, get_strategy_docstring, - setup_method_arguments, ) -# TODO: Add the PyMOO algorithms 
-supported_methods = [ - "NSGA2", - "NSGA3", +from enum import StrEnum + +class SupportedAlgos(StrEnum): + NSGA2 = "nsga2" + NSGA3 = "nsga3" + +supported_algos = [ algo.value for algo in SupportedAlgos ] + +supported_crossover_opers = [ + # "uniform-crossover", + # "single-point-crossover", + "two-point-crossover", ] _options = { - "method": (f"Pymoo optimization algorithm to use, choose any from {supported_methods}", "NSGA2"), - "pop_size": ("Initial population size", 100), + "pop_size": ("Initial population size", 20), + "crossover_operator": ("The crossover operator", "two-point-crossover"), + "crossover_prob": ("Crossover probability", 1.0), + "mutation_prob": ("Mutation probability", 0.1), + "ref_dirs_list": ("The list of reference directions on the unit hyperplane in the objective space to guide NSGA-III, see https://pymoo.org/misc/reference_directions.html for more information.", []), } +_option_defaults = { key: option_pair[1] for key, option_pair in _options.items() } + def tune( searchspace: Searchspace, - runner, + runner: Runner, tuning_options, ): + algo_name: str = tuning_options.strategy strategy_options = tuning_options.strategy_options - if "method" in strategy_options: - method = strategy_options["method"] + algo_name = algo_name.lower() + if algo_name not in SupportedAlgos: + raise ValueError(f"\"{algo_name}\" is not supported. The supported algorithms are: {supported_algos}\n") else: - (_, method) = _options["method"] - print(f"{method=}") + algo_name = SupportedAlgos(algo_name) - if "pop_size" in strategy_options: - pop_size = strategy_options["pop_size"] - else: - (_, pop_size) = _options["pop_size"] - print(f"{pop_size=}") + pop_size = strategy_options.get("pop_size", _option_defaults["pop_size"]) + crossover_prob = strategy_options.get("crossover_prob", _option_defaults["crossover_prob"]) + mutation_prob = strategy_options.get("mutation_prob", _option_defaults["mutation_prob"]) + ref_dirs_list = strategy_options.get("ref_dirs_list", _option_defaults["ref_dirs_list"]) - # scale variables in x to make 'eps' relevant for multiple variables - cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False) + if algo_name == "nsga3" and len(ref_dirs_list) == 0: + raise ValueError("NSGA-III requires reference directions to be specified, but they are missing.") - bounds, x0, _ = cost_func.get_bounds_x0_eps() - kwargs = setup_method_arguments(method, bounds) + cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False) problem = TuningProblem( - cost_func=cost_func, - n_var=len(tuning_options.tune_params), - n_obj=len(tuning_options.objective), + cost_func = cost_func, + n_var = len(tuning_options.tune_params), + n_obj = len(tuning_options.objective), ) + sampling = TuningSearchspaceRandomSampling(searchspace) + crossover = TwoPointCrossover(prob = crossover_prob) + mutation = TuningParamConfigNeighborhoodMutation(prob = mutation_prob, searchspace = searchspace) + repair = TuningParamConfigRepair() + eliminate_duplicates = TuningParamConfigDuplicateElimination() + # algorithm_type = get_algorithm(method) - algorithm = None - if method == "NSGA2": - algorithm = NSGA2( - pop_size = pop_size, - sampling = SearchspaceRandomSampling(searchspace), - crossover = TwoPointCrossover(), - mutation = MutateToNeighbor(searchspace, prob = 0.5), - repair = RepairConfig(), - # save_history = True, - ) - elif method == "NSGA3": - algorithm = NSGA3( - pop_size = pop_size, - ref_dirs = get_reference_directions("das-dennis", len(tuning_options.objective), n_partitions = 
26),
-            sampling = SearchspaceRandomSampling(searchspace),
-            crossover = UniformCrossover(prob = 0.6),
-            mutation = MutateToNeighbor(searchspace, prob = 0.5),
-            # repair = MyRepair(),
-            # save_history = True,
-        )
+    algo: Algorithm
+    match algo_name:
+        case SupportedAlgos.NSGA2:
+            algo = NSGA2(
+                pop_size = pop_size,
+                sampling = sampling,
+                crossover = crossover,
+                mutation = mutation,
+                repair = repair,
+                eliminate_duplicates = eliminate_duplicates,
+            )
+        case SupportedAlgos.NSGA3:
+            algo = NSGA3(
+                pop_size = pop_size,
+                ref_dirs = ref_dirs_list,
+                sampling = sampling,
+                crossover = crossover,
+                mutation = mutation,
+                repair = repair,
+                eliminate_duplicates = eliminate_duplicates,
+            )
+        case _ as unreachable:
+            assert_never(unreachable)

     # TODO:
     # - CostFunc throws exception when done, so isn't really needed
@@ -94,37 +114,28 @@ def tune(
     if "max_fevals" in tuning_options.strategy_options or "time_limit" in tuning_options.strategy_options:
         termination = NoTermination()

-    pf = problem.pareto_front()
-    igd_ind = IGD(pf, zero_to_one=True)
-
     try:
-        _ = algorithm.setup(
+        algo.setup(
             problem,
-            # termination = termination,
-            termination=("n_gen", 20),
-            seed=1,
-            verbose=True,
+            termination = termination,
+            verbose = tuning_options.verbose,
+            progress = tuning_options.verbose,
+            seed = tuning_options.seed,
         )

-        while algorithm.has_next():
-            algorithm.next()
-
-        illegal_count = cost_func.illegal_config_count
-        total_count = cost_func.total_config_count
-        print(f"config valid: {total_count - illegal_count}/{total_count} ({100 * (1 - (illegal_count / total_count)):.4}%)")
-
-        print("IGD: ", igd_ind(algorithm.opt.get("F")))
+        while algo.has_next():
+            algo.next()

     except util.StopCriterionReached as e:
         if tuning_options.verbose:
             print(f"Stopped because of {e}")

-    opt_result = cost_func.results
+    results = cost_func.results

-    if opt_result and tuning_options.verbose:
-        print(f"{opt_result.message=}")
+    if results and tuning_options.verbose:
+        print(f"{len(results)=}")

-    return opt_result
+    return results


 tune.__doc__ = get_strategy_docstring("Pymoo minimize", _options)
@@ -134,8 +145,8 @@ class TuningProblem(ElementwiseProblem):
     def __init__(
         self,
         cost_func: CostFunc,
-        n_var,
-        n_obj,
+        n_var: int,
+        n_obj: int,
         **kwargs,
     ):
         super().__init__(
@@ -147,21 +158,12 @@ def __init__(
         self.searchspace = cost_func.searchspace
         self.tuning_options = cost_func.tuning_options

-    def _evaluate(
-        self,
-        x,
-        out,
-        *args,
-        **kwargs,
-    ):
-        F = self.cost_func(x)
+    def _evaluate( self, x, out, *args, **kwargs, ):
+        # A copy of `x` is made to make sure sharing does not happen
+        F = self.cost_func(tuple(x))
         out["F"] = F

-    def _calc_pareto_front(
-        self,
-        *args,
-        **kwargs
-    ) -> np.ndarray | None:
+    def _calc_pareto_front( self, *args, **kwargs, ):
         # Can only compute the pareto front if we are in simulation mode.
         if not self.tuning_options.simulation_mode:
             return None
@@ -179,14 +181,11 @@ def _calc_pareto_front(
             cost = util.get_result_cost(res, objectives, higher_is_better)
             pareto_front_list.append(cost)

-        return np.array(pareto_front_list)
+        return np.array(pareto_front_list, dtype=float)


 class TuningTermination(Termination):
-    def __init__(
-        self,
-        tuning_options,
-    ):
+    def __init__( self, tuning_options, ):
         super().__init__()
         self.tuning_options = tuning_options
         self.reason = None
@@ -205,35 +204,26 @@ def _update(
         return 1.0


-class SearchspaceRandomSampling(Sampling):
-    def __init__(
-        self,
-        searchspace,
-    ):
+class TuningSearchspaceRandomSampling(Sampling):
+    def __init__( self, searchspace, ):
         super().__init__()
         self.searchspace = searchspace

-    def _do(
-        self,
-        problem,
-        n_samples: int,
-        **kwargs,
-    ):
-        X = self.searchspace.get_random_sample(n_samples)
-        return X
+    def _do( self, problem, n_samples: int, **kwargs, ):
+        sample = self.searchspace.get_random_sample(n_samples)
+        return np.array(sample, dtype=object)


-class MutateToNeighbor(Mutation):
+class TuningParamConfigNeighborhoodMutation(Mutation):
     def __init__(
         self,
+        prob,
         searchspace: Searchspace,
-        prob=1.0,
-        prob_var=None,
         **kwargs
     ):
         super().__init__(
-            prob=prob,
-            prob_var=prob_var,
+            prob = prob,
+            # prob_var = None,
             **kwargs,
         )
         self.searchspace = searchspace
@@ -244,27 +234,27 @@ def _do(
         X: np.ndarray,
         **kwargs,
     ):
-        for ind_index in range(X.shape[0]):
-            params_config_tuple = tuple(X[ind_index])
+        for X_index in range(X.shape[0]):
+            params_config_tuple = tuple(X[X_index])
             neighbors_indices = self.searchspace.get_neighbors_indices_no_cache(params_config_tuple, neighbor_method="Hamming")
             if len(neighbors_indices) > 0:
                 neighbor_index = neighbors_indices[np.random.choice(len(neighbors_indices))]
                 neighbor = self.searchspace.get_param_configs_at_indices([neighbor_index])[0]
-                X[ind_index] = np.array(neighbor)
+                X[X_index] = np.array(neighbor, dtype=object)

         return X


-class RepairConfig(Repair):
+class TuningParamConfigRepair(Repair):
     def _do(
         self,
         problem: TuningProblem,
-        X : np.ndarray,
+        X: np.ndarray,
         **kwargs,
-    ) -> np.ndarray:
-        for ind_index in range(X.shape[0]):
-            params_config_tuple = tuple(X[ind_index])
+    ):
+        for X_index in range(X.shape[0]):
+            params_config_tuple = tuple(X[X_index])
             if problem.searchspace.is_param_config_valid(params_config_tuple):
                 continue
             for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]:
@@ -272,7 +262,13 @@ def _do(
                 if len(neighbors_indices) > 0:
                     neighbor_index = neighbors_indices[np.random.choice(len(neighbors_indices))]
                     neighbor = problem.searchspace.get_param_configs_at_indices([neighbor_index])[0]
-                    X[ind_index] = np.array(neighbor)
+                    X[X_index] = np.array(neighbor, dtype=object)
                     break

         return X
+
+
+class TuningParamConfigDuplicateElimination(ElementwiseDuplicateElimination):
+
+    def is_equal(self, a, b):
+        return np.all(a.X == b.X)

From 393a79bf33fe9bc808cdabb9a5d5e14fe7864f73 Mon Sep 17 00:00:00 2001
From: maric-a-b <61234439+maric-a-b@users.noreply.github.com>
Date: Sun, 4 Jan 2026 01:25:47 +0100
Subject: [PATCH 11/18] improves infeasible evals counter

---
 kernel_tuner/runners/sequential.py | 11 +++++++++--
 kernel_tuner/runners/simulation.py | 10 ++++++----
 kernel_tuner/strategies/common.py | 11 ++++-------
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py
index 5a6f1a5a3..79d11f889 100644
--- a/kernel_tuner/runners/sequential.py
+++ b/kernel_tuner/runners/sequential.py
@@ -44,8 +44,15 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         #move data to the GPU
         self.gpu_args = self.dev.ready_argument_list(kernel_options.arguments)

+        # It is the task of the cost function to increment these counters
+        self.config_eval_count = 0
+        self.infeasable_config_eval_count = 0
+
     def get_environment(self, tuning_options):
-        return self.dev.get_environment()
+        env = self.dev.get_environment()
+        env["config_eval_count"] = self.config_eval_count
+        env["infeasable_config_eval_count"] = self.infeasable_config_eval_count
+        return env

     def run(self, parameter_space, tuning_options):
         """Iterate through the entire parameter space using a single Python process.
@@ -104,7 +111,7 @@ def run(self, parameter_space, tuning_options):
                 params = process_metrics(params, tuning_options.metrics)

             # get the framework time by estimating based on other times
-            total_time = 1000 * ((perf_counter() - self.start_time) - warmup_time)
+            total_time = 1000 * ((perf_counter() - self.start_time) - warmup_time)
             params['strategy_time'] = self.last_strategy_time
             params['framework_time'] = max(total_time - (params['compile_time'] + params['verification_time'] + params['benchmark_time'] + params['strategy_time']), 0)
             params['timestamp'] = str(datetime.now(timezone.utc))
diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py
index 1ede287dd..cd181288a 100644
--- a/kernel_tuner/runners/simulation.py
+++ b/kernel_tuner/runners/simulation.py
@@ -59,10 +59,16 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
         self.last_strategy_time = 0
         self.units = {}

+        # It is the task of the cost function to increment these counters
+        self.config_eval_count = 0
+        self.infeasable_config_eval_count = 0
+
     def get_environment(self, tuning_options):
         env = self.dev.get_environment()
         env["simulation"] = True
         env["simulated_time"] = tuning_options.simulated_time
+        env["config_eval_count"] = self.config_eval_count
+        env["infeasable_config_eval_count"] = self.infeasable_config_eval_count
         return env

     def run(self, parameter_space, tuning_options):
@@ -91,8 +97,6 @@ def run(self, parameter_space, tuning_options):
             if tuning_options.cache and x_int in tuning_options.cache:
                 result = tuning_options.cache[x_int].copy()

-                assert util.check_result_type(result)
-
                 # Simulate behavior of sequential runner that when a configuration is
                 # served from the cache by the sequential runner, the compile_time,
                 # verification_time, and benchmark_time are set to 0.
@@ -127,8 +131,6 @@ def run(self, parameter_space, tuning_options): self.start_time = perf_counter() result['framework_time'] = total_time - self.last_strategy_time - assert util.check_result_type(result) - results.append(result) continue diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index 4dc3a1bd4..1901476f2 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -60,15 +60,11 @@ def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling= self.scaling = scaling self.searchspace = searchspace self.results = [] - self.total_config_count = 0 - self.illegal_config_count = 0 def __call__(self, x, check_restrictions=True): """Cost function used by almost all strategies.""" self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time) - self.total_config_count += 1 - # error value to return for numeric optimizers that need a numerical value logging.debug('_cost_func called') logging.debug('x: ' + str(x)) @@ -76,6 +72,8 @@ def __call__(self, x, check_restrictions=True): # check if max_fevals is reached or time limit is exceeded util.check_stop_criterion(self.tuning_options) + self.runner.config_eval_count += 1 + # snap values in x to nearest actual value for each parameter, unscale x if needed if self.snap: if self.scaling: @@ -97,11 +95,10 @@ def __call__(self, x, check_restrictions=True): if not legal: result = params_dict result['__error__'] = util.InvalidConfig() - self.illegal_config_count += 1 + self.runner.infeasable_config_eval_count += 1 if legal: - assert ('error' not in result), "A legal config MUST NOT have an error result." - if 'error' in result: exit() + assert ('__error__' not in result), "A legal config MUST NOT have an error result." 
# compile and benchmark this instance res = self.runner.run([params], self.tuning_options) From e576048ceae09def97e202f98cc3258ab2964309 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Sun, 4 Jan 2026 01:28:58 +0100 Subject: [PATCH 12/18] Removes superfluous print --- kernel_tuner/interface.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 42a6577e2..a158edc19 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -708,7 +708,6 @@ def tune_kernel( objective = objective[0] objective_higher_is_better = objective_higher_is_better[0] best_config = util.get_best_config(results, objective, objective_higher_is_better) - print(best_config) # add the best configuration to env env['best_config'] = best_config if not device_options.quiet: From 86f5ea7e54ab6053d75d2f21c950de1f319b81f5 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 19:57:42 +0100 Subject: [PATCH 13/18] removes unused func --- kernel_tuner/util.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index 5d872ce76..fc2c941ed 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -1387,7 +1387,3 @@ def infer_args_from_cache(cache: dict) -> dict: ) return inferred_args - - -def get_results_from_cache(cache) -> list[dict]: - return list(cache['cache'].values()) From f6c795b26ddabd2efb3f44f26b7e58b07d115a24 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:09:38 +0100 Subject: [PATCH 14/18] improves `tune_kernel()` code --- kernel_tuner/interface.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index a158edc19..c13a3d0a1 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -592,23 +592,19 @@ def tune_kernel( _check_user_input(kernel_name, kernelsource, arguments, block_size_names) - # default objective if none is specified - # if len(list(objective)) == 1: - # objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better) - - if isinstance(objective, str): - objective = [objective] - - if isinstance(objective_higher_is_better, bool): - objective_higher_is_better = [objective_higher_is_better] - if objectives: if isinstance(objectives, dict): objective = list(objectives.keys()) objective_higher_is_better = list(objectives.values()) else: raise ValueError("objectives should be a dict of (objective, higher_is_better) pairs") + else: + objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better) + objective = [objective] + objective_higher_is_better = [objective_higher_is_better] + assert isinstance(objective, list) + assert isinstance(objective_higher_is_better, list) assert len(list(objective)) == len(list(objective_higher_is_better)) # check for forbidden names in tune parameters From 0dba06b65eabbd222b16125888d25486bda9f9f9 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:09:55 +0100 Subject: [PATCH 15/18] makes tests compat with changes --- test/strategies/test_common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/strategies/test_common.py b/test/strategies/test_common.py index 29ead8615..945290494 100644 --- a/test/strategies/test_common.py +++ 
b/test/strategies/test_common.py @@ -19,6 +19,8 @@ def fake_runner(): runner = Mock() runner.last_strategy_start_time = perf_counter() runner.run.return_value = [fake_result] + runner.config_eval_count = 0 + runner.infeasable_config_eval_count = 0 return runner @@ -29,7 +31,7 @@ def test_cost_func(): x = [1, 4] tuning_options = Options(scaling=False, snap=False, tune_params=tune_params, restrictions=None, strategy_options={}, cache={}, unique_results={}, - objective="time", objective_higher_is_better=False, metrics=None) + objective=["time"], objective_higher_is_better=[False], metrics=None) runner = fake_runner() time = CostFunc(Searchspace(tune_params, None, 1024), tuning_options, runner)(x) @@ -41,7 +43,7 @@ def restrictions(_): tuning_options = Options(scaling=False, snap=False, tune_params=tune_params, restrictions=restrictions, strategy_options={}, verbose=True, cache={}, unique_results={}, - objective="time", objective_higher_is_better=False, metrics=None) + objective=["time"], objective_higher_is_better=[False], metrics=None) time = CostFunc(Searchspace(tune_params, restrictions, 1024), tuning_options, runner)(x) assert time == sys.float_info.max From c0a20b3b9b3466231d6916c050e2f5fd133a7bd4 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:22:02 +0100 Subject: [PATCH 16/18] fix import issue --- kernel_tuner/runners/sequential.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index 79d11f889..2a8481973 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -5,6 +5,7 @@ from kernel_tuner.core import DeviceInterface from kernel_tuner.runners.runner import Runner +import kernel_tuner.util as util from kernel_tuner.util import ErrorConfig, print_config_output, process_metrics, store_cache From 0efc4bcd7b84e6b17272f04a78a01f064955c685 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:25:57 +0100 Subject: [PATCH 17/18] fix seq runner --- kernel_tuner/runners/sequential.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index 2a8481973..dae34a3c8 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -102,13 +102,11 @@ def run(self, parameter_space, tuning_options): params.update(result) - # if tuning_options.objective in result and isinstance(result[tuning_options.objective], ErrorConfig): if '__error__' in result: logging.debug('kernel configuration was skipped silently due to compile or runtime failure') # only compute metrics on configs that have not errored - # if tuning_options.metrics and not isinstance(params.get(tuning_options.objective), ErrorConfig): - if '__error__' in params: + if tuning_options.metrics and '__error__' not in params: params = process_metrics(params, tuning_options.metrics) # get the framework time by estimating based on other times From a20f18eb5391bc5b1f861021c5591596310ea080 Mon Sep 17 00:00:00 2001 From: maric-a-b <61234439+maric-a-b@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:32:39 +0100 Subject: [PATCH 18/18] removes old commented out code --- kernel_tuner/core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 6fb478ce2..1cd47d297 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -480,7 +480,6 @@ def benchmark(self, 
func, gpu_args, instance, verbose, objective, skip_nvml_sett print( f"skipping config {util.get_instance_string(instance.params)} reason: too many resources requested for launch" ) - # result[objective] = util.RuntimeFailedConfig() result['__error__'] = util.RuntimeFailedConfig() else: logging.debug("benchmark encountered runtime failure: " + str(e)) @@ -575,7 +574,6 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose) if isinstance(instance, util.ErrorConfig): - # result[to.objective] = util.InvalidConfig() result['__error__'] = util.InvalidConfig() else: # Preprocess the argument list. This is required to deal with `MixedPrecisionArray`s @@ -586,7 +584,6 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, start_compilation = time.perf_counter() func = self.compile_kernel(instance, verbose) if not func: - # result[to.objective] = util.CompilationFailedConfig() result['__error__'] = util.CompilationFailedConfig() else: # add shared memory arguments to compiled module
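
For reference, below is a minimal usage sketch of the strategies introduced by this series, assuming all patches above are applied. The strategy names ("nsga2"/"nsga3"), the strategy options (pop_size, crossover_prob, mutation_prob, ref_dirs_list, max_fevals), the objectives dict, and the seed argument are taken from the diffs; the vector_add kernel, the problem size, and the GFLOP/s metric are illustrative placeholders only, not part of the patches.

import numpy as np
import kernel_tuner
from pymoo.util.ref_dirs import get_reference_directions

# Illustrative CUDA kernel; block_size_x is the tunable parameter.
kernel_string = """
__global__ void vector_add(float *c, float *a, float *b, int n) {
    int i = blockIdx.x * block_size_x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""

size = 10_000_000
a = np.random.randn(size).astype(np.float32)
b = np.random.randn(size).astype(np.float32)
c = np.zeros_like(a)
args = [c, a, b, np.int32(size)]

tune_params = {"block_size_x": [32, 64, 128, 256, 512, 1024]}

# GFLOP/s is a user-defined metric here, so it can serve as a second objective.
metrics = {"GFLOP/s": lambda p: (size / 1e9) / (p["time"] / 1e3)}

results, env = kernel_tuner.tune_kernel(
    "vector_add",
    kernel_string,
    size,
    args,
    tune_params,
    metrics=metrics,
    strategy="nsga3",
    strategy_options={
        "pop_size": 20,
        "crossover_prob": 1.0,
        "mutation_prob": 0.1,
        # NSGA-III needs reference directions in objective space (the
        # ref_dirs_list option added above); two objectives -> two dimensions.
        "ref_dirs_list": get_reference_directions("das-dennis", 2, n_partitions=12),
        "max_fevals": 200,
    },
    # minimize kernel time, maximize the GFLOP/s metric
    objectives={"time": False, "GFLOP/s": True},
    seed=1,
)

With ref_dirs_list omitted, strategy="nsga2" can be used instead; any option left out falls back to the defaults listed in _options of pymoo_minimize.py.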