From a08e5fbe732b8c92a49cf41abe3ffa06a8ad2cb9 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 Jan 2026 15:38:01 +0100 Subject: [PATCH 1/6] Implemented optimized LHS closest samples calculation --- kernel_tuner/searchspace.py | 59 +++++++++++++++++++---------- kernel_tuner/strategies/diff_evo.py | 5 ++- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index d3d00052..be669f8d 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -980,7 +980,7 @@ def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, para sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() if return_one: # if return_one is True, return the index of the closest parameter configuration (faster than finding all) - get_partial_neighbors_indices = [np.argmin(sum_of_index_differences)] + matching_indices = [np.argmin(sum_of_index_differences)] else: # find the param config indices where the difference is the smallest min_difference = np.min(sum_of_index_differences) @@ -1169,7 +1169,7 @@ def get_random_sample(self, num_samples: int) -> List[tuple]: return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples)) def get_distributed_random_sample_indices(self, num_samples: int, sampling_factor=10) -> List[int]: - """Get a distributed random sample of parameter configuration indices. Note: `get_LHS_random_sample_indices` is likely faster and better distributed.""" + """Get a distributed random sample of parameter configuration indices. 
Note: `get_LHS_sample_indices` is likely faster and better distributed.""" if num_samples > self.size: warn( f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})" @@ -1219,16 +1219,16 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple: self.__prepare_neighbors_index() target_sample_indices = list() for target_sample_param_config_indices in target_samples_param_indices: - # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) - # find the param config index where the difference is the smallest - sum_of_index_differences = np.sum(abs_index_difference, axis=1) param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) if param_index is not None: - # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() - min_index_difference_index = np.argmin(sum_of_index_differences) - target_sample_indices.append(min_index_difference_index.item()) + target_sample_indices.append(param_index) + else: + # calculate the absolute difference between the parameter value indices + abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) + # find the param config index where the difference is the smallest + sum_of_index_differences = np.sum(abs_index_difference, axis=1) + min_index_difference_index = np.argmin(sum_of_index_differences) + target_sample_indices.append(min_index_difference_index.item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) @@ -1264,19 +1264,40 @@ def get_LHS_sample_indices(self, 
num_samples: int) -> List[int]: endpoint=True) target_samples_param_indices = np.array(target_samples_param_indices, dtype=self.params_values_indices.dtype) - # for each of the target sample indices, calculate which parameter configuration is closest + # # validate and if necessary repair the target samples (slower than sum difference method below) + # target_sample_indices = list() + # for target_sample_param_config_indices in target_samples_param_indices: + # param_config = self.get_param_config_from_param_indices(tuple(target_sample_param_config_indices)) + # target_sample_index = None + # if not self.is_param_config_valid(param_config): + # # if the parameter configuration is not valid, replace with a neighbor + # neighbors_indices = self.get_neighbors_indices(param_config, neighbor_method="closest-param-indices") + # # remove already selected samples from the neighbors + # neighbors_indices = [idx for idx in neighbors_indices if idx not in target_sample_indices] + # if len(neighbors_indices) == 0: + # # if there are no valid neighbors, get a random sample + # target_sample_index = self.get_random_sample_indices(1).item() + # else: + # target_sample_index = choice(neighbors_indices).item() + # else: + # target_sample_index = self.get_param_config_index(param_config) + # target_sample_indices.append(target_sample_index) + + # allocate space for the target sample indices target_sample_indices = list() + diff = np.empty_like(self.params_values_indices, dtype=self.params_values_indices.dtype) + row_sums = np.empty(self.params_values_indices.shape[0], dtype=self.params_values_indices.dtype) + + # for each of the target sample indices, calculate which parameter configuration is closest for target_sample_param_config_indices in target_samples_param_indices: - # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) - # 
find the param config index where the difference is the smallest - sum_of_index_differences = np.sum(abs_index_difference, axis=1) param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) if param_index is not None: - # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() - min_index_difference_index = np.argmin(sum_of_index_differences) - target_sample_indices.append(min_index_difference_index.item()) + target_sample_indices.append(param_index) + else: + np.subtract(self.params_values_indices, target_sample_param_config_indices, out=diff) + np.abs(diff, out=diff) + np.einsum('ij->i', diff, out=row_sums) + target_sample_indices.append(np.argmin(row_sums).item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py index d80b6e8e..2d21a26b 100644 --- a/kernel_tuner/strategies/diff_evo.py +++ b/kernel_tuner/strategies/diff_evo.py @@ -387,12 +387,13 @@ def repair(trial_vector, searchspace): """ Attempts to repair trial_vector if trial_vector is invalid """ - if not searchspace.is_param_config_valid(tuple(trial_vector)): + trial_tuple = tuple(trial_vector) + if not searchspace.is_param_config_valid(trial_tuple): # search for valid configurations neighboring trial_vector for neighbor_method in ["closest-param-indices"]: # start from strictly-adjacent to increasingly allowing more neighbors # for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: - new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method) + new_trial_vector = searchspace.get_random_neighbor(trial_tuple, neighbor_method=neighbor_method) if new_trial_vector is not None: # 
print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}") return list(new_trial_vector) From 8812552999b4788f754b31989e8397c5542a1523 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 Jan 2026 17:40:49 +0100 Subject: [PATCH 2/6] Optimized additional neighbor operations and created shared allocation of numpy matrices --- kernel_tuner/searchspace.py | 50 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index be669f8d..b5974de2 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -103,6 +103,8 @@ def __init__( self.param_names = list(self.tune_params.keys()) self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values()) self.params_values_indices = None + self._alloc_diff = None + self._alloc_sum_of_index_differences = None self.build_neighbors_index = build_neighbors_index self.solver_method = solver_method self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) for val in param_values) and not any(isinstance(val, bool) for val in param_values) for (param_name, param_values) in tune_params.items() } @@ -962,6 +964,8 @@ def __prepare_neighbors_index(self): """Prepare by calculating the indices for the individual parameters.""" if self.params_values_indices is None: self.params_values_indices = self.get_list_param_indices_numpy() + self._alloc_diff = np.empty_like(self.params_values_indices, dtype=self.params_values_indices.dtype) + self._alloc_sum_of_index_differences = np.empty((self.params_values_indices.shape[0],), dtype=self.params_values_indices.dtype) def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, param_index: int = None, return_one=False) -> List[int]: """Get the neighbors closest in parameter indices difference from the parameter configuration. 
Always returns at least 1 neighbor.""" @@ -972,19 +976,21 @@ def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, para self.__prepare_neighbors_index() # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - np.array(param_indices), dtype=self.params_values_indices.dtype) - # calculate the sum of the absolute differences for each parameter configuration - sum_of_index_differences = np.sum(abs_index_difference, axis=1) + np.subtract(self.params_values_indices, np.array(param_indices), out=self._alloc_diff) + np.abs(self._alloc_diff, out=self._alloc_diff) + np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) if param_index is not None: # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + self._alloc_sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + + # return the indices of the closest parameter configurations if return_one: # if return_one is True, return the index of the closest parameter configuration (faster than finding all) - matching_indices = [np.argmin(sum_of_index_differences)] + matching_indices = [np.argmin(self._alloc_sum_of_index_differences).item()] else: # find the param config indices where the difference is the smallest - min_difference = np.min(sum_of_index_differences) - matching_indices = (sum_of_index_differences == min_difference).nonzero()[0] + min_difference = np.min(self._alloc_sum_of_index_differences) + matching_indices = (self._alloc_sum_of_index_differences == min_difference).nonzero()[0] return matching_indices def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]: @@ -1073,15 +1079,17 @@ def __get_neighbors_indices_strictlyadjacent( """Get the neighbors using strictly adjacent distance from the parameter 
configuration (parameter index absolute difference == 1).""" if self.params_values_indices is None: self.__prepare_neighbors_index() - param_config_value_indices = ( + param_config_value_indices = np.array( self.get_param_indices(param_config) if param_config_index is None else self.params_values_indices[param_config_index] ) + # calculate the absolute difference between the parameter value indices abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices, dtype=self.params_values_indices.dtype) # get the param config indices where the difference is one or less for each position matching_indices = (np.max(abs_index_difference, axis=1) <= 1).nonzero()[0] + # as the selected param config does not differ anywhere, remove it from the matches if param_config_index is not None: matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True) @@ -1145,7 +1153,7 @@ def __build_neighbors_index(self, neighbor_method) -> List[List[int]]: ) if neighbor_method == "closest-param-indices": return list( - self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index) + self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index, return_one=False) for param_config_index, param_config in enumerate(self.list) ) @@ -1224,11 +1232,10 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple: target_sample_indices.append(param_index) else: # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) - # find the param config index where the difference is the smallest - sum_of_index_differences = np.sum(abs_index_difference, axis=1) - min_index_difference_index = np.argmin(sum_of_index_differences) - target_sample_indices.append(min_index_difference_index.item()) + np.subtract(self.params_values_indices, target_sample_param_config_indices, 
out=self._alloc_diff) + np.abs(self._alloc_diff, out=self._alloc_diff) + np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) @@ -1282,22 +1289,19 @@ def get_LHS_sample_indices(self, num_samples: int) -> List[int]: # else: # target_sample_index = self.get_param_config_index(param_config) # target_sample_indices.append(target_sample_index) - - # allocate space for the target sample indices - target_sample_indices = list() - diff = np.empty_like(self.params_values_indices, dtype=self.params_values_indices.dtype) - row_sums = np.empty(self.params_values_indices.shape[0], dtype=self.params_values_indices.dtype) # for each of the target sample indices, calculate which parameter configuration is closest + target_sample_indices = list() for target_sample_param_config_indices in target_samples_param_indices: param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) if param_index is not None: target_sample_indices.append(param_index) else: - np.subtract(self.params_values_indices, target_sample_param_config_indices, out=diff) - np.abs(diff, out=diff) - np.einsum('ij->i', diff, out=row_sums) - target_sample_indices.append(np.argmin(row_sums).item()) + # calculate the absolute difference between the parameter value indices + np.subtract(self.params_values_indices, target_sample_param_config_indices, out=self._alloc_diff) + np.abs(self._alloc_diff, out=self._alloc_diff) + np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) From 
49854c3821e2cb50845e28c99f99a2bb1493974d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 Jan 2026 17:57:48 +0100 Subject: [PATCH 3/6] Slightly faster building of searchspace list --- kernel_tuner/searchspace.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index b5974de2..bf8ab08f 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -724,14 +724,20 @@ def get_list_param_indices_numpy(self) -> np.ndarray: tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) }) # build the list - list_param_indices = list() - for param_config in self.list: - list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)]) + lookups = [ + {v: i for i, v in enumerate(values)} + for values in self.tune_params.values() + ] + list_param_indices = np.array([ + [lookups[val] for lookup, val in zip(lookups, config)] + for config in self.list + ]) # register the computed results self.__tune_params_to_index_lookup = tune_params_to_index_lookup self.__tune_params_from_index_lookup = tune_params_from_index_lookup - self.__list_param_indices = np.array(list_param_indices) + self.__list_param_indices = list_param_indices + assert self.__list_param_indices.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_param_indices.shape}" # calculate the actual minimum and maximum index for each parameter after restrictions From 8cd683669f2f22b7939d358001e8276926845933 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 Jan 2026 18:49:17 +0100 Subject: [PATCH 4/6] Implemented much faster parameter indices building for exclusively integer-value search spaces --- kernel_tuner/searchspace.py | 38 ++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/kernel_tuner/searchspace.py 
b/kernel_tuner/searchspace.py index bf8ab08f..6c563d64 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -717,21 +717,41 @@ def get_list_param_indices_numpy(self) -> np.ndarray: the NumPy array. """ if self.__list_param_indices is None: + + # compute the lookups tune_params_to_index_lookup = list() tune_params_from_index_lookup = list() + all_values_integer_nonnegative = True for param_name, param_values in self.tune_params.items(): tune_params_to_index_lookup.append({ value: index for index, value in enumerate(param_values) }) tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) }) - + if (all_values_integer_nonnegative and + not all(isinstance(v, int) and v >= 0 for v in param_values) or + max(param_values) >= 2**15 + ): + all_values_integer_nonnegative = False + # build the list - lookups = [ - {v: i for i, v in enumerate(values)} - for values in self.tune_params.values() - ] - list_param_indices = np.array([ - [lookups[val] for lookup, val in zip(lookups, config)] - for config in self.list - ]) + if all_values_integer_nonnegative: + # optimized case for integer non-negative values + configs = np.asarray(self.list) + index_arrays = [] + for values in self.tune_params.values(): + arr = np.full(max(values) + 1, -1, dtype=np.int16) + for i, v in enumerate(values): + arr[v] = i + index_arrays.append(arr) + # use advanced indexing to build the list of parameter indices + list_param_indices = np.column_stack([ + index_arrays[i][configs[:, i]] + for i in range(configs.shape[1]) + ]) + else: + # general case for any type of values + list_param_indices = list() + for param_config in self.list: + list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)]) + list_param_indices = np.array(list_param_indices) # register the computed results self.__tune_params_to_index_lookup = tune_params_to_index_lookup From 114bb0b09c8471527a47ad2843e5b63cec7856ca Mon 
Sep 17 00:00:00 2001 From: fjwillemsen Date: Mon, 12 Jan 2026 18:58:54 +0100 Subject: [PATCH 5/6] Minor improvement to parameter index optimization technique selection --- kernel_tuner/searchspace.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index 6c563d64..2961d9ab 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -726,8 +726,7 @@ def get_list_param_indices_numpy(self) -> np.ndarray: tune_params_to_index_lookup.append({ value: index for index, value in enumerate(param_values) }) tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) }) if (all_values_integer_nonnegative and - not all(isinstance(v, int) and v >= 0 for v in param_values) or - max(param_values) >= 2**15 + not all(isinstance(v, int) and 0 < v < 2**15 for v in param_values) ): all_values_integer_nonnegative = False From 6c17d270c0c4829fd8030971bfecdcade7257a9e Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Fri, 16 Jan 2026 13:40:25 +0100 Subject: [PATCH 6/6] Simplified code by creating dedicated sum of index differences function --- kernel_tuner/searchspace.py | 39 +++++++++---------------------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index 2961d9ab..a604f69a 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -1001,9 +1001,7 @@ def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, para self.__prepare_neighbors_index() # calculate the absolute difference between the parameter value indices - np.subtract(self.params_values_indices, np.array(param_indices), out=self._alloc_diff) - np.abs(self._alloc_diff, out=self._alloc_diff) - np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + self.__calc_sum_of_index_differences(np.array(param_indices)) if param_index is not None: # set the sum of index 
differences to infinity for the parameter index to avoid returning the same parameter configuration self._alloc_sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() @@ -1184,6 +1182,12 @@ def __build_neighbors_index(self, neighbor_method) -> List[List[int]]: raise NotImplementedError(f"The neighbor method {neighbor_method} is not implemented") + def __calc_sum_of_index_differences(self, target_param_config_indices: np.ndarray): + """Calculate, for every parameter configuration, the sum of absolute differences between its parameter value indices and `target_param_config_indices`, storing the result in `self._alloc_sum_of_index_differences`.""" + np.subtract(self.params_values_indices, target_param_config_indices, out=self._alloc_diff) + np.abs(self._alloc_diff, out=self._alloc_diff) + np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + def get_random_sample_indices(self, num_samples: int) -> np.ndarray: """Get the list indices for a random, non-conflicting sample.""" if num_samples > self.size: @@ -1256,10 +1260,7 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple: if param_index is not None: target_sample_indices.append(param_index) else: - # calculate the absolute difference between the parameter value indices - np.subtract(self.params_values_indices, target_sample_param_config_indices, out=self._alloc_diff) - np.abs(self._alloc_diff, out=self._alloc_diff) - np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + self.__calc_sum_of_index_differences(target_sample_param_config_indices) target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones @@ -1296,25 +1297,6 @@ def get_LHS_sample_indices(self, num_samples: int) -> List[int]: endpoint=True) target_samples_param_indices = np.array(target_samples_param_indices, dtype=self.params_values_indices.dtype) - # # validate and if necessary repair the target samples (slower than sum difference method below) -
# target_sample_indices = list() - # for target_sample_param_config_indices in target_samples_param_indices: - # param_config = self.get_param_config_from_param_indices(tuple(target_sample_param_config_indices)) - # target_sample_index = None - # if not self.is_param_config_valid(param_config): - # # if the parameter configuration is not valid, replace with a neighbor - # neighbors_indices = self.get_neighbors_indices(param_config, neighbor_method="closest-param-indices") - # # remove already selected samples from the neighbors - # neighbors_indices = [idx for idx in neighbors_indices if idx not in target_sample_indices] - # if len(neighbors_indices) == 0: - # # if there are no valid neighbors, get a random sample - # target_sample_index = self.get_random_sample_indices(1).item() - # else: - # target_sample_index = choice(neighbors_indices).item() - # else: - # target_sample_index = self.get_param_config_index(param_config) - # target_sample_indices.append(target_sample_index) - # for each of the target sample indices, calculate which parameter configuration is closest target_sample_indices = list() for target_sample_param_config_indices in target_samples_param_indices: @@ -1322,10 +1304,7 @@ def get_LHS_sample_indices(self, num_samples: int) -> List[int]: if param_index is not None: target_sample_indices.append(param_index) else: - # calculate the absolute difference between the parameter value indices - np.subtract(self.params_values_indices, target_sample_param_config_indices, out=self._alloc_diff) - np.abs(self._alloc_diff, out=self._alloc_diff) - np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + self.__calc_sum_of_index_differences(target_sample_param_config_indices) target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones