diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index d3d00052..a604f69a 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -103,6 +103,8 @@ def __init__( self.param_names = list(self.tune_params.keys()) self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values()) self.params_values_indices = None + self._alloc_diff = None + self._alloc_sum_of_index_differences = None self.build_neighbors_index = build_neighbors_index self.solver_method = solver_method self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) for val in param_values) and not any(isinstance(val, bool) for val in param_values) for (param_name, param_values) in tune_params.items() } @@ -715,21 +717,46 @@ def get_list_param_indices_numpy(self) -> np.ndarray: the NumPy array. """ if self.__list_param_indices is None: + + # compute the lookups tune_params_to_index_lookup = list() tune_params_from_index_lookup = list() + all_values_integer_nonnegative = True for param_name, param_values in self.tune_params.items(): tune_params_to_index_lookup.append({ value: index for index, value in enumerate(param_values) }) tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) }) - + if (all_values_integer_nonnegative and + not all(isinstance(v, int) and 0 < v < 2**15 for v in param_values) + ): + all_values_integer_nonnegative = False + # build the list - list_param_indices = list() - for param_config in self.list: - list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)]) + if all_values_integer_nonnegative: + # optimized case for integer non-negative values + configs = np.asarray(self.list) + index_arrays = [] + for values in self.tune_params.values(): + arr = np.full(max(values) + 1, -1, dtype=np.int16) + for i, v in enumerate(values): + arr[v] = i + index_arrays.append(arr) + # use advanced indexing to build the list of parameter indices + list_param_indices = np.column_stack([ + index_arrays[i][configs[:, i]] + for i in range(configs.shape[1]) + ]) + else: + # general case for any type of values + list_param_indices = list() + for param_config in self.list: + list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)]) + list_param_indices = np.array(list_param_indices) # register the computed results self.__tune_params_to_index_lookup = tune_params_to_index_lookup self.__tune_params_from_index_lookup = tune_params_from_index_lookup - self.__list_param_indices = np.array(list_param_indices) + self.__list_param_indices = list_param_indices + assert self.__list_param_indices.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_param_indices.shape}" # calculate the actual minimum and maximum index for each parameter after restrictions @@ -962,6 +989,8 @@ def __prepare_neighbors_index(self): """Prepare by calculating the indices for the individual parameters.""" if self.params_values_indices is None: self.params_values_indices = self.get_list_param_indices_numpy() + self._alloc_diff = np.empty_like(self.params_values_indices, dtype=self.params_values_indices.dtype) + self._alloc_sum_of_index_differences = np.empty((self.params_values_indices.shape[0],), dtype=self.params_values_indices.dtype) def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, param_index: int = None, return_one=False) -> List[int]: """Get the neighbors closest in parameter indices difference from the parameter configuration. Always returns at least 1 neighbor.""" @@ -972,19 +1001,19 @@ def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, para self.__prepare_neighbors_index() # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - np.array(param_indices), dtype=self.params_values_indices.dtype) - # calculate the sum of the absolute differences for each parameter configuration - sum_of_index_differences = np.sum(abs_index_difference, axis=1) + self.__calc_sum_of_index_differences(np.array(param_indices)) if param_index is not None: # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + self._alloc_sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + + # return the indices of the closest parameter configurations if return_one: # if return_one is True, return the index of the closest parameter configuration (faster than finding all) - get_partial_neighbors_indices = [np.argmin(sum_of_index_differences)] + matching_indices = [np.argmin(self._alloc_sum_of_index_differences).item()] else: # find the param config indices where the difference is the smallest - min_difference = np.min(sum_of_index_differences) - matching_indices = (sum_of_index_differences == min_difference).nonzero()[0] + min_difference = np.min(self._alloc_sum_of_index_differences) + matching_indices = (self._alloc_sum_of_index_differences == min_difference).nonzero()[0] return matching_indices def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]: @@ -1073,15 +1102,17 @@ def __get_neighbors_indices_strictlyadjacent( """Get the neighbors using strictly adjacent distance from the parameter configuration (parameter index absolute difference == 1).""" if self.params_values_indices is None: self.__prepare_neighbors_index() - param_config_value_indices = ( + param_config_value_indices = np.array( self.get_param_indices(param_config) if param_config_index is None else self.params_values_indices[param_config_index] ) + # calculate the absolute difference between the parameter value indices abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices, dtype=self.params_values_indices.dtype) # get the param config indices where the difference is one or less for each position matching_indices = (np.max(abs_index_difference, axis=1) <= 1).nonzero()[0] + # as the selected param config does not differ anywhere, remove it from the matches if param_config_index is not None: matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True) @@ -1145,12 +1176,18 @@ def __build_neighbors_index(self, neighbor_method) -> List[List[int]]: ) if neighbor_method == "closest-param-indices": return list( - self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index) + self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index, return_one=False) for param_config_index, param_config in enumerate(self.list) ) raise NotImplementedError(f"The neighbor method {neighbor_method} is not implemented") + def __calc_sum_of_index_differences(self, target_param_config_indices: np.ndarray): + """Calculates the absolute difference between the parameter value indices and `target_param_config_indices` into `self._alloc_sum_of_index_differences`.""" + np.subtract(self.params_values_indices, target_param_config_indices, out=self._alloc_diff) + np.abs(self._alloc_diff, out=self._alloc_diff) + np.einsum('ij->i', self._alloc_diff, out=self._alloc_sum_of_index_differences) + def get_random_sample_indices(self, num_samples: int) -> np.ndarray: """Get the list indices for a random, non-conflicting sample.""" if num_samples > self.size: @@ -1169,7 +1206,7 @@ def get_random_sample(self, num_samples: int) -> List[tuple]: return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples)) def get_distributed_random_sample_indices(self, num_samples: int, sampling_factor=10) -> List[int]: - """Get a distributed random sample of parameter configuration indices. Note: `get_LHS_random_sample_indices` is likely faster and better distributed.""" + """Get a distributed random sample of parameter configuration indices. Note: `get_LHS_sample_indices` is likely faster and better distributed.""" if num_samples > self.size: warn( f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})" @@ -1219,16 +1256,12 @@ def get_next_sample(lower: tuple, upper: tuple) -> tuple: self.__prepare_neighbors_index() target_sample_indices = list() for target_sample_param_config_indices in target_samples_param_indices: - # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) - # find the param config index where the difference is the smallest - sum_of_index_differences = np.sum(abs_index_difference, axis=1) param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) if param_index is not None: - # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() - min_index_difference_index = np.argmin(sum_of_index_differences) - target_sample_indices.append(min_index_difference_index.item()) + target_sample_indices.append(param_index) + else: + self.__calc_sum_of_index_differences(target_sample_param_config_indices) + target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) @@ -1267,16 +1300,12 @@ def get_LHS_sample_indices(self, num_samples: int) -> List[int]: # for each of the target sample indices, calculate which parameter configuration is closest target_sample_indices = list() for target_sample_param_config_indices in target_samples_param_indices: - # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) - # find the param config index where the difference is the smallest - sum_of_index_differences = np.sum(abs_index_difference, axis=1) param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) if param_index is not None: - # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration - sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() - min_index_difference_index = np.argmin(sum_of_index_differences) - target_sample_indices.append(min_index_difference_index.item()) + target_sample_indices.append(param_index) + else: + self.__calc_sum_of_index_differences(target_sample_param_config_indices) + target_sample_indices.append(np.argmin(self._alloc_sum_of_index_differences).item()) # filter out duplicate samples and replace with random ones target_sample_indices = list(set(target_sample_indices)) diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py index d80b6e8e..2d21a26b 100644 --- a/kernel_tuner/strategies/diff_evo.py +++ b/kernel_tuner/strategies/diff_evo.py @@ -387,12 +387,13 @@ def repair(trial_vector, searchspace): """ Attempts to repair trial_vector if trial_vector is invalid """ - if not searchspace.is_param_config_valid(tuple(trial_vector)): + trial_tuple = tuple(trial_vector) + if not searchspace.is_param_config_valid(trial_tuple): # search for valid configurations neighboring trial_vector for neighbor_method in ["closest-param-indices"]: # start from strictly-adjacent to increasingly allowing more neighbors # for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: - new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method) + new_trial_vector = searchspace.get_random_neighbor(trial_tuple, neighbor_method=neighbor_method) if new_trial_vector is not None: # print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}") return list(new_trial_vector)