Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ push_to_pypi.sh
!kernel_tuner/schema/T1/1.0.0/input-schema.json
!test/test_T1_input.json
!test_cache_file*.json
!test/regression/baselines/*.json
*.csv
.cache
*.ipynb_checkpoints
Expand Down
14 changes: 12 additions & 2 deletions kernel_tuner/backends/nvcuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@

# embedded in try block to be able to generate documentation
# and run tests without cuda-python installed
# Support both cuda-python < 13 and >= 13 import structures
try:
from cuda import cuda, cudart, nvrtc
# cuda-python >= 13 uses cuda.bindings module
from cuda.bindings import driver as cuda
from cuda.bindings import runtime as cudart
from cuda.bindings import nvrtc
except ImportError:
cuda = None
try:
# cuda-python < 13 uses direct imports
from cuda import cuda, cudart, nvrtc
except ImportError:
cuda = None
cudart = None
nvrtc = None


class CudaFunctions(GPUBackend):
Expand Down
4 changes: 2 additions & 2 deletions kernel_tuner/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,7 @@ def run_kernel(
try:
# create kernel instance
instance = dev.create_kernel_instance(kernelsource, kernel_options, params, False)
if instance is None:
if isinstance(instance, util.InvalidConfig):
raise RuntimeError("cannot create kernel instance, too many threads per block")

# see if the kernel arguments have correct type
Expand All @@ -821,7 +821,7 @@ def run_kernel(
dev.copy_texture_memory_args(texmem_args)
finally:
# delete temp files
if instance is not None:
if instance is not None and not isinstance(instance, util.ErrorConfig):
instance.delete_temp_files()

# run the kernel
Expand Down
10 changes: 8 additions & 2 deletions kernel_tuner/observers/nvcuda.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import numpy as np

# Support both cuda-python < 13 and >= 13 import structures
try:
from cuda import cudart
# cuda-python >= 13 uses cuda.bindings module
from cuda.bindings import runtime as cudart
except ImportError:
cuda = None
try:
# cuda-python < 13 uses direct imports
from cuda import cudart
except ImportError:
cudart = None

from kernel_tuner.observers.observer import BenchmarkObserver
from kernel_tuner.util import cuda_error_check
Expand Down
25 changes: 19 additions & 6 deletions kernel_tuner/searchspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,18 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem:
required_params = restriction[1]
restriction = restriction[0]
if callable(restriction) and not isinstance(restriction, Constraint):
# def restrictions_wrapper(*args):
# return check_instance_restrictions(restriction, dict(zip(self.param_names, args)), False)
# print(restriction, isinstance(restriction, Constraint))
# restriction = FunctionConstraint(restrictions_wrapper)
restriction = FunctionConstraint(restriction, required_params)
# Wrap the restriction to convert positional args to keyword args for backwards compatibility
# Old API: restriction received keyword args (via **params unpacking)
# New API: FunctionConstraint passes positional args
original_restriction = restriction
params_for_wrapper = required_params

def make_wrapper(func, param_names):
def restrictions_wrapper(*args):
return func(**dict(zip(param_names, args)))
return restrictions_wrapper

restriction = FunctionConstraint(make_wrapper(original_restriction, params_for_wrapper), required_params)

# add as a Constraint
all_params_required = all(param_name in required_params for param_name in self.param_names)
Expand Down Expand Up @@ -1421,13 +1428,19 @@ def get_random_neighbor(self, param_config: tuple, neighbor_method=None, use_par
return choice(neighbors)

def get_param_neighbors(self, param_config: tuple, index: int, neighbor_method: str, randomize: bool) -> list:
"""Get the neighboring parameters at an index."""
"""Get the neighboring parameters at an index.

Only returns values from neighbors that differ ONLY at the specified index,
not in multiple places. This ensures that changing only this parameter
produces a valid configuration in the searchspace.
"""
original_value = param_config[index]
params = list(
set(
neighbor[index]
for neighbor in self.get_neighbors(param_config, neighbor_method)
if neighbor[index] != original_value
and all(neighbor[i] == param_config[i] for i in range(len(param_config)) if i != index)
)
)
if randomize:
Expand Down
32 changes: 29 additions & 3 deletions kernel_tuner/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,20 @@
import cupy as cp
except ImportError:
cp = np
# Support both cuda-python < 13 and >= 13 import structures
try:
from cuda import cuda, cudart, nvrtc
# cuda-python >= 13 uses cuda.bindings module
from cuda.bindings import driver as cuda
from cuda.bindings import runtime as cudart
from cuda.bindings import nvrtc
except ImportError:
cuda = None
try:
# cuda-python < 13 uses direct imports
from cuda import cuda, cudart, nvrtc
except ImportError:
cuda = None
cudart = None
nvrtc = None

from kernel_tuner.observers.nvml import NVMLObserver

Expand Down Expand Up @@ -1077,13 +1087,29 @@ def unparse_constraint_lambda(lambda_ast):
return rewritten_lambda_body_as_string


def has_closure_variables(func):
    """Check if a callable has captured closure variables.

    Args:
        func: Any callable. Plain Python functions and lambdas expose a
            ``__closure__`` attribute; other callables (builtins,
            ``functools.partial`` objects, callable class instances) do not.

    Returns:
        bool: True if ``func.__closure__`` exists and holds at least one
        cell, False otherwise.
    """
    # Not every callable has ``__closure__``; treat a missing attribute the
    # same as ``None`` instead of raising AttributeError.
    closure = getattr(func, "__closure__", None)
    # ``__closure__`` is None or a tuple of cells, so truthiness is enough.
    return bool(closure)


def convert_constraint_lambdas(restrictions):
"""Extract and convert all constraint lambdas from the restrictions"""
"""Extract and convert all constraint lambdas from the restrictions.

Lambdas with captured closure variables are kept as-is to preserve
the closure context. Only simple lambdas without closures are converted
to strings for the constraint solver.
"""
res = []
for c in restrictions:
if isinstance(c, (str, Constraint)):
res.append(c)
if callable(c) and not isinstance(c, Constraint):
# If the lambda has closure variables, keep it as a callable
# to preserve the captured variable context
if has_closure_variables(c):
res.append(c)
continue

try:
lambda_asts = get_all_lambda_asts(c)
except ValueError:
Expand Down
1 change: 1 addition & 0 deletions test/regression/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Regression tests for Kernel Tuner
Loading