3 changes: 2 additions & 1 deletion .gitignore
@@ -25,6 +25,7 @@ deploy_key
temp_*.*
.python-version
.nox
.venv

### Visual Studio Code ###
!.vscode/settings.json
@@ -37,4 +38,4 @@ temp_*.*
.LSOverride

.vscode
.idea
.idea
2 changes: 1 addition & 1 deletion kernel_tuner/__init__.py
@@ -1,5 +1,5 @@
from kernel_tuner.integration import store_results, create_device_targets
from kernel_tuner.interface import tune_kernel, run_kernel
from kernel_tuner.interface import tune_kernel, tune_cache, run_kernel

from importlib.metadata import version

11 changes: 8 additions & 3 deletions kernel_tuner/core.py
@@ -480,11 +480,14 @@ def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_sett
print(
f"skipping config {util.get_instance_string(instance.params)} reason: too many resources requested for launch"
)
result[objective] = util.RuntimeFailedConfig()
result['__error__'] = util.RuntimeFailedConfig()
else:
logging.debug("benchmark encountered runtime failure: " + str(e))
print("Error while benchmarking:", instance.name)
raise e

assert util.check_result_type(result), "The error in a result MUST be an actual error."

return result

def check_kernel_output(
@@ -571,7 +574,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,

instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose)
if isinstance(instance, util.ErrorConfig):
result[to.objective] = util.InvalidConfig()
result['__error__'] = util.InvalidConfig()
else:
# Preprocess the argument list. This is required to deal with `MixedPrecisionArray`s
gpu_args = _preprocess_gpu_arguments(gpu_args, params)
@@ -581,7 +584,7 @@
start_compilation = time.perf_counter()
func = self.compile_kernel(instance, verbose)
if not func:
result[to.objective] = util.CompilationFailedConfig()
result['__error__'] = util.CompilationFailedConfig()
else:
# add shared memory arguments to compiled module
if kernel_options.smem_args is not None:
@@ -635,6 +638,8 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,
result["verification_time"] = last_verification_time or 0
result["benchmark_time"] = last_benchmark_time or 0

assert util.check_result_type(result), "The error in a result MUST be an actual error."

return result

def compile_kernel(self, instance, verbose):
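Both hunks above assert util.check_result_type(result) after a result is built, but that helper itself is outside this diff. The following is a minimal sketch of the contract those assertions suggest, assuming it only validates the new '__error__' convention; the name and body are illustrative, not the actual util implementation.

from kernel_tuner import util

def check_result_type(result: dict) -> bool:
    """Return True when a result carries no error, or carries an actual ErrorConfig under '__error__'."""
    if "__error__" not in result:
        return True
    return isinstance(result["__error__"], util.ErrorConfig)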
20 changes: 12 additions & 8 deletions kernel_tuner/file_utils.py
@@ -32,20 +32,20 @@ def output_file_schema(target):
return current_version, json_string


def get_configuration_validity(objective) -> str:
def get_configuration_validity(error) -> str:
"""Convert internal Kernel Tuner error to string."""
errorstring: str
if not isinstance(objective, util.ErrorConfig):
if not isinstance(error, util.ErrorConfig):
errorstring = "correct"
else:
if isinstance(objective, util.CompilationFailedConfig):
if isinstance(error, util.CompilationFailedConfig):
errorstring = "compile"
elif isinstance(objective, util.RuntimeFailedConfig):
elif isinstance(error, util.RuntimeFailedConfig):
errorstring = "runtime"
elif isinstance(objective, util.InvalidConfig):
elif isinstance(error, util.InvalidConfig):
errorstring = "constraints"
else:
raise ValueError(f"Unkown objective type {type(objective)}, value {objective}")
raise ValueError(f"Unknown error type {type(error)}, value {error}")
return errorstring
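A small usage sketch of the renamed helper, assuming the ErrorConfig subclasses referenced above; any value that is not an ErrorConfig maps to "correct".

from kernel_tuner import util
from kernel_tuner.file_utils import get_configuration_validity

print(get_configuration_validity(0.42))                            # "correct"
print(get_configuration_validity(util.CompilationFailedConfig()))  # "compile"
print(get_configuration_validity(util.RuntimeFailedConfig()))      # "runtime"
print(get_configuration_validity(util.InvalidConfig()))            # "constraints"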


@@ -110,7 +110,8 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim
out["times"] = timings

# encode the validity of the configuration
out["invalidity"] = get_configuration_validity(result[objective])
# out["invalidity"] = get_configuration_validity(result[objective])
out["invalidity"] = get_configuration_validity(result['__error__'])

# Kernel Tuner does not support producing results of configs that fail the correctness check
# therefore correctness is always 1
@@ -127,7 +128,10 @@
# In Kernel Tuner we currently support only one objective at a time, this can be a user-defined
# metric that combines scores from multiple different quantities into a single value to support
# multi-objective tuning however.
out["objectives"] = [objective]
# NOTE(maric): With PyMOO integrated we do support multi-objective tuning without scalarization
objectives = [objective] if isinstance(objective, str) else list(objective)
assert isinstance(objectives, list)
out["objectives"] = objectives

# append to output
output_data.append(out)
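The normalization above, shown standalone: a single objective string becomes a one-element list, while any other iterable of objective names is materialized as a list before being written to the output file. The helper name and the sample values are illustrative.

def normalize_objectives(objective):
    # mirrors the expression used in store_output_file above
    return [objective] if isinstance(objective, str) else list(objective)

assert normalize_objectives("time") == ["time"]
assert normalize_objectives(("time", "energy")) == ["time", "energy"]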
82 changes: 71 additions & 11 deletions kernel_tuner/interface.py
@@ -57,6 +57,7 @@
pso,
random_sample,
simulated_annealing,
pymoo_minimize,
)

strategy_map = {
@@ -75,6 +76,8 @@
"simulated_annealing": simulated_annealing,
"firefly_algorithm": firefly_algorithm,
"bayes_opt": bayes_opt,
"nsga2": pymoo_minimize,
"nsga3": pymoo_minimize,
}
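The two new entries route the nsga2 and nsga3 strategy names to the PyMOO-backed pymoo_minimize strategy. Below is a hedged sketch of how this could be combined with the objectives dict introduced further down in tune_kernel; the kernel, tunable parameters, and the GFLOP/s metric are illustrative, and the final print assumes the runner counters added in this PR are surfaced through the returned env.

import numpy as np
import kernel_tuner

kernel_string = """
__global__ void vector_add(float *c, const float *a, const float *b, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""

size = 10_000_000
a = np.random.randn(size).astype(np.float32)
b = np.random.randn(size).astype(np.float32)
c = np.zeros_like(a)
n = np.int32(size)

tune_params = {"block_size_x": [64, 128, 256, 512, 1024]}

# GFLOP/s derived from the measured time in ms; one add per element
metrics = {"GFLOP/s": lambda p: (size / 1e9) / (p["time"] / 1e3)}

results, env = kernel_tuner.tune_kernel(
    "vector_add", kernel_string, size, [c, a, b, n], tune_params,
    metrics=metrics,
    strategy="nsga2",
    # maps each objective name to whether higher is better (see tune_kernel below)
    objectives={"time": False, "GFLOP/s": True},
)

# counters added to the runner environment in this PR
print(env["config_eval_count"], env["infeasable_config_eval_count"])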


@@ -425,15 +428,15 @@ def __deepcopy__(self, _):
"""Optimization objective to sort results on, consisting of a string
that also occurs in results as a metric or observed quantity, default 'time'.
Please see :ref:`objectives`.""",
"string",
"str | list[str]",
),
),
(
"objective_higher_is_better",
(
"""boolean that specifies whether the objective should
be maximized (True) or minimized (False), default False.""",
"bool",
"bool | list[bool]",
),
),
(
@@ -464,6 +467,7 @@ def __deepcopy__(self, _):
("metrics", ("specifies user-defined metrics, please see :ref:`metrics`.", "dict")),
("simulation_mode", ("Simulate an auto-tuning search from an existing cachefile", "bool")),
("observers", ("""A list of Observers to use during tuning, please see :ref:`observers`.""", "list")),
("seed", ("""The random seed.""", "int")),
]
)

@@ -577,6 +581,8 @@ def tune_kernel(
observers=None,
objective=None,
objective_higher_is_better=None,
objectives=None,
seed=None,
):
start_overhead_time = perf_counter()
if log:
@@ -586,8 +592,20 @@

_check_user_input(kernel_name, kernelsource, arguments, block_size_names)

# default objective if none is specified
objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better)
if objectives:
if isinstance(objectives, dict):
objective = list(objectives.keys())
objective_higher_is_better = list(objectives.values())
else:
raise ValueError("objectives should be a dict of (objective, higher_is_better) pairs")
else:
objective, objective_higher_is_better = get_objective_defaults(objective, objective_higher_is_better)
objective = [objective]
objective_higher_is_better = [objective_higher_is_better]

assert isinstance(objective, list)
assert isinstance(objective_higher_is_better, list)
assert len(list(objective)) == len(list(objective_higher_is_better))

# check for forbidden names in tune parameters
util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode)
@@ -682,13 +700,33 @@

# finished iterating over search space
if results: # checks if results is not empty
best_config = util.get_best_config(results, objective, objective_higher_is_better)
# add the best configuration to env
env['best_config'] = best_config
if not device_options.quiet:
units = getattr(runner, "units", None)
print("best performing configuration:")
util.print_config_output(tune_params, best_config, device_options.quiet, metrics, units)
if len(list(objective)) == 1:
objective = objective[0]
objective_higher_is_better = objective_higher_is_better[0]
best_config = util.get_best_config(results, objective, objective_higher_is_better)
# add the best configuration to env
env['best_config'] = best_config
if not device_options.quiet:
units = getattr(runner, "units", None)
print(f"\nBEST PERFORMING CONFIGURATION FOR OBJECTIVE {objective}:")
keys = list(tune_params.keys())
keys += [objective]
if metrics:
keys += list(metrics.keys())
print(util.get_config_string(best_config, keys, units))
else:
pareto_front = util.get_pareto_results(results, objective, objective_higher_is_better)
# add the best configuration to env
env['best_config'] = pareto_front
if not device_options.quiet:
units = getattr(runner, "units", None)
keys = list(tune_params.keys())
keys += list(objective)
if metrics:
keys += list(metrics.keys())
print(f"\nBEST PERFORMING CONFIGURATIONS FOR OBJECTIVES {objective}:")
for best_config in pareto_front:
print(util.get_config_string(best_config, keys, units))
elif not device_options.quiet:
print("no results to report")
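util.get_pareto_results is called in the multi-objective branch above but is not part of this diff. The following is a minimal sketch of the non-dominated filtering it presumably performs, assuming the results passed in are error-free and each contain a numeric value per objective; the name and implementation are illustrative.

def get_pareto_results(results, objectives, higher_is_better):
    def cost(res):
        # orient every objective so that lower is always better
        return [-res[o] if maximize else res[o]
                for o, maximize in zip(objectives, higher_is_better)]

    def dominates(a, b):
        # a dominates b if it is no worse anywhere and strictly better somewhere
        return all(x <= y for x, y in zip(a, b)) and any(x < y for x, y in zip(a, b))

    costs = [cost(res) for res in results]
    return [res for res, c in zip(results, costs)
            if not any(dominates(other, c) for other in costs if other is not c)]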

@@ -703,6 +741,28 @@

tune_kernel.__doc__ = _tune_kernel_docstring


def tune_cache(*,
cache_path,
restrictions = None,
**kwargs,
):
cache = util.read_cache(cache_path, open_cache=False)
tune_args = util.infer_args_from_cache(cache)
_restrictions = [util.infer_restrictions_from_cache(cache)]

# Add the user provided restrictions
if restrictions:
if isinstance(restrictions, list):
_restrictions.extend(restrictions)
else:
raise ValueError("The restrictions must be a list()")

tune_args.update(kwargs)

return tune_kernel(**tune_args, cache=cache_path, restrictions=_restrictions, simulation_mode=True)
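A hedged usage sketch of the new tune_cache entry point; the cachefile path and the extra restriction are illustrative. The helper infers kernel arguments and tunable parameters from the cache, appends any user-provided restrictions, forwards the remaining keyword arguments to tune_kernel, and always runs in simulation mode.

import kernel_tuner

results, env = kernel_tuner.tune_cache(
    cache_path="convolution_A100_cache.json",               # hypothetical existing cachefile
    restrictions=["block_size_x * block_size_y <= 1024"],   # appended to the inferred restrictions
    strategy="random_sample",
    strategy_options={"max_fevals": 100},
)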


_run_kernel_docstring = """Compile and run a single kernel

Compiles and runs a single kernel once, given a specific instance of the kernels tuning parameters.
20 changes: 16 additions & 4 deletions kernel_tuner/runners/sequential.py
@@ -5,6 +5,7 @@

from kernel_tuner.core import DeviceInterface
from kernel_tuner.runners.runner import Runner
import kernel_tuner.util as util
from kernel_tuner.util import ErrorConfig, print_config_output, process_metrics, store_cache


@@ -44,8 +45,15 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
#move data to the GPU
self.gpu_args = self.dev.ready_argument_list(kernel_options.arguments)

# It is the task of the cost function to increment these counters
self.config_eval_count = 0
self.infeasable_config_eval_count = 0

def get_environment(self, tuning_options):
return self.dev.get_environment()
env = self.dev.get_environment()
env["config_eval_count"] = self.config_eval_count
env["infeasable_config_eval_count"] = self.infeasable_config_eval_count
return env

def run(self, parameter_space, tuning_options):
"""Iterate through the entire parameter space using a single Python process.
@@ -90,17 +98,19 @@ def run(self, parameter_space, tuning_options):

result = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, self.kernel_options, tuning_options)

assert util.check_result_type(result)

params.update(result)

if tuning_options.objective in result and isinstance(result[tuning_options.objective], ErrorConfig):
if '__error__' in result:
logging.debug('kernel configuration was skipped silently due to compile or runtime failure')

# only compute metrics on configs that have not errored
if tuning_options.metrics and not isinstance(params.get(tuning_options.objective), ErrorConfig):
if tuning_options.metrics and '__error__' not in params:
params = process_metrics(params, tuning_options.metrics)

# get the framework time by estimating based on other times
total_time = 1000 * ((perf_counter() - self.start_time) - warmup_time)
total_time = 1000 * ((perf_counter() - self.start_time) - warmup_time)
params['strategy_time'] = self.last_strategy_time
params['framework_time'] = max(total_time - (params['compile_time'] + params['verification_time'] + params['benchmark_time'] + params['strategy_time']), 0)
params['timestamp'] = str(datetime.now(timezone.utc))
@@ -113,6 +123,8 @@
# add configuration to cache
store_cache(x_int, params, tuning_options)

assert util.check_result_type(params)

# all visited configurations are added to results to provide a trace for optimization strategies
results.append(params)

9 changes: 8 additions & 1 deletion kernel_tuner/runners/simulation.py
@@ -47,7 +47,8 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob
:type iterations: int
"""
self.quiet = device_options.quiet
self.dev = SimulationDevice(1024, dict(device_name="Simulation"), self.quiet)
# NOTE(maric): had to increase max_threads so the default restrictions would pass
self.dev = SimulationDevice(1_000_000_000, dict(device_name="Simulation"), self.quiet)

self.kernel_source = kernel_source
self.simulation_mode = True
@@ -58,10 +59,16 @@
self.last_strategy_time = 0
self.units = {}

# It is the task of the cost function to increment these counters
self.config_eval_count = 0
self.infeasable_config_eval_count = 0

def get_environment(self, tuning_options):
env = self.dev.get_environment()
env["simulation"] = True
env["simulated_time"] = tuning_options.simulated_time
env["config_eval_count"] = self.config_eval_count
env["infeasable_config_eval_count"] = self.infeasable_config_eval_count
return env

def run(self, parameter_space, tuning_options):
Expand Down
21 changes: 16 additions & 5 deletions kernel_tuner/strategies/common.py
@@ -72,6 +72,8 @@ def __call__(self, x, check_restrictions=True):
# check if max_fevals is reached or time limit is exceeded
util.check_stop_criterion(self.tuning_options)

self.runner.config_eval_count += 1

# snap values in x to nearest actual value for each parameter, unscale x if needed
if self.snap:
if self.scaling:
@@ -92,9 +94,12 @@
legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose)
if not legal:
result = params_dict
result[self.tuning_options.objective] = util.InvalidConfig()
result['__error__'] = util.InvalidConfig()
self.runner.infeasable_config_eval_count += 1

if legal:
assert ('__error__' not in result), "A legal config MUST NOT have an error result."

# compile and benchmark this instance
res = self.runner.run([params], self.tuning_options)
result = res[0]
@@ -108,11 +113,17 @@
# upon returning from this function control will be given back to the strategy, so reset the start time
self.runner.last_strategy_start_time = perf_counter()

# get numerical return value, taking optimization direction into account
return_value = result[self.tuning_options.objective] or sys.float_info.max
return_value = return_value if not self.tuning_options.objective_higher_is_better else -return_value
# get the cost of the result
cost_vec = util.get_result_cost(
result,
self.tuning_options.objective,
self.tuning_options.objective_higher_is_better
)

return return_value
if len(cost_vec) == 1:
return cost_vec[0]
else:
return cost_vec
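util.get_result_cost is also introduced outside this diff. Below is a sketch of the contract implied by this call site, assuming failed configurations receive a large finite penalty per objective and maximized objectives are negated so the strategies can always minimize; the function body is illustrative, not the actual util implementation.

import sys
from kernel_tuner import util

def get_result_cost(result, objectives, higher_is_better):
    costs = []
    for objective, maximize in zip(objectives, higher_is_better):
        value = result.get(objective)
        if "__error__" in result or isinstance(value, util.ErrorConfig) or value is None:
            # failed or missing configurations get a large penalty for every objective
            costs.append(sys.float_info.max)
        else:
            costs.append(-value if maximize else value)
    return costs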

def get_bounds_x0_eps(self):
"""Compute bounds, x0 (the initial guess), and eps."""
Expand Down
Loading