From 9c7411eeefb63a65e40b1db8146135335fcac776 Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 12:17:02 +0100 Subject: [PATCH 1/6] add a more concrete error message if possible --- openeo/udf/run_code.py | 124 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 5 deletions(-) diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py index fc673dbed..d4c9f7208 100644 --- a/openeo/udf/run_code.py +++ b/openeo/udf/run_code.py @@ -9,7 +9,7 @@ import math import pathlib import re -from typing import Callable, List, Union +from typing import Callable, List, Union, Tuple import numpy import pandas @@ -28,6 +28,22 @@ _log = logging.getLogger(__name__) +# UDF specifications +UDF_CATEGORIES = { + "timeseries": { + "function_names": ["apply_timeseries"], + "required_params": ["series", "context"], + }, + "datacube": { + "function_names": ["apply_datacube", "apply_hypercube"], + "required_params": ["cube", "context"], + }, + "vectorcube": { + "function_names": ["apply_vectorcube"], + "required_params": ["geometries", "cube", "context"], + } +} + def _build_default_execution_context(): # TODO: is it really necessary to "pre-load" these modules? Isn't user going to import them explicitly in their script anyway? @@ -147,8 +163,99 @@ def apply_timeseries_generic( return udf_data + +def validate_udf_signature(func: Callable, category: str) -> List[str]: + """ + Returns a list of error messages. If empty, the signature is valid. + """ + errors = [] + try: + sig = inspect.signature(func) + params = sig.parameters + except Exception: + return ["Could not read function signature."] + + # Check required parameters + required_params = UDF_CATEGORIES[category]["required_params"] + for param_name in required_params: + if param_name not in params: + errors.append(f"Missing required parameter: '{param_name}'") + + return errors + + +def discover_udf(code: str) -> Tuple[Callable, str]: + """ + Analyzes code and provides specific error feedback. + Returns (function, category) + """ + module = load_module_from_string(code) + functions = {k: v for k, v in module.items() if callable(v)} + + if not functions: + raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.") + + # Track all signature errors for better error messages + signature_errors = [] + + for func_name, func in functions.items(): + # Determine category based on function name + category = None + for cat_name, cat_info in UDF_CATEGORIES.items(): + if func_name in cat_info["function_names"]: + category = cat_name + break + + if category: + # Validate signature + errors = validate_udf_signature(func, category) + if not errors: + _log.info(f"Found UDF '{func_name}' (category: {category})") + return func, category + else: + signature_errors.append(f"Function '{func_name}' failed validation: {', '.join(errors)}") + + # Check for generic UDF (single parameter) + elif len(inspect.signature(func).parameters) == 1: + _log.info(f"Found generic UDF '{func_name}'") + return func, "generic" + + # Build comprehensive error message + error_msg = "No valid UDF found.\n" + + if signature_errors: + error_msg += "\nSignature errors:\n" + "\n".join(f" - {e}" for e in signature_errors) + + error_msg += "\n\nExpected signatures:\n" + for cat_name, cat_info in UDF_CATEGORIES.items(): + for func_name in cat_info["function_names"]: + params = ", ".join(cat_info["required_params"]) + error_msg += f" - {func_name}({params})\n" + + error_msg += " - Or a single-parameter function for generic UDF" + + raise OpenEoUdfException(error_msg) + + def run_udf_code(code: str, data: UdfData) -> UdfData: + # Main orchestrator; + + # Use discover_udf to get better error messages, but fall back to original logic + # if discover_udf fails (for backward compatibility) + # TODO: current implementation uses first match directly, first check for multiple matches? + + + discovered_func = None + discovered_category = None + + try: + discovered_func, discovered_category = discover_udf(code) + except OpenEoUdfException: + # If discover_udf fails, we'll use the original logic + pass + + # Original logic unchanged (except for adding logging of discovered UDF) module = load_module_from_string(code) functions = ((k, v) for (k, v) in module.items() if callable(v)) @@ -179,7 +286,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData: raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format( c=len(data.get_datacube_list()) )) - # TODO: also support calls without user context? result_cube = func(cube=data.get_datacube_list()[0], context=data.user_context) data.set_datacube_list([result_cube]) return data @@ -194,7 +300,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData: raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format( c=len(data.get_datacube_list()) )) - # TODO: also support calls without user context? result_cube: xarray.DataArray = func(cube=data.get_datacube_list()[0].get_array(), context=data.user_context) data.set_datacube_list([XarrayDataCube(result_cube)]) return data @@ -223,7 +328,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData: c=len(data.get_datacube_list()) ) ) - # TODO: geopandas is optional dependency. input_geoms = data.get_feature_collection_list()[0].data input_cube = data.get_datacube_list()[0].get_array() result_geoms, result_cube = func(geometries=input_geoms, cube=input_cube, context=data.user_context) @@ -235,7 +339,17 @@ def run_udf_code(code: str, data: UdfData) -> UdfData: func(data) return data - raise OpenEoUdfException("No UDF found.") + # If we get here and discover_udf succeeded, log the inconsistency + if discovered_func and discovered_category: + _log.warning(f"discover_udf found UDF but run_udf_code didn't execute it") + + # Use discover_udf's error message if it failed + try: + discover_udf(code) # This will raise with better error message + except OpenEoUdfException as e: + raise e + except Exception: + raise OpenEoUdfException("No UDF found.") def execute_local_udf( From 9878f6701ae009ec6098a4c8de4d87bec12c535c Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 12:32:34 +0100 Subject: [PATCH 2/6] add a more concrete error message if possible --- openeo/udf/run_code.py | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py index d4c9f7208..53bb57dfa 100644 --- a/openeo/udf/run_code.py +++ b/openeo/udf/run_code.py @@ -238,24 +238,15 @@ def discover_udf(code: str) -> Tuple[Callable, str]: def run_udf_code(code: str, data: UdfData) -> UdfData: - # Main orchestrator; - - # Use discover_udf to get better error messages, but fall back to original logic - # if discover_udf fails (for backward compatibility) - # TODO: current implementation uses first match directly, first check for multiple matches? - - - discovered_func = None - discovered_category = None - try: - discovered_func, discovered_category = discover_udf(code) - except OpenEoUdfException: - # If discover_udf fails, we'll use the original logic - pass + # This will give us better error messages if no UDF is found + discover_udf(code) + except OpenEoUdfException as e: + # Re-raise with the better error message + raise e - # Original logic unchanged (except for adding logging of discovered UDF) + # SECOND: Run the original logic unchanged module = load_module_from_string(code) functions = ((k, v) for (k, v) in module.items() if callable(v)) @@ -339,17 +330,8 @@ def run_udf_code(code: str, data: UdfData) -> UdfData: func(data) return data - # If we get here and discover_udf succeeded, log the inconsistency - if discovered_func and discovered_category: - _log.warning(f"discover_udf found UDF but run_udf_code didn't execute it") - - # Use discover_udf's error message if it failed - try: - discover_udf(code) # This will raise with better error message - except OpenEoUdfException as e: - raise e - except Exception: - raise OpenEoUdfException("No UDF found.") + # This should rarely be reached since discover_udf should have caught it + raise OpenEoUdfException("No UDF found.") def execute_local_udf( From 3bb0773b5056cc896654210a60e53a7c503aaa06 Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 12:45:08 +0100 Subject: [PATCH 3/6] including unit tests --- tests/udf/test_run_code.py | 96 +++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/tests/udf/test_run_code.py b/tests/udf/test_run_code.py index 2579b574e..8d31aa48e 100644 --- a/tests/udf/test_run_code.py +++ b/tests/udf/test_run_code.py @@ -9,7 +9,8 @@ import xarray from openeo import UDF -from openeo.udf import UdfData, XarrayDataCube + +from openeo.udf import UdfData, XarrayDataCube, OpenEoUdfException from openeo.udf._compat import FlimsyTomlParser from openeo.udf.run_code import ( _annotation_is_pandas_series, @@ -19,6 +20,7 @@ execute_local_udf, extract_udf_dependencies, run_udf_code, + discover_udf, ) from .test_xarraydatacube import _build_xdc @@ -336,6 +338,98 @@ def _is_package_available(name: str) -> bool: # TODO: move this to a more general test utility module. return importlib.util.find_spec(name) is not None +class TestsDiscoverUdf: + + def test_discover_udf_empty_code(): + """Test: Code with no functions at all.""" + udf_code = textwrap.dedent(""" + # Just comments + x = 42 + y = "no functions here" + """) + + with pytest.raises(OpenEoUdfException) as exc_info: + discover_udf(udf_code) + + error_msg = str(exc_info.value) + assert "No function (apply_datacube, apply_timeseries, apply_vectorcube) found" in error_msg + assert "Expected signatures:" in error_msg + # Verify it lists all expected signatures + assert "apply_timeseries(series, context)" in error_msg + assert "apply_datacube(cube, context)" in error_msg + assert "apply_hypercube(cube, context)" in error_msg + assert "apply_vectorcube(geometries, cube, context)" in error_msg + + + def test_discover_udf_wrong_function_name(): + """Test: Functions exist but with wrong names.""" + udf_code = textwrap.dedent(""" + def process_data(cube, context): + return cube + + def my_custom_func(series, context): + return series + + def transform_vector(geometries, cube, context): + return geometries, cube + """) + + with pytest.raises(OpenEoUdfException) as exc_info: + discover_udf(udf_code) + + error_msg = str(exc_info.value) + # Should show what we expected vs what we got + assert "Expected signatures:" in error_msg + + + + def test_discover_udf_missing_required_param_timeseries(): + """Test: apply_timeseries missing 'series' parameter.""" + udf_code = textwrap.dedent(""" + import pandas as pd + + def apply_timeseries(context: dict) -> pd.Series: # Missing 'series' + return pd.Series() + """) + + with pytest.raises(OpenEoUdfException) as exc_info: + discover_udf(udf_code) + + error_msg = str(exc_info.value) + assert "Function 'apply_timeseries' failed validation" in error_msg + assert "Missing required parameter: 'series'" in error_msg + + + def test_discover_udf_missing_required_param_datacube(): + """Test: apply_datacube missing 'context' parameter.""" + udf_code = textwrap.dedent(""" + def apply_datacube(cube): # Missing 'context' + return cube + """) + + with pytest.raises(OpenEoUdfException) as exc_info: + discover_udf(udf_code) + + error_msg = str(exc_info.value) + assert "Function 'apply_datacube' failed validation" in error_msg + assert "Missing required parameter: 'context'" in error_msg + + + def test_discover_udf_missing_required_param_vectorcube(): + """Test: apply_vectorcube missing 'geometries' parameter.""" + udf_code = textwrap.dedent(""" + def apply_vectorcube(cube, context): # Missing 'geometries' + return cube + """) + + with pytest.raises(OpenEoUdfException) as exc_info: + discover_udf(udf_code) + + error_msg = str(exc_info.value) + assert "Function 'apply_vectorcube' failed validation" in error_msg + assert "Missing required parameter: 'geometries'" in error_msg + + class TestExtractUdfDependencies: From 4bc0116b99216d53a3bdf1791e8caa95be6eaac1 Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 12:48:32 +0100 Subject: [PATCH 4/6] fix --- tests/udf/test_run_code.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/udf/test_run_code.py b/tests/udf/test_run_code.py index 8d31aa48e..4061feb4e 100644 --- a/tests/udf/test_run_code.py +++ b/tests/udf/test_run_code.py @@ -340,7 +340,7 @@ def _is_package_available(name: str) -> bool: class TestsDiscoverUdf: - def test_discover_udf_empty_code(): + def test_discover_udf_empty_code(self): """Test: Code with no functions at all.""" udf_code = textwrap.dedent(""" # Just comments @@ -361,7 +361,7 @@ def test_discover_udf_empty_code(): assert "apply_vectorcube(geometries, cube, context)" in error_msg - def test_discover_udf_wrong_function_name(): + def test_discover_udf_wrong_function_name(self): """Test: Functions exist but with wrong names.""" udf_code = textwrap.dedent(""" def process_data(cube, context): @@ -383,7 +383,7 @@ def transform_vector(geometries, cube, context): - def test_discover_udf_missing_required_param_timeseries(): + def test_discover_udf_missing_required_param_timeseries(self): """Test: apply_timeseries missing 'series' parameter.""" udf_code = textwrap.dedent(""" import pandas as pd @@ -400,7 +400,7 @@ def apply_timeseries(context: dict) -> pd.Series: # Missing 'series' assert "Missing required parameter: 'series'" in error_msg - def test_discover_udf_missing_required_param_datacube(): + def test_discover_udf_missing_required_param_datacube(self): """Test: apply_datacube missing 'context' parameter.""" udf_code = textwrap.dedent(""" def apply_datacube(cube): # Missing 'context' @@ -415,7 +415,7 @@ def apply_datacube(cube): # Missing 'context' assert "Missing required parameter: 'context'" in error_msg - def test_discover_udf_missing_required_param_vectorcube(): + def test_discover_udf_missing_required_param_vectorcube(self): """Test: apply_vectorcube missing 'geometries' parameter.""" udf_code = textwrap.dedent(""" def apply_vectorcube(cube, context): # Missing 'geometries' From f635ed4dc3496da69746f6a9cbc7350aaa4e2f82 Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 15:46:34 +0100 Subject: [PATCH 5/6] more selective parsing --- openeo/udf/run_code.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py index 53bb57dfa..f6377511a 100644 --- a/openeo/udf/run_code.py +++ b/openeo/udf/run_code.py @@ -3,6 +3,8 @@ Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf) """ +#%% + import functools import inspect import logging @@ -190,11 +192,24 @@ def discover_udf(code: str) -> Tuple[Callable, str]: Returns (function, category) """ module = load_module_from_string(code) - functions = {k: v for k, v in module.items() if callable(v)} + + functions = {} + for name, obj in module.items(): + if not callable(obj): + continue + # Exclude classes + if inspect.isclass(obj): + continue + # Exclude built-in functions + if not inspect.isfunction(obj): + continue + + functions[name] = obj if not functions: raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.") + # Track all signature errors for better error messages signature_errors = [] @@ -427,3 +442,6 @@ def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray: ) return tomllib.loads(content).get("dependencies") + + + From 1c37d19de2b7c80e026ca925f191d908e4e414bd Mon Sep 17 00:00:00 2001 From: Hans Vanrompay Date: Mon, 12 Jan 2026 15:53:38 +0100 Subject: [PATCH 6/6] fix --- openeo/udf/run_code.py | 44 ++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py index f6377511a..1c4f9077e 100644 --- a/openeo/udf/run_code.py +++ b/openeo/udf/run_code.py @@ -164,6 +164,15 @@ def apply_timeseries_generic( udf_data.set_datacube_list(datacubes) return udf_data +def _build_expected_signatures_message() -> str: + """Build the 'expected signatures' part of error messages.""" + message = "\nExpected signatures:\n" + for cat_name, cat_info in UDF_CATEGORIES.items(): + for func_name in cat_info["function_names"]: + params = ", ".join(cat_info["required_params"]) + message += f" - {func_name}({params})\n" + message += " - Or a single-parameter function for generic UDF" + return message def validate_udf_signature(func: Callable, category: str) -> List[str]: @@ -192,27 +201,32 @@ def discover_udf(code: str) -> Tuple[Callable, str]: Returns (function, category) """ module = load_module_from_string(code) + signature_errors = [] + # Collect valid UDF candidate functions functions = {} for name, obj in module.items(): if not callable(obj): continue - # Exclude classes + # Exclude classes (e.g., XarrayDataCube) if inspect.isclass(obj): continue - # Exclude built-in functions + # Exclude built-in functions, keep only user-defined functions if not inspect.isfunction(obj): continue functions[name] = obj - if not functions: - raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.") - + # Helper for building error messages + expected_sigs = _build_expected_signatures_message() - # Track all signature errors for better error messages - signature_errors = [] + if not functions: + # No functions found at all + error_msg = "No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code." + error_msg += expected_sigs + raise OpenEoUdfException(error_msg) + # Check each function for func_name, func in functions.items(): # Determine category based on function name category = None @@ -222,7 +236,7 @@ def discover_udf(code: str) -> Tuple[Callable, str]: break if category: - # Validate signature + # Validate signature for specific UDF type errors = validate_udf_signature(func, category) if not errors: _log.info(f"Found UDF '{func_name}' (category: {category})") @@ -235,19 +249,19 @@ def discover_udf(code: str) -> Tuple[Callable, str]: _log.info(f"Found generic UDF '{func_name}'") return func, "generic" - # Build comprehensive error message + # Functions were found, but none were valid error_msg = "No valid UDF found.\n" if signature_errors: error_msg += "\nSignature errors:\n" + "\n".join(f" - {e}" for e in signature_errors) + error_msg += "\n" - error_msg += "\n\nExpected signatures:\n" - for cat_name, cat_info in UDF_CATEGORIES.items(): - for func_name in cat_info["function_names"]: - params = ", ".join(cat_info["required_params"]) - error_msg += f" - {func_name}({params})\n" + # Optional: List the functions that were found but rejected + found_names = list(functions.keys()) + if found_names: + error_msg += f"\nFound functions (none valid): {', '.join(found_names)}\n" - error_msg += " - Or a single-parameter function for generic UDF" + error_msg += expected_sigs raise OpenEoUdfException(error_msg)