From 9c7411eeefb63a65e40b1db8146135335fcac776 Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 12:17:02 +0100
Subject: [PATCH 1/6] add a more concrete error message if possible

---
 openeo/udf/run_code.py | 124 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 5 deletions(-)

diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py
index fc673dbed..d4c9f7208 100644
--- a/openeo/udf/run_code.py
+++ b/openeo/udf/run_code.py
@@ -9,7 +9,7 @@
 import math
 import pathlib
 import re
-from typing import Callable, List, Union
+from typing import Callable, List, Union, Tuple
 
 import numpy
 import pandas
@@ -28,6 +28,22 @@
 
 _log = logging.getLogger(__name__)
 
+# UDF specifications
+UDF_CATEGORIES = {
+    "timeseries": {
+        "function_names": ["apply_timeseries"],
+        "required_params": ["series", "context"],
+    },
+    "datacube": {
+        "function_names": ["apply_datacube", "apply_hypercube"],
+        "required_params": ["cube", "context"],
+    },
+    "vectorcube": {
+        "function_names": ["apply_vectorcube"],
+        "required_params": ["geometries", "cube", "context"],
+    }
+}
+
 
 def _build_default_execution_context():
     # TODO: is it really necessary to "pre-load" these modules? Isn't user going to import them explicitly in their script anyway?
@@ -147,8 +163,99 @@ def apply_timeseries_generic(
     return udf_data
 
 
+
+def validate_udf_signature(func: Callable, category: str) -> List[str]:
+    """
+    Returns a list of error messages. If empty, the signature is valid.
+    """
+    errors = []
+    try:
+        sig = inspect.signature(func)
+        params = sig.parameters
+    except Exception:
+        return ["Could not read function signature."]
+
+    # Check required parameters
+    required_params = UDF_CATEGORIES[category]["required_params"]
+    for param_name in required_params:
+        if param_name not in params:
+            errors.append(f"Missing required parameter: '{param_name}'")
+    
+    return errors
+
+
+def discover_udf(code: str) -> Tuple[Callable, str]:
+    """
+    Analyzes code and provides specific error feedback.
+    Returns (function, category)
+    """
+    module = load_module_from_string(code)
+    functions = {k: v for k, v in module.items() if callable(v)}
+    
+    if not functions:
+        raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.")
+    
+    # Track all signature errors for better error messages
+    signature_errors = []
+    
+    for func_name, func in functions.items():
+        # Determine category based on function name
+        category = None
+        for cat_name, cat_info in UDF_CATEGORIES.items():
+            if func_name in cat_info["function_names"]:
+                category = cat_name
+                break
+        
+        if category:
+            # Validate signature
+            errors = validate_udf_signature(func, category)
+            if not errors:
+                _log.info(f"Found UDF '{func_name}' (category: {category})")
+                return func, category
+            else:
+                signature_errors.append(f"Function '{func_name}' failed validation: {', '.join(errors)}")
+        
+        # Check for generic UDF (single parameter)
+        elif len(inspect.signature(func).parameters) == 1:
+            _log.info(f"Found generic UDF '{func_name}'")
+            return func, "generic"
+    
+    # Build comprehensive error message
+    error_msg = "No valid UDF found.\n"
+    
+    if signature_errors:
+        error_msg += "\nSignature errors:\n" + "\n".join(f"  - {e}" for e in signature_errors)
+    
+    error_msg += "\n\nExpected signatures:\n"
+    for cat_name, cat_info in UDF_CATEGORIES.items():
+        for func_name in cat_info["function_names"]:
+            params = ", ".join(cat_info["required_params"])
+            error_msg += f"  - {func_name}({params})\n"
+    
+    error_msg += "  - Or a single-parameter function for generic UDF"
+    
+    raise OpenEoUdfException(error_msg)
+
+
 def run_udf_code(code: str, data: UdfData) -> UdfData:
+    # Main orchestrator;  
+
+    # Use discover_udf to get better error messages, but fall back to original logic
+    # if discover_udf fails (for backward compatibility)
+
     # TODO: current implementation uses first match directly, first check for multiple matches?
+
+    
+    discovered_func = None
+    discovered_category = None
+    
+    try:
+        discovered_func, discovered_category = discover_udf(code)
+    except OpenEoUdfException:
+        # If discover_udf fails, we'll use the original logic
+        pass
+    
+    # Original logic unchanged (except for adding logging of discovered UDF)
     module = load_module_from_string(code)
     functions = ((k, v) for (k, v) in module.items() if callable(v))
 
@@ -179,7 +286,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData:
                 raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format(
                     c=len(data.get_datacube_list())
                 ))
-            # TODO: also support calls without user context?
             result_cube = func(cube=data.get_datacube_list()[0], context=data.user_context)
             data.set_datacube_list([result_cube])
             return data
@@ -194,7 +300,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData:
                 raise ValueError("The provided UDF expects exactly one datacube, but {c} were provided.".format(
                     c=len(data.get_datacube_list())
                 ))
-            # TODO: also support calls without user context?
             result_cube: xarray.DataArray = func(cube=data.get_datacube_list()[0].get_array(), context=data.user_context)
             data.set_datacube_list([XarrayDataCube(result_cube)])
             return data
@@ -223,7 +328,6 @@ def run_udf_code(code: str, data: UdfData) -> UdfData:
                         c=len(data.get_datacube_list())
                     )
                 )
-            # TODO: geopandas is optional dependency.
             input_geoms = data.get_feature_collection_list()[0].data
             input_cube = data.get_datacube_list()[0].get_array()
             result_geoms, result_cube = func(geometries=input_geoms, cube=input_cube, context=data.user_context)
@@ -235,7 +339,17 @@ def run_udf_code(code: str, data: UdfData) -> UdfData:
             func(data)
             return data
 
-    raise OpenEoUdfException("No UDF found.")
+    # If we get here and discover_udf succeeded, log the inconsistency
+    if discovered_func and discovered_category:
+        _log.warning(f"discover_udf found UDF but run_udf_code didn't execute it")
+    
+    # Use discover_udf's error message if it failed
+    try:
+        discover_udf(code)  # This will raise with better error message
+    except OpenEoUdfException as e:
+        raise e
+    except Exception:
+        raise OpenEoUdfException("No UDF found.")
 
 
 def execute_local_udf(

From 9878f6701ae009ec6098a4c8de4d87bec12c535c Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 12:32:34 +0100
Subject: [PATCH 2/6] add a more concrete error message if possible

---
 openeo/udf/run_code.py | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py
index d4c9f7208..53bb57dfa 100644
--- a/openeo/udf/run_code.py
+++ b/openeo/udf/run_code.py
@@ -238,24 +238,15 @@ def discover_udf(code: str) -> Tuple[Callable, str]:
 
 
 def run_udf_code(code: str, data: UdfData) -> UdfData:
-    # Main orchestrator;  
-
-    # Use discover_udf to get better error messages, but fall back to original logic
-    # if discover_udf fails (for backward compatibility)
-
     # TODO: current implementation uses first match directly, first check for multiple matches?
-
-    
-    discovered_func = None
-    discovered_category = None
-    
     try:
-        discovered_func, discovered_category = discover_udf(code)
-    except OpenEoUdfException:
-        # If discover_udf fails, we'll use the original logic
-        pass
+        # This will give us better error messages if no UDF is found
+        discover_udf(code)
+    except OpenEoUdfException as e:
+        # Re-raise with the better error message
+        raise e
     
-    # Original logic unchanged (except for adding logging of discovered UDF)
+    # SECOND: Run the original logic unchanged
     module = load_module_from_string(code)
     functions = ((k, v) for (k, v) in module.items() if callable(v))
 
@@ -339,17 +330,8 @@ def run_udf_code(code: str, data: UdfData) -> UdfData:
             func(data)
             return data
 
-    # If we get here and discover_udf succeeded, log the inconsistency
-    if discovered_func and discovered_category:
-        _log.warning(f"discover_udf found UDF but run_udf_code didn't execute it")
-    
-    # Use discover_udf's error message if it failed
-    try:
-        discover_udf(code)  # This will raise with better error message
-    except OpenEoUdfException as e:
-        raise e
-    except Exception:
-        raise OpenEoUdfException("No UDF found.")
+    # This should rarely be reached since discover_udf should have caught it
+    raise OpenEoUdfException("No UDF found.")
 
 
 def execute_local_udf(

From 3bb0773b5056cc896654210a60e53a7c503aaa06 Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 12:45:08 +0100
Subject: [PATCH 3/6] including unit tests

---
 tests/udf/test_run_code.py | 96 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/tests/udf/test_run_code.py b/tests/udf/test_run_code.py
index 2579b574e..8d31aa48e 100644
--- a/tests/udf/test_run_code.py
+++ b/tests/udf/test_run_code.py
@@ -9,7 +9,8 @@
 import xarray
 
 from openeo import UDF
-from openeo.udf import UdfData, XarrayDataCube
+
+from openeo.udf import UdfData, XarrayDataCube, OpenEoUdfException
 from openeo.udf._compat import FlimsyTomlParser
 from openeo.udf.run_code import (
     _annotation_is_pandas_series,
@@ -19,6 +20,7 @@
     execute_local_udf,
     extract_udf_dependencies,
     run_udf_code,
+    discover_udf,
 )
 
 from .test_xarraydatacube import _build_xdc
@@ -336,6 +338,98 @@ def _is_package_available(name: str) -> bool:
     # TODO: move this to a more general test utility module.
     return importlib.util.find_spec(name) is not None
 
+class TestsDiscoverUdf:
+
+    def test_discover_udf_empty_code():
+        """Test: Code with no functions at all."""
+        udf_code = textwrap.dedent("""
+            # Just comments
+            x = 42
+            y = "no functions here"
+        """)
+        
+        with pytest.raises(OpenEoUdfException) as exc_info:
+            discover_udf(udf_code)
+        
+        error_msg = str(exc_info.value)
+        assert "No function (apply_datacube, apply_timeseries, apply_vectorcube) found" in error_msg
+        assert "Expected signatures:" in error_msg
+        # Verify it lists all expected signatures
+        assert "apply_timeseries(series, context)" in error_msg
+        assert "apply_datacube(cube, context)" in error_msg
+        assert "apply_hypercube(cube, context)" in error_msg
+        assert "apply_vectorcube(geometries, cube, context)" in error_msg
+
+
+    def test_discover_udf_wrong_function_name():
+        """Test: Functions exist but with wrong names."""
+        udf_code = textwrap.dedent("""
+            def process_data(cube, context):
+                return cube
+                
+            def my_custom_func(series, context):
+                return series
+                
+            def transform_vector(geometries, cube, context):
+                return geometries, cube
+        """)
+        
+        with pytest.raises(OpenEoUdfException) as exc_info:
+            discover_udf(udf_code)
+        
+        error_msg = str(exc_info.value)
+        # Should show what we expected vs what we got
+        assert "Expected signatures:" in error_msg
+
+
+
+    def test_discover_udf_missing_required_param_timeseries():
+        """Test: apply_timeseries missing 'series' parameter."""
+        udf_code = textwrap.dedent("""
+            import pandas as pd
+            
+            def apply_timeseries(context: dict) -> pd.Series:  # Missing 'series'
+                return pd.Series()
+        """)
+        
+        with pytest.raises(OpenEoUdfException) as exc_info:
+            discover_udf(udf_code)
+        
+        error_msg = str(exc_info.value)
+        assert "Function 'apply_timeseries' failed validation" in error_msg
+        assert "Missing required parameter: 'series'" in error_msg
+
+
+    def test_discover_udf_missing_required_param_datacube():
+        """Test: apply_datacube missing 'context' parameter."""
+        udf_code = textwrap.dedent("""
+            def apply_datacube(cube):  # Missing 'context'
+                return cube
+        """)
+        
+        with pytest.raises(OpenEoUdfException) as exc_info:
+            discover_udf(udf_code)
+        
+        error_msg = str(exc_info.value)
+        assert "Function 'apply_datacube' failed validation" in error_msg
+        assert "Missing required parameter: 'context'" in error_msg
+
+
+    def test_discover_udf_missing_required_param_vectorcube():
+        """Test: apply_vectorcube missing 'geometries' parameter."""
+        udf_code = textwrap.dedent("""
+            def apply_vectorcube(cube, context):  # Missing 'geometries'
+                return cube
+        """)
+        
+        with pytest.raises(OpenEoUdfException) as exc_info:
+            discover_udf(udf_code)
+        
+        error_msg = str(exc_info.value)
+        assert "Function 'apply_vectorcube' failed validation" in error_msg
+        assert "Missing required parameter: 'geometries'" in error_msg
+
+
 
 class TestExtractUdfDependencies:
 

From 4bc0116b99216d53a3bdf1791e8caa95be6eaac1 Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 12:48:32 +0100
Subject: [PATCH 4/6] fix

---
 tests/udf/test_run_code.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/udf/test_run_code.py b/tests/udf/test_run_code.py
index 8d31aa48e..4061feb4e 100644
--- a/tests/udf/test_run_code.py
+++ b/tests/udf/test_run_code.py
@@ -340,7 +340,7 @@ def _is_package_available(name: str) -> bool:
 
 class TestsDiscoverUdf:
 
-    def test_discover_udf_empty_code():
+    def test_discover_udf_empty_code(self):
         """Test: Code with no functions at all."""
         udf_code = textwrap.dedent("""
             # Just comments
@@ -361,7 +361,7 @@ def test_discover_udf_empty_code():
         assert "apply_vectorcube(geometries, cube, context)" in error_msg
 
 
-    def test_discover_udf_wrong_function_name():
+    def test_discover_udf_wrong_function_name(self):
         """Test: Functions exist but with wrong names."""
         udf_code = textwrap.dedent("""
             def process_data(cube, context):
@@ -383,7 +383,7 @@ def transform_vector(geometries, cube, context):
 
 
 
-    def test_discover_udf_missing_required_param_timeseries():
+    def test_discover_udf_missing_required_param_timeseries(self):
         """Test: apply_timeseries missing 'series' parameter."""
         udf_code = textwrap.dedent("""
             import pandas as pd
@@ -400,7 +400,7 @@ def apply_timeseries(context: dict) -> pd.Series:  # Missing 'series'
         assert "Missing required parameter: 'series'" in error_msg
 
 
-    def test_discover_udf_missing_required_param_datacube():
+    def test_discover_udf_missing_required_param_datacube(self):
         """Test: apply_datacube missing 'context' parameter."""
         udf_code = textwrap.dedent("""
             def apply_datacube(cube):  # Missing 'context'
@@ -415,7 +415,7 @@ def apply_datacube(cube):  # Missing 'context'
         assert "Missing required parameter: 'context'" in error_msg
 
 
-    def test_discover_udf_missing_required_param_vectorcube():
+    def test_discover_udf_missing_required_param_vectorcube(self):
         """Test: apply_vectorcube missing 'geometries' parameter."""
         udf_code = textwrap.dedent("""
             def apply_vectorcube(cube, context):  # Missing 'geometries'

From f635ed4dc3496da69746f6a9cbc7350aaa4e2f82 Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 15:46:34 +0100
Subject: [PATCH 5/6] more selective parsing

---
 openeo/udf/run_code.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py
index 53bb57dfa..f6377511a 100644
--- a/openeo/udf/run_code.py
+++ b/openeo/udf/run_code.py
@@ -3,6 +3,8 @@
 Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
 """
 
+#%%
+
 import functools
 import inspect
 import logging
@@ -190,11 +192,24 @@ def discover_udf(code: str) -> Tuple[Callable, str]:
     Returns (function, category)
     """
     module = load_module_from_string(code)
-    functions = {k: v for k, v in module.items() if callable(v)}
+    
+    functions = {}
+    for name, obj in module.items():
+        if not callable(obj):
+            continue
+        # Exclude classes
+        if inspect.isclass(obj):
+            continue
+        # Exclude built-in functions
+        if not inspect.isfunction(obj):
+            continue
+        
+        functions[name] = obj
     
     if not functions:
         raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.")
     
+    
     # Track all signature errors for better error messages
     signature_errors = []
     
@@ -427,3 +442,6 @@ def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
     )
 
     return tomllib.loads(content).get("dependencies")
+
+
+

From 1c37d19de2b7c80e026ca925f191d908e4e414bd Mon Sep 17 00:00:00 2001
From: Hans Vanrompay <hansvanrompay@hotmail.com>
Date: Mon, 12 Jan 2026 15:53:38 +0100
Subject: [PATCH 6/6] fix

---
 openeo/udf/run_code.py | 44 ++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/openeo/udf/run_code.py b/openeo/udf/run_code.py
index f6377511a..1c4f9077e 100644
--- a/openeo/udf/run_code.py
+++ b/openeo/udf/run_code.py
@@ -164,6 +164,15 @@ def apply_timeseries_generic(
     udf_data.set_datacube_list(datacubes)
     return udf_data
 
+def _build_expected_signatures_message() -> str:
+    """Build the 'expected signatures' part of error messages."""
+    message = "\nExpected signatures:\n"
+    for cat_name, cat_info in UDF_CATEGORIES.items():
+        for func_name in cat_info["function_names"]:
+            params = ", ".join(cat_info["required_params"])
+            message += f"  - {func_name}({params})\n"
+    message += "  - Or a single-parameter function for generic UDF"
+    return message
 
 
 def validate_udf_signature(func: Callable, category: str) -> List[str]:
@@ -192,27 +201,32 @@ def discover_udf(code: str) -> Tuple[Callable, str]:
     Returns (function, category)
     """
     module = load_module_from_string(code)
+    signature_errors = []
     
+    # Collect valid UDF candidate functions
     functions = {}
     for name, obj in module.items():
         if not callable(obj):
             continue
-        # Exclude classes
+        # Exclude classes (e.g., XarrayDataCube)
         if inspect.isclass(obj):
             continue
-        # Exclude built-in functions
+        # Exclude built-in functions, keep only user-defined functions
         if not inspect.isfunction(obj):
             continue
         
         functions[name] = obj
     
-    if not functions:
-        raise OpenEoUdfException("No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code.")
-    
+    # Helper for building error messages
+    expected_sigs = _build_expected_signatures_message()
     
-    # Track all signature errors for better error messages
-    signature_errors = []
+    if not functions:
+        # No functions found at all
+        error_msg = "No function (apply_datacube, apply_timeseries, apply_vectorcube) found in UDF code."
+        error_msg += expected_sigs
+        raise OpenEoUdfException(error_msg)
     
+    # Check each function
     for func_name, func in functions.items():
         # Determine category based on function name
         category = None
@@ -222,7 +236,7 @@ def discover_udf(code: str) -> Tuple[Callable, str]:
                 break
         
         if category:
-            # Validate signature
+            # Validate signature for specific UDF type
             errors = validate_udf_signature(func, category)
             if not errors:
                 _log.info(f"Found UDF '{func_name}' (category: {category})")
@@ -235,19 +249,19 @@ def discover_udf(code: str) -> Tuple[Callable, str]:
             _log.info(f"Found generic UDF '{func_name}'")
             return func, "generic"
     
-    # Build comprehensive error message
+    # Functions were found, but none were valid
     error_msg = "No valid UDF found.\n"
     
     if signature_errors:
         error_msg += "\nSignature errors:\n" + "\n".join(f"  - {e}" for e in signature_errors)
+        error_msg += "\n"
     
-    error_msg += "\n\nExpected signatures:\n"
-    for cat_name, cat_info in UDF_CATEGORIES.items():
-        for func_name in cat_info["function_names"]:
-            params = ", ".join(cat_info["required_params"])
-            error_msg += f"  - {func_name}({params})\n"
+    # Optional: List the functions that were found but rejected
+    found_names = list(functions.keys())
+    if found_names:
+        error_msg += f"\nFound functions (none valid): {', '.join(found_names)}\n"
     
-    error_msg += "  - Or a single-parameter function for generic UDF"
+    error_msg += expected_sigs
     
     raise OpenEoUdfException(error_msg)