class LiveTrackingSpanProcessor(SpanProcessor):
    """Span processor for live span tracking using the exporter's upsert_span API.

    Sends real-time span updates for evaluation-related spans:

    - On span start: upsert with ``RUNNING`` status.
    - On span end: upsert again without a status override (intended to
      publish the span's final OK/ERROR status).

    Both hooks are best-effort. Span processor callbacks run synchronously on
    the thread that starts/ends the span and must not raise, so every failure
    (including a failure while inspecting the span) is logged at debug level
    and swallowed.
    """

    # Values of the ``span_type`` attribute that identify an evaluation span.
    # Defined once on the class rather than rebuilt on every span callback.
    _EVAL_SPAN_TYPES = frozenset(
        {
            "eval",
            "evaluator",
            "evaluation",
            "eval_set_run",
            "evalOutput",
        }
    )

    # Spans carrying this attribute belong to an eval execution and are
    # tracked regardless of their ``span_type``.
    _EXECUTION_ID_ATTRIBUTE = "execution.id"

    def __init__(self, exporter: LlmOpsHttpExporter):
        """Store the exporter used to upsert spans.

        Args:
            exporter: Exporter whose ``upsert_span`` method is called on
                span start and span end.
        """
        self.exporter = exporter
        # The status enum is kept on the instance; on_start reads RUNNING
        # from it.
        self.span_status = SpanStatus

    def on_start(
        self, span: Span, parent_context: context_api.Context | None = None
    ) -> None:
        """Called when span starts - upsert with RUNNING status.

        Args:
            span: The span that has just been started.
            parent_context: Parent context of the span; unused here.
        """
        # The eval-span check lives inside the try block so that an
        # unexpected attribute container cannot raise into the tracer.
        try:
            if self._is_eval_span(span):
                self.exporter.upsert_span(
                    span, status_override=self.span_status.RUNNING
                )
        except Exception as e:
            logger.debug(f"Failed to upsert span on start: {e}")

    def on_end(self, span: ReadableSpan) -> None:
        """Called when span ends - upsert without a status override.

        Args:
            span: The span that has just ended.
        """
        try:
            if self._is_eval_span(span):
                self.exporter.upsert_span(span)
        except Exception as e:
            logger.debug(f"Failed to upsert span on end: {e}")

    def _is_eval_span(self, span: Span | ReadableSpan) -> bool:
        """Check if span is evaluation-related.

        A span qualifies when its ``span_type`` attribute is one of the known
        evaluation span types, or when it carries an ``execution.id``
        attribute.

        Args:
            span: Span whose attributes are inspected.

        Returns:
            True if the span should be live-tracked, False otherwise
            (including when the span has no attributes or they are empty).
        """
        attributes = span.attributes
        if not attributes:
            return False

        span_type = attributes.get("span_type")
        # Only strings can match; the isinstance guard also keeps an
        # unhashable attribute value from raising TypeError on the lookup.
        if isinstance(span_type, str) and span_type in self._EVAL_SPAN_TYPES:
            return True

        # Also track spans with execution.id (eval executions).
        return self._EXECUTION_ID_ATTRIBUTE in attributes

    def shutdown(self) -> None:
        """Shutdown the processor - no-op, nothing is buffered here."""
        pass

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Force flush - no-op for live tracking.

        Args:
            timeout_millis: Accepted for interface compatibility; ignored.

        Returns:
            Always True, because spans are upserted immediately and there is
            nothing pending to flush.
        """
        return True
class TestLiveTrackingSpanProcessor:
    """Test suite for LiveTrackingSpanProcessor.

    Covers three areas: which spans trigger an upsert on start, which trigger
    one on end, and the ``_is_eval_span`` predicate itself. The exporter is
    always a mock, so no network access is involved.
    """

    @pytest.fixture
    def mock_exporter(self):
        """Create a mock LlmOpsHttpExporter."""
        exporter = Mock()
        exporter.upsert_span = Mock()
        return exporter

    @pytest.fixture
    def processor(self, mock_exporter):
        """Create a LiveTrackingSpanProcessor with mock exporter."""
        return LiveTrackingSpanProcessor(mock_exporter)

    def create_mock_span(self, attributes: dict[str, Any] | None = None):
        """Create a mock span with attributes.

        ``None`` is normalised to an empty dict; tests that need a span whose
        ``attributes`` is literally ``None`` must set it on the returned mock.
        """
        span = Mock(spec=Span)
        span.attributes = attributes or {}
        return span

    def create_mock_readable_span(self, attributes: dict[str, Any] | None = None):
        """Create a mock ReadableSpan with attributes.

        ``None`` is normalised to an empty dict; tests that need a span whose
        ``attributes`` is literally ``None`` must set it on the returned mock.
        """
        span = Mock(spec=ReadableSpan)
        span.attributes = attributes or {}
        return span

    def test_init(self, mock_exporter):
        """Test processor initialization."""
        processor = LiveTrackingSpanProcessor(mock_exporter)

        assert processor.exporter == mock_exporter
        assert processor.span_status == SpanStatus

    # ------------------------------------------------------------------
    # on_start
    # ------------------------------------------------------------------

    def test_on_start_with_eval_span_type(self, processor, mock_exporter):
        """Test on_start upserts a span of type eval as RUNNING."""
        span = self.create_mock_span({"span_type": "eval"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_evaluator_span_type(self, processor, mock_exporter):
        """Test on_start upserts a span of type evaluator as RUNNING."""
        span = self.create_mock_span({"span_type": "evaluator"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_evaluation_span_type(self, processor, mock_exporter):
        """Test on_start upserts a span of type evaluation as RUNNING."""
        span = self.create_mock_span({"span_type": "evaluation"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_eval_set_run_span_type(self, processor, mock_exporter):
        """Test on_start upserts a span of type eval_set_run as RUNNING."""
        span = self.create_mock_span({"span_type": "eval_set_run"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_eval_output_span_type(self, processor, mock_exporter):
        """Test on_start upserts a span of type evalOutput as RUNNING."""
        span = self.create_mock_span({"span_type": "evalOutput"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_execution_id(self, processor, mock_exporter):
        """Test on_start upserts a span that carries execution.id."""
        span = self.create_mock_span({"execution.id": "test-exec-id"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_on_start_with_non_eval_span(self, processor, mock_exporter):
        """Test on_start does NOT upsert non-eval spans."""
        span = self.create_mock_span({"span_type": "agent"})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_not_called()

    def test_on_start_with_no_attributes(self, processor, mock_exporter):
        """Test on_start does NOT upsert when span.attributes is None."""
        span = self.create_mock_span()
        # The helper turns None into {}, so set None explicitly to really
        # exercise the "attributes missing" path.
        span.attributes = None

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_not_called()

    def test_on_start_with_empty_attributes(self, processor, mock_exporter):
        """Test on_start does NOT upsert when span has empty attributes."""
        span = self.create_mock_span({})

        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_not_called()

    def test_on_start_exception_handling(self, processor, mock_exporter):
        """Test on_start handles exporter exceptions gracefully."""
        span = self.create_mock_span({"span_type": "eval"})
        mock_exporter.upsert_span.side_effect = Exception("Network error")

        # Should not raise exception
        processor.on_start(span, None)

        mock_exporter.upsert_span.assert_called_once()

    # ------------------------------------------------------------------
    # on_end
    # ------------------------------------------------------------------

    def test_on_end_with_eval_span_type(self, processor, mock_exporter):
        """Test on_end upserts a span of type eval without status override."""
        span = self.create_mock_readable_span({"span_type": "eval"})

        processor.on_end(span)

        mock_exporter.upsert_span.assert_called_once_with(span)

    def test_on_end_with_evaluator_span_type(self, processor, mock_exporter):
        """Test on_end upserts a span of type evaluator."""
        span = self.create_mock_readable_span({"span_type": "evaluator"})

        processor.on_end(span)

        mock_exporter.upsert_span.assert_called_once_with(span)

    def test_on_end_with_evaluation_span_type(self, processor, mock_exporter):
        """Test on_end upserts a span of type evaluation."""
        span = self.create_mock_readable_span({"span_type": "evaluation"})

        processor.on_end(span)

        mock_exporter.upsert_span.assert_called_once_with(span)

    def test_on_end_with_execution_id(self, processor, mock_exporter):
        """Test on_end upserts a span that carries execution.id."""
        span = self.create_mock_readable_span({"execution.id": "test-exec-id"})

        processor.on_end(span)

        mock_exporter.upsert_span.assert_called_once_with(span)

    def test_on_end_with_non_eval_span(self, processor, mock_exporter):
        """Test on_end does NOT upsert non-eval spans."""
        span = self.create_mock_readable_span({"span_type": "agent"})

        processor.on_end(span)

        mock_exporter.upsert_span.assert_not_called()

    def test_on_end_with_no_attributes(self, processor, mock_exporter):
        """Test on_end does NOT upsert when span.attributes is None."""
        span = self.create_mock_readable_span()
        # The helper turns None into {}, so set None explicitly to really
        # exercise the "attributes missing" path.
        span.attributes = None

        processor.on_end(span)

        mock_exporter.upsert_span.assert_not_called()

    def test_on_end_exception_handling(self, processor, mock_exporter):
        """Test on_end handles exporter exceptions gracefully."""
        span = self.create_mock_readable_span({"span_type": "eval"})
        mock_exporter.upsert_span.side_effect = Exception("Network error")

        # Should not raise exception
        processor.on_end(span)

        mock_exporter.upsert_span.assert_called_once()

    # ------------------------------------------------------------------
    # _is_eval_span
    # ------------------------------------------------------------------

    def test_is_eval_span_with_eval_type(self, processor):
        """Test _is_eval_span returns True for eval span type."""
        span = self.create_mock_span({"span_type": "eval"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_evaluator_type(self, processor):
        """Test _is_eval_span returns True for evaluator span type."""
        span = self.create_mock_span({"span_type": "evaluator"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_evaluation_type(self, processor):
        """Test _is_eval_span returns True for evaluation span type."""
        span = self.create_mock_span({"span_type": "evaluation"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_eval_set_run_type(self, processor):
        """Test _is_eval_span returns True for eval_set_run span type."""
        span = self.create_mock_span({"span_type": "eval_set_run"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_eval_output_type(self, processor):
        """Test _is_eval_span returns True for evalOutput span type."""
        span = self.create_mock_span({"span_type": "evalOutput"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_execution_id(self, processor):
        """Test _is_eval_span returns True for span with execution.id."""
        span = self.create_mock_span({"execution.id": "test-id"})
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_both_criteria(self, processor):
        """Test _is_eval_span returns True when both criteria match."""
        span = self.create_mock_span(
            {"span_type": "evaluation", "execution.id": "test-id"}
        )
        assert processor._is_eval_span(span) is True

    def test_is_eval_span_with_non_eval_type(self, processor):
        """Test _is_eval_span returns False for non-eval span type."""
        span = self.create_mock_span({"span_type": "agent"})
        assert processor._is_eval_span(span) is False

    def test_is_eval_span_with_no_attributes(self, processor):
        """Test _is_eval_span returns False when span.attributes is None."""
        span = self.create_mock_span()
        # The helper turns None into {}, so set None explicitly to really
        # exercise the "attributes missing" path.
        span.attributes = None
        assert processor._is_eval_span(span) is False

    def test_is_eval_span_with_empty_attributes(self, processor):
        """Test _is_eval_span returns False when span has empty attributes."""
        span = self.create_mock_span({})
        assert processor._is_eval_span(span) is False

    # ------------------------------------------------------------------
    # lifecycle no-ops and miscellaneous
    # ------------------------------------------------------------------

    def test_shutdown(self, processor):
        """Test shutdown method."""
        # Should not raise exception
        processor.shutdown()

    def test_force_flush(self, processor):
        """Test force_flush method."""
        result = processor.force_flush()
        assert result is True

    def test_force_flush_with_timeout(self, processor):
        """Test force_flush with custom timeout."""
        result = processor.force_flush(timeout_millis=5000)
        assert result is True

    def test_on_start_with_parent_context(self, processor, mock_exporter):
        """Test on_start with parent context."""
        span = self.create_mock_span({"span_type": "eval"})
        parent_context = Mock(spec=context_api.Context)

        processor.on_start(span, parent_context)

        # The parent context is not forwarded to the exporter.
        mock_exporter.upsert_span.assert_called_once_with(
            span, status_override=SpanStatus.RUNNING
        )

    def test_processor_handles_all_eval_span_types(self, processor):
        """Test that all eval span types are properly detected."""
        eval_span_types = [
            "eval",
            "evaluator",
            "evaluation",
            "eval_set_run",
            "evalOutput",
        ]

        for span_type in eval_span_types:
            span = self.create_mock_span({"span_type": span_type})
            assert processor._is_eval_span(span) is True, (
                f"Failed for span_type: {span_type}"
            )