From 2895fcd23a325ee6bf2cbc96c48abbddf3134393 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 14 Jan 2026 21:43:30 +0100 Subject: [PATCH 01/24] feat(ai): add OpenAI Agents SDK integration Add PostHogTracingProcessor that implements the OpenAI Agents SDK TracingProcessor interface to capture agent traces in PostHog. - Maps GenerationSpanData to $ai_generation events - Maps FunctionSpanData, AgentSpanData, HandoffSpanData, GuardrailSpanData to $ai_span events with appropriate types - Supports privacy mode, groups, and custom properties - Includes instrument() helper for one-liner setup - 22 unit tests covering all span types --- posthog/ai/openai_agents/__init__.py | 67 ++ posthog/ai/openai_agents/processor.py | 624 ++++++++++++++++++ posthog/test/ai/openai_agents/__init__.py | 1 + .../test/ai/openai_agents/test_processor.py | 413 ++++++++++++ 4 files changed, 1105 insertions(+) create mode 100644 posthog/ai/openai_agents/__init__.py create mode 100644 posthog/ai/openai_agents/processor.py create mode 100644 posthog/test/ai/openai_agents/__init__.py create mode 100644 posthog/test/ai/openai_agents/test_processor.py diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py new file mode 100644 index 00000000..49e4186e --- /dev/null +++ b/posthog/ai/openai_agents/__init__.py @@ -0,0 +1,67 @@ +try: + import agents # noqa: F401 +except ImportError: + raise ModuleNotFoundError( + "Please install the OpenAI Agents SDK to use this feature: 'pip install openai-agents'" + ) + +from posthog.ai.openai_agents.processor import PostHogTracingProcessor + +__all__ = ["PostHogTracingProcessor", "instrument"] + + +def instrument( + client=None, + distinct_id=None, + privacy_mode: bool = False, + groups=None, + properties=None, +): + """ + One-liner to instrument OpenAI Agents SDK with PostHog tracing. + + This registers a PostHogTracingProcessor with the OpenAI Agents SDK, + automatically capturing traces, spans, and LLM generations. + + Args: + client: Optional PostHog client instance. If not provided, uses the default client. + distinct_id: Optional distinct ID to associate with all traces. + Can also be a callable that takes a trace and returns a distinct ID. + privacy_mode: If True, redacts input/output content from events. + groups: Optional PostHog groups to associate with events. + properties: Optional additional properties to include with all events. + + Returns: + PostHogTracingProcessor: The registered processor instance. 
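+
+    Note:
+        Call this once during startup. Each call registers an additional
+        processor via add_trace_processor(), so calling it repeatedly
+        will emit duplicate events for the same traces.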
+ + Example: + ```python + from posthog.ai.openai_agents import instrument + + # Simple setup + instrument(distinct_id="user@example.com") + + # With custom properties + instrument( + distinct_id="user@example.com", + privacy_mode=True, + properties={"environment": "production"} + ) + + # Now run agents as normal - traces automatically sent to PostHog + from agents import Agent, Runner + agent = Agent(name="Assistant", instructions="You are helpful.") + result = Runner.run_sync(agent, "Hello!") + ``` + """ + from agents.tracing import add_trace_processor + + processor = PostHogTracingProcessor( + client=client, + distinct_id=distinct_id, + privacy_mode=privacy_mode, + groups=groups, + properties=properties, + ) + add_trace_processor(processor) + return processor diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py new file mode 100644 index 00000000..34f84556 --- /dev/null +++ b/posthog/ai/openai_agents/processor.py @@ -0,0 +1,624 @@ +import json +import logging +import time +from datetime import datetime +from typing import Any, Callable, Dict, Optional, Union + +from agents.tracing import Span, Trace +from agents.tracing.processor_interface import TracingProcessor +from agents.tracing.span_data import ( + AgentSpanData, + CustomSpanData, + FunctionSpanData, + GenerationSpanData, + GuardrailSpanData, + HandoffSpanData, + MCPListToolsSpanData, + ResponseSpanData, + SpeechGroupSpanData, + SpeechSpanData, + TranscriptionSpanData, +) + +from posthog import setup +from posthog.client import Client + +log = logging.getLogger("posthog") + + +def _safe_json(obj: Any) -> Any: + """Safely convert object to JSON-serializable format.""" + if obj is None: + return None + try: + json.dumps(obj) + return obj + except (TypeError, ValueError): + return str(obj) + + +def _parse_iso_timestamp(iso_str: Optional[str]) -> Optional[float]: + """Parse ISO timestamp to Unix timestamp.""" + if not iso_str: + return None + try: + dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00")) + return dt.timestamp() + except (ValueError, AttributeError): + return None + + +class PostHogTracingProcessor(TracingProcessor): + """ + A tracing processor that sends OpenAI Agents SDK traces to PostHog. + + This processor implements the TracingProcessor interface from the OpenAI Agents SDK + and maps agent traces, spans, and generations to PostHog's LLM analytics events. + + Example: + ```python + from agents import Agent, Runner + from agents.tracing import add_trace_processor + from posthog.ai.openai_agents import PostHogTracingProcessor + + # Create and register the processor + processor = PostHogTracingProcessor( + distinct_id="user@example.com", + privacy_mode=False, + ) + add_trace_processor(processor) + + # Run agents as normal - traces automatically sent to PostHog + agent = Agent(name="Assistant", instructions="You are helpful.") + result = Runner.run_sync(agent, "Hello!") + ``` + """ + + def __init__( + self, + client: Optional[Client] = None, + distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None, + privacy_mode: bool = False, + groups: Optional[Dict[str, Any]] = None, + properties: Optional[Dict[str, Any]] = None, + ): + """ + Initialize the PostHog tracing processor. + + Args: + client: Optional PostHog client instance. If not provided, uses the default client. + distinct_id: Either a string distinct ID or a callable that takes a Trace + and returns a distinct ID. If not provided, uses the trace_id. 
+ privacy_mode: If True, redacts input/output content from events. + groups: Optional PostHog groups to associate with all events. + properties: Optional additional properties to include with all events. + """ + self._client = client or setup() + self._distinct_id = distinct_id + self._privacy_mode = privacy_mode + self._groups = groups or {} + self._properties = properties or {} + + # Track span start times for latency calculation + self._span_start_times: Dict[str, float] = {} + + # Track trace metadata for associating with spans + self._trace_metadata: Dict[str, Dict[str, Any]] = {} + + def _get_distinct_id(self, trace: Optional[Trace]) -> str: + """Resolve the distinct ID for a trace.""" + if callable(self._distinct_id): + if trace: + result = self._distinct_id(trace) + if result: + return str(result) + return trace.trace_id if trace else "unknown" + elif self._distinct_id: + return str(self._distinct_id) + elif trace: + return trace.trace_id + return "unknown" + + def _with_privacy_mode(self, value: Any) -> Any: + """Apply privacy mode redaction if enabled.""" + if self._privacy_mode or ( + hasattr(self._client, "privacy_mode") and self._client.privacy_mode + ): + return None + return value + + def _capture_event( + self, + event: str, + properties: Dict[str, Any], + distinct_id: Optional[str] = None, + ) -> None: + """Capture an event to PostHog with error handling.""" + try: + if not hasattr(self._client, "capture") or not callable(self._client.capture): + return + + final_distinct_id = distinct_id or "unknown" + final_properties = { + **properties, + **self._properties, + } + + # Don't process person profile if no distinct_id + if distinct_id is None: + final_properties["$process_person_profile"] = False + + self._client.capture( + distinct_id=final_distinct_id, + event=event, + properties=final_properties, + groups=self._groups, + ) + except Exception as e: + log.debug(f"Failed to capture PostHog event: {e}") + + def on_trace_start(self, trace: Trace) -> None: + """Called when a new trace begins.""" + try: + trace_id = trace.trace_id + trace_name = trace.name + + # Store trace metadata for later + self._trace_metadata[trace_id] = { + "name": trace_name, + "group_id": getattr(trace, "group_id", None), + "metadata": getattr(trace, "metadata", None), + } + + distinct_id = self._get_distinct_id(trace) + + self._capture_event( + event="$ai_trace", + distinct_id=distinct_id, + properties={ + "$ai_trace_id": trace_id, + "$ai_trace_name": trace_name, + "$ai_provider": "openai_agents", + }, + ) + except Exception as e: + log.debug(f"Error in on_trace_start: {e}") + + def on_trace_end(self, trace: Trace) -> None: + """Called when a trace completes.""" + try: + trace_id = trace.trace_id + + # Clean up stored metadata + self._trace_metadata.pop(trace_id, None) + except Exception as e: + log.debug(f"Error in on_trace_end: {e}") + + def on_span_start(self, span: Span[Any]) -> None: + """Called when a new span begins.""" + try: + span_id = span.span_id + self._span_start_times[span_id] = time.time() + except Exception as e: + log.debug(f"Error in on_span_start: {e}") + + def on_span_end(self, span: Span[Any]) -> None: + """Called when a span completes.""" + try: + span_id = span.span_id + trace_id = span.trace_id + parent_id = span.parent_id + span_data = span.span_data + + # Calculate latency + start_time = self._span_start_times.pop(span_id, None) + if start_time: + latency = time.time() - start_time + else: + # Fall back to parsing timestamps + started = _parse_iso_timestamp(span.started_at) 
+ ended = _parse_iso_timestamp(span.ended_at) + latency = (ended - started) if (started and ended) else 0 + + # Get distinct ID from trace metadata or default + distinct_id = self._get_distinct_id(None) + if trace_id in self._trace_metadata: + # Use trace's distinct ID resolver if available + distinct_id = self._get_distinct_id(None) + + # Get error info if present + error_info = span.error + error_properties = {} + if error_info: + error_properties = { + "$ai_is_error": True, + "$ai_error": error_info.get("message", str(error_info)), + } + + # Dispatch based on span data type + if isinstance(span_data, GenerationSpanData): + self._handle_generation_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, FunctionSpanData): + self._handle_function_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, AgentSpanData): + self._handle_agent_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, HandoffSpanData): + self._handle_handoff_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, GuardrailSpanData): + self._handle_guardrail_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, ResponseSpanData): + self._handle_response_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, CustomSpanData): + self._handle_custom_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): + self._handle_audio_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, MCPListToolsSpanData): + self._handle_mcp_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + else: + # Unknown span type - capture as generic span + self._handle_generic_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + + except Exception as e: + log.debug(f"Error in on_span_end: {e}") + + def _handle_generation_span( + self, + span_data: GenerationSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle LLM generation spans - maps to $ai_generation event.""" + # Extract token usage + usage = span_data.usage or {} + input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0) + output_tokens = usage.get("output_tokens") or usage.get("completion_tokens", 0) + + # Extract model config parameters + model_config = span_data.model_config or {} + model_params = {} + for param in ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]: + if param in model_config: + model_params[param] = model_config[param] + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_model": span_data.model, + "$ai_model_parameters": model_params if model_params else None, + "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input_tokens": input_tokens, + "$ai_output_tokens": 
output_tokens, + "$ai_latency": latency, + **error_properties, + } + + # Add optional token fields if present + if usage.get("reasoning_tokens"): + properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] + if usage.get("cache_read_input_tokens"): + properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"] + if usage.get("cache_creation_input_tokens"): + properties["$ai_cache_creation_input_tokens"] = usage["cache_creation_input_tokens"] + + self._capture_event("$ai_generation", properties, distinct_id) + + def _handle_function_span( + self, + span_data: FunctionSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle function/tool call spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "tool", + "$ai_provider": "openai_agents", + "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_latency": latency, + **error_properties, + } + + # Add MCP data if present + if span_data.mcp_data: + properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_agent_span( + self, + span_data: AgentSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle agent execution spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "agent", + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Add agent-specific metadata + if span_data.handoffs: + properties["$ai_agent_handoffs"] = span_data.handoffs + if span_data.tools: + properties["$ai_agent_tools"] = span_data.tools + if span_data.output_type: + properties["$ai_agent_output_type"] = span_data.output_type + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_handoff_span( + self, + span_data: HandoffSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle agent handoff spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", + "$ai_span_type": "handoff", + "$ai_provider": "openai_agents", + "$ai_handoff_from_agent": span_data.from_agent, + "$ai_handoff_to_agent": span_data.to_agent, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_guardrail_span( + self, + span_data: GuardrailSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle guardrail execution spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "guardrail", + "$ai_provider": "openai_agents", + "$ai_guardrail_triggered": span_data.triggered, + "$ai_latency": latency, + 
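+            # error info is merged last so $ai_is_error/$ai_error take
+            # precedence over any earlier keys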
**error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_response_span( + self, + span_data: ResponseSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle OpenAI Response API spans - maps to $ai_generation event.""" + response = span_data.response + response_id = response.id if response else None + + # Try to extract usage from response + usage = getattr(response, "usage", None) if response else None + input_tokens = 0 + output_tokens = 0 + if usage: + input_tokens = getattr(usage, "input_tokens", 0) or 0 + output_tokens = getattr(usage, "output_tokens", 0) or 0 + + # Try to extract model from response + model = getattr(response, "model", None) if response else None + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_model": model, + "$ai_response_id": response_id, + "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_input_tokens": input_tokens, + "$ai_output_tokens": output_tokens, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_generation", properties, distinct_id) + + def _handle_custom_span( + self, + span_data: CustomSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle custom user-defined spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "custom", + "$ai_provider": "openai_agents", + "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_audio_span( + self, + span_data: Union[TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData], + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle audio-related spans (transcription, speech) - maps to $ai_span event.""" + span_type = span_data.type # "transcription", "speech", or "speech_group" + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_type, + "$ai_span_type": span_type, + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Add model info if available + if hasattr(span_data, "model") and span_data.model: + properties["$ai_model"] = span_data.model + + # Don't include audio data (base64) - just metadata + if hasattr(span_data, "output") and isinstance(span_data.output, str): + # For transcription, output is the text + properties["$ai_output_state"] = self._with_privacy_mode(span_data.output) + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_mcp_span( + self, + span_data: MCPListToolsSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": f"mcp:{span_data.server}", + "$ai_span_type": "mcp_tools", + 
"$ai_provider": "openai_agents", + "$ai_mcp_server": span_data.server, + "$ai_mcp_tools": span_data.result, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_generic_span( + self, + span_data: Any, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle unknown span types - maps to $ai_span event.""" + span_type = getattr(span_data, "type", "unknown") + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_type, + "$ai_span_type": span_type, + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Try to export span data + if hasattr(span_data, "export"): + try: + exported = span_data.export() + properties["$ai_span_data"] = _safe_json(exported) + except Exception: + pass + + self._capture_event("$ai_span", properties, distinct_id) + + def shutdown(self) -> None: + """Clean up resources when the application stops.""" + try: + self._span_start_times.clear() + self._trace_metadata.clear() + + # Flush the PostHog client if possible + if hasattr(self._client, "flush") and callable(self._client.flush): + self._client.flush() + except Exception as e: + log.debug(f"Error in shutdown: {e}") + + def force_flush(self) -> None: + """Force immediate processing of any queued events.""" + try: + if hasattr(self._client, "flush") and callable(self._client.flush): + self._client.flush() + except Exception as e: + log.debug(f"Error in force_flush: {e}") diff --git a/posthog/test/ai/openai_agents/__init__.py b/posthog/test/ai/openai_agents/__init__.py new file mode 100644 index 00000000..1a28a2a4 --- /dev/null +++ b/posthog/test/ai/openai_agents/__init__.py @@ -0,0 +1 @@ +# Tests for OpenAI Agents SDK integration diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py new file mode 100644 index 00000000..bab7d798 --- /dev/null +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -0,0 +1,413 @@ +import logging +from unittest.mock import MagicMock, patch + +import pytest + +try: + from agents.tracing.span_data import ( + AgentSpanData, + CustomSpanData, + FunctionSpanData, + GenerationSpanData, + GuardrailSpanData, + HandoffSpanData, + ) + + from posthog.ai.openai_agents import PostHogTracingProcessor, instrument + + OPENAI_AGENTS_AVAILABLE = True +except ImportError: + OPENAI_AGENTS_AVAILABLE = False + + +# Skip all tests if OpenAI Agents SDK is not available +pytestmark = pytest.mark.skipif( + not OPENAI_AGENTS_AVAILABLE, reason="OpenAI Agents SDK is not available" +) + + +@pytest.fixture(scope="function") +def mock_client(): + client = MagicMock() + client.privacy_mode = False + logging.getLogger("posthog").setLevel(logging.DEBUG) + return client + + +@pytest.fixture(scope="function") +def processor(mock_client): + return PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + privacy_mode=False, + ) + + +@pytest.fixture +def mock_trace(): + trace = MagicMock() + trace.trace_id = "trace_123456789" + trace.name = "Test Workflow" + trace.group_id = "group_123" + trace.metadata = {"key": "value"} + return trace + + +@pytest.fixture +def mock_span(): + span = MagicMock() + span.trace_id = "trace_123456789" + span.span_id = "span_987654321" + span.parent_id = None + span.started_at = "2024-01-01T00:00:00Z" + span.ended_at = "2024-01-01T00:00:01Z" + 
span.error = None + return span + + +class TestPostHogTracingProcessor: + """Tests for the PostHogTracingProcessor class.""" + + def test_initialization(self, mock_client): + """Test processor initializes correctly.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="user@example.com", + privacy_mode=True, + groups={"company": "acme"}, + properties={"env": "test"}, + ) + + assert processor._client == mock_client + assert processor._distinct_id == "user@example.com" + assert processor._privacy_mode is True + assert processor._groups == {"company": "acme"} + assert processor._properties == {"env": "test"} + + def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace): + """Test processor with callable distinct_id resolver.""" + resolver = lambda trace: trace.metadata.get("user_id", "default") + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + mock_trace.metadata = {"user_id": "resolved-user"} + distinct_id = processor._get_distinct_id(mock_trace) + assert distinct_id == "resolved-user" + + def test_on_trace_start(self, processor, mock_client, mock_trace): + """Test that on_trace_start captures $ai_trace event.""" + processor.on_trace_start(mock_trace) + + mock_client.capture.assert_called_once() + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_trace" + assert call_kwargs["distinct_id"] == "test-user" + assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" + assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" + assert call_kwargs["properties"]["$ai_provider"] == "openai_agents" + + def test_on_trace_end_clears_metadata(self, processor, mock_trace): + """Test that on_trace_end clears stored trace metadata.""" + processor.on_trace_start(mock_trace) + assert mock_trace.trace_id in processor._trace_metadata + + processor.on_trace_end(mock_trace) + assert mock_trace.trace_id not in processor._trace_metadata + + def test_on_span_start_tracks_time(self, processor, mock_span): + """Test that on_span_start records start time.""" + processor.on_span_start(mock_span) + assert mock_span.span_id in processor._span_start_times + + def test_generation_span_mapping(self, processor, mock_client, mock_span): + """Test GenerationSpanData maps to $ai_generation event.""" + span_data = GenerationSpanData( + input=[{"role": "user", "content": "Hello"}], + output=[{"role": "assistant", "content": "Hi there!"}], + model="gpt-4o", + model_config={"temperature": 0.7, "max_tokens": 100}, + usage={"input_tokens": 10, "output_tokens": 20}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + mock_client.capture.assert_called_once() + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_generation" + assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" + assert call_kwargs["properties"]["$ai_span_id"] == "span_987654321" + assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_model"] == "gpt-4o" + assert call_kwargs["properties"]["$ai_input_tokens"] == 10 + assert call_kwargs["properties"]["$ai_output_tokens"] == 20 + assert call_kwargs["properties"]["$ai_input"] == [ + {"role": "user", "content": "Hello"} + ] + assert call_kwargs["properties"]["$ai_output_choices"] == [ + {"role": "assistant", "content": "Hi there!"} + ] + + def test_generation_span_with_reasoning_tokens(self, processor, mock_client, 
mock_span): + """Test GenerationSpanData includes reasoning tokens when present.""" + span_data = GenerationSpanData( + model="o1-preview", + usage={ + "input_tokens": 100, + "output_tokens": 500, + "reasoning_tokens": 400, + }, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_reasoning_tokens"] == 400 + + def test_function_span_mapping(self, processor, mock_client, mock_span): + """Test FunctionSpanData maps to $ai_span event with type=tool.""" + span_data = FunctionSpanData( + name="get_weather", + input='{"city": "San Francisco"}', + output="Sunny, 72F", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "get_weather" + assert call_kwargs["properties"]["$ai_span_type"] == "tool" + assert call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + assert call_kwargs["properties"]["$ai_output_state"] == "Sunny, 72F" + + def test_agent_span_mapping(self, processor, mock_client, mock_span): + """Test AgentSpanData maps to $ai_span event with type=agent.""" + span_data = AgentSpanData( + name="CustomerServiceAgent", + handoffs=["TechnicalAgent", "BillingAgent"], + tools=["search", "get_order"], + output_type="str", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "CustomerServiceAgent" + assert call_kwargs["properties"]["$ai_span_type"] == "agent" + assert call_kwargs["properties"]["$ai_agent_handoffs"] == [ + "TechnicalAgent", + "BillingAgent", + ] + assert call_kwargs["properties"]["$ai_agent_tools"] == ["search", "get_order"] + + def test_handoff_span_mapping(self, processor, mock_client, mock_span): + """Test HandoffSpanData maps to $ai_span event with type=handoff.""" + span_data = HandoffSpanData( + from_agent="TriageAgent", + to_agent="TechnicalAgent", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "handoff" + assert call_kwargs["properties"]["$ai_handoff_from_agent"] == "TriageAgent" + assert call_kwargs["properties"]["$ai_handoff_to_agent"] == "TechnicalAgent" + assert ( + call_kwargs["properties"]["$ai_span_name"] + == "TriageAgent -> TechnicalAgent" + ) + + def test_guardrail_span_mapping(self, processor, mock_client, mock_span): + """Test GuardrailSpanData maps to $ai_span event with type=guardrail.""" + span_data = GuardrailSpanData( + name="ContentFilter", + triggered=True, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "ContentFilter" + assert call_kwargs["properties"]["$ai_span_type"] == "guardrail" + assert call_kwargs["properties"]["$ai_guardrail_triggered"] is True + + def test_custom_span_mapping(self, processor, mock_client, mock_span): + """Test 
CustomSpanData maps to $ai_span event with type=custom.""" + span_data = CustomSpanData( + name="database_query", + data={"query": "SELECT * FROM users", "rows": 100}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "database_query" + assert call_kwargs["properties"]["$ai_span_type"] == "custom" + assert call_kwargs["properties"]["$ai_custom_data"] == { + "query": "SELECT * FROM users", + "rows": 100, + } + + def test_privacy_mode_redacts_content(self, mock_client, mock_span): + """Test that privacy_mode redacts input/output content.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + privacy_mode=True, + ) + + span_data = GenerationSpanData( + input=[{"role": "user", "content": "Secret message"}], + output=[{"role": "assistant", "content": "Secret response"}], + model="gpt-4o", + usage={"input_tokens": 10, "output_tokens": 20}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + # Content should be redacted + assert call_kwargs["properties"]["$ai_input"] is None + assert call_kwargs["properties"]["$ai_output_choices"] is None + # Token counts should still be present + assert call_kwargs["properties"]["$ai_input_tokens"] == 10 + assert call_kwargs["properties"]["$ai_output_tokens"] == 20 + + def test_error_handling_in_span(self, processor, mock_client, mock_span): + """Test that span errors are captured correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Rate limit exceeded", "data": {"code": 429}} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["properties"]["$ai_is_error"] is True + assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" + + def test_latency_calculation(self, processor, mock_client, mock_span): + """Test that latency is calculated correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + with patch("time.time") as mock_time: + mock_time.return_value = 1000.0 + processor.on_span_start(mock_span) + + mock_time.return_value = 1001.5 # 1.5 seconds later + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_latency"] == pytest.approx(1.5, rel=0.01) + + def test_groups_included_in_events(self, mock_client, mock_trace, mock_span): + """Test that groups are included in captured events.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + groups={"company": "acme", "team": "engineering"}, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["groups"] == {"company": "acme", "team": "engineering"} + + def test_additional_properties_included(self, mock_client, mock_trace): + """Test that additional properties are included in events.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + properties={"environment": "production", "version": "1.0"}, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert 
call_kwargs["properties"]["environment"] == "production" + assert call_kwargs["properties"]["version"] == "1.0" + + def test_shutdown_clears_state(self, processor): + """Test that shutdown clears internal state.""" + processor._span_start_times["span_1"] = 1000.0 + processor._trace_metadata["trace_1"] = {"name": "test"} + + processor.shutdown() + + assert len(processor._span_start_times) == 0 + assert len(processor._trace_metadata) == 0 + + def test_force_flush_calls_client_flush(self, processor, mock_client): + """Test that force_flush calls client.flush().""" + processor.force_flush() + mock_client.flush.assert_called_once() + + +class TestInstrumentHelper: + """Tests for the instrument() convenience function.""" + + def test_instrument_registers_processor(self, mock_client): + """Test that instrument() registers a processor.""" + with patch("agents.tracing.add_trace_processor") as mock_add: + processor = instrument( + client=mock_client, + distinct_id="test-user", + ) + + mock_add.assert_called_once_with(processor) + assert isinstance(processor, PostHogTracingProcessor) + + def test_instrument_with_privacy_mode(self, mock_client): + """Test instrument() respects privacy_mode.""" + with patch("agents.tracing.add_trace_processor"): + processor = instrument( + client=mock_client, + privacy_mode=True, + ) + + assert processor._privacy_mode is True + + def test_instrument_with_groups_and_properties(self, mock_client): + """Test instrument() accepts groups and properties.""" + with patch("agents.tracing.add_trace_processor"): + processor = instrument( + client=mock_client, + groups={"company": "acme"}, + properties={"env": "test"}, + ) + + assert processor._groups == {"company": "acme"} + assert processor._properties == {"env": "test"} From 3006664813f4f3fbad195837aed2308bfeef7089 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 14 Jan 2026 23:27:03 +0100 Subject: [PATCH 02/24] feat(openai-agents): add $ai_group_id support for linking conversation traces - Capture group_id from trace and include as $ai_group_id on all events - Add _get_group_id() helper to retrieve group_id from trace metadata - Pass group_id through all span handlers (generation, function, agent, handoff, guardrail, response, custom, audio, mcp, generic) - Enables linking multiple traces in the same conversation thread --- posthog/ai/openai_agents/processor.py | 110 +++++++++++++++++++++----- 1 file changed, 89 insertions(+), 21 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 34f84556..87bafd14 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -127,6 +127,12 @@ def _with_privacy_mode(self, value: Any) -> Any: return None return value + def _get_group_id(self, trace_id: str) -> Optional[str]: + """Get the group_id for a trace from stored metadata.""" + if trace_id in self._trace_metadata: + return self._trace_metadata[trace_id].get("group_id") + return None + def _capture_event( self, event: str, @@ -162,24 +168,36 @@ def on_trace_start(self, trace: Trace) -> None: try: trace_id = trace.trace_id trace_name = trace.name + group_id = getattr(trace, "group_id", None) + metadata = getattr(trace, "metadata", None) - # Store trace metadata for later + # Store trace metadata for later (used by spans) self._trace_metadata[trace_id] = { "name": trace_name, - "group_id": getattr(trace, "group_id", None), - "metadata": getattr(trace, "metadata", None), + "group_id": group_id, + "metadata": metadata, } distinct_id = 
self._get_distinct_id(trace) + properties = { + "$ai_trace_id": trace_id, + "$ai_trace_name": trace_name, + "$ai_provider": "openai_agents", + } + + # Include group_id for linking related traces (e.g., conversation threads) + if group_id: + properties["$ai_group_id"] = group_id + + # Include trace metadata if present + if metadata: + properties["$ai_trace_metadata"] = _safe_json(metadata) + self._capture_event( event="$ai_trace", distinct_id=distinct_id, - properties={ - "$ai_trace_id": trace_id, - "$ai_trace_name": trace_name, - "$ai_provider": "openai_agents", - }, + properties=properties, ) except Exception as e: log.debug(f"Error in on_trace_start: {e}") @@ -222,9 +240,9 @@ def on_span_end(self, span: Span[Any]) -> None: # Get distinct ID from trace metadata or default distinct_id = self._get_distinct_id(None) - if trace_id in self._trace_metadata: - # Use trace's distinct ID resolver if available - distinct_id = self._get_distinct_id(None) + + # Get group_id from trace metadata for linking + group_id = self._get_group_id(trace_id) # Get error info if present error_info = span.error @@ -238,44 +256,44 @@ def on_span_end(self, span: Span[Any]) -> None: # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, FunctionSpanData): self._handle_function_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, AgentSpanData): self._handle_agent_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, HandoffSpanData): self._handle_handoff_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, GuardrailSpanData): self._handle_guardrail_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, ResponseSpanData): self._handle_response_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, CustomSpanData): self._handle_custom_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): self._handle_audio_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, MCPListToolsSpanData): self._handle_mcp_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) else: # Unknown span type - capture as generic span 
self._handle_generic_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) except Exception as e: @@ -289,6 +307,7 @@ def _handle_generation_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle LLM generation spans - maps to $ai_generation event.""" @@ -319,6 +338,10 @@ def _handle_generation_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add optional token fields if present if usage.get("reasoning_tokens"): properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] @@ -337,6 +360,7 @@ def _handle_function_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" @@ -353,6 +377,10 @@ def _handle_function_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add MCP data if present if span_data.mcp_data: properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) @@ -367,6 +395,7 @@ def _handle_agent_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle agent execution spans - maps to $ai_span event.""" @@ -381,6 +410,10 @@ def _handle_agent_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add agent-specific metadata if span_data.handoffs: properties["$ai_agent_handoffs"] = span_data.handoffs @@ -399,6 +432,7 @@ def _handle_handoff_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" @@ -415,6 +449,10 @@ def _handle_handoff_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_guardrail_span( @@ -425,6 +463,7 @@ def _handle_guardrail_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" @@ -440,6 +479,10 @@ def _handle_guardrail_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_response_span( @@ -450,6 +493,7 @@ def _handle_response_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle OpenAI Response API spans - maps to $ai_generation event.""" @@ -481,6 +525,10 @@ def _handle_response_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_generation", properties, distinct_id) def _handle_custom_span( @@ -491,6 +539,7 @@ def _handle_custom_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle custom user-defined 
spans - maps to $ai_span event.""" @@ -506,6 +555,10 @@ def _handle_custom_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_audio_span( @@ -516,6 +569,7 @@ def _handle_audio_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle audio-related spans (transcription, speech) - maps to $ai_span event.""" @@ -532,6 +586,10 @@ def _handle_audio_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add model info if available if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model @@ -551,6 +609,7 @@ def _handle_mcp_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" @@ -567,6 +626,10 @@ def _handle_mcp_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_generic_span( @@ -577,6 +640,7 @@ def _handle_generic_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle unknown span types - maps to $ai_span event.""" @@ -593,6 +657,10 @@ def _handle_generic_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Try to export span data if hasattr(span_data, "export"): try: From 945134a8df58874080879b008acecae95ff19fca Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 11:43:36 +0100 Subject: [PATCH 03/24] feat(openai-agents): add enhanced span properties - Add $ai_total_tokens to generation and response spans (required by PostHog cost reporting) - Add $ai_error_type for cross-provider error categorization (model_behavior_error, user_error, input_guardrail_triggered, output_guardrail_triggered, max_turns_exceeded) - Add $ai_output_choices to response spans for output content capture - Add audio pass-through properties for voice spans: - first_content_at (time to first audio byte) - audio_input_format / audio_output_format - model_config - $ai_input for TTS text input - Add comprehensive tests for all new properties --- posthog/ai/openai_agents/processor.py | 45 ++++- .../test/ai/openai_agents/test_processor.py | 168 ++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 87bafd14..26805c2c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -248,9 +248,26 @@ def on_span_end(self, span: Span[Any]) -> None: error_info = span.error error_properties = {} if error_info: + error_message = error_info.get("message", str(error_info)) + error_type_raw = error_info.get("type", "") + + # Categorize error type for cross-provider filtering/alerting + error_type = "unknown" + if "ModelBehaviorError" in error_type_raw or "ModelBehaviorError" in error_message: + error_type = "model_behavior_error" + elif "UserError" in error_type_raw or "UserError" in error_message: + error_type = "user_error" + elif 
"InputGuardrailTripwireTriggered" in error_message: + error_type = "input_guardrail_triggered" + elif "OutputGuardrailTripwireTriggered" in error_message: + error_type = "output_guardrail_triggered" + elif "MaxTurnsExceeded" in error_message: + error_type = "max_turns_exceeded" + error_properties = { "$ai_is_error": True, - "$ai_error": error_info.get("message", str(error_info)), + "$ai_error": error_message, + "$ai_error_type": error_type, } # Dispatch based on span data type @@ -334,6 +351,7 @@ def _handle_generation_span( "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, + "$ai_total_tokens": input_tokens + output_tokens, "$ai_latency": latency, **error_properties, } @@ -521,6 +539,7 @@ def _handle_response_span( "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, + "$ai_total_tokens": input_tokens + output_tokens, "$ai_latency": latency, **error_properties, } @@ -529,6 +548,12 @@ def _handle_response_span( if group_id: properties["$ai_group_id"] = group_id + # Extract output content from response + if response: + output_items = getattr(response, "output", None) + if output_items: + properties["$ai_output_choices"] = self._with_privacy_mode(_safe_json(output_items)) + self._capture_event("$ai_generation", properties, distinct_id) def _handle_custom_span( @@ -594,6 +619,24 @@ def _handle_audio_span( if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model + # Add model config if available (pass-through property) + if hasattr(span_data, "model_config") and span_data.model_config: + properties["model_config"] = _safe_json(span_data.model_config) + + # Add time to first audio byte for speech spans (pass-through property) + if hasattr(span_data, "first_content_at") and span_data.first_content_at: + properties["first_content_at"] = span_data.first_content_at + + # Add audio format info (pass-through properties) + if hasattr(span_data, "input_format"): + properties["audio_input_format"] = span_data.input_format + if hasattr(span_data, "output_format"): + properties["audio_output_format"] = span_data.output_format + + # Add text input for TTS + if hasattr(span_data, "input") and span_data.input and isinstance(span_data.input, str): + properties["$ai_input"] = self._with_privacy_mode(span_data.input) + # Don't include audio data (base64) - just metadata if hasattr(span_data, "output") and isinstance(span_data.output, str): # For transcription, output is the text diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index bab7d798..77eb4b11 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -11,6 +11,9 @@ GenerationSpanData, GuardrailSpanData, HandoffSpanData, + ResponseSpanData, + SpeechSpanData, + TranscriptionSpanData, ) from posthog.ai.openai_agents import PostHogTracingProcessor, instrument @@ -318,6 +321,171 @@ def test_error_handling_in_span(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_is_error"] is True assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" + def test_generation_span_includes_total_tokens(self, processor, mock_client, mock_span): + """Test that $ai_total_tokens is calculated and included.""" + span_data = GenerationSpanData( + model="gpt-4o", + usage={"input_tokens": 100, 
"output_tokens": 50}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_total_tokens"] == 150 + + def test_error_type_categorization_model_behavior(self, processor, mock_client, mock_span): + """Test that ModelBehaviorError is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "ModelBehaviorError: Invalid JSON output", "type": "ModelBehaviorError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" + + def test_error_type_categorization_user_error(self, processor, mock_client, mock_span): + """Test that UserError is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "UserError: Tool failed", "type": "UserError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "user_error" + + def test_error_type_categorization_input_guardrail(self, processor, mock_client, mock_span): + """Test that InputGuardrailTripwireTriggered is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "InputGuardrailTripwireTriggered: Content blocked"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + + def test_error_type_categorization_output_guardrail(self, processor, mock_client, mock_span): + """Test that OutputGuardrailTripwireTriggered is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "OutputGuardrailTripwireTriggered: Response blocked"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + + def test_error_type_categorization_max_turns(self, processor, mock_client, mock_span): + """Test that MaxTurnsExceeded is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "MaxTurnsExceeded: Agent exceeded maximum turns"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "max_turns_exceeded" + + def test_error_type_categorization_unknown(self, processor, mock_client, mock_span): + """Test that unknown errors are categorized as unknown.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Some random error occurred"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "unknown" + + def test_response_span_with_output_and_total_tokens(self, processor, mock_client, mock_span): + """Test ResponseSpanData includes output 
choices and total tokens.""" + # Create a mock response object + mock_response = MagicMock() + mock_response.id = "resp_123" + mock_response.model = "gpt-4o" + mock_response.output = [{"type": "message", "content": "Hello!"}] + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 25 + mock_response.usage.output_tokens = 10 + + span_data = ResponseSpanData( + response=mock_response, + input="Hello, world!", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_generation" + assert call_kwargs["properties"]["$ai_total_tokens"] == 35 + assert call_kwargs["properties"]["$ai_output_choices"] == [{"type": "message", "content": "Hello!"}] + assert call_kwargs["properties"]["$ai_response_id"] == "resp_123" + + def test_speech_span_with_pass_through_properties(self, processor, mock_client, mock_span): + """Test SpeechSpanData includes pass-through properties.""" + span_data = SpeechSpanData( + input="Hello, how can I help you?", + output="base64_audio_data", + output_format="pcm", + model="tts-1", + model_config={"voice": "alloy", "speed": 1.0}, + first_content_at="2024-01-01T00:00:00.500Z", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "speech" + assert call_kwargs["properties"]["$ai_model"] == "tts-1" + # Pass-through properties (no $ai_ prefix) + assert call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + assert call_kwargs["properties"]["audio_output_format"] == "pcm" + assert call_kwargs["properties"]["model_config"] == {"voice": "alloy", "speed": 1.0} + # Text input should be captured + assert call_kwargs["properties"]["$ai_input"] == "Hello, how can I help you?" + + def test_transcription_span_with_pass_through_properties(self, processor, mock_client, mock_span): + """Test TranscriptionSpanData includes pass-through properties.""" + span_data = TranscriptionSpanData( + input="base64_audio_data", + input_format="pcm", + output="This is the transcribed text.", + model="whisper-1", + model_config={"language": "en"}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "transcription" + assert call_kwargs["properties"]["$ai_model"] == "whisper-1" + # Pass-through properties (no $ai_ prefix) + assert call_kwargs["properties"]["audio_input_format"] == "pcm" + assert call_kwargs["properties"]["model_config"] == {"language": "en"} + # Transcription output should be captured + assert call_kwargs["properties"]["$ai_output_state"] == "This is the transcribed text." 
+ def test_latency_calculation(self, processor, mock_client, mock_span): """Test that latency is calculated correctly.""" span_data = GenerationSpanData(model="gpt-4o") From 2445b409b2bfc17f5c55e64238540ef612e122b3 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 15:45:54 +0100 Subject: [PATCH 04/24] Add $ai_framework property and standardize $ai_provider for OpenAI Agents - Add $ai_framework="openai-agents" to all events for framework identification - Standardize $ai_provider="openai" on all events (previously some used "openai_agents") - Follows pattern from posthog-js where $ai_provider is the underlying LLM provider --- posthog/ai/openai_agents/processor.py | 29 +++++++++++++------ .../test/ai/openai_agents/test_processor.py | 4 ++- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 26805c2c..07385d2c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -183,7 +183,8 @@ def on_trace_start(self, trace: Trace) -> None: properties = { "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", } # Include group_id for linking related traces (e.g., conversation threads) @@ -345,6 +346,7 @@ def _handle_generation_span( "$ai_span_id": span_id, "$ai_parent_id": parent_id, "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -388,7 +390,8 @@ def _handle_function_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_latency": latency, @@ -423,7 +426,8 @@ def _handle_agent_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "agent", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } @@ -460,7 +464,8 @@ def _handle_handoff_span( "$ai_parent_id": parent_id, "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_handoff_from_agent": span_data.from_agent, "$ai_handoff_to_agent": span_data.to_agent, "$ai_latency": latency, @@ -491,7 +496,8 @@ def _handle_guardrail_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_guardrail_triggered": span_data.triggered, "$ai_latency": latency, **error_properties, @@ -534,6 +540,7 @@ def _handle_response_span( "$ai_span_id": span_id, "$ai_parent_id": parent_id, "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -574,7 +581,8 @@ def _handle_custom_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + 
"$ai_framework": "openai-agents", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), "$ai_latency": latency, **error_properties, @@ -606,7 +614,8 @@ def _handle_audio_span( "$ai_parent_id": parent_id, "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } @@ -662,7 +671,8 @@ def _handle_mcp_span( "$ai_parent_id": parent_id, "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_mcp_server": span_data.server, "$ai_mcp_tools": span_data.result, "$ai_latency": latency, @@ -695,7 +705,8 @@ def _handle_generic_span( "$ai_parent_id": parent_id, "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 77eb4b11..b5509340 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -110,7 +110,8 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["distinct_id"] == "test-user" assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" - assert call_kwargs["properties"]["$ai_provider"] == "openai_agents" + assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" def test_on_trace_end_clears_metadata(self, processor, mock_trace): """Test that on_trace_end clears stored trace metadata.""" @@ -146,6 +147,7 @@ def test_generation_span_mapping(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" assert call_kwargs["properties"]["$ai_span_id"] == "span_987654321" assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" assert call_kwargs["properties"]["$ai_model"] == "gpt-4o" assert call_kwargs["properties"]["$ai_input_tokens"] == 10 assert call_kwargs["properties"]["$ai_output_tokens"] == 20 From 6193698228de3c4be881ebc36ea0007c41259223 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 15:48:02 +0100 Subject: [PATCH 05/24] chore: bump version to 7.7.0 for OpenAI Agents SDK integration --- CHANGELOG.md | 735 +-------------------------------------------- posthog/version.py | 2 +- 2 files changed, 7 insertions(+), 730 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21b8cc37..f674fdc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 7.7.0 - 2026-01-15 + +feat(ai): Add OpenAI Agents SDK integration + +Automatic tracing for agent workflows, handoffs, tool calls, guardrails, and custom spans. Includes `$ai_total_tokens`, `$ai_error_type` categorization, and `$ai_framework` property. 
+ # 7.6.0 - 2026-01-12 feat: add device_id to flags request payload @@ -30,732 +36,3 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r # 7.4.0 - 2025-12-16 feat: Add automatic retries for feature flag requests - -Feature flag API requests now automatically retry on transient failures: - -- Network errors (connection refused, DNS failures, timeouts) -- Server errors (500, 502, 503, 504) -- Up to 2 retries with exponential backoff (0.5s, 1s delays) - -Rate limit (429) and quota (402) errors are not retried. - -# 7.3.1 - 2025-12-06 - -fix: remove unused $exception_message and $exception_type - -# 7.3.0 - 2025-12-05 - -feat: improve code variables capture masking - -# 7.2.0 - 2025-12-01 - -feat: add $feature_flag_evaluated_at properties to $feature_flag_called events - -# 7.1.0 - 2025-11-26 - -Add support for the async version of Gemini. - -# 7.0.2 - 2025-11-18 - -Add support for Python 3.14. -Projects upgrading to Python 3.14 should ensure any Pydantic models passed into the SDK use Pydantic v2, as Pydantic v1 is not compatible with Python 3.14. - -# 7.0.1 - 2025-11-15 - -Try to use repr() when formatting code variables - -# 7.0.0 - 2025-11-11 - -NB Python 3.9 is no longer supported - -- chore(llma): update LLM provider SDKs to latest major versions - - openai: 1.102.0 → 2.7.1 - - anthropic: 0.64.0 → 0.72.0 - - google-genai: 1.32.0 → 1.49.0 - - langchain-core: 0.3.75 → 1.0.3 - - langchain-openai: 0.3.32 → 1.0.2 - - langchain-anthropic: 0.3.19 → 1.0.1 - - langchain-community: 0.3.29 → 0.4.1 - - langgraph: 0.6.6 → 1.0.2 - -# 6.9.3 - 2025-11-10 - -- feat(ph-ai): PostHog properties dict in GenerationMetadata - -# 6.9.2 - 2025-11-10 - -- fix(llma): fix cache token double subtraction in Langchain for non-Anthropic providers causing negative costs - -# 6.9.1 - 2025-11-07 - -- fix(error-tracking): pass code variables config from init to client - -# 6.9.0 - 2025-11-06 - -- feat(error-tracking): add local variables capture - -# 6.8.0 - 2025-11-03 - -- feat(llma): send web search calls to be used for LLM cost calculations - -# 6.7.14 - 2025-11-03 - -- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355) -- test(django): Add Django 5 integration test suite with real ASGI application testing async middleware behavior - -# 6.7.13 - 2025-11-02 - -- fix(llma): cache cost calculation in the LangChain callback - -# 6.7.12 - 2025-11-02 - -- fix(django): Restore process_exception method to capture view and downstream middleware exceptions (fixes #329) -- fix(ai/langchain): Add LangChain 1.0+ compatibility for CallbackHandler imports (fixes #362) - -# 6.7.11 - 2025-10-28 - -- feat(ai): Add `$ai_framework` property for framework integrations (e.g. LangChain) - -# 6.7.10 - 2025-10-24 - -- fix(django): Make middleware truly hybrid - compatible with both sync (WSGI) and async (ASGI) Django stacks without breaking sync-only deployments - -# 6.7.9 - 2025-10-22 - -- fix(flags): multi-condition flags with static cohorts returning wrong variants - -# 6.7.8 - 2025-10-16 - -- fix(llma): missing async for OpenAI's streaming implementation - -# 6.7.7 - 2025-10-14 - -- fix: remove deprecated attribute $exception_personURL from exception events - -# 6.7.6 - 2025-09-16 - -- fix: don't sort condition sets with variant overrides to the top -- fix: Prevent core Client methods from raising exceptions - -# 6.7.5 - 2025-09-16 - -- feat: Django middleware now supports async request handling. 
- -# 6.7.4 - 2025-09-05 - -- fix: Missing system prompts for some providers - -# 6.7.3 - 2025-09-04 - -- fix: missing usage tokens in Gemini - -# 6.7.2 - 2025-09-03 - -- fix: tool call results in streaming providers - -# 6.7.1 - 2025-09-01 - -- fix: Add base64 inline image sanitization - -# 6.7.0 - 2025-08-26 - -- feat: Add support for feature flag dependencies - -# 6.6.1 - 2025-08-21 - -- fix: Prevent `NoneType` error when `group_properties` is `None` - -# 6.6.0 - 2025-08-15 - -- feat: Add `flag_keys_to_evaluate` parameter to optimize feature flag evaluation performance by only evaluating specified flags -- feat: Add `flag_keys_filter` option to `send_feature_flags` for selective flag evaluation in capture events - -# 6.5.0 - 2025-08-08 - -- feat: Add `$context_tags` to an event to know which properties were included as tags - -# 6.4.1 - 2025-08-06 - -- fix: Always pass project API key in `remote_config` requests for deterministic project routing - -# 6.4.0 - 2025-08-05 - -- feat: support Vertex AI for Gemini - -# 6.3.4 - 2025-08-04 - -- fix: set `$ai_tools` for all providers and `$ai_output_choices` for all non-streaming provider flows properly - -# 6.3.3 - 2025-08-01 - -- fix: `get_feature_flag_result` now correctly returns FeatureFlagResult when payload is empty string instead of None - -# 6.3.2 - 2025-07-31 - -- fix: Anthropic's tool calls are now handled properly - -# 6.3.0 - 2025-07-22 - -- feat: Enhanced `send_feature_flags` parameter to accept `SendFeatureFlagsOptions` object for declarative control over local/remote evaluation and custom properties - -# 6.2.1 - 2025-07-21 - -- feat: make `posthog_client` an optional argument in PostHog AI providers wrappers (`posthog.ai.*`), intuitively using the default client as the default - -# 6.1.1 - 2025-07-16 - -- fix: correctly capture exceptions processed by Django from views or middleware - -# 6.1.0 - 2025-07-10 - -- feat: decouple feature flag local evaluation from personal API keys; support decrypting remote config payloads without relying on the feature flags poller - -# 6.0.4 - 2025-07-09 - -- fix: add POSTHOG_MW_CLIENT setting to django middleware, to support custom clients for exception capture. - -# 6.0.3 - 2025-07-07 - -- feat: add a feature flag evaluation cache (local storage or redis) to support returning flag evaluations when the service is down - -# 6.0.2 - 2025-07-02 - -- fix: send_feature_flags changed to default to false in `Client::capture_exception` - -# 6.0.1 - -- fix: response `$process_person_profile` property when passed to capture - -# 6.0.0 - -This release contains a number of major breaking changes: - -- feat: make distinct_id an optional parameter in posthog.capture and related functions -- feat: make capture and related functions return `Optional[str]`, which is the UUID of the sent event, if it was sent -- fix: remove `identify` (prefer `posthog.set()`), and `page` and `screen` (prefer `posthog.capture()`) -- fix: delete exception-capture specific integrations module. Prefer the general-purpose django middleware as a replacement for the django `Integration`. - -To migrate to this version, you'll mostly just need to switch to using named keyword arguments, rather than positional ones. 
For example: - -```python -# Old calling convention -posthog.capture("user123", "button_clicked", {"button_id": "123"}) -# New calling convention -posthog.capture(distinct_id="user123", event="button_clicked", properties={"button_id": "123"}) - -# Better pattern -with posthog.new_context(): - posthog.identify_context("user123") - - # The event name is the first argument, and can be passed positionally, or as a keyword argument in a later position - posthog.capture("button_pressed") -``` - -Generally, arguments are now appropriately typed, and docstrings have been updated. If something is unclear, please open an issue, or submit a PR! - -# 5.4.0 - 2025-06-20 - -- feat: add support to session_id context on page method - -# 5.3.0 - 2025-06-19 - -- fix: safely handle exception values - -# 5.2.0 - 2025-06-19 - -- feat: construct artificial stack traces if no traceback is available on a captured exception - -## 5.1.0 - 2025-06-18 - -- feat: session and distinct ID's can now be associated with contexts, and are used as such -- feat: django http request middleware - -## 5.0.0 - 2025-06-16 - -- fix: removed deprecated sentry integration - -## 4.10.0 - 2025-06-13 - -- fix: no longer fail in autocapture. - -## 4.9.0 - 2025-06-13 - -- feat(ai): track reasoning and cache tokens in the LangChain callback - -## 4.8.0 - 2025-06-10 - -- fix: export scoped, rather than tracked, decorator -- feat: allow use of contexts without error tracking - -## 4.7.0 - 2025-06-10 - -- feat: add support for parse endpoint in responses API (no longer beta) - -## 4.6.2 - 2025-06-09 - -- fix: replace `import posthog` with direct method imports - -## 4.6.1 - 2025-06-09 - -- fix: replace `import posthog` in `posthoganalytics` package - -## 4.6.0 - 2025-06-09 - -- feat: add additional user and request context to captured exceptions via the Django integration -- feat: Add `setup()` function to initialise default client - -## 4.5.0 - 2025-06-09 - -- feat: add before_send callback (#249) - -## 4.4.2- 2025-06-09 - -- empty point release to fix release automation - -## 4.4.1 2025-06-09 - -- empty point release to fix release automation - -## 4.4.0 - 2025-06-09 - -- Use the new `/flags` endpoint for all feature flag evaluations (don't fall back to `/decide` at all) - -## 4.3.2 - 2025-06-06 - -1. Add context management: - -- New context manager with `posthog.new_context()` -- Tag functions: `posthog.tag()`, `posthog.get_tags()`, `posthog.clear_tags()` -- Function decorator: - - `@posthog.scoped` - Creates context and captures exceptions thrown within the function -- Automatic deduplication of exceptions to ensure each exception is only captured once - -2. fix: feature flag request use geoip_disable (#235) -3. chore: pin actions versions (#210) -4. fix: opinionated setup and clean fn fix (#240) -5. fix: release action failed (#241) - -## 4.2.0 - 2025-05-22 - -Add support for google gemini - -## 4.1.0 - 2025-05-22 - -Moved ai openai package to a composition approach over inheritance. - -## 4.0.1 – 2025-04-29 - -1. Remove deprecated `monotonic` library. Use Python's core `time.monotonic` function instead -2. Clarify Python 3.9+ is required - -## 4.0.0 - 2025-04-24 - -1. Added new method `get_feature_flag_result` which returns a `FeatureFlagResult` object. This object breaks down the result of a feature flag into its enabled state, variant, and payload. The benefit of this method is it allows you to retrieve the result of a feature flag and its payload in a single API call. 
You can call `get_value` on the result to get the value of the feature flag, which is the same value returned by `get_feature_flag` (aka the string `variant` if the flag is a multivariate flag or the `boolean` value if the flag is a boolean flag). - -Example: - -```python -result = posthog.get_feature_flag_result("my-flag", "distinct_id") -print(result.enabled) # True or False -print(result.variant) # 'the-variant-value' or None -print(result.payload) # {'foo': 'bar'} -print(result.get_value()) # 'the-variant-value' or True or False -print(result.reason) # 'matched condition set 2' (Not available for local evaluation) -``` - -Breaking change: - -1. `get_feature_flag_payload` now deserializes payloads from JSON strings to `Any`. Previously, it returned the payload as a JSON encoded string. - -Before: - -```python -payload = get_feature_flag_payload('key', 'distinct_id') # "{\"some\": \"payload\"}" -``` - -After: - -```python -payload = get_feature_flag_payload('key', 'distinct_id') # {"some": "payload"} -``` - -## 3.25.0 – 2025-04-15 - -1. Roll out new `/flags` endpoint to 100% of `/decide` traffic, excluding the top 10 customers. - -## 3.24.3 – 2025-04-15 - -1. Fix hash inclusion/exclusion for flag rollout - -## 3.24.2 – 2025-04-15 - -1. Roll out new /flags endpoint to 10% of /decide traffic - -## 3.24.1 – 2025-04-11 - -1. Add `log_captured_exceptions` option to proxy setup - -## 3.24.0 – 2025-04-10 - -1. Add config option to `log_captured_exceptions` - -## 3.23.0 – 2025-03-26 - -1. Expand automatic retries to include read errors (e.g. RemoteDisconnected) - -## 3.22.0 – 2025-03-26 - -1. Add more information to `$feature_flag_called` events. -2. Support for the `/decide?v=4` endpoint which contains more information about feature flags. - -## 3.21.0 – 2025-03-17 - -1. Support serializing dataclasses. - -## 3.20.0 – 2025-03-13 - -1. Add support for OpenAI Responses API. - -## 3.19.2 – 2025-03-11 - -1. Fix install requirements for analytics package - -## 3.19.1 – 2025-03-11 - -1. Fix bug where None is sent as delta in azure - -## 3.19.0 – 2025-03-04 - -1. Add support for tool calls in OpenAI and Anthropic. -2. Add support for cached tokens. - -## 3.18.1 – 2025-03-03 - -1. Improve quota-limited feature flag logs - -## 3.18.0 - 2025-02-28 - -1. Add support for Azure OpenAI. - -## 3.17.0 - 2025-02-27 - -1. The LangChain handler now captures tools in `$ai_generation` events, in property `$ai_tools`. This allows for displaying tools provided to the LLM call in PostHog UI. Note that support for `$ai_tools` in OpenAI and Anthropic SDKs is coming soon. - -## 3.16.0 - 2025-02-26 - -1. feat: add some platform info to events (#198) - -## 3.15.1 - 2025-02-23 - -1. Fix async client support for OpenAI. - -## 3.15.0 - 2025-02-19 - -1. Support quota-limited feature flags - -## 3.14.2 - 2025-02-19 - -1. Evaluate feature flag payloads with case sensitivity correctly. Fixes - -## 3.14.1 - 2025-02-18 - -1. Add support for Bedrock Anthropic Usage - -## 3.13.0 - 2025-02-12 - -1. Automatically retry connection errors - -## 3.12.1 - 2025-02-11 - -1. Fix mypy support for 3.12.0 -2. Deprecate `is_simple_flag` - -## 3.12.0 - 2025-02-11 - -1. Add support for OpenAI beta parse API. -2. Deprecate `context` parameter - -## 3.11.1 - 2025-02-06 - -1. Fix LangChain callback handler to capture parent run ID. - -## 3.11.0 - 2025-01-28 - -1. Add the `$ai_span` event to the LangChain callback handler to capture the input and output of intermediary chains. 
- - > LLM observability naming change: event property `$ai_trace_name` is now `$ai_span_name`. - -2. Fix serialiazation of Pydantic models in methods. - -## 3.10.0 - 2025-01-24 - -1. Add `$ai_error` and `$ai_is_error` properties to LangChain callback handler, OpenAI, and Anthropic. - -## 3.9.3 - 2025-01-23 - -1. Fix capturing of multiple traces in the LangChain callback handler. - -## 3.9.2 - 2025-01-22 - -1. Fix importing of LangChain callback handler under certain circumstances. - -## 3.9.0 - 2025-01-22 - -1. Add `$ai_trace` event emission to LangChain callback handler. - -## 3.8.4 - 2025-01-17 - -1. Add Anthropic support for LLM Observability. -2. Update LLM Observability to use output_choices. - -## 3.8.3 - 2025-01-14 - -1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages for the `posthoganalytics` package. - -## 3.8.2 - 2025-01-14 - -1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages. - -## 3.8.1 - 2025-01-14 - -1. Add LLM Observability with support for OpenAI and Langchain callbacks. - -## 3.7.5 - 2025-01-03 - -1. Add `distinct_id` to group_identify - -## 3.7.4 - 2024-11-25 - -1. Fix bug where this SDK incorrectly sent feature flag events with null values when calling `get_feature_flag_payload`. - -## 3.7.3 - 2024-11-25 - -1. Use personless mode when sending an exception without a provided `distinct_id`. - -## 3.7.2 - 2024-11-19 - -1. Add `type` property to exception stacks. - -## 3.7.1 - 2024-10-24 - -1. Add `platform` property to each frame of exception stacks. - -## 3.7.0 - 2024-10-03 - -1. Adds a new `super_properties` parameter on the client that are appended to every /capture call. - -## 3.6.7 - 2024-09-24 - -1. Remove deprecated datetime.utcnow() in favour of datetime.now(tz=tzutc()) - -## 3.6.6 - 2024-09-16 - -1. Fix manual capture support for in app frames - -## 3.6.5 - 2024-09-10 - -1. Fix django integration support for manual exception capture. - -## 3.6.4 - 2024-09-05 - -1. Add manual exception capture. - -## 3.6.3 - 2024-09-03 - -1. Make sure setup.py for posthoganalytics package also discovers the new exception integration package. - -## 3.6.2 - 2024-09-03 - -1. Make sure setup.py discovers the new exception integration package. - -## 3.6.1 - 2024-09-03 - -1. Adds django integration to exception autocapture in alpha state. This feature is not yet stable and may change in future versions. - -## 3.6.0 - 2024-08-28 - -1. Adds exception autocapture in alpha state. This feature is not yet stable and may change in future versions. - -## 3.5.2 - 2024-08-21 - -1. Guard for None values in local evaluation - -## 3.5.1 - 2024-08-13 - -1. Remove "-api" suffix from ingestion hostnames - -## 3.5.0 - 2024-02-29 - -1. - Adds a new `feature_flags_request_timeout_seconds` timeout parameter for feature flags which defaults to 3 seconds, updated from the default 10s for all other API calls. - -## 3.4.2 - 2024-02-20 - -1. Add `historical_migration` option for bulk migration to PostHog Cloud. - -## 3.4.1 - 2024-02-09 - -1. Use new hosts for event capture as well - -## 3.4.0 - 2024-02-05 - -1. Point given hosts to new ingestion hosts - -## 3.3.4 - 2024-01-30 - -1. Update type hints for module variables to work with newer versions of mypy - -## 3.3.3 - 2024-01-26 - -1. Remove new relative date operators, combine into regular date operators - -## 3.3.2 - 2024-01-19 - -1. Return success/failure with all capture calls from module functions - -## 3.3.1 - 2024-01-10 - -1. 
Make sure we don't override any existing feature flag properties when adding locally evaluated feature flag properties. - -## 3.3.0 - 2024-01-09 - -1. When local evaluation is enabled, we automatically add flag information to all events sent to PostHog, whenever possible. This makes it easier to use these events in experiments. - -## 3.2.0 - 2024-01-09 - -1. Numeric property handling for feature flags now does the expected: When passed in a number, we do a numeric comparison. When passed in a string, we do a string comparison. Previously, we always did a string comparison. -2. Add support for relative date operators for local evaluation. - -## 3.1.0 - 2023-12-04 - -1. Increase maximum event size and batch size - -## 3.0.2 - 2023-08-17 - -1. Returns the current flag property with $feature_flag_called events, to make it easier to use in experiments - -## 3.0.1 - 2023-04-21 - -1. Restore how feature flags work when the client library is disabled: All requests return `None` and no events are sent when the client is disabled. -2. Add a `feature_flag_definitions()` debug option, which returns currently loaded feature flag definitions. You can use this to more cleverly decide when to request local evaluation of feature flags. - -## 3.0.0 - 2023-04-14 - -Breaking change: - -All events by default now send the `$geoip_disable` property to disable geoip lookup in app. This is because usually we don't -want to update person properties to take the server's location. - -The same now happens for feature flag requests, where we discard the IP address of the server for matching on geoip properties like city, country, continent. - -To restore previous behaviour, you can set the default to False like so: - -```python -posthog.disable_geoip = False - -# // and if using client instantiation: -posthog = Posthog('api_key', disable_geoip=False) - -``` - -## 2.5.0 - 2023-04-10 - -1. Add option for instantiating separate client object - -## 2.4.2 - 2023-03-30 - -1. Update backoff dependency for posthoganalytics package to be the same as posthog package - -## 2.4.1 - 2023-03-17 - -1. Removes accidental print call left in for decide response - -## 2.4.0 - 2023-03-14 - -1. Support evaluating all cohorts in feature flags for local evaluation - -## 2.3.1 - 2023-02-07 - -1. Log instead of raise error on posthog personal api key errors -2. Remove upper bound on backoff dependency - -## 2.3.0 - 2023-01-31 - -1. Add support for returning payloads of matched feature flags - -## 2.2.0 - 2022-11-14 - -Changes: - -1. Add support for feature flag variant overrides with local evaluation - -## 2.1.2 - 2022-09-15 - -Changes: - -1. Fixes issues with date comparison. - -## 2.1.1 - 2022-09-14 - -Changes: - -1. Feature flags local evaluation now supports date property filters as well. Accepts both strings and datetime objects. - -## 2.1.0 - 2022-08-11 - -Changes: - -1. Feature flag defaults have been removed -2. Setup logging only when debug mode is enabled. - -## 2.0.1 - 2022-08-04 - -- Make poll_interval configurable -- Add `send_feature_flag_events` parameter to feature flag calls, which determine whether the `$feature_flag_called` event should be sent or not. -- Add `only_evaluate_locally` parameter to feature flag calls, which determines whether the feature flag should only be evaluated locally or not. - -## 2.0.0 - 2022-08-02 - -Breaking changes: - -1. The minimum version requirement for PostHog servers is now 1.38. If you're using PostHog Cloud, you satisfy this requirement automatically. -2. 
Feature flag defaults apply only when there's an error fetching feature flag results. Earlier, if the default was set to `True`, even if a flag resolved to `False`, the default would override this. - **Note: These are removed in 2.0.2** -3. Feature flag remote evaluation doesn't require a personal API key. - -New Changes: - -1. You can now evaluate feature flags locally (i.e. without sending a request to your PostHog servers) by setting a personal API key, and passing in groups and person properties to `is_feature_enabled` and `get_feature_flag` calls. -2. Introduces a `get_all_flags` method that returns all feature flags. This is useful for when you want to seed your frontend with some initial flags, given a user ID. - -## 1.4.9 - 2022-06-13 - -- Support for sending feature flags with capture calls - -## 1.4.8 - 2022-05-12 - -- Support multi variate feature flags - -## 1.4.7 - 2022-04-25 - -- Allow feature flags usage without project_api_key - -## 1.4.1 - 2021-05-28 - -- Fix packaging issues with Sentry integrations - -## 1.4.0 - 2021-05-18 - -- Improve support for `project_api_key` (#32) -- Resolve polling issues with feature flags (#29) -- Add Sentry (and Sentry+Django) integrations (#13) -- Fix feature flag issue with no percentage rollout (#30) - -## 1.3.1 - 2021-05-07 - -- Add `$set` and `$set_once` support (#23) -- Add distinct ID to `$create_alias` event (#27) -- Add `UUID` to `ID_TYPES` (#26) - -## 1.2.1 - 2021-02-05 - -Initial release logged in CHANGELOG.md. diff --git a/posthog/version.py b/posthog/version.py index 05114bf5..fdc77316 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.6.0" +VERSION = "7.7.0" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From df0fcc0f924484bc83ba612fbede67a8298d1619 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:02:58 +0000 Subject: [PATCH 06/24] fix: add openai_agents package to setuptools config Without this, the module is not included in the distribution and users get an ImportError after pip install. 
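A quick smoke test for a built wheel (a sketch; it assumes the OpenAI Agents SDK is installed in the same environment, since the subpackage refuses to import without it):

```python
# Run in a clean virtualenv after installing the built wheel.
# Before this fix the subpackage was missing from the distribution,
# so this import failed even with openai-agents installed.
import posthog.ai.openai_agents

print(posthog.ai.openai_agents.__file__)  # should resolve inside site-packages
```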
--- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b0e69264..0e3e8001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,9 +84,12 @@ packages = [ "posthog.ai", "posthog.ai.langchain", "posthog.ai.openai", + "posthog.ai.openai_agents", "posthog.ai.anthropic", "posthog.ai.gemini", "posthog.test", + "posthog.test.ai", + "posthog.test.ai.openai_agents", "posthog.integrations", ] From 6bf341ae115de0ee56ed2c4d890b4abab02fc22e Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:03:25 +0000 Subject: [PATCH 07/24] fix: correct indentation in on_trace_start properties dict --- posthog/ai/openai_agents/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 07385d2c..c3e094fd 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -184,7 +184,7 @@ def on_trace_start(self, trace: Trace) -> None: "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, "$ai_provider": "openai", - "$ai_framework": "openai-agents", + "$ai_framework": "openai-agents", } # Include group_id for linking related traces (e.g., conversation threads) From 71069ed03cecc8cd3789b6a5484f12f0df7d0eb0 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:06:36 +0000 Subject: [PATCH 08/24] fix: prevent unbounded growth of span/trace tracking dicts Add max entry limit and eviction for _span_start_times and _trace_metadata dicts. If on_span_end or on_trace_end is never called (e.g., due to an SDK exception), these dicts could grow indefinitely in long-running processes. --- posthog/ai/openai_agents/processor.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index c3e094fd..cc1c9bfd 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -105,6 +105,10 @@ def __init__( # Track trace metadata for associating with spans self._trace_metadata: Dict[str, Dict[str, Any]] = {} + # Max entries to prevent unbounded growth if on_span_end/on_trace_end + # is never called (e.g., due to an exception in the Agents SDK). 
+ self._max_tracked_entries = 10000 + def _get_distinct_id(self, trace: Optional[Trace]) -> str: """Resolve the distinct ID for a trace.""" if callable(self._distinct_id): @@ -127,6 +131,22 @@ def _with_privacy_mode(self, value: Any) -> Any: return None return value + def _evict_stale_entries(self) -> None: + """Evict oldest entries if dicts exceed max size to prevent unbounded growth.""" + if len(self._span_start_times) > self._max_tracked_entries: + # Remove oldest entries by start time + sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1]) + for span_id, _ in sorted_spans[: len(sorted_spans) // 2]: + del self._span_start_times[span_id] + log.debug("Evicted stale span start times (exceeded %d entries)", self._max_tracked_entries) + + if len(self._trace_metadata) > self._max_tracked_entries: + # Remove half the entries (oldest inserted via dict ordering in Python 3.7+) + keys = list(self._trace_metadata.keys()) + for key in keys[: len(keys) // 2]: + del self._trace_metadata[key] + log.debug("Evicted stale trace metadata (exceeded %d entries)", self._max_tracked_entries) + def _get_group_id(self, trace_id: str) -> Optional[str]: """Get the group_id for a trace from stored metadata.""" if trace_id in self._trace_metadata: @@ -166,6 +186,7 @@ def _capture_event( def on_trace_start(self, trace: Trace) -> None: """Called when a new trace begins.""" try: + self._evict_stale_entries() trace_id = trace.trace_id trace_name = trace.name group_id = getattr(trace, "group_id", None) @@ -216,6 +237,7 @@ def on_trace_end(self, trace: Trace) -> None: def on_span_start(self, span: Span[Any]) -> None: """Called when a new span begins.""" try: + self._evict_stale_entries() span_id = span.span_id self._span_start_times[span_id] = time.time() except Exception as e: From 2f49c73581f18fc739a4d361116eef6e18e2829d Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:07:27 +0000 Subject: [PATCH 09/24] fix: resolve distinct_id from trace metadata in on_span_end Previously on_span_end always called _get_distinct_id(None), which meant callable distinct_id resolvers never received the trace object for spans. Now the resolved distinct_id is stored at trace start and looked up by trace_id during span end. 
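A sketch of the resolver pattern this fix makes work for spans (the metadata key is hypothetical; the constructor accepts any callable taking a `Trace`):

```python
from posthog.ai.openai_agents import PostHogTracingProcessor

def resolve_distinct_id(trace):
    # Hypothetical lookup: read a user id from the trace's metadata.
    return (getattr(trace, "metadata", None) or {}).get("user_id")

# The callable now runs once in on_trace_start; every span in that trace
# reuses the stored result instead of resolving against a None trace.
processor = PostHogTracingProcessor(distinct_id=resolve_distinct_id)
```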
--- posthog/ai/openai_agents/processor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index cc1c9bfd..548b0b08 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -192,15 +192,16 @@ def on_trace_start(self, trace: Trace) -> None: group_id = getattr(trace, "group_id", None) metadata = getattr(trace, "metadata", None) + distinct_id = self._get_distinct_id(trace) + # Store trace metadata for later (used by spans) self._trace_metadata[trace_id] = { "name": trace_name, "group_id": group_id, "metadata": metadata, + "distinct_id": distinct_id, } - distinct_id = self._get_distinct_id(trace) - properties = { "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, @@ -261,8 +262,9 @@ def on_span_end(self, span: Span[Any]) -> None: ended = _parse_iso_timestamp(span.ended_at) latency = (ended - started) if (started and ended) else 0 - # Get distinct ID from trace metadata or default - distinct_id = self._get_distinct_id(None) + # Get distinct ID from trace metadata (resolved at trace start) or default + trace_info = self._trace_metadata.get(trace_id, {}) + distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None) # Get group_id from trace metadata for linking group_id = self._get_group_id(trace_id) From 8d7a68dbb3733fc98d97556f2565bb42d41b4b51 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:16:13 +0000 Subject: [PATCH 10/24] refactor: extract _base_properties helper to reduce duplication All span handlers repeated the same 6 base fields (trace_id, span_id, parent_id, provider, framework, latency) plus the group_id conditional. Extract into a shared helper to reduce ~100 lines of boilerplate. 
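Because the helper is merged with `**` unpacking, any key listed after the spread would win on collision; handlers only ever append span-specific keys, as in this sketch of the resulting pattern:

```python
# Handler body after the refactor (sketch): spread the shared base dict,
# then add only the keys unique to this span type.
properties = {
    **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties),
    "$ai_span_name": span_data.name,
    "$ai_span_type": "tool",
}
```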
--- posthog/ai/openai_agents/processor.py | 145 ++++++-------------------- 1 file changed, 33 insertions(+), 112 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 548b0b08..a9ee6bd4 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -341,6 +341,29 @@ def on_span_end(self, span: Span[Any]) -> None: except Exception as e: log.debug(f"Error in on_span_end: {e}") + def _base_properties( + self, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + group_id: Optional[str], + error_properties: Dict[str, Any], + ) -> Dict[str, Any]: + """Build the base properties dict shared by all span handlers.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_framework": "openai-agents", + "$ai_latency": latency, + **error_properties, + } + if group_id: + properties["$ai_group_id"] = group_id + return properties + def _handle_generation_span( self, span_data: GenerationSpanData, @@ -366,11 +389,7 @@ def _handle_generation_span( model_params[param] = model_config[param] properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -378,14 +397,8 @@ def _handle_generation_span( "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Add optional token fields if present if usage.get("reasoning_tokens"): properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] @@ -409,24 +422,13 @@ def _handle_function_span( ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - - # Add MCP data if present if span_data.mcp_data: properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) @@ -445,22 +447,11 @@ def _handle_agent_span( ) -> None: """Handle agent execution spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "agent", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - - # Add agent-specific metadata if 
span_data.handoffs: properties["$ai_agent_handoffs"] = span_data.handoffs if span_data.tools: @@ -483,23 +474,13 @@ def _handle_handoff_span( ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_handoff_from_agent": span_data.from_agent, "$ai_handoff_to_agent": span_data.to_agent, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_guardrail_span( @@ -515,22 +496,12 @@ def _handle_guardrail_span( ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_guardrail_triggered": span_data.triggered, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_response_span( @@ -560,25 +531,15 @@ def _handle_response_span( model = getattr(response, "model", None) if response else None properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Extract output content from response if response: output_items = getattr(response, "output", None) @@ -600,22 +561,12 @@ def _handle_custom_span( ) -> None: """Handle custom user-defined spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_audio_span( @@ -633,21 +584,11 @@ def _handle_audio_span( span_type = span_data.type # "transcription", "speech", or "speech_group" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), 
"$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Add model info if available if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model @@ -690,23 +631,13 @@ def _handle_mcp_span( ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_mcp_server": span_data.server, "$ai_mcp_tools": span_data.result, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_generic_span( @@ -724,21 +655,11 @@ def _handle_generic_span( span_type = getattr(span_data, "type", "unknown") properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Try to export span data if hasattr(span_data, "export"): try: From 27bd98c4bcabc2e28093305030570634b2a728d6 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:17:40 +0000 Subject: [PATCH 11/24] test: add missing edge case tests for openai agents processor - test_generation_span_with_no_usage: zero tokens when usage is None - test_generation_span_with_partial_usage: only input_tokens present - test_error_type_categorization_by_type_field_only: type field without matching message content - test_distinct_id_resolved_from_trace_for_spans: callable resolver uses trace context for span events - test_eviction_of_stale_entries: memory leak prevention works --- .../test/ai/openai_agents/test_processor.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index b5509340..2cf0bb9b 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -545,6 +545,88 @@ def test_force_flush_calls_client_flush(self, processor, mock_client): processor.force_flush() mock_client.flush.assert_called_once() + def test_generation_span_with_no_usage(self, processor, mock_client, mock_span): + """Test GenerationSpanData with no usage data defaults to zero tokens.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_input_tokens"] == 0 + assert call_kwargs["properties"]["$ai_output_tokens"] == 0 + assert call_kwargs["properties"]["$ai_total_tokens"] == 0 + + def test_generation_span_with_partial_usage(self, processor, mock_client, 
mock_span): + """Test GenerationSpanData with only input_tokens present.""" + span_data = GenerationSpanData( + model="gpt-4o", + usage={"input_tokens": 42}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_input_tokens"] == 42 + assert call_kwargs["properties"]["$ai_output_tokens"] == 0 + assert call_kwargs["properties"]["$ai_total_tokens"] == 42 + + def test_error_type_categorization_by_type_field_only(self, processor, mock_client, mock_span): + """Test error categorization works when only the type field matches.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Something went wrong", "type": "ModelBehaviorError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" + + def test_distinct_id_resolved_from_trace_for_spans(self, mock_client, mock_trace, mock_span): + """Test that spans use the distinct_id resolved at trace start.""" + resolver = lambda trace: f"user-{trace.name}" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + # Start trace - this resolves and stores distinct_id + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + # End a span - should use the stored distinct_id from trace + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["distinct_id"] == "user-Test Workflow" + + def test_eviction_of_stale_entries(self, mock_client): + """Test that stale entries are evicted when max is exceeded.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + ) + processor._max_tracked_entries = 10 + + # Fill beyond max + for i in range(15): + processor._span_start_times[f"span_{i}"] = float(i) + processor._trace_metadata[f"trace_{i}"] = {"name": f"trace_{i}"} + + processor._evict_stale_entries() + + # Should have evicted half + assert len(processor._span_start_times) <= 10 + assert len(processor._trace_metadata) <= 10 + class TestInstrumentHelper: """Tests for the instrument() convenience function.""" From 143ff91d8e9f76597e7f85a3582cd8dcaea4f0c5 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:20:09 +0000 Subject: [PATCH 12/24] fix: handle non-dict error_info in span error parsing If span.error is a string instead of a dict, calling .get() would raise AttributeError. Now falls back to str() for non-dict errors. 
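Both error shapes now survive span processing (an illustrative snippet reusing this suite's fixtures; previously the string case hit an AttributeError inside on_span_end's catch-all, silently dropping the event):

```python
# A dict error keeps its message and type; a bare string becomes the
# message with an empty type, so categorization falls back to "unknown".
for err in ({"message": "UserError: Tool failed", "type": "UserError"}, "socket hang up"):
    mock_span.error = err
    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)  # no longer trips on the string case
```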
--- posthog/ai/openai_agents/processor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index a9ee6bd4..6f0830d5 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -273,8 +273,12 @@ def on_span_end(self, span: Span[Any]) -> None: error_info = span.error error_properties = {} if error_info: - error_message = error_info.get("message", str(error_info)) - error_type_raw = error_info.get("type", "") + if isinstance(error_info, dict): + error_message = error_info.get("message", str(error_info)) + error_type_raw = error_info.get("type", "") + else: + error_message = str(error_info) + error_type_raw = "" # Categorize error type for cross-provider filtering/alerting error_type = "unknown" From ae519ef30377ce6aa92f1ed116194f2641150836 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:25:47 +0000 Subject: [PATCH 13/24] style: apply ruff formatting --- posthog/ai/openai_agents/processor.py | 175 +++++++++++++++--- .../test/ai/openai_agents/test_processor.py | 100 +++++++--- 2 files changed, 222 insertions(+), 53 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 6f0830d5..459f4a90 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -138,14 +138,20 @@ def _evict_stale_entries(self) -> None: sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1]) for span_id, _ in sorted_spans[: len(sorted_spans) // 2]: del self._span_start_times[span_id] - log.debug("Evicted stale span start times (exceeded %d entries)", self._max_tracked_entries) + log.debug( + "Evicted stale span start times (exceeded %d entries)", + self._max_tracked_entries, + ) if len(self._trace_metadata) > self._max_tracked_entries: # Remove half the entries (oldest inserted via dict ordering in Python 3.7+) keys = list(self._trace_metadata.keys()) for key in keys[: len(keys) // 2]: del self._trace_metadata[key] - log.debug("Evicted stale trace metadata (exceeded %d entries)", self._max_tracked_entries) + log.debug( + "Evicted stale trace metadata (exceeded %d entries)", + self._max_tracked_entries, + ) def _get_group_id(self, trace_id: str) -> Optional[str]: """Get the group_id for a trace from stored metadata.""" @@ -161,7 +167,9 @@ def _capture_event( ) -> None: """Capture an event to PostHog with error handling.""" try: - if not hasattr(self._client, "capture") or not callable(self._client.capture): + if not hasattr(self._client, "capture") or not callable( + self._client.capture + ): return final_distinct_id = distinct_id or "unknown" @@ -282,7 +290,10 @@ def on_span_end(self, span: Span[Any]) -> None: # Categorize error type for cross-provider filtering/alerting error_type = "unknown" - if "ModelBehaviorError" in error_type_raw or "ModelBehaviorError" in error_message: + if ( + "ModelBehaviorError" in error_type_raw + or "ModelBehaviorError" in error_message + ): error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" @@ -302,44 +313,116 @@ def on_span_end(self, span: Span[Any]) -> None: # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + 
error_properties, ) elif isinstance(span_data, FunctionSpanData): self._handle_function_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, AgentSpanData): self._handle_agent_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, HandoffSpanData): self._handle_handoff_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, GuardrailSpanData): self._handle_guardrail_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, ResponseSpanData): self._handle_response_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, CustomSpanData): self._handle_custom_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) - elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): + elif isinstance( + span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData) + ): self._handle_audio_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, MCPListToolsSpanData): self._handle_mcp_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) else: # Unknown span type - capture as generic span self._handle_generic_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) except Exception as e: @@ -388,12 +471,20 @@ def _handle_generation_span( # Extract model config parameters model_config = span_data.model_config or {} model_params = {} - for param in ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]: + for param in [ + "temperature", + "max_tokens", + "top_p", + "frequency_penalty", + "presence_penalty", + ]: if param in model_config: model_params[param] = model_config[param] properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -409,7 +500,9 @@ def _handle_generation_span( if usage.get("cache_read_input_tokens"): 
properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"] if usage.get("cache_creation_input_tokens"): - properties["$ai_cache_creation_input_tokens"] = usage["cache_creation_input_tokens"] + properties["$ai_cache_creation_input_tokens"] = usage[ + "cache_creation_input_tokens" + ] self._capture_event("$ai_generation", properties, distinct_id) @@ -426,7 +519,9 @@ def _handle_function_span( ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "tool", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), @@ -451,7 +546,9 @@ def _handle_agent_span( ) -> None: """Handle agent execution spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "agent", } @@ -478,7 +575,9 @@ def _handle_handoff_span( ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", "$ai_handoff_from_agent": span_data.from_agent, @@ -500,7 +599,9 @@ def _handle_guardrail_span( ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", "$ai_guardrail_triggered": span_data.triggered, @@ -535,7 +636,9 @@ def _handle_response_span( model = getattr(response, "model", None) if response else None properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -548,7 +651,9 @@ def _handle_response_span( if response: output_items = getattr(response, "output", None) if output_items: - properties["$ai_output_choices"] = self._with_privacy_mode(_safe_json(output_items)) + properties["$ai_output_choices"] = self._with_privacy_mode( + _safe_json(output_items) + ) self._capture_event("$ai_generation", properties, distinct_id) @@ -565,7 +670,9 @@ def _handle_custom_span( ) -> None: """Handle custom user-defined spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "custom", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), @@ -588,7 +695,9 @@ def _handle_audio_span( span_type = span_data.type # "transcription", "speech", or "speech_group" properties = { - 
**self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_type, "$ai_span_type": span_type, } @@ -612,7 +721,11 @@ def _handle_audio_span( properties["audio_output_format"] = span_data.output_format # Add text input for TTS - if hasattr(span_data, "input") and span_data.input and isinstance(span_data.input, str): + if ( + hasattr(span_data, "input") + and span_data.input + and isinstance(span_data.input, str) + ): properties["$ai_input"] = self._with_privacy_mode(span_data.input) # Don't include audio data (base64) - just metadata @@ -635,7 +748,9 @@ def _handle_mcp_span( ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", "$ai_mcp_server": span_data.server, @@ -659,7 +774,9 @@ def _handle_generic_span( span_type = getattr(span_data, "type", "unknown") properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_type, "$ai_span_type": span_type, } diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 2cf0bb9b..37ec3733 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -158,7 +158,9 @@ def test_generation_span_mapping(self, processor, mock_client, mock_span): {"role": "assistant", "content": "Hi there!"} ] - def test_generation_span_with_reasoning_tokens(self, processor, mock_client, mock_span): + def test_generation_span_with_reasoning_tokens( + self, processor, mock_client, mock_span + ): """Test GenerationSpanData includes reasoning tokens when present.""" span_data = GenerationSpanData( model="o1-preview", @@ -193,7 +195,9 @@ def test_function_span_mapping(self, processor, mock_client, mock_span): assert call_kwargs["event"] == "$ai_span" assert call_kwargs["properties"]["$ai_span_name"] == "get_weather" assert call_kwargs["properties"]["$ai_span_type"] == "tool" - assert call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + assert ( + call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + ) assert call_kwargs["properties"]["$ai_output_state"] == "Sunny, 72F" def test_agent_span_mapping(self, processor, mock_client, mock_span): @@ -323,7 +327,9 @@ def test_error_handling_in_span(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_is_error"] is True assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" - def test_generation_span_includes_total_tokens(self, processor, mock_client, mock_span): + def test_generation_span_includes_total_tokens( + self, processor, mock_client, mock_span + ): """Test that $ai_total_tokens is calculated and included.""" span_data = GenerationSpanData( model="gpt-4o", @@ -337,11 +343,16 @@ def test_generation_span_includes_total_tokens(self, processor, mock_client, moc call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_total_tokens"] == 150 - def 
test_error_type_categorization_model_behavior(self, processor, mock_client, mock_span): + def test_error_type_categorization_model_behavior( + self, processor, mock_client, mock_span + ): """Test that ModelBehaviorError is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "ModelBehaviorError: Invalid JSON output", "type": "ModelBehaviorError"} + mock_span.error = { + "message": "ModelBehaviorError: Invalid JSON output", + "type": "ModelBehaviorError", + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) @@ -349,7 +360,9 @@ def test_error_type_categorization_model_behavior(self, processor, mock_client, call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" - def test_error_type_categorization_user_error(self, processor, mock_client, mock_span): + def test_error_type_categorization_user_error( + self, processor, mock_client, mock_span + ): """Test that UserError is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data @@ -361,31 +374,45 @@ def test_error_type_categorization_user_error(self, processor, mock_client, mock call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "user_error" - def test_error_type_categorization_input_guardrail(self, processor, mock_client, mock_span): + def test_error_type_categorization_input_guardrail( + self, processor, mock_client, mock_span + ): """Test that InputGuardrailTripwireTriggered is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "InputGuardrailTripwireTriggered: Content blocked"} + mock_span.error = { + "message": "InputGuardrailTripwireTriggered: Content blocked" + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) call_kwargs = mock_client.capture.call_args[1] - assert call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + assert ( + call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + ) - def test_error_type_categorization_output_guardrail(self, processor, mock_client, mock_span): + def test_error_type_categorization_output_guardrail( + self, processor, mock_client, mock_span + ): """Test that OutputGuardrailTripwireTriggered is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "OutputGuardrailTripwireTriggered: Response blocked"} + mock_span.error = { + "message": "OutputGuardrailTripwireTriggered: Response blocked" + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) call_kwargs = mock_client.capture.call_args[1] - assert call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + assert ( + call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + ) - def test_error_type_categorization_max_turns(self, processor, mock_client, mock_span): + def test_error_type_categorization_max_turns( + self, processor, mock_client, mock_span + ): """Test that MaxTurnsExceeded is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data @@ -409,7 +436,9 @@ def test_error_type_categorization_unknown(self, processor, mock_client, mock_sp call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "unknown" - def 
test_response_span_with_output_and_total_tokens(self, processor, mock_client, mock_span): + def test_response_span_with_output_and_total_tokens( + self, processor, mock_client, mock_span + ): """Test ResponseSpanData includes output choices and total tokens.""" # Create a mock response object mock_response = MagicMock() @@ -433,10 +462,14 @@ def test_response_span_with_output_and_total_tokens(self, processor, mock_client assert call_kwargs["event"] == "$ai_generation" assert call_kwargs["properties"]["$ai_total_tokens"] == 35 - assert call_kwargs["properties"]["$ai_output_choices"] == [{"type": "message", "content": "Hello!"}] + assert call_kwargs["properties"]["$ai_output_choices"] == [ + {"type": "message", "content": "Hello!"} + ] assert call_kwargs["properties"]["$ai_response_id"] == "resp_123" - def test_speech_span_with_pass_through_properties(self, processor, mock_client, mock_span): + def test_speech_span_with_pass_through_properties( + self, processor, mock_client, mock_span + ): """Test SpeechSpanData includes pass-through properties.""" span_data = SpeechSpanData( input="Hello, how can I help you?", @@ -457,13 +490,20 @@ def test_speech_span_with_pass_through_properties(self, processor, mock_client, assert call_kwargs["properties"]["$ai_span_type"] == "speech" assert call_kwargs["properties"]["$ai_model"] == "tts-1" # Pass-through properties (no $ai_ prefix) - assert call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + assert ( + call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + ) assert call_kwargs["properties"]["audio_output_format"] == "pcm" - assert call_kwargs["properties"]["model_config"] == {"voice": "alloy", "speed": 1.0} + assert call_kwargs["properties"]["model_config"] == { + "voice": "alloy", + "speed": 1.0, + } # Text input should be captured assert call_kwargs["properties"]["$ai_input"] == "Hello, how can I help you?" - def test_transcription_span_with_pass_through_properties(self, processor, mock_client, mock_span): + def test_transcription_span_with_pass_through_properties( + self, processor, mock_client, mock_span + ): """Test TranscriptionSpanData includes pass-through properties.""" span_data = TranscriptionSpanData( input="base64_audio_data", @@ -486,7 +526,10 @@ def test_transcription_span_with_pass_through_properties(self, processor, mock_c assert call_kwargs["properties"]["audio_input_format"] == "pcm" assert call_kwargs["properties"]["model_config"] == {"language": "en"} # Transcription output should be captured - assert call_kwargs["properties"]["$ai_output_state"] == "This is the transcribed text." + assert ( + call_kwargs["properties"]["$ai_output_state"] + == "This is the transcribed text." 
+ ) def test_latency_calculation(self, processor, mock_client, mock_span): """Test that latency is calculated correctly.""" @@ -558,7 +601,9 @@ def test_generation_span_with_no_usage(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_output_tokens"] == 0 assert call_kwargs["properties"]["$ai_total_tokens"] == 0 - def test_generation_span_with_partial_usage(self, processor, mock_client, mock_span): + def test_generation_span_with_partial_usage( + self, processor, mock_client, mock_span + ): """Test GenerationSpanData with only input_tokens present.""" span_data = GenerationSpanData( model="gpt-4o", @@ -574,11 +619,16 @@ def test_generation_span_with_partial_usage(self, processor, mock_client, mock_s assert call_kwargs["properties"]["$ai_output_tokens"] == 0 assert call_kwargs["properties"]["$ai_total_tokens"] == 42 - def test_error_type_categorization_by_type_field_only(self, processor, mock_client, mock_span): + def test_error_type_categorization_by_type_field_only( + self, processor, mock_client, mock_span + ): """Test error categorization works when only the type field matches.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "Something went wrong", "type": "ModelBehaviorError"} + mock_span.error = { + "message": "Something went wrong", + "type": "ModelBehaviorError", + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) @@ -586,7 +636,9 @@ def test_error_type_categorization_by_type_field_only(self, processor, mock_clie call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" - def test_distinct_id_resolved_from_trace_for_spans(self, mock_client, mock_trace, mock_span): + def test_distinct_id_resolved_from_trace_for_spans( + self, mock_client, mock_trace, mock_span + ): """Test that spans use the distinct_id resolved at trace start.""" resolver = lambda trace: f"user-{trace.name}" processor = PostHogTracingProcessor( From 789be8d45c3711f2136cd702e0b2250dd48c49b9 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:30:33 +0000 Subject: [PATCH 14/24] style: replace lambda assignments with def (ruff E731) --- posthog/test/ai/openai_agents/test_processor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 37ec3733..6759bf7d 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -89,7 +89,10 @@ def test_initialization(self, mock_client): def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace): """Test processor with callable distinct_id resolver.""" - resolver = lambda trace: trace.metadata.get("user_id", "default") + + def resolver(trace): + return trace.metadata.get("user_id", "default") + processor = PostHogTracingProcessor( client=mock_client, distinct_id=resolver, @@ -640,7 +643,10 @@ def test_distinct_id_resolved_from_trace_for_spans( self, mock_client, mock_trace, mock_span ): """Test that spans use the distinct_id resolved at trace start.""" - resolver = lambda trace: f"user-{trace.name}" + + def resolver(trace): + return f"user-{trace.name}" + processor = PostHogTracingProcessor( client=mock_client, distinct_id=resolver, From b3c631ce8f78a18db51c65c539aa6d8c904d1ccb Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 14:03:09 +0000 Subject: [PATCH 15/24] 
fix: restore full CHANGELOG.md history The rebase conflict resolution accidentally truncated the changelog to only the most recent entries. Restored all historical entries. --- CHANGELOG.md | 729 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 729 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f674fdc0..466f2a44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,3 +36,732 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r # 7.4.0 - 2025-12-16 feat: Add automatic retries for feature flag requests + +Feature flag API requests now automatically retry on transient failures: + +- Network errors (connection refused, DNS failures, timeouts) +- Server errors (500, 502, 503, 504) +- Up to 2 retries with exponential backoff (0.5s, 1s delays) + +Rate limit (429) and quota (402) errors are not retried. + +# 7.3.1 - 2025-12-06 + +fix: remove unused $exception_message and $exception_type + +# 7.3.0 - 2025-12-05 + +feat: improve code variables capture masking + +# 7.2.0 - 2025-12-01 + +feat: add $feature_flag_evaluated_at properties to $feature_flag_called events + +# 7.1.0 - 2025-11-26 + +Add support for the async version of Gemini. + +# 7.0.2 - 2025-11-18 + +Add support for Python 3.14. +Projects upgrading to Python 3.14 should ensure any Pydantic models passed into the SDK use Pydantic v2, as Pydantic v1 is not compatible with Python 3.14. + +# 7.0.1 - 2025-11-15 + +Try to use repr() when formatting code variables + +# 7.0.0 - 2025-11-11 + +NB Python 3.9 is no longer supported + +- chore(llma): update LLM provider SDKs to latest major versions + - openai: 1.102.0 → 2.7.1 + - anthropic: 0.64.0 → 0.72.0 + - google-genai: 1.32.0 → 1.49.0 + - langchain-core: 0.3.75 → 1.0.3 + - langchain-openai: 0.3.32 → 1.0.2 + - langchain-anthropic: 0.3.19 → 1.0.1 + - langchain-community: 0.3.29 → 0.4.1 + - langgraph: 0.6.6 → 1.0.2 + +# 6.9.3 - 2025-11-10 + +- feat(ph-ai): PostHog properties dict in GenerationMetadata + +# 6.9.2 - 2025-11-10 + +- fix(llma): fix cache token double subtraction in Langchain for non-Anthropic providers causing negative costs + +# 6.9.1 - 2025-11-07 + +- fix(error-tracking): pass code variables config from init to client + +# 6.9.0 - 2025-11-06 + +- feat(error-tracking): add local variables capture + +# 6.8.0 - 2025-11-03 + +- feat(llma): send web search calls to be used for LLM cost calculations + +# 6.7.14 - 2025-11-03 + +- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355) +- test(django): Add Django 5 integration test suite with real ASGI application testing async middleware behavior + +# 6.7.13 - 2025-11-02 + +- fix(llma): cache cost calculation in the LangChain callback + +# 6.7.12 - 2025-11-02 + +- fix(django): Restore process_exception method to capture view and downstream middleware exceptions (fixes #329) +- fix(ai/langchain): Add LangChain 1.0+ compatibility for CallbackHandler imports (fixes #362) + +# 6.7.11 - 2025-10-28 + +- feat(ai): Add `$ai_framework` property for framework integrations (e.g. 
LangChain) + +# 6.7.10 - 2025-10-24 + +- fix(django): Make middleware truly hybrid - compatible with both sync (WSGI) and async (ASGI) Django stacks without breaking sync-only deployments + +# 6.7.9 - 2025-10-22 + +- fix(flags): multi-condition flags with static cohorts returning wrong variants + +# 6.7.8 - 2025-10-16 + +- fix(llma): missing async for OpenAI's streaming implementation + +# 6.7.7 - 2025-10-14 + +- fix: remove deprecated attribute $exception_personURL from exception events + +# 6.7.6 - 2025-09-16 + +- fix: don't sort condition sets with variant overrides to the top +- fix: Prevent core Client methods from raising exceptions + +# 6.7.5 - 2025-09-16 + +- feat: Django middleware now supports async request handling. + +# 6.7.4 - 2025-09-05 + +- fix: Missing system prompts for some providers + +# 6.7.3 - 2025-09-04 + +- fix: missing usage tokens in Gemini + +# 6.7.2 - 2025-09-03 + +- fix: tool call results in streaming providers + +# 6.7.1 - 2025-09-01 + +- fix: Add base64 inline image sanitization + +# 6.7.0 - 2025-08-26 + +- feat: Add support for feature flag dependencies + +# 6.6.1 - 2025-08-21 + +- fix: Prevent `NoneType` error when `group_properties` is `None` + +# 6.6.0 - 2025-08-15 + +- feat: Add `flag_keys_to_evaluate` parameter to optimize feature flag evaluation performance by only evaluating specified flags +- feat: Add `flag_keys_filter` option to `send_feature_flags` for selective flag evaluation in capture events + +# 6.5.0 - 2025-08-08 + +- feat: Add `$context_tags` to an event to know which properties were included as tags + +# 6.4.1 - 2025-08-06 + +- fix: Always pass project API key in `remote_config` requests for deterministic project routing + +# 6.4.0 - 2025-08-05 + +- feat: support Vertex AI for Gemini + +# 6.3.4 - 2025-08-04 + +- fix: set `$ai_tools` for all providers and `$ai_output_choices` for all non-streaming provider flows properly + +# 6.3.3 - 2025-08-01 + +- fix: `get_feature_flag_result` now correctly returns FeatureFlagResult when payload is empty string instead of None + +# 6.3.2 - 2025-07-31 + +- fix: Anthropic's tool calls are now handled properly + +# 6.3.0 - 2025-07-22 + +- feat: Enhanced `send_feature_flags` parameter to accept `SendFeatureFlagsOptions` object for declarative control over local/remote evaluation and custom properties + +# 6.2.1 - 2025-07-21 + +- feat: make `posthog_client` an optional argument in PostHog AI providers wrappers (`posthog.ai.*`), intuitively using the default client as the default + +# 6.1.1 - 2025-07-16 + +- fix: correctly capture exceptions processed by Django from views or middleware + +# 6.1.0 - 2025-07-10 + +- feat: decouple feature flag local evaluation from personal API keys; support decrypting remote config payloads without relying on the feature flags poller + +# 6.0.4 - 2025-07-09 + +- fix: add POSTHOG_MW_CLIENT setting to django middleware, to support custom clients for exception capture. 
+
+# 6.0.3 - 2025-07-07
+
+- feat: add a feature flag evaluation cache (local storage or redis) to support returning flag evaluations when the service is down
+
+# 6.0.2 - 2025-07-02
+
+- fix: send_feature_flags changed to default to false in `Client::capture_exception`
+
+# 6.0.1
+
+- fix: respect `$process_person_profile` property when passed to capture
+
+# 6.0.0
+
+This release contains a number of major breaking changes:
+
+- feat: make distinct_id an optional parameter in posthog.capture and related functions
+- feat: make capture and related functions return `Optional[str]`, which is the UUID of the sent event, if it was sent
+- fix: remove `identify` (prefer `posthog.set()`), and `page` and `screen` (prefer `posthog.capture()`)
+- fix: delete exception-capture specific integrations module. Prefer the general-purpose django middleware as a replacement for the django `Integration`.
+
+To migrate to this version, you'll mostly just need to switch to using named keyword arguments, rather than positional ones. For example:
+
+```python
+# Old calling convention
+posthog.capture("user123", "button_clicked", {"button_id": "123"})
+# New calling convention
+posthog.capture(distinct_id="user123", event="button_clicked", properties={"button_id": "123"})
+
+# Better pattern
+with posthog.new_context():
+    posthog.identify_context("user123")
+
+    # The event name is the first argument, and can be passed positionally, or as a keyword argument in a later position
+    posthog.capture("button_pressed")
+```
+
+Generally, arguments are now appropriately typed, and docstrings have been updated. If something is unclear, please open an issue, or submit a PR!
+
+# 5.4.0 - 2025-06-20
+
+- feat: add support for session_id context on page method
+
+# 5.3.0 - 2025-06-19
+
+- fix: safely handle exception values
+
+# 5.2.0 - 2025-06-19
+
+- feat: construct artificial stack traces if no traceback is available on a captured exception
+
+## 5.1.0 - 2025-06-18
+
+- feat: session and distinct IDs can now be associated with contexts, and are used as such
+- feat: django http request middleware
+
+## 5.0.0 - 2025-06-16
+
+- fix: removed deprecated sentry integration
+
+## 4.10.0 - 2025-06-13
+
+- fix: no longer fail in autocapture.
+
+## 4.9.0 - 2025-06-13
+
+- feat(ai): track reasoning and cache tokens in the LangChain callback
+
+## 4.8.0 - 2025-06-10
+
+- fix: export scoped, rather than tracked, decorator
+- feat: allow use of contexts without error tracking
+
+## 4.7.0 - 2025-06-10
+
+- feat: add support for parse endpoint in responses API (no longer beta)
+
+## 4.6.2 - 2025-06-09
+
+- fix: replace `import posthog` with direct method imports
+
+## 4.6.1 - 2025-06-09
+
+- fix: replace `import posthog` in `posthoganalytics` package
+
+## 4.6.0 - 2025-06-09
+
+- feat: add additional user and request context to captured exceptions via the Django integration
+- feat: Add `setup()` function to initialise default client
+
+## 4.5.0 - 2025-06-09
+
+- feat: add before_send callback (#249)
+
+## 4.4.2 - 2025-06-09
+
+- empty point release to fix release automation
+
+## 4.4.1 - 2025-06-09
+
+- empty point release to fix release automation
+
+## 4.4.0 - 2025-06-09
+
+- Use the new `/flags` endpoint for all feature flag evaluations (don't fall back to `/decide` at all)
+
+## 4.3.2 - 2025-06-06
+
+1. Add context management:
+
+- New context manager with `posthog.new_context()`
+- Tag functions: `posthog.tag()`, `posthog.get_tags()`, `posthog.clear_tags()`
+- Function decorator:
+  - `@posthog.scoped` - Creates context and captures exceptions thrown within the function
+- Automatic deduplication of exceptions to ensure each exception is only captured once
+
+2. fix: feature flag request use geoip_disable (#235)
+3. chore: pin actions versions (#210)
+4. fix: opinionated setup and clean fn fix (#240)
+5. fix: release action failed (#241)
+
+## 4.2.0 - 2025-05-22
+
+Add support for google gemini
+
+## 4.1.0 - 2025-05-22
+
+Moved ai openai package to a composition approach over inheritance.
+
+## 4.0.1 – 2025-04-29
+
+1. Remove deprecated `monotonic` library. Use Python's core `time.monotonic` function instead
+2. Clarify Python 3.9+ is required
+
+## 4.0.0 - 2025-04-24
+
+1. Added new method `get_feature_flag_result` which returns a `FeatureFlagResult` object. This object breaks down the result of a feature flag into its enabled state, variant, and payload. The benefit of this method is it allows you to retrieve the result of a feature flag and its payload in a single API call. You can call `get_value` on the result to get the value of the feature flag, which is the same value returned by `get_feature_flag` (aka the string `variant` if the flag is a multivariate flag or the `boolean` value if the flag is a boolean flag).
+
+Example:
+
+```python
+result = posthog.get_feature_flag_result("my-flag", "distinct_id")
+print(result.enabled)  # True or False
+print(result.variant)  # 'the-variant-value' or None
+print(result.payload)  # {'foo': 'bar'}
+print(result.get_value())  # 'the-variant-value' or True or False
+print(result.reason)  # 'matched condition set 2' (Not available for local evaluation)
+```
+
+Breaking change:
+
+1. `get_feature_flag_payload` now deserializes payloads from JSON strings to `Any`. Previously, it returned the payload as a JSON encoded string.
+
+Before:
+
+```python
+payload = get_feature_flag_payload('key', 'distinct_id')  # "{\"some\": \"payload\"}"
+```
+
+After:
+
+```python
+payload = get_feature_flag_payload('key', 'distinct_id')  # {"some": "payload"}
+```
+
+## 3.25.0 – 2025-04-15
+
+1. Roll out new `/flags` endpoint to 100% of `/decide` traffic, excluding the top 10 customers.
+
+## 3.24.3 – 2025-04-15
+
+1. Fix hash inclusion/exclusion for flag rollout
+
+## 3.24.2 – 2025-04-15
+
+1. Roll out new /flags endpoint to 10% of /decide traffic
+
+## 3.24.1 – 2025-04-11
+
+1. Add `log_captured_exceptions` option to proxy setup
+
+## 3.24.0 – 2025-04-10
+
+1. Add config option to `log_captured_exceptions`
+
+## 3.23.0 – 2025-03-26
+
+1. Expand automatic retries to include read errors (e.g. RemoteDisconnected)
+
+## 3.22.0 – 2025-03-26
+
+1. Add more information to `$feature_flag_called` events.
+2. Support for the `/decide?v=4` endpoint which contains more information about feature flags.
+
+## 3.21.0 – 2025-03-17
+
+1. Support serializing dataclasses.
+
+## 3.20.0 – 2025-03-13
+
+1. Add support for OpenAI Responses API.
+
+## 3.19.2 – 2025-03-11
+
+1. Fix install requirements for analytics package
+
+## 3.19.1 – 2025-03-11
+
+1. Fix bug where None is sent as delta in azure
+
+## 3.19.0 – 2025-03-04
+
+1. Add support for tool calls in OpenAI and Anthropic.
+2. Add support for cached tokens.
+
+## 3.18.1 – 2025-03-03
+
+1. Improve quota-limited feature flag logs
+
+## 3.18.0 - 2025-02-28
+
+1. Add support for Azure OpenAI.
+
+## 3.17.0 - 2025-02-27
+
+1. The LangChain handler now captures tools in `$ai_generation` events, in property `$ai_tools`. This allows for displaying tools provided to the LLM call in PostHog UI. Note that support for `$ai_tools` in OpenAI and Anthropic SDKs is coming soon.
+
+## 3.16.0 - 2025-02-26
+
+1. feat: add some platform info to events (#198)
+
+## 3.15.1 - 2025-02-23
+
+1. Fix async client support for OpenAI.
+
+## 3.15.0 - 2025-02-19
+
+1. Support quota-limited feature flags
+
+## 3.14.2 - 2025-02-19
+
+1. Evaluate feature flag payloads with case sensitivity correctly. Fixes
+
+## 3.14.1 - 2025-02-18
+
+1. Add support for Bedrock Anthropic Usage
+
+## 3.13.0 - 2025-02-12
+
+1. Automatically retry connection errors
+
+## 3.12.1 - 2025-02-11
+
+1. Fix mypy support for 3.12.0
+2. Deprecate `is_simple_flag`
+
+## 3.12.0 - 2025-02-11
+
+1. Add support for OpenAI beta parse API.
+2. Deprecate `context` parameter
+
+## 3.11.1 - 2025-02-06
+
+1. Fix LangChain callback handler to capture parent run ID.
+
+## 3.11.0 - 2025-01-28
+
+1. Add the `$ai_span` event to the LangChain callback handler to capture the input and output of intermediary chains.
+
+   > LLM observability naming change: event property `$ai_trace_name` is now `$ai_span_name`.
+
+2. Fix serialization of Pydantic models in methods.
+
+## 3.10.0 - 2025-01-24
+
+1. Add `$ai_error` and `$ai_is_error` properties to LangChain callback handler, OpenAI, and Anthropic.
+
+## 3.9.3 - 2025-01-23
+
+1. Fix capturing of multiple traces in the LangChain callback handler.
+
+## 3.9.2 - 2025-01-22
+
+1. Fix importing of LangChain callback handler under certain circumstances.
+
+## 3.9.0 - 2025-01-22
+
+1. Add `$ai_trace` event emission to LangChain callback handler.
+
+## 3.8.4 - 2025-01-17
+
+1. Add Anthropic support for LLM Observability.
+2. Update LLM Observability to use output_choices.
+
+## 3.8.3 - 2025-01-14
+
+1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages for the `posthoganalytics` package.
+
+## 3.8.2 - 2025-01-14
+
+1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages.
+
+## 3.8.1 - 2025-01-14
+
+1. Add LLM Observability with support for OpenAI and Langchain callbacks.
+
+## 3.7.5 - 2025-01-03
+
+1. Add `distinct_id` to group_identify
+
+## 3.7.4 - 2024-11-25
+
+1. Fix bug where this SDK incorrectly sent feature flag events with null values when calling `get_feature_flag_payload`.
+
+## 3.7.3 - 2024-11-25
+
+1. Use personless mode when sending an exception without a provided `distinct_id`.
+
+## 3.7.2 - 2024-11-19
+
+1. Add `type` property to exception stacks.
+
+## 3.7.1 - 2024-10-24
+
+1. Add `platform` property to each frame of exception stacks.
+
+## 3.7.0 - 2024-10-03
+
+1. Adds a new `super_properties` parameter on the client that is appended to every /capture call.
+
+## 3.6.7 - 2024-09-24
+
+1. Remove deprecated datetime.utcnow() in favour of datetime.now(tz=tzutc())
+
+## 3.6.6 - 2024-09-16
+
+1. Fix manual capture support for in app frames
+
+## 3.6.5 - 2024-09-10
+
+1. Fix django integration support for manual exception capture.
+
+## 3.6.4 - 2024-09-05
+
+1. Add manual exception capture.
+
+## 3.6.3 - 2024-09-03
+
+1. Make sure setup.py for posthoganalytics package also discovers the new exception integration package.
+
+## 3.6.2 - 2024-09-03
+
+1. Make sure setup.py discovers the new exception integration package.
+
+## 3.6.1 - 2024-09-03
+
+1. Adds django integration to exception autocapture in alpha state. This feature is not yet stable and may change in future versions.
+
+## 3.6.0 - 2024-08-28
+
+1. Adds exception autocapture in alpha state. This feature is not yet stable and may change in future versions.
+
+## 3.5.2 - 2024-08-21
+
+1. Guard for None values in local evaluation
+
+## 3.5.1 - 2024-08-13
+
+1. Remove "-api" suffix from ingestion hostnames
+
+## 3.5.0 - 2024-02-29
+
+1. Adds a new `feature_flags_request_timeout_seconds` timeout parameter for feature flags which defaults to 3 seconds, updated from the default 10s for all other API calls.
+
+## 3.4.2 - 2024-02-20
+
+1. Add `historical_migration` option for bulk migration to PostHog Cloud.
+
+## 3.4.1 - 2024-02-09
+
+1. Use new hosts for event capture as well
+
+## 3.4.0 - 2024-02-05
+
+1. Point given hosts to new ingestion hosts
+
+## 3.3.4 - 2024-01-30
+
+1. Update type hints for module variables to work with newer versions of mypy
+
+## 3.3.3 - 2024-01-26
+
+1. Remove new relative date operators, combine into regular date operators
+
+## 3.3.2 - 2024-01-19
+
+1. Return success/failure with all capture calls from module functions
+
+## 3.3.1 - 2024-01-10
+
+1. Make sure we don't override any existing feature flag properties when adding locally evaluated feature flag properties.
+
+## 3.3.0 - 2024-01-09
+
+1. When local evaluation is enabled, we automatically add flag information to all events sent to PostHog, whenever possible. This makes it easier to use these events in experiments.
+
+## 3.2.0 - 2024-01-09
+
+1. Numeric property handling for feature flags now does the expected: When passed in a number, we do a numeric comparison. When passed in a string, we do a string comparison. Previously, we always did a string comparison.
+2. Add support for relative date operators for local evaluation.
+
+## 3.1.0 - 2023-12-04
+
+1. Increase maximum event size and batch size
+
+## 3.0.2 - 2023-08-17
+
+1. Returns the current flag property with $feature_flag_called events, to make it easier to use in experiments
+
+## 3.0.1 - 2023-04-21
+
+1. Restore how feature flags work when the client library is disabled: All requests return `None` and no events are sent when the client is disabled.
+2. Add a `feature_flag_definitions()` debug option, which returns currently loaded feature flag definitions. You can use this to more cleverly decide when to request local evaluation of feature flags.
+
+## 3.0.0 - 2023-04-14
+
+Breaking change:
+
+All events by default now send the `$geoip_disable` property to disable geoip lookup in app. This is because usually we don't
+want to update person properties to take the server's location.
+
+The same now happens for feature flag requests, where we discard the IP address of the server for matching on geoip properties like city, country, continent.
+
+To restore previous behaviour, you can set the default to False like so:
+
+```python
+posthog.disable_geoip = False
+
+# and if using client instantiation:
+posthog = Posthog('api_key', disable_geoip=False)
+
+```
+
+## 2.5.0 - 2023-04-10
+
+1. Add option for instantiating separate client object
+
+## 2.4.2 - 2023-03-30
+
+1. Update backoff dependency for posthoganalytics package to be the same as posthog package
+
+## 2.4.1 - 2023-03-17
+
+1. Removes accidental print call left in for decide response
+
+## 2.4.0 - 2023-03-14
+
+1. Support evaluating all cohorts in feature flags for local evaluation
+
+## 2.3.1 - 2023-02-07
+
+1. Log instead of raise error on posthog personal api key errors
+2. Remove upper bound on backoff dependency
+
+## 2.3.0 - 2023-01-31
+
+1. Add support for returning payloads of matched feature flags
+
+## 2.2.0 - 2022-11-14
+
+Changes:
+
+1. Add support for feature flag variant overrides with local evaluation
+
+## 2.1.2 - 2022-09-15
+
+Changes:
+
+1. Fixes issues with date comparison.
+
+## 2.1.1 - 2022-09-14
+
+Changes:
+
+1. Feature flags local evaluation now supports date property filters as well. Accepts both strings and datetime objects.
+
+## 2.1.0 - 2022-08-11
+
+Changes:
+
+1. Feature flag defaults have been removed
+2. Setup logging only when debug mode is enabled.
+
+## 2.0.1 - 2022-08-04
+
+- Make poll_interval configurable
+- Add `send_feature_flag_events` parameter to feature flag calls, which determines whether the `$feature_flag_called` event should be sent or not.
+- Add `only_evaluate_locally` parameter to feature flag calls, which determines whether the feature flag should only be evaluated locally or not.
+
+## 2.0.0 - 2022-08-02
+
+Breaking changes:
+
+1. The minimum version requirement for PostHog servers is now 1.38. If you're using PostHog Cloud, you satisfy this requirement automatically.
+2. Feature flag defaults apply only when there's an error fetching feature flag results. Earlier, if the default was set to `True`, even if a flag resolved to `False`, the default would override this.
+   **Note: These are removed in 2.0.2**
+3. Feature flag remote evaluation doesn't require a personal API key.
+
+New Changes:
+
+1. You can now evaluate feature flags locally (i.e. without sending a request to your PostHog servers) by setting a personal API key, and passing in groups and person properties to `is_feature_enabled` and `get_feature_flag` calls.
+2. Introduces a `get_all_flags` method that returns all feature flags. This is useful for when you want to seed your frontend with some initial flags, given a user ID.
+
+## 1.4.9 - 2022-06-13
+
+- Support for sending feature flags with capture calls
+
+## 1.4.8 - 2022-05-12
+
+- Support multi variate feature flags
+
+## 1.4.7 - 2022-04-25
+
+- Allow feature flags usage without project_api_key
+
+## 1.4.1 - 2021-05-28
+
+- Fix packaging issues with Sentry integrations
+
+## 1.4.0 - 2021-05-18
+
+- Improve support for `project_api_key` (#32)
+- Resolve polling issues with feature flags (#29)
+- Add Sentry (and Sentry+Django) integrations (#13)
+- Fix feature flag issue with no percentage rollout (#30)
+
+## 1.3.1 - 2021-05-07
+
+- Add `$set` and `$set_once` support (#23)
+- Add distinct ID to `$create_alias` event (#27)
+- Add `UUID` to `ID_TYPES` (#26)
+
+## 1.2.1 - 2021-02-05
+
+Initial release logged in CHANGELOG.md.

From fea14207605a13ecb08f48b031648b9ad1202ca5 Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Tue, 27 Jan 2026 14:06:12 +0000
Subject: [PATCH 16/24] fix: preserve personless mode for trace-id fallback
 distinct IDs

When no distinct_id is provided, _get_distinct_id falls back to trace_id
or "unknown". Since these are non-None strings, the
$process_person_profile=False check in _capture_event never fired,
creating unwanted person profiles keyed by trace IDs.

Track whether the user explicitly provided a distinct_id and use that
flag to control personless mode, matching the pattern used by the
langchain and openai integrations.
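To make the effect concrete, a hedged sketch of the captured payload
before and after this change (the event shape here is illustrative,
not the exact wire format):

```python
# Before: the trace-id fallback looked like a real user ID, so PostHog
# created a person profile keyed by something like "trace_abc123".
event_before = {
    "distinct_id": "trace_abc123",
    "event": "$ai_trace",
    "properties": {"$ai_trace_id": "trace_abc123"},
}

# After: the same event is explicitly marked personless, so no person
# profile is created for the fallback ID.
event_after = {
    "distinct_id": "trace_abc123",
    "event": "$ai_trace",
    "properties": {
        "$ai_trace_id": "trace_abc123",
        "$process_person_profile": False,
    },
}
```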
--- posthog/ai/openai_agents/processor.py | 5 ++-- .../test/ai/openai_agents/test_processor.py | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 459f4a90..6c61d10a 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -95,6 +95,7 @@ def __init__( """ self._client = client or setup() self._distinct_id = distinct_id + self._has_user_distinct_id = distinct_id is not None self._privacy_mode = privacy_mode self._groups = groups or {} self._properties = properties or {} @@ -178,8 +179,8 @@ def _capture_event( **self._properties, } - # Don't process person profile if no distinct_id - if distinct_id is None: + # Don't create person profiles when using fallback IDs (trace_id, "unknown") + if not self._has_user_distinct_id: final_properties["$process_person_profile"] = False self._client.capture( diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 6759bf7d..a3c9367a 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -116,6 +116,29 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_provider"] == "openai" assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" + def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): + """Test that events use personless mode when no distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + + def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace): + """Test that events create person profiles when distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="real-user", + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert "$process_person_profile" not in call_kwargs["properties"] + def test_on_trace_end_clears_metadata(self, processor, mock_trace): """Test that on_trace_end clears stored trace metadata.""" processor.on_trace_start(mock_trace) From fb4f1da8d15efe836098471c5c0249a63274ca19 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 14:23:15 +0000 Subject: [PATCH 17/24] fix: restore changelog history and fix personless mode edge cases Two fixes from bot review: 1. CHANGELOG.md was accidentally truncated to 38 lines during rebase conflict resolution. Restored all 767 lines of history. 2. Personless mode now follows the same pattern as langchain/openai integrations: _get_distinct_id returns None when no user-provided ID is available, and callers set $process_person_profile=False before falling back to trace_id. This covers the edge case where a callable distinct_id returns None. 
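A minimal sketch of the resulting flow, mirroring the new test fixtures
(mocked client and trace; only names introduced by this patch are assumed):

```python
from unittest.mock import MagicMock

from posthog.ai.openai_agents import PostHogTracingProcessor

client = MagicMock()
trace = MagicMock(trace_id="trace_123")
trace.name = "Test Workflow"  # set separately: `name` is reserved in MagicMock()

# A callable resolver that finds no user ID for the trace returns None...
processor = PostHogTracingProcessor(client=client, distinct_id=lambda t: None)
processor.on_trace_start(trace)

# ...so the captured event falls back to the trace ID in personless mode.
kwargs = client.capture.call_args[1]
assert kwargs["distinct_id"] == "trace_123"
assert kwargs["properties"]["$process_person_profile"] is False
```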
--- posthog/ai/openai_agents/processor.py | 46 +++++++++++------ .../test/ai/openai_agents/test_processor.py | 51 ++++++++++++++++++- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 6c61d10a..9b76000c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -95,7 +95,6 @@ def __init__( """ self._client = client or setup() self._distinct_id = distinct_id - self._has_user_distinct_id = distinct_id is not None self._privacy_mode = privacy_mode self._groups = groups or {} self._properties = properties or {} @@ -110,19 +109,22 @@ def __init__( # is never called (e.g., due to an exception in the Agents SDK). self._max_tracked_entries = 10000 - def _get_distinct_id(self, trace: Optional[Trace]) -> str: - """Resolve the distinct ID for a trace.""" + def _get_distinct_id(self, trace: Optional[Trace]) -> Optional[str]: + """Resolve the distinct ID for a trace. + + Returns the user-provided distinct ID (string or callable result), + or None if no user-provided ID is available. Callers should treat + None as a signal to use a fallback ID in personless mode. + """ if callable(self._distinct_id): if trace: result = self._distinct_id(trace) if result: return str(result) - return trace.trace_id if trace else "unknown" + return None elif self._distinct_id: return str(self._distinct_id) - elif trace: - return trace.trace_id - return "unknown" + return None def _with_privacy_mode(self, value: Any) -> Any: """Apply privacy mode redaction if enabled.""" @@ -166,25 +168,27 @@ def _capture_event( properties: Dict[str, Any], distinct_id: Optional[str] = None, ) -> None: - """Capture an event to PostHog with error handling.""" + """Capture an event to PostHog with error handling. + + Args: + distinct_id: The resolved distinct ID. When the user didn't provide + one, callers should pass ``user_distinct_id or fallback_id`` + (matching the langchain/openai pattern) and separately set + ``$process_person_profile`` in properties. + """ try: if not hasattr(self._client, "capture") or not callable( self._client.capture ): return - final_distinct_id = distinct_id or "unknown" final_properties = { **properties, **self._properties, } - # Don't create person profiles when using fallback IDs (trace_id, "unknown") - if not self._has_user_distinct_id: - final_properties["$process_person_profile"] = False - self._client.capture( - distinct_id=final_distinct_id, + distinct_id=distinct_id or "unknown", event=event, properties=final_properties, groups=self._groups, @@ -226,9 +230,12 @@ def on_trace_start(self, trace: Trace) -> None: if metadata: properties["$ai_trace_metadata"] = _safe_json(metadata) + if distinct_id is None: + properties["$process_person_profile"] = False + self._capture_event( event="$ai_trace", - distinct_id=distinct_id, + distinct_id=distinct_id or trace_id, properties=properties, ) except Exception as e: @@ -271,7 +278,9 @@ def on_span_end(self, span: Span[Any]) -> None: ended = _parse_iso_timestamp(span.ended_at) latency = (ended - started) if (started and ended) else 0 - # Get distinct ID from trace metadata (resolved at trace start) or default + # Get user-provided distinct ID from trace metadata (resolved at trace start). + # None means no user-provided ID — use trace_id as fallback in personless mode, + # matching the langchain/openai pattern: `distinct_id or trace_id`. 
trace_info = self._trace_metadata.get(trace_id, {}) distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None) @@ -311,6 +320,11 @@ def on_span_end(self, span: Span[Any]) -> None: "$ai_error_type": error_type, } + # Personless mode: no user-provided distinct_id, fallback to trace_id + if distinct_id is None: + error_properties["$process_person_profile"] = False + distinct_id = trace_id + # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index a3c9367a..eafe83f0 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -117,7 +117,7 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): - """Test that events use personless mode when no distinct_id is provided.""" + """Test that trace events use personless mode when no distinct_id is provided.""" processor = PostHogTracingProcessor( client=mock_client, ) @@ -126,6 +126,55 @@ def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$process_person_profile"] is False + # Should fallback to trace_id as the distinct_id + assert call_kwargs["distinct_id"] == mock_trace.trace_id + + def test_personless_mode_for_spans_when_no_distinct_id( + self, mock_client, mock_trace, mock_span + ): + """Test that span events use personless mode when no distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + ) + + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + assert call_kwargs["distinct_id"] == mock_span.trace_id + + def test_personless_mode_when_callable_returns_none( + self, mock_client, mock_trace, mock_span + ): + """Test personless mode when callable distinct_id returns None.""" + + def resolver(trace): + return None # Simulate no user ID available + + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + assert call_kwargs["distinct_id"] == mock_span.trace_id def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace): """Test that events create person profiles when distinct_id is provided.""" From 61d43e3bd1b40a479df0a5dec8eecbf4ce02c3ef Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:25:03 +0000 Subject: [PATCH 18/24] fix: handle None token counts in generation span Guard against input_tokens or output_tokens being None when computing $ai_total_tokens to avoid TypeError. 
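For illustration, a small sketch of the failure mode this guards against
(the usage payload is hypothetical):

```python
# A provider can report a usage key whose value is None rather than
# omitting the key entirely.
usage = {"prompt_tokens": None}

# Old extraction: the key exists, so .get()'s default 0 is never used and
# the whole expression still evaluates to None...
old_input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0)
assert old_input_tokens is None  # ...making `None + output_tokens` a TypeError

# New extraction: a trailing `or 0` coerces the None to 0.
new_input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0
assert new_input_tokens == 0
```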
--- posthog/ai/openai_agents/processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 9b76000c..29c2cf88 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -480,8 +480,8 @@ def _handle_generation_span( """Handle LLM generation spans - maps to $ai_generation event.""" # Extract token usage usage = span_data.usage or {} - input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0) - output_tokens = usage.get("output_tokens") or usage.get("completion_tokens", 0) + input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0 + output_tokens = usage.get("output_tokens") or usage.get("completion_tokens") or 0 # Extract model config parameters model_config = span_data.model_config or {} @@ -506,7 +506,7 @@ def _handle_generation_span( "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, - "$ai_total_tokens": input_tokens + output_tokens, + "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0), } # Add optional token fields if present From b626a16b155586538deb0a61d805675dc0547e50 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:26:44 +0000 Subject: [PATCH 19/24] fix: check error_type_raw for all error categories Check both error_type_raw and error_message for guardrail and max_turns errors, consistent with how ModelBehaviorError and UserError are already checked. --- posthog/ai/openai_agents/processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 29c2cf88..d3cba1d0 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -307,11 +307,11 @@ def on_span_end(self, span: Span[Any]) -> None: error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" - elif "InputGuardrailTripwireTriggered" in error_message: + elif "InputGuardrailTripwireTriggered" in error_type_raw or "InputGuardrailTripwireTriggered" in error_message: error_type = "input_guardrail_triggered" - elif "OutputGuardrailTripwireTriggered" in error_message: + elif "OutputGuardrailTripwireTriggered" in error_type_raw or "OutputGuardrailTripwireTriggered" in error_message: error_type = "output_guardrail_triggered" - elif "MaxTurnsExceeded" in error_message: + elif "MaxTurnsExceeded" in error_type_raw or "MaxTurnsExceeded" in error_message: error_type = "max_turns_exceeded" error_properties = { From b4a2d8be2e68f492fd5bc8061c66133d226a03d3 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:28:46 +0000 Subject: [PATCH 20/24] fix: add type hints to instrument() function --- posthog/ai/openai_agents/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py index 49e4186e..2c7f277c 100644 --- a/posthog/ai/openai_agents/__init__.py +++ b/posthog/ai/openai_agents/__init__.py @@ -1,3 +1,5 @@ +from typing import Any, Callable, Dict, Optional, Union + try: import agents # noqa: F401 except ImportError: @@ -11,12 +13,12 @@ def instrument( - client=None, - distinct_id=None, + client: Optional["Client"] = None, + distinct_id: Optional[Union[str, Callable[["Trace"], Optional[str]]]] = None, 
privacy_mode: bool = False, - groups=None, - properties=None, -): + groups: Optional[Dict[str, Any]] = None, + properties: Optional[Dict[str, Any]] = None, +) -> PostHogTracingProcessor: """ One-liner to instrument OpenAI Agents SDK with PostHog tracing. From d4f4a3a7c168477fcede28ffa3009d320a47adac Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:29:26 +0000 Subject: [PATCH 21/24] refactor: rename _safe_json to _ensure_serializable for clarity The function validates JSON serializability and falls back to str(), not serializes. Rename and update docstring to make the contract clear. --- posthog/ai/openai_agents/processor.py | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index d3cba1d0..02fdf359 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -26,8 +26,13 @@ log = logging.getLogger("posthog") -def _safe_json(obj: Any) -> Any: - """Safely convert object to JSON-serializable format.""" +def _ensure_serializable(obj: Any) -> Any: + """Ensure an object is JSON-serializable, converting to str as fallback. + + Returns the original object if it's already serializable (dict, list, str, + int, etc.), or str(obj) for non-serializable types so that downstream + json.dumps() calls won't fail. + """ if obj is None: return None try: @@ -228,7 +233,7 @@ def on_trace_start(self, trace: Trace) -> None: # Include trace metadata if present if metadata: - properties["$ai_trace_metadata"] = _safe_json(metadata) + properties["$ai_trace_metadata"] = _ensure_serializable(metadata) if distinct_id is None: properties["$process_person_profile"] = False @@ -502,8 +507,8 @@ def _handle_generation_span( ), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, - "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), - "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), + "$ai_output_choices": self._with_privacy_mode(_ensure_serializable(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0), @@ -539,12 +544,12 @@ def _handle_function_span( ), "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), - "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input_state": self._with_privacy_mode(_ensure_serializable(span_data.input)), + "$ai_output_state": self._with_privacy_mode(_ensure_serializable(span_data.output)), } if span_data.mcp_data: - properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) + properties["$ai_mcp_data"] = _ensure_serializable(span_data.mcp_data) self._capture_event("$ai_span", properties, distinct_id) @@ -656,7 +661,7 @@ def _handle_response_span( ), "$ai_model": model, "$ai_response_id": response_id, - "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, @@ -667,7 +672,7 @@ def _handle_response_span( output_items = getattr(response, "output", None) if output_items: properties["$ai_output_choices"] = self._with_privacy_mode( - 
_safe_json(output_items) + _ensure_serializable(output_items) ) self._capture_event("$ai_generation", properties, distinct_id) @@ -690,7 +695,7 @@ def _handle_custom_span( ), "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), + "$ai_custom_data": self._with_privacy_mode(_ensure_serializable(span_data.data)), } self._capture_event("$ai_span", properties, distinct_id) @@ -723,7 +728,7 @@ def _handle_audio_span( # Add model config if available (pass-through property) if hasattr(span_data, "model_config") and span_data.model_config: - properties["model_config"] = _safe_json(span_data.model_config) + properties["model_config"] = _ensure_serializable(span_data.model_config) # Add time to first audio byte for speech spans (pass-through property) if hasattr(span_data, "first_content_at") and span_data.first_content_at: @@ -800,7 +805,7 @@ def _handle_generic_span( if hasattr(span_data, "export"): try: exported = span_data.export() - properties["$ai_span_data"] = _safe_json(exported) + properties["$ai_span_data"] = _ensure_serializable(exported) except Exception: pass From 7a534de1a82630b5eff7343a7d8a80a904c9e8b0 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:36:17 +0000 Subject: [PATCH 22/24] refactor: emit $ai_trace at trace end instead of start Move the $ai_trace event from on_trace_start to on_trace_end to capture full metadata including latency, matching the LangChain integration approach. on_trace_start now only stores metadata for use by spans. --- posthog/ai/openai_agents/processor.py | 36 ++++++++++++------- .../test/ai/openai_agents/test_processor.py | 21 +++++++++-- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 02fdf359..0c32b696 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -202,7 +202,7 @@ def _capture_event( log.debug(f"Failed to capture PostHog event: {e}") def on_trace_start(self, trace: Trace) -> None: - """Called when a new trace begins.""" + """Called when a new trace begins. Stores metadata for spans; the $ai_trace event is emitted in on_trace_end.""" try: self._evict_stale_entries() trace_id = trace.trace_id @@ -212,13 +212,32 @@ def on_trace_start(self, trace: Trace) -> None: distinct_id = self._get_distinct_id(trace) - # Store trace metadata for later (used by spans) + # Store trace metadata for later (used by spans and on_trace_end) self._trace_metadata[trace_id] = { "name": trace_name, "group_id": group_id, "metadata": metadata, "distinct_id": distinct_id, + "start_time": time.time(), } + except Exception as e: + log.debug(f"Error in on_trace_start: {e}") + + def on_trace_end(self, trace: Trace) -> None: + """Called when a trace completes. 
Emits the $ai_trace event with full metadata.""" + try: + trace_id = trace.trace_id + + # Pop stored metadata (also cleans up) + trace_info = self._trace_metadata.pop(trace_id, {}) + trace_name = trace_info.get("name") or trace.name + group_id = trace_info.get("group_id") or getattr(trace, "group_id", None) + metadata = trace_info.get("metadata") or getattr(trace, "metadata", None) + distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(trace) + + # Calculate trace-level latency + start_time = trace_info.get("start_time") + latency = (time.time() - start_time) if start_time else None properties = { "$ai_trace_id": trace_id, @@ -227,6 +246,9 @@ def on_trace_start(self, trace: Trace) -> None: "$ai_framework": "openai-agents", } + if latency is not None: + properties["$ai_latency"] = latency + # Include group_id for linking related traces (e.g., conversation threads) if group_id: properties["$ai_group_id"] = group_id @@ -243,16 +265,6 @@ def on_trace_start(self, trace: Trace) -> None: distinct_id=distinct_id or trace_id, properties=properties, ) - except Exception as e: - log.debug(f"Error in on_trace_start: {e}") - - def on_trace_end(self, trace: Trace) -> None: - """Called when a trace completes.""" - try: - trace_id = trace.trace_id - - # Clean up stored metadata - self._trace_metadata.pop(trace_id, None) except Exception as e: log.debug(f"Error in on_trace_end: {e}") diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index eafe83f0..99ad7b43 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -102,10 +102,18 @@ def resolver(trace): distinct_id = processor._get_distinct_id(mock_trace) assert distinct_id == "resolved-user" - def test_on_trace_start(self, processor, mock_client, mock_trace): - """Test that on_trace_start captures $ai_trace event.""" + def test_on_trace_start_stores_metadata(self, processor, mock_client, mock_trace): + """Test that on_trace_start stores metadata but does not capture an event.""" processor.on_trace_start(mock_trace) + mock_client.capture.assert_not_called() + assert mock_trace.trace_id in processor._trace_metadata + + def test_on_trace_end_captures_ai_trace(self, processor, mock_client, mock_trace): + """Test that on_trace_end captures $ai_trace event.""" + processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) + mock_client.capture.assert_called_once() call_kwargs = mock_client.capture.call_args[1] @@ -115,6 +123,7 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" assert call_kwargs["properties"]["$ai_provider"] == "openai" assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" + assert "$ai_latency" in call_kwargs["properties"] def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): """Test that trace events use personless mode when no distinct_id is provided.""" @@ -123,6 +132,7 @@ def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$process_person_profile"] is False @@ -184,17 +194,20 @@ def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace) ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert 
"$process_person_profile" not in call_kwargs["properties"] - def test_on_trace_end_clears_metadata(self, processor, mock_trace): + def test_on_trace_end_clears_metadata(self, processor, mock_client, mock_trace): """Test that on_trace_end clears stored trace metadata.""" processor.on_trace_start(mock_trace) assert mock_trace.trace_id in processor._trace_metadata processor.on_trace_end(mock_trace) assert mock_trace.trace_id not in processor._trace_metadata + # Also verify it captured the event + mock_client.capture.assert_called_once() def test_on_span_start_tracks_time(self, processor, mock_span): """Test that on_span_start records start time.""" @@ -630,6 +643,7 @@ def test_groups_included_in_events(self, mock_client, mock_trace, mock_span): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["groups"] == {"company": "acme", "team": "engineering"} @@ -643,6 +657,7 @@ def test_additional_properties_included(self, mock_client, mock_trace): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["environment"] == "production" From ea6cba36a09de39a8fd10de1b81fa58272e82c28 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:45:52 +0000 Subject: [PATCH 23/24] style: fix ruff formatting --- posthog/ai/openai_agents/processor.py | 35 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 0c32b696..fcc51e45 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -324,11 +324,20 @@ def on_span_end(self, span: Span[Any]) -> None: error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" - elif "InputGuardrailTripwireTriggered" in error_type_raw or "InputGuardrailTripwireTriggered" in error_message: + elif ( + "InputGuardrailTripwireTriggered" in error_type_raw + or "InputGuardrailTripwireTriggered" in error_message + ): error_type = "input_guardrail_triggered" - elif "OutputGuardrailTripwireTriggered" in error_type_raw or "OutputGuardrailTripwireTriggered" in error_message: + elif ( + "OutputGuardrailTripwireTriggered" in error_type_raw + or "OutputGuardrailTripwireTriggered" in error_message + ): error_type = "output_guardrail_triggered" - elif "MaxTurnsExceeded" in error_type_raw or "MaxTurnsExceeded" in error_message: + elif ( + "MaxTurnsExceeded" in error_type_raw + or "MaxTurnsExceeded" in error_message + ): error_type = "max_turns_exceeded" error_properties = { @@ -498,7 +507,9 @@ def _handle_generation_span( # Extract token usage usage = span_data.usage or {} input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0 - output_tokens = usage.get("output_tokens") or usage.get("completion_tokens") or 0 + output_tokens = ( + usage.get("output_tokens") or usage.get("completion_tokens") or 0 + ) # Extract model config parameters model_config = span_data.model_config or {} @@ -520,7 +531,9 @@ def _handle_generation_span( "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), - "$ai_output_choices": self._with_privacy_mode(_ensure_serializable(span_data.output)), + "$ai_output_choices": self._with_privacy_mode( + 
+                _ensure_serializable(span_data.output)
+            ),
             "$ai_input_tokens": input_tokens,
             "$ai_output_tokens": output_tokens,
             "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0),
@@ -556,8 +569,12 @@ def _handle_function_span(
             ),
             "$ai_span_name": span_data.name,
             "$ai_span_type": "tool",
-            "$ai_input_state": self._with_privacy_mode(_ensure_serializable(span_data.input)),
-            "$ai_output_state": self._with_privacy_mode(_ensure_serializable(span_data.output)),
+            "$ai_input_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.input)
+            ),
+            "$ai_output_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
         }

         if span_data.mcp_data:
@@ -707,7 +724,9 @@ def _handle_custom_span(
             ),
             "$ai_span_name": span_data.name,
             "$ai_span_type": "custom",
-            "$ai_custom_data": self._with_privacy_mode(_ensure_serializable(span_data.data)),
+            "$ai_custom_data": self._with_privacy_mode(
+                _ensure_serializable(span_data.data)
+            ),
         }

         self._capture_event("$ai_span", properties, distinct_id)

From f239609954ba67351a565c433750ed91f5ec49b0 Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Tue, 27 Jan 2026 21:11:12 +0000
Subject: [PATCH 24/24] fix: add TYPE_CHECKING imports for type hints in instrument()

---
 posthog/ai/openai_agents/__init__.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py
index 2c7f277c..2e1611cc 100644
--- a/posthog/ai/openai_agents/__init__.py
+++ b/posthog/ai/openai_agents/__init__.py
@@ -1,4 +1,11 @@
-from typing import Any, Callable, Dict, Optional, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
+
+if TYPE_CHECKING:
+    from agents.tracing import Trace
+
+    from posthog.client import Client

 try:
     import agents  # noqa: F401
@@ -13,8 +20,8 @@


 def instrument(
-    client: Optional["Client"] = None,
-    distinct_id: Optional[Union[str, Callable[["Trace"], Optional[str]]]] = None,
+    client: Optional[Client] = None,
+    distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None,
     privacy_mode: bool = False,
     groups: Optional[Dict[str, Any]] = None,
     properties: Optional[Dict[str, Any]] = None,
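
A note on the pattern patch 24 applies: with `from __future__ import annotations`, every annotation is stored as a string (PEP 563), so names imported only under `typing.TYPE_CHECKING` never need to exist at runtime. The sketch below is illustrative only and not part of the patches; the `describe` helper is invented for the example.

```python
from __future__ import annotations  # PEP 563: annotations become strings

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only static type checkers (mypy, pyright) evaluate this block; at
    # runtime TYPE_CHECKING is False, so the import is never executed.
    from agents.tracing import Trace


def describe(trace: Optional[Trace] = None) -> str:
    # Hypothetical helper, not in the patch series. The annotation above is
    # a plain string at runtime, so `Trace` is never looked up unless
    # something calls typing.get_type_hints() on this function.
    return "no trace" if trace is None else f"trace {trace.trace_id}"
```

This is why patch 24 can drop the quoted forward references (`"Client"`, `"Trace"`) from the `instrument()` signature: both imports are deferred to type-check time while the annotations stay fully checkable.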
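
To make the patch 22 behavior concrete, here is a rough end-to-end sketch of the new trace lifecycle against a mocked client, in the spirit of the tests above. The mock attribute values are assumptions standing in for the suite's real fixtures, which this series does not show.

```python
from unittest.mock import MagicMock

from posthog.ai.openai_agents import PostHogTracingProcessor

client = MagicMock()
processor = PostHogTracingProcessor(client=client, distinct_id="user-1")

trace = MagicMock()
trace.trace_id = "trace_abc123"  # assumed stand-ins for the real fixtures
trace.name = "Test Workflow"
trace.group_id = None
trace.metadata = None

processor.on_trace_start(trace)       # only stores name/distinct_id/start_time
client.capture.assert_not_called()    # nothing has been sent yet

processor.on_trace_end(trace)         # pops the metadata and computes latency
client.capture.assert_called_once()   # a single $ai_trace event
props = client.capture.call_args[1]["properties"]
assert "$ai_latency" in props         # measured from the stored start_time
```

One trade-off worth noting: emitting at trace end means a process that dies mid-run loses the trace event, but it is the only way to attach `$ai_latency` and the final metadata to a single event, and per the commit message it matches what the LangChain integration already does.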