From 2895fcd23a325ee6bf2cbc96c48abbddf3134393 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 14 Jan 2026 21:43:30 +0100 Subject: [PATCH 01/24] feat(ai): add OpenAI Agents SDK integration Add PostHogTracingProcessor that implements the OpenAI Agents SDK TracingProcessor interface to capture agent traces in PostHog. - Maps GenerationSpanData to $ai_generation events - Maps FunctionSpanData, AgentSpanData, HandoffSpanData, GuardrailSpanData to $ai_span events with appropriate types - Supports privacy mode, groups, and custom properties - Includes instrument() helper for one-liner setup - 22 unit tests covering all span types --- posthog/ai/openai_agents/__init__.py | 67 ++ posthog/ai/openai_agents/processor.py | 624 ++++++++++++++++++ posthog/test/ai/openai_agents/__init__.py | 1 + .../test/ai/openai_agents/test_processor.py | 413 ++++++++++++ 4 files changed, 1105 insertions(+) create mode 100644 posthog/ai/openai_agents/__init__.py create mode 100644 posthog/ai/openai_agents/processor.py create mode 100644 posthog/test/ai/openai_agents/__init__.py create mode 100644 posthog/test/ai/openai_agents/test_processor.py diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py new file mode 100644 index 00000000..49e4186e --- /dev/null +++ b/posthog/ai/openai_agents/__init__.py @@ -0,0 +1,67 @@ +try: + import agents # noqa: F401 +except ImportError: + raise ModuleNotFoundError( + "Please install the OpenAI Agents SDK to use this feature: 'pip install openai-agents'" + ) + +from posthog.ai.openai_agents.processor import PostHogTracingProcessor + +__all__ = ["PostHogTracingProcessor", "instrument"] + + +def instrument( + client=None, + distinct_id=None, + privacy_mode: bool = False, + groups=None, + properties=None, +): + """ + One-liner to instrument OpenAI Agents SDK with PostHog tracing. + + This registers a PostHogTracingProcessor with the OpenAI Agents SDK, + automatically capturing traces, spans, and LLM generations. + + Args: + client: Optional PostHog client instance. If not provided, uses the default client. + distinct_id: Optional distinct ID to associate with all traces. + Can also be a callable that takes a trace and returns a distinct ID. + privacy_mode: If True, redacts input/output content from events. + groups: Optional PostHog groups to associate with events. + properties: Optional additional properties to include with all events. + + Returns: + PostHogTracingProcessor: The registered processor instance. 
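+
+    Note:
+        Call this once during startup. Each call registers an additional
+        processor via add_trace_processor(), so calling it repeatedly
+        will emit duplicate events for the same traces.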
+ + Example: + ```python + from posthog.ai.openai_agents import instrument + + # Simple setup + instrument(distinct_id="user@example.com") + + # With custom properties + instrument( + distinct_id="user@example.com", + privacy_mode=True, + properties={"environment": "production"} + ) + + # Now run agents as normal - traces automatically sent to PostHog + from agents import Agent, Runner + agent = Agent(name="Assistant", instructions="You are helpful.") + result = Runner.run_sync(agent, "Hello!") + ``` + """ + from agents.tracing import add_trace_processor + + processor = PostHogTracingProcessor( + client=client, + distinct_id=distinct_id, + privacy_mode=privacy_mode, + groups=groups, + properties=properties, + ) + add_trace_processor(processor) + return processor diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py new file mode 100644 index 00000000..34f84556 --- /dev/null +++ b/posthog/ai/openai_agents/processor.py @@ -0,0 +1,624 @@ +import json +import logging +import time +from datetime import datetime +from typing import Any, Callable, Dict, Optional, Union + +from agents.tracing import Span, Trace +from agents.tracing.processor_interface import TracingProcessor +from agents.tracing.span_data import ( + AgentSpanData, + CustomSpanData, + FunctionSpanData, + GenerationSpanData, + GuardrailSpanData, + HandoffSpanData, + MCPListToolsSpanData, + ResponseSpanData, + SpeechGroupSpanData, + SpeechSpanData, + TranscriptionSpanData, +) + +from posthog import setup +from posthog.client import Client + +log = logging.getLogger("posthog") + + +def _safe_json(obj: Any) -> Any: + """Safely convert object to JSON-serializable format.""" + if obj is None: + return None + try: + json.dumps(obj) + return obj + except (TypeError, ValueError): + return str(obj) + + +def _parse_iso_timestamp(iso_str: Optional[str]) -> Optional[float]: + """Parse ISO timestamp to Unix timestamp.""" + if not iso_str: + return None + try: + dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00")) + return dt.timestamp() + except (ValueError, AttributeError): + return None + + +class PostHogTracingProcessor(TracingProcessor): + """ + A tracing processor that sends OpenAI Agents SDK traces to PostHog. + + This processor implements the TracingProcessor interface from the OpenAI Agents SDK + and maps agent traces, spans, and generations to PostHog's LLM analytics events. + + Example: + ```python + from agents import Agent, Runner + from agents.tracing import add_trace_processor + from posthog.ai.openai_agents import PostHogTracingProcessor + + # Create and register the processor + processor = PostHogTracingProcessor( + distinct_id="user@example.com", + privacy_mode=False, + ) + add_trace_processor(processor) + + # Run agents as normal - traces automatically sent to PostHog + agent = Agent(name="Assistant", instructions="You are helpful.") + result = Runner.run_sync(agent, "Hello!") + ``` + """ + + def __init__( + self, + client: Optional[Client] = None, + distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None, + privacy_mode: bool = False, + groups: Optional[Dict[str, Any]] = None, + properties: Optional[Dict[str, Any]] = None, + ): + """ + Initialize the PostHog tracing processor. + + Args: + client: Optional PostHog client instance. If not provided, uses the default client. + distinct_id: Either a string distinct ID or a callable that takes a Trace + and returns a distinct ID. If not provided, uses the trace_id. 
+ privacy_mode: If True, redacts input/output content from events. + groups: Optional PostHog groups to associate with all events. + properties: Optional additional properties to include with all events. + """ + self._client = client or setup() + self._distinct_id = distinct_id + self._privacy_mode = privacy_mode + self._groups = groups or {} + self._properties = properties or {} + + # Track span start times for latency calculation + self._span_start_times: Dict[str, float] = {} + + # Track trace metadata for associating with spans + self._trace_metadata: Dict[str, Dict[str, Any]] = {} + + def _get_distinct_id(self, trace: Optional[Trace]) -> str: + """Resolve the distinct ID for a trace.""" + if callable(self._distinct_id): + if trace: + result = self._distinct_id(trace) + if result: + return str(result) + return trace.trace_id if trace else "unknown" + elif self._distinct_id: + return str(self._distinct_id) + elif trace: + return trace.trace_id + return "unknown" + + def _with_privacy_mode(self, value: Any) -> Any: + """Apply privacy mode redaction if enabled.""" + if self._privacy_mode or ( + hasattr(self._client, "privacy_mode") and self._client.privacy_mode + ): + return None + return value + + def _capture_event( + self, + event: str, + properties: Dict[str, Any], + distinct_id: Optional[str] = None, + ) -> None: + """Capture an event to PostHog with error handling.""" + try: + if not hasattr(self._client, "capture") or not callable(self._client.capture): + return + + final_distinct_id = distinct_id or "unknown" + final_properties = { + **properties, + **self._properties, + } + + # Don't process person profile if no distinct_id + if distinct_id is None: + final_properties["$process_person_profile"] = False + + self._client.capture( + distinct_id=final_distinct_id, + event=event, + properties=final_properties, + groups=self._groups, + ) + except Exception as e: + log.debug(f"Failed to capture PostHog event: {e}") + + def on_trace_start(self, trace: Trace) -> None: + """Called when a new trace begins.""" + try: + trace_id = trace.trace_id + trace_name = trace.name + + # Store trace metadata for later + self._trace_metadata[trace_id] = { + "name": trace_name, + "group_id": getattr(trace, "group_id", None), + "metadata": getattr(trace, "metadata", None), + } + + distinct_id = self._get_distinct_id(trace) + + self._capture_event( + event="$ai_trace", + distinct_id=distinct_id, + properties={ + "$ai_trace_id": trace_id, + "$ai_trace_name": trace_name, + "$ai_provider": "openai_agents", + }, + ) + except Exception as e: + log.debug(f"Error in on_trace_start: {e}") + + def on_trace_end(self, trace: Trace) -> None: + """Called when a trace completes.""" + try: + trace_id = trace.trace_id + + # Clean up stored metadata + self._trace_metadata.pop(trace_id, None) + except Exception as e: + log.debug(f"Error in on_trace_end: {e}") + + def on_span_start(self, span: Span[Any]) -> None: + """Called when a new span begins.""" + try: + span_id = span.span_id + self._span_start_times[span_id] = time.time() + except Exception as e: + log.debug(f"Error in on_span_start: {e}") + + def on_span_end(self, span: Span[Any]) -> None: + """Called when a span completes.""" + try: + span_id = span.span_id + trace_id = span.trace_id + parent_id = span.parent_id + span_data = span.span_data + + # Calculate latency + start_time = self._span_start_times.pop(span_id, None) + if start_time: + latency = time.time() - start_time + else: + # Fall back to parsing timestamps + started = _parse_iso_timestamp(span.started_at) 
+ ended = _parse_iso_timestamp(span.ended_at) + latency = (ended - started) if (started and ended) else 0 + + # Get distinct ID from trace metadata or default + distinct_id = self._get_distinct_id(None) + if trace_id in self._trace_metadata: + # Use trace's distinct ID resolver if available + distinct_id = self._get_distinct_id(None) + + # Get error info if present + error_info = span.error + error_properties = {} + if error_info: + error_properties = { + "$ai_is_error": True, + "$ai_error": error_info.get("message", str(error_info)), + } + + # Dispatch based on span data type + if isinstance(span_data, GenerationSpanData): + self._handle_generation_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, FunctionSpanData): + self._handle_function_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, AgentSpanData): + self._handle_agent_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, HandoffSpanData): + self._handle_handoff_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, GuardrailSpanData): + self._handle_guardrail_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, ResponseSpanData): + self._handle_response_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, CustomSpanData): + self._handle_custom_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): + self._handle_audio_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + elif isinstance(span_data, MCPListToolsSpanData): + self._handle_mcp_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + else: + # Unknown span type - capture as generic span + self._handle_generic_span( + span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + ) + + except Exception as e: + log.debug(f"Error in on_span_end: {e}") + + def _handle_generation_span( + self, + span_data: GenerationSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle LLM generation spans - maps to $ai_generation event.""" + # Extract token usage + usage = span_data.usage or {} + input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0) + output_tokens = usage.get("output_tokens") or usage.get("completion_tokens", 0) + + # Extract model config parameters + model_config = span_data.model_config or {} + model_params = {} + for param in ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]: + if param in model_config: + model_params[param] = model_config[param] + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_model": span_data.model, + "$ai_model_parameters": model_params if model_params else None, + "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input_tokens": input_tokens, + "$ai_output_tokens": 
output_tokens, + "$ai_latency": latency, + **error_properties, + } + + # Add optional token fields if present + if usage.get("reasoning_tokens"): + properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] + if usage.get("cache_read_input_tokens"): + properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"] + if usage.get("cache_creation_input_tokens"): + properties["$ai_cache_creation_input_tokens"] = usage["cache_creation_input_tokens"] + + self._capture_event("$ai_generation", properties, distinct_id) + + def _handle_function_span( + self, + span_data: FunctionSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle function/tool call spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "tool", + "$ai_provider": "openai_agents", + "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_latency": latency, + **error_properties, + } + + # Add MCP data if present + if span_data.mcp_data: + properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_agent_span( + self, + span_data: AgentSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle agent execution spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "agent", + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Add agent-specific metadata + if span_data.handoffs: + properties["$ai_agent_handoffs"] = span_data.handoffs + if span_data.tools: + properties["$ai_agent_tools"] = span_data.tools + if span_data.output_type: + properties["$ai_agent_output_type"] = span_data.output_type + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_handoff_span( + self, + span_data: HandoffSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle agent handoff spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", + "$ai_span_type": "handoff", + "$ai_provider": "openai_agents", + "$ai_handoff_from_agent": span_data.from_agent, + "$ai_handoff_to_agent": span_data.to_agent, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_guardrail_span( + self, + span_data: GuardrailSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle guardrail execution spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "guardrail", + "$ai_provider": "openai_agents", + "$ai_guardrail_triggered": span_data.triggered, + "$ai_latency": latency, + 
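+            # error info is merged last so $ai_is_error/$ai_error take
+            # precedence over any earlier keys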
**error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_response_span( + self, + span_data: ResponseSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle OpenAI Response API spans - maps to $ai_generation event.""" + response = span_data.response + response_id = response.id if response else None + + # Try to extract usage from response + usage = getattr(response, "usage", None) if response else None + input_tokens = 0 + output_tokens = 0 + if usage: + input_tokens = getattr(usage, "input_tokens", 0) or 0 + output_tokens = getattr(usage, "output_tokens", 0) or 0 + + # Try to extract model from response + model = getattr(response, "model", None) if response else None + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_model": model, + "$ai_response_id": response_id, + "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_input_tokens": input_tokens, + "$ai_output_tokens": output_tokens, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_generation", properties, distinct_id) + + def _handle_custom_span( + self, + span_data: CustomSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle custom user-defined spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_data.name, + "$ai_span_type": "custom", + "$ai_provider": "openai_agents", + "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_audio_span( + self, + span_data: Union[TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData], + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle audio-related spans (transcription, speech) - maps to $ai_span event.""" + span_type = span_data.type # "transcription", "speech", or "speech_group" + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_type, + "$ai_span_type": span_type, + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Add model info if available + if hasattr(span_data, "model") and span_data.model: + properties["$ai_model"] = span_data.model + + # Don't include audio data (base64) - just metadata + if hasattr(span_data, "output") and isinstance(span_data.output, str): + # For transcription, output is the text + properties["$ai_output_state"] = self._with_privacy_mode(span_data.output) + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_mcp_span( + self, + span_data: MCPListToolsSpanData, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": f"mcp:{span_data.server}", + "$ai_span_type": "mcp_tools", + 
"$ai_provider": "openai_agents", + "$ai_mcp_server": span_data.server, + "$ai_mcp_tools": span_data.result, + "$ai_latency": latency, + **error_properties, + } + + self._capture_event("$ai_span", properties, distinct_id) + + def _handle_generic_span( + self, + span_data: Any, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + distinct_id: str, + error_properties: Dict[str, Any], + ) -> None: + """Handle unknown span types - maps to $ai_span event.""" + span_type = getattr(span_data, "type", "unknown") + + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_span_name": span_type, + "$ai_span_type": span_type, + "$ai_provider": "openai_agents", + "$ai_latency": latency, + **error_properties, + } + + # Try to export span data + if hasattr(span_data, "export"): + try: + exported = span_data.export() + properties["$ai_span_data"] = _safe_json(exported) + except Exception: + pass + + self._capture_event("$ai_span", properties, distinct_id) + + def shutdown(self) -> None: + """Clean up resources when the application stops.""" + try: + self._span_start_times.clear() + self._trace_metadata.clear() + + # Flush the PostHog client if possible + if hasattr(self._client, "flush") and callable(self._client.flush): + self._client.flush() + except Exception as e: + log.debug(f"Error in shutdown: {e}") + + def force_flush(self) -> None: + """Force immediate processing of any queued events.""" + try: + if hasattr(self._client, "flush") and callable(self._client.flush): + self._client.flush() + except Exception as e: + log.debug(f"Error in force_flush: {e}") diff --git a/posthog/test/ai/openai_agents/__init__.py b/posthog/test/ai/openai_agents/__init__.py new file mode 100644 index 00000000..1a28a2a4 --- /dev/null +++ b/posthog/test/ai/openai_agents/__init__.py @@ -0,0 +1 @@ +# Tests for OpenAI Agents SDK integration diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py new file mode 100644 index 00000000..bab7d798 --- /dev/null +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -0,0 +1,413 @@ +import logging +from unittest.mock import MagicMock, patch + +import pytest + +try: + from agents.tracing.span_data import ( + AgentSpanData, + CustomSpanData, + FunctionSpanData, + GenerationSpanData, + GuardrailSpanData, + HandoffSpanData, + ) + + from posthog.ai.openai_agents import PostHogTracingProcessor, instrument + + OPENAI_AGENTS_AVAILABLE = True +except ImportError: + OPENAI_AGENTS_AVAILABLE = False + + +# Skip all tests if OpenAI Agents SDK is not available +pytestmark = pytest.mark.skipif( + not OPENAI_AGENTS_AVAILABLE, reason="OpenAI Agents SDK is not available" +) + + +@pytest.fixture(scope="function") +def mock_client(): + client = MagicMock() + client.privacy_mode = False + logging.getLogger("posthog").setLevel(logging.DEBUG) + return client + + +@pytest.fixture(scope="function") +def processor(mock_client): + return PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + privacy_mode=False, + ) + + +@pytest.fixture +def mock_trace(): + trace = MagicMock() + trace.trace_id = "trace_123456789" + trace.name = "Test Workflow" + trace.group_id = "group_123" + trace.metadata = {"key": "value"} + return trace + + +@pytest.fixture +def mock_span(): + span = MagicMock() + span.trace_id = "trace_123456789" + span.span_id = "span_987654321" + span.parent_id = None + span.started_at = "2024-01-01T00:00:00Z" + span.ended_at = "2024-01-01T00:00:01Z" + 
span.error = None + return span + + +class TestPostHogTracingProcessor: + """Tests for the PostHogTracingProcessor class.""" + + def test_initialization(self, mock_client): + """Test processor initializes correctly.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="user@example.com", + privacy_mode=True, + groups={"company": "acme"}, + properties={"env": "test"}, + ) + + assert processor._client == mock_client + assert processor._distinct_id == "user@example.com" + assert processor._privacy_mode is True + assert processor._groups == {"company": "acme"} + assert processor._properties == {"env": "test"} + + def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace): + """Test processor with callable distinct_id resolver.""" + resolver = lambda trace: trace.metadata.get("user_id", "default") + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + mock_trace.metadata = {"user_id": "resolved-user"} + distinct_id = processor._get_distinct_id(mock_trace) + assert distinct_id == "resolved-user" + + def test_on_trace_start(self, processor, mock_client, mock_trace): + """Test that on_trace_start captures $ai_trace event.""" + processor.on_trace_start(mock_trace) + + mock_client.capture.assert_called_once() + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_trace" + assert call_kwargs["distinct_id"] == "test-user" + assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" + assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" + assert call_kwargs["properties"]["$ai_provider"] == "openai_agents" + + def test_on_trace_end_clears_metadata(self, processor, mock_trace): + """Test that on_trace_end clears stored trace metadata.""" + processor.on_trace_start(mock_trace) + assert mock_trace.trace_id in processor._trace_metadata + + processor.on_trace_end(mock_trace) + assert mock_trace.trace_id not in processor._trace_metadata + + def test_on_span_start_tracks_time(self, processor, mock_span): + """Test that on_span_start records start time.""" + processor.on_span_start(mock_span) + assert mock_span.span_id in processor._span_start_times + + def test_generation_span_mapping(self, processor, mock_client, mock_span): + """Test GenerationSpanData maps to $ai_generation event.""" + span_data = GenerationSpanData( + input=[{"role": "user", "content": "Hello"}], + output=[{"role": "assistant", "content": "Hi there!"}], + model="gpt-4o", + model_config={"temperature": 0.7, "max_tokens": 100}, + usage={"input_tokens": 10, "output_tokens": 20}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + mock_client.capture.assert_called_once() + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_generation" + assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" + assert call_kwargs["properties"]["$ai_span_id"] == "span_987654321" + assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_model"] == "gpt-4o" + assert call_kwargs["properties"]["$ai_input_tokens"] == 10 + assert call_kwargs["properties"]["$ai_output_tokens"] == 20 + assert call_kwargs["properties"]["$ai_input"] == [ + {"role": "user", "content": "Hello"} + ] + assert call_kwargs["properties"]["$ai_output_choices"] == [ + {"role": "assistant", "content": "Hi there!"} + ] + + def test_generation_span_with_reasoning_tokens(self, processor, mock_client, 
mock_span): + """Test GenerationSpanData includes reasoning tokens when present.""" + span_data = GenerationSpanData( + model="o1-preview", + usage={ + "input_tokens": 100, + "output_tokens": 500, + "reasoning_tokens": 400, + }, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_reasoning_tokens"] == 400 + + def test_function_span_mapping(self, processor, mock_client, mock_span): + """Test FunctionSpanData maps to $ai_span event with type=tool.""" + span_data = FunctionSpanData( + name="get_weather", + input='{"city": "San Francisco"}', + output="Sunny, 72F", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "get_weather" + assert call_kwargs["properties"]["$ai_span_type"] == "tool" + assert call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + assert call_kwargs["properties"]["$ai_output_state"] == "Sunny, 72F" + + def test_agent_span_mapping(self, processor, mock_client, mock_span): + """Test AgentSpanData maps to $ai_span event with type=agent.""" + span_data = AgentSpanData( + name="CustomerServiceAgent", + handoffs=["TechnicalAgent", "BillingAgent"], + tools=["search", "get_order"], + output_type="str", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "CustomerServiceAgent" + assert call_kwargs["properties"]["$ai_span_type"] == "agent" + assert call_kwargs["properties"]["$ai_agent_handoffs"] == [ + "TechnicalAgent", + "BillingAgent", + ] + assert call_kwargs["properties"]["$ai_agent_tools"] == ["search", "get_order"] + + def test_handoff_span_mapping(self, processor, mock_client, mock_span): + """Test HandoffSpanData maps to $ai_span event with type=handoff.""" + span_data = HandoffSpanData( + from_agent="TriageAgent", + to_agent="TechnicalAgent", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "handoff" + assert call_kwargs["properties"]["$ai_handoff_from_agent"] == "TriageAgent" + assert call_kwargs["properties"]["$ai_handoff_to_agent"] == "TechnicalAgent" + assert ( + call_kwargs["properties"]["$ai_span_name"] + == "TriageAgent -> TechnicalAgent" + ) + + def test_guardrail_span_mapping(self, processor, mock_client, mock_span): + """Test GuardrailSpanData maps to $ai_span event with type=guardrail.""" + span_data = GuardrailSpanData( + name="ContentFilter", + triggered=True, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "ContentFilter" + assert call_kwargs["properties"]["$ai_span_type"] == "guardrail" + assert call_kwargs["properties"]["$ai_guardrail_triggered"] is True + + def test_custom_span_mapping(self, processor, mock_client, mock_span): + """Test 
CustomSpanData maps to $ai_span event with type=custom.""" + span_data = CustomSpanData( + name="database_query", + data={"query": "SELECT * FROM users", "rows": 100}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_name"] == "database_query" + assert call_kwargs["properties"]["$ai_span_type"] == "custom" + assert call_kwargs["properties"]["$ai_custom_data"] == { + "query": "SELECT * FROM users", + "rows": 100, + } + + def test_privacy_mode_redacts_content(self, mock_client, mock_span): + """Test that privacy_mode redacts input/output content.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + privacy_mode=True, + ) + + span_data = GenerationSpanData( + input=[{"role": "user", "content": "Secret message"}], + output=[{"role": "assistant", "content": "Secret response"}], + model="gpt-4o", + usage={"input_tokens": 10, "output_tokens": 20}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + # Content should be redacted + assert call_kwargs["properties"]["$ai_input"] is None + assert call_kwargs["properties"]["$ai_output_choices"] is None + # Token counts should still be present + assert call_kwargs["properties"]["$ai_input_tokens"] == 10 + assert call_kwargs["properties"]["$ai_output_tokens"] == 20 + + def test_error_handling_in_span(self, processor, mock_client, mock_span): + """Test that span errors are captured correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Rate limit exceeded", "data": {"code": 429}} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["properties"]["$ai_is_error"] is True + assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" + + def test_latency_calculation(self, processor, mock_client, mock_span): + """Test that latency is calculated correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + with patch("time.time") as mock_time: + mock_time.return_value = 1000.0 + processor.on_span_start(mock_span) + + mock_time.return_value = 1001.5 # 1.5 seconds later + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_latency"] == pytest.approx(1.5, rel=0.01) + + def test_groups_included_in_events(self, mock_client, mock_trace, mock_span): + """Test that groups are included in captured events.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + groups={"company": "acme", "team": "engineering"}, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["groups"] == {"company": "acme", "team": "engineering"} + + def test_additional_properties_included(self, mock_client, mock_trace): + """Test that additional properties are included in events.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + properties={"environment": "production", "version": "1.0"}, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert 
call_kwargs["properties"]["environment"] == "production" + assert call_kwargs["properties"]["version"] == "1.0" + + def test_shutdown_clears_state(self, processor): + """Test that shutdown clears internal state.""" + processor._span_start_times["span_1"] = 1000.0 + processor._trace_metadata["trace_1"] = {"name": "test"} + + processor.shutdown() + + assert len(processor._span_start_times) == 0 + assert len(processor._trace_metadata) == 0 + + def test_force_flush_calls_client_flush(self, processor, mock_client): + """Test that force_flush calls client.flush().""" + processor.force_flush() + mock_client.flush.assert_called_once() + + +class TestInstrumentHelper: + """Tests for the instrument() convenience function.""" + + def test_instrument_registers_processor(self, mock_client): + """Test that instrument() registers a processor.""" + with patch("agents.tracing.add_trace_processor") as mock_add: + processor = instrument( + client=mock_client, + distinct_id="test-user", + ) + + mock_add.assert_called_once_with(processor) + assert isinstance(processor, PostHogTracingProcessor) + + def test_instrument_with_privacy_mode(self, mock_client): + """Test instrument() respects privacy_mode.""" + with patch("agents.tracing.add_trace_processor"): + processor = instrument( + client=mock_client, + privacy_mode=True, + ) + + assert processor._privacy_mode is True + + def test_instrument_with_groups_and_properties(self, mock_client): + """Test instrument() accepts groups and properties.""" + with patch("agents.tracing.add_trace_processor"): + processor = instrument( + client=mock_client, + groups={"company": "acme"}, + properties={"env": "test"}, + ) + + assert processor._groups == {"company": "acme"} + assert processor._properties == {"env": "test"} From 3006664813f4f3fbad195837aed2308bfeef7089 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 14 Jan 2026 23:27:03 +0100 Subject: [PATCH 02/24] feat(openai-agents): add $ai_group_id support for linking conversation traces - Capture group_id from trace and include as $ai_group_id on all events - Add _get_group_id() helper to retrieve group_id from trace metadata - Pass group_id through all span handlers (generation, function, agent, handoff, guardrail, response, custom, audio, mcp, generic) - Enables linking multiple traces in the same conversation thread --- posthog/ai/openai_agents/processor.py | 110 +++++++++++++++++++++----- 1 file changed, 89 insertions(+), 21 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 34f84556..87bafd14 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -127,6 +127,12 @@ def _with_privacy_mode(self, value: Any) -> Any: return None return value + def _get_group_id(self, trace_id: str) -> Optional[str]: + """Get the group_id for a trace from stored metadata.""" + if trace_id in self._trace_metadata: + return self._trace_metadata[trace_id].get("group_id") + return None + def _capture_event( self, event: str, @@ -162,24 +168,36 @@ def on_trace_start(self, trace: Trace) -> None: try: trace_id = trace.trace_id trace_name = trace.name + group_id = getattr(trace, "group_id", None) + metadata = getattr(trace, "metadata", None) - # Store trace metadata for later + # Store trace metadata for later (used by spans) self._trace_metadata[trace_id] = { "name": trace_name, - "group_id": getattr(trace, "group_id", None), - "metadata": getattr(trace, "metadata", None), + "group_id": group_id, + "metadata": metadata, } distinct_id = 
self._get_distinct_id(trace) + properties = { + "$ai_trace_id": trace_id, + "$ai_trace_name": trace_name, + "$ai_provider": "openai_agents", + } + + # Include group_id for linking related traces (e.g., conversation threads) + if group_id: + properties["$ai_group_id"] = group_id + + # Include trace metadata if present + if metadata: + properties["$ai_trace_metadata"] = _safe_json(metadata) + self._capture_event( event="$ai_trace", distinct_id=distinct_id, - properties={ - "$ai_trace_id": trace_id, - "$ai_trace_name": trace_name, - "$ai_provider": "openai_agents", - }, + properties=properties, ) except Exception as e: log.debug(f"Error in on_trace_start: {e}") @@ -222,9 +240,9 @@ def on_span_end(self, span: Span[Any]) -> None: # Get distinct ID from trace metadata or default distinct_id = self._get_distinct_id(None) - if trace_id in self._trace_metadata: - # Use trace's distinct ID resolver if available - distinct_id = self._get_distinct_id(None) + + # Get group_id from trace metadata for linking + group_id = self._get_group_id(trace_id) # Get error info if present error_info = span.error @@ -238,44 +256,44 @@ def on_span_end(self, span: Span[Any]) -> None: # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, FunctionSpanData): self._handle_function_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, AgentSpanData): self._handle_agent_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, HandoffSpanData): self._handle_handoff_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, GuardrailSpanData): self._handle_guardrail_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, ResponseSpanData): self._handle_response_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, CustomSpanData): self._handle_custom_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): self._handle_audio_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) elif isinstance(span_data, MCPListToolsSpanData): self._handle_mcp_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) else: # Unknown span type - capture as generic span 
self._handle_generic_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, error_properties + span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties ) except Exception as e: @@ -289,6 +307,7 @@ def _handle_generation_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle LLM generation spans - maps to $ai_generation event.""" @@ -319,6 +338,10 @@ def _handle_generation_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add optional token fields if present if usage.get("reasoning_tokens"): properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] @@ -337,6 +360,7 @@ def _handle_function_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" @@ -353,6 +377,10 @@ def _handle_function_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add MCP data if present if span_data.mcp_data: properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) @@ -367,6 +395,7 @@ def _handle_agent_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle agent execution spans - maps to $ai_span event.""" @@ -381,6 +410,10 @@ def _handle_agent_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add agent-specific metadata if span_data.handoffs: properties["$ai_agent_handoffs"] = span_data.handoffs @@ -399,6 +432,7 @@ def _handle_handoff_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" @@ -415,6 +449,10 @@ def _handle_handoff_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_guardrail_span( @@ -425,6 +463,7 @@ def _handle_guardrail_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" @@ -440,6 +479,10 @@ def _handle_guardrail_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_response_span( @@ -450,6 +493,7 @@ def _handle_response_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle OpenAI Response API spans - maps to $ai_generation event.""" @@ -481,6 +525,10 @@ def _handle_response_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_generation", properties, distinct_id) def _handle_custom_span( @@ -491,6 +539,7 @@ def _handle_custom_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle custom user-defined 
spans - maps to $ai_span event.""" @@ -506,6 +555,10 @@ def _handle_custom_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_audio_span( @@ -516,6 +569,7 @@ def _handle_audio_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle audio-related spans (transcription, speech) - maps to $ai_span event.""" @@ -532,6 +586,10 @@ def _handle_audio_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Add model info if available if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model @@ -551,6 +609,7 @@ def _handle_mcp_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" @@ -567,6 +626,10 @@ def _handle_mcp_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + self._capture_event("$ai_span", properties, distinct_id) def _handle_generic_span( @@ -577,6 +640,7 @@ def _handle_generic_span( parent_id: Optional[str], latency: float, distinct_id: str, + group_id: Optional[str], error_properties: Dict[str, Any], ) -> None: """Handle unknown span types - maps to $ai_span event.""" @@ -593,6 +657,10 @@ def _handle_generic_span( **error_properties, } + # Include group_id for linking related traces + if group_id: + properties["$ai_group_id"] = group_id + # Try to export span data if hasattr(span_data, "export"): try: From 945134a8df58874080879b008acecae95ff19fca Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 11:43:36 +0100 Subject: [PATCH 03/24] feat(openai-agents): add enhanced span properties - Add $ai_total_tokens to generation and response spans (required by PostHog cost reporting) - Add $ai_error_type for cross-provider error categorization (model_behavior_error, user_error, input_guardrail_triggered, output_guardrail_triggered, max_turns_exceeded) - Add $ai_output_choices to response spans for output content capture - Add audio pass-through properties for voice spans: - first_content_at (time to first audio byte) - audio_input_format / audio_output_format - model_config - $ai_input for TTS text input - Add comprehensive tests for all new properties --- posthog/ai/openai_agents/processor.py | 45 ++++- .../test/ai/openai_agents/test_processor.py | 168 ++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 87bafd14..26805c2c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -248,9 +248,26 @@ def on_span_end(self, span: Span[Any]) -> None: error_info = span.error error_properties = {} if error_info: + error_message = error_info.get("message", str(error_info)) + error_type_raw = error_info.get("type", "") + + # Categorize error type for cross-provider filtering/alerting + error_type = "unknown" + if "ModelBehaviorError" in error_type_raw or "ModelBehaviorError" in error_message: + error_type = "model_behavior_error" + elif "UserError" in error_type_raw or "UserError" in error_message: + error_type = "user_error" + elif 
"InputGuardrailTripwireTriggered" in error_message: + error_type = "input_guardrail_triggered" + elif "OutputGuardrailTripwireTriggered" in error_message: + error_type = "output_guardrail_triggered" + elif "MaxTurnsExceeded" in error_message: + error_type = "max_turns_exceeded" + error_properties = { "$ai_is_error": True, - "$ai_error": error_info.get("message", str(error_info)), + "$ai_error": error_message, + "$ai_error_type": error_type, } # Dispatch based on span data type @@ -334,6 +351,7 @@ def _handle_generation_span( "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, + "$ai_total_tokens": input_tokens + output_tokens, "$ai_latency": latency, **error_properties, } @@ -521,6 +539,7 @@ def _handle_response_span( "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, + "$ai_total_tokens": input_tokens + output_tokens, "$ai_latency": latency, **error_properties, } @@ -529,6 +548,12 @@ def _handle_response_span( if group_id: properties["$ai_group_id"] = group_id + # Extract output content from response + if response: + output_items = getattr(response, "output", None) + if output_items: + properties["$ai_output_choices"] = self._with_privacy_mode(_safe_json(output_items)) + self._capture_event("$ai_generation", properties, distinct_id) def _handle_custom_span( @@ -594,6 +619,24 @@ def _handle_audio_span( if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model + # Add model config if available (pass-through property) + if hasattr(span_data, "model_config") and span_data.model_config: + properties["model_config"] = _safe_json(span_data.model_config) + + # Add time to first audio byte for speech spans (pass-through property) + if hasattr(span_data, "first_content_at") and span_data.first_content_at: + properties["first_content_at"] = span_data.first_content_at + + # Add audio format info (pass-through properties) + if hasattr(span_data, "input_format"): + properties["audio_input_format"] = span_data.input_format + if hasattr(span_data, "output_format"): + properties["audio_output_format"] = span_data.output_format + + # Add text input for TTS + if hasattr(span_data, "input") and span_data.input and isinstance(span_data.input, str): + properties["$ai_input"] = self._with_privacy_mode(span_data.input) + # Don't include audio data (base64) - just metadata if hasattr(span_data, "output") and isinstance(span_data.output, str): # For transcription, output is the text diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index bab7d798..77eb4b11 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -11,6 +11,9 @@ GenerationSpanData, GuardrailSpanData, HandoffSpanData, + ResponseSpanData, + SpeechSpanData, + TranscriptionSpanData, ) from posthog.ai.openai_agents import PostHogTracingProcessor, instrument @@ -318,6 +321,171 @@ def test_error_handling_in_span(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_is_error"] is True assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" + def test_generation_span_includes_total_tokens(self, processor, mock_client, mock_span): + """Test that $ai_total_tokens is calculated and included.""" + span_data = GenerationSpanData( + model="gpt-4o", + usage={"input_tokens": 100, 
"output_tokens": 50}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_total_tokens"] == 150 + + def test_error_type_categorization_model_behavior(self, processor, mock_client, mock_span): + """Test that ModelBehaviorError is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "ModelBehaviorError: Invalid JSON output", "type": "ModelBehaviorError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" + + def test_error_type_categorization_user_error(self, processor, mock_client, mock_span): + """Test that UserError is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "UserError: Tool failed", "type": "UserError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "user_error" + + def test_error_type_categorization_input_guardrail(self, processor, mock_client, mock_span): + """Test that InputGuardrailTripwireTriggered is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "InputGuardrailTripwireTriggered: Content blocked"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + + def test_error_type_categorization_output_guardrail(self, processor, mock_client, mock_span): + """Test that OutputGuardrailTripwireTriggered is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "OutputGuardrailTripwireTriggered: Response blocked"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + + def test_error_type_categorization_max_turns(self, processor, mock_client, mock_span): + """Test that MaxTurnsExceeded is categorized correctly.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "MaxTurnsExceeded: Agent exceeded maximum turns"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "max_turns_exceeded" + + def test_error_type_categorization_unknown(self, processor, mock_client, mock_span): + """Test that unknown errors are categorized as unknown.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Some random error occurred"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "unknown" + + def test_response_span_with_output_and_total_tokens(self, processor, mock_client, mock_span): + """Test ResponseSpanData includes output 
choices and total tokens.""" + # Create a mock response object + mock_response = MagicMock() + mock_response.id = "resp_123" + mock_response.model = "gpt-4o" + mock_response.output = [{"type": "message", "content": "Hello!"}] + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 25 + mock_response.usage.output_tokens = 10 + + span_data = ResponseSpanData( + response=mock_response, + input="Hello, world!", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_generation" + assert call_kwargs["properties"]["$ai_total_tokens"] == 35 + assert call_kwargs["properties"]["$ai_output_choices"] == [{"type": "message", "content": "Hello!"}] + assert call_kwargs["properties"]["$ai_response_id"] == "resp_123" + + def test_speech_span_with_pass_through_properties(self, processor, mock_client, mock_span): + """Test SpeechSpanData includes pass-through properties.""" + span_data = SpeechSpanData( + input="Hello, how can I help you?", + output="base64_audio_data", + output_format="pcm", + model="tts-1", + model_config={"voice": "alloy", "speed": 1.0}, + first_content_at="2024-01-01T00:00:00.500Z", + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "speech" + assert call_kwargs["properties"]["$ai_model"] == "tts-1" + # Pass-through properties (no $ai_ prefix) + assert call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + assert call_kwargs["properties"]["audio_output_format"] == "pcm" + assert call_kwargs["properties"]["model_config"] == {"voice": "alloy", "speed": 1.0} + # Text input should be captured + assert call_kwargs["properties"]["$ai_input"] == "Hello, how can I help you?" + + def test_transcription_span_with_pass_through_properties(self, processor, mock_client, mock_span): + """Test TranscriptionSpanData includes pass-through properties.""" + span_data = TranscriptionSpanData( + input="base64_audio_data", + input_format="pcm", + output="This is the transcribed text.", + model="whisper-1", + model_config={"language": "en"}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + + assert call_kwargs["event"] == "$ai_span" + assert call_kwargs["properties"]["$ai_span_type"] == "transcription" + assert call_kwargs["properties"]["$ai_model"] == "whisper-1" + # Pass-through properties (no $ai_ prefix) + assert call_kwargs["properties"]["audio_input_format"] == "pcm" + assert call_kwargs["properties"]["model_config"] == {"language": "en"} + # Transcription output should be captured + assert call_kwargs["properties"]["$ai_output_state"] == "This is the transcribed text." 
+ def test_latency_calculation(self, processor, mock_client, mock_span): """Test that latency is calculated correctly.""" span_data = GenerationSpanData(model="gpt-4o") From 2445b409b2bfc17f5c55e64238540ef612e122b3 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 15:45:54 +0100 Subject: [PATCH 04/24] Add $ai_framework property and standardize $ai_provider for OpenAI Agents - Add $ai_framework="openai-agents" to all events for framework identification - Standardize $ai_provider="openai" on all events (previously some used "openai_agents") - Follows pattern from posthog-js where $ai_provider is the underlying LLM provider --- posthog/ai/openai_agents/processor.py | 29 +++++++++++++------ .../test/ai/openai_agents/test_processor.py | 4 ++- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 26805c2c..07385d2c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -183,7 +183,8 @@ def on_trace_start(self, trace: Trace) -> None: properties = { "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", } # Include group_id for linking related traces (e.g., conversation threads) @@ -345,6 +346,7 @@ def _handle_generation_span( "$ai_span_id": span_id, "$ai_parent_id": parent_id, "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -388,7 +390,8 @@ def _handle_function_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_latency": latency, @@ -423,7 +426,8 @@ def _handle_agent_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "agent", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } @@ -460,7 +464,8 @@ def _handle_handoff_span( "$ai_parent_id": parent_id, "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_handoff_from_agent": span_data.from_agent, "$ai_handoff_to_agent": span_data.to_agent, "$ai_latency": latency, @@ -491,7 +496,8 @@ def _handle_guardrail_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_guardrail_triggered": span_data.triggered, "$ai_latency": latency, **error_properties, @@ -534,6 +540,7 @@ def _handle_response_span( "$ai_span_id": span_id, "$ai_parent_id": parent_id, "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -574,7 +581,8 @@ def _handle_custom_span( "$ai_parent_id": parent_id, "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + 
"$ai_framework": "openai-agents", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), "$ai_latency": latency, **error_properties, @@ -606,7 +614,8 @@ def _handle_audio_span( "$ai_parent_id": parent_id, "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } @@ -662,7 +671,8 @@ def _handle_mcp_span( "$ai_parent_id": parent_id, "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_mcp_server": span_data.server, "$ai_mcp_tools": span_data.result, "$ai_latency": latency, @@ -695,7 +705,8 @@ def _handle_generic_span( "$ai_parent_id": parent_id, "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai_agents", + "$ai_provider": "openai", + "$ai_framework": "openai-agents", "$ai_latency": latency, **error_properties, } diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 77eb4b11..b5509340 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -110,7 +110,8 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["distinct_id"] == "test-user" assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" - assert call_kwargs["properties"]["$ai_provider"] == "openai_agents" + assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" def test_on_trace_end_clears_metadata(self, processor, mock_trace): """Test that on_trace_end clears stored trace metadata.""" @@ -146,6 +147,7 @@ def test_generation_span_mapping(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_trace_id"] == "trace_123456789" assert call_kwargs["properties"]["$ai_span_id"] == "span_987654321" assert call_kwargs["properties"]["$ai_provider"] == "openai" + assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" assert call_kwargs["properties"]["$ai_model"] == "gpt-4o" assert call_kwargs["properties"]["$ai_input_tokens"] == 10 assert call_kwargs["properties"]["$ai_output_tokens"] == 20 From 6193698228de3c4be881ebc36ea0007c41259223 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 15 Jan 2026 15:48:02 +0100 Subject: [PATCH 05/24] chore: bump version to 7.7.0 for OpenAI Agents SDK integration --- CHANGELOG.md | 735 +-------------------------------------------- posthog/version.py | 2 +- 2 files changed, 7 insertions(+), 730 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21b8cc37..f674fdc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 7.7.0 - 2026-01-15 + +feat(ai): Add OpenAI Agents SDK integration + +Automatic tracing for agent workflows, handoffs, tool calls, guardrails, and custom spans. Includes `$ai_total_tokens`, `$ai_error_type` categorization, and `$ai_framework` property. 
+ # 7.6.0 - 2026-01-12 feat: add device_id to flags request payload @@ -30,732 +36,3 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r # 7.4.0 - 2025-12-16 feat: Add automatic retries for feature flag requests - -Feature flag API requests now automatically retry on transient failures: - -- Network errors (connection refused, DNS failures, timeouts) -- Server errors (500, 502, 503, 504) -- Up to 2 retries with exponential backoff (0.5s, 1s delays) - -Rate limit (429) and quota (402) errors are not retried. - -# 7.3.1 - 2025-12-06 - -fix: remove unused $exception_message and $exception_type - -# 7.3.0 - 2025-12-05 - -feat: improve code variables capture masking - -# 7.2.0 - 2025-12-01 - -feat: add $feature_flag_evaluated_at properties to $feature_flag_called events - -# 7.1.0 - 2025-11-26 - -Add support for the async version of Gemini. - -# 7.0.2 - 2025-11-18 - -Add support for Python 3.14. -Projects upgrading to Python 3.14 should ensure any Pydantic models passed into the SDK use Pydantic v2, as Pydantic v1 is not compatible with Python 3.14. - -# 7.0.1 - 2025-11-15 - -Try to use repr() when formatting code variables - -# 7.0.0 - 2025-11-11 - -NB Python 3.9 is no longer supported - -- chore(llma): update LLM provider SDKs to latest major versions - - openai: 1.102.0 → 2.7.1 - - anthropic: 0.64.0 → 0.72.0 - - google-genai: 1.32.0 → 1.49.0 - - langchain-core: 0.3.75 → 1.0.3 - - langchain-openai: 0.3.32 → 1.0.2 - - langchain-anthropic: 0.3.19 → 1.0.1 - - langchain-community: 0.3.29 → 0.4.1 - - langgraph: 0.6.6 → 1.0.2 - -# 6.9.3 - 2025-11-10 - -- feat(ph-ai): PostHog properties dict in GenerationMetadata - -# 6.9.2 - 2025-11-10 - -- fix(llma): fix cache token double subtraction in Langchain for non-Anthropic providers causing negative costs - -# 6.9.1 - 2025-11-07 - -- fix(error-tracking): pass code variables config from init to client - -# 6.9.0 - 2025-11-06 - -- feat(error-tracking): add local variables capture - -# 6.8.0 - 2025-11-03 - -- feat(llma): send web search calls to be used for LLM cost calculations - -# 6.7.14 - 2025-11-03 - -- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355) -- test(django): Add Django 5 integration test suite with real ASGI application testing async middleware behavior - -# 6.7.13 - 2025-11-02 - -- fix(llma): cache cost calculation in the LangChain callback - -# 6.7.12 - 2025-11-02 - -- fix(django): Restore process_exception method to capture view and downstream middleware exceptions (fixes #329) -- fix(ai/langchain): Add LangChain 1.0+ compatibility for CallbackHandler imports (fixes #362) - -# 6.7.11 - 2025-10-28 - -- feat(ai): Add `$ai_framework` property for framework integrations (e.g. LangChain) - -# 6.7.10 - 2025-10-24 - -- fix(django): Make middleware truly hybrid - compatible with both sync (WSGI) and async (ASGI) Django stacks without breaking sync-only deployments - -# 6.7.9 - 2025-10-22 - -- fix(flags): multi-condition flags with static cohorts returning wrong variants - -# 6.7.8 - 2025-10-16 - -- fix(llma): missing async for OpenAI's streaming implementation - -# 6.7.7 - 2025-10-14 - -- fix: remove deprecated attribute $exception_personURL from exception events - -# 6.7.6 - 2025-09-16 - -- fix: don't sort condition sets with variant overrides to the top -- fix: Prevent core Client methods from raising exceptions - -# 6.7.5 - 2025-09-16 - -- feat: Django middleware now supports async request handling. 
- -# 6.7.4 - 2025-09-05 - -- fix: Missing system prompts for some providers - -# 6.7.3 - 2025-09-04 - -- fix: missing usage tokens in Gemini - -# 6.7.2 - 2025-09-03 - -- fix: tool call results in streaming providers - -# 6.7.1 - 2025-09-01 - -- fix: Add base64 inline image sanitization - -# 6.7.0 - 2025-08-26 - -- feat: Add support for feature flag dependencies - -# 6.6.1 - 2025-08-21 - -- fix: Prevent `NoneType` error when `group_properties` is `None` - -# 6.6.0 - 2025-08-15 - -- feat: Add `flag_keys_to_evaluate` parameter to optimize feature flag evaluation performance by only evaluating specified flags -- feat: Add `flag_keys_filter` option to `send_feature_flags` for selective flag evaluation in capture events - -# 6.5.0 - 2025-08-08 - -- feat: Add `$context_tags` to an event to know which properties were included as tags - -# 6.4.1 - 2025-08-06 - -- fix: Always pass project API key in `remote_config` requests for deterministic project routing - -# 6.4.0 - 2025-08-05 - -- feat: support Vertex AI for Gemini - -# 6.3.4 - 2025-08-04 - -- fix: set `$ai_tools` for all providers and `$ai_output_choices` for all non-streaming provider flows properly - -# 6.3.3 - 2025-08-01 - -- fix: `get_feature_flag_result` now correctly returns FeatureFlagResult when payload is empty string instead of None - -# 6.3.2 - 2025-07-31 - -- fix: Anthropic's tool calls are now handled properly - -# 6.3.0 - 2025-07-22 - -- feat: Enhanced `send_feature_flags` parameter to accept `SendFeatureFlagsOptions` object for declarative control over local/remote evaluation and custom properties - -# 6.2.1 - 2025-07-21 - -- feat: make `posthog_client` an optional argument in PostHog AI providers wrappers (`posthog.ai.*`), intuitively using the default client as the default - -# 6.1.1 - 2025-07-16 - -- fix: correctly capture exceptions processed by Django from views or middleware - -# 6.1.0 - 2025-07-10 - -- feat: decouple feature flag local evaluation from personal API keys; support decrypting remote config payloads without relying on the feature flags poller - -# 6.0.4 - 2025-07-09 - -- fix: add POSTHOG_MW_CLIENT setting to django middleware, to support custom clients for exception capture. - -# 6.0.3 - 2025-07-07 - -- feat: add a feature flag evaluation cache (local storage or redis) to support returning flag evaluations when the service is down - -# 6.0.2 - 2025-07-02 - -- fix: send_feature_flags changed to default to false in `Client::capture_exception` - -# 6.0.1 - -- fix: response `$process_person_profile` property when passed to capture - -# 6.0.0 - -This release contains a number of major breaking changes: - -- feat: make distinct_id an optional parameter in posthog.capture and related functions -- feat: make capture and related functions return `Optional[str]`, which is the UUID of the sent event, if it was sent -- fix: remove `identify` (prefer `posthog.set()`), and `page` and `screen` (prefer `posthog.capture()`) -- fix: delete exception-capture specific integrations module. Prefer the general-purpose django middleware as a replacement for the django `Integration`. - -To migrate to this version, you'll mostly just need to switch to using named keyword arguments, rather than positional ones. 
For example: - -```python -# Old calling convention -posthog.capture("user123", "button_clicked", {"button_id": "123"}) -# New calling convention -posthog.capture(distinct_id="user123", event="button_clicked", properties={"button_id": "123"}) - -# Better pattern -with posthog.new_context(): - posthog.identify_context("user123") - - # The event name is the first argument, and can be passed positionally, or as a keyword argument in a later position - posthog.capture("button_pressed") -``` - -Generally, arguments are now appropriately typed, and docstrings have been updated. If something is unclear, please open an issue, or submit a PR! - -# 5.4.0 - 2025-06-20 - -- feat: add support to session_id context on page method - -# 5.3.0 - 2025-06-19 - -- fix: safely handle exception values - -# 5.2.0 - 2025-06-19 - -- feat: construct artificial stack traces if no traceback is available on a captured exception - -## 5.1.0 - 2025-06-18 - -- feat: session and distinct ID's can now be associated with contexts, and are used as such -- feat: django http request middleware - -## 5.0.0 - 2025-06-16 - -- fix: removed deprecated sentry integration - -## 4.10.0 - 2025-06-13 - -- fix: no longer fail in autocapture. - -## 4.9.0 - 2025-06-13 - -- feat(ai): track reasoning and cache tokens in the LangChain callback - -## 4.8.0 - 2025-06-10 - -- fix: export scoped, rather than tracked, decorator -- feat: allow use of contexts without error tracking - -## 4.7.0 - 2025-06-10 - -- feat: add support for parse endpoint in responses API (no longer beta) - -## 4.6.2 - 2025-06-09 - -- fix: replace `import posthog` with direct method imports - -## 4.6.1 - 2025-06-09 - -- fix: replace `import posthog` in `posthoganalytics` package - -## 4.6.0 - 2025-06-09 - -- feat: add additional user and request context to captured exceptions via the Django integration -- feat: Add `setup()` function to initialise default client - -## 4.5.0 - 2025-06-09 - -- feat: add before_send callback (#249) - -## 4.4.2- 2025-06-09 - -- empty point release to fix release automation - -## 4.4.1 2025-06-09 - -- empty point release to fix release automation - -## 4.4.0 - 2025-06-09 - -- Use the new `/flags` endpoint for all feature flag evaluations (don't fall back to `/decide` at all) - -## 4.3.2 - 2025-06-06 - -1. Add context management: - -- New context manager with `posthog.new_context()` -- Tag functions: `posthog.tag()`, `posthog.get_tags()`, `posthog.clear_tags()` -- Function decorator: - - `@posthog.scoped` - Creates context and captures exceptions thrown within the function -- Automatic deduplication of exceptions to ensure each exception is only captured once - -2. fix: feature flag request use geoip_disable (#235) -3. chore: pin actions versions (#210) -4. fix: opinionated setup and clean fn fix (#240) -5. fix: release action failed (#241) - -## 4.2.0 - 2025-05-22 - -Add support for google gemini - -## 4.1.0 - 2025-05-22 - -Moved ai openai package to a composition approach over inheritance. - -## 4.0.1 – 2025-04-29 - -1. Remove deprecated `monotonic` library. Use Python's core `time.monotonic` function instead -2. Clarify Python 3.9+ is required - -## 4.0.0 - 2025-04-24 - -1. Added new method `get_feature_flag_result` which returns a `FeatureFlagResult` object. This object breaks down the result of a feature flag into its enabled state, variant, and payload. The benefit of this method is it allows you to retrieve the result of a feature flag and its payload in a single API call. 
You can call `get_value` on the result to get the value of the feature flag, which is the same value returned by `get_feature_flag` (aka the string `variant` if the flag is a multivariate flag or the `boolean` value if the flag is a boolean flag). - -Example: - -```python -result = posthog.get_feature_flag_result("my-flag", "distinct_id") -print(result.enabled) # True or False -print(result.variant) # 'the-variant-value' or None -print(result.payload) # {'foo': 'bar'} -print(result.get_value()) # 'the-variant-value' or True or False -print(result.reason) # 'matched condition set 2' (Not available for local evaluation) -``` - -Breaking change: - -1. `get_feature_flag_payload` now deserializes payloads from JSON strings to `Any`. Previously, it returned the payload as a JSON encoded string. - -Before: - -```python -payload = get_feature_flag_payload('key', 'distinct_id') # "{\"some\": \"payload\"}" -``` - -After: - -```python -payload = get_feature_flag_payload('key', 'distinct_id') # {"some": "payload"} -``` - -## 3.25.0 – 2025-04-15 - -1. Roll out new `/flags` endpoint to 100% of `/decide` traffic, excluding the top 10 customers. - -## 3.24.3 – 2025-04-15 - -1. Fix hash inclusion/exclusion for flag rollout - -## 3.24.2 – 2025-04-15 - -1. Roll out new /flags endpoint to 10% of /decide traffic - -## 3.24.1 – 2025-04-11 - -1. Add `log_captured_exceptions` option to proxy setup - -## 3.24.0 – 2025-04-10 - -1. Add config option to `log_captured_exceptions` - -## 3.23.0 – 2025-03-26 - -1. Expand automatic retries to include read errors (e.g. RemoteDisconnected) - -## 3.22.0 – 2025-03-26 - -1. Add more information to `$feature_flag_called` events. -2. Support for the `/decide?v=4` endpoint which contains more information about feature flags. - -## 3.21.0 – 2025-03-17 - -1. Support serializing dataclasses. - -## 3.20.0 – 2025-03-13 - -1. Add support for OpenAI Responses API. - -## 3.19.2 – 2025-03-11 - -1. Fix install requirements for analytics package - -## 3.19.1 – 2025-03-11 - -1. Fix bug where None is sent as delta in azure - -## 3.19.0 – 2025-03-04 - -1. Add support for tool calls in OpenAI and Anthropic. -2. Add support for cached tokens. - -## 3.18.1 – 2025-03-03 - -1. Improve quota-limited feature flag logs - -## 3.18.0 - 2025-02-28 - -1. Add support for Azure OpenAI. - -## 3.17.0 - 2025-02-27 - -1. The LangChain handler now captures tools in `$ai_generation` events, in property `$ai_tools`. This allows for displaying tools provided to the LLM call in PostHog UI. Note that support for `$ai_tools` in OpenAI and Anthropic SDKs is coming soon. - -## 3.16.0 - 2025-02-26 - -1. feat: add some platform info to events (#198) - -## 3.15.1 - 2025-02-23 - -1. Fix async client support for OpenAI. - -## 3.15.0 - 2025-02-19 - -1. Support quota-limited feature flags - -## 3.14.2 - 2025-02-19 - -1. Evaluate feature flag payloads with case sensitivity correctly. Fixes - -## 3.14.1 - 2025-02-18 - -1. Add support for Bedrock Anthropic Usage - -## 3.13.0 - 2025-02-12 - -1. Automatically retry connection errors - -## 3.12.1 - 2025-02-11 - -1. Fix mypy support for 3.12.0 -2. Deprecate `is_simple_flag` - -## 3.12.0 - 2025-02-11 - -1. Add support for OpenAI beta parse API. -2. Deprecate `context` parameter - -## 3.11.1 - 2025-02-06 - -1. Fix LangChain callback handler to capture parent run ID. - -## 3.11.0 - 2025-01-28 - -1. Add the `$ai_span` event to the LangChain callback handler to capture the input and output of intermediary chains. 
- - > LLM observability naming change: event property `$ai_trace_name` is now `$ai_span_name`. - -2. Fix serialiazation of Pydantic models in methods. - -## 3.10.0 - 2025-01-24 - -1. Add `$ai_error` and `$ai_is_error` properties to LangChain callback handler, OpenAI, and Anthropic. - -## 3.9.3 - 2025-01-23 - -1. Fix capturing of multiple traces in the LangChain callback handler. - -## 3.9.2 - 2025-01-22 - -1. Fix importing of LangChain callback handler under certain circumstances. - -## 3.9.0 - 2025-01-22 - -1. Add `$ai_trace` event emission to LangChain callback handler. - -## 3.8.4 - 2025-01-17 - -1. Add Anthropic support for LLM Observability. -2. Update LLM Observability to use output_choices. - -## 3.8.3 - 2025-01-14 - -1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages for the `posthoganalytics` package. - -## 3.8.2 - 2025-01-14 - -1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages. - -## 3.8.1 - 2025-01-14 - -1. Add LLM Observability with support for OpenAI and Langchain callbacks. - -## 3.7.5 - 2025-01-03 - -1. Add `distinct_id` to group_identify - -## 3.7.4 - 2024-11-25 - -1. Fix bug where this SDK incorrectly sent feature flag events with null values when calling `get_feature_flag_payload`. - -## 3.7.3 - 2024-11-25 - -1. Use personless mode when sending an exception without a provided `distinct_id`. - -## 3.7.2 - 2024-11-19 - -1. Add `type` property to exception stacks. - -## 3.7.1 - 2024-10-24 - -1. Add `platform` property to each frame of exception stacks. - -## 3.7.0 - 2024-10-03 - -1. Adds a new `super_properties` parameter on the client that are appended to every /capture call. - -## 3.6.7 - 2024-09-24 - -1. Remove deprecated datetime.utcnow() in favour of datetime.now(tz=tzutc()) - -## 3.6.6 - 2024-09-16 - -1. Fix manual capture support for in app frames - -## 3.6.5 - 2024-09-10 - -1. Fix django integration support for manual exception capture. - -## 3.6.4 - 2024-09-05 - -1. Add manual exception capture. - -## 3.6.3 - 2024-09-03 - -1. Make sure setup.py for posthoganalytics package also discovers the new exception integration package. - -## 3.6.2 - 2024-09-03 - -1. Make sure setup.py discovers the new exception integration package. - -## 3.6.1 - 2024-09-03 - -1. Adds django integration to exception autocapture in alpha state. This feature is not yet stable and may change in future versions. - -## 3.6.0 - 2024-08-28 - -1. Adds exception autocapture in alpha state. This feature is not yet stable and may change in future versions. - -## 3.5.2 - 2024-08-21 - -1. Guard for None values in local evaluation - -## 3.5.1 - 2024-08-13 - -1. Remove "-api" suffix from ingestion hostnames - -## 3.5.0 - 2024-02-29 - -1. - Adds a new `feature_flags_request_timeout_seconds` timeout parameter for feature flags which defaults to 3 seconds, updated from the default 10s for all other API calls. - -## 3.4.2 - 2024-02-20 - -1. Add `historical_migration` option for bulk migration to PostHog Cloud. - -## 3.4.1 - 2024-02-09 - -1. Use new hosts for event capture as well - -## 3.4.0 - 2024-02-05 - -1. Point given hosts to new ingestion hosts - -## 3.3.4 - 2024-01-30 - -1. Update type hints for module variables to work with newer versions of mypy - -## 3.3.3 - 2024-01-26 - -1. Remove new relative date operators, combine into regular date operators - -## 3.3.2 - 2024-01-19 - -1. Return success/failure with all capture calls from module functions - -## 3.3.1 - 2024-01-10 - -1. 
Make sure we don't override any existing feature flag properties when adding locally evaluated feature flag properties. - -## 3.3.0 - 2024-01-09 - -1. When local evaluation is enabled, we automatically add flag information to all events sent to PostHog, whenever possible. This makes it easier to use these events in experiments. - -## 3.2.0 - 2024-01-09 - -1. Numeric property handling for feature flags now does the expected: When passed in a number, we do a numeric comparison. When passed in a string, we do a string comparison. Previously, we always did a string comparison. -2. Add support for relative date operators for local evaluation. - -## 3.1.0 - 2023-12-04 - -1. Increase maximum event size and batch size - -## 3.0.2 - 2023-08-17 - -1. Returns the current flag property with $feature_flag_called events, to make it easier to use in experiments - -## 3.0.1 - 2023-04-21 - -1. Restore how feature flags work when the client library is disabled: All requests return `None` and no events are sent when the client is disabled. -2. Add a `feature_flag_definitions()` debug option, which returns currently loaded feature flag definitions. You can use this to more cleverly decide when to request local evaluation of feature flags. - -## 3.0.0 - 2023-04-14 - -Breaking change: - -All events by default now send the `$geoip_disable` property to disable geoip lookup in app. This is because usually we don't -want to update person properties to take the server's location. - -The same now happens for feature flag requests, where we discard the IP address of the server for matching on geoip properties like city, country, continent. - -To restore previous behaviour, you can set the default to False like so: - -```python -posthog.disable_geoip = False - -# // and if using client instantiation: -posthog = Posthog('api_key', disable_geoip=False) - -``` - -## 2.5.0 - 2023-04-10 - -1. Add option for instantiating separate client object - -## 2.4.2 - 2023-03-30 - -1. Update backoff dependency for posthoganalytics package to be the same as posthog package - -## 2.4.1 - 2023-03-17 - -1. Removes accidental print call left in for decide response - -## 2.4.0 - 2023-03-14 - -1. Support evaluating all cohorts in feature flags for local evaluation - -## 2.3.1 - 2023-02-07 - -1. Log instead of raise error on posthog personal api key errors -2. Remove upper bound on backoff dependency - -## 2.3.0 - 2023-01-31 - -1. Add support for returning payloads of matched feature flags - -## 2.2.0 - 2022-11-14 - -Changes: - -1. Add support for feature flag variant overrides with local evaluation - -## 2.1.2 - 2022-09-15 - -Changes: - -1. Fixes issues with date comparison. - -## 2.1.1 - 2022-09-14 - -Changes: - -1. Feature flags local evaluation now supports date property filters as well. Accepts both strings and datetime objects. - -## 2.1.0 - 2022-08-11 - -Changes: - -1. Feature flag defaults have been removed -2. Setup logging only when debug mode is enabled. - -## 2.0.1 - 2022-08-04 - -- Make poll_interval configurable -- Add `send_feature_flag_events` parameter to feature flag calls, which determine whether the `$feature_flag_called` event should be sent or not. -- Add `only_evaluate_locally` parameter to feature flag calls, which determines whether the feature flag should only be evaluated locally or not. - -## 2.0.0 - 2022-08-02 - -Breaking changes: - -1. The minimum version requirement for PostHog servers is now 1.38. If you're using PostHog Cloud, you satisfy this requirement automatically. -2. 
Feature flag defaults apply only when there's an error fetching feature flag results. Earlier, if the default was set to `True`, even if a flag resolved to `False`, the default would override this. - **Note: These are removed in 2.0.2** -3. Feature flag remote evaluation doesn't require a personal API key. - -New Changes: - -1. You can now evaluate feature flags locally (i.e. without sending a request to your PostHog servers) by setting a personal API key, and passing in groups and person properties to `is_feature_enabled` and `get_feature_flag` calls. -2. Introduces a `get_all_flags` method that returns all feature flags. This is useful for when you want to seed your frontend with some initial flags, given a user ID. - -## 1.4.9 - 2022-06-13 - -- Support for sending feature flags with capture calls - -## 1.4.8 - 2022-05-12 - -- Support multi variate feature flags - -## 1.4.7 - 2022-04-25 - -- Allow feature flags usage without project_api_key - -## 1.4.1 - 2021-05-28 - -- Fix packaging issues with Sentry integrations - -## 1.4.0 - 2021-05-18 - -- Improve support for `project_api_key` (#32) -- Resolve polling issues with feature flags (#29) -- Add Sentry (and Sentry+Django) integrations (#13) -- Fix feature flag issue with no percentage rollout (#30) - -## 1.3.1 - 2021-05-07 - -- Add `$set` and `$set_once` support (#23) -- Add distinct ID to `$create_alias` event (#27) -- Add `UUID` to `ID_TYPES` (#26) - -## 1.2.1 - 2021-02-05 - -Initial release logged in CHANGELOG.md. diff --git a/posthog/version.py b/posthog/version.py index 05114bf5..fdc77316 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.6.0" +VERSION = "7.7.0" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From df0fcc0f924484bc83ba612fbede67a8298d1619 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:02:58 +0000 Subject: [PATCH 06/24] fix: add openai_agents package to setuptools config Without this, the module is not included in the distribution and users get an ImportError after pip install. 
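A quick smoke test for a built wheel (a sketch; it assumes the OpenAI Agents SDK is installed in the same environment, since the subpackage refuses to import without it):

```python
# Run in a clean virtualenv after installing the built wheel.
# Before this fix the subpackage was missing from the distribution,
# so this import failed even with openai-agents installed.
import posthog.ai.openai_agents

print(posthog.ai.openai_agents.__file__)  # should resolve inside site-packages
```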
--- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b0e69264..0e3e8001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,9 +84,12 @@ packages = [ "posthog.ai", "posthog.ai.langchain", "posthog.ai.openai", + "posthog.ai.openai_agents", "posthog.ai.anthropic", "posthog.ai.gemini", "posthog.test", + "posthog.test.ai", + "posthog.test.ai.openai_agents", "posthog.integrations", ] From 6bf341ae115de0ee56ed2c4d890b4abab02fc22e Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:03:25 +0000 Subject: [PATCH 07/24] fix: correct indentation in on_trace_start properties dict --- posthog/ai/openai_agents/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 07385d2c..c3e094fd 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -184,7 +184,7 @@ def on_trace_start(self, trace: Trace) -> None: "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, "$ai_provider": "openai", - "$ai_framework": "openai-agents", + "$ai_framework": "openai-agents", } # Include group_id for linking related traces (e.g., conversation threads) From 71069ed03cecc8cd3789b6a5484f12f0df7d0eb0 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:06:36 +0000 Subject: [PATCH 08/24] fix: prevent unbounded growth of span/trace tracking dicts Add max entry limit and eviction for _span_start_times and _trace_metadata dicts. If on_span_end or on_trace_end is never called (e.g., due to an SDK exception), these dicts could grow indefinitely in long-running processes. --- posthog/ai/openai_agents/processor.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index c3e094fd..cc1c9bfd 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -105,6 +105,10 @@ def __init__( # Track trace metadata for associating with spans self._trace_metadata: Dict[str, Dict[str, Any]] = {} + # Max entries to prevent unbounded growth if on_span_end/on_trace_end + # is never called (e.g., due to an exception in the Agents SDK). 
+ self._max_tracked_entries = 10000 + def _get_distinct_id(self, trace: Optional[Trace]) -> str: """Resolve the distinct ID for a trace.""" if callable(self._distinct_id): @@ -127,6 +131,22 @@ def _with_privacy_mode(self, value: Any) -> Any: return None return value + def _evict_stale_entries(self) -> None: + """Evict oldest entries if dicts exceed max size to prevent unbounded growth.""" + if len(self._span_start_times) > self._max_tracked_entries: + # Remove oldest entries by start time + sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1]) + for span_id, _ in sorted_spans[: len(sorted_spans) // 2]: + del self._span_start_times[span_id] + log.debug("Evicted stale span start times (exceeded %d entries)", self._max_tracked_entries) + + if len(self._trace_metadata) > self._max_tracked_entries: + # Remove half the entries (oldest inserted via dict ordering in Python 3.7+) + keys = list(self._trace_metadata.keys()) + for key in keys[: len(keys) // 2]: + del self._trace_metadata[key] + log.debug("Evicted stale trace metadata (exceeded %d entries)", self._max_tracked_entries) + def _get_group_id(self, trace_id: str) -> Optional[str]: """Get the group_id for a trace from stored metadata.""" if trace_id in self._trace_metadata: @@ -166,6 +186,7 @@ def _capture_event( def on_trace_start(self, trace: Trace) -> None: """Called when a new trace begins.""" try: + self._evict_stale_entries() trace_id = trace.trace_id trace_name = trace.name group_id = getattr(trace, "group_id", None) @@ -216,6 +237,7 @@ def on_trace_end(self, trace: Trace) -> None: def on_span_start(self, span: Span[Any]) -> None: """Called when a new span begins.""" try: + self._evict_stale_entries() span_id = span.span_id self._span_start_times[span_id] = time.time() except Exception as e: From 2f49c73581f18fc739a4d361116eef6e18e2829d Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:07:27 +0000 Subject: [PATCH 09/24] fix: resolve distinct_id from trace metadata in on_span_end Previously on_span_end always called _get_distinct_id(None), which meant callable distinct_id resolvers never received the trace object for spans. Now the resolved distinct_id is stored at trace start and looked up by trace_id during span end. 
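A sketch of the resolver pattern this fix makes work for spans (the metadata key is hypothetical; the constructor accepts any callable taking a `Trace`):

```python
from posthog.ai.openai_agents import PostHogTracingProcessor

def resolve_distinct_id(trace):
    # Hypothetical lookup: read a user id from the trace's metadata.
    return (getattr(trace, "metadata", None) or {}).get("user_id")

# The callable now runs once in on_trace_start; every span in that trace
# reuses the stored result instead of resolving against a None trace.
processor = PostHogTracingProcessor(distinct_id=resolve_distinct_id)
```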
--- posthog/ai/openai_agents/processor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index cc1c9bfd..548b0b08 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -192,15 +192,16 @@ def on_trace_start(self, trace: Trace) -> None: group_id = getattr(trace, "group_id", None) metadata = getattr(trace, "metadata", None) + distinct_id = self._get_distinct_id(trace) + # Store trace metadata for later (used by spans) self._trace_metadata[trace_id] = { "name": trace_name, "group_id": group_id, "metadata": metadata, + "distinct_id": distinct_id, } - distinct_id = self._get_distinct_id(trace) - properties = { "$ai_trace_id": trace_id, "$ai_trace_name": trace_name, @@ -261,8 +262,9 @@ def on_span_end(self, span: Span[Any]) -> None: ended = _parse_iso_timestamp(span.ended_at) latency = (ended - started) if (started and ended) else 0 - # Get distinct ID from trace metadata or default - distinct_id = self._get_distinct_id(None) + # Get distinct ID from trace metadata (resolved at trace start) or default + trace_info = self._trace_metadata.get(trace_id, {}) + distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None) # Get group_id from trace metadata for linking group_id = self._get_group_id(trace_id) From 8d7a68dbb3733fc98d97556f2565bb42d41b4b51 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:16:13 +0000 Subject: [PATCH 10/24] refactor: extract _base_properties helper to reduce duplication All span handlers repeated the same 6 base fields (trace_id, span_id, parent_id, provider, framework, latency) plus the group_id conditional. Extract into a shared helper to reduce ~100 lines of boilerplate. 
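Because the helper is merged with `**` unpacking, any key listed after the spread would win on collision; handlers only ever append span-specific keys, as in this sketch of the resulting pattern:

```python
# Handler body after the refactor (sketch): spread the shared base dict,
# then add only the keys unique to this span type.
properties = {
    **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties),
    "$ai_span_name": span_data.name,
    "$ai_span_type": "tool",
}
```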
--- posthog/ai/openai_agents/processor.py | 145 ++++++-------------------- 1 file changed, 33 insertions(+), 112 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 548b0b08..a9ee6bd4 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -341,6 +341,29 @@ def on_span_end(self, span: Span[Any]) -> None: except Exception as e: log.debug(f"Error in on_span_end: {e}") + def _base_properties( + self, + trace_id: str, + span_id: str, + parent_id: Optional[str], + latency: float, + group_id: Optional[str], + error_properties: Dict[str, Any], + ) -> Dict[str, Any]: + """Build the base properties dict shared by all span handlers.""" + properties = { + "$ai_trace_id": trace_id, + "$ai_span_id": span_id, + "$ai_parent_id": parent_id, + "$ai_provider": "openai", + "$ai_framework": "openai-agents", + "$ai_latency": latency, + **error_properties, + } + if group_id: + properties["$ai_group_id"] = group_id + return properties + def _handle_generation_span( self, span_data: GenerationSpanData, @@ -366,11 +389,7 @@ def _handle_generation_span( model_params[param] = model_config[param] properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -378,14 +397,8 @@ def _handle_generation_span( "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Add optional token fields if present if usage.get("reasoning_tokens"): properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"] @@ -409,24 +422,13 @@ def _handle_function_span( ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - - # Add MCP data if present if span_data.mcp_data: properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) @@ -445,22 +447,11 @@ def _handle_agent_span( ) -> None: """Handle agent execution spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "agent", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - - # Add agent-specific metadata if 
span_data.handoffs: properties["$ai_agent_handoffs"] = span_data.handoffs if span_data.tools: @@ -483,23 +474,13 @@ def _handle_handoff_span( ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_handoff_from_agent": span_data.from_agent, "$ai_handoff_to_agent": span_data.to_agent, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_guardrail_span( @@ -515,22 +496,12 @@ def _handle_guardrail_span( ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_guardrail_triggered": span_data.triggered, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_response_span( @@ -560,25 +531,15 @@ def _handle_response_span( model = getattr(response, "model", None) if response else None properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Extract output content from response if response: output_items = getattr(response, "output", None) @@ -600,22 +561,12 @@ def _handle_custom_span( ) -> None: """Handle custom user-defined spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_audio_span( @@ -633,21 +584,11 @@ def _handle_audio_span( span_type = span_data.type # "transcription", "speech", or "speech_group" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), 
"$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Add model info if available if hasattr(span_data, "model") and span_data.model: properties["$ai_model"] = span_data.model @@ -690,23 +631,13 @@ def _handle_mcp_span( ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", - "$ai_provider": "openai", - "$ai_framework": "openai-agents", "$ai_mcp_server": span_data.server, "$ai_mcp_tools": span_data.result, - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - self._capture_event("$ai_span", properties, distinct_id) def _handle_generic_span( @@ -724,21 +655,11 @@ def _handle_generic_span( span_type = getattr(span_data, "type", "unknown") properties = { - "$ai_trace_id": trace_id, - "$ai_span_id": span_id, - "$ai_parent_id": parent_id, + **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), "$ai_span_name": span_type, "$ai_span_type": span_type, - "$ai_provider": "openai", - "$ai_framework": "openai-agents", - "$ai_latency": latency, - **error_properties, } - # Include group_id for linking related traces - if group_id: - properties["$ai_group_id"] = group_id - # Try to export span data if hasattr(span_data, "export"): try: From 27bd98c4bcabc2e28093305030570634b2a728d6 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:17:40 +0000 Subject: [PATCH 11/24] test: add missing edge case tests for openai agents processor - test_generation_span_with_no_usage: zero tokens when usage is None - test_generation_span_with_partial_usage: only input_tokens present - test_error_type_categorization_by_type_field_only: type field without matching message content - test_distinct_id_resolved_from_trace_for_spans: callable resolver uses trace context for span events - test_eviction_of_stale_entries: memory leak prevention works --- .../test/ai/openai_agents/test_processor.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index b5509340..2cf0bb9b 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -545,6 +545,88 @@ def test_force_flush_calls_client_flush(self, processor, mock_client): processor.force_flush() mock_client.flush.assert_called_once() + def test_generation_span_with_no_usage(self, processor, mock_client, mock_span): + """Test GenerationSpanData with no usage data defaults to zero tokens.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_input_tokens"] == 0 + assert call_kwargs["properties"]["$ai_output_tokens"] == 0 + assert call_kwargs["properties"]["$ai_total_tokens"] == 0 + + def test_generation_span_with_partial_usage(self, processor, mock_client, 
mock_span): + """Test GenerationSpanData with only input_tokens present.""" + span_data = GenerationSpanData( + model="gpt-4o", + usage={"input_tokens": 42}, + ) + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_input_tokens"] == 42 + assert call_kwargs["properties"]["$ai_output_tokens"] == 0 + assert call_kwargs["properties"]["$ai_total_tokens"] == 42 + + def test_error_type_categorization_by_type_field_only(self, processor, mock_client, mock_span): + """Test error categorization works when only the type field matches.""" + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + mock_span.error = {"message": "Something went wrong", "type": "ModelBehaviorError"} + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" + + def test_distinct_id_resolved_from_trace_for_spans(self, mock_client, mock_trace, mock_span): + """Test that spans use the distinct_id resolved at trace start.""" + resolver = lambda trace: f"user-{trace.name}" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + # Start trace - this resolves and stores distinct_id + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + # End a span - should use the stored distinct_id from trace + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["distinct_id"] == "user-Test Workflow" + + def test_eviction_of_stale_entries(self, mock_client): + """Test that stale entries are evicted when max is exceeded.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="test-user", + ) + processor._max_tracked_entries = 10 + + # Fill beyond max + for i in range(15): + processor._span_start_times[f"span_{i}"] = float(i) + processor._trace_metadata[f"trace_{i}"] = {"name": f"trace_{i}"} + + processor._evict_stale_entries() + + # Should have evicted half + assert len(processor._span_start_times) <= 10 + assert len(processor._trace_metadata) <= 10 + class TestInstrumentHelper: """Tests for the instrument() convenience function.""" From 143ff91d8e9f76597e7f85a3582cd8dcaea4f0c5 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:20:09 +0000 Subject: [PATCH 12/24] fix: handle non-dict error_info in span error parsing If span.error is a string instead of a dict, calling .get() would raise AttributeError. Now falls back to str() for non-dict errors. 
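Both error shapes now survive span processing (an illustrative snippet reusing this suite's fixtures; previously the string case hit an AttributeError inside on_span_end's catch-all, silently dropping the event):

```python
# A dict error keeps its message and type; a bare string becomes the
# message with an empty type, so categorization falls back to "unknown".
for err in ({"message": "UserError: Tool failed", "type": "UserError"}, "socket hang up"):
    mock_span.error = err
    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)  # no longer trips on the string case
```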
--- posthog/ai/openai_agents/processor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index a9ee6bd4..6f0830d5 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -273,8 +273,12 @@ def on_span_end(self, span: Span[Any]) -> None: error_info = span.error error_properties = {} if error_info: - error_message = error_info.get("message", str(error_info)) - error_type_raw = error_info.get("type", "") + if isinstance(error_info, dict): + error_message = error_info.get("message", str(error_info)) + error_type_raw = error_info.get("type", "") + else: + error_message = str(error_info) + error_type_raw = "" # Categorize error type for cross-provider filtering/alerting error_type = "unknown" From ae519ef30377ce6aa92f1ed116194f2641150836 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:25:47 +0000 Subject: [PATCH 13/24] style: apply ruff formatting --- posthog/ai/openai_agents/processor.py | 175 +++++++++++++++--- .../test/ai/openai_agents/test_processor.py | 100 +++++++--- 2 files changed, 222 insertions(+), 53 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 6f0830d5..459f4a90 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -138,14 +138,20 @@ def _evict_stale_entries(self) -> None: sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1]) for span_id, _ in sorted_spans[: len(sorted_spans) // 2]: del self._span_start_times[span_id] - log.debug("Evicted stale span start times (exceeded %d entries)", self._max_tracked_entries) + log.debug( + "Evicted stale span start times (exceeded %d entries)", + self._max_tracked_entries, + ) if len(self._trace_metadata) > self._max_tracked_entries: # Remove half the entries (oldest inserted via dict ordering in Python 3.7+) keys = list(self._trace_metadata.keys()) for key in keys[: len(keys) // 2]: del self._trace_metadata[key] - log.debug("Evicted stale trace metadata (exceeded %d entries)", self._max_tracked_entries) + log.debug( + "Evicted stale trace metadata (exceeded %d entries)", + self._max_tracked_entries, + ) def _get_group_id(self, trace_id: str) -> Optional[str]: """Get the group_id for a trace from stored metadata.""" @@ -161,7 +167,9 @@ def _capture_event( ) -> None: """Capture an event to PostHog with error handling.""" try: - if not hasattr(self._client, "capture") or not callable(self._client.capture): + if not hasattr(self._client, "capture") or not callable( + self._client.capture + ): return final_distinct_id = distinct_id or "unknown" @@ -282,7 +290,10 @@ def on_span_end(self, span: Span[Any]) -> None: # Categorize error type for cross-provider filtering/alerting error_type = "unknown" - if "ModelBehaviorError" in error_type_raw or "ModelBehaviorError" in error_message: + if ( + "ModelBehaviorError" in error_type_raw + or "ModelBehaviorError" in error_message + ): error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" @@ -302,44 +313,116 @@ def on_span_end(self, span: Span[Any]) -> None: # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + 
error_properties, ) elif isinstance(span_data, FunctionSpanData): self._handle_function_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, AgentSpanData): self._handle_agent_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, HandoffSpanData): self._handle_handoff_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, GuardrailSpanData): self._handle_guardrail_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, ResponseSpanData): self._handle_response_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, CustomSpanData): self._handle_custom_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) - elif isinstance(span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)): + elif isinstance( + span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData) + ): self._handle_audio_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) elif isinstance(span_data, MCPListToolsSpanData): self._handle_mcp_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) else: # Unknown span type - capture as generic span self._handle_generic_span( - span_data, trace_id, span_id, parent_id, latency, distinct_id, group_id, error_properties + span_data, + trace_id, + span_id, + parent_id, + latency, + distinct_id, + group_id, + error_properties, ) except Exception as e: @@ -388,12 +471,20 @@ def _handle_generation_span( # Extract model config parameters model_config = span_data.model_config or {} model_params = {} - for param in ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]: + for param in [ + "temperature", + "max_tokens", + "top_p", + "frequency_penalty", + "presence_penalty", + ]: if param in model_config: model_params[param] = model_config[param] properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -409,7 +500,9 @@ def _handle_generation_span( if usage.get("cache_read_input_tokens"): 
properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"] if usage.get("cache_creation_input_tokens"): - properties["$ai_cache_creation_input_tokens"] = usage["cache_creation_input_tokens"] + properties["$ai_cache_creation_input_tokens"] = usage[ + "cache_creation_input_tokens" + ] self._capture_event("$ai_generation", properties, distinct_id) @@ -426,7 +519,9 @@ def _handle_function_span( ) -> None: """Handle function/tool call spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "tool", "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), @@ -451,7 +546,9 @@ def _handle_agent_span( ) -> None: """Handle agent execution spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "agent", } @@ -478,7 +575,9 @@ def _handle_handoff_span( ) -> None: """Handle agent handoff spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}", "$ai_span_type": "handoff", "$ai_handoff_from_agent": span_data.from_agent, @@ -500,7 +599,9 @@ def _handle_guardrail_span( ) -> None: """Handle guardrail execution spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "guardrail", "$ai_guardrail_triggered": span_data.triggered, @@ -535,7 +636,9 @@ def _handle_response_span( model = getattr(response, "model", None) if response else None properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_model": model, "$ai_response_id": response_id, "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), @@ -548,7 +651,9 @@ def _handle_response_span( if response: output_items = getattr(response, "output", None) if output_items: - properties["$ai_output_choices"] = self._with_privacy_mode(_safe_json(output_items)) + properties["$ai_output_choices"] = self._with_privacy_mode( + _safe_json(output_items) + ) self._capture_event("$ai_generation", properties, distinct_id) @@ -565,7 +670,9 @@ def _handle_custom_span( ) -> None: """Handle custom user-defined spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_data.name, "$ai_span_type": "custom", "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), @@ -588,7 +695,9 @@ def _handle_audio_span( span_type = span_data.type # "transcription", "speech", or "speech_group" properties = { - 
**self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_type, "$ai_span_type": span_type, } @@ -612,7 +721,11 @@ def _handle_audio_span( properties["audio_output_format"] = span_data.output_format # Add text input for TTS - if hasattr(span_data, "input") and span_data.input and isinstance(span_data.input, str): + if ( + hasattr(span_data, "input") + and span_data.input + and isinstance(span_data.input, str) + ): properties["$ai_input"] = self._with_privacy_mode(span_data.input) # Don't include audio data (base64) - just metadata @@ -635,7 +748,9 @@ def _handle_mcp_span( ) -> None: """Handle MCP (Model Context Protocol) spans - maps to $ai_span event.""" properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": f"mcp:{span_data.server}", "$ai_span_type": "mcp_tools", "$ai_mcp_server": span_data.server, @@ -659,7 +774,9 @@ def _handle_generic_span( span_type = getattr(span_data, "type", "unknown") properties = { - **self._base_properties(trace_id, span_id, parent_id, latency, group_id, error_properties), + **self._base_properties( + trace_id, span_id, parent_id, latency, group_id, error_properties + ), "$ai_span_name": span_type, "$ai_span_type": span_type, } diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 2cf0bb9b..37ec3733 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -158,7 +158,9 @@ def test_generation_span_mapping(self, processor, mock_client, mock_span): {"role": "assistant", "content": "Hi there!"} ] - def test_generation_span_with_reasoning_tokens(self, processor, mock_client, mock_span): + def test_generation_span_with_reasoning_tokens( + self, processor, mock_client, mock_span + ): """Test GenerationSpanData includes reasoning tokens when present.""" span_data = GenerationSpanData( model="o1-preview", @@ -193,7 +195,9 @@ def test_function_span_mapping(self, processor, mock_client, mock_span): assert call_kwargs["event"] == "$ai_span" assert call_kwargs["properties"]["$ai_span_name"] == "get_weather" assert call_kwargs["properties"]["$ai_span_type"] == "tool" - assert call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + assert ( + call_kwargs["properties"]["$ai_input_state"] == '{"city": "San Francisco"}' + ) assert call_kwargs["properties"]["$ai_output_state"] == "Sunny, 72F" def test_agent_span_mapping(self, processor, mock_client, mock_span): @@ -323,7 +327,9 @@ def test_error_handling_in_span(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_is_error"] is True assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded" - def test_generation_span_includes_total_tokens(self, processor, mock_client, mock_span): + def test_generation_span_includes_total_tokens( + self, processor, mock_client, mock_span + ): """Test that $ai_total_tokens is calculated and included.""" span_data = GenerationSpanData( model="gpt-4o", @@ -337,11 +343,16 @@ def test_generation_span_includes_total_tokens(self, processor, mock_client, moc call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_total_tokens"] == 150 - def 
test_error_type_categorization_model_behavior(self, processor, mock_client, mock_span): + def test_error_type_categorization_model_behavior( + self, processor, mock_client, mock_span + ): """Test that ModelBehaviorError is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "ModelBehaviorError: Invalid JSON output", "type": "ModelBehaviorError"} + mock_span.error = { + "message": "ModelBehaviorError: Invalid JSON output", + "type": "ModelBehaviorError", + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) @@ -349,7 +360,9 @@ def test_error_type_categorization_model_behavior(self, processor, mock_client, call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" - def test_error_type_categorization_user_error(self, processor, mock_client, mock_span): + def test_error_type_categorization_user_error( + self, processor, mock_client, mock_span + ): """Test that UserError is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data @@ -361,31 +374,45 @@ def test_error_type_categorization_user_error(self, processor, mock_client, mock call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "user_error" - def test_error_type_categorization_input_guardrail(self, processor, mock_client, mock_span): + def test_error_type_categorization_input_guardrail( + self, processor, mock_client, mock_span + ): """Test that InputGuardrailTripwireTriggered is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "InputGuardrailTripwireTriggered: Content blocked"} + mock_span.error = { + "message": "InputGuardrailTripwireTriggered: Content blocked" + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) call_kwargs = mock_client.capture.call_args[1] - assert call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + assert ( + call_kwargs["properties"]["$ai_error_type"] == "input_guardrail_triggered" + ) - def test_error_type_categorization_output_guardrail(self, processor, mock_client, mock_span): + def test_error_type_categorization_output_guardrail( + self, processor, mock_client, mock_span + ): """Test that OutputGuardrailTripwireTriggered is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "OutputGuardrailTripwireTriggered: Response blocked"} + mock_span.error = { + "message": "OutputGuardrailTripwireTriggered: Response blocked" + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) call_kwargs = mock_client.capture.call_args[1] - assert call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + assert ( + call_kwargs["properties"]["$ai_error_type"] == "output_guardrail_triggered" + ) - def test_error_type_categorization_max_turns(self, processor, mock_client, mock_span): + def test_error_type_categorization_max_turns( + self, processor, mock_client, mock_span + ): """Test that MaxTurnsExceeded is categorized correctly.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data @@ -409,7 +436,9 @@ def test_error_type_categorization_unknown(self, processor, mock_client, mock_sp call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "unknown" - def 
test_response_span_with_output_and_total_tokens(self, processor, mock_client, mock_span): + def test_response_span_with_output_and_total_tokens( + self, processor, mock_client, mock_span + ): """Test ResponseSpanData includes output choices and total tokens.""" # Create a mock response object mock_response = MagicMock() @@ -433,10 +462,14 @@ def test_response_span_with_output_and_total_tokens(self, processor, mock_client assert call_kwargs["event"] == "$ai_generation" assert call_kwargs["properties"]["$ai_total_tokens"] == 35 - assert call_kwargs["properties"]["$ai_output_choices"] == [{"type": "message", "content": "Hello!"}] + assert call_kwargs["properties"]["$ai_output_choices"] == [ + {"type": "message", "content": "Hello!"} + ] assert call_kwargs["properties"]["$ai_response_id"] == "resp_123" - def test_speech_span_with_pass_through_properties(self, processor, mock_client, mock_span): + def test_speech_span_with_pass_through_properties( + self, processor, mock_client, mock_span + ): """Test SpeechSpanData includes pass-through properties.""" span_data = SpeechSpanData( input="Hello, how can I help you?", @@ -457,13 +490,20 @@ def test_speech_span_with_pass_through_properties(self, processor, mock_client, assert call_kwargs["properties"]["$ai_span_type"] == "speech" assert call_kwargs["properties"]["$ai_model"] == "tts-1" # Pass-through properties (no $ai_ prefix) - assert call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + assert ( + call_kwargs["properties"]["first_content_at"] == "2024-01-01T00:00:00.500Z" + ) assert call_kwargs["properties"]["audio_output_format"] == "pcm" - assert call_kwargs["properties"]["model_config"] == {"voice": "alloy", "speed": 1.0} + assert call_kwargs["properties"]["model_config"] == { + "voice": "alloy", + "speed": 1.0, + } # Text input should be captured assert call_kwargs["properties"]["$ai_input"] == "Hello, how can I help you?" - def test_transcription_span_with_pass_through_properties(self, processor, mock_client, mock_span): + def test_transcription_span_with_pass_through_properties( + self, processor, mock_client, mock_span + ): """Test TranscriptionSpanData includes pass-through properties.""" span_data = TranscriptionSpanData( input="base64_audio_data", @@ -486,7 +526,10 @@ def test_transcription_span_with_pass_through_properties(self, processor, mock_c assert call_kwargs["properties"]["audio_input_format"] == "pcm" assert call_kwargs["properties"]["model_config"] == {"language": "en"} # Transcription output should be captured - assert call_kwargs["properties"]["$ai_output_state"] == "This is the transcribed text." + assert ( + call_kwargs["properties"]["$ai_output_state"] + == "This is the transcribed text." 
+ ) def test_latency_calculation(self, processor, mock_client, mock_span): """Test that latency is calculated correctly.""" @@ -558,7 +601,9 @@ def test_generation_span_with_no_usage(self, processor, mock_client, mock_span): assert call_kwargs["properties"]["$ai_output_tokens"] == 0 assert call_kwargs["properties"]["$ai_total_tokens"] == 0 - def test_generation_span_with_partial_usage(self, processor, mock_client, mock_span): + def test_generation_span_with_partial_usage( + self, processor, mock_client, mock_span + ): """Test GenerationSpanData with only input_tokens present.""" span_data = GenerationSpanData( model="gpt-4o", @@ -574,11 +619,16 @@ def test_generation_span_with_partial_usage(self, processor, mock_client, mock_s assert call_kwargs["properties"]["$ai_output_tokens"] == 0 assert call_kwargs["properties"]["$ai_total_tokens"] == 42 - def test_error_type_categorization_by_type_field_only(self, processor, mock_client, mock_span): + def test_error_type_categorization_by_type_field_only( + self, processor, mock_client, mock_span + ): """Test error categorization works when only the type field matches.""" span_data = GenerationSpanData(model="gpt-4o") mock_span.span_data = span_data - mock_span.error = {"message": "Something went wrong", "type": "ModelBehaviorError"} + mock_span.error = { + "message": "Something went wrong", + "type": "ModelBehaviorError", + } processor.on_span_start(mock_span) processor.on_span_end(mock_span) @@ -586,7 +636,9 @@ def test_error_type_categorization_by_type_field_only(self, processor, mock_clie call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$ai_error_type"] == "model_behavior_error" - def test_distinct_id_resolved_from_trace_for_spans(self, mock_client, mock_trace, mock_span): + def test_distinct_id_resolved_from_trace_for_spans( + self, mock_client, mock_trace, mock_span + ): """Test that spans use the distinct_id resolved at trace start.""" resolver = lambda trace: f"user-{trace.name}" processor = PostHogTracingProcessor( From 789be8d45c3711f2136cd702e0b2250dd48c49b9 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 13:30:33 +0000 Subject: [PATCH 14/24] style: replace lambda assignments with def (ruff E731) --- posthog/test/ai/openai_agents/test_processor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 37ec3733..6759bf7d 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -89,7 +89,10 @@ def test_initialization(self, mock_client): def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace): """Test processor with callable distinct_id resolver.""" - resolver = lambda trace: trace.metadata.get("user_id", "default") + + def resolver(trace): + return trace.metadata.get("user_id", "default") + processor = PostHogTracingProcessor( client=mock_client, distinct_id=resolver, @@ -640,7 +643,10 @@ def test_distinct_id_resolved_from_trace_for_spans( self, mock_client, mock_trace, mock_span ): """Test that spans use the distinct_id resolved at trace start.""" - resolver = lambda trace: f"user-{trace.name}" + + def resolver(trace): + return f"user-{trace.name}" + processor = PostHogTracingProcessor( client=mock_client, distinct_id=resolver, From b3c631ce8f78a18db51c65c539aa6d8c904d1ccb Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 14:03:09 +0000 Subject: [PATCH 15/24] 
fix: restore full CHANGELOG.md history The rebase conflict resolution accidentally truncated the changelog to only the most recent entries. Restored all historical entries. --- CHANGELOG.md | 729 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 729 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f674fdc0..466f2a44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,3 +36,732 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r # 7.4.0 - 2025-12-16 feat: Add automatic retries for feature flag requests + +Feature flag API requests now automatically retry on transient failures: + +- Network errors (connection refused, DNS failures, timeouts) +- Server errors (500, 502, 503, 504) +- Up to 2 retries with exponential backoff (0.5s, 1s delays) + +Rate limit (429) and quota (402) errors are not retried. + +# 7.3.1 - 2025-12-06 + +fix: remove unused $exception_message and $exception_type + +# 7.3.0 - 2025-12-05 + +feat: improve code variables capture masking + +# 7.2.0 - 2025-12-01 + +feat: add $feature_flag_evaluated_at properties to $feature_flag_called events + +# 7.1.0 - 2025-11-26 + +Add support for the async version of Gemini. + +# 7.0.2 - 2025-11-18 + +Add support for Python 3.14. +Projects upgrading to Python 3.14 should ensure any Pydantic models passed into the SDK use Pydantic v2, as Pydantic v1 is not compatible with Python 3.14. + +# 7.0.1 - 2025-11-15 + +Try to use repr() when formatting code variables + +# 7.0.0 - 2025-11-11 + +NB Python 3.9 is no longer supported + +- chore(llma): update LLM provider SDKs to latest major versions + - openai: 1.102.0 → 2.7.1 + - anthropic: 0.64.0 → 0.72.0 + - google-genai: 1.32.0 → 1.49.0 + - langchain-core: 0.3.75 → 1.0.3 + - langchain-openai: 0.3.32 → 1.0.2 + - langchain-anthropic: 0.3.19 → 1.0.1 + - langchain-community: 0.3.29 → 0.4.1 + - langgraph: 0.6.6 → 1.0.2 + +# 6.9.3 - 2025-11-10 + +- feat(ph-ai): PostHog properties dict in GenerationMetadata + +# 6.9.2 - 2025-11-10 + +- fix(llma): fix cache token double subtraction in Langchain for non-Anthropic providers causing negative costs + +# 6.9.1 - 2025-11-07 + +- fix(error-tracking): pass code variables config from init to client + +# 6.9.0 - 2025-11-06 + +- feat(error-tracking): add local variables capture + +# 6.8.0 - 2025-11-03 + +- feat(llma): send web search calls to be used for LLM cost calculations + +# 6.7.14 - 2025-11-03 + +- fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355) +- test(django): Add Django 5 integration test suite with real ASGI application testing async middleware behavior + +# 6.7.13 - 2025-11-02 + +- fix(llma): cache cost calculation in the LangChain callback + +# 6.7.12 - 2025-11-02 + +- fix(django): Restore process_exception method to capture view and downstream middleware exceptions (fixes #329) +- fix(ai/langchain): Add LangChain 1.0+ compatibility for CallbackHandler imports (fixes #362) + +# 6.7.11 - 2025-10-28 + +- feat(ai): Add `$ai_framework` property for framework integrations (e.g. 
LangChain) + +# 6.7.10 - 2025-10-24 + +- fix(django): Make middleware truly hybrid - compatible with both sync (WSGI) and async (ASGI) Django stacks without breaking sync-only deployments + +# 6.7.9 - 2025-10-22 + +- fix(flags): multi-condition flags with static cohorts returning wrong variants + +# 6.7.8 - 2025-10-16 + +- fix(llma): missing async for OpenAI's streaming implementation + +# 6.7.7 - 2025-10-14 + +- fix: remove deprecated attribute $exception_personURL from exception events + +# 6.7.6 - 2025-09-16 + +- fix: don't sort condition sets with variant overrides to the top +- fix: Prevent core Client methods from raising exceptions + +# 6.7.5 - 2025-09-16 + +- feat: Django middleware now supports async request handling. + +# 6.7.4 - 2025-09-05 + +- fix: Missing system prompts for some providers + +# 6.7.3 - 2025-09-04 + +- fix: missing usage tokens in Gemini + +# 6.7.2 - 2025-09-03 + +- fix: tool call results in streaming providers + +# 6.7.1 - 2025-09-01 + +- fix: Add base64 inline image sanitization + +# 6.7.0 - 2025-08-26 + +- feat: Add support for feature flag dependencies + +# 6.6.1 - 2025-08-21 + +- fix: Prevent `NoneType` error when `group_properties` is `None` + +# 6.6.0 - 2025-08-15 + +- feat: Add `flag_keys_to_evaluate` parameter to optimize feature flag evaluation performance by only evaluating specified flags +- feat: Add `flag_keys_filter` option to `send_feature_flags` for selective flag evaluation in capture events + +# 6.5.0 - 2025-08-08 + +- feat: Add `$context_tags` to an event to know which properties were included as tags + +# 6.4.1 - 2025-08-06 + +- fix: Always pass project API key in `remote_config` requests for deterministic project routing + +# 6.4.0 - 2025-08-05 + +- feat: support Vertex AI for Gemini + +# 6.3.4 - 2025-08-04 + +- fix: set `$ai_tools` for all providers and `$ai_output_choices` for all non-streaming provider flows properly + +# 6.3.3 - 2025-08-01 + +- fix: `get_feature_flag_result` now correctly returns FeatureFlagResult when payload is empty string instead of None + +# 6.3.2 - 2025-07-31 + +- fix: Anthropic's tool calls are now handled properly + +# 6.3.0 - 2025-07-22 + +- feat: Enhanced `send_feature_flags` parameter to accept `SendFeatureFlagsOptions` object for declarative control over local/remote evaluation and custom properties + +# 6.2.1 - 2025-07-21 + +- feat: make `posthog_client` an optional argument in PostHog AI providers wrappers (`posthog.ai.*`), intuitively using the default client as the default + +# 6.1.1 - 2025-07-16 + +- fix: correctly capture exceptions processed by Django from views or middleware + +# 6.1.0 - 2025-07-10 + +- feat: decouple feature flag local evaluation from personal API keys; support decrypting remote config payloads without relying on the feature flags poller + +# 6.0.4 - 2025-07-09 + +- fix: add POSTHOG_MW_CLIENT setting to django middleware, to support custom clients for exception capture. 
+
+# 6.0.3 - 2025-07-07
+
+- feat: add a feature flag evaluation cache (local storage or redis) to support returning flag evaluations when the service is down
+
+# 6.0.2 - 2025-07-02
+
+- fix: send_feature_flags changed to default to false in `Client::capture_exception`
+
+# 6.0.1
+
+- fix: respect `$process_person_profile` property when passed to capture
+
+# 6.0.0
+
+This release contains a number of major breaking changes:
+
+- feat: make distinct_id an optional parameter in posthog.capture and related functions
+- feat: make capture and related functions return `Optional[str]`, which is the UUID of the sent event, if it was sent
+- fix: remove `identify` (prefer `posthog.set()`), and `page` and `screen` (prefer `posthog.capture()`)
+- fix: delete exception-capture specific integrations module. Prefer the general-purpose django middleware as a replacement for the django `Integration`.
+
+To migrate to this version, you'll mostly just need to switch to using named keyword arguments, rather than positional ones. For example:
+
+```python
+# Old calling convention
+posthog.capture("user123", "button_clicked", {"button_id": "123"})
+# New calling convention
+posthog.capture(distinct_id="user123", event="button_clicked", properties={"button_id": "123"})
+
+# Better pattern
+with posthog.new_context():
+    posthog.identify_context("user123")
+
+    # The event name is the first argument, and can be passed positionally, or as a keyword argument in a later position
+    posthog.capture("button_pressed")
+```
+
+Generally, arguments are now appropriately typed, and docstrings have been updated. If something is unclear, please open an issue, or submit a PR!
+
+# 5.4.0 - 2025-06-20
+
+- feat: add support for session_id context on page method
+
+# 5.3.0 - 2025-06-19
+
+- fix: safely handle exception values
+
+# 5.2.0 - 2025-06-19
+
+- feat: construct artificial stack traces if no traceback is available on a captured exception
+
+## 5.1.0 - 2025-06-18
+
+- feat: session and distinct IDs can now be associated with contexts, and are used as such
+- feat: django http request middleware
+
+## 5.0.0 - 2025-06-16
+
+- fix: removed deprecated sentry integration
+
+## 4.10.0 - 2025-06-13
+
+- fix: no longer fail in autocapture.
+
+## 4.9.0 - 2025-06-13
+
+- feat(ai): track reasoning and cache tokens in the LangChain callback
+
+## 4.8.0 - 2025-06-10
+
+- fix: export scoped, rather than tracked, decorator
+- feat: allow use of contexts without error tracking
+
+## 4.7.0 - 2025-06-10
+
+- feat: add support for parse endpoint in responses API (no longer beta)
+
+## 4.6.2 - 2025-06-09
+
+- fix: replace `import posthog` with direct method imports
+
+## 4.6.1 - 2025-06-09
+
+- fix: replace `import posthog` in `posthoganalytics` package
+
+## 4.6.0 - 2025-06-09
+
+- feat: add additional user and request context to captured exceptions via the Django integration
+- feat: Add `setup()` function to initialise default client
+
+## 4.5.0 - 2025-06-09
+
+- feat: add before_send callback (#249)
+
+## 4.4.2 - 2025-06-09
+
+- empty point release to fix release automation
+
+## 4.4.1 - 2025-06-09
+
+- empty point release to fix release automation
+
+## 4.4.0 - 2025-06-09
+
+- Use the new `/flags` endpoint for all feature flag evaluations (don't fall back to `/decide` at all)
+
+## 4.3.2 - 2025-06-06
+
+1. Add context management:
+
+- New context manager with `posthog.new_context()`
+- Tag functions: `posthog.tag()`, `posthog.get_tags()`, `posthog.clear_tags()`
+- Function decorator:
+  - `@posthog.scoped` - Creates context and captures exceptions thrown within the function
+- Automatic deduplication of exceptions to ensure each exception is only captured once
+
+2. fix: feature flag request use geoip_disable (#235)
+3. chore: pin actions versions (#210)
+4. fix: opinionated setup and clean fn fix (#240)
+5. fix: release action failed (#241)
+
+## 4.2.0 - 2025-05-22
+
+Add support for google gemini
+
+## 4.1.0 - 2025-05-22
+
+Moved ai openai package to a composition approach over inheritance.
+
+## 4.0.1 – 2025-04-29
+
+1. Remove deprecated `monotonic` library. Use Python's core `time.monotonic` function instead
+2. Clarify Python 3.9+ is required
+
+## 4.0.0 - 2025-04-24
+
+1. Added new method `get_feature_flag_result` which returns a `FeatureFlagResult` object. This object breaks down the result of a feature flag into its enabled state, variant, and payload. The benefit of this method is it allows you to retrieve the result of a feature flag and its payload in a single API call. You can call `get_value` on the result to get the value of the feature flag, which is the same value returned by `get_feature_flag` (aka the string `variant` if the flag is a multivariate flag or the `boolean` value if the flag is a boolean flag).
+
+Example:
+
+```python
+result = posthog.get_feature_flag_result("my-flag", "distinct_id")
+print(result.enabled)  # True or False
+print(result.variant)  # 'the-variant-value' or None
+print(result.payload)  # {'foo': 'bar'}
+print(result.get_value())  # 'the-variant-value' or True or False
+print(result.reason)  # 'matched condition set 2' (Not available for local evaluation)
+```
+
+Breaking change:
+
+1. `get_feature_flag_payload` now deserializes payloads from JSON strings to `Any`. Previously, it returned the payload as a JSON encoded string.
+
+Before:
+
+```python
+payload = get_feature_flag_payload('key', 'distinct_id')  # "{\"some\": \"payload\"}"
+```
+
+After:
+
+```python
+payload = get_feature_flag_payload('key', 'distinct_id')  # {"some": "payload"}
+```
+
+## 3.25.0 – 2025-04-15
+
+1. Roll out new `/flags` endpoint to 100% of `/decide` traffic, excluding the top 10 customers.
+
+## 3.24.3 – 2025-04-15
+
+1. Fix hash inclusion/exclusion for flag rollout
+
+## 3.24.2 – 2025-04-15
+
+1. Roll out new /flags endpoint to 10% of /decide traffic
+
+## 3.24.1 – 2025-04-11
+
+1. Add `log_captured_exceptions` option to proxy setup
+
+## 3.24.0 – 2025-04-10
+
+1. Add config option to `log_captured_exceptions`
+
+## 3.23.0 – 2025-03-26
+
+1. Expand automatic retries to include read errors (e.g. RemoteDisconnected)
+
+## 3.22.0 – 2025-03-26
+
+1. Add more information to `$feature_flag_called` events.
+2. Support for the `/decide?v=4` endpoint which contains more information about feature flags.
+
+## 3.21.0 – 2025-03-17
+
+1. Support serializing dataclasses.
+
+## 3.20.0 – 2025-03-13
+
+1. Add support for OpenAI Responses API.
+
+## 3.19.2 – 2025-03-11
+
+1. Fix install requirements for analytics package
+
+## 3.19.1 – 2025-03-11
+
+1. Fix bug where None is sent as delta in azure
+
+## 3.19.0 – 2025-03-04
+
+1. Add support for tool calls in OpenAI and Anthropic.
+2. Add support for cached tokens.
+
+## 3.18.1 – 2025-03-03
+
+1. Improve quota-limited feature flag logs
+
+## 3.18.0 - 2025-02-28
+
+1. Add support for Azure OpenAI.
+
+## 3.17.0 - 2025-02-27
+
+1. The LangChain handler now captures tools in `$ai_generation` events, in property `$ai_tools`. This allows for displaying tools provided to the LLM call in PostHog UI. Note that support for `$ai_tools` in OpenAI and Anthropic SDKs is coming soon.
+
+## 3.16.0 - 2025-02-26
+
+1. feat: add some platform info to events (#198)
+
+## 3.15.1 - 2025-02-23
+
+1. Fix async client support for OpenAI.
+
+## 3.15.0 - 2025-02-19
+
+1. Support quota-limited feature flags
+
+## 3.14.2 - 2025-02-19
+
+1. Evaluate feature flag payloads with case sensitivity correctly. Fixes
+
+## 3.14.1 - 2025-02-18
+
+1. Add support for Bedrock Anthropic Usage
+
+## 3.13.0 - 2025-02-12
+
+1. Automatically retry connection errors
+
+## 3.12.1 - 2025-02-11
+
+1. Fix mypy support for 3.12.0
+2. Deprecate `is_simple_flag`
+
+## 3.12.0 - 2025-02-11
+
+1. Add support for OpenAI beta parse API.
+2. Deprecate `context` parameter
+
+## 3.11.1 - 2025-02-06
+
+1. Fix LangChain callback handler to capture parent run ID.
+
+## 3.11.0 - 2025-01-28
+
+1. Add the `$ai_span` event to the LangChain callback handler to capture the input and output of intermediary chains.
+
+   > LLM observability naming change: event property `$ai_trace_name` is now `$ai_span_name`.
+
+2. Fix serialization of Pydantic models in methods.
+
+## 3.10.0 - 2025-01-24
+
+1. Add `$ai_error` and `$ai_is_error` properties to LangChain callback handler, OpenAI, and Anthropic.
+
+## 3.9.3 - 2025-01-23
+
+1. Fix capturing of multiple traces in the LangChain callback handler.
+
+## 3.9.2 - 2025-01-22
+
+1. Fix importing of LangChain callback handler under certain circumstances.
+
+## 3.9.0 - 2025-01-22
+
+1. Add `$ai_trace` event emission to LangChain callback handler.
+
+## 3.8.4 - 2025-01-17
+
+1. Add Anthropic support for LLM Observability.
+2. Update LLM Observability to use output_choices.
+
+## 3.8.3 - 2025-01-14
+
+1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages for the `posthoganalytics` package.
+
+## 3.8.2 - 2025-01-14
+
+1. Fix setuptools to include the `posthog.ai.openai` and `posthog.ai.langchain` packages.
+
+## 3.8.1 - 2025-01-14
+
+1. Add LLM Observability with support for OpenAI and Langchain callbacks.
+
+## 3.7.5 - 2025-01-03
+
+1. Add `distinct_id` to group_identify
+
+## 3.7.4 - 2024-11-25
+
+1. Fix bug where this SDK incorrectly sent feature flag events with null values when calling `get_feature_flag_payload`.
+
+## 3.7.3 - 2024-11-25
+
+1. Use personless mode when sending an exception without a provided `distinct_id`.
+
+## 3.7.2 - 2024-11-19
+
+1. Add `type` property to exception stacks.
+
+## 3.7.1 - 2024-10-24
+
+1. Add `platform` property to each frame of exception stacks.
+
+## 3.7.0 - 2024-10-03
+
+1. Adds a new `super_properties` parameter on the client that is appended to every /capture call.
+
+## 3.6.7 - 2024-09-24
+
+1. Remove deprecated datetime.utcnow() in favour of datetime.now(tz=tzutc())
+
+## 3.6.6 - 2024-09-16
+
+1. Fix manual capture support for in app frames
+
+## 3.6.5 - 2024-09-10
+
+1. Fix django integration support for manual exception capture.
+
+## 3.6.4 - 2024-09-05
+
+1. Add manual exception capture.
+
+## 3.6.3 - 2024-09-03
+
+1. Make sure setup.py for posthoganalytics package also discovers the new exception integration package.
+
+## 3.6.2 - 2024-09-03
+
+1. Make sure setup.py discovers the new exception integration package.
+
+## 3.6.1 - 2024-09-03
+
+1. Adds django integration to exception autocapture in alpha state. This feature is not yet stable and may change in future versions.
+
+## 3.6.0 - 2024-08-28
+
+1. Adds exception autocapture in alpha state. This feature is not yet stable and may change in future versions.
+
+## 3.5.2 - 2024-08-21
+
+1. Guard for None values in local evaluation
+
+## 3.5.1 - 2024-08-13
+
+1. Remove "-api" suffix from ingestion hostnames
+
+## 3.5.0 - 2024-02-29
+
+1. Adds a new `feature_flags_request_timeout_seconds` timeout parameter for feature flags which defaults to 3 seconds, updated from the default 10s for all other API calls.
+
+## 3.4.2 - 2024-02-20
+
+1. Add `historical_migration` option for bulk migration to PostHog Cloud.
+
+## 3.4.1 - 2024-02-09
+
+1. Use new hosts for event capture as well
+
+## 3.4.0 - 2024-02-05
+
+1. Point given hosts to new ingestion hosts
+
+## 3.3.4 - 2024-01-30
+
+1. Update type hints for module variables to work with newer versions of mypy
+
+## 3.3.3 - 2024-01-26
+
+1. Remove new relative date operators, combine into regular date operators
+
+## 3.3.2 - 2024-01-19
+
+1. Return success/failure with all capture calls from module functions
+
+## 3.3.1 - 2024-01-10
+
+1. Make sure we don't override any existing feature flag properties when adding locally evaluated feature flag properties.
+
+## 3.3.0 - 2024-01-09
+
+1. When local evaluation is enabled, we automatically add flag information to all events sent to PostHog, whenever possible. This makes it easier to use these events in experiments.
+
+## 3.2.0 - 2024-01-09
+
+1. Numeric property handling for feature flags now does the expected: When passed in a number, we do a numeric comparison. When passed in a string, we do a string comparison. Previously, we always did a string comparison.
+2. Add support for relative date operators for local evaluation.
+
+## 3.1.0 - 2023-12-04
+
+1. Increase maximum event size and batch size
+
+## 3.0.2 - 2023-08-17
+
+1. Returns the current flag property with $feature_flag_called events, to make it easier to use in experiments
+
+## 3.0.1 - 2023-04-21
+
+1. Restore how feature flags work when the client library is disabled: All requests return `None` and no events are sent when the client is disabled.
+2. Add a `feature_flag_definitions()` debug option, which returns currently loaded feature flag definitions. You can use this to more cleverly decide when to request local evaluation of feature flags.
+
+## 3.0.0 - 2023-04-14
+
+Breaking change:
+
+All events by default now send the `$geoip_disable` property to disable geoip lookup in app. This is because usually we don't
+want to update person properties to take the server's location.
+
+The same now happens for feature flag requests, where we discard the IP address of the server for matching on geoip properties like city, country, continent.
+
+To restore previous behaviour, you can set the default to False like so:
+
+```python
+posthog.disable_geoip = False
+
+# and if using client instantiation:
+posthog = Posthog('api_key', disable_geoip=False)
+
+```
+
+## 2.5.0 - 2023-04-10
+
+1. Add option for instantiating separate client object
+
+## 2.4.2 - 2023-03-30
+
+1. Update backoff dependency for posthoganalytics package to be the same as posthog package
+
+## 2.4.1 - 2023-03-17
+
+1. Removes accidental print call left in for decide response
+
+## 2.4.0 - 2023-03-14
+
+1. Support evaluating all cohorts in feature flags for local evaluation
+
+## 2.3.1 - 2023-02-07
+
+1. Log instead of raise error on posthog personal api key errors
+2. Remove upper bound on backoff dependency
+
+## 2.3.0 - 2023-01-31
+
+1. Add support for returning payloads of matched feature flags
+
+## 2.2.0 - 2022-11-14
+
+Changes:
+
+1. Add support for feature flag variant overrides with local evaluation
+
+## 2.1.2 - 2022-09-15
+
+Changes:
+
+1. Fixes issues with date comparison.
+
+## 2.1.1 - 2022-09-14
+
+Changes:
+
+1. Feature flags local evaluation now supports date property filters as well. Accepts both strings and datetime objects.
+
+## 2.1.0 - 2022-08-11
+
+Changes:
+
+1. Feature flag defaults have been removed
+2. Setup logging only when debug mode is enabled.
+
+## 2.0.1 - 2022-08-04
+
+- Make poll_interval configurable
+- Add `send_feature_flag_events` parameter to feature flag calls, which determines whether the `$feature_flag_called` event should be sent or not.
+- Add `only_evaluate_locally` parameter to feature flag calls, which determines whether the feature flag should only be evaluated locally or not.
+
+## 2.0.0 - 2022-08-02
+
+Breaking changes:
+
+1. The minimum version requirement for PostHog servers is now 1.38. If you're using PostHog Cloud, you satisfy this requirement automatically.
+2. Feature flag defaults apply only when there's an error fetching feature flag results. Earlier, if the default was set to `True`, even if a flag resolved to `False`, the default would override this.
+   **Note: These are removed in 2.0.2**
+3. Feature flag remote evaluation doesn't require a personal API key.
+
+New Changes:
+
+1. You can now evaluate feature flags locally (i.e. without sending a request to your PostHog servers) by setting a personal API key, and passing in groups and person properties to `is_feature_enabled` and `get_feature_flag` calls.
+2. Introduces a `get_all_flags` method that returns all feature flags. This is useful for when you want to seed your frontend with some initial flags, given a user ID.
+
+## 1.4.9 - 2022-06-13
+
+- Support for sending feature flags with capture calls
+
+## 1.4.8 - 2022-05-12
+
+- Support multi variate feature flags
+
+## 1.4.7 - 2022-04-25
+
+- Allow feature flags usage without project_api_key
+
+## 1.4.1 - 2021-05-28
+
+- Fix packaging issues with Sentry integrations
+
+## 1.4.0 - 2021-05-18
+
+- Improve support for `project_api_key` (#32)
+- Resolve polling issues with feature flags (#29)
+- Add Sentry (and Sentry+Django) integrations (#13)
+- Fix feature flag issue with no percentage rollout (#30)
+
+## 1.3.1 - 2021-05-07
+
+- Add `$set` and `$set_once` support (#23)
+- Add distinct ID to `$create_alias` event (#27)
+- Add `UUID` to `ID_TYPES` (#26)
+
+## 1.2.1 - 2021-02-05
+
+Initial release logged in CHANGELOG.md.

From fea14207605a13ecb08f48b031648b9ad1202ca5 Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Tue, 27 Jan 2026 14:06:12 +0000
Subject: [PATCH 16/24] fix: preserve personless mode for trace-id fallback
 distinct IDs

When no distinct_id is provided, _get_distinct_id falls back to trace_id
or "unknown". Since these are non-None strings, the
$process_person_profile=False check in _capture_event never fired,
creating unwanted person profiles keyed by trace IDs.

Track whether the user explicitly provided a distinct_id and use that
flag to control personless mode, matching the pattern used by the
langchain and openai integrations.
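To make the effect concrete, a hedged sketch of the captured payload
before and after this change (the event shape here is illustrative,
not the exact wire format):

```python
# Before: the trace-id fallback looked like a real user ID, so PostHog
# created a person profile keyed by something like "trace_abc123".
event_before = {
    "distinct_id": "trace_abc123",
    "event": "$ai_trace",
    "properties": {"$ai_trace_id": "trace_abc123"},
}

# After: the same event is explicitly marked personless, so no person
# profile is created for the fallback ID.
event_after = {
    "distinct_id": "trace_abc123",
    "event": "$ai_trace",
    "properties": {
        "$ai_trace_id": "trace_abc123",
        "$process_person_profile": False,
    },
}
```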
--- posthog/ai/openai_agents/processor.py | 5 ++-- .../test/ai/openai_agents/test_processor.py | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 459f4a90..6c61d10a 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -95,6 +95,7 @@ def __init__( """ self._client = client or setup() self._distinct_id = distinct_id + self._has_user_distinct_id = distinct_id is not None self._privacy_mode = privacy_mode self._groups = groups or {} self._properties = properties or {} @@ -178,8 +179,8 @@ def _capture_event( **self._properties, } - # Don't process person profile if no distinct_id - if distinct_id is None: + # Don't create person profiles when using fallback IDs (trace_id, "unknown") + if not self._has_user_distinct_id: final_properties["$process_person_profile"] = False self._client.capture( diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index 6759bf7d..a3c9367a 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -116,6 +116,29 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_provider"] == "openai" assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" + def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): + """Test that events use personless mode when no distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + + def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace): + """Test that events create person profiles when distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id="real-user", + ) + + processor.on_trace_start(mock_trace) + + call_kwargs = mock_client.capture.call_args[1] + assert "$process_person_profile" not in call_kwargs["properties"] + def test_on_trace_end_clears_metadata(self, processor, mock_trace): """Test that on_trace_end clears stored trace metadata.""" processor.on_trace_start(mock_trace) From fb4f1da8d15efe836098471c5c0249a63274ca19 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 14:23:15 +0000 Subject: [PATCH 17/24] fix: restore changelog history and fix personless mode edge cases Two fixes from bot review: 1. CHANGELOG.md was accidentally truncated to 38 lines during rebase conflict resolution. Restored all 767 lines of history. 2. Personless mode now follows the same pattern as langchain/openai integrations: _get_distinct_id returns None when no user-provided ID is available, and callers set $process_person_profile=False before falling back to trace_id. This covers the edge case where a callable distinct_id returns None. 
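A minimal sketch of the resulting flow, mirroring the new test fixtures
(mocked client and trace; only names introduced by this patch are assumed):

```python
from unittest.mock import MagicMock

from posthog.ai.openai_agents import PostHogTracingProcessor

client = MagicMock()
trace = MagicMock(trace_id="trace_123")
trace.name = "Test Workflow"  # set separately: `name` is reserved in MagicMock()

# A callable resolver that finds no user ID for the trace returns None...
processor = PostHogTracingProcessor(client=client, distinct_id=lambda t: None)
processor.on_trace_start(trace)

# ...so the captured event falls back to the trace ID in personless mode.
kwargs = client.capture.call_args[1]
assert kwargs["distinct_id"] == "trace_123"
assert kwargs["properties"]["$process_person_profile"] is False
```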
--- posthog/ai/openai_agents/processor.py | 46 +++++++++++------ .../test/ai/openai_agents/test_processor.py | 51 ++++++++++++++++++- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 6c61d10a..9b76000c 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -95,7 +95,6 @@ def __init__( """ self._client = client or setup() self._distinct_id = distinct_id - self._has_user_distinct_id = distinct_id is not None self._privacy_mode = privacy_mode self._groups = groups or {} self._properties = properties or {} @@ -110,19 +109,22 @@ def __init__( # is never called (e.g., due to an exception in the Agents SDK). self._max_tracked_entries = 10000 - def _get_distinct_id(self, trace: Optional[Trace]) -> str: - """Resolve the distinct ID for a trace.""" + def _get_distinct_id(self, trace: Optional[Trace]) -> Optional[str]: + """Resolve the distinct ID for a trace. + + Returns the user-provided distinct ID (string or callable result), + or None if no user-provided ID is available. Callers should treat + None as a signal to use a fallback ID in personless mode. + """ if callable(self._distinct_id): if trace: result = self._distinct_id(trace) if result: return str(result) - return trace.trace_id if trace else "unknown" + return None elif self._distinct_id: return str(self._distinct_id) - elif trace: - return trace.trace_id - return "unknown" + return None def _with_privacy_mode(self, value: Any) -> Any: """Apply privacy mode redaction if enabled.""" @@ -166,25 +168,27 @@ def _capture_event( properties: Dict[str, Any], distinct_id: Optional[str] = None, ) -> None: - """Capture an event to PostHog with error handling.""" + """Capture an event to PostHog with error handling. + + Args: + distinct_id: The resolved distinct ID. When the user didn't provide + one, callers should pass ``user_distinct_id or fallback_id`` + (matching the langchain/openai pattern) and separately set + ``$process_person_profile`` in properties. + """ try: if not hasattr(self._client, "capture") or not callable( self._client.capture ): return - final_distinct_id = distinct_id or "unknown" final_properties = { **properties, **self._properties, } - # Don't create person profiles when using fallback IDs (trace_id, "unknown") - if not self._has_user_distinct_id: - final_properties["$process_person_profile"] = False - self._client.capture( - distinct_id=final_distinct_id, + distinct_id=distinct_id or "unknown", event=event, properties=final_properties, groups=self._groups, @@ -226,9 +230,12 @@ def on_trace_start(self, trace: Trace) -> None: if metadata: properties["$ai_trace_metadata"] = _safe_json(metadata) + if distinct_id is None: + properties["$process_person_profile"] = False + self._capture_event( event="$ai_trace", - distinct_id=distinct_id, + distinct_id=distinct_id or trace_id, properties=properties, ) except Exception as e: @@ -271,7 +278,9 @@ def on_span_end(self, span: Span[Any]) -> None: ended = _parse_iso_timestamp(span.ended_at) latency = (ended - started) if (started and ended) else 0 - # Get distinct ID from trace metadata (resolved at trace start) or default + # Get user-provided distinct ID from trace metadata (resolved at trace start). + # None means no user-provided ID — use trace_id as fallback in personless mode, + # matching the langchain/openai pattern: `distinct_id or trace_id`. 
trace_info = self._trace_metadata.get(trace_id, {}) distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None) @@ -311,6 +320,11 @@ def on_span_end(self, span: Span[Any]) -> None: "$ai_error_type": error_type, } + # Personless mode: no user-provided distinct_id, fallback to trace_id + if distinct_id is None: + error_properties["$process_person_profile"] = False + distinct_id = trace_id + # Dispatch based on span data type if isinstance(span_data, GenerationSpanData): self._handle_generation_span( diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index a3c9367a..eafe83f0 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -117,7 +117,7 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): - """Test that events use personless mode when no distinct_id is provided.""" + """Test that trace events use personless mode when no distinct_id is provided.""" processor = PostHogTracingProcessor( client=mock_client, ) @@ -126,6 +126,55 @@ def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$process_person_profile"] is False + # Should fallback to trace_id as the distinct_id + assert call_kwargs["distinct_id"] == mock_trace.trace_id + + def test_personless_mode_for_spans_when_no_distinct_id( + self, mock_client, mock_trace, mock_span + ): + """Test that span events use personless mode when no distinct_id is provided.""" + processor = PostHogTracingProcessor( + client=mock_client, + ) + + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + assert call_kwargs["distinct_id"] == mock_span.trace_id + + def test_personless_mode_when_callable_returns_none( + self, mock_client, mock_trace, mock_span + ): + """Test personless mode when callable distinct_id returns None.""" + + def resolver(trace): + return None # Simulate no user ID available + + processor = PostHogTracingProcessor( + client=mock_client, + distinct_id=resolver, + ) + + processor.on_trace_start(mock_trace) + mock_client.capture.reset_mock() + + span_data = GenerationSpanData(model="gpt-4o") + mock_span.span_data = span_data + + processor.on_span_start(mock_span) + processor.on_span_end(mock_span) + + call_kwargs = mock_client.capture.call_args[1] + assert call_kwargs["properties"]["$process_person_profile"] is False + assert call_kwargs["distinct_id"] == mock_span.trace_id def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace): """Test that events create person profiles when distinct_id is provided.""" From 61d43e3bd1b40a479df0a5dec8eecbf4ce02c3ef Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:25:03 +0000 Subject: [PATCH 18/24] fix: handle None token counts in generation span Guard against input_tokens or output_tokens being None when computing $ai_total_tokens to avoid TypeError. 
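For illustration, a small sketch of the failure mode this guards against
(the usage payload is hypothetical):

```python
# A provider can report a usage key whose value is None rather than
# omitting the key entirely.
usage = {"prompt_tokens": None}

# Old extraction: the key exists, so .get()'s default 0 is never used and
# the whole expression still evaluates to None...
old_input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0)
assert old_input_tokens is None  # ...making `None + output_tokens` a TypeError

# New extraction: a trailing `or 0` coerces the None to 0.
new_input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0
assert new_input_tokens == 0
```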
--- posthog/ai/openai_agents/processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 9b76000c..29c2cf88 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -480,8 +480,8 @@ def _handle_generation_span( """Handle LLM generation spans - maps to $ai_generation event.""" # Extract token usage usage = span_data.usage or {} - input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens", 0) - output_tokens = usage.get("output_tokens") or usage.get("completion_tokens", 0) + input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0 + output_tokens = usage.get("output_tokens") or usage.get("completion_tokens") or 0 # Extract model config parameters model_config = span_data.model_config or {} @@ -506,7 +506,7 @@ def _handle_generation_span( "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, - "$ai_total_tokens": input_tokens + output_tokens, + "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0), } # Add optional token fields if present From b626a16b155586538deb0a61d805675dc0547e50 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:26:44 +0000 Subject: [PATCH 19/24] fix: check error_type_raw for all error categories Check both error_type_raw and error_message for guardrail and max_turns errors, consistent with how ModelBehaviorError and UserError are already checked. --- posthog/ai/openai_agents/processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 29c2cf88..d3cba1d0 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -307,11 +307,11 @@ def on_span_end(self, span: Span[Any]) -> None: error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" - elif "InputGuardrailTripwireTriggered" in error_message: + elif "InputGuardrailTripwireTriggered" in error_type_raw or "InputGuardrailTripwireTriggered" in error_message: error_type = "input_guardrail_triggered" - elif "OutputGuardrailTripwireTriggered" in error_message: + elif "OutputGuardrailTripwireTriggered" in error_type_raw or "OutputGuardrailTripwireTriggered" in error_message: error_type = "output_guardrail_triggered" - elif "MaxTurnsExceeded" in error_message: + elif "MaxTurnsExceeded" in error_type_raw or "MaxTurnsExceeded" in error_message: error_type = "max_turns_exceeded" error_properties = { From b4a2d8be2e68f492fd5bc8061c66133d226a03d3 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:28:46 +0000 Subject: [PATCH 20/24] fix: add type hints to instrument() function --- posthog/ai/openai_agents/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py index 49e4186e..2c7f277c 100644 --- a/posthog/ai/openai_agents/__init__.py +++ b/posthog/ai/openai_agents/__init__.py @@ -1,3 +1,5 @@ +from typing import Any, Callable, Dict, Optional, Union + try: import agents # noqa: F401 except ImportError: @@ -11,12 +13,12 @@ def instrument( - client=None, - distinct_id=None, + client: Optional["Client"] = None, + distinct_id: Optional[Union[str, Callable[["Trace"], Optional[str]]]] = None, 
privacy_mode: bool = False, - groups=None, - properties=None, -): + groups: Optional[Dict[str, Any]] = None, + properties: Optional[Dict[str, Any]] = None, +) -> PostHogTracingProcessor: """ One-liner to instrument OpenAI Agents SDK with PostHog tracing. From d4f4a3a7c168477fcede28ffa3009d320a47adac Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:29:26 +0000 Subject: [PATCH 21/24] refactor: rename _safe_json to _ensure_serializable for clarity The function validates JSON serializability and falls back to str(), not serializes. Rename and update docstring to make the contract clear. --- posthog/ai/openai_agents/processor.py | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index d3cba1d0..02fdf359 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -26,8 +26,13 @@ log = logging.getLogger("posthog") -def _safe_json(obj: Any) -> Any: - """Safely convert object to JSON-serializable format.""" +def _ensure_serializable(obj: Any) -> Any: + """Ensure an object is JSON-serializable, converting to str as fallback. + + Returns the original object if it's already serializable (dict, list, str, + int, etc.), or str(obj) for non-serializable types so that downstream + json.dumps() calls won't fail. + """ if obj is None: return None try: @@ -228,7 +233,7 @@ def on_trace_start(self, trace: Trace) -> None: # Include trace metadata if present if metadata: - properties["$ai_trace_metadata"] = _safe_json(metadata) + properties["$ai_trace_metadata"] = _ensure_serializable(metadata) if distinct_id is None: properties["$process_person_profile"] = False @@ -502,8 +507,8 @@ def _handle_generation_span( ), "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, - "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), - "$ai_output_choices": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), + "$ai_output_choices": self._with_privacy_mode(_ensure_serializable(span_data.output)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0), @@ -539,12 +544,12 @@ def _handle_function_span( ), "$ai_span_name": span_data.name, "$ai_span_type": "tool", - "$ai_input_state": self._with_privacy_mode(_safe_json(span_data.input)), - "$ai_output_state": self._with_privacy_mode(_safe_json(span_data.output)), + "$ai_input_state": self._with_privacy_mode(_ensure_serializable(span_data.input)), + "$ai_output_state": self._with_privacy_mode(_ensure_serializable(span_data.output)), } if span_data.mcp_data: - properties["$ai_mcp_data"] = _safe_json(span_data.mcp_data) + properties["$ai_mcp_data"] = _ensure_serializable(span_data.mcp_data) self._capture_event("$ai_span", properties, distinct_id) @@ -656,7 +661,7 @@ def _handle_response_span( ), "$ai_model": model, "$ai_response_id": response_id, - "$ai_input": self._with_privacy_mode(_safe_json(span_data.input)), + "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), "$ai_input_tokens": input_tokens, "$ai_output_tokens": output_tokens, "$ai_total_tokens": input_tokens + output_tokens, @@ -667,7 +672,7 @@ def _handle_response_span( output_items = getattr(response, "output", None) if output_items: properties["$ai_output_choices"] = self._with_privacy_mode( - 
_safe_json(output_items) + _ensure_serializable(output_items) ) self._capture_event("$ai_generation", properties, distinct_id) @@ -690,7 +695,7 @@ def _handle_custom_span( ), "$ai_span_name": span_data.name, "$ai_span_type": "custom", - "$ai_custom_data": self._with_privacy_mode(_safe_json(span_data.data)), + "$ai_custom_data": self._with_privacy_mode(_ensure_serializable(span_data.data)), } self._capture_event("$ai_span", properties, distinct_id) @@ -723,7 +728,7 @@ def _handle_audio_span( # Add model config if available (pass-through property) if hasattr(span_data, "model_config") and span_data.model_config: - properties["model_config"] = _safe_json(span_data.model_config) + properties["model_config"] = _ensure_serializable(span_data.model_config) # Add time to first audio byte for speech spans (pass-through property) if hasattr(span_data, "first_content_at") and span_data.first_content_at: @@ -800,7 +805,7 @@ def _handle_generic_span( if hasattr(span_data, "export"): try: exported = span_data.export() - properties["$ai_span_data"] = _safe_json(exported) + properties["$ai_span_data"] = _ensure_serializable(exported) except Exception: pass From 7a534de1a82630b5eff7343a7d8a80a904c9e8b0 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:36:17 +0000 Subject: [PATCH 22/24] refactor: emit $ai_trace at trace end instead of start Move the $ai_trace event from on_trace_start to on_trace_end to capture full metadata including latency, matching the LangChain integration approach. on_trace_start now only stores metadata for use by spans. --- posthog/ai/openai_agents/processor.py | 36 ++++++++++++------- .../test/ai/openai_agents/test_processor.py | 21 +++++++++-- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 02fdf359..0c32b696 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -202,7 +202,7 @@ def _capture_event( log.debug(f"Failed to capture PostHog event: {e}") def on_trace_start(self, trace: Trace) -> None: - """Called when a new trace begins.""" + """Called when a new trace begins. Stores metadata for spans; the $ai_trace event is emitted in on_trace_end.""" try: self._evict_stale_entries() trace_id = trace.trace_id @@ -212,13 +212,32 @@ def on_trace_start(self, trace: Trace) -> None: distinct_id = self._get_distinct_id(trace) - # Store trace metadata for later (used by spans) + # Store trace metadata for later (used by spans and on_trace_end) self._trace_metadata[trace_id] = { "name": trace_name, "group_id": group_id, "metadata": metadata, "distinct_id": distinct_id, + "start_time": time.time(), } + except Exception as e: + log.debug(f"Error in on_trace_start: {e}") + + def on_trace_end(self, trace: Trace) -> None: + """Called when a trace completes. 
Emits the $ai_trace event with full metadata.""" + try: + trace_id = trace.trace_id + + # Pop stored metadata (also cleans up) + trace_info = self._trace_metadata.pop(trace_id, {}) + trace_name = trace_info.get("name") or trace.name + group_id = trace_info.get("group_id") or getattr(trace, "group_id", None) + metadata = trace_info.get("metadata") or getattr(trace, "metadata", None) + distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(trace) + + # Calculate trace-level latency + start_time = trace_info.get("start_time") + latency = (time.time() - start_time) if start_time else None properties = { "$ai_trace_id": trace_id, @@ -227,6 +246,9 @@ def on_trace_start(self, trace: Trace) -> None: "$ai_framework": "openai-agents", } + if latency is not None: + properties["$ai_latency"] = latency + # Include group_id for linking related traces (e.g., conversation threads) if group_id: properties["$ai_group_id"] = group_id @@ -243,16 +265,6 @@ def on_trace_start(self, trace: Trace) -> None: distinct_id=distinct_id or trace_id, properties=properties, ) - except Exception as e: - log.debug(f"Error in on_trace_start: {e}") - - def on_trace_end(self, trace: Trace) -> None: - """Called when a trace completes.""" - try: - trace_id = trace.trace_id - - # Clean up stored metadata - self._trace_metadata.pop(trace_id, None) except Exception as e: log.debug(f"Error in on_trace_end: {e}") diff --git a/posthog/test/ai/openai_agents/test_processor.py b/posthog/test/ai/openai_agents/test_processor.py index eafe83f0..99ad7b43 100644 --- a/posthog/test/ai/openai_agents/test_processor.py +++ b/posthog/test/ai/openai_agents/test_processor.py @@ -102,10 +102,18 @@ def resolver(trace): distinct_id = processor._get_distinct_id(mock_trace) assert distinct_id == "resolved-user" - def test_on_trace_start(self, processor, mock_client, mock_trace): - """Test that on_trace_start captures $ai_trace event.""" + def test_on_trace_start_stores_metadata(self, processor, mock_client, mock_trace): + """Test that on_trace_start stores metadata but does not capture an event.""" processor.on_trace_start(mock_trace) + mock_client.capture.assert_not_called() + assert mock_trace.trace_id in processor._trace_metadata + + def test_on_trace_end_captures_ai_trace(self, processor, mock_client, mock_trace): + """Test that on_trace_end captures $ai_trace event.""" + processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) + mock_client.capture.assert_called_once() call_kwargs = mock_client.capture.call_args[1] @@ -115,6 +123,7 @@ def test_on_trace_start(self, processor, mock_client, mock_trace): assert call_kwargs["properties"]["$ai_trace_name"] == "Test Workflow" assert call_kwargs["properties"]["$ai_provider"] == "openai" assert call_kwargs["properties"]["$ai_framework"] == "openai-agents" + assert "$ai_latency" in call_kwargs["properties"] def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): """Test that trace events use personless mode when no distinct_id is provided.""" @@ -123,6 +132,7 @@ def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["$process_person_profile"] is False @@ -184,17 +194,20 @@ def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace) ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert 
"$process_person_profile" not in call_kwargs["properties"] - def test_on_trace_end_clears_metadata(self, processor, mock_trace): + def test_on_trace_end_clears_metadata(self, processor, mock_client, mock_trace): """Test that on_trace_end clears stored trace metadata.""" processor.on_trace_start(mock_trace) assert mock_trace.trace_id in processor._trace_metadata processor.on_trace_end(mock_trace) assert mock_trace.trace_id not in processor._trace_metadata + # Also verify it captured the event + mock_client.capture.assert_called_once() def test_on_span_start_tracks_time(self, processor, mock_span): """Test that on_span_start records start time.""" @@ -630,6 +643,7 @@ def test_groups_included_in_events(self, mock_client, mock_trace, mock_span): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["groups"] == {"company": "acme", "team": "engineering"} @@ -643,6 +657,7 @@ def test_additional_properties_included(self, mock_client, mock_trace): ) processor.on_trace_start(mock_trace) + processor.on_trace_end(mock_trace) call_kwargs = mock_client.capture.call_args[1] assert call_kwargs["properties"]["environment"] == "production" From ea6cba36a09de39a8fd10de1b81fa58272e82c28 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 27 Jan 2026 20:45:52 +0000 Subject: [PATCH 23/24] style: fix ruff formatting --- posthog/ai/openai_agents/processor.py | 35 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/posthog/ai/openai_agents/processor.py b/posthog/ai/openai_agents/processor.py index 0c32b696..fcc51e45 100644 --- a/posthog/ai/openai_agents/processor.py +++ b/posthog/ai/openai_agents/processor.py @@ -324,11 +324,20 @@ def on_span_end(self, span: Span[Any]) -> None: error_type = "model_behavior_error" elif "UserError" in error_type_raw or "UserError" in error_message: error_type = "user_error" - elif "InputGuardrailTripwireTriggered" in error_type_raw or "InputGuardrailTripwireTriggered" in error_message: + elif ( + "InputGuardrailTripwireTriggered" in error_type_raw + or "InputGuardrailTripwireTriggered" in error_message + ): error_type = "input_guardrail_triggered" - elif "OutputGuardrailTripwireTriggered" in error_type_raw or "OutputGuardrailTripwireTriggered" in error_message: + elif ( + "OutputGuardrailTripwireTriggered" in error_type_raw + or "OutputGuardrailTripwireTriggered" in error_message + ): error_type = "output_guardrail_triggered" - elif "MaxTurnsExceeded" in error_type_raw or "MaxTurnsExceeded" in error_message: + elif ( + "MaxTurnsExceeded" in error_type_raw + or "MaxTurnsExceeded" in error_message + ): error_type = "max_turns_exceeded" error_properties = { @@ -498,7 +507,9 @@ def _handle_generation_span( # Extract token usage usage = span_data.usage or {} input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0 - output_tokens = usage.get("output_tokens") or usage.get("completion_tokens") or 0 + output_tokens = ( + usage.get("output_tokens") or usage.get("completion_tokens") or 0 + ) # Extract model config parameters model_config = span_data.model_config or {} @@ -520,7 +531,9 @@ def _handle_generation_span( "$ai_model": span_data.model, "$ai_model_parameters": model_params if model_params else None, "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)), - "$ai_output_choices": self._with_privacy_mode(_ensure_serializable(span_data.output)), + "$ai_output_choices": self._with_privacy_mode( + 
+                _ensure_serializable(span_data.output)
+            ),
             "$ai_input_tokens": input_tokens,
             "$ai_output_tokens": output_tokens,
             "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0),
@@ -556,8 +569,12 @@ def _handle_function_span(
             ),
             "$ai_span_name": span_data.name,
             "$ai_span_type": "tool",
-            "$ai_input_state": self._with_privacy_mode(_ensure_serializable(span_data.input)),
-            "$ai_output_state": self._with_privacy_mode(_ensure_serializable(span_data.output)),
+            "$ai_input_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.input)
+            ),
+            "$ai_output_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
         }

         if span_data.mcp_data:
@@ -707,7 +724,9 @@ def _handle_custom_span(
             ),
             "$ai_span_name": span_data.name,
             "$ai_span_type": "custom",
-            "$ai_custom_data": self._with_privacy_mode(_ensure_serializable(span_data.data)),
+            "$ai_custom_data": self._with_privacy_mode(
+                _ensure_serializable(span_data.data)
+            ),
         }

         self._capture_event("$ai_span", properties, distinct_id)

From f239609954ba67351a565c433750ed91f5ec49b0 Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Tue, 27 Jan 2026 21:11:12 +0000
Subject: [PATCH 24/24] fix: add TYPE_CHECKING imports for type hints in instrument()

---
 posthog/ai/openai_agents/__init__.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/posthog/ai/openai_agents/__init__.py b/posthog/ai/openai_agents/__init__.py
index 2c7f277c..2e1611cc 100644
--- a/posthog/ai/openai_agents/__init__.py
+++ b/posthog/ai/openai_agents/__init__.py
@@ -1,4 +1,11 @@
-from typing import Any, Callable, Dict, Optional, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
+
+if TYPE_CHECKING:
+    from agents.tracing import Trace
+
+    from posthog.client import Client

 try:
     import agents  # noqa: F401
@@ -13,8 +20,8 @@


 def instrument(
-    client: Optional["Client"] = None,
-    distinct_id: Optional[Union[str, Callable[["Trace"], Optional[str]]]] = None,
+    client: Optional[Client] = None,
+    distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None,
     privacy_mode: bool = False,
     groups: Optional[Dict[str, Any]] = None,
     properties: Optional[Dict[str, Any]] = None,
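
A note on the pattern patch 24 applies: with `from __future__ import annotations`, every annotation is stored as a string (PEP 563), so names imported only under `typing.TYPE_CHECKING` never need to exist at runtime. The sketch below is illustrative only and not part of the patches; the `describe` helper is invented for the example.

```python
from __future__ import annotations  # PEP 563: annotations become strings

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only static type checkers (mypy, pyright) evaluate this block; at
    # runtime TYPE_CHECKING is False, so the import is never executed.
    from agents.tracing import Trace


def describe(trace: Optional[Trace] = None) -> str:
    # Hypothetical helper, not in the patch series. The annotation above is
    # a plain string at runtime, so `Trace` is never looked up unless
    # something calls typing.get_type_hints() on this function.
    return "no trace" if trace is None else f"trace {trace.trace_id}"
```

This is why patch 24 can drop the quoted forward references (`"Client"`, `"Trace"`) from the `instrument()` signature: both imports are deferred to type-check time while the annotations stay fully checkable.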
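
To make the patch 22 behavior concrete, here is a rough end-to-end sketch of the new trace lifecycle against a mocked client, in the spirit of the tests above. The mock attribute values are assumptions standing in for the suite's real fixtures, which this series does not show.

```python
from unittest.mock import MagicMock

from posthog.ai.openai_agents import PostHogTracingProcessor

client = MagicMock()
processor = PostHogTracingProcessor(client=client, distinct_id="user-1")

trace = MagicMock()
trace.trace_id = "trace_abc123"  # assumed stand-ins for the real fixtures
trace.name = "Test Workflow"
trace.group_id = None
trace.metadata = None

processor.on_trace_start(trace)       # only stores name/distinct_id/start_time
client.capture.assert_not_called()    # nothing has been sent yet

processor.on_trace_end(trace)         # pops the metadata and computes latency
client.capture.assert_called_once()   # a single $ai_trace event
props = client.capture.call_args[1]["properties"]
assert "$ai_latency" in props         # measured from the stored start_time
```

One trade-off worth noting: emitting at trace end means a process that dies mid-run loses the trace event, but it is the only way to attach `$ai_latency` and the final metadata to a single event, and per the commit message it matches what the LangChain integration already does.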