From 3069a75a576c49e209f702a9f040ed6f75d726f5 Mon Sep 17 00:00:00 2001
From: Shyam Desigan <shyamdesigan@gmail.com>
Date: Sat, 20 Jun 2026 13:21:44 +0000
Subject: [PATCH] feat: add LiteLLM Responses API attribute extraction and test
 canary

Adds a LiteLLM instrumentation provider directory with attribute
extraction for the LiteLLM Responses API endpoint. The handler
normalizes ResponsesAPIResponse fields (input_tokens/output_tokens)
to the standard OTel span attribute format.

Partially addresses #1028
---
 .../providers/litellm/__init__.py             |   1 +
 .../providers/litellm/wrappers/__init__.py    |   1 +
 .../providers/litellm/wrappers/responses.py   | 143 ++++++++++++++++++
 .../providers/litellm_canary.py               |   7 +
 4 files changed, 152 insertions(+)
 create mode 100644 agentops/instrumentation/providers/litellm/__init__.py
 create mode 100644 agentops/instrumentation/providers/litellm/wrappers/__init__.py
 create mode 100644 agentops/instrumentation/providers/litellm/wrappers/responses.py

diff --git a/agentops/instrumentation/providers/litellm/__init__.py b/agentops/instrumentation/providers/litellm/__init__.py
new file mode 100644
index 000000000..b31b949c8
--- /dev/null
+++ b/agentops/instrumentation/providers/litellm/__init__.py
@@ -0,0 +1 @@
+# LiteLLM instrumentation provider
\ No newline at end of file
diff --git a/agentops/instrumentation/providers/litellm/wrappers/__init__.py b/agentops/instrumentation/providers/litellm/wrappers/__init__.py
new file mode 100644
index 000000000..c55ed13fa
--- /dev/null
+++ b/agentops/instrumentation/providers/litellm/wrappers/__init__.py
@@ -0,0 +1 @@
+# LiteLLM wrapper modules
\ No newline at end of file
diff --git a/agentops/instrumentation/providers/litellm/wrappers/responses.py b/agentops/instrumentation/providers/litellm/wrappers/responses.py
new file mode 100644
index 000000000..5e82a8b99
--- /dev/null
+++ b/agentops/instrumentation/providers/litellm/wrappers/responses.py
@@ -0,0 +1,143 @@
+"""Attribute extraction for LiteLLM Responses API calls.
+
+LiteLLM's success_callback = ["agentops"] goes through LiteLLM's own
+OpenTelemetry-based integration, which assumes chat completion format
+(ModelResponse). The Responses API (litellm.responses()) returns
+ResponsesAPIResponse objects with a different structure.
+
+This module provides attribute extraction for the Responses API response
+format when it arrives through the LiteLLM callback pipeline.
+"""
+
+import json
+import logging
+from typing import Any, Dict, Optional
+
+from agentops.instrumentation.common.attributes import AttributeMap
+from agentops.instrumentation.providers.openai.utils import is_openai_v1
+from agentops.instrumentation.providers.openai.wrappers.shared import (
+    model_as_dict,
+    should_send_prompts,
+)
+from agentops.semconv import SpanAttributes, LLMRequestTypeValues
+
+logger = logging.getLogger(__name__)
+
+
+def is_responses_api_response(obj: Any) -> bool:
+    """Tell whether ``obj`` is a Responses API result (by class name or marker)."""
+    if obj is None:
+        return False
+    known_names = ("ResponsesAPIResponse", "Response")
+    if type(obj).__name__ in known_names:
+        return True
+    marker = getattr(obj, "_response_type", None)
+    return marker == "responses"
+
+
+def handle_litellm_responses_attributes(
+    response_obj: Any,
+) -> AttributeMap:
+    """Extract attributes from a LiteLLM Responses API response object.
+
+    LiteLLM's Responses API endpoint returns ResponsesAPIResponse objects
+    that have a different structure from ModelResponse (chat completions).
+    Specifically:
+    - ``output`` instead of ``choices``
+    - ``usage`` with ``input_tokens``/``output_tokens`` instead of
+      ``prompt_tokens``/``completion_tokens``/``total_tokens``
+    - ``model`` at the top level (same as chat, but no ``choices[].message``)
+
+    This function normalizes those fields so they can be stored as OTel span
+    attributes. Message text is read from ``output_text`` (or ``text``)
+    content parts, and list-valued reasoning summaries are joined to a string.
+
+    Args:
+        response_obj: A ResponsesAPIResponse, a plain dict, or similar object.
+
+    Returns:
+        A dict of OTel span attributes for response metadata.
+    """
+    attributes: AttributeMap = {}
+
+    # Normalize to a dict. Dicts are checked first because dict subclasses
+    # also expose ``__dict__``, which would hide their actual items.
+    response_dict = {}
+    if isinstance(response_obj, dict):
+        response_dict = response_obj
+    elif hasattr(response_obj, "model_dump"):
+        response_dict = response_obj.model_dump()
+    elif hasattr(response_obj, "__dict__"):
+        response_dict = response_obj.__dict__
+
+    # Top-level response fields
+    response_id = response_dict.get("id")
+    if response_id:
+        attributes[SpanAttributes.LLM_RESPONSE_ID] = response_id
+
+    model_name = response_dict.get("model")
+    if model_name:
+        attributes[SpanAttributes.LLM_RESPONSE_MODEL] = model_name
+
+    # Usage — Responses API uses input_tokens/output_tokens
+    usage = response_dict.get("usage", {})
+    if usage and isinstance(usage, dict):
+        input_tokens = usage.get("input_tokens", 0) or 0
+        output_tokens = usage.get("output_tokens", 0) or 0
+        total_tokens = input_tokens + output_tokens
+
+        attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] = input_tokens
+        attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] = output_tokens
+        attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] = total_tokens
+
+        # Reasoning tokens (for o1/o3-style models)
+        output_details = usage.get("output_tokens_details", {})
+        if isinstance(output_details, dict):
+            reasoning_tokens = output_details.get("reasoning_tokens")
+            if reasoning_tokens is not None:
+                attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] = reasoning_tokens
+
+    # Output (equivalent to choices in chat completions)
+    output = response_dict.get("output", [])
+    if output and should_send_prompts():
+        prefix = SpanAttributes.LLM_COMPLETIONS
+        completion_idx = 0
+        for item in output:
+            if isinstance(item, dict):
+                item_type = item.get("type")
+            elif hasattr(item, "type"):
+                item_type = item.type
+                item = model_as_dict(item) if hasattr(item, "__dict__") else {}
+            else:
+                continue
+            if not isinstance(item, dict):
+                continue
+
+            text = ""
+            if item_type in ("message", "text"):
+                # Responses API text parts are typed ``output_text``; plain
+                # ``text`` is still accepted for compatibility.
+                content = item.get("content", [])
+                if isinstance(content, list):
+                    text = "".join(
+                        c.get("text") or ""
+                        for c in content
+                        if isinstance(c, dict) and c.get("type") in ("output_text", "text")
+                    )
+            elif item_type == "reasoning":
+                # ``summary`` is usually a list of parts; span attribute values
+                # must be primitives, so flatten it into one string.
+                text = item.get("summary", "")
+                if isinstance(text, list):
+                    parts = [(s.get("text") or "") if isinstance(s, dict) else str(s) for s in text]
+                    text = "\n".join(parts)
+            if not text:
+                continue
+
+            attributes[f"{prefix}.{completion_idx}.content"] = text
+            attributes[f"{prefix}.{completion_idx}.role"] = "assistant"
+            if item_type == "reasoning":
+                attributes[f"{prefix}.{completion_idx}.type"] = "reasoning"
+            completion_idx += 1
+
+    return attributes
diff --git a/tests/core_manual_tests/providers/litellm_canary.py b/tests/core_manual_tests/providers/litellm_canary.py
index 0b9cf98c2..ff5202bcd 100644
--- a/tests/core_manual_tests/providers/litellm_canary.py
+++ b/tests/core_manual_tests/providers/litellm_canary.py
@@ -7,8 +7,15 @@
 load_dotenv()
 agentops.init(default_tags=["litellm-provider-test"])
 
+# Test: chat completion (ModelResponse format)
 response = litellm.completion(model="gpt-3.5-turbo", messages=[{"content": "Hello, how are you?", "role": "user"}])
 
+# TODO: responses API (ResponsesAPIResponse format) is not exercised yet.
+# Note: litellm.responses() returns ResponsesAPIResponse objects which have
+# an ``output`` field instead of ``choices`` and usage with ``input_tokens``/
+# ``output_tokens`` instead of ``prompt_tokens``/``completion_tokens``.
+# Add a litellm.responses() call here to cover that attribute extraction path.
+
 stream_response = litellm.completion(
     model="gpt-3.5-turbo",
     messages=[{"content": "Hello, how are you?", "role": "user"}],