From ceee294e65565f518fa8b2f8683b8f82c22d1247 Mon Sep 17 00:00:00 2001
From: Google Team Member
Date: Wed, 17 Jun 2026 12:54:14 -0700
Subject: [PATCH] feat: Update token usage reporting to include thoughts and cache tokens

PiperOrigin-RevId: 933874815
---
Notes (free-form area, ignored when the patch is applied):
  * gen_ai.usage.output_tokens is now candidatesTokenCount + thoughtsTokenCount,
    each treated as 0 when absent; the attribute is set only when at least one
    of the two counts is present.
  * thoughtsTokenCount is additionally reported on its own as
    gen_ai.usage.reasoning.output_tokens.
  * cachedContentTokenCount is reported as gen_ai.usage.cache_read.input_tokens.
  * The new test expects output_tokens == 35 for candidates=20, thoughts=15.

 .../com/google/adk/telemetry/Tracing.java     | 21 +++++++++-
 .../adk/telemetry/ContextPropagationTest.java | 40 +++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/com/google/adk/telemetry/Tracing.java b/core/src/main/java/com/google/adk/telemetry/Tracing.java
index 97e69d08b..f901484ea 100644
--- a/core/src/main/java/com/google/adk/telemetry/Tracing.java
+++ b/core/src/main/java/com/google/adk/telemetry/Tracing.java
@@ -114,6 +114,10 @@ public class Tracing {
       AttributeKey.longKey("gen_ai.usage.input_tokens");
   private static final AttributeKey<Long> GEN_AI_USAGE_OUTPUT_TOKENS =
       AttributeKey.longKey("gen_ai.usage.output_tokens");
+  private static final AttributeKey<Long> GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS =
+      AttributeKey.longKey("gen_ai.usage.cache_read.input_tokens");
+  private static final AttributeKey<Long> GEN_AI_USAGE_REASONING_OUTPUT_TOKENS =
+      AttributeKey.longKey("gen_ai.usage.reasoning.output_tokens");
 
   private static final AttributeKey<String> ADK_TOOL_CALL_ARGS =
       AttributeKey.stringKey("gcp.vertex.agent.tool_call_args");
@@ -335,10 +339,23 @@ public static void traceCallLlm(
               usage
                   .promptTokenCount()
                   .ifPresent(tokens -> span.setAttribute(GEN_AI_USAGE_INPUT_TOKENS, (long) tokens));
+              // According to OpenTelemetry Semantic Conventions:
+              // https://github.com/open-telemetry/semantic-conventions/blob/v1.41.0/docs/registry/attributes/gen-ai.md
+              // gen_ai.usage.reasoning.output_tokens (thoughts_token_count) SHOULD be included in
+              // gen_ai.usage.output_tokens.
+              Optional<Integer> candidates = usage.candidatesTokenCount();
+              Optional<Integer> thoughts = usage.thoughtsTokenCount();
+              if (candidates.isPresent() || thoughts.isPresent()) {
+                span.setAttribute(
+                    GEN_AI_USAGE_OUTPUT_TOKENS, (long) candidates.orElse(0) + thoughts.orElse(0));
+              }
+              thoughts.ifPresent(
+                  tokens -> span.setAttribute(GEN_AI_USAGE_REASONING_OUTPUT_TOKENS, (long) tokens));
               usage
-                  .candidatesTokenCount()
+                  .cachedContentTokenCount()
                   .ifPresent(
-                      tokens -> span.setAttribute(GEN_AI_USAGE_OUTPUT_TOKENS, (long) tokens));
+                      tokens ->
+                          span.setAttribute(GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, (long) tokens));
             });
     llmResponse
         .finishReason()
diff --git a/core/src/test/java/com/google/adk/telemetry/ContextPropagationTest.java b/core/src/test/java/com/google/adk/telemetry/ContextPropagationTest.java
index 331ae77b2..9b3a42ad8 100644
--- a/core/src/test/java/com/google/adk/telemetry/ContextPropagationTest.java
+++ b/core/src/test/java/com/google/adk/telemetry/ContextPropagationTest.java
@@ -358,6 +358,46 @@ public void testTraceCallLlm() {
     assertTrue(attrs.get(AttributeKey.stringKey("gcp.vertex.agent.llm_response")).contains("STOP"));
   }
 
+  @Test
+  public void testTraceCallLlm_withReasoningAndCacheTokens() {
+    Span span = tracer.spanBuilder("test-reasoning").startSpan();
+    try (Scope scope = span.makeCurrent()) {
+      LlmRequest llmRequest =
+          LlmRequest.builder()
+              .model("gemini-pro")
+              .contents(ImmutableList.of(Content.fromParts(Part.fromText("hello"))))
+              .config(GenerateContentConfig.builder().topP(0.9f).maxOutputTokens(100).build())
+              .build();
+      LlmResponse llmResponse =
+          LlmResponse.builder()
+              .content(Content.builder().parts(Part.fromText("world")).build())
+              .finishReason(new FinishReason(FinishReason.Known.STOP))
+              .usageMetadata(
+                  GenerateContentResponseUsageMetadata.builder()
+                      .promptTokenCount(10)
+                      .cachedContentTokenCount(5)
+                      .candidatesTokenCount(20)
+                      .thoughtsTokenCount(15)
+                      .totalTokenCount(50)
+                      .build())
+              .build();
+      Tracing.traceCallLlm(
+          span, buildInvocationContext(), "event-1", llmRequest, llmResponse, null);
+    } finally {
+      span.end();
+    }
+    List<SpanData> spans = openTelemetryRule.getSpans();
+    assertThat(spans).hasSize(1);
+    SpanData spanData = spans.get(0);
+    Attributes attrs = spanData.getAttributes();
+    assertEquals(10L, (long) attrs.get(AttributeKey.longKey("gen_ai.usage.input_tokens")));
+    assertEquals(35L, (long) attrs.get(AttributeKey.longKey("gen_ai.usage.output_tokens")));
+    assertEquals(
+        5L, (long) attrs.get(AttributeKey.longKey("gen_ai.usage.cache_read.input_tokens")));
+    assertEquals(
+        15L, (long) attrs.get(AttributeKey.longKey("gen_ai.usage.reasoning.output_tokens")));
+  }
+
   @Test
   public void testTraceSendData() {
     Span span = tracer.spanBuilder("test").startSpan();