chore: initialize recovered claude workspace

This commit is contained in:
2026-04-02 15:29:01 +08:00
commit a10efa3b4b
1940 changed files with 506426 additions and 0 deletions

View File

@@ -0,0 +1,491 @@
/**
* Beta Session Tracing for Claude Code
*
* This module contains beta tracing features enabled when
* ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
*
* For external users, tracing is enabled in SDK/headless mode, or in
* interactive mode when the org is allowlisted via the
* tengu_trace_lantern GrowthBook gate.
* For ant users, tracing is enabled in all modes.
*
* Visibility Rules:
* | Content | External | Ant |
* |------------------|----------|------|
* | System prompts | ✅ | ✅ |
* | Model output | ✅ | ✅ |
* | Thinking output | ❌ | ✅ |
* | Tools | ✅ | ✅ |
* | new_context | ✅ | ✅ |
*
* Features:
* - Per-agent message tracking with hash-based deduplication
* - System prompt logging (once per unique hash)
* - Hook execution spans
* - Detailed new_context attributes for LLM requests
*/
import type { Span } from '@opentelemetry/api'
import { createHash } from 'crypto'
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
import type { AssistantMessage, UserMessage } from '../../types/message.js'
import { isEnvTruthy } from '../envUtils.js'
import { jsonParse, jsonStringify } from '../slowOperations.js'
import { logOTelEvent } from './events.js'
// Message type for API calls (UserMessage or AssistantMessage)
type APIMessage = UserMessage | AssistantMessage
/**
 * Track hashes we've already logged this session (system prompts, tools, etc).
 *
 * WHY: System prompts and tool schemas are large and rarely change within a session.
 * Sending full content on every request would be wasteful. Instead, we hash and
 * only log the full content once per unique hash.
 *
 * Cleared by clearBetaTracingState() after compaction.
 */
const seenHashes = new Set<string>()
/**
 * Track the last reported message hash per querySource (agent) for incremental context.
 *
 * WHY: When debugging traces, we want to see what NEW information was added each turn,
 * not the entire conversation history (which can be huge). By tracking the last message
 * we reported per agent, we can compute and send only the delta (new messages since
 * the last request). This is tracked per-agent (querySource) because different agents
 * (main thread, subagents, warmup requests) have independent conversation contexts.
 *
 * Cleared by clearBetaTracingState() after compaction.
 */
const lastReportedMessageHash = new Map<string, string>()
/**
 * Reset all per-session dedup/delta tracking.
 *
 * Called after compaction: once the conversation history has been rewritten,
 * the previously recorded hashes no longer correspond to live messages.
 */
export function clearBetaTracingState(): void {
  seenHashes.clear()
  lastReportedMessageHash.clear()
  // Nothing else to reset: all beta-tracing state lives in these two stores.
}
// Honeycomb rejects attribute values over 64KB; cap at 60KB to leave headroom.
const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
/**
 * Truncate content to fit within Honeycomb attribute-size limits.
 *
 * @param content - Raw string to (possibly) truncate.
 * @param maxSize - Maximum number of characters to keep (defaults to 60KB).
 * @returns The (possibly truncated) content and whether truncation occurred.
 */
export function truncateContent(
  content: string,
  maxSize: number = MAX_CONTENT_SIZE,
): { content: string; truncated: boolean } {
  if (content.length <= maxSize) {
    return { content, truncated: false }
  }
  // Derive the limit label from maxSize instead of hard-coding "60KB", so a
  // caller-supplied maxSize does not produce a misleading marker. For the
  // default maxSize this renders the same "60KB limit" text as before.
  const limitKb = Math.round(maxSize / 1024)
  return {
    content:
      content.slice(0, maxSize) +
      `\n\n[TRUNCATED - Content exceeds ${limitKb}KB limit]`,
    truncated: true,
  }
}
/**
 * Produce a compact content fingerprint: the first 12 hex characters (48 bits)
 * of the SHA-256 digest — plenty for per-session dedup purposes.
 */
function shortHash(content: string): string {
  const fullDigest = createHash('sha256').update(content).digest('hex')
  return fullDigest.substring(0, 12)
}
/**
 * Namespaced hash for a system prompt. The "sp_" prefix keeps these
 * distinguishable from message/tool hashes stored in the same dedup set.
 */
function hashSystemPrompt(systemPrompt: string): string {
  return 'sp_' + shortHash(systemPrompt)
}
/**
 * Namespaced hash for an API message, keyed off its serialized content blocks
 * (role and other envelope fields are not part of the hash).
 */
function hashMessage(message: APIMessage): string {
  const serialized = jsonStringify(message.message.content)
  return 'msg_' + shortHash(serialized)
}
// Matches text that consists solely of a single <system-reminder>…</system-reminder>
// wrapper (optionally with one newline just inside each tag).
const SYSTEM_REMINDER_REGEX =
  /^<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>$/
/**
 * If `text` is entirely a system reminder, return the trimmed inner content;
 * otherwise return null. An empty reminder body also yields null.
 */
function extractSystemReminderContent(text: string): string | null {
  const match = SYSTEM_REMINDER_REGEX.exec(text.trim())
  if (!match || !match[1]) {
    return null
  }
  return match[1].trim()
}
/**
 * Result of formatting messages - separates regular content from system reminders.
 */
interface FormattedMessages {
  // User text and tool results, formatted with [USER]/[TOOL RESULT: id] headers.
  contextParts: string[]
  // Inner text of <system-reminder> blocks, reported as a separate attribute.
  systemReminders: string[]
}
/**
 * Format user messages for new_context display, separating system reminders.
 * Only handles user messages (assistant messages are filtered out before this is called).
 */
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
  const result: FormattedMessages = { contextParts: [], systemReminders: [] }

  // Route one piece of text: reminders go to systemReminders, everything else
  // goes to contextParts under the supplied header line.
  const route = (text: string, header: string): void => {
    const reminder = extractSystemReminderContent(text)
    if (reminder) {
      result.systemReminders.push(reminder)
    } else {
      result.contextParts.push(`${header}\n${text}`)
    }
  }

  for (const message of messages) {
    const content = message.message.content
    if (typeof content === 'string') {
      route(content, '[USER]')
      continue
    }
    if (!Array.isArray(content)) {
      continue
    }
    for (const block of content) {
      if (block.type === 'text') {
        route(block.text, '[USER]')
      } else if (block.type === 'tool_result') {
        const resultContent =
          typeof block.content === 'string'
            ? block.content
            : jsonStringify(block.content)
        // Tool results can also contain system reminders (e.g., malware warning)
        route(resultContent, `[TOOL RESULT: ${block.tool_use_id}]`)
      }
    }
  }
  return result
}
/** Context describing what is new in an LLM request, for span attribution. */
export interface LLMRequestNewContext {
  /** System prompt (typically only on first request or if changed) */
  systemPrompt?: string
  /** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */
  querySource?: string
  /** Tool schemas sent with the request, as a JSON-serialized array */
  tools?: string
}
/**
 * Add beta attributes to an interaction span.
 * Adds new_context containing the raw user prompt, truncated to size limits.
 */
export function addBetaInteractionAttributes(
  span: Span,
  userPrompt: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }
  const result = truncateContent(`[USER PROMPT]\n${userPrompt}`)
  const attributes: Record<string, string | number | boolean> = {
    new_context: result.content,
  }
  if (result.truncated) {
    attributes.new_context_truncated = true
    attributes.new_context_original_length = userPrompt.length
  }
  span.setAttributes(attributes)
}
/**
 * Add beta attributes to an LLM request span.
 * Handles system prompt logging and new_context computation.
 *
 * @param span - Active LLM request span to annotate.
 * @param newContext - Optional system prompt / query source / serialized tools
 *   for this request.
 * @param messagesForAPI - Full message array sent to the API; used to compute
 *   the per-agent delta since the last reported request.
 *
 * Side effects: mutates module-level `seenHashes` (prompt/tool dedup) and
 * `lastReportedMessageHash` (per-querySource delta tracking), and fires
 * fire-and-forget logOTelEvent calls for unseen prompts/tools.
 */
export function addBetaLLMRequestAttributes(
  span: Span,
  newContext?: LLMRequestNewContext,
  messagesForAPI?: APIMessage[],
): void {
  if (!isBetaTracingEnabled()) {
    return
  }
  // Add system prompt info to the span
  if (newContext?.systemPrompt) {
    const promptHash = hashSystemPrompt(newContext.systemPrompt)
    const preview = newContext.systemPrompt.slice(0, 500)
    // Always add hash, preview, and length to the span
    span.setAttribute('system_prompt_hash', promptHash)
    span.setAttribute('system_prompt_preview', preview)
    span.setAttribute('system_prompt_length', newContext.systemPrompt.length)
    // Log the full system prompt only once per unique hash this session
    if (!seenHashes.has(promptHash)) {
      seenHashes.add(promptHash)
      // Truncate for the log if needed
      const { content: truncatedPrompt, truncated } = truncateContent(
        newContext.systemPrompt,
      )
      // Fire-and-forget: event logging must not block the request path
      void logOTelEvent('system_prompt', {
        system_prompt_hash: promptHash,
        system_prompt: truncatedPrompt,
        system_prompt_length: String(newContext.systemPrompt.length),
        ...(truncated && { system_prompt_truncated: 'true' }),
      })
    }
  }
  // Add tools info to the span
  if (newContext?.tools) {
    try {
      const toolsArray = jsonParse(newContext.tools) as Record<
        string,
        unknown
      >[]
      // Build array of {name, hash} for each tool
      const toolsWithHashes = toolsArray.map(tool => {
        const toolJson = jsonStringify(tool)
        const toolHash = shortHash(toolJson)
        return {
          name: typeof tool.name === 'string' ? tool.name : 'unknown',
          hash: toolHash,
          json: toolJson,
        }
      })
      // Set span attribute with array of name/hash pairs
      span.setAttribute(
        'tools',
        jsonStringify(
          toolsWithHashes.map(({ name, hash }) => ({ name, hash })),
        ),
      )
      span.setAttribute('tools_count', toolsWithHashes.length)
      // Log each tool's full description once per unique hash
      for (const { name, hash, json } of toolsWithHashes) {
        // "tool_" prefix keeps tool hashes from colliding with prompt hashes
        if (!seenHashes.has(`tool_${hash}`)) {
          seenHashes.add(`tool_${hash}`)
          const { content: truncatedTool, truncated } = truncateContent(json)
          void logOTelEvent('tool', {
            tool_name: sanitizeToolNameForAnalytics(name),
            tool_hash: hash,
            tool: truncatedTool,
            ...(truncated && { tool_truncated: 'true' }),
          })
        }
      }
    } catch {
      // If parsing fails, log the raw tools string
      span.setAttribute('tools_parse_error', true)
    }
  }
  // Add new_context using hash-based tracking (visible to all users)
  if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
    const querySource = newContext.querySource
    const lastHash = lastReportedMessageHash.get(querySource)
    // Find where the last reported message is in the array
    let startIndex = 0
    if (lastHash) {
      for (let i = 0; i < messagesForAPI.length; i++) {
        const msg = messagesForAPI[i]
        if (msg && hashMessage(msg) === lastHash) {
          startIndex = i + 1 // Start after the last reported message
          break
        }
      }
      // If lastHash not found, startIndex stays 0 (send everything)
    }
    // Get new messages (filter out assistant messages - we only want user input/tool results)
    const newMessages = messagesForAPI
      .slice(startIndex)
      .filter((m): m is UserMessage => m.type === 'user')
    if (newMessages.length > 0) {
      // Format new messages, separating system reminders from regular content
      const { contextParts, systemReminders } =
        formatMessagesForContext(newMessages)
      // Set new_context (regular user content and tool results)
      if (contextParts.length > 0) {
        const fullContext = contextParts.join('\n\n---\n\n')
        const { content: truncatedContext, truncated } =
          truncateContent(fullContext)
        span.setAttributes({
          new_context: truncatedContext,
          new_context_message_count: newMessages.length,
          ...(truncated && {
            new_context_truncated: true,
            new_context_original_length: fullContext.length,
          }),
        })
      }
      // Set system_reminders as a separate attribute
      if (systemReminders.length > 0) {
        const fullReminders = systemReminders.join('\n\n---\n\n')
        const { content: truncatedReminders, truncated: remindersTruncated } =
          truncateContent(fullReminders)
        span.setAttributes({
          system_reminders: truncatedReminders,
          system_reminders_count: systemReminders.length,
          ...(remindersTruncated && {
            system_reminders_truncated: true,
            system_reminders_original_length: fullReminders.length,
          }),
        })
      }
      // Update last reported hash to the last message in the array
      // (NOTE: updated only when something new was reported, so an all-assistant
      // delta will be re-examined on the next request)
      const lastMessage = messagesForAPI[messagesForAPI.length - 1]
      if (lastMessage) {
        lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
      }
    }
  }
}
/**
 * Add beta attributes to endLLMRequestSpan.
 * Handles model_output and thinking_output truncation.
 */
export function addBetaLLMResponseAttributes(
  endAttributes: Record<string, string | number | boolean>,
  metadata?: {
    modelOutput?: string
    thinkingOutput?: string
  },
): void {
  if (!isBetaTracingEnabled() || !metadata) {
    return
  }

  // Write one response field into endAttributes, with truncation markers.
  const record = (key: string, raw: string): void => {
    const { content, truncated } = truncateContent(raw)
    endAttributes[`response.${key}`] = content
    if (truncated) {
      endAttributes[`response.${key}_truncated`] = true
      endAttributes[`response.${key}_original_length`] = raw.length
    }
  }

  // model_output (text content) is visible to all users.
  if (metadata.modelOutput !== undefined) {
    record('model_output', metadata.modelOutput)
  }
  // thinking_output is restricted to ant users (see visibility table above).
  if (
    process.env.USER_TYPE === 'ant' &&
    metadata.thinkingOutput !== undefined
  ) {
    record('thinking_output', metadata.thinkingOutput)
  }
}
/**
 * Add beta attributes to startToolSpan.
 * Adds tool_input with the serialized tool input, truncated to size limits.
 */
export function addBetaToolInputAttributes(
  span: Span,
  toolName: string,
  toolInput: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }
  const labeled = `[TOOL INPUT: ${toolName}]\n${toolInput}`
  const { content, truncated } = truncateContent(labeled)
  const attributes: Record<string, string | number | boolean> = {
    tool_input: content,
  }
  if (truncated) {
    attributes.tool_input_truncated = true
    attributes.tool_input_original_length = toolInput.length
  }
  span.setAttributes(attributes)
}
/**
 * Add beta attributes to endToolSpan.
 * Adds new_context with the tool result, truncated to size limits.
 */
export function addBetaToolResultAttributes(
  endAttributes: Record<string, string | number | boolean>,
  toolName: string | number | boolean,
  toolResult: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }
  const { content, truncated } = truncateContent(
    `[TOOL RESULT: ${toolName}]\n${toolResult}`,
  )
  endAttributes.new_context = content
  if (!truncated) {
    return
  }
  endAttributes.new_context_truncated = true
  endAttributes.new_context_original_length = toolResult.length
}

View File

@@ -0,0 +1,252 @@
import type { Attributes, HrTime } from '@opentelemetry/api'
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
import {
AggregationTemporality,
type MetricData,
type DataPoint as OTelDataPoint,
type PushMetricExporter,
type ResourceMetrics,
} from '@opentelemetry/sdk-metrics'
import axios from 'axios'
import { checkMetricsEnabled } from 'src/services/api/metricsOptOut.js'
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
import { getSubscriptionType, isClaudeAISubscriber } from '../auth.js'
import { checkHasTrustDialogAccepted } from '../config.js'
import { logForDebugging } from '../debug.js'
import { errorMessage, toError } from '../errors.js'
import { getAuthHeaders } from '../http.js'
import { logError } from '../log.js'
import { jsonStringify } from '../slowOperations.js'
import { getClaudeCodeUserAgent } from '../userAgent.js'
// A single exported data point; attribute values are stringified for BigQuery.
type DataPoint = {
  attributes: Record<string, string>
  value: number
  timestamp: string // ISO-8601, derived from the OTel HrTime
}
// One metric stream with its collected data points.
type Metric = {
  name: string
  description?: string
  unit?: string
  data_points: DataPoint[]
}
// Request body for the internal /api/claude_code/metrics endpoint.
type InternalMetricsPayload = {
  resource_attributes: Record<string, string>
  metrics: Metric[]
}
/**
 * Pushes OTel metrics to Anthropic's internal (BigQuery-backed) metrics
 * endpoint via HTTP POST.
 *
 * Export is skipped (reported as SUCCESS) when trust is not yet established
 * in interactive mode or when the organization has opted out of metrics.
 * In-flight exports are tracked so forceFlush/shutdown can await them.
 */
export class BigQueryMetricsExporter implements PushMetricExporter {
  private readonly endpoint: string
  private readonly timeout: number
  /** Promises for exports still in flight; awaited by forceFlush(). */
  private pendingExports: Promise<void>[] = []
  private isShutdown = false

  constructor(options: { timeout?: number } = {}) {
    const defaultEndpoint = 'https://api.anthropic.com/api/claude_code/metrics'
    // Ant users may redirect metrics to an internal endpoint via env override.
    if (
      process.env.USER_TYPE === 'ant' &&
      process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT
    ) {
      this.endpoint =
        process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT +
        '/api/claude_code/metrics'
    } else {
      this.endpoint = defaultEndpoint
    }
    // ?? instead of ||: an explicit timeout of 0 (axios: "no timeout") is
    // honored rather than silently replaced with the 5s default.
    this.timeout = options.timeout ?? 5000
  }

  /**
   * PushMetricExporter entry point. Kicks off an async export and tracks the
   * promise; the result is delivered via resultCallback from doExport().
   */
  async export(
    metrics: ResourceMetrics,
    resultCallback: (result: ExportResult) => void,
  ): Promise<void> {
    if (this.isShutdown) {
      resultCallback({
        code: ExportResultCode.FAILED,
        error: new Error('Exporter has been shutdown'),
      })
      return
    }
    const exportPromise = this.doExport(metrics, resultCallback)
    this.pendingExports.push(exportPromise)
    // Clean up completed exports
    void exportPromise.finally(() => {
      const index = this.pendingExports.indexOf(exportPromise)
      if (index > -1) {
        void this.pendingExports.splice(index, 1)
      }
    })
  }

  /** Performs one export: gate checks, payload transform, authenticated POST. */
  private async doExport(
    metrics: ResourceMetrics,
    resultCallback: (result: ExportResult) => void,
  ): Promise<void> {
    try {
      // Skip if trust not established in interactive mode
      // This prevents triggering apiKeyHelper before trust dialog
      const hasTrust =
        checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
      if (!hasTrust) {
        logForDebugging(
          'BigQuery metrics export: trust not established, skipping',
        )
        resultCallback({ code: ExportResultCode.SUCCESS })
        return
      }
      // Check organization-level metrics opt-out
      const metricsStatus = await checkMetricsEnabled()
      if (!metricsStatus.enabled) {
        logForDebugging('Metrics export disabled by organization setting')
        resultCallback({ code: ExportResultCode.SUCCESS })
        return
      }
      const payload = this.transformMetricsForInternal(metrics)
      const authResult = getAuthHeaders()
      if (authResult.error) {
        logForDebugging(`Metrics export failed: ${authResult.error}`)
        resultCallback({
          code: ExportResultCode.FAILED,
          error: new Error(authResult.error),
        })
        return
      }
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
        'User-Agent': getClaudeCodeUserAgent(),
        ...authResult.headers,
      }
      const response = await axios.post(this.endpoint, payload, {
        timeout: this.timeout,
        headers,
      })
      logForDebugging('BigQuery metrics exported successfully')
      logForDebugging(
        `BigQuery API Response: ${jsonStringify(response.data, null, 2)}`,
      )
      resultCallback({ code: ExportResultCode.SUCCESS })
    } catch (error) {
      logForDebugging(`BigQuery metrics export failed: ${errorMessage(error)}`)
      logError(error)
      resultCallback({
        code: ExportResultCode.FAILED,
        error: toError(error),
      })
    }
  }

  /**
   * Convert OTel ResourceMetrics into the internal payload shape:
   * a flat resource-attribute map plus flattened metric streams.
   */
  private transformMetricsForInternal(
    metrics: ResourceMetrics,
  ): InternalMetricsPayload {
    const attrs = metrics.resource.attributes
    const resourceAttributes: Record<string, string> = {
      'service.name': (attrs['service.name'] as string) || 'claude-code',
      'service.version': (attrs['service.version'] as string) || 'unknown',
      'os.type': (attrs['os.type'] as string) || 'unknown',
      'os.version': (attrs['os.version'] as string) || 'unknown',
      'host.arch': (attrs['host.arch'] as string) || 'unknown',
      'aggregation.temporality':
        this.selectAggregationTemporality() === AggregationTemporality.DELTA
          ? 'delta'
          : 'cumulative',
    }
    // Only add wsl.version if it exists (omit instead of default)
    if (attrs['wsl.version']) {
      resourceAttributes['wsl.version'] = attrs['wsl.version'] as string
    }
    // Add customer type and subscription type
    if (isClaudeAISubscriber()) {
      resourceAttributes['user.customer_type'] = 'claude_ai'
      const subscriptionType = getSubscriptionType()
      if (subscriptionType) {
        resourceAttributes['user.subscription_type'] = subscriptionType
      }
    } else {
      resourceAttributes['user.customer_type'] = 'api'
    }
    const transformed = {
      resource_attributes: resourceAttributes,
      metrics: metrics.scopeMetrics.flatMap(scopeMetric =>
        scopeMetric.metrics.map(metric => ({
          name: metric.descriptor.name,
          description: metric.descriptor.description,
          unit: metric.descriptor.unit,
          data_points: this.extractDataPoints(metric),
        })),
      ),
    }
    return transformed
  }

  /**
   * Flatten a metric's data points. Non-numeric points (e.g. histograms) are
   * dropped — only scalar values are supported by the internal endpoint.
   */
  private extractDataPoints(metric: MetricData): DataPoint[] {
    const dataPoints = metric.dataPoints || []
    return dataPoints
      .filter(
        (point): point is OTelDataPoint<number> =>
          typeof point.value === 'number',
      )
      .map(point => ({
        attributes: this.convertAttributes(point.attributes),
        value: point.value,
        timestamp: this.hrTimeToISOString(
          point.endTime || point.startTime || [Date.now() / 1000, 0],
        ),
      }))
  }

  /** Marks the exporter shut down, then flushes any in-flight exports. */
  async shutdown(): Promise<void> {
    this.isShutdown = true
    await this.forceFlush()
    logForDebugging('BigQuery metrics exporter shutdown complete')
  }

  /** Waits for all in-flight exports to settle. */
  async forceFlush(): Promise<void> {
    await Promise.all(this.pendingExports)
    logForDebugging('BigQuery metrics exporter flush complete')
  }

  /** Stringify OTel attribute values, dropping null/undefined entries. */
  private convertAttributes(
    attributes: Attributes | undefined,
  ): Record<string, string> {
    const result: Record<string, string> = {}
    if (attributes) {
      for (const [key, value] of Object.entries(attributes)) {
        if (value !== undefined && value !== null) {
          result[key] = String(value)
        }
      }
    }
    return result
  }

  /** Convert an OTel [seconds, nanoseconds] HrTime to an ISO-8601 string. */
  private hrTimeToISOString(hrTime: HrTime): string {
    const [seconds, nanoseconds] = hrTime
    const date = new Date(seconds * 1000 + nanoseconds / 1000000)
    return date.toISOString()
  }

  selectAggregationTemporality(): AggregationTemporality {
    // DO NOT CHANGE THIS TO CUMULATIVE
    // It would mess up the aggregation of metrics
    // for CC Productivity metrics dashboard
    return AggregationTemporality.DELTA
  }
}

View File

@@ -0,0 +1,75 @@
import type { Attributes } from '@opentelemetry/api'
import { getEventLogger, getPromptId } from 'src/bootstrap/state.js'
import { logForDebugging } from '../debug.js'
import { isEnvTruthy } from '../envUtils.js'
import { getTelemetryAttributes } from '../telemetryAttributes.js'
// Monotonically increasing counter for ordering events within a session
let eventSequence = 0
// Track whether we've already warned about a null event logger to avoid spamming
let hasWarnedNoEventLogger = false
// True when the user has opted in (via OTEL_LOG_USER_PROMPTS) to exporting
// raw prompt content with telemetry events.
function isUserPromptLoggingEnabled() {
  return isEnvTruthy(process.env.OTEL_LOG_USER_PROMPTS)
}
// Returns content unchanged when prompt logging is enabled; otherwise a
// '<REDACTED>' placeholder so event shape stays stable without leaking text.
export function redactIfDisabled(content: string): string {
  return isUserPromptLoggingEnabled() ? content : '<REDACTED>'
}
/**
 * Emit a single OTel log record named `claude_code.<eventName>`.
 *
 * Attaches shared telemetry attributes, a timestamp, a per-session sequence
 * number, the current prompt id (events only — too high-cardinality for
 * metrics), optional workspace paths, and caller-supplied string metadata.
 * Silently drops events when no event logger is initialized (warning once)
 * or when running under NODE_ENV=test.
 */
export async function logOTelEvent(
  eventName: string,
  metadata: { [key: string]: string | undefined } = {},
): Promise<void> {
  const eventLogger = getEventLogger()
  if (!eventLogger) {
    // Warn exactly once per process so a missing logger doesn't spam output.
    if (!hasWarnedNoEventLogger) {
      hasWarnedNoEventLogger = true
      logForDebugging(
        `[3P telemetry] Event dropped (no event logger initialized): ${eventName}`,
        { level: 'warn' },
      )
    }
    return
  }
  // Skip logging in test environment
  if (process.env.NODE_ENV === 'test') {
    return
  }

  const attributes: Attributes = {
    ...getTelemetryAttributes(),
    'event.name': eventName,
    'event.timestamp': new Date().toISOString(),
    'event.sequence': eventSequence++,
  }

  // Workspace directory from the desktop app (host path). Events only —
  // filesystem paths are too high-cardinality for metric dimensions, and
  // the BQ metrics pipeline must never see them.
  const workspaceDir = process.env.CLAUDE_CODE_WORKSPACE_HOST_PATHS
  if (workspaceDir) {
    attributes['workspace.host_paths'] = workspaceDir.split('|')
  }
  // Add prompt ID to events (but not metrics, where it would cause unbounded cardinality)
  const promptId = getPromptId()
  if (promptId) {
    attributes['prompt.id'] = promptId
  }
  // Merge caller metadata - all values are already strings
  Object.entries(metadata).forEach(([key, value]) => {
    if (value !== undefined) {
      attributes[key] = value
    }
  })

  // Emit log record as an event
  eventLogger.emit({
    body: `claude_code.${eventName}`,
    attributes,
  })
}

View File

@@ -0,0 +1,825 @@
import { DiagLogLevel, diag, trace } from '@opentelemetry/api'
import { logs } from '@opentelemetry/api-logs'
// OTLP/Prometheus exporters are dynamically imported inside the protocol
// switch statements below. A process uses at most one protocol variant per
// signal, but static imports would load all 6 (~1.2MB) on every startup.
import {
envDetector,
hostDetector,
osDetector,
resourceFromAttributes,
} from '@opentelemetry/resources'
import {
BatchLogRecordProcessor,
ConsoleLogRecordExporter,
LoggerProvider,
} from '@opentelemetry/sdk-logs'
import {
ConsoleMetricExporter,
MeterProvider,
PeriodicExportingMetricReader,
} from '@opentelemetry/sdk-metrics'
import {
BasicTracerProvider,
BatchSpanProcessor,
ConsoleSpanExporter,
} from '@opentelemetry/sdk-trace-base'
import {
ATTR_SERVICE_NAME,
ATTR_SERVICE_VERSION,
SEMRESATTRS_HOST_ARCH,
} from '@opentelemetry/semantic-conventions'
import { HttpsProxyAgent } from 'https-proxy-agent'
import {
getLoggerProvider,
getMeterProvider,
getTracerProvider,
setEventLogger,
setLoggerProvider,
setMeterProvider,
setTracerProvider,
} from 'src/bootstrap/state.js'
import {
getOtelHeadersFromHelper,
getSubscriptionType,
is1PApiCustomer,
isClaudeAISubscriber,
} from 'src/utils/auth.js'
import { getPlatform, getWslVersion } from 'src/utils/platform.js'
import { getCACertificates } from '../caCerts.js'
import { registerCleanup } from '../cleanupRegistry.js'
import { getHasFormattedOutput, logForDebugging } from '../debug.js'
import { isEnvTruthy } from '../envUtils.js'
import { errorMessage } from '../errors.js'
import { getMTLSConfig } from '../mtls.js'
import { getProxyUrl, shouldBypassProxy } from '../proxy.js'
import { getSettings_DEPRECATED } from '../settings/settings.js'
import { jsonStringify } from '../slowOperations.js'
import { profileCheckpoint } from '../startupProfiler.js'
import { isBetaTracingEnabled } from './betaSessionTracing.js'
import { BigQueryMetricsExporter } from './bigqueryExporter.js'
import { ClaudeCodeDiagLogger } from './logger.js'
import { initializePerfettoTracing } from './perfettoTracing.js'
import {
endInteractionSpan,
isEnhancedTelemetryEnabled,
} from './sessionTracing.js'
// Default export cadences (ms) for each signal, used when the corresponding
// OTEL export-interval env vars / batch-processor delays are not configured.
const DEFAULT_METRICS_EXPORT_INTERVAL_MS = 60000
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 5000
const DEFAULT_TRACES_EXPORT_INTERVAL_MS = 5000
// Distinguishes telemetry-init timeouts from other failures.
class TelemetryTimeoutError extends Error {}

/**
 * Returns a promise that rejects with a TelemetryTimeoutError after `ms`.
 * The timer is unref'd so it never keeps the process alive on its own.
 */
function telemetryTimeout(ms: number, message: string): Promise<never> {
  return new Promise((_, reject) => {
    const timer = setTimeout(() => {
      reject(new TelemetryTimeoutError(message))
    }, ms)
    timer.unref()
  })
}
/**
 * Apply environment-derived telemetry defaults before SDK initialization.
 *
 * For ant users, copies build-time ANT_-prefixed OTEL settings onto the
 * standard OTEL env vars (only when the ANT_ variant is set). Then defaults
 * the metrics temporality preference to 'delta' when unset.
 */
export function bootstrapTelemetry() {
  if (process.env.USER_TYPE === 'ant') {
    // Standard OTEL vars that may be overridden by ANT_-prefixed build-time values.
    const overridable = [
      'OTEL_METRICS_EXPORTER',
      'OTEL_LOGS_EXPORTER',
      'OTEL_TRACES_EXPORTER',
      'OTEL_EXPORTER_OTLP_PROTOCOL',
      'OTEL_EXPORTER_OTLP_ENDPOINT',
      'OTEL_EXPORTER_OTLP_HEADERS',
    ]
    for (const name of overridable) {
      const antValue = process.env[`ANT_${name}`]
      if (antValue) {
        process.env[name] = antValue
      }
    }
  }
  // Default temporality to 'delta' because it's the saner default.
  if (!process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE) {
    process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = 'delta'
  }
}
// Per OTEL spec, "none" means "no automatically configured exporter for this signal".
// https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#exporter-selection
/**
 * Parse a comma-separated OTEL_*_EXPORTER value into exporter type names.
 * Trims whitespace around each entry, drops empty entries and the special
 * "none" value. Returns [] for undefined/empty input.
 */
export function parseExporterTypes(value: string | undefined): string[] {
  return (value || '')
    .split(',')
    .map(t => t.trim())
    // Drop empty tokens AFTER trimming, so inputs like "otlp, ,console"
    // cannot yield a bogus '' exporter type (the old order filtered the
    // whitespace-only token before trimming and let '' through).
    .filter(t => t !== '' && t !== 'none')
}
/**
 * Build metric readers from OTEL_METRICS_EXPORTER ('console', 'otlp',
 * and/or 'prometheus'). OTLP protocol selection honors
 * OTEL_EXPORTER_OTLP_METRICS_PROTOCOL first, falling back to
 * OTEL_EXPORTER_OTLP_PROTOCOL. Exporter modules are lazy-imported so only
 * the selected protocol variant is loaded.
 *
 * @returns Metric readers; push exporters are wrapped in a
 *   PeriodicExportingMetricReader using OTEL_METRIC_EXPORT_INTERVAL.
 * @throws on an unknown exporter type or protocol.
 */
async function getOtlpReaders() {
  const exporterTypes = parseExporterTypes(process.env.OTEL_METRICS_EXPORTER)
  const exportInterval = parseInt(
    process.env.OTEL_METRIC_EXPORT_INTERVAL ||
      DEFAULT_METRICS_EXPORT_INTERVAL_MS.toString(),
  )
  const exporters = []
  for (const exporterType of exporterTypes) {
    if (exporterType === 'console') {
      // Custom console exporter that shows resource attributes
      const consoleExporter = new ConsoleMetricExporter()
      const originalExport = consoleExporter.export.bind(consoleExporter)
      consoleExporter.export = (metrics, callback) => {
        // Log resource attributes once at the start
        if (metrics.resource && metrics.resource.attributes) {
          // The console exporter is for debugging, so console output is intentional here
          logForDebugging('\n=== Resource Attributes ===')
          logForDebugging(jsonStringify(metrics.resource.attributes))
          logForDebugging('===========================\n')
        }
        return originalExport(metrics, callback)
      }
      exporters.push(consoleExporter)
    } else if (exporterType === 'otlp') {
      const protocol =
        process.env.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL?.trim() ||
        process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
      const httpConfig = getOTLPExporterConfig()
      switch (protocol) {
        case 'grpc': {
          // Lazy-import to keep @grpc/grpc-js (~700KB) out of the telemetry chunk
          // when the protocol is http/protobuf (ant default) or http/json.
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-grpc'
          )
          exporters.push(new OTLPMetricExporter())
          break
        }
        case 'http/json': {
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-http'
          )
          exporters.push(new OTLPMetricExporter(httpConfig))
          break
        }
        case 'http/protobuf': {
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-proto'
          )
          exporters.push(new OTLPMetricExporter(httpConfig))
          break
        }
        default:
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
      }
    } else if (exporterType === 'prometheus') {
      const { PrometheusExporter } = await import(
        '@opentelemetry/exporter-prometheus'
      )
      exporters.push(new PrometheusExporter())
    } else {
      // Fixed: this message previously blamed the *_PROTOCOL vars (copy-paste
      // from the protocol switch); the exporter type comes from
      // OTEL_METRICS_EXPORTER, matching the logs/traces variants.
      throw new Error(
        `Unknown exporter type set in OTEL_METRICS_EXPORTER env var: ${exporterType}`,
      )
    }
  }
  return exporters.map(exporter => {
    // Push exporters need a periodic reader; PrometheusExporter is itself a
    // pull-based MetricReader (no `export` method) and is returned as-is.
    if ('export' in exporter) {
      return new PeriodicExportingMetricReader({
        exporter,
        exportIntervalMillis: exportInterval,
      })
    }
    return exporter
  })
}
/**
 * Build log exporters from OTEL_LOGS_EXPORTER ('console' and/or 'otlp').
 * OTLP protocol selection honors OTEL_EXPORTER_OTLP_LOGS_PROTOCOL first,
 * falling back to OTEL_EXPORTER_OTLP_PROTOCOL. Protocol-specific exporter
 * modules are lazy-imported so only the selected variant is loaded.
 * @throws on an unknown exporter type or protocol.
 */
async function getOtlpLogExporters() {
  const exporterTypes = parseExporterTypes(process.env.OTEL_LOGS_EXPORTER)
  const protocol =
    process.env.OTEL_EXPORTER_OTLP_LOGS_PROTOCOL?.trim() ||
    process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
  const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
  logForDebugging(
    `[3P telemetry] getOtlpLogExporters: types=${jsonStringify(exporterTypes)}, protocol=${protocol}, endpoint=${endpoint}`,
  )
  const exporters = []
  for (const exporterType of exporterTypes) {
    if (exporterType === 'console') {
      exporters.push(new ConsoleLogRecordExporter())
    } else if (exporterType === 'otlp') {
      const httpConfig = getOTLPExporterConfig()
      switch (protocol) {
        case 'grpc': {
          // Lazy-import: keeps gRPC deps out of the bundle for HTTP protocols
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-grpc'
          )
          exporters.push(new OTLPLogExporter())
          break
        }
        case 'http/json': {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-http'
          )
          exporters.push(new OTLPLogExporter(httpConfig))
          break
        }
        case 'http/protobuf': {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-proto'
          )
          exporters.push(new OTLPLogExporter(httpConfig))
          break
        }
        default:
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_LOGS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
      }
    } else {
      throw new Error(
        `Unknown exporter type set in OTEL_LOGS_EXPORTER env var: ${exporterType}`,
      )
    }
  }
  return exporters
}
/**
 * Build trace exporters from OTEL_TRACES_EXPORTER ('console' and/or 'otlp').
 * OTLP protocol selection honors OTEL_EXPORTER_OTLP_TRACES_PROTOCOL first,
 * falling back to OTEL_EXPORTER_OTLP_PROTOCOL. Protocol-specific exporter
 * modules are lazy-imported so only the selected variant is loaded.
 * @throws on an unknown exporter type or protocol.
 */
async function getOtlpTraceExporters() {
  const exporterTypes = parseExporterTypes(process.env.OTEL_TRACES_EXPORTER)
  const exporters = []
  for (const exporterType of exporterTypes) {
    if (exporterType === 'console') {
      exporters.push(new ConsoleSpanExporter())
    } else if (exporterType === 'otlp') {
      const protocol =
        process.env.OTEL_EXPORTER_OTLP_TRACES_PROTOCOL?.trim() ||
        process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
      const httpConfig = getOTLPExporterConfig()
      switch (protocol) {
        case 'grpc': {
          // Lazy-import: keeps gRPC deps out of the bundle for HTTP protocols
          const { OTLPTraceExporter } = await import(
            '@opentelemetry/exporter-trace-otlp-grpc'
          )
          exporters.push(new OTLPTraceExporter())
          break
        }
        case 'http/json': {
          const { OTLPTraceExporter } = await import(
            '@opentelemetry/exporter-trace-otlp-http'
          )
          exporters.push(new OTLPTraceExporter(httpConfig))
          break
        }
        case 'http/protobuf': {
          const { OTLPTraceExporter } = await import(
            '@opentelemetry/exporter-trace-otlp-proto'
          )
          exporters.push(new OTLPTraceExporter(httpConfig))
          break
        }
        default:
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_TRACES_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
      }
    } else {
      throw new Error(
        `Unknown exporter type set in OTEL_TRACES_EXPORTER env var: ${exporterType}`,
      )
    }
  }
  return exporters
}
/**
 * Whether the customer-facing (3P) OTLP telemetry pipeline is on.
 * Driven solely by the CLAUDE_CODE_ENABLE_TELEMETRY env var.
 */
export function isTelemetryEnabled() {
  const flag = process.env.CLAUDE_CODE_ENABLE_TELEMETRY
  return isEnvTruthy(flag)
}
function getBigQueryExportingReader() {
const bigqueryExporter = new BigQueryMetricsExporter()
return new PeriodicExportingMetricReader({
exporter: bigqueryExporter,
exportIntervalMillis: 5 * 60 * 1000, // 5mins for BigQuery metrics exporter to reduce load
})
}
function isBigQueryMetricsEnabled() {
// BigQuery metrics are enabled for:
// 1. API customers (excluding Claude.ai subscribers and Bedrock/Vertex)
// 2. Claude for Enterprise (C4E) users
// 3. Claude for Teams users
const subscriptionType = getSubscriptionType()
const isC4EOrTeamUser =
isClaudeAISubscriber() &&
(subscriptionType === 'enterprise' || subscriptionType === 'team')
return is1PApiCustomer() || isC4EOrTeamUser
}
/**
* Initialize beta tracing - a separate code path for detailed debugging.
* Uses BETA_TRACING_ENDPOINT instead of OTEL_EXPORTER_OTLP_ENDPOINT.
*/
async function initializeBetaTracing(
resource: ReturnType<typeof resourceFromAttributes>,
): Promise<void> {
const endpoint = process.env.BETA_TRACING_ENDPOINT
if (!endpoint) {
return
}
const [{ OTLPTraceExporter }, { OTLPLogExporter }] = await Promise.all([
import('@opentelemetry/exporter-trace-otlp-http'),
import('@opentelemetry/exporter-logs-otlp-http'),
])
const httpConfig = {
url: `${endpoint}/v1/traces`,
}
const logHttpConfig = {
url: `${endpoint}/v1/logs`,
}
// Initialize trace exporter
const traceExporter = new OTLPTraceExporter(httpConfig)
const spanProcessor = new BatchSpanProcessor(traceExporter, {
scheduledDelayMillis: DEFAULT_TRACES_EXPORT_INTERVAL_MS,
})
const tracerProvider = new BasicTracerProvider({
resource,
spanProcessors: [spanProcessor],
})
trace.setGlobalTracerProvider(tracerProvider)
setTracerProvider(tracerProvider)
// Initialize log exporter
const logExporter = new OTLPLogExporter(logHttpConfig)
const loggerProvider = new LoggerProvider({
resource,
processors: [
new BatchLogRecordProcessor(logExporter, {
scheduledDelayMillis: DEFAULT_LOGS_EXPORT_INTERVAL_MS,
}),
],
})
logs.setGlobalLoggerProvider(loggerProvider)
setLoggerProvider(loggerProvider)
// Initialize event logger
const eventLogger = logs.getLogger(
'com.anthropic.claude_code.events',
MACRO.VERSION,
)
setEventLogger(eventLogger)
// Setup flush handlers - flush both logs AND traces
process.on('beforeExit', async () => {
await loggerProvider?.forceFlush()
await tracerProvider?.forceFlush()
})
process.on('exit', () => {
void loggerProvider?.forceFlush()
void tracerProvider?.forceFlush()
})
}
/**
 * Top-level OpenTelemetry bootstrap for the process.
 *
 * Builds the OTel resource (service attributes plus OS/host/env detectors),
 * assembles metric readers (customer OTLP exporters when
 * CLAUDE_CODE_ENABLE_TELEMETRY is set, BigQuery for eligible users), then
 * either hands off to the beta tracing path (isBetaTracingEnabled) or sets
 * up the standard logs/traces pipelines. Registers a bounded-timeout
 * shutdown hook via registerCleanup and returns this module's Meter.
 */
export async function initializeTelemetry() {
  profileCheckpoint('telemetry_init_start')
  bootstrapTelemetry()
  // Console exporters call console.dir on a timer (5s logs/traces, 60s
  // metrics), writing pretty-printed objects to stdout. In stream-json
  // mode stdout is the SDK message channel; the first line (`{`) breaks
  // the SDK's line reader. Stripped here (not main.tsx) because init.ts
  // re-runs applyConfigEnvironmentVariables() inside initializeTelemetry-
  // AfterTrust for remote-managed-settings users, and bootstrapTelemetry
  // above copies ANT_OTEL_* for ant users — both would undo an earlier strip.
  if (getHasFormattedOutput()) {
    for (const key of [
      'OTEL_METRICS_EXPORTER',
      'OTEL_LOGS_EXPORTER',
      'OTEL_TRACES_EXPORTER',
    ] as const) {
      const v = process.env[key]
      if (v?.includes('console')) {
        // Drop only the 'console' entry; any other exporters stay active.
        process.env[key] = v
          .split(',')
          .map(s => s.trim())
          .filter(s => s !== 'console')
          .join(',')
      }
    }
  }
  // Route OTel's internal errors/warnings into our own logging.
  diag.setLogger(new ClaudeCodeDiagLogger(), DiagLogLevel.ERROR)
  // Initialize Perfetto tracing (independent of OTEL)
  // Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
  initializePerfettoTracing()
  const readers = []
  // Add customer exporters (if enabled)
  const telemetryEnabled = isTelemetryEnabled()
  logForDebugging(
    `[3P telemetry] isTelemetryEnabled=${telemetryEnabled} (CLAUDE_CODE_ENABLE_TELEMETRY=${process.env.CLAUDE_CODE_ENABLE_TELEMETRY})`,
  )
  if (telemetryEnabled) {
    readers.push(...(await getOtlpReaders()))
  }
  // Add BigQuery exporter (for API customers, C4E users, and internal users)
  if (isBigQueryMetricsEnabled()) {
    readers.push(getBigQueryExportingReader())
  }
  // Create base resource with service attributes
  const platform = getPlatform()
  const baseAttributes: Record<string, string> = {
    [ATTR_SERVICE_NAME]: 'claude-code',
    [ATTR_SERVICE_VERSION]: MACRO.VERSION,
  }
  // Add WSL-specific attributes if running on WSL
  if (platform === 'wsl') {
    const wslVersion = getWslVersion()
    if (wslVersion) {
      baseAttributes['wsl.version'] = wslVersion
    }
  }
  const baseResource = resourceFromAttributes(baseAttributes)
  // Use OpenTelemetry detectors
  const osResource = resourceFromAttributes(
    osDetector.detect().attributes || {},
  )
  // Extract only host.arch from hostDetector
  const hostDetected = hostDetector.detect()
  const hostArchAttributes = hostDetected.attributes?.[SEMRESATTRS_HOST_ARCH]
    ? {
        [SEMRESATTRS_HOST_ARCH]: hostDetected.attributes[SEMRESATTRS_HOST_ARCH],
      }
    : {}
  const hostArchResource = resourceFromAttributes(hostArchAttributes)
  const envResource = resourceFromAttributes(
    envDetector.detect().attributes || {},
  )
  // Merge resources - later resources take precedence
  const resource = baseResource
    .merge(osResource)
    .merge(hostArchResource)
    .merge(envResource)
  // Check if beta tracing is enabled - this is a separate code path
  // Available to all users who set ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
  if (isBetaTracingEnabled()) {
    // Fire-and-forget: a failed beta-tracing init must not block startup.
    void initializeBetaTracing(resource).catch(e =>
      logForDebugging(`Beta tracing init failed: ${e}`, { level: 'error' }),
    )
    // Still set up meter provider for metrics (but skip regular logs/traces setup)
    const meterProvider = new MeterProvider({
      resource,
      views: [], // no custom metric views
      readers,
    })
    setMeterProvider(meterProvider)
    // Register shutdown for beta tracing
    const shutdownTelemetry = async () => {
      // Default budget: 2 seconds, overridable via env var.
      const timeoutMs = parseInt(
        process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
      )
      try {
        endInteractionSpan()
        // Force flush + shutdown together inside the timeout. Previously forceFlush
        // was awaited unbounded BEFORE the race, blocking exit on slow OTLP endpoints.
        // Each provider's flush→shutdown is chained independently so a slow logger
        // flush doesn't delay meterProvider/tracerProvider shutdown (no waterfall).
        const loggerProvider = getLoggerProvider()
        const tracerProvider = getTracerProvider()
        const chains: Promise<void>[] = [meterProvider.shutdown()]
        if (loggerProvider) {
          chains.push(
            loggerProvider.forceFlush().then(() => loggerProvider.shutdown()),
          )
        }
        if (tracerProvider) {
          chains.push(
            tracerProvider.forceFlush().then(() => tracerProvider.shutdown()),
          )
        }
        await Promise.race([
          Promise.all(chains),
          telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
        ])
      } catch {
        // Ignore shutdown errors
      }
    }
    registerCleanup(shutdownTelemetry)
    return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
  }
  // Standard (non-beta) path from here on.
  const meterProvider = new MeterProvider({
    resource,
    views: [], // no custom metric views
    readers,
  })
  // Store reference in state for flushing
  setMeterProvider(meterProvider)
  // Initialize logs if telemetry is enabled
  if (telemetryEnabled) {
    const logExporters = await getOtlpLogExporters()
    logForDebugging(
      `[3P telemetry] Created ${logExporters.length} log exporter(s)`,
    )
    if (logExporters.length > 0) {
      const loggerProvider = new LoggerProvider({
        resource,
        // Add batch processors for each exporter
        processors: logExporters.map(
          exporter =>
            new BatchLogRecordProcessor(exporter, {
              scheduledDelayMillis: parseInt(
                process.env.OTEL_LOGS_EXPORT_INTERVAL ||
                  DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
              ),
            }),
        ),
      })
      // Register the logger provider globally
      logs.setGlobalLoggerProvider(loggerProvider)
      setLoggerProvider(loggerProvider)
      // Initialize event logger
      const eventLogger = logs.getLogger(
        'com.anthropic.claude_code.events',
        MACRO.VERSION,
      )
      setEventLogger(eventLogger)
      logForDebugging('[3P telemetry] Event logger set successfully')
      // 'beforeExit' is emitted when Node.js empties its event loop and has no additional work to schedule.
      // Unlike 'exit', it allows us to perform async operations, so it works well for letting
      // network requests complete before the process exits naturally.
      process.on('beforeExit', async () => {
        await loggerProvider?.forceFlush()
        // Also flush traces - they use BatchSpanProcessor which needs explicit flush
        const tracerProvider = getTracerProvider()
        await tracerProvider?.forceFlush()
      })
      process.on('exit', () => {
        // Final attempt to flush logs and traces
        void loggerProvider?.forceFlush()
        void getTracerProvider()?.forceFlush()
      })
    }
  }
  // Initialize tracing if enhanced telemetry is enabled (BETA)
  if (telemetryEnabled && isEnhancedTelemetryEnabled()) {
    const traceExporters = await getOtlpTraceExporters()
    if (traceExporters.length > 0) {
      // Create span processors for each exporter
      const spanProcessors = traceExporters.map(
        exporter =>
          new BatchSpanProcessor(exporter, {
            scheduledDelayMillis: parseInt(
              process.env.OTEL_TRACES_EXPORT_INTERVAL ||
                DEFAULT_TRACES_EXPORT_INTERVAL_MS.toString(),
            ),
          }),
      )
      const tracerProvider = new BasicTracerProvider({
        resource,
        spanProcessors,
      })
      // Register the tracer provider globally
      trace.setGlobalTracerProvider(tracerProvider)
      setTracerProvider(tracerProvider)
    }
  }
  // Shutdown metrics and logs on exit (flushes and closes exporters)
  const shutdownTelemetry = async () => {
    // Default budget: 2 seconds, overridable via env var.
    const timeoutMs = parseInt(
      process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
    )
    try {
      // End any active interaction span before shutdown
      endInteractionSpan()
      const shutdownPromises = [meterProvider.shutdown()]
      const loggerProvider = getLoggerProvider()
      if (loggerProvider) {
        shutdownPromises.push(loggerProvider.shutdown())
      }
      const tracerProvider = getTracerProvider()
      if (tracerProvider) {
        shutdownPromises.push(tracerProvider.shutdown())
      }
      // Bound the whole shutdown by the timeout; telemetryTimeout rejects.
      await Promise.race([
        Promise.all(shutdownPromises),
        telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
      ])
    } catch (error) {
      // NOTE(review): this matches by message substring while flushTelemetry
      // uses `instanceof TelemetryTimeoutError` — consider unifying.
      if (error instanceof Error && error.message.includes('timeout')) {
        logForDebugging(
          `
OpenTelemetry telemetry flush timed out after ${timeoutMs}ms
To resolve this issue, you can:
1. Increase the timeout by setting CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS env var (e.g., 5000 for 5 seconds)
2. Check if your OpenTelemetry backend is experiencing scalability issues
3. Disable OpenTelemetry by unsetting CLAUDE_CODE_ENABLE_TELEMETRY env var
Current timeout: ${timeoutMs}ms
`,
          { level: 'error' },
        )
      }
      throw error
    }
  }
  // Always register shutdown (internal metrics are always enabled)
  registerCleanup(shutdownTelemetry)
  return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
}
/**
 * Flush all pending telemetry data immediately.
 *
 * Called before logout or org switching so buffered metrics/logs/spans are
 * exported under the correct identity rather than leaking across accounts.
 * Never throws: a failed or timed-out flush is logged and swallowed so the
 * caller's logout flow can proceed.
 */
export async function flushTelemetry(): Promise<void> {
  const meterProvider = getMeterProvider()
  if (!meterProvider) {
    return
  }
  const timeoutMs = parseInt(
    process.env.CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS || '5000',
  )
  try {
    // Flush every provider that exists; the three flushes run concurrently
    // and share one timeout budget.
    const pending = [meterProvider.forceFlush()]
    const loggerProvider = getLoggerProvider()
    if (loggerProvider) {
      pending.push(loggerProvider.forceFlush())
    }
    const tracerProvider = getTracerProvider()
    if (tracerProvider) {
      pending.push(tracerProvider.forceFlush())
    }
    await Promise.race([
      Promise.all(pending),
      telemetryTimeout(timeoutMs, 'OpenTelemetry flush timeout'),
    ])
    logForDebugging('Telemetry flushed successfully')
  } catch (error) {
    if (error instanceof TelemetryTimeoutError) {
      logForDebugging(
        `Telemetry flush timed out after ${timeoutMs}ms. Some metrics may not be exported.`,
        { level: 'warn' },
      )
    } else {
      logForDebugging(`Telemetry flush failed: ${errorMessage(error)}`, {
        level: 'error',
      })
    }
    // Don't throw - allow logout to continue even if flush fails
  }
}
/**
 * Parse OTEL_EXPORTER_OTLP_HEADERS ("k1=v1,k2=v2") into a header map.
 *
 * Entries without an '=' (or with an empty key) are dropped; only the first
 * '=' splits key from value, so values containing '=' survive intact. Keys
 * and values are trimmed of surrounding whitespace.
 */
function parseOtelHeadersEnvVar(): Record<string, string> {
  const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS
  if (!raw) {
    return {}
  }
  const headers: Record<string, string> = {}
  for (const entry of raw.split(',')) {
    const eq = entry.indexOf('=')
    if (eq < 0) {
      continue // no '=' at all
    }
    const key = entry.slice(0, eq)
    if (!key) {
      continue // empty key, e.g. "=value"
    }
    headers[key.trim()] = entry.slice(eq + 1).trim()
  }
  return headers
}
/**
* Get configuration for OTLP exporters including:
* - HTTP agent options (proxy, mTLS)
* - Dynamic headers via otelHeadersHelper or static headers from env var
*/
function getOTLPExporterConfig() {
const proxyUrl = getProxyUrl()
const mtlsConfig = getMTLSConfig()
const settings = getSettings_DEPRECATED()
// Build base config
const config: Record<string, unknown> = {}
// Parse static headers from env var once (doesn't change at runtime)
const staticHeaders = parseOtelHeadersEnvVar()
// If otelHeadersHelper is configured, use async headers function for dynamic refresh
// Otherwise just return static headers if any exist
if (settings?.otelHeadersHelper) {
config.headers = async (): Promise<Record<string, string>> => {
const dynamicHeaders = getOtelHeadersFromHelper()
return { ...staticHeaders, ...dynamicHeaders }
}
} else if (Object.keys(staticHeaders).length > 0) {
config.headers = async (): Promise<Record<string, string>> => staticHeaders
}
// Check if we should bypass proxy for OTEL endpoint
const otelEndpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
if (!proxyUrl || (otelEndpoint && shouldBypassProxy(otelEndpoint))) {
// No proxy configured or OTEL endpoint should bypass proxy
const caCerts = getCACertificates()
if (mtlsConfig || caCerts) {
config.httpAgentOptions = {
...mtlsConfig,
...(caCerts && { ca: caCerts }),
}
}
return config
}
// Return an HttpAgentFactory function that creates our proxy agent
const caCerts = getCACertificates()
const agentFactory = (_protocol: string) => {
// Create and return the proxy agent with mTLS and CA cert config
const proxyAgent =
mtlsConfig || caCerts
? new HttpsProxyAgent(proxyUrl, {
...(mtlsConfig && {
cert: mtlsConfig.cert,
key: mtlsConfig.key,
passphrase: mtlsConfig.passphrase,
}),
...(caCerts && { ca: caCerts }),
})
: new HttpsProxyAgent(proxyUrl)
return proxyAgent
}
config.httpAgentOptions = agentFactory
return config
}

View File

@@ -0,0 +1,26 @@
import type { DiagLogger } from '@opentelemetry/api'
import { logForDebugging } from '../debug.js'
import { logError } from '../log.js'
/**
 * OpenTelemetry diagnostics bridge: forwards OTel-internal errors and
 * warnings into Claude Code's error log and debug channel, and drops the
 * noisier info/debug/verbose levels entirely.
 */
export class ClaudeCodeDiagLogger implements DiagLogger {
  // Shared path for the two levels we care about.
  private forward(level: 'error' | 'warn', message: string) {
    logError(new Error(message))
    logForDebugging(`[3P telemetry] OTEL diag ${level}: ${message}`, {
      level,
    })
  }
  error(message: string, ..._: unknown[]) {
    this.forward('error', message)
  }
  warn(message: string, ..._: unknown[]) {
    this.forward('warn', message)
  }
  // Lower severities are intentionally ignored.
  info(_message: string, ..._args: unknown[]) {
    return
  }
  debug(_message: string, ..._args: unknown[]) {
    return
  }
  verbose(_message: string, ..._args: unknown[]) {
    return
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,289 @@
/**
* Plugin telemetry helpers — shared field builders for plugin lifecycle events.
*
* Implements the twin-column privacy pattern: every user-defined-name field
* emits both a raw value (routed to PII-tagged _PROTO_* BQ columns) and a
* redacted twin (real name iff marketplace ∈ allowlist, else 'third-party').
*
* plugin_id_hash provides an opaque per-plugin aggregation key with no privacy
* dependency — sha256(name@marketplace + FIXED_SALT) truncated to 16 chars.
* This answers distinct-count and per-plugin-trend questions that the
* redacted column can't, without exposing user-defined names.
*/
import { createHash } from 'crypto'
import { sep } from 'path'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
logEvent,
} from '../../services/analytics/index.js'
import type {
LoadedPlugin,
PluginError,
PluginManifest,
} from '../../types/plugin.js'
import {
isOfficialMarketplaceName,
parsePluginIdentifier,
} from '../plugins/pluginIdentifier.js'
// builtinPlugins.ts:BUILTIN_MARKETPLACE_NAME — inlined to avoid the cycle
// through commands.js. Marketplace schemas.ts enforces 'builtin' is reserved.
const BUILTIN_MARKETPLACE_NAME = 'builtin'
// Fixed salt for plugin_id_hash. Same constant across all repos and emission
// sites. Deliberately not per-org (per-org salt would defeat cross-org
// distinct-count) and never rotated (rotation would break trend lines).
// Customers can hash their own known plugin names to reverse-match their
// telemetry.
const PLUGIN_ID_HASH_SALT = 'claude-plugin-telemetry-v1'
/**
 * Opaque per-plugin aggregation key.
 *
 * Hashes the name@marketplace identifier (marketplace lowercased for
 * reproducibility; plugin-name case preserved, since enabledPlugins keys
 * are case-sensitive) with a fixed salt, then truncates the sha256 hex to
 * 16 chars — keeps BigQuery GROUP BY cardinality manageable while leaving
 * collisions negligible at projected 10k-plugin scale.
 */
export function hashPluginId(name: string, marketplace?: string): string {
  const identifier = marketplace
    ? `${name}@${marketplace.toLowerCase()}`
    : name
  const digest = createHash('sha256')
    .update(identifier + PLUGIN_ID_HASH_SALT)
    .digest('hex')
  return digest.slice(0, 16)
}
/**
 * 4-value scope enum for plugin origin. Distinct from PluginScope
 * (managed/user/project/local), which is installation-target — this is
 * marketplace-origin.
 *
 * - official: from an allowlisted Anthropic marketplace
 * - default-bundle: ships with the product (@builtin), auto-enabled
 * - org: enterprise admin-pushed via managed settings (policySettings)
 * - user-local: user added the marketplace or a local plugin
 */
export type TelemetryPluginScope =
  | 'official'
  | 'org'
  | 'user-local'
  | 'default-bundle'
/**
 * Resolve a plugin's marketplace-origin scope. Precedence: builtin bundle,
 * then official marketplaces, then org-managed names, else user-local.
 */
export function getTelemetryPluginScope(
  name: string,
  marketplace: string | undefined,
  managedNames: Set<string> | null,
): TelemetryPluginScope {
  if (marketplace === BUILTIN_MARKETPLACE_NAME) {
    return 'default-bundle'
  }
  if (isOfficialMarketplaceName(marketplace)) {
    return 'official'
  }
  return managedNames?.has(name) ? 'org' : 'user-local'
}
/**
 * How a plugin arrived in the session. Splits self-selected from org-pushed
 * — plugin_scope alone doesn't (an official plugin can be user-installed OR
 * org-pushed; both are scope='official').
 */
export type EnabledVia =
  | 'user-install' // user installed it themselves
  | 'org-policy' // pushed by enterprise managed settings
  | 'default-enable' // builtin plugin, on by default
  | 'seed-mount' // loaded from a seed directory mount
/** How a skill/command invocation was triggered. */
export type InvocationTrigger =
  | 'user-slash' // explicit slash command from the user
  | 'claude-proactive' // model-initiated
  | 'nested-skill' // invoked from within another skill
/** Where a skill invocation executes. */
export type SkillExecutionContext = 'fork' | 'inline' | 'remote'
/** How a plugin install was initiated. */
export type InstallSource =
  | 'cli-explicit' // explicit CLI install command
  | 'ui-discover' // found via the discovery UI
  | 'ui-suggestion' // accepted a UI suggestion
  | 'deep-link' // followed an install deep link
/**
 * Classify how a plugin came to be active in this session.
 *
 * Precedence: builtin > org-policy (name in managed settings) > seed-mount
 * (plugin path lives under one of the seed directories) > user-install.
 */
export function getEnabledVia(
  plugin: LoadedPlugin,
  managedNames: Set<string> | null,
  seedDirs: string[],
): EnabledVia {
  if (plugin.isBuiltin) {
    return 'default-enable'
  }
  if (managedNames?.has(plugin.name)) {
    return 'org-policy'
  }
  // Compare against "<dir><sep>" so /opt/plugins never matches
  // /opt/plugins-extra.
  const isSeeded = seedDirs.some(dir => {
    const prefix = dir.endsWith(sep) ? dir : dir + sep
    return plugin.path.startsWith(prefix)
  })
  return isSeeded ? 'seed-mount' : 'user-install'
}
/**
 * Common plugin telemetry fields keyed off name@marketplace: the opaque
 * hash, the scope enum, and the redacted-twin name columns. Callers add the
 * raw _PROTO_* fields separately (those require the PII-tagged marker type).
 *
 * Redaction rule: real names appear only for Anthropic-controlled plugins
 * (official marketplace or the builtin bundle); everything else collapses
 * to 'third-party'.
 */
export function buildPluginTelemetryFields(
  name: string,
  marketplace: string | undefined,
  managedNames: Set<string> | null = null,
): {
  plugin_id_hash: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  plugin_scope: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  plugin_name_redacted: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  marketplace_name_redacted: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  is_official_plugin: boolean
} {
  const scope = getTelemetryPluginScope(name, marketplace, managedNames)
  const anthropicControlled = scope === 'official' || scope === 'default-bundle'
  const redactedName = anthropicControlled ? name : 'third-party'
  const redactedMarketplace =
    anthropicControlled && marketplace ? marketplace : 'third-party'
  return {
    plugin_id_hash: hashPluginId(
      name,
      marketplace,
    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    plugin_scope:
      scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    plugin_name_redacted:
      redactedName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    marketplace_name_redacted:
      redactedMarketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    is_official_plugin: anthropicControlled,
  }
}
/**
 * Variant of buildPluginTelemetryFields for invocation sites that hold a
 * manifest + repository pair rather than a bare name/marketplace.
 *
 * Per-invocation callers (SkillTool, processSlashCommand) pass
 * managedNames=null: the session-level tengu_plugin_enabled_for_session
 * event carries the authoritative plugin_scope, and per-invocation rows can
 * join on plugin_id_hash to recover it — keeping hot-path call sites free
 * of the extra settings read.
 */
export function buildPluginCommandTelemetryFields(
  pluginInfo: { pluginManifest: PluginManifest; repository: string },
  managedNames: Set<string> | null = null,
): ReturnType<typeof buildPluginTelemetryFields> {
  const { marketplace } = parsePluginIdentifier(pluginInfo.repository)
  const pluginName = pluginInfo.pluginManifest.name
  return buildPluginTelemetryFields(pluginName, marketplace, managedNames)
}
/**
 * Emit tengu_plugin_enabled_for_session once per enabled plugin at session
 * start. Supplements tengu_skill_loaded (which still fires per-skill) — use
 * this event for plugin-level aggregates instead of DISTINCT-on-prefix
 * hacks: a plugin with 5 skills emits 5 skill_loaded rows but only 1 here.
 */
export function logPluginsEnabledForSession(
  plugins: LoadedPlugin[],
  managedNames: Set<string> | null,
  seedDirs: string[],
): void {
  for (const plugin of plugins) {
    const { marketplace } = parsePluginIdentifier(plugin.repository)
    // Counts cover both the singular legacy path fields and the plural ones.
    const skillPathCount =
      (plugin.skillsPath ? 1 : 0) + (plugin.skillsPaths?.length ?? 0)
    const commandPathCount =
      (plugin.commandsPath ? 1 : 0) + (plugin.commandsPaths?.length ?? 0)
    logEvent('tengu_plugin_enabled_for_session', {
      _PROTO_plugin_name:
        plugin.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      ...(marketplace && {
        _PROTO_marketplace_name:
          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }),
      ...buildPluginTelemetryFields(plugin.name, marketplace, managedNames),
      enabled_via: getEnabledVia(
        plugin,
        managedNames,
        seedDirs,
      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      skill_path_count: skillPathCount,
      command_path_count: commandPathCount,
      has_mcp: plugin.manifest.mcpServers !== undefined,
      has_hooks: plugin.hooksConfig !== undefined,
      ...(plugin.manifest.version && {
        version: plugin.manifest
          .version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      }),
    })
  }
}
/**
 * Bounded-cardinality error bucket for CLI plugin operation failures.
 * Maps free-form error messages onto 5 stable categories so dashboard
 * GROUP BY stays tractable.
 */
export type PluginCommandErrorCategory =
  | 'network'
  | 'not-found'
  | 'permission'
  | 'validation'
  | 'unknown'
/**
 * Classify an arbitrary thrown value by pattern-matching its message.
 * Rules are checked in order and the first match wins; anything
 * unrecognized falls through to 'unknown'.
 */
export function classifyPluginCommandError(
  error: unknown,
): PluginCommandErrorCategory {
  const message = String((error as { message?: unknown })?.message ?? error)
  const rules: Array<[PluginCommandErrorCategory, RegExp]> = [
    [
      'network',
      /ENOTFOUND|ECONNREFUSED|EAI_AGAIN|ETIMEDOUT|ECONNRESET|network|Could not resolve|Connection refused|timed out/i,
    ],
    ['not-found', /\b404\b|not found|does not exist|no such plugin/i],
    ['permission', /\b40[13]\b|EACCES|EPERM|permission denied|unauthorized/i],
    ['validation', /invalid|malformed|schema|validation|parse error/i],
  ]
  for (const [category, pattern] of rules) {
    if (pattern.test(message)) {
      return category
    }
  }
  return 'unknown'
}
/**
 * Emit tengu_plugin_load_failed once per error surfaced by session-start
 * plugin loading. Pairs with tengu_plugin_enabled_for_session so dashboards
 * can compute a load-success rate. PluginError.type is already a bounded
 * enum, so it feeds error_category directly.
 */
export function logPluginLoadErrors(
  errors: PluginError[],
  managedNames: Set<string> | null,
): void {
  for (const err of errors) {
    const parsed = parsePluginIdentifier(err.source)
    // Not every PluginError variant carries a plugin name (some have
    // pluginId, some are marketplace-level): prefer the 'plugin' property
    // when present, else fall back to the name parsed from err.source.
    const pluginName = 'plugin' in err && err.plugin ? err.plugin : parsed.name
    logEvent('tengu_plugin_load_failed', {
      error_category:
        err.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      _PROTO_plugin_name:
        pluginName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      ...(parsed.marketplace && {
        _PROTO_marketplace_name:
          parsed.marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      }),
      ...buildPluginTelemetryFields(
        pluginName,
        parsed.marketplace,
        managedNames,
      ),
    })
  }
}

View File

@@ -0,0 +1,927 @@
/**
* Session Tracing for Claude Code using OpenTelemetry (BETA)
*
* This module provides a high-level API for creating and managing spans
* to trace Claude Code workflows. Each user interaction creates a root
* interaction span, which contains operation spans (LLM requests, tool calls, etc.).
*
* Requirements:
* - Enhanced telemetry is enabled via feature('ENHANCED_TELEMETRY_BETA')
* - Configure OTEL_TRACES_EXPORTER (console, otlp, etc.)
*/
import { feature } from 'bun:bundle'
import { context as otelContext, type Span, trace } from '@opentelemetry/api'
import { AsyncLocalStorage } from 'async_hooks'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
import type { AssistantMessage, UserMessage } from '../../types/message.js'
import { isEnvDefinedFalsy, isEnvTruthy } from '../envUtils.js'
import { getTelemetryAttributes } from '../telemetryAttributes.js'
import {
addBetaInteractionAttributes,
addBetaLLMRequestAttributes,
addBetaLLMResponseAttributes,
addBetaToolInputAttributes,
addBetaToolResultAttributes,
isBetaTracingEnabled,
type LLMRequestNewContext,
truncateContent,
} from './betaSessionTracing.js'
import {
endInteractionPerfettoSpan,
endLLMRequestPerfettoSpan,
endToolPerfettoSpan,
endUserInputPerfettoSpan,
isPerfettoTracingEnabled,
startInteractionPerfettoSpan,
startLLMRequestPerfettoSpan,
startToolPerfettoSpan,
startUserInputPerfettoSpan,
} from './perfettoTracing.js'
// Re-export for callers
export type { Span }
export { isBetaTracingEnabled, type LLMRequestNewContext }
// Message type for API calls (UserMessage or AssistantMessage)
type APIMessage = UserMessage | AssistantMessage
// Discriminator written into every span as the 'span.type' attribute.
type SpanType =
  | 'interaction'
  | 'llm_request'
  | 'tool'
  | 'tool.blocked_on_user'
  | 'tool.execution'
  | 'hook'
// Book-keeping wrapper stored per active span.
interface SpanContext {
  span: Span
  startTime: number // Date.now() at span start; used for duration + TTL eviction
  attributes: Record<string, string | number | boolean>
  ended?: boolean // guards against double span.end()
  perfettoSpanId?: string // set when Perfetto tracing is also active
}
// ALS stores SpanContext directly so it holds a strong reference while a span
// is active. With that, activeSpans can use WeakRef — when ALS is cleared
// (enterWith(undefined)) and no other code holds the SpanContext, GC can collect
// it and the WeakRef goes stale.
const interactionContext = new AsyncLocalStorage<SpanContext | undefined>()
const toolContext = new AsyncLocalStorage<SpanContext | undefined>()
const activeSpans = new Map<string, WeakRef<SpanContext>>()
// Spans not stored in ALS (LLM request, blocked-on-user, tool execution, hook)
// need a strong reference to prevent GC from collecting the SpanContext before
// the corresponding end* function retrieves it.
const strongSpans = new Map<string, SpanContext>()
// Monotonic counter emitted as 'interaction.sequence' on interaction spans.
let interactionSequence = 0
let _cleanupIntervalStarted = false
const SPAN_TTL_MS = 30 * 60 * 1000 // 30 minutes
/** Hex span id of an OTel span, or '' when the span context has none. */
function getSpanId(span: Span): string {
  const { spanId } = span.spanContext()
  return spanId || ''
}
/**
 * Lazily start a background sweep that evicts orphaned spans from activeSpans.
 *
 * Normal teardown (endInteractionSpan / endToolSpan) deletes spans
 * immediately; this interval is the safety net for spans that were never
 * ended (aborted streams, uncaught exceptions mid-query), which would
 * otherwise sit in activeSpans forever holding Span objects and their
 * OpenTelemetry context chain alive.
 *
 * Started on the first startInteractionSpan call rather than at module load
 * (avoids the no-top-level-side-effects lint rule and keeps the timer out
 * of processes that never start a span), and unref()'d so the timer never
 * keeps the process alive once all other work is done.
 */
function ensureCleanupInterval(): void {
  if (_cleanupIntervalStarted) {
    return
  }
  _cleanupIntervalStarted = true
  const timer = setInterval(() => {
    const expiredBefore = Date.now() - SPAN_TTL_MS
    for (const [spanId, ref] of activeSpans) {
      const ctx = ref.deref()
      if (ctx !== undefined && ctx.startTime >= expiredBefore) {
        continue // still alive and within TTL
      }
      if (ctx !== undefined && !ctx.ended) {
        ctx.span.end() // flush any recorded attributes to the exporter
      }
      activeSpans.delete(spanId)
      strongSpans.delete(spanId)
    }
  }, 60_000)
  // Node.js / Bun: don't let this timer block process exit.
  if (typeof timer.unref === 'function') {
    timer.unref()
  }
}
/**
 * Check if enhanced telemetry is enabled.
 * Priority: env var override > ant build > GrowthBook gate.
 * Always false when the ENHANCED_TELEMETRY_BETA build feature is off.
 */
export function isEnhancedTelemetryEnabled(): boolean {
  if (!feature('ENHANCED_TELEMETRY_BETA')) {
    return false
  }
  const override =
    process.env.CLAUDE_CODE_ENHANCED_TELEMETRY_BETA ??
    process.env.ENABLE_ENHANCED_TELEMETRY_BETA
  // An explicit env setting wins in either direction.
  if (isEnvTruthy(override)) {
    return true
  }
  if (isEnvDefinedFalsy(override)) {
    return false
  }
  if (process.env.USER_TYPE === 'ant') {
    return true
  }
  return getFeatureValue_CACHED_MAY_BE_STALE('enhanced_telemetry_beta', false)
}
/**
 * True when either tracing pipeline is active: standard enhanced telemetry
 * or the beta detailed-tracing path.
 */
function isAnyTracingEnabled(): boolean {
  if (isEnhancedTelemetryEnabled()) {
    return true
  }
  return isBetaTracingEnabled()
}
function getTracer() {
return trace.getTracer('com.anthropic.claude_code.tracing', '1.0.0')
}
/**
 * Merge the session-level telemetry attributes with the span-type tag and
 * any caller-provided attributes (caller values win on key collisions).
 */
function createSpanAttributes(
  spanType: SpanType,
  customAttributes: Record<string, string | number | boolean> = {},
): Record<string, string | number | boolean> {
  return {
    ...getTelemetryAttributes(),
    'span.type': spanType,
    ...customAttributes,
  }
}
/**
 * Start an interaction span wrapping one user request -> Claude response
 * cycle. This is a root span carrying all session-level attributes, and it
 * becomes the ALS interaction context for subsequent operations.
 *
 * The raw prompt is only recorded when OTEL_LOG_USER_PROMPTS is truthy;
 * otherwise the attribute holds '<REDACTED>' (the length is always logged).
 * When OTel tracing is off, a dummy span is returned so callers always get
 * a Span; an active Perfetto span (if enabled) is still tracked through it.
 */
export function startInteractionSpan(userPrompt: string): Span {
  ensureCleanupInterval()
  // Perfetto tracing is independent of the OTel pipelines.
  const perfettoSpanId = isPerfettoTracingEnabled()
    ? startInteractionPerfettoSpan(userPrompt)
    : undefined
  if (!isAnyTracingEnabled()) {
    const fallbackSpan = trace.getActiveSpan() || getTracer().startSpan('dummy')
    if (!perfettoSpanId) {
      return fallbackSpan
    }
    // Track the Perfetto span through a dummy OTel span context so
    // endInteractionSpan can find and close it later.
    const ctx: SpanContext = {
      span: fallbackSpan,
      startTime: Date.now(),
      attributes: {},
      perfettoSpanId,
    }
    activeSpans.set(getSpanId(fallbackSpan), new WeakRef(ctx))
    interactionContext.enterWith(ctx)
    return fallbackSpan
  }
  const promptToLog = isEnvTruthy(process.env.OTEL_LOG_USER_PROMPTS)
    ? userPrompt
    : '<REDACTED>'
  interactionSequence++
  const attributes = createSpanAttributes('interaction', {
    user_prompt: promptToLog,
    user_prompt_length: userPrompt.length,
    'interaction.sequence': interactionSequence,
  })
  const span = getTracer().startSpan('claude_code.interaction', {
    attributes,
  })
  // Add experimental attributes (new_context)
  addBetaInteractionAttributes(span, userPrompt)
  const ctx: SpanContext = {
    span,
    startTime: Date.now(),
    attributes,
    perfettoSpanId,
  }
  activeSpans.set(getSpanId(span), new WeakRef(ctx))
  interactionContext.enterWith(ctx)
  return span
}
export function endInteractionSpan(): void {
const spanContext = interactionContext.getStore()
if (!spanContext) {
return
}
if (spanContext.ended) {
return
}
// End Perfetto span
if (spanContext.perfettoSpanId) {
endInteractionPerfettoSpan(spanContext.perfettoSpanId)
}
if (!isAnyTracingEnabled()) {
spanContext.ended = true
activeSpans.delete(getSpanId(spanContext.span))
// Clear the store so async continuations created after this point (timers,
// promise callbacks, I/O) do not inherit a reference to the ended span.
// enterWith(undefined) is intentional: exit(() => {}) is a no-op because it
// only suppresses the store inside the callback and returns immediately.
interactionContext.enterWith(undefined)
return
}
const duration = Date.now() - spanContext.startTime
spanContext.span.setAttributes({
'interaction.duration_ms': duration,
})
spanContext.span.end()
spanContext.ended = true
activeSpans.delete(getSpanId(spanContext.span))
interactionContext.enterWith(undefined)
}
/**
 * Start a span for a single LLM API request.
 *
 * Parents the span to the current interaction when one is active in the
 * interactionContext store; otherwise marks it 'standalone'. Keep the
 * returned span and pass it back to endLLMRequestSpan() so parallel requests
 * resolve to the correct span.
 *
 * @param model Model identifier recorded as the 'model' attribute
 * @param newContext Optional request context; querySource (agent name) and
 *   beta new_context details are recorded from it
 * @param messagesForAPI Messages sent to the API, used only for beta tracing
 *   attributes
 * @param fastMode Recorded as the 'speed' attribute ('fast' vs 'normal')
 */
export function startLLMRequestSpan(
  model: string,
  newContext?: LLMRequestNewContext,
  messagesForAPI?: APIMessage[],
  fastMode?: boolean,
): Span {
  // Start Perfetto span regardless of OTel tracing state
  const perfettoSpanId = isPerfettoTracingEnabled()
    ? startLLMRequestPerfettoSpan({
        model,
        querySource: newContext?.querySource,
        messageId: undefined, // Will be set in endLLMRequestSpan
      })
    : undefined
  if (!isAnyTracingEnabled()) {
    // Still track Perfetto span even if OTel is disabled
    if (perfettoSpanId) {
      // Dummy OTel span anchors a SpanContext carrying the Perfetto id so
      // endLLMRequestSpan can close the Perfetto span later.
      const dummySpan = trace.getActiveSpan() || getTracer().startSpan('dummy')
      const spanId = getSpanId(dummySpan)
      const spanContextObj: SpanContext = {
        span: dummySpan,
        startTime: Date.now(),
        attributes: { model },
        perfettoSpanId,
      }
      activeSpans.set(spanId, new WeakRef(spanContextObj))
      strongSpans.set(spanId, spanContextObj)
      return dummySpan
    }
    return trace.getActiveSpan() || getTracer().startSpan('dummy')
  }
  const tracer = getTracer()
  const parentSpanCtx = interactionContext.getStore()
  const attributes = createSpanAttributes('llm_request', {
    model: model,
    'llm_request.context': parentSpanCtx ? 'interaction' : 'standalone',
    speed: fastMode ? 'fast' : 'normal',
  })
  const ctx = parentSpanCtx
    ? trace.setSpan(otelContext.active(), parentSpanCtx.span)
    : otelContext.active()
  const span = tracer.startSpan('claude_code.llm_request', { attributes }, ctx)
  // Add query_source (agent name) if provided
  if (newContext?.querySource) {
    span.setAttribute('query_source', newContext.querySource)
  }
  // Add experimental attributes (system prompt, new_context)
  addBetaLLMRequestAttributes(span, newContext, messagesForAPI)
  const spanId = getSpanId(span)
  const spanContextObj: SpanContext = {
    span,
    startTime: Date.now(),
    attributes,
    perfettoSpanId,
  }
  // strongSpans holds the context alive until endLLMRequestSpan deletes it;
  // LLM request spans have no async-local store of their own.
  activeSpans.set(spanId, new WeakRef(spanContextObj))
  strongSpans.set(spanId, spanContextObj)
  return span
}
/**
* End an LLM request span and attach response metadata.
*
* @param span - Optional. The exact span returned by startLLMRequestSpan().
* IMPORTANT: When multiple LLM requests run in parallel (e.g., warmup requests,
* topic classifier, file path extractor, main thread), you MUST pass the specific span
* to ensure responses are attached to the correct request. Without it, responses may be
* incorrectly attached to whichever span happens to be "last" in the activeSpans map.
*
* If not provided, falls back to finding the most recent llm_request span (legacy behavior).
*/
export function endLLMRequestSpan(
span?: Span,
metadata?: {
inputTokens?: number
outputTokens?: number
cacheReadTokens?: number
cacheCreationTokens?: number
success?: boolean
statusCode?: number
error?: string
attempt?: number
modelResponse?: string
/** Text output from the model (non-thinking content) */
modelOutput?: string
/** Thinking/reasoning output from the model */
thinkingOutput?: string
/** Whether the output included tool calls (look at tool spans for details) */
hasToolCall?: boolean
/** Time to first token in milliseconds */
ttftMs?: number
/** Time spent in pre-request setup before the successful attempt */
requestSetupMs?: number
/** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */
attemptStartTimes?: number[]
},
): void {
let llmSpanContext: SpanContext | undefined
if (span) {
// Use the provided span directly - this is the correct approach for parallel requests
const spanId = getSpanId(span)
llmSpanContext = activeSpans.get(spanId)?.deref()
} else {
// Legacy fallback: find the most recent llm_request span
// WARNING: This can cause mismatched responses when multiple requests are in flight
llmSpanContext = Array.from(activeSpans.values())
.findLast(r => {
const ctx = r.deref()
return (
ctx?.attributes['span.type'] === 'llm_request' ||
ctx?.attributes['model']
)
})
?.deref()
}
if (!llmSpanContext) {
// Span was already ended or never tracked
return
}
const duration = Date.now() - llmSpanContext.startTime
// End Perfetto span with full metadata
if (llmSpanContext.perfettoSpanId) {
endLLMRequestPerfettoSpan(llmSpanContext.perfettoSpanId, {
ttftMs: metadata?.ttftMs,
ttltMs: duration, // Time to last token is the total duration
promptTokens: metadata?.inputTokens,
outputTokens: metadata?.outputTokens,
cacheReadTokens: metadata?.cacheReadTokens,
cacheCreationTokens: metadata?.cacheCreationTokens,
success: metadata?.success,
error: metadata?.error,
requestSetupMs: metadata?.requestSetupMs,
attemptStartTimes: metadata?.attemptStartTimes,
})
}
if (!isAnyTracingEnabled()) {
const spanId = getSpanId(llmSpanContext.span)
activeSpans.delete(spanId)
strongSpans.delete(spanId)
return
}
const endAttributes: Record<string, string | number | boolean> = {
duration_ms: duration,
}
if (metadata) {
if (metadata.inputTokens !== undefined)
endAttributes['input_tokens'] = metadata.inputTokens
if (metadata.outputTokens !== undefined)
endAttributes['output_tokens'] = metadata.outputTokens
if (metadata.cacheReadTokens !== undefined)
endAttributes['cache_read_tokens'] = metadata.cacheReadTokens
if (metadata.cacheCreationTokens !== undefined)
endAttributes['cache_creation_tokens'] = metadata.cacheCreationTokens
if (metadata.success !== undefined)
endAttributes['success'] = metadata.success
if (metadata.statusCode !== undefined)
endAttributes['status_code'] = metadata.statusCode
if (metadata.error !== undefined) endAttributes['error'] = metadata.error
if (metadata.attempt !== undefined)
endAttributes['attempt'] = metadata.attempt
if (metadata.hasToolCall !== undefined)
endAttributes['response.has_tool_call'] = metadata.hasToolCall
if (metadata.ttftMs !== undefined)
endAttributes['ttft_ms'] = metadata.ttftMs
// Add experimental response attributes (model_output, thinking_output)
addBetaLLMResponseAttributes(endAttributes, metadata)
}
llmSpanContext.span.setAttributes(endAttributes)
llmSpanContext.span.end()
const spanId = getSpanId(llmSpanContext.span)
activeSpans.delete(spanId)
strongSpans.delete(spanId)
}
/**
 * Start a span covering a single tool invocation.
 *
 * Parents to the current interaction span when one is active, registers the
 * new context in activeSpans, and enters the toolContext store so nested
 * spans (execution, blocked-on-user) and endToolSpan can find it.
 */
export function startToolSpan(
  toolName: string,
  toolAttributes?: Record<string, string | number | boolean>,
  toolInput?: string,
): Span {
  // Perfetto tracing runs independently of OTel tracing state.
  const perfettoSpanId = isPerfettoTracingEnabled()
    ? startToolPerfettoSpan(toolName, toolAttributes)
    : undefined
  if (!isAnyTracingEnabled()) {
    if (!perfettoSpanId) {
      return trace.getActiveSpan() || getTracer().startSpan('dummy')
    }
    // OTel off but Perfetto on: anchor the Perfetto id to a placeholder span
    // so endToolSpan can close it via the toolContext store.
    const placeholder = trace.getActiveSpan() || getTracer().startSpan('dummy')
    const placeholderCtx: SpanContext = {
      span: placeholder,
      startTime: Date.now(),
      attributes: { 'span.type': 'tool', tool_name: toolName },
      perfettoSpanId,
    }
    activeSpans.set(getSpanId(placeholder), new WeakRef(placeholderCtx))
    toolContext.enterWith(placeholderCtx)
    return placeholder
  }
  const parent = interactionContext.getStore()
  const attributes = createSpanAttributes('tool', {
    tool_name: toolName,
    ...toolAttributes,
  })
  const parentCtx = parent
    ? trace.setSpan(otelContext.active(), parent.span)
    : otelContext.active()
  const span = getTracer().startSpan('claude_code.tool', { attributes }, parentCtx)
  // Beta-only: record the (possibly truncated) tool input on the span.
  if (toolInput) {
    addBetaToolInputAttributes(span, toolName, toolInput)
  }
  const trackedCtx: SpanContext = {
    span,
    startTime: Date.now(),
    attributes,
    perfettoSpanId,
  }
  activeSpans.set(getSpanId(span), new WeakRef(trackedCtx))
  toolContext.enterWith(trackedCtx)
  return span
}
/**
 * Start a span measuring time a tool spends waiting on the user for a
 * permission decision. Parents to the active tool span when one exists.
 */
export function startToolBlockedOnUserSpan(): Span {
  // Perfetto tracing runs independently of OTel tracing state.
  const perfettoSpanId = isPerfettoTracingEnabled()
    ? startUserInputPerfettoSpan('tool_permission')
    : undefined
  if (!isAnyTracingEnabled()) {
    if (!perfettoSpanId) {
      return trace.getActiveSpan() || getTracer().startSpan('dummy')
    }
    // OTel off but Perfetto on: anchor the Perfetto id to a placeholder span
    // so endToolBlockedOnUserSpan can close it.
    const placeholder = trace.getActiveSpan() || getTracer().startSpan('dummy')
    const placeholderId = getSpanId(placeholder)
    const placeholderCtx: SpanContext = {
      span: placeholder,
      startTime: Date.now(),
      attributes: { 'span.type': 'tool.blocked_on_user' },
      perfettoSpanId,
    }
    activeSpans.set(placeholderId, new WeakRef(placeholderCtx))
    strongSpans.set(placeholderId, placeholderCtx)
    return placeholder
  }
  const parent = toolContext.getStore()
  const attributes = createSpanAttributes('tool.blocked_on_user')
  const parentCtx = parent
    ? trace.setSpan(otelContext.active(), parent.span)
    : otelContext.active()
  const span = getTracer().startSpan(
    'claude_code.tool.blocked_on_user',
    { attributes },
    parentCtx,
  )
  const spanId = getSpanId(span)
  const trackedCtx: SpanContext = {
    span,
    startTime: Date.now(),
    attributes,
    perfettoSpanId,
  }
  activeSpans.set(spanId, new WeakRef(trackedCtx))
  strongSpans.set(spanId, trackedCtx)
  return span
}
export function endToolBlockedOnUserSpan(
decision?: string,
source?: string,
): void {
const blockedSpanContext = Array.from(activeSpans.values())
.findLast(
r => r.deref()?.attributes['span.type'] === 'tool.blocked_on_user',
)
?.deref()
if (!blockedSpanContext) {
return
}
// End Perfetto span
if (blockedSpanContext.perfettoSpanId) {
endUserInputPerfettoSpan(blockedSpanContext.perfettoSpanId, {
decision,
source,
})
}
if (!isAnyTracingEnabled()) {
const spanId = getSpanId(blockedSpanContext.span)
activeSpans.delete(spanId)
strongSpans.delete(spanId)
return
}
const duration = Date.now() - blockedSpanContext.startTime
const attributes: Record<string, string | number | boolean> = {
duration_ms: duration,
}
if (decision) {
attributes['decision'] = decision
}
if (source) {
attributes['source'] = source
}
blockedSpanContext.span.setAttributes(attributes)
blockedSpanContext.span.end()
const spanId = getSpanId(blockedSpanContext.span)
activeSpans.delete(spanId)
strongSpans.delete(spanId)
}
export function startToolExecutionSpan(): Span {
if (!isAnyTracingEnabled()) {
return trace.getActiveSpan() || getTracer().startSpan('dummy')
}
const tracer = getTracer()
const parentSpanCtx = toolContext.getStore()
const attributes = createSpanAttributes('tool.execution')
const ctx = parentSpanCtx
? trace.setSpan(otelContext.active(), parentSpanCtx.span)
: otelContext.active()
const span = tracer.startSpan(
'claude_code.tool.execution',
{ attributes },
ctx,
)
const spanId = getSpanId(span)
const spanContextObj: SpanContext = {
span,
startTime: Date.now(),
attributes,
}
activeSpans.set(spanId, new WeakRef(spanContextObj))
strongSpans.set(spanId, spanContextObj)
return span
}
export function endToolExecutionSpan(metadata?: {
success?: boolean
error?: string
}): void {
if (!isAnyTracingEnabled()) {
return
}
const executionSpanContext = Array.from(activeSpans.values())
.findLast(r => r.deref()?.attributes['span.type'] === 'tool.execution')
?.deref()
if (!executionSpanContext) {
return
}
const duration = Date.now() - executionSpanContext.startTime
const attributes: Record<string, string | number | boolean> = {
duration_ms: duration,
}
if (metadata) {
if (metadata.success !== undefined) attributes['success'] = metadata.success
if (metadata.error !== undefined) attributes['error'] = metadata.error
}
executionSpanContext.span.setAttributes(attributes)
executionSpanContext.span.end()
const spanId = getSpanId(executionSpanContext.span)
activeSpans.delete(spanId)
strongSpans.delete(spanId)
}
/**
 * End the active tool span (from the toolContext store) and attach result
 * metadata. No-op when no tool span is active. Always clears the toolContext
 * store so later async continuations don't see the ended span.
 *
 * @param toolResult Raw tool output; recorded only via beta new_context
 *   attributes
 * @param resultTokens Token count of the result, if known
 */
export function endToolSpan(toolResult?: string, resultTokens?: number): void {
  const toolSpanContext = toolContext.getStore()
  if (!toolSpanContext) {
    return
  }
  // End Perfetto span
  // NOTE(review): success is reported as true unconditionally here — tool
  // failures are not distinguished in the Perfetto trace.
  if (toolSpanContext.perfettoSpanId) {
    endToolPerfettoSpan(toolSpanContext.perfettoSpanId, {
      success: true,
      resultTokens,
    })
  }
  if (!isAnyTracingEnabled()) {
    const spanId = getSpanId(toolSpanContext.span)
    activeSpans.delete(spanId)
    // Same reasoning as interactionContext above: clear so subsequent async
    // work doesn't hold a stale reference to the ended tool span.
    toolContext.enterWith(undefined)
    return
  }
  const duration = Date.now() - toolSpanContext.startTime
  const endAttributes: Record<string, string | number | boolean> = {
    duration_ms: duration,
  }
  // Add experimental tool result attributes (new_context)
  if (toolResult) {
    const toolName = toolSpanContext.attributes['tool_name'] || 'unknown'
    addBetaToolResultAttributes(endAttributes, toolName, toolResult)
  }
  if (resultTokens !== undefined) {
    endAttributes['result_tokens'] = resultTokens
  }
  toolSpanContext.span.setAttributes(endAttributes)
  toolSpanContext.span.end()
  const spanId = getSpanId(toolSpanContext.span)
  activeSpans.delete(spanId)
  toolContext.enterWith(undefined)
}
/** True when the user opted into raw tool content logging via OTEL_LOG_TOOL_CONTENT. */
function isToolContentLoggingEnabled(): boolean {
  const optIn = process.env.OTEL_LOG_TOOL_CONTENT
  return isEnvTruthy(optIn)
}
/**
 * Add a span event with tool content/output data.
 * Only logs if OTEL_LOG_TOOL_CONTENT=1 is set.
 * Truncates content if it exceeds MAX_CONTENT_SIZE.
 */
export function addToolContentEvent(
  eventName: string,
  attributes: Record<string, string | number | boolean>,
): void {
  if (!isAnyTracingEnabled() || !isToolContentLoggingEnabled()) {
    return
  }
  const spanCtx = toolContext.getStore()
  if (!spanCtx) {
    return
  }
  // Large string values are truncated; when that happens, the truncation
  // flag and original size are recorded alongside the clipped content.
  const processed: Record<string, string | number | boolean> = {}
  for (const [key, value] of Object.entries(attributes)) {
    if (typeof value !== 'string') {
      processed[key] = value
      continue
    }
    const { content, truncated } = truncateContent(value)
    processed[key] = content
    if (truncated) {
      processed[`${key}_truncated`] = true
      processed[`${key}_original_length`] = value.length
    }
  }
  spanCtx.span.addEvent(eventName, processed)
}
/**
 * Return the innermost active span — an active tool span wins over the
 * interaction span — or null when tracing is off or nothing is active.
 */
export function getCurrentSpan(): Span | null {
  if (!isAnyTracingEnabled()) {
    return null
  }
  const toolSpan = toolContext.getStore()?.span
  if (toolSpan) {
    return toolSpan
  }
  return interactionContext.getStore()?.span ?? null
}
/**
 * Run an async function inside a new span parented to the current tool or
 * interaction context.
 *
 * The span is always ended and removed from tracking, whether fn resolves or
 * throws; Error rejections are recorded on the span before rethrowing.
 *
 * @param spanName Name for the created span
 * @param fn Work to run; receives the span (a dummy when tracing is off)
 * @param attributes Extra attributes merged into the span's attribute set
 * @returns The value fn resolves with
 */
export async function executeInSpan<T>(
  spanName: string,
  fn: (span: Span) => Promise<T>,
  attributes?: Record<string, string | number | boolean>,
): Promise<T> {
  if (!isAnyTracingEnabled()) {
    return fn(trace.getActiveSpan() || getTracer().startSpan('dummy'))
  }
  const tracer = getTracer()
  const parentSpanCtx = toolContext.getStore() ?? interactionContext.getStore()
  const finalAttributes = createSpanAttributes('tool', {
    ...attributes,
  })
  const ctx = parentSpanCtx
    ? trace.setSpan(otelContext.active(), parentSpanCtx.span)
    : otelContext.active()
  const span = tracer.startSpan(spanName, { attributes: finalAttributes }, ctx)
  const spanId = getSpanId(span)
  const spanContextObj: SpanContext = {
    span,
    startTime: Date.now(),
    attributes: finalAttributes,
  }
  activeSpans.set(spanId, new WeakRef(spanContextObj))
  strongSpans.set(spanId, spanContextObj)
  try {
    return await fn(span)
  } catch (error) {
    if (error instanceof Error) {
      span.recordException(error)
    }
    throw error
  } finally {
    // Single cleanup path for both success and failure — previously this
    // span.end() + map-delete sequence was duplicated in each branch.
    span.end()
    activeSpans.delete(spanId)
    strongSpans.delete(spanId)
  }
}
/**
* Start a hook execution span.
* Only creates a span when beta tracing is enabled.
* @param hookEvent The hook event type (e.g., 'PreToolUse', 'PostToolUse')
* @param hookName The full hook name (e.g., 'PreToolUse:Write')
* @param numHooks The number of hooks being executed
* @param hookDefinitions JSON string of hook definitions for tracing
* @returns The span (or a dummy span if tracing is disabled)
*/
export function startHookSpan(
hookEvent: string,
hookName: string,
numHooks: number,
hookDefinitions: string,
): Span {
if (!isBetaTracingEnabled()) {
return trace.getActiveSpan() || getTracer().startSpan('dummy')
}
const tracer = getTracer()
const parentSpanCtx = toolContext.getStore() ?? interactionContext.getStore()
const attributes = createSpanAttributes('hook', {
hook_event: hookEvent,
hook_name: hookName,
num_hooks: numHooks,
hook_definitions: hookDefinitions,
})
const ctx = parentSpanCtx
? trace.setSpan(otelContext.active(), parentSpanCtx.span)
: otelContext.active()
const span = tracer.startSpan('claude_code.hook', { attributes }, ctx)
const spanId = getSpanId(span)
const spanContextObj: SpanContext = {
span,
startTime: Date.now(),
attributes,
}
activeSpans.set(spanId, new WeakRef(spanContextObj))
strongSpans.set(spanId, spanContextObj)
return span
}
/**
 * End a hook execution span with outcome metadata.
 * Only does work when beta tracing is enabled.
 * @param span The span to end (returned from startHookSpan)
 * @param metadata The outcome metadata for the hook execution
 */
export function endHookSpan(
  span: Span,
  metadata?: {
    numSuccess?: number
    numBlocking?: number
    numNonBlockingError?: number
    numCancelled?: number
  },
): void {
  if (!isBetaTracingEnabled()) {
    return
  }
  const spanId = getSpanId(span)
  const tracked = activeSpans.get(spanId)?.deref()
  if (!tracked) {
    return
  }
  const endAttributes: Record<string, string | number | boolean> = {
    duration_ms: Date.now() - tracked.startTime,
  }
  // Copy only the counters that were actually reported.
  const counters: Array<[string, number | undefined]> = [
    ['num_success', metadata?.numSuccess],
    ['num_blocking', metadata?.numBlocking],
    ['num_non_blocking_error', metadata?.numNonBlockingError],
    ['num_cancelled', metadata?.numCancelled],
  ]
  for (const [key, value] of counters) {
    if (value !== undefined) {
      endAttributes[key] = value
    }
  }
  tracked.span.setAttributes(endAttributes)
  tracked.span.end()
  activeSpans.delete(spanId)
  strongSpans.delete(spanId)
}

View File

@@ -0,0 +1,39 @@
import { getSkillToolCommands } from '../../commands.js'
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
logEvent,
} from '../../services/analytics/index.js'
import { getCharBudget } from '../../tools/SkillTool/prompt.js'
/**
 * Logs a tengu_skill_loaded event for each skill available at session startup.
 * This enables analytics on which skills are available across sessions.
 */
export async function logSkillsLoaded(
  cwd: string,
  contextWindowTokens: number,
): Promise<void> {
  const availableSkills = await getSkillToolCommands(cwd)
  const budget = getCharBudget(contextWindowTokens)
  for (const skill of availableSkills) {
    // Only prompt-type skills are reported.
    if (skill.type !== 'prompt') {
      continue
    }
    logEvent('tengu_skill_loaded', {
      // _PROTO_skill_name routes to the privileged skill_name BQ column.
      // Unredacted names don't go in additional_metadata.
      _PROTO_skill_name:
        skill.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
      skill_source:
        skill.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      skill_loaded_from:
        skill.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      skill_budget: budget,
      ...(skill.kind
        ? {
            skill_kind:
              skill.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          }
        : {}),
    })
  }
}