From a95f0a540a5648ce42c111bb3beb1a7b757317b4 Mon Sep 17 00:00:00 2001 From: YoVinchen Date: Sat, 4 Apr 2026 01:12:54 +0800 Subject: [PATCH] Remove dead telemetry stubs --- .../firstPartyEventLoggingExporter.ts | 806 --------------- src/utils/telemetry/betaSessionTracing.ts | 473 +-------- src/utils/telemetry/bigqueryExporter.ts | 252 ----- src/utils/telemetry/instrumentation.ts | 9 - src/utils/telemetry/logger.ts | 26 - src/utils/telemetry/sessionTracing.ts | 961 ++---------------- src/utils/telemetryAttributes.ts | 71 -- 7 files changed, 137 insertions(+), 2461 deletions(-) delete mode 100644 src/services/analytics/firstPartyEventLoggingExporter.ts delete mode 100644 src/utils/telemetry/bigqueryExporter.ts delete mode 100644 src/utils/telemetry/logger.ts delete mode 100644 src/utils/telemetryAttributes.ts diff --git a/src/services/analytics/firstPartyEventLoggingExporter.ts b/src/services/analytics/firstPartyEventLoggingExporter.ts deleted file mode 100644 index aefb22c..0000000 --- a/src/services/analytics/firstPartyEventLoggingExporter.ts +++ /dev/null @@ -1,806 +0,0 @@ -import type { HrTime } from '@opentelemetry/api' -import { type ExportResult, ExportResultCode } from '@opentelemetry/core' -import type { - LogRecordExporter, - ReadableLogRecord, -} from '@opentelemetry/sdk-logs' -import axios from 'axios' -import { randomUUID } from 'crypto' -import { appendFile, mkdir, readdir, unlink, writeFile } from 'fs/promises' -import * as path from 'path' -import type { CoreUserData } from 'src/utils/user.js' -import { - getIsNonInteractiveSession, - getSessionId, -} from '../../bootstrap/state.js' -import { ClaudeCodeInternalEvent } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js' -import { GrowthbookExperimentEvent } from '../../types/generated/events_mono/growthbook/v1/growthbook_experiment_event.js' -import { - getClaudeAIOAuthTokens, - hasProfileScope, - isClaudeAISubscriber, -} from '../../utils/auth.js' -import { checkHasTrustDialogAccepted } from '../../utils/config.js' -import { logForDebugging } from '../../utils/debug.js' -import { getClaudeConfigHomeDir } from '../../utils/envUtils.js' -import { errorMessage, isFsInaccessible, toError } from '../../utils/errors.js' -import { getAuthHeaders } from '../../utils/http.js' -import { readJSONLFile } from '../../utils/json.js' -import { logError } from '../../utils/log.js' -import { sleep } from '../../utils/sleep.js' -import { jsonStringify } from '../../utils/slowOperations.js' -import { getClaudeCodeUserAgent } from '../../utils/userAgent.js' -import { isOAuthTokenExpired } from '../oauth/client.js' -import { stripProtoFields } from './index.js' -import { type EventMetadata, to1PEventFormat } from './metadata.js' - -// Unique ID for this process run - used to isolate failed event files between runs -const BATCH_UUID = randomUUID() - -// File prefix for failed event storage -const FILE_PREFIX = '1p_failed_events.' - -// Storage directory for failed events - evaluated at runtime to respect CLAUDE_CONFIG_DIR in tests -function getStorageDir(): string { - return path.join(getClaudeConfigHomeDir(), 'telemetry') -} - -// API envelope - event_data is the JSON output from proto toJSON() -type FirstPartyEventLoggingEvent = { - event_type: 'ClaudeCodeInternalEvent' | 'GrowthbookExperimentEvent' - event_data: unknown -} - -type FirstPartyEventLoggingPayload = { - events: FirstPartyEventLoggingEvent[] -} - -/** - * Exporter for 1st-party event logging to /api/event_logging/batch. - * - * Export cycles are controlled by OpenTelemetry's BatchLogRecordProcessor, which - * triggers export() when either: - * - Time interval elapses (default: 5 seconds via scheduledDelayMillis) - * - Batch size is reached (default: 200 events via maxExportBatchSize) - * - * This exporter adds resilience on top: - * - Append-only log for failed events (concurrency-safe) - * - Quadratic backoff retry for failed events, dropped after maxAttempts - * - Immediate retry of queued events when any export succeeds (endpoint is healthy) - * - Chunking large event sets into smaller batches - * - Auth fallback: retries without auth on 401 errors - */ -export class FirstPartyEventLoggingExporter implements LogRecordExporter { - private readonly endpoint: string - private readonly timeout: number - private readonly maxBatchSize: number - private readonly skipAuth: boolean - private readonly batchDelayMs: number - private readonly baseBackoffDelayMs: number - private readonly maxBackoffDelayMs: number - private readonly maxAttempts: number - private readonly isKilled: () => boolean - private pendingExports: Promise[] = [] - private isShutdown = false - private readonly schedule: ( - fn: () => Promise, - delayMs: number, - ) => () => void - private cancelBackoff: (() => void) | null = null - private attempts = 0 - private isRetrying = false - private lastExportErrorContext: string | undefined - - constructor( - options: { - timeout?: number - maxBatchSize?: number - skipAuth?: boolean - batchDelayMs?: number - baseBackoffDelayMs?: number - maxBackoffDelayMs?: number - maxAttempts?: number - path?: string - baseUrl?: string - // Injected killswitch probe. Checked per-POST so that disabling the - // firstParty sink also stops backoff retries (not just new emits). - // Passed in rather than imported to avoid a cycle with firstPartyEventLogger.ts. - isKilled?: () => boolean - schedule?: (fn: () => Promise, delayMs: number) => () => void - } = {}, - ) { - // Default: prod, except when ANTHROPIC_BASE_URL is explicitly staging. - // Overridable via tengu_1p_event_batch_config.baseUrl. - const baseUrl = - options.baseUrl || - (process.env.ANTHROPIC_BASE_URL === 'https://api-staging.anthropic.com' - ? 'https://api-staging.anthropic.com' - : 'https://api.anthropic.com') - - this.endpoint = `${baseUrl}${options.path || '/api/event_logging/batch'}` - - this.timeout = options.timeout || 10000 - this.maxBatchSize = options.maxBatchSize || 200 - this.skipAuth = options.skipAuth ?? false - this.batchDelayMs = options.batchDelayMs || 100 - this.baseBackoffDelayMs = options.baseBackoffDelayMs || 500 - this.maxBackoffDelayMs = options.maxBackoffDelayMs || 30000 - this.maxAttempts = options.maxAttempts ?? 8 - this.isKilled = options.isKilled ?? (() => false) - this.schedule = - options.schedule ?? - ((fn, ms) => { - const t = setTimeout(fn, ms) - return () => clearTimeout(t) - }) - - // Retry any failed events from previous runs of this session (in background) - void this.retryPreviousBatches() - } - - // Expose for testing - async getQueuedEventCount(): Promise { - return (await this.loadEventsFromCurrentBatch()).length - } - - // --- Storage helpers --- - - private getCurrentBatchFilePath(): string { - return path.join( - getStorageDir(), - `${FILE_PREFIX}${getSessionId()}.${BATCH_UUID}.json`, - ) - } - - private async loadEventsFromFile( - filePath: string, - ): Promise { - try { - return await readJSONLFile(filePath) - } catch { - return [] - } - } - - private async loadEventsFromCurrentBatch(): Promise< - FirstPartyEventLoggingEvent[] - > { - return this.loadEventsFromFile(this.getCurrentBatchFilePath()) - } - - private async saveEventsToFile( - filePath: string, - events: FirstPartyEventLoggingEvent[], - ): Promise { - try { - if (events.length === 0) { - try { - await unlink(filePath) - } catch { - // File doesn't exist, nothing to delete - } - } else { - // Ensure storage directory exists - await mkdir(getStorageDir(), { recursive: true }) - // Write as JSON lines (one event per line) - const content = events.map(e => jsonStringify(e)).join('\n') + '\n' - await writeFile(filePath, content, 'utf8') - } - } catch (error) { - logError(error) - } - } - - private async appendEventsToFile( - filePath: string, - events: FirstPartyEventLoggingEvent[], - ): Promise { - if (events.length === 0) return - try { - // Ensure storage directory exists - await mkdir(getStorageDir(), { recursive: true }) - // Append as JSON lines (one event per line) - atomic on most filesystems - const content = events.map(e => jsonStringify(e)).join('\n') + '\n' - await appendFile(filePath, content, 'utf8') - } catch (error) { - logError(error) - } - } - - private async deleteFile(filePath: string): Promise { - try { - await unlink(filePath) - } catch { - // File doesn't exist or can't be deleted, ignore - } - } - - // --- Previous batch retry (startup) --- - - private async retryPreviousBatches(): Promise { - try { - const prefix = `${FILE_PREFIX}${getSessionId()}.` - let files: string[] - try { - files = (await readdir(getStorageDir())) - .filter((f: string) => f.startsWith(prefix) && f.endsWith('.json')) - .filter((f: string) => !f.includes(BATCH_UUID)) // Exclude current batch - } catch (e) { - if (isFsInaccessible(e)) return - throw e - } - - for (const file of files) { - const filePath = path.join(getStorageDir(), file) - void this.retryFileInBackground(filePath) - } - } catch (error) { - logError(error) - } - } - - private async retryFileInBackground(filePath: string): Promise { - if (this.attempts >= this.maxAttempts) { - await this.deleteFile(filePath) - return - } - - const events = await this.loadEventsFromFile(filePath) - if (events.length === 0) { - await this.deleteFile(filePath) - return - } - - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: retrying ${events.length} events from previous batch`, - ) - } - - const failedEvents = await this.sendEventsInBatches(events) - if (failedEvents.length === 0) { - await this.deleteFile(filePath) - if (process.env.USER_TYPE === 'ant') { - logForDebugging('1P event logging: previous batch retry succeeded') - } - } else { - // Save only the failed events back (not all original events) - await this.saveEventsToFile(filePath, failedEvents) - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: previous batch retry failed, ${failedEvents.length} events remain`, - ) - } - } - } - - async export( - logs: ReadableLogRecord[], - resultCallback: (result: ExportResult) => void, - ): Promise { - if (this.isShutdown) { - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - '1P event logging export failed: Exporter has been shutdown', - ) - } - resultCallback({ - code: ExportResultCode.FAILED, - error: new Error('Exporter has been shutdown'), - }) - return - } - - const exportPromise = this.doExport(logs, resultCallback) - this.pendingExports.push(exportPromise) - - // Clean up completed exports - void exportPromise.finally(() => { - const index = this.pendingExports.indexOf(exportPromise) - if (index > -1) { - void this.pendingExports.splice(index, 1) - } - }) - } - - private async doExport( - logs: ReadableLogRecord[], - resultCallback: (result: ExportResult) => void, - ): Promise { - try { - // Filter for event logs only (by scope name) - const eventLogs = logs.filter( - log => - log.instrumentationScope?.name === 'com.anthropic.claude_code.events', - ) - - if (eventLogs.length === 0) { - resultCallback({ code: ExportResultCode.SUCCESS }) - return - } - - // Transform new logs (failed events are retried independently via backoff) - const events = this.transformLogsToEvents(eventLogs).events - - if (events.length === 0) { - resultCallback({ code: ExportResultCode.SUCCESS }) - return - } - - if (this.attempts >= this.maxAttempts) { - resultCallback({ - code: ExportResultCode.FAILED, - error: new Error( - `Dropped ${events.length} events: max attempts (${this.maxAttempts}) reached`, - ), - }) - return - } - - // Send events - const failedEvents = await this.sendEventsInBatches(events) - this.attempts++ - - if (failedEvents.length > 0) { - await this.queueFailedEvents(failedEvents) - this.scheduleBackoffRetry() - const context = this.lastExportErrorContext - ? ` (${this.lastExportErrorContext})` - : '' - resultCallback({ - code: ExportResultCode.FAILED, - error: new Error( - `Failed to export ${failedEvents.length} events${context}`, - ), - }) - return - } - - // Success - reset backoff and immediately retry any queued events - this.resetBackoff() - if ((await this.getQueuedEventCount()) > 0 && !this.isRetrying) { - void this.retryFailedEvents() - } - resultCallback({ code: ExportResultCode.SUCCESS }) - } catch (error) { - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging export failed: ${errorMessage(error)}`, - ) - } - logError(error) - resultCallback({ - code: ExportResultCode.FAILED, - error: toError(error), - }) - } - } - - private async sendEventsInBatches( - events: FirstPartyEventLoggingEvent[], - ): Promise { - // Chunk events into batches - const batches: FirstPartyEventLoggingEvent[][] = [] - for (let i = 0; i < events.length; i += this.maxBatchSize) { - batches.push(events.slice(i, i + this.maxBatchSize)) - } - - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: exporting ${events.length} events in ${batches.length} batch(es)`, - ) - } - - // Send each batch with delay between them. On first failure, assume the - // endpoint is down and short-circuit: queue the failed batch plus all - // remaining unsent batches without POSTing them. The backoff retry will - // probe again with a single batch next tick. - const failedBatchEvents: FirstPartyEventLoggingEvent[] = [] - let lastErrorContext: string | undefined - for (let i = 0; i < batches.length; i++) { - const batch = batches[i]! - try { - await this.sendBatchWithRetry({ events: batch }) - } catch (error) { - lastErrorContext = getAxiosErrorContext(error) - for (let j = i; j < batches.length; j++) { - failedBatchEvents.push(...batches[j]!) - } - if (process.env.USER_TYPE === 'ant') { - const skipped = batches.length - 1 - i - logForDebugging( - `1P event logging: batch ${i + 1}/${batches.length} failed (${lastErrorContext}); short-circuiting ${skipped} remaining batch(es)`, - ) - } - break - } - - if (i < batches.length - 1 && this.batchDelayMs > 0) { - await sleep(this.batchDelayMs) - } - } - - if (failedBatchEvents.length > 0 && lastErrorContext) { - this.lastExportErrorContext = lastErrorContext - } - - return failedBatchEvents - } - - private async queueFailedEvents( - events: FirstPartyEventLoggingEvent[], - ): Promise { - const filePath = this.getCurrentBatchFilePath() - - // Append-only: just add new events to file (atomic on most filesystems) - await this.appendEventsToFile(filePath, events) - - const context = this.lastExportErrorContext - ? ` (${this.lastExportErrorContext})` - : '' - const message = `1P event logging: ${events.length} events failed to export${context}` - logError(new Error(message)) - } - - private scheduleBackoffRetry(): void { - // Don't schedule if already retrying or shutdown - if (this.cancelBackoff || this.isRetrying || this.isShutdown) { - return - } - - // Quadratic backoff (matching Statsig SDK): base * attempts² - const delay = Math.min( - this.baseBackoffDelayMs * this.attempts * this.attempts, - this.maxBackoffDelayMs, - ) - - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: scheduling backoff retry in ${delay}ms (attempt ${this.attempts})`, - ) - } - - this.cancelBackoff = this.schedule(async () => { - this.cancelBackoff = null - await this.retryFailedEvents() - }, delay) - } - - private async retryFailedEvents(): Promise { - const filePath = this.getCurrentBatchFilePath() - - // Keep retrying while there are events and endpoint is healthy - while (!this.isShutdown) { - const events = await this.loadEventsFromFile(filePath) - if (events.length === 0) break - - if (this.attempts >= this.maxAttempts) { - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: max attempts (${this.maxAttempts}) reached, dropping ${events.length} events`, - ) - } - await this.deleteFile(filePath) - this.resetBackoff() - return - } - - this.isRetrying = true - - // Clear file before retry (we have events in memory now) - await this.deleteFile(filePath) - - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: retrying ${events.length} failed events (attempt ${this.attempts + 1})`, - ) - } - - const failedEvents = await this.sendEventsInBatches(events) - this.attempts++ - - this.isRetrying = false - - if (failedEvents.length > 0) { - // Write failures back to disk - await this.saveEventsToFile(filePath, failedEvents) - this.scheduleBackoffRetry() - return // Failed - wait for backoff - } - - // Success - reset backoff and continue loop to drain any newly queued events - this.resetBackoff() - if (process.env.USER_TYPE === 'ant') { - logForDebugging('1P event logging: backoff retry succeeded') - } - } - } - - private resetBackoff(): void { - this.attempts = 0 - if (this.cancelBackoff) { - this.cancelBackoff() - this.cancelBackoff = null - } - } - - private async sendBatchWithRetry( - payload: FirstPartyEventLoggingPayload, - ): Promise { - if (this.isKilled()) { - // Throw so the caller short-circuits remaining batches and queues - // everything to disk. Zero network traffic while killed; the backoff - // timer keeps ticking and will resume POSTs as soon as the GrowthBook - // cache picks up the cleared flag. - throw new Error('firstParty sink killswitch active') - } - - const baseHeaders: Record = { - 'Content-Type': 'application/json', - 'User-Agent': getClaudeCodeUserAgent(), - 'x-service-name': 'claude-code', - } - - // Skip auth if trust hasn't been established yet - // This prevents executing apiKeyHelper commands before the trust dialog - // Non-interactive sessions implicitly have workspace trust - const hasTrust = - checkHasTrustDialogAccepted() || getIsNonInteractiveSession() - if (process.env.USER_TYPE === 'ant' && !hasTrust) { - logForDebugging('1P event logging: Trust not accepted') - } - - // Skip auth when the OAuth token is expired or lacks user:profile - // scope (service key sessions). Falls through to unauthenticated send. - let shouldSkipAuth = this.skipAuth || !hasTrust - if (!shouldSkipAuth && isClaudeAISubscriber()) { - const tokens = getClaudeAIOAuthTokens() - if (!hasProfileScope()) { - shouldSkipAuth = true - } else if (tokens && isOAuthTokenExpired(tokens.expiresAt)) { - shouldSkipAuth = true - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - '1P event logging: OAuth token expired, skipping auth to avoid 401', - ) - } - } - } - - // Try with auth headers first (unless trust not established or token is known to be expired) - const authResult = shouldSkipAuth - ? { headers: {}, error: 'trust not established or Oauth token expired' } - : getAuthHeaders() - const useAuth = !authResult.error - - if (!useAuth && process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: auth not available, sending without auth`, - ) - } - - const headers = useAuth - ? { ...baseHeaders, ...authResult.headers } - : baseHeaders - - try { - const response = await axios.post(this.endpoint, payload, { - timeout: this.timeout, - headers, - }) - this.logSuccess(payload.events.length, useAuth, response.data) - return - } catch (error) { - // Handle 401 by retrying without auth - if ( - useAuth && - axios.isAxiosError(error) && - error.response?.status === 401 - ) { - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - '1P event logging: 401 auth error, retrying without auth', - ) - } - const response = await axios.post(this.endpoint, payload, { - timeout: this.timeout, - headers: baseHeaders, - }) - this.logSuccess(payload.events.length, false, response.data) - return - } - - throw error - } - } - - private logSuccess( - eventCount: number, - withAuth: boolean, - responseData: unknown, - ): void { - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: ${eventCount} events exported successfully${withAuth ? ' (with auth)' : ' (without auth)'}`, - ) - logForDebugging(`API Response: ${jsonStringify(responseData, null, 2)}`) - } - } - - private hrTimeToDate(hrTime: HrTime): Date { - const [seconds, nanoseconds] = hrTime - return new Date(seconds * 1000 + nanoseconds / 1000000) - } - - private transformLogsToEvents( - logs: ReadableLogRecord[], - ): FirstPartyEventLoggingPayload { - const events: FirstPartyEventLoggingEvent[] = [] - - for (const log of logs) { - const attributes = log.attributes || {} - - // Check if this is a GrowthBook experiment event - if (attributes.event_type === 'GrowthbookExperimentEvent') { - const timestamp = this.hrTimeToDate(log.hrTime) - const account_uuid = attributes.account_uuid as string | undefined - const organization_uuid = attributes.organization_uuid as - | string - | undefined - events.push({ - event_type: 'GrowthbookExperimentEvent', - event_data: GrowthbookExperimentEvent.toJSON({ - event_id: attributes.event_id as string, - timestamp, - experiment_id: attributes.experiment_id as string, - variation_id: attributes.variation_id as number, - environment: attributes.environment as string, - user_attributes: attributes.user_attributes as string, - experiment_metadata: attributes.experiment_metadata as string, - device_id: attributes.device_id as string, - session_id: attributes.session_id as string, - auth: - account_uuid || organization_uuid - ? { account_uuid, organization_uuid } - : undefined, - }), - }) - continue - } - - // Extract event name - const eventName = - (attributes.event_name as string) || (log.body as string) || 'unknown' - - // Extract metadata objects directly (no JSON parsing needed) - const coreMetadata = attributes.core_metadata as EventMetadata | undefined - const userMetadata = attributes.user_metadata as CoreUserData - const eventMetadata = (attributes.event_metadata || {}) as Record< - string, - unknown - > - - if (!coreMetadata) { - // Emit partial event if core metadata is missing - if (process.env.USER_TYPE === 'ant') { - logForDebugging( - `1P event logging: core_metadata missing for event ${eventName}`, - ) - } - events.push({ - event_type: 'ClaudeCodeInternalEvent', - event_data: ClaudeCodeInternalEvent.toJSON({ - event_id: attributes.event_id as string | undefined, - event_name: eventName, - client_timestamp: this.hrTimeToDate(log.hrTime), - session_id: getSessionId(), - additional_metadata: Buffer.from( - jsonStringify({ - transform_error: 'core_metadata attribute is missing', - }), - ).toString('base64'), - }), - }) - continue - } - - // Transform to 1P format - const formatted = to1PEventFormat( - coreMetadata, - userMetadata, - eventMetadata, - ) - - // _PROTO_* keys are PII-tagged values meant only for privileged BQ - // columns. Hoist known keys to proto fields, then defensively strip any - // remaining _PROTO_* so an unrecognized future key can't silently land - // in the general-access additional_metadata blob. sink.ts applies the - // same strip before Datadog; this closes the 1P side. - const { - _PROTO_skill_name, - _PROTO_plugin_name, - _PROTO_marketplace_name, - ...rest - } = formatted.additional - const additionalMetadata = stripProtoFields(rest) - - events.push({ - event_type: 'ClaudeCodeInternalEvent', - event_data: ClaudeCodeInternalEvent.toJSON({ - event_id: attributes.event_id as string | undefined, - event_name: eventName, - client_timestamp: this.hrTimeToDate(log.hrTime), - device_id: attributes.user_id as string | undefined, - email: userMetadata?.email, - auth: formatted.auth, - ...formatted.core, - env: formatted.env, - process: formatted.process, - skill_name: - typeof _PROTO_skill_name === 'string' - ? _PROTO_skill_name - : undefined, - plugin_name: - typeof _PROTO_plugin_name === 'string' - ? _PROTO_plugin_name - : undefined, - marketplace_name: - typeof _PROTO_marketplace_name === 'string' - ? _PROTO_marketplace_name - : undefined, - additional_metadata: - Object.keys(additionalMetadata).length > 0 - ? Buffer.from(jsonStringify(additionalMetadata)).toString( - 'base64', - ) - : undefined, - }), - }) - } - - return { events } - } - - async shutdown(): Promise { - this.isShutdown = true - this.resetBackoff() - await this.forceFlush() - if (process.env.USER_TYPE === 'ant') { - logForDebugging('1P event logging exporter shutdown complete') - } - } - - async forceFlush(): Promise { - await Promise.all(this.pendingExports) - if (process.env.USER_TYPE === 'ant') { - logForDebugging('1P event logging exporter flush complete') - } - } -} - -function getAxiosErrorContext(error: unknown): string { - if (!axios.isAxiosError(error)) { - return errorMessage(error) - } - - const parts: string[] = [] - - const requestId = error.response?.headers?.['request-id'] - if (requestId) { - parts.push(`request-id=${requestId}`) - } - - if (error.response?.status) { - parts.push(`status=${error.response.status}`) - } - - if (error.code) { - parts.push(`code=${error.code}`) - } - - if (error.message) { - parts.push(error.message) - } - - return parts.join(', ') -} diff --git a/src/utils/telemetry/betaSessionTracing.ts b/src/utils/telemetry/betaSessionTracing.ts index 6699721..8d22e8e 100644 --- a/src/utils/telemetry/betaSessionTracing.ts +++ b/src/utils/telemetry/betaSessionTracing.ts @@ -1,105 +1,33 @@ /** - * Beta Session Tracing for Claude Code + * Detailed beta tracing egress is disabled in this build. * - * This module contains beta tracing features enabled when - * ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set. - * - * For external users, tracing is enabled in SDK/headless mode, or in - * interactive mode when the org is allowlisted via the - * tengu_trace_lantern GrowthBook gate. - * For ant users, tracing is enabled in all modes. - * - * Visibility Rules: - * | Content | External | Ant | - * |------------------|----------|------| - * | System prompts | ✅ | ✅ | - * | Model output | ✅ | ✅ | - * | Thinking output | ❌ | ✅ | - * | Tools | ✅ | ✅ | - * | new_context | ✅ | ✅ | - * - * Features: - * - Per-agent message tracking with hash-based deduplication - * - System prompt logging (once per unique hash) - * - Hook execution spans - * - Detailed new_context attributes for LLM requests + * The exported helpers remain for compile-time compatibility, but do not + * retain tracing state or emit tracing attributes. */ -import type { Span } from '@opentelemetry/api' -import { createHash } from 'crypto' -import { getIsNonInteractiveSession } from '../../bootstrap/state.js' -import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js' -import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js' -import type { AssistantMessage, UserMessage } from '../../types/message.js' -import { isEnvTruthy } from '../envUtils.js' -import { jsonParse, jsonStringify } from '../slowOperations.js' -import { logOTelEvent } from './events.js' +type AttributeValue = string | number | boolean -// Message type for API calls (UserMessage or AssistantMessage) -type APIMessage = UserMessage | AssistantMessage +export interface SpanAttributeWriter { + setAttribute?(_key: string, _value: AttributeValue): void + setAttributes?(_attributes: Record): void +} -/** - * Track hashes we've already logged this session (system prompts, tools, etc). - * - * WHY: System prompts and tool schemas are large and rarely change within a session. - * Sending full content on every request would be wasteful. Instead, we hash and - * only log the full content once per unique hash. - */ -const seenHashes = new Set() +export interface LLMRequestNewContext { + systemPrompt?: string + querySource?: string + tools?: string +} -/** - * Track the last reported message hash per querySource (agent) for incremental context. - * - * WHY: When debugging traces, we want to see what NEW information was added each turn, - * not the entire conversation history (which can be huge). By tracking the last message - * we reported per agent, we can compute and send only the delta (new messages since - * the last request). This is tracked per-agent (querySource) because different agents - * (main thread, subagents, warmup requests) have independent conversation contexts. - */ -const lastReportedMessageHash = new Map() +const MAX_CONTENT_SIZE = 60 * 1024 -/** - * Clear tracking state after compaction. - * Old hashes are irrelevant once messages have been replaced. - */ export function clearBetaTracingState(): void { - seenHashes.clear() - lastReportedMessageHash.clear() + return } -const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe) - -/** - * Check if beta detailed tracing is enabled. - * - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT - * - For external users, enabled in SDK/headless mode OR when org is - * allowlisted via the tengu_trace_lantern GrowthBook gate - */ export function isBetaTracingEnabled(): boolean { - const baseEnabled = - isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) && - Boolean(process.env.BETA_TRACING_ENDPOINT) - - if (!baseEnabled) { - return false - } - - // For external users, enable in SDK/headless mode OR when org is allowlisted. - // Gate reads from disk cache, so first run after allowlisting returns false; - // works from second run onward (same behavior as enhanced_telemetry_beta). - if (process.env.USER_TYPE !== 'ant') { - return ( - getIsNonInteractiveSession() || - getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false) - ) - } - - return true + return false } -/** - * Truncate content to fit within Honeycomb limits. - */ export function truncateContent( content: string, maxSize: number = MAX_CONTENT_SIZE, @@ -116,376 +44,43 @@ export function truncateContent( } } -/** - * Generate a short hash (first 12 hex chars of SHA-256). - */ -function shortHash(content: string): string { - return createHash('sha256').update(content).digest('hex').slice(0, 12) -} - -/** - * Generate a hash for a system prompt. - */ -function hashSystemPrompt(systemPrompt: string): string { - return `sp_${shortHash(systemPrompt)}` -} - -/** - * Generate a hash for a message based on its content. - */ -function hashMessage(message: APIMessage): string { - const content = jsonStringify(message.message.content) - return `msg_${shortHash(content)}` -} - -// Regex to detect content wrapped in tags -const SYSTEM_REMINDER_REGEX = - /^\n?([\s\S]*?)\n?<\/system-reminder>$/ - -/** - * Check if text is entirely a system reminder (wrapped in tags). - * Returns the inner content if it is, null otherwise. - */ -function extractSystemReminderContent(text: string): string | null { - const match = text.trim().match(SYSTEM_REMINDER_REGEX) - return match && match[1] ? match[1].trim() : null -} - -/** - * Result of formatting messages - separates regular content from system reminders. - */ -interface FormattedMessages { - contextParts: string[] - systemReminders: string[] -} - -/** - * Format user messages for new_context display, separating system reminders. - * Only handles user messages (assistant messages are filtered out before this is called). - */ -function formatMessagesForContext(messages: UserMessage[]): FormattedMessages { - const contextParts: string[] = [] - const systemReminders: string[] = [] - - for (const message of messages) { - const content = message.message.content - if (typeof content === 'string') { - const reminderContent = extractSystemReminderContent(content) - if (reminderContent) { - systemReminders.push(reminderContent) - } else { - contextParts.push(`[USER]\n${content}`) - } - } else if (Array.isArray(content)) { - for (const block of content) { - if (block.type === 'text') { - const reminderContent = extractSystemReminderContent(block.text) - if (reminderContent) { - systemReminders.push(reminderContent) - } else { - contextParts.push(`[USER]\n${block.text}`) - } - } else if (block.type === 'tool_result') { - const resultContent = - typeof block.content === 'string' - ? block.content - : jsonStringify(block.content) - // Tool results can also contain system reminders (e.g., malware warning) - const reminderContent = extractSystemReminderContent(resultContent) - if (reminderContent) { - systemReminders.push(reminderContent) - } else { - contextParts.push( - `[TOOL RESULT: ${block.tool_use_id}]\n${resultContent}`, - ) - } - } - } - } - } - - return { contextParts, systemReminders } -} - -export interface LLMRequestNewContext { - /** System prompt (typically only on first request or if changed) */ - systemPrompt?: string - /** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */ - querySource?: string - /** Tool schemas sent with the request */ - tools?: string -} - -/** - * Add beta attributes to an interaction span. - * Adds new_context with the user prompt. - */ export function addBetaInteractionAttributes( - span: Span, - userPrompt: string, + _span: SpanAttributeWriter, + _userPrompt: string, ): void { - if (!isBetaTracingEnabled()) { - return - } - - const { content: truncatedPrompt, truncated } = truncateContent( - `[USER PROMPT]\n${userPrompt}`, - ) - span.setAttributes({ - new_context: truncatedPrompt, - ...(truncated && { - new_context_truncated: true, - new_context_original_length: userPrompt.length, - }), - }) + return } -/** - * Add beta attributes to an LLM request span. - * Handles system prompt logging and new_context computation. - */ export function addBetaLLMRequestAttributes( - span: Span, - newContext?: LLMRequestNewContext, - messagesForAPI?: APIMessage[], + _span: SpanAttributeWriter, + _newContext?: LLMRequestNewContext, + _messagesForAPI?: unknown[], ): void { - if (!isBetaTracingEnabled()) { - return - } - - // Add system prompt info to the span - if (newContext?.systemPrompt) { - const promptHash = hashSystemPrompt(newContext.systemPrompt) - const preview = newContext.systemPrompt.slice(0, 500) - - // Always add hash, preview, and length to the span - span.setAttribute('system_prompt_hash', promptHash) - span.setAttribute('system_prompt_preview', preview) - span.setAttribute('system_prompt_length', newContext.systemPrompt.length) - - // Log the full system prompt only once per unique hash this session - if (!seenHashes.has(promptHash)) { - seenHashes.add(promptHash) - - // Truncate for the log if needed - const { content: truncatedPrompt, truncated } = truncateContent( - newContext.systemPrompt, - ) - - void logOTelEvent('system_prompt', { - system_prompt_hash: promptHash, - system_prompt: truncatedPrompt, - system_prompt_length: String(newContext.systemPrompt.length), - ...(truncated && { system_prompt_truncated: 'true' }), - }) - } - } - - // Add tools info to the span - if (newContext?.tools) { - try { - const toolsArray = jsonParse(newContext.tools) as Record< - string, - unknown - >[] - - // Build array of {name, hash} for each tool - const toolsWithHashes = toolsArray.map(tool => { - const toolJson = jsonStringify(tool) - const toolHash = shortHash(toolJson) - return { - name: typeof tool.name === 'string' ? tool.name : 'unknown', - hash: toolHash, - json: toolJson, - } - }) - - // Set span attribute with array of name/hash pairs - span.setAttribute( - 'tools', - jsonStringify( - toolsWithHashes.map(({ name, hash }) => ({ name, hash })), - ), - ) - span.setAttribute('tools_count', toolsWithHashes.length) - - // Log each tool's full description once per unique hash - for (const { name, hash, json } of toolsWithHashes) { - if (!seenHashes.has(`tool_${hash}`)) { - seenHashes.add(`tool_${hash}`) - - const { content: truncatedTool, truncated } = truncateContent(json) - - void logOTelEvent('tool', { - tool_name: sanitizeToolNameForAnalytics(name), - tool_hash: hash, - tool: truncatedTool, - ...(truncated && { tool_truncated: 'true' }), - }) - } - } - } catch { - // If parsing fails, log the raw tools string - span.setAttribute('tools_parse_error', true) - } - } - - // Add new_context using hash-based tracking (visible to all users) - if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) { - const querySource = newContext.querySource - const lastHash = lastReportedMessageHash.get(querySource) - - // Find where the last reported message is in the array - let startIndex = 0 - if (lastHash) { - for (let i = 0; i < messagesForAPI.length; i++) { - const msg = messagesForAPI[i] - if (msg && hashMessage(msg) === lastHash) { - startIndex = i + 1 // Start after the last reported message - break - } - } - // If lastHash not found, startIndex stays 0 (send everything) - } - - // Get new messages (filter out assistant messages - we only want user input/tool results) - const newMessages = messagesForAPI - .slice(startIndex) - .filter((m): m is UserMessage => m.type === 'user') - - if (newMessages.length > 0) { - // Format new messages, separating system reminders from regular content - const { contextParts, systemReminders } = - formatMessagesForContext(newMessages) - - // Set new_context (regular user content and tool results) - if (contextParts.length > 0) { - const fullContext = contextParts.join('\n\n---\n\n') - const { content: truncatedContext, truncated } = - truncateContent(fullContext) - - span.setAttributes({ - new_context: truncatedContext, - new_context_message_count: newMessages.length, - ...(truncated && { - new_context_truncated: true, - new_context_original_length: fullContext.length, - }), - }) - } - - // Set system_reminders as a separate attribute - if (systemReminders.length > 0) { - const fullReminders = systemReminders.join('\n\n---\n\n') - const { content: truncatedReminders, truncated: remindersTruncated } = - truncateContent(fullReminders) - - span.setAttributes({ - system_reminders: truncatedReminders, - system_reminders_count: systemReminders.length, - ...(remindersTruncated && { - system_reminders_truncated: true, - system_reminders_original_length: fullReminders.length, - }), - }) - } - - // Update last reported hash to the last message in the array - const lastMessage = messagesForAPI[messagesForAPI.length - 1] - if (lastMessage) { - lastReportedMessageHash.set(querySource, hashMessage(lastMessage)) - } - } - } + return } -/** - * Add beta attributes to endLLMRequestSpan. - * Handles model_output and thinking_output truncation. - */ export function addBetaLLMResponseAttributes( - endAttributes: Record, - metadata?: { + _attributes: Record, + _metadata?: { modelOutput?: string thinkingOutput?: string }, ): void { - if (!isBetaTracingEnabled() || !metadata) { - return - } - - // Add model_output (text content) - visible to all users - if (metadata.modelOutput !== undefined) { - const { content: modelOutput, truncated: outputTruncated } = - truncateContent(metadata.modelOutput) - endAttributes['response.model_output'] = modelOutput - if (outputTruncated) { - endAttributes['response.model_output_truncated'] = true - endAttributes['response.model_output_original_length'] = - metadata.modelOutput.length - } - } - - // Add thinking_output - ant-only - if ( - process.env.USER_TYPE === 'ant' && - metadata.thinkingOutput !== undefined - ) { - const { content: thinkingOutput, truncated: thinkingTruncated } = - truncateContent(metadata.thinkingOutput) - endAttributes['response.thinking_output'] = thinkingOutput - if (thinkingTruncated) { - endAttributes['response.thinking_output_truncated'] = true - endAttributes['response.thinking_output_original_length'] = - metadata.thinkingOutput.length - } - } + return } -/** - * Add beta attributes to startToolSpan. - * Adds tool_input with the serialized tool input. - */ export function addBetaToolInputAttributes( - span: Span, - toolName: string, - toolInput: string, + _span: SpanAttributeWriter, + _toolName: string, + _toolInput: string, ): void { - if (!isBetaTracingEnabled()) { - return - } - - const { content: truncatedInput, truncated } = truncateContent( - `[TOOL INPUT: ${toolName}]\n${toolInput}`, - ) - span.setAttributes({ - tool_input: truncatedInput, - ...(truncated && { - tool_input_truncated: true, - tool_input_original_length: toolInput.length, - }), - }) + return } -/** - * Add beta attributes to endToolSpan. - * Adds new_context with the tool result. - */ export function addBetaToolResultAttributes( - endAttributes: Record, - toolName: string | number | boolean, - toolResult: string, + _attributes: Record, + _toolName: string | number | boolean, + _toolResult: string, ): void { - if (!isBetaTracingEnabled()) { - return - } - - const { content: truncatedResult, truncated } = truncateContent( - `[TOOL RESULT: ${toolName}]\n${toolResult}`, - ) - endAttributes['new_context'] = truncatedResult - if (truncated) { - endAttributes['new_context_truncated'] = true - endAttributes['new_context_original_length'] = toolResult.length - } + return } diff --git a/src/utils/telemetry/bigqueryExporter.ts b/src/utils/telemetry/bigqueryExporter.ts deleted file mode 100644 index 2f935c4..0000000 --- a/src/utils/telemetry/bigqueryExporter.ts +++ /dev/null @@ -1,252 +0,0 @@ -import type { Attributes, HrTime } from '@opentelemetry/api' -import { type ExportResult, ExportResultCode } from '@opentelemetry/core' -import { - AggregationTemporality, - type MetricData, - type DataPoint as OTelDataPoint, - type PushMetricExporter, - type ResourceMetrics, -} from '@opentelemetry/sdk-metrics' -import axios from 'axios' -import { checkMetricsEnabled } from 'src/services/api/metricsOptOut.js' -import { getIsNonInteractiveSession } from '../../bootstrap/state.js' -import { getSubscriptionType, isClaudeAISubscriber } from '../auth.js' -import { checkHasTrustDialogAccepted } from '../config.js' -import { logForDebugging } from '../debug.js' -import { errorMessage, toError } from '../errors.js' -import { getAuthHeaders } from '../http.js' -import { logError } from '../log.js' -import { jsonStringify } from '../slowOperations.js' -import { getClaudeCodeUserAgent } from '../userAgent.js' - -type DataPoint = { - attributes: Record - value: number - timestamp: string -} - -type Metric = { - name: string - description?: string - unit?: string - data_points: DataPoint[] -} - -type InternalMetricsPayload = { - resource_attributes: Record - metrics: Metric[] -} - -export class BigQueryMetricsExporter implements PushMetricExporter { - private readonly endpoint: string - private readonly timeout: number - private pendingExports: Promise[] = [] - private isShutdown = false - - constructor(options: { timeout?: number } = {}) { - const defaultEndpoint = 'https://api.anthropic.com/api/claude_code/metrics' - - if ( - process.env.USER_TYPE === 'ant' && - process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT - ) { - this.endpoint = - process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT + - '/api/claude_code/metrics' - } else { - this.endpoint = defaultEndpoint - } - - this.timeout = options.timeout || 5000 - } - - async export( - metrics: ResourceMetrics, - resultCallback: (result: ExportResult) => void, - ): Promise { - if (this.isShutdown) { - resultCallback({ - code: ExportResultCode.FAILED, - error: new Error('Exporter has been shutdown'), - }) - return - } - - const exportPromise = this.doExport(metrics, resultCallback) - this.pendingExports.push(exportPromise) - - // Clean up completed exports - void exportPromise.finally(() => { - const index = this.pendingExports.indexOf(exportPromise) - if (index > -1) { - void this.pendingExports.splice(index, 1) - } - }) - } - - private async doExport( - metrics: ResourceMetrics, - resultCallback: (result: ExportResult) => void, - ): Promise { - try { - // Skip if trust not established in interactive mode - // This prevents triggering apiKeyHelper before trust dialog - const hasTrust = - checkHasTrustDialogAccepted() || getIsNonInteractiveSession() - if (!hasTrust) { - logForDebugging( - 'BigQuery metrics export: trust not established, skipping', - ) - resultCallback({ code: ExportResultCode.SUCCESS }) - return - } - - // Check organization-level metrics opt-out - const metricsStatus = await checkMetricsEnabled() - if (!metricsStatus.enabled) { - logForDebugging('Metrics export disabled by organization setting') - resultCallback({ code: ExportResultCode.SUCCESS }) - return - } - - const payload = this.transformMetricsForInternal(metrics) - - const authResult = getAuthHeaders() - if (authResult.error) { - logForDebugging(`Metrics export failed: ${authResult.error}`) - resultCallback({ - code: ExportResultCode.FAILED, - error: new Error(authResult.error), - }) - return - } - - const headers: Record = { - 'Content-Type': 'application/json', - 'User-Agent': getClaudeCodeUserAgent(), - ...authResult.headers, - } - - const response = await axios.post(this.endpoint, payload, { - timeout: this.timeout, - headers, - }) - - logForDebugging('BigQuery metrics exported successfully') - logForDebugging( - `BigQuery API Response: ${jsonStringify(response.data, null, 2)}`, - ) - resultCallback({ code: ExportResultCode.SUCCESS }) - } catch (error) { - logForDebugging(`BigQuery metrics export failed: ${errorMessage(error)}`) - logError(error) - resultCallback({ - code: ExportResultCode.FAILED, - error: toError(error), - }) - } - } - - private transformMetricsForInternal( - metrics: ResourceMetrics, - ): InternalMetricsPayload { - const attrs = metrics.resource.attributes - - const resourceAttributes: Record = { - 'service.name': (attrs['service.name'] as string) || 'claude-code', - 'service.version': (attrs['service.version'] as string) || 'unknown', - 'os.type': (attrs['os.type'] as string) || 'unknown', - 'os.version': (attrs['os.version'] as string) || 'unknown', - 'host.arch': (attrs['host.arch'] as string) || 'unknown', - 'aggregation.temporality': - this.selectAggregationTemporality() === AggregationTemporality.DELTA - ? 'delta' - : 'cumulative', - } - - // Only add wsl.version if it exists (omit instead of default) - if (attrs['wsl.version']) { - resourceAttributes['wsl.version'] = attrs['wsl.version'] as string - } - - // Add customer type and subscription type - if (isClaudeAISubscriber()) { - resourceAttributes['user.customer_type'] = 'claude_ai' - const subscriptionType = getSubscriptionType() - if (subscriptionType) { - resourceAttributes['user.subscription_type'] = subscriptionType - } - } else { - resourceAttributes['user.customer_type'] = 'api' - } - - const transformed = { - resource_attributes: resourceAttributes, - metrics: metrics.scopeMetrics.flatMap(scopeMetric => - scopeMetric.metrics.map(metric => ({ - name: metric.descriptor.name, - description: metric.descriptor.description, - unit: metric.descriptor.unit, - data_points: this.extractDataPoints(metric), - })), - ), - } - - return transformed - } - - private extractDataPoints(metric: MetricData): DataPoint[] { - const dataPoints = metric.dataPoints || [] - - return dataPoints - .filter( - (point): point is OTelDataPoint => - typeof point.value === 'number', - ) - .map(point => ({ - attributes: this.convertAttributes(point.attributes), - value: point.value, - timestamp: this.hrTimeToISOString( - point.endTime || point.startTime || [Date.now() / 1000, 0], - ), - })) - } - - async shutdown(): Promise { - this.isShutdown = true - await this.forceFlush() - logForDebugging('BigQuery metrics exporter shutdown complete') - } - - async forceFlush(): Promise { - await Promise.all(this.pendingExports) - logForDebugging('BigQuery metrics exporter flush complete') - } - - private convertAttributes( - attributes: Attributes | undefined, - ): Record { - const result: Record = {} - if (attributes) { - for (const [key, value] of Object.entries(attributes)) { - if (value !== undefined && value !== null) { - result[key] = String(value) - } - } - } - return result - } - - private hrTimeToISOString(hrTime: HrTime): string { - const [seconds, nanoseconds] = hrTime - const date = new Date(seconds * 1000 + nanoseconds / 1000000) - return date.toISOString() - } - - selectAggregationTemporality(): AggregationTemporality { - // DO NOT CHANGE THIS TO CUMULATIVE - // It would mess up the aggregation of metrics - // for CC Productivity metrics dashboard - return AggregationTemporality.DELTA - } -} diff --git a/src/utils/telemetry/instrumentation.ts b/src/utils/telemetry/instrumentation.ts index 7f1f8ef..93ca928 100644 --- a/src/utils/telemetry/instrumentation.ts +++ b/src/utils/telemetry/instrumentation.ts @@ -1,14 +1,5 @@ export function bootstrapTelemetry(): void {} -export function parseExporterTypes(value: string | undefined): string[] { - return (value || '') - .trim() - .split(',') - .filter(Boolean) - .map(t => t.trim()) - .filter(t => t !== 'none') -} - export function isTelemetryEnabled(): boolean { return false } diff --git a/src/utils/telemetry/logger.ts b/src/utils/telemetry/logger.ts deleted file mode 100644 index 0f080f5..0000000 --- a/src/utils/telemetry/logger.ts +++ /dev/null @@ -1,26 +0,0 @@ -import type { DiagLogger } from '@opentelemetry/api' -import { logForDebugging } from '../debug.js' -import { logError } from '../log.js' -export class ClaudeCodeDiagLogger implements DiagLogger { - error(message: string, ..._: unknown[]) { - logError(new Error(message)) - logForDebugging(`[3P telemetry] OTEL diag error: ${message}`, { - level: 'error', - }) - } - warn(message: string, ..._: unknown[]) { - logError(new Error(message)) - logForDebugging(`[3P telemetry] OTEL diag warn: ${message}`, { - level: 'warn', - }) - } - info(_message: string, ..._args: unknown[]) { - return - } - debug(_message: string, ..._args: unknown[]) { - return - } - verbose(_message: string, ..._args: unknown[]) { - return - } -} diff --git a/src/utils/telemetry/sessionTracing.ts b/src/utils/telemetry/sessionTracing.ts index 0ee4f38..5193607 100644 --- a/src/utils/telemetry/sessionTracing.ts +++ b/src/utils/telemetry/sessionTracing.ts @@ -1,927 +1,172 @@ /** - * Session Tracing for Claude Code using OpenTelemetry (BETA) + * OpenTelemetry session tracing is disabled in this build. * - * This module provides a high-level API for creating and managing spans - * to trace Claude Code workflows. Each user interaction creates a root - * interaction span, which contains operation spans (LLM requests, tool calls, etc.). - * - * Requirements: - * - Enhanced telemetry is enabled via feature('ENHANCED_TELEMETRY_BETA') - * - Configure OTEL_TRACES_EXPORTER (console, otlp, etc.) + * This module preserves the tracing API surface for callers, but all exported + * operations are local no-ops and never collect or forward tracing data. */ -import { feature } from 'bun:bundle' -import { context as otelContext, type Span, trace } from '@opentelemetry/api' -import { AsyncLocalStorage } from 'async_hooks' -import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js' -import type { AssistantMessage, UserMessage } from '../../types/message.js' -import { isEnvDefinedFalsy, isEnvTruthy } from '../envUtils.js' -import { getTelemetryAttributes } from '../telemetryAttributes.js' -import { - addBetaInteractionAttributes, - addBetaLLMRequestAttributes, - addBetaLLMResponseAttributes, - addBetaToolInputAttributes, - addBetaToolResultAttributes, - isBetaTracingEnabled, - type LLMRequestNewContext, - truncateContent, -} from './betaSessionTracing.js' -import { - endInteractionPerfettoSpan, - endLLMRequestPerfettoSpan, - endToolPerfettoSpan, - endUserInputPerfettoSpan, - isPerfettoTracingEnabled, - startInteractionPerfettoSpan, - startLLMRequestPerfettoSpan, - startToolPerfettoSpan, - startUserInputPerfettoSpan, -} from './perfettoTracing.js' +export { isBetaTracingEnabled, type LLMRequestNewContext } from './betaSessionTracing.js' -// Re-export for callers -export type { Span } -export { isBetaTracingEnabled, type LLMRequestNewContext } - -// Message type for API calls (UserMessage or AssistantMessage) -type APIMessage = UserMessage | AssistantMessage - -type SpanType = - | 'interaction' - | 'llm_request' - | 'tool' - | 'tool.blocked_on_user' - | 'tool.execution' - | 'hook' - -interface SpanContext { - span: Span - startTime: number - attributes: Record - ended?: boolean - perfettoSpanId?: string +export interface Span { + end(): void + setAttribute( + _key: string, + _value: string | number | boolean, + ): void + setAttributes( + _attributes: Record, + ): void + addEvent( + _eventName: string, + _attributes?: Record, + ): void + recordException(_error: Error): void } -// ALS stores SpanContext directly so it holds a strong reference while a span -// is active. With that, activeSpans can use WeakRef — when ALS is cleared -// (enterWith(undefined)) and no other code holds the SpanContext, GC can collect -// it and the WeakRef goes stale. -const interactionContext = new AsyncLocalStorage() -const toolContext = new AsyncLocalStorage() -const activeSpans = new Map>() -// Spans not stored in ALS (LLM request, blocked-on-user, tool execution, hook) -// need a strong reference to prevent GC from collecting the SpanContext before -// the corresponding end* function retrieves it. -const strongSpans = new Map() -let interactionSequence = 0 -let _cleanupIntervalStarted = false +class NoopSpan implements Span { + end(): void {} -const SPAN_TTL_MS = 30 * 60 * 1000 // 30 minutes + setAttribute( + _key: string, + _value: string | number | boolean, + ): void {} -function getSpanId(span: Span): string { - return span.spanContext().spanId || '' + setAttributes( + _attributes: Record, + ): void {} + + addEvent( + _eventName: string, + _attributes?: Record, + ): void {} + + recordException(_error: Error): void {} } -/** - * Lazily start a background interval that evicts orphaned spans from activeSpans. - * - * Normal teardown calls endInteractionSpan / endToolSpan, which delete spans - * immediately. This interval is a safety net for spans that were never ended - * (e.g. aborted streams, uncaught exceptions mid-query) — without it they - * accumulate in activeSpans indefinitely, holding references to Span objects - * and the OpenTelemetry context chain. - * - * Initialized on the first startInteractionSpan call (not at module load) to - * avoid triggering the no-top-level-side-effects lint rule and to keep the - * interval from running in processes that never start a span. - * unref() prevents the timer from keeping the process alive after all other - * work is done. - */ -function ensureCleanupInterval(): void { - if (_cleanupIntervalStarted) return - _cleanupIntervalStarted = true - const interval = setInterval(() => { - const cutoff = Date.now() - SPAN_TTL_MS - for (const [spanId, weakRef] of activeSpans) { - const ctx = weakRef.deref() - if (ctx === undefined) { - activeSpans.delete(spanId) - strongSpans.delete(spanId) - } else if (ctx.startTime < cutoff) { - if (!ctx.ended) ctx.span.end() // flush any recorded attributes to the exporter - activeSpans.delete(spanId) - strongSpans.delete(spanId) - } - } - }, 60_000) - if (typeof interval.unref === 'function') { - interval.unref() // Node.js / Bun: don't block process exit - } +const NOOP_SPAN: Span = new NoopSpan() + +type LLMRequestMetadata = { + inputTokens?: number + outputTokens?: number + cacheReadTokens?: number + cacheCreationTokens?: number + success?: boolean + statusCode?: number + error?: string + attempt?: number + modelResponse?: string + modelOutput?: string + thinkingOutput?: string + hasToolCall?: boolean + ttftMs?: number + requestSetupMs?: number + attemptStartTimes?: number[] +} + +type HookSpanMetadata = { + numSuccess?: number + numBlocking?: number + numNonBlockingError?: number + numCancelled?: number } -/** - * Check if enhanced telemetry is enabled. - * Priority: env var override > ant build > GrowthBook gate - */ export function isEnhancedTelemetryEnabled(): boolean { - if (feature('ENHANCED_TELEMETRY_BETA')) { - const env = - process.env.CLAUDE_CODE_ENHANCED_TELEMETRY_BETA ?? - process.env.ENABLE_ENHANCED_TELEMETRY_BETA - if (isEnvTruthy(env)) { - return true - } - if (isEnvDefinedFalsy(env)) { - return false - } - return ( - process.env.USER_TYPE === 'ant' || - getFeatureValue_CACHED_MAY_BE_STALE('enhanced_telemetry_beta', false) - ) - } return false } -/** - * Check if any tracing is enabled (either standard enhanced telemetry OR beta tracing) - */ -function isAnyTracingEnabled(): boolean { - return isEnhancedTelemetryEnabled() || isBetaTracingEnabled() -} - -function getTracer() { - return trace.getTracer('com.anthropic.claude_code.tracing', '1.0.0') -} - -function createSpanAttributes( - spanType: SpanType, - customAttributes: Record = {}, -): Record { - const baseAttributes = getTelemetryAttributes() - - const attributes: Record = { - ...baseAttributes, - 'span.type': spanType, - ...customAttributes, - } - - return attributes -} - -/** - * Start an interaction span. This wraps a user request -> Claude response cycle. - * This is now a root span that includes all session-level attributes. - * Sets the interaction context for all subsequent operations. - */ -export function startInteractionSpan(userPrompt: string): Span { - ensureCleanupInterval() - - // Start Perfetto span regardless of OTel tracing state - const perfettoSpanId = isPerfettoTracingEnabled() - ? startInteractionPerfettoSpan(userPrompt) - : undefined - - if (!isAnyTracingEnabled()) { - // Still track Perfetto span even if OTel is disabled - if (perfettoSpanId) { - const dummySpan = trace.getActiveSpan() || getTracer().startSpan('dummy') - const spanId = getSpanId(dummySpan) - const spanContextObj: SpanContext = { - span: dummySpan, - startTime: Date.now(), - attributes: {}, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - interactionContext.enterWith(spanContextObj) - return dummySpan - } - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const isUserPromptLoggingEnabled = isEnvTruthy( - process.env.OTEL_LOG_USER_PROMPTS, - ) - const promptToLog = isUserPromptLoggingEnabled ? userPrompt : '' - - interactionSequence++ - - const attributes = createSpanAttributes('interaction', { - user_prompt: promptToLog, - user_prompt_length: userPrompt.length, - 'interaction.sequence': interactionSequence, - }) - - const span = tracer.startSpan('claude_code.interaction', { - attributes, - }) - - // Add experimental attributes (new_context) - addBetaInteractionAttributes(span, userPrompt) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - - interactionContext.enterWith(spanContextObj) - - return span +export function startInteractionSpan(_userPrompt: string): Span { + return NOOP_SPAN } export function endInteractionSpan(): void { - const spanContext = interactionContext.getStore() - if (!spanContext) { - return - } - - if (spanContext.ended) { - return - } - - // End Perfetto span - if (spanContext.perfettoSpanId) { - endInteractionPerfettoSpan(spanContext.perfettoSpanId) - } - - if (!isAnyTracingEnabled()) { - spanContext.ended = true - activeSpans.delete(getSpanId(spanContext.span)) - // Clear the store so async continuations created after this point (timers, - // promise callbacks, I/O) do not inherit a reference to the ended span. - // enterWith(undefined) is intentional: exit(() => {}) is a no-op because it - // only suppresses the store inside the callback and returns immediately. - interactionContext.enterWith(undefined) - return - } - - const duration = Date.now() - spanContext.startTime - spanContext.span.setAttributes({ - 'interaction.duration_ms': duration, - }) - - spanContext.span.end() - spanContext.ended = true - activeSpans.delete(getSpanId(spanContext.span)) - interactionContext.enterWith(undefined) + return } export function startLLMRequestSpan( - model: string, - newContext?: LLMRequestNewContext, - messagesForAPI?: APIMessage[], - fastMode?: boolean, + _model: string, + _newContext?: import('./betaSessionTracing.js').LLMRequestNewContext, + _messagesForAPI?: unknown[], + _fastMode?: boolean, ): Span { - // Start Perfetto span regardless of OTel tracing state - const perfettoSpanId = isPerfettoTracingEnabled() - ? startLLMRequestPerfettoSpan({ - model, - querySource: newContext?.querySource, - messageId: undefined, // Will be set in endLLMRequestSpan - }) - : undefined - - if (!isAnyTracingEnabled()) { - // Still track Perfetto span even if OTel is disabled - if (perfettoSpanId) { - const dummySpan = trace.getActiveSpan() || getTracer().startSpan('dummy') - const spanId = getSpanId(dummySpan) - const spanContextObj: SpanContext = { - span: dummySpan, - startTime: Date.now(), - attributes: { model }, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - return dummySpan - } - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const parentSpanCtx = interactionContext.getStore() - - const attributes = createSpanAttributes('llm_request', { - model: model, - 'llm_request.context': parentSpanCtx ? 'interaction' : 'standalone', - speed: fastMode ? 'fast' : 'normal', - }) - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan('claude_code.llm_request', { attributes }, ctx) - - // Add query_source (agent name) if provided - if (newContext?.querySource) { - span.setAttribute('query_source', newContext.querySource) - } - - // Add experimental attributes (system prompt, new_context) - addBetaLLMRequestAttributes(span, newContext, messagesForAPI) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - - return span + return NOOP_SPAN } -/** - * End an LLM request span and attach response metadata. - * - * @param span - Optional. The exact span returned by startLLMRequestSpan(). - * IMPORTANT: When multiple LLM requests run in parallel (e.g., warmup requests, - * topic classifier, file path extractor, main thread), you MUST pass the specific span - * to ensure responses are attached to the correct request. Without it, responses may be - * incorrectly attached to whichever span happens to be "last" in the activeSpans map. - * - * If not provided, falls back to finding the most recent llm_request span (legacy behavior). - */ export function endLLMRequestSpan( - span?: Span, - metadata?: { - inputTokens?: number - outputTokens?: number - cacheReadTokens?: number - cacheCreationTokens?: number - success?: boolean - statusCode?: number - error?: string - attempt?: number - modelResponse?: string - /** Text output from the model (non-thinking content) */ - modelOutput?: string - /** Thinking/reasoning output from the model */ - thinkingOutput?: string - /** Whether the output included tool calls (look at tool spans for details) */ - hasToolCall?: boolean - /** Time to first token in milliseconds */ - ttftMs?: number - /** Time spent in pre-request setup before the successful attempt */ - requestSetupMs?: number - /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */ - attemptStartTimes?: number[] - }, + _span?: Span, + _metadata?: LLMRequestMetadata, ): void { - let llmSpanContext: SpanContext | undefined - - if (span) { - // Use the provided span directly - this is the correct approach for parallel requests - const spanId = getSpanId(span) - llmSpanContext = activeSpans.get(spanId)?.deref() - } else { - // Legacy fallback: find the most recent llm_request span - // WARNING: This can cause mismatched responses when multiple requests are in flight - llmSpanContext = Array.from(activeSpans.values()) - .findLast(r => { - const ctx = r.deref() - return ( - ctx?.attributes['span.type'] === 'llm_request' || - ctx?.attributes['model'] - ) - }) - ?.deref() - } - - if (!llmSpanContext) { - // Span was already ended or never tracked - return - } - - const duration = Date.now() - llmSpanContext.startTime - - // End Perfetto span with full metadata - if (llmSpanContext.perfettoSpanId) { - endLLMRequestPerfettoSpan(llmSpanContext.perfettoSpanId, { - ttftMs: metadata?.ttftMs, - ttltMs: duration, // Time to last token is the total duration - promptTokens: metadata?.inputTokens, - outputTokens: metadata?.outputTokens, - cacheReadTokens: metadata?.cacheReadTokens, - cacheCreationTokens: metadata?.cacheCreationTokens, - success: metadata?.success, - error: metadata?.error, - requestSetupMs: metadata?.requestSetupMs, - attemptStartTimes: metadata?.attemptStartTimes, - }) - } - - if (!isAnyTracingEnabled()) { - const spanId = getSpanId(llmSpanContext.span) - activeSpans.delete(spanId) - strongSpans.delete(spanId) - return - } - - const endAttributes: Record = { - duration_ms: duration, - } - - if (metadata) { - if (metadata.inputTokens !== undefined) - endAttributes['input_tokens'] = metadata.inputTokens - if (metadata.outputTokens !== undefined) - endAttributes['output_tokens'] = metadata.outputTokens - if (metadata.cacheReadTokens !== undefined) - endAttributes['cache_read_tokens'] = metadata.cacheReadTokens - if (metadata.cacheCreationTokens !== undefined) - endAttributes['cache_creation_tokens'] = metadata.cacheCreationTokens - if (metadata.success !== undefined) - endAttributes['success'] = metadata.success - if (metadata.statusCode !== undefined) - endAttributes['status_code'] = metadata.statusCode - if (metadata.error !== undefined) endAttributes['error'] = metadata.error - if (metadata.attempt !== undefined) - endAttributes['attempt'] = metadata.attempt - if (metadata.hasToolCall !== undefined) - endAttributes['response.has_tool_call'] = metadata.hasToolCall - if (metadata.ttftMs !== undefined) - endAttributes['ttft_ms'] = metadata.ttftMs - - // Add experimental response attributes (model_output, thinking_output) - addBetaLLMResponseAttributes(endAttributes, metadata) - } - - llmSpanContext.span.setAttributes(endAttributes) - llmSpanContext.span.end() - - const spanId = getSpanId(llmSpanContext.span) - activeSpans.delete(spanId) - strongSpans.delete(spanId) + return } export function startToolSpan( - toolName: string, - toolAttributes?: Record, - toolInput?: string, + _toolName: string, + _toolAttributes?: Record, + _toolInput?: string, ): Span { - // Start Perfetto span regardless of OTel tracing state - const perfettoSpanId = isPerfettoTracingEnabled() - ? startToolPerfettoSpan(toolName, toolAttributes) - : undefined - - if (!isAnyTracingEnabled()) { - // Still track Perfetto span even if OTel is disabled - if (perfettoSpanId) { - const dummySpan = trace.getActiveSpan() || getTracer().startSpan('dummy') - const spanId = getSpanId(dummySpan) - const spanContextObj: SpanContext = { - span: dummySpan, - startTime: Date.now(), - attributes: { 'span.type': 'tool', tool_name: toolName }, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - toolContext.enterWith(spanContextObj) - return dummySpan - } - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const parentSpanCtx = interactionContext.getStore() - - const attributes = createSpanAttributes('tool', { - tool_name: toolName, - ...toolAttributes, - }) - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan('claude_code.tool', { attributes }, ctx) - - // Add experimental tool input attributes - if (toolInput) { - addBetaToolInputAttributes(span, toolName, toolInput) - } - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - - toolContext.enterWith(spanContextObj) - - return span + return NOOP_SPAN } export function startToolBlockedOnUserSpan(): Span { - // Start Perfetto span regardless of OTel tracing state - const perfettoSpanId = isPerfettoTracingEnabled() - ? startUserInputPerfettoSpan('tool_permission') - : undefined - - if (!isAnyTracingEnabled()) { - // Still track Perfetto span even if OTel is disabled - if (perfettoSpanId) { - const dummySpan = trace.getActiveSpan() || getTracer().startSpan('dummy') - const spanId = getSpanId(dummySpan) - const spanContextObj: SpanContext = { - span: dummySpan, - startTime: Date.now(), - attributes: { 'span.type': 'tool.blocked_on_user' }, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - return dummySpan - } - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const parentSpanCtx = toolContext.getStore() - - const attributes = createSpanAttributes('tool.blocked_on_user') - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan( - 'claude_code.tool.blocked_on_user', - { attributes }, - ctx, - ) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - perfettoSpanId, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - - return span + return NOOP_SPAN } export function endToolBlockedOnUserSpan( - decision?: string, - source?: string, + _decision?: string, + _source?: string, ): void { - const blockedSpanContext = Array.from(activeSpans.values()) - .findLast( - r => r.deref()?.attributes['span.type'] === 'tool.blocked_on_user', - ) - ?.deref() - - if (!blockedSpanContext) { - return - } - - // End Perfetto span - if (blockedSpanContext.perfettoSpanId) { - endUserInputPerfettoSpan(blockedSpanContext.perfettoSpanId, { - decision, - source, - }) - } - - if (!isAnyTracingEnabled()) { - const spanId = getSpanId(blockedSpanContext.span) - activeSpans.delete(spanId) - strongSpans.delete(spanId) - return - } - - const duration = Date.now() - blockedSpanContext.startTime - const attributes: Record = { - duration_ms: duration, - } - - if (decision) { - attributes['decision'] = decision - } - if (source) { - attributes['source'] = source - } - - blockedSpanContext.span.setAttributes(attributes) - blockedSpanContext.span.end() - - const spanId = getSpanId(blockedSpanContext.span) - activeSpans.delete(spanId) - strongSpans.delete(spanId) + return } export function startToolExecutionSpan(): Span { - if (!isAnyTracingEnabled()) { - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const parentSpanCtx = toolContext.getStore() - - const attributes = createSpanAttributes('tool.execution') - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan( - 'claude_code.tool.execution', - { attributes }, - ctx, - ) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - - return span + return NOOP_SPAN } export function endToolExecutionSpan(metadata?: { success?: boolean error?: string }): void { - if (!isAnyTracingEnabled()) { - return - } - - const executionSpanContext = Array.from(activeSpans.values()) - .findLast(r => r.deref()?.attributes['span.type'] === 'tool.execution') - ?.deref() - - if (!executionSpanContext) { - return - } - - const duration = Date.now() - executionSpanContext.startTime - const attributes: Record = { - duration_ms: duration, - } - - if (metadata) { - if (metadata.success !== undefined) attributes['success'] = metadata.success - if (metadata.error !== undefined) attributes['error'] = metadata.error - } - - executionSpanContext.span.setAttributes(attributes) - executionSpanContext.span.end() - - const spanId = getSpanId(executionSpanContext.span) - activeSpans.delete(spanId) - strongSpans.delete(spanId) + void metadata + return } -export function endToolSpan(toolResult?: string, resultTokens?: number): void { - const toolSpanContext = toolContext.getStore() - - if (!toolSpanContext) { - return - } - - // End Perfetto span - if (toolSpanContext.perfettoSpanId) { - endToolPerfettoSpan(toolSpanContext.perfettoSpanId, { - success: true, - resultTokens, - }) - } - - if (!isAnyTracingEnabled()) { - const spanId = getSpanId(toolSpanContext.span) - activeSpans.delete(spanId) - // Same reasoning as interactionContext above: clear so subsequent async - // work doesn't hold a stale reference to the ended tool span. - toolContext.enterWith(undefined) - return - } - - const duration = Date.now() - toolSpanContext.startTime - const endAttributes: Record = { - duration_ms: duration, - } - - // Add experimental tool result attributes (new_context) - if (toolResult) { - const toolName = toolSpanContext.attributes['tool_name'] || 'unknown' - addBetaToolResultAttributes(endAttributes, toolName, toolResult) - } - - if (resultTokens !== undefined) { - endAttributes['result_tokens'] = resultTokens - } - - toolSpanContext.span.setAttributes(endAttributes) - toolSpanContext.span.end() - - const spanId = getSpanId(toolSpanContext.span) - activeSpans.delete(spanId) - toolContext.enterWith(undefined) -} - -function isToolContentLoggingEnabled(): boolean { - return isEnvTruthy(process.env.OTEL_LOG_TOOL_CONTENT) -} - -/** - * Add a span event with tool content/output data. - * Only logs if OTEL_LOG_TOOL_CONTENT=1 is set. - * Truncates content if it exceeds MAX_CONTENT_SIZE. - */ -export function addToolContentEvent( - eventName: string, - attributes: Record, +export function endToolSpan( + _toolResult?: string, + _resultTokens?: number, ): void { - if (!isAnyTracingEnabled() || !isToolContentLoggingEnabled()) { - return - } + return +} - const currentSpanCtx = toolContext.getStore() - if (!currentSpanCtx) { - return - } - - // Truncate string attributes that might be large - const processedAttributes: Record = {} - for (const [key, value] of Object.entries(attributes)) { - if (typeof value === 'string') { - const { content, truncated } = truncateContent(value) - processedAttributes[key] = content - if (truncated) { - processedAttributes[`${key}_truncated`] = true - processedAttributes[`${key}_original_length`] = value.length - } - } else { - processedAttributes[key] = value - } - } - - currentSpanCtx.span.addEvent(eventName, processedAttributes) +export function addToolContentEvent( + _eventName: string, + _attributes: Record, +): void { + return } export function getCurrentSpan(): Span | null { - if (!isAnyTracingEnabled()) { - return null - } - - return ( - toolContext.getStore()?.span ?? interactionContext.getStore()?.span ?? null - ) + return null } export async function executeInSpan( - spanName: string, + _spanName: string, fn: (span: Span) => Promise, - attributes?: Record, + _attributes?: Record, ): Promise { - if (!isAnyTracingEnabled()) { - return fn(trace.getActiveSpan() || getTracer().startSpan('dummy')) - } - - const tracer = getTracer() - const parentSpanCtx = toolContext.getStore() ?? interactionContext.getStore() - - const finalAttributes = createSpanAttributes('tool', { - ...attributes, - }) - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan(spanName, { attributes: finalAttributes }, ctx) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes: finalAttributes, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - - try { - const result = await fn(span) - span.end() - activeSpans.delete(spanId) - strongSpans.delete(spanId) - return result - } catch (error) { - if (error instanceof Error) { - span.recordException(error) - } - span.end() - activeSpans.delete(spanId) - strongSpans.delete(spanId) - throw error - } + return fn(NOOP_SPAN) } -/** - * Start a hook execution span. - * Only creates a span when beta tracing is enabled. - * @param hookEvent The hook event type (e.g., 'PreToolUse', 'PostToolUse') - * @param hookName The full hook name (e.g., 'PreToolUse:Write') - * @param numHooks The number of hooks being executed - * @param hookDefinitions JSON string of hook definitions for tracing - * @returns The span (or a dummy span if tracing is disabled) - */ export function startHookSpan( - hookEvent: string, - hookName: string, - numHooks: number, - hookDefinitions: string, + _hookEvent: string, + _hookName: string, + _numHooks: number, + _hookDefinitions: string, ): Span { - if (!isBetaTracingEnabled()) { - return trace.getActiveSpan() || getTracer().startSpan('dummy') - } - - const tracer = getTracer() - const parentSpanCtx = toolContext.getStore() ?? interactionContext.getStore() - - const attributes = createSpanAttributes('hook', { - hook_event: hookEvent, - hook_name: hookName, - num_hooks: numHooks, - hook_definitions: hookDefinitions, - }) - - const ctx = parentSpanCtx - ? trace.setSpan(otelContext.active(), parentSpanCtx.span) - : otelContext.active() - const span = tracer.startSpan('claude_code.hook', { attributes }, ctx) - - const spanId = getSpanId(span) - const spanContextObj: SpanContext = { - span, - startTime: Date.now(), - attributes, - } - activeSpans.set(spanId, new WeakRef(spanContextObj)) - strongSpans.set(spanId, spanContextObj) - - return span + return NOOP_SPAN } -/** - * End a hook execution span with outcome metadata. - * Only does work when beta tracing is enabled. - * @param span The span to end (returned from startHookSpan) - * @param metadata The outcome metadata for the hook execution - */ export function endHookSpan( - span: Span, - metadata?: { - numSuccess?: number - numBlocking?: number - numNonBlockingError?: number - numCancelled?: number - }, + _span: Span, + _metadata?: HookSpanMetadata, ): void { - if (!isBetaTracingEnabled()) { - return - } - - const spanId = getSpanId(span) - const spanContext = activeSpans.get(spanId)?.deref() - - if (!spanContext) { - return - } - - const duration = Date.now() - spanContext.startTime - const endAttributes: Record = { - duration_ms: duration, - } - - if (metadata) { - if (metadata.numSuccess !== undefined) - endAttributes['num_success'] = metadata.numSuccess - if (metadata.numBlocking !== undefined) - endAttributes['num_blocking'] = metadata.numBlocking - if (metadata.numNonBlockingError !== undefined) - endAttributes['num_non_blocking_error'] = metadata.numNonBlockingError - if (metadata.numCancelled !== undefined) - endAttributes['num_cancelled'] = metadata.numCancelled - } - - spanContext.span.setAttributes(endAttributes) - spanContext.span.end() - activeSpans.delete(spanId) - strongSpans.delete(spanId) + return } diff --git a/src/utils/telemetryAttributes.ts b/src/utils/telemetryAttributes.ts deleted file mode 100644 index 2038c10..0000000 --- a/src/utils/telemetryAttributes.ts +++ /dev/null @@ -1,71 +0,0 @@ -import type { Attributes } from '@opentelemetry/api' -import { getSessionId } from 'src/bootstrap/state.js' -import { getOauthAccountInfo } from './auth.js' -import { getOrCreateUserID } from './config.js' -import { envDynamic } from './envDynamic.js' -import { isEnvTruthy } from './envUtils.js' -import { toTaggedId } from './taggedId.js' - -// Default configuration for metrics cardinality -const METRICS_CARDINALITY_DEFAULTS = { - OTEL_METRICS_INCLUDE_SESSION_ID: true, - OTEL_METRICS_INCLUDE_VERSION: false, - OTEL_METRICS_INCLUDE_ACCOUNT_UUID: true, -} - -function shouldIncludeAttribute( - envVar: keyof typeof METRICS_CARDINALITY_DEFAULTS, -): boolean { - const defaultValue = METRICS_CARDINALITY_DEFAULTS[envVar] - const envValue = process.env[envVar] - - if (envValue === undefined) { - return defaultValue - } - - return isEnvTruthy(envValue) -} - -export function getTelemetryAttributes(): Attributes { - const userId = getOrCreateUserID() - const sessionId = getSessionId() - - const attributes: Attributes = { - 'user.id': userId, - } - - if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_SESSION_ID')) { - attributes['session.id'] = sessionId - } - if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_VERSION')) { - attributes['app.version'] = MACRO.VERSION - } - - // Only include OAuth account data when actively using OAuth authentication - const oauthAccount = getOauthAccountInfo() - if (oauthAccount) { - const orgId = oauthAccount.organizationUuid - const email = oauthAccount.emailAddress - const accountUuid = oauthAccount.accountUuid - - if (orgId) attributes['organization.id'] = orgId - if (email) attributes['user.email'] = email - - if ( - accountUuid && - shouldIncludeAttribute('OTEL_METRICS_INCLUDE_ACCOUNT_UUID') - ) { - attributes['user.account_uuid'] = accountUuid - attributes['user.account_id'] = - process.env.CLAUDE_CODE_ACCOUNT_TAGGED_ID || - toTaggedId('user', accountUuid) - } - } - - // Add terminal type if available - if (envDynamic.terminal) { - attributes['terminal.type'] = envDynamic.terminal - } - - return attributes -}