Remove dead telemetry stubs
This commit is contained in:
@@ -1,806 +0,0 @@
|
||||
import type { HrTime } from '@opentelemetry/api'
|
||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
||||
import type {
|
||||
LogRecordExporter,
|
||||
ReadableLogRecord,
|
||||
} from '@opentelemetry/sdk-logs'
|
||||
import axios from 'axios'
|
||||
import { randomUUID } from 'crypto'
|
||||
import { appendFile, mkdir, readdir, unlink, writeFile } from 'fs/promises'
|
||||
import * as path from 'path'
|
||||
import type { CoreUserData } from 'src/utils/user.js'
|
||||
import {
|
||||
getIsNonInteractiveSession,
|
||||
getSessionId,
|
||||
} from '../../bootstrap/state.js'
|
||||
import { ClaudeCodeInternalEvent } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
||||
import { GrowthbookExperimentEvent } from '../../types/generated/events_mono/growthbook/v1/growthbook_experiment_event.js'
|
||||
import {
|
||||
getClaudeAIOAuthTokens,
|
||||
hasProfileScope,
|
||||
isClaudeAISubscriber,
|
||||
} from '../../utils/auth.js'
|
||||
import { checkHasTrustDialogAccepted } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { errorMessage, isFsInaccessible, toError } from '../../utils/errors.js'
|
||||
import { getAuthHeaders } from '../../utils/http.js'
|
||||
import { readJSONLFile } from '../../utils/json.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
import { isOAuthTokenExpired } from '../oauth/client.js'
|
||||
import { stripProtoFields } from './index.js'
|
||||
import { type EventMetadata, to1PEventFormat } from './metadata.js'
|
||||
|
||||
// Unique ID for this process run - used to isolate failed event files between runs
const BATCH_UUID = randomUUID()

// File prefix for failed event storage
const FILE_PREFIX = '1p_failed_events.'

// Storage directory for failed events - evaluated at runtime to respect CLAUDE_CONFIG_DIR in tests
// (intentionally a function, not a module-level constant, so each call re-reads config)
function getStorageDir(): string {
  return path.join(getClaudeConfigHomeDir(), 'telemetry')
}
|
||||
|
||||
// API envelope - event_data is the JSON output from proto toJSON()
type FirstPartyEventLoggingEvent = {
  // Discriminator understood by the batch endpoint; selects the proto schema
  event_type: 'ClaudeCodeInternalEvent' | 'GrowthbookExperimentEvent'
  // Proto-serialized JSON payload; shape depends on event_type
  event_data: unknown
}

// Request body for POST to the batch endpoint
type FirstPartyEventLoggingPayload = {
  events: FirstPartyEventLoggingEvent[]
}
|
||||
|
||||
/**
 * Exporter for 1st-party event logging to /api/event_logging/batch.
 *
 * Export cycles are controlled by OpenTelemetry's BatchLogRecordProcessor, which
 * triggers export() when either:
 * - Time interval elapses (default: 5 seconds via scheduledDelayMillis)
 * - Batch size is reached (default: 200 events via maxExportBatchSize)
 *
 * This exporter adds resilience on top:
 * - Append-only log for failed events (concurrency-safe)
 * - Quadratic backoff retry for failed events, dropped after maxAttempts
 * - Immediate retry of queued events when any export succeeds (endpoint is healthy)
 * - Chunking large event sets into smaller batches
 * - Auth fallback: retries without auth on 401 errors
 */
export class FirstPartyEventLoggingExporter implements LogRecordExporter {
  // Full batch-endpoint URL (base URL + path), fixed at construction
  private readonly endpoint: string
  // Per-POST timeout in milliseconds
  private readonly timeout: number
  // Maximum events per POST; larger sets are chunked in sendEventsInBatches()
  private readonly maxBatchSize: number
  // When true, auth headers are never attached
  private readonly skipAuth: boolean
  // Delay between consecutive batch POSTs within one export cycle
  private readonly batchDelayMs: number
  // Quadratic backoff parameters (base * attempts², capped at max)
  private readonly baseBackoffDelayMs: number
  private readonly maxBackoffDelayMs: number
  // Shared attempts budget; once exceeded, queued events are dropped
  private readonly maxAttempts: number
  // Killswitch probe, checked before every POST (see constructor options)
  private readonly isKilled: () => boolean
  // In-flight export promises; forceFlush() awaits them all
  private pendingExports: Promise<void>[] = []
  private isShutdown = false
  // Timer abstraction (injectable for tests); returns a cancel function
  private readonly schedule: (
    fn: () => Promise<void>,
    delayMs: number,
  ) => () => void
  // Cancel handle for the currently scheduled backoff retry, if any
  private cancelBackoff: (() => void) | null = null
  // Send attempts since the last success; reset by resetBackoff()
  private attempts = 0
  // Guards against overlapping retry loops
  private isRetrying = false
  // Context (status / request-id / code) from the most recent failed POST
  private lastExportErrorContext: string | undefined

  constructor(
    options: {
      timeout?: number
      maxBatchSize?: number
      skipAuth?: boolean
      batchDelayMs?: number
      baseBackoffDelayMs?: number
      maxBackoffDelayMs?: number
      maxAttempts?: number
      path?: string
      baseUrl?: string
      // Injected killswitch probe. Checked per-POST so that disabling the
      // firstParty sink also stops backoff retries (not just new emits).
      // Passed in rather than imported to avoid a cycle with firstPartyEventLogger.ts.
      isKilled?: () => boolean
      schedule?: (fn: () => Promise<void>, delayMs: number) => () => void
    } = {},
  ) {
    // Default: prod, except when ANTHROPIC_BASE_URL is explicitly staging.
    // Overridable via tengu_1p_event_batch_config.baseUrl.
    const baseUrl =
      options.baseUrl ||
      (process.env.ANTHROPIC_BASE_URL === 'https://api-staging.anthropic.com'
        ? 'https://api-staging.anthropic.com'
        : 'https://api.anthropic.com')

    this.endpoint = `${baseUrl}${options.path || '/api/event_logging/batch'}`

    this.timeout = options.timeout || 10000
    this.maxBatchSize = options.maxBatchSize || 200
    this.skipAuth = options.skipAuth ?? false
    this.batchDelayMs = options.batchDelayMs || 100
    this.baseBackoffDelayMs = options.baseBackoffDelayMs || 500
    this.maxBackoffDelayMs = options.maxBackoffDelayMs || 30000
    this.maxAttempts = options.maxAttempts ?? 8
    this.isKilled = options.isKilled ?? (() => false)
    this.schedule =
      options.schedule ??
      ((fn, ms) => {
        const t = setTimeout(fn, ms)
        return () => clearTimeout(t)
      })

    // Retry any failed events from previous runs of this session (in background)
    void this.retryPreviousBatches()
  }

  // Expose for testing
  async getQueuedEventCount(): Promise<number> {
    return (await this.loadEventsFromCurrentBatch()).length
  }

  // --- Storage helpers ---

  // Path of this run's failed-event file; keyed by session id + run UUID so
  // concurrent runs of the same session never write to the same file.
  private getCurrentBatchFilePath(): string {
    return path.join(
      getStorageDir(),
      `${FILE_PREFIX}${getSessionId()}.${BATCH_UUID}.json`,
    )
  }

  // Read a JSONL file of queued events; any read/parse failure yields [].
  private async loadEventsFromFile(
    filePath: string,
  ): Promise<FirstPartyEventLoggingEvent[]> {
    try {
      return await readJSONLFile<FirstPartyEventLoggingEvent>(filePath)
    } catch {
      return []
    }
  }

  private async loadEventsFromCurrentBatch(): Promise<
    FirstPartyEventLoggingEvent[]
  > {
    return this.loadEventsFromFile(this.getCurrentBatchFilePath())
  }

  // Overwrite the file with the given events; an empty list deletes the file.
  // All fs errors are logged and swallowed (telemetry must never crash the app).
  private async saveEventsToFile(
    filePath: string,
    events: FirstPartyEventLoggingEvent[],
  ): Promise<void> {
    try {
      if (events.length === 0) {
        try {
          await unlink(filePath)
        } catch {
          // File doesn't exist, nothing to delete
        }
      } else {
        // Ensure storage directory exists
        await mkdir(getStorageDir(), { recursive: true })
        // Write as JSON lines (one event per line)
        const content = events.map(e => jsonStringify(e)).join('\n') + '\n'
        await writeFile(filePath, content, 'utf8')
      }
    } catch (error) {
      logError(error)
    }
  }

  // Append events to the queue file without rewriting existing content.
  private async appendEventsToFile(
    filePath: string,
    events: FirstPartyEventLoggingEvent[],
  ): Promise<void> {
    if (events.length === 0) return
    try {
      // Ensure storage directory exists
      await mkdir(getStorageDir(), { recursive: true })
      // Append as JSON lines (one event per line) - atomic on most filesystems
      const content = events.map(e => jsonStringify(e)).join('\n') + '\n'
      await appendFile(filePath, content, 'utf8')
    } catch (error) {
      logError(error)
    }
  }

  // Best-effort delete; missing file is not an error.
  private async deleteFile(filePath: string): Promise<void> {
    try {
      await unlink(filePath)
    } catch {
      // File doesn't exist or can't be deleted, ignore
    }
  }

  // --- Previous batch retry (startup) ---

  // Scan the storage dir for this session's queue files left by earlier runs
  // (different BATCH_UUID) and retry each in the background.
  private async retryPreviousBatches(): Promise<void> {
    try {
      const prefix = `${FILE_PREFIX}${getSessionId()}.`
      let files: string[]
      try {
        files = (await readdir(getStorageDir()))
          .filter((f: string) => f.startsWith(prefix) && f.endsWith('.json'))
          .filter((f: string) => !f.includes(BATCH_UUID)) // Exclude current batch
      } catch (e) {
        // Storage dir may simply not exist yet; that's fine
        if (isFsInaccessible(e)) return
        throw e
      }

      for (const file of files) {
        const filePath = path.join(getStorageDir(), file)
        void this.retryFileInBackground(filePath)
      }
    } catch (error) {
      logError(error)
    }
  }

  // Retry one previous-run queue file: resend its events, keep only the
  // failures on disk, and delete the file when drained (or budget exhausted).
  private async retryFileInBackground(filePath: string): Promise<void> {
    if (this.attempts >= this.maxAttempts) {
      await this.deleteFile(filePath)
      return
    }

    const events = await this.loadEventsFromFile(filePath)
    if (events.length === 0) {
      await this.deleteFile(filePath)
      return
    }

    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: retrying ${events.length} events from previous batch`,
      )
    }

    const failedEvents = await this.sendEventsInBatches(events)
    if (failedEvents.length === 0) {
      await this.deleteFile(filePath)
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging('1P event logging: previous batch retry succeeded')
      }
    } else {
      // Save only the failed events back (not all original events)
      await this.saveEventsToFile(filePath, failedEvents)
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          `1P event logging: previous batch retry failed, ${failedEvents.length} events remain`,
        )
      }
    }
  }

  // LogRecordExporter entry point, invoked by BatchLogRecordProcessor.
  // Tracks the in-flight promise so forceFlush()/shutdown() can await it.
  async export(
    logs: ReadableLogRecord[],
    resultCallback: (result: ExportResult) => void,
  ): Promise<void> {
    if (this.isShutdown) {
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          '1P event logging export failed: Exporter has been shutdown',
        )
      }
      resultCallback({
        code: ExportResultCode.FAILED,
        error: new Error('Exporter has been shutdown'),
      })
      return
    }

    const exportPromise = this.doExport(logs, resultCallback)
    this.pendingExports.push(exportPromise)

    // Clean up completed exports
    void exportPromise.finally(() => {
      const index = this.pendingExports.indexOf(exportPromise)
      if (index > -1) {
        void this.pendingExports.splice(index, 1)
      }
    })
  }

  // Core export: filter to event-scope logs, transform, send, and either
  // report success (retrying any disk-queued events) or queue failures.
  private async doExport(
    logs: ReadableLogRecord[],
    resultCallback: (result: ExportResult) => void,
  ): Promise<void> {
    try {
      // Filter for event logs only (by scope name)
      const eventLogs = logs.filter(
        log =>
          log.instrumentationScope?.name === 'com.anthropic.claude_code.events',
      )

      if (eventLogs.length === 0) {
        resultCallback({ code: ExportResultCode.SUCCESS })
        return
      }

      // Transform new logs (failed events are retried independently via backoff)
      const events = this.transformLogsToEvents(eventLogs).events

      if (events.length === 0) {
        resultCallback({ code: ExportResultCode.SUCCESS })
        return
      }

      if (this.attempts >= this.maxAttempts) {
        resultCallback({
          code: ExportResultCode.FAILED,
          error: new Error(
            `Dropped ${events.length} events: max attempts (${this.maxAttempts}) reached`,
          ),
        })
        return
      }

      // Send events
      const failedEvents = await this.sendEventsInBatches(events)
      this.attempts++

      if (failedEvents.length > 0) {
        await this.queueFailedEvents(failedEvents)
        this.scheduleBackoffRetry()
        const context = this.lastExportErrorContext
          ? ` (${this.lastExportErrorContext})`
          : ''
        resultCallback({
          code: ExportResultCode.FAILED,
          error: new Error(
            `Failed to export ${failedEvents.length} events${context}`,
          ),
        })
        return
      }

      // Success - reset backoff and immediately retry any queued events
      this.resetBackoff()
      if ((await this.getQueuedEventCount()) > 0 && !this.isRetrying) {
        void this.retryFailedEvents()
      }
      resultCallback({ code: ExportResultCode.SUCCESS })
    } catch (error) {
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          `1P event logging export failed: ${errorMessage(error)}`,
        )
      }
      logError(error)
      resultCallback({
        code: ExportResultCode.FAILED,
        error: toError(error),
      })
    }
  }

  // Chunk events into maxBatchSize POSTs. Returns the events that were NOT
  // delivered (from the first failing batch onward).
  private async sendEventsInBatches(
    events: FirstPartyEventLoggingEvent[],
  ): Promise<FirstPartyEventLoggingEvent[]> {
    // Chunk events into batches
    const batches: FirstPartyEventLoggingEvent[][] = []
    for (let i = 0; i < events.length; i += this.maxBatchSize) {
      batches.push(events.slice(i, i + this.maxBatchSize))
    }

    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: exporting ${events.length} events in ${batches.length} batch(es)`,
      )
    }

    // Send each batch with delay between them. On first failure, assume the
    // endpoint is down and short-circuit: queue the failed batch plus all
    // remaining unsent batches without POSTing them. The backoff retry will
    // probe again with a single batch next tick.
    const failedBatchEvents: FirstPartyEventLoggingEvent[] = []
    let lastErrorContext: string | undefined
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i]!
      try {
        await this.sendBatchWithRetry({ events: batch })
      } catch (error) {
        lastErrorContext = getAxiosErrorContext(error)
        for (let j = i; j < batches.length; j++) {
          failedBatchEvents.push(...batches[j]!)
        }
        if (process.env.USER_TYPE === 'ant') {
          const skipped = batches.length - 1 - i
          logForDebugging(
            `1P event logging: batch ${i + 1}/${batches.length} failed (${lastErrorContext}); short-circuiting ${skipped} remaining batch(es)`,
          )
        }
        break
      }

      if (i < batches.length - 1 && this.batchDelayMs > 0) {
        await sleep(this.batchDelayMs)
      }
    }

    if (failedBatchEvents.length > 0 && lastErrorContext) {
      this.lastExportErrorContext = lastErrorContext
    }

    return failedBatchEvents
  }

  // Persist failed events to this run's queue file for later backoff retries.
  private async queueFailedEvents(
    events: FirstPartyEventLoggingEvent[],
  ): Promise<void> {
    const filePath = this.getCurrentBatchFilePath()

    // Append-only: just add new events to file (atomic on most filesystems)
    await this.appendEventsToFile(filePath, events)

    const context = this.lastExportErrorContext
      ? ` (${this.lastExportErrorContext})`
      : ''
    const message = `1P event logging: ${events.length} events failed to export${context}`
    logError(new Error(message))
  }

  // Arm a one-shot retry timer using quadratic backoff; no-op if a timer is
  // already armed, a retry is running, or the exporter is shut down.
  private scheduleBackoffRetry(): void {
    // Don't schedule if already retrying or shutdown
    if (this.cancelBackoff || this.isRetrying || this.isShutdown) {
      return
    }

    // Quadratic backoff (matching Statsig SDK): base * attempts²
    const delay = Math.min(
      this.baseBackoffDelayMs * this.attempts * this.attempts,
      this.maxBackoffDelayMs,
    )

    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: scheduling backoff retry in ${delay}ms (attempt ${this.attempts})`,
      )
    }

    this.cancelBackoff = this.schedule(async () => {
      this.cancelBackoff = null
      await this.retryFailedEvents()
    }, delay)
  }

  // Drain the queue file: load, clear, resend; on failure write survivors
  // back and re-arm backoff; on success loop to pick up newly queued events.
  private async retryFailedEvents(): Promise<void> {
    const filePath = this.getCurrentBatchFilePath()

    // Keep retrying while there are events and endpoint is healthy
    while (!this.isShutdown) {
      const events = await this.loadEventsFromFile(filePath)
      if (events.length === 0) break

      if (this.attempts >= this.maxAttempts) {
        if (process.env.USER_TYPE === 'ant') {
          logForDebugging(
            `1P event logging: max attempts (${this.maxAttempts}) reached, dropping ${events.length} events`,
          )
        }
        await this.deleteFile(filePath)
        this.resetBackoff()
        return
      }

      this.isRetrying = true

      // Clear file before retry (we have events in memory now)
      await this.deleteFile(filePath)

      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          `1P event logging: retrying ${events.length} failed events (attempt ${this.attempts + 1})`,
        )
      }

      const failedEvents = await this.sendEventsInBatches(events)
      this.attempts++

      this.isRetrying = false

      if (failedEvents.length > 0) {
        // Write failures back to disk
        await this.saveEventsToFile(filePath, failedEvents)
        this.scheduleBackoffRetry()
        return // Failed - wait for backoff
      }

      // Success - reset backoff and continue loop to drain any newly queued events
      this.resetBackoff()
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging('1P event logging: backoff retry succeeded')
      }
    }
  }

  // Reset the attempts budget and cancel any armed backoff timer.
  private resetBackoff(): void {
    this.attempts = 0
    if (this.cancelBackoff) {
      this.cancelBackoff()
      this.cancelBackoff = null
    }
  }

  // POST a single batch. Tries with auth headers when available; falls back
  // to an unauthenticated POST on 401. Throws on any other failure so the
  // caller can short-circuit and queue remaining batches.
  private async sendBatchWithRetry(
    payload: FirstPartyEventLoggingPayload,
  ): Promise<void> {
    if (this.isKilled()) {
      // Throw so the caller short-circuits remaining batches and queues
      // everything to disk. Zero network traffic while killed; the backoff
      // timer keeps ticking and will resume POSTs as soon as the GrowthBook
      // cache picks up the cleared flag.
      throw new Error('firstParty sink killswitch active')
    }

    const baseHeaders: Record<string, string> = {
      'Content-Type': 'application/json',
      'User-Agent': getClaudeCodeUserAgent(),
      'x-service-name': 'claude-code',
    }

    // Skip auth if trust hasn't been established yet
    // This prevents executing apiKeyHelper commands before the trust dialog
    // Non-interactive sessions implicitly have workspace trust
    const hasTrust =
      checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
    if (process.env.USER_TYPE === 'ant' && !hasTrust) {
      logForDebugging('1P event logging: Trust not accepted')
    }

    // Skip auth when the OAuth token is expired or lacks user:profile
    // scope (service key sessions). Falls through to unauthenticated send.
    let shouldSkipAuth = this.skipAuth || !hasTrust
    if (!shouldSkipAuth && isClaudeAISubscriber()) {
      const tokens = getClaudeAIOAuthTokens()
      if (!hasProfileScope()) {
        shouldSkipAuth = true
      } else if (tokens && isOAuthTokenExpired(tokens.expiresAt)) {
        shouldSkipAuth = true
        if (process.env.USER_TYPE === 'ant') {
          logForDebugging(
            '1P event logging: OAuth token expired, skipping auth to avoid 401',
          )
        }
      }
    }

    // Try with auth headers first (unless trust not established or token is known to be expired)
    const authResult = shouldSkipAuth
      ? { headers: {}, error: 'trust not established or Oauth token expired' }
      : getAuthHeaders()
    const useAuth = !authResult.error

    if (!useAuth && process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: auth not available, sending without auth`,
      )
    }

    const headers = useAuth
      ? { ...baseHeaders, ...authResult.headers }
      : baseHeaders

    try {
      const response = await axios.post(this.endpoint, payload, {
        timeout: this.timeout,
        headers,
      })
      this.logSuccess(payload.events.length, useAuth, response.data)
      return
    } catch (error) {
      // Handle 401 by retrying without auth
      if (
        useAuth &&
        axios.isAxiosError(error) &&
        error.response?.status === 401
      ) {
        if (process.env.USER_TYPE === 'ant') {
          logForDebugging(
            '1P event logging: 401 auth error, retrying without auth',
          )
        }
        const response = await axios.post(this.endpoint, payload, {
          timeout: this.timeout,
          headers: baseHeaders,
        })
        this.logSuccess(payload.events.length, false, response.data)
        return
      }

      throw error
    }
  }

  // Debug-only success logging (ant users only).
  private logSuccess(
    eventCount: number,
    withAuth: boolean,
    responseData: unknown,
  ): void {
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: ${eventCount} events exported successfully${withAuth ? ' (with auth)' : ' (without auth)'}`,
      )
      logForDebugging(`API Response: ${jsonStringify(responseData, null, 2)}`)
    }
  }

  // Convert an OTel [seconds, nanoseconds] HrTime pair to a JS Date
  // (sub-millisecond precision is lost).
  private hrTimeToDate(hrTime: HrTime): Date {
    const [seconds, nanoseconds] = hrTime
    return new Date(seconds * 1000 + nanoseconds / 1000000)
  }

  // Convert OTel log records into the 1P API envelope. GrowthBook experiment
  // logs map to GrowthbookExperimentEvent; everything else maps to
  // ClaudeCodeInternalEvent, with a partial fallback when core_metadata is
  // missing.
  private transformLogsToEvents(
    logs: ReadableLogRecord[],
  ): FirstPartyEventLoggingPayload {
    const events: FirstPartyEventLoggingEvent[] = []

    for (const log of logs) {
      const attributes = log.attributes || {}

      // Check if this is a GrowthBook experiment event
      if (attributes.event_type === 'GrowthbookExperimentEvent') {
        const timestamp = this.hrTimeToDate(log.hrTime)
        const account_uuid = attributes.account_uuid as string | undefined
        const organization_uuid = attributes.organization_uuid as
          | string
          | undefined
        events.push({
          event_type: 'GrowthbookExperimentEvent',
          event_data: GrowthbookExperimentEvent.toJSON({
            event_id: attributes.event_id as string,
            timestamp,
            experiment_id: attributes.experiment_id as string,
            variation_id: attributes.variation_id as number,
            environment: attributes.environment as string,
            user_attributes: attributes.user_attributes as string,
            experiment_metadata: attributes.experiment_metadata as string,
            device_id: attributes.device_id as string,
            session_id: attributes.session_id as string,
            auth:
              account_uuid || organization_uuid
                ? { account_uuid, organization_uuid }
                : undefined,
          }),
        })
        continue
      }

      // Extract event name
      const eventName =
        (attributes.event_name as string) || (log.body as string) || 'unknown'

      // Extract metadata objects directly (no JSON parsing needed)
      const coreMetadata = attributes.core_metadata as EventMetadata | undefined
      const userMetadata = attributes.user_metadata as CoreUserData
      const eventMetadata = (attributes.event_metadata || {}) as Record<
        string,
        unknown
      >

      if (!coreMetadata) {
        // Emit partial event if core metadata is missing
        if (process.env.USER_TYPE === 'ant') {
          logForDebugging(
            `1P event logging: core_metadata missing for event ${eventName}`,
          )
        }
        events.push({
          event_type: 'ClaudeCodeInternalEvent',
          event_data: ClaudeCodeInternalEvent.toJSON({
            event_id: attributes.event_id as string | undefined,
            event_name: eventName,
            client_timestamp: this.hrTimeToDate(log.hrTime),
            session_id: getSessionId(),
            additional_metadata: Buffer.from(
              jsonStringify({
                transform_error: 'core_metadata attribute is missing',
              }),
            ).toString('base64'),
          }),
        })
        continue
      }

      // Transform to 1P format
      const formatted = to1PEventFormat(
        coreMetadata,
        userMetadata,
        eventMetadata,
      )

      // _PROTO_* keys are PII-tagged values meant only for privileged BQ
      // columns. Hoist known keys to proto fields, then defensively strip any
      // remaining _PROTO_* so an unrecognized future key can't silently land
      // in the general-access additional_metadata blob. sink.ts applies the
      // same strip before Datadog; this closes the 1P side.
      const {
        _PROTO_skill_name,
        _PROTO_plugin_name,
        _PROTO_marketplace_name,
        ...rest
      } = formatted.additional
      const additionalMetadata = stripProtoFields(rest)

      events.push({
        event_type: 'ClaudeCodeInternalEvent',
        event_data: ClaudeCodeInternalEvent.toJSON({
          event_id: attributes.event_id as string | undefined,
          event_name: eventName,
          client_timestamp: this.hrTimeToDate(log.hrTime),
          device_id: attributes.user_id as string | undefined,
          email: userMetadata?.email,
          auth: formatted.auth,
          ...formatted.core,
          env: formatted.env,
          process: formatted.process,
          skill_name:
            typeof _PROTO_skill_name === 'string'
              ? _PROTO_skill_name
              : undefined,
          plugin_name:
            typeof _PROTO_plugin_name === 'string'
              ? _PROTO_plugin_name
              : undefined,
          marketplace_name:
            typeof _PROTO_marketplace_name === 'string'
              ? _PROTO_marketplace_name
              : undefined,
          additional_metadata:
            Object.keys(additionalMetadata).length > 0
              ? Buffer.from(jsonStringify(additionalMetadata)).toString(
                  'base64',
                )
              : undefined,
        }),
      })
    }

    return { events }
  }

  // Stop accepting exports, cancel backoff, and flush in-flight work.
  async shutdown(): Promise<void> {
    this.isShutdown = true
    this.resetBackoff()
    await this.forceFlush()
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging('1P event logging exporter shutdown complete')
    }
  }

  // Await every in-flight export promise.
  async forceFlush(): Promise<void> {
    await Promise.all(this.pendingExports)
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging('1P event logging exporter flush complete')
    }
  }
}
|
||||
|
||||
function getAxiosErrorContext(error: unknown): string {
|
||||
if (!axios.isAxiosError(error)) {
|
||||
return errorMessage(error)
|
||||
}
|
||||
|
||||
const parts: string[] = []
|
||||
|
||||
const requestId = error.response?.headers?.['request-id']
|
||||
if (requestId) {
|
||||
parts.push(`request-id=${requestId}`)
|
||||
}
|
||||
|
||||
if (error.response?.status) {
|
||||
parts.push(`status=${error.response.status}`)
|
||||
}
|
||||
|
||||
if (error.code) {
|
||||
parts.push(`code=${error.code}`)
|
||||
}
|
||||
|
||||
if (error.message) {
|
||||
parts.push(error.message)
|
||||
}
|
||||
|
||||
return parts.join(', ')
|
||||
}
|
||||
@@ -1,105 +1,33 @@
|
||||
/**
 * Beta Session Tracing for Claude Code
 *
 * This module contains beta tracing features enabled when
 * ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
 * When those are not set, detailed beta tracing egress is disabled and the
 * exported helpers remain for compile-time compatibility only.
 *
 * For external users, tracing is enabled in SDK/headless mode, or in
 * interactive mode when the org is allowlisted via the
 * tengu_trace_lantern GrowthBook gate.
 * For ant users, tracing is enabled in all modes.
 *
 * Visibility Rules:
 * | Content          | External | Ant  |
 * |------------------|----------|------|
 * | System prompts   | ✅       | ✅   |
 * | Model output     | ✅       | ✅   |
 * | Thinking output  | ❌       | ✅   |
 * | Tools            | ✅       | ✅   |
 * | new_context      | ✅       | ✅   |
 *
 * Features:
 * - Per-agent message tracking with hash-based deduplication
 * - System prompt logging (once per unique hash)
 * - Hook execution spans
 * - Detailed new_context attributes for LLM requests
 */
|
||||
|
||||
import type { Span } from '@opentelemetry/api'
|
||||
import { createHash } from 'crypto'
|
||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
|
||||
import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
|
||||
import type { AssistantMessage, UserMessage } from '../../types/message.js'
|
||||
import { isEnvTruthy } from '../envUtils.js'
|
||||
import { jsonParse, jsonStringify } from '../slowOperations.js'
|
||||
import { logOTelEvent } from './events.js'
|
||||
// Primitive values accepted as span attributes
type AttributeValue = string | number | boolean

// Message type for API calls (UserMessage or AssistantMessage)
type APIMessage = UserMessage | AssistantMessage

// Minimal span-like surface this module writes attributes to. Both methods
// are optional so a partial or no-op writer can be supplied.
export interface SpanAttributeWriter {
  setAttribute?(_key: string, _value: AttributeValue): void
  setAttributes?(_attributes: Record<string, AttributeValue>): void
}
|
||||
|
||||
/**
 * Track hashes we've already logged this session (system prompts, tools, etc).
 *
 * WHY: System prompts and tool schemas are large and rarely change within a session.
 * Sending full content on every request would be wasteful. Instead, we hash and
 * only log the full content once per unique hash.
 */
const seenHashes = new Set<string>()
|
||||
// Context that is NEW for a given LLM request (deduplicated via seenHashes /
// lastReportedMessageHash); all fields optional because each may already have
// been reported.
export interface LLMRequestNewContext {
  // Full system prompt text (presumably logged once per unique hash — see seenHashes)
  systemPrompt?: string
  // Which agent issued the request (main thread, subagent, warmup)
  querySource?: string
  // Serialized tool information, when new this turn
  tools?: string
}
|
||||
|
||||
/**
|
||||
* Track the last reported message hash per querySource (agent) for incremental context.
|
||||
*
|
||||
* WHY: When debugging traces, we want to see what NEW information was added each turn,
|
||||
* not the entire conversation history (which can be huge). By tracking the last message
|
||||
* we reported per agent, we can compute and send only the delta (new messages since
|
||||
* the last request). This is tracked per-agent (querySource) because different agents
|
||||
* (main thread, subagents, warmup requests) have independent conversation contexts.
|
||||
*/
|
||||
const lastReportedMessageHash = new Map<string, string>()
|
||||
const MAX_CONTENT_SIZE = 60 * 1024
|
||||
|
||||
/**
|
||||
* Clear tracking state after compaction.
|
||||
* Old hashes are irrelevant once messages have been replaced.
|
||||
*/
|
||||
export function clearBetaTracingState(): void {
|
||||
seenHashes.clear()
|
||||
lastReportedMessageHash.clear()
|
||||
return
|
||||
}
|
||||
|
||||
const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
|
||||
|
||||
/**
|
||||
* Check if beta detailed tracing is enabled.
|
||||
* - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
|
||||
* - For external users, enabled in SDK/headless mode OR when org is
|
||||
* allowlisted via the tengu_trace_lantern GrowthBook gate
|
||||
*/
|
||||
export function isBetaTracingEnabled(): boolean {
|
||||
const baseEnabled =
|
||||
isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) &&
|
||||
Boolean(process.env.BETA_TRACING_ENDPOINT)
|
||||
|
||||
if (!baseEnabled) {
|
||||
return false
|
||||
}
|
||||
|
||||
// For external users, enable in SDK/headless mode OR when org is allowlisted.
|
||||
// Gate reads from disk cache, so first run after allowlisting returns false;
|
||||
// works from second run onward (same behavior as enhanced_telemetry_beta).
|
||||
if (process.env.USER_TYPE !== 'ant') {
|
||||
return (
|
||||
getIsNonInteractiveSession() ||
|
||||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false)
|
||||
)
|
||||
}
|
||||
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate content to fit within Honeycomb limits.
|
||||
*/
|
||||
export function truncateContent(
|
||||
content: string,
|
||||
maxSize: number = MAX_CONTENT_SIZE,
|
||||
@@ -116,376 +44,43 @@ export function truncateContent(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a short hash (first 12 hex chars of SHA-256).
|
||||
*/
|
||||
function shortHash(content: string): string {
|
||||
return createHash('sha256').update(content).digest('hex').slice(0, 12)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a hash for a system prompt.
|
||||
*/
|
||||
function hashSystemPrompt(systemPrompt: string): string {
|
||||
return `sp_${shortHash(systemPrompt)}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a hash for a message based on its content.
|
||||
*/
|
||||
function hashMessage(message: APIMessage): string {
|
||||
const content = jsonStringify(message.message.content)
|
||||
return `msg_${shortHash(content)}`
|
||||
}
|
||||
|
||||
// Regex to detect content wrapped in <system-reminder> tags
|
||||
const SYSTEM_REMINDER_REGEX =
|
||||
/^<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>$/
|
||||
|
||||
/**
|
||||
* Check if text is entirely a system reminder (wrapped in <system-reminder> tags).
|
||||
* Returns the inner content if it is, null otherwise.
|
||||
*/
|
||||
function extractSystemReminderContent(text: string): string | null {
|
||||
const match = text.trim().match(SYSTEM_REMINDER_REGEX)
|
||||
return match && match[1] ? match[1].trim() : null
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of formatting messages - separates regular content from system reminders.
|
||||
*/
|
||||
interface FormattedMessages {
|
||||
contextParts: string[]
|
||||
systemReminders: string[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Format user messages for new_context display, separating system reminders.
|
||||
* Only handles user messages (assistant messages are filtered out before this is called).
|
||||
*/
|
||||
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
|
||||
const contextParts: string[] = []
|
||||
const systemReminders: string[] = []
|
||||
|
||||
for (const message of messages) {
|
||||
const content = message.message.content
|
||||
if (typeof content === 'string') {
|
||||
const reminderContent = extractSystemReminderContent(content)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(`[USER]\n${content}`)
|
||||
}
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'text') {
|
||||
const reminderContent = extractSystemReminderContent(block.text)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(`[USER]\n${block.text}`)
|
||||
}
|
||||
} else if (block.type === 'tool_result') {
|
||||
const resultContent =
|
||||
typeof block.content === 'string'
|
||||
? block.content
|
||||
: jsonStringify(block.content)
|
||||
// Tool results can also contain system reminders (e.g., malware warning)
|
||||
const reminderContent = extractSystemReminderContent(resultContent)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(
|
||||
`[TOOL RESULT: ${block.tool_use_id}]\n${resultContent}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { contextParts, systemReminders }
|
||||
}
|
||||
|
||||
export interface LLMRequestNewContext {
|
||||
/** System prompt (typically only on first request or if changed) */
|
||||
systemPrompt?: string
|
||||
/** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */
|
||||
querySource?: string
|
||||
/** Tool schemas sent with the request */
|
||||
tools?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to an interaction span.
|
||||
* Adds new_context with the user prompt.
|
||||
*/
|
||||
export function addBetaInteractionAttributes(
|
||||
span: Span,
|
||||
userPrompt: string,
|
||||
_span: SpanAttributeWriter,
|
||||
_userPrompt: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
||||
`[USER PROMPT]\n${userPrompt}`,
|
||||
)
|
||||
span.setAttributes({
|
||||
new_context: truncatedPrompt,
|
||||
...(truncated && {
|
||||
new_context_truncated: true,
|
||||
new_context_original_length: userPrompt.length,
|
||||
}),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to an LLM request span.
|
||||
* Handles system prompt logging and new_context computation.
|
||||
*/
|
||||
export function addBetaLLMRequestAttributes(
|
||||
span: Span,
|
||||
newContext?: LLMRequestNewContext,
|
||||
messagesForAPI?: APIMessage[],
|
||||
_span: SpanAttributeWriter,
|
||||
_newContext?: LLMRequestNewContext,
|
||||
_messagesForAPI?: unknown[],
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
// Add system prompt info to the span
|
||||
if (newContext?.systemPrompt) {
|
||||
const promptHash = hashSystemPrompt(newContext.systemPrompt)
|
||||
const preview = newContext.systemPrompt.slice(0, 500)
|
||||
|
||||
// Always add hash, preview, and length to the span
|
||||
span.setAttribute('system_prompt_hash', promptHash)
|
||||
span.setAttribute('system_prompt_preview', preview)
|
||||
span.setAttribute('system_prompt_length', newContext.systemPrompt.length)
|
||||
|
||||
// Log the full system prompt only once per unique hash this session
|
||||
if (!seenHashes.has(promptHash)) {
|
||||
seenHashes.add(promptHash)
|
||||
|
||||
// Truncate for the log if needed
|
||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
||||
newContext.systemPrompt,
|
||||
)
|
||||
|
||||
void logOTelEvent('system_prompt', {
|
||||
system_prompt_hash: promptHash,
|
||||
system_prompt: truncatedPrompt,
|
||||
system_prompt_length: String(newContext.systemPrompt.length),
|
||||
...(truncated && { system_prompt_truncated: 'true' }),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add tools info to the span
|
||||
if (newContext?.tools) {
|
||||
try {
|
||||
const toolsArray = jsonParse(newContext.tools) as Record<
|
||||
string,
|
||||
unknown
|
||||
>[]
|
||||
|
||||
// Build array of {name, hash} for each tool
|
||||
const toolsWithHashes = toolsArray.map(tool => {
|
||||
const toolJson = jsonStringify(tool)
|
||||
const toolHash = shortHash(toolJson)
|
||||
return {
|
||||
name: typeof tool.name === 'string' ? tool.name : 'unknown',
|
||||
hash: toolHash,
|
||||
json: toolJson,
|
||||
}
|
||||
})
|
||||
|
||||
// Set span attribute with array of name/hash pairs
|
||||
span.setAttribute(
|
||||
'tools',
|
||||
jsonStringify(
|
||||
toolsWithHashes.map(({ name, hash }) => ({ name, hash })),
|
||||
),
|
||||
)
|
||||
span.setAttribute('tools_count', toolsWithHashes.length)
|
||||
|
||||
// Log each tool's full description once per unique hash
|
||||
for (const { name, hash, json } of toolsWithHashes) {
|
||||
if (!seenHashes.has(`tool_${hash}`)) {
|
||||
seenHashes.add(`tool_${hash}`)
|
||||
|
||||
const { content: truncatedTool, truncated } = truncateContent(json)
|
||||
|
||||
void logOTelEvent('tool', {
|
||||
tool_name: sanitizeToolNameForAnalytics(name),
|
||||
tool_hash: hash,
|
||||
tool: truncatedTool,
|
||||
...(truncated && { tool_truncated: 'true' }),
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// If parsing fails, log the raw tools string
|
||||
span.setAttribute('tools_parse_error', true)
|
||||
}
|
||||
}
|
||||
|
||||
// Add new_context using hash-based tracking (visible to all users)
|
||||
if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
|
||||
const querySource = newContext.querySource
|
||||
const lastHash = lastReportedMessageHash.get(querySource)
|
||||
|
||||
// Find where the last reported message is in the array
|
||||
let startIndex = 0
|
||||
if (lastHash) {
|
||||
for (let i = 0; i < messagesForAPI.length; i++) {
|
||||
const msg = messagesForAPI[i]
|
||||
if (msg && hashMessage(msg) === lastHash) {
|
||||
startIndex = i + 1 // Start after the last reported message
|
||||
break
|
||||
}
|
||||
}
|
||||
// If lastHash not found, startIndex stays 0 (send everything)
|
||||
}
|
||||
|
||||
// Get new messages (filter out assistant messages - we only want user input/tool results)
|
||||
const newMessages = messagesForAPI
|
||||
.slice(startIndex)
|
||||
.filter((m): m is UserMessage => m.type === 'user')
|
||||
|
||||
if (newMessages.length > 0) {
|
||||
// Format new messages, separating system reminders from regular content
|
||||
const { contextParts, systemReminders } =
|
||||
formatMessagesForContext(newMessages)
|
||||
|
||||
// Set new_context (regular user content and tool results)
|
||||
if (contextParts.length > 0) {
|
||||
const fullContext = contextParts.join('\n\n---\n\n')
|
||||
const { content: truncatedContext, truncated } =
|
||||
truncateContent(fullContext)
|
||||
|
||||
span.setAttributes({
|
||||
new_context: truncatedContext,
|
||||
new_context_message_count: newMessages.length,
|
||||
...(truncated && {
|
||||
new_context_truncated: true,
|
||||
new_context_original_length: fullContext.length,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
// Set system_reminders as a separate attribute
|
||||
if (systemReminders.length > 0) {
|
||||
const fullReminders = systemReminders.join('\n\n---\n\n')
|
||||
const { content: truncatedReminders, truncated: remindersTruncated } =
|
||||
truncateContent(fullReminders)
|
||||
|
||||
span.setAttributes({
|
||||
system_reminders: truncatedReminders,
|
||||
system_reminders_count: systemReminders.length,
|
||||
...(remindersTruncated && {
|
||||
system_reminders_truncated: true,
|
||||
system_reminders_original_length: fullReminders.length,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
// Update last reported hash to the last message in the array
|
||||
const lastMessage = messagesForAPI[messagesForAPI.length - 1]
|
||||
if (lastMessage) {
|
||||
lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to endLLMRequestSpan.
|
||||
* Handles model_output and thinking_output truncation.
|
||||
*/
|
||||
export function addBetaLLMResponseAttributes(
|
||||
endAttributes: Record<string, string | number | boolean>,
|
||||
metadata?: {
|
||||
_attributes: Record<string, AttributeValue>,
|
||||
_metadata?: {
|
||||
modelOutput?: string
|
||||
thinkingOutput?: string
|
||||
},
|
||||
): void {
|
||||
if (!isBetaTracingEnabled() || !metadata) {
|
||||
return
|
||||
}
|
||||
|
||||
// Add model_output (text content) - visible to all users
|
||||
if (metadata.modelOutput !== undefined) {
|
||||
const { content: modelOutput, truncated: outputTruncated } =
|
||||
truncateContent(metadata.modelOutput)
|
||||
endAttributes['response.model_output'] = modelOutput
|
||||
if (outputTruncated) {
|
||||
endAttributes['response.model_output_truncated'] = true
|
||||
endAttributes['response.model_output_original_length'] =
|
||||
metadata.modelOutput.length
|
||||
}
|
||||
}
|
||||
|
||||
// Add thinking_output - ant-only
|
||||
if (
|
||||
process.env.USER_TYPE === 'ant' &&
|
||||
metadata.thinkingOutput !== undefined
|
||||
) {
|
||||
const { content: thinkingOutput, truncated: thinkingTruncated } =
|
||||
truncateContent(metadata.thinkingOutput)
|
||||
endAttributes['response.thinking_output'] = thinkingOutput
|
||||
if (thinkingTruncated) {
|
||||
endAttributes['response.thinking_output_truncated'] = true
|
||||
endAttributes['response.thinking_output_original_length'] =
|
||||
metadata.thinkingOutput.length
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to startToolSpan.
|
||||
* Adds tool_input with the serialized tool input.
|
||||
*/
|
||||
export function addBetaToolInputAttributes(
|
||||
span: Span,
|
||||
toolName: string,
|
||||
toolInput: string,
|
||||
_span: SpanAttributeWriter,
|
||||
_toolName: string,
|
||||
_toolInput: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedInput, truncated } = truncateContent(
|
||||
`[TOOL INPUT: ${toolName}]\n${toolInput}`,
|
||||
)
|
||||
span.setAttributes({
|
||||
tool_input: truncatedInput,
|
||||
...(truncated && {
|
||||
tool_input_truncated: true,
|
||||
tool_input_original_length: toolInput.length,
|
||||
}),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to endToolSpan.
|
||||
* Adds new_context with the tool result.
|
||||
*/
|
||||
export function addBetaToolResultAttributes(
|
||||
endAttributes: Record<string, string | number | boolean>,
|
||||
toolName: string | number | boolean,
|
||||
toolResult: string,
|
||||
_attributes: Record<string, AttributeValue>,
|
||||
_toolName: string | number | boolean,
|
||||
_toolResult: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedResult, truncated } = truncateContent(
|
||||
`[TOOL RESULT: ${toolName}]\n${toolResult}`,
|
||||
)
|
||||
endAttributes['new_context'] = truncatedResult
|
||||
if (truncated) {
|
||||
endAttributes['new_context_truncated'] = true
|
||||
endAttributes['new_context_original_length'] = toolResult.length
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
import type { Attributes, HrTime } from '@opentelemetry/api'
|
||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
||||
import {
|
||||
AggregationTemporality,
|
||||
type MetricData,
|
||||
type DataPoint as OTelDataPoint,
|
||||
type PushMetricExporter,
|
||||
type ResourceMetrics,
|
||||
} from '@opentelemetry/sdk-metrics'
|
||||
import axios from 'axios'
|
||||
import { checkMetricsEnabled } from 'src/services/api/metricsOptOut.js'
|
||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
||||
import { getSubscriptionType, isClaudeAISubscriber } from '../auth.js'
|
||||
import { checkHasTrustDialogAccepted } from '../config.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { errorMessage, toError } from '../errors.js'
|
||||
import { getAuthHeaders } from '../http.js'
|
||||
import { logError } from '../log.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
import { getClaudeCodeUserAgent } from '../userAgent.js'
|
||||
|
||||
type DataPoint = {
|
||||
attributes: Record<string, string>
|
||||
value: number
|
||||
timestamp: string
|
||||
}
|
||||
|
||||
type Metric = {
|
||||
name: string
|
||||
description?: string
|
||||
unit?: string
|
||||
data_points: DataPoint[]
|
||||
}
|
||||
|
||||
type InternalMetricsPayload = {
|
||||
resource_attributes: Record<string, string>
|
||||
metrics: Metric[]
|
||||
}
|
||||
|
||||
export class BigQueryMetricsExporter implements PushMetricExporter {
|
||||
private readonly endpoint: string
|
||||
private readonly timeout: number
|
||||
private pendingExports: Promise<void>[] = []
|
||||
private isShutdown = false
|
||||
|
||||
constructor(options: { timeout?: number } = {}) {
|
||||
const defaultEndpoint = 'https://api.anthropic.com/api/claude_code/metrics'
|
||||
|
||||
if (
|
||||
process.env.USER_TYPE === 'ant' &&
|
||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT
|
||||
) {
|
||||
this.endpoint =
|
||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT +
|
||||
'/api/claude_code/metrics'
|
||||
} else {
|
||||
this.endpoint = defaultEndpoint
|
||||
}
|
||||
|
||||
this.timeout = options.timeout || 5000
|
||||
}
|
||||
|
||||
async export(
|
||||
metrics: ResourceMetrics,
|
||||
resultCallback: (result: ExportResult) => void,
|
||||
): Promise<void> {
|
||||
if (this.isShutdown) {
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: new Error('Exporter has been shutdown'),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const exportPromise = this.doExport(metrics, resultCallback)
|
||||
this.pendingExports.push(exportPromise)
|
||||
|
||||
// Clean up completed exports
|
||||
void exportPromise.finally(() => {
|
||||
const index = this.pendingExports.indexOf(exportPromise)
|
||||
if (index > -1) {
|
||||
void this.pendingExports.splice(index, 1)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
private async doExport(
|
||||
metrics: ResourceMetrics,
|
||||
resultCallback: (result: ExportResult) => void,
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Skip if trust not established in interactive mode
|
||||
// This prevents triggering apiKeyHelper before trust dialog
|
||||
const hasTrust =
|
||||
checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
|
||||
if (!hasTrust) {
|
||||
logForDebugging(
|
||||
'BigQuery metrics export: trust not established, skipping',
|
||||
)
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
return
|
||||
}
|
||||
|
||||
// Check organization-level metrics opt-out
|
||||
const metricsStatus = await checkMetricsEnabled()
|
||||
if (!metricsStatus.enabled) {
|
||||
logForDebugging('Metrics export disabled by organization setting')
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
return
|
||||
}
|
||||
|
||||
const payload = this.transformMetricsForInternal(metrics)
|
||||
|
||||
const authResult = getAuthHeaders()
|
||||
if (authResult.error) {
|
||||
logForDebugging(`Metrics export failed: ${authResult.error}`)
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: new Error(authResult.error),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': getClaudeCodeUserAgent(),
|
||||
...authResult.headers,
|
||||
}
|
||||
|
||||
const response = await axios.post(this.endpoint, payload, {
|
||||
timeout: this.timeout,
|
||||
headers,
|
||||
})
|
||||
|
||||
logForDebugging('BigQuery metrics exported successfully')
|
||||
logForDebugging(
|
||||
`BigQuery API Response: ${jsonStringify(response.data, null, 2)}`,
|
||||
)
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
} catch (error) {
|
||||
logForDebugging(`BigQuery metrics export failed: ${errorMessage(error)}`)
|
||||
logError(error)
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: toError(error),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
private transformMetricsForInternal(
|
||||
metrics: ResourceMetrics,
|
||||
): InternalMetricsPayload {
|
||||
const attrs = metrics.resource.attributes
|
||||
|
||||
const resourceAttributes: Record<string, string> = {
|
||||
'service.name': (attrs['service.name'] as string) || 'claude-code',
|
||||
'service.version': (attrs['service.version'] as string) || 'unknown',
|
||||
'os.type': (attrs['os.type'] as string) || 'unknown',
|
||||
'os.version': (attrs['os.version'] as string) || 'unknown',
|
||||
'host.arch': (attrs['host.arch'] as string) || 'unknown',
|
||||
'aggregation.temporality':
|
||||
this.selectAggregationTemporality() === AggregationTemporality.DELTA
|
||||
? 'delta'
|
||||
: 'cumulative',
|
||||
}
|
||||
|
||||
// Only add wsl.version if it exists (omit instead of default)
|
||||
if (attrs['wsl.version']) {
|
||||
resourceAttributes['wsl.version'] = attrs['wsl.version'] as string
|
||||
}
|
||||
|
||||
// Add customer type and subscription type
|
||||
if (isClaudeAISubscriber()) {
|
||||
resourceAttributes['user.customer_type'] = 'claude_ai'
|
||||
const subscriptionType = getSubscriptionType()
|
||||
if (subscriptionType) {
|
||||
resourceAttributes['user.subscription_type'] = subscriptionType
|
||||
}
|
||||
} else {
|
||||
resourceAttributes['user.customer_type'] = 'api'
|
||||
}
|
||||
|
||||
const transformed = {
|
||||
resource_attributes: resourceAttributes,
|
||||
metrics: metrics.scopeMetrics.flatMap(scopeMetric =>
|
||||
scopeMetric.metrics.map(metric => ({
|
||||
name: metric.descriptor.name,
|
||||
description: metric.descriptor.description,
|
||||
unit: metric.descriptor.unit,
|
||||
data_points: this.extractDataPoints(metric),
|
||||
})),
|
||||
),
|
||||
}
|
||||
|
||||
return transformed
|
||||
}
|
||||
|
||||
private extractDataPoints(metric: MetricData): DataPoint[] {
|
||||
const dataPoints = metric.dataPoints || []
|
||||
|
||||
return dataPoints
|
||||
.filter(
|
||||
(point): point is OTelDataPoint<number> =>
|
||||
typeof point.value === 'number',
|
||||
)
|
||||
.map(point => ({
|
||||
attributes: this.convertAttributes(point.attributes),
|
||||
value: point.value,
|
||||
timestamp: this.hrTimeToISOString(
|
||||
point.endTime || point.startTime || [Date.now() / 1000, 0],
|
||||
),
|
||||
}))
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
this.isShutdown = true
|
||||
await this.forceFlush()
|
||||
logForDebugging('BigQuery metrics exporter shutdown complete')
|
||||
}
|
||||
|
||||
async forceFlush(): Promise<void> {
|
||||
await Promise.all(this.pendingExports)
|
||||
logForDebugging('BigQuery metrics exporter flush complete')
|
||||
}
|
||||
|
||||
private convertAttributes(
|
||||
attributes: Attributes | undefined,
|
||||
): Record<string, string> {
|
||||
const result: Record<string, string> = {}
|
||||
if (attributes) {
|
||||
for (const [key, value] of Object.entries(attributes)) {
|
||||
if (value !== undefined && value !== null) {
|
||||
result[key] = String(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
private hrTimeToISOString(hrTime: HrTime): string {
|
||||
const [seconds, nanoseconds] = hrTime
|
||||
const date = new Date(seconds * 1000 + nanoseconds / 1000000)
|
||||
return date.toISOString()
|
||||
}
|
||||
|
||||
selectAggregationTemporality(): AggregationTemporality {
|
||||
// DO NOT CHANGE THIS TO CUMULATIVE
|
||||
// It would mess up the aggregation of metrics
|
||||
// for CC Productivity metrics dashboard
|
||||
return AggregationTemporality.DELTA
|
||||
}
|
||||
}
|
||||
@@ -1,14 +1,5 @@
|
||||
export function bootstrapTelemetry(): void {}
|
||||
|
||||
export function parseExporterTypes(value: string | undefined): string[] {
|
||||
return (value || '')
|
||||
.trim()
|
||||
.split(',')
|
||||
.filter(Boolean)
|
||||
.map(t => t.trim())
|
||||
.filter(t => t !== 'none')
|
||||
}
|
||||
|
||||
export function isTelemetryEnabled(): boolean {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
import type { DiagLogger } from '@opentelemetry/api'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { logError } from '../log.js'
|
||||
export class ClaudeCodeDiagLogger implements DiagLogger {
|
||||
error(message: string, ..._: unknown[]) {
|
||||
logError(new Error(message))
|
||||
logForDebugging(`[3P telemetry] OTEL diag error: ${message}`, {
|
||||
level: 'error',
|
||||
})
|
||||
}
|
||||
warn(message: string, ..._: unknown[]) {
|
||||
logError(new Error(message))
|
||||
logForDebugging(`[3P telemetry] OTEL diag warn: ${message}`, {
|
||||
level: 'warn',
|
||||
})
|
||||
}
|
||||
info(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
debug(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
verbose(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,71 +0,0 @@
|
||||
import type { Attributes } from '@opentelemetry/api'
|
||||
import { getSessionId } from 'src/bootstrap/state.js'
|
||||
import { getOauthAccountInfo } from './auth.js'
|
||||
import { getOrCreateUserID } from './config.js'
|
||||
import { envDynamic } from './envDynamic.js'
|
||||
import { isEnvTruthy } from './envUtils.js'
|
||||
import { toTaggedId } from './taggedId.js'
|
||||
|
||||
// Default configuration for metrics cardinality
|
||||
const METRICS_CARDINALITY_DEFAULTS = {
|
||||
OTEL_METRICS_INCLUDE_SESSION_ID: true,
|
||||
OTEL_METRICS_INCLUDE_VERSION: false,
|
||||
OTEL_METRICS_INCLUDE_ACCOUNT_UUID: true,
|
||||
}
|
||||
|
||||
function shouldIncludeAttribute(
|
||||
envVar: keyof typeof METRICS_CARDINALITY_DEFAULTS,
|
||||
): boolean {
|
||||
const defaultValue = METRICS_CARDINALITY_DEFAULTS[envVar]
|
||||
const envValue = process.env[envVar]
|
||||
|
||||
if (envValue === undefined) {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
return isEnvTruthy(envValue)
|
||||
}
|
||||
|
||||
export function getTelemetryAttributes(): Attributes {
|
||||
const userId = getOrCreateUserID()
|
||||
const sessionId = getSessionId()
|
||||
|
||||
const attributes: Attributes = {
|
||||
'user.id': userId,
|
||||
}
|
||||
|
||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_SESSION_ID')) {
|
||||
attributes['session.id'] = sessionId
|
||||
}
|
||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_VERSION')) {
|
||||
attributes['app.version'] = MACRO.VERSION
|
||||
}
|
||||
|
||||
// Only include OAuth account data when actively using OAuth authentication
|
||||
const oauthAccount = getOauthAccountInfo()
|
||||
if (oauthAccount) {
|
||||
const orgId = oauthAccount.organizationUuid
|
||||
const email = oauthAccount.emailAddress
|
||||
const accountUuid = oauthAccount.accountUuid
|
||||
|
||||
if (orgId) attributes['organization.id'] = orgId
|
||||
if (email) attributes['user.email'] = email
|
||||
|
||||
if (
|
||||
accountUuid &&
|
||||
shouldIncludeAttribute('OTEL_METRICS_INCLUDE_ACCOUNT_UUID')
|
||||
) {
|
||||
attributes['user.account_uuid'] = accountUuid
|
||||
attributes['user.account_id'] =
|
||||
process.env.CLAUDE_CODE_ACCOUNT_TAGGED_ID ||
|
||||
toTaggedId('user', accountUuid)
|
||||
}
|
||||
}
|
||||
|
||||
// Add terminal type if available
|
||||
if (envDynamic.terminal) {
|
||||
attributes['terminal.type'] = envDynamic.terminal
|
||||
}
|
||||
|
||||
return attributes
|
||||
}
|
||||
Reference in New Issue
Block a user