Compare commits
4 Commits
codex-remo
...
dccd151718
| Author | SHA1 | Date | |
|---|---|---|---|
| dccd151718 | |||
| a95f0a540a | |||
| 497f81f4f9 | |||
| 9e7338d54c |
21
README.md
21
README.md
@@ -33,3 +33,24 @@ bun run compile
|
|||||||
|
|
||||||
- `node_modules/`, `dist/`, and generated CLI binaries are ignored by Git.
|
- `node_modules/`, `dist/`, and generated CLI binaries are ignored by Git.
|
||||||
- `bun.lock` is kept in the repository for reproducible installs.
|
- `bun.lock` is kept in the repository for reproducible installs.
|
||||||
|
|
||||||
|
## Local Info Egress Status
|
||||||
|
|
||||||
|
This fork has removed several local system and project metadata egress paths that existed in the recovered upstream code.
|
||||||
|
|
||||||
|
Removed in this repository:
|
||||||
|
|
||||||
|
- Model-request context injection of working directory, git status/history, `CLAUDE.md`, current date, platform, shell, and OS version.
|
||||||
|
- Feedback upload and transcript-share upload paths.
|
||||||
|
- Remote Control / Bridge registration fields that sent machine name, git branch, and git repository URL, plus git source/outcome data in bridge session creation.
|
||||||
|
- Trusted-device enrollment and trusted-device token header emission for bridge requests.
|
||||||
|
- `/insights` automatic S3 upload; reports now stay local via `file://` paths only.
|
||||||
|
- Datadog analytics and Anthropic 1P event-logging egress.
|
||||||
|
- GrowthBook remote evaluation/network fetches; local env/config overrides and cached values remain available for compatibility.
|
||||||
|
- OpenTelemetry initialization and event export paths.
|
||||||
|
- Extra dead telemetry scaffolding tied to the removed egress paths, including startup/session analytics fanout, logout telemetry flush, and remote GrowthBook metadata collectors.
|
||||||
|
|
||||||
|
Still present:
|
||||||
|
|
||||||
|
- Normal Claude API requests are still part of product functionality; this fork only removes extra local metadata injection, not core model/network access.
|
||||||
|
- Minimal compatibility helpers for analytics and GrowthBook still exist in the tree as local no-op or cache-only code.
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import memoize from 'lodash-es/memoize.js'
|
|
||||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
|
|
||||||
import { logForDebugging } from '../utils/debug.js'
|
import { logForDebugging } from '../utils/debug.js'
|
||||||
import { getSecureStorage } from '../utils/secureStorage/index.js'
|
import { getSecureStorage } from '../utils/secureStorage/index.js'
|
||||||
|
|
||||||
@@ -21,36 +19,12 @@ import { getSecureStorage } from '../utils/secureStorage/index.js'
|
|||||||
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
|
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'
|
|
||||||
|
|
||||||
function isGateEnabled(): boolean {
|
|
||||||
return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
|
|
||||||
// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
|
|
||||||
// Cache cleared on logout (clearAuthRelatedCaches) and after any local update.
|
|
||||||
//
|
|
||||||
// Only the storage read is memoized — the GrowthBook gate is checked live so
|
|
||||||
// that a gate flip after GrowthBook refresh takes effect without a restart.
|
|
||||||
const readStoredToken = memoize((): string | undefined => {
|
|
||||||
// Env var takes precedence for testing/canary.
|
|
||||||
const envToken = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
|
|
||||||
if (envToken) {
|
|
||||||
return envToken
|
|
||||||
}
|
|
||||||
return getSecureStorage().read()?.trustedDeviceToken
|
|
||||||
})
|
|
||||||
|
|
||||||
export function getTrustedDeviceToken(): string | undefined {
|
export function getTrustedDeviceToken(): string | undefined {
|
||||||
if (!isGateEnabled()) {
|
return undefined
|
||||||
return undefined
|
|
||||||
}
|
|
||||||
return readStoredToken()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function clearTrustedDeviceTokenCache(): void {
|
export function clearTrustedDeviceTokenCache(): void {
|
||||||
readStoredToken.cache?.clear?.()
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -59,9 +33,6 @@ export function clearTrustedDeviceTokenCache(): void {
|
|||||||
* as X-Trusted-Device-Token after account switches.
|
* as X-Trusted-Device-Token after account switches.
|
||||||
*/
|
*/
|
||||||
export function clearTrustedDeviceToken(): void {
|
export function clearTrustedDeviceToken(): void {
|
||||||
if (!isGateEnabled()) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
const secureStorage = getSecureStorage()
|
const secureStorage = getSecureStorage()
|
||||||
try {
|
try {
|
||||||
const data = secureStorage.read()
|
const data = secureStorage.read()
|
||||||
@@ -72,7 +43,6 @@ export function clearTrustedDeviceToken(): void {
|
|||||||
} catch {
|
} catch {
|
||||||
// Best-effort — don't block login if storage is inaccessible
|
// Best-effort — don't block login if storage is inaccessible
|
||||||
}
|
}
|
||||||
readStoredToken.cache?.clear?.()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import { Text } from '../../ink.js';
|
|||||||
import { refreshGrowthBookAfterAuthChange } from '../../services/analytics/growthbook.js';
|
import { refreshGrowthBookAfterAuthChange } from '../../services/analytics/growthbook.js';
|
||||||
import { getGroveNoticeConfig, getGroveSettings } from '../../services/api/grove.js';
|
import { getGroveNoticeConfig, getGroveSettings } from '../../services/api/grove.js';
|
||||||
import { clearPolicyLimitsCache } from '../../services/policyLimits/index.js';
|
import { clearPolicyLimitsCache } from '../../services/policyLimits/index.js';
|
||||||
// flushTelemetry is loaded lazily to avoid pulling in ~1.1MB of OpenTelemetry at startup
|
|
||||||
import { clearRemoteManagedSettingsCache } from '../../services/remoteManagedSettings/index.js';
|
import { clearRemoteManagedSettingsCache } from '../../services/remoteManagedSettings/index.js';
|
||||||
import { getClaudeAIOAuthTokens, removeApiKey } from '../../utils/auth.js';
|
import { getClaudeAIOAuthTokens, removeApiKey } from '../../utils/auth.js';
|
||||||
import { clearBetasCaches } from '../../utils/betas.js';
|
import { clearBetasCaches } from '../../utils/betas.js';
|
||||||
@@ -16,11 +15,6 @@ import { resetUserCache } from '../../utils/user.js';
|
|||||||
export async function performLogout({
|
export async function performLogout({
|
||||||
clearOnboarding = false
|
clearOnboarding = false
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
// Flush telemetry BEFORE clearing credentials to prevent org data leakage
|
|
||||||
const {
|
|
||||||
flushTelemetry
|
|
||||||
} = await import('../../utils/telemetry/instrumentation.js');
|
|
||||||
await flushTelemetry();
|
|
||||||
await removeApiKey();
|
await removeApiKey();
|
||||||
|
|
||||||
// Wipe all secure storage data on logout
|
// Wipe all secure storage data on logout
|
||||||
|
|||||||
@@ -1,11 +1,8 @@
|
|||||||
import { profileCheckpoint } from '../utils/startupProfiler.js'
|
import { profileCheckpoint } from '../utils/startupProfiler.js'
|
||||||
import '../bootstrap/state.js'
|
import '../bootstrap/state.js'
|
||||||
import '../utils/config.js'
|
import '../utils/config.js'
|
||||||
import type { Attributes, MetricOptions } from '@opentelemetry/api'
|
|
||||||
import memoize from 'lodash-es/memoize.js'
|
import memoize from 'lodash-es/memoize.js'
|
||||||
import { getIsNonInteractiveSession } from 'src/bootstrap/state.js'
|
import { getIsNonInteractiveSession } from 'src/bootstrap/state.js'
|
||||||
import type { AttributedCounter } from '../bootstrap/state.js'
|
|
||||||
import { getSessionCounter, setMeter } from '../bootstrap/state.js'
|
|
||||||
import { shutdownLspServerManager } from '../services/lsp/manager.js'
|
import { shutdownLspServerManager } from '../services/lsp/manager.js'
|
||||||
import { populateOAuthAccountInfoIfNeeded } from '../services/oauth/client.js'
|
import { populateOAuthAccountInfoIfNeeded } from '../services/oauth/client.js'
|
||||||
import {
|
import {
|
||||||
@@ -41,19 +38,9 @@ import {
|
|||||||
ensureScratchpadDir,
|
ensureScratchpadDir,
|
||||||
isScratchpadEnabled,
|
isScratchpadEnabled,
|
||||||
} from '../utils/permissions/filesystem.js'
|
} from '../utils/permissions/filesystem.js'
|
||||||
// initializeTelemetry is loaded lazily via import() in setMeterState() to defer
|
|
||||||
// ~400KB of OpenTelemetry + protobuf modules until telemetry is actually initialized.
|
|
||||||
// gRPC exporters (~700KB via @grpc/grpc-js) are further lazy-loaded within instrumentation.ts.
|
|
||||||
import { configureGlobalAgents } from '../utils/proxy.js'
|
import { configureGlobalAgents } from '../utils/proxy.js'
|
||||||
import { isBetaTracingEnabled } from '../utils/telemetry/betaSessionTracing.js'
|
|
||||||
import { getTelemetryAttributes } from '../utils/telemetryAttributes.js'
|
|
||||||
import { setShellIfWindows } from '../utils/windowsPaths.js'
|
import { setShellIfWindows } from '../utils/windowsPaths.js'
|
||||||
|
|
||||||
// initialize1PEventLogging is dynamically imported to defer OpenTelemetry sdk-logs/resources
|
|
||||||
|
|
||||||
// Track if telemetry has been initialized to prevent double initialization
|
|
||||||
let telemetryInitialized = false
|
|
||||||
|
|
||||||
export const init = memoize(async (): Promise<void> => {
|
export const init = memoize(async (): Promise<void> => {
|
||||||
const initStartTime = Date.now()
|
const initStartTime = Date.now()
|
||||||
logForDiagnosticsNoPII('info', 'init_started')
|
logForDiagnosticsNoPII('info', 'init_started')
|
||||||
@@ -87,22 +74,8 @@ export const init = memoize(async (): Promise<void> => {
|
|||||||
setupGracefulShutdown()
|
setupGracefulShutdown()
|
||||||
profileCheckpoint('init_after_graceful_shutdown')
|
profileCheckpoint('init_after_graceful_shutdown')
|
||||||
|
|
||||||
// Initialize 1P event logging (no security concerns, but deferred to avoid
|
// Telemetry/log export is disabled in this build. Keep the startup
|
||||||
// loading OpenTelemetry sdk-logs at startup). growthbook.js is already in
|
// checkpoint so callers depending on the init timeline still see it.
|
||||||
// the module cache by this point (firstPartyEventLogger imports it), so the
|
|
||||||
// second dynamic import adds no load cost.
|
|
||||||
void Promise.all([
|
|
||||||
import('../services/analytics/firstPartyEventLogger.js'),
|
|
||||||
import('../services/analytics/growthbook.js'),
|
|
||||||
]).then(([fp, gb]) => {
|
|
||||||
fp.initialize1PEventLogging()
|
|
||||||
// Rebuild the logger provider if tengu_1p_event_batch_config changes
|
|
||||||
// mid-session. Change detection (isEqual) is inside the handler so
|
|
||||||
// unchanged refreshes are no-ops.
|
|
||||||
gb.onGrowthBookRefresh(() => {
|
|
||||||
void fp.reinitialize1PEventLoggingIfConfigChanged()
|
|
||||||
})
|
|
||||||
})
|
|
||||||
profileCheckpoint('init_after_1p_event_logging')
|
profileCheckpoint('init_after_1p_event_logging')
|
||||||
|
|
||||||
// Populate OAuth account info if it is not already cached in config. This is needed since the
|
// Populate OAuth account info if it is not already cached in config. This is needed since the
|
||||||
@@ -236,105 +209,3 @@ export const init = memoize(async (): Promise<void> => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize telemetry after trust has been granted.
|
|
||||||
* For remote-settings-eligible users, waits for settings to load (non-blocking),
|
|
||||||
* then re-applies env vars (to include remote settings) before initializing telemetry.
|
|
||||||
* For non-eligible users, initializes telemetry immediately.
|
|
||||||
* This should only be called once, after the trust dialog has been accepted.
|
|
||||||
*/
|
|
||||||
export function initializeTelemetryAfterTrust(): void {
|
|
||||||
if (isEligibleForRemoteManagedSettings()) {
|
|
||||||
// For SDK/headless mode with beta tracing, initialize eagerly first
|
|
||||||
// to ensure the tracer is ready before the first query runs.
|
|
||||||
// The async path below will still run but doInitializeTelemetry() guards against double init.
|
|
||||||
if (getIsNonInteractiveSession() && isBetaTracingEnabled()) {
|
|
||||||
void doInitializeTelemetry().catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Eager telemetry init failed (beta tracing): ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
logForDebugging(
|
|
||||||
'[3P telemetry] Waiting for remote managed settings before telemetry init',
|
|
||||||
)
|
|
||||||
void waitForRemoteManagedSettingsToLoad()
|
|
||||||
.then(async () => {
|
|
||||||
logForDebugging(
|
|
||||||
'[3P telemetry] Remote managed settings loaded, initializing telemetry',
|
|
||||||
)
|
|
||||||
// Re-apply env vars to pick up remote settings before initializing telemetry.
|
|
||||||
applyConfigEnvironmentVariables()
|
|
||||||
await doInitializeTelemetry()
|
|
||||||
})
|
|
||||||
.catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Telemetry init failed (remote settings path): ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
void doInitializeTelemetry().catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Telemetry init failed: ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function doInitializeTelemetry(): Promise<void> {
|
|
||||||
if (telemetryInitialized) {
|
|
||||||
// Already initialized, nothing to do
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set flag before init to prevent double initialization
|
|
||||||
telemetryInitialized = true
|
|
||||||
try {
|
|
||||||
await setMeterState()
|
|
||||||
} catch (error) {
|
|
||||||
// Reset flag on failure so subsequent calls can retry
|
|
||||||
telemetryInitialized = false
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function setMeterState(): Promise<void> {
|
|
||||||
// Lazy-load instrumentation to defer ~400KB of OpenTelemetry + protobuf
|
|
||||||
const { initializeTelemetry } = await import(
|
|
||||||
'../utils/telemetry/instrumentation.js'
|
|
||||||
)
|
|
||||||
// Initialize customer OTLP telemetry (metrics, logs, traces)
|
|
||||||
const meter = await initializeTelemetry()
|
|
||||||
if (meter) {
|
|
||||||
// Create factory function for attributed counters
|
|
||||||
const createAttributedCounter = (
|
|
||||||
name: string,
|
|
||||||
options: MetricOptions,
|
|
||||||
): AttributedCounter => {
|
|
||||||
const counter = meter?.createCounter(name, options)
|
|
||||||
|
|
||||||
return {
|
|
||||||
add(value: number, additionalAttributes: Attributes = {}) {
|
|
||||||
// Always fetch fresh telemetry attributes to ensure they're up to date
|
|
||||||
const currentAttributes = getTelemetryAttributes()
|
|
||||||
const mergedAttributes = {
|
|
||||||
...currentAttributes,
|
|
||||||
...additionalAttributes,
|
|
||||||
}
|
|
||||||
counter?.add(value, mergedAttributes)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
setMeter(meter, createAttributedCounter)
|
|
||||||
|
|
||||||
// Increment session counter here because the startup telemetry path
|
|
||||||
// runs before this async initialization completes, so the counter
|
|
||||||
// would be null there.
|
|
||||||
getSessionCounter()?.add(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import { type ChannelEntry, getAllowedChannels, setAllowedChannels, setHasDevCha
|
|||||||
import type { Command } from './commands.js';
|
import type { Command } from './commands.js';
|
||||||
import { createStatsStore, type StatsStore } from './context/stats.js';
|
import { createStatsStore, type StatsStore } from './context/stats.js';
|
||||||
import { getSystemContext } from './context.js';
|
import { getSystemContext } from './context.js';
|
||||||
import { initializeTelemetryAfterTrust } from './entrypoints/init.js';
|
|
||||||
import { isSynchronizedOutputSupported } from './ink/terminal.js';
|
import { isSynchronizedOutputSupported } from './ink/terminal.js';
|
||||||
import type { RenderOptions, Root, TextProps } from './ink.js';
|
import type { RenderOptions, Root, TextProps } from './ink.js';
|
||||||
import { KeybindingSetup } from './keybindings/KeybindingProviderSetup.js';
|
import { KeybindingSetup } from './keybindings/KeybindingProviderSetup.js';
|
||||||
@@ -183,11 +182,6 @@ export async function showSetupScreens(root: Root, permissionMode: PermissionMod
|
|||||||
// This includes potentially dangerous environment variables from untrusted sources
|
// This includes potentially dangerous environment variables from untrusted sources
|
||||||
applyConfigEnvironmentVariables();
|
applyConfigEnvironmentVariables();
|
||||||
|
|
||||||
// Initialize telemetry after env vars are applied so OTEL endpoint env vars and
|
|
||||||
// otelHeadersHelper (which requires trust to execute) are available.
|
|
||||||
// Defer to next tick so the OTel dynamic import resolves after first render
|
|
||||||
// instead of during the pre-render microtask queue.
|
|
||||||
setImmediate(() => initializeTelemetryAfterTrust());
|
|
||||||
if (await isQualifiedForGrove()) {
|
if (await isQualifiedForGrove()) {
|
||||||
const {
|
const {
|
||||||
GroveDialog
|
GroveDialog
|
||||||
|
|||||||
85
src/main.tsx
85
src/main.tsx
@@ -29,7 +29,7 @@ import React from 'react';
|
|||||||
import { getOauthConfig } from './constants/oauth.js';
|
import { getOauthConfig } from './constants/oauth.js';
|
||||||
import { getRemoteSessionUrl } from './constants/product.js';
|
import { getRemoteSessionUrl } from './constants/product.js';
|
||||||
import { getSystemContext, getUserContext } from './context.js';
|
import { getSystemContext, getUserContext } from './context.js';
|
||||||
import { init, initializeTelemetryAfterTrust } from './entrypoints/init.js';
|
import { init } from './entrypoints/init.js';
|
||||||
import { addToHistory } from './history.js';
|
import { addToHistory } from './history.js';
|
||||||
import type { Root } from './ink.js';
|
import type { Root } from './ink.js';
|
||||||
import { launchRepl } from './replLauncher.js';
|
import { launchRepl } from './replLauncher.js';
|
||||||
@@ -49,7 +49,7 @@ import { isAgentSwarmsEnabled } from './utils/agentSwarmsEnabled.js';
|
|||||||
import { count, uniq } from './utils/array.js';
|
import { count, uniq } from './utils/array.js';
|
||||||
import { installAsciicastRecorder } from './utils/asciicast.js';
|
import { installAsciicastRecorder } from './utils/asciicast.js';
|
||||||
import { getSubscriptionType, isClaudeAISubscriber, prefetchAwsCredentialsAndBedRockInfoIfSafe, prefetchGcpCredentialsIfSafe, validateForceLoginOrg } from './utils/auth.js';
|
import { getSubscriptionType, isClaudeAISubscriber, prefetchAwsCredentialsAndBedRockInfoIfSafe, prefetchGcpCredentialsIfSafe, validateForceLoginOrg } from './utils/auth.js';
|
||||||
import { checkHasTrustDialogAccepted, getGlobalConfig, getRemoteControlAtStartup, isAutoUpdaterDisabled, saveGlobalConfig } from './utils/config.js';
|
import { checkHasTrustDialogAccepted, getGlobalConfig, getRemoteControlAtStartup, saveGlobalConfig } from './utils/config.js';
|
||||||
import { seedEarlyInput, stopCapturingEarlyInput } from './utils/earlyInput.js';
|
import { seedEarlyInput, stopCapturingEarlyInput } from './utils/earlyInput.js';
|
||||||
import { getInitialEffortSetting, parseEffortValue } from './utils/effort.js';
|
import { getInitialEffortSetting, parseEffortValue } from './utils/effort.js';
|
||||||
import { getInitialFastModeSetting, isFastModeEnabled, prefetchFastModeStatus, resolveFastModeStatusFromCache } from './utils/fastMode.js';
|
import { getInitialFastModeSetting, isFastModeEnabled, prefetchFastModeStatus, resolveFastModeStatusFromCache } from './utils/fastMode.js';
|
||||||
@@ -80,10 +80,8 @@ const coordinatorModeModule = feature('COORDINATOR_MODE') ? require('./coordinat
|
|||||||
const assistantModule = feature('KAIROS') ? require('./assistant/index.js') as typeof import('./assistant/index.js') : null;
|
const assistantModule = feature('KAIROS') ? require('./assistant/index.js') as typeof import('./assistant/index.js') : null;
|
||||||
const kairosGate = feature('KAIROS') ? require('./assistant/gate.js') as typeof import('./assistant/gate.js') : null;
|
const kairosGate = feature('KAIROS') ? require('./assistant/gate.js') as typeof import('./assistant/gate.js') : null;
|
||||||
import { relative, resolve } from 'path';
|
import { relative, resolve } from 'path';
|
||||||
import { isAnalyticsDisabled } from 'src/services/analytics/config.js';
|
|
||||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js';
|
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js';
|
||||||
import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent } from 'src/services/analytics/index.js';
|
import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent } from 'src/services/analytics/index.js';
|
||||||
import { initializeAnalyticsGates } from 'src/services/analytics/sink.js';
|
|
||||||
import { getOriginalCwd, setAdditionalDirectoriesForClaudeMd, setIsRemoteMode, setMainLoopModelOverride, setMainThreadAgentType, setTeleportedSessionInfo } from './bootstrap/state.js';
|
import { getOriginalCwd, setAdditionalDirectoriesForClaudeMd, setIsRemoteMode, setMainLoopModelOverride, setMainThreadAgentType, setTeleportedSessionInfo } from './bootstrap/state.js';
|
||||||
import { filterCommandsForRemoteMode, getCommands } from './commands.js';
|
import { filterCommandsForRemoteMode, getCommands } from './commands.js';
|
||||||
import type { StatsStore } from './context/stats.js';
|
import type { StatsStore } from './context/stats.js';
|
||||||
@@ -103,15 +101,13 @@ import type { Message as MessageType } from './types/message.js';
|
|||||||
import { assertMinVersion } from './utils/autoUpdater.js';
|
import { assertMinVersion } from './utils/autoUpdater.js';
|
||||||
import { CLAUDE_IN_CHROME_SKILL_HINT, CLAUDE_IN_CHROME_SKILL_HINT_WITH_WEBBROWSER } from './utils/claudeInChrome/prompt.js';
|
import { CLAUDE_IN_CHROME_SKILL_HINT, CLAUDE_IN_CHROME_SKILL_HINT_WITH_WEBBROWSER } from './utils/claudeInChrome/prompt.js';
|
||||||
import { setupClaudeInChrome, shouldAutoEnableClaudeInChrome, shouldEnableClaudeInChrome } from './utils/claudeInChrome/setup.js';
|
import { setupClaudeInChrome, shouldAutoEnableClaudeInChrome, shouldEnableClaudeInChrome } from './utils/claudeInChrome/setup.js';
|
||||||
import { getContextWindowForModel } from './utils/context.js';
|
|
||||||
import { loadConversationForResume } from './utils/conversationRecovery.js';
|
import { loadConversationForResume } from './utils/conversationRecovery.js';
|
||||||
import { buildDeepLinkBanner } from './utils/deepLink/banner.js';
|
import { buildDeepLinkBanner } from './utils/deepLink/banner.js';
|
||||||
import { hasNodeOption, isBareMode, isEnvTruthy, isInProtectedNamespace } from './utils/envUtils.js';
|
import { isBareMode, isEnvTruthy, isInProtectedNamespace } from './utils/envUtils.js';
|
||||||
import { refreshExampleCommands } from './utils/exampleCommands.js';
|
import { refreshExampleCommands } from './utils/exampleCommands.js';
|
||||||
import type { FpsMetrics } from './utils/fpsTracker.js';
|
import type { FpsMetrics } from './utils/fpsTracker.js';
|
||||||
import { getWorktreePaths } from './utils/getWorktreePaths.js';
|
import { getWorktreePaths } from './utils/getWorktreePaths.js';
|
||||||
import { findGitRoot, getBranch, getIsGit, getWorktreeCount } from './utils/git.js';
|
import { findGitRoot, getBranch } from './utils/git.js';
|
||||||
import { getGhAuthStatus } from './utils/github/ghAuthStatus.js';
|
|
||||||
import { safeParseJSON } from './utils/json.js';
|
import { safeParseJSON } from './utils/json.js';
|
||||||
import { logError } from './utils/log.js';
|
import { logError } from './utils/log.js';
|
||||||
import { getModelDeprecationWarning } from './utils/model/deprecation.js';
|
import { getModelDeprecationWarning } from './utils/model/deprecation.js';
|
||||||
@@ -121,9 +117,7 @@ import { PERMISSION_MODES } from './utils/permissions/PermissionMode.js';
|
|||||||
import { checkAndDisableBypassPermissions, getAutoModeEnabledStateIfCached, initializeToolPermissionContext, initialPermissionModeFromCLI, isDefaultPermissionModeAuto, parseToolListFromCLI, removeDangerousPermissions, stripDangerousPermissionsForAutoMode, verifyAutoModeGateAccess } from './utils/permissions/permissionSetup.js';
|
import { checkAndDisableBypassPermissions, getAutoModeEnabledStateIfCached, initializeToolPermissionContext, initialPermissionModeFromCLI, isDefaultPermissionModeAuto, parseToolListFromCLI, removeDangerousPermissions, stripDangerousPermissionsForAutoMode, verifyAutoModeGateAccess } from './utils/permissions/permissionSetup.js';
|
||||||
import { cleanupOrphanedPluginVersionsInBackground } from './utils/plugins/cacheUtils.js';
|
import { cleanupOrphanedPluginVersionsInBackground } from './utils/plugins/cacheUtils.js';
|
||||||
import { initializeVersionedPlugins } from './utils/plugins/installedPluginsManager.js';
|
import { initializeVersionedPlugins } from './utils/plugins/installedPluginsManager.js';
|
||||||
import { getManagedPluginNames } from './utils/plugins/managedPlugins.js';
|
|
||||||
import { getGlobExclusionsForPluginCache } from './utils/plugins/orphanedPluginFilter.js';
|
import { getGlobExclusionsForPluginCache } from './utils/plugins/orphanedPluginFilter.js';
|
||||||
import { getPluginSeedDirs } from './utils/plugins/pluginDirectories.js';
|
|
||||||
import { countFilesRoundedRg } from './utils/ripgrep.js';
|
import { countFilesRoundedRg } from './utils/ripgrep.js';
|
||||||
import { processSessionStartHooks, processSetupHooks } from './utils/sessionStart.js';
|
import { processSessionStartHooks, processSetupHooks } from './utils/sessionStart.js';
|
||||||
import { cacheSessionTitle, getSessionIdFromLog, loadTranscriptFromFile, saveAgentSetting, saveMode, searchSessionsByCustomTitle, sessionIdExists } from './utils/sessionStorage.js';
|
import { cacheSessionTitle, getSessionIdFromLog, loadTranscriptFromFile, saveAgentSetting, saveMode, searchSessionsByCustomTitle, sessionIdExists } from './utils/sessionStorage.js';
|
||||||
@@ -132,8 +126,6 @@ import { getInitialSettings, getManagedSettingsKeysForLogging, getSettingsForSou
|
|||||||
import { resetSettingsCache } from './utils/settings/settingsCache.js';
|
import { resetSettingsCache } from './utils/settings/settingsCache.js';
|
||||||
import type { ValidationError } from './utils/settings/validation.js';
|
import type { ValidationError } from './utils/settings/validation.js';
|
||||||
import { DEFAULT_TASKS_MODE_TASK_LIST_ID, TASK_STATUSES } from './utils/tasks.js';
|
import { DEFAULT_TASKS_MODE_TASK_LIST_ID, TASK_STATUSES } from './utils/tasks.js';
|
||||||
import { logPluginLoadErrors, logPluginsEnabledForSession } from './utils/telemetry/pluginTelemetry.js';
|
|
||||||
import { logSkillsLoaded } from './utils/telemetry/skillLoadedEvent.js';
|
|
||||||
import { generateTempFilePath } from './utils/tempfile.js';
|
import { generateTempFilePath } from './utils/tempfile.js';
|
||||||
import { validateUuid } from './utils/uuid.js';
|
import { validateUuid } from './utils/uuid.js';
|
||||||
// Plugin startup checks are now handled non-blockingly in REPL.tsx
|
// Plugin startup checks are now handled non-blockingly in REPL.tsx
|
||||||
@@ -196,7 +188,7 @@ import { filterAllowedSdkBetas } from './utils/betas.js';
|
|||||||
import { isInBundledMode, isRunningWithBun } from './utils/bundledMode.js';
|
import { isInBundledMode, isRunningWithBun } from './utils/bundledMode.js';
|
||||||
import { logForDiagnosticsNoPII } from './utils/diagLogs.js';
|
import { logForDiagnosticsNoPII } from './utils/diagLogs.js';
|
||||||
import { filterExistingPaths, getKnownPathsForRepo } from './utils/githubRepoPathMapping.js';
|
import { filterExistingPaths, getKnownPathsForRepo } from './utils/githubRepoPathMapping.js';
|
||||||
import { clearPluginCache, loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js';
|
import { clearPluginCache } from './utils/plugins/pluginLoader.js';
|
||||||
import { migrateChangelogFromConfig } from './utils/releaseNotes.js';
|
import { migrateChangelogFromConfig } from './utils/releaseNotes.js';
|
||||||
import { SandboxManager } from './utils/sandbox/sandbox-adapter.js';
|
import { SandboxManager } from './utils/sandbox/sandbox-adapter.js';
|
||||||
import { fetchSession, prepareApiRequest } from './utils/teleport/api.js';
|
import { fetchSession, prepareApiRequest } from './utils/teleport/api.js';
|
||||||
@@ -270,56 +262,6 @@ if ("external" !== 'ant' && isBeingDebugged()) {
|
|||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Per-session skill/plugin telemetry. Called from both the interactive path
|
|
||||||
* and the headless -p path (before runHeadless) — both go through
|
|
||||||
* main.tsx but branch before the interactive startup path, so it needs two
|
|
||||||
* call sites here rather than one here + one in QueryEngine.
|
|
||||||
*/
|
|
||||||
function logSessionTelemetry(): void {
|
|
||||||
const model = parseUserSpecifiedModel(getInitialMainLoopModel() ?? getDefaultMainLoopModel());
|
|
||||||
void logSkillsLoaded(getCwd(), getContextWindowForModel(model, getSdkBetas()));
|
|
||||||
void loadAllPluginsCacheOnly().then(({
|
|
||||||
enabled,
|
|
||||||
errors
|
|
||||||
}) => {
|
|
||||||
const managedNames = getManagedPluginNames();
|
|
||||||
logPluginsEnabledForSession(enabled, managedNames, getPluginSeedDirs());
|
|
||||||
logPluginLoadErrors(errors, managedNames);
|
|
||||||
}).catch(err => logError(err));
|
|
||||||
}
|
|
||||||
function getCertEnvVarTelemetry(): Record<string, boolean> {
|
|
||||||
const result: Record<string, boolean> = {};
|
|
||||||
if (process.env.NODE_EXTRA_CA_CERTS) {
|
|
||||||
result.has_node_extra_ca_certs = true;
|
|
||||||
}
|
|
||||||
if (process.env.CLAUDE_CODE_CLIENT_CERT) {
|
|
||||||
result.has_client_cert = true;
|
|
||||||
}
|
|
||||||
if (hasNodeOption('--use-system-ca')) {
|
|
||||||
result.has_use_system_ca = true;
|
|
||||||
}
|
|
||||||
if (hasNodeOption('--use-openssl-ca')) {
|
|
||||||
result.has_use_openssl_ca = true;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
async function logStartupTelemetry(): Promise<void> {
|
|
||||||
if (isAnalyticsDisabled()) return;
|
|
||||||
const [isGit, worktreeCount, ghAuthStatus] = await Promise.all([getIsGit(), getWorktreeCount(), getGhAuthStatus()]);
|
|
||||||
logEvent('tengu_startup_telemetry', {
|
|
||||||
is_git: isGit,
|
|
||||||
worktree_count: worktreeCount,
|
|
||||||
gh_auth_status: ghAuthStatus as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
sandbox_enabled: SandboxManager.isSandboxingEnabled(),
|
|
||||||
are_unsandboxed_commands_allowed: SandboxManager.areUnsandboxedCommandsAllowed(),
|
|
||||||
is_auto_bash_allowed_if_sandbox_enabled: SandboxManager.isAutoAllowBashIfSandboxedEnabled(),
|
|
||||||
auto_updater_disabled: isAutoUpdaterDisabled(),
|
|
||||||
prefers_reduced_motion: getInitialSettings().prefersReducedMotion ?? false,
|
|
||||||
...getCertEnvVarTelemetry()
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// @[MODEL LAUNCH]: Consider any migrations you may need for model strings. See migrateSonnet1mToSonnet45.ts for an example.
|
// @[MODEL LAUNCH]: Consider any migrations you may need for model strings. See migrateSonnet1mToSonnet45.ts for an example.
|
||||||
// Bump this when adding a new sync migration so existing users re-run the set.
|
// Bump this when adding a new sync migration so existing users re-run the set.
|
||||||
const CURRENT_MIGRATION_VERSION = 11;
|
const CURRENT_MIGRATION_VERSION = 11;
|
||||||
@@ -413,8 +355,7 @@ export function startDeferredPrefetches(): void {
|
|||||||
}
|
}
|
||||||
void countFilesRoundedRg(getCwd(), AbortSignal.timeout(3000), []);
|
void countFilesRoundedRg(getCwd(), AbortSignal.timeout(3000), []);
|
||||||
|
|
||||||
// Analytics and feature flag initialization
|
// Feature flag initialization
|
||||||
void initializeAnalyticsGates();
|
|
||||||
void prefetchOfficialMcpUrls();
|
void prefetchOfficialMcpUrls();
|
||||||
void refreshModelCapabilities();
|
void refreshModelCapabilities();
|
||||||
|
|
||||||
@@ -2587,15 +2528,10 @@ async function run(): Promise<CommanderCommand> {
|
|||||||
setHasFormattedOutput(true);
|
setHasFormattedOutput(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply full environment variables in print mode since trust dialog is bypassed
|
// Apply full environment variables in print mode since trust dialog is bypassed.
|
||||||
// This includes potentially dangerous environment variables from untrusted sources
|
|
||||||
// but print mode is considered trusted (as documented in help text)
|
// but print mode is considered trusted (as documented in help text)
|
||||||
applyConfigEnvironmentVariables();
|
applyConfigEnvironmentVariables();
|
||||||
|
|
||||||
// Initialize telemetry after env vars are applied so OTEL endpoint env vars and
|
|
||||||
// otelHeadersHelper (which requires trust to execute) are available.
|
|
||||||
initializeTelemetryAfterTrust();
|
|
||||||
|
|
||||||
// Kick SessionStart hooks now so the subprocess spawn overlaps with
|
// Kick SessionStart hooks now so the subprocess spawn overlaps with
|
||||||
// MCP connect + plugin init + print.ts import below. loadInitialMessages
|
// MCP connect + plugin init + print.ts import below. loadInitialMessages
|
||||||
// joins this at print.ts:4397. Guarded same as loadInitialMessages —
|
// joins this at print.ts:4397. Guarded same as loadInitialMessages —
|
||||||
@@ -2820,7 +2756,6 @@ async function run(): Promise<CommanderCommand> {
|
|||||||
void import('./utils/sdkHeapDumpMonitor.js').then(m => m.startSdkMemoryMonitor());
|
void import('./utils/sdkHeapDumpMonitor.js').then(m => m.startSdkMemoryMonitor());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logSessionTelemetry();
|
|
||||||
profileCheckpoint('before_print_import');
|
profileCheckpoint('before_print_import');
|
||||||
const {
|
const {
|
||||||
runHeadless
|
runHeadless
|
||||||
@@ -3043,15 +2978,11 @@ async function run(): Promise<CommanderCommand> {
|
|||||||
|
|
||||||
// Increment numStartups synchronously — first-render readers like
|
// Increment numStartups synchronously — first-render readers like
|
||||||
// shouldShowEffortCallout (via useState initializer) need the updated
|
// shouldShowEffortCallout (via useState initializer) need the updated
|
||||||
// value before setImmediate fires. Defer only telemetry.
|
// value immediately.
|
||||||
saveGlobalConfig(current => ({
|
saveGlobalConfig(current => ({
|
||||||
...current,
|
...current,
|
||||||
numStartups: (current.numStartups ?? 0) + 1
|
numStartups: (current.numStartups ?? 0) + 1
|
||||||
}));
|
}));
|
||||||
setImmediate(() => {
|
|
||||||
void logStartupTelemetry();
|
|
||||||
logSessionTelemetry();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Set up per-turn session environment data uploader (ant-only build).
|
// Set up per-turn session environment data uploader (ant-only build).
|
||||||
// Default-enabled for all ant users when working in an Anthropic-owned
|
// Default-enabled for all ant users when working in an Anthropic-owned
|
||||||
|
|||||||
@@ -1,307 +1,9 @@
|
|||||||
import axios from 'axios'
|
|
||||||
import { createHash } from 'crypto'
|
|
||||||
import memoize from 'lodash-es/memoize.js'
|
|
||||||
import { getOrCreateUserID } from '../../utils/config.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { getCanonicalName } from '../../utils/model/model.js'
|
|
||||||
import { getAPIProvider } from '../../utils/model/providers.js'
|
|
||||||
import { MODEL_COSTS } from '../../utils/modelCost.js'
|
|
||||||
import { isAnalyticsDisabled } from './config.js'
|
|
||||||
import { getEventMetadata } from './metadata.js'
|
|
||||||
|
|
||||||
const DATADOG_LOGS_ENDPOINT =
|
|
||||||
'https://http-intake.logs.us5.datadoghq.com/api/v2/logs'
|
|
||||||
const DATADOG_CLIENT_TOKEN = 'pubbbf48e6d78dae54bceaa4acf463299bf'
|
|
||||||
const DEFAULT_FLUSH_INTERVAL_MS = 15000
|
|
||||||
const MAX_BATCH_SIZE = 100
|
|
||||||
const NETWORK_TIMEOUT_MS = 5000
|
|
||||||
|
|
||||||
const DATADOG_ALLOWED_EVENTS = new Set([
|
|
||||||
'chrome_bridge_connection_succeeded',
|
|
||||||
'chrome_bridge_connection_failed',
|
|
||||||
'chrome_bridge_disconnected',
|
|
||||||
'chrome_bridge_tool_call_completed',
|
|
||||||
'chrome_bridge_tool_call_error',
|
|
||||||
'chrome_bridge_tool_call_started',
|
|
||||||
'chrome_bridge_tool_call_timeout',
|
|
||||||
'tengu_api_error',
|
|
||||||
'tengu_api_success',
|
|
||||||
'tengu_brief_mode_enabled',
|
|
||||||
'tengu_brief_mode_toggled',
|
|
||||||
'tengu_brief_send',
|
|
||||||
'tengu_cancel',
|
|
||||||
'tengu_compact_failed',
|
|
||||||
'tengu_exit',
|
|
||||||
'tengu_flicker',
|
|
||||||
'tengu_init',
|
|
||||||
'tengu_model_fallback_triggered',
|
|
||||||
'tengu_oauth_error',
|
|
||||||
'tengu_oauth_success',
|
|
||||||
'tengu_oauth_token_refresh_failure',
|
|
||||||
'tengu_oauth_token_refresh_success',
|
|
||||||
'tengu_oauth_token_refresh_lock_acquiring',
|
|
||||||
'tengu_oauth_token_refresh_lock_acquired',
|
|
||||||
'tengu_oauth_token_refresh_starting',
|
|
||||||
'tengu_oauth_token_refresh_completed',
|
|
||||||
'tengu_oauth_token_refresh_lock_releasing',
|
|
||||||
'tengu_oauth_token_refresh_lock_released',
|
|
||||||
'tengu_query_error',
|
|
||||||
'tengu_session_file_read',
|
|
||||||
'tengu_started',
|
|
||||||
'tengu_tool_use_error',
|
|
||||||
'tengu_tool_use_granted_in_prompt_permanent',
|
|
||||||
'tengu_tool_use_granted_in_prompt_temporary',
|
|
||||||
'tengu_tool_use_rejected_in_prompt',
|
|
||||||
'tengu_tool_use_success',
|
|
||||||
'tengu_uncaught_exception',
|
|
||||||
'tengu_unhandled_rejection',
|
|
||||||
'tengu_voice_recording_started',
|
|
||||||
'tengu_voice_toggled',
|
|
||||||
'tengu_team_mem_sync_pull',
|
|
||||||
'tengu_team_mem_sync_push',
|
|
||||||
'tengu_team_mem_sync_started',
|
|
||||||
'tengu_team_mem_entries_capped',
|
|
||||||
])
|
|
||||||
|
|
||||||
const TAG_FIELDS = [
|
|
||||||
'arch',
|
|
||||||
'clientType',
|
|
||||||
'errorType',
|
|
||||||
'http_status_range',
|
|
||||||
'http_status',
|
|
||||||
'kairosActive',
|
|
||||||
'model',
|
|
||||||
'platform',
|
|
||||||
'provider',
|
|
||||||
'skillMode',
|
|
||||||
'subscriptionType',
|
|
||||||
'toolName',
|
|
||||||
'userBucket',
|
|
||||||
'userType',
|
|
||||||
'version',
|
|
||||||
'versionBase',
|
|
||||||
]
|
|
||||||
|
|
||||||
function camelToSnakeCase(str: string): string {
|
|
||||||
return str.replace(/[A-Z]/g, letter => `_${letter.toLowerCase()}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
type DatadogLog = {
|
|
||||||
ddsource: string
|
|
||||||
ddtags: string
|
|
||||||
message: string
|
|
||||||
service: string
|
|
||||||
hostname: string
|
|
||||||
[key: string]: unknown
|
|
||||||
}
|
|
||||||
|
|
||||||
let logBatch: DatadogLog[] = []
|
|
||||||
let flushTimer: NodeJS.Timeout | null = null
|
|
||||||
let datadogInitialized: boolean | null = null
|
|
||||||
|
|
||||||
async function flushLogs(): Promise<void> {
|
|
||||||
if (logBatch.length === 0) return
|
|
||||||
|
|
||||||
const logsToSend = logBatch
|
|
||||||
logBatch = []
|
|
||||||
|
|
||||||
try {
|
|
||||||
await axios.post(DATADOG_LOGS_ENDPOINT, logsToSend, {
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'DD-API-KEY': DATADOG_CLIENT_TOKEN,
|
|
||||||
},
|
|
||||||
timeout: NETWORK_TIMEOUT_MS,
|
|
||||||
})
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function scheduleFlush(): void {
|
|
||||||
if (flushTimer) return
|
|
||||||
|
|
||||||
flushTimer = setTimeout(() => {
|
|
||||||
flushTimer = null
|
|
||||||
void flushLogs()
|
|
||||||
}, getFlushIntervalMs()).unref()
|
|
||||||
}
|
|
||||||
|
|
||||||
export const initializeDatadog = memoize(async (): Promise<boolean> => {
|
|
||||||
if (isAnalyticsDisabled()) {
|
|
||||||
datadogInitialized = false
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
datadogInitialized = true
|
|
||||||
return true
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
datadogInitialized = false
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flush remaining Datadog logs and shut down.
|
* Datadog analytics egress is disabled in this build.
|
||||||
* Called from gracefulShutdown() before process.exit() since
|
*
|
||||||
* forceExit() prevents the beforeExit handler from firing.
|
* Only shutdown compatibility remains for existing cleanup paths.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export async function shutdownDatadog(): Promise<void> {
|
export async function shutdownDatadog(): Promise<void> {
|
||||||
if (flushTimer) {
|
return
|
||||||
clearTimeout(flushTimer)
|
|
||||||
flushTimer = null
|
|
||||||
}
|
|
||||||
await flushLogs()
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: use via src/services/analytics/index.ts > logEvent
|
|
||||||
export async function trackDatadogEvent(
|
|
||||||
eventName: string,
|
|
||||||
properties: { [key: string]: boolean | number | undefined },
|
|
||||||
): Promise<void> {
|
|
||||||
if (process.env.NODE_ENV !== 'production') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't send events for 3P providers (Bedrock, Vertex, Foundry)
|
|
||||||
if (getAPIProvider() !== 'firstParty') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fast path: use cached result if available to avoid await overhead
|
|
||||||
let initialized = datadogInitialized
|
|
||||||
if (initialized === null) {
|
|
||||||
initialized = await initializeDatadog()
|
|
||||||
}
|
|
||||||
if (!initialized || !DATADOG_ALLOWED_EVENTS.has(eventName)) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const metadata = await getEventMetadata({
|
|
||||||
model: properties.model,
|
|
||||||
betas: properties.betas,
|
|
||||||
})
|
|
||||||
// Destructure to avoid duplicate envContext (once nested, once flattened)
|
|
||||||
const { envContext, ...restMetadata } = metadata
|
|
||||||
const allData: Record<string, unknown> = {
|
|
||||||
...restMetadata,
|
|
||||||
...envContext,
|
|
||||||
...properties,
|
|
||||||
userBucket: getUserBucket(),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize MCP tool names to "mcp" for cardinality reduction
|
|
||||||
if (
|
|
||||||
typeof allData.toolName === 'string' &&
|
|
||||||
allData.toolName.startsWith('mcp__')
|
|
||||||
) {
|
|
||||||
allData.toolName = 'mcp'
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize model names for cardinality reduction (external users only)
|
|
||||||
if (process.env.USER_TYPE !== 'ant' && typeof allData.model === 'string') {
|
|
||||||
const shortName = getCanonicalName(allData.model.replace(/\[1m]$/i, ''))
|
|
||||||
allData.model = shortName in MODEL_COSTS ? shortName : 'other'
|
|
||||||
}
|
|
||||||
|
|
||||||
// Truncate dev version to base + date (remove timestamp and sha for cardinality reduction)
|
|
||||||
// e.g. "2.0.53-dev.20251124.t173302.sha526cc6a" -> "2.0.53-dev.20251124"
|
|
||||||
if (typeof allData.version === 'string') {
|
|
||||||
allData.version = allData.version.replace(
|
|
||||||
/^(\d+\.\d+\.\d+-dev\.\d{8})\.t\d+\.sha[a-f0-9]+$/,
|
|
||||||
'$1',
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transform status to http_status and http_status_range to avoid Datadog reserved field
|
|
||||||
if (allData.status !== undefined && allData.status !== null) {
|
|
||||||
const statusCode = String(allData.status)
|
|
||||||
allData.http_status = statusCode
|
|
||||||
|
|
||||||
// Determine status range (1xx, 2xx, 3xx, 4xx, 5xx)
|
|
||||||
const firstDigit = statusCode.charAt(0)
|
|
||||||
if (firstDigit >= '1' && firstDigit <= '5') {
|
|
||||||
allData.http_status_range = `${firstDigit}xx`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove original status field to avoid conflict with Datadog's reserved field
|
|
||||||
delete allData.status
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build ddtags with high-cardinality fields for filtering.
|
|
||||||
// event:<name> is prepended so the event name is searchable via the
|
|
||||||
// log search API — the `message` field (where eventName also lives)
|
|
||||||
// is a DD reserved field and is NOT queryable from dashboard widget
|
|
||||||
// queries or the aggregation API. See scripts/release/MONITORING.md.
|
|
||||||
const allDataRecord = allData
|
|
||||||
const tags = [
|
|
||||||
`event:${eventName}`,
|
|
||||||
...TAG_FIELDS.filter(
|
|
||||||
field =>
|
|
||||||
allDataRecord[field] !== undefined && allDataRecord[field] !== null,
|
|
||||||
).map(field => `${camelToSnakeCase(field)}:${allDataRecord[field]}`),
|
|
||||||
]
|
|
||||||
|
|
||||||
const log: DatadogLog = {
|
|
||||||
ddsource: 'nodejs',
|
|
||||||
ddtags: tags.join(','),
|
|
||||||
message: eventName,
|
|
||||||
service: 'claude-code',
|
|
||||||
hostname: 'claude-code',
|
|
||||||
env: process.env.USER_TYPE,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add all fields as searchable attributes (not duplicated in tags)
|
|
||||||
for (const [key, value] of Object.entries(allData)) {
|
|
||||||
if (value !== undefined && value !== null) {
|
|
||||||
log[camelToSnakeCase(key)] = value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logBatch.push(log)
|
|
||||||
|
|
||||||
// Flush immediately if batch is full, otherwise schedule
|
|
||||||
if (logBatch.length >= MAX_BATCH_SIZE) {
|
|
||||||
if (flushTimer) {
|
|
||||||
clearTimeout(flushTimer)
|
|
||||||
flushTimer = null
|
|
||||||
}
|
|
||||||
void flushLogs()
|
|
||||||
} else {
|
|
||||||
scheduleFlush()
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const NUM_USER_BUCKETS = 30
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a 'bucket' that the user ID falls into.
|
|
||||||
*
|
|
||||||
* For alerting purposes, we want to alert on the number of users impacted
|
|
||||||
* by an issue, rather than the number of events- often a small number of users
|
|
||||||
* can generate a large number of events (e.g. due to retries). To approximate
|
|
||||||
* this without ruining cardinality by counting user IDs directly, we hash the user ID
|
|
||||||
* and assign it to one of a fixed number of buckets.
|
|
||||||
*
|
|
||||||
* This allows us to estimate the number of unique users by counting unique buckets,
|
|
||||||
* while preserving user privacy and reducing cardinality.
|
|
||||||
*/
|
|
||||||
const getUserBucket = memoize((): number => {
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
const hash = createHash('sha256').update(userId).digest('hex')
|
|
||||||
return parseInt(hash.slice(0, 8), 16) % NUM_USER_BUCKETS
|
|
||||||
})
|
|
||||||
|
|
||||||
function getFlushIntervalMs(): number {
|
|
||||||
// Allow tests to override to not block on the default flush interval.
|
|
||||||
return (
|
|
||||||
parseInt(process.env.CLAUDE_CODE_DATADOG_FLUSH_INTERVAL_MS || '', 10) ||
|
|
||||||
DEFAULT_FLUSH_INTERVAL_MS
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,449 +1,16 @@
|
|||||||
import type { AnyValueMap, Logger, logs } from '@opentelemetry/api-logs'
|
|
||||||
import { resourceFromAttributes } from '@opentelemetry/resources'
|
|
||||||
import {
|
|
||||||
BatchLogRecordProcessor,
|
|
||||||
LoggerProvider,
|
|
||||||
} from '@opentelemetry/sdk-logs'
|
|
||||||
import {
|
|
||||||
ATTR_SERVICE_NAME,
|
|
||||||
ATTR_SERVICE_VERSION,
|
|
||||||
} from '@opentelemetry/semantic-conventions'
|
|
||||||
import { randomUUID } from 'crypto'
|
|
||||||
import { isEqual } from 'lodash-es'
|
|
||||||
import { getOrCreateUserID } from '../../utils/config.js'
|
|
||||||
import { logForDebugging } from '../../utils/debug.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { getPlatform, getWslVersion } from '../../utils/platform.js'
|
|
||||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
||||||
import { profileCheckpoint } from '../../utils/startupProfiler.js'
|
|
||||||
import { getCoreUserData } from '../../utils/user.js'
|
|
||||||
import { isAnalyticsDisabled } from './config.js'
|
|
||||||
import { FirstPartyEventLoggingExporter } from './firstPartyEventLoggingExporter.js'
|
|
||||||
import type { GrowthBookUserAttributes } from './growthbook.js'
|
|
||||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from './growthbook.js'
|
|
||||||
import { getEventMetadata } from './metadata.js'
|
|
||||||
import { isSinkKilled } from './sinkKillswitch.js'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration for sampling individual event types.
|
* Anthropic 1P event logging egress is disabled in this build.
|
||||||
* Each event name maps to an object containing sample_rate (0-1).
|
|
||||||
* Events not in the config are logged at 100% rate.
|
|
||||||
*/
|
|
||||||
export type EventSamplingConfig = {
|
|
||||||
[eventName: string]: {
|
|
||||||
sample_rate: number
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const EVENT_SAMPLING_CONFIG_NAME = 'tengu_event_sampling_config'
|
|
||||||
/**
|
|
||||||
* Get the event sampling configuration from GrowthBook.
|
|
||||||
* Uses cached value if available, updates cache in background.
|
|
||||||
*/
|
|
||||||
export function getEventSamplingConfig(): EventSamplingConfig {
|
|
||||||
return getDynamicConfig_CACHED_MAY_BE_STALE<EventSamplingConfig>(
|
|
||||||
EVENT_SAMPLING_CONFIG_NAME,
|
|
||||||
{},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determine if an event should be sampled based on its sample rate.
|
|
||||||
* Returns the sample rate if sampled, null if not sampled.
|
|
||||||
*
|
*
|
||||||
* @param eventName - Name of the event to check
|
* Only the shutdown and feedback call sites still need a local stub.
|
||||||
* @returns The sample_rate if event should be logged, null if it should be dropped
|
|
||||||
*/
|
*/
|
||||||
export function shouldSampleEvent(eventName: string): number | null {
|
|
||||||
const config = getEventSamplingConfig()
|
|
||||||
const eventConfig = config[eventName]
|
|
||||||
|
|
||||||
// If no config for this event, log at 100% rate (no sampling)
|
|
||||||
if (!eventConfig) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
const sampleRate = eventConfig.sample_rate
|
|
||||||
|
|
||||||
// Validate sample rate is in valid range
|
|
||||||
if (typeof sampleRate !== 'number' || sampleRate < 0 || sampleRate > 1) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sample rate of 1 means log everything (no need to add metadata)
|
|
||||||
if (sampleRate >= 1) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sample rate of 0 means drop everything
|
|
||||||
if (sampleRate <= 0) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Randomly decide whether to sample this event
|
|
||||||
return Math.random() < sampleRate ? sampleRate : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
const BATCH_CONFIG_NAME = 'tengu_1p_event_batch_config'
|
|
||||||
type BatchConfig = {
|
|
||||||
scheduledDelayMillis?: number
|
|
||||||
maxExportBatchSize?: number
|
|
||||||
maxQueueSize?: number
|
|
||||||
skipAuth?: boolean
|
|
||||||
maxAttempts?: number
|
|
||||||
path?: string
|
|
||||||
baseUrl?: string
|
|
||||||
}
|
|
||||||
function getBatchConfig(): BatchConfig {
|
|
||||||
return getDynamicConfig_CACHED_MAY_BE_STALE<BatchConfig>(
|
|
||||||
BATCH_CONFIG_NAME,
|
|
||||||
{},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Module-local state for event logging (not exposed globally)
|
|
||||||
let firstPartyEventLogger: ReturnType<typeof logs.getLogger> | null = null
|
|
||||||
let firstPartyEventLoggerProvider: LoggerProvider | null = null
|
|
||||||
// Last batch config used to construct the provider — used by
|
|
||||||
// reinitialize1PEventLoggingIfConfigChanged to decide whether a rebuild is
|
|
||||||
// needed when GrowthBook refreshes.
|
|
||||||
let lastBatchConfig: BatchConfig | null = null
|
|
||||||
/**
|
|
||||||
* Flush and shutdown the 1P event logger.
|
|
||||||
* This should be called as the final step before process exit to ensure
|
|
||||||
* all events (including late ones from API responses) are exported.
|
|
||||||
*/
|
|
||||||
export async function shutdown1PEventLogging(): Promise<void> {
|
export async function shutdown1PEventLogging(): Promise<void> {
|
||||||
if (!firstPartyEventLoggerProvider) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
await firstPartyEventLoggerProvider.shutdown()
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging: final shutdown complete')
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Ignore shutdown errors
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if 1P event logging is enabled.
|
|
||||||
* Respects the same opt-outs as other analytics sinks:
|
|
||||||
* - Test environment
|
|
||||||
* - Third-party cloud providers (Bedrock/Vertex)
|
|
||||||
* - Global telemetry opt-outs
|
|
||||||
* - Non-essential traffic disabled
|
|
||||||
*
|
|
||||||
* Note: Unlike BigQuery metrics, event logging does NOT check organization-level
|
|
||||||
* metrics opt-out via API. It follows the same pattern as Statsig event logging.
|
|
||||||
*/
|
|
||||||
export function is1PEventLoggingEnabled(): boolean {
|
|
||||||
// Respect standard analytics opt-outs
|
|
||||||
return !isAnalyticsDisabled()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a 1st-party event for internal analytics (async version).
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* This enriches the event with core metadata (model, session, env context, etc.)
|
|
||||||
* at log time, similar to logEventToStatsig.
|
|
||||||
*
|
|
||||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
|
||||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
|
||||||
*/
|
|
||||||
async function logEventTo1PAsync(
|
|
||||||
firstPartyEventLogger: Logger,
|
|
||||||
eventName: string,
|
|
||||||
metadata: Record<string, number | boolean | undefined> = {},
|
|
||||||
): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Enrich with core metadata at log time (similar to Statsig pattern)
|
|
||||||
const coreMetadata = await getEventMetadata({
|
|
||||||
model: metadata.model,
|
|
||||||
betas: metadata.betas,
|
|
||||||
})
|
|
||||||
|
|
||||||
// Build attributes - OTel supports nested objects natively via AnyValueMap
|
|
||||||
// Cast through unknown since our nested objects are structurally compatible
|
|
||||||
// with AnyValue but TS doesn't recognize it due to missing index signatures
|
|
||||||
const attributes = {
|
|
||||||
event_name: eventName,
|
|
||||||
event_id: randomUUID(),
|
|
||||||
// Pass objects directly - no JSON serialization needed
|
|
||||||
core_metadata: coreMetadata,
|
|
||||||
user_metadata: getCoreUserData(true),
|
|
||||||
event_metadata: metadata,
|
|
||||||
} as unknown as AnyValueMap
|
|
||||||
|
|
||||||
// Add user_id if available
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
if (userId) {
|
|
||||||
attributes.user_id = userId
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug logging when debug mode is enabled
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`[ANT-ONLY] 1P event: ${eventName} ${jsonStringify(metadata, null, 0)}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit log record
|
|
||||||
firstPartyEventLogger.emit({
|
|
||||||
body: eventName,
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
} catch (e) {
|
|
||||||
if (process.env.NODE_ENV === 'development') {
|
|
||||||
throw e
|
|
||||||
}
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logError(e as Error)
|
|
||||||
}
|
|
||||||
// swallow
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a 1st-party event for internal analytics.
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
|
||||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
|
||||||
*/
|
|
||||||
export function logEventTo1P(
|
export function logEventTo1P(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: Record<string, number | boolean | undefined> = {},
|
_metadata: Record<string, number | boolean | undefined> = {},
|
||||||
): void {
|
): void {
|
||||||
if (!is1PEventLoggingEnabled()) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fire and forget - don't block on metadata enrichment
|
|
||||||
void logEventTo1PAsync(firstPartyEventLogger, eventName, metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GrowthBook experiment event data for logging
|
|
||||||
*/
|
|
||||||
export type GrowthBookExperimentData = {
|
|
||||||
experimentId: string
|
|
||||||
variationId: number
|
|
||||||
userAttributes?: GrowthBookUserAttributes
|
|
||||||
experimentMetadata?: Record<string, unknown>
|
|
||||||
}
|
|
||||||
|
|
||||||
// api.anthropic.com only serves the "production" GrowthBook environment
|
|
||||||
// (see starling/starling/cli/cli.py DEFAULT_ENVIRONMENTS). Staging and
|
|
||||||
// development environments are not exported to the prod API.
|
|
||||||
function getEnvironmentForGrowthBook(): string {
|
|
||||||
return 'production'
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a GrowthBook experiment assignment event to 1P.
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* @param data - GrowthBook experiment assignment data
|
|
||||||
*/
|
|
||||||
export function logGrowthBookExperimentTo1P(
|
|
||||||
data: GrowthBookExperimentData,
|
|
||||||
): void {
|
|
||||||
if (!is1PEventLoggingEnabled()) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
const { accountUuid, organizationUuid } = getCoreUserData(true)
|
|
||||||
|
|
||||||
// Build attributes for GrowthbookExperimentEvent
|
|
||||||
const attributes = {
|
|
||||||
event_type: 'GrowthbookExperimentEvent',
|
|
||||||
event_id: randomUUID(),
|
|
||||||
experiment_id: data.experimentId,
|
|
||||||
variation_id: data.variationId,
|
|
||||||
...(userId && { device_id: userId }),
|
|
||||||
...(accountUuid && { account_uuid: accountUuid }),
|
|
||||||
...(organizationUuid && { organization_uuid: organizationUuid }),
|
|
||||||
...(data.userAttributes && {
|
|
||||||
session_id: data.userAttributes.sessionId,
|
|
||||||
user_attributes: jsonStringify(data.userAttributes),
|
|
||||||
}),
|
|
||||||
...(data.experimentMetadata && {
|
|
||||||
experiment_metadata: jsonStringify(data.experimentMetadata),
|
|
||||||
}),
|
|
||||||
environment: getEnvironmentForGrowthBook(),
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`[ANT-ONLY] 1P GrowthBook experiment: ${data.experimentId} variation=${data.variationId}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
firstPartyEventLogger.emit({
|
|
||||||
body: 'growthbook_experiment',
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 10000
|
|
||||||
const DEFAULT_MAX_EXPORT_BATCH_SIZE = 200
|
|
||||||
const DEFAULT_MAX_QUEUE_SIZE = 8192
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize 1P event logging infrastructure.
|
|
||||||
* This creates a separate LoggerProvider for internal event logging,
|
|
||||||
* independent of customer OTLP telemetry.
|
|
||||||
*
|
|
||||||
* This uses its own minimal resource configuration with just the attributes
|
|
||||||
* we need for internal analytics (service name, version, platform info).
|
|
||||||
*/
|
|
||||||
export function initialize1PEventLogging(): void {
|
|
||||||
profileCheckpoint('1p_event_logging_start')
|
|
||||||
const enabled = is1PEventLoggingEnabled()
|
|
||||||
|
|
||||||
if (!enabled) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging not enabled')
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch batch processor configuration from GrowthBook dynamic config
|
|
||||||
// Uses cached value if available, refreshes in background
|
|
||||||
const batchConfig = getBatchConfig()
|
|
||||||
lastBatchConfig = batchConfig
|
|
||||||
profileCheckpoint('1p_event_after_growthbook_config')
|
|
||||||
|
|
||||||
const scheduledDelayMillis =
|
|
||||||
batchConfig.scheduledDelayMillis ||
|
|
||||||
parseInt(
|
|
||||||
process.env.OTEL_LOGS_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
)
|
|
||||||
|
|
||||||
const maxExportBatchSize =
|
|
||||||
batchConfig.maxExportBatchSize || DEFAULT_MAX_EXPORT_BATCH_SIZE
|
|
||||||
|
|
||||||
const maxQueueSize = batchConfig.maxQueueSize || DEFAULT_MAX_QUEUE_SIZE
|
|
||||||
|
|
||||||
// Build our own resource for 1P event logging with minimal attributes
|
|
||||||
const platform = getPlatform()
|
|
||||||
const attributes: Record<string, string> = {
|
|
||||||
[ATTR_SERVICE_NAME]: 'claude-code',
|
|
||||||
[ATTR_SERVICE_VERSION]: MACRO.VERSION,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add WSL-specific attributes if running on WSL
|
|
||||||
if (platform === 'wsl') {
|
|
||||||
const wslVersion = getWslVersion()
|
|
||||||
if (wslVersion) {
|
|
||||||
attributes['wsl.version'] = wslVersion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const resource = resourceFromAttributes(attributes)
|
|
||||||
|
|
||||||
// Create a new LoggerProvider with the EventLoggingExporter
|
|
||||||
// NOTE: This is kept separate from customer telemetry logs to ensure
|
|
||||||
// internal events don't leak to customer endpoints and vice versa.
|
|
||||||
// We don't register this globally - it's only used for internal event logging.
|
|
||||||
const eventLoggingExporter = new FirstPartyEventLoggingExporter({
|
|
||||||
maxBatchSize: maxExportBatchSize,
|
|
||||||
skipAuth: batchConfig.skipAuth,
|
|
||||||
maxAttempts: batchConfig.maxAttempts,
|
|
||||||
path: batchConfig.path,
|
|
||||||
baseUrl: batchConfig.baseUrl,
|
|
||||||
isKilled: () => isSinkKilled('firstParty'),
|
|
||||||
})
|
|
||||||
firstPartyEventLoggerProvider = new LoggerProvider({
|
|
||||||
resource,
|
|
||||||
processors: [
|
|
||||||
new BatchLogRecordProcessor(eventLoggingExporter, {
|
|
||||||
scheduledDelayMillis,
|
|
||||||
maxExportBatchSize,
|
|
||||||
maxQueueSize,
|
|
||||||
}),
|
|
||||||
],
|
|
||||||
})
|
|
||||||
|
|
||||||
// Initialize event logger from our internal provider (NOT from global API)
|
|
||||||
// IMPORTANT: We must get the logger from our local provider, not logs.getLogger()
|
|
||||||
// because logs.getLogger() returns a logger from the global provider, which is
|
|
||||||
// separate and used for customer telemetry.
|
|
||||||
firstPartyEventLogger = firstPartyEventLoggerProvider.getLogger(
|
|
||||||
'com.anthropic.claude_code.events',
|
|
||||||
MACRO.VERSION,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Rebuild the 1P event logging pipeline if the batch config changed.
|
|
||||||
* Register this with onGrowthBookRefresh so long-running sessions pick up
|
|
||||||
* changes to batch size, delay, endpoint, etc.
|
|
||||||
*
|
|
||||||
* Event-loss safety:
|
|
||||||
* 1. Null the logger first — concurrent logEventTo1P() calls hit the
|
|
||||||
* !firstPartyEventLogger guard and bail during the swap window. This drops
|
|
||||||
* a handful of events but prevents emitting to a draining provider.
|
|
||||||
* 2. forceFlush() drains the old BatchLogRecordProcessor buffer to the
|
|
||||||
* exporter. Export failures go to disk at getCurrentBatchFilePath() which
|
|
||||||
* is keyed by module-level BATCH_UUID + sessionId — unchanged across
|
|
||||||
* reinit — so the NEW exporter's disk-backed retry picks them up.
|
|
||||||
* 3. Swap to new provider/logger; old provider shutdown runs in background
|
|
||||||
* (buffer already drained, just cleanup).
|
|
||||||
*/
|
|
||||||
export async function reinitialize1PEventLoggingIfConfigChanged(): Promise<void> {
|
|
||||||
if (!is1PEventLoggingEnabled() || !firstPartyEventLoggerProvider) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const newConfig = getBatchConfig()
|
|
||||||
|
|
||||||
if (isEqual(newConfig, lastBatchConfig)) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: ${BATCH_CONFIG_NAME} changed, reinitializing`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const oldProvider = firstPartyEventLoggerProvider
|
|
||||||
const oldLogger = firstPartyEventLogger
|
|
||||||
firstPartyEventLogger = null
|
|
||||||
|
|
||||||
try {
|
|
||||||
await oldProvider.forceFlush()
|
|
||||||
} catch {
|
|
||||||
// Export failures are already on disk; new exporter will retry them.
|
|
||||||
}
|
|
||||||
|
|
||||||
firstPartyEventLoggerProvider = null
|
|
||||||
try {
|
|
||||||
initialize1PEventLogging()
|
|
||||||
} catch (e) {
|
|
||||||
// Restore so the next GrowthBook refresh can retry. oldProvider was
|
|
||||||
// only forceFlush()'d, not shut down — it's still functional. Without
|
|
||||||
// this, both stay null and the !firstPartyEventLoggerProvider gate at
|
|
||||||
// the top makes recovery impossible.
|
|
||||||
firstPartyEventLoggerProvider = oldProvider
|
|
||||||
firstPartyEventLogger = oldLogger
|
|
||||||
logError(e)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
void oldProvider.shutdown().catch(() => {})
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,806 +0,0 @@
|
|||||||
import type { HrTime } from '@opentelemetry/api'
|
|
||||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
|
||||||
import type {
|
|
||||||
LogRecordExporter,
|
|
||||||
ReadableLogRecord,
|
|
||||||
} from '@opentelemetry/sdk-logs'
|
|
||||||
import axios from 'axios'
|
|
||||||
import { randomUUID } from 'crypto'
|
|
||||||
import { appendFile, mkdir, readdir, unlink, writeFile } from 'fs/promises'
|
|
||||||
import * as path from 'path'
|
|
||||||
import type { CoreUserData } from 'src/utils/user.js'
|
|
||||||
import {
|
|
||||||
getIsNonInteractiveSession,
|
|
||||||
getSessionId,
|
|
||||||
} from '../../bootstrap/state.js'
|
|
||||||
import { ClaudeCodeInternalEvent } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
|
||||||
import { GrowthbookExperimentEvent } from '../../types/generated/events_mono/growthbook/v1/growthbook_experiment_event.js'
|
|
||||||
import {
|
|
||||||
getClaudeAIOAuthTokens,
|
|
||||||
hasProfileScope,
|
|
||||||
isClaudeAISubscriber,
|
|
||||||
} from '../../utils/auth.js'
|
|
||||||
import { checkHasTrustDialogAccepted } from '../../utils/config.js'
|
|
||||||
import { logForDebugging } from '../../utils/debug.js'
|
|
||||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
|
||||||
import { errorMessage, isFsInaccessible, toError } from '../../utils/errors.js'
|
|
||||||
import { getAuthHeaders } from '../../utils/http.js'
|
|
||||||
import { readJSONLFile } from '../../utils/json.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { sleep } from '../../utils/sleep.js'
|
|
||||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
||||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
|
||||||
import { isOAuthTokenExpired } from '../oauth/client.js'
|
|
||||||
import { stripProtoFields } from './index.js'
|
|
||||||
import { type EventMetadata, to1PEventFormat } from './metadata.js'
|
|
||||||
|
|
||||||
// Unique ID for this process run - used to isolate failed event files between runs
|
|
||||||
const BATCH_UUID = randomUUID()
|
|
||||||
|
|
||||||
// File prefix for failed event storage
|
|
||||||
const FILE_PREFIX = '1p_failed_events.'
|
|
||||||
|
|
||||||
// Storage directory for failed events - evaluated at runtime to respect CLAUDE_CONFIG_DIR in tests
|
|
||||||
function getStorageDir(): string {
|
|
||||||
return path.join(getClaudeConfigHomeDir(), 'telemetry')
|
|
||||||
}
|
|
||||||
|
|
||||||
// API envelope - event_data is the JSON output from proto toJSON()
|
|
||||||
type FirstPartyEventLoggingEvent = {
|
|
||||||
event_type: 'ClaudeCodeInternalEvent' | 'GrowthbookExperimentEvent'
|
|
||||||
event_data: unknown
|
|
||||||
}
|
|
||||||
|
|
||||||
type FirstPartyEventLoggingPayload = {
|
|
||||||
events: FirstPartyEventLoggingEvent[]
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Exporter for 1st-party event logging to /api/event_logging/batch.
|
|
||||||
*
|
|
||||||
* Export cycles are controlled by OpenTelemetry's BatchLogRecordProcessor, which
|
|
||||||
* triggers export() when either:
|
|
||||||
* - Time interval elapses (default: 5 seconds via scheduledDelayMillis)
|
|
||||||
* - Batch size is reached (default: 200 events via maxExportBatchSize)
|
|
||||||
*
|
|
||||||
* This exporter adds resilience on top:
|
|
||||||
* - Append-only log for failed events (concurrency-safe)
|
|
||||||
* - Quadratic backoff retry for failed events, dropped after maxAttempts
|
|
||||||
* - Immediate retry of queued events when any export succeeds (endpoint is healthy)
|
|
||||||
* - Chunking large event sets into smaller batches
|
|
||||||
* - Auth fallback: retries without auth on 401 errors
|
|
||||||
*/
|
|
||||||
export class FirstPartyEventLoggingExporter implements LogRecordExporter {
|
|
||||||
private readonly endpoint: string
|
|
||||||
private readonly timeout: number
|
|
||||||
private readonly maxBatchSize: number
|
|
||||||
private readonly skipAuth: boolean
|
|
||||||
private readonly batchDelayMs: number
|
|
||||||
private readonly baseBackoffDelayMs: number
|
|
||||||
private readonly maxBackoffDelayMs: number
|
|
||||||
private readonly maxAttempts: number
|
|
||||||
private readonly isKilled: () => boolean
|
|
||||||
private pendingExports: Promise<void>[] = []
|
|
||||||
private isShutdown = false
|
|
||||||
private readonly schedule: (
|
|
||||||
fn: () => Promise<void>,
|
|
||||||
delayMs: number,
|
|
||||||
) => () => void
|
|
||||||
private cancelBackoff: (() => void) | null = null
|
|
||||||
private attempts = 0
|
|
||||||
private isRetrying = false
|
|
||||||
private lastExportErrorContext: string | undefined
|
|
||||||
|
|
||||||
constructor(
|
|
||||||
options: {
|
|
||||||
timeout?: number
|
|
||||||
maxBatchSize?: number
|
|
||||||
skipAuth?: boolean
|
|
||||||
batchDelayMs?: number
|
|
||||||
baseBackoffDelayMs?: number
|
|
||||||
maxBackoffDelayMs?: number
|
|
||||||
maxAttempts?: number
|
|
||||||
path?: string
|
|
||||||
baseUrl?: string
|
|
||||||
// Injected killswitch probe. Checked per-POST so that disabling the
|
|
||||||
// firstParty sink also stops backoff retries (not just new emits).
|
|
||||||
// Passed in rather than imported to avoid a cycle with firstPartyEventLogger.ts.
|
|
||||||
isKilled?: () => boolean
|
|
||||||
schedule?: (fn: () => Promise<void>, delayMs: number) => () => void
|
|
||||||
} = {},
|
|
||||||
) {
|
|
||||||
// Default: prod, except when ANTHROPIC_BASE_URL is explicitly staging.
|
|
||||||
// Overridable via tengu_1p_event_batch_config.baseUrl.
|
|
||||||
const baseUrl =
|
|
||||||
options.baseUrl ||
|
|
||||||
(process.env.ANTHROPIC_BASE_URL === 'https://api-staging.anthropic.com'
|
|
||||||
? 'https://api-staging.anthropic.com'
|
|
||||||
: 'https://api.anthropic.com')
|
|
||||||
|
|
||||||
this.endpoint = `${baseUrl}${options.path || '/api/event_logging/batch'}`
|
|
||||||
|
|
||||||
this.timeout = options.timeout || 10000
|
|
||||||
this.maxBatchSize = options.maxBatchSize || 200
|
|
||||||
this.skipAuth = options.skipAuth ?? false
|
|
||||||
this.batchDelayMs = options.batchDelayMs || 100
|
|
||||||
this.baseBackoffDelayMs = options.baseBackoffDelayMs || 500
|
|
||||||
this.maxBackoffDelayMs = options.maxBackoffDelayMs || 30000
|
|
||||||
this.maxAttempts = options.maxAttempts ?? 8
|
|
||||||
this.isKilled = options.isKilled ?? (() => false)
|
|
||||||
this.schedule =
|
|
||||||
options.schedule ??
|
|
||||||
((fn, ms) => {
|
|
||||||
const t = setTimeout(fn, ms)
|
|
||||||
return () => clearTimeout(t)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Retry any failed events from previous runs of this session (in background)
|
|
||||||
void this.retryPreviousBatches()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expose for testing
|
|
||||||
async getQueuedEventCount(): Promise<number> {
|
|
||||||
return (await this.loadEventsFromCurrentBatch()).length
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Storage helpers ---
|
|
||||||
|
|
||||||
private getCurrentBatchFilePath(): string {
|
|
||||||
return path.join(
|
|
||||||
getStorageDir(),
|
|
||||||
`${FILE_PREFIX}${getSessionId()}.${BATCH_UUID}.json`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
private async loadEventsFromFile(
|
|
||||||
filePath: string,
|
|
||||||
): Promise<FirstPartyEventLoggingEvent[]> {
|
|
||||||
try {
|
|
||||||
return await readJSONLFile<FirstPartyEventLoggingEvent>(filePath)
|
|
||||||
} catch {
|
|
||||||
return []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async loadEventsFromCurrentBatch(): Promise<
|
|
||||||
FirstPartyEventLoggingEvent[]
|
|
||||||
> {
|
|
||||||
return this.loadEventsFromFile(this.getCurrentBatchFilePath())
|
|
||||||
}
|
|
||||||
|
|
||||||
private async saveEventsToFile(
|
|
||||||
filePath: string,
|
|
||||||
events: FirstPartyEventLoggingEvent[],
|
|
||||||
): Promise<void> {
|
|
||||||
try {
|
|
||||||
if (events.length === 0) {
|
|
||||||
try {
|
|
||||||
await unlink(filePath)
|
|
||||||
} catch {
|
|
||||||
// File doesn't exist, nothing to delete
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Ensure storage directory exists
|
|
||||||
await mkdir(getStorageDir(), { recursive: true })
|
|
||||||
// Write as JSON lines (one event per line)
|
|
||||||
const content = events.map(e => jsonStringify(e)).join('\n') + '\n'
|
|
||||||
await writeFile(filePath, content, 'utf8')
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async appendEventsToFile(
|
|
||||||
filePath: string,
|
|
||||||
events: FirstPartyEventLoggingEvent[],
|
|
||||||
): Promise<void> {
|
|
||||||
if (events.length === 0) return
|
|
||||||
try {
|
|
||||||
// Ensure storage directory exists
|
|
||||||
await mkdir(getStorageDir(), { recursive: true })
|
|
||||||
// Append as JSON lines (one event per line) - atomic on most filesystems
|
|
||||||
const content = events.map(e => jsonStringify(e)).join('\n') + '\n'
|
|
||||||
await appendFile(filePath, content, 'utf8')
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async deleteFile(filePath: string): Promise<void> {
|
|
||||||
try {
|
|
||||||
await unlink(filePath)
|
|
||||||
} catch {
|
|
||||||
// File doesn't exist or can't be deleted, ignore
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Previous batch retry (startup) ---
|
|
||||||
|
|
||||||
private async retryPreviousBatches(): Promise<void> {
|
|
||||||
try {
|
|
||||||
const prefix = `${FILE_PREFIX}${getSessionId()}.`
|
|
||||||
let files: string[]
|
|
||||||
try {
|
|
||||||
files = (await readdir(getStorageDir()))
|
|
||||||
.filter((f: string) => f.startsWith(prefix) && f.endsWith('.json'))
|
|
||||||
.filter((f: string) => !f.includes(BATCH_UUID)) // Exclude current batch
|
|
||||||
} catch (e) {
|
|
||||||
if (isFsInaccessible(e)) return
|
|
||||||
throw e
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const file of files) {
|
|
||||||
const filePath = path.join(getStorageDir(), file)
|
|
||||||
void this.retryFileInBackground(filePath)
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async retryFileInBackground(filePath: string): Promise<void> {
|
|
||||||
if (this.attempts >= this.maxAttempts) {
|
|
||||||
await this.deleteFile(filePath)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const events = await this.loadEventsFromFile(filePath)
|
|
||||||
if (events.length === 0) {
|
|
||||||
await this.deleteFile(filePath)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: retrying ${events.length} events from previous batch`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const failedEvents = await this.sendEventsInBatches(events)
|
|
||||||
if (failedEvents.length === 0) {
|
|
||||||
await this.deleteFile(filePath)
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging: previous batch retry succeeded')
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Save only the failed events back (not all original events)
|
|
||||||
await this.saveEventsToFile(filePath, failedEvents)
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: previous batch retry failed, ${failedEvents.length} events remain`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async export(
|
|
||||||
logs: ReadableLogRecord[],
|
|
||||||
resultCallback: (result: ExportResult) => void,
|
|
||||||
): Promise<void> {
|
|
||||||
if (this.isShutdown) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
'1P event logging export failed: Exporter has been shutdown',
|
|
||||||
)
|
|
||||||
}
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: new Error('Exporter has been shutdown'),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const exportPromise = this.doExport(logs, resultCallback)
|
|
||||||
this.pendingExports.push(exportPromise)
|
|
||||||
|
|
||||||
// Clean up completed exports
|
|
||||||
void exportPromise.finally(() => {
|
|
||||||
const index = this.pendingExports.indexOf(exportPromise)
|
|
||||||
if (index > -1) {
|
|
||||||
void this.pendingExports.splice(index, 1)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
private async doExport(
|
|
||||||
logs: ReadableLogRecord[],
|
|
||||||
resultCallback: (result: ExportResult) => void,
|
|
||||||
): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Filter for event logs only (by scope name)
|
|
||||||
const eventLogs = logs.filter(
|
|
||||||
log =>
|
|
||||||
log.instrumentationScope?.name === 'com.anthropic.claude_code.events',
|
|
||||||
)
|
|
||||||
|
|
||||||
if (eventLogs.length === 0) {
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transform new logs (failed events are retried independently via backoff)
|
|
||||||
const events = this.transformLogsToEvents(eventLogs).events
|
|
||||||
|
|
||||||
if (events.length === 0) {
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.attempts >= this.maxAttempts) {
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: new Error(
|
|
||||||
`Dropped ${events.length} events: max attempts (${this.maxAttempts}) reached`,
|
|
||||||
),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send events
|
|
||||||
const failedEvents = await this.sendEventsInBatches(events)
|
|
||||||
this.attempts++
|
|
||||||
|
|
||||||
if (failedEvents.length > 0) {
|
|
||||||
await this.queueFailedEvents(failedEvents)
|
|
||||||
this.scheduleBackoffRetry()
|
|
||||||
const context = this.lastExportErrorContext
|
|
||||||
? ` (${this.lastExportErrorContext})`
|
|
||||||
: ''
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: new Error(
|
|
||||||
`Failed to export ${failedEvents.length} events${context}`,
|
|
||||||
),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Success - reset backoff and immediately retry any queued events
|
|
||||||
this.resetBackoff()
|
|
||||||
if ((await this.getQueuedEventCount()) > 0 && !this.isRetrying) {
|
|
||||||
void this.retryFailedEvents()
|
|
||||||
}
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
} catch (error) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging export failed: ${errorMessage(error)}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
logError(error)
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: toError(error),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async sendEventsInBatches(
|
|
||||||
events: FirstPartyEventLoggingEvent[],
|
|
||||||
): Promise<FirstPartyEventLoggingEvent[]> {
|
|
||||||
// Chunk events into batches
|
|
||||||
const batches: FirstPartyEventLoggingEvent[][] = []
|
|
||||||
for (let i = 0; i < events.length; i += this.maxBatchSize) {
|
|
||||||
batches.push(events.slice(i, i + this.maxBatchSize))
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: exporting ${events.length} events in ${batches.length} batch(es)`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send each batch with delay between them. On first failure, assume the
|
|
||||||
// endpoint is down and short-circuit: queue the failed batch plus all
|
|
||||||
// remaining unsent batches without POSTing them. The backoff retry will
|
|
||||||
// probe again with a single batch next tick.
|
|
||||||
const failedBatchEvents: FirstPartyEventLoggingEvent[] = []
|
|
||||||
let lastErrorContext: string | undefined
|
|
||||||
for (let i = 0; i < batches.length; i++) {
|
|
||||||
const batch = batches[i]!
|
|
||||||
try {
|
|
||||||
await this.sendBatchWithRetry({ events: batch })
|
|
||||||
} catch (error) {
|
|
||||||
lastErrorContext = getAxiosErrorContext(error)
|
|
||||||
for (let j = i; j < batches.length; j++) {
|
|
||||||
failedBatchEvents.push(...batches[j]!)
|
|
||||||
}
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
const skipped = batches.length - 1 - i
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: batch ${i + 1}/${batches.length} failed (${lastErrorContext}); short-circuiting ${skipped} remaining batch(es)`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i < batches.length - 1 && this.batchDelayMs > 0) {
|
|
||||||
await sleep(this.batchDelayMs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (failedBatchEvents.length > 0 && lastErrorContext) {
|
|
||||||
this.lastExportErrorContext = lastErrorContext
|
|
||||||
}
|
|
||||||
|
|
||||||
return failedBatchEvents
|
|
||||||
}
|
|
||||||
|
|
||||||
private async queueFailedEvents(
|
|
||||||
events: FirstPartyEventLoggingEvent[],
|
|
||||||
): Promise<void> {
|
|
||||||
const filePath = this.getCurrentBatchFilePath()
|
|
||||||
|
|
||||||
// Append-only: just add new events to file (atomic on most filesystems)
|
|
||||||
await this.appendEventsToFile(filePath, events)
|
|
||||||
|
|
||||||
const context = this.lastExportErrorContext
|
|
||||||
? ` (${this.lastExportErrorContext})`
|
|
||||||
: ''
|
|
||||||
const message = `1P event logging: ${events.length} events failed to export${context}`
|
|
||||||
logError(new Error(message))
|
|
||||||
}
|
|
||||||
|
|
||||||
private scheduleBackoffRetry(): void {
|
|
||||||
// Don't schedule if already retrying or shutdown
|
|
||||||
if (this.cancelBackoff || this.isRetrying || this.isShutdown) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quadratic backoff (matching Statsig SDK): base * attempts²
|
|
||||||
const delay = Math.min(
|
|
||||||
this.baseBackoffDelayMs * this.attempts * this.attempts,
|
|
||||||
this.maxBackoffDelayMs,
|
|
||||||
)
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: scheduling backoff retry in ${delay}ms (attempt ${this.attempts})`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
this.cancelBackoff = this.schedule(async () => {
|
|
||||||
this.cancelBackoff = null
|
|
||||||
await this.retryFailedEvents()
|
|
||||||
}, delay)
|
|
||||||
}
|
|
||||||
|
|
||||||
private async retryFailedEvents(): Promise<void> {
|
|
||||||
const filePath = this.getCurrentBatchFilePath()
|
|
||||||
|
|
||||||
// Keep retrying while there are events and endpoint is healthy
|
|
||||||
while (!this.isShutdown) {
|
|
||||||
const events = await this.loadEventsFromFile(filePath)
|
|
||||||
if (events.length === 0) break
|
|
||||||
|
|
||||||
if (this.attempts >= this.maxAttempts) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: max attempts (${this.maxAttempts}) reached, dropping ${events.length} events`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
await this.deleteFile(filePath)
|
|
||||||
this.resetBackoff()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
this.isRetrying = true
|
|
||||||
|
|
||||||
// Clear file before retry (we have events in memory now)
|
|
||||||
await this.deleteFile(filePath)
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: retrying ${events.length} failed events (attempt ${this.attempts + 1})`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const failedEvents = await this.sendEventsInBatches(events)
|
|
||||||
this.attempts++
|
|
||||||
|
|
||||||
this.isRetrying = false
|
|
||||||
|
|
||||||
if (failedEvents.length > 0) {
|
|
||||||
// Write failures back to disk
|
|
||||||
await this.saveEventsToFile(filePath, failedEvents)
|
|
||||||
this.scheduleBackoffRetry()
|
|
||||||
return // Failed - wait for backoff
|
|
||||||
}
|
|
||||||
|
|
||||||
// Success - reset backoff and continue loop to drain any newly queued events
|
|
||||||
this.resetBackoff()
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging: backoff retry succeeded')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private resetBackoff(): void {
|
|
||||||
this.attempts = 0
|
|
||||||
if (this.cancelBackoff) {
|
|
||||||
this.cancelBackoff()
|
|
||||||
this.cancelBackoff = null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async sendBatchWithRetry(
|
|
||||||
payload: FirstPartyEventLoggingPayload,
|
|
||||||
): Promise<void> {
|
|
||||||
if (this.isKilled()) {
|
|
||||||
// Throw so the caller short-circuits remaining batches and queues
|
|
||||||
// everything to disk. Zero network traffic while killed; the backoff
|
|
||||||
// timer keeps ticking and will resume POSTs as soon as the GrowthBook
|
|
||||||
// cache picks up the cleared flag.
|
|
||||||
throw new Error('firstParty sink killswitch active')
|
|
||||||
}
|
|
||||||
|
|
||||||
const baseHeaders: Record<string, string> = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'User-Agent': getClaudeCodeUserAgent(),
|
|
||||||
'x-service-name': 'claude-code',
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip auth if trust hasn't been established yet
|
|
||||||
// This prevents executing apiKeyHelper commands before the trust dialog
|
|
||||||
// Non-interactive sessions implicitly have workspace trust
|
|
||||||
const hasTrust =
|
|
||||||
checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
|
|
||||||
if (process.env.USER_TYPE === 'ant' && !hasTrust) {
|
|
||||||
logForDebugging('1P event logging: Trust not accepted')
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip auth when the OAuth token is expired or lacks user:profile
|
|
||||||
// scope (service key sessions). Falls through to unauthenticated send.
|
|
||||||
let shouldSkipAuth = this.skipAuth || !hasTrust
|
|
||||||
if (!shouldSkipAuth && isClaudeAISubscriber()) {
|
|
||||||
const tokens = getClaudeAIOAuthTokens()
|
|
||||||
if (!hasProfileScope()) {
|
|
||||||
shouldSkipAuth = true
|
|
||||||
} else if (tokens && isOAuthTokenExpired(tokens.expiresAt)) {
|
|
||||||
shouldSkipAuth = true
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
'1P event logging: OAuth token expired, skipping auth to avoid 401',
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try with auth headers first (unless trust not established or token is known to be expired)
|
|
||||||
const authResult = shouldSkipAuth
|
|
||||||
? { headers: {}, error: 'trust not established or Oauth token expired' }
|
|
||||||
: getAuthHeaders()
|
|
||||||
const useAuth = !authResult.error
|
|
||||||
|
|
||||||
if (!useAuth && process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: auth not available, sending without auth`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const headers = useAuth
|
|
||||||
? { ...baseHeaders, ...authResult.headers }
|
|
||||||
: baseHeaders
|
|
||||||
|
|
||||||
try {
|
|
||||||
const response = await axios.post(this.endpoint, payload, {
|
|
||||||
timeout: this.timeout,
|
|
||||||
headers,
|
|
||||||
})
|
|
||||||
this.logSuccess(payload.events.length, useAuth, response.data)
|
|
||||||
return
|
|
||||||
} catch (error) {
|
|
||||||
// Handle 401 by retrying without auth
|
|
||||||
if (
|
|
||||||
useAuth &&
|
|
||||||
axios.isAxiosError(error) &&
|
|
||||||
error.response?.status === 401
|
|
||||||
) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
'1P event logging: 401 auth error, retrying without auth',
|
|
||||||
)
|
|
||||||
}
|
|
||||||
const response = await axios.post(this.endpoint, payload, {
|
|
||||||
timeout: this.timeout,
|
|
||||||
headers: baseHeaders,
|
|
||||||
})
|
|
||||||
this.logSuccess(payload.events.length, false, response.data)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private logSuccess(
|
|
||||||
eventCount: number,
|
|
||||||
withAuth: boolean,
|
|
||||||
responseData: unknown,
|
|
||||||
): void {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: ${eventCount} events exported successfully${withAuth ? ' (with auth)' : ' (without auth)'}`,
|
|
||||||
)
|
|
||||||
logForDebugging(`API Response: ${jsonStringify(responseData, null, 2)}`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private hrTimeToDate(hrTime: HrTime): Date {
|
|
||||||
const [seconds, nanoseconds] = hrTime
|
|
||||||
return new Date(seconds * 1000 + nanoseconds / 1000000)
|
|
||||||
}
|
|
||||||
|
|
||||||
private transformLogsToEvents(
|
|
||||||
logs: ReadableLogRecord[],
|
|
||||||
): FirstPartyEventLoggingPayload {
|
|
||||||
const events: FirstPartyEventLoggingEvent[] = []
|
|
||||||
|
|
||||||
for (const log of logs) {
|
|
||||||
const attributes = log.attributes || {}
|
|
||||||
|
|
||||||
// Check if this is a GrowthBook experiment event
|
|
||||||
if (attributes.event_type === 'GrowthbookExperimentEvent') {
|
|
||||||
const timestamp = this.hrTimeToDate(log.hrTime)
|
|
||||||
const account_uuid = attributes.account_uuid as string | undefined
|
|
||||||
const organization_uuid = attributes.organization_uuid as
|
|
||||||
| string
|
|
||||||
| undefined
|
|
||||||
events.push({
|
|
||||||
event_type: 'GrowthbookExperimentEvent',
|
|
||||||
event_data: GrowthbookExperimentEvent.toJSON({
|
|
||||||
event_id: attributes.event_id as string,
|
|
||||||
timestamp,
|
|
||||||
experiment_id: attributes.experiment_id as string,
|
|
||||||
variation_id: attributes.variation_id as number,
|
|
||||||
environment: attributes.environment as string,
|
|
||||||
user_attributes: attributes.user_attributes as string,
|
|
||||||
experiment_metadata: attributes.experiment_metadata as string,
|
|
||||||
device_id: attributes.device_id as string,
|
|
||||||
session_id: attributes.session_id as string,
|
|
||||||
auth:
|
|
||||||
account_uuid || organization_uuid
|
|
||||||
? { account_uuid, organization_uuid }
|
|
||||||
: undefined,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract event name
|
|
||||||
const eventName =
|
|
||||||
(attributes.event_name as string) || (log.body as string) || 'unknown'
|
|
||||||
|
|
||||||
// Extract metadata objects directly (no JSON parsing needed)
|
|
||||||
const coreMetadata = attributes.core_metadata as EventMetadata | undefined
|
|
||||||
const userMetadata = attributes.user_metadata as CoreUserData
|
|
||||||
const eventMetadata = (attributes.event_metadata || {}) as Record<
|
|
||||||
string,
|
|
||||||
unknown
|
|
||||||
>
|
|
||||||
|
|
||||||
if (!coreMetadata) {
|
|
||||||
// Emit partial event if core metadata is missing
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: core_metadata missing for event ${eventName}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
events.push({
|
|
||||||
event_type: 'ClaudeCodeInternalEvent',
|
|
||||||
event_data: ClaudeCodeInternalEvent.toJSON({
|
|
||||||
event_id: attributes.event_id as string | undefined,
|
|
||||||
event_name: eventName,
|
|
||||||
client_timestamp: this.hrTimeToDate(log.hrTime),
|
|
||||||
session_id: getSessionId(),
|
|
||||||
additional_metadata: Buffer.from(
|
|
||||||
jsonStringify({
|
|
||||||
transform_error: 'core_metadata attribute is missing',
|
|
||||||
}),
|
|
||||||
).toString('base64'),
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transform to 1P format
|
|
||||||
const formatted = to1PEventFormat(
|
|
||||||
coreMetadata,
|
|
||||||
userMetadata,
|
|
||||||
eventMetadata,
|
|
||||||
)
|
|
||||||
|
|
||||||
// _PROTO_* keys are PII-tagged values meant only for privileged BQ
|
|
||||||
// columns. Hoist known keys to proto fields, then defensively strip any
|
|
||||||
// remaining _PROTO_* so an unrecognized future key can't silently land
|
|
||||||
// in the general-access additional_metadata blob. sink.ts applies the
|
|
||||||
// same strip before Datadog; this closes the 1P side.
|
|
||||||
const {
|
|
||||||
_PROTO_skill_name,
|
|
||||||
_PROTO_plugin_name,
|
|
||||||
_PROTO_marketplace_name,
|
|
||||||
...rest
|
|
||||||
} = formatted.additional
|
|
||||||
const additionalMetadata = stripProtoFields(rest)
|
|
||||||
|
|
||||||
events.push({
|
|
||||||
event_type: 'ClaudeCodeInternalEvent',
|
|
||||||
event_data: ClaudeCodeInternalEvent.toJSON({
|
|
||||||
event_id: attributes.event_id as string | undefined,
|
|
||||||
event_name: eventName,
|
|
||||||
client_timestamp: this.hrTimeToDate(log.hrTime),
|
|
||||||
device_id: attributes.user_id as string | undefined,
|
|
||||||
email: userMetadata?.email,
|
|
||||||
auth: formatted.auth,
|
|
||||||
...formatted.core,
|
|
||||||
env: formatted.env,
|
|
||||||
process: formatted.process,
|
|
||||||
skill_name:
|
|
||||||
typeof _PROTO_skill_name === 'string'
|
|
||||||
? _PROTO_skill_name
|
|
||||||
: undefined,
|
|
||||||
plugin_name:
|
|
||||||
typeof _PROTO_plugin_name === 'string'
|
|
||||||
? _PROTO_plugin_name
|
|
||||||
: undefined,
|
|
||||||
marketplace_name:
|
|
||||||
typeof _PROTO_marketplace_name === 'string'
|
|
||||||
? _PROTO_marketplace_name
|
|
||||||
: undefined,
|
|
||||||
additional_metadata:
|
|
||||||
Object.keys(additionalMetadata).length > 0
|
|
||||||
? Buffer.from(jsonStringify(additionalMetadata)).toString(
|
|
||||||
'base64',
|
|
||||||
)
|
|
||||||
: undefined,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return { events }
|
|
||||||
}
|
|
||||||
|
|
||||||
async shutdown(): Promise<void> {
|
|
||||||
this.isShutdown = true
|
|
||||||
this.resetBackoff()
|
|
||||||
await this.forceFlush()
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging exporter shutdown complete')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async forceFlush(): Promise<void> {
|
|
||||||
await Promise.all(this.pendingExports)
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging exporter flush complete')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function getAxiosErrorContext(error: unknown): string {
|
|
||||||
if (!axios.isAxiosError(error)) {
|
|
||||||
return errorMessage(error)
|
|
||||||
}
|
|
||||||
|
|
||||||
const parts: string[] = []
|
|
||||||
|
|
||||||
const requestId = error.response?.headers?.['request-id']
|
|
||||||
if (requestId) {
|
|
||||||
parts.push(`request-id=${requestId}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error.response?.status) {
|
|
||||||
parts.push(`status=${error.response.status}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error.code) {
|
|
||||||
parts.push(`code=${error.code}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error.message) {
|
|
||||||
parts.push(error.message)
|
|
||||||
}
|
|
||||||
|
|
||||||
return parts.join(', ')
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -19,45 +19,15 @@
|
|||||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marker type for values routed to PII-tagged proto columns via `_PROTO_*`
|
* Marker type for values that previously flowed to privileged `_PROTO_*`
|
||||||
* payload keys. The destination BQ column has privileged access controls,
|
* columns. The export remains so existing call sites keep their explicit
|
||||||
* so unredacted values are acceptable — unlike general-access backends.
|
* privacy annotations even though external analytics export is disabled.
|
||||||
*
|
|
||||||
* sink.ts strips `_PROTO_*` keys before Datadog fanout; only the 1P
|
|
||||||
* exporter (firstPartyEventLoggingExporter) sees them and hoists them to the
|
|
||||||
* top-level proto field. A single stripProtoFields call guards all non-1P
|
|
||||||
* sinks — no per-sink filtering to forget.
|
|
||||||
*
|
*
|
||||||
* Usage: `rawName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED`
|
* Usage: `rawName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED`
|
||||||
*/
|
*/
|
||||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED = never
|
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED = never
|
||||||
|
|
||||||
/**
|
// Internal type for logEvent metadata in the local no-op sink.
|
||||||
* Strip `_PROTO_*` keys from a payload destined for general-access storage.
|
|
||||||
* Used by:
|
|
||||||
* - sink.ts: before Datadog fanout (never sees PII-tagged values)
|
|
||||||
* - firstPartyEventLoggingExporter: defensive strip of additional_metadata
|
|
||||||
* after hoisting known _PROTO_* keys to proto fields — prevents a future
|
|
||||||
* unrecognized _PROTO_foo from silently landing in the BQ JSON blob.
|
|
||||||
*
|
|
||||||
* Returns the input unchanged (same reference) when no _PROTO_ keys present.
|
|
||||||
*/
|
|
||||||
export function stripProtoFields<V>(
|
|
||||||
metadata: Record<string, V>,
|
|
||||||
): Record<string, V> {
|
|
||||||
let result: Record<string, V> | undefined
|
|
||||||
for (const key in metadata) {
|
|
||||||
if (key.startsWith('_PROTO_')) {
|
|
||||||
if (result === undefined) {
|
|
||||||
result = { ...metadata }
|
|
||||||
}
|
|
||||||
delete result[key]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result ?? metadata
|
|
||||||
}
|
|
||||||
|
|
||||||
// Internal type for logEvent metadata - different from the enriched EventMetadata in metadata.ts
|
|
||||||
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
||||||
|
|
||||||
type QueuedEvent = {
|
type QueuedEvent = {
|
||||||
|
|||||||
@@ -1,72 +1,13 @@
|
|||||||
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
|
|
||||||
/**
|
|
||||||
* Shared event metadata enrichment for analytics systems
|
|
||||||
*
|
|
||||||
* This module provides a single source of truth for collecting and formatting
|
|
||||||
* event metadata across all analytics systems (Datadog, 1P).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { extname } from 'path'
|
import { extname } from 'path'
|
||||||
import memoize from 'lodash-es/memoize.js'
|
import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './index.js'
|
||||||
import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
|
|
||||||
import { envDynamic } from '../../utils/envDynamic.js'
|
|
||||||
import { getModelBetas } from '../../utils/betas.js'
|
|
||||||
import { getMainLoopModel } from '../../utils/model/model.js'
|
|
||||||
import {
|
|
||||||
getSessionId,
|
|
||||||
getIsInteractive,
|
|
||||||
getKairosActive,
|
|
||||||
getClientType,
|
|
||||||
getParentSessionId as getParentSessionIdFromState,
|
|
||||||
} from '../../bootstrap/state.js'
|
|
||||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
|
||||||
import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
|
|
||||||
import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
|
|
||||||
import { getRepoRemoteHash } from '../../utils/git.js'
|
|
||||||
import {
|
|
||||||
getWslVersion,
|
|
||||||
getLinuxDistroInfo,
|
|
||||||
detectVcs,
|
|
||||||
} from '../../utils/platform.js'
|
|
||||||
import type { CoreUserData } from 'src/utils/user.js'
|
|
||||||
import { getAgentContext } from '../../utils/agentContext.js'
|
|
||||||
import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
|
||||||
import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
|
|
||||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
||||||
import {
|
|
||||||
getAgentId,
|
|
||||||
getParentSessionId as getTeammateParentSessionId,
|
|
||||||
getTeamName,
|
|
||||||
isTeammate,
|
|
||||||
} from '../../utils/teammate.js'
|
|
||||||
import { feature } from 'bun:bundle'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Marker type for verifying analytics metadata doesn't contain sensitive data
|
* Local-only analytics helpers retained for compatibility after telemetry
|
||||||
*
|
* export removal. These helpers only sanitize or classify values in-process.
|
||||||
* This type forces explicit verification that string values being logged
|
|
||||||
* don't contain code snippets, file paths, or other sensitive information.
|
|
||||||
*
|
|
||||||
* The metadata is expected to be JSON-serializable.
|
|
||||||
*
|
|
||||||
* Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
|
|
||||||
*
|
|
||||||
* The type is `never` which means it can never actually hold a value - this is
|
|
||||||
* intentional as it's only used for type-casting to document developer intent.
|
|
||||||
*/
|
*/
|
||||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
|
||||||
|
|
||||||
/**
|
export type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS }
|
||||||
* Sanitizes tool names for analytics logging to avoid PII exposure.
|
|
||||||
*
|
|
||||||
* MCP tool names follow the format `mcp__<server>__<tool>` and can reveal
|
|
||||||
* user-specific server configurations, which is considered PII-medium.
|
|
||||||
* This function redacts MCP tool names while preserving built-in tool names
|
|
||||||
* (Bash, Read, Write, etc.) which are safe to log.
|
|
||||||
*
|
|
||||||
* @param toolName - The tool name to sanitize
|
|
||||||
* @returns The original name for built-in tools, or 'mcp_tool' for MCP tools
|
|
||||||
*/
|
|
||||||
export function sanitizeToolNameForAnalytics(
|
export function sanitizeToolNameForAnalytics(
|
||||||
toolName: string,
|
toolName: string,
|
||||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
|
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
|
||||||
@@ -76,103 +17,17 @@ export function sanitizeToolNameForAnalytics(
|
|||||||
return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if detailed tool name logging is enabled for OTLP events.
|
|
||||||
* When enabled, MCP server/tool names and Skill names are logged.
|
|
||||||
* Disabled by default to protect PII (user-specific server configurations).
|
|
||||||
*
|
|
||||||
* Enable with OTEL_LOG_TOOL_DETAILS=1
|
|
||||||
*/
|
|
||||||
export function isToolDetailsLoggingEnabled(): boolean {
|
export function isToolDetailsLoggingEnabled(): boolean {
|
||||||
return isEnvTruthy(process.env.OTEL_LOG_TOOL_DETAILS)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if detailed tool name logging (MCP server/tool names) is enabled
|
|
||||||
* for analytics events.
|
|
||||||
*
|
|
||||||
* Per go/taxonomy, MCP names are medium PII. We log them for:
|
|
||||||
* - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
|
|
||||||
* - claude.ai-proxied connectors — always official (from claude.ai's list)
|
|
||||||
* - Servers whose URL matches the official MCP registry — directory
|
|
||||||
* connectors added via `claude mcp add`, not customer-specific config
|
|
||||||
*
|
|
||||||
* Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
|
|
||||||
*/
|
|
||||||
export function isAnalyticsToolDetailsLoggingEnabled(
|
|
||||||
mcpServerType: string | undefined,
|
|
||||||
mcpServerBaseUrl: string | undefined,
|
|
||||||
): boolean {
|
|
||||||
if (process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent') {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if (mcpServerType === 'claudeai-proxy') {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
export function mcpToolDetailsForAnalytics(): {
|
||||||
* Built-in first-party MCP servers whose names are fixed reserved strings,
|
|
||||||
* not user-configured — so logging them is not PII. Checked in addition to
|
|
||||||
* isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
|
|
||||||
* built-in would otherwise fail.
|
|
||||||
*
|
|
||||||
* Feature-gated so the set is empty when the feature is off: the name
|
|
||||||
* reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
|
|
||||||
* a user-configured 'computer-use' is possible in builds without the feature.
|
|
||||||
*/
|
|
||||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
|
||||||
const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
|
|
||||||
feature('CHICAGO_MCP')
|
|
||||||
? [
|
|
||||||
(
|
|
||||||
require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
|
|
||||||
).COMPUTER_USE_MCP_SERVER_NAME,
|
|
||||||
]
|
|
||||||
: [],
|
|
||||||
)
|
|
||||||
/* eslint-enable @typescript-eslint/no-require-imports */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Spreadable helper for logEvent payloads — returns {mcpServerName, mcpToolName}
|
|
||||||
* if the gate passes, empty object otherwise. Consolidates the identical IIFE
|
|
||||||
* pattern at each tengu_tool_use_* call site.
|
|
||||||
*/
|
|
||||||
export function mcpToolDetailsForAnalytics(
|
|
||||||
toolName: string,
|
|
||||||
mcpServerType: string | undefined,
|
|
||||||
mcpServerBaseUrl: string | undefined,
|
|
||||||
): {
|
|
||||||
mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
} {
|
} {
|
||||||
const details = extractMcpToolDetails(toolName)
|
return {}
|
||||||
if (!details) {
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
!BUILTIN_MCP_SERVER_NAMES.has(details.serverName) &&
|
|
||||||
!isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
|
|
||||||
) {
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
mcpServerName: details.serverName,
|
|
||||||
mcpToolName: details.mcpToolName,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract MCP server and tool names from a full MCP tool name.
|
|
||||||
* MCP tool names follow the format: mcp__<server>__<tool>
|
|
||||||
*
|
|
||||||
* @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
|
|
||||||
* @returns Object with serverName and toolName, or undefined if not an MCP tool
|
|
||||||
*/
|
|
||||||
export function extractMcpToolDetails(toolName: string):
|
export function extractMcpToolDetails(toolName: string):
|
||||||
| {
|
| {
|
||||||
serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
@@ -183,16 +38,13 @@ export function extractMcpToolDetails(toolName: string):
|
|||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format: mcp__<server>__<tool>
|
|
||||||
const parts = toolName.split('__')
|
const parts = toolName.split('__')
|
||||||
if (parts.length < 3) {
|
if (parts.length < 3) {
|
||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
const serverName = parts[1]
|
const serverName = parts[1]
|
||||||
// Tool name may contain __ so rejoin remaining parts
|
|
||||||
const mcpToolName = parts.slice(2).join('__')
|
const mcpToolName = parts.slice(2).join('__')
|
||||||
|
|
||||||
if (!serverName || !mcpToolName) {
|
if (!serverName || !mcpToolName) {
|
||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
@@ -205,13 +57,6 @@ export function extractMcpToolDetails(toolName: string):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract skill name from Skill tool input.
|
|
||||||
*
|
|
||||||
* @param toolName - The tool name (should be 'Skill')
|
|
||||||
* @param input - The tool input containing the skill name
|
|
||||||
* @returns The skill name if this is a Skill tool call, undefined otherwise
|
|
||||||
*/
|
|
||||||
export function extractSkillName(
|
export function extractSkillName(
|
||||||
toolName: string,
|
toolName: string,
|
||||||
input: unknown,
|
input: unknown,
|
||||||
@@ -233,93 +78,14 @@ export function extractSkillName(
|
|||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOOL_INPUT_STRING_TRUNCATE_AT = 512
|
|
||||||
const TOOL_INPUT_STRING_TRUNCATE_TO = 128
|
|
||||||
const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
|
|
||||||
const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
|
|
||||||
const TOOL_INPUT_MAX_DEPTH = 2
|
|
||||||
|
|
||||||
function truncateToolInputValue(value: unknown, depth = 0): unknown {
|
|
||||||
if (typeof value === 'string') {
|
|
||||||
if (value.length > TOOL_INPUT_STRING_TRUNCATE_AT) {
|
|
||||||
return `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
|
|
||||||
}
|
|
||||||
return value
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
typeof value === 'number' ||
|
|
||||||
typeof value === 'boolean' ||
|
|
||||||
value === null ||
|
|
||||||
value === undefined
|
|
||||||
) {
|
|
||||||
return value
|
|
||||||
}
|
|
||||||
if (depth >= TOOL_INPUT_MAX_DEPTH) {
|
|
||||||
return '<nested>'
|
|
||||||
}
|
|
||||||
if (Array.isArray(value)) {
|
|
||||||
const mapped = value
|
|
||||||
.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
|
|
||||||
.map(v => truncateToolInputValue(v, depth + 1))
|
|
||||||
if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
|
|
||||||
mapped.push(`…[${value.length} items]`)
|
|
||||||
}
|
|
||||||
return mapped
|
|
||||||
}
|
|
||||||
if (typeof value === 'object') {
|
|
||||||
const entries = Object.entries(value as Record<string, unknown>)
|
|
||||||
// Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
|
|
||||||
// SedEditPermissionRequest) so they don't leak into telemetry.
|
|
||||||
.filter(([k]) => !k.startsWith('_'))
|
|
||||||
const mapped = entries
|
|
||||||
.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
|
|
||||||
.map(([k, v]) => [k, truncateToolInputValue(v, depth + 1)])
|
|
||||||
if (entries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
|
|
||||||
mapped.push(['…', `${entries.length} keys`])
|
|
||||||
}
|
|
||||||
return Object.fromEntries(mapped)
|
|
||||||
}
|
|
||||||
return String(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Serialize a tool's input arguments for the OTel tool_result event.
|
|
||||||
* Truncates long strings and deep nesting to keep the output bounded while
|
|
||||||
* preserving forensically useful fields like file paths, URLs, and MCP args.
|
|
||||||
* Returns undefined when OTEL_LOG_TOOL_DETAILS is not enabled.
|
|
||||||
*/
|
|
||||||
export function extractToolInputForTelemetry(
|
export function extractToolInputForTelemetry(
|
||||||
input: unknown,
|
_input: unknown,
|
||||||
): string | undefined {
|
): string | undefined {
|
||||||
if (!isToolDetailsLoggingEnabled()) {
|
return undefined
|
||||||
return undefined
|
|
||||||
}
|
|
||||||
const truncated = truncateToolInputValue(input)
|
|
||||||
let json = jsonStringify(truncated)
|
|
||||||
if (json.length > TOOL_INPUT_MAX_JSON_CHARS) {
|
|
||||||
json = json.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
|
|
||||||
}
|
|
||||||
return json
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Maximum length for file extensions to be logged.
|
|
||||||
* Extensions longer than this are considered potentially sensitive
|
|
||||||
* (e.g., hash-based filenames like "key-hash-abcd-123-456") and
|
|
||||||
* will be replaced with 'other'.
|
|
||||||
*/
|
|
||||||
const MAX_FILE_EXTENSION_LENGTH = 10
|
const MAX_FILE_EXTENSION_LENGTH = 10
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts and sanitizes a file extension for analytics logging.
|
|
||||||
*
|
|
||||||
* Uses Node's path.extname for reliable cross-platform extension extraction.
|
|
||||||
* Returns 'other' for extensions exceeding MAX_FILE_EXTENSION_LENGTH to avoid
|
|
||||||
* logging potentially sensitive data (like hash-based filenames).
|
|
||||||
*
|
|
||||||
* @param filePath - The file path to extract the extension from
|
|
||||||
* @returns The sanitized extension, 'other' for long extensions, or undefined if no extension
|
|
||||||
*/
|
|
||||||
export function getFileExtensionForAnalytics(
|
export function getFileExtensionForAnalytics(
|
||||||
filePath: string,
|
filePath: string,
|
||||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
||||||
@@ -328,7 +94,7 @@ export function getFileExtensionForAnalytics(
|
|||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
const extension = ext.slice(1) // remove leading dot
|
const extension = ext.slice(1)
|
||||||
if (extension.length > MAX_FILE_EXTENSION_LENGTH) {
|
if (extension.length > MAX_FILE_EXTENSION_LENGTH) {
|
||||||
return 'other' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
return 'other' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
}
|
}
|
||||||
@@ -336,7 +102,6 @@ export function getFileExtensionForAnalytics(
|
|||||||
return extension as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
return extension as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Allow list of commands we extract file extensions from. */
|
|
||||||
const FILE_COMMANDS = new Set([
|
const FILE_COMMANDS = new Set([
|
||||||
'rm',
|
'rm',
|
||||||
'mv',
|
'mv',
|
||||||
@@ -357,23 +122,16 @@ const FILE_COMMANDS = new Set([
|
|||||||
'sed',
|
'sed',
|
||||||
])
|
])
|
||||||
|
|
||||||
/** Regex to split bash commands on compound operators (&&, ||, ;, |). */
|
|
||||||
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/
|
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/
|
||||||
|
|
||||||
/** Regex to split on whitespace. */
|
|
||||||
const WHITESPACE_REGEX = /\s+/
|
const WHITESPACE_REGEX = /\s+/
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts file extensions from a bash command for analytics.
|
|
||||||
* Best-effort: splits on operators and whitespace, extracts extensions
|
|
||||||
* from non-flag args of allowed commands. No heavy shell parsing needed
|
|
||||||
* because grep patterns and sed scripts rarely resemble file extensions.
|
|
||||||
*/
|
|
||||||
export function getFileExtensionsFromBashCommand(
|
export function getFileExtensionsFromBashCommand(
|
||||||
command: string,
|
command: string,
|
||||||
simulatedSedEditFilePath?: string,
|
simulatedSedEditFilePath?: string,
|
||||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
||||||
if (!command.includes('.') && !simulatedSedEditFilePath) return undefined
|
if (!command.includes('.') && !simulatedSedEditFilePath) {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
let result: string | undefined
|
let result: string | undefined
|
||||||
const seen = new Set<string>()
|
const seen = new Set<string>()
|
||||||
@@ -398,7 +156,7 @@ export function getFileExtensionsFromBashCommand(
|
|||||||
|
|
||||||
for (let i = 1; i < tokens.length; i++) {
|
for (let i = 1; i < tokens.length; i++) {
|
||||||
const arg = tokens[i]!
|
const arg = tokens[i]!
|
||||||
if (arg.charCodeAt(0) === 45 /* - */) continue
|
if (arg.charCodeAt(0) === 45) continue
|
||||||
const ext = getFileExtensionForAnalytics(arg)
|
const ext = getFileExtensionForAnalytics(arg)
|
||||||
if (ext && !seen.has(ext)) {
|
if (ext && !seen.has(ext)) {
|
||||||
seen.add(ext)
|
seen.add(ext)
|
||||||
@@ -407,567 +165,8 @@ export function getFileExtensionsFromBashCommand(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!result) return undefined
|
if (!result) {
|
||||||
return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Environment context metadata
|
|
||||||
*/
|
|
||||||
export type EnvContext = {
|
|
||||||
platform: string
|
|
||||||
platformRaw: string
|
|
||||||
arch: string
|
|
||||||
nodeVersion: string
|
|
||||||
terminal: string | null
|
|
||||||
packageManagers: string
|
|
||||||
runtimes: string
|
|
||||||
isRunningWithBun: boolean
|
|
||||||
isCi: boolean
|
|
||||||
isClaubbit: boolean
|
|
||||||
isClaudeCodeRemote: boolean
|
|
||||||
isLocalAgentMode: boolean
|
|
||||||
isConductor: boolean
|
|
||||||
remoteEnvironmentType?: string
|
|
||||||
coworkerType?: string
|
|
||||||
claudeCodeContainerId?: string
|
|
||||||
claudeCodeRemoteSessionId?: string
|
|
||||||
tags?: string
|
|
||||||
isGithubAction: boolean
|
|
||||||
isClaudeCodeAction: boolean
|
|
||||||
isClaudeAiAuth: boolean
|
|
||||||
version: string
|
|
||||||
versionBase?: string
|
|
||||||
buildTime: string
|
|
||||||
deploymentEnvironment: string
|
|
||||||
githubEventName?: string
|
|
||||||
githubActionsRunnerEnvironment?: string
|
|
||||||
githubActionsRunnerOs?: string
|
|
||||||
githubActionRef?: string
|
|
||||||
wslVersion?: string
|
|
||||||
linuxDistroId?: string
|
|
||||||
linuxDistroVersion?: string
|
|
||||||
linuxKernel?: string
|
|
||||||
vcs?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Process metrics included with all analytics events.
|
|
||||||
*/
|
|
||||||
export type ProcessMetrics = {
|
|
||||||
uptime: number
|
|
||||||
rss: number
|
|
||||||
heapTotal: number
|
|
||||||
heapUsed: number
|
|
||||||
external: number
|
|
||||||
arrayBuffers: number
|
|
||||||
constrainedMemory: number | undefined
|
|
||||||
cpuUsage: NodeJS.CpuUsage
|
|
||||||
cpuPercent: number | undefined
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Core event metadata shared across all analytics systems
|
|
||||||
*/
|
|
||||||
export type EventMetadata = {
|
|
||||||
model: string
|
|
||||||
sessionId: string
|
|
||||||
userType: string
|
|
||||||
betas?: string
|
|
||||||
envContext: EnvContext
|
|
||||||
entrypoint?: string
|
|
||||||
agentSdkVersion?: string
|
|
||||||
isInteractive: string
|
|
||||||
clientType: string
|
|
||||||
processMetrics?: ProcessMetrics
|
|
||||||
sweBenchRunId: string
|
|
||||||
sweBenchInstanceId: string
|
|
||||||
sweBenchTaskId: string
|
|
||||||
// Swarm/team agent identification for analytics attribution
|
|
||||||
agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID
|
|
||||||
parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session)
|
|
||||||
agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents
|
|
||||||
teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage)
|
|
||||||
subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team)
|
|
||||||
rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data
|
|
||||||
kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check)
|
|
||||||
skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation)
|
|
||||||
observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Options for enriching event metadata
|
|
||||||
*/
|
|
||||||
export type EnrichMetadataOptions = {
|
|
||||||
// Model to use, falls back to getMainLoopModel() if not provided
|
|
||||||
model?: unknown
|
|
||||||
// Explicit betas string (already joined)
|
|
||||||
betas?: unknown
|
|
||||||
// Additional metadata to include (optional)
|
|
||||||
additionalMetadata?: Record<string, unknown>
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get agent identification for analytics.
|
|
||||||
* Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
|
|
||||||
*/
|
|
||||||
function getAgentIdentification(): {
|
|
||||||
agentId?: string
|
|
||||||
parentSessionId?: string
|
|
||||||
agentType?: 'teammate' | 'subagent' | 'standalone'
|
|
||||||
teamName?: string
|
|
||||||
} {
|
|
||||||
// Check AsyncLocalStorage first (for subagents running in same process)
|
|
||||||
const agentContext = getAgentContext()
|
|
||||||
if (agentContext) {
|
|
||||||
const result: ReturnType<typeof getAgentIdentification> = {
|
|
||||||
agentId: agentContext.agentId,
|
|
||||||
parentSessionId: agentContext.parentSessionId,
|
|
||||||
agentType: agentContext.agentType,
|
|
||||||
}
|
|
||||||
if (agentContext.agentType === 'teammate') {
|
|
||||||
result.teamName = agentContext.teamName
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to swarm helpers (for swarm agents)
|
|
||||||
const agentId = getAgentId()
|
|
||||||
const parentSessionId = getTeammateParentSessionId()
|
|
||||||
const teamName = getTeamName()
|
|
||||||
const isSwarmAgent = isTeammate()
|
|
||||||
// For standalone agents (have agent ID but not a teammate), set agentType to 'standalone'
|
|
||||||
const agentType = isSwarmAgent
|
|
||||||
? ('teammate' as const)
|
|
||||||
: agentId
|
|
||||||
? ('standalone' as const)
|
|
||||||
: undefined
|
|
||||||
if (agentId || agentType || parentSessionId || teamName) {
|
|
||||||
return {
|
|
||||||
...(agentId ? { agentId } : {}),
|
|
||||||
...(agentType ? { agentType } : {}),
|
|
||||||
...(parentSessionId ? { parentSessionId } : {}),
|
|
||||||
...(teamName ? { teamName } : {}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check bootstrap state for parent session ID (e.g., plan mode -> implementation)
|
|
||||||
const stateParentSessionId = getParentSessionIdFromState()
|
|
||||||
if (stateParentSessionId) {
|
|
||||||
return { parentSessionId: stateParentSessionId }
|
|
||||||
}
|
|
||||||
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract base version from full version string. "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
|
|
||||||
*/
|
|
||||||
const getVersionBase = memoize((): string | undefined => {
|
|
||||||
const match = MACRO.VERSION.match(/^\d+\.\d+\.\d+(?:-[a-z]+)?/)
|
|
||||||
return match ? match[0] : undefined
|
|
||||||
})
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds the environment context object
|
|
||||||
*/
|
|
||||||
const buildEnvContext = memoize(async (): Promise<EnvContext> => {
|
|
||||||
const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
|
|
||||||
env.getPackageManagers(),
|
|
||||||
env.getRuntimes(),
|
|
||||||
getLinuxDistroInfo(),
|
|
||||||
detectVcs(),
|
|
||||||
])
|
|
||||||
|
|
||||||
return {
|
|
||||||
platform: getHostPlatformForAnalytics(),
|
|
||||||
// Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
|
|
||||||
// getHostPlatformForAnalytics() buckets those into 'linux'; here we want
|
|
||||||
// the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
|
|
||||||
platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
|
|
||||||
arch: env.arch,
|
|
||||||
nodeVersion: env.nodeVersion,
|
|
||||||
terminal: envDynamic.terminal,
|
|
||||||
packageManagers: packageManagers.join(','),
|
|
||||||
runtimes: runtimes.join(','),
|
|
||||||
isRunningWithBun: env.isRunningWithBun(),
|
|
||||||
isCi: isEnvTruthy(process.env.CI),
|
|
||||||
isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
|
|
||||||
isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
|
|
||||||
isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
|
|
||||||
isConductor: env.isConductor(),
|
|
||||||
...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && {
|
|
||||||
remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
|
|
||||||
}),
|
|
||||||
// Gated by feature flag to prevent leaking "coworkerType" string in external builds
|
|
||||||
...(feature('COWORKER_TYPE_TELEMETRY')
|
|
||||||
? process.env.CLAUDE_CODE_COWORKER_TYPE
|
|
||||||
? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
|
|
||||||
: {}
|
|
||||||
: {}),
|
|
||||||
...(process.env.CLAUDE_CODE_CONTAINER_ID && {
|
|
||||||
claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID,
|
|
||||||
}),
|
|
||||||
...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && {
|
|
||||||
claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
|
|
||||||
}),
|
|
||||||
...(process.env.CLAUDE_CODE_TAGS && {
|
|
||||||
tags: process.env.CLAUDE_CODE_TAGS,
|
|
||||||
}),
|
|
||||||
isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
|
|
||||||
isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
|
|
||||||
isClaudeAiAuth: isClaudeAISubscriber(),
|
|
||||||
version: MACRO.VERSION,
|
|
||||||
versionBase: getVersionBase(),
|
|
||||||
buildTime: MACRO.BUILD_TIME,
|
|
||||||
deploymentEnvironment: env.detectDeploymentEnvironment(),
|
|
||||||
...(isEnvTruthy(process.env.GITHUB_ACTIONS) && {
|
|
||||||
githubEventName: process.env.GITHUB_EVENT_NAME,
|
|
||||||
githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
|
|
||||||
githubActionsRunnerOs: process.env.RUNNER_OS,
|
|
||||||
githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
|
|
||||||
'claude-code-action/',
|
|
||||||
)
|
|
||||||
? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
|
|
||||||
: undefined,
|
|
||||||
}),
|
|
||||||
...(getWslVersion() && { wslVersion: getWslVersion() }),
|
|
||||||
...(linuxDistroInfo ?? {}),
|
|
||||||
...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// --
|
|
||||||
// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
|
|
||||||
let prevCpuUsage: NodeJS.CpuUsage | null = null
|
|
||||||
let prevWallTimeMs: number | null = null
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Builds process metrics object for all users.
|
|
||||||
*/
|
|
||||||
function buildProcessMetrics(): ProcessMetrics | undefined {
|
|
||||||
try {
|
|
||||||
const mem = process.memoryUsage()
|
|
||||||
const cpu = process.cpuUsage()
|
|
||||||
const now = Date.now()
|
|
||||||
|
|
||||||
let cpuPercent: number | undefined
|
|
||||||
if (prevCpuUsage && prevWallTimeMs) {
|
|
||||||
const wallDeltaMs = now - prevWallTimeMs
|
|
||||||
if (wallDeltaMs > 0) {
|
|
||||||
const userDeltaUs = cpu.user - prevCpuUsage.user
|
|
||||||
const systemDeltaUs = cpu.system - prevCpuUsage.system
|
|
||||||
cpuPercent =
|
|
||||||
((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
|
|
||||||
}
|
|
||||||
}
|
|
||||||
prevCpuUsage = cpu
|
|
||||||
prevWallTimeMs = now
|
|
||||||
|
|
||||||
return {
|
|
||||||
uptime: process.uptime(),
|
|
||||||
rss: mem.rss,
|
|
||||||
heapTotal: mem.heapTotal,
|
|
||||||
heapUsed: mem.heapUsed,
|
|
||||||
external: mem.external,
|
|
||||||
arrayBuffers: mem.arrayBuffers,
|
|
||||||
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
|
|
||||||
constrainedMemory: process.constrainedMemory(),
|
|
||||||
cpuUsage: cpu,
|
|
||||||
cpuPercent,
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
}
|
return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||||
|
|
||||||
/**
|
|
||||||
* Get core event metadata shared across all analytics systems.
|
|
||||||
*
|
|
||||||
* This function collects environment, runtime, and context information
|
|
||||||
* that should be included with all analytics events.
|
|
||||||
*
|
|
||||||
* @param options - Configuration options
|
|
||||||
* @returns Promise resolving to enriched metadata object
|
|
||||||
*/
|
|
||||||
export async function getEventMetadata(
|
|
||||||
options: EnrichMetadataOptions = {},
|
|
||||||
): Promise<EventMetadata> {
|
|
||||||
const model = options.model ? String(options.model) : getMainLoopModel()
|
|
||||||
const betas =
|
|
||||||
typeof options.betas === 'string'
|
|
||||||
? options.betas
|
|
||||||
: getModelBetas(model).join(',')
|
|
||||||
const [envContext, repoRemoteHash] = await Promise.all([
|
|
||||||
buildEnvContext(),
|
|
||||||
getRepoRemoteHash(),
|
|
||||||
])
|
|
||||||
const processMetrics = buildProcessMetrics()
|
|
||||||
|
|
||||||
const metadata: EventMetadata = {
|
|
||||||
model,
|
|
||||||
sessionId: getSessionId(),
|
|
||||||
userType: process.env.USER_TYPE || '',
|
|
||||||
...(betas.length > 0 ? { betas: betas } : {}),
|
|
||||||
envContext,
|
|
||||||
...(process.env.CLAUDE_CODE_ENTRYPOINT && {
|
|
||||||
entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
|
|
||||||
}),
|
|
||||||
...(process.env.CLAUDE_AGENT_SDK_VERSION && {
|
|
||||||
agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
|
|
||||||
}),
|
|
||||||
isInteractive: String(getIsInteractive()),
|
|
||||||
clientType: getClientType(),
|
|
||||||
...(processMetrics && { processMetrics }),
|
|
||||||
sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
|
|
||||||
sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
|
|
||||||
sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
|
|
||||||
// Swarm/team agent identification
|
|
||||||
// Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
|
|
||||||
...getAgentIdentification(),
|
|
||||||
// Subscription tier for DAU-by-tier analytics
|
|
||||||
...(getSubscriptionType() && {
|
|
||||||
subscriptionType: getSubscriptionType()!,
|
|
||||||
}),
|
|
||||||
// Assistant mode tag — lives outside memoized buildEnvContext() because
|
|
||||||
// setKairosActive() runs at main.tsx:~1648, after the first event may
|
|
||||||
// have already fired and memoized the env. Read fresh per-event instead.
|
|
||||||
...(feature('KAIROS') && getKairosActive()
|
|
||||||
? { kairosActive: true as const }
|
|
||||||
: {}),
|
|
||||||
// Repo remote hash for joining with server-side repo bundle data
|
|
||||||
...(repoRemoteHash && { rh: repoRemoteHash }),
|
|
||||||
}
|
|
||||||
|
|
||||||
return metadata
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Core event metadata for 1P event logging (snake_case format).
|
|
||||||
*/
|
|
||||||
export type FirstPartyEventLoggingCoreMetadata = {
|
|
||||||
session_id: string
|
|
||||||
model: string
|
|
||||||
user_type: string
|
|
||||||
betas?: string
|
|
||||||
entrypoint?: string
|
|
||||||
agent_sdk_version?: string
|
|
||||||
is_interactive: boolean
|
|
||||||
client_type: string
|
|
||||||
swe_bench_run_id?: string
|
|
||||||
swe_bench_instance_id?: string
|
|
||||||
swe_bench_task_id?: string
|
|
||||||
// Swarm/team agent identification
|
|
||||||
agent_id?: string
|
|
||||||
parent_session_id?: string
|
|
||||||
agent_type?: 'teammate' | 'subagent' | 'standalone'
|
|
||||||
team_name?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Complete event logging metadata format for 1P events.
|
|
||||||
*/
|
|
||||||
export type FirstPartyEventLoggingMetadata = {
|
|
||||||
env: EnvironmentMetadata
|
|
||||||
process?: string
|
|
||||||
// auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
|
|
||||||
// account_id is intentionally omitted — only UUID fields are populated client-side.
|
|
||||||
auth?: PublicApiAuth
|
|
||||||
// core fields correspond to the top level of ClaudeCodeInternalEvent.
|
|
||||||
// They get directly exported to their individual columns in the BigQuery tables
|
|
||||||
core: FirstPartyEventLoggingCoreMetadata
|
|
||||||
// additional fields are populated in the additional_metadata field of the
|
|
||||||
// ClaudeCodeInternalEvent proto. Includes but is not limited to information
|
|
||||||
// that differs by event type.
|
|
||||||
additional: Record<string, unknown>
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert metadata to 1P event logging format (snake_case fields).
|
|
||||||
*
|
|
||||||
* The /api/event_logging/batch endpoint expects snake_case field names
|
|
||||||
* for environment and core metadata.
|
|
||||||
*
|
|
||||||
* @param metadata - Core event metadata
|
|
||||||
* @param additionalMetadata - Additional metadata to include
|
|
||||||
* @returns Metadata formatted for 1P event logging
|
|
||||||
*/
|
|
||||||
export function to1PEventFormat(
|
|
||||||
metadata: EventMetadata,
|
|
||||||
userMetadata: CoreUserData,
|
|
||||||
additionalMetadata: Record<string, unknown> = {},
|
|
||||||
): FirstPartyEventLoggingMetadata {
|
|
||||||
const {
|
|
||||||
envContext,
|
|
||||||
processMetrics,
|
|
||||||
rh,
|
|
||||||
kairosActive,
|
|
||||||
skillMode,
|
|
||||||
observerMode,
|
|
||||||
...coreFields
|
|
||||||
} = metadata
|
|
||||||
|
|
||||||
// Convert envContext to snake_case.
|
|
||||||
// IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
|
|
||||||
// adding a field here that the proto doesn't define is a compile error. The
|
|
||||||
// generated toJSON() serializer silently drops unknown keys — a hand-written
|
|
||||||
// parallel type previously let #11318, #13924, #19448, and coworker_type all
|
|
||||||
// ship fields that never reached BQ.
|
|
||||||
// Adding a field? Update the monorepo proto first (go/cc-logging):
|
|
||||||
// event_schemas/.../claude_code/v1/claude_code_internal_event.proto
|
|
||||||
// then run `bun run generate:proto` here.
|
|
||||||
const env: EnvironmentMetadata = {
|
|
||||||
platform: envContext.platform,
|
|
||||||
platform_raw: envContext.platformRaw,
|
|
||||||
arch: envContext.arch,
|
|
||||||
node_version: envContext.nodeVersion,
|
|
||||||
terminal: envContext.terminal || 'unknown',
|
|
||||||
package_managers: envContext.packageManagers,
|
|
||||||
runtimes: envContext.runtimes,
|
|
||||||
is_running_with_bun: envContext.isRunningWithBun,
|
|
||||||
is_ci: envContext.isCi,
|
|
||||||
is_claubbit: envContext.isClaubbit,
|
|
||||||
is_claude_code_remote: envContext.isClaudeCodeRemote,
|
|
||||||
is_local_agent_mode: envContext.isLocalAgentMode,
|
|
||||||
is_conductor: envContext.isConductor,
|
|
||||||
is_github_action: envContext.isGithubAction,
|
|
||||||
is_claude_code_action: envContext.isClaudeCodeAction,
|
|
||||||
is_claude_ai_auth: envContext.isClaudeAiAuth,
|
|
||||||
version: envContext.version,
|
|
||||||
build_time: envContext.buildTime,
|
|
||||||
deployment_environment: envContext.deploymentEnvironment,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add optional env fields
|
|
||||||
if (envContext.remoteEnvironmentType) {
|
|
||||||
env.remote_environment_type = envContext.remoteEnvironmentType
|
|
||||||
}
|
|
||||||
if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
|
|
||||||
env.coworker_type = envContext.coworkerType
|
|
||||||
}
|
|
||||||
if (envContext.claudeCodeContainerId) {
|
|
||||||
env.claude_code_container_id = envContext.claudeCodeContainerId
|
|
||||||
}
|
|
||||||
if (envContext.claudeCodeRemoteSessionId) {
|
|
||||||
env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
|
|
||||||
}
|
|
||||||
if (envContext.tags) {
|
|
||||||
env.tags = envContext.tags
|
|
||||||
.split(',')
|
|
||||||
.map(t => t.trim())
|
|
||||||
.filter(Boolean)
|
|
||||||
}
|
|
||||||
if (envContext.githubEventName) {
|
|
||||||
env.github_event_name = envContext.githubEventName
|
|
||||||
}
|
|
||||||
if (envContext.githubActionsRunnerEnvironment) {
|
|
||||||
env.github_actions_runner_environment =
|
|
||||||
envContext.githubActionsRunnerEnvironment
|
|
||||||
}
|
|
||||||
if (envContext.githubActionsRunnerOs) {
|
|
||||||
env.github_actions_runner_os = envContext.githubActionsRunnerOs
|
|
||||||
}
|
|
||||||
if (envContext.githubActionRef) {
|
|
||||||
env.github_action_ref = envContext.githubActionRef
|
|
||||||
}
|
|
||||||
if (envContext.wslVersion) {
|
|
||||||
env.wsl_version = envContext.wslVersion
|
|
||||||
}
|
|
||||||
if (envContext.linuxDistroId) {
|
|
||||||
env.linux_distro_id = envContext.linuxDistroId
|
|
||||||
}
|
|
||||||
if (envContext.linuxDistroVersion) {
|
|
||||||
env.linux_distro_version = envContext.linuxDistroVersion
|
|
||||||
}
|
|
||||||
if (envContext.linuxKernel) {
|
|
||||||
env.linux_kernel = envContext.linuxKernel
|
|
||||||
}
|
|
||||||
if (envContext.vcs) {
|
|
||||||
env.vcs = envContext.vcs
|
|
||||||
}
|
|
||||||
if (envContext.versionBase) {
|
|
||||||
env.version_base = envContext.versionBase
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert core fields to snake_case
|
|
||||||
const core: FirstPartyEventLoggingCoreMetadata = {
|
|
||||||
session_id: coreFields.sessionId,
|
|
||||||
model: coreFields.model,
|
|
||||||
user_type: coreFields.userType,
|
|
||||||
is_interactive: coreFields.isInteractive === 'true',
|
|
||||||
client_type: coreFields.clientType,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add other core fields
|
|
||||||
if (coreFields.betas) {
|
|
||||||
core.betas = coreFields.betas
|
|
||||||
}
|
|
||||||
if (coreFields.entrypoint) {
|
|
||||||
core.entrypoint = coreFields.entrypoint
|
|
||||||
}
|
|
||||||
if (coreFields.agentSdkVersion) {
|
|
||||||
core.agent_sdk_version = coreFields.agentSdkVersion
|
|
||||||
}
|
|
||||||
if (coreFields.sweBenchRunId) {
|
|
||||||
core.swe_bench_run_id = coreFields.sweBenchRunId
|
|
||||||
}
|
|
||||||
if (coreFields.sweBenchInstanceId) {
|
|
||||||
core.swe_bench_instance_id = coreFields.sweBenchInstanceId
|
|
||||||
}
|
|
||||||
if (coreFields.sweBenchTaskId) {
|
|
||||||
core.swe_bench_task_id = coreFields.sweBenchTaskId
|
|
||||||
}
|
|
||||||
// Swarm/team agent identification
|
|
||||||
if (coreFields.agentId) {
|
|
||||||
core.agent_id = coreFields.agentId
|
|
||||||
}
|
|
||||||
if (coreFields.parentSessionId) {
|
|
||||||
core.parent_session_id = coreFields.parentSessionId
|
|
||||||
}
|
|
||||||
if (coreFields.agentType) {
|
|
||||||
core.agent_type = coreFields.agentType
|
|
||||||
}
|
|
||||||
if (coreFields.teamName) {
|
|
||||||
core.team_name = coreFields.teamName
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map userMetadata to output fields.
|
|
||||||
// Based on src/utils/user.ts getUser(), but with fields present in other
|
|
||||||
// parts of ClaudeCodeInternalEvent deduplicated.
|
|
||||||
// Convert camelCase GitHubActionsMetadata to snake_case for 1P API
|
|
||||||
// Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
|
|
||||||
// rather than at the top level of ClaudeCodeInternalEvent
|
|
||||||
if (userMetadata.githubActionsMetadata) {
|
|
||||||
const ghMeta = userMetadata.githubActionsMetadata
|
|
||||||
env.github_actions_metadata = {
|
|
||||||
actor_id: ghMeta.actorId,
|
|
||||||
repository_id: ghMeta.repositoryId,
|
|
||||||
repository_owner_id: ghMeta.repositoryOwnerId,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let auth: PublicApiAuth | undefined
|
|
||||||
if (userMetadata.accountUuid || userMetadata.organizationUuid) {
|
|
||||||
auth = {
|
|
||||||
account_uuid: userMetadata.accountUuid,
|
|
||||||
organization_uuid: userMetadata.organizationUuid,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
env,
|
|
||||||
...(processMetrics && {
|
|
||||||
process: Buffer.from(jsonStringify(processMetrics)).toString('base64'),
|
|
||||||
}),
|
|
||||||
...(auth && { auth }),
|
|
||||||
core,
|
|
||||||
additional: {
|
|
||||||
...(rh && { rh }),
|
|
||||||
...(kairosActive && { is_assistant_mode: true }),
|
|
||||||
...(skillMode && { skill_mode: skillMode }),
|
|
||||||
...(observerMode && { observer_mode: observerMode }),
|
|
||||||
...additionalMetadata,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,111 +1,28 @@
|
|||||||
/**
|
/**
|
||||||
* Analytics sink implementation
|
* Analytics sink implementation
|
||||||
*
|
*
|
||||||
* This module contains the actual analytics routing logic and should be
|
* This open build keeps the analytics sink boundary for compatibility, but
|
||||||
* initialized during app startup. It routes events to Datadog and 1P event
|
* drops all queued analytics events locally instead of routing them onward.
|
||||||
* logging.
|
|
||||||
*
|
|
||||||
* Usage: Call initializeAnalyticsSink() during app startup to attach the sink.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { trackDatadogEvent } from './datadog.js'
|
import { attachAnalyticsSink } from './index.js'
|
||||||
import { logEventTo1P, shouldSampleEvent } from './firstPartyEventLogger.js'
|
|
||||||
import { checkStatsigFeatureGate_CACHED_MAY_BE_STALE } from './growthbook.js'
|
|
||||||
import { attachAnalyticsSink, stripProtoFields } from './index.js'
|
|
||||||
import { isSinkKilled } from './sinkKillswitch.js'
|
|
||||||
|
|
||||||
// Local type matching the logEvent metadata signature
|
|
||||||
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
||||||
|
|
||||||
const DATADOG_GATE_NAME = 'tengu_log_datadog_events'
|
function logEventImpl(
|
||||||
|
_eventName: string,
|
||||||
// Module-level gate state - starts undefined, initialized during startup
|
_metadata: LogEventMetadata,
|
||||||
let isDatadogGateEnabled: boolean | undefined = undefined
|
): void {
|
||||||
|
return
|
||||||
/**
|
|
||||||
* Check if Datadog tracking is enabled.
|
|
||||||
* Falls back to cached value from previous session if not yet initialized.
|
|
||||||
*/
|
|
||||||
function shouldTrackDatadog(): boolean {
|
|
||||||
if (isSinkKilled('datadog')) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if (isDatadogGateEnabled !== undefined) {
|
|
||||||
return isDatadogGateEnabled
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to cached value from previous session
|
|
||||||
try {
|
|
||||||
return checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
|
|
||||||
} catch {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Log an event (synchronous implementation)
|
|
||||||
*/
|
|
||||||
function logEventImpl(eventName: string, metadata: LogEventMetadata): void {
|
|
||||||
// Check if this event should be sampled
|
|
||||||
const sampleResult = shouldSampleEvent(eventName)
|
|
||||||
|
|
||||||
// If sample result is 0, the event was not selected for logging
|
|
||||||
if (sampleResult === 0) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// If sample result is a positive number, add it to metadata
|
|
||||||
const metadataWithSampleRate =
|
|
||||||
sampleResult !== null
|
|
||||||
? { ...metadata, sample_rate: sampleResult }
|
|
||||||
: metadata
|
|
||||||
|
|
||||||
if (shouldTrackDatadog()) {
|
|
||||||
// Datadog is a general-access backend — strip _PROTO_* keys
|
|
||||||
// (unredacted PII-tagged values meant only for the 1P privileged column).
|
|
||||||
void trackDatadogEvent(eventName, stripProtoFields(metadataWithSampleRate))
|
|
||||||
}
|
|
||||||
|
|
||||||
// 1P receives the full payload including _PROTO_* — the exporter
|
|
||||||
// destructures and routes those keys to proto fields itself.
|
|
||||||
logEventTo1P(eventName, metadataWithSampleRate)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log an event (asynchronous implementation)
|
|
||||||
*
|
|
||||||
* With Segment removed the two remaining sinks are fire-and-forget, so this
|
|
||||||
* just wraps the sync impl — kept to preserve the sink interface contract.
|
|
||||||
*/
|
|
||||||
function logEventAsyncImpl(
|
function logEventAsyncImpl(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: LogEventMetadata,
|
_metadata: LogEventMetadata,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
logEventImpl(eventName, metadata)
|
|
||||||
return Promise.resolve()
|
return Promise.resolve()
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize analytics gates during startup.
|
|
||||||
*
|
|
||||||
* Updates gate values from server. Early events use cached values from previous
|
|
||||||
* session to avoid data loss during initialization.
|
|
||||||
*
|
|
||||||
* Called from main.tsx during setupBackend().
|
|
||||||
*/
|
|
||||||
export function initializeAnalyticsGates(): void {
|
|
||||||
isDatadogGateEnabled =
|
|
||||||
checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the analytics sink.
|
|
||||||
*
|
|
||||||
* Call this during app startup to attach the analytics backend.
|
|
||||||
* Any events logged before this is called will be queued and drained.
|
|
||||||
*
|
|
||||||
* Idempotent: safe to call multiple times (subsequent calls are no-ops).
|
|
||||||
*/
|
|
||||||
export function initializeAnalyticsSink(): void {
|
export function initializeAnalyticsSink(): void {
|
||||||
attachAnalyticsSink({
|
attachAnalyticsSink({
|
||||||
logEvent: logEventImpl,
|
logEvent: logEventImpl,
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from './growthbook.js'
|
|
||||||
|
|
||||||
// Mangled name: per-sink analytics killswitch
|
|
||||||
const SINK_KILLSWITCH_CONFIG_NAME = 'tengu_frond_boric'
|
|
||||||
|
|
||||||
export type SinkName = 'datadog' | 'firstParty'
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GrowthBook JSON config that disables individual analytics sinks.
|
|
||||||
* Shape: { datadog?: boolean, firstParty?: boolean }
|
|
||||||
* A value of true for a key stops all dispatch to that sink.
|
|
||||||
* Default {} (nothing killed). Fail-open: missing/malformed config = sink stays on.
|
|
||||||
*
|
|
||||||
* NOTE: Must NOT be called from inside is1PEventLoggingEnabled() -
|
|
||||||
* growthbook.ts:isGrowthBookEnabled() calls that, so a lookup here would recurse.
|
|
||||||
* Call at per-event dispatch sites instead.
|
|
||||||
*/
|
|
||||||
export function isSinkKilled(sink: SinkName): boolean {
|
|
||||||
const config = getDynamicConfig_CACHED_MAY_BE_STALE<
|
|
||||||
Partial<Record<SinkName, boolean>>
|
|
||||||
>(SINK_KILLSWITCH_CONFIG_NAME, {})
|
|
||||||
// getFeatureValue_CACHED_MAY_BE_STALE guards on `!== undefined`, so a
|
|
||||||
// cached JSON null leaks through instead of falling back to {}.
|
|
||||||
return config?.[sink] === true
|
|
||||||
}
|
|
||||||
@@ -1,159 +0,0 @@
|
|||||||
import axios from 'axios'
|
|
||||||
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
|
|
||||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
|
||||||
import { logForDebugging } from '../../utils/debug.js'
|
|
||||||
import { errorMessage } from '../../utils/errors.js'
|
|
||||||
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
|
|
||||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
|
||||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
|
||||||
|
|
||||||
type MetricsEnabledResponse = {
|
|
||||||
metrics_logging_enabled: boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
type MetricsStatus = {
|
|
||||||
enabled: boolean
|
|
||||||
hasError: boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
// In-memory TTL — dedupes calls within a single process
|
|
||||||
const CACHE_TTL_MS = 60 * 60 * 1000
|
|
||||||
|
|
||||||
// Disk TTL — org settings rarely change. When disk cache is fresher than this,
|
|
||||||
// we skip the network entirely (no background refresh). This is what collapses
|
|
||||||
// N `claude -p` invocations into ~1 API call/day.
|
|
||||||
const DISK_CACHE_TTL_MS = 24 * 60 * 60 * 1000
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal function to call the API and check if metrics are enabled
|
|
||||||
* This is wrapped by memoizeWithTTLAsync to add caching behavior
|
|
||||||
*/
|
|
||||||
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
|
|
||||||
const authResult = getAuthHeaders()
|
|
||||||
if (authResult.error) {
|
|
||||||
throw new Error(`Auth error: ${authResult.error}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
const headers = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'User-Agent': getClaudeCodeUserAgent(),
|
|
||||||
...authResult.headers,
|
|
||||||
}
|
|
||||||
|
|
||||||
const endpoint = `https://api.anthropic.com/api/claude_code/organizations/metrics_enabled`
|
|
||||||
const response = await axios.get<MetricsEnabledResponse>(endpoint, {
|
|
||||||
headers,
|
|
||||||
timeout: 5000,
|
|
||||||
})
|
|
||||||
return response.data
|
|
||||||
}
|
|
||||||
|
|
||||||
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
|
|
||||||
// Incident kill switch: skip the network call when nonessential traffic is disabled.
|
|
||||||
// Returning enabled:false sheds load at the consumer (bigqueryExporter skips
|
|
||||||
// export). Matches the non-subscriber early-return shape below.
|
|
||||||
if (isEssentialTrafficOnly()) {
|
|
||||||
return { enabled: false, hasError: false }
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const data = await withOAuth401Retry(_fetchMetricsEnabled, {
|
|
||||||
also403Revoked: true,
|
|
||||||
})
|
|
||||||
|
|
||||||
logForDebugging(
|
|
||||||
`Metrics opt-out API response: enabled=${data.metrics_logging_enabled}`,
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
enabled: data.metrics_logging_enabled,
|
|
||||||
hasError: false,
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logForDebugging(
|
|
||||||
`Failed to check metrics opt-out status: ${errorMessage(error)}`,
|
|
||||||
)
|
|
||||||
logError(error)
|
|
||||||
return { enabled: false, hasError: true }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create memoized version with custom error handling
|
|
||||||
const memoizedCheckMetrics = memoizeWithTTLAsync(
|
|
||||||
_checkMetricsEnabledAPI,
|
|
||||||
CACHE_TTL_MS,
|
|
||||||
)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fetch (in-memory memoized) and persist to disk on change.
|
|
||||||
* Errors are not persisted — a transient failure should not overwrite a
|
|
||||||
* known-good disk value.
|
|
||||||
*/
|
|
||||||
async function refreshMetricsStatus(): Promise<MetricsStatus> {
|
|
||||||
const result = await memoizedCheckMetrics()
|
|
||||||
if (result.hasError) {
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
const cached = getGlobalConfig().metricsStatusCache
|
|
||||||
const unchanged = cached !== undefined && cached.enabled === result.enabled
|
|
||||||
// Skip write when unchanged AND timestamp still fresh — avoids config churn
|
|
||||||
// when concurrent callers race past a stale disk entry and all try to write.
|
|
||||||
if (unchanged && Date.now() - cached.timestamp < DISK_CACHE_TTL_MS) {
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
saveGlobalConfig(current => ({
|
|
||||||
...current,
|
|
||||||
metricsStatusCache: {
|
|
||||||
enabled: result.enabled,
|
|
||||||
timestamp: Date.now(),
|
|
||||||
},
|
|
||||||
}))
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if metrics are enabled for the current organization.
|
|
||||||
*
|
|
||||||
* Two-tier cache:
|
|
||||||
* - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
|
|
||||||
* - In-memory (1h TTL): dedupes the background refresh within a process.
|
|
||||||
*
|
|
||||||
* The caller (bigqueryExporter) tolerates stale reads — a missed export or
|
|
||||||
* an extra one during the 24h window is acceptable.
|
|
||||||
*/
|
|
||||||
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
|
|
||||||
// Service key OAuth sessions lack user:profile scope → would 403.
|
|
||||||
// API key users (non-subscribers) fall through and use x-api-key auth.
|
|
||||||
// This check runs before the disk read so we never persist auth-state-derived
|
|
||||||
// answers — only real API responses go to disk. Otherwise a service-key
|
|
||||||
// session would poison the cache for a later full-OAuth session.
|
|
||||||
if (isClaudeAISubscriber() && !hasProfileScope()) {
|
|
||||||
return { enabled: false, hasError: false }
|
|
||||||
}
|
|
||||||
|
|
||||||
const cached = getGlobalConfig().metricsStatusCache
|
|
||||||
if (cached) {
|
|
||||||
if (Date.now() - cached.timestamp > DISK_CACHE_TTL_MS) {
|
|
||||||
// saveGlobalConfig's fallback path (config.ts:731) can throw if both
|
|
||||||
// locked and fallback writes fail — catch here so fire-and-forget
|
|
||||||
// doesn't become an unhandled rejection.
|
|
||||||
void refreshMetricsStatus().catch(logError)
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
enabled: cached.enabled,
|
|
||||||
hasError: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// First-ever run on this machine: block on the network to populate disk.
|
|
||||||
return refreshMetricsStatus()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Export for testing purposes only
|
|
||||||
export const _clearMetricsEnabledCacheForTesting = (): void => {
|
|
||||||
memoizedCheckMetrics.cache.clear()
|
|
||||||
}
|
|
||||||
@@ -1,105 +1,33 @@
|
|||||||
/**
|
/**
|
||||||
* Beta Session Tracing for Claude Code
|
* Detailed beta tracing egress is disabled in this build.
|
||||||
*
|
*
|
||||||
* This module contains beta tracing features enabled when
|
* The exported helpers remain for compile-time compatibility, but do not
|
||||||
* ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
|
* retain tracing state or emit tracing attributes.
|
||||||
*
|
|
||||||
* For external users, tracing is enabled in SDK/headless mode, or in
|
|
||||||
* interactive mode when the org is allowlisted via the
|
|
||||||
* tengu_trace_lantern GrowthBook gate.
|
|
||||||
* For ant users, tracing is enabled in all modes.
|
|
||||||
*
|
|
||||||
* Visibility Rules:
|
|
||||||
* | Content | External | Ant |
|
|
||||||
* |------------------|----------|------|
|
|
||||||
* | System prompts | ✅ | ✅ |
|
|
||||||
* | Model output | ✅ | ✅ |
|
|
||||||
* | Thinking output | ❌ | ✅ |
|
|
||||||
* | Tools | ✅ | ✅ |
|
|
||||||
* | new_context | ✅ | ✅ |
|
|
||||||
*
|
|
||||||
* Features:
|
|
||||||
* - Per-agent message tracking with hash-based deduplication
|
|
||||||
* - System prompt logging (once per unique hash)
|
|
||||||
* - Hook execution spans
|
|
||||||
* - Detailed new_context attributes for LLM requests
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import type { Span } from '@opentelemetry/api'
|
type AttributeValue = string | number | boolean
|
||||||
import { createHash } from 'crypto'
|
|
||||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
|
||||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
|
|
||||||
import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
|
|
||||||
import type { AssistantMessage, UserMessage } from '../../types/message.js'
|
|
||||||
import { isEnvTruthy } from '../envUtils.js'
|
|
||||||
import { jsonParse, jsonStringify } from '../slowOperations.js'
|
|
||||||
import { logOTelEvent } from './events.js'
|
|
||||||
|
|
||||||
// Message type for API calls (UserMessage or AssistantMessage)
|
export interface SpanAttributeWriter {
|
||||||
type APIMessage = UserMessage | AssistantMessage
|
setAttribute?(_key: string, _value: AttributeValue): void
|
||||||
|
setAttributes?(_attributes: Record<string, AttributeValue>): void
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
export interface LLMRequestNewContext {
|
||||||
* Track hashes we've already logged this session (system prompts, tools, etc).
|
systemPrompt?: string
|
||||||
*
|
querySource?: string
|
||||||
* WHY: System prompts and tool schemas are large and rarely change within a session.
|
tools?: string
|
||||||
* Sending full content on every request would be wasteful. Instead, we hash and
|
}
|
||||||
* only log the full content once per unique hash.
|
|
||||||
*/
|
|
||||||
const seenHashes = new Set<string>()
|
|
||||||
|
|
||||||
/**
|
const MAX_CONTENT_SIZE = 60 * 1024
|
||||||
* Track the last reported message hash per querySource (agent) for incremental context.
|
|
||||||
*
|
|
||||||
* WHY: When debugging traces, we want to see what NEW information was added each turn,
|
|
||||||
* not the entire conversation history (which can be huge). By tracking the last message
|
|
||||||
* we reported per agent, we can compute and send only the delta (new messages since
|
|
||||||
* the last request). This is tracked per-agent (querySource) because different agents
|
|
||||||
* (main thread, subagents, warmup requests) have independent conversation contexts.
|
|
||||||
*/
|
|
||||||
const lastReportedMessageHash = new Map<string, string>()
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Clear tracking state after compaction.
|
|
||||||
* Old hashes are irrelevant once messages have been replaced.
|
|
||||||
*/
|
|
||||||
export function clearBetaTracingState(): void {
|
export function clearBetaTracingState(): void {
|
||||||
seenHashes.clear()
|
return
|
||||||
lastReportedMessageHash.clear()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if beta detailed tracing is enabled.
|
|
||||||
* - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
|
|
||||||
* - For external users, enabled in SDK/headless mode OR when org is
|
|
||||||
* allowlisted via the tengu_trace_lantern GrowthBook gate
|
|
||||||
*/
|
|
||||||
export function isBetaTracingEnabled(): boolean {
|
export function isBetaTracingEnabled(): boolean {
|
||||||
const baseEnabled =
|
return false
|
||||||
isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) &&
|
|
||||||
Boolean(process.env.BETA_TRACING_ENDPOINT)
|
|
||||||
|
|
||||||
if (!baseEnabled) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// For external users, enable in SDK/headless mode OR when org is allowlisted.
|
|
||||||
// Gate reads from disk cache, so first run after allowlisting returns false;
|
|
||||||
// works from second run onward (same behavior as enhanced_telemetry_beta).
|
|
||||||
if (process.env.USER_TYPE !== 'ant') {
|
|
||||||
return (
|
|
||||||
getIsNonInteractiveSession() ||
|
|
||||||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Truncate content to fit within Honeycomb limits.
|
|
||||||
*/
|
|
||||||
export function truncateContent(
|
export function truncateContent(
|
||||||
content: string,
|
content: string,
|
||||||
maxSize: number = MAX_CONTENT_SIZE,
|
maxSize: number = MAX_CONTENT_SIZE,
|
||||||
@@ -116,376 +44,43 @@ export function truncateContent(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a short hash (first 12 hex chars of SHA-256).
|
|
||||||
*/
|
|
||||||
function shortHash(content: string): string {
|
|
||||||
return createHash('sha256').update(content).digest('hex').slice(0, 12)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a hash for a system prompt.
|
|
||||||
*/
|
|
||||||
function hashSystemPrompt(systemPrompt: string): string {
|
|
||||||
return `sp_${shortHash(systemPrompt)}`
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a hash for a message based on its content.
|
|
||||||
*/
|
|
||||||
function hashMessage(message: APIMessage): string {
|
|
||||||
const content = jsonStringify(message.message.content)
|
|
||||||
return `msg_${shortHash(content)}`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Regex to detect content wrapped in <system-reminder> tags
|
|
||||||
const SYSTEM_REMINDER_REGEX =
|
|
||||||
/^<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>$/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if text is entirely a system reminder (wrapped in <system-reminder> tags).
|
|
||||||
* Returns the inner content if it is, null otherwise.
|
|
||||||
*/
|
|
||||||
function extractSystemReminderContent(text: string): string | null {
|
|
||||||
const match = text.trim().match(SYSTEM_REMINDER_REGEX)
|
|
||||||
return match && match[1] ? match[1].trim() : null
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Result of formatting messages - separates regular content from system reminders.
|
|
||||||
*/
|
|
||||||
interface FormattedMessages {
|
|
||||||
contextParts: string[]
|
|
||||||
systemReminders: string[]
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format user messages for new_context display, separating system reminders.
|
|
||||||
* Only handles user messages (assistant messages are filtered out before this is called).
|
|
||||||
*/
|
|
||||||
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
|
|
||||||
const contextParts: string[] = []
|
|
||||||
const systemReminders: string[] = []
|
|
||||||
|
|
||||||
for (const message of messages) {
|
|
||||||
const content = message.message.content
|
|
||||||
if (typeof content === 'string') {
|
|
||||||
const reminderContent = extractSystemReminderContent(content)
|
|
||||||
if (reminderContent) {
|
|
||||||
systemReminders.push(reminderContent)
|
|
||||||
} else {
|
|
||||||
contextParts.push(`[USER]\n${content}`)
|
|
||||||
}
|
|
||||||
} else if (Array.isArray(content)) {
|
|
||||||
for (const block of content) {
|
|
||||||
if (block.type === 'text') {
|
|
||||||
const reminderContent = extractSystemReminderContent(block.text)
|
|
||||||
if (reminderContent) {
|
|
||||||
systemReminders.push(reminderContent)
|
|
||||||
} else {
|
|
||||||
contextParts.push(`[USER]\n${block.text}`)
|
|
||||||
}
|
|
||||||
} else if (block.type === 'tool_result') {
|
|
||||||
const resultContent =
|
|
||||||
typeof block.content === 'string'
|
|
||||||
? block.content
|
|
||||||
: jsonStringify(block.content)
|
|
||||||
// Tool results can also contain system reminders (e.g., malware warning)
|
|
||||||
const reminderContent = extractSystemReminderContent(resultContent)
|
|
||||||
if (reminderContent) {
|
|
||||||
systemReminders.push(reminderContent)
|
|
||||||
} else {
|
|
||||||
contextParts.push(
|
|
||||||
`[TOOL RESULT: ${block.tool_use_id}]\n${resultContent}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return { contextParts, systemReminders }
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface LLMRequestNewContext {
|
|
||||||
/** System prompt (typically only on first request or if changed) */
|
|
||||||
systemPrompt?: string
|
|
||||||
/** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */
|
|
||||||
querySource?: string
|
|
||||||
/** Tool schemas sent with the request */
|
|
||||||
tools?: string
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add beta attributes to an interaction span.
|
|
||||||
* Adds new_context with the user prompt.
|
|
||||||
*/
|
|
||||||
export function addBetaInteractionAttributes(
|
export function addBetaInteractionAttributes(
|
||||||
span: Span,
|
_span: SpanAttributeWriter,
|
||||||
userPrompt: string,
|
_userPrompt: string,
|
||||||
): void {
|
): void {
|
||||||
if (!isBetaTracingEnabled()) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
|
||||||
`[USER PROMPT]\n${userPrompt}`,
|
|
||||||
)
|
|
||||||
span.setAttributes({
|
|
||||||
new_context: truncatedPrompt,
|
|
||||||
...(truncated && {
|
|
||||||
new_context_truncated: true,
|
|
||||||
new_context_original_length: userPrompt.length,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Add beta attributes to an LLM request span.
|
|
||||||
* Handles system prompt logging and new_context computation.
|
|
||||||
*/
|
|
||||||
export function addBetaLLMRequestAttributes(
|
export function addBetaLLMRequestAttributes(
|
||||||
span: Span,
|
_span: SpanAttributeWriter,
|
||||||
newContext?: LLMRequestNewContext,
|
_newContext?: LLMRequestNewContext,
|
||||||
messagesForAPI?: APIMessage[],
|
_messagesForAPI?: unknown[],
|
||||||
): void {
|
): void {
|
||||||
if (!isBetaTracingEnabled()) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add system prompt info to the span
|
|
||||||
if (newContext?.systemPrompt) {
|
|
||||||
const promptHash = hashSystemPrompt(newContext.systemPrompt)
|
|
||||||
const preview = newContext.systemPrompt.slice(0, 500)
|
|
||||||
|
|
||||||
// Always add hash, preview, and length to the span
|
|
||||||
span.setAttribute('system_prompt_hash', promptHash)
|
|
||||||
span.setAttribute('system_prompt_preview', preview)
|
|
||||||
span.setAttribute('system_prompt_length', newContext.systemPrompt.length)
|
|
||||||
|
|
||||||
// Log the full system prompt only once per unique hash this session
|
|
||||||
if (!seenHashes.has(promptHash)) {
|
|
||||||
seenHashes.add(promptHash)
|
|
||||||
|
|
||||||
// Truncate for the log if needed
|
|
||||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
|
||||||
newContext.systemPrompt,
|
|
||||||
)
|
|
||||||
|
|
||||||
void logOTelEvent('system_prompt', {
|
|
||||||
system_prompt_hash: promptHash,
|
|
||||||
system_prompt: truncatedPrompt,
|
|
||||||
system_prompt_length: String(newContext.systemPrompt.length),
|
|
||||||
...(truncated && { system_prompt_truncated: 'true' }),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add tools info to the span
|
|
||||||
if (newContext?.tools) {
|
|
||||||
try {
|
|
||||||
const toolsArray = jsonParse(newContext.tools) as Record<
|
|
||||||
string,
|
|
||||||
unknown
|
|
||||||
>[]
|
|
||||||
|
|
||||||
// Build array of {name, hash} for each tool
|
|
||||||
const toolsWithHashes = toolsArray.map(tool => {
|
|
||||||
const toolJson = jsonStringify(tool)
|
|
||||||
const toolHash = shortHash(toolJson)
|
|
||||||
return {
|
|
||||||
name: typeof tool.name === 'string' ? tool.name : 'unknown',
|
|
||||||
hash: toolHash,
|
|
||||||
json: toolJson,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Set span attribute with array of name/hash pairs
|
|
||||||
span.setAttribute(
|
|
||||||
'tools',
|
|
||||||
jsonStringify(
|
|
||||||
toolsWithHashes.map(({ name, hash }) => ({ name, hash })),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
span.setAttribute('tools_count', toolsWithHashes.length)
|
|
||||||
|
|
||||||
// Log each tool's full description once per unique hash
|
|
||||||
for (const { name, hash, json } of toolsWithHashes) {
|
|
||||||
if (!seenHashes.has(`tool_${hash}`)) {
|
|
||||||
seenHashes.add(`tool_${hash}`)
|
|
||||||
|
|
||||||
const { content: truncatedTool, truncated } = truncateContent(json)
|
|
||||||
|
|
||||||
void logOTelEvent('tool', {
|
|
||||||
tool_name: sanitizeToolNameForAnalytics(name),
|
|
||||||
tool_hash: hash,
|
|
||||||
tool: truncatedTool,
|
|
||||||
...(truncated && { tool_truncated: 'true' }),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// If parsing fails, log the raw tools string
|
|
||||||
span.setAttribute('tools_parse_error', true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add new_context using hash-based tracking (visible to all users)
|
|
||||||
if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
|
|
||||||
const querySource = newContext.querySource
|
|
||||||
const lastHash = lastReportedMessageHash.get(querySource)
|
|
||||||
|
|
||||||
// Find where the last reported message is in the array
|
|
||||||
let startIndex = 0
|
|
||||||
if (lastHash) {
|
|
||||||
for (let i = 0; i < messagesForAPI.length; i++) {
|
|
||||||
const msg = messagesForAPI[i]
|
|
||||||
if (msg && hashMessage(msg) === lastHash) {
|
|
||||||
startIndex = i + 1 // Start after the last reported message
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If lastHash not found, startIndex stays 0 (send everything)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get new messages (filter out assistant messages - we only want user input/tool results)
|
|
||||||
const newMessages = messagesForAPI
|
|
||||||
.slice(startIndex)
|
|
||||||
.filter((m): m is UserMessage => m.type === 'user')
|
|
||||||
|
|
||||||
if (newMessages.length > 0) {
|
|
||||||
// Format new messages, separating system reminders from regular content
|
|
||||||
const { contextParts, systemReminders } =
|
|
||||||
formatMessagesForContext(newMessages)
|
|
||||||
|
|
||||||
// Set new_context (regular user content and tool results)
|
|
||||||
if (contextParts.length > 0) {
|
|
||||||
const fullContext = contextParts.join('\n\n---\n\n')
|
|
||||||
const { content: truncatedContext, truncated } =
|
|
||||||
truncateContent(fullContext)
|
|
||||||
|
|
||||||
span.setAttributes({
|
|
||||||
new_context: truncatedContext,
|
|
||||||
new_context_message_count: newMessages.length,
|
|
||||||
...(truncated && {
|
|
||||||
new_context_truncated: true,
|
|
||||||
new_context_original_length: fullContext.length,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set system_reminders as a separate attribute
|
|
||||||
if (systemReminders.length > 0) {
|
|
||||||
const fullReminders = systemReminders.join('\n\n---\n\n')
|
|
||||||
const { content: truncatedReminders, truncated: remindersTruncated } =
|
|
||||||
truncateContent(fullReminders)
|
|
||||||
|
|
||||||
span.setAttributes({
|
|
||||||
system_reminders: truncatedReminders,
|
|
||||||
system_reminders_count: systemReminders.length,
|
|
||||||
...(remindersTruncated && {
|
|
||||||
system_reminders_truncated: true,
|
|
||||||
system_reminders_original_length: fullReminders.length,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update last reported hash to the last message in the array
|
|
||||||
const lastMessage = messagesForAPI[messagesForAPI.length - 1]
|
|
||||||
if (lastMessage) {
|
|
||||||
lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Add beta attributes to endLLMRequestSpan.
|
|
||||||
* Handles model_output and thinking_output truncation.
|
|
||||||
*/
|
|
||||||
export function addBetaLLMResponseAttributes(
|
export function addBetaLLMResponseAttributes(
|
||||||
endAttributes: Record<string, string | number | boolean>,
|
_attributes: Record<string, AttributeValue>,
|
||||||
metadata?: {
|
_metadata?: {
|
||||||
modelOutput?: string
|
modelOutput?: string
|
||||||
thinkingOutput?: string
|
thinkingOutput?: string
|
||||||
},
|
},
|
||||||
): void {
|
): void {
|
||||||
if (!isBetaTracingEnabled() || !metadata) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add model_output (text content) - visible to all users
|
|
||||||
if (metadata.modelOutput !== undefined) {
|
|
||||||
const { content: modelOutput, truncated: outputTruncated } =
|
|
||||||
truncateContent(metadata.modelOutput)
|
|
||||||
endAttributes['response.model_output'] = modelOutput
|
|
||||||
if (outputTruncated) {
|
|
||||||
endAttributes['response.model_output_truncated'] = true
|
|
||||||
endAttributes['response.model_output_original_length'] =
|
|
||||||
metadata.modelOutput.length
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add thinking_output - ant-only
|
|
||||||
if (
|
|
||||||
process.env.USER_TYPE === 'ant' &&
|
|
||||||
metadata.thinkingOutput !== undefined
|
|
||||||
) {
|
|
||||||
const { content: thinkingOutput, truncated: thinkingTruncated } =
|
|
||||||
truncateContent(metadata.thinkingOutput)
|
|
||||||
endAttributes['response.thinking_output'] = thinkingOutput
|
|
||||||
if (thinkingTruncated) {
|
|
||||||
endAttributes['response.thinking_output_truncated'] = true
|
|
||||||
endAttributes['response.thinking_output_original_length'] =
|
|
||||||
metadata.thinkingOutput.length
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Add beta attributes to startToolSpan.
|
|
||||||
* Adds tool_input with the serialized tool input.
|
|
||||||
*/
|
|
||||||
export function addBetaToolInputAttributes(
|
export function addBetaToolInputAttributes(
|
||||||
span: Span,
|
_span: SpanAttributeWriter,
|
||||||
toolName: string,
|
_toolName: string,
|
||||||
toolInput: string,
|
_toolInput: string,
|
||||||
): void {
|
): void {
|
||||||
if (!isBetaTracingEnabled()) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const { content: truncatedInput, truncated } = truncateContent(
|
|
||||||
`[TOOL INPUT: ${toolName}]\n${toolInput}`,
|
|
||||||
)
|
|
||||||
span.setAttributes({
|
|
||||||
tool_input: truncatedInput,
|
|
||||||
...(truncated && {
|
|
||||||
tool_input_truncated: true,
|
|
||||||
tool_input_original_length: toolInput.length,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Add beta attributes to endToolSpan.
|
|
||||||
* Adds new_context with the tool result.
|
|
||||||
*/
|
|
||||||
export function addBetaToolResultAttributes(
|
export function addBetaToolResultAttributes(
|
||||||
endAttributes: Record<string, string | number | boolean>,
|
_attributes: Record<string, AttributeValue>,
|
||||||
toolName: string | number | boolean,
|
_toolName: string | number | boolean,
|
||||||
toolResult: string,
|
_toolResult: string,
|
||||||
): void {
|
): void {
|
||||||
if (!isBetaTracingEnabled()) {
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const { content: truncatedResult, truncated } = truncateContent(
|
|
||||||
`[TOOL RESULT: ${toolName}]\n${toolResult}`,
|
|
||||||
)
|
|
||||||
endAttributes['new_context'] = truncatedResult
|
|
||||||
if (truncated) {
|
|
||||||
endAttributes['new_context_truncated'] = true
|
|
||||||
endAttributes['new_context_original_length'] = toolResult.length
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,252 +0,0 @@
|
|||||||
import type { Attributes, HrTime } from '@opentelemetry/api'
|
|
||||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
|
||||||
import {
|
|
||||||
AggregationTemporality,
|
|
||||||
type MetricData,
|
|
||||||
type DataPoint as OTelDataPoint,
|
|
||||||
type PushMetricExporter,
|
|
||||||
type ResourceMetrics,
|
|
||||||
} from '@opentelemetry/sdk-metrics'
|
|
||||||
import axios from 'axios'
|
|
||||||
import { checkMetricsEnabled } from 'src/services/api/metricsOptOut.js'
|
|
||||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
|
||||||
import { getSubscriptionType, isClaudeAISubscriber } from '../auth.js'
|
|
||||||
import { checkHasTrustDialogAccepted } from '../config.js'
|
|
||||||
import { logForDebugging } from '../debug.js'
|
|
||||||
import { errorMessage, toError } from '../errors.js'
|
|
||||||
import { getAuthHeaders } from '../http.js'
|
|
||||||
import { logError } from '../log.js'
|
|
||||||
import { jsonStringify } from '../slowOperations.js'
|
|
||||||
import { getClaudeCodeUserAgent } from '../userAgent.js'
|
|
||||||
|
|
||||||
type DataPoint = {
|
|
||||||
attributes: Record<string, string>
|
|
||||||
value: number
|
|
||||||
timestamp: string
|
|
||||||
}
|
|
||||||
|
|
||||||
type Metric = {
|
|
||||||
name: string
|
|
||||||
description?: string
|
|
||||||
unit?: string
|
|
||||||
data_points: DataPoint[]
|
|
||||||
}
|
|
||||||
|
|
||||||
type InternalMetricsPayload = {
|
|
||||||
resource_attributes: Record<string, string>
|
|
||||||
metrics: Metric[]
|
|
||||||
}
|
|
||||||
|
|
||||||
export class BigQueryMetricsExporter implements PushMetricExporter {
|
|
||||||
private readonly endpoint: string
|
|
||||||
private readonly timeout: number
|
|
||||||
private pendingExports: Promise<void>[] = []
|
|
||||||
private isShutdown = false
|
|
||||||
|
|
||||||
constructor(options: { timeout?: number } = {}) {
|
|
||||||
const defaultEndpoint = 'https://api.anthropic.com/api/claude_code/metrics'
|
|
||||||
|
|
||||||
if (
|
|
||||||
process.env.USER_TYPE === 'ant' &&
|
|
||||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT
|
|
||||||
) {
|
|
||||||
this.endpoint =
|
|
||||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT +
|
|
||||||
'/api/claude_code/metrics'
|
|
||||||
} else {
|
|
||||||
this.endpoint = defaultEndpoint
|
|
||||||
}
|
|
||||||
|
|
||||||
this.timeout = options.timeout || 5000
|
|
||||||
}
|
|
||||||
|
|
||||||
async export(
|
|
||||||
metrics: ResourceMetrics,
|
|
||||||
resultCallback: (result: ExportResult) => void,
|
|
||||||
): Promise<void> {
|
|
||||||
if (this.isShutdown) {
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: new Error('Exporter has been shutdown'),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const exportPromise = this.doExport(metrics, resultCallback)
|
|
||||||
this.pendingExports.push(exportPromise)
|
|
||||||
|
|
||||||
// Clean up completed exports
|
|
||||||
void exportPromise.finally(() => {
|
|
||||||
const index = this.pendingExports.indexOf(exportPromise)
|
|
||||||
if (index > -1) {
|
|
||||||
void this.pendingExports.splice(index, 1)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
private async doExport(
|
|
||||||
metrics: ResourceMetrics,
|
|
||||||
resultCallback: (result: ExportResult) => void,
|
|
||||||
): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Skip if trust not established in interactive mode
|
|
||||||
// This prevents triggering apiKeyHelper before trust dialog
|
|
||||||
const hasTrust =
|
|
||||||
checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
|
|
||||||
if (!hasTrust) {
|
|
||||||
logForDebugging(
|
|
||||||
'BigQuery metrics export: trust not established, skipping',
|
|
||||||
)
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check organization-level metrics opt-out
|
|
||||||
const metricsStatus = await checkMetricsEnabled()
|
|
||||||
if (!metricsStatus.enabled) {
|
|
||||||
logForDebugging('Metrics export disabled by organization setting')
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const payload = this.transformMetricsForInternal(metrics)
|
|
||||||
|
|
||||||
const authResult = getAuthHeaders()
|
|
||||||
if (authResult.error) {
|
|
||||||
logForDebugging(`Metrics export failed: ${authResult.error}`)
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: new Error(authResult.error),
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const headers: Record<string, string> = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'User-Agent': getClaudeCodeUserAgent(),
|
|
||||||
...authResult.headers,
|
|
||||||
}
|
|
||||||
|
|
||||||
const response = await axios.post(this.endpoint, payload, {
|
|
||||||
timeout: this.timeout,
|
|
||||||
headers,
|
|
||||||
})
|
|
||||||
|
|
||||||
logForDebugging('BigQuery metrics exported successfully')
|
|
||||||
logForDebugging(
|
|
||||||
`BigQuery API Response: ${jsonStringify(response.data, null, 2)}`,
|
|
||||||
)
|
|
||||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
|
||||||
} catch (error) {
|
|
||||||
logForDebugging(`BigQuery metrics export failed: ${errorMessage(error)}`)
|
|
||||||
logError(error)
|
|
||||||
resultCallback({
|
|
||||||
code: ExportResultCode.FAILED,
|
|
||||||
error: toError(error),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private transformMetricsForInternal(
|
|
||||||
metrics: ResourceMetrics,
|
|
||||||
): InternalMetricsPayload {
|
|
||||||
const attrs = metrics.resource.attributes
|
|
||||||
|
|
||||||
const resourceAttributes: Record<string, string> = {
|
|
||||||
'service.name': (attrs['service.name'] as string) || 'claude-code',
|
|
||||||
'service.version': (attrs['service.version'] as string) || 'unknown',
|
|
||||||
'os.type': (attrs['os.type'] as string) || 'unknown',
|
|
||||||
'os.version': (attrs['os.version'] as string) || 'unknown',
|
|
||||||
'host.arch': (attrs['host.arch'] as string) || 'unknown',
|
|
||||||
'aggregation.temporality':
|
|
||||||
this.selectAggregationTemporality() === AggregationTemporality.DELTA
|
|
||||||
? 'delta'
|
|
||||||
: 'cumulative',
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only add wsl.version if it exists (omit instead of default)
|
|
||||||
if (attrs['wsl.version']) {
|
|
||||||
resourceAttributes['wsl.version'] = attrs['wsl.version'] as string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add customer type and subscription type
|
|
||||||
if (isClaudeAISubscriber()) {
|
|
||||||
resourceAttributes['user.customer_type'] = 'claude_ai'
|
|
||||||
const subscriptionType = getSubscriptionType()
|
|
||||||
if (subscriptionType) {
|
|
||||||
resourceAttributes['user.subscription_type'] = subscriptionType
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
resourceAttributes['user.customer_type'] = 'api'
|
|
||||||
}
|
|
||||||
|
|
||||||
const transformed = {
|
|
||||||
resource_attributes: resourceAttributes,
|
|
||||||
metrics: metrics.scopeMetrics.flatMap(scopeMetric =>
|
|
||||||
scopeMetric.metrics.map(metric => ({
|
|
||||||
name: metric.descriptor.name,
|
|
||||||
description: metric.descriptor.description,
|
|
||||||
unit: metric.descriptor.unit,
|
|
||||||
data_points: this.extractDataPoints(metric),
|
|
||||||
})),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
return transformed
|
|
||||||
}
|
|
||||||
|
|
||||||
private extractDataPoints(metric: MetricData): DataPoint[] {
|
|
||||||
const dataPoints = metric.dataPoints || []
|
|
||||||
|
|
||||||
return dataPoints
|
|
||||||
.filter(
|
|
||||||
(point): point is OTelDataPoint<number> =>
|
|
||||||
typeof point.value === 'number',
|
|
||||||
)
|
|
||||||
.map(point => ({
|
|
||||||
attributes: this.convertAttributes(point.attributes),
|
|
||||||
value: point.value,
|
|
||||||
timestamp: this.hrTimeToISOString(
|
|
||||||
point.endTime || point.startTime || [Date.now() / 1000, 0],
|
|
||||||
),
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
|
|
||||||
async shutdown(): Promise<void> {
|
|
||||||
this.isShutdown = true
|
|
||||||
await this.forceFlush()
|
|
||||||
logForDebugging('BigQuery metrics exporter shutdown complete')
|
|
||||||
}
|
|
||||||
|
|
||||||
async forceFlush(): Promise<void> {
|
|
||||||
await Promise.all(this.pendingExports)
|
|
||||||
logForDebugging('BigQuery metrics exporter flush complete')
|
|
||||||
}
|
|
||||||
|
|
||||||
private convertAttributes(
|
|
||||||
attributes: Attributes | undefined,
|
|
||||||
): Record<string, string> {
|
|
||||||
const result: Record<string, string> = {}
|
|
||||||
if (attributes) {
|
|
||||||
for (const [key, value] of Object.entries(attributes)) {
|
|
||||||
if (value !== undefined && value !== null) {
|
|
||||||
result[key] = String(value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
private hrTimeToISOString(hrTime: HrTime): string {
|
|
||||||
const [seconds, nanoseconds] = hrTime
|
|
||||||
const date = new Date(seconds * 1000 + nanoseconds / 1000000)
|
|
||||||
return date.toISOString()
|
|
||||||
}
|
|
||||||
|
|
||||||
selectAggregationTemporality(): AggregationTemporality {
|
|
||||||
// DO NOT CHANGE THIS TO CUMULATIVE
|
|
||||||
// It would mess up the aggregation of metrics
|
|
||||||
// for CC Productivity metrics dashboard
|
|
||||||
return AggregationTemporality.DELTA
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,75 +1,14 @@
|
|||||||
import type { Attributes } from '@opentelemetry/api'
|
/**
|
||||||
import { getEventLogger, getPromptId } from 'src/bootstrap/state.js'
|
* OpenTelemetry event egress is disabled in this build.
|
||||||
import { logForDebugging } from '../debug.js'
|
*/
|
||||||
import { isEnvTruthy } from '../envUtils.js'
|
|
||||||
import { getTelemetryAttributes } from '../telemetryAttributes.js'
|
|
||||||
|
|
||||||
// Monotonically increasing counter for ordering events within a session
|
export function redactIfDisabled(_content: string): string {
|
||||||
let eventSequence = 0
|
return '<REDACTED>'
|
||||||
|
|
||||||
// Track whether we've already warned about a null event logger to avoid spamming
|
|
||||||
let hasWarnedNoEventLogger = false
|
|
||||||
|
|
||||||
function isUserPromptLoggingEnabled() {
|
|
||||||
return isEnvTruthy(process.env.OTEL_LOG_USER_PROMPTS)
|
|
||||||
}
|
|
||||||
|
|
||||||
export function redactIfDisabled(content: string): string {
|
|
||||||
return isUserPromptLoggingEnabled() ? content : '<REDACTED>'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function logOTelEvent(
|
export async function logOTelEvent(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: { [key: string]: string | undefined } = {},
|
_metadata: { [key: string]: string | undefined } = {},
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const eventLogger = getEventLogger()
|
return
|
||||||
if (!eventLogger) {
|
|
||||||
if (!hasWarnedNoEventLogger) {
|
|
||||||
hasWarnedNoEventLogger = true
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Event dropped (no event logger initialized): ${eventName}`,
|
|
||||||
{ level: 'warn' },
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip logging in test environment
|
|
||||||
if (process.env.NODE_ENV === 'test') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const attributes: Attributes = {
|
|
||||||
...getTelemetryAttributes(),
|
|
||||||
'event.name': eventName,
|
|
||||||
'event.timestamp': new Date().toISOString(),
|
|
||||||
'event.sequence': eventSequence++,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add prompt ID to events (but not metrics, where it would cause unbounded cardinality)
|
|
||||||
const promptId = getPromptId()
|
|
||||||
if (promptId) {
|
|
||||||
attributes['prompt.id'] = promptId
|
|
||||||
}
|
|
||||||
|
|
||||||
// Workspace directory from the desktop app (host path). Events only —
|
|
||||||
// filesystem paths are too high-cardinality for metric dimensions, and
|
|
||||||
// the BQ metrics pipeline must never see them.
|
|
||||||
const workspaceDir = process.env.CLAUDE_CODE_WORKSPACE_HOST_PATHS
|
|
||||||
if (workspaceDir) {
|
|
||||||
attributes['workspace.host_paths'] = workspaceDir.split('|')
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add metadata as attributes - all values are already strings
|
|
||||||
for (const [key, value] of Object.entries(metadata)) {
|
|
||||||
if (value !== undefined) {
|
|
||||||
attributes[key] = value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit log record as an event
|
|
||||||
eventLogger.emit({
|
|
||||||
body: `claude_code.${eventName}`,
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,825 +0,0 @@
|
|||||||
import { DiagLogLevel, diag, trace } from '@opentelemetry/api'
|
|
||||||
import { logs } from '@opentelemetry/api-logs'
|
|
||||||
// OTLP/Prometheus exporters are dynamically imported inside the protocol
|
|
||||||
// switch statements below. A process uses at most one protocol variant per
|
|
||||||
// signal, but static imports would load all 6 (~1.2MB) on every startup.
|
|
||||||
import {
|
|
||||||
envDetector,
|
|
||||||
hostDetector,
|
|
||||||
osDetector,
|
|
||||||
resourceFromAttributes,
|
|
||||||
} from '@opentelemetry/resources'
|
|
||||||
import {
|
|
||||||
BatchLogRecordProcessor,
|
|
||||||
ConsoleLogRecordExporter,
|
|
||||||
LoggerProvider,
|
|
||||||
} from '@opentelemetry/sdk-logs'
|
|
||||||
import {
|
|
||||||
ConsoleMetricExporter,
|
|
||||||
MeterProvider,
|
|
||||||
PeriodicExportingMetricReader,
|
|
||||||
} from '@opentelemetry/sdk-metrics'
|
|
||||||
import {
|
|
||||||
BasicTracerProvider,
|
|
||||||
BatchSpanProcessor,
|
|
||||||
ConsoleSpanExporter,
|
|
||||||
} from '@opentelemetry/sdk-trace-base'
|
|
||||||
import {
|
|
||||||
ATTR_SERVICE_NAME,
|
|
||||||
ATTR_SERVICE_VERSION,
|
|
||||||
SEMRESATTRS_HOST_ARCH,
|
|
||||||
} from '@opentelemetry/semantic-conventions'
|
|
||||||
import { HttpsProxyAgent } from 'https-proxy-agent'
|
|
||||||
import {
|
|
||||||
getLoggerProvider,
|
|
||||||
getMeterProvider,
|
|
||||||
getTracerProvider,
|
|
||||||
setEventLogger,
|
|
||||||
setLoggerProvider,
|
|
||||||
setMeterProvider,
|
|
||||||
setTracerProvider,
|
|
||||||
} from 'src/bootstrap/state.js'
|
|
||||||
import {
|
|
||||||
getOtelHeadersFromHelper,
|
|
||||||
getSubscriptionType,
|
|
||||||
is1PApiCustomer,
|
|
||||||
isClaudeAISubscriber,
|
|
||||||
} from 'src/utils/auth.js'
|
|
||||||
import { getPlatform, getWslVersion } from 'src/utils/platform.js'
|
|
||||||
|
|
||||||
import { getCACertificates } from '../caCerts.js'
|
|
||||||
import { registerCleanup } from '../cleanupRegistry.js'
|
|
||||||
import { getHasFormattedOutput, logForDebugging } from '../debug.js'
|
|
||||||
import { isEnvTruthy } from '../envUtils.js'
|
|
||||||
import { errorMessage } from '../errors.js'
|
|
||||||
import { getMTLSConfig } from '../mtls.js'
|
|
||||||
import { getProxyUrl, shouldBypassProxy } from '../proxy.js'
|
|
||||||
import { getSettings_DEPRECATED } from '../settings/settings.js'
|
|
||||||
import { jsonStringify } from '../slowOperations.js'
|
|
||||||
import { profileCheckpoint } from '../startupProfiler.js'
|
|
||||||
import { isBetaTracingEnabled } from './betaSessionTracing.js'
|
|
||||||
import { BigQueryMetricsExporter } from './bigqueryExporter.js'
|
|
||||||
import { ClaudeCodeDiagLogger } from './logger.js'
|
|
||||||
import { initializePerfettoTracing } from './perfettoTracing.js'
|
|
||||||
import {
|
|
||||||
endInteractionSpan,
|
|
||||||
isEnhancedTelemetryEnabled,
|
|
||||||
} from './sessionTracing.js'
|
|
||||||
|
|
||||||
// Fallback export/flush cadences (milliseconds), used when the matching
// OTEL_METRIC_EXPORT_INTERVAL / OTEL_LOGS_EXPORT_INTERVAL /
// OTEL_TRACES_EXPORT_INTERVAL env var is unset. The beta tracing path uses
// the logs/traces defaults unconditionally.
const DEFAULT_METRICS_EXPORT_INTERVAL_MS = 60000
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 5000
const DEFAULT_TRACES_EXPORT_INTERVAL_MS = 5000
|
|
||||||
|
|
||||||
class TelemetryTimeoutError extends Error {}
|
|
||||||
|
|
||||||
function telemetryTimeout(ms: number, message: string): Promise<never> {
|
|
||||||
return new Promise((_, reject) => {
|
|
||||||
setTimeout(
|
|
||||||
(rej: (e: Error) => void, msg: string) =>
|
|
||||||
rej(new TelemetryTimeoutError(msg)),
|
|
||||||
ms,
|
|
||||||
reject,
|
|
||||||
message,
|
|
||||||
).unref()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
export function bootstrapTelemetry() {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
// Read from ANT_ prefixed variables that are defined at build time
|
|
||||||
if (process.env.ANT_OTEL_METRICS_EXPORTER) {
|
|
||||||
process.env.OTEL_METRICS_EXPORTER = process.env.ANT_OTEL_METRICS_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_LOGS_EXPORTER) {
|
|
||||||
process.env.OTEL_LOGS_EXPORTER = process.env.ANT_OTEL_LOGS_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_TRACES_EXPORTER) {
|
|
||||||
process.env.OTEL_TRACES_EXPORTER = process.env.ANT_OTEL_TRACES_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_ENDPOINT =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_HEADERS =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set default tempoality to 'delta' because it's the more sane default
|
|
||||||
if (!process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = 'delta'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per OTEL spec, "none" means "no automatically configured exporter for this signal".
|
|
||||||
// https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#exporter-selection
|
|
||||||
export function parseExporterTypes(value: string | undefined): string[] {
|
|
||||||
return (value || '')
|
|
||||||
.trim()
|
|
||||||
.split(',')
|
|
||||||
.filter(Boolean)
|
|
||||||
.map(t => t.trim())
|
|
||||||
.filter(t => t !== 'none')
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getOtlpReaders() {
|
|
||||||
const exporterTypes = parseExporterTypes(process.env.OTEL_METRICS_EXPORTER)
|
|
||||||
const exportInterval = parseInt(
|
|
||||||
process.env.OTEL_METRIC_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_METRICS_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
)
|
|
||||||
|
|
||||||
const exporters = []
|
|
||||||
for (const exporterType of exporterTypes) {
|
|
||||||
if (exporterType === 'console') {
|
|
||||||
// Custom console exporter that shows resource attributes
|
|
||||||
const consoleExporter = new ConsoleMetricExporter()
|
|
||||||
const originalExport = consoleExporter.export.bind(consoleExporter)
|
|
||||||
|
|
||||||
consoleExporter.export = (metrics, callback) => {
|
|
||||||
// Log resource attributes once at the start
|
|
||||||
if (metrics.resource && metrics.resource.attributes) {
|
|
||||||
// The console exporter is for debugging, so console output is intentional here
|
|
||||||
|
|
||||||
logForDebugging('\n=== Resource Attributes ===')
|
|
||||||
logForDebugging(jsonStringify(metrics.resource.attributes))
|
|
||||||
logForDebugging('===========================\n')
|
|
||||||
}
|
|
||||||
|
|
||||||
return originalExport(metrics, callback)
|
|
||||||
}
|
|
||||||
|
|
||||||
exporters.push(consoleExporter)
|
|
||||||
} else if (exporterType === 'otlp') {
|
|
||||||
const protocol =
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL?.trim() ||
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
|
||||||
|
|
||||||
const httpConfig = getOTLPExporterConfig()
|
|
||||||
|
|
||||||
switch (protocol) {
|
|
||||||
case 'grpc': {
|
|
||||||
// Lazy-import to keep @grpc/grpc-js (~700KB) out of the telemetry chunk
|
|
||||||
// when the protocol is http/protobuf (ant default) or http/json.
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-grpc'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/json': {
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-http'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/protobuf': {
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-proto'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new Error(
|
|
||||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else if (exporterType === 'prometheus') {
|
|
||||||
const { PrometheusExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-prometheus'
|
|
||||||
)
|
|
||||||
exporters.push(new PrometheusExporter())
|
|
||||||
} else {
|
|
||||||
throw new Error(
|
|
||||||
`Unknown exporter type set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${exporterType}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return exporters.map(exporter => {
|
|
||||||
if ('export' in exporter) {
|
|
||||||
return new PeriodicExportingMetricReader({
|
|
||||||
exporter,
|
|
||||||
exportIntervalMillis: exportInterval,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return exporter
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Builds log-record exporters for every type listed in OTEL_LOGS_EXPORTER
 * ("console" or "otlp"). OTLP exporter modules are imported lazily so only
 * the protocol variant actually in use is loaded.
 *
 * @throws on an unrecognized exporter type or OTLP protocol.
 */
async function getOtlpLogExporters() {
  const exporterTypes = parseExporterTypes(process.env.OTEL_LOGS_EXPORTER)

  // Signal-specific protocol env var takes precedence over the generic one.
  const protocol =
    process.env.OTEL_EXPORTER_OTLP_LOGS_PROTOCOL?.trim() ||
    process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
  const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT

  logForDebugging(
    `[3P telemetry] getOtlpLogExporters: types=${jsonStringify(exporterTypes)}, protocol=${protocol}, endpoint=${endpoint}`,
  )

  const exporters = []
  for (const exporterType of exporterTypes) {
    if (exporterType === 'console') {
      exporters.push(new ConsoleLogRecordExporter())
    } else if (exporterType === 'otlp') {
      const httpConfig = getOTLPExporterConfig()

      switch (protocol) {
        case 'grpc': {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-grpc'
          )
          // NOTE(review): constructed without httpConfig (proxy/mTLS/headers)
          // — presumably the gRPC exporter is configured via env vars; confirm.
          exporters.push(new OTLPLogExporter())
          break
        }
        case 'http/json': {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-http'
          )
          exporters.push(new OTLPLogExporter(httpConfig))
          break
        }
        case 'http/protobuf': {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-proto'
          )
          exporters.push(new OTLPLogExporter(httpConfig))
          break
        }
        default:
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_LOGS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
      }
    } else {
      throw new Error(
        `Unknown exporter type set in OTEL_LOGS_EXPORTER env var: ${exporterType}`,
      )
    }
  }

  return exporters
}
|
|
||||||
|
|
||||||
async function getOtlpTraceExporters() {
|
|
||||||
const exporterTypes = parseExporterTypes(process.env.OTEL_TRACES_EXPORTER)
|
|
||||||
|
|
||||||
const exporters = []
|
|
||||||
for (const exporterType of exporterTypes) {
|
|
||||||
if (exporterType === 'console') {
|
|
||||||
exporters.push(new ConsoleSpanExporter())
|
|
||||||
} else if (exporterType === 'otlp') {
|
|
||||||
const protocol =
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_TRACES_PROTOCOL?.trim() ||
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
|
||||||
|
|
||||||
const httpConfig = getOTLPExporterConfig()
|
|
||||||
|
|
||||||
switch (protocol) {
|
|
||||||
case 'grpc': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-grpc'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/json': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-http'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/protobuf': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-proto'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new Error(
|
|
||||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_TRACES_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new Error(
|
|
||||||
`Unknown exporter type set in OTEL_TRACES_EXPORTER env var: ${exporterType}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return exporters
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isTelemetryEnabled() {
|
|
||||||
return isEnvTruthy(process.env.CLAUDE_CODE_ENABLE_TELEMETRY)
|
|
||||||
}
|
|
||||||
|
|
||||||
function getBigQueryExportingReader() {
|
|
||||||
const bigqueryExporter = new BigQueryMetricsExporter()
|
|
||||||
return new PeriodicExportingMetricReader({
|
|
||||||
exporter: bigqueryExporter,
|
|
||||||
exportIntervalMillis: 5 * 60 * 1000, // 5mins for BigQuery metrics exporter to reduce load
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
function isBigQueryMetricsEnabled() {
|
|
||||||
// BigQuery metrics are enabled for:
|
|
||||||
// 1. API customers (excluding Claude.ai subscribers and Bedrock/Vertex)
|
|
||||||
// 2. Claude for Enterprise (C4E) users
|
|
||||||
// 3. Claude for Teams users
|
|
||||||
const subscriptionType = getSubscriptionType()
|
|
||||||
const isC4EOrTeamUser =
|
|
||||||
isClaudeAISubscriber() &&
|
|
||||||
(subscriptionType === 'enterprise' || subscriptionType === 'team')
|
|
||||||
|
|
||||||
return is1PApiCustomer() || isC4EOrTeamUser
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize beta tracing - a separate code path for detailed debugging.
|
|
||||||
* Uses BETA_TRACING_ENDPOINT instead of OTEL_EXPORTER_OTLP_ENDPOINT.
|
|
||||||
*/
|
|
||||||
async function initializeBetaTracing(
  resource: ReturnType<typeof resourceFromAttributes>,
): Promise<void> {
  // No-op unless an endpoint is explicitly configured.
  const endpoint = process.env.BETA_TRACING_ENDPOINT
  if (!endpoint) {
    return
  }

  // Lazy-load both OTLP-over-HTTP exporters in parallel; this path is off by
  // default, so the modules should not be in the startup chunk.
  const [{ OTLPTraceExporter }, { OTLPLogExporter }] = await Promise.all([
    import('@opentelemetry/exporter-trace-otlp-http'),
    import('@opentelemetry/exporter-logs-otlp-http'),
  ])

  const httpConfig = {
    url: `${endpoint}/v1/traces`,
  }

  const logHttpConfig = {
    url: `${endpoint}/v1/logs`,
  }

  // Initialize trace exporter
  const traceExporter = new OTLPTraceExporter(httpConfig)
  const spanProcessor = new BatchSpanProcessor(traceExporter, {
    scheduledDelayMillis: DEFAULT_TRACES_EXPORT_INTERVAL_MS,
  })

  const tracerProvider = new BasicTracerProvider({
    resource,
    spanProcessors: [spanProcessor],
  })

  // Register globally (for OTEL API lookups) AND in our own state module
  // (for flush/shutdown handling elsewhere in this file).
  trace.setGlobalTracerProvider(tracerProvider)
  setTracerProvider(tracerProvider)

  // Initialize log exporter
  const logExporter = new OTLPLogExporter(logHttpConfig)
  const loggerProvider = new LoggerProvider({
    resource,
    processors: [
      new BatchLogRecordProcessor(logExporter, {
        scheduledDelayMillis: DEFAULT_LOGS_EXPORT_INTERVAL_MS,
      }),
    ],
  })

  logs.setGlobalLoggerProvider(loggerProvider)
  setLoggerProvider(loggerProvider)

  // Initialize event logger
  const eventLogger = logs.getLogger(
    'com.anthropic.claude_code.events',
    MACRO.VERSION,
  )
  setEventLogger(eventLogger)

  // Setup flush handlers - flush both logs AND traces
  process.on('beforeExit', async () => {
    await loggerProvider?.forceFlush()
    await tracerProvider?.forceFlush()
  })

  // 'exit' handlers cannot await; these flushes are fire-and-forget and may
  // not complete before the process dies.
  process.on('exit', () => {
    void loggerProvider?.forceFlush()
    void tracerProvider?.forceFlush()
  })
}
|
|
||||||
|
|
||||||
/**
 * One-time OTEL bootstrap for the process: builds metric readers, and — when
 * 3P telemetry is enabled — log and (beta) trace exporters, registers
 * flush/shutdown hooks, and returns the shared Meter used to create
 * instruments.
 */
export async function initializeTelemetry() {
  profileCheckpoint('telemetry_init_start')
  bootstrapTelemetry()

  // Console exporters call console.dir on a timer (5s logs/traces, 60s
  // metrics), writing pretty-printed objects to stdout. In stream-json
  // mode stdout is the SDK message channel; the first line (`{`) breaks
  // the SDK's line reader. Stripped here (not main.tsx) because init.ts
  // re-runs applyConfigEnvironmentVariables() inside initializeTelemetry-
  // AfterTrust for remote-managed-settings users, and bootstrapTelemetry
  // above copies ANT_OTEL_* for ant users — both would undo an earlier strip.
  if (getHasFormattedOutput()) {
    for (const key of [
      'OTEL_METRICS_EXPORTER',
      'OTEL_LOGS_EXPORTER',
      'OTEL_TRACES_EXPORTER',
    ] as const) {
      const v = process.env[key]
      if (v?.includes('console')) {
        process.env[key] = v
          .split(',')
          .map(s => s.trim())
          .filter(s => s !== 'console')
          .join(',')
      }
    }
  }

  // Route OTEL's internal diagnostics (errors only) into our logging.
  diag.setLogger(new ClaudeCodeDiagLogger(), DiagLogLevel.ERROR)

  // Initialize Perfetto tracing (independent of OTEL)
  // Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
  initializePerfettoTracing()

  const readers = []

  // Add customer exporters (if enabled)
  const telemetryEnabled = isTelemetryEnabled()
  logForDebugging(
    `[3P telemetry] isTelemetryEnabled=${telemetryEnabled} (CLAUDE_CODE_ENABLE_TELEMETRY=${process.env.CLAUDE_CODE_ENABLE_TELEMETRY})`,
  )
  if (telemetryEnabled) {
    readers.push(...(await getOtlpReaders()))
  }

  // Add BigQuery exporter (for API customers, C4E users, and internal users)
  if (isBigQueryMetricsEnabled()) {
    readers.push(getBigQueryExportingReader())
  }

  // Create base resource with service attributes
  const platform = getPlatform()
  const baseAttributes: Record<string, string> = {
    [ATTR_SERVICE_NAME]: 'claude-code',
    [ATTR_SERVICE_VERSION]: MACRO.VERSION,
  }

  // Add WSL-specific attributes if running on WSL
  if (platform === 'wsl') {
    const wslVersion = getWslVersion()
    if (wslVersion) {
      baseAttributes['wsl.version'] = wslVersion
    }
  }

  const baseResource = resourceFromAttributes(baseAttributes)

  // Use OpenTelemetry detectors
  const osResource = resourceFromAttributes(
    osDetector.detect().attributes || {},
  )

  // Extract only host.arch from hostDetector
  const hostDetected = hostDetector.detect()
  const hostArchAttributes = hostDetected.attributes?.[SEMRESATTRS_HOST_ARCH]
    ? {
        [SEMRESATTRS_HOST_ARCH]: hostDetected.attributes[SEMRESATTRS_HOST_ARCH],
      }
    : {}
  const hostArchResource = resourceFromAttributes(hostArchAttributes)

  const envResource = resourceFromAttributes(
    envDetector.detect().attributes || {},
  )

  // Merge resources - later resources take precedence
  const resource = baseResource
    .merge(osResource)
    .merge(hostArchResource)
    .merge(envResource)

  // Check if beta tracing is enabled - this is a separate code path
  // Available to all users who set ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
  if (isBetaTracingEnabled()) {
    void initializeBetaTracing(resource).catch(e =>
      logForDebugging(`Beta tracing init failed: ${e}`, { level: 'error' }),
    )
    // Still set up meter provider for metrics (but skip regular logs/traces setup)
    const meterProvider = new MeterProvider({
      resource,
      views: [],
      readers,
    })
    setMeterProvider(meterProvider)

    // Register shutdown for beta tracing
    const shutdownTelemetry = async () => {
      const timeoutMs = parseInt(
        process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
      )
      try {
        endInteractionSpan()

        // Force flush + shutdown together inside the timeout. Previously forceFlush
        // was awaited unbounded BEFORE the race, blocking exit on slow OTLP endpoints.
        // Each provider's flush→shutdown is chained independently so a slow logger
        // flush doesn't delay meterProvider/tracerProvider shutdown (no waterfall).
        const loggerProvider = getLoggerProvider()
        const tracerProvider = getTracerProvider()

        const chains: Promise<void>[] = [meterProvider.shutdown()]
        if (loggerProvider) {
          chains.push(
            loggerProvider.forceFlush().then(() => loggerProvider.shutdown()),
          )
        }
        if (tracerProvider) {
          chains.push(
            tracerProvider.forceFlush().then(() => tracerProvider.shutdown()),
          )
        }

        await Promise.race([
          Promise.all(chains),
          telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
        ])
      } catch {
        // Ignore shutdown errors
      }
    }
    registerCleanup(shutdownTelemetry)

    // Early return: the beta path owns logs/traces; only metrics continue below.
    return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
  }

  const meterProvider = new MeterProvider({
    resource,
    views: [],
    readers,
  })

  // Store reference in state for flushing
  setMeterProvider(meterProvider)

  // Initialize logs if telemetry is enabled
  if (telemetryEnabled) {
    const logExporters = await getOtlpLogExporters()
    logForDebugging(
      `[3P telemetry] Created ${logExporters.length} log exporter(s)`,
    )

    if (logExporters.length > 0) {
      const loggerProvider = new LoggerProvider({
        resource,
        // Add batch processors for each exporter
        processors: logExporters.map(
          exporter =>
            new BatchLogRecordProcessor(exporter, {
              scheduledDelayMillis: parseInt(
                process.env.OTEL_LOGS_EXPORT_INTERVAL ||
                  DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
              ),
            }),
        ),
      })

      // Register the logger provider globally
      logs.setGlobalLoggerProvider(loggerProvider)
      setLoggerProvider(loggerProvider)

      // Initialize event logger
      const eventLogger = logs.getLogger(
        'com.anthropic.claude_code.events',
        MACRO.VERSION,
      )
      setEventLogger(eventLogger)
      logForDebugging('[3P telemetry] Event logger set successfully')

      // 'beforeExit' is emitted when Node.js empties its event loop and has no additional work to schedule.
      // Unlike 'exit', it allows us to perform async operations, so it works well for letting
      // network requests complete before the process exits naturally.
      process.on('beforeExit', async () => {
        await loggerProvider?.forceFlush()
        // Also flush traces - they use BatchSpanProcessor which needs explicit flush
        const tracerProvider = getTracerProvider()
        await tracerProvider?.forceFlush()
      })

      process.on('exit', () => {
        // Final attempt to flush logs and traces
        void loggerProvider?.forceFlush()
        void getTracerProvider()?.forceFlush()
      })
    }
  }

  // Initialize tracing if enhanced telemetry is enabled (BETA)
  if (telemetryEnabled && isEnhancedTelemetryEnabled()) {
    const traceExporters = await getOtlpTraceExporters()
    if (traceExporters.length > 0) {
      // Create span processors for each exporter
      const spanProcessors = traceExporters.map(
        exporter =>
          new BatchSpanProcessor(exporter, {
            scheduledDelayMillis: parseInt(
              process.env.OTEL_TRACES_EXPORT_INTERVAL ||
                DEFAULT_TRACES_EXPORT_INTERVAL_MS.toString(),
            ),
          }),
      )

      const tracerProvider = new BasicTracerProvider({
        resource,
        spanProcessors,
      })

      // Register the tracer provider globally
      trace.setGlobalTracerProvider(tracerProvider)
      setTracerProvider(tracerProvider)
    }
  }

  // Shutdown metrics and logs on exit (flushes and closes exporters)
  const shutdownTelemetry = async () => {
    const timeoutMs = parseInt(
      process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
    )

    try {
      // End any active interaction span before shutdown
      endInteractionSpan()

      const shutdownPromises = [meterProvider.shutdown()]
      const loggerProvider = getLoggerProvider()
      if (loggerProvider) {
        shutdownPromises.push(loggerProvider.shutdown())
      }
      const tracerProvider = getTracerProvider()
      if (tracerProvider) {
        shutdownPromises.push(tracerProvider.shutdown())
      }

      await Promise.race([
        Promise.all(shutdownPromises),
        telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
      ])
    } catch (error) {
      if (error instanceof Error && error.message.includes('timeout')) {
        logForDebugging(
          `
OpenTelemetry telemetry flush timed out after ${timeoutMs}ms

To resolve this issue, you can:
1. Increase the timeout by setting CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS env var (e.g., 5000 for 5 seconds)
2. Check if your OpenTelemetry backend is experiencing scalability issues
3. Disable OpenTelemetry by unsetting CLAUDE_CODE_ENABLE_TELEMETRY env var

Current timeout: ${timeoutMs}ms
`,
          { level: 'error' },
        )
      }
      throw error
    }
  }

  // Always register shutdown (internal metrics are always enabled)
  registerCleanup(shutdownTelemetry)

  return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Flush all pending telemetry data immediately.
|
|
||||||
* This should be called before logout or org switching to prevent data leakage.
|
|
||||||
*/
|
|
||||||
export async function flushTelemetry(): Promise<void> {
|
|
||||||
const meterProvider = getMeterProvider()
|
|
||||||
if (!meterProvider) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const timeoutMs = parseInt(
|
|
||||||
process.env.CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS || '5000',
|
|
||||||
)
|
|
||||||
|
|
||||||
try {
|
|
||||||
const flushPromises = [meterProvider.forceFlush()]
|
|
||||||
const loggerProvider = getLoggerProvider()
|
|
||||||
if (loggerProvider) {
|
|
||||||
flushPromises.push(loggerProvider.forceFlush())
|
|
||||||
}
|
|
||||||
const tracerProvider = getTracerProvider()
|
|
||||||
if (tracerProvider) {
|
|
||||||
flushPromises.push(tracerProvider.forceFlush())
|
|
||||||
}
|
|
||||||
|
|
||||||
await Promise.race([
|
|
||||||
Promise.all(flushPromises),
|
|
||||||
telemetryTimeout(timeoutMs, 'OpenTelemetry flush timeout'),
|
|
||||||
])
|
|
||||||
|
|
||||||
logForDebugging('Telemetry flushed successfully')
|
|
||||||
} catch (error) {
|
|
||||||
if (error instanceof TelemetryTimeoutError) {
|
|
||||||
logForDebugging(
|
|
||||||
`Telemetry flush timed out after ${timeoutMs}ms. Some metrics may not be exported.`,
|
|
||||||
{ level: 'warn' },
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
logForDebugging(`Telemetry flush failed: ${errorMessage(error)}`, {
|
|
||||||
level: 'error',
|
|
||||||
})
|
|
||||||
}
|
|
||||||
// Don't throw - allow logout to continue even if flush fails
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseOtelHeadersEnvVar(): Record<string, string> {
|
|
||||||
const headers: Record<string, string> = {}
|
|
||||||
const envHeaders = process.env.OTEL_EXPORTER_OTLP_HEADERS
|
|
||||||
if (envHeaders) {
|
|
||||||
for (const pair of envHeaders.split(',')) {
|
|
||||||
const [key, ...valueParts] = pair.split('=')
|
|
||||||
if (key && valueParts.length > 0) {
|
|
||||||
headers[key.trim()] = valueParts.join('=').trim()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return headers
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get configuration for OTLP exporters including:
|
|
||||||
* - HTTP agent options (proxy, mTLS)
|
|
||||||
* - Dynamic headers via otelHeadersHelper or static headers from env var
|
|
||||||
*/
|
|
||||||
function getOTLPExporterConfig() {
  const proxyUrl = getProxyUrl()
  const mtlsConfig = getMTLSConfig()
  const settings = getSettings_DEPRECATED()

  // Build base config
  const config: Record<string, unknown> = {}

  // Parse static headers from env var once (doesn't change at runtime)
  const staticHeaders = parseOtelHeadersEnvVar()

  // If otelHeadersHelper is configured, use async headers function for dynamic refresh
  // Otherwise just return static headers if any exist
  if (settings?.otelHeadersHelper) {
    config.headers = async (): Promise<Record<string, string>> => {
      // Spread order: helper-supplied headers win over static env headers on
      // key collision.
      const dynamicHeaders = getOtelHeadersFromHelper()
      return { ...staticHeaders, ...dynamicHeaders }
    }
  } else if (Object.keys(staticHeaders).length > 0) {
    config.headers = async (): Promise<Record<string, string>> => staticHeaders
  }

  // Check if we should bypass proxy for OTEL endpoint
  const otelEndpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
  if (!proxyUrl || (otelEndpoint && shouldBypassProxy(otelEndpoint))) {
    // No proxy configured or OTEL endpoint should bypass proxy.
    // Plain agent options are enough: mTLS client cert plus any extra CA roots.
    const caCerts = getCACertificates()
    if (mtlsConfig || caCerts) {
      config.httpAgentOptions = {
        ...mtlsConfig,
        ...(caCerts && { ca: caCerts }),
      }
    }
    return config
  }

  // Return an HttpAgentFactory function that creates our proxy agent
  const caCerts = getCACertificates()
  // The protocol argument is unused: the same HTTPS proxy agent is returned
  // regardless of the exporter's transport.
  const agentFactory = (_protocol: string) => {
    // Create and return the proxy agent with mTLS and CA cert config
    const proxyAgent =
      mtlsConfig || caCerts
        ? new HttpsProxyAgent(proxyUrl, {
            ...(mtlsConfig && {
              cert: mtlsConfig.cert,
              key: mtlsConfig.key,
              passphrase: mtlsConfig.passphrase,
            }),
            ...(caCerts && { ca: caCerts }),
          })
        : new HttpsProxyAgent(proxyUrl)

    return proxyAgent
  }

  config.httpAgentOptions = agentFactory
  return config
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
import type { DiagLogger } from '@opentelemetry/api'
|
|
||||||
import { logForDebugging } from '../debug.js'
|
|
||||||
import { logError } from '../log.js'
|
|
||||||
export class ClaudeCodeDiagLogger implements DiagLogger {
|
|
||||||
error(message: string, ..._: unknown[]) {
|
|
||||||
logError(new Error(message))
|
|
||||||
logForDebugging(`[3P telemetry] OTEL diag error: ${message}`, {
|
|
||||||
level: 'error',
|
|
||||||
})
|
|
||||||
}
|
|
||||||
warn(message: string, ..._: unknown[]) {
|
|
||||||
logError(new Error(message))
|
|
||||||
logForDebugging(`[3P telemetry] OTEL diag warn: ${message}`, {
|
|
||||||
level: 'warn',
|
|
||||||
})
|
|
||||||
}
|
|
||||||
info(_message: string, ..._args: unknown[]) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
debug(_message: string, ..._args: unknown[]) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
verbose(_message: string, ..._args: unknown[]) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -12,17 +12,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { createHash } from 'crypto'
|
import { createHash } from 'crypto'
|
||||||
import { sep } from 'path'
|
|
||||||
import {
|
import {
|
||||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
logEvent,
|
|
||||||
} from '../../services/analytics/index.js'
|
} from '../../services/analytics/index.js'
|
||||||
import type {
|
import type { PluginManifest } from '../../types/plugin.js'
|
||||||
LoadedPlugin,
|
|
||||||
PluginError,
|
|
||||||
PluginManifest,
|
|
||||||
} from '../../types/plugin.js'
|
|
||||||
import {
|
import {
|
||||||
isOfficialMarketplaceName,
|
isOfficialMarketplaceName,
|
||||||
parsePluginIdentifier,
|
parsePluginIdentifier,
|
||||||
@@ -80,17 +73,6 @@ export function getTelemetryPluginScope(
|
|||||||
return 'user-local'
|
return 'user-local'
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* How a plugin arrived in the session. Splits self-selected from org-pushed
|
|
||||||
* — plugin_scope alone doesn't (an official plugin can be user-installed OR
|
|
||||||
* org-pushed; both are scope='official').
|
|
||||||
*/
|
|
||||||
export type EnabledVia =
|
|
||||||
| 'user-install'
|
|
||||||
| 'org-policy'
|
|
||||||
| 'default-enable'
|
|
||||||
| 'seed-mount'
|
|
||||||
|
|
||||||
/** How a skill/command invocation was triggered. */
|
/** How a skill/command invocation was triggered. */
|
||||||
export type InvocationTrigger =
|
export type InvocationTrigger =
|
||||||
| 'user-slash'
|
| 'user-slash'
|
||||||
@@ -107,24 +89,6 @@ export type InstallSource =
|
|||||||
| 'ui-suggestion'
|
| 'ui-suggestion'
|
||||||
| 'deep-link'
|
| 'deep-link'
|
||||||
|
|
||||||
export function getEnabledVia(
|
|
||||||
plugin: LoadedPlugin,
|
|
||||||
managedNames: Set<string> | null,
|
|
||||||
seedDirs: string[],
|
|
||||||
): EnabledVia {
|
|
||||||
if (plugin.isBuiltin) return 'default-enable'
|
|
||||||
if (managedNames?.has(plugin.name)) return 'org-policy'
|
|
||||||
// Trailing sep: /opt/plugins must not match /opt/plugins-extra
|
|
||||||
if (
|
|
||||||
seedDirs.some(dir =>
|
|
||||||
plugin.path.startsWith(dir.endsWith(sep) ? dir : dir + sep),
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
return 'seed-mount'
|
|
||||||
}
|
|
||||||
return 'user-install'
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common plugin telemetry fields keyed off name@marketplace. Returns the
|
* Common plugin telemetry fields keyed off name@marketplace. Returns the
|
||||||
* hash, scope enum, and the redacted-twin columns. Callers add the raw
|
* hash, scope enum, and the redacted-twin columns. Callers add the raw
|
||||||
@@ -165,10 +129,7 @@ export function buildPluginTelemetryFields(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Per-invocation callers (SkillTool, processSlashCommand) pass
|
* Per-invocation callers (SkillTool, processSlashCommand) pass
|
||||||
* managedNames=null — the session-level tengu_plugin_enabled_for_session
|
* managedNames=null to keep hot-path call sites free of the extra settings read.
|
||||||
* event carries the authoritative plugin_scope, and per-invocation rows can
|
|
||||||
* join on plugin_id_hash to recover it. This keeps hot-path call sites free
|
|
||||||
* of the extra settings read.
|
|
||||||
*/
|
*/
|
||||||
export function buildPluginCommandTelemetryFields(
|
export function buildPluginCommandTelemetryFields(
|
||||||
pluginInfo: { pluginManifest: PluginManifest; repository: string },
|
pluginInfo: { pluginManifest: PluginManifest; repository: string },
|
||||||
@@ -182,47 +143,6 @@ export function buildPluginCommandTelemetryFields(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Emit tengu_plugin_enabled_for_session once per enabled plugin at session
|
|
||||||
* start. Supplements tengu_skill_loaded (which still fires per-skill) — use
|
|
||||||
* this for plugin-level aggregates instead of DISTINCT-on-prefix hacks.
|
|
||||||
* A plugin with 5 skills emits 5 skill_loaded rows but 1 of these.
|
|
||||||
*/
|
|
||||||
export function logPluginsEnabledForSession(
|
|
||||||
plugins: LoadedPlugin[],
|
|
||||||
managedNames: Set<string> | null,
|
|
||||||
seedDirs: string[],
|
|
||||||
): void {
|
|
||||||
for (const plugin of plugins) {
|
|
||||||
const { marketplace } = parsePluginIdentifier(plugin.repository)
|
|
||||||
|
|
||||||
logEvent('tengu_plugin_enabled_for_session', {
|
|
||||||
_PROTO_plugin_name:
|
|
||||||
plugin.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
...(marketplace && {
|
|
||||||
_PROTO_marketplace_name:
|
|
||||||
marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
}),
|
|
||||||
...buildPluginTelemetryFields(plugin.name, marketplace, managedNames),
|
|
||||||
enabled_via: getEnabledVia(
|
|
||||||
plugin,
|
|
||||||
managedNames,
|
|
||||||
seedDirs,
|
|
||||||
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
skill_path_count:
|
|
||||||
(plugin.skillsPath ? 1 : 0) + (plugin.skillsPaths?.length ?? 0),
|
|
||||||
command_path_count:
|
|
||||||
(plugin.commandsPath ? 1 : 0) + (plugin.commandsPaths?.length ?? 0),
|
|
||||||
has_mcp: plugin.manifest.mcpServers !== undefined,
|
|
||||||
has_hooks: plugin.hooksConfig !== undefined,
|
|
||||||
...(plugin.manifest.version && {
|
|
||||||
version: plugin.manifest
|
|
||||||
.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bounded-cardinality error bucket for CLI plugin operation failures.
|
* Bounded-cardinality error bucket for CLI plugin operation failures.
|
||||||
* Maps free-form error messages to 5 stable categories so dashboard
|
* Maps free-form error messages to 5 stable categories so dashboard
|
||||||
@@ -257,33 +177,3 @@ export function classifyPluginCommandError(
|
|||||||
}
|
}
|
||||||
return 'unknown'
|
return 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Emit tengu_plugin_load_failed once per error surfaced by session-start
|
|
||||||
* plugin loading. Pairs with tengu_plugin_enabled_for_session so dashboards
|
|
||||||
* can compute a load-success rate. PluginError.type is already a bounded
|
|
||||||
* enum — use it directly as error_category.
|
|
||||||
*/
|
|
||||||
export function logPluginLoadErrors(
|
|
||||||
errors: PluginError[],
|
|
||||||
managedNames: Set<string> | null,
|
|
||||||
): void {
|
|
||||||
for (const err of errors) {
|
|
||||||
const { name, marketplace } = parsePluginIdentifier(err.source)
|
|
||||||
// Not all PluginError variants carry a plugin name (some have pluginId,
|
|
||||||
// some are marketplace-level). Use the 'plugin' property if present,
|
|
||||||
// fall back to the name parsed from err.source.
|
|
||||||
const pluginName = 'plugin' in err && err.plugin ? err.plugin : name
|
|
||||||
logEvent('tengu_plugin_load_failed', {
|
|
||||||
error_category:
|
|
||||||
err.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
_PROTO_plugin_name:
|
|
||||||
pluginName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
...(marketplace && {
|
|
||||||
_PROTO_marketplace_name:
|
|
||||||
marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
}),
|
|
||||||
...buildPluginTelemetryFields(pluginName, marketplace, managedNames),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,39 +0,0 @@
|
|||||||
import { getSkillToolCommands } from '../../commands.js'
|
|
||||||
import {
|
|
||||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
logEvent,
|
|
||||||
} from '../../services/analytics/index.js'
|
|
||||||
import { getCharBudget } from '../../tools/SkillTool/prompt.js'
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Logs a tengu_skill_loaded event for each skill available at session startup.
|
|
||||||
* This enables analytics on which skills are available across sessions.
|
|
||||||
*/
|
|
||||||
export async function logSkillsLoaded(
|
|
||||||
cwd: string,
|
|
||||||
contextWindowTokens: number,
|
|
||||||
): Promise<void> {
|
|
||||||
const skills = await getSkillToolCommands(cwd)
|
|
||||||
const skillBudget = getCharBudget(contextWindowTokens)
|
|
||||||
|
|
||||||
for (const skill of skills) {
|
|
||||||
if (skill.type !== 'prompt') continue
|
|
||||||
|
|
||||||
logEvent('tengu_skill_loaded', {
|
|
||||||
// _PROTO_skill_name routes to the privileged skill_name BQ column.
|
|
||||||
// Unredacted names don't go in additional_metadata.
|
|
||||||
_PROTO_skill_name:
|
|
||||||
skill.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
|
||||||
skill_source:
|
|
||||||
skill.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
skill_loaded_from:
|
|
||||||
skill.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
skill_budget: skillBudget,
|
|
||||||
...(skill.kind && {
|
|
||||||
skill_kind:
|
|
||||||
skill.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
import type { Attributes } from '@opentelemetry/api'
|
|
||||||
import { getSessionId } from 'src/bootstrap/state.js'
|
|
||||||
import { getOauthAccountInfo } from './auth.js'
|
|
||||||
import { getOrCreateUserID } from './config.js'
|
|
||||||
import { envDynamic } from './envDynamic.js'
|
|
||||||
import { isEnvTruthy } from './envUtils.js'
|
|
||||||
import { toTaggedId } from './taggedId.js'
|
|
||||||
|
|
||||||
// Default configuration for metrics cardinality
|
|
||||||
const METRICS_CARDINALITY_DEFAULTS = {
|
|
||||||
OTEL_METRICS_INCLUDE_SESSION_ID: true,
|
|
||||||
OTEL_METRICS_INCLUDE_VERSION: false,
|
|
||||||
OTEL_METRICS_INCLUDE_ACCOUNT_UUID: true,
|
|
||||||
}
|
|
||||||
|
|
||||||
function shouldIncludeAttribute(
|
|
||||||
envVar: keyof typeof METRICS_CARDINALITY_DEFAULTS,
|
|
||||||
): boolean {
|
|
||||||
const defaultValue = METRICS_CARDINALITY_DEFAULTS[envVar]
|
|
||||||
const envValue = process.env[envVar]
|
|
||||||
|
|
||||||
if (envValue === undefined) {
|
|
||||||
return defaultValue
|
|
||||||
}
|
|
||||||
|
|
||||||
return isEnvTruthy(envValue)
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getTelemetryAttributes(): Attributes {
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
const sessionId = getSessionId()
|
|
||||||
|
|
||||||
const attributes: Attributes = {
|
|
||||||
'user.id': userId,
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_SESSION_ID')) {
|
|
||||||
attributes['session.id'] = sessionId
|
|
||||||
}
|
|
||||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_VERSION')) {
|
|
||||||
attributes['app.version'] = MACRO.VERSION
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only include OAuth account data when actively using OAuth authentication
|
|
||||||
const oauthAccount = getOauthAccountInfo()
|
|
||||||
if (oauthAccount) {
|
|
||||||
const orgId = oauthAccount.organizationUuid
|
|
||||||
const email = oauthAccount.emailAddress
|
|
||||||
const accountUuid = oauthAccount.accountUuid
|
|
||||||
|
|
||||||
if (orgId) attributes['organization.id'] = orgId
|
|
||||||
if (email) attributes['user.email'] = email
|
|
||||||
|
|
||||||
if (
|
|
||||||
accountUuid &&
|
|
||||||
shouldIncludeAttribute('OTEL_METRICS_INCLUDE_ACCOUNT_UUID')
|
|
||||||
) {
|
|
||||||
attributes['user.account_uuid'] = accountUuid
|
|
||||||
attributes['user.account_id'] =
|
|
||||||
process.env.CLAUDE_CODE_ACCOUNT_TAGGED_ID ||
|
|
||||||
toTaggedId('user', accountUuid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add terminal type if available
|
|
||||||
if (envDynamic.terminal) {
|
|
||||||
attributes['terminal.type'] = envDynamic.terminal
|
|
||||||
}
|
|
||||||
|
|
||||||
return attributes
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user