Compare commits
2 Commits
codex-remo
...
497f81f4f9
| Author | SHA1 | Date | |
|---|---|---|---|
| 497f81f4f9 | |||
| 9e7338d54c |
20
README.md
20
README.md
@@ -33,3 +33,23 @@ bun run compile
|
|||||||
|
|
||||||
- `node_modules/`, `dist/`, and generated CLI binaries are ignored by Git.
|
- `node_modules/`, `dist/`, and generated CLI binaries are ignored by Git.
|
||||||
- `bun.lock` is kept in the repository for reproducible installs.
|
- `bun.lock` is kept in the repository for reproducible installs.
|
||||||
|
|
||||||
|
## Local Info Egress Status
|
||||||
|
|
||||||
|
This fork has removed several local system and project metadata egress paths that existed in the recovered upstream code.
|
||||||
|
|
||||||
|
Removed in this repository:
|
||||||
|
|
||||||
|
- Model-request context injection of working directory, git status/history, `CLAUDE.md`, current date, platform, shell, and OS version.
|
||||||
|
- Feedback upload and transcript-share upload paths.
|
||||||
|
- Remote Control / Bridge registration fields that sent machine name, git branch, and git repository URL, plus git source/outcome data in bridge session creation.
|
||||||
|
- Trusted-device enrollment and trusted-device token header emission for bridge requests.
|
||||||
|
- `/insights` automatic S3 upload; reports now stay local via `file://` paths only.
|
||||||
|
- Datadog analytics and Anthropic 1P event-logging egress.
|
||||||
|
- GrowthBook remote evaluation/network fetches; local env/config overrides and cached values remain available for compatibility.
|
||||||
|
- OpenTelemetry initialization and event export paths.
|
||||||
|
|
||||||
|
Still present:
|
||||||
|
|
||||||
|
- Normal Claude API requests are still part of product functionality; this fork only removes extra local metadata injection, not core model/network access.
|
||||||
|
- Compatibility scaffolding for analytics, GrowthBook, and telemetry still exists in the tree as local no-op or cache-only code.
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
import memoize from 'lodash-es/memoize.js'
|
|
||||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
|
|
||||||
import { logForDebugging } from '../utils/debug.js'
|
import { logForDebugging } from '../utils/debug.js'
|
||||||
import { getSecureStorage } from '../utils/secureStorage/index.js'
|
import { getSecureStorage } from '../utils/secureStorage/index.js'
|
||||||
|
|
||||||
@@ -21,36 +19,12 @@ import { getSecureStorage } from '../utils/secureStorage/index.js'
|
|||||||
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
|
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'
|
|
||||||
|
|
||||||
function isGateEnabled(): boolean {
|
|
||||||
return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
|
|
||||||
// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
|
|
||||||
// Cache cleared on logout (clearAuthRelatedCaches) and after any local update.
|
|
||||||
//
|
|
||||||
// Only the storage read is memoized — the GrowthBook gate is checked live so
|
|
||||||
// that a gate flip after GrowthBook refresh takes effect without a restart.
|
|
||||||
const readStoredToken = memoize((): string | undefined => {
|
|
||||||
// Env var takes precedence for testing/canary.
|
|
||||||
const envToken = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
|
|
||||||
if (envToken) {
|
|
||||||
return envToken
|
|
||||||
}
|
|
||||||
return getSecureStorage().read()?.trustedDeviceToken
|
|
||||||
})
|
|
||||||
|
|
||||||
export function getTrustedDeviceToken(): string | undefined {
|
export function getTrustedDeviceToken(): string | undefined {
|
||||||
if (!isGateEnabled()) {
|
|
||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
return readStoredToken()
|
|
||||||
}
|
|
||||||
|
|
||||||
export function clearTrustedDeviceTokenCache(): void {
|
export function clearTrustedDeviceTokenCache(): void {
|
||||||
readStoredToken.cache?.clear?.()
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -59,9 +33,6 @@ export function clearTrustedDeviceTokenCache(): void {
|
|||||||
* as X-Trusted-Device-Token after account switches.
|
* as X-Trusted-Device-Token after account switches.
|
||||||
*/
|
*/
|
||||||
export function clearTrustedDeviceToken(): void {
|
export function clearTrustedDeviceToken(): void {
|
||||||
if (!isGateEnabled()) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
const secureStorage = getSecureStorage()
|
const secureStorage = getSecureStorage()
|
||||||
try {
|
try {
|
||||||
const data = secureStorage.read()
|
const data = secureStorage.read()
|
||||||
@@ -72,7 +43,6 @@ export function clearTrustedDeviceToken(): void {
|
|||||||
} catch {
|
} catch {
|
||||||
// Best-effort — don't block login if storage is inaccessible
|
// Best-effort — don't block login if storage is inaccessible
|
||||||
}
|
}
|
||||||
readStoredToken.cache?.clear?.()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -87,22 +87,8 @@ export const init = memoize(async (): Promise<void> => {
|
|||||||
setupGracefulShutdown()
|
setupGracefulShutdown()
|
||||||
profileCheckpoint('init_after_graceful_shutdown')
|
profileCheckpoint('init_after_graceful_shutdown')
|
||||||
|
|
||||||
// Initialize 1P event logging (no security concerns, but deferred to avoid
|
// Telemetry/log export is disabled in this build. Keep the startup
|
||||||
// loading OpenTelemetry sdk-logs at startup). growthbook.js is already in
|
// checkpoint so callers depending on the init timeline still see it.
|
||||||
// the module cache by this point (firstPartyEventLogger imports it), so the
|
|
||||||
// second dynamic import adds no load cost.
|
|
||||||
void Promise.all([
|
|
||||||
import('../services/analytics/firstPartyEventLogger.js'),
|
|
||||||
import('../services/analytics/growthbook.js'),
|
|
||||||
]).then(([fp, gb]) => {
|
|
||||||
fp.initialize1PEventLogging()
|
|
||||||
// Rebuild the logger provider if tengu_1p_event_batch_config changes
|
|
||||||
// mid-session. Change detection (isEqual) is inside the handler so
|
|
||||||
// unchanged refreshes are no-ops.
|
|
||||||
gb.onGrowthBookRefresh(() => {
|
|
||||||
void fp.reinitialize1PEventLoggingIfConfigChanged()
|
|
||||||
})
|
|
||||||
})
|
|
||||||
profileCheckpoint('init_after_1p_event_logging')
|
profileCheckpoint('init_after_1p_event_logging')
|
||||||
|
|
||||||
// Populate OAuth account info if it is not already cached in config. This is needed since the
|
// Populate OAuth account info if it is not already cached in config. This is needed since the
|
||||||
@@ -245,96 +231,14 @@ export const init = memoize(async (): Promise<void> => {
|
|||||||
* This should only be called once, after the trust dialog has been accepted.
|
* This should only be called once, after the trust dialog has been accepted.
|
||||||
*/
|
*/
|
||||||
export function initializeTelemetryAfterTrust(): void {
|
export function initializeTelemetryAfterTrust(): void {
|
||||||
if (isEligibleForRemoteManagedSettings()) {
|
|
||||||
// For SDK/headless mode with beta tracing, initialize eagerly first
|
|
||||||
// to ensure the tracer is ready before the first query runs.
|
|
||||||
// The async path below will still run but doInitializeTelemetry() guards against double init.
|
|
||||||
if (getIsNonInteractiveSession() && isBetaTracingEnabled()) {
|
|
||||||
void doInitializeTelemetry().catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Eager telemetry init failed (beta tracing): ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
logForDebugging(
|
|
||||||
'[3P telemetry] Waiting for remote managed settings before telemetry init',
|
|
||||||
)
|
|
||||||
void waitForRemoteManagedSettingsToLoad()
|
|
||||||
.then(async () => {
|
|
||||||
logForDebugging(
|
|
||||||
'[3P telemetry] Remote managed settings loaded, initializing telemetry',
|
|
||||||
)
|
|
||||||
// Re-apply env vars to pick up remote settings before initializing telemetry.
|
|
||||||
applyConfigEnvironmentVariables()
|
|
||||||
await doInitializeTelemetry()
|
|
||||||
})
|
|
||||||
.catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Telemetry init failed (remote settings path): ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
void doInitializeTelemetry().catch(error => {
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Telemetry init failed: ${errorMessage(error)}`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function doInitializeTelemetry(): Promise<void> {
|
|
||||||
if (telemetryInitialized) {
|
|
||||||
// Already initialized, nothing to do
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set flag before init to prevent double initialization
|
async function doInitializeTelemetry(): Promise<void> {
|
||||||
telemetryInitialized = true
|
void telemetryInitialized
|
||||||
try {
|
return
|
||||||
await setMeterState()
|
|
||||||
} catch (error) {
|
|
||||||
// Reset flag on failure so subsequent calls can retry
|
|
||||||
telemetryInitialized = false
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function setMeterState(): Promise<void> {
|
async function setMeterState(): Promise<void> {
|
||||||
// Lazy-load instrumentation to defer ~400KB of OpenTelemetry + protobuf
|
return
|
||||||
const { initializeTelemetry } = await import(
|
|
||||||
'../utils/telemetry/instrumentation.js'
|
|
||||||
)
|
|
||||||
// Initialize customer OTLP telemetry (metrics, logs, traces)
|
|
||||||
const meter = await initializeTelemetry()
|
|
||||||
if (meter) {
|
|
||||||
// Create factory function for attributed counters
|
|
||||||
const createAttributedCounter = (
|
|
||||||
name: string,
|
|
||||||
options: MetricOptions,
|
|
||||||
): AttributedCounter => {
|
|
||||||
const counter = meter?.createCounter(name, options)
|
|
||||||
|
|
||||||
return {
|
|
||||||
add(value: number, additionalAttributes: Attributes = {}) {
|
|
||||||
// Always fetch fresh telemetry attributes to ensure they're up to date
|
|
||||||
const currentAttributes = getTelemetryAttributes()
|
|
||||||
const mergedAttributes = {
|
|
||||||
...currentAttributes,
|
|
||||||
...additionalAttributes,
|
|
||||||
}
|
|
||||||
counter?.add(value, mergedAttributes)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
setMeter(meter, createAttributedCounter)
|
|
||||||
|
|
||||||
// Increment session counter here because the startup telemetry path
|
|
||||||
// runs before this async initialization completes, so the counter
|
|
||||||
// would be null there.
|
|
||||||
getSessionCounter()?.add(1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,307 +1,20 @@
|
|||||||
import axios from 'axios'
|
|
||||||
import { createHash } from 'crypto'
|
|
||||||
import memoize from 'lodash-es/memoize.js'
|
|
||||||
import { getOrCreateUserID } from '../../utils/config.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { getCanonicalName } from '../../utils/model/model.js'
|
|
||||||
import { getAPIProvider } from '../../utils/model/providers.js'
|
|
||||||
import { MODEL_COSTS } from '../../utils/modelCost.js'
|
|
||||||
import { isAnalyticsDisabled } from './config.js'
|
|
||||||
import { getEventMetadata } from './metadata.js'
|
|
||||||
|
|
||||||
const DATADOG_LOGS_ENDPOINT =
|
|
||||||
'https://http-intake.logs.us5.datadoghq.com/api/v2/logs'
|
|
||||||
const DATADOG_CLIENT_TOKEN = 'pubbbf48e6d78dae54bceaa4acf463299bf'
|
|
||||||
const DEFAULT_FLUSH_INTERVAL_MS = 15000
|
|
||||||
const MAX_BATCH_SIZE = 100
|
|
||||||
const NETWORK_TIMEOUT_MS = 5000
|
|
||||||
|
|
||||||
const DATADOG_ALLOWED_EVENTS = new Set([
|
|
||||||
'chrome_bridge_connection_succeeded',
|
|
||||||
'chrome_bridge_connection_failed',
|
|
||||||
'chrome_bridge_disconnected',
|
|
||||||
'chrome_bridge_tool_call_completed',
|
|
||||||
'chrome_bridge_tool_call_error',
|
|
||||||
'chrome_bridge_tool_call_started',
|
|
||||||
'chrome_bridge_tool_call_timeout',
|
|
||||||
'tengu_api_error',
|
|
||||||
'tengu_api_success',
|
|
||||||
'tengu_brief_mode_enabled',
|
|
||||||
'tengu_brief_mode_toggled',
|
|
||||||
'tengu_brief_send',
|
|
||||||
'tengu_cancel',
|
|
||||||
'tengu_compact_failed',
|
|
||||||
'tengu_exit',
|
|
||||||
'tengu_flicker',
|
|
||||||
'tengu_init',
|
|
||||||
'tengu_model_fallback_triggered',
|
|
||||||
'tengu_oauth_error',
|
|
||||||
'tengu_oauth_success',
|
|
||||||
'tengu_oauth_token_refresh_failure',
|
|
||||||
'tengu_oauth_token_refresh_success',
|
|
||||||
'tengu_oauth_token_refresh_lock_acquiring',
|
|
||||||
'tengu_oauth_token_refresh_lock_acquired',
|
|
||||||
'tengu_oauth_token_refresh_starting',
|
|
||||||
'tengu_oauth_token_refresh_completed',
|
|
||||||
'tengu_oauth_token_refresh_lock_releasing',
|
|
||||||
'tengu_oauth_token_refresh_lock_released',
|
|
||||||
'tengu_query_error',
|
|
||||||
'tengu_session_file_read',
|
|
||||||
'tengu_started',
|
|
||||||
'tengu_tool_use_error',
|
|
||||||
'tengu_tool_use_granted_in_prompt_permanent',
|
|
||||||
'tengu_tool_use_granted_in_prompt_temporary',
|
|
||||||
'tengu_tool_use_rejected_in_prompt',
|
|
||||||
'tengu_tool_use_success',
|
|
||||||
'tengu_uncaught_exception',
|
|
||||||
'tengu_unhandled_rejection',
|
|
||||||
'tengu_voice_recording_started',
|
|
||||||
'tengu_voice_toggled',
|
|
||||||
'tengu_team_mem_sync_pull',
|
|
||||||
'tengu_team_mem_sync_push',
|
|
||||||
'tengu_team_mem_sync_started',
|
|
||||||
'tengu_team_mem_entries_capped',
|
|
||||||
])
|
|
||||||
|
|
||||||
const TAG_FIELDS = [
|
|
||||||
'arch',
|
|
||||||
'clientType',
|
|
||||||
'errorType',
|
|
||||||
'http_status_range',
|
|
||||||
'http_status',
|
|
||||||
'kairosActive',
|
|
||||||
'model',
|
|
||||||
'platform',
|
|
||||||
'provider',
|
|
||||||
'skillMode',
|
|
||||||
'subscriptionType',
|
|
||||||
'toolName',
|
|
||||||
'userBucket',
|
|
||||||
'userType',
|
|
||||||
'version',
|
|
||||||
'versionBase',
|
|
||||||
]
|
|
||||||
|
|
||||||
function camelToSnakeCase(str: string): string {
|
|
||||||
return str.replace(/[A-Z]/g, letter => `_${letter.toLowerCase()}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
type DatadogLog = {
|
|
||||||
ddsource: string
|
|
||||||
ddtags: string
|
|
||||||
message: string
|
|
||||||
service: string
|
|
||||||
hostname: string
|
|
||||||
[key: string]: unknown
|
|
||||||
}
|
|
||||||
|
|
||||||
let logBatch: DatadogLog[] = []
|
|
||||||
let flushTimer: NodeJS.Timeout | null = null
|
|
||||||
let datadogInitialized: boolean | null = null
|
|
||||||
|
|
||||||
async function flushLogs(): Promise<void> {
|
|
||||||
if (logBatch.length === 0) return
|
|
||||||
|
|
||||||
const logsToSend = logBatch
|
|
||||||
logBatch = []
|
|
||||||
|
|
||||||
try {
|
|
||||||
await axios.post(DATADOG_LOGS_ENDPOINT, logsToSend, {
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'DD-API-KEY': DATADOG_CLIENT_TOKEN,
|
|
||||||
},
|
|
||||||
timeout: NETWORK_TIMEOUT_MS,
|
|
||||||
})
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function scheduleFlush(): void {
|
|
||||||
if (flushTimer) return
|
|
||||||
|
|
||||||
flushTimer = setTimeout(() => {
|
|
||||||
flushTimer = null
|
|
||||||
void flushLogs()
|
|
||||||
}, getFlushIntervalMs()).unref()
|
|
||||||
}
|
|
||||||
|
|
||||||
export const initializeDatadog = memoize(async (): Promise<boolean> => {
|
|
||||||
if (isAnalyticsDisabled()) {
|
|
||||||
datadogInitialized = false
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
datadogInitialized = true
|
|
||||||
return true
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
datadogInitialized = false
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flush remaining Datadog logs and shut down.
|
* Datadog analytics egress is disabled in this build.
|
||||||
* Called from gracefulShutdown() before process.exit() since
|
*
|
||||||
* forceExit() prevents the beforeExit handler from firing.
|
* The exported functions remain so existing call sites do not need to branch.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
export async function initializeDatadog(): Promise<boolean> {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
export async function shutdownDatadog(): Promise<void> {
|
export async function shutdownDatadog(): Promise<void> {
|
||||||
if (flushTimer) {
|
return
|
||||||
clearTimeout(flushTimer)
|
|
||||||
flushTimer = null
|
|
||||||
}
|
|
||||||
await flushLogs()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: use via src/services/analytics/index.ts > logEvent
|
|
||||||
export async function trackDatadogEvent(
|
export async function trackDatadogEvent(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
properties: { [key: string]: boolean | number | undefined },
|
_properties: { [key: string]: boolean | number | undefined },
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
if (process.env.NODE_ENV !== 'production') {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't send events for 3P providers (Bedrock, Vertex, Foundry)
|
|
||||||
if (getAPIProvider() !== 'firstParty') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fast path: use cached result if available to avoid await overhead
|
|
||||||
let initialized = datadogInitialized
|
|
||||||
if (initialized === null) {
|
|
||||||
initialized = await initializeDatadog()
|
|
||||||
}
|
|
||||||
if (!initialized || !DATADOG_ALLOWED_EVENTS.has(eventName)) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const metadata = await getEventMetadata({
|
|
||||||
model: properties.model,
|
|
||||||
betas: properties.betas,
|
|
||||||
})
|
|
||||||
// Destructure to avoid duplicate envContext (once nested, once flattened)
|
|
||||||
const { envContext, ...restMetadata } = metadata
|
|
||||||
const allData: Record<string, unknown> = {
|
|
||||||
...restMetadata,
|
|
||||||
...envContext,
|
|
||||||
...properties,
|
|
||||||
userBucket: getUserBucket(),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize MCP tool names to "mcp" for cardinality reduction
|
|
||||||
if (
|
|
||||||
typeof allData.toolName === 'string' &&
|
|
||||||
allData.toolName.startsWith('mcp__')
|
|
||||||
) {
|
|
||||||
allData.toolName = 'mcp'
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize model names for cardinality reduction (external users only)
|
|
||||||
if (process.env.USER_TYPE !== 'ant' && typeof allData.model === 'string') {
|
|
||||||
const shortName = getCanonicalName(allData.model.replace(/\[1m]$/i, ''))
|
|
||||||
allData.model = shortName in MODEL_COSTS ? shortName : 'other'
|
|
||||||
}
|
|
||||||
|
|
||||||
// Truncate dev version to base + date (remove timestamp and sha for cardinality reduction)
|
|
||||||
// e.g. "2.0.53-dev.20251124.t173302.sha526cc6a" -> "2.0.53-dev.20251124"
|
|
||||||
if (typeof allData.version === 'string') {
|
|
||||||
allData.version = allData.version.replace(
|
|
||||||
/^(\d+\.\d+\.\d+-dev\.\d{8})\.t\d+\.sha[a-f0-9]+$/,
|
|
||||||
'$1',
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transform status to http_status and http_status_range to avoid Datadog reserved field
|
|
||||||
if (allData.status !== undefined && allData.status !== null) {
|
|
||||||
const statusCode = String(allData.status)
|
|
||||||
allData.http_status = statusCode
|
|
||||||
|
|
||||||
// Determine status range (1xx, 2xx, 3xx, 4xx, 5xx)
|
|
||||||
const firstDigit = statusCode.charAt(0)
|
|
||||||
if (firstDigit >= '1' && firstDigit <= '5') {
|
|
||||||
allData.http_status_range = `${firstDigit}xx`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove original status field to avoid conflict with Datadog's reserved field
|
|
||||||
delete allData.status
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build ddtags with high-cardinality fields for filtering.
|
|
||||||
// event:<name> is prepended so the event name is searchable via the
|
|
||||||
// log search API — the `message` field (where eventName also lives)
|
|
||||||
// is a DD reserved field and is NOT queryable from dashboard widget
|
|
||||||
// queries or the aggregation API. See scripts/release/MONITORING.md.
|
|
||||||
const allDataRecord = allData
|
|
||||||
const tags = [
|
|
||||||
`event:${eventName}`,
|
|
||||||
...TAG_FIELDS.filter(
|
|
||||||
field =>
|
|
||||||
allDataRecord[field] !== undefined && allDataRecord[field] !== null,
|
|
||||||
).map(field => `${camelToSnakeCase(field)}:${allDataRecord[field]}`),
|
|
||||||
]
|
|
||||||
|
|
||||||
const log: DatadogLog = {
|
|
||||||
ddsource: 'nodejs',
|
|
||||||
ddtags: tags.join(','),
|
|
||||||
message: eventName,
|
|
||||||
service: 'claude-code',
|
|
||||||
hostname: 'claude-code',
|
|
||||||
env: process.env.USER_TYPE,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add all fields as searchable attributes (not duplicated in tags)
|
|
||||||
for (const [key, value] of Object.entries(allData)) {
|
|
||||||
if (value !== undefined && value !== null) {
|
|
||||||
log[camelToSnakeCase(key)] = value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logBatch.push(log)
|
|
||||||
|
|
||||||
// Flush immediately if batch is full, otherwise schedule
|
|
||||||
if (logBatch.length >= MAX_BATCH_SIZE) {
|
|
||||||
if (flushTimer) {
|
|
||||||
clearTimeout(flushTimer)
|
|
||||||
flushTimer = null
|
|
||||||
}
|
|
||||||
void flushLogs()
|
|
||||||
} else {
|
|
||||||
scheduleFlush()
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logError(error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const NUM_USER_BUCKETS = 30
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a 'bucket' that the user ID falls into.
|
|
||||||
*
|
|
||||||
* For alerting purposes, we want to alert on the number of users impacted
|
|
||||||
* by an issue, rather than the number of events- often a small number of users
|
|
||||||
* can generate a large number of events (e.g. due to retries). To approximate
|
|
||||||
* this without ruining cardinality by counting user IDs directly, we hash the user ID
|
|
||||||
* and assign it to one of a fixed number of buckets.
|
|
||||||
*
|
|
||||||
* This allows us to estimate the number of unique users by counting unique buckets,
|
|
||||||
* while preserving user privacy and reducing cardinality.
|
|
||||||
*/
|
|
||||||
const getUserBucket = memoize((): number => {
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
const hash = createHash('sha256').update(userId).digest('hex')
|
|
||||||
return parseInt(hash.slice(0, 8), 16) % NUM_USER_BUCKETS
|
|
||||||
})
|
|
||||||
|
|
||||||
function getFlushIntervalMs(): number {
|
|
||||||
// Allow tests to override to not block on the default flush interval.
|
|
||||||
return (
|
|
||||||
parseInt(process.env.CLAUDE_CODE_DATADOG_FLUSH_INTERVAL_MS || '', 10) ||
|
|
||||||
DEFAULT_FLUSH_INTERVAL_MS
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,237 +1,41 @@
|
|||||||
import type { AnyValueMap, Logger, logs } from '@opentelemetry/api-logs'
|
|
||||||
import { resourceFromAttributes } from '@opentelemetry/resources'
|
|
||||||
import {
|
|
||||||
BatchLogRecordProcessor,
|
|
||||||
LoggerProvider,
|
|
||||||
} from '@opentelemetry/sdk-logs'
|
|
||||||
import {
|
|
||||||
ATTR_SERVICE_NAME,
|
|
||||||
ATTR_SERVICE_VERSION,
|
|
||||||
} from '@opentelemetry/semantic-conventions'
|
|
||||||
import { randomUUID } from 'crypto'
|
|
||||||
import { isEqual } from 'lodash-es'
|
|
||||||
import { getOrCreateUserID } from '../../utils/config.js'
|
|
||||||
import { logForDebugging } from '../../utils/debug.js'
|
|
||||||
import { logError } from '../../utils/log.js'
|
|
||||||
import { getPlatform, getWslVersion } from '../../utils/platform.js'
|
|
||||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
|
||||||
import { profileCheckpoint } from '../../utils/startupProfiler.js'
|
|
||||||
import { getCoreUserData } from '../../utils/user.js'
|
|
||||||
import { isAnalyticsDisabled } from './config.js'
|
|
||||||
import { FirstPartyEventLoggingExporter } from './firstPartyEventLoggingExporter.js'
|
|
||||||
import type { GrowthBookUserAttributes } from './growthbook.js'
|
|
||||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from './growthbook.js'
|
|
||||||
import { getEventMetadata } from './metadata.js'
|
|
||||||
import { isSinkKilled } from './sinkKillswitch.js'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration for sampling individual event types.
|
* Anthropic 1P event logging egress is disabled in this build.
|
||||||
* Each event name maps to an object containing sample_rate (0-1).
|
*
|
||||||
* Events not in the config are logged at 100% rate.
|
* The module keeps its public API so the rest of the app can call into it
|
||||||
|
* without conditional imports.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import type { GrowthBookUserAttributes } from './growthbook.js'
|
||||||
|
|
||||||
export type EventSamplingConfig = {
|
export type EventSamplingConfig = {
|
||||||
[eventName: string]: {
|
[eventName: string]: {
|
||||||
sample_rate: number
|
sample_rate: number
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const EVENT_SAMPLING_CONFIG_NAME = 'tengu_event_sampling_config'
|
|
||||||
/**
|
|
||||||
* Get the event sampling configuration from GrowthBook.
|
|
||||||
* Uses cached value if available, updates cache in background.
|
|
||||||
*/
|
|
||||||
export function getEventSamplingConfig(): EventSamplingConfig {
|
export function getEventSamplingConfig(): EventSamplingConfig {
|
||||||
return getDynamicConfig_CACHED_MAY_BE_STALE<EventSamplingConfig>(
|
return {}
|
||||||
EVENT_SAMPLING_CONFIG_NAME,
|
|
||||||
{},
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
export function shouldSampleEvent(_eventName: string): number | null {
|
||||||
* Determine if an event should be sampled based on its sample rate.
|
|
||||||
* Returns the sample rate if sampled, null if not sampled.
|
|
||||||
*
|
|
||||||
* @param eventName - Name of the event to check
|
|
||||||
* @returns The sample_rate if event should be logged, null if it should be dropped
|
|
||||||
*/
|
|
||||||
export function shouldSampleEvent(eventName: string): number | null {
|
|
||||||
const config = getEventSamplingConfig()
|
|
||||||
const eventConfig = config[eventName]
|
|
||||||
|
|
||||||
// If no config for this event, log at 100% rate (no sampling)
|
|
||||||
if (!eventConfig) {
|
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
const sampleRate = eventConfig.sample_rate
|
|
||||||
|
|
||||||
// Validate sample rate is in valid range
|
|
||||||
if (typeof sampleRate !== 'number' || sampleRate < 0 || sampleRate > 1) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sample rate of 1 means log everything (no need to add metadata)
|
|
||||||
if (sampleRate >= 1) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sample rate of 0 means drop everything
|
|
||||||
if (sampleRate <= 0) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Randomly decide whether to sample this event
|
|
||||||
return Math.random() < sampleRate ? sampleRate : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
const BATCH_CONFIG_NAME = 'tengu_1p_event_batch_config'
|
|
||||||
type BatchConfig = {
|
|
||||||
scheduledDelayMillis?: number
|
|
||||||
maxExportBatchSize?: number
|
|
||||||
maxQueueSize?: number
|
|
||||||
skipAuth?: boolean
|
|
||||||
maxAttempts?: number
|
|
||||||
path?: string
|
|
||||||
baseUrl?: string
|
|
||||||
}
|
|
||||||
function getBatchConfig(): BatchConfig {
|
|
||||||
return getDynamicConfig_CACHED_MAY_BE_STALE<BatchConfig>(
|
|
||||||
BATCH_CONFIG_NAME,
|
|
||||||
{},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Module-local state for event logging (not exposed globally)
|
|
||||||
let firstPartyEventLogger: ReturnType<typeof logs.getLogger> | null = null
|
|
||||||
let firstPartyEventLoggerProvider: LoggerProvider | null = null
|
|
||||||
// Last batch config used to construct the provider — used by
|
|
||||||
// reinitialize1PEventLoggingIfConfigChanged to decide whether a rebuild is
|
|
||||||
// needed when GrowthBook refreshes.
|
|
||||||
let lastBatchConfig: BatchConfig | null = null
|
|
||||||
/**
|
|
||||||
* Flush and shutdown the 1P event logger.
|
|
||||||
* This should be called as the final step before process exit to ensure
|
|
||||||
* all events (including late ones from API responses) are exported.
|
|
||||||
*/
|
|
||||||
export async function shutdown1PEventLogging(): Promise<void> {
|
export async function shutdown1PEventLogging(): Promise<void> {
|
||||||
if (!firstPartyEventLoggerProvider) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
await firstPartyEventLoggerProvider.shutdown()
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging: final shutdown complete')
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Ignore shutdown errors
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if 1P event logging is enabled.
|
|
||||||
* Respects the same opt-outs as other analytics sinks:
|
|
||||||
* - Test environment
|
|
||||||
* - Third-party cloud providers (Bedrock/Vertex)
|
|
||||||
* - Global telemetry opt-outs
|
|
||||||
* - Non-essential traffic disabled
|
|
||||||
*
|
|
||||||
* Note: Unlike BigQuery metrics, event logging does NOT check organization-level
|
|
||||||
* metrics opt-out via API. It follows the same pattern as Statsig event logging.
|
|
||||||
*/
|
|
||||||
export function is1PEventLoggingEnabled(): boolean {
|
export function is1PEventLoggingEnabled(): boolean {
|
||||||
// Respect standard analytics opt-outs
|
return false
|
||||||
return !isAnalyticsDisabled()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a 1st-party event for internal analytics (async version).
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* This enriches the event with core metadata (model, session, env context, etc.)
|
|
||||||
* at log time, similar to logEventToStatsig.
|
|
||||||
*
|
|
||||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
|
||||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
|
||||||
*/
|
|
||||||
async function logEventTo1PAsync(
|
|
||||||
firstPartyEventLogger: Logger,
|
|
||||||
eventName: string,
|
|
||||||
metadata: Record<string, number | boolean | undefined> = {},
|
|
||||||
): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Enrich with core metadata at log time (similar to Statsig pattern)
|
|
||||||
const coreMetadata = await getEventMetadata({
|
|
||||||
model: metadata.model,
|
|
||||||
betas: metadata.betas,
|
|
||||||
})
|
|
||||||
|
|
||||||
// Build attributes - OTel supports nested objects natively via AnyValueMap
|
|
||||||
// Cast through unknown since our nested objects are structurally compatible
|
|
||||||
// with AnyValue but TS doesn't recognize it due to missing index signatures
|
|
||||||
const attributes = {
|
|
||||||
event_name: eventName,
|
|
||||||
event_id: randomUUID(),
|
|
||||||
// Pass objects directly - no JSON serialization needed
|
|
||||||
core_metadata: coreMetadata,
|
|
||||||
user_metadata: getCoreUserData(true),
|
|
||||||
event_metadata: metadata,
|
|
||||||
} as unknown as AnyValueMap
|
|
||||||
|
|
||||||
// Add user_id if available
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
if (userId) {
|
|
||||||
attributes.user_id = userId
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug logging when debug mode is enabled
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`[ANT-ONLY] 1P event: ${eventName} ${jsonStringify(metadata, null, 0)}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit log record
|
|
||||||
firstPartyEventLogger.emit({
|
|
||||||
body: eventName,
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
} catch (e) {
|
|
||||||
if (process.env.NODE_ENV === 'development') {
|
|
||||||
throw e
|
|
||||||
}
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logError(e as Error)
|
|
||||||
}
|
|
||||||
// swallow
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a 1st-party event for internal analytics.
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
|
||||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
|
||||||
*/
|
|
||||||
export function logEventTo1P(
|
export function logEventTo1P(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: Record<string, number | boolean | undefined> = {},
|
_metadata: Record<string, number | boolean | undefined> = {},
|
||||||
): void {
|
): void {
|
||||||
if (!is1PEventLoggingEnabled()) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fire and forget - don't block on metadata enrichment
|
|
||||||
void logEventTo1PAsync(firstPartyEventLogger, eventName, metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GrowthBook experiment event data for logging
|
|
||||||
*/
|
|
||||||
export type GrowthBookExperimentData = {
|
export type GrowthBookExperimentData = {
|
||||||
experimentId: string
|
experimentId: string
|
||||||
variationId: number
|
variationId: number
|
||||||
@@ -239,211 +43,16 @@ export type GrowthBookExperimentData = {
|
|||||||
experimentMetadata?: Record<string, unknown>
|
experimentMetadata?: Record<string, unknown>
|
||||||
}
|
}
|
||||||
|
|
||||||
// api.anthropic.com only serves the "production" GrowthBook environment
|
|
||||||
// (see starling/starling/cli/cli.py DEFAULT_ENVIRONMENTS). Staging and
|
|
||||||
// development environments are not exported to the prod API.
|
|
||||||
function getEnvironmentForGrowthBook(): string {
|
|
||||||
return 'production'
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log a GrowthBook experiment assignment event to 1P.
|
|
||||||
* Events are batched and exported to /api/event_logging/batch
|
|
||||||
*
|
|
||||||
* @param data - GrowthBook experiment assignment data
|
|
||||||
*/
|
|
||||||
export function logGrowthBookExperimentTo1P(
|
export function logGrowthBookExperimentTo1P(
|
||||||
data: GrowthBookExperimentData,
|
_data: GrowthBookExperimentData,
|
||||||
): void {
|
): void {
|
||||||
if (!is1PEventLoggingEnabled()) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const userId = getOrCreateUserID()
|
|
||||||
const { accountUuid, organizationUuid } = getCoreUserData(true)
|
|
||||||
|
|
||||||
// Build attributes for GrowthbookExperimentEvent
|
|
||||||
const attributes = {
|
|
||||||
event_type: 'GrowthbookExperimentEvent',
|
|
||||||
event_id: randomUUID(),
|
|
||||||
experiment_id: data.experimentId,
|
|
||||||
variation_id: data.variationId,
|
|
||||||
...(userId && { device_id: userId }),
|
|
||||||
...(accountUuid && { account_uuid: accountUuid }),
|
|
||||||
...(organizationUuid && { organization_uuid: organizationUuid }),
|
|
||||||
...(data.userAttributes && {
|
|
||||||
session_id: data.userAttributes.sessionId,
|
|
||||||
user_attributes: jsonStringify(data.userAttributes),
|
|
||||||
}),
|
|
||||||
...(data.experimentMetadata && {
|
|
||||||
experiment_metadata: jsonStringify(data.experimentMetadata),
|
|
||||||
}),
|
|
||||||
environment: getEnvironmentForGrowthBook(),
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`[ANT-ONLY] 1P GrowthBook experiment: ${data.experimentId} variation=${data.variationId}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
firstPartyEventLogger.emit({
|
|
||||||
body: 'growthbook_experiment',
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 10000
|
|
||||||
const DEFAULT_MAX_EXPORT_BATCH_SIZE = 200
|
|
||||||
const DEFAULT_MAX_QUEUE_SIZE = 8192
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize 1P event logging infrastructure.
|
|
||||||
* This creates a separate LoggerProvider for internal event logging,
|
|
||||||
* independent of customer OTLP telemetry.
|
|
||||||
*
|
|
||||||
* This uses its own minimal resource configuration with just the attributes
|
|
||||||
* we need for internal analytics (service name, version, platform info).
|
|
||||||
*/
|
|
||||||
export function initialize1PEventLogging(): void {
|
export function initialize1PEventLogging(): void {
|
||||||
profileCheckpoint('1p_event_logging_start')
|
|
||||||
const enabled = is1PEventLoggingEnabled()
|
|
||||||
|
|
||||||
if (!enabled) {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging('1P event logging not enabled')
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch batch processor configuration from GrowthBook dynamic config
|
|
||||||
// Uses cached value if available, refreshes in background
|
|
||||||
const batchConfig = getBatchConfig()
|
|
||||||
lastBatchConfig = batchConfig
|
|
||||||
profileCheckpoint('1p_event_after_growthbook_config')
|
|
||||||
|
|
||||||
const scheduledDelayMillis =
|
|
||||||
batchConfig.scheduledDelayMillis ||
|
|
||||||
parseInt(
|
|
||||||
process.env.OTEL_LOGS_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
)
|
|
||||||
|
|
||||||
const maxExportBatchSize =
|
|
||||||
batchConfig.maxExportBatchSize || DEFAULT_MAX_EXPORT_BATCH_SIZE
|
|
||||||
|
|
||||||
const maxQueueSize = batchConfig.maxQueueSize || DEFAULT_MAX_QUEUE_SIZE
|
|
||||||
|
|
||||||
// Build our own resource for 1P event logging with minimal attributes
|
|
||||||
const platform = getPlatform()
|
|
||||||
const attributes: Record<string, string> = {
|
|
||||||
[ATTR_SERVICE_NAME]: 'claude-code',
|
|
||||||
[ATTR_SERVICE_VERSION]: MACRO.VERSION,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add WSL-specific attributes if running on WSL
|
|
||||||
if (platform === 'wsl') {
|
|
||||||
const wslVersion = getWslVersion()
|
|
||||||
if (wslVersion) {
|
|
||||||
attributes['wsl.version'] = wslVersion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const resource = resourceFromAttributes(attributes)
|
|
||||||
|
|
||||||
// Create a new LoggerProvider with the EventLoggingExporter
|
|
||||||
// NOTE: This is kept separate from customer telemetry logs to ensure
|
|
||||||
// internal events don't leak to customer endpoints and vice versa.
|
|
||||||
// We don't register this globally - it's only used for internal event logging.
|
|
||||||
const eventLoggingExporter = new FirstPartyEventLoggingExporter({
|
|
||||||
maxBatchSize: maxExportBatchSize,
|
|
||||||
skipAuth: batchConfig.skipAuth,
|
|
||||||
maxAttempts: batchConfig.maxAttempts,
|
|
||||||
path: batchConfig.path,
|
|
||||||
baseUrl: batchConfig.baseUrl,
|
|
||||||
isKilled: () => isSinkKilled('firstParty'),
|
|
||||||
})
|
|
||||||
firstPartyEventLoggerProvider = new LoggerProvider({
|
|
||||||
resource,
|
|
||||||
processors: [
|
|
||||||
new BatchLogRecordProcessor(eventLoggingExporter, {
|
|
||||||
scheduledDelayMillis,
|
|
||||||
maxExportBatchSize,
|
|
||||||
maxQueueSize,
|
|
||||||
}),
|
|
||||||
],
|
|
||||||
})
|
|
||||||
|
|
||||||
// Initialize event logger from our internal provider (NOT from global API)
|
|
||||||
// IMPORTANT: We must get the logger from our local provider, not logs.getLogger()
|
|
||||||
// because logs.getLogger() returns a logger from the global provider, which is
|
|
||||||
// separate and used for customer telemetry.
|
|
||||||
firstPartyEventLogger = firstPartyEventLoggerProvider.getLogger(
|
|
||||||
'com.anthropic.claude_code.events',
|
|
||||||
MACRO.VERSION,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Rebuild the 1P event logging pipeline if the batch config changed.
|
|
||||||
* Register this with onGrowthBookRefresh so long-running sessions pick up
|
|
||||||
* changes to batch size, delay, endpoint, etc.
|
|
||||||
*
|
|
||||||
* Event-loss safety:
|
|
||||||
* 1. Null the logger first — concurrent logEventTo1P() calls hit the
|
|
||||||
* !firstPartyEventLogger guard and bail during the swap window. This drops
|
|
||||||
* a handful of events but prevents emitting to a draining provider.
|
|
||||||
* 2. forceFlush() drains the old BatchLogRecordProcessor buffer to the
|
|
||||||
* exporter. Export failures go to disk at getCurrentBatchFilePath() which
|
|
||||||
* is keyed by module-level BATCH_UUID + sessionId — unchanged across
|
|
||||||
* reinit — so the NEW exporter's disk-backed retry picks them up.
|
|
||||||
* 3. Swap to new provider/logger; old provider shutdown runs in background
|
|
||||||
* (buffer already drained, just cleanup).
|
|
||||||
*/
|
|
||||||
export async function reinitialize1PEventLoggingIfConfigChanged(): Promise<void> {
|
export async function reinitialize1PEventLoggingIfConfigChanged(): Promise<void> {
|
||||||
if (!is1PEventLoggingEnabled() || !firstPartyEventLoggerProvider) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const newConfig = getBatchConfig()
|
|
||||||
|
|
||||||
if (isEqual(newConfig, lastBatchConfig)) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
logForDebugging(
|
|
||||||
`1P event logging: ${BATCH_CONFIG_NAME} changed, reinitializing`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const oldProvider = firstPartyEventLoggerProvider
|
|
||||||
const oldLogger = firstPartyEventLogger
|
|
||||||
firstPartyEventLogger = null
|
|
||||||
|
|
||||||
try {
|
|
||||||
await oldProvider.forceFlush()
|
|
||||||
} catch {
|
|
||||||
// Export failures are already on disk; new exporter will retry them.
|
|
||||||
}
|
|
||||||
|
|
||||||
firstPartyEventLoggerProvider = null
|
|
||||||
try {
|
|
||||||
initialize1PEventLogging()
|
|
||||||
} catch (e) {
|
|
||||||
// Restore so the next GrowthBook refresh can retry. oldProvider was
|
|
||||||
// only forceFlush()'d, not shut down — it's still functional. Without
|
|
||||||
// this, both stay null and the !firstPartyEventLoggerProvider gate at
|
|
||||||
// the top makes recovery impossible.
|
|
||||||
firstPartyEventLoggerProvider = oldProvider
|
|
||||||
firstPartyEventLogger = oldLogger
|
|
||||||
logError(e)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
void oldProvider.shutdown().catch(() => {})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ import {
|
|||||||
getUserForGrowthBook,
|
getUserForGrowthBook,
|
||||||
} from '../../utils/user.js'
|
} from '../../utils/user.js'
|
||||||
import {
|
import {
|
||||||
is1PEventLoggingEnabled,
|
|
||||||
logGrowthBookExperimentTo1P,
|
logGrowthBookExperimentTo1P,
|
||||||
} from './firstPartyEventLogger.js'
|
} from './firstPartyEventLogger.js'
|
||||||
|
|
||||||
@@ -219,6 +218,19 @@ function getConfigOverrides(): Record<string, unknown> | undefined {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getCachedGrowthBookFeature<T>(feature: string): T | undefined {
|
||||||
|
if (remoteEvalFeatureValues.has(feature)) {
|
||||||
|
return remoteEvalFeatureValues.get(feature) as T
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const cached = getGlobalConfig().cachedGrowthBookFeatures?.[feature]
|
||||||
|
return cached !== undefined ? (cached as T) : undefined
|
||||||
|
} catch {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enumerate all known GrowthBook features and their current resolved values
|
* Enumerate all known GrowthBook features and their current resolved values
|
||||||
* (not including overrides). In-memory payload first, disk cache fallback —
|
* (not including overrides). In-memory payload first, disk cache fallback —
|
||||||
@@ -420,8 +432,9 @@ function syncRemoteEvalToDisk(): void {
|
|||||||
* Check if GrowthBook operations should be enabled
|
* Check if GrowthBook operations should be enabled
|
||||||
*/
|
*/
|
||||||
function isGrowthBookEnabled(): boolean {
|
function isGrowthBookEnabled(): boolean {
|
||||||
// GrowthBook depends on 1P event logging.
|
// Network-backed GrowthBook egress is disabled in this build. Callers still
|
||||||
return is1PEventLoggingEnabled()
|
// read local cache and explicit overrides through the helpers below.
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -682,6 +695,11 @@ async function getFeatureValueInternal<T>(
|
|||||||
return configOverrides[feature] as T
|
return configOverrides[feature] as T
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cached = getCachedGrowthBookFeature<T>(feature)
|
||||||
|
if (cached !== undefined) {
|
||||||
|
return cached
|
||||||
|
}
|
||||||
|
|
||||||
if (!isGrowthBookEnabled()) {
|
if (!isGrowthBookEnabled()) {
|
||||||
return defaultValue
|
return defaultValue
|
||||||
}
|
}
|
||||||
@@ -745,6 +763,11 @@ export function getFeatureValue_CACHED_MAY_BE_STALE<T>(
|
|||||||
return configOverrides[feature] as T
|
return configOverrides[feature] as T
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cached = getCachedGrowthBookFeature<T>(feature)
|
||||||
|
if (cached !== undefined) {
|
||||||
|
return cached
|
||||||
|
}
|
||||||
|
|
||||||
if (!isGrowthBookEnabled()) {
|
if (!isGrowthBookEnabled()) {
|
||||||
return defaultValue
|
return defaultValue
|
||||||
}
|
}
|
||||||
@@ -814,6 +837,16 @@ export function checkStatsigFeatureGate_CACHED_MAY_BE_STALE(
|
|||||||
return Boolean(configOverrides[gate])
|
return Boolean(configOverrides[gate])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cached = getCachedGrowthBookFeature<boolean>(gate)
|
||||||
|
if (cached !== undefined) {
|
||||||
|
return Boolean(cached)
|
||||||
|
}
|
||||||
|
|
||||||
|
const statsigCached = getGlobalConfig().cachedStatsigGates?.[gate]
|
||||||
|
if (statsigCached !== undefined) {
|
||||||
|
return Boolean(statsigCached)
|
||||||
|
}
|
||||||
|
|
||||||
if (!isGrowthBookEnabled()) {
|
if (!isGrowthBookEnabled()) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -861,6 +894,16 @@ export async function checkSecurityRestrictionGate(
|
|||||||
return Boolean(configOverrides[gate])
|
return Boolean(configOverrides[gate])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cached = getCachedGrowthBookFeature<boolean>(gate)
|
||||||
|
if (cached !== undefined) {
|
||||||
|
return Boolean(cached)
|
||||||
|
}
|
||||||
|
|
||||||
|
const statsigCached = getGlobalConfig().cachedStatsigGates?.[gate]
|
||||||
|
if (statsigCached !== undefined) {
|
||||||
|
return Boolean(statsigCached)
|
||||||
|
}
|
||||||
|
|
||||||
if (!isGrowthBookEnabled()) {
|
if (!isGrowthBookEnabled()) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -871,19 +914,6 @@ export async function checkSecurityRestrictionGate(
|
|||||||
await reinitializingPromise
|
await reinitializingPromise
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check Statsig cache first - it may have correct value from previous logged-in session
|
|
||||||
const config = getGlobalConfig()
|
|
||||||
const statsigCached = config.cachedStatsigGates?.[gate]
|
|
||||||
if (statsigCached !== undefined) {
|
|
||||||
return Boolean(statsigCached)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Then check GrowthBook cache
|
|
||||||
const gbCached = config.cachedGrowthBookFeatures?.[gate]
|
|
||||||
if (gbCached !== undefined) {
|
|
||||||
return Boolean(gbCached)
|
|
||||||
}
|
|
||||||
|
|
||||||
// No cache - return false (don't block on init for uncached gates)
|
// No cache - return false (don't block on init for uncached gates)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -914,13 +944,23 @@ export async function checkGate_CACHED_OR_BLOCKING(
|
|||||||
return Boolean(configOverrides[gate])
|
return Boolean(configOverrides[gate])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cached = getCachedGrowthBookFeature<boolean>(gate)
|
||||||
|
if (cached !== undefined) {
|
||||||
|
return Boolean(cached)
|
||||||
|
}
|
||||||
|
|
||||||
|
const statsigCached = getGlobalConfig().cachedStatsigGates?.[gate]
|
||||||
|
if (statsigCached !== undefined) {
|
||||||
|
return Boolean(statsigCached)
|
||||||
|
}
|
||||||
|
|
||||||
if (!isGrowthBookEnabled()) {
|
if (!isGrowthBookEnabled()) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fast path: disk cache already says true — trust it
|
// Fast path: disk cache already says true — trust it
|
||||||
const cached = getGlobalConfig().cachedGrowthBookFeatures?.[gate]
|
const diskCached = getGlobalConfig().cachedGrowthBookFeatures?.[gate]
|
||||||
if (cached === true) {
|
if (diskCached === true) {
|
||||||
// Log experiment exposure if data is available, otherwise defer
|
// Log experiment exposure if data is available, otherwise defer
|
||||||
if (experimentDataByFeature.has(gate)) {
|
if (experimentDataByFeature.has(gate)) {
|
||||||
logExposureForFeature(gate)
|
logExposureForFeature(gate)
|
||||||
|
|||||||
@@ -1,111 +1,32 @@
|
|||||||
/**
|
/**
|
||||||
* Analytics sink implementation
|
* Analytics sink implementation
|
||||||
*
|
*
|
||||||
* This module contains the actual analytics routing logic and should be
|
* This open build keeps the analytics sink boundary for compatibility, but
|
||||||
* initialized during app startup. It routes events to Datadog and 1P event
|
* drops all queued analytics events locally instead of routing them onward.
|
||||||
* logging.
|
|
||||||
*
|
|
||||||
* Usage: Call initializeAnalyticsSink() during app startup to attach the sink.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { trackDatadogEvent } from './datadog.js'
|
import { attachAnalyticsSink } from './index.js'
|
||||||
import { logEventTo1P, shouldSampleEvent } from './firstPartyEventLogger.js'
|
|
||||||
import { checkStatsigFeatureGate_CACHED_MAY_BE_STALE } from './growthbook.js'
|
|
||||||
import { attachAnalyticsSink, stripProtoFields } from './index.js'
|
|
||||||
import { isSinkKilled } from './sinkKillswitch.js'
|
|
||||||
|
|
||||||
// Local type matching the logEvent metadata signature
|
|
||||||
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
||||||
|
|
||||||
const DATADOG_GATE_NAME = 'tengu_log_datadog_events'
|
function logEventImpl(
|
||||||
|
_eventName: string,
|
||||||
// Module-level gate state - starts undefined, initialized during startup
|
_metadata: LogEventMetadata,
|
||||||
let isDatadogGateEnabled: boolean | undefined = undefined
|
): void {
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if Datadog tracking is enabled.
|
|
||||||
* Falls back to cached value from previous session if not yet initialized.
|
|
||||||
*/
|
|
||||||
function shouldTrackDatadog(): boolean {
|
|
||||||
if (isSinkKilled('datadog')) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if (isDatadogGateEnabled !== undefined) {
|
|
||||||
return isDatadogGateEnabled
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to cached value from previous session
|
|
||||||
try {
|
|
||||||
return checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
|
|
||||||
} catch {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log an event (synchronous implementation)
|
|
||||||
*/
|
|
||||||
function logEventImpl(eventName: string, metadata: LogEventMetadata): void {
|
|
||||||
// Check if this event should be sampled
|
|
||||||
const sampleResult = shouldSampleEvent(eventName)
|
|
||||||
|
|
||||||
// If sample result is 0, the event was not selected for logging
|
|
||||||
if (sampleResult === 0) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// If sample result is a positive number, add it to metadata
|
|
||||||
const metadataWithSampleRate =
|
|
||||||
sampleResult !== null
|
|
||||||
? { ...metadata, sample_rate: sampleResult }
|
|
||||||
: metadata
|
|
||||||
|
|
||||||
if (shouldTrackDatadog()) {
|
|
||||||
// Datadog is a general-access backend — strip _PROTO_* keys
|
|
||||||
// (unredacted PII-tagged values meant only for the 1P privileged column).
|
|
||||||
void trackDatadogEvent(eventName, stripProtoFields(metadataWithSampleRate))
|
|
||||||
}
|
|
||||||
|
|
||||||
// 1P receives the full payload including _PROTO_* — the exporter
|
|
||||||
// destructures and routes those keys to proto fields itself.
|
|
||||||
logEventTo1P(eventName, metadataWithSampleRate)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Log an event (asynchronous implementation)
|
|
||||||
*
|
|
||||||
* With Segment removed the two remaining sinks are fire-and-forget, so this
|
|
||||||
* just wraps the sync impl — kept to preserve the sink interface contract.
|
|
||||||
*/
|
|
||||||
function logEventAsyncImpl(
|
function logEventAsyncImpl(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: LogEventMetadata,
|
_metadata: LogEventMetadata,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
logEventImpl(eventName, metadata)
|
|
||||||
return Promise.resolve()
|
return Promise.resolve()
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize analytics gates during startup.
|
|
||||||
*
|
|
||||||
* Updates gate values from server. Early events use cached values from previous
|
|
||||||
* session to avoid data loss during initialization.
|
|
||||||
*
|
|
||||||
* Called from main.tsx during setupBackend().
|
|
||||||
*/
|
|
||||||
export function initializeAnalyticsGates(): void {
|
export function initializeAnalyticsGates(): void {
|
||||||
isDatadogGateEnabled =
|
return
|
||||||
checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the analytics sink.
|
|
||||||
*
|
|
||||||
* Call this during app startup to attach the analytics backend.
|
|
||||||
* Any events logged before this is called will be queued and drained.
|
|
||||||
*
|
|
||||||
* Idempotent: safe to call multiple times (subsequent calls are no-ops).
|
|
||||||
*/
|
|
||||||
export function initializeAnalyticsSink(): void {
|
export function initializeAnalyticsSink(): void {
|
||||||
attachAnalyticsSink({
|
attachAnalyticsSink({
|
||||||
logEvent: logEventImpl,
|
logEvent: logEventImpl,
|
||||||
|
|||||||
@@ -1,75 +1,14 @@
|
|||||||
import type { Attributes } from '@opentelemetry/api'
|
/**
|
||||||
import { getEventLogger, getPromptId } from 'src/bootstrap/state.js'
|
* OpenTelemetry event egress is disabled in this build.
|
||||||
import { logForDebugging } from '../debug.js'
|
*/
|
||||||
import { isEnvTruthy } from '../envUtils.js'
|
|
||||||
import { getTelemetryAttributes } from '../telemetryAttributes.js'
|
|
||||||
|
|
||||||
// Monotonically increasing counter for ordering events within a session
|
export function redactIfDisabled(_content: string): string {
|
||||||
let eventSequence = 0
|
return '<REDACTED>'
|
||||||
|
|
||||||
// Track whether we've already warned about a null event logger to avoid spamming
|
|
||||||
let hasWarnedNoEventLogger = false
|
|
||||||
|
|
||||||
function isUserPromptLoggingEnabled() {
|
|
||||||
return isEnvTruthy(process.env.OTEL_LOG_USER_PROMPTS)
|
|
||||||
}
|
|
||||||
|
|
||||||
export function redactIfDisabled(content: string): string {
|
|
||||||
return isUserPromptLoggingEnabled() ? content : '<REDACTED>'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function logOTelEvent(
|
export async function logOTelEvent(
|
||||||
eventName: string,
|
_eventName: string,
|
||||||
metadata: { [key: string]: string | undefined } = {},
|
_metadata: { [key: string]: string | undefined } = {},
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const eventLogger = getEventLogger()
|
|
||||||
if (!eventLogger) {
|
|
||||||
if (!hasWarnedNoEventLogger) {
|
|
||||||
hasWarnedNoEventLogger = true
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Event dropped (no event logger initialized): ${eventName}`,
|
|
||||||
{ level: 'warn' },
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip logging in test environment
|
|
||||||
if (process.env.NODE_ENV === 'test') {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const attributes: Attributes = {
|
|
||||||
...getTelemetryAttributes(),
|
|
||||||
'event.name': eventName,
|
|
||||||
'event.timestamp': new Date().toISOString(),
|
|
||||||
'event.sequence': eventSequence++,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add prompt ID to events (but not metrics, where it would cause unbounded cardinality)
|
|
||||||
const promptId = getPromptId()
|
|
||||||
if (promptId) {
|
|
||||||
attributes['prompt.id'] = promptId
|
|
||||||
}
|
|
||||||
|
|
||||||
// Workspace directory from the desktop app (host path). Events only —
|
|
||||||
// filesystem paths are too high-cardinality for metric dimensions, and
|
|
||||||
// the BQ metrics pipeline must never see them.
|
|
||||||
const workspaceDir = process.env.CLAUDE_CODE_WORKSPACE_HOST_PATHS
|
|
||||||
if (workspaceDir) {
|
|
||||||
attributes['workspace.host_paths'] = workspaceDir.split('|')
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add metadata as attributes - all values are already strings
|
|
||||||
for (const [key, value] of Object.entries(metadata)) {
|
|
||||||
if (value !== undefined) {
|
|
||||||
attributes[key] = value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit log record as an event
|
|
||||||
eventLogger.emit({
|
|
||||||
body: `claude_code.${eventName}`,
|
|
||||||
attributes,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,123 +1,5 @@
|
|||||||
import { DiagLogLevel, diag, trace } from '@opentelemetry/api'
|
export function bootstrapTelemetry(): void {}
|
||||||
import { logs } from '@opentelemetry/api-logs'
|
|
||||||
// OTLP/Prometheus exporters are dynamically imported inside the protocol
|
|
||||||
// switch statements below. A process uses at most one protocol variant per
|
|
||||||
// signal, but static imports would load all 6 (~1.2MB) on every startup.
|
|
||||||
import {
|
|
||||||
envDetector,
|
|
||||||
hostDetector,
|
|
||||||
osDetector,
|
|
||||||
resourceFromAttributes,
|
|
||||||
} from '@opentelemetry/resources'
|
|
||||||
import {
|
|
||||||
BatchLogRecordProcessor,
|
|
||||||
ConsoleLogRecordExporter,
|
|
||||||
LoggerProvider,
|
|
||||||
} from '@opentelemetry/sdk-logs'
|
|
||||||
import {
|
|
||||||
ConsoleMetricExporter,
|
|
||||||
MeterProvider,
|
|
||||||
PeriodicExportingMetricReader,
|
|
||||||
} from '@opentelemetry/sdk-metrics'
|
|
||||||
import {
|
|
||||||
BasicTracerProvider,
|
|
||||||
BatchSpanProcessor,
|
|
||||||
ConsoleSpanExporter,
|
|
||||||
} from '@opentelemetry/sdk-trace-base'
|
|
||||||
import {
|
|
||||||
ATTR_SERVICE_NAME,
|
|
||||||
ATTR_SERVICE_VERSION,
|
|
||||||
SEMRESATTRS_HOST_ARCH,
|
|
||||||
} from '@opentelemetry/semantic-conventions'
|
|
||||||
import { HttpsProxyAgent } from 'https-proxy-agent'
|
|
||||||
import {
|
|
||||||
getLoggerProvider,
|
|
||||||
getMeterProvider,
|
|
||||||
getTracerProvider,
|
|
||||||
setEventLogger,
|
|
||||||
setLoggerProvider,
|
|
||||||
setMeterProvider,
|
|
||||||
setTracerProvider,
|
|
||||||
} from 'src/bootstrap/state.js'
|
|
||||||
import {
|
|
||||||
getOtelHeadersFromHelper,
|
|
||||||
getSubscriptionType,
|
|
||||||
is1PApiCustomer,
|
|
||||||
isClaudeAISubscriber,
|
|
||||||
} from 'src/utils/auth.js'
|
|
||||||
import { getPlatform, getWslVersion } from 'src/utils/platform.js'
|
|
||||||
|
|
||||||
import { getCACertificates } from '../caCerts.js'
|
|
||||||
import { registerCleanup } from '../cleanupRegistry.js'
|
|
||||||
import { getHasFormattedOutput, logForDebugging } from '../debug.js'
|
|
||||||
import { isEnvTruthy } from '../envUtils.js'
|
|
||||||
import { errorMessage } from '../errors.js'
|
|
||||||
import { getMTLSConfig } from '../mtls.js'
|
|
||||||
import { getProxyUrl, shouldBypassProxy } from '../proxy.js'
|
|
||||||
import { getSettings_DEPRECATED } from '../settings/settings.js'
|
|
||||||
import { jsonStringify } from '../slowOperations.js'
|
|
||||||
import { profileCheckpoint } from '../startupProfiler.js'
|
|
||||||
import { isBetaTracingEnabled } from './betaSessionTracing.js'
|
|
||||||
import { BigQueryMetricsExporter } from './bigqueryExporter.js'
|
|
||||||
import { ClaudeCodeDiagLogger } from './logger.js'
|
|
||||||
import { initializePerfettoTracing } from './perfettoTracing.js'
|
|
||||||
import {
|
|
||||||
endInteractionSpan,
|
|
||||||
isEnhancedTelemetryEnabled,
|
|
||||||
} from './sessionTracing.js'
|
|
||||||
|
|
||||||
const DEFAULT_METRICS_EXPORT_INTERVAL_MS = 60000
|
|
||||||
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 5000
|
|
||||||
const DEFAULT_TRACES_EXPORT_INTERVAL_MS = 5000
|
|
||||||
|
|
||||||
class TelemetryTimeoutError extends Error {}
|
|
||||||
|
|
||||||
function telemetryTimeout(ms: number, message: string): Promise<never> {
|
|
||||||
return new Promise((_, reject) => {
|
|
||||||
setTimeout(
|
|
||||||
(rej: (e: Error) => void, msg: string) =>
|
|
||||||
rej(new TelemetryTimeoutError(msg)),
|
|
||||||
ms,
|
|
||||||
reject,
|
|
||||||
message,
|
|
||||||
).unref()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
export function bootstrapTelemetry() {
|
|
||||||
if (process.env.USER_TYPE === 'ant') {
|
|
||||||
// Read from ANT_ prefixed variables that are defined at build time
|
|
||||||
if (process.env.ANT_OTEL_METRICS_EXPORTER) {
|
|
||||||
process.env.OTEL_METRICS_EXPORTER = process.env.ANT_OTEL_METRICS_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_LOGS_EXPORTER) {
|
|
||||||
process.env.OTEL_LOGS_EXPORTER = process.env.ANT_OTEL_LOGS_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_TRACES_EXPORTER) {
|
|
||||||
process.env.OTEL_TRACES_EXPORTER = process.env.ANT_OTEL_TRACES_EXPORTER
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_ENDPOINT =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT
|
|
||||||
}
|
|
||||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_HEADERS =
|
|
||||||
process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set default tempoality to 'delta' because it's the more sane default
|
|
||||||
if (!process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE) {
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = 'delta'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per OTEL spec, "none" means "no automatically configured exporter for this signal".
|
|
||||||
// https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#exporter-selection
|
|
||||||
export function parseExporterTypes(value: string | undefined): string[] {
|
export function parseExporterTypes(value: string | undefined): string[] {
|
||||||
return (value || '')
|
return (value || '')
|
||||||
.trim()
|
.trim()
|
||||||
@@ -127,699 +9,14 @@ export function parseExporterTypes(value: string | undefined): string[] {
|
|||||||
.filter(t => t !== 'none')
|
.filter(t => t !== 'none')
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getOtlpReaders() {
|
export function isTelemetryEnabled(): boolean {
|
||||||
const exporterTypes = parseExporterTypes(process.env.OTEL_METRICS_EXPORTER)
|
return false
|
||||||
const exportInterval = parseInt(
|
|
||||||
process.env.OTEL_METRIC_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_METRICS_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
)
|
|
||||||
|
|
||||||
const exporters = []
|
|
||||||
for (const exporterType of exporterTypes) {
|
|
||||||
if (exporterType === 'console') {
|
|
||||||
// Custom console exporter that shows resource attributes
|
|
||||||
const consoleExporter = new ConsoleMetricExporter()
|
|
||||||
const originalExport = consoleExporter.export.bind(consoleExporter)
|
|
||||||
|
|
||||||
consoleExporter.export = (metrics, callback) => {
|
|
||||||
// Log resource attributes once at the start
|
|
||||||
if (metrics.resource && metrics.resource.attributes) {
|
|
||||||
// The console exporter is for debugging, so console output is intentional here
|
|
||||||
|
|
||||||
logForDebugging('\n=== Resource Attributes ===')
|
|
||||||
logForDebugging(jsonStringify(metrics.resource.attributes))
|
|
||||||
logForDebugging('===========================\n')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return originalExport(metrics, callback)
|
export async function initializeTelemetry(): Promise<null> {
|
||||||
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
exporters.push(consoleExporter)
|
|
||||||
} else if (exporterType === 'otlp') {
|
|
||||||
const protocol =
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL?.trim() ||
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
|
||||||
|
|
||||||
const httpConfig = getOTLPExporterConfig()
|
|
||||||
|
|
||||||
switch (protocol) {
|
|
||||||
case 'grpc': {
|
|
||||||
// Lazy-import to keep @grpc/grpc-js (~700KB) out of the telemetry chunk
|
|
||||||
// when the protocol is http/protobuf (ant default) or http/json.
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-grpc'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/json': {
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-http'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/protobuf': {
|
|
||||||
const { OTLPMetricExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-metrics-otlp-proto'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new Error(
|
|
||||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else if (exporterType === 'prometheus') {
|
|
||||||
const { PrometheusExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-prometheus'
|
|
||||||
)
|
|
||||||
exporters.push(new PrometheusExporter())
|
|
||||||
} else {
|
|
||||||
throw new Error(
|
|
||||||
`Unknown exporter type set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${exporterType}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return exporters.map(exporter => {
|
|
||||||
if ('export' in exporter) {
|
|
||||||
return new PeriodicExportingMetricReader({
|
|
||||||
exporter,
|
|
||||||
exportIntervalMillis: exportInterval,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return exporter
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getOtlpLogExporters() {
|
|
||||||
const exporterTypes = parseExporterTypes(process.env.OTEL_LOGS_EXPORTER)
|
|
||||||
|
|
||||||
const protocol =
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_LOGS_PROTOCOL?.trim() ||
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
|
||||||
const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
|
|
||||||
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] getOtlpLogExporters: types=${jsonStringify(exporterTypes)}, protocol=${protocol}, endpoint=${endpoint}`,
|
|
||||||
)
|
|
||||||
|
|
||||||
const exporters = []
|
|
||||||
for (const exporterType of exporterTypes) {
|
|
||||||
if (exporterType === 'console') {
|
|
||||||
exporters.push(new ConsoleLogRecordExporter())
|
|
||||||
} else if (exporterType === 'otlp') {
|
|
||||||
const httpConfig = getOTLPExporterConfig()
|
|
||||||
|
|
||||||
switch (protocol) {
|
|
||||||
case 'grpc': {
|
|
||||||
const { OTLPLogExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-logs-otlp-grpc'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPLogExporter())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/json': {
|
|
||||||
const { OTLPLogExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-logs-otlp-http'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPLogExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/protobuf': {
|
|
||||||
const { OTLPLogExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-logs-otlp-proto'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPLogExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new Error(
|
|
||||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_LOGS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new Error(
|
|
||||||
`Unknown exporter type set in OTEL_LOGS_EXPORTER env var: ${exporterType}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return exporters
|
|
||||||
}
|
|
||||||
|
|
||||||
async function getOtlpTraceExporters() {
|
|
||||||
const exporterTypes = parseExporterTypes(process.env.OTEL_TRACES_EXPORTER)
|
|
||||||
|
|
||||||
const exporters = []
|
|
||||||
for (const exporterType of exporterTypes) {
|
|
||||||
if (exporterType === 'console') {
|
|
||||||
exporters.push(new ConsoleSpanExporter())
|
|
||||||
} else if (exporterType === 'otlp') {
|
|
||||||
const protocol =
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_TRACES_PROTOCOL?.trim() ||
|
|
||||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
|
||||||
|
|
||||||
const httpConfig = getOTLPExporterConfig()
|
|
||||||
|
|
||||||
switch (protocol) {
|
|
||||||
case 'grpc': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-grpc'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter())
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/json': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-http'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
case 'http/protobuf': {
|
|
||||||
const { OTLPTraceExporter } = await import(
|
|
||||||
'@opentelemetry/exporter-trace-otlp-proto'
|
|
||||||
)
|
|
||||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new Error(
|
|
||||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_TRACES_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new Error(
|
|
||||||
`Unknown exporter type set in OTEL_TRACES_EXPORTER env var: ${exporterType}`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return exporters
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isTelemetryEnabled() {
|
|
||||||
return isEnvTruthy(process.env.CLAUDE_CODE_ENABLE_TELEMETRY)
|
|
||||||
}
|
|
||||||
|
|
||||||
function getBigQueryExportingReader() {
|
|
||||||
const bigqueryExporter = new BigQueryMetricsExporter()
|
|
||||||
return new PeriodicExportingMetricReader({
|
|
||||||
exporter: bigqueryExporter,
|
|
||||||
exportIntervalMillis: 5 * 60 * 1000, // 5mins for BigQuery metrics exporter to reduce load
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
function isBigQueryMetricsEnabled() {
|
|
||||||
// BigQuery metrics are enabled for:
|
|
||||||
// 1. API customers (excluding Claude.ai subscribers and Bedrock/Vertex)
|
|
||||||
// 2. Claude for Enterprise (C4E) users
|
|
||||||
// 3. Claude for Teams users
|
|
||||||
const subscriptionType = getSubscriptionType()
|
|
||||||
const isC4EOrTeamUser =
|
|
||||||
isClaudeAISubscriber() &&
|
|
||||||
(subscriptionType === 'enterprise' || subscriptionType === 'team')
|
|
||||||
|
|
||||||
return is1PApiCustomer() || isC4EOrTeamUser
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize beta tracing - a separate code path for detailed debugging.
|
|
||||||
* Uses BETA_TRACING_ENDPOINT instead of OTEL_EXPORTER_OTLP_ENDPOINT.
|
|
||||||
*/
|
|
||||||
async function initializeBetaTracing(
|
|
||||||
resource: ReturnType<typeof resourceFromAttributes>,
|
|
||||||
): Promise<void> {
|
|
||||||
const endpoint = process.env.BETA_TRACING_ENDPOINT
|
|
||||||
if (!endpoint) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const [{ OTLPTraceExporter }, { OTLPLogExporter }] = await Promise.all([
|
|
||||||
import('@opentelemetry/exporter-trace-otlp-http'),
|
|
||||||
import('@opentelemetry/exporter-logs-otlp-http'),
|
|
||||||
])
|
|
||||||
|
|
||||||
const httpConfig = {
|
|
||||||
url: `${endpoint}/v1/traces`,
|
|
||||||
}
|
|
||||||
|
|
||||||
const logHttpConfig = {
|
|
||||||
url: `${endpoint}/v1/logs`,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize trace exporter
|
|
||||||
const traceExporter = new OTLPTraceExporter(httpConfig)
|
|
||||||
const spanProcessor = new BatchSpanProcessor(traceExporter, {
|
|
||||||
scheduledDelayMillis: DEFAULT_TRACES_EXPORT_INTERVAL_MS,
|
|
||||||
})
|
|
||||||
|
|
||||||
const tracerProvider = new BasicTracerProvider({
|
|
||||||
resource,
|
|
||||||
spanProcessors: [spanProcessor],
|
|
||||||
})
|
|
||||||
|
|
||||||
trace.setGlobalTracerProvider(tracerProvider)
|
|
||||||
setTracerProvider(tracerProvider)
|
|
||||||
|
|
||||||
// Initialize log exporter
|
|
||||||
const logExporter = new OTLPLogExporter(logHttpConfig)
|
|
||||||
const loggerProvider = new LoggerProvider({
|
|
||||||
resource,
|
|
||||||
processors: [
|
|
||||||
new BatchLogRecordProcessor(logExporter, {
|
|
||||||
scheduledDelayMillis: DEFAULT_LOGS_EXPORT_INTERVAL_MS,
|
|
||||||
}),
|
|
||||||
],
|
|
||||||
})
|
|
||||||
|
|
||||||
logs.setGlobalLoggerProvider(loggerProvider)
|
|
||||||
setLoggerProvider(loggerProvider)
|
|
||||||
|
|
||||||
// Initialize event logger
|
|
||||||
const eventLogger = logs.getLogger(
|
|
||||||
'com.anthropic.claude_code.events',
|
|
||||||
MACRO.VERSION,
|
|
||||||
)
|
|
||||||
setEventLogger(eventLogger)
|
|
||||||
|
|
||||||
// Setup flush handlers - flush both logs AND traces
|
|
||||||
process.on('beforeExit', async () => {
|
|
||||||
await loggerProvider?.forceFlush()
|
|
||||||
await tracerProvider?.forceFlush()
|
|
||||||
})
|
|
||||||
|
|
||||||
process.on('exit', () => {
|
|
||||||
void loggerProvider?.forceFlush()
|
|
||||||
void tracerProvider?.forceFlush()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function initializeTelemetry() {
|
|
||||||
profileCheckpoint('telemetry_init_start')
|
|
||||||
bootstrapTelemetry()
|
|
||||||
|
|
||||||
// Console exporters call console.dir on a timer (5s logs/traces, 60s
|
|
||||||
// metrics), writing pretty-printed objects to stdout. In stream-json
|
|
||||||
// mode stdout is the SDK message channel; the first line (`{`) breaks
|
|
||||||
// the SDK's line reader. Stripped here (not main.tsx) because init.ts
|
|
||||||
// re-runs applyConfigEnvironmentVariables() inside initializeTelemetry-
|
|
||||||
// AfterTrust for remote-managed-settings users, and bootstrapTelemetry
|
|
||||||
// above copies ANT_OTEL_* for ant users — both would undo an earlier strip.
|
|
||||||
if (getHasFormattedOutput()) {
|
|
||||||
for (const key of [
|
|
||||||
'OTEL_METRICS_EXPORTER',
|
|
||||||
'OTEL_LOGS_EXPORTER',
|
|
||||||
'OTEL_TRACES_EXPORTER',
|
|
||||||
] as const) {
|
|
||||||
const v = process.env[key]
|
|
||||||
if (v?.includes('console')) {
|
|
||||||
process.env[key] = v
|
|
||||||
.split(',')
|
|
||||||
.map(s => s.trim())
|
|
||||||
.filter(s => s !== 'console')
|
|
||||||
.join(',')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
diag.setLogger(new ClaudeCodeDiagLogger(), DiagLogLevel.ERROR)
|
|
||||||
|
|
||||||
// Initialize Perfetto tracing (independent of OTEL)
|
|
||||||
// Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
|
|
||||||
initializePerfettoTracing()
|
|
||||||
|
|
||||||
const readers = []
|
|
||||||
|
|
||||||
// Add customer exporters (if enabled)
|
|
||||||
const telemetryEnabled = isTelemetryEnabled()
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] isTelemetryEnabled=${telemetryEnabled} (CLAUDE_CODE_ENABLE_TELEMETRY=${process.env.CLAUDE_CODE_ENABLE_TELEMETRY})`,
|
|
||||||
)
|
|
||||||
if (telemetryEnabled) {
|
|
||||||
readers.push(...(await getOtlpReaders()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add BigQuery exporter (for API customers, C4E users, and internal users)
|
|
||||||
if (isBigQueryMetricsEnabled()) {
|
|
||||||
readers.push(getBigQueryExportingReader())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create base resource with service attributes
|
|
||||||
const platform = getPlatform()
|
|
||||||
const baseAttributes: Record<string, string> = {
|
|
||||||
[ATTR_SERVICE_NAME]: 'claude-code',
|
|
||||||
[ATTR_SERVICE_VERSION]: MACRO.VERSION,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add WSL-specific attributes if running on WSL
|
|
||||||
if (platform === 'wsl') {
|
|
||||||
const wslVersion = getWslVersion()
|
|
||||||
if (wslVersion) {
|
|
||||||
baseAttributes['wsl.version'] = wslVersion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const baseResource = resourceFromAttributes(baseAttributes)
|
|
||||||
|
|
||||||
// Use OpenTelemetry detectors
|
|
||||||
const osResource = resourceFromAttributes(
|
|
||||||
osDetector.detect().attributes || {},
|
|
||||||
)
|
|
||||||
|
|
||||||
// Extract only host.arch from hostDetector
|
|
||||||
const hostDetected = hostDetector.detect()
|
|
||||||
const hostArchAttributes = hostDetected.attributes?.[SEMRESATTRS_HOST_ARCH]
|
|
||||||
? {
|
|
||||||
[SEMRESATTRS_HOST_ARCH]: hostDetected.attributes[SEMRESATTRS_HOST_ARCH],
|
|
||||||
}
|
|
||||||
: {}
|
|
||||||
const hostArchResource = resourceFromAttributes(hostArchAttributes)
|
|
||||||
|
|
||||||
const envResource = resourceFromAttributes(
|
|
||||||
envDetector.detect().attributes || {},
|
|
||||||
)
|
|
||||||
|
|
||||||
// Merge resources - later resources take precedence
|
|
||||||
const resource = baseResource
|
|
||||||
.merge(osResource)
|
|
||||||
.merge(hostArchResource)
|
|
||||||
.merge(envResource)
|
|
||||||
|
|
||||||
// Check if beta tracing is enabled - this is a separate code path
|
|
||||||
// Available to all users who set ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
|
|
||||||
if (isBetaTracingEnabled()) {
|
|
||||||
void initializeBetaTracing(resource).catch(e =>
|
|
||||||
logForDebugging(`Beta tracing init failed: ${e}`, { level: 'error' }),
|
|
||||||
)
|
|
||||||
// Still set up meter provider for metrics (but skip regular logs/traces setup)
|
|
||||||
const meterProvider = new MeterProvider({
|
|
||||||
resource,
|
|
||||||
views: [],
|
|
||||||
readers,
|
|
||||||
})
|
|
||||||
setMeterProvider(meterProvider)
|
|
||||||
|
|
||||||
// Register shutdown for beta tracing
|
|
||||||
const shutdownTelemetry = async () => {
|
|
||||||
const timeoutMs = parseInt(
|
|
||||||
process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
|
|
||||||
)
|
|
||||||
try {
|
|
||||||
endInteractionSpan()
|
|
||||||
|
|
||||||
// Force flush + shutdown together inside the timeout. Previously forceFlush
|
|
||||||
// was awaited unbounded BEFORE the race, blocking exit on slow OTLP endpoints.
|
|
||||||
// Each provider's flush→shutdown is chained independently so a slow logger
|
|
||||||
// flush doesn't delay meterProvider/tracerProvider shutdown (no waterfall).
|
|
||||||
const loggerProvider = getLoggerProvider()
|
|
||||||
const tracerProvider = getTracerProvider()
|
|
||||||
|
|
||||||
const chains: Promise<void>[] = [meterProvider.shutdown()]
|
|
||||||
if (loggerProvider) {
|
|
||||||
chains.push(
|
|
||||||
loggerProvider.forceFlush().then(() => loggerProvider.shutdown()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
if (tracerProvider) {
|
|
||||||
chains.push(
|
|
||||||
tracerProvider.forceFlush().then(() => tracerProvider.shutdown()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
await Promise.race([
|
|
||||||
Promise.all(chains),
|
|
||||||
telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
|
|
||||||
])
|
|
||||||
} catch {
|
|
||||||
// Ignore shutdown errors
|
|
||||||
}
|
|
||||||
}
|
|
||||||
registerCleanup(shutdownTelemetry)
|
|
||||||
|
|
||||||
return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
|
|
||||||
}
|
|
||||||
|
|
||||||
const meterProvider = new MeterProvider({
|
|
||||||
resource,
|
|
||||||
views: [],
|
|
||||||
readers,
|
|
||||||
})
|
|
||||||
|
|
||||||
// Store reference in state for flushing
|
|
||||||
setMeterProvider(meterProvider)
|
|
||||||
|
|
||||||
// Initialize logs if telemetry is enabled
|
|
||||||
if (telemetryEnabled) {
|
|
||||||
const logExporters = await getOtlpLogExporters()
|
|
||||||
logForDebugging(
|
|
||||||
`[3P telemetry] Created ${logExporters.length} log exporter(s)`,
|
|
||||||
)
|
|
||||||
|
|
||||||
if (logExporters.length > 0) {
|
|
||||||
const loggerProvider = new LoggerProvider({
|
|
||||||
resource,
|
|
||||||
// Add batch processors for each exporter
|
|
||||||
processors: logExporters.map(
|
|
||||||
exporter =>
|
|
||||||
new BatchLogRecordProcessor(exporter, {
|
|
||||||
scheduledDelayMillis: parseInt(
|
|
||||||
process.env.OTEL_LOGS_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
),
|
|
||||||
}),
|
|
||||||
),
|
|
||||||
})
|
|
||||||
|
|
||||||
// Register the logger provider globally
|
|
||||||
logs.setGlobalLoggerProvider(loggerProvider)
|
|
||||||
setLoggerProvider(loggerProvider)
|
|
||||||
|
|
||||||
// Initialize event logger
|
|
||||||
const eventLogger = logs.getLogger(
|
|
||||||
'com.anthropic.claude_code.events',
|
|
||||||
MACRO.VERSION,
|
|
||||||
)
|
|
||||||
setEventLogger(eventLogger)
|
|
||||||
logForDebugging('[3P telemetry] Event logger set successfully')
|
|
||||||
|
|
||||||
// 'beforeExit' is emitted when Node.js empties its event loop and has no additional work to schedule.
|
|
||||||
// Unlike 'exit', it allows us to perform async operations, so it works well for letting
|
|
||||||
// network requests complete before the process exits naturally.
|
|
||||||
process.on('beforeExit', async () => {
|
|
||||||
await loggerProvider?.forceFlush()
|
|
||||||
// Also flush traces - they use BatchSpanProcessor which needs explicit flush
|
|
||||||
const tracerProvider = getTracerProvider()
|
|
||||||
await tracerProvider?.forceFlush()
|
|
||||||
})
|
|
||||||
|
|
||||||
process.on('exit', () => {
|
|
||||||
// Final attempt to flush logs and traces
|
|
||||||
void loggerProvider?.forceFlush()
|
|
||||||
void getTracerProvider()?.forceFlush()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize tracing if enhanced telemetry is enabled (BETA)
|
|
||||||
if (telemetryEnabled && isEnhancedTelemetryEnabled()) {
|
|
||||||
const traceExporters = await getOtlpTraceExporters()
|
|
||||||
if (traceExporters.length > 0) {
|
|
||||||
// Create span processors for each exporter
|
|
||||||
const spanProcessors = traceExporters.map(
|
|
||||||
exporter =>
|
|
||||||
new BatchSpanProcessor(exporter, {
|
|
||||||
scheduledDelayMillis: parseInt(
|
|
||||||
process.env.OTEL_TRACES_EXPORT_INTERVAL ||
|
|
||||||
DEFAULT_TRACES_EXPORT_INTERVAL_MS.toString(),
|
|
||||||
),
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
|
|
||||||
const tracerProvider = new BasicTracerProvider({
|
|
||||||
resource,
|
|
||||||
spanProcessors,
|
|
||||||
})
|
|
||||||
|
|
||||||
// Register the tracer provider globally
|
|
||||||
trace.setGlobalTracerProvider(tracerProvider)
|
|
||||||
setTracerProvider(tracerProvider)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Shutdown metrics and logs on exit (flushes and closes exporters)
|
|
||||||
const shutdownTelemetry = async () => {
|
|
||||||
const timeoutMs = parseInt(
|
|
||||||
process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
|
|
||||||
)
|
|
||||||
|
|
||||||
try {
|
|
||||||
// End any active interaction span before shutdown
|
|
||||||
endInteractionSpan()
|
|
||||||
|
|
||||||
const shutdownPromises = [meterProvider.shutdown()]
|
|
||||||
const loggerProvider = getLoggerProvider()
|
|
||||||
if (loggerProvider) {
|
|
||||||
shutdownPromises.push(loggerProvider.shutdown())
|
|
||||||
}
|
|
||||||
const tracerProvider = getTracerProvider()
|
|
||||||
if (tracerProvider) {
|
|
||||||
shutdownPromises.push(tracerProvider.shutdown())
|
|
||||||
}
|
|
||||||
|
|
||||||
await Promise.race([
|
|
||||||
Promise.all(shutdownPromises),
|
|
||||||
telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
|
|
||||||
])
|
|
||||||
} catch (error) {
|
|
||||||
if (error instanceof Error && error.message.includes('timeout')) {
|
|
||||||
logForDebugging(
|
|
||||||
`
|
|
||||||
OpenTelemetry telemetry flush timed out after ${timeoutMs}ms
|
|
||||||
|
|
||||||
To resolve this issue, you can:
|
|
||||||
1. Increase the timeout by setting CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS env var (e.g., 5000 for 5 seconds)
|
|
||||||
2. Check if your OpenTelemetry backend is experiencing scalability issues
|
|
||||||
3. Disable OpenTelemetry by unsetting CLAUDE_CODE_ENABLE_TELEMETRY env var
|
|
||||||
|
|
||||||
Current timeout: ${timeoutMs}ms
|
|
||||||
`,
|
|
||||||
{ level: 'error' },
|
|
||||||
)
|
|
||||||
}
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Always register shutdown (internal metrics are always enabled)
|
|
||||||
registerCleanup(shutdownTelemetry)
|
|
||||||
|
|
||||||
return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Flush all pending telemetry data immediately.
|
|
||||||
* This should be called before logout or org switching to prevent data leakage.
|
|
||||||
*/
|
|
||||||
export async function flushTelemetry(): Promise<void> {
|
export async function flushTelemetry(): Promise<void> {
|
||||||
const meterProvider = getMeterProvider()
|
|
||||||
if (!meterProvider) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const timeoutMs = parseInt(
|
|
||||||
process.env.CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS || '5000',
|
|
||||||
)
|
|
||||||
|
|
||||||
try {
|
|
||||||
const flushPromises = [meterProvider.forceFlush()]
|
|
||||||
const loggerProvider = getLoggerProvider()
|
|
||||||
if (loggerProvider) {
|
|
||||||
flushPromises.push(loggerProvider.forceFlush())
|
|
||||||
}
|
|
||||||
const tracerProvider = getTracerProvider()
|
|
||||||
if (tracerProvider) {
|
|
||||||
flushPromises.push(tracerProvider.forceFlush())
|
|
||||||
}
|
|
||||||
|
|
||||||
await Promise.race([
|
|
||||||
Promise.all(flushPromises),
|
|
||||||
telemetryTimeout(timeoutMs, 'OpenTelemetry flush timeout'),
|
|
||||||
])
|
|
||||||
|
|
||||||
logForDebugging('Telemetry flushed successfully')
|
|
||||||
} catch (error) {
|
|
||||||
if (error instanceof TelemetryTimeoutError) {
|
|
||||||
logForDebugging(
|
|
||||||
`Telemetry flush timed out after ${timeoutMs}ms. Some metrics may not be exported.`,
|
|
||||||
{ level: 'warn' },
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
logForDebugging(`Telemetry flush failed: ${errorMessage(error)}`, {
|
|
||||||
level: 'error',
|
|
||||||
})
|
|
||||||
}
|
|
||||||
// Don't throw - allow logout to continue even if flush fails
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseOtelHeadersEnvVar(): Record<string, string> {
|
|
||||||
const headers: Record<string, string> = {}
|
|
||||||
const envHeaders = process.env.OTEL_EXPORTER_OTLP_HEADERS
|
|
||||||
if (envHeaders) {
|
|
||||||
for (const pair of envHeaders.split(',')) {
|
|
||||||
const [key, ...valueParts] = pair.split('=')
|
|
||||||
if (key && valueParts.length > 0) {
|
|
||||||
headers[key.trim()] = valueParts.join('=').trim()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return headers
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get configuration for OTLP exporters including:
|
|
||||||
* - HTTP agent options (proxy, mTLS)
|
|
||||||
* - Dynamic headers via otelHeadersHelper or static headers from env var
|
|
||||||
*/
|
|
||||||
function getOTLPExporterConfig() {
|
|
||||||
const proxyUrl = getProxyUrl()
|
|
||||||
const mtlsConfig = getMTLSConfig()
|
|
||||||
const settings = getSettings_DEPRECATED()
|
|
||||||
|
|
||||||
// Build base config
|
|
||||||
const config: Record<string, unknown> = {}
|
|
||||||
|
|
||||||
// Parse static headers from env var once (doesn't change at runtime)
|
|
||||||
const staticHeaders = parseOtelHeadersEnvVar()
|
|
||||||
|
|
||||||
// If otelHeadersHelper is configured, use async headers function for dynamic refresh
|
|
||||||
// Otherwise just return static headers if any exist
|
|
||||||
if (settings?.otelHeadersHelper) {
|
|
||||||
config.headers = async (): Promise<Record<string, string>> => {
|
|
||||||
const dynamicHeaders = getOtelHeadersFromHelper()
|
|
||||||
return { ...staticHeaders, ...dynamicHeaders }
|
|
||||||
}
|
|
||||||
} else if (Object.keys(staticHeaders).length > 0) {
|
|
||||||
config.headers = async (): Promise<Record<string, string>> => staticHeaders
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we should bypass proxy for OTEL endpoint
|
|
||||||
const otelEndpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
|
|
||||||
if (!proxyUrl || (otelEndpoint && shouldBypassProxy(otelEndpoint))) {
|
|
||||||
// No proxy configured or OTEL endpoint should bypass proxy
|
|
||||||
const caCerts = getCACertificates()
|
|
||||||
if (mtlsConfig || caCerts) {
|
|
||||||
config.httpAgentOptions = {
|
|
||||||
...mtlsConfig,
|
|
||||||
...(caCerts && { ca: caCerts }),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return config
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return an HttpAgentFactory function that creates our proxy agent
|
|
||||||
const caCerts = getCACertificates()
|
|
||||||
const agentFactory = (_protocol: string) => {
|
|
||||||
// Create and return the proxy agent with mTLS and CA cert config
|
|
||||||
const proxyAgent =
|
|
||||||
mtlsConfig || caCerts
|
|
||||||
? new HttpsProxyAgent(proxyUrl, {
|
|
||||||
...(mtlsConfig && {
|
|
||||||
cert: mtlsConfig.cert,
|
|
||||||
key: mtlsConfig.key,
|
|
||||||
passphrase: mtlsConfig.passphrase,
|
|
||||||
}),
|
|
||||||
...(caCerts && { ca: caCerts }),
|
|
||||||
})
|
|
||||||
: new HttpsProxyAgent(proxyUrl)
|
|
||||||
|
|
||||||
return proxyAgent
|
|
||||||
}
|
|
||||||
|
|
||||||
config.httpAgentOptions = agentFactory
|
|
||||||
return config
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user