Compare commits
17 Commits
codex-remo
...
86e7dbd1ab
| Author | SHA1 | Date | |
|---|---|---|---|
| 86e7dbd1ab | |||
| 5149320afd | |||
| 4d506aabf7 | |||
| 010ded8476 | |||
| 02f22d80bd | |||
| 7cf8afab73 | |||
| 832035a087 | |||
| f06a2c2740 | |||
| f65baebb3c | |||
| eb96764770 | |||
| 3e5461df9b | |||
| ce8f0dfd2b | |||
| 1b4603ed3b | |||
| dccd151718 | |||
| a95f0a540a | |||
| 497f81f4f9 | |||
| 9e7338d54c |
22
README.md
22
README.md
@@ -33,3 +33,25 @@ bun run compile
|
||||
|
||||
- `node_modules/`, `dist/`, and generated CLI binaries are ignored by Git.
|
||||
- `bun.lock` is kept in the repository for reproducible installs.
|
||||
|
||||
## Local Info Egress Status
|
||||
|
||||
This fork has removed several local system and project metadata egress paths that existed in the recovered upstream code.
|
||||
|
||||
Removed in this repository:
|
||||
|
||||
- Model-request context injection of working directory, git status/history, `CLAUDE.md`, current date, platform, shell, and OS version.
|
||||
- Feedback upload and transcript-share upload paths.
|
||||
- Remote Control / Bridge registration fields that sent machine name, git branch, and git repository URL, plus git source/outcome data in bridge session creation.
|
||||
- Trusted-device enrollment and trusted-device token header emission for bridge requests.
|
||||
- `/insights` automatic S3 upload; reports now stay local via `file://` paths only.
|
||||
- Datadog analytics and Anthropic 1P event-logging egress.
|
||||
- GrowthBook remote evaluation/network fetches; local env/config overrides and cached values remain available for compatibility.
|
||||
- OpenTelemetry initialization and event export paths.
|
||||
- Perfetto local trace-file output paths that could persist request/tool metadata to disk.
|
||||
- Extra dead telemetry scaffolding tied to the removed egress paths, including startup/session analytics fanout, logout telemetry flush, and remote GrowthBook metadata collectors.
|
||||
|
||||
Still present:
|
||||
|
||||
- Normal Claude API requests are still part of product functionality; this fork only removes extra local metadata injection, not core model/network access.
|
||||
- Minimal compatibility helpers for analytics and GrowthBook still exist in the tree as local no-op or cache-only code.
|
||||
|
||||
@@ -23,16 +23,6 @@ type BridgeApiDeps = {
|
||||
* tokens don't refresh, so 401 goes straight to BridgeFatalError.
|
||||
*/
|
||||
onAuth401?: (staleAccessToken: string) => Promise<boolean>
|
||||
/**
|
||||
* Returns the trusted device token to send as X-Trusted-Device-Token on
|
||||
* bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
|
||||
* server (CCR v2); when the server's enforcement flag is on,
|
||||
* ConnectBridgeWorker requires a trusted device at JWT-issuance.
|
||||
* Optional — when absent or returning undefined, the header is omitted
|
||||
* and the server falls through to its flag-off/no-op path. The CLI-side
|
||||
* gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
|
||||
*/
|
||||
getTrustedDeviceToken?: () => string | undefined
|
||||
}
|
||||
|
||||
const BETA_HEADER = 'environments-2025-11-01'
|
||||
@@ -65,6 +55,36 @@ export class BridgeFatalError extends Error {
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeBridgeApiPayloadForDebug(data: unknown): string {
|
||||
if (data === null) return 'null'
|
||||
if (data === undefined) return 'undefined'
|
||||
if (Array.isArray(data)) {
|
||||
return debugBody({
|
||||
type: 'array',
|
||||
length: data.length,
|
||||
})
|
||||
}
|
||||
if (typeof data !== 'object') {
|
||||
return String(data)
|
||||
}
|
||||
const value = data as Record<string, unknown>
|
||||
const workData =
|
||||
value.data && typeof value.data === 'object'
|
||||
? (value.data as Record<string, unknown>)
|
||||
: undefined
|
||||
return debugBody({
|
||||
type: 'object',
|
||||
keys: Object.keys(value)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
hasEnvironmentId: typeof value.environment_id === 'string',
|
||||
hasEnvironmentSecret: typeof value.environment_secret === 'string',
|
||||
hasWorkId: typeof value.id === 'string',
|
||||
workType: typeof workData?.type === 'string' ? workData.type : undefined,
|
||||
hasSessionId: typeof workData?.id === 'string',
|
||||
})
|
||||
}
|
||||
|
||||
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
|
||||
function debug(msg: string): void {
|
||||
deps.onDebug?.(msg)
|
||||
@@ -74,18 +94,13 @@ export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
|
||||
const EMPTY_POLL_LOG_INTERVAL = 100
|
||||
|
||||
function getHeaders(accessToken: string): Record<string, string> {
|
||||
const headers: Record<string, string> = {
|
||||
return {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
'anthropic-version': '2023-06-01',
|
||||
'anthropic-beta': BETA_HEADER,
|
||||
'x-environment-runner-version': deps.runnerVersion,
|
||||
}
|
||||
const deviceToken = deps.getTrustedDeviceToken?.()
|
||||
if (deviceToken) {
|
||||
headers['X-Trusted-Device-Token'] = deviceToken
|
||||
}
|
||||
return headers
|
||||
}
|
||||
|
||||
function resolveAuth(): string {
|
||||
@@ -183,12 +198,14 @@ export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
|
||||
|
||||
handleErrorStatus(response.status, response.data, 'Registration')
|
||||
debug(
|
||||
`[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
|
||||
`[bridge:api] POST /v1/environments/bridge -> ${response.status}`,
|
||||
)
|
||||
debug(
|
||||
`[bridge:api] >>> ${debugBody({ max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
|
||||
)
|
||||
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
|
||||
debug(
|
||||
`[bridge:api] <<< ${summarizeBridgeApiPayloadForDebug(response.data)}`,
|
||||
)
|
||||
return response.data
|
||||
},
|
||||
|
||||
@@ -236,9 +253,11 @@ export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
|
||||
}
|
||||
|
||||
debug(
|
||||
`[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
|
||||
`[bridge:api] GET .../work/poll -> ${response.status} type=${response.data.data?.type ?? 'unknown'}`,
|
||||
)
|
||||
debug(
|
||||
`[bridge:api] <<< ${summarizeBridgeApiPayloadForDebug(response.data)}`,
|
||||
)
|
||||
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
|
||||
return response.data
|
||||
},
|
||||
|
||||
@@ -442,7 +461,9 @@ export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
|
||||
`[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
|
||||
)
|
||||
debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
|
||||
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
|
||||
debug(
|
||||
`[bridge:api] <<< ${summarizeBridgeApiPayloadForDebug(response.data)}`,
|
||||
)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ import { createTokenRefreshScheduler } from './jwtUtils.js'
|
||||
import { getPollIntervalConfig } from './pollConfig.js'
|
||||
import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
|
||||
import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
|
||||
import { getTrustedDeviceToken } from './trustedDevice.js'
|
||||
import {
|
||||
BRIDGE_LOGIN_ERROR,
|
||||
type BridgeApiClient,
|
||||
@@ -2344,7 +2343,6 @@ export async function bridgeMain(args: string[]): Promise<void> {
|
||||
runnerVersion: MACRO.VERSION,
|
||||
onDebug: logForDebugging,
|
||||
onAuth401: handleOAuth401Error,
|
||||
getTrustedDeviceToken,
|
||||
})
|
||||
|
||||
// When resuming a session via --session-id, fetch it to learn its
|
||||
@@ -2877,7 +2875,6 @@ export async function runBridgeHeadless(
|
||||
runnerVersion: MACRO.VERSION,
|
||||
onDebug: log,
|
||||
onAuth401: opts.onAuth401,
|
||||
getTrustedDeviceToken,
|
||||
})
|
||||
|
||||
let environmentId: string
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
import axios from 'axios'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { errorMessage } from '../utils/errors.js'
|
||||
import { toError } from '../utils/errors.js'
|
||||
import { jsonStringify } from '../utils/slowOperations.js'
|
||||
import { extractErrorDetail } from './debugUtils.js'
|
||||
|
||||
@@ -23,6 +23,62 @@ function oauthHeaders(accessToken: string): Record<string, string> {
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeCodeSessionResponseForDebug(data: unknown): string {
|
||||
if (data === null) return 'null'
|
||||
if (data === undefined) return 'undefined'
|
||||
if (Array.isArray(data)) {
|
||||
return jsonStringify({
|
||||
payloadType: 'array',
|
||||
length: data.length,
|
||||
})
|
||||
}
|
||||
if (typeof data === 'object') {
|
||||
const value = data as Record<string, unknown>
|
||||
const session =
|
||||
value.session && typeof value.session === 'object'
|
||||
? (value.session as Record<string, unknown>)
|
||||
: undefined
|
||||
return jsonStringify({
|
||||
payloadType: 'object',
|
||||
keys: Object.keys(value)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
hasSession: Boolean(session),
|
||||
hasSessionId: typeof session?.id === 'string',
|
||||
hasWorkerJwt: typeof value.worker_jwt === 'string',
|
||||
hasApiBaseUrl: typeof value.api_base_url === 'string',
|
||||
hasExpiresIn: typeof value.expires_in === 'number',
|
||||
hasWorkerEpoch:
|
||||
typeof value.worker_epoch === 'number' ||
|
||||
typeof value.worker_epoch === 'string',
|
||||
})
|
||||
}
|
||||
return typeof data
|
||||
}
|
||||
|
||||
function summarizeCodeSessionErrorForDebug(err: unknown): string {
|
||||
const error = toError(err)
|
||||
const summary: Record<string, unknown> = {
|
||||
errorType: error.constructor.name,
|
||||
errorName: error.name,
|
||||
hasMessage: error.message.length > 0,
|
||||
hasStack: Boolean(error.stack),
|
||||
}
|
||||
if (err && typeof err === 'object') {
|
||||
const errorObj = err as Record<string, unknown>
|
||||
if (typeof errorObj.code === 'string' || typeof errorObj.code === 'number') {
|
||||
summary.code = errorObj.code
|
||||
}
|
||||
if (errorObj.response && typeof errorObj.response === 'object') {
|
||||
const response = errorObj.response as Record<string, unknown>
|
||||
if (typeof response.status === 'number') {
|
||||
summary.httpStatus = response.status
|
||||
}
|
||||
}
|
||||
}
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
export async function createCodeSession(
|
||||
baseUrl: string,
|
||||
accessToken: string,
|
||||
@@ -47,7 +103,9 @@ export async function createCodeSession(
|
||||
)
|
||||
} catch (err: unknown) {
|
||||
logForDebugging(
|
||||
`[code-session] Session create request failed: ${errorMessage(err)}`,
|
||||
`[code-session] Session create request failed: ${summarizeCodeSessionErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
return null
|
||||
}
|
||||
@@ -72,7 +130,9 @@ export async function createCodeSession(
|
||||
!data.session.id.startsWith('cse_')
|
||||
) {
|
||||
logForDebugging(
|
||||
`[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
|
||||
`[code-session] No session.id (cse_*) in response: ${summarizeCodeSessionResponseForDebug(
|
||||
data,
|
||||
)}`,
|
||||
)
|
||||
return null
|
||||
}
|
||||
@@ -95,27 +155,24 @@ export async function fetchRemoteCredentials(
|
||||
baseUrl: string,
|
||||
accessToken: string,
|
||||
timeoutMs: number,
|
||||
trustedDeviceToken?: string,
|
||||
): Promise<RemoteCredentials | null> {
|
||||
const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
|
||||
const headers = oauthHeaders(accessToken)
|
||||
if (trustedDeviceToken) {
|
||||
headers['X-Trusted-Device-Token'] = trustedDeviceToken
|
||||
}
|
||||
let response
|
||||
try {
|
||||
response = await axios.post(
|
||||
url,
|
||||
{},
|
||||
{
|
||||
headers,
|
||||
headers: oauthHeaders(accessToken),
|
||||
timeout: timeoutMs,
|
||||
validateStatus: s => s < 500,
|
||||
},
|
||||
)
|
||||
} catch (err: unknown) {
|
||||
logForDebugging(
|
||||
`[code-session] /bridge request failed: ${errorMessage(err)}`,
|
||||
`[code-session] /bridge request failed: ${summarizeCodeSessionErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
return null
|
||||
}
|
||||
@@ -141,7 +198,9 @@ export async function fetchRemoteCredentials(
|
||||
!('worker_epoch' in data)
|
||||
) {
|
||||
logForDebugging(
|
||||
`[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
|
||||
`[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${summarizeCodeSessionResponseForDebug(
|
||||
data,
|
||||
)}`,
|
||||
)
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -21,15 +21,10 @@ const SECRET_PATTERN = new RegExp(
|
||||
'g',
|
||||
)
|
||||
|
||||
const REDACT_MIN_LENGTH = 16
|
||||
|
||||
export function redactSecrets(s: string): string {
|
||||
return s.replace(SECRET_PATTERN, (_match, field: string, value: string) => {
|
||||
if (value.length < REDACT_MIN_LENGTH) {
|
||||
return `"${field}":"[REDACTED]"`
|
||||
}
|
||||
const redacted = `${value.slice(0, 8)}...${value.slice(-4)}`
|
||||
return `"${field}":"${redacted}"`
|
||||
void value
|
||||
return `"${field}":"[REDACTED]"`
|
||||
})
|
||||
}
|
||||
|
||||
@@ -52,6 +47,73 @@ export function debugBody(data: unknown): string {
|
||||
return s.slice(0, DEBUG_MSG_LIMIT) + `... (${s.length} chars)`
|
||||
}
|
||||
|
||||
function summarizeValueShapeForDebug(value: unknown): unknown {
|
||||
if (value === null) return 'null'
|
||||
if (value === undefined) return 'undefined'
|
||||
if (Array.isArray(value)) {
|
||||
return {
|
||||
type: 'array',
|
||||
length: value.length,
|
||||
}
|
||||
}
|
||||
if (typeof value === 'object') {
|
||||
return {
|
||||
type: 'object',
|
||||
keys: Object.keys(value as Record<string, unknown>)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
}
|
||||
}
|
||||
return typeof value
|
||||
}
|
||||
|
||||
export function summarizeBridgeErrorForDebug(err: unknown): string {
|
||||
const summary: Record<string, unknown> = {}
|
||||
|
||||
if (err instanceof Error) {
|
||||
summary.errorType = err.constructor.name
|
||||
summary.errorName = err.name
|
||||
summary.hasMessage = err.message.length > 0
|
||||
summary.hasStack = Boolean(err.stack)
|
||||
} else {
|
||||
summary.errorType = typeof err
|
||||
summary.hasValue = err !== undefined && err !== null
|
||||
}
|
||||
|
||||
if (err && typeof err === 'object') {
|
||||
const errorObj = err as Record<string, unknown>
|
||||
if (
|
||||
typeof errorObj.code === 'string' ||
|
||||
typeof errorObj.code === 'number'
|
||||
) {
|
||||
summary.code = errorObj.code
|
||||
}
|
||||
if (
|
||||
typeof errorObj.errno === 'string' ||
|
||||
typeof errorObj.errno === 'number'
|
||||
) {
|
||||
summary.errno = errorObj.errno
|
||||
}
|
||||
if (typeof errorObj.status === 'number') {
|
||||
summary.status = errorObj.status
|
||||
}
|
||||
if (typeof errorObj.syscall === 'string') {
|
||||
summary.syscall = errorObj.syscall
|
||||
}
|
||||
if (errorObj.response && typeof errorObj.response === 'object') {
|
||||
const response = errorObj.response as Record<string, unknown>
|
||||
if (typeof response.status === 'number') {
|
||||
summary.httpStatus = response.status
|
||||
}
|
||||
if ('data' in response) {
|
||||
summary.responseData = summarizeValueShapeForDebug(response.data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a descriptive error message from an axios error (or any error).
|
||||
* For HTTP errors, appends the server's response body message if available,
|
||||
|
||||
@@ -107,7 +107,7 @@ export function createTokenRefreshScheduler({
|
||||
// (such as the follow-up refresh set by doRefresh) so the refresh
|
||||
// chain is not broken.
|
||||
logForDebugging(
|
||||
`[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
|
||||
`[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, keeping existing timer`,
|
||||
)
|
||||
return
|
||||
}
|
||||
@@ -209,7 +209,7 @@ export function createTokenRefreshScheduler({
|
||||
failureCounts.delete(sessionId)
|
||||
|
||||
logForDebugging(
|
||||
`[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
|
||||
`[${label}:token] Refreshing token for sessionId=${sessionId}`,
|
||||
)
|
||||
logEvent('tengu_bridge_token_refreshed', {})
|
||||
onRefresh(sessionId, oauthToken)
|
||||
|
||||
@@ -38,7 +38,6 @@ import { buildCCRv2SdkUrl } from './workSecret.js'
|
||||
import { toCompatSessionId } from './sessionIdCompat.js'
|
||||
import { FlushGate } from './flushGate.js'
|
||||
import { createTokenRefreshScheduler } from './jwtUtils.js'
|
||||
import { getTrustedDeviceToken } from './trustedDevice.js'
|
||||
import {
|
||||
getEnvLessBridgeConfig,
|
||||
type EnvLessBridgeConfig,
|
||||
@@ -51,7 +50,10 @@ import {
|
||||
extractTitleText,
|
||||
BoundedUUIDSet,
|
||||
} from './bridgeMessaging.js'
|
||||
import { logBridgeSkip } from './debugUtils.js'
|
||||
import {
|
||||
logBridgeSkip,
|
||||
summarizeBridgeErrorForDebug,
|
||||
} from './debugUtils.js'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
|
||||
import { isInProtectedNamespace } from '../utils/envUtils.js'
|
||||
@@ -182,7 +184,7 @@ export async function initEnvLessBridgeCore(
|
||||
return null
|
||||
}
|
||||
const sessionId: string = createdSessionId
|
||||
logForDebugging(`[remote-bridge] Created session ${sessionId}`)
|
||||
logForDebugging('[remote-bridge] Created remote bridge session')
|
||||
logForDiagnosticsNoPII('info', 'bridge_repl_v2_session_created')
|
||||
|
||||
// ── 2. Fetch bridge credentials (POST /bridge → worker_jwt, expires_in, api_base_url) ──
|
||||
@@ -215,7 +217,7 @@ export async function initEnvLessBridgeCore(
|
||||
|
||||
// ── 3. Build v2 transport (SSETransport + CCRClient) ────────────────────
|
||||
const sessionUrl = buildCCRv2SdkUrl(credentials.api_base_url, sessionId)
|
||||
logForDebugging(`[remote-bridge] v2 session URL: ${sessionUrl}`)
|
||||
logForDebugging('[remote-bridge] Configured v2 session transport endpoint')
|
||||
|
||||
let transport: ReplBridgeTransport
|
||||
try {
|
||||
@@ -236,10 +238,12 @@ export async function initEnvLessBridgeCore(
|
||||
})
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] v2 transport setup failed: ${errorMessage(err)}`,
|
||||
`[remote-bridge] v2 transport setup failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
onStateChange?.('failed', `Transport setup failed: ${errorMessage(err)}`)
|
||||
onStateChange?.('failed', 'Transport setup failed')
|
||||
logBridgeSkip('v2_transport_setup_failed', undefined, true)
|
||||
void archiveSession(
|
||||
sessionId,
|
||||
@@ -357,7 +361,9 @@ export async function initEnvLessBridgeCore(
|
||||
)
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] Proactive refresh rebuild failed: ${errorMessage(err)}`,
|
||||
`[remote-bridge] Proactive refresh rebuild failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
logForDiagnosticsNoPII(
|
||||
@@ -365,7 +371,7 @@ export async function initEnvLessBridgeCore(
|
||||
'bridge_repl_v2_proactive_refresh_failed',
|
||||
)
|
||||
if (!tornDown) {
|
||||
onStateChange?.('failed', `Refresh failed: ${errorMessage(err)}`)
|
||||
onStateChange?.('failed', 'Refresh failed')
|
||||
}
|
||||
} finally {
|
||||
authRecoveryInFlight = false
|
||||
@@ -395,9 +401,13 @@ export async function initEnvLessBridgeCore(
|
||||
// (Same guard pattern as replBridge.ts:1119.)
|
||||
const flushTransport = transport
|
||||
void flushHistory(initialMessages)
|
||||
.catch(e =>
|
||||
logForDebugging(`[remote-bridge] flushHistory failed: ${e}`),
|
||||
)
|
||||
.catch(e => {
|
||||
logForDebugging(
|
||||
`[remote-bridge] flushHistory failed: ${summarizeBridgeErrorForDebug(
|
||||
e,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
.finally(() => {
|
||||
// authRecoveryInFlight catches the v1-vs-v2 asymmetry: v1 nulls
|
||||
// transport synchronously in setOnClose (replBridge.ts:1175), so
|
||||
@@ -577,12 +587,14 @@ export async function initEnvLessBridgeCore(
|
||||
logForDebugging('[remote-bridge] Transport rebuilt after 401')
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] 401 recovery failed: ${errorMessage(err)}`,
|
||||
`[remote-bridge] 401 recovery failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
logForDiagnosticsNoPII('error', 'bridge_repl_v2_jwt_refresh_failed')
|
||||
if (!tornDown) {
|
||||
onStateChange?.('failed', `JWT refresh failed: ${errorMessage(err)}`)
|
||||
onStateChange?.('failed', 'JWT refresh failed')
|
||||
}
|
||||
} finally {
|
||||
authRecoveryInFlight = false
|
||||
@@ -707,7 +719,9 @@ export async function initEnvLessBridgeCore(
|
||||
)
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] Teardown 401 retry threw: ${errorMessage(err)}`,
|
||||
`[remote-bridge] Teardown 401 retry threw: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
}
|
||||
@@ -824,7 +838,7 @@ export async function initEnvLessBridgeCore(
|
||||
sendControlRequest(request: SDKControlRequest) {
|
||||
if (authRecoveryInFlight) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] Dropping control_request during 401 recovery: ${request.request_id}`,
|
||||
'[remote-bridge] Dropping control_request during 401 recovery',
|
||||
)
|
||||
return
|
||||
}
|
||||
@@ -833,9 +847,7 @@ export async function initEnvLessBridgeCore(
|
||||
transport.reportState('requires_action')
|
||||
}
|
||||
void transport.write(event)
|
||||
logForDebugging(
|
||||
`[remote-bridge] Sent control_request request_id=${request.request_id}`,
|
||||
)
|
||||
logForDebugging('[remote-bridge] Sent control_request')
|
||||
},
|
||||
sendControlResponse(response: SDKControlResponse) {
|
||||
if (authRecoveryInFlight) {
|
||||
@@ -852,7 +864,7 @@ export async function initEnvLessBridgeCore(
|
||||
sendControlCancelRequest(requestId: string) {
|
||||
if (authRecoveryInFlight) {
|
||||
logForDebugging(
|
||||
`[remote-bridge] Dropping control_cancel_request during 401 recovery: ${requestId}`,
|
||||
'[remote-bridge] Dropping control_cancel_request during 401 recovery',
|
||||
)
|
||||
return
|
||||
}
|
||||
@@ -866,9 +878,7 @@ export async function initEnvLessBridgeCore(
|
||||
// those paths, so without this the server stays on requires_action.
|
||||
transport.reportState('running')
|
||||
void transport.write(event)
|
||||
logForDebugging(
|
||||
`[remote-bridge] Sent control_cancel_request request_id=${requestId}`,
|
||||
)
|
||||
logForDebugging('[remote-bridge] Sent control_cancel_request')
|
||||
},
|
||||
sendResult() {
|
||||
if (authRecoveryInFlight) {
|
||||
@@ -877,7 +887,7 @@ export async function initEnvLessBridgeCore(
|
||||
}
|
||||
transport.reportState('idle')
|
||||
void transport.write(makeResultMessage(sessionId))
|
||||
logForDebugging(`[remote-bridge] Sent result`)
|
||||
logForDebugging('[remote-bridge] Sent result')
|
||||
},
|
||||
async teardown() {
|
||||
unregister()
|
||||
@@ -925,9 +935,8 @@ import {
|
||||
} from './codeSessionApi.js'
|
||||
import { getBridgeBaseUrlOverride } from './bridgeConfig.js'
|
||||
|
||||
// CLI-side wrapper that applies the CLAUDE_BRIDGE_BASE_URL dev override and
|
||||
// injects the trusted-device token (both are env/GrowthBook reads that the
|
||||
// SDK-facing codeSessionApi.ts export must stay free of).
|
||||
// CLI-side wrapper that applies the CLAUDE_BRIDGE_BASE_URL dev override while
|
||||
// keeping the SDK-facing codeSessionApi.ts export free of CLI config reads.
|
||||
export async function fetchRemoteCredentials(
|
||||
sessionId: string,
|
||||
baseUrl: string,
|
||||
@@ -939,7 +948,6 @@ export async function fetchRemoteCredentials(
|
||||
baseUrl,
|
||||
accessToken,
|
||||
timeoutMs,
|
||||
getTrustedDeviceToken(),
|
||||
)
|
||||
if (!creds) return null
|
||||
return getBridgeBaseUrlOverride()
|
||||
@@ -995,12 +1003,13 @@ async function archiveSession(
|
||||
},
|
||||
)
|
||||
logForDebugging(
|
||||
`[remote-bridge] Archive ${compatId} status=${response.status}`,
|
||||
`[remote-bridge] Archive status=${response.status}`,
|
||||
)
|
||||
return response.status
|
||||
} catch (err) {
|
||||
const msg = errorMessage(err)
|
||||
logForDebugging(`[remote-bridge] Archive failed: ${msg}`)
|
||||
logForDebugging(
|
||||
`[remote-bridge] Archive failed: ${summarizeBridgeErrorForDebug(err)}`,
|
||||
)
|
||||
return axios.isAxiosError(err) && err.code === 'ECONNABORTED'
|
||||
? 'timeout'
|
||||
: 'error'
|
||||
|
||||
@@ -30,7 +30,6 @@ import {
|
||||
} from './workSecret.js'
|
||||
import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
|
||||
import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
|
||||
import { getTrustedDeviceToken } from './trustedDevice.js'
|
||||
import { HybridTransport } from '../cli/transports/HybridTransport.js'
|
||||
import {
|
||||
type ReplBridgeTransport,
|
||||
@@ -44,6 +43,7 @@ import {
|
||||
describeAxiosError,
|
||||
extractHttpStatus,
|
||||
logBridgeSkip,
|
||||
summarizeBridgeErrorForDebug,
|
||||
} from './debugUtils.js'
|
||||
import type { Message } from '../types/message.js'
|
||||
import type { SDKMessage } from '../entrypoints/agentSdkTypes.ts'
|
||||
@@ -304,7 +304,7 @@ export async function initBridgeCore(
|
||||
const prior = rawPrior?.source === 'repl' ? rawPrior : null
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] initBridgeCore #${seq} starting (initialMessages=${initialMessages?.length ?? 0}${prior ? ` perpetual prior=env:${prior.environmentId}` : ''})`,
|
||||
`[bridge:repl] initBridgeCore #${seq} starting (initialMessages=${initialMessages?.length ?? 0}${prior ? ' perpetual prior pointer present' : ''})`,
|
||||
)
|
||||
|
||||
// 5. Register bridge environment
|
||||
@@ -314,7 +314,6 @@ export async function initBridgeCore(
|
||||
runnerVersion: MACRO.VERSION,
|
||||
onDebug: logForDebugging,
|
||||
onAuth401,
|
||||
getTrustedDeviceToken,
|
||||
})
|
||||
// Ant-only: interpose so /bridge-kick can inject poll/register/heartbeat
|
||||
// failures. Zero cost in external builds (rawApi passes through unchanged).
|
||||
@@ -344,7 +343,9 @@ export async function initBridgeCore(
|
||||
} catch (err) {
|
||||
logBridgeSkip(
|
||||
'registration_failed',
|
||||
`[bridge:repl] Environment registration failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Environment registration failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
// Stale pointer may be the cause (expired/deleted env) — clear it so
|
||||
// the next start doesn't retry the same dead ID.
|
||||
@@ -355,7 +356,7 @@ export async function initBridgeCore(
|
||||
return null
|
||||
}
|
||||
|
||||
logForDebugging(`[bridge:repl] Environment registered: ${environmentId}`)
|
||||
logForDebugging('[bridge:repl] Environment registered')
|
||||
logForDiagnosticsNoPII('info', 'bridge_repl_env_registered')
|
||||
logEvent('tengu_bridge_repl_env_registered', {})
|
||||
|
||||
@@ -373,7 +374,7 @@ export async function initBridgeCore(
|
||||
): Promise<boolean> {
|
||||
if (environmentId !== requestedEnvId) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Env mismatch (requested ${requestedEnvId}, got ${environmentId}) — cannot reconnect in place`,
|
||||
'[bridge:repl] Env mismatch — cannot reconnect in place',
|
||||
)
|
||||
return false
|
||||
}
|
||||
@@ -391,13 +392,13 @@ export async function initBridgeCore(
|
||||
for (const id of candidates) {
|
||||
try {
|
||||
await api.reconnectSession(environmentId, id)
|
||||
logForDebugging(
|
||||
`[bridge:repl] Reconnected session ${id} in place on env ${environmentId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Reconnected existing session in place')
|
||||
return true
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] reconnectSession(${id}) failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] reconnectSession failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -681,7 +682,9 @@ export async function initBridgeCore(
|
||||
} catch (err) {
|
||||
bridgeConfig.reuseEnvironmentId = undefined
|
||||
logForDebugging(
|
||||
`[bridge:repl] Environment re-registration failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Environment re-registration failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
return false
|
||||
}
|
||||
@@ -690,7 +693,7 @@ export async function initBridgeCore(
|
||||
bridgeConfig.reuseEnvironmentId = undefined
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] Re-registered: requested=${requestedEnvId} got=${environmentId}`,
|
||||
'[bridge:repl] Re-registered environment',
|
||||
)
|
||||
|
||||
// Bail out if teardown started while we were registering
|
||||
@@ -986,7 +989,7 @@ export async function initBridgeCore(
|
||||
injectFault: injectBridgeFault,
|
||||
wakePollLoop,
|
||||
describe: () =>
|
||||
`env=${environmentId} session=${currentSessionId} transport=${transport?.getStateLabel() ?? 'null'} workId=${currentWorkId ?? 'null'}`,
|
||||
`transport=${transport?.getStateLabel() ?? 'null'} hasSession=${Boolean(currentSessionId)} hasWork=${Boolean(currentWorkId)}`,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1040,7 +1043,9 @@ export async function initBridgeCore(
|
||||
.stopWork(environmentId, currentWorkId, false)
|
||||
.catch((e: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] stopWork after heartbeat fatal: ${errorMessage(e)}`,
|
||||
`[bridge:repl] stopWork after heartbeat fatal: ${summarizeBridgeErrorForDebug(
|
||||
e,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
}
|
||||
@@ -1367,7 +1372,7 @@ export async function initBridgeCore(
|
||||
const sessionUrl = buildCCRv2SdkUrl(baseUrl, workSessionId)
|
||||
const thisGen = v2Generation
|
||||
logForDebugging(
|
||||
`[bridge:repl] CCR v2: sessionUrl=${sessionUrl} session=${workSessionId} gen=${thisGen}`,
|
||||
`[bridge:repl] CCR v2: creating transport gen=${thisGen}`,
|
||||
)
|
||||
void createV2ReplTransport({
|
||||
sessionUrl,
|
||||
@@ -1401,7 +1406,9 @@ export async function initBridgeCore(
|
||||
},
|
||||
(err: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] CCR v2: createV2ReplTransport failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] CCR v2: createV2ReplTransport failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
logEvent('tengu_bridge_repl_ccr_v2_init_failed', {})
|
||||
@@ -1416,7 +1423,9 @@ export async function initBridgeCore(
|
||||
.stopWork(environmentId, currentWorkId, false)
|
||||
.catch((e: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] stopWork after v2 init failure: ${errorMessage(e)}`,
|
||||
`[bridge:repl] stopWork after v2 init failure: ${summarizeBridgeErrorForDebug(
|
||||
e,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
currentWorkId = null
|
||||
@@ -1437,10 +1446,8 @@ export async function initBridgeCore(
|
||||
// secret. refreshHeaders picks up the latest OAuth token on each
|
||||
// WS reconnect attempt.
|
||||
const wsUrl = buildSdkUrl(sessionIngressUrl, workSessionId)
|
||||
logForDebugging(`[bridge:repl] Ingress URL: ${wsUrl}`)
|
||||
logForDebugging(
|
||||
`[bridge:repl] Creating HybridTransport: session=${workSessionId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Using session ingress WebSocket endpoint')
|
||||
logForDebugging('[bridge:repl] Creating HybridTransport')
|
||||
// v1OauthToken was validated non-null above (we'd have returned early).
|
||||
const oauthToken = v1OauthToken ?? ''
|
||||
wireTransport(
|
||||
@@ -1525,7 +1532,9 @@ export async function initBridgeCore(
|
||||
logForDebugging('[bridge:repl] keep_alive sent')
|
||||
void transport.write({ type: 'keep_alive' }).catch((err: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] keep_alive write failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] keep_alive write failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
}, keepAliveIntervalMs)
|
||||
@@ -1538,15 +1547,13 @@ export async function initBridgeCore(
|
||||
doTeardownImpl = async (): Promise<void> => {
|
||||
if (teardownStarted) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown already in progress, skipping duplicate call env=${environmentId} session=${currentSessionId}`,
|
||||
'[bridge:repl] Teardown already in progress, skipping duplicate call',
|
||||
)
|
||||
return
|
||||
}
|
||||
teardownStarted = true
|
||||
const teardownStart = Date.now()
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown starting: env=${environmentId} session=${currentSessionId} workId=${currentWorkId ?? 'none'} transportState=${transport?.getStateLabel() ?? 'null'}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Teardown starting')
|
||||
|
||||
if (pointerRefreshTimer !== null) {
|
||||
clearInterval(pointerRefreshTimer)
|
||||
@@ -1595,7 +1602,7 @@ export async function initBridgeCore(
|
||||
source: 'repl',
|
||||
})
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown (perpetual): leaving env=${environmentId} session=${currentSessionId} alive on server, duration=${Date.now() - teardownStart}ms`,
|
||||
`[bridge:repl] Teardown (perpetual): leaving bridge session alive on server, duration=${Date.now() - teardownStart}ms`,
|
||||
)
|
||||
return
|
||||
}
|
||||
@@ -1621,7 +1628,9 @@ export async function initBridgeCore(
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown stopWork failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Teardown stopWork failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
: Promise.resolve()
|
||||
@@ -1638,7 +1647,9 @@ export async function initBridgeCore(
|
||||
|
||||
await api.deregisterEnvironment(environmentId).catch((err: unknown) => {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown deregister failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Teardown deregister failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
|
||||
@@ -1648,16 +1659,14 @@ export async function initBridgeCore(
|
||||
await clearBridgePointer(dir)
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] Teardown complete: env=${environmentId} duration=${Date.now() - teardownStart}ms`,
|
||||
`[bridge:repl] Teardown complete: duration=${Date.now() - teardownStart}ms`,
|
||||
)
|
||||
}
|
||||
|
||||
// 8. Register cleanup for graceful shutdown
|
||||
const unregister = registerCleanup(() => doTeardownImpl?.())
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] Ready: env=${environmentId} session=${currentSessionId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Ready')
|
||||
onStateChange?.('ready')
|
||||
|
||||
return {
|
||||
@@ -1715,7 +1724,7 @@ export async function initBridgeCore(
|
||||
if (!transport) {
|
||||
const types = filtered.map(m => m.type).join(',')
|
||||
logForDebugging(
|
||||
`[bridge:repl] Transport not configured, dropping ${filtered.length} message(s) [${types}] for session=${currentSessionId}`,
|
||||
`[bridge:repl] Transport not configured, dropping ${filtered.length} message(s) [${types}]`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
return
|
||||
@@ -1750,7 +1759,7 @@ export async function initBridgeCore(
|
||||
if (filtered.length === 0) return
|
||||
if (!transport) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Transport not configured, dropping ${filtered.length} SDK message(s) for session=${currentSessionId}`,
|
||||
`[bridge:repl] Transport not configured, dropping ${filtered.length} SDK message(s)`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
return
|
||||
@@ -1770,9 +1779,7 @@ export async function initBridgeCore(
|
||||
}
|
||||
const event = { ...request, session_id: currentSessionId }
|
||||
void transport.write(event)
|
||||
logForDebugging(
|
||||
`[bridge:repl] Sent control_request request_id=${request.request_id}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Sent control_request')
|
||||
},
|
||||
sendControlResponse(response: SDKControlResponse) {
|
||||
if (!transport) {
|
||||
@@ -1798,21 +1805,17 @@ export async function initBridgeCore(
|
||||
session_id: currentSessionId,
|
||||
}
|
||||
void transport.write(event)
|
||||
logForDebugging(
|
||||
`[bridge:repl] Sent control_cancel_request request_id=${requestId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Sent control_cancel_request')
|
||||
},
|
||||
sendResult() {
|
||||
if (!transport) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] sendResult: skipping, transport not configured session=${currentSessionId}`,
|
||||
'[bridge:repl] sendResult: skipping, transport not configured',
|
||||
)
|
||||
return
|
||||
}
|
||||
void transport.write(makeResultMessage(currentSessionId))
|
||||
logForDebugging(
|
||||
`[bridge:repl] Sent result for session=${currentSessionId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Sent result')
|
||||
},
|
||||
async teardown() {
|
||||
unregister()
|
||||
@@ -1905,7 +1908,7 @@ async function startWorkPollLoop({
|
||||
const MAX_ENVIRONMENT_RECREATIONS = 3
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] Starting work poll loop for env=${getCredentials().environmentId}`,
|
||||
'[bridge:repl] Starting work poll loop',
|
||||
)
|
||||
|
||||
let consecutiveErrors = 0
|
||||
@@ -2008,7 +2011,9 @@ async function startWorkPollLoop({
|
||||
)
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[bridge:repl:heartbeat] Failed: ${errorMessage(err)}`,
|
||||
`[bridge:repl:heartbeat] Failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
if (err instanceof BridgeFatalError) {
|
||||
cap.cleanup()
|
||||
@@ -2126,7 +2131,9 @@ async function startWorkPollLoop({
|
||||
secret = decodeWorkSecret(work.secret)
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Failed to decode work secret: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Failed to decode work secret: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
logEvent('tengu_bridge_repl_work_secret_failed', {})
|
||||
// Can't ack (needs the JWT we failed to decode). stopWork uses OAuth.
|
||||
@@ -2137,12 +2144,14 @@ async function startWorkPollLoop({
|
||||
|
||||
// Explicitly acknowledge to prevent redelivery. Non-fatal on failure:
|
||||
// server re-delivers, and the onWorkReceived callback handles dedup.
|
||||
logForDebugging(`[bridge:repl] Acknowledging workId=${work.id}`)
|
||||
logForDebugging('[bridge:repl] Acknowledging work item')
|
||||
try {
|
||||
await api.acknowledgeWork(envId, work.id, secret.session_ingress_token)
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
|
||||
`[bridge:repl] Acknowledge failed: ${summarizeBridgeErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2194,7 +2203,7 @@ async function startWorkPollLoop({
|
||||
const currentEnvId = getCredentials().environmentId
|
||||
if (envId !== currentEnvId) {
|
||||
logForDebugging(
|
||||
`[bridge:repl] Stale poll error for old env=${envId}, current env=${currentEnvId} — skipping onEnvironmentLost`,
|
||||
'[bridge:repl] Stale poll error for superseded environment — skipping onEnvironmentLost',
|
||||
)
|
||||
consecutiveErrors = 0
|
||||
firstErrorTime = null
|
||||
@@ -2240,9 +2249,7 @@ async function startWorkPollLoop({
|
||||
consecutiveErrors = 0
|
||||
firstErrorTime = null
|
||||
onStateChange?.('ready')
|
||||
logForDebugging(
|
||||
`[bridge:repl] Re-registered environment: ${newCreds.environmentId}`,
|
||||
)
|
||||
logForDebugging('[bridge:repl] Re-registered environment')
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -2378,7 +2385,7 @@ async function startWorkPollLoop({
|
||||
}
|
||||
|
||||
logForDebugging(
|
||||
`[bridge:repl] Work poll loop ended (aborted=${signal.aborted}) env=${getCredentials().environmentId}`,
|
||||
`[bridge:repl] Work poll loop ended (aborted=${signal.aborted})`,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -54,8 +54,6 @@ export type ReplBridgeTransport = {
|
||||
* (user watches the REPL locally); multi-session worker callers do.
|
||||
*/
|
||||
reportState(state: SessionState): void
|
||||
/** PUT /worker external_metadata (v2 only; v1 is a no-op). */
|
||||
reportMetadata(metadata: Record<string, unknown>): void
|
||||
/**
|
||||
* POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
|
||||
* CCR's processing_at/processed_at columns. `received` is auto-fired by
|
||||
@@ -96,7 +94,6 @@ export function createV1ReplTransport(
|
||||
return hybrid.droppedBatchCount
|
||||
},
|
||||
reportState: () => {},
|
||||
reportMetadata: () => {},
|
||||
reportDelivery: () => {},
|
||||
flush: () => Promise.resolve(),
|
||||
}
|
||||
@@ -324,9 +321,6 @@ export async function createV2ReplTransport(opts: {
|
||||
reportState(state) {
|
||||
ccr.reportState(state)
|
||||
},
|
||||
reportMetadata(metadata) {
|
||||
ccr.reportMetadata(metadata)
|
||||
},
|
||||
reportDelivery(eventId, status) {
|
||||
ccr.reportDelivery(eventId, status)
|
||||
},
|
||||
|
||||
@@ -1,67 +1,22 @@
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { getSecureStorage } from '../utils/secureStorage/index.js'
|
||||
|
||||
/**
|
||||
* Trusted device token source for bridge (remote-control) sessions.
|
||||
* Trusted-device compatibility helpers for bridge (remote-control) sessions.
|
||||
*
|
||||
* Bridge sessions have SecurityTier=ELEVATED on the server (CCR v2).
|
||||
* The server gates ConnectBridgeWorker on its own flag
|
||||
* (sessions_elevated_auth_enforcement in Anthropic Main); this CLI-side
|
||||
* flag controls whether the CLI sends X-Trusted-Device-Token at all.
|
||||
* Two flags so rollout can be staged: flip CLI-side first (headers
|
||||
* start flowing, server still no-ops), then flip server-side.
|
||||
*
|
||||
* Enrollment (POST /auth/trusted_devices) is gated server-side by
|
||||
* account_session.created_at < 10min, so it must happen during /login.
|
||||
* Token is persistent (90d rolling expiry) and stored in keychain.
|
||||
*
|
||||
* See anthropics/anthropic#274559 (spec), #310375 (B1b tenant RPCs),
|
||||
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
|
||||
* This fork disables trusted-device enrollment and header emission. The
|
||||
* remaining helpers only clear any previously stored token during login/logout
|
||||
* so old state is not carried forward.
|
||||
*/
|
||||
|
||||
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'
|
||||
|
||||
function isGateEnabled(): boolean {
|
||||
return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
|
||||
}
|
||||
|
||||
// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
|
||||
// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
|
||||
// Cache cleared on logout (clearAuthRelatedCaches) and after any local update.
|
||||
//
|
||||
// Only the storage read is memoized — the GrowthBook gate is checked live so
|
||||
// that a gate flip after GrowthBook refresh takes effect without a restart.
|
||||
const readStoredToken = memoize((): string | undefined => {
|
||||
// Env var takes precedence for testing/canary.
|
||||
const envToken = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
|
||||
if (envToken) {
|
||||
return envToken
|
||||
}
|
||||
return getSecureStorage().read()?.trustedDeviceToken
|
||||
})
|
||||
|
||||
export function getTrustedDeviceToken(): string | undefined {
|
||||
if (!isGateEnabled()) {
|
||||
return undefined
|
||||
}
|
||||
return readStoredToken()
|
||||
}
|
||||
|
||||
export function clearTrustedDeviceTokenCache(): void {
|
||||
readStoredToken.cache?.clear?.()
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the stored trusted device token from secure storage and the memo cache.
|
||||
* Called during /login so a stale token from the previous account isn't sent
|
||||
* as X-Trusted-Device-Token after account switches.
|
||||
* Clear any stored trusted-device token from secure storage.
|
||||
*/
|
||||
export function clearTrustedDeviceToken(): void {
|
||||
if (!isGateEnabled()) {
|
||||
return
|
||||
}
|
||||
const secureStorage = getSecureStorage()
|
||||
try {
|
||||
const data = secureStorage.read()
|
||||
@@ -72,7 +27,6 @@ export function clearTrustedDeviceToken(): void {
|
||||
} catch {
|
||||
// Best-effort — don't block login if storage is inaccessible
|
||||
}
|
||||
readStoredToken.cache?.clear?.()
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -2,6 +2,33 @@ import axios from 'axios'
|
||||
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
|
||||
import type { WorkSecret } from './types.js'
|
||||
|
||||
function summarizeRegisterWorkerResponseForDebug(data: unknown): string {
|
||||
if (data === null) return 'null'
|
||||
if (data === undefined) return 'undefined'
|
||||
if (Array.isArray(data)) {
|
||||
return jsonStringify({
|
||||
payloadType: 'array',
|
||||
length: data.length,
|
||||
})
|
||||
}
|
||||
if (typeof data === 'object') {
|
||||
const value = data as Record<string, unknown>
|
||||
return jsonStringify({
|
||||
payloadType: 'object',
|
||||
keys: Object.keys(value)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
hasWorkerEpoch:
|
||||
typeof value.worker_epoch === 'number' ||
|
||||
typeof value.worker_epoch === 'string',
|
||||
hasSessionIngressToken:
|
||||
typeof value.session_ingress_token === 'string',
|
||||
hasApiBaseUrl: typeof value.api_base_url === 'string',
|
||||
})
|
||||
}
|
||||
return typeof data
|
||||
}
|
||||
|
||||
/** Decode a base64url-encoded work secret and validate its version. */
|
||||
export function decodeWorkSecret(secret: string): WorkSecret {
|
||||
const json = Buffer.from(secret, 'base64url').toString('utf-8')
|
||||
@@ -120,7 +147,9 @@ export async function registerWorker(
|
||||
!Number.isSafeInteger(epoch)
|
||||
) {
|
||||
throw new Error(
|
||||
`registerWorker: invalid worker_epoch in response: ${jsonStringify(response.data)}`,
|
||||
`registerWorker: invalid worker_epoch in response: ${summarizeRegisterWorkerResponseForDebug(
|
||||
response.data,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
return epoch
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,11 +1,8 @@
|
||||
import { profileCheckpoint } from '../utils/startupProfiler.js'
|
||||
import '../bootstrap/state.js'
|
||||
import '../utils/config.js'
|
||||
import type { Attributes, MetricOptions } from '@opentelemetry/api'
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import { getIsNonInteractiveSession } from 'src/bootstrap/state.js'
|
||||
import type { AttributedCounter } from '../bootstrap/state.js'
|
||||
import { getSessionCounter, setMeter } from '../bootstrap/state.js'
|
||||
import { shutdownLspServerManager } from '../services/lsp/manager.js'
|
||||
import { populateOAuthAccountInfoIfNeeded } from '../services/oauth/client.js'
|
||||
import {
|
||||
@@ -41,19 +38,9 @@ import {
|
||||
ensureScratchpadDir,
|
||||
isScratchpadEnabled,
|
||||
} from '../utils/permissions/filesystem.js'
|
||||
// initializeTelemetry is loaded lazily via import() in setMeterState() to defer
|
||||
// ~400KB of OpenTelemetry + protobuf modules until telemetry is actually initialized.
|
||||
// gRPC exporters (~700KB via @grpc/grpc-js) are further lazy-loaded within instrumentation.ts.
|
||||
import { configureGlobalAgents } from '../utils/proxy.js'
|
||||
import { isBetaTracingEnabled } from '../utils/telemetry/betaSessionTracing.js'
|
||||
import { getTelemetryAttributes } from '../utils/telemetryAttributes.js'
|
||||
import { setShellIfWindows } from '../utils/windowsPaths.js'
|
||||
|
||||
// initialize1PEventLogging is dynamically imported to defer OpenTelemetry sdk-logs/resources
|
||||
|
||||
// Track if telemetry has been initialized to prevent double initialization
|
||||
let telemetryInitialized = false
|
||||
|
||||
export const init = memoize(async (): Promise<void> => {
|
||||
const initStartTime = Date.now()
|
||||
logForDiagnosticsNoPII('info', 'init_started')
|
||||
@@ -87,22 +74,8 @@ export const init = memoize(async (): Promise<void> => {
|
||||
setupGracefulShutdown()
|
||||
profileCheckpoint('init_after_graceful_shutdown')
|
||||
|
||||
// Initialize 1P event logging (no security concerns, but deferred to avoid
|
||||
// loading OpenTelemetry sdk-logs at startup). growthbook.js is already in
|
||||
// the module cache by this point (firstPartyEventLogger imports it), so the
|
||||
// second dynamic import adds no load cost.
|
||||
void Promise.all([
|
||||
import('../services/analytics/firstPartyEventLogger.js'),
|
||||
import('../services/analytics/growthbook.js'),
|
||||
]).then(([fp, gb]) => {
|
||||
fp.initialize1PEventLogging()
|
||||
// Rebuild the logger provider if tengu_1p_event_batch_config changes
|
||||
// mid-session. Change detection (isEqual) is inside the handler so
|
||||
// unchanged refreshes are no-ops.
|
||||
gb.onGrowthBookRefresh(() => {
|
||||
void fp.reinitialize1PEventLoggingIfConfigChanged()
|
||||
})
|
||||
})
|
||||
// Telemetry/log export is disabled in this build. Keep the startup
|
||||
// checkpoint so callers depending on the init timeline still see it.
|
||||
profileCheckpoint('init_after_1p_event_logging')
|
||||
|
||||
// Populate OAuth account info if it is not already cached in config. This is needed since the
|
||||
@@ -236,105 +209,3 @@ export const init = memoize(async (): Promise<void> => {
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* Initialize telemetry after trust has been granted.
|
||||
* For remote-settings-eligible users, waits for settings to load (non-blocking),
|
||||
* then re-applies env vars (to include remote settings) before initializing telemetry.
|
||||
* For non-eligible users, initializes telemetry immediately.
|
||||
* This should only be called once, after the trust dialog has been accepted.
|
||||
*/
|
||||
export function initializeTelemetryAfterTrust(): void {
|
||||
if (isEligibleForRemoteManagedSettings()) {
|
||||
// For SDK/headless mode with beta tracing, initialize eagerly first
|
||||
// to ensure the tracer is ready before the first query runs.
|
||||
// The async path below will still run but doInitializeTelemetry() guards against double init.
|
||||
if (getIsNonInteractiveSession() && isBetaTracingEnabled()) {
|
||||
void doInitializeTelemetry().catch(error => {
|
||||
logForDebugging(
|
||||
`[3P telemetry] Eager telemetry init failed (beta tracing): ${errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
})
|
||||
}
|
||||
logForDebugging(
|
||||
'[3P telemetry] Waiting for remote managed settings before telemetry init',
|
||||
)
|
||||
void waitForRemoteManagedSettingsToLoad()
|
||||
.then(async () => {
|
||||
logForDebugging(
|
||||
'[3P telemetry] Remote managed settings loaded, initializing telemetry',
|
||||
)
|
||||
// Re-apply env vars to pick up remote settings before initializing telemetry.
|
||||
applyConfigEnvironmentVariables()
|
||||
await doInitializeTelemetry()
|
||||
})
|
||||
.catch(error => {
|
||||
logForDebugging(
|
||||
`[3P telemetry] Telemetry init failed (remote settings path): ${errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
})
|
||||
} else {
|
||||
void doInitializeTelemetry().catch(error => {
|
||||
logForDebugging(
|
||||
`[3P telemetry] Telemetry init failed: ${errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async function doInitializeTelemetry(): Promise<void> {
|
||||
if (telemetryInitialized) {
|
||||
// Already initialized, nothing to do
|
||||
return
|
||||
}
|
||||
|
||||
// Set flag before init to prevent double initialization
|
||||
telemetryInitialized = true
|
||||
try {
|
||||
await setMeterState()
|
||||
} catch (error) {
|
||||
// Reset flag on failure so subsequent calls can retry
|
||||
telemetryInitialized = false
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
async function setMeterState(): Promise<void> {
|
||||
// Lazy-load instrumentation to defer ~400KB of OpenTelemetry + protobuf
|
||||
const { initializeTelemetry } = await import(
|
||||
'../utils/telemetry/instrumentation.js'
|
||||
)
|
||||
// Initialize customer OTLP telemetry (metrics, logs, traces)
|
||||
const meter = await initializeTelemetry()
|
||||
if (meter) {
|
||||
// Create factory function for attributed counters
|
||||
const createAttributedCounter = (
|
||||
name: string,
|
||||
options: MetricOptions,
|
||||
): AttributedCounter => {
|
||||
const counter = meter?.createCounter(name, options)
|
||||
|
||||
return {
|
||||
add(value: number, additionalAttributes: Attributes = {}) {
|
||||
// Always fetch fresh telemetry attributes to ensure they're up to date
|
||||
const currentAttributes = getTelemetryAttributes()
|
||||
const mergedAttributes = {
|
||||
...currentAttributes,
|
||||
...additionalAttributes,
|
||||
}
|
||||
counter?.add(value, mergedAttributes)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setMeter(meter, createAttributedCounter)
|
||||
|
||||
// Increment session counter here because the startup telemetry path
|
||||
// runs before this async initialization completes, so the counter
|
||||
// would be null there.
|
||||
getSessionCounter()?.add(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ import {
|
||||
isShutdownApproved,
|
||||
isShutdownRequest,
|
||||
isTeamPermissionUpdate,
|
||||
markMessagesAsRead,
|
||||
markMessagesAsReadByPredicate,
|
||||
readUnreadMessages,
|
||||
type TeammateMessage,
|
||||
writeToMailbox,
|
||||
@@ -195,10 +195,20 @@ export function useInboxPoller({
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to mark messages as read in the inbox file.
|
||||
// Helper to remove the unread batch we just processed from the inbox file.
|
||||
// Called after messages are successfully delivered or reliably queued.
|
||||
const deliveredMessageKeys = new Set(
|
||||
unread.map(message => `${message.from}|${message.timestamp}|${message.text}`),
|
||||
)
|
||||
const markRead = () => {
|
||||
void markMessagesAsRead(agentName, currentAppState.teamContext?.teamName)
|
||||
void markMessagesAsReadByPredicate(
|
||||
agentName,
|
||||
message =>
|
||||
deliveredMessageKeys.has(
|
||||
`${message.from}|${message.timestamp}|${message.text}`,
|
||||
),
|
||||
currentAppState.teamContext?.teamName,
|
||||
)
|
||||
}
|
||||
|
||||
// Separate permission messages from regular teammate messages
|
||||
@@ -503,9 +513,7 @@ export function useInboxPoller({
|
||||
for (const m of teamPermissionUpdates) {
|
||||
const parsed = isTeamPermissionUpdate(m.text)
|
||||
if (!parsed) {
|
||||
logForDebugging(
|
||||
`[InboxPoller] Failed to parse team permission update: ${m.text.substring(0, 100)}`,
|
||||
)
|
||||
logForDebugging('[InboxPoller] Failed to parse team permission update')
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -522,10 +530,7 @@ export function useInboxPoller({
|
||||
|
||||
// Apply the permission update to the teammate's context
|
||||
logForDebugging(
|
||||
`[InboxPoller] Applying team permission update: ${parsed.toolName} allowed in ${parsed.directoryPath}`,
|
||||
)
|
||||
logForDebugging(
|
||||
`[InboxPoller] Permission update rules: ${jsonStringify(parsed.permissionUpdate.rules)}`,
|
||||
`[InboxPoller] Applying team permission update for ${parsed.toolName} (${parsed.permissionUpdate.rules.length} rule(s))`,
|
||||
)
|
||||
|
||||
setAppState(prev => {
|
||||
@@ -536,7 +541,7 @@ export function useInboxPoller({
|
||||
destination: 'session',
|
||||
})
|
||||
logForDebugging(
|
||||
`[InboxPoller] Updated session allow rules: ${jsonStringify(updated.alwaysAllowRules.session)}`,
|
||||
`[InboxPoller] Updated session allow rules (${updated.alwaysAllowRules.session.length} total)`,
|
||||
)
|
||||
return {
|
||||
...prev,
|
||||
@@ -563,9 +568,7 @@ export function useInboxPoller({
|
||||
|
||||
const parsed = isModeSetRequest(m.text)
|
||||
if (!parsed) {
|
||||
logForDebugging(
|
||||
`[InboxPoller] Failed to parse mode set request: ${m.text.substring(0, 100)}`,
|
||||
)
|
||||
logForDebugging('[InboxPoller] Failed to parse mode set request')
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -1,31 +1,16 @@
|
||||
/**
|
||||
* Swarm Permission Poller Hook
|
||||
* Swarm permission callback registry helpers.
|
||||
*
|
||||
* This hook polls for permission responses from the team leader when running
|
||||
* as a worker agent in a swarm. When a response is received, it calls the
|
||||
* appropriate callback (onAllow/onReject) to continue execution.
|
||||
*
|
||||
* This hook should be used in conjunction with the worker-side integration
|
||||
* in useCanUseTool.ts, which creates pending requests that this hook monitors.
|
||||
* Permission requests/responses now flow entirely through teammate mailboxes.
|
||||
* Workers register callbacks here, and the inbox poller dispatches mailbox
|
||||
* responses back into those callbacks.
|
||||
*/
|
||||
|
||||
import { useCallback, useEffect, useRef } from 'react'
|
||||
import { useInterval } from 'usehooks-ts'
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { errorMessage } from '../utils/errors.js'
|
||||
import {
|
||||
type PermissionUpdate,
|
||||
permissionUpdateSchema,
|
||||
} from '../utils/permissions/PermissionUpdateSchema.js'
|
||||
import {
|
||||
isSwarmWorker,
|
||||
type PermissionResponse,
|
||||
pollForResponse,
|
||||
removeWorkerResponse,
|
||||
} from '../utils/swarm/permissionSync.js'
|
||||
import { getAgentName, getTeamName } from '../utils/teammate.js'
|
||||
|
||||
const POLL_INTERVAL_MS = 500
|
||||
|
||||
/**
|
||||
* Validate permissionUpdates from external sources (mailbox IPC, disk polling).
|
||||
@@ -226,105 +211,9 @@ export function processSandboxPermissionResponse(params: {
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a permission response by invoking the registered callback
|
||||
*/
|
||||
function processResponse(response: PermissionResponse): boolean {
|
||||
const callback = pendingCallbacks.get(response.requestId)
|
||||
|
||||
if (!callback) {
|
||||
logForDebugging(
|
||||
`[SwarmPermissionPoller] No callback registered for request ${response.requestId}`,
|
||||
)
|
||||
return false
|
||||
}
|
||||
|
||||
logForDebugging(
|
||||
`[SwarmPermissionPoller] Processing response for request ${response.requestId}: ${response.decision}`,
|
||||
)
|
||||
|
||||
// Remove from registry before invoking callback
|
||||
pendingCallbacks.delete(response.requestId)
|
||||
|
||||
if (response.decision === 'approved') {
|
||||
const permissionUpdates = parsePermissionUpdates(response.permissionUpdates)
|
||||
const updatedInput = response.updatedInput
|
||||
callback.onAllow(updatedInput, permissionUpdates)
|
||||
} else {
|
||||
callback.onReject(response.feedback)
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook that polls for permission responses when running as a swarm worker.
|
||||
*
|
||||
* This hook:
|
||||
* 1. Only activates when isSwarmWorker() returns true
|
||||
* 2. Polls every 500ms for responses
|
||||
* 3. When a response is found, invokes the registered callback
|
||||
* 4. Cleans up the response file after processing
|
||||
* Legacy no-op hook kept for compatibility with older imports.
|
||||
* Mailbox responses are handled by useInboxPoller instead of disk polling.
|
||||
*/
|
||||
export function useSwarmPermissionPoller(): void {
|
||||
const isProcessingRef = useRef(false)
|
||||
|
||||
const poll = useCallback(async () => {
|
||||
// Don't poll if not a swarm worker
|
||||
if (!isSwarmWorker()) {
|
||||
return
|
||||
}
|
||||
|
||||
// Prevent concurrent polling
|
||||
if (isProcessingRef.current) {
|
||||
return
|
||||
}
|
||||
|
||||
// Don't poll if no callbacks are registered
|
||||
if (pendingCallbacks.size === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
isProcessingRef.current = true
|
||||
|
||||
try {
|
||||
const agentName = getAgentName()
|
||||
const teamName = getTeamName()
|
||||
|
||||
if (!agentName || !teamName) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check each pending request for a response
|
||||
for (const [requestId, _callback] of pendingCallbacks) {
|
||||
const response = await pollForResponse(requestId, agentName, teamName)
|
||||
|
||||
if (response) {
|
||||
// Process the response
|
||||
const processed = processResponse(response)
|
||||
|
||||
if (processed) {
|
||||
// Clean up the response from the worker's inbox
|
||||
await removeWorkerResponse(requestId, agentName, teamName)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`[SwarmPermissionPoller] Error during poll: ${errorMessage(error)}`,
|
||||
)
|
||||
} finally {
|
||||
isProcessingRef.current = false
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Only poll if we're a swarm worker
|
||||
const shouldPoll = isSwarmWorker()
|
||||
useInterval(() => void poll(), shouldPoll ? POLL_INTERVAL_MS : null)
|
||||
|
||||
// Initial poll on mount
|
||||
useEffect(() => {
|
||||
if (isSwarmWorker()) {
|
||||
void poll()
|
||||
}
|
||||
}, [poll])
|
||||
// Intentionally empty.
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
92
src/main.tsx
92
src/main.tsx
@@ -29,7 +29,7 @@ import React from 'react';
|
||||
import { getOauthConfig } from './constants/oauth.js';
|
||||
import { getRemoteSessionUrl } from './constants/product.js';
|
||||
import { getSystemContext, getUserContext } from './context.js';
|
||||
import { init, initializeTelemetryAfterTrust } from './entrypoints/init.js';
|
||||
import { init } from './entrypoints/init.js';
|
||||
import { addToHistory } from './history.js';
|
||||
import type { Root } from './ink.js';
|
||||
import { launchRepl } from './replLauncher.js';
|
||||
@@ -49,7 +49,7 @@ import { isAgentSwarmsEnabled } from './utils/agentSwarmsEnabled.js';
|
||||
import { count, uniq } from './utils/array.js';
|
||||
import { installAsciicastRecorder } from './utils/asciicast.js';
|
||||
import { getSubscriptionType, isClaudeAISubscriber, prefetchAwsCredentialsAndBedRockInfoIfSafe, prefetchGcpCredentialsIfSafe, validateForceLoginOrg } from './utils/auth.js';
|
||||
import { checkHasTrustDialogAccepted, getGlobalConfig, getRemoteControlAtStartup, isAutoUpdaterDisabled, saveGlobalConfig } from './utils/config.js';
|
||||
import { checkHasTrustDialogAccepted, getGlobalConfig, getRemoteControlAtStartup, saveGlobalConfig } from './utils/config.js';
|
||||
import { seedEarlyInput, stopCapturingEarlyInput } from './utils/earlyInput.js';
|
||||
import { getInitialEffortSetting, parseEffortValue } from './utils/effort.js';
|
||||
import { getInitialFastModeSetting, isFastModeEnabled, prefetchFastModeStatus, resolveFastModeStatusFromCache } from './utils/fastMode.js';
|
||||
@@ -80,10 +80,8 @@ const coordinatorModeModule = feature('COORDINATOR_MODE') ? require('./coordinat
|
||||
const assistantModule = feature('KAIROS') ? require('./assistant/index.js') as typeof import('./assistant/index.js') : null;
|
||||
const kairosGate = feature('KAIROS') ? require('./assistant/gate.js') as typeof import('./assistant/gate.js') : null;
|
||||
import { relative, resolve } from 'path';
|
||||
import { isAnalyticsDisabled } from 'src/services/analytics/config.js';
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js';
|
||||
import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, logEvent } from 'src/services/analytics/index.js';
|
||||
import { initializeAnalyticsGates } from 'src/services/analytics/sink.js';
|
||||
import { getOriginalCwd, setAdditionalDirectoriesForClaudeMd, setIsRemoteMode, setMainLoopModelOverride, setMainThreadAgentType, setTeleportedSessionInfo } from './bootstrap/state.js';
|
||||
import { filterCommandsForRemoteMode, getCommands } from './commands.js';
|
||||
import type { StatsStore } from './context/stats.js';
|
||||
@@ -103,15 +101,13 @@ import type { Message as MessageType } from './types/message.js';
|
||||
import { assertMinVersion } from './utils/autoUpdater.js';
|
||||
import { CLAUDE_IN_CHROME_SKILL_HINT, CLAUDE_IN_CHROME_SKILL_HINT_WITH_WEBBROWSER } from './utils/claudeInChrome/prompt.js';
|
||||
import { setupClaudeInChrome, shouldAutoEnableClaudeInChrome, shouldEnableClaudeInChrome } from './utils/claudeInChrome/setup.js';
|
||||
import { getContextWindowForModel } from './utils/context.js';
|
||||
import { loadConversationForResume } from './utils/conversationRecovery.js';
|
||||
import { buildDeepLinkBanner } from './utils/deepLink/banner.js';
|
||||
import { hasNodeOption, isBareMode, isEnvTruthy, isInProtectedNamespace } from './utils/envUtils.js';
|
||||
import { isBareMode, isEnvTruthy, isInProtectedNamespace } from './utils/envUtils.js';
|
||||
import { refreshExampleCommands } from './utils/exampleCommands.js';
|
||||
import type { FpsMetrics } from './utils/fpsTracker.js';
|
||||
import { getWorktreePaths } from './utils/getWorktreePaths.js';
|
||||
import { findGitRoot, getBranch, getIsGit, getWorktreeCount } from './utils/git.js';
|
||||
import { getGhAuthStatus } from './utils/github/ghAuthStatus.js';
|
||||
import { findGitRoot, getBranch } from './utils/git.js';
|
||||
import { safeParseJSON } from './utils/json.js';
|
||||
import { logError } from './utils/log.js';
|
||||
import { getModelDeprecationWarning } from './utils/model/deprecation.js';
|
||||
@@ -121,9 +117,7 @@ import { PERMISSION_MODES } from './utils/permissions/PermissionMode.js';
|
||||
import { checkAndDisableBypassPermissions, getAutoModeEnabledStateIfCached, initializeToolPermissionContext, initialPermissionModeFromCLI, isDefaultPermissionModeAuto, parseToolListFromCLI, removeDangerousPermissions, stripDangerousPermissionsForAutoMode, verifyAutoModeGateAccess } from './utils/permissions/permissionSetup.js';
|
||||
import { cleanupOrphanedPluginVersionsInBackground } from './utils/plugins/cacheUtils.js';
|
||||
import { initializeVersionedPlugins } from './utils/plugins/installedPluginsManager.js';
|
||||
import { getManagedPluginNames } from './utils/plugins/managedPlugins.js';
|
||||
import { getGlobExclusionsForPluginCache } from './utils/plugins/orphanedPluginFilter.js';
|
||||
import { getPluginSeedDirs } from './utils/plugins/pluginDirectories.js';
|
||||
import { countFilesRoundedRg } from './utils/ripgrep.js';
|
||||
import { processSessionStartHooks, processSetupHooks } from './utils/sessionStart.js';
|
||||
import { cacheSessionTitle, getSessionIdFromLog, loadTranscriptFromFile, saveAgentSetting, saveMode, searchSessionsByCustomTitle, sessionIdExists } from './utils/sessionStorage.js';
|
||||
@@ -132,8 +126,6 @@ import { getInitialSettings, getManagedSettingsKeysForLogging, getSettingsForSou
|
||||
import { resetSettingsCache } from './utils/settings/settingsCache.js';
|
||||
import type { ValidationError } from './utils/settings/validation.js';
|
||||
import { DEFAULT_TASKS_MODE_TASK_LIST_ID, TASK_STATUSES } from './utils/tasks.js';
|
||||
import { logPluginLoadErrors, logPluginsEnabledForSession } from './utils/telemetry/pluginTelemetry.js';
|
||||
import { logSkillsLoaded } from './utils/telemetry/skillLoadedEvent.js';
|
||||
import { generateTempFilePath } from './utils/tempfile.js';
|
||||
import { validateUuid } from './utils/uuid.js';
|
||||
// Plugin startup checks are now handled non-blockingly in REPL.tsx
|
||||
@@ -196,7 +188,7 @@ import { filterAllowedSdkBetas } from './utils/betas.js';
|
||||
import { isInBundledMode, isRunningWithBun } from './utils/bundledMode.js';
|
||||
import { logForDiagnosticsNoPII } from './utils/diagLogs.js';
|
||||
import { filterExistingPaths, getKnownPathsForRepo } from './utils/githubRepoPathMapping.js';
|
||||
import { clearPluginCache, loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js';
|
||||
import { clearPluginCache } from './utils/plugins/pluginLoader.js';
|
||||
import { migrateChangelogFromConfig } from './utils/releaseNotes.js';
|
||||
import { SandboxManager } from './utils/sandbox/sandbox-adapter.js';
|
||||
import { fetchSession, prepareApiRequest } from './utils/teleport/api.js';
|
||||
@@ -270,56 +262,6 @@ if ("external" !== 'ant' && isBeingDebugged()) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-session skill/plugin telemetry. Called from both the interactive path
|
||||
* and the headless -p path (before runHeadless) — both go through
|
||||
* main.tsx but branch before the interactive startup path, so it needs two
|
||||
* call sites here rather than one here + one in QueryEngine.
|
||||
*/
|
||||
function logSessionTelemetry(): void {
|
||||
const model = parseUserSpecifiedModel(getInitialMainLoopModel() ?? getDefaultMainLoopModel());
|
||||
void logSkillsLoaded(getCwd(), getContextWindowForModel(model, getSdkBetas()));
|
||||
void loadAllPluginsCacheOnly().then(({
|
||||
enabled,
|
||||
errors
|
||||
}) => {
|
||||
const managedNames = getManagedPluginNames();
|
||||
logPluginsEnabledForSession(enabled, managedNames, getPluginSeedDirs());
|
||||
logPluginLoadErrors(errors, managedNames);
|
||||
}).catch(err => logError(err));
|
||||
}
|
||||
function getCertEnvVarTelemetry(): Record<string, boolean> {
|
||||
const result: Record<string, boolean> = {};
|
||||
if (process.env.NODE_EXTRA_CA_CERTS) {
|
||||
result.has_node_extra_ca_certs = true;
|
||||
}
|
||||
if (process.env.CLAUDE_CODE_CLIENT_CERT) {
|
||||
result.has_client_cert = true;
|
||||
}
|
||||
if (hasNodeOption('--use-system-ca')) {
|
||||
result.has_use_system_ca = true;
|
||||
}
|
||||
if (hasNodeOption('--use-openssl-ca')) {
|
||||
result.has_use_openssl_ca = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
async function logStartupTelemetry(): Promise<void> {
|
||||
if (isAnalyticsDisabled()) return;
|
||||
const [isGit, worktreeCount, ghAuthStatus] = await Promise.all([getIsGit(), getWorktreeCount(), getGhAuthStatus()]);
|
||||
logEvent('tengu_startup_telemetry', {
|
||||
is_git: isGit,
|
||||
worktree_count: worktreeCount,
|
||||
gh_auth_status: ghAuthStatus as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
sandbox_enabled: SandboxManager.isSandboxingEnabled(),
|
||||
are_unsandboxed_commands_allowed: SandboxManager.areUnsandboxedCommandsAllowed(),
|
||||
is_auto_bash_allowed_if_sandbox_enabled: SandboxManager.isAutoAllowBashIfSandboxedEnabled(),
|
||||
auto_updater_disabled: isAutoUpdaterDisabled(),
|
||||
prefers_reduced_motion: getInitialSettings().prefersReducedMotion ?? false,
|
||||
...getCertEnvVarTelemetry()
|
||||
});
|
||||
}
|
||||
|
||||
// @[MODEL LAUNCH]: Consider any migrations you may need for model strings. See migrateSonnet1mToSonnet45.ts for an example.
|
||||
// Bump this when adding a new sync migration so existing users re-run the set.
|
||||
const CURRENT_MIGRATION_VERSION = 11;
|
||||
@@ -413,8 +355,7 @@ export function startDeferredPrefetches(): void {
|
||||
}
|
||||
void countFilesRoundedRg(getCwd(), AbortSignal.timeout(3000), []);
|
||||
|
||||
// Analytics and feature flag initialization
|
||||
void initializeAnalyticsGates();
|
||||
// Feature flag initialization
|
||||
void prefetchOfficialMcpUrls();
|
||||
void refreshModelCapabilities();
|
||||
|
||||
@@ -2285,11 +2226,8 @@ async function run(): Promise<CommanderCommand> {
|
||||
resetUserCache();
|
||||
// Refresh GrowthBook after login to get updated feature flags (e.g., for claude.ai MCPs)
|
||||
refreshGrowthBookAfterAuthChange();
|
||||
// Clear any stale trusted device token then enroll for Remote Control.
|
||||
// Both self-gate on tengu_sessions_elevated_auth_enforcement internally
|
||||
// — enrollTrustedDevice() via checkGate_CACHED_OR_BLOCKING (awaits
|
||||
// the GrowthBook reinit above), clearTrustedDeviceToken() via the
|
||||
// sync cached check (acceptable since clear is idempotent).
|
||||
// Clear any stale trusted-device token, then run the no-op enrollment
|
||||
// stub so the disabled bridge path stays consistent after login.
|
||||
void import('./bridge/trustedDevice.js').then(m => {
|
||||
m.clearTrustedDeviceToken();
|
||||
return m.enrollTrustedDevice();
|
||||
@@ -2587,15 +2525,10 @@ async function run(): Promise<CommanderCommand> {
|
||||
setHasFormattedOutput(true);
|
||||
}
|
||||
|
||||
// Apply full environment variables in print mode since trust dialog is bypassed
|
||||
// This includes potentially dangerous environment variables from untrusted sources
|
||||
// Apply full environment variables in print mode since trust dialog is bypassed.
|
||||
// but print mode is considered trusted (as documented in help text)
|
||||
applyConfigEnvironmentVariables();
|
||||
|
||||
// Initialize telemetry after env vars are applied so OTEL endpoint env vars and
|
||||
// otelHeadersHelper (which requires trust to execute) are available.
|
||||
initializeTelemetryAfterTrust();
|
||||
|
||||
// Kick SessionStart hooks now so the subprocess spawn overlaps with
|
||||
// MCP connect + plugin init + print.ts import below. loadInitialMessages
|
||||
// joins this at print.ts:4397. Guarded same as loadInitialMessages —
|
||||
@@ -2820,7 +2753,6 @@ async function run(): Promise<CommanderCommand> {
|
||||
void import('./utils/sdkHeapDumpMonitor.js').then(m => m.startSdkMemoryMonitor());
|
||||
}
|
||||
}
|
||||
logSessionTelemetry();
|
||||
profileCheckpoint('before_print_import');
|
||||
const {
|
||||
runHeadless
|
||||
@@ -3043,15 +2975,11 @@ async function run(): Promise<CommanderCommand> {
|
||||
|
||||
// Increment numStartups synchronously — first-render readers like
|
||||
// shouldShowEffortCallout (via useState initializer) need the updated
|
||||
// value before setImmediate fires. Defer only telemetry.
|
||||
// value immediately.
|
||||
saveGlobalConfig(current => ({
|
||||
...current,
|
||||
numStartups: (current.numStartups ?? 0) + 1
|
||||
}));
|
||||
setImmediate(() => {
|
||||
void logStartupTelemetry();
|
||||
logSessionTelemetry();
|
||||
});
|
||||
|
||||
// Set up per-turn session environment data uploader (ant-only build).
|
||||
// Default-enabled for all ant users when working in an Anthropic-owned
|
||||
|
||||
@@ -108,7 +108,9 @@ export class SessionsWebSocket {
|
||||
const baseUrl = getOauthConfig().BASE_API_URL.replace('https://', 'wss://')
|
||||
const url = `${baseUrl}/v1/sessions/ws/${this.sessionId}/subscribe?organization_uuid=${this.orgUuid}`
|
||||
|
||||
logForDebugging(`[SessionsWebSocket] Connecting to ${url}`)
|
||||
logForDebugging(
|
||||
'[SessionsWebSocket] Connecting to session subscription endpoint',
|
||||
)
|
||||
|
||||
// Get fresh token for each connection attempt
|
||||
const accessToken = this.getAccessToken()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Shared analytics configuration
|
||||
*
|
||||
* Common logic for determining when analytics should be disabled
|
||||
* across all analytics systems (Datadog, 1P)
|
||||
* across the remaining local analytics compatibility surfaces.
|
||||
*/
|
||||
|
||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
||||
@@ -31,7 +31,7 @@ export function isAnalyticsDisabled(): boolean {
|
||||
*
|
||||
* Unlike isAnalyticsDisabled(), this does NOT block on 3P providers
|
||||
* (Bedrock/Vertex/Foundry). The survey is a local UI prompt with no
|
||||
* transcript data — enterprise customers capture responses via OTEL.
|
||||
* transcript upload in this fork.
|
||||
*/
|
||||
export function isFeedbackSurveyDisabled(): boolean {
|
||||
return process.env.NODE_ENV === 'test' || isTelemetryDisabled()
|
||||
|
||||
@@ -1,307 +1,9 @@
|
||||
import axios from 'axios'
|
||||
import { createHash } from 'crypto'
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import { getOrCreateUserID } from '../../utils/config.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { getCanonicalName } from '../../utils/model/model.js'
|
||||
import { getAPIProvider } from '../../utils/model/providers.js'
|
||||
import { MODEL_COSTS } from '../../utils/modelCost.js'
|
||||
import { isAnalyticsDisabled } from './config.js'
|
||||
import { getEventMetadata } from './metadata.js'
|
||||
|
||||
const DATADOG_LOGS_ENDPOINT =
|
||||
'https://http-intake.logs.us5.datadoghq.com/api/v2/logs'
|
||||
const DATADOG_CLIENT_TOKEN = 'pubbbf48e6d78dae54bceaa4acf463299bf'
|
||||
const DEFAULT_FLUSH_INTERVAL_MS = 15000
|
||||
const MAX_BATCH_SIZE = 100
|
||||
const NETWORK_TIMEOUT_MS = 5000
|
||||
|
||||
const DATADOG_ALLOWED_EVENTS = new Set([
|
||||
'chrome_bridge_connection_succeeded',
|
||||
'chrome_bridge_connection_failed',
|
||||
'chrome_bridge_disconnected',
|
||||
'chrome_bridge_tool_call_completed',
|
||||
'chrome_bridge_tool_call_error',
|
||||
'chrome_bridge_tool_call_started',
|
||||
'chrome_bridge_tool_call_timeout',
|
||||
'tengu_api_error',
|
||||
'tengu_api_success',
|
||||
'tengu_brief_mode_enabled',
|
||||
'tengu_brief_mode_toggled',
|
||||
'tengu_brief_send',
|
||||
'tengu_cancel',
|
||||
'tengu_compact_failed',
|
||||
'tengu_exit',
|
||||
'tengu_flicker',
|
||||
'tengu_init',
|
||||
'tengu_model_fallback_triggered',
|
||||
'tengu_oauth_error',
|
||||
'tengu_oauth_success',
|
||||
'tengu_oauth_token_refresh_failure',
|
||||
'tengu_oauth_token_refresh_success',
|
||||
'tengu_oauth_token_refresh_lock_acquiring',
|
||||
'tengu_oauth_token_refresh_lock_acquired',
|
||||
'tengu_oauth_token_refresh_starting',
|
||||
'tengu_oauth_token_refresh_completed',
|
||||
'tengu_oauth_token_refresh_lock_releasing',
|
||||
'tengu_oauth_token_refresh_lock_released',
|
||||
'tengu_query_error',
|
||||
'tengu_session_file_read',
|
||||
'tengu_started',
|
||||
'tengu_tool_use_error',
|
||||
'tengu_tool_use_granted_in_prompt_permanent',
|
||||
'tengu_tool_use_granted_in_prompt_temporary',
|
||||
'tengu_tool_use_rejected_in_prompt',
|
||||
'tengu_tool_use_success',
|
||||
'tengu_uncaught_exception',
|
||||
'tengu_unhandled_rejection',
|
||||
'tengu_voice_recording_started',
|
||||
'tengu_voice_toggled',
|
||||
'tengu_team_mem_sync_pull',
|
||||
'tengu_team_mem_sync_push',
|
||||
'tengu_team_mem_sync_started',
|
||||
'tengu_team_mem_entries_capped',
|
||||
])
|
||||
|
||||
const TAG_FIELDS = [
|
||||
'arch',
|
||||
'clientType',
|
||||
'errorType',
|
||||
'http_status_range',
|
||||
'http_status',
|
||||
'kairosActive',
|
||||
'model',
|
||||
'platform',
|
||||
'provider',
|
||||
'skillMode',
|
||||
'subscriptionType',
|
||||
'toolName',
|
||||
'userBucket',
|
||||
'userType',
|
||||
'version',
|
||||
'versionBase',
|
||||
]
|
||||
|
||||
function camelToSnakeCase(str: string): string {
|
||||
return str.replace(/[A-Z]/g, letter => `_${letter.toLowerCase()}`)
|
||||
}
|
||||
|
||||
type DatadogLog = {
|
||||
ddsource: string
|
||||
ddtags: string
|
||||
message: string
|
||||
service: string
|
||||
hostname: string
|
||||
[key: string]: unknown
|
||||
}
|
||||
|
||||
let logBatch: DatadogLog[] = []
|
||||
let flushTimer: NodeJS.Timeout | null = null
|
||||
let datadogInitialized: boolean | null = null
|
||||
|
||||
async function flushLogs(): Promise<void> {
|
||||
if (logBatch.length === 0) return
|
||||
|
||||
const logsToSend = logBatch
|
||||
logBatch = []
|
||||
|
||||
try {
|
||||
await axios.post(DATADOG_LOGS_ENDPOINT, logsToSend, {
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'DD-API-KEY': DATADOG_CLIENT_TOKEN,
|
||||
},
|
||||
timeout: NETWORK_TIMEOUT_MS,
|
||||
})
|
||||
} catch (error) {
|
||||
logError(error)
|
||||
}
|
||||
}
|
||||
|
||||
function scheduleFlush(): void {
|
||||
if (flushTimer) return
|
||||
|
||||
flushTimer = setTimeout(() => {
|
||||
flushTimer = null
|
||||
void flushLogs()
|
||||
}, getFlushIntervalMs()).unref()
|
||||
}
|
||||
|
||||
export const initializeDatadog = memoize(async (): Promise<boolean> => {
|
||||
if (isAnalyticsDisabled()) {
|
||||
datadogInitialized = false
|
||||
return false
|
||||
}
|
||||
|
||||
try {
|
||||
datadogInitialized = true
|
||||
return true
|
||||
} catch (error) {
|
||||
logError(error)
|
||||
datadogInitialized = false
|
||||
return false
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* Flush remaining Datadog logs and shut down.
|
||||
* Called from gracefulShutdown() before process.exit() since
|
||||
* forceExit() prevents the beforeExit handler from firing.
|
||||
* Datadog analytics egress is disabled in this build.
|
||||
*
|
||||
* Only shutdown compatibility remains for existing cleanup paths.
|
||||
*/
|
||||
|
||||
export async function shutdownDatadog(): Promise<void> {
|
||||
if (flushTimer) {
|
||||
clearTimeout(flushTimer)
|
||||
flushTimer = null
|
||||
}
|
||||
await flushLogs()
|
||||
}
|
||||
|
||||
// NOTE: use via src/services/analytics/index.ts > logEvent
|
||||
export async function trackDatadogEvent(
|
||||
eventName: string,
|
||||
properties: { [key: string]: boolean | number | undefined },
|
||||
): Promise<void> {
|
||||
if (process.env.NODE_ENV !== 'production') {
|
||||
return
|
||||
}
|
||||
|
||||
// Don't send events for 3P providers (Bedrock, Vertex, Foundry)
|
||||
if (getAPIProvider() !== 'firstParty') {
|
||||
return
|
||||
}
|
||||
|
||||
// Fast path: use cached result if available to avoid await overhead
|
||||
let initialized = datadogInitialized
|
||||
if (initialized === null) {
|
||||
initialized = await initializeDatadog()
|
||||
}
|
||||
if (!initialized || !DATADOG_ALLOWED_EVENTS.has(eventName)) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
const metadata = await getEventMetadata({
|
||||
model: properties.model,
|
||||
betas: properties.betas,
|
||||
})
|
||||
// Destructure to avoid duplicate envContext (once nested, once flattened)
|
||||
const { envContext, ...restMetadata } = metadata
|
||||
const allData: Record<string, unknown> = {
|
||||
...restMetadata,
|
||||
...envContext,
|
||||
...properties,
|
||||
userBucket: getUserBucket(),
|
||||
}
|
||||
|
||||
// Normalize MCP tool names to "mcp" for cardinality reduction
|
||||
if (
|
||||
typeof allData.toolName === 'string' &&
|
||||
allData.toolName.startsWith('mcp__')
|
||||
) {
|
||||
allData.toolName = 'mcp'
|
||||
}
|
||||
|
||||
// Normalize model names for cardinality reduction (external users only)
|
||||
if (process.env.USER_TYPE !== 'ant' && typeof allData.model === 'string') {
|
||||
const shortName = getCanonicalName(allData.model.replace(/\[1m]$/i, ''))
|
||||
allData.model = shortName in MODEL_COSTS ? shortName : 'other'
|
||||
}
|
||||
|
||||
// Truncate dev version to base + date (remove timestamp and sha for cardinality reduction)
|
||||
// e.g. "2.0.53-dev.20251124.t173302.sha526cc6a" -> "2.0.53-dev.20251124"
|
||||
if (typeof allData.version === 'string') {
|
||||
allData.version = allData.version.replace(
|
||||
/^(\d+\.\d+\.\d+-dev\.\d{8})\.t\d+\.sha[a-f0-9]+$/,
|
||||
'$1',
|
||||
)
|
||||
}
|
||||
|
||||
// Transform status to http_status and http_status_range to avoid Datadog reserved field
|
||||
if (allData.status !== undefined && allData.status !== null) {
|
||||
const statusCode = String(allData.status)
|
||||
allData.http_status = statusCode
|
||||
|
||||
// Determine status range (1xx, 2xx, 3xx, 4xx, 5xx)
|
||||
const firstDigit = statusCode.charAt(0)
|
||||
if (firstDigit >= '1' && firstDigit <= '5') {
|
||||
allData.http_status_range = `${firstDigit}xx`
|
||||
}
|
||||
|
||||
// Remove original status field to avoid conflict with Datadog's reserved field
|
||||
delete allData.status
|
||||
}
|
||||
|
||||
// Build ddtags with high-cardinality fields for filtering.
|
||||
// event:<name> is prepended so the event name is searchable via the
|
||||
// log search API — the `message` field (where eventName also lives)
|
||||
// is a DD reserved field and is NOT queryable from dashboard widget
|
||||
// queries or the aggregation API. See scripts/release/MONITORING.md.
|
||||
const allDataRecord = allData
|
||||
const tags = [
|
||||
`event:${eventName}`,
|
||||
...TAG_FIELDS.filter(
|
||||
field =>
|
||||
allDataRecord[field] !== undefined && allDataRecord[field] !== null,
|
||||
).map(field => `${camelToSnakeCase(field)}:${allDataRecord[field]}`),
|
||||
]
|
||||
|
||||
const log: DatadogLog = {
|
||||
ddsource: 'nodejs',
|
||||
ddtags: tags.join(','),
|
||||
message: eventName,
|
||||
service: 'claude-code',
|
||||
hostname: 'claude-code',
|
||||
env: process.env.USER_TYPE,
|
||||
}
|
||||
|
||||
// Add all fields as searchable attributes (not duplicated in tags)
|
||||
for (const [key, value] of Object.entries(allData)) {
|
||||
if (value !== undefined && value !== null) {
|
||||
log[camelToSnakeCase(key)] = value
|
||||
}
|
||||
}
|
||||
|
||||
logBatch.push(log)
|
||||
|
||||
// Flush immediately if batch is full, otherwise schedule
|
||||
if (logBatch.length >= MAX_BATCH_SIZE) {
|
||||
if (flushTimer) {
|
||||
clearTimeout(flushTimer)
|
||||
flushTimer = null
|
||||
}
|
||||
void flushLogs()
|
||||
} else {
|
||||
scheduleFlush()
|
||||
}
|
||||
} catch (error) {
|
||||
logError(error)
|
||||
}
|
||||
}
|
||||
|
||||
const NUM_USER_BUCKETS = 30
|
||||
|
||||
/**
|
||||
* Gets a 'bucket' that the user ID falls into.
|
||||
*
|
||||
* For alerting purposes, we want to alert on the number of users impacted
|
||||
* by an issue, rather than the number of events- often a small number of users
|
||||
* can generate a large number of events (e.g. due to retries). To approximate
|
||||
* this without ruining cardinality by counting user IDs directly, we hash the user ID
|
||||
* and assign it to one of a fixed number of buckets.
|
||||
*
|
||||
* This allows us to estimate the number of unique users by counting unique buckets,
|
||||
* while preserving user privacy and reducing cardinality.
|
||||
*/
|
||||
const getUserBucket = memoize((): number => {
|
||||
const userId = getOrCreateUserID()
|
||||
const hash = createHash('sha256').update(userId).digest('hex')
|
||||
return parseInt(hash.slice(0, 8), 16) % NUM_USER_BUCKETS
|
||||
})
|
||||
|
||||
function getFlushIntervalMs(): number {
|
||||
// Allow tests to override to not block on the default flush interval.
|
||||
return (
|
||||
parseInt(process.env.CLAUDE_CODE_DATADOG_FLUSH_INTERVAL_MS || '', 10) ||
|
||||
DEFAULT_FLUSH_INTERVAL_MS
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,449 +1,16 @@
|
||||
import type { AnyValueMap, Logger, logs } from '@opentelemetry/api-logs'
|
||||
import { resourceFromAttributes } from '@opentelemetry/resources'
|
||||
import {
|
||||
BatchLogRecordProcessor,
|
||||
LoggerProvider,
|
||||
} from '@opentelemetry/sdk-logs'
|
||||
import {
|
||||
ATTR_SERVICE_NAME,
|
||||
ATTR_SERVICE_VERSION,
|
||||
} from '@opentelemetry/semantic-conventions'
|
||||
import { randomUUID } from 'crypto'
|
||||
import { isEqual } from 'lodash-es'
|
||||
import { getOrCreateUserID } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { getPlatform, getWslVersion } from '../../utils/platform.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { profileCheckpoint } from '../../utils/startupProfiler.js'
|
||||
import { getCoreUserData } from '../../utils/user.js'
|
||||
import { isAnalyticsDisabled } from './config.js'
|
||||
import { FirstPartyEventLoggingExporter } from './firstPartyEventLoggingExporter.js'
|
||||
import type { GrowthBookUserAttributes } from './growthbook.js'
|
||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from './growthbook.js'
|
||||
import { getEventMetadata } from './metadata.js'
|
||||
import { isSinkKilled } from './sinkKillswitch.js'
|
||||
|
||||
/**
|
||||
* Configuration for sampling individual event types.
|
||||
* Each event name maps to an object containing sample_rate (0-1).
|
||||
* Events not in the config are logged at 100% rate.
|
||||
*/
|
||||
export type EventSamplingConfig = {
|
||||
[eventName: string]: {
|
||||
sample_rate: number
|
||||
}
|
||||
}
|
||||
|
||||
const EVENT_SAMPLING_CONFIG_NAME = 'tengu_event_sampling_config'
|
||||
/**
|
||||
* Get the event sampling configuration from GrowthBook.
|
||||
* Uses cached value if available, updates cache in background.
|
||||
*/
|
||||
export function getEventSamplingConfig(): EventSamplingConfig {
|
||||
return getDynamicConfig_CACHED_MAY_BE_STALE<EventSamplingConfig>(
|
||||
EVENT_SAMPLING_CONFIG_NAME,
|
||||
{},
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if an event should be sampled based on its sample rate.
|
||||
* Returns the sample rate if sampled, null if not sampled.
|
||||
* Anthropic 1P event logging egress is disabled in this build.
|
||||
*
|
||||
* @param eventName - Name of the event to check
|
||||
* @returns The sample_rate if event should be logged, null if it should be dropped
|
||||
* Only the shutdown and feedback call sites still need a local stub.
|
||||
*/
|
||||
export function shouldSampleEvent(eventName: string): number | null {
|
||||
const config = getEventSamplingConfig()
|
||||
const eventConfig = config[eventName]
|
||||
|
||||
// If no config for this event, log at 100% rate (no sampling)
|
||||
if (!eventConfig) {
|
||||
return null
|
||||
}
|
||||
|
||||
const sampleRate = eventConfig.sample_rate
|
||||
|
||||
// Validate sample rate is in valid range
|
||||
if (typeof sampleRate !== 'number' || sampleRate < 0 || sampleRate > 1) {
|
||||
return null
|
||||
}
|
||||
|
||||
// Sample rate of 1 means log everything (no need to add metadata)
|
||||
if (sampleRate >= 1) {
|
||||
return null
|
||||
}
|
||||
|
||||
// Sample rate of 0 means drop everything
|
||||
if (sampleRate <= 0) {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Randomly decide whether to sample this event
|
||||
return Math.random() < sampleRate ? sampleRate : 0
|
||||
}
|
||||
|
||||
const BATCH_CONFIG_NAME = 'tengu_1p_event_batch_config'
|
||||
type BatchConfig = {
|
||||
scheduledDelayMillis?: number
|
||||
maxExportBatchSize?: number
|
||||
maxQueueSize?: number
|
||||
skipAuth?: boolean
|
||||
maxAttempts?: number
|
||||
path?: string
|
||||
baseUrl?: string
|
||||
}
|
||||
function getBatchConfig(): BatchConfig {
|
||||
return getDynamicConfig_CACHED_MAY_BE_STALE<BatchConfig>(
|
||||
BATCH_CONFIG_NAME,
|
||||
{},
|
||||
)
|
||||
}
|
||||
|
||||
// Module-local state for event logging (not exposed globally)
|
||||
let firstPartyEventLogger: ReturnType<typeof logs.getLogger> | null = null
|
||||
let firstPartyEventLoggerProvider: LoggerProvider | null = null
|
||||
// Last batch config used to construct the provider — used by
|
||||
// reinitialize1PEventLoggingIfConfigChanged to decide whether a rebuild is
|
||||
// needed when GrowthBook refreshes.
|
||||
let lastBatchConfig: BatchConfig | null = null
|
||||
/**
|
||||
* Flush and shutdown the 1P event logger.
|
||||
* This should be called as the final step before process exit to ensure
|
||||
* all events (including late ones from API responses) are exported.
|
||||
*/
|
||||
export async function shutdown1PEventLogging(): Promise<void> {
|
||||
if (!firstPartyEventLoggerProvider) {
|
||||
return
|
||||
}
|
||||
try {
|
||||
await firstPartyEventLoggerProvider.shutdown()
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging('1P event logging: final shutdown complete')
|
||||
}
|
||||
} catch {
|
||||
// Ignore shutdown errors
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if 1P event logging is enabled.
|
||||
* Respects the same opt-outs as other analytics sinks:
|
||||
* - Test environment
|
||||
* - Third-party cloud providers (Bedrock/Vertex)
|
||||
* - Global telemetry opt-outs
|
||||
* - Non-essential traffic disabled
|
||||
*
|
||||
* Note: Unlike BigQuery metrics, event logging does NOT check organization-level
|
||||
* metrics opt-out via API. It follows the same pattern as Statsig event logging.
|
||||
*/
|
||||
export function is1PEventLoggingEnabled(): boolean {
|
||||
// Respect standard analytics opt-outs
|
||||
return !isAnalyticsDisabled()
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a 1st-party event for internal analytics (async version).
|
||||
* Events are batched and exported to /api/event_logging/batch
|
||||
*
|
||||
* This enriches the event with core metadata (model, session, env context, etc.)
|
||||
* at log time, similar to logEventToStatsig.
|
||||
*
|
||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
||||
*/
|
||||
async function logEventTo1PAsync(
|
||||
firstPartyEventLogger: Logger,
|
||||
eventName: string,
|
||||
metadata: Record<string, number | boolean | undefined> = {},
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Enrich with core metadata at log time (similar to Statsig pattern)
|
||||
const coreMetadata = await getEventMetadata({
|
||||
model: metadata.model,
|
||||
betas: metadata.betas,
|
||||
})
|
||||
|
||||
// Build attributes - OTel supports nested objects natively via AnyValueMap
|
||||
// Cast through unknown since our nested objects are structurally compatible
|
||||
// with AnyValue but TS doesn't recognize it due to missing index signatures
|
||||
const attributes = {
|
||||
event_name: eventName,
|
||||
event_id: randomUUID(),
|
||||
// Pass objects directly - no JSON serialization needed
|
||||
core_metadata: coreMetadata,
|
||||
user_metadata: getCoreUserData(true),
|
||||
event_metadata: metadata,
|
||||
} as unknown as AnyValueMap
|
||||
|
||||
// Add user_id if available
|
||||
const userId = getOrCreateUserID()
|
||||
if (userId) {
|
||||
attributes.user_id = userId
|
||||
}
|
||||
|
||||
// Debug logging when debug mode is enabled
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging(
|
||||
`[ANT-ONLY] 1P event: ${eventName} ${jsonStringify(metadata, null, 0)}`,
|
||||
)
|
||||
}
|
||||
|
||||
// Emit log record
|
||||
firstPartyEventLogger.emit({
|
||||
body: eventName,
|
||||
attributes,
|
||||
})
|
||||
} catch (e) {
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
throw e
|
||||
}
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logError(e as Error)
|
||||
}
|
||||
// swallow
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a 1st-party event for internal analytics.
|
||||
* Events are batched and exported to /api/event_logging/batch
|
||||
*
|
||||
* @param eventName - Name of the event (e.g., 'tengu_api_query')
|
||||
* @param metadata - Additional metadata for the event (intentionally no strings, to avoid accidentally logging code/filepaths)
|
||||
*/
|
||||
export function logEventTo1P(
|
||||
eventName: string,
|
||||
metadata: Record<string, number | boolean | undefined> = {},
|
||||
_eventName: string,
|
||||
_metadata: Record<string, number | boolean | undefined> = {},
|
||||
): void {
|
||||
if (!is1PEventLoggingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
||||
return
|
||||
}
|
||||
|
||||
// Fire and forget - don't block on metadata enrichment
|
||||
void logEventTo1PAsync(firstPartyEventLogger, eventName, metadata)
|
||||
}
|
||||
|
||||
/**
 * GrowthBook experiment event data for logging
 */
export type GrowthBookExperimentData = {
  // GrowthBook experiment identifier; sent as `experiment_id`.
  experimentId: string
  // Index of the assigned variation; sent as `variation_id`.
  variationId: number
  // Optional user attributes; when present, `sessionId` becomes the event's
  // `session_id` and the whole object is JSON-stringified into `user_attributes`.
  userAttributes?: GrowthBookUserAttributes
  // Optional free-form metadata; JSON-stringified into `experiment_metadata`.
  experimentMetadata?: Record<string, unknown>
}
|
||||
|
||||
// api.anthropic.com only serves the "production" GrowthBook environment
|
||||
// (see starling/starling/cli/cli.py DEFAULT_ENVIRONMENTS). Staging and
|
||||
// development environments are not exported to the prod API.
|
||||
function getEnvironmentForGrowthBook(): string {
|
||||
return 'production'
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a GrowthBook experiment assignment event to 1P.
|
||||
* Events are batched and exported to /api/event_logging/batch
|
||||
*
|
||||
* @param data - GrowthBook experiment assignment data
|
||||
*/
|
||||
export function logGrowthBookExperimentTo1P(
|
||||
data: GrowthBookExperimentData,
|
||||
): void {
|
||||
if (!is1PEventLoggingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!firstPartyEventLogger || isSinkKilled('firstParty')) {
|
||||
return
|
||||
}
|
||||
|
||||
const userId = getOrCreateUserID()
|
||||
const { accountUuid, organizationUuid } = getCoreUserData(true)
|
||||
|
||||
// Build attributes for GrowthbookExperimentEvent
|
||||
const attributes = {
|
||||
event_type: 'GrowthbookExperimentEvent',
|
||||
event_id: randomUUID(),
|
||||
experiment_id: data.experimentId,
|
||||
variation_id: data.variationId,
|
||||
...(userId && { device_id: userId }),
|
||||
...(accountUuid && { account_uuid: accountUuid }),
|
||||
...(organizationUuid && { organization_uuid: organizationUuid }),
|
||||
...(data.userAttributes && {
|
||||
session_id: data.userAttributes.sessionId,
|
||||
user_attributes: jsonStringify(data.userAttributes),
|
||||
}),
|
||||
...(data.experimentMetadata && {
|
||||
experiment_metadata: jsonStringify(data.experimentMetadata),
|
||||
}),
|
||||
environment: getEnvironmentForGrowthBook(),
|
||||
}
|
||||
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging(
|
||||
`[ANT-ONLY] 1P GrowthBook experiment: ${data.experimentId} variation=${data.variationId}`,
|
||||
)
|
||||
}
|
||||
|
||||
firstPartyEventLogger.emit({
|
||||
body: 'growthbook_experiment',
|
||||
attributes,
|
||||
})
|
||||
}
|
||||
|
||||
// Fallback tuning for the BatchLogRecordProcessor used by 1P event export,
// applied when the GrowthBook batch config does not supply values.
// How often the batch processor flushes (ms); overridable via OTEL_LOGS_EXPORT_INTERVAL.
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 10000
// Max events sent per export cycle.
const DEFAULT_MAX_EXPORT_BATCH_SIZE = 200
// Max events buffered before the processor starts dropping.
const DEFAULT_MAX_QUEUE_SIZE = 8192
|
||||
|
||||
/**
|
||||
* Initialize 1P event logging infrastructure.
|
||||
* This creates a separate LoggerProvider for internal event logging,
|
||||
* independent of customer OTLP telemetry.
|
||||
*
|
||||
* This uses its own minimal resource configuration with just the attributes
|
||||
* we need for internal analytics (service name, version, platform info).
|
||||
*/
|
||||
export function initialize1PEventLogging(): void {
|
||||
profileCheckpoint('1p_event_logging_start')
|
||||
const enabled = is1PEventLoggingEnabled()
|
||||
|
||||
if (!enabled) {
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging('1P event logging not enabled')
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch batch processor configuration from GrowthBook dynamic config
|
||||
// Uses cached value if available, refreshes in background
|
||||
const batchConfig = getBatchConfig()
|
||||
lastBatchConfig = batchConfig
|
||||
profileCheckpoint('1p_event_after_growthbook_config')
|
||||
|
||||
const scheduledDelayMillis =
|
||||
batchConfig.scheduledDelayMillis ||
|
||||
parseInt(
|
||||
process.env.OTEL_LOGS_EXPORT_INTERVAL ||
|
||||
DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
|
||||
)
|
||||
|
||||
const maxExportBatchSize =
|
||||
batchConfig.maxExportBatchSize || DEFAULT_MAX_EXPORT_BATCH_SIZE
|
||||
|
||||
const maxQueueSize = batchConfig.maxQueueSize || DEFAULT_MAX_QUEUE_SIZE
|
||||
|
||||
// Build our own resource for 1P event logging with minimal attributes
|
||||
const platform = getPlatform()
|
||||
const attributes: Record<string, string> = {
|
||||
[ATTR_SERVICE_NAME]: 'claude-code',
|
||||
[ATTR_SERVICE_VERSION]: MACRO.VERSION,
|
||||
}
|
||||
|
||||
// Add WSL-specific attributes if running on WSL
|
||||
if (platform === 'wsl') {
|
||||
const wslVersion = getWslVersion()
|
||||
if (wslVersion) {
|
||||
attributes['wsl.version'] = wslVersion
|
||||
}
|
||||
}
|
||||
|
||||
const resource = resourceFromAttributes(attributes)
|
||||
|
||||
// Create a new LoggerProvider with the EventLoggingExporter
|
||||
// NOTE: This is kept separate from customer telemetry logs to ensure
|
||||
// internal events don't leak to customer endpoints and vice versa.
|
||||
// We don't register this globally - it's only used for internal event logging.
|
||||
const eventLoggingExporter = new FirstPartyEventLoggingExporter({
|
||||
maxBatchSize: maxExportBatchSize,
|
||||
skipAuth: batchConfig.skipAuth,
|
||||
maxAttempts: batchConfig.maxAttempts,
|
||||
path: batchConfig.path,
|
||||
baseUrl: batchConfig.baseUrl,
|
||||
isKilled: () => isSinkKilled('firstParty'),
|
||||
})
|
||||
firstPartyEventLoggerProvider = new LoggerProvider({
|
||||
resource,
|
||||
processors: [
|
||||
new BatchLogRecordProcessor(eventLoggingExporter, {
|
||||
scheduledDelayMillis,
|
||||
maxExportBatchSize,
|
||||
maxQueueSize,
|
||||
}),
|
||||
],
|
||||
})
|
||||
|
||||
// Initialize event logger from our internal provider (NOT from global API)
|
||||
// IMPORTANT: We must get the logger from our local provider, not logs.getLogger()
|
||||
// because logs.getLogger() returns a logger from the global provider, which is
|
||||
// separate and used for customer telemetry.
|
||||
firstPartyEventLogger = firstPartyEventLoggerProvider.getLogger(
|
||||
'com.anthropic.claude_code.events',
|
||||
MACRO.VERSION,
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Rebuild the 1P event logging pipeline if the batch config changed.
 * Register this with onGrowthBookRefresh so long-running sessions pick up
 * changes to batch size, delay, endpoint, etc.
 *
 * Event-loss safety:
 * 1. Null the logger first — concurrent logEventTo1P() calls hit the
 *    !firstPartyEventLogger guard and bail during the swap window. This drops
 *    a handful of events but prevents emitting to a draining provider.
 * 2. forceFlush() drains the old BatchLogRecordProcessor buffer to the
 *    exporter. Export failures go to disk at getCurrentBatchFilePath() which
 *    is keyed by module-level BATCH_UUID + sessionId — unchanged across
 *    reinit — so the NEW exporter's disk-backed retry picks them up.
 * 3. Swap to new provider/logger; old provider shutdown runs in background
 *    (buffer already drained, just cleanup).
 */
export async function reinitialize1PEventLoggingIfConfigChanged(): Promise<void> {
  // Nothing to rebuild when logging is off or was never initialized.
  if (!is1PEventLoggingEnabled() || !firstPartyEventLoggerProvider) {
    return
  }

  const newConfig = getBatchConfig()

  // Cheap no-op path: config identical to what the pipeline was built with.
  if (isEqual(newConfig, lastBatchConfig)) {
    return
  }

  if (process.env.USER_TYPE === 'ant') {
    logForDebugging(
      `1P event logging: ${BATCH_CONFIG_NAME} changed, reinitializing`,
    )
  }

  // Step 1 (see doc above): stop new emits before draining.
  const oldProvider = firstPartyEventLoggerProvider
  const oldLogger = firstPartyEventLogger
  firstPartyEventLogger = null

  // Step 2: drain buffered records to the exporter.
  try {
    await oldProvider.forceFlush()
  } catch {
    // Export failures are already on disk; new exporter will retry them.
  }

  // Step 3: build the replacement pipeline from the new config.
  firstPartyEventLoggerProvider = null
  try {
    initialize1PEventLogging()
  } catch (e) {
    // Restore so the next GrowthBook refresh can retry. oldProvider was
    // only forceFlush()'d, not shut down — it's still functional. Without
    // this, both stay null and the !firstPartyEventLoggerProvider gate at
    // the top makes recovery impossible.
    firstPartyEventLoggerProvider = oldProvider
    firstPartyEventLogger = oldLogger
    logError(e)
    return
  }

  // Old provider's buffer is already drained; shutdown is just cleanup and
  // may run in the background.
  void oldProvider.shutdown().catch(() => {})
  return
}
|
||||
|
||||
@@ -1,806 +0,0 @@
|
||||
import type { HrTime } from '@opentelemetry/api'
|
||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
||||
import type {
|
||||
LogRecordExporter,
|
||||
ReadableLogRecord,
|
||||
} from '@opentelemetry/sdk-logs'
|
||||
import axios from 'axios'
|
||||
import { randomUUID } from 'crypto'
|
||||
import { appendFile, mkdir, readdir, unlink, writeFile } from 'fs/promises'
|
||||
import * as path from 'path'
|
||||
import type { CoreUserData } from 'src/utils/user.js'
|
||||
import {
|
||||
getIsNonInteractiveSession,
|
||||
getSessionId,
|
||||
} from '../../bootstrap/state.js'
|
||||
import { ClaudeCodeInternalEvent } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
||||
import { GrowthbookExperimentEvent } from '../../types/generated/events_mono/growthbook/v1/growthbook_experiment_event.js'
|
||||
import {
|
||||
getClaudeAIOAuthTokens,
|
||||
hasProfileScope,
|
||||
isClaudeAISubscriber,
|
||||
} from '../../utils/auth.js'
|
||||
import { checkHasTrustDialogAccepted } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
|
||||
import { errorMessage, isFsInaccessible, toError } from '../../utils/errors.js'
|
||||
import { getAuthHeaders } from '../../utils/http.js'
|
||||
import { readJSONLFile } from '../../utils/json.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
import { isOAuthTokenExpired } from '../oauth/client.js'
|
||||
import { stripProtoFields } from './index.js'
|
||||
import { type EventMetadata, to1PEventFormat } from './metadata.js'
|
||||
|
||||
// Unique ID for this process run - used to isolate failed event files between runs
// (getCurrentBatchFilePath names files `${FILE_PREFIX}${sessionId}.${BATCH_UUID}.json`,
// and startup retry skips files containing this run's BATCH_UUID).
const BATCH_UUID = randomUUID()

// File prefix for failed event storage
const FILE_PREFIX = '1p_failed_events.'
|
||||
|
||||
// Storage directory for failed events - evaluated at runtime to respect CLAUDE_CONFIG_DIR in tests
|
||||
function getStorageDir(): string {
|
||||
return path.join(getClaudeConfigHomeDir(), 'telemetry')
|
||||
}
|
||||
|
||||
// API envelope - event_data is the JSON output from proto toJSON()
type FirstPartyEventLoggingEvent = {
  // Discriminator the batch endpoint uses to pick the proto schema.
  event_type: 'ClaudeCodeInternalEvent' | 'GrowthbookExperimentEvent'
  // Proto-as-JSON payload (ClaudeCodeInternalEvent.toJSON / GrowthbookExperimentEvent.toJSON).
  event_data: unknown
}

// Body POSTed to the event-logging endpoint: a flat list of envelopes.
type FirstPartyEventLoggingPayload = {
  events: FirstPartyEventLoggingEvent[]
}
|
||||
|
||||
/**
 * Exporter for 1st-party event logging to /api/event_logging/batch.
 *
 * Export cycles are controlled by OpenTelemetry's BatchLogRecordProcessor, which
 * triggers export() when either:
 * - Time interval elapses (default: 5 seconds via scheduledDelayMillis)
 * - Batch size is reached (default: 200 events via maxExportBatchSize)
 *
 * This exporter adds resilience on top:
 * - Append-only log for failed events (concurrency-safe)
 * - Quadratic backoff retry for failed events, dropped after maxAttempts
 * - Immediate retry of queued events when any export succeeds (endpoint is healthy)
 * - Chunking large event sets into smaller batches
 * - Auth fallback: retries without auth on 401 errors
 */
export class FirstPartyEventLoggingExporter implements LogRecordExporter {
  // Full POST URL (baseUrl + path), fixed at construction.
  private readonly endpoint: string
  // Per-request axios timeout in ms.
  private readonly timeout: number
  // Max events per POST; larger sets are chunked in sendEventsInBatches().
  private readonly maxBatchSize: number
  // When true, never attach auth headers.
  private readonly skipAuth: boolean
  // Pause between consecutive batch POSTs within one export.
  private readonly batchDelayMs: number
  // Quadratic backoff base and cap (delay = base * attempts², capped).
  private readonly baseBackoffDelayMs: number
  private readonly maxBackoffDelayMs: number
  // Send attempts allowed before queued events are dropped.
  private readonly maxAttempts: number
  // Killswitch probe, checked per-POST (see constructor options comment).
  private readonly isKilled: () => boolean
  // In-flight doExport() promises, tracked so they can be cleaned up on settle.
  private pendingExports: Promise<void>[] = []
  private isShutdown = false
  // Timer abstraction (injectable for tests); returns a cancel function.
  private readonly schedule: (
    fn: () => Promise<void>,
    delayMs: number,
  ) => () => void
  // Cancel handle for a pending backoff retry, or null when none is scheduled.
  private cancelBackoff: (() => void) | null = null
  // Consecutive send attempts since last success; reset by resetBackoff().
  private attempts = 0
  // True while retryFailedEvents() is actively draining the queue file.
  private isRetrying = false
  // Human-readable context from the most recent failed POST, for error messages.
  private lastExportErrorContext: string | undefined
||||
|
||||
// All options are optional; defaults target production with conservative
// timing. NOTE: most numeric defaults use `||`, so an explicit 0 falls back
// to the default; only maxAttempts/skipAuth/isKilled/schedule use `??`.
constructor(
  options: {
    timeout?: number
    maxBatchSize?: number
    skipAuth?: boolean
    batchDelayMs?: number
    baseBackoffDelayMs?: number
    maxBackoffDelayMs?: number
    maxAttempts?: number
    path?: string
    baseUrl?: string
    // Injected killswitch probe. Checked per-POST so that disabling the
    // firstParty sink also stops backoff retries (not just new emits).
    // Passed in rather than imported to avoid a cycle with firstPartyEventLogger.ts.
    isKilled?: () => boolean
    schedule?: (fn: () => Promise<void>, delayMs: number) => () => void
  } = {},
) {
  // Default: prod, except when ANTHROPIC_BASE_URL is explicitly staging.
  // Overridable via tengu_1p_event_batch_config.baseUrl.
  const baseUrl =
    options.baseUrl ||
    (process.env.ANTHROPIC_BASE_URL === 'https://api-staging.anthropic.com'
      ? 'https://api-staging.anthropic.com'
      : 'https://api.anthropic.com')

  this.endpoint = `${baseUrl}${options.path || '/api/event_logging/batch'}`

  this.timeout = options.timeout || 10000
  this.maxBatchSize = options.maxBatchSize || 200
  this.skipAuth = options.skipAuth ?? false
  this.batchDelayMs = options.batchDelayMs || 100
  this.baseBackoffDelayMs = options.baseBackoffDelayMs || 500
  this.maxBackoffDelayMs = options.maxBackoffDelayMs || 30000
  this.maxAttempts = options.maxAttempts ?? 8
  this.isKilled = options.isKilled ?? (() => false)
  // Default scheduler wraps setTimeout; the returned closure cancels it.
  this.schedule =
    options.schedule ??
    ((fn, ms) => {
      const t = setTimeout(fn, ms)
      return () => clearTimeout(t)
    })

  // Retry any failed events from previous runs of this session (in background)
  void this.retryPreviousBatches()
}
|
||||
|
||||
// Expose for testing
|
||||
async getQueuedEventCount(): Promise<number> {
|
||||
return (await this.loadEventsFromCurrentBatch()).length
|
||||
}
|
||||
|
||||
// --- Storage helpers ---
|
||||
|
||||
private getCurrentBatchFilePath(): string {
|
||||
return path.join(
|
||||
getStorageDir(),
|
||||
`${FILE_PREFIX}${getSessionId()}.${BATCH_UUID}.json`,
|
||||
)
|
||||
}
|
||||
|
||||
private async loadEventsFromFile(
|
||||
filePath: string,
|
||||
): Promise<FirstPartyEventLoggingEvent[]> {
|
||||
try {
|
||||
return await readJSONLFile<FirstPartyEventLoggingEvent>(filePath)
|
||||
} catch {
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
private async loadEventsFromCurrentBatch(): Promise<
|
||||
FirstPartyEventLoggingEvent[]
|
||||
> {
|
||||
return this.loadEventsFromFile(this.getCurrentBatchFilePath())
|
||||
}
|
||||
|
||||
private async saveEventsToFile(
|
||||
filePath: string,
|
||||
events: FirstPartyEventLoggingEvent[],
|
||||
): Promise<void> {
|
||||
try {
|
||||
if (events.length === 0) {
|
||||
try {
|
||||
await unlink(filePath)
|
||||
} catch {
|
||||
// File doesn't exist, nothing to delete
|
||||
}
|
||||
} else {
|
||||
// Ensure storage directory exists
|
||||
await mkdir(getStorageDir(), { recursive: true })
|
||||
// Write as JSON lines (one event per line)
|
||||
const content = events.map(e => jsonStringify(e)).join('\n') + '\n'
|
||||
await writeFile(filePath, content, 'utf8')
|
||||
}
|
||||
} catch (error) {
|
||||
logError(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Append events to the queue file as JSONL. Append is atomic on most
// filesystems, which keeps concurrent writers safe. fs errors are logged.
private async appendEventsToFile(
  filePath: string,
  events: FirstPartyEventLoggingEvent[],
): Promise<void> {
  if (events.length === 0) {
    return
  }
  try {
    // Ensure storage directory exists before appending.
    await mkdir(getStorageDir(), { recursive: true })
    const lines = events.map(event => jsonStringify(event)).join('\n') + '\n'
    await appendFile(filePath, lines, 'utf8')
  } catch (error) {
    logError(error)
  }
}
|
||||
|
||||
private async deleteFile(filePath: string): Promise<void> {
|
||||
try {
|
||||
await unlink(filePath)
|
||||
} catch {
|
||||
// File doesn't exist or can't be deleted, ignore
|
||||
}
|
||||
}
|
||||
|
||||
// --- Previous batch retry (startup) ---
|
||||
|
||||
private async retryPreviousBatches(): Promise<void> {
|
||||
try {
|
||||
const prefix = `${FILE_PREFIX}${getSessionId()}.`
|
||||
let files: string[]
|
||||
try {
|
||||
files = (await readdir(getStorageDir()))
|
||||
.filter((f: string) => f.startsWith(prefix) && f.endsWith('.json'))
|
||||
.filter((f: string) => !f.includes(BATCH_UUID)) // Exclude current batch
|
||||
} catch (e) {
|
||||
if (isFsInaccessible(e)) return
|
||||
throw e
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
const filePath = path.join(getStorageDir(), file)
|
||||
void this.retryFileInBackground(filePath)
|
||||
}
|
||||
} catch (error) {
|
||||
logError(error)
|
||||
}
|
||||
}
|
||||
|
||||
// Retry one leftover failed-event file. On success (or when the retry
// budget is spent / the file is empty) the file is deleted; on partial
// failure only the still-failing events are written back.
private async retryFileInBackground(filePath: string): Promise<void> {
  // Out of retry budget for this process: drop the stale file.
  if (this.attempts >= this.maxAttempts) {
    await this.deleteFile(filePath)
    return
  }

  const events = await this.loadEventsFromFile(filePath)
  if (events.length === 0) {
    await this.deleteFile(filePath)
    return
  }

  if (process.env.USER_TYPE === 'ant') {
    logForDebugging(
      `1P event logging: retrying ${events.length} events from previous batch`,
    )
  }

  const failedEvents = await this.sendEventsInBatches(events)
  if (failedEvents.length === 0) {
    await this.deleteFile(filePath)
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging('1P event logging: previous batch retry succeeded')
    }
  } else {
    // Save only the failed events back (not all original events)
    await this.saveEventsToFile(filePath, failedEvents)
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: previous batch retry failed, ${failedEvents.length} events remain`,
      )
    }
  }
}
|
||||
|
||||
async export(
|
||||
logs: ReadableLogRecord[],
|
||||
resultCallback: (result: ExportResult) => void,
|
||||
): Promise<void> {
|
||||
if (this.isShutdown) {
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging(
|
||||
'1P event logging export failed: Exporter has been shutdown',
|
||||
)
|
||||
}
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: new Error('Exporter has been shutdown'),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const exportPromise = this.doExport(logs, resultCallback)
|
||||
this.pendingExports.push(exportPromise)
|
||||
|
||||
// Clean up completed exports
|
||||
void exportPromise.finally(() => {
|
||||
const index = this.pendingExports.indexOf(exportPromise)
|
||||
if (index > -1) {
|
||||
void this.pendingExports.splice(index, 1)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Core export pass: filter to our scope, transform records to API envelopes,
// send, and report SUCCESS/FAILED through resultCallback exactly once.
// Failed events are persisted and retried via the backoff machinery.
private async doExport(
  logs: ReadableLogRecord[],
  resultCallback: (result: ExportResult) => void,
): Promise<void> {
  try {
    // Filter for event logs only (by scope name)
    const eventLogs = logs.filter(
      log =>
        log.instrumentationScope?.name === 'com.anthropic.claude_code.events',
    )

    if (eventLogs.length === 0) {
      resultCallback({ code: ExportResultCode.SUCCESS })
      return
    }

    // Transform new logs (failed events are retried independently via backoff)
    const events = this.transformLogsToEvents(eventLogs).events

    if (events.length === 0) {
      resultCallback({ code: ExportResultCode.SUCCESS })
      return
    }

    // Retry budget exhausted: drop without POSTing or queueing.
    if (this.attempts >= this.maxAttempts) {
      resultCallback({
        code: ExportResultCode.FAILED,
        error: new Error(
          `Dropped ${events.length} events: max attempts (${this.maxAttempts}) reached`,
        ),
      })
      return
    }

    // Send events
    const failedEvents = await this.sendEventsInBatches(events)
    this.attempts++

    if (failedEvents.length > 0) {
      // Persist failures to disk first, then arm the backoff retry.
      await this.queueFailedEvents(failedEvents)
      this.scheduleBackoffRetry()
      const context = this.lastExportErrorContext
        ? ` (${this.lastExportErrorContext})`
        : ''
      resultCallback({
        code: ExportResultCode.FAILED,
        error: new Error(
          `Failed to export ${failedEvents.length} events${context}`,
        ),
      })
      return
    }

    // Success - reset backoff and immediately retry any queued events
    this.resetBackoff()
    if ((await this.getQueuedEventCount()) > 0 && !this.isRetrying) {
      void this.retryFailedEvents()
    }
    resultCallback({ code: ExportResultCode.SUCCESS })
  } catch (error) {
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging export failed: ${errorMessage(error)}`,
      )
    }
    logError(error)
    resultCallback({
      code: ExportResultCode.FAILED,
      error: toError(error),
    })
  }
}
|
||||
|
||||
// POST events in maxBatchSize chunks, pausing batchDelayMs between chunks.
// Returns the events that were NOT delivered (empty array on full success).
// Also records the last error context for callers' error messages.
private async sendEventsInBatches(
  events: FirstPartyEventLoggingEvent[],
): Promise<FirstPartyEventLoggingEvent[]> {
  // Chunk events into batches
  const batches: FirstPartyEventLoggingEvent[][] = []
  for (let i = 0; i < events.length; i += this.maxBatchSize) {
    batches.push(events.slice(i, i + this.maxBatchSize))
  }

  if (process.env.USER_TYPE === 'ant') {
    logForDebugging(
      `1P event logging: exporting ${events.length} events in ${batches.length} batch(es)`,
    )
  }

  // Send each batch with delay between them. On first failure, assume the
  // endpoint is down and short-circuit: queue the failed batch plus all
  // remaining unsent batches without POSTing them. The backoff retry will
  // probe again with a single batch next tick.
  const failedBatchEvents: FirstPartyEventLoggingEvent[] = []
  let lastErrorContext: string | undefined
  for (let i = 0; i < batches.length; i++) {
    const batch = batches[i]!
    try {
      await this.sendBatchWithRetry({ events: batch })
    } catch (error) {
      lastErrorContext = getAxiosErrorContext(error)
      // Collect the failed batch AND every batch after it (never POSTed).
      for (let j = i; j < batches.length; j++) {
        failedBatchEvents.push(...batches[j]!)
      }
      if (process.env.USER_TYPE === 'ant') {
        const skipped = batches.length - 1 - i
        logForDebugging(
          `1P event logging: batch ${i + 1}/${batches.length} failed (${lastErrorContext}); short-circuiting ${skipped} remaining batch(es)`,
        )
      }
      break
    }

    // Throttle between successful batches (skip after the last one).
    if (i < batches.length - 1 && this.batchDelayMs > 0) {
      await sleep(this.batchDelayMs)
    }
  }

  if (failedBatchEvents.length > 0 && lastErrorContext) {
    this.lastExportErrorContext = lastErrorContext
  }

  return failedBatchEvents
}
|
||||
|
||||
private async queueFailedEvents(
|
||||
events: FirstPartyEventLoggingEvent[],
|
||||
): Promise<void> {
|
||||
const filePath = this.getCurrentBatchFilePath()
|
||||
|
||||
// Append-only: just add new events to file (atomic on most filesystems)
|
||||
await this.appendEventsToFile(filePath, events)
|
||||
|
||||
const context = this.lastExportErrorContext
|
||||
? ` (${this.lastExportErrorContext})`
|
||||
: ''
|
||||
const message = `1P event logging: ${events.length} events failed to export${context}`
|
||||
logError(new Error(message))
|
||||
}
|
||||
|
||||
private scheduleBackoffRetry(): void {
|
||||
// Don't schedule if already retrying or shutdown
|
||||
if (this.cancelBackoff || this.isRetrying || this.isShutdown) {
|
||||
return
|
||||
}
|
||||
|
||||
// Quadratic backoff (matching Statsig SDK): base * attempts²
|
||||
const delay = Math.min(
|
||||
this.baseBackoffDelayMs * this.attempts * this.attempts,
|
||||
this.maxBackoffDelayMs,
|
||||
)
|
||||
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging(
|
||||
`1P event logging: scheduling backoff retry in ${delay}ms (attempt ${this.attempts})`,
|
||||
)
|
||||
}
|
||||
|
||||
this.cancelBackoff = this.schedule(async () => {
|
||||
this.cancelBackoff = null
|
||||
await this.retryFailedEvents()
|
||||
}, delay)
|
||||
}
|
||||
|
||||
// Drain this run's queue file: load events, delete the file (events are now
// in memory; concurrent queueFailedEvents appends go to a fresh file), send,
// and on failure write the leftovers back and re-arm the backoff. Loops so a
// success immediately drains anything queued during the send.
private async retryFailedEvents(): Promise<void> {
  const filePath = this.getCurrentBatchFilePath()

  // Keep retrying while there are events and endpoint is healthy
  while (!this.isShutdown) {
    const events = await this.loadEventsFromFile(filePath)
    if (events.length === 0) break

    // Retry budget spent: drop the queue and stop the backoff machinery.
    if (this.attempts >= this.maxAttempts) {
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          `1P event logging: max attempts (${this.maxAttempts}) reached, dropping ${events.length} events`,
        )
      }
      await this.deleteFile(filePath)
      this.resetBackoff()
      return
    }

    this.isRetrying = true

    // Clear file before retry (we have events in memory now)
    await this.deleteFile(filePath)

    if (process.env.USER_TYPE === 'ant') {
      logForDebugging(
        `1P event logging: retrying ${events.length} failed events (attempt ${this.attempts + 1})`,
      )
    }

    const failedEvents = await this.sendEventsInBatches(events)
    this.attempts++

    this.isRetrying = false

    if (failedEvents.length > 0) {
      // Write failures back to disk
      await this.saveEventsToFile(filePath, failedEvents)
      this.scheduleBackoffRetry()
      return // Failed - wait for backoff
    }

    // Success - reset backoff and continue loop to drain any newly queued events
    this.resetBackoff()
    if (process.env.USER_TYPE === 'ant') {
      logForDebugging('1P event logging: backoff retry succeeded')
    }
  }
}
|
||||
|
||||
private resetBackoff(): void {
|
||||
this.attempts = 0
|
||||
if (this.cancelBackoff) {
|
||||
this.cancelBackoff()
|
||||
this.cancelBackoff = null
|
||||
}
|
||||
}
|
||||
|
||||
// POST one batch to the endpoint. Decides up front whether auth headers are
// safe to attach (trust dialog, OAuth scope/expiry, skipAuth); on a 401 with
// auth, retries once without auth. Throws on any unrecovered failure so the
// caller can short-circuit and queue remaining batches.
private async sendBatchWithRetry(
  payload: FirstPartyEventLoggingPayload,
): Promise<void> {
  if (this.isKilled()) {
    // Throw so the caller short-circuits remaining batches and queues
    // everything to disk. Zero network traffic while killed; the backoff
    // timer keeps ticking and will resume POSTs as soon as the GrowthBook
    // cache picks up the cleared flag.
    throw new Error('firstParty sink killswitch active')
  }

  const baseHeaders: Record<string, string> = {
    'Content-Type': 'application/json',
    'User-Agent': getClaudeCodeUserAgent(),
    'x-service-name': 'claude-code',
  }

  // Skip auth if trust hasn't been established yet
  // This prevents executing apiKeyHelper commands before the trust dialog
  // Non-interactive sessions implicitly have workspace trust
  const hasTrust =
    checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
  if (process.env.USER_TYPE === 'ant' && !hasTrust) {
    logForDebugging('1P event logging: Trust not accepted')
  }

  // Skip auth when the OAuth token is expired or lacks user:profile
  // scope (service key sessions). Falls through to unauthenticated send.
  let shouldSkipAuth = this.skipAuth || !hasTrust
  if (!shouldSkipAuth && isClaudeAISubscriber()) {
    const tokens = getClaudeAIOAuthTokens()
    if (!hasProfileScope()) {
      shouldSkipAuth = true
    } else if (tokens && isOAuthTokenExpired(tokens.expiresAt)) {
      shouldSkipAuth = true
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          '1P event logging: OAuth token expired, skipping auth to avoid 401',
        )
      }
    }
  }

  // Try with auth headers first (unless trust not established or token is known to be expired)
  const authResult = shouldSkipAuth
    ? { headers: {}, error: 'trust not established or Oauth token expired' }
    : getAuthHeaders()
  const useAuth = !authResult.error

  if (!useAuth && process.env.USER_TYPE === 'ant') {
    logForDebugging(
      `1P event logging: auth not available, sending without auth`,
    )
  }

  const headers = useAuth
    ? { ...baseHeaders, ...authResult.headers }
    : baseHeaders

  try {
    const response = await axios.post(this.endpoint, payload, {
      timeout: this.timeout,
      headers,
    })
    this.logSuccess(payload.events.length, useAuth, response.data)
    return
  } catch (error) {
    // Handle 401 by retrying without auth
    if (
      useAuth &&
      axios.isAxiosError(error) &&
      error.response?.status === 401
    ) {
      if (process.env.USER_TYPE === 'ant') {
        logForDebugging(
          '1P event logging: 401 auth error, retrying without auth',
        )
      }
      // Second (and final) attempt: same payload, no auth headers.
      const response = await axios.post(this.endpoint, payload, {
        timeout: this.timeout,
        headers: baseHeaders,
      })
      this.logSuccess(payload.events.length, false, response.data)
      return
    }

    // Any other failure propagates to sendEventsInBatches for queueing.
    throw error
  }
}
|
||||
|
||||
// Ant-only debug trace of a successful export; no-op for other user types.
private logSuccess(
  eventCount: number,
  withAuth: boolean,
  responseData: unknown,
): void {
  if (process.env.USER_TYPE !== 'ant') {
    return
  }
  const authSuffix = withAuth ? ' (with auth)' : ' (without auth)'
  logForDebugging(
    `1P event logging: ${eventCount} events exported successfully${authSuffix}`,
  )
  logForDebugging(`API Response: ${jsonStringify(responseData, null, 2)}`)
}
|
||||
|
||||
// Convert an OTel HrTime tuple [seconds, nanoseconds] to a JS Date.
private hrTimeToDate(hrTime: HrTime): Date {
  const [seconds, nanoseconds] = hrTime
  const millis = seconds * 1000 + nanoseconds / 1e6
  return new Date(millis)
}
|
||||
|
||||
private transformLogsToEvents(
|
||||
logs: ReadableLogRecord[],
|
||||
): FirstPartyEventLoggingPayload {
|
||||
const events: FirstPartyEventLoggingEvent[] = []
|
||||
|
||||
for (const log of logs) {
|
||||
const attributes = log.attributes || {}
|
||||
|
||||
// Check if this is a GrowthBook experiment event
|
||||
if (attributes.event_type === 'GrowthbookExperimentEvent') {
|
||||
const timestamp = this.hrTimeToDate(log.hrTime)
|
||||
const account_uuid = attributes.account_uuid as string | undefined
|
||||
const organization_uuid = attributes.organization_uuid as
|
||||
| string
|
||||
| undefined
|
||||
events.push({
|
||||
event_type: 'GrowthbookExperimentEvent',
|
||||
event_data: GrowthbookExperimentEvent.toJSON({
|
||||
event_id: attributes.event_id as string,
|
||||
timestamp,
|
||||
experiment_id: attributes.experiment_id as string,
|
||||
variation_id: attributes.variation_id as number,
|
||||
environment: attributes.environment as string,
|
||||
user_attributes: attributes.user_attributes as string,
|
||||
experiment_metadata: attributes.experiment_metadata as string,
|
||||
device_id: attributes.device_id as string,
|
||||
session_id: attributes.session_id as string,
|
||||
auth:
|
||||
account_uuid || organization_uuid
|
||||
? { account_uuid, organization_uuid }
|
||||
: undefined,
|
||||
}),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract event name
|
||||
const eventName =
|
||||
(attributes.event_name as string) || (log.body as string) || 'unknown'
|
||||
|
||||
// Extract metadata objects directly (no JSON parsing needed)
|
||||
const coreMetadata = attributes.core_metadata as EventMetadata | undefined
|
||||
const userMetadata = attributes.user_metadata as CoreUserData
|
||||
const eventMetadata = (attributes.event_metadata || {}) as Record<
|
||||
string,
|
||||
unknown
|
||||
>
|
||||
|
||||
if (!coreMetadata) {
|
||||
// Emit partial event if core metadata is missing
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging(
|
||||
`1P event logging: core_metadata missing for event ${eventName}`,
|
||||
)
|
||||
}
|
||||
events.push({
|
||||
event_type: 'ClaudeCodeInternalEvent',
|
||||
event_data: ClaudeCodeInternalEvent.toJSON({
|
||||
event_id: attributes.event_id as string | undefined,
|
||||
event_name: eventName,
|
||||
client_timestamp: this.hrTimeToDate(log.hrTime),
|
||||
session_id: getSessionId(),
|
||||
additional_metadata: Buffer.from(
|
||||
jsonStringify({
|
||||
transform_error: 'core_metadata attribute is missing',
|
||||
}),
|
||||
).toString('base64'),
|
||||
}),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// Transform to 1P format
|
||||
const formatted = to1PEventFormat(
|
||||
coreMetadata,
|
||||
userMetadata,
|
||||
eventMetadata,
|
||||
)
|
||||
|
||||
// _PROTO_* keys are PII-tagged values meant only for privileged BQ
|
||||
// columns. Hoist known keys to proto fields, then defensively strip any
|
||||
// remaining _PROTO_* so an unrecognized future key can't silently land
|
||||
// in the general-access additional_metadata blob. sink.ts applies the
|
||||
// same strip before Datadog; this closes the 1P side.
|
||||
const {
|
||||
_PROTO_skill_name,
|
||||
_PROTO_plugin_name,
|
||||
_PROTO_marketplace_name,
|
||||
...rest
|
||||
} = formatted.additional
|
||||
const additionalMetadata = stripProtoFields(rest)
|
||||
|
||||
events.push({
|
||||
event_type: 'ClaudeCodeInternalEvent',
|
||||
event_data: ClaudeCodeInternalEvent.toJSON({
|
||||
event_id: attributes.event_id as string | undefined,
|
||||
event_name: eventName,
|
||||
client_timestamp: this.hrTimeToDate(log.hrTime),
|
||||
device_id: attributes.user_id as string | undefined,
|
||||
email: userMetadata?.email,
|
||||
auth: formatted.auth,
|
||||
...formatted.core,
|
||||
env: formatted.env,
|
||||
process: formatted.process,
|
||||
skill_name:
|
||||
typeof _PROTO_skill_name === 'string'
|
||||
? _PROTO_skill_name
|
||||
: undefined,
|
||||
plugin_name:
|
||||
typeof _PROTO_plugin_name === 'string'
|
||||
? _PROTO_plugin_name
|
||||
: undefined,
|
||||
marketplace_name:
|
||||
typeof _PROTO_marketplace_name === 'string'
|
||||
? _PROTO_marketplace_name
|
||||
: undefined,
|
||||
additional_metadata:
|
||||
Object.keys(additionalMetadata).length > 0
|
||||
? Buffer.from(jsonStringify(additionalMetadata)).toString(
|
||||
'base64',
|
||||
)
|
||||
: undefined,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
return { events }
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
this.isShutdown = true
|
||||
this.resetBackoff()
|
||||
await this.forceFlush()
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging('1P event logging exporter shutdown complete')
|
||||
}
|
||||
}
|
||||
|
||||
async forceFlush(): Promise<void> {
|
||||
await Promise.all(this.pendingExports)
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
logForDebugging('1P event logging exporter flush complete')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getAxiosErrorContext(error: unknown): string {
|
||||
if (!axios.isAxiosError(error)) {
|
||||
return errorMessage(error)
|
||||
}
|
||||
|
||||
const parts: string[] = []
|
||||
|
||||
const requestId = error.response?.headers?.['request-id']
|
||||
if (requestId) {
|
||||
parts.push(`request-id=${requestId}`)
|
||||
}
|
||||
|
||||
if (error.response?.status) {
|
||||
parts.push(`status=${error.response.status}`)
|
||||
}
|
||||
|
||||
if (error.code) {
|
||||
parts.push(`code=${error.code}`)
|
||||
}
|
||||
|
||||
if (error.message) {
|
||||
parts.push(error.message)
|
||||
}
|
||||
|
||||
return parts.join(', ')
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,9 @@
|
||||
/**
|
||||
* Analytics service - public API for event logging
|
||||
*
|
||||
* This module serves as the main entry point for analytics events in Claude CLI.
|
||||
*
|
||||
* DESIGN: This module has NO dependencies to avoid import cycles.
|
||||
* Events are queued until attachAnalyticsSink() is called during app initialization.
|
||||
* The sink handles routing to Datadog and 1P event logging.
|
||||
* The open build intentionally ships without product telemetry. We keep this
|
||||
* module as a compatibility boundary so existing call sites can remain
|
||||
* unchanged while all analytics become inert.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -19,53 +17,22 @@
|
||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
||||
|
||||
/**
|
||||
* Marker type for values routed to PII-tagged proto columns via `_PROTO_*`
|
||||
* payload keys. The destination BQ column has privileged access controls,
|
||||
* so unredacted values are acceptable — unlike general-access backends.
|
||||
*
|
||||
* sink.ts strips `_PROTO_*` keys before Datadog fanout; only the 1P
|
||||
* exporter (firstPartyEventLoggingExporter) sees them and hoists them to the
|
||||
* top-level proto field. A single stripProtoFields call guards all non-1P
|
||||
* sinks — no per-sink filtering to forget.
|
||||
* Marker type for values that previously flowed to privileged `_PROTO_*`
|
||||
* columns. The export remains so existing call sites keep their explicit
|
||||
* privacy annotations even though external analytics export is disabled.
|
||||
*
|
||||
* Usage: `rawName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED`
|
||||
*/
|
||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED = never
|
||||
|
||||
/**
|
||||
* Strip `_PROTO_*` keys from a payload destined for general-access storage.
|
||||
* Used by:
|
||||
* - sink.ts: before Datadog fanout (never sees PII-tagged values)
|
||||
* - firstPartyEventLoggingExporter: defensive strip of additional_metadata
|
||||
* after hoisting known _PROTO_* keys to proto fields — prevents a future
|
||||
* unrecognized _PROTO_foo from silently landing in the BQ JSON blob.
|
||||
*
|
||||
* Returns the input unchanged (same reference) when no _PROTO_ keys present.
|
||||
*/
|
||||
export function stripProtoFields<V>(
|
||||
metadata: Record<string, V>,
|
||||
): Record<string, V> {
|
||||
let result: Record<string, V> | undefined
|
||||
for (const key in metadata) {
|
||||
if (key.startsWith('_PROTO_')) {
|
||||
if (result === undefined) {
|
||||
result = { ...metadata }
|
||||
}
|
||||
delete result[key]
|
||||
}
|
||||
}
|
||||
return result ?? metadata
|
||||
return metadata
|
||||
}
|
||||
|
||||
// Internal type for logEvent metadata - different from the enriched EventMetadata in metadata.ts
|
||||
type LogEventMetadata = { [key: string]: boolean | number | undefined }
|
||||
|
||||
type QueuedEvent = {
|
||||
eventName: string
|
||||
metadata: LogEventMetadata
|
||||
async: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Sink interface for the analytics backend
|
||||
*/
|
||||
@@ -77,97 +44,26 @@ export type AnalyticsSink = {
|
||||
) => Promise<void>
|
||||
}
|
||||
|
||||
// Event queue for events logged before sink is attached
|
||||
const eventQueue: QueuedEvent[] = []
|
||||
|
||||
// Sink - initialized during app startup
|
||||
let sink: AnalyticsSink | null = null
|
||||
|
||||
/**
|
||||
* Attach the analytics sink that will receive all events.
|
||||
* Queued events are drained asynchronously via queueMicrotask to avoid
|
||||
* adding latency to the startup path.
|
||||
*
|
||||
* Idempotent: if a sink is already attached, this is a no-op. This allows
|
||||
* calling from both the preAction hook (for subcommands) and setup() (for
|
||||
* the default command) without coordination.
|
||||
*/
|
||||
export function attachAnalyticsSink(newSink: AnalyticsSink): void {
|
||||
if (sink !== null) {
|
||||
return
|
||||
}
|
||||
sink = newSink
|
||||
|
||||
// Drain the queue asynchronously to avoid blocking startup
|
||||
if (eventQueue.length > 0) {
|
||||
const queuedEvents = [...eventQueue]
|
||||
eventQueue.length = 0
|
||||
|
||||
// Log queue size for ants to help debug analytics initialization timing
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
sink.logEvent('analytics_sink_attached', {
|
||||
queued_event_count: queuedEvents.length,
|
||||
})
|
||||
}
|
||||
|
||||
queueMicrotask(() => {
|
||||
for (const event of queuedEvents) {
|
||||
if (event.async) {
|
||||
void sink!.logEventAsync(event.eventName, event.metadata)
|
||||
} else {
|
||||
sink!.logEvent(event.eventName, event.metadata)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
export function attachAnalyticsSink(_newSink: AnalyticsSink): void {}
|
||||
|
||||
/**
|
||||
* Log an event to analytics backends (synchronous)
|
||||
*
|
||||
* Events may be sampled based on the 'tengu_event_sampling_config' dynamic config.
|
||||
* When sampled, the sample_rate is added to the event metadata.
|
||||
*
|
||||
* If no sink is attached, events are queued and drained when the sink attaches.
|
||||
*/
|
||||
export function logEvent(
|
||||
eventName: string,
|
||||
// intentionally no strings unless AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
// to avoid accidentally logging code/filepaths
|
||||
metadata: LogEventMetadata,
|
||||
): void {
|
||||
if (sink === null) {
|
||||
eventQueue.push({ eventName, metadata, async: false })
|
||||
return
|
||||
}
|
||||
sink.logEvent(eventName, metadata)
|
||||
}
|
||||
_eventName: string,
|
||||
_metadata: LogEventMetadata,
|
||||
): void {}
|
||||
|
||||
/**
|
||||
* Log an event to analytics backends (asynchronous)
|
||||
*
|
||||
* Events may be sampled based on the 'tengu_event_sampling_config' dynamic config.
|
||||
* When sampled, the sample_rate is added to the event metadata.
|
||||
*
|
||||
* If no sink is attached, events are queued and drained when the sink attaches.
|
||||
*/
|
||||
export async function logEventAsync(
|
||||
eventName: string,
|
||||
// intentionally no strings, to avoid accidentally logging code/filepaths
|
||||
metadata: LogEventMetadata,
|
||||
): Promise<void> {
|
||||
if (sink === null) {
|
||||
eventQueue.push({ eventName, metadata, async: true })
|
||||
return
|
||||
}
|
||||
await sink.logEventAsync(eventName, metadata)
|
||||
}
|
||||
_eventName: string,
|
||||
_metadata: LogEventMetadata,
|
||||
): Promise<void> {}
|
||||
|
||||
/**
|
||||
* Reset analytics state for testing purposes only.
|
||||
* @internal
|
||||
*/
|
||||
export function _resetForTesting(): void {
|
||||
sink = null
|
||||
eventQueue.length = 0
|
||||
}
|
||||
export function _resetForTesting(): void {}
|
||||
|
||||
@@ -1,72 +1,13 @@
|
||||
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
|
||||
/**
|
||||
* Shared event metadata enrichment for analytics systems
|
||||
*
|
||||
* This module provides a single source of truth for collecting and formatting
|
||||
* event metadata across all analytics systems (Datadog, 1P).
|
||||
*/
|
||||
|
||||
import { extname } from 'path'
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
|
||||
import { envDynamic } from '../../utils/envDynamic.js'
|
||||
import { getModelBetas } from '../../utils/betas.js'
|
||||
import { getMainLoopModel } from '../../utils/model/model.js'
|
||||
import {
|
||||
getSessionId,
|
||||
getIsInteractive,
|
||||
getKairosActive,
|
||||
getClientType,
|
||||
getParentSessionId as getParentSessionIdFromState,
|
||||
} from '../../bootstrap/state.js'
|
||||
import { isEnvTruthy } from '../../utils/envUtils.js'
|
||||
import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
|
||||
import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
|
||||
import { getRepoRemoteHash } from '../../utils/git.js'
|
||||
import {
|
||||
getWslVersion,
|
||||
getLinuxDistroInfo,
|
||||
detectVcs,
|
||||
} from '../../utils/platform.js'
|
||||
import type { CoreUserData } from 'src/utils/user.js'
|
||||
import { getAgentContext } from '../../utils/agentContext.js'
|
||||
import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
|
||||
import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import {
|
||||
getAgentId,
|
||||
getParentSessionId as getTeammateParentSessionId,
|
||||
getTeamName,
|
||||
isTeammate,
|
||||
} from '../../utils/teammate.js'
|
||||
import { feature } from 'bun:bundle'
|
||||
import { type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './index.js'
|
||||
|
||||
/**
|
||||
* Marker type for verifying analytics metadata doesn't contain sensitive data
|
||||
*
|
||||
* This type forces explicit verification that string values being logged
|
||||
* don't contain code snippets, file paths, or other sensitive information.
|
||||
*
|
||||
* The metadata is expected to be JSON-serializable.
|
||||
*
|
||||
* Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
|
||||
*
|
||||
* The type is `never` which means it can never actually hold a value - this is
|
||||
* intentional as it's only used for type-casting to document developer intent.
|
||||
* Local-only analytics helpers retained for compatibility after telemetry
|
||||
* export removal. These helpers only sanitize or classify values in-process.
|
||||
*/
|
||||
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
|
||||
|
||||
/**
|
||||
* Sanitizes tool names for analytics logging to avoid PII exposure.
|
||||
*
|
||||
* MCP tool names follow the format `mcp__<server>__<tool>` and can reveal
|
||||
* user-specific server configurations, which is considered PII-medium.
|
||||
* This function redacts MCP tool names while preserving built-in tool names
|
||||
* (Bash, Read, Write, etc.) which are safe to log.
|
||||
*
|
||||
* @param toolName - The tool name to sanitize
|
||||
* @returns The original name for built-in tools, or 'mcp_tool' for MCP tools
|
||||
*/
|
||||
export type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS }
|
||||
|
||||
export function sanitizeToolNameForAnalytics(
|
||||
toolName: string,
|
||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
|
||||
@@ -76,103 +17,17 @@ export function sanitizeToolNameForAnalytics(
|
||||
return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if detailed tool name logging is enabled for OTLP events.
|
||||
* When enabled, MCP server/tool names and Skill names are logged.
|
||||
* Disabled by default to protect PII (user-specific server configurations).
|
||||
*
|
||||
* Enable with OTEL_LOG_TOOL_DETAILS=1
|
||||
*/
|
||||
export function isToolDetailsLoggingEnabled(): boolean {
|
||||
return isEnvTruthy(process.env.OTEL_LOG_TOOL_DETAILS)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if detailed tool name logging (MCP server/tool names) is enabled
|
||||
* for analytics events.
|
||||
*
|
||||
* Per go/taxonomy, MCP names are medium PII. We log them for:
|
||||
* - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
|
||||
* - claude.ai-proxied connectors — always official (from claude.ai's list)
|
||||
* - Servers whose URL matches the official MCP registry — directory
|
||||
* connectors added via `claude mcp add`, not customer-specific config
|
||||
*
|
||||
* Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
|
||||
*/
|
||||
export function isAnalyticsToolDetailsLoggingEnabled(
|
||||
mcpServerType: string | undefined,
|
||||
mcpServerBaseUrl: string | undefined,
|
||||
): boolean {
|
||||
if (process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent') {
|
||||
return true
|
||||
}
|
||||
if (mcpServerType === 'claudeai-proxy') {
|
||||
return true
|
||||
}
|
||||
if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Built-in first-party MCP servers whose names are fixed reserved strings,
|
||||
* not user-configured — so logging them is not PII. Checked in addition to
|
||||
* isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
|
||||
* built-in would otherwise fail.
|
||||
*
|
||||
* Feature-gated so the set is empty when the feature is off: the name
|
||||
* reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
|
||||
* a user-configured 'computer-use' is possible in builds without the feature.
|
||||
*/
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
|
||||
feature('CHICAGO_MCP')
|
||||
? [
|
||||
(
|
||||
require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
|
||||
).COMPUTER_USE_MCP_SERVER_NAME,
|
||||
]
|
||||
: [],
|
||||
)
|
||||
/* eslint-enable @typescript-eslint/no-require-imports */
|
||||
|
||||
/**
|
||||
* Spreadable helper for logEvent payloads — returns {mcpServerName, mcpToolName}
|
||||
* if the gate passes, empty object otherwise. Consolidates the identical IIFE
|
||||
* pattern at each tengu_tool_use_* call site.
|
||||
*/
|
||||
export function mcpToolDetailsForAnalytics(
|
||||
toolName: string,
|
||||
mcpServerType: string | undefined,
|
||||
mcpServerBaseUrl: string | undefined,
|
||||
): {
|
||||
export function mcpToolDetailsForAnalytics(): {
|
||||
mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
} {
|
||||
const details = extractMcpToolDetails(toolName)
|
||||
if (!details) {
|
||||
return {}
|
||||
}
|
||||
if (
|
||||
!BUILTIN_MCP_SERVER_NAMES.has(details.serverName) &&
|
||||
!isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
|
||||
) {
|
||||
return {}
|
||||
}
|
||||
return {
|
||||
mcpServerName: details.serverName,
|
||||
mcpToolName: details.mcpToolName,
|
||||
}
|
||||
return {}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract MCP server and tool names from a full MCP tool name.
|
||||
* MCP tool names follow the format: mcp__<server>__<tool>
|
||||
*
|
||||
* @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
|
||||
* @returns Object with serverName and toolName, or undefined if not an MCP tool
|
||||
*/
|
||||
export function extractMcpToolDetails(toolName: string):
|
||||
| {
|
||||
serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
@@ -183,16 +38,13 @@ export function extractMcpToolDetails(toolName: string):
|
||||
return undefined
|
||||
}
|
||||
|
||||
// Format: mcp__<server>__<tool>
|
||||
const parts = toolName.split('__')
|
||||
if (parts.length < 3) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const serverName = parts[1]
|
||||
// Tool name may contain __ so rejoin remaining parts
|
||||
const mcpToolName = parts.slice(2).join('__')
|
||||
|
||||
if (!serverName || !mcpToolName) {
|
||||
return undefined
|
||||
}
|
||||
@@ -205,13 +57,6 @@ export function extractMcpToolDetails(toolName: string):
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract skill name from Skill tool input.
|
||||
*
|
||||
* @param toolName - The tool name (should be 'Skill')
|
||||
* @param input - The tool input containing the skill name
|
||||
* @returns The skill name if this is a Skill tool call, undefined otherwise
|
||||
*/
|
||||
export function extractSkillName(
|
||||
toolName: string,
|
||||
input: unknown,
|
||||
@@ -233,93 +78,14 @@ export function extractSkillName(
|
||||
return undefined
|
||||
}
|
||||
|
||||
const TOOL_INPUT_STRING_TRUNCATE_AT = 512
|
||||
const TOOL_INPUT_STRING_TRUNCATE_TO = 128
|
||||
const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
|
||||
const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
|
||||
const TOOL_INPUT_MAX_DEPTH = 2
|
||||
|
||||
function truncateToolInputValue(value: unknown, depth = 0): unknown {
|
||||
if (typeof value === 'string') {
|
||||
if (value.length > TOOL_INPUT_STRING_TRUNCATE_AT) {
|
||||
return `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
|
||||
}
|
||||
return value
|
||||
}
|
||||
if (
|
||||
typeof value === 'number' ||
|
||||
typeof value === 'boolean' ||
|
||||
value === null ||
|
||||
value === undefined
|
||||
) {
|
||||
return value
|
||||
}
|
||||
if (depth >= TOOL_INPUT_MAX_DEPTH) {
|
||||
return '<nested>'
|
||||
}
|
||||
if (Array.isArray(value)) {
|
||||
const mapped = value
|
||||
.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
|
||||
.map(v => truncateToolInputValue(v, depth + 1))
|
||||
if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
|
||||
mapped.push(`…[${value.length} items]`)
|
||||
}
|
||||
return mapped
|
||||
}
|
||||
if (typeof value === 'object') {
|
||||
const entries = Object.entries(value as Record<string, unknown>)
|
||||
// Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
|
||||
// SedEditPermissionRequest) so they don't leak into telemetry.
|
||||
.filter(([k]) => !k.startsWith('_'))
|
||||
const mapped = entries
|
||||
.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
|
||||
.map(([k, v]) => [k, truncateToolInputValue(v, depth + 1)])
|
||||
if (entries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
|
||||
mapped.push(['…', `${entries.length} keys`])
|
||||
}
|
||||
return Object.fromEntries(mapped)
|
||||
}
|
||||
return String(value)
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize a tool's input arguments for the OTel tool_result event.
|
||||
* Truncates long strings and deep nesting to keep the output bounded while
|
||||
* preserving forensically useful fields like file paths, URLs, and MCP args.
|
||||
* Returns undefined when OTEL_LOG_TOOL_DETAILS is not enabled.
|
||||
*/
|
||||
export function extractToolInputForTelemetry(
|
||||
input: unknown,
|
||||
_input: unknown,
|
||||
): string | undefined {
|
||||
if (!isToolDetailsLoggingEnabled()) {
|
||||
return undefined
|
||||
}
|
||||
const truncated = truncateToolInputValue(input)
|
||||
let json = jsonStringify(truncated)
|
||||
if (json.length > TOOL_INPUT_MAX_JSON_CHARS) {
|
||||
json = json.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
|
||||
}
|
||||
return json
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum length for file extensions to be logged.
|
||||
* Extensions longer than this are considered potentially sensitive
|
||||
* (e.g., hash-based filenames like "key-hash-abcd-123-456") and
|
||||
* will be replaced with 'other'.
|
||||
*/
|
||||
const MAX_FILE_EXTENSION_LENGTH = 10
|
||||
|
||||
/**
|
||||
* Extracts and sanitizes a file extension for analytics logging.
|
||||
*
|
||||
* Uses Node's path.extname for reliable cross-platform extension extraction.
|
||||
* Returns 'other' for extensions exceeding MAX_FILE_EXTENSION_LENGTH to avoid
|
||||
* logging potentially sensitive data (like hash-based filenames).
|
||||
*
|
||||
* @param filePath - The file path to extract the extension from
|
||||
* @returns The sanitized extension, 'other' for long extensions, or undefined if no extension
|
||||
*/
|
||||
export function getFileExtensionForAnalytics(
|
||||
filePath: string,
|
||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
||||
@@ -328,7 +94,7 @@ export function getFileExtensionForAnalytics(
|
||||
return undefined
|
||||
}
|
||||
|
||||
const extension = ext.slice(1) // remove leading dot
|
||||
const extension = ext.slice(1)
|
||||
if (extension.length > MAX_FILE_EXTENSION_LENGTH) {
|
||||
return 'other' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
}
|
||||
@@ -336,7 +102,6 @@ export function getFileExtensionForAnalytics(
|
||||
return extension as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
}
|
||||
|
||||
/** Allow list of commands we extract file extensions from. */
|
||||
const FILE_COMMANDS = new Set([
|
||||
'rm',
|
||||
'mv',
|
||||
@@ -357,23 +122,16 @@ const FILE_COMMANDS = new Set([
|
||||
'sed',
|
||||
])
|
||||
|
||||
/** Regex to split bash commands on compound operators (&&, ||, ;, |). */
|
||||
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/
|
||||
|
||||
/** Regex to split on whitespace. */
|
||||
const WHITESPACE_REGEX = /\s+/
|
||||
|
||||
/**
|
||||
* Extracts file extensions from a bash command for analytics.
|
||||
* Best-effort: splits on operators and whitespace, extracts extensions
|
||||
* from non-flag args of allowed commands. No heavy shell parsing needed
|
||||
* because grep patterns and sed scripts rarely resemble file extensions.
|
||||
*/
|
||||
export function getFileExtensionsFromBashCommand(
|
||||
command: string,
|
||||
simulatedSedEditFilePath?: string,
|
||||
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
|
||||
if (!command.includes('.') && !simulatedSedEditFilePath) return undefined
|
||||
if (!command.includes('.') && !simulatedSedEditFilePath) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
let result: string | undefined
|
||||
const seen = new Set<string>()
|
||||
@@ -398,7 +156,7 @@ export function getFileExtensionsFromBashCommand(
|
||||
|
||||
for (let i = 1; i < tokens.length; i++) {
|
||||
const arg = tokens[i]!
|
||||
if (arg.charCodeAt(0) === 45 /* - */) continue
|
||||
if (arg.charCodeAt(0) === 45) continue
|
||||
const ext = getFileExtensionForAnalytics(arg)
|
||||
if (ext && !seen.has(ext)) {
|
||||
seen.add(ext)
|
||||
@@ -407,567 +165,8 @@ export function getFileExtensionsFromBashCommand(
|
||||
}
|
||||
}
|
||||
|
||||
if (!result) return undefined
|
||||
return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
}
|
||||
|
||||
/**
|
||||
* Environment context metadata
|
||||
*/
|
||||
export type EnvContext = {
|
||||
platform: string
|
||||
platformRaw: string
|
||||
arch: string
|
||||
nodeVersion: string
|
||||
terminal: string | null
|
||||
packageManagers: string
|
||||
runtimes: string
|
||||
isRunningWithBun: boolean
|
||||
isCi: boolean
|
||||
isClaubbit: boolean
|
||||
isClaudeCodeRemote: boolean
|
||||
isLocalAgentMode: boolean
|
||||
isConductor: boolean
|
||||
remoteEnvironmentType?: string
|
||||
coworkerType?: string
|
||||
claudeCodeContainerId?: string
|
||||
claudeCodeRemoteSessionId?: string
|
||||
tags?: string
|
||||
isGithubAction: boolean
|
||||
isClaudeCodeAction: boolean
|
||||
isClaudeAiAuth: boolean
|
||||
version: string
|
||||
versionBase?: string
|
||||
buildTime: string
|
||||
deploymentEnvironment: string
|
||||
githubEventName?: string
|
||||
githubActionsRunnerEnvironment?: string
|
||||
githubActionsRunnerOs?: string
|
||||
githubActionRef?: string
|
||||
wslVersion?: string
|
||||
linuxDistroId?: string
|
||||
linuxDistroVersion?: string
|
||||
linuxKernel?: string
|
||||
vcs?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Process metrics included with all analytics events.
|
||||
*/
|
||||
export type ProcessMetrics = {
|
||||
uptime: number
|
||||
rss: number
|
||||
heapTotal: number
|
||||
heapUsed: number
|
||||
external: number
|
||||
arrayBuffers: number
|
||||
constrainedMemory: number | undefined
|
||||
cpuUsage: NodeJS.CpuUsage
|
||||
cpuPercent: number | undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Core event metadata shared across all analytics systems
|
||||
*/
|
||||
export type EventMetadata = {
|
||||
model: string
|
||||
sessionId: string
|
||||
userType: string
|
||||
betas?: string
|
||||
envContext: EnvContext
|
||||
entrypoint?: string
|
||||
agentSdkVersion?: string
|
||||
isInteractive: string
|
||||
clientType: string
|
||||
processMetrics?: ProcessMetrics
|
||||
sweBenchRunId: string
|
||||
sweBenchInstanceId: string
|
||||
sweBenchTaskId: string
|
||||
// Swarm/team agent identification for analytics attribution
|
||||
agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID
|
||||
parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session)
|
||||
agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents
|
||||
teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage)
|
||||
subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team)
|
||||
rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data
|
||||
kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check)
|
||||
skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation)
|
||||
observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events)
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for enriching event metadata
|
||||
*/
|
||||
export type EnrichMetadataOptions = {
|
||||
// Model to use, falls back to getMainLoopModel() if not provided
|
||||
model?: unknown
|
||||
// Explicit betas string (already joined)
|
||||
betas?: unknown
|
||||
// Additional metadata to include (optional)
|
||||
additionalMetadata?: Record<string, unknown>
|
||||
}
|
||||
|
||||
/**
|
||||
* Get agent identification for analytics.
|
||||
* Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
|
||||
*/
|
||||
function getAgentIdentification(): {
|
||||
agentId?: string
|
||||
parentSessionId?: string
|
||||
agentType?: 'teammate' | 'subagent' | 'standalone'
|
||||
teamName?: string
|
||||
} {
|
||||
// Check AsyncLocalStorage first (for subagents running in same process)
|
||||
const agentContext = getAgentContext()
|
||||
if (agentContext) {
|
||||
const result: ReturnType<typeof getAgentIdentification> = {
|
||||
agentId: agentContext.agentId,
|
||||
parentSessionId: agentContext.parentSessionId,
|
||||
agentType: agentContext.agentType,
|
||||
}
|
||||
if (agentContext.agentType === 'teammate') {
|
||||
result.teamName = agentContext.teamName
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Fall back to swarm helpers (for swarm agents)
|
||||
const agentId = getAgentId()
|
||||
const parentSessionId = getTeammateParentSessionId()
|
||||
const teamName = getTeamName()
|
||||
const isSwarmAgent = isTeammate()
|
||||
// For standalone agents (have agent ID but not a teammate), set agentType to 'standalone'
|
||||
const agentType = isSwarmAgent
|
||||
? ('teammate' as const)
|
||||
: agentId
|
||||
? ('standalone' as const)
|
||||
: undefined
|
||||
if (agentId || agentType || parentSessionId || teamName) {
|
||||
return {
|
||||
...(agentId ? { agentId } : {}),
|
||||
...(agentType ? { agentType } : {}),
|
||||
...(parentSessionId ? { parentSessionId } : {}),
|
||||
...(teamName ? { teamName } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
// Check bootstrap state for parent session ID (e.g., plan mode -> implementation)
|
||||
const stateParentSessionId = getParentSessionIdFromState()
|
||||
if (stateParentSessionId) {
|
||||
return { parentSessionId: stateParentSessionId }
|
||||
}
|
||||
|
||||
return {}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract base version from full version string. "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
|
||||
*/
|
||||
const getVersionBase = memoize((): string | undefined => {
|
||||
const match = MACRO.VERSION.match(/^\d+\.\d+\.\d+(?:-[a-z]+)?/)
|
||||
return match ? match[0] : undefined
|
||||
})
|
||||
|
||||
/**
|
||||
* Builds the environment context object
|
||||
*/
|
||||
const buildEnvContext = memoize(async (): Promise<EnvContext> => {
|
||||
const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
|
||||
env.getPackageManagers(),
|
||||
env.getRuntimes(),
|
||||
getLinuxDistroInfo(),
|
||||
detectVcs(),
|
||||
])
|
||||
|
||||
return {
|
||||
platform: getHostPlatformForAnalytics(),
|
||||
// Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
|
||||
// getHostPlatformForAnalytics() buckets those into 'linux'; here we want
|
||||
// the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
|
||||
platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
|
||||
arch: env.arch,
|
||||
nodeVersion: env.nodeVersion,
|
||||
terminal: envDynamic.terminal,
|
||||
packageManagers: packageManagers.join(','),
|
||||
runtimes: runtimes.join(','),
|
||||
isRunningWithBun: env.isRunningWithBun(),
|
||||
isCi: isEnvTruthy(process.env.CI),
|
||||
isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
|
||||
isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
|
||||
isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
|
||||
isConductor: env.isConductor(),
|
||||
...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && {
|
||||
remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
|
||||
}),
|
||||
// Gated by feature flag to prevent leaking "coworkerType" string in external builds
|
||||
...(feature('COWORKER_TYPE_TELEMETRY')
|
||||
? process.env.CLAUDE_CODE_COWORKER_TYPE
|
||||
? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
|
||||
: {}
|
||||
: {}),
|
||||
...(process.env.CLAUDE_CODE_CONTAINER_ID && {
|
||||
claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID,
|
||||
}),
|
||||
...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && {
|
||||
claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
|
||||
}),
|
||||
...(process.env.CLAUDE_CODE_TAGS && {
|
||||
tags: process.env.CLAUDE_CODE_TAGS,
|
||||
}),
|
||||
isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
|
||||
isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
|
||||
isClaudeAiAuth: isClaudeAISubscriber(),
|
||||
version: MACRO.VERSION,
|
||||
versionBase: getVersionBase(),
|
||||
buildTime: MACRO.BUILD_TIME,
|
||||
deploymentEnvironment: env.detectDeploymentEnvironment(),
|
||||
...(isEnvTruthy(process.env.GITHUB_ACTIONS) && {
|
||||
githubEventName: process.env.GITHUB_EVENT_NAME,
|
||||
githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
|
||||
githubActionsRunnerOs: process.env.RUNNER_OS,
|
||||
githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
|
||||
'claude-code-action/',
|
||||
)
|
||||
? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
|
||||
: undefined,
|
||||
}),
|
||||
...(getWslVersion() && { wslVersion: getWslVersion() }),
|
||||
...(linuxDistroInfo ?? {}),
|
||||
...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}),
|
||||
}
|
||||
})
|
||||
|
||||
// --
|
||||
// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
|
||||
let prevCpuUsage: NodeJS.CpuUsage | null = null
|
||||
let prevWallTimeMs: number | null = null
|
||||
|
||||
/**
|
||||
* Builds process metrics object for all users.
|
||||
*/
|
||||
function buildProcessMetrics(): ProcessMetrics | undefined {
|
||||
try {
|
||||
const mem = process.memoryUsage()
|
||||
const cpu = process.cpuUsage()
|
||||
const now = Date.now()
|
||||
|
||||
let cpuPercent: number | undefined
|
||||
if (prevCpuUsage && prevWallTimeMs) {
|
||||
const wallDeltaMs = now - prevWallTimeMs
|
||||
if (wallDeltaMs > 0) {
|
||||
const userDeltaUs = cpu.user - prevCpuUsage.user
|
||||
const systemDeltaUs = cpu.system - prevCpuUsage.system
|
||||
cpuPercent =
|
||||
((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
|
||||
}
|
||||
}
|
||||
prevCpuUsage = cpu
|
||||
prevWallTimeMs = now
|
||||
|
||||
return {
|
||||
uptime: process.uptime(),
|
||||
rss: mem.rss,
|
||||
heapTotal: mem.heapTotal,
|
||||
heapUsed: mem.heapUsed,
|
||||
external: mem.external,
|
||||
arrayBuffers: mem.arrayBuffers,
|
||||
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
|
||||
constrainedMemory: process.constrainedMemory(),
|
||||
cpuUsage: cpu,
|
||||
cpuPercent,
|
||||
}
|
||||
} catch {
|
||||
if (!result) {
|
||||
return undefined
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get core event metadata shared across all analytics systems.
|
||||
*
|
||||
* This function collects environment, runtime, and context information
|
||||
* that should be included with all analytics events.
|
||||
*
|
||||
* @param options - Configuration options
|
||||
* @returns Promise resolving to enriched metadata object
|
||||
*/
|
||||
export async function getEventMetadata(
|
||||
options: EnrichMetadataOptions = {},
|
||||
): Promise<EventMetadata> {
|
||||
const model = options.model ? String(options.model) : getMainLoopModel()
|
||||
const betas =
|
||||
typeof options.betas === 'string'
|
||||
? options.betas
|
||||
: getModelBetas(model).join(',')
|
||||
const [envContext, repoRemoteHash] = await Promise.all([
|
||||
buildEnvContext(),
|
||||
getRepoRemoteHash(),
|
||||
])
|
||||
const processMetrics = buildProcessMetrics()
|
||||
|
||||
const metadata: EventMetadata = {
|
||||
model,
|
||||
sessionId: getSessionId(),
|
||||
userType: process.env.USER_TYPE || '',
|
||||
...(betas.length > 0 ? { betas: betas } : {}),
|
||||
envContext,
|
||||
...(process.env.CLAUDE_CODE_ENTRYPOINT && {
|
||||
entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
|
||||
}),
|
||||
...(process.env.CLAUDE_AGENT_SDK_VERSION && {
|
||||
agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
|
||||
}),
|
||||
isInteractive: String(getIsInteractive()),
|
||||
clientType: getClientType(),
|
||||
...(processMetrics && { processMetrics }),
|
||||
sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
|
||||
sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
|
||||
sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
|
||||
// Swarm/team agent identification
|
||||
// Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
|
||||
...getAgentIdentification(),
|
||||
// Subscription tier for DAU-by-tier analytics
|
||||
...(getSubscriptionType() && {
|
||||
subscriptionType: getSubscriptionType()!,
|
||||
}),
|
||||
// Assistant mode tag — lives outside memoized buildEnvContext() because
|
||||
// setKairosActive() runs at main.tsx:~1648, after the first event may
|
||||
// have already fired and memoized the env. Read fresh per-event instead.
|
||||
...(feature('KAIROS') && getKairosActive()
|
||||
? { kairosActive: true as const }
|
||||
: {}),
|
||||
// Repo remote hash for joining with server-side repo bundle data
|
||||
...(repoRemoteHash && { rh: repoRemoteHash }),
|
||||
}
|
||||
|
||||
return metadata
|
||||
}
|
||||
|
||||
|
||||
/**
 * Core event metadata for 1P event logging (snake_case format).
 * These fields map to top-level columns of ClaudeCodeInternalEvent;
 * to1PEventFormat() converts camelCase EventMetadata into this shape.
 */
export type FirstPartyEventLoggingCoreMetadata = {
  session_id: string
  model: string
  user_type: string
  // Comma-joined model beta flags; omitted when empty.
  betas?: string
  entrypoint?: string
  agent_sdk_version?: string
  is_interactive: boolean
  client_type: string
  swe_bench_run_id?: string
  swe_bench_instance_id?: string
  swe_bench_task_id?: string
  // Swarm/team agent identification
  agent_id?: string
  parent_session_id?: string
  agent_type?: 'teammate' | 'subagent' | 'standalone'
  team_name?: string
}
|
||||
|
||||
/**
 * Complete event logging metadata format for 1P events.
 */
export type FirstPartyEventLoggingMetadata = {
  env: EnvironmentMetadata
  // Base64-encoded JSON snapshot of process metrics (see to1PEventFormat).
  process?: string
  // auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
  // account_id is intentionally omitted — only UUID fields are populated client-side.
  auth?: PublicApiAuth
  // core fields correspond to the top level of ClaudeCodeInternalEvent.
  // They get directly exported to their individual columns in the BigQuery tables
  core: FirstPartyEventLoggingCoreMetadata
  // additional fields are populated in the additional_metadata field of the
  // ClaudeCodeInternalEvent proto. Includes but is not limited to information
  // that differs by event type.
  additional: Record<string, unknown>
}
|
||||
|
||||
/**
|
||||
* Convert metadata to 1P event logging format (snake_case fields).
|
||||
*
|
||||
* The /api/event_logging/batch endpoint expects snake_case field names
|
||||
* for environment and core metadata.
|
||||
*
|
||||
* @param metadata - Core event metadata
|
||||
* @param additionalMetadata - Additional metadata to include
|
||||
* @returns Metadata formatted for 1P event logging
|
||||
*/
|
||||
export function to1PEventFormat(
|
||||
metadata: EventMetadata,
|
||||
userMetadata: CoreUserData,
|
||||
additionalMetadata: Record<string, unknown> = {},
|
||||
): FirstPartyEventLoggingMetadata {
|
||||
const {
|
||||
envContext,
|
||||
processMetrics,
|
||||
rh,
|
||||
kairosActive,
|
||||
skillMode,
|
||||
observerMode,
|
||||
...coreFields
|
||||
} = metadata
|
||||
|
||||
// Convert envContext to snake_case.
|
||||
// IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
|
||||
// adding a field here that the proto doesn't define is a compile error. The
|
||||
// generated toJSON() serializer silently drops unknown keys — a hand-written
|
||||
// parallel type previously let #11318, #13924, #19448, and coworker_type all
|
||||
// ship fields that never reached BQ.
|
||||
// Adding a field? Update the monorepo proto first (go/cc-logging):
|
||||
// event_schemas/.../claude_code/v1/claude_code_internal_event.proto
|
||||
// then run `bun run generate:proto` here.
|
||||
const env: EnvironmentMetadata = {
|
||||
platform: envContext.platform,
|
||||
platform_raw: envContext.platformRaw,
|
||||
arch: envContext.arch,
|
||||
node_version: envContext.nodeVersion,
|
||||
terminal: envContext.terminal || 'unknown',
|
||||
package_managers: envContext.packageManagers,
|
||||
runtimes: envContext.runtimes,
|
||||
is_running_with_bun: envContext.isRunningWithBun,
|
||||
is_ci: envContext.isCi,
|
||||
is_claubbit: envContext.isClaubbit,
|
||||
is_claude_code_remote: envContext.isClaudeCodeRemote,
|
||||
is_local_agent_mode: envContext.isLocalAgentMode,
|
||||
is_conductor: envContext.isConductor,
|
||||
is_github_action: envContext.isGithubAction,
|
||||
is_claude_code_action: envContext.isClaudeCodeAction,
|
||||
is_claude_ai_auth: envContext.isClaudeAiAuth,
|
||||
version: envContext.version,
|
||||
build_time: envContext.buildTime,
|
||||
deployment_environment: envContext.deploymentEnvironment,
|
||||
}
|
||||
|
||||
// Add optional env fields
|
||||
if (envContext.remoteEnvironmentType) {
|
||||
env.remote_environment_type = envContext.remoteEnvironmentType
|
||||
}
|
||||
if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
|
||||
env.coworker_type = envContext.coworkerType
|
||||
}
|
||||
if (envContext.claudeCodeContainerId) {
|
||||
env.claude_code_container_id = envContext.claudeCodeContainerId
|
||||
}
|
||||
if (envContext.claudeCodeRemoteSessionId) {
|
||||
env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
|
||||
}
|
||||
if (envContext.tags) {
|
||||
env.tags = envContext.tags
|
||||
.split(',')
|
||||
.map(t => t.trim())
|
||||
.filter(Boolean)
|
||||
}
|
||||
if (envContext.githubEventName) {
|
||||
env.github_event_name = envContext.githubEventName
|
||||
}
|
||||
if (envContext.githubActionsRunnerEnvironment) {
|
||||
env.github_actions_runner_environment =
|
||||
envContext.githubActionsRunnerEnvironment
|
||||
}
|
||||
if (envContext.githubActionsRunnerOs) {
|
||||
env.github_actions_runner_os = envContext.githubActionsRunnerOs
|
||||
}
|
||||
if (envContext.githubActionRef) {
|
||||
env.github_action_ref = envContext.githubActionRef
|
||||
}
|
||||
if (envContext.wslVersion) {
|
||||
env.wsl_version = envContext.wslVersion
|
||||
}
|
||||
if (envContext.linuxDistroId) {
|
||||
env.linux_distro_id = envContext.linuxDistroId
|
||||
}
|
||||
if (envContext.linuxDistroVersion) {
|
||||
env.linux_distro_version = envContext.linuxDistroVersion
|
||||
}
|
||||
if (envContext.linuxKernel) {
|
||||
env.linux_kernel = envContext.linuxKernel
|
||||
}
|
||||
if (envContext.vcs) {
|
||||
env.vcs = envContext.vcs
|
||||
}
|
||||
if (envContext.versionBase) {
|
||||
env.version_base = envContext.versionBase
|
||||
}
|
||||
|
||||
// Convert core fields to snake_case
|
||||
const core: FirstPartyEventLoggingCoreMetadata = {
|
||||
session_id: coreFields.sessionId,
|
||||
model: coreFields.model,
|
||||
user_type: coreFields.userType,
|
||||
is_interactive: coreFields.isInteractive === 'true',
|
||||
client_type: coreFields.clientType,
|
||||
}
|
||||
|
||||
// Add other core fields
|
||||
if (coreFields.betas) {
|
||||
core.betas = coreFields.betas
|
||||
}
|
||||
if (coreFields.entrypoint) {
|
||||
core.entrypoint = coreFields.entrypoint
|
||||
}
|
||||
if (coreFields.agentSdkVersion) {
|
||||
core.agent_sdk_version = coreFields.agentSdkVersion
|
||||
}
|
||||
if (coreFields.sweBenchRunId) {
|
||||
core.swe_bench_run_id = coreFields.sweBenchRunId
|
||||
}
|
||||
if (coreFields.sweBenchInstanceId) {
|
||||
core.swe_bench_instance_id = coreFields.sweBenchInstanceId
|
||||
}
|
||||
if (coreFields.sweBenchTaskId) {
|
||||
core.swe_bench_task_id = coreFields.sweBenchTaskId
|
||||
}
|
||||
// Swarm/team agent identification
|
||||
if (coreFields.agentId) {
|
||||
core.agent_id = coreFields.agentId
|
||||
}
|
||||
if (coreFields.parentSessionId) {
|
||||
core.parent_session_id = coreFields.parentSessionId
|
||||
}
|
||||
if (coreFields.agentType) {
|
||||
core.agent_type = coreFields.agentType
|
||||
}
|
||||
if (coreFields.teamName) {
|
||||
core.team_name = coreFields.teamName
|
||||
}
|
||||
|
||||
// Map userMetadata to output fields.
|
||||
// Based on src/utils/user.ts getUser(), but with fields present in other
|
||||
// parts of ClaudeCodeInternalEvent deduplicated.
|
||||
// Convert camelCase GitHubActionsMetadata to snake_case for 1P API
|
||||
// Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
|
||||
// rather than at the top level of ClaudeCodeInternalEvent
|
||||
if (userMetadata.githubActionsMetadata) {
|
||||
const ghMeta = userMetadata.githubActionsMetadata
|
||||
env.github_actions_metadata = {
|
||||
actor_id: ghMeta.actorId,
|
||||
repository_id: ghMeta.repositoryId,
|
||||
repository_owner_id: ghMeta.repositoryOwnerId,
|
||||
}
|
||||
}
|
||||
|
||||
let auth: PublicApiAuth | undefined
|
||||
if (userMetadata.accountUuid || userMetadata.organizationUuid) {
|
||||
auth = {
|
||||
account_uuid: userMetadata.accountUuid,
|
||||
organization_uuid: userMetadata.organizationUuid,
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
env,
|
||||
...(processMetrics && {
|
||||
process: Buffer.from(jsonStringify(processMetrics)).toString('base64'),
|
||||
}),
|
||||
...(auth && { auth }),
|
||||
core,
|
||||
additional: {
|
||||
...(rh && { rh }),
|
||||
...(kairosActive && { is_assistant_mode: true }),
|
||||
...(skillMode && { skill_mode: skillMode }),
|
||||
...(observerMode && { observer_mode: observerMode }),
|
||||
...additionalMetadata,
|
||||
},
|
||||
}
|
||||
return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
|
||||
}
|
||||
|
||||
@@ -1,114 +1,10 @@
|
||||
/**
|
||||
* Analytics sink implementation
|
||||
*
|
||||
* This module contains the actual analytics routing logic and should be
|
||||
* initialized during app startup. It routes events to Datadog and 1P event
|
||||
* logging.
|
||||
*
|
||||
* Usage: Call initializeAnalyticsSink() during app startup to attach the sink.
|
||||
* Telemetry sinks are disabled in this build. The exported functions remain so
|
||||
* startup code does not need to special-case the open build.
|
||||
*/
|
||||
|
||||
import { trackDatadogEvent } from './datadog.js'
|
||||
import { logEventTo1P, shouldSampleEvent } from './firstPartyEventLogger.js'
|
||||
import { checkStatsigFeatureGate_CACHED_MAY_BE_STALE } from './growthbook.js'
|
||||
import { attachAnalyticsSink, stripProtoFields } from './index.js'
|
||||
import { isSinkKilled } from './sinkKillswitch.js'
|
||||
|
||||
// Local type matching the logEvent metadata signature
type LogEventMetadata = { [key: string]: boolean | number | undefined }

// Statsig/GrowthBook gate name controlling fanout of events to Datadog.
const DATADOG_GATE_NAME = 'tengu_log_datadog_events'

// Module-level gate state - starts undefined, initialized during startup
// (initializeAnalyticsGates); shouldTrackDatadog falls back to a cached
// value while this is still undefined.
let isDatadogGateEnabled: boolean | undefined = undefined
|
||||
|
||||
/**
|
||||
* Check if Datadog tracking is enabled.
|
||||
* Falls back to cached value from previous session if not yet initialized.
|
||||
*/
|
||||
function shouldTrackDatadog(): boolean {
|
||||
if (isSinkKilled('datadog')) {
|
||||
return false
|
||||
}
|
||||
if (isDatadogGateEnabled !== undefined) {
|
||||
return isDatadogGateEnabled
|
||||
}
|
||||
|
||||
// Fallback to cached value from previous session
|
||||
try {
|
||||
return checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an event (synchronous implementation)
|
||||
*/
|
||||
function logEventImpl(eventName: string, metadata: LogEventMetadata): void {
|
||||
// Check if this event should be sampled
|
||||
const sampleResult = shouldSampleEvent(eventName)
|
||||
|
||||
// If sample result is 0, the event was not selected for logging
|
||||
if (sampleResult === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
// If sample result is a positive number, add it to metadata
|
||||
const metadataWithSampleRate =
|
||||
sampleResult !== null
|
||||
? { ...metadata, sample_rate: sampleResult }
|
||||
: metadata
|
||||
|
||||
if (shouldTrackDatadog()) {
|
||||
// Datadog is a general-access backend — strip _PROTO_* keys
|
||||
// (unredacted PII-tagged values meant only for the 1P privileged column).
|
||||
void trackDatadogEvent(eventName, stripProtoFields(metadataWithSampleRate))
|
||||
}
|
||||
|
||||
// 1P receives the full payload including _PROTO_* — the exporter
|
||||
// destructures and routes those keys to proto fields itself.
|
||||
logEventTo1P(eventName, metadataWithSampleRate)
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an event (asynchronous implementation)
|
||||
*
|
||||
* With Segment removed the two remaining sinks are fire-and-forget, so this
|
||||
* just wraps the sync impl — kept to preserve the sink interface contract.
|
||||
*/
|
||||
function logEventAsyncImpl(
|
||||
eventName: string,
|
||||
metadata: LogEventMetadata,
|
||||
): Promise<void> {
|
||||
logEventImpl(eventName, metadata)
|
||||
return Promise.resolve()
|
||||
}
|
||||
|
||||
/**
 * Initialize analytics gates during startup.
 *
 * Updates gate values from server. Early events use cached values from previous
 * session to avoid data loss during initialization.
 *
 * Called from main.tsx during setupBackend().
 */
export function initializeAnalyticsGates(): void {
  // Once set, shouldTrackDatadog() uses this instead of the stale cache path.
  isDatadogGateEnabled =
    checkStatsigFeatureGate_CACHED_MAY_BE_STALE(DATADOG_GATE_NAME)
}
|
||||
|
||||
/**
|
||||
* Initialize the analytics sink.
|
||||
*
|
||||
* Call this during app startup to attach the analytics backend.
|
||||
* Any events logged before this is called will be queued and drained.
|
||||
*
|
||||
* Idempotent: safe to call multiple times (subsequent calls are no-ops).
|
||||
*/
|
||||
export function initializeAnalyticsSink(): void {
|
||||
attachAnalyticsSink({
|
||||
logEvent: logEventImpl,
|
||||
logEventAsync: logEventAsyncImpl,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
import { getDynamicConfig_CACHED_MAY_BE_STALE } from './growthbook.js'
|
||||
|
||||
// Mangled name: per-sink analytics killswitch
|
||||
const SINK_KILLSWITCH_CONFIG_NAME = 'tengu_frond_boric'
|
||||
|
||||
export type SinkName = 'datadog' | 'firstParty'
|
||||
|
||||
/**
|
||||
* GrowthBook JSON config that disables individual analytics sinks.
|
||||
* Shape: { datadog?: boolean, firstParty?: boolean }
|
||||
* A value of true for a key stops all dispatch to that sink.
|
||||
* Default {} (nothing killed). Fail-open: missing/malformed config = sink stays on.
|
||||
*
|
||||
* NOTE: Must NOT be called from inside is1PEventLoggingEnabled() -
|
||||
* growthbook.ts:isGrowthBookEnabled() calls that, so a lookup here would recurse.
|
||||
* Call at per-event dispatch sites instead.
|
||||
*/
|
||||
export function isSinkKilled(sink: SinkName): boolean {
|
||||
const config = getDynamicConfig_CACHED_MAY_BE_STALE<
|
||||
Partial<Record<SinkName, boolean>>
|
||||
>(SINK_KILLSWITCH_CONFIG_NAME, {})
|
||||
// getFeatureValue_CACHED_MAY_BE_STALE guards on `!== undefined`, so a
|
||||
// cached JSON null leaks through instead of falling back to {}.
|
||||
return config?.[sink] === true
|
||||
}
|
||||
@@ -14,9 +14,10 @@ import * as path from 'path'
|
||||
import { count } from '../../utils/array.js'
|
||||
import { getCwd } from '../../utils/cwd.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import { errorMessage, getErrnoCode } from '../../utils/errors.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { sleep } from '../../utils/sleep.js'
|
||||
import { jsonStringify } from '../../utils/slowOperations.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
logEvent,
|
||||
@@ -45,6 +46,37 @@ function logDebug(message: string): void {
|
||||
logForDebugging(`[files-api] ${message}`)
|
||||
}
|
||||
|
||||
function summarizeFilesApiError(error: unknown): string {
|
||||
const summary: Record<string, boolean | number | string> = {}
|
||||
|
||||
if (error instanceof Error) {
|
||||
summary.errorType = error.constructor.name
|
||||
summary.errorName = error.name
|
||||
summary.hasMessage = error.message.length > 0
|
||||
} else {
|
||||
summary.errorType = typeof error
|
||||
summary.hasValue = error !== undefined && error !== null
|
||||
}
|
||||
|
||||
const errno = getErrnoCode(error)
|
||||
if (errno) {
|
||||
summary.errno = errno
|
||||
}
|
||||
|
||||
if (axios.isAxiosError(error)) {
|
||||
summary.errorType = 'AxiosError'
|
||||
if (error.code) {
|
||||
summary.axiosCode = error.code
|
||||
}
|
||||
if (typeof error.response?.status === 'number') {
|
||||
summary.httpStatus = error.response.status
|
||||
}
|
||||
summary.hasResponseData = error.response?.data !== undefined
|
||||
}
|
||||
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* File specification parsed from CLI args
|
||||
* Format: --file=<file_id>:<relative_path>
|
||||
@@ -108,9 +140,7 @@ async function retryWithBackoff<T>(
|
||||
}
|
||||
|
||||
lastError = result.error || `${operation} failed`
|
||||
logDebug(
|
||||
`${operation} attempt ${attempt}/${MAX_RETRIES} failed: ${lastError}`,
|
||||
)
|
||||
logDebug(`${operation} attempt ${attempt}/${MAX_RETRIES} failed`)
|
||||
|
||||
if (attempt < MAX_RETRIES) {
|
||||
const delayMs = BASE_DELAY_MS * Math.pow(2, attempt - 1)
|
||||
@@ -142,7 +172,7 @@ export async function downloadFile(
|
||||
'anthropic-beta': FILES_API_BETA_HEADER,
|
||||
}
|
||||
|
||||
logDebug(`Downloading file ${fileId} from ${url}`)
|
||||
logDebug(`Downloading file ${fileId} from configured Files API endpoint`)
|
||||
|
||||
return retryWithBackoff(`Download file ${fileId}`, async () => {
|
||||
try {
|
||||
@@ -191,9 +221,7 @@ export function buildDownloadPath(
|
||||
): string | null {
|
||||
const normalized = path.normalize(relativePath)
|
||||
if (normalized.startsWith('..')) {
|
||||
logDebugError(
|
||||
`Invalid file path: ${relativePath}. Path must not traverse above workspace`,
|
||||
)
|
||||
logDebugError('Invalid file path rejected: path traversal is not allowed')
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -243,7 +271,7 @@ export async function downloadAndSaveFile(
|
||||
// Write the file
|
||||
await fs.writeFile(fullPath, content)
|
||||
|
||||
logDebug(`Saved file ${fileId} to ${fullPath} (${content.length} bytes)`)
|
||||
logDebug(`Saved file ${fileId} (${content.length} bytes)`)
|
||||
|
||||
return {
|
||||
fileId,
|
||||
@@ -252,10 +280,16 @@ export async function downloadAndSaveFile(
|
||||
bytesWritten: content.length,
|
||||
}
|
||||
} catch (error) {
|
||||
logDebugError(`Failed to download file ${fileId}: ${errorMessage(error)}`)
|
||||
if (error instanceof Error) {
|
||||
logError(error)
|
||||
}
|
||||
logDebugError(
|
||||
`Failed to download file ${fileId}: ${summarizeFilesApiError(error)}`,
|
||||
)
|
||||
logError(
|
||||
new Error(
|
||||
`Files API download failed for ${fileId}: ${summarizeFilesApiError(
|
||||
error,
|
||||
)}`,
|
||||
),
|
||||
)
|
||||
|
||||
return {
|
||||
fileId,
|
||||
@@ -390,7 +424,7 @@ export async function uploadFile(
|
||||
'anthropic-beta': FILES_API_BETA_HEADER,
|
||||
}
|
||||
|
||||
logDebug(`Uploading file ${filePath} as ${relativePath}`)
|
||||
logDebug('Uploading file to configured Files API endpoint')
|
||||
|
||||
// Read file content first (outside retry loop since it's not a network operation)
|
||||
let content: Buffer
|
||||
@@ -455,7 +489,7 @@ export async function uploadFile(
|
||||
const body = Buffer.concat(bodyParts)
|
||||
|
||||
try {
|
||||
return await retryWithBackoff(`Upload file ${relativePath}`, async () => {
|
||||
return await retryWithBackoff('Upload session file', async () => {
|
||||
try {
|
||||
const response = await axios.post(url, body, {
|
||||
headers: {
|
||||
@@ -476,7 +510,7 @@ export async function uploadFile(
|
||||
error: 'Upload succeeded but no file ID returned',
|
||||
}
|
||||
}
|
||||
logDebug(`Uploaded file ${filePath} -> ${fileId} (${fileSize} bytes)`)
|
||||
logDebug(`Uploaded file (${fileSize} bytes)`)
|
||||
return {
|
||||
done: true,
|
||||
value: {
|
||||
@@ -735,9 +769,7 @@ export function parseFileSpecs(fileSpecs: string[]): File[] {
|
||||
const relativePath = spec.substring(colonIndex + 1)
|
||||
|
||||
if (!fileId || !relativePath) {
|
||||
logDebugError(
|
||||
`Invalid file spec: ${spec}. Both file_id and path are required`,
|
||||
)
|
||||
logDebugError('Invalid file spec: missing file_id or relative path')
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
import axios from 'axios'
|
||||
import { hasProfileScope, isClaudeAISubscriber } from '../../utils/auth.js'
|
||||
import { getGlobalConfig, saveGlobalConfig } from '../../utils/config.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import { getAuthHeaders, withOAuth401Retry } from '../../utils/http.js'
|
||||
import { logError } from '../../utils/log.js'
|
||||
import { memoizeWithTTLAsync } from '../../utils/memoize.js'
|
||||
import { isEssentialTrafficOnly } from '../../utils/privacyLevel.js'
|
||||
import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
|
||||
|
||||
// Raw shape of the metrics_enabled API response.
type MetricsEnabledResponse = {
  metrics_logging_enabled: boolean
}

// Internal status shape returned to callers.
type MetricsStatus = {
  // Whether org-level metrics logging is enabled.
  enabled: boolean
  // True when the status could not be determined (auth/API failure).
  hasError: boolean
}

// In-memory TTL — dedupes calls within a single process
const CACHE_TTL_MS = 60 * 60 * 1000

// Disk TTL — org settings rarely change. When disk cache is fresher than this,
// we skip the network entirely (no background refresh). This is what collapses
// N `claude -p` invocations into ~1 API call/day.
const DISK_CACHE_TTL_MS = 24 * 60 * 60 * 1000
||||
|
||||
/**
|
||||
* Internal function to call the API and check if metrics are enabled
|
||||
* This is wrapped by memoizeWithTTLAsync to add caching behavior
|
||||
*/
|
||||
async function _fetchMetricsEnabled(): Promise<MetricsEnabledResponse> {
|
||||
const authResult = getAuthHeaders()
|
||||
if (authResult.error) {
|
||||
throw new Error(`Auth error: ${authResult.error}`)
|
||||
}
|
||||
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': getClaudeCodeUserAgent(),
|
||||
...authResult.headers,
|
||||
}
|
||||
|
||||
const endpoint = `https://api.anthropic.com/api/claude_code/organizations/metrics_enabled`
|
||||
const response = await axios.get<MetricsEnabledResponse>(endpoint, {
|
||||
headers,
|
||||
timeout: 5000,
|
||||
})
|
||||
return response.data
|
||||
}
|
||||
|
||||
async function _checkMetricsEnabledAPI(): Promise<MetricsStatus> {
|
||||
// Incident kill switch: skip the network call when nonessential traffic is disabled.
|
||||
// Returning enabled:false sheds load at the consumer (bigqueryExporter skips
|
||||
// export). Matches the non-subscriber early-return shape below.
|
||||
if (isEssentialTrafficOnly()) {
|
||||
return { enabled: false, hasError: false }
|
||||
}
|
||||
|
||||
try {
|
||||
const data = await withOAuth401Retry(_fetchMetricsEnabled, {
|
||||
also403Revoked: true,
|
||||
})
|
||||
|
||||
logForDebugging(
|
||||
`Metrics opt-out API response: enabled=${data.metrics_logging_enabled}`,
|
||||
)
|
||||
|
||||
return {
|
||||
enabled: data.metrics_logging_enabled,
|
||||
hasError: false,
|
||||
}
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`Failed to check metrics opt-out status: ${errorMessage(error)}`,
|
||||
)
|
||||
logError(error)
|
||||
return { enabled: false, hasError: true }
|
||||
}
|
||||
}
|
||||
|
||||
// Create memoized version with custom error handling.
// This is the in-memory tier (CACHE_TTL_MS) of the two-tier cache; the disk
// tier is persisted via global config in refreshMetricsStatus().
const memoizedCheckMetrics = memoizeWithTTLAsync(
  _checkMetricsEnabledAPI,
  CACHE_TTL_MS,
)
|
||||
|
||||
/**
|
||||
* Fetch (in-memory memoized) and persist to disk on change.
|
||||
* Errors are not persisted — a transient failure should not overwrite a
|
||||
* known-good disk value.
|
||||
*/
|
||||
async function refreshMetricsStatus(): Promise<MetricsStatus> {
|
||||
const result = await memoizedCheckMetrics()
|
||||
if (result.hasError) {
|
||||
return result
|
||||
}
|
||||
|
||||
const cached = getGlobalConfig().metricsStatusCache
|
||||
const unchanged = cached !== undefined && cached.enabled === result.enabled
|
||||
// Skip write when unchanged AND timestamp still fresh — avoids config churn
|
||||
// when concurrent callers race past a stale disk entry and all try to write.
|
||||
if (unchanged && Date.now() - cached.timestamp < DISK_CACHE_TTL_MS) {
|
||||
return result
|
||||
}
|
||||
|
||||
saveGlobalConfig(current => ({
|
||||
...current,
|
||||
metricsStatusCache: {
|
||||
enabled: result.enabled,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
}))
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if metrics are enabled for the current organization.
|
||||
*
|
||||
* Two-tier cache:
|
||||
* - Disk (24h TTL): survives process restarts. Fresh disk cache → zero network.
|
||||
* - In-memory (1h TTL): dedupes the background refresh within a process.
|
||||
*
|
||||
* The caller (bigqueryExporter) tolerates stale reads — a missed export or
|
||||
* an extra one during the 24h window is acceptable.
|
||||
*/
|
||||
export async function checkMetricsEnabled(): Promise<MetricsStatus> {
|
||||
// Service key OAuth sessions lack user:profile scope → would 403.
|
||||
// API key users (non-subscribers) fall through and use x-api-key auth.
|
||||
// This check runs before the disk read so we never persist auth-state-derived
|
||||
// answers — only real API responses go to disk. Otherwise a service-key
|
||||
// session would poison the cache for a later full-OAuth session.
|
||||
if (isClaudeAISubscriber() && !hasProfileScope()) {
|
||||
return { enabled: false, hasError: false }
|
||||
}
|
||||
|
||||
const cached = getGlobalConfig().metricsStatusCache
|
||||
if (cached) {
|
||||
if (Date.now() - cached.timestamp > DISK_CACHE_TTL_MS) {
|
||||
// saveGlobalConfig's fallback path (config.ts:731) can throw if both
|
||||
// locked and fallback writes fail — catch here so fire-and-forget
|
||||
// doesn't become an unhandled rejection.
|
||||
void refreshMetricsStatus().catch(logError)
|
||||
}
|
||||
return {
|
||||
enabled: cached.enabled,
|
||||
hasError: false,
|
||||
}
|
||||
}
|
||||
|
||||
// First-ever run on this machine: block on the network to populate disk.
|
||||
return refreshMetricsStatus()
|
||||
}
|
||||
|
||||
// Export for testing purposes only
|
||||
export const _clearMetricsEnabledCacheForTesting = (): void => {
|
||||
memoizedCheckMetrics.cache.clear()
|
||||
}
|
||||
@@ -19,6 +19,25 @@ interface SessionIngressError {
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeSessionIngressPayload(payload: unknown): string {
|
||||
if (payload === null) return 'null'
|
||||
if (payload === undefined) return 'undefined'
|
||||
if (Array.isArray(payload)) return `array(${payload.length})`
|
||||
if (typeof payload === 'object') {
|
||||
const value = payload as Record<string, unknown>
|
||||
return jsonStringify({
|
||||
payloadType: 'object',
|
||||
keys: Object.keys(value)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
loglinesCount: Array.isArray(value.loglines) ? value.loglines.length : 0,
|
||||
dataCount: Array.isArray(value.data) ? value.data.length : 0,
|
||||
hasNextCursor: typeof value.next_cursor === 'string',
|
||||
})
|
||||
}
|
||||
return typeof payload
|
||||
}
|
||||
|
||||
// Module-level state
|
||||
const lastUuidMap: Map<string, UUID> = new Map()
|
||||
|
||||
@@ -249,7 +268,7 @@ export async function getSessionLogsViaOAuth(
|
||||
orgUUID: string,
|
||||
): Promise<Entry[] | null> {
|
||||
const url = `${getOauthConfig().BASE_API_URL}/v1/session_ingress/session/${sessionId}`
|
||||
logForDebugging(`[session-ingress] Fetching session logs from: ${url}`)
|
||||
logForDebugging('[session-ingress] Fetching session logs via OAuth endpoint')
|
||||
const headers = {
|
||||
...getOAuthHeaders(accessToken),
|
||||
'x-organization-uuid': orgUUID,
|
||||
@@ -299,7 +318,7 @@ export async function getTeleportEvents(
|
||||
'x-organization-uuid': orgUUID,
|
||||
}
|
||||
|
||||
logForDebugging(`[teleport] Fetching events from: ${baseUrl}`)
|
||||
logForDebugging('[teleport] Fetching session events via teleport endpoint')
|
||||
|
||||
const all: Entry[] = []
|
||||
let cursor: string | undefined
|
||||
@@ -362,7 +381,9 @@ export async function getTeleportEvents(
|
||||
if (response.status !== 200) {
|
||||
logError(
|
||||
new Error(
|
||||
`Teleport events returned ${response.status}: ${jsonStringify(response.data)}`,
|
||||
`Teleport events returned ${response.status}: ${summarizeSessionIngressPayload(
|
||||
response.data,
|
||||
)}`,
|
||||
),
|
||||
)
|
||||
logForDiagnosticsNoPII('error', 'teleport_events_bad_status')
|
||||
@@ -373,7 +394,9 @@ export async function getTeleportEvents(
|
||||
if (!Array.isArray(data)) {
|
||||
logError(
|
||||
new Error(
|
||||
`Teleport events invalid response shape: ${jsonStringify(response.data)}`,
|
||||
`Teleport events invalid response shape: ${summarizeSessionIngressPayload(
|
||||
response.data,
|
||||
)}`,
|
||||
),
|
||||
)
|
||||
logForDiagnosticsNoPII('error', 'teleport_events_invalid_shape')
|
||||
@@ -439,7 +462,9 @@ async function fetchSessionLogsFromUrl(
|
||||
if (!data || typeof data !== 'object' || !Array.isArray(data.loglines)) {
|
||||
logError(
|
||||
new Error(
|
||||
`Invalid session logs response format: ${jsonStringify(data)}`,
|
||||
`Invalid session logs response format: ${summarizeSessionIngressPayload(
|
||||
data,
|
||||
)}`,
|
||||
),
|
||||
)
|
||||
logForDiagnosticsNoPII('error', 'session_get_fail_invalid_response')
|
||||
|
||||
@@ -8,6 +8,34 @@ import type { DiagnosticFile } from '../diagnosticTracking.js'
|
||||
import { registerPendingLSPDiagnostic } from './LSPDiagnosticRegistry.js'
|
||||
import type { LSPServerManager } from './LSPServerManager.js'
|
||||
|
||||
function summarizeLspErrorForDebug(error: unknown): string {
|
||||
const err = toError(error)
|
||||
return jsonStringify({
|
||||
errorType: err.constructor.name,
|
||||
errorName: err.name,
|
||||
hasMessage: err.message.length > 0,
|
||||
})
|
||||
}
|
||||
|
||||
function summarizeDiagnosticParamsForDebug(params: unknown): string {
|
||||
if (!params || typeof params !== 'object') {
|
||||
return jsonStringify({
|
||||
paramsType: typeof params,
|
||||
hasValue: params !== undefined && params !== null,
|
||||
})
|
||||
}
|
||||
|
||||
const paramRecord = params as Record<string, unknown>
|
||||
const diagnostics = paramRecord.diagnostics
|
||||
return jsonStringify({
|
||||
keys: Object.keys(paramRecord)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
hasUri: typeof paramRecord.uri === 'string',
|
||||
diagnosticsCount: Array.isArray(diagnostics) ? diagnostics.length : 0,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Map LSP severity to Claude diagnostic severity
|
||||
*
|
||||
@@ -54,7 +82,9 @@ export function formatDiagnosticsForAttachment(
|
||||
const err = toError(error)
|
||||
logError(err)
|
||||
logForDebugging(
|
||||
`Failed to convert URI to file path: ${params.uri}. Error: ${err.message}. Using original URI as fallback.`,
|
||||
`Failed to convert diagnostic URI to file path; using original URI fallback (${summarizeLspErrorForDebug(
|
||||
err,
|
||||
)})`,
|
||||
)
|
||||
// Gracefully fallback to original URI - LSP servers may send malformed URIs
|
||||
uri = params.uri
|
||||
@@ -177,14 +207,16 @@ export function registerLSPNotificationHandlers(
|
||||
)
|
||||
logError(err)
|
||||
logForDebugging(
|
||||
`Invalid diagnostic params from ${serverName}: ${jsonStringify(params)}`,
|
||||
`Invalid diagnostic params from ${serverName}: ${summarizeDiagnosticParamsForDebug(
|
||||
params,
|
||||
)}`,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
const diagnosticParams = params as PublishDiagnosticsParams
|
||||
logForDebugging(
|
||||
`Received diagnostics from ${serverName}: ${diagnosticParams.diagnostics.length} diagnostic(s) for ${diagnosticParams.uri}`,
|
||||
`Received diagnostics from ${serverName}: ${diagnosticParams.diagnostics.length} diagnostic(s)`,
|
||||
)
|
||||
|
||||
// Convert LSP diagnostics to Claude format (can throw on invalid URIs)
|
||||
@@ -199,7 +231,7 @@ export function registerLSPNotificationHandlers(
|
||||
firstFile.diagnostics.length === 0
|
||||
) {
|
||||
logForDebugging(
|
||||
`Skipping empty diagnostics from ${serverName} for ${diagnosticParams.uri}`,
|
||||
`Skipping empty diagnostics from ${serverName}`,
|
||||
)
|
||||
return
|
||||
}
|
||||
@@ -223,9 +255,8 @@ export function registerLSPNotificationHandlers(
|
||||
logError(err)
|
||||
logForDebugging(
|
||||
`Error registering LSP diagnostics from ${serverName}: ` +
|
||||
`URI: ${diagnosticParams.uri}, ` +
|
||||
`Diagnostic count: ${firstFile.diagnostics.length}, ` +
|
||||
`Error: ${err.message}`,
|
||||
`Error: ${summarizeLspErrorForDebug(err)}`,
|
||||
)
|
||||
|
||||
// Track consecutive failures and warn after 3+
|
||||
@@ -234,7 +265,7 @@ export function registerLSPNotificationHandlers(
|
||||
lastError: '',
|
||||
}
|
||||
failures.count++
|
||||
failures.lastError = err.message
|
||||
failures.lastError = summarizeLspErrorForDebug(err)
|
||||
diagnosticFailures.set(serverName, failures)
|
||||
|
||||
if (failures.count >= 3) {
|
||||
@@ -251,7 +282,9 @@ export function registerLSPNotificationHandlers(
|
||||
const err = toError(error)
|
||||
logError(err)
|
||||
logForDebugging(
|
||||
`Unexpected error processing diagnostics from ${serverName}: ${err.message}`,
|
||||
`Unexpected error processing diagnostics from ${serverName}: ${summarizeLspErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
|
||||
// Track consecutive failures and warn after 3+
|
||||
@@ -260,7 +293,7 @@ export function registerLSPNotificationHandlers(
|
||||
lastError: '',
|
||||
}
|
||||
failures.count++
|
||||
failures.lastError = err.message
|
||||
failures.lastError = summarizeLspErrorForDebug(err)
|
||||
diagnosticFailures.set(serverName, failures)
|
||||
|
||||
if (failures.count >= 3) {
|
||||
@@ -284,13 +317,13 @@ export function registerLSPNotificationHandlers(
|
||||
|
||||
registrationErrors.push({
|
||||
serverName,
|
||||
error: err.message,
|
||||
error: summarizeLspErrorForDebug(err),
|
||||
})
|
||||
|
||||
logError(err)
|
||||
logForDebugging(
|
||||
`Failed to register diagnostics handler for ${serverName}: ` +
|
||||
`Error: ${err.message}`,
|
||||
`Error: ${summarizeLspErrorForDebug(err)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,35 +93,77 @@ type MCPOAuthFlowErrorReason =
|
||||
|
||||
const MAX_LOCK_RETRIES = 5
|
||||
|
||||
/**
|
||||
* OAuth query parameters that should be redacted from logs.
|
||||
* These contain sensitive values that could enable CSRF or session fixation attacks.
|
||||
*/
|
||||
const SENSITIVE_OAUTH_PARAMS = [
|
||||
'state',
|
||||
'nonce',
|
||||
'code_challenge',
|
||||
'code_verifier',
|
||||
'code',
|
||||
]
|
||||
|
||||
/**
|
||||
* Redacts sensitive OAuth query parameters from a URL for safe logging.
|
||||
* Prevents exposure of state, nonce, code_challenge, code_verifier, and authorization codes.
|
||||
*/
|
||||
function redactSensitiveUrlParams(url: string): string {
|
||||
try {
|
||||
const parsedUrl = new URL(url)
|
||||
for (const param of SENSITIVE_OAUTH_PARAMS) {
|
||||
if (parsedUrl.searchParams.has(param)) {
|
||||
parsedUrl.searchParams.set(param, '[REDACTED]')
|
||||
}
|
||||
function summarizeHeadersForDebug(
|
||||
headers: Record<string, string> | undefined,
|
||||
): {
|
||||
headerCount: number
|
||||
headerNames: string[]
|
||||
hasAuthorization: boolean
|
||||
} {
|
||||
if (!headers) {
|
||||
return {
|
||||
headerCount: 0,
|
||||
headerNames: [],
|
||||
hasAuthorization: false,
|
||||
}
|
||||
return parsedUrl.toString()
|
||||
} catch {
|
||||
// Return as-is if not a valid URL
|
||||
return url
|
||||
}
|
||||
|
||||
const headerNames = Object.keys(headers).sort()
|
||||
return {
|
||||
headerCount: headerNames.length,
|
||||
headerNames,
|
||||
hasAuthorization: headerNames.some(
|
||||
headerName => headerName.toLowerCase() === 'authorization',
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
function extractHttpStatusFromErrorMessage(message: string): number | undefined {
|
||||
const statusMatch = message.match(/^HTTP (\d{3}):/)
|
||||
if (!statusMatch) {
|
||||
return undefined
|
||||
}
|
||||
return Number(statusMatch[1])
|
||||
}
|
||||
|
||||
function summarizeOAuthErrorForDebug(error: unknown): string {
|
||||
const summary: Record<string, boolean | number | string> = {}
|
||||
|
||||
if (error instanceof Error) {
|
||||
summary.errorType = error.constructor.name
|
||||
summary.errorName = error.name
|
||||
summary.hasMessage = error.message.length > 0
|
||||
|
||||
const httpStatus = extractHttpStatusFromErrorMessage(error.message)
|
||||
if (httpStatus !== undefined) {
|
||||
summary.httpStatus = httpStatus
|
||||
}
|
||||
|
||||
if (error instanceof OAuthError) {
|
||||
summary.oauthErrorCode = error.errorCode
|
||||
}
|
||||
} else {
|
||||
summary.errorType = typeof error
|
||||
summary.hasValue = error !== undefined && error !== null
|
||||
}
|
||||
|
||||
const errno = getErrnoCode(error)
|
||||
if (errno) {
|
||||
summary.errno = errno
|
||||
}
|
||||
|
||||
if (axios.isAxiosError(error)) {
|
||||
summary.errorType = 'AxiosError'
|
||||
if (error.code) {
|
||||
summary.axiosCode = error.code
|
||||
}
|
||||
if (typeof error.response?.status === 'number') {
|
||||
summary.httpStatus = error.response.status
|
||||
}
|
||||
summary.hasResponseData = error.response?.data !== undefined
|
||||
}
|
||||
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -295,7 +337,9 @@ async function fetchAuthServerMetadata(
|
||||
// to the legacy path-aware retry.
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`RFC 9728 discovery failed, falling back: ${errorMessage(err)}`,
|
||||
`RFC 9728 discovery failed, falling back: ${summarizeOAuthErrorForDebug(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -517,7 +561,7 @@ export async function revokeServerTokens(
|
||||
: 'client_secret_basic'
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Revoking tokens via ${revocationEndpointStr} (${authMethod})`,
|
||||
`Revoking tokens via discovered OAuth revocation endpoint (${authMethod})`,
|
||||
)
|
||||
|
||||
// Revoke refresh token first (more important - prevents future access token generation)
|
||||
@@ -537,7 +581,9 @@ export async function revokeServerTokens(
|
||||
// Log but continue
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Failed to revoke refresh token: ${errorMessage(error)}`,
|
||||
`Failed to revoke refresh token: ${summarizeOAuthErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -558,7 +604,9 @@ export async function revokeServerTokens(
|
||||
} catch (error: unknown) {
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Failed to revoke access token: ${errorMessage(error)}`,
|
||||
`Failed to revoke access token: ${summarizeOAuthErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -566,7 +614,10 @@ export async function revokeServerTokens(
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
// Log error but don't throw - revocation is best-effort
|
||||
logMCPDebug(serverName, `Failed to revoke tokens: ${errorMessage(error)}`)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Failed to revoke tokens: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
logMCPDebug(serverName, 'No tokens to revoke')
|
||||
@@ -696,14 +747,11 @@ async function performMCPXaaAuth(
|
||||
const haveKeys = Object.keys(
|
||||
getSecureStorage().read()?.mcpOAuthClientConfig ?? {},
|
||||
)
|
||||
const headersForLogging = Object.fromEntries(
|
||||
Object.entries(serverConfig.headers ?? {}).map(([k, v]) =>
|
||||
k.toLowerCase() === 'authorization' ? [k, '[REDACTED]'] : [k, v],
|
||||
),
|
||||
)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`XAA: secret lookup miss. wanted=${wantedKey} have=[${haveKeys.join(', ')}] configHeaders=${jsonStringify(headersForLogging)}`,
|
||||
`XAA: secret lookup miss. wanted=${wantedKey} availableKeys=${haveKeys.length} configHeaderSummary=${jsonStringify(
|
||||
summarizeHeadersForDebug(serverConfig.headers),
|
||||
)}`,
|
||||
)
|
||||
throw new Error(
|
||||
`XAA: AS client secret not found for '${serverName}'. Re-add with --client-secret.`,
|
||||
@@ -923,10 +971,7 @@ export async function performMCPOAuthFlow(
|
||||
try {
|
||||
resourceMetadataUrl = new URL(cachedResourceMetadataUrl)
|
||||
} catch {
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Invalid cached resourceMetadataUrl: ${cachedResourceMetadataUrl}`,
|
||||
)
|
||||
logMCPDebug(serverName, 'Invalid cached resource metadata URL')
|
||||
}
|
||||
}
|
||||
const wwwAuthParams: WWWAuthenticateParams = {
|
||||
@@ -988,13 +1033,15 @@ export async function performMCPOAuthFlow(
|
||||
provider.setMetadata(metadata)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Fetched OAuth metadata with scope: ${getScopeFromMetadata(metadata) || 'NONE'}`,
|
||||
`Fetched OAuth metadata (hasScope=${Boolean(
|
||||
getScopeFromMetadata(metadata),
|
||||
)})`,
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Failed to fetch OAuth metadata: ${errorMessage(error)}`,
|
||||
`Failed to fetch OAuth metadata: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1170,8 +1217,10 @@ export async function performMCPOAuthFlow(
|
||||
|
||||
server.listen(port, '127.0.0.1', async () => {
|
||||
try {
|
||||
logMCPDebug(serverName, `Starting SDK auth`)
|
||||
logMCPDebug(serverName, `Server URL: ${serverConfig.url}`)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Starting SDK auth (transport=${serverConfig.type})`,
|
||||
)
|
||||
|
||||
// First call to start the auth flow - should redirect
|
||||
// Pass the scope and resource_metadata from WWW-Authenticate header if available
|
||||
@@ -1189,7 +1238,10 @@ export async function performMCPOAuthFlow(
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
logMCPDebug(serverName, `SDK auth error: ${error}`)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`SDK auth error: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
cleanup()
|
||||
rejectOnce(new Error(`SDK auth failed: ${errorMessage(error)}`))
|
||||
}
|
||||
@@ -1235,9 +1287,13 @@ export async function performMCPOAuthFlow(
|
||||
if (savedTokens) {
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Token access_token length: ${savedTokens.access_token?.length}`,
|
||||
`Token summary after auth: ${jsonStringify({
|
||||
hasAccessToken: Boolean(savedTokens.access_token),
|
||||
hasRefreshToken: Boolean(savedTokens.refresh_token),
|
||||
expiresInSec: savedTokens.expires_in,
|
||||
hasScope: Boolean(savedTokens.scope),
|
||||
})}`,
|
||||
)
|
||||
logMCPDebug(serverName, `Token expires_in: ${savedTokens.expires_in}`)
|
||||
}
|
||||
|
||||
logEvent('tengu_mcp_oauth_flow_success', {
|
||||
@@ -1257,7 +1313,10 @@ export async function performMCPOAuthFlow(
|
||||
throw new Error('Unexpected auth result: ' + result)
|
||||
}
|
||||
} catch (error) {
|
||||
logMCPDebug(serverName, `Error during auth completion: ${error}`)
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`Error during auth completion: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
|
||||
// Determine failure reason for attribution telemetry. The try block covers
|
||||
// port acquisition, the callback server, the redirect flow, and token
|
||||
@@ -1298,9 +1357,9 @@ export async function performMCPOAuthFlow(
|
||||
// SDK does not attach HTTP status as a property, but the fallback ServerError
|
||||
// embeds it in the message as "HTTP {status}:" when the response body was
|
||||
// unparseable. Best-effort extraction.
|
||||
const statusMatch = error.message.match(/^HTTP (\d{3}):/)
|
||||
if (statusMatch) {
|
||||
httpStatus = Number(statusMatch[1])
|
||||
const parsedStatus = extractHttpStatusFromErrorMessage(error.message)
|
||||
if (parsedStatus !== undefined) {
|
||||
httpStatus = parsedStatus
|
||||
}
|
||||
// If client not found, clear the stored client ID and suggest retry
|
||||
if (
|
||||
@@ -1429,7 +1488,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
metadata.scope = metadataScope
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Using scope from metadata: ${metadata.scope}`,
|
||||
'Using scope from metadata',
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1445,7 +1504,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
get clientMetadataUrl(): string | undefined {
|
||||
const override = process.env.MCP_OAUTH_CLIENT_METADATA_URL
|
||||
if (override) {
|
||||
logMCPDebug(this.serverName, `Using CIMD URL from env: ${override}`)
|
||||
logMCPDebug(this.serverName, 'Using CIMD URL from env override')
|
||||
return override
|
||||
}
|
||||
return MCP_CLIENT_METADATA_URL
|
||||
@@ -1467,7 +1526,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
*/
|
||||
markStepUpPending(scope: string): void {
|
||||
this._pendingStepUpScope = scope
|
||||
logMCPDebug(this.serverName, `Marked step-up pending: ${scope}`)
|
||||
logMCPDebug(this.serverName, 'Marked step-up pending')
|
||||
}
|
||||
|
||||
async state(): Promise<string> {
|
||||
@@ -1606,7 +1665,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} catch (e) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`XAA silent exchange failed: ${errorMessage(e)}`,
|
||||
`XAA silent exchange failed: ${summarizeOAuthErrorForDebug(e)}`,
|
||||
)
|
||||
}
|
||||
// Fall through. Either id_token isn't cached (xaaRefresh returned
|
||||
@@ -1632,7 +1691,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
if (needsStepUp) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Step-up pending (${this._pendingStepUpScope}), omitting refresh_token`,
|
||||
'Step-up pending, omitting refresh_token',
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1679,7 +1738,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} catch (error) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Token refresh error: ${errorMessage(error)}`,
|
||||
`Token refresh error: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1693,10 +1752,15 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
token_type: 'Bearer',
|
||||
}
|
||||
|
||||
logMCPDebug(this.serverName, `Returning tokens`)
|
||||
logMCPDebug(this.serverName, `Token length: ${tokens.access_token?.length}`)
|
||||
logMCPDebug(this.serverName, `Has refresh token: ${!!tokens.refresh_token}`)
|
||||
logMCPDebug(this.serverName, `Expires in: ${Math.floor(expiresIn)}s`)
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Returning tokens: ${jsonStringify({
|
||||
hasAccessToken: Boolean(tokens.access_token),
|
||||
hasRefreshToken: Boolean(tokens.refresh_token),
|
||||
hasScope: Boolean(tokens.scope),
|
||||
expiresInSec: Math.floor(expiresIn),
|
||||
})}`,
|
||||
)
|
||||
|
||||
return tokens
|
||||
}
|
||||
@@ -1707,9 +1771,15 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
const existingData = storage.read() || {}
|
||||
const serverKey = getServerKey(this.serverName, this.serverConfig)
|
||||
|
||||
logMCPDebug(this.serverName, `Saving tokens`)
|
||||
logMCPDebug(this.serverName, `Token expires in: ${tokens.expires_in}`)
|
||||
logMCPDebug(this.serverName, `Has refresh token: ${!!tokens.refresh_token}`)
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Saving tokens: ${jsonStringify({
|
||||
hasAccessToken: Boolean(tokens.access_token),
|
||||
hasRefreshToken: Boolean(tokens.refresh_token),
|
||||
hasScope: Boolean(tokens.scope),
|
||||
expiresInSec: tokens.expires_in,
|
||||
})}`,
|
||||
)
|
||||
|
||||
const updatedData: SecureStorageData = {
|
||||
...existingData,
|
||||
@@ -1783,7 +1853,9 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} catch (e) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`XAA: OIDC discovery failed in silent refresh: ${errorMessage(e)}`,
|
||||
`XAA: OIDC discovery failed in silent refresh: ${summarizeOAuthErrorForDebug(
|
||||
e,
|
||||
)}`,
|
||||
)
|
||||
return undefined
|
||||
}
|
||||
@@ -1855,29 +1927,18 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
|
||||
// Extract and store scopes from the authorization URL for later use in token exchange
|
||||
const scopes = authorizationUrl.searchParams.get('scope')
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Authorization URL: ${redactSensitiveUrlParams(authorizationUrl.toString())}`,
|
||||
)
|
||||
logMCPDebug(this.serverName, `Scopes in URL: ${scopes || 'NOT FOUND'}`)
|
||||
|
||||
if (scopes) {
|
||||
this._scopes = scopes
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Captured scopes from authorization URL: ${scopes}`,
|
||||
)
|
||||
logMCPDebug(this.serverName, 'Captured scopes from authorization URL')
|
||||
} else {
|
||||
// If no scope in URL, try to get it from metadata
|
||||
const metadataScope = getScopeFromMetadata(this._metadata)
|
||||
if (metadataScope) {
|
||||
this._scopes = metadataScope
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Using scopes from metadata: ${metadataScope}`,
|
||||
)
|
||||
logMCPDebug(this.serverName, 'Using scopes from metadata')
|
||||
} else {
|
||||
logMCPDebug(this.serverName, `No scopes available from URL or metadata`)
|
||||
logMCPDebug(this.serverName, 'No scopes available from URL or metadata')
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1895,7 +1956,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
if (existing) {
|
||||
existing.stepUpScope = this._scopes
|
||||
storage.update(existingData)
|
||||
logMCPDebug(this.serverName, `Persisted step-up scope: ${this._scopes}`)
|
||||
logMCPDebug(this.serverName, 'Persisted step-up scope')
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1916,8 +1977,6 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
}
|
||||
|
||||
logMCPDebug(this.serverName, `Redirecting to authorization URL`)
|
||||
const redactedUrl = redactSensitiveUrlParams(urlString)
|
||||
logMCPDebug(this.serverName, `Authorization URL: ${redactedUrl}`)
|
||||
|
||||
// Notify the UI about the authorization URL BEFORE opening the browser,
|
||||
// so users can see the URL as a fallback if the browser fails to open
|
||||
@@ -1926,7 +1985,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
}
|
||||
|
||||
if (!this.skipBrowserOpen) {
|
||||
logMCPDebug(this.serverName, `Opening authorization URL: ${redactedUrl}`)
|
||||
logMCPDebug(this.serverName, 'Opening authorization URL')
|
||||
|
||||
const success = await openBrowser(urlString)
|
||||
if (!success) {
|
||||
@@ -1938,7 +1997,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} else {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Skipping browser open (skipBrowserOpen=true). URL: ${redactedUrl}`,
|
||||
'Skipping browser open (skipBrowserOpen=true)',
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1991,7 +2050,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
}
|
||||
|
||||
storage.update(existingData)
|
||||
logMCPDebug(this.serverName, `Invalidated credentials (scope: ${scope})`)
|
||||
logMCPDebug(this.serverName, `Invalidated credentials (${scope})`)
|
||||
}
|
||||
|
||||
async saveDiscoveryState(state: OAuthDiscoveryState): Promise<void> {
|
||||
@@ -1999,10 +2058,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
const existingData = storage.read() || {}
|
||||
const serverKey = getServerKey(this.serverName, this.serverConfig)
|
||||
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Saving discovery state (authServer: ${state.authorizationServerUrl})`,
|
||||
)
|
||||
logMCPDebug(this.serverName, 'Saving discovery state')
|
||||
|
||||
// Persist only the URLs, NOT the full metadata blobs.
|
||||
// authorizationServerMetadata alone is ~1.5-2KB per MCP server (every
|
||||
@@ -2041,10 +2097,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
|
||||
const cached = data?.mcpOAuth?.[serverKey]?.discoveryState
|
||||
if (cached?.authorizationServerUrl) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Returning cached discovery state (authServer: ${cached.authorizationServerUrl})`,
|
||||
)
|
||||
logMCPDebug(this.serverName, 'Returning cached discovery state')
|
||||
|
||||
return {
|
||||
authorizationServerUrl: cached.authorizationServerUrl,
|
||||
@@ -2061,7 +2114,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
if (metadataUrl) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Fetching metadata from configured URL: ${metadataUrl}`,
|
||||
'Fetching metadata from configured override URL',
|
||||
)
|
||||
try {
|
||||
const metadata = await fetchAuthServerMetadata(
|
||||
@@ -2079,7 +2132,9 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} catch (error) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Failed to fetch from configured metadata URL: ${errorMessage(error)}`,
|
||||
`Failed to fetch from configured metadata URL: ${summarizeOAuthErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -2231,7 +2286,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
} else if (cached?.authorizationServerUrl) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Re-discovering metadata from persisted auth server URL: ${cached.authorizationServerUrl}`,
|
||||
'Re-discovering metadata from persisted auth server URL',
|
||||
)
|
||||
metadata = await discoverAuthorizationServerMetadata(
|
||||
cached.authorizationServerUrl,
|
||||
@@ -2287,10 +2342,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
// Invalid grant means the refresh token itself is invalid/revoked/expired.
|
||||
// But another process may have already refreshed successfully — check first.
|
||||
if (error instanceof InvalidGrantError) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Token refresh failed with invalid_grant: ${error.message}`,
|
||||
)
|
||||
logMCPDebug(this.serverName, 'Token refresh failed with invalid_grant')
|
||||
clearKeychainCache()
|
||||
const storage = getSecureStorage()
|
||||
const data = storage.read()
|
||||
@@ -2337,7 +2389,7 @@ export class ClaudeAuthProvider implements OAuthClientProvider {
|
||||
if (!isRetryable || attempt >= MAX_ATTEMPTS) {
|
||||
logMCPDebug(
|
||||
this.serverName,
|
||||
`Token refresh failed: ${errorMessage(error)}`,
|
||||
`Token refresh failed: ${summarizeOAuthErrorForDebug(error)}`,
|
||||
)
|
||||
emitRefreshEvent(
|
||||
'failure',
|
||||
|
||||
@@ -332,6 +332,94 @@ function mcpBaseUrlAnalytics(serverRef: ScopedMcpServerConfig): {
|
||||
: {}
|
||||
}
|
||||
|
||||
function mcpBaseUrlForDebug(serverRef: ScopedMcpServerConfig): string {
|
||||
return getLoggingSafeMcpBaseUrl(serverRef) || '[unavailable]'
|
||||
}
|
||||
|
||||
function summarizeHeadersForDebug(
|
||||
headers: Record<string, string> | undefined,
|
||||
): {
|
||||
headerCount: number
|
||||
headerNames: string[]
|
||||
hasAuthorization: boolean
|
||||
} {
|
||||
if (!headers) {
|
||||
return {
|
||||
headerCount: 0,
|
||||
headerNames: [],
|
||||
hasAuthorization: false,
|
||||
}
|
||||
}
|
||||
|
||||
const headerNames = Object.keys(headers)
|
||||
return {
|
||||
headerCount: headerNames.length,
|
||||
headerNames: headerNames.sort(),
|
||||
hasAuthorization: headerNames.some(
|
||||
headerName => headerName.toLowerCase() === 'authorization',
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeProxyEnvForDebug(): Record<string, string | boolean> {
|
||||
return {
|
||||
hasNodeOptions: Boolean(process.env.NODE_OPTIONS),
|
||||
uvThreadpoolSizeConfigured: Boolean(process.env.UV_THREADPOOL_SIZE),
|
||||
hasHttpProxy: Boolean(process.env.HTTP_PROXY),
|
||||
hasHttpsProxy: Boolean(process.env.HTTPS_PROXY),
|
||||
hasNoProxy: Boolean(process.env.NO_PROXY),
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeStderrForDebug(stderrOutput: string): string {
|
||||
const trimmed = stderrOutput.trim()
|
||||
const lineCount = trimmed === '' ? 0 : trimmed.split('\n').length
|
||||
return `Server stderr captured (${trimmed.length} chars, ${lineCount} lines)`
|
||||
}
|
||||
|
||||
function summarizeMcpErrorForDebug(error: unknown): string {
|
||||
const summary: Record<string, boolean | number | string> = {}
|
||||
|
||||
if (error instanceof Error) {
|
||||
summary.errorType = error.constructor.name
|
||||
summary.errorName = error.name
|
||||
summary.hasMessage = error.message.length > 0
|
||||
summary.hasStack = Boolean(error.stack)
|
||||
|
||||
const errorObj = error as Error & {
|
||||
code?: unknown
|
||||
errno?: unknown
|
||||
syscall?: unknown
|
||||
status?: unknown
|
||||
cause?: unknown
|
||||
}
|
||||
|
||||
if (typeof errorObj.code === 'string' || typeof errorObj.code === 'number') {
|
||||
summary.code = errorObj.code
|
||||
}
|
||||
if (
|
||||
typeof errorObj.errno === 'string' ||
|
||||
typeof errorObj.errno === 'number'
|
||||
) {
|
||||
summary.errno = errorObj.errno
|
||||
}
|
||||
if (typeof errorObj.syscall === 'string') {
|
||||
summary.syscall = errorObj.syscall
|
||||
}
|
||||
if (typeof errorObj.status === 'number') {
|
||||
summary.status = errorObj.status
|
||||
}
|
||||
if (errorObj.cause !== undefined) {
|
||||
summary.hasCause = true
|
||||
}
|
||||
} else {
|
||||
summary.errorType = typeof error
|
||||
summary.hasValue = error !== undefined && error !== null
|
||||
}
|
||||
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared handler for sse/http/claudeai-proxy auth failures during connect:
|
||||
* emits tengu_mcp_server_needs_auth, caches the needs-auth entry, and returns
|
||||
@@ -676,7 +764,10 @@ export const connectToServer = memoize(
|
||||
)
|
||||
logMCPDebug(name, `SSE transport initialized, awaiting connection`)
|
||||
} else if (serverRef.type === 'sse-ide') {
|
||||
logMCPDebug(name, `Setting up SSE-IDE transport to ${serverRef.url}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Setting up SSE-IDE transport to ${mcpBaseUrlForDebug(serverRef)}`,
|
||||
)
|
||||
// IDE servers don't need authentication
|
||||
// TODO: Use the auth token provided in the lockfile
|
||||
const proxyOptions = getProxyFetchOptions()
|
||||
@@ -735,7 +826,7 @@ export const connectToServer = memoize(
|
||||
} else if (serverRef.type === 'ws') {
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Initializing WebSocket transport to ${serverRef.url}`,
|
||||
`Initializing WebSocket transport to ${mcpBaseUrlForDebug(serverRef)}`,
|
||||
)
|
||||
|
||||
const combinedHeaders = await getMcpServerHeaders(name, serverRef)
|
||||
@@ -749,16 +840,17 @@ export const connectToServer = memoize(
|
||||
...combinedHeaders,
|
||||
}
|
||||
|
||||
// Redact sensitive headers before logging
|
||||
const wsHeadersForLogging = mapValues(wsHeaders, (value, key) =>
|
||||
key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
|
||||
const wsHeadersForLogging = summarizeHeadersForDebug(
|
||||
mapValues(wsHeaders, (_value, key) =>
|
||||
key.toLowerCase() === 'authorization' ? '[REDACTED]' : '[set]',
|
||||
),
|
||||
)
|
||||
|
||||
logMCPDebug(
|
||||
name,
|
||||
`WebSocket transport options: ${jsonStringify({
|
||||
url: serverRef.url,
|
||||
headers: wsHeadersForLogging,
|
||||
url: mcpBaseUrlForDebug(serverRef),
|
||||
...wsHeadersForLogging,
|
||||
hasSessionAuth: !!sessionIngressToken,
|
||||
})}`,
|
||||
)
|
||||
@@ -782,20 +874,17 @@ export const connectToServer = memoize(
|
||||
}
|
||||
transport = new WebSocketTransport(wsClient)
|
||||
} else if (serverRef.type === 'http') {
|
||||
logMCPDebug(name, `Initializing HTTP transport to ${serverRef.url}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Initializing HTTP transport to ${mcpBaseUrlForDebug(serverRef)}`,
|
||||
)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Node version: ${process.version}, Platform: ${process.platform}`,
|
||||
)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Environment: ${jsonStringify({
|
||||
NODE_OPTIONS: process.env.NODE_OPTIONS || 'not set',
|
||||
UV_THREADPOOL_SIZE: process.env.UV_THREADPOOL_SIZE || 'default',
|
||||
HTTP_PROXY: process.env.HTTP_PROXY || 'not set',
|
||||
HTTPS_PROXY: process.env.HTTPS_PROXY || 'not set',
|
||||
NO_PROXY: process.env.NO_PROXY || 'not set',
|
||||
})}`,
|
||||
`Environment: ${jsonStringify(summarizeProxyEnvForDebug())}`,
|
||||
)
|
||||
|
||||
// Create an auth provider for this server
|
||||
@@ -843,16 +932,16 @@ export const connectToServer = memoize(
|
||||
const headersForLogging = transportOptions.requestInit?.headers
|
||||
? mapValues(
|
||||
transportOptions.requestInit.headers as Record<string, string>,
|
||||
(value, key) =>
|
||||
key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
|
||||
(_value, key) =>
|
||||
key.toLowerCase() === 'authorization' ? '[REDACTED]' : '[set]',
|
||||
)
|
||||
: undefined
|
||||
|
||||
logMCPDebug(
|
||||
name,
|
||||
`HTTP transport options: ${jsonStringify({
|
||||
url: serverRef.url,
|
||||
headers: headersForLogging,
|
||||
url: mcpBaseUrlForDebug(serverRef),
|
||||
...summarizeHeadersForDebug(headersForLogging),
|
||||
hasAuthProvider: !!authProvider,
|
||||
timeoutMs: MCP_REQUEST_TIMEOUT_MS,
|
||||
})}`,
|
||||
@@ -879,7 +968,7 @@ export const connectToServer = memoize(
|
||||
const oauthConfig = getOauthConfig()
|
||||
const proxyUrl = `${oauthConfig.MCP_PROXY_URL}${oauthConfig.MCP_PROXY_PATH.replace('{server_id}', serverRef.id)}`
|
||||
|
||||
logMCPDebug(name, `Using claude.ai proxy at ${proxyUrl}`)
|
||||
logMCPDebug(name, `Using claude.ai proxy transport`)
|
||||
|
||||
// eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
|
||||
const fetchWithAuth = createClaudeAiProxyFetch(globalThis.fetch)
|
||||
@@ -1025,23 +1114,28 @@ export const connectToServer = memoize(
|
||||
|
||||
// For HTTP transport, try a basic connectivity test first
|
||||
if (serverRef.type === 'http') {
|
||||
logMCPDebug(name, `Testing basic HTTP connectivity to ${serverRef.url}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Testing basic HTTP connectivity to ${mcpBaseUrlForDebug(serverRef)}`,
|
||||
)
|
||||
try {
|
||||
const testUrl = new URL(serverRef.url)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Parsed URL: host=${testUrl.hostname}, port=${testUrl.port || 'default'}, protocol=${testUrl.protocol}`,
|
||||
)
|
||||
logMCPDebug(name, 'Parsed HTTP endpoint for preflight checks')
|
||||
|
||||
// Log DNS resolution attempt
|
||||
if (
|
||||
testUrl.hostname === '127.0.0.1' ||
|
||||
testUrl.hostname === 'localhost'
|
||||
) {
|
||||
logMCPDebug(name, `Using loopback address: ${testUrl.hostname}`)
|
||||
logMCPDebug(name, 'Using loopback HTTP endpoint')
|
||||
}
|
||||
} catch (urlError) {
|
||||
logMCPDebug(name, `Failed to parse URL: ${urlError}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Failed to parse HTTP endpoint for preflight: ${summarizeMcpErrorForDebug(
|
||||
urlError,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1079,7 +1173,7 @@ export const connectToServer = memoize(
|
||||
try {
|
||||
await Promise.race([connectPromise, timeoutPromise])
|
||||
if (stderrOutput) {
|
||||
logMCPError(name, `Server stderr: ${stderrOutput}`)
|
||||
logMCPError(name, summarizeStderrForDebug(stderrOutput))
|
||||
stderrOutput = '' // Release accumulated string to prevent memory growth
|
||||
}
|
||||
const elapsed = Date.now() - connectStartTime
|
||||
@@ -1093,30 +1187,29 @@ export const connectToServer = memoize(
|
||||
if (serverRef.type === 'sse' && error instanceof Error) {
|
||||
logMCPDebug(
|
||||
name,
|
||||
`SSE Connection failed after ${elapsed}ms: ${jsonStringify({
|
||||
url: serverRef.url,
|
||||
error: error.message,
|
||||
errorType: error.constructor.name,
|
||||
stack: error.stack,
|
||||
})}`,
|
||||
`SSE connection failed after ${elapsed}ms: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
logMCPError(
|
||||
name,
|
||||
`SSE connection failed: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
logMCPError(name, error)
|
||||
|
||||
if (error instanceof UnauthorizedError) {
|
||||
return handleRemoteAuthFailure(name, serverRef, 'sse')
|
||||
}
|
||||
} else if (serverRef.type === 'http' && error instanceof Error) {
|
||||
const errorObj = error as Error & {
|
||||
cause?: unknown
|
||||
code?: string
|
||||
errno?: string | number
|
||||
syscall?: string
|
||||
}
|
||||
logMCPDebug(
|
||||
name,
|
||||
`HTTP Connection failed after ${elapsed}ms: ${error.message} (code: ${errorObj.code || 'none'}, errno: ${errorObj.errno || 'none'})`,
|
||||
`HTTP connection failed after ${elapsed}ms: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
logMCPError(
|
||||
name,
|
||||
`HTTP connection failed: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
logMCPError(name, error)
|
||||
|
||||
if (error instanceof UnauthorizedError) {
|
||||
return handleRemoteAuthFailure(name, serverRef, 'http')
|
||||
@@ -1127,9 +1220,16 @@ export const connectToServer = memoize(
|
||||
) {
|
||||
logMCPDebug(
|
||||
name,
|
||||
`claude.ai proxy connection failed after ${elapsed}ms: ${error.message}`,
|
||||
`claude.ai proxy connection failed after ${elapsed}ms: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
logMCPError(
|
||||
name,
|
||||
`claude.ai proxy connection failed: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
logMCPError(name, error)
|
||||
|
||||
// StreamableHTTPError has a `code` property with the HTTP status
|
||||
const errorCode = (error as Error & { code?: number }).code
|
||||
@@ -1149,7 +1249,7 @@ export const connectToServer = memoize(
|
||||
}
|
||||
transport.close().catch(() => {})
|
||||
if (stderrOutput) {
|
||||
logMCPError(name, `Server stderr: ${stderrOutput}`)
|
||||
logMCPError(name, summarizeStderrForDebug(stderrOutput))
|
||||
}
|
||||
throw error
|
||||
}
|
||||
@@ -1208,7 +1308,9 @@ export const connectToServer = memoize(
|
||||
} catch (error) {
|
||||
logMCPError(
|
||||
name,
|
||||
`Failed to send ide_connected notification: ${error}`,
|
||||
`Failed to send ide_connected notification: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1242,7 +1344,10 @@ export const connectToServer = memoize(
|
||||
hasTriggeredClose = true
|
||||
logMCPDebug(name, `Closing transport (${reason})`)
|
||||
void client.close().catch(e => {
|
||||
logMCPDebug(name, `Error during close: ${errorMessage(e)}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error during close: ${summarizeMcpErrorForDebug(e)}`,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1306,7 +1411,10 @@ export const connectToServer = memoize(
|
||||
`Failed to spawn process - check command and permissions`,
|
||||
)
|
||||
} else {
|
||||
logMCPDebug(name, `Connection error: ${error.message}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Connection error: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1407,12 +1515,20 @@ export const connectToServer = memoize(
|
||||
try {
|
||||
await inProcessServer.close()
|
||||
} catch (error) {
|
||||
logMCPDebug(name, `Error closing in-process server: ${error}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error closing in-process server: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
try {
|
||||
await client.close()
|
||||
} catch (error) {
|
||||
logMCPDebug(name, `Error closing client: ${error}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error closing client: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -1438,7 +1554,10 @@ export const connectToServer = memoize(
|
||||
try {
|
||||
process.kill(childPid, 'SIGINT')
|
||||
} catch (error) {
|
||||
logMCPDebug(name, `Error sending SIGINT: ${error}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error sending SIGINT: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1492,7 +1611,12 @@ export const connectToServer = memoize(
|
||||
try {
|
||||
process.kill(childPid, 'SIGTERM')
|
||||
} catch (termError) {
|
||||
logMCPDebug(name, `Error sending SIGTERM: ${termError}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error sending SIGTERM: ${summarizeMcpErrorForDebug(
|
||||
termError,
|
||||
)}`,
|
||||
)
|
||||
resolved = true
|
||||
clearInterval(checkInterval)
|
||||
clearTimeout(failsafeTimeout)
|
||||
@@ -1525,7 +1649,9 @@ export const connectToServer = memoize(
|
||||
} catch (killError) {
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error sending SIGKILL: ${killError}`,
|
||||
`Error sending SIGKILL: ${summarizeMcpErrorForDebug(
|
||||
killError,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
} catch {
|
||||
@@ -1557,7 +1683,12 @@ export const connectToServer = memoize(
|
||||
})
|
||||
}
|
||||
} catch (processError) {
|
||||
logMCPDebug(name, `Error terminating process: ${processError}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error terminating process: ${summarizeMcpErrorForDebug(
|
||||
processError,
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1565,7 +1696,10 @@ export const connectToServer = memoize(
|
||||
try {
|
||||
await client.close()
|
||||
} catch (error) {
|
||||
logMCPDebug(name, `Error closing client: ${error}`)
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Error closing client: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1622,9 +1756,14 @@ export const connectToServer = memoize(
|
||||
})
|
||||
logMCPDebug(
|
||||
name,
|
||||
`Connection failed after ${connectionDurationMs}ms: ${errorMessage(error)}`,
|
||||
`Connection failed after ${connectionDurationMs}ms: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
logMCPError(
|
||||
name,
|
||||
`Connection failed: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
logMCPError(name, `Connection failed: ${errorMessage(error)}`)
|
||||
|
||||
if (inProcessServer) {
|
||||
inProcessServer.close().catch(() => {})
|
||||
@@ -1989,7 +2128,10 @@ export const fetchToolsForClient = memoizeWithLRU(
|
||||
})
|
||||
.filter(isIncludedMcpTool)
|
||||
} catch (error) {
|
||||
logMCPError(client.name, `Failed to fetch tools: ${errorMessage(error)}`)
|
||||
logMCPError(
|
||||
client.name,
|
||||
`Failed to fetch tools: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
return []
|
||||
}
|
||||
},
|
||||
@@ -2021,7 +2163,7 @@ export const fetchResourcesForClient = memoizeWithLRU(
|
||||
} catch (error) {
|
||||
logMCPError(
|
||||
client.name,
|
||||
`Failed to fetch resources: ${errorMessage(error)}`,
|
||||
`Failed to fetch resources: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
return []
|
||||
}
|
||||
@@ -2087,7 +2229,9 @@ export const fetchCommandsForClient = memoizeWithLRU(
|
||||
} catch (error) {
|
||||
logMCPError(
|
||||
client.name,
|
||||
`Error running command '${prompt.name}': ${errorMessage(error)}`,
|
||||
`Error running command '${prompt.name}': ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
throw error
|
||||
}
|
||||
@@ -2097,7 +2241,7 @@ export const fetchCommandsForClient = memoizeWithLRU(
|
||||
} catch (error) {
|
||||
logMCPError(
|
||||
client.name,
|
||||
`Failed to fetch commands: ${errorMessage(error)}`,
|
||||
`Failed to fetch commands: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
return []
|
||||
}
|
||||
@@ -2198,7 +2342,10 @@ export async function reconnectMcpServerImpl(
|
||||
}
|
||||
} catch (error) {
|
||||
// Handle errors gracefully - connection might have closed during fetch
|
||||
logMCPError(name, `Error during reconnection: ${errorMessage(error)}`)
|
||||
logMCPError(
|
||||
name,
|
||||
`Error during reconnection: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
|
||||
// Return with failed status
|
||||
return {
|
||||
@@ -2373,7 +2520,9 @@ export async function getMcpToolsCommandsAndResources(
|
||||
// Handle errors gracefully - connection might have closed during fetch
|
||||
logMCPError(
|
||||
name,
|
||||
`Error fetching tools/commands/resources: ${errorMessage(error)}`,
|
||||
`Error fetching tools/commands/resources: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
|
||||
// Still update with the client but no tools/commands
|
||||
@@ -2460,7 +2609,7 @@ export function prefetchAllMcpResources(
|
||||
}, mcpConfigs).catch(error => {
|
||||
logMCPError(
|
||||
'prefetchAllMcpResources',
|
||||
`Failed to get MCP resources: ${errorMessage(error)}`,
|
||||
`Failed to get MCP resources: ${summarizeMcpErrorForDebug(error)}`,
|
||||
)
|
||||
// Still resolve with empty results
|
||||
void resolve({
|
||||
@@ -3322,7 +3471,12 @@ export async function setupSdkMcpClients(
|
||||
}
|
||||
} catch (error) {
|
||||
// If connection fails, return failed server
|
||||
logMCPError(name, `Failed to connect SDK MCP server: ${error}`)
|
||||
logMCPError(
|
||||
name,
|
||||
`Failed to connect SDK MCP server: ${summarizeMcpErrorForDebug(
|
||||
error,
|
||||
)}`,
|
||||
)
|
||||
return {
|
||||
client: {
|
||||
type: 'failed' as const,
|
||||
|
||||
@@ -1397,6 +1397,7 @@ export function parseMcpConfigFromFilePath(params: {
|
||||
configContent = fs.readFileSync(filePath, { encoding: 'utf8' })
|
||||
} catch (error: unknown) {
|
||||
const code = getErrnoCode(error)
|
||||
const fileName = parse(filePath).base
|
||||
if (code === 'ENOENT') {
|
||||
return {
|
||||
config: null,
|
||||
@@ -1415,7 +1416,7 @@ export function parseMcpConfigFromFilePath(params: {
|
||||
}
|
||||
}
|
||||
logForDebugging(
|
||||
`MCP config read error for ${filePath} (scope=${scope}): ${error}`,
|
||||
`MCP config read error (scope=${scope}, file=${fileName}, errno=${code ?? 'none'}, errorType=${error instanceof Error ? error.name : typeof error})`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
return {
|
||||
@@ -1439,7 +1440,7 @@ export function parseMcpConfigFromFilePath(params: {
|
||||
|
||||
if (!parsedJson) {
|
||||
logForDebugging(
|
||||
`MCP config is not valid JSON: ${filePath} (scope=${scope}, length=${configContent.length}, first100=${jsonStringify(configContent.slice(0, 100))})`,
|
||||
`MCP config is not valid JSON (scope=${scope}, file=${parse(filePath).base}, length=${configContent.length})`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
return {
|
||||
|
||||
@@ -96,6 +96,24 @@ function redactTokens(raw: unknown): string {
|
||||
return s.replace(SENSITIVE_TOKEN_RE, (_, k) => `"${k}":"[REDACTED]"`)
|
||||
}
|
||||
|
||||
function summarizeXaaPayload(raw: unknown): string {
|
||||
if (typeof raw === 'string') {
|
||||
return `text(${raw.length} chars)`
|
||||
}
|
||||
if (Array.isArray(raw)) {
|
||||
return `array(${raw.length})`
|
||||
}
|
||||
if (raw && typeof raw === 'object') {
|
||||
return jsonStringify({
|
||||
payloadType: 'object',
|
||||
keys: Object.keys(raw as Record<string, unknown>)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
})
|
||||
}
|
||||
return typeof raw
|
||||
}
|
||||
|
||||
// ─── Zod Schemas ────────────────────────────────────────────────────────────
|
||||
|
||||
const TokenExchangeResponseSchema = lazySchema(() =>
|
||||
@@ -145,7 +163,7 @@ export async function discoverProtectedResource(
|
||||
)
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`XAA: PRM discovery failed: ${e instanceof Error ? e.message : String(e)}`,
|
||||
`XAA: PRM discovery failed (${e instanceof Error ? e.name : typeof e})`,
|
||||
)
|
||||
}
|
||||
if (!prm.resource || !prm.authorization_servers?.[0]) {
|
||||
@@ -154,9 +172,7 @@ export async function discoverProtectedResource(
|
||||
)
|
||||
}
|
||||
if (normalizeUrl(prm.resource) !== normalizeUrl(serverUrl)) {
|
||||
throw new Error(
|
||||
`XAA: PRM discovery failed: PRM resource mismatch: expected ${serverUrl}, got ${prm.resource}`,
|
||||
)
|
||||
throw new Error('XAA: PRM discovery failed: PRM resource mismatch')
|
||||
}
|
||||
return {
|
||||
resource: prm.resource,
|
||||
@@ -183,22 +199,16 @@ export async function discoverAuthorizationServer(
|
||||
fetchFn: opts?.fetchFn ?? defaultFetch,
|
||||
})
|
||||
if (!meta?.issuer || !meta.token_endpoint) {
|
||||
throw new Error(
|
||||
`XAA: AS metadata discovery failed: no valid metadata at ${asUrl}`,
|
||||
)
|
||||
throw new Error('XAA: AS metadata discovery failed: no valid metadata')
|
||||
}
|
||||
if (normalizeUrl(meta.issuer) !== normalizeUrl(asUrl)) {
|
||||
throw new Error(
|
||||
`XAA: AS metadata discovery failed: issuer mismatch: expected ${asUrl}, got ${meta.issuer}`,
|
||||
)
|
||||
throw new Error('XAA: AS metadata discovery failed: issuer mismatch')
|
||||
}
|
||||
// RFC 8414 §3.3 / RFC 9728 §3 require HTTPS. A PRM-advertised http:// AS
|
||||
// that self-consistently reports an http:// issuer would pass the mismatch
|
||||
// check above, then we'd POST id_token + client_secret over plaintext.
|
||||
if (new URL(meta.token_endpoint).protocol !== 'https:') {
|
||||
throw new Error(
|
||||
`XAA: refusing non-HTTPS token endpoint: ${meta.token_endpoint}`,
|
||||
)
|
||||
throw new Error('XAA: refusing non-HTTPS token endpoint')
|
||||
}
|
||||
return {
|
||||
issuer: meta.issuer,
|
||||
@@ -263,7 +273,7 @@ export async function requestJwtAuthorizationGrant(opts: {
|
||||
body: params,
|
||||
})
|
||||
if (!res.ok) {
|
||||
const body = redactTokens(await res.text()).slice(0, 200)
|
||||
const body = summarizeXaaPayload(redactTokens(await res.text()))
|
||||
// 4xx → id_token rejected (invalid_grant etc.), clear cache.
|
||||
// 5xx → IdP outage, id_token may still be valid, preserve it.
|
||||
const shouldClear = res.status < 500
|
||||
@@ -278,21 +288,25 @@ export async function requestJwtAuthorizationGrant(opts: {
|
||||
} catch {
|
||||
// Transient network condition (captive portal, proxy) — don't clear id_token.
|
||||
throw new XaaTokenExchangeError(
|
||||
`XAA: token exchange returned non-JSON (captive portal?) at ${opts.tokenEndpoint}`,
|
||||
'XAA: token exchange returned non-JSON response (captive portal?)',
|
||||
false,
|
||||
)
|
||||
}
|
||||
const exchangeParsed = TokenExchangeResponseSchema().safeParse(rawExchange)
|
||||
if (!exchangeParsed.success) {
|
||||
throw new XaaTokenExchangeError(
|
||||
`XAA: token exchange response did not match expected shape: ${redactTokens(rawExchange)}`,
|
||||
`XAA: token exchange response did not match expected shape: ${summarizeXaaPayload(
|
||||
redactTokens(rawExchange),
|
||||
)}`,
|
||||
true,
|
||||
)
|
||||
}
|
||||
const result = exchangeParsed.data
|
||||
if (!result.access_token) {
|
||||
throw new XaaTokenExchangeError(
|
||||
`XAA: token exchange response missing access_token: ${redactTokens(result)}`,
|
||||
`XAA: token exchange response missing access_token: ${summarizeXaaPayload(
|
||||
redactTokens(result),
|
||||
)}`,
|
||||
true,
|
||||
)
|
||||
}
|
||||
@@ -373,7 +387,7 @@ export async function exchangeJwtAuthGrant(opts: {
|
||||
body: params,
|
||||
})
|
||||
if (!res.ok) {
|
||||
const body = redactTokens(await res.text()).slice(0, 200)
|
||||
const body = summarizeXaaPayload(redactTokens(await res.text()))
|
||||
throw new Error(`XAA: jwt-bearer grant failed: HTTP ${res.status}: ${body}`)
|
||||
}
|
||||
let rawTokens: unknown
|
||||
@@ -381,13 +395,15 @@ export async function exchangeJwtAuthGrant(opts: {
|
||||
rawTokens = await res.json()
|
||||
} catch {
|
||||
throw new Error(
|
||||
`XAA: jwt-bearer grant returned non-JSON (captive portal?) at ${opts.tokenEndpoint}`,
|
||||
'XAA: jwt-bearer grant returned non-JSON response (captive portal?)',
|
||||
)
|
||||
}
|
||||
const tokensParsed = JwtBearerResponseSchema().safeParse(rawTokens)
|
||||
if (!tokensParsed.success) {
|
||||
throw new Error(
|
||||
`XAA: jwt-bearer response did not match expected shape: ${redactTokens(rawTokens)}`,
|
||||
`XAA: jwt-bearer response did not match expected shape: ${summarizeXaaPayload(
|
||||
redactTokens(rawTokens),
|
||||
)}`,
|
||||
)
|
||||
}
|
||||
return tokensParsed.data
|
||||
@@ -431,11 +447,14 @@ export async function performCrossAppAccess(
|
||||
): Promise<XaaResult> {
|
||||
const fetchFn = makeXaaFetch(abortSignal)
|
||||
|
||||
logMCPDebug(serverName, `XAA: discovering PRM for ${serverUrl}`)
|
||||
logMCPDebug(serverName, 'XAA: discovering protected resource metadata')
|
||||
const prm = await discoverProtectedResource(serverUrl, { fetchFn })
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`XAA: discovered resource=${prm.resource} ASes=[${prm.authorization_servers.join(', ')}]`,
|
||||
`XAA: discovered protected resource metadata ${jsonStringify({
|
||||
hasResource: Boolean(prm.resource),
|
||||
authorizationServerCount: prm.authorization_servers.length,
|
||||
})}`,
|
||||
)
|
||||
|
||||
// Try each advertised AS in order. grant_types_supported is OPTIONAL per
|
||||
@@ -449,16 +468,16 @@ export async function performCrossAppAccess(
|
||||
candidate = await discoverAuthorizationServer(asUrl, { fetchFn })
|
||||
} catch (e) {
|
||||
if (abortSignal?.aborted) throw e
|
||||
asErrors.push(`${asUrl}: ${e instanceof Error ? e.message : String(e)}`)
|
||||
asErrors.push(
|
||||
`authorization server discovery failed (${e instanceof Error ? e.name : typeof e})`,
|
||||
)
|
||||
continue
|
||||
}
|
||||
if (
|
||||
candidate.grant_types_supported &&
|
||||
!candidate.grant_types_supported.includes(JWT_BEARER_GRANT)
|
||||
) {
|
||||
asErrors.push(
|
||||
`${asUrl}: does not advertise jwt-bearer grant (supported: ${candidate.grant_types_supported.join(', ')})`,
|
||||
)
|
||||
asErrors.push('authorization server does not advertise jwt-bearer grant')
|
||||
continue
|
||||
}
|
||||
asMeta = candidate
|
||||
@@ -466,7 +485,7 @@ export async function performCrossAppAccess(
|
||||
}
|
||||
if (!asMeta) {
|
||||
throw new Error(
|
||||
`XAA: no authorization server supports jwt-bearer. Tried: ${asErrors.join('; ')}`,
|
||||
`XAA: no authorization server supports jwt-bearer (${asErrors.length} candidates tried)`,
|
||||
)
|
||||
}
|
||||
// Pick auth method from what the AS advertises. We handle
|
||||
@@ -481,7 +500,7 @@ export async function performCrossAppAccess(
|
||||
: 'client_secret_basic'
|
||||
logMCPDebug(
|
||||
serverName,
|
||||
`XAA: AS issuer=${asMeta.issuer} token_endpoint=${asMeta.token_endpoint} auth_method=${authMethod}`,
|
||||
`XAA: selected authorization server (auth_method=${authMethod})`,
|
||||
)
|
||||
|
||||
logMCPDebug(serverName, `XAA: exchanging id_token for ID-JAG at IdP`)
|
||||
|
||||
@@ -210,9 +210,7 @@ export async function discoverOidc(
|
||||
signal: AbortSignal.timeout(IDP_REQUEST_TIMEOUT_MS),
|
||||
})
|
||||
if (!res.ok) {
|
||||
throw new Error(
|
||||
`XAA IdP: OIDC discovery failed: HTTP ${res.status} at ${url}`,
|
||||
)
|
||||
throw new Error(`XAA IdP: OIDC discovery failed (HTTP ${res.status})`)
|
||||
}
|
||||
// Captive portals and proxy auth pages return 200 with HTML. res.json()
|
||||
// throws a raw SyntaxError before safeParse can give a useful message.
|
||||
@@ -221,17 +219,15 @@ export async function discoverOidc(
|
||||
body = await res.json()
|
||||
} catch {
|
||||
throw new Error(
|
||||
`XAA IdP: OIDC discovery returned non-JSON at ${url} (captive portal or proxy?)`,
|
||||
'XAA IdP: OIDC discovery returned non-JSON response (captive portal or proxy?)',
|
||||
)
|
||||
}
|
||||
const parsed = OpenIdProviderDiscoveryMetadataSchema.safeParse(body)
|
||||
if (!parsed.success) {
|
||||
throw new Error(`XAA IdP: invalid OIDC metadata: ${parsed.error.message}`)
|
||||
throw new Error('XAA IdP: invalid OIDC metadata')
|
||||
}
|
||||
if (new URL(parsed.data.token_endpoint).protocol !== 'https:') {
|
||||
throw new Error(
|
||||
`XAA IdP: refusing non-HTTPS token endpoint: ${parsed.data.token_endpoint}`,
|
||||
)
|
||||
throw new Error('XAA IdP: refusing non-HTTPS token endpoint')
|
||||
}
|
||||
return parsed.data
|
||||
}
|
||||
@@ -373,7 +369,7 @@ function waitForCallback(
|
||||
),
|
||||
)
|
||||
} else {
|
||||
rejectOnce(new Error(`XAA IdP: callback server failed: ${err.message}`))
|
||||
rejectOnce(new Error('XAA IdP: callback server failed'))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -405,11 +401,11 @@ export async function acquireIdpIdToken(
|
||||
|
||||
const cached = getCachedIdpIdToken(idpIssuer)
|
||||
if (cached) {
|
||||
logMCPDebug('xaa', `Using cached id_token for ${idpIssuer}`)
|
||||
logMCPDebug('xaa', 'Using cached id_token for configured IdP')
|
||||
return cached
|
||||
}
|
||||
|
||||
logMCPDebug('xaa', `No cached id_token for ${idpIssuer}; starting OIDC login`)
|
||||
logMCPDebug('xaa', 'No cached id_token for configured IdP; starting OIDC login')
|
||||
|
||||
const metadata = await discoverOidc(idpIssuer)
|
||||
const port = opts.callbackPort ?? (await findAvailablePort())
|
||||
@@ -478,10 +474,7 @@ export async function acquireIdpIdToken(
|
||||
: Date.now() + (tokens.expires_in ?? 3600) * 1000
|
||||
|
||||
saveIdpIdToken(idpIssuer, tokens.id_token, expiresAt)
|
||||
logMCPDebug(
|
||||
'xaa',
|
||||
`Cached id_token for ${idpIssuer} (expires ${new Date(expiresAt).toISOString()})`,
|
||||
)
|
||||
logMCPDebug('xaa', 'Cached id_token for configured IdP')
|
||||
|
||||
return tokens.id_token
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ export async function connectVoiceStream(
|
||||
|
||||
const url = `${wsBaseUrl}${VOICE_STREAM_PATH}?${params.toString()}`
|
||||
|
||||
logForDebugging(`[voice_stream] Connecting to ${url}`)
|
||||
logForDebugging('[voice_stream] Connecting to voice stream websocket')
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
Authorization: `Bearer ${tokens.accessToken}`,
|
||||
@@ -357,7 +357,7 @@ export async function connectVoiceStream(
|
||||
ws.on('message', (raw: Buffer | string) => {
|
||||
const text = raw.toString()
|
||||
logForDebugging(
|
||||
`[voice_stream] Message received (${String(text.length)} chars): ${text.slice(0, 200)}`,
|
||||
`[voice_stream] Message received (${String(text.length)} chars)`,
|
||||
)
|
||||
let msg: VoiceStreamMessage
|
||||
try {
|
||||
@@ -369,7 +369,9 @@ export async function connectVoiceStream(
|
||||
switch (msg.type) {
|
||||
case 'TranscriptText': {
|
||||
const transcript = msg.data
|
||||
logForDebugging(`[voice_stream] TranscriptText: "${transcript ?? ''}"`)
|
||||
logForDebugging(
|
||||
`[voice_stream] TranscriptText received (${String((transcript ?? '').length)} chars)`,
|
||||
)
|
||||
// Data arrived after CloseStream — disarm the no-data timer so
|
||||
// a slow-but-real flush isn't cut off. Only disarm once finalized
|
||||
// (CloseStream sent); pre-CloseStream data racing the deferred
|
||||
@@ -403,7 +405,7 @@ export async function connectVoiceStream(
|
||||
!prev.startsWith(next)
|
||||
) {
|
||||
logForDebugging(
|
||||
`[voice_stream] Auto-finalizing previous segment (new segment detected): "${lastTranscriptText}"`,
|
||||
'[voice_stream] Auto-finalizing previous segment (new segment detected)',
|
||||
)
|
||||
callbacks.onTranscript(lastTranscriptText, true)
|
||||
}
|
||||
@@ -416,7 +418,7 @@ export async function connectVoiceStream(
|
||||
}
|
||||
case 'TranscriptEndpoint': {
|
||||
logForDebugging(
|
||||
`[voice_stream] TranscriptEndpoint received, lastTranscriptText="${lastTranscriptText}"`,
|
||||
`[voice_stream] TranscriptEndpoint received (hasBufferedTranscript=${Boolean(lastTranscriptText)})`,
|
||||
)
|
||||
// The server signals the end of an utterance. Emit the last
|
||||
// TranscriptText as a final transcript so the caller can commit it.
|
||||
@@ -441,7 +443,9 @@ export async function connectVoiceStream(
|
||||
case 'TranscriptError': {
|
||||
const desc =
|
||||
msg.description ?? msg.error_code ?? 'unknown transcription error'
|
||||
logForDebugging(`[voice_stream] TranscriptError: ${desc}`)
|
||||
logForDebugging(
|
||||
`[voice_stream] TranscriptError received (${msg.error_code ?? 'unknown'})`,
|
||||
)
|
||||
if (!finalizing) {
|
||||
callbacks.onError(desc)
|
||||
}
|
||||
@@ -449,7 +453,7 @@ export async function connectVoiceStream(
|
||||
}
|
||||
case 'error': {
|
||||
const errorDetail = msg.message ?? jsonStringify(msg)
|
||||
logForDebugging(`[voice_stream] Server error: ${errorDetail}`)
|
||||
logForDebugging('[voice_stream] Server error received')
|
||||
if (!finalizing) {
|
||||
callbacks.onError(errorDetail)
|
||||
}
|
||||
|
||||
@@ -368,7 +368,7 @@ export async function setup(
|
||||
) // Start team memory sync watcher
|
||||
}
|
||||
}
|
||||
initSinks() // Attach error log + analytics sinks and drain queued events
|
||||
initSinks() // Attach error log sink and analytics compatibility stubs
|
||||
|
||||
// Session-success-rate denominator. Emit immediately after the analytics
|
||||
// sink is attached — before any parsing, fetching, or I/O that could throw.
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
import { mkdir, readdir, readFile, unlink, writeFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import { z } from 'zod/v4'
|
||||
import { getCwd } from '../../utils/cwd.js'
|
||||
import { logForDebugging } from '../../utils/debug.js'
|
||||
import { lazySchema } from '../../utils/lazySchema.js'
|
||||
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
|
||||
import { type AgentMemoryScope, getAgentMemoryDir } from './agentMemory.js'
|
||||
|
||||
const SNAPSHOT_BASE = 'agent-memory-snapshots'
|
||||
const SNAPSHOT_JSON = 'snapshot.json'
|
||||
const SYNCED_JSON = '.snapshot-synced.json'
|
||||
|
||||
const snapshotMetaSchema = lazySchema(() =>
|
||||
z.object({
|
||||
updatedAt: z.string().min(1),
|
||||
}),
|
||||
)
|
||||
|
||||
const syncedMetaSchema = lazySchema(() =>
|
||||
z.object({
|
||||
syncedFrom: z.string().min(1),
|
||||
}),
|
||||
)
|
||||
type SyncedMeta = z.infer<ReturnType<typeof syncedMetaSchema>>
|
||||
|
||||
/**
|
||||
* Returns the path to the snapshot directory for an agent in the current project.
|
||||
* e.g., <cwd>/.claude/agent-memory-snapshots/<agentType>/
|
||||
*/
|
||||
export function getSnapshotDirForAgent(agentType: string): string {
|
||||
return join(getCwd(), '.claude', SNAPSHOT_BASE, agentType)
|
||||
}
|
||||
|
||||
function getSnapshotJsonPath(agentType: string): string {
|
||||
return join(getSnapshotDirForAgent(agentType), SNAPSHOT_JSON)
|
||||
}
|
||||
|
||||
function getSyncedJsonPath(agentType: string, scope: AgentMemoryScope): string {
|
||||
return join(getAgentMemoryDir(agentType, scope), SYNCED_JSON)
|
||||
}
|
||||
|
||||
async function readJsonFile<T>(
|
||||
path: string,
|
||||
schema: z.ZodType<T>,
|
||||
): Promise<T | null> {
|
||||
try {
|
||||
const content = await readFile(path, { encoding: 'utf-8' })
|
||||
const result = schema.safeParse(jsonParse(content))
|
||||
return result.success ? result.data : null
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
async function copySnapshotToLocal(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
): Promise<void> {
|
||||
const snapshotMemDir = getSnapshotDirForAgent(agentType)
|
||||
const localMemDir = getAgentMemoryDir(agentType, scope)
|
||||
|
||||
await mkdir(localMemDir, { recursive: true })
|
||||
|
||||
try {
|
||||
const files = await readdir(snapshotMemDir, { withFileTypes: true })
|
||||
for (const dirent of files) {
|
||||
if (!dirent.isFile() || dirent.name === SNAPSHOT_JSON) continue
|
||||
const content = await readFile(join(snapshotMemDir, dirent.name), {
|
||||
encoding: 'utf-8',
|
||||
})
|
||||
await writeFile(join(localMemDir, dirent.name), content)
|
||||
}
|
||||
} catch (e) {
|
||||
logForDebugging(`Failed to copy snapshot to local agent memory: ${e}`)
|
||||
}
|
||||
}
|
||||
|
||||
async function saveSyncedMeta(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
snapshotTimestamp: string,
|
||||
): Promise<void> {
|
||||
const syncedPath = getSyncedJsonPath(agentType, scope)
|
||||
const localMemDir = getAgentMemoryDir(agentType, scope)
|
||||
await mkdir(localMemDir, { recursive: true })
|
||||
const meta: SyncedMeta = { syncedFrom: snapshotTimestamp }
|
||||
try {
|
||||
await writeFile(syncedPath, jsonStringify(meta))
|
||||
} catch (e) {
|
||||
logForDebugging(`Failed to save snapshot sync metadata: ${e}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a snapshot exists and whether it's newer than what we last synced.
|
||||
*/
|
||||
export async function checkAgentMemorySnapshot(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
): Promise<{
|
||||
action: 'none' | 'initialize' | 'prompt-update'
|
||||
snapshotTimestamp?: string
|
||||
}> {
|
||||
const snapshotMeta = await readJsonFile(
|
||||
getSnapshotJsonPath(agentType),
|
||||
snapshotMetaSchema(),
|
||||
)
|
||||
|
||||
if (!snapshotMeta) {
|
||||
return { action: 'none' }
|
||||
}
|
||||
|
||||
const localMemDir = getAgentMemoryDir(agentType, scope)
|
||||
|
||||
let hasLocalMemory = false
|
||||
try {
|
||||
const dirents = await readdir(localMemDir, { withFileTypes: true })
|
||||
hasLocalMemory = dirents.some(d => d.isFile() && d.name.endsWith('.md'))
|
||||
} catch {
|
||||
// Directory doesn't exist
|
||||
}
|
||||
|
||||
if (!hasLocalMemory) {
|
||||
return { action: 'initialize', snapshotTimestamp: snapshotMeta.updatedAt }
|
||||
}
|
||||
|
||||
const syncedMeta = await readJsonFile(
|
||||
getSyncedJsonPath(agentType, scope),
|
||||
syncedMetaSchema(),
|
||||
)
|
||||
|
||||
if (
|
||||
!syncedMeta ||
|
||||
new Date(snapshotMeta.updatedAt) > new Date(syncedMeta.syncedFrom)
|
||||
) {
|
||||
return {
|
||||
action: 'prompt-update',
|
||||
snapshotTimestamp: snapshotMeta.updatedAt,
|
||||
}
|
||||
}
|
||||
|
||||
return { action: 'none' }
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize local agent memory from a snapshot (first-time setup).
|
||||
*/
|
||||
export async function initializeFromSnapshot(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
snapshotTimestamp: string,
|
||||
): Promise<void> {
|
||||
logForDebugging(
|
||||
`Initializing agent memory for ${agentType} from project snapshot`,
|
||||
)
|
||||
await copySnapshotToLocal(agentType, scope)
|
||||
await saveSyncedMeta(agentType, scope, snapshotTimestamp)
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace local agent memory with the snapshot.
|
||||
*/
|
||||
export async function replaceFromSnapshot(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
snapshotTimestamp: string,
|
||||
): Promise<void> {
|
||||
logForDebugging(
|
||||
`Replacing agent memory for ${agentType} with project snapshot`,
|
||||
)
|
||||
// Remove existing .md files before copying to avoid orphans
|
||||
const localMemDir = getAgentMemoryDir(agentType, scope)
|
||||
try {
|
||||
const existing = await readdir(localMemDir, { withFileTypes: true })
|
||||
for (const dirent of existing) {
|
||||
if (dirent.isFile() && dirent.name.endsWith('.md')) {
|
||||
await unlink(join(localMemDir, dirent.name))
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Directory may not exist yet
|
||||
}
|
||||
await copySnapshotToLocal(agentType, scope)
|
||||
await saveSyncedMeta(agentType, scope, snapshotTimestamp)
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the current snapshot as synced without changing local memory.
|
||||
*/
|
||||
export async function markSnapshotSynced(
|
||||
agentType: string,
|
||||
scope: AgentMemoryScope,
|
||||
snapshotTimestamp: string,
|
||||
): Promise<void> {
|
||||
await saveSyncedMeta(agentType, scope, snapshotTimestamp)
|
||||
}
|
||||
@@ -47,10 +47,6 @@ import {
|
||||
setAgentColor,
|
||||
} from './agentColorManager.js'
|
||||
import { type AgentMemoryScope, loadAgentMemoryPrompt } from './agentMemory.js'
|
||||
import {
|
||||
checkAgentMemorySnapshot,
|
||||
initializeFromSnapshot,
|
||||
} from './agentMemorySnapshot.js'
|
||||
import { getBuiltInAgents } from './builtInAgents.js'
|
||||
|
||||
// Type for MCP server specification in agent definitions
|
||||
@@ -255,41 +251,14 @@ export function filterAgentsByMcpRequirements(
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for and initialize agent memory from project snapshots.
|
||||
* For agents with memory enabled, copies snapshot to local if no local memory exists.
|
||||
* For agents with newer snapshots, logs a debug message (user prompt TODO).
|
||||
* Agent memory snapshot sync is disabled in this fork to avoid copying
|
||||
* project-scoped memory into persistent user/local agent memory.
|
||||
*/
|
||||
async function initializeAgentMemorySnapshots(
|
||||
agents: CustomAgentDefinition[],
|
||||
_agents: CustomAgentDefinition[],
|
||||
): Promise<void> {
|
||||
await Promise.all(
|
||||
agents.map(async agent => {
|
||||
if (agent.memory !== 'user') return
|
||||
const result = await checkAgentMemorySnapshot(
|
||||
agent.agentType,
|
||||
agent.memory,
|
||||
)
|
||||
switch (result.action) {
|
||||
case 'initialize':
|
||||
logForDebugging(
|
||||
`Initializing ${agent.agentType} memory from project snapshot`,
|
||||
)
|
||||
await initializeFromSnapshot(
|
||||
agent.agentType,
|
||||
agent.memory,
|
||||
result.snapshotTimestamp!,
|
||||
)
|
||||
break
|
||||
case 'prompt-update':
|
||||
agent.pendingSnapshotUpdate = {
|
||||
snapshotTimestamp: result.snapshotTimestamp!,
|
||||
}
|
||||
logForDebugging(
|
||||
`Newer snapshot available for ${agent.agentType} memory (snapshot: ${result.snapshotTimestamp})`,
|
||||
)
|
||||
break
|
||||
}
|
||||
}),
|
||||
logForDebugging(
|
||||
'[loadAgentsDir] Agent memory snapshot sync is disabled in this build',
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -57,6 +57,47 @@ function debug(msg: string): void {
|
||||
logForDebugging(`[brief:upload] ${msg}`)
|
||||
}
|
||||
|
||||
function summarizeUploadError(error: unknown): string {
|
||||
const summary: Record<string, boolean | number | string> = {}
|
||||
|
||||
if (error instanceof Error) {
|
||||
summary.errorType = error.constructor.name
|
||||
summary.errorName = error.name
|
||||
summary.hasMessage = error.message.length > 0
|
||||
} else {
|
||||
summary.errorType = typeof error
|
||||
summary.hasValue = error !== undefined && error !== null
|
||||
}
|
||||
|
||||
if (axios.isAxiosError(error)) {
|
||||
summary.errorType = 'AxiosError'
|
||||
if (error.code) {
|
||||
summary.axiosCode = error.code
|
||||
}
|
||||
if (typeof error.response?.status === 'number') {
|
||||
summary.httpStatus = error.response.status
|
||||
}
|
||||
summary.hasResponseData = error.response?.data !== undefined
|
||||
}
|
||||
|
||||
return jsonStringify(summary)
|
||||
}
|
||||
|
||||
function summarizeUploadResponse(data: unknown): string {
|
||||
if (data === undefined) return 'undefined'
|
||||
if (data === null) return 'null'
|
||||
if (Array.isArray(data)) return `array(${data.length})`
|
||||
if (typeof data === 'object') {
|
||||
return jsonStringify({
|
||||
responseType: 'object',
|
||||
keys: Object.keys(data as Record<string, unknown>)
|
||||
.sort()
|
||||
.slice(0, 10),
|
||||
})
|
||||
}
|
||||
return typeof data
|
||||
}
|
||||
|
||||
/**
|
||||
* Base URL for uploads. Must match the host the token is valid for.
|
||||
*
|
||||
@@ -100,7 +141,9 @@ export async function uploadBriefAttachment(
|
||||
if (!ctx.replBridgeEnabled) return undefined
|
||||
|
||||
if (size > MAX_UPLOAD_BYTES) {
|
||||
debug(`skip ${fullPath}: ${size} bytes exceeds ${MAX_UPLOAD_BYTES} limit`)
|
||||
debug(
|
||||
`skip attachment upload: ${size} bytes exceeds ${MAX_UPLOAD_BYTES} limit`,
|
||||
)
|
||||
return undefined
|
||||
}
|
||||
|
||||
@@ -114,7 +157,7 @@ export async function uploadBriefAttachment(
|
||||
try {
|
||||
content = await readFile(fullPath)
|
||||
} catch (e) {
|
||||
debug(`read failed for ${fullPath}: ${e}`)
|
||||
debug(`read failed before upload: ${summarizeUploadError(e)}`)
|
||||
return undefined
|
||||
}
|
||||
|
||||
@@ -150,23 +193,23 @@ export async function uploadBriefAttachment(
|
||||
|
||||
if (response.status !== 201) {
|
||||
debug(
|
||||
`upload failed for ${fullPath}: status=${response.status} body=${jsonStringify(response.data).slice(0, 200)}`,
|
||||
`upload failed: status=${response.status} response=${summarizeUploadResponse(
|
||||
response.data,
|
||||
)}`,
|
||||
)
|
||||
return undefined
|
||||
}
|
||||
|
||||
const parsed = uploadResponseSchema().safeParse(response.data)
|
||||
if (!parsed.success) {
|
||||
debug(
|
||||
`unexpected response shape for ${fullPath}: ${parsed.error.message}`,
|
||||
)
|
||||
debug('unexpected upload response shape')
|
||||
return undefined
|
||||
}
|
||||
|
||||
debug(`uploaded ${fullPath} → ${parsed.data.file_uuid} (${size} bytes)`)
|
||||
debug(`uploaded attachment (${size} bytes)`)
|
||||
return parsed.data.file_uuid
|
||||
} catch (e) {
|
||||
debug(`upload threw for ${fullPath}: ${e}`)
|
||||
debug(`upload threw: ${summarizeUploadError(e)}`)
|
||||
return undefined
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ import type {
|
||||
ScopedMcpServerConfig,
|
||||
} from '../../services/mcp/types.js'
|
||||
import type { Tool } from '../../Tool.js'
|
||||
import { errorMessage } from '../../utils/errors.js'
|
||||
import { lazySchema } from '../../utils/lazySchema.js'
|
||||
import { logMCPDebug, logMCPError } from '../../utils/log.js'
|
||||
import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js'
|
||||
@@ -29,9 +28,11 @@ export type McpAuthOutput = {
|
||||
authUrl?: string
|
||||
}
|
||||
|
||||
function getConfigUrl(config: ScopedMcpServerConfig): string | undefined {
|
||||
if ('url' in config) return config.url
|
||||
return undefined
|
||||
function summarizeMcpAuthToolError(error: unknown): string {
|
||||
if (error instanceof Error) {
|
||||
return `${error.name} (hasMessage=${error.message.length > 0})`
|
||||
}
|
||||
return `non-Error (${typeof error})`
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -50,12 +51,10 @@ export function createMcpAuthTool(
|
||||
serverName: string,
|
||||
config: ScopedMcpServerConfig,
|
||||
): Tool<InputSchema, McpAuthOutput> {
|
||||
const url = getConfigUrl(config)
|
||||
const transport = config.type ?? 'stdio'
|
||||
const location = url ? `${transport} at ${url}` : transport
|
||||
|
||||
const description =
|
||||
`The \`${serverName}\` MCP server (${location}) is installed but requires authentication. ` +
|
||||
`The \`${serverName}\` MCP server (${transport}) is installed but requires authentication. ` +
|
||||
`Call this tool to start the OAuth flow — you'll receive an authorization URL to share with the user. ` +
|
||||
`Once the user completes authorization in their browser, the server's real tools will become available automatically.`
|
||||
|
||||
@@ -167,7 +166,9 @@ export function createMcpAuthTool(
|
||||
.catch(err => {
|
||||
logMCPError(
|
||||
serverName,
|
||||
`OAuth flow failed after tool-triggered start: ${errorMessage(err)}`,
|
||||
`OAuth flow failed after tool-triggered start: ${summarizeMcpAuthToolError(
|
||||
err,
|
||||
)}`,
|
||||
)
|
||||
})
|
||||
|
||||
@@ -199,7 +200,7 @@ export function createMcpAuthTool(
|
||||
return {
|
||||
data: {
|
||||
status: 'error' as const,
|
||||
message: `Failed to start OAuth flow for ${serverName}: ${errorMessage(err)}. Ask the user to run /mcp and authenticate manually.`,
|
||||
message: `Failed to start OAuth flow for ${serverName}. Ask the user to run /mcp and authenticate manually.`,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { z } from 'zod/v4'
|
||||
import { getSessionId } from '../../bootstrap/state.js'
|
||||
import { logEvent } from '../../services/analytics/index.js'
|
||||
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../../services/analytics/metadata.js'
|
||||
import type { Tool } from '../../Tool.js'
|
||||
@@ -159,7 +158,6 @@ export const TeamCreateTool: Tool<InputSchema, Output> = buildTool({
|
||||
description: _description,
|
||||
createdAt: Date.now(),
|
||||
leadAgentId,
|
||||
leadSessionId: getSessionId(), // Store actual session ID for team discovery
|
||||
members: [
|
||||
{
|
||||
agentId: leadAgentId,
|
||||
@@ -169,7 +167,6 @@ export const TeamCreateTool: Tool<InputSchema, Output> = buildTool({
|
||||
joinedAt: Date.now(),
|
||||
tmuxPaneId: '',
|
||||
cwd: getCwd(),
|
||||
subscriptions: [],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
@@ -497,13 +497,11 @@ async function handleSpawnSplitPane(
|
||||
name: sanitizedName,
|
||||
agentType: agent_type,
|
||||
model,
|
||||
prompt,
|
||||
color: teammateColor,
|
||||
planModeRequired: plan_mode_required,
|
||||
joinedAt: Date.now(),
|
||||
tmuxPaneId: paneId,
|
||||
cwd: workingDir,
|
||||
subscriptions: [],
|
||||
backendType: detectionResult.backend.type,
|
||||
})
|
||||
await writeTeamFileAsync(teamName, teamFile)
|
||||
@@ -711,13 +709,11 @@ async function handleSpawnSeparateWindow(
|
||||
name: sanitizedName,
|
||||
agentType: agent_type,
|
||||
model,
|
||||
prompt,
|
||||
color: teammateColor,
|
||||
planModeRequired: plan_mode_required,
|
||||
joinedAt: Date.now(),
|
||||
tmuxPaneId: paneId,
|
||||
cwd: workingDir,
|
||||
subscriptions: [],
|
||||
backendType: 'tmux', // This handler always uses tmux directly
|
||||
})
|
||||
await writeTeamFileAsync(teamName, teamFile)
|
||||
@@ -997,13 +993,11 @@ async function handleSpawnInProcess(
|
||||
name: sanitizedName,
|
||||
agentType: agent_type,
|
||||
model,
|
||||
prompt,
|
||||
color: teammateColor,
|
||||
planModeRequired: plan_mode_required,
|
||||
joinedAt: Date.now(),
|
||||
tmuxPaneId: 'in-process',
|
||||
cwd: getCwd(),
|
||||
subscriptions: [],
|
||||
backendType: 'in-process',
|
||||
})
|
||||
await writeTeamFileAsync(teamName, teamFile)
|
||||
|
||||
@@ -40,7 +40,7 @@ export function maybePersistTokenForSubprocesses(
|
||||
mkdirSync(CCR_TOKEN_DIR, { recursive: true, mode: 0o700 })
|
||||
// eslint-disable-next-line custom-rules/no-sync-fs -- one-shot startup write in CCR, caller is sync
|
||||
writeFileSync(path, token, { encoding: 'utf8', mode: 0o600 })
|
||||
logForDebugging(`Persisted ${tokenName} to ${path} for subprocess access`)
|
||||
logForDebugging(`Persisted ${tokenName} for subprocess access`)
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`Failed to persist ${tokenName} to disk (non-fatal): ${errorMessage(error)}`,
|
||||
@@ -65,7 +65,7 @@ export function readTokenFromWellKnownFile(
|
||||
if (!token) {
|
||||
return null
|
||||
}
|
||||
logForDebugging(`Read ${tokenName} from well-known file ${path}`)
|
||||
logForDebugging(`Read ${tokenName} from well-known file`)
|
||||
return token
|
||||
} catch (error) {
|
||||
// ENOENT is the expected outcome outside CCR — stay silent. Anything
|
||||
@@ -73,7 +73,7 @@ export function readTokenFromWellKnownFile(
|
||||
// debug log so subprocess auth failures aren't mysterious.
|
||||
if (!isENOENT(error)) {
|
||||
logForDebugging(
|
||||
`Failed to read ${tokenName} from ${path}: ${errorMessage(error)}`,
|
||||
`Failed to read ${tokenName} from well-known file: ${errorMessage(error)}`,
|
||||
{ level: 'debug' },
|
||||
)
|
||||
}
|
||||
@@ -124,7 +124,7 @@ function getCredentialFromFd({
|
||||
const fd = parseInt(fdEnv, 10)
|
||||
if (Number.isNaN(fd)) {
|
||||
logForDebugging(
|
||||
`${envVar} must be a valid file descriptor number, got: ${fdEnv}`,
|
||||
`${envVar} must be a valid file descriptor number`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
setCached(null)
|
||||
@@ -148,13 +148,13 @@ function getCredentialFromFd({
|
||||
setCached(null)
|
||||
return null
|
||||
}
|
||||
logForDebugging(`Successfully read ${label} from file descriptor ${fd}`)
|
||||
logForDebugging(`Successfully read ${label} from file descriptor`)
|
||||
setCached(token)
|
||||
maybePersistTokenForSubprocesses(wellKnownPath, token, label)
|
||||
return token
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`Failed to read ${label} from file descriptor ${fd}: ${errorMessage(error)}`,
|
||||
`Failed to read ${label} from file descriptor: ${errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
// FD env var was set but read failed — typically a subprocess that
|
||||
|
||||
@@ -17,12 +17,19 @@ import {
|
||||
filterExistingPaths,
|
||||
getKnownPathsForRepo,
|
||||
} from '../githubRepoPathMapping.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
import { readLastFetchTime } from './banner.js'
|
||||
import { parseDeepLink } from './parseDeepLink.js'
|
||||
import { MACOS_BUNDLE_ID } from './registerProtocol.js'
|
||||
import { launchInTerminal } from './terminalLauncher.js'
|
||||
|
||||
function summarizeDeepLinkAction(action: {
|
||||
query?: string
|
||||
cwd?: string
|
||||
repo?: string
|
||||
}): string {
|
||||
return `hasQuery=${Boolean(action.query)} hasCwd=${Boolean(action.cwd)} hasRepo=${Boolean(action.repo)}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle an incoming deep link URI.
|
||||
*
|
||||
@@ -34,7 +41,7 @@ import { launchInTerminal } from './terminalLauncher.js'
|
||||
* @returns exit code (0 = success)
|
||||
*/
|
||||
export async function handleDeepLinkUri(uri: string): Promise<number> {
|
||||
logForDebugging(`Handling deep link URI: ${uri}`)
|
||||
logForDebugging('Handling deep link URI')
|
||||
|
||||
let action
|
||||
try {
|
||||
@@ -46,7 +53,7 @@ export async function handleDeepLinkUri(uri: string): Promise<number> {
|
||||
return 1
|
||||
}
|
||||
|
||||
logForDebugging(`Parsed deep link action: ${jsonStringify(action)}`)
|
||||
logForDebugging(`Parsed deep link action (${summarizeDeepLinkAction(action)})`)
|
||||
|
||||
// Always the running executable — no PATH lookup. The OS launched us via
|
||||
// an absolute path (bundle symlink / .desktop Exec= / registry command)
|
||||
@@ -125,11 +132,11 @@ async function resolveCwd(action: {
|
||||
const known = getKnownPathsForRepo(action.repo)
|
||||
const existing = await filterExistingPaths(known)
|
||||
if (existing[0]) {
|
||||
logForDebugging(`Resolved repo ${action.repo} → ${existing[0]}`)
|
||||
logForDebugging('Resolved repo deep link to local clone')
|
||||
return { cwd: existing[0], resolvedRepo: action.repo }
|
||||
}
|
||||
logForDebugging(
|
||||
`No local clone found for repo ${action.repo}, falling back to home`,
|
||||
'No local clone found for repo deep link, falling back to home',
|
||||
)
|
||||
}
|
||||
return { cwd: homedir() }
|
||||
|
||||
@@ -116,7 +116,6 @@ function appendToLog(path: string, message: object): void {
|
||||
const messageWithTimestamp = {
|
||||
timestamp: new Date().toISOString(),
|
||||
...message,
|
||||
cwd: getFsImplementation().cwd(),
|
||||
userType: process.env.USER_TYPE,
|
||||
sessionId: getSessionId(),
|
||||
version: MACRO.VERSION,
|
||||
@@ -125,25 +124,12 @@ function appendToLog(path: string, message: object): void {
|
||||
getLogWriter(path).write(messageWithTimestamp)
|
||||
}
|
||||
|
||||
function extractServerMessage(data: unknown): string | undefined {
|
||||
if (typeof data === 'string') {
|
||||
return data
|
||||
function summarizeUrlForLogs(url: string): string | undefined {
|
||||
try {
|
||||
return new URL(url).host || undefined
|
||||
} catch {
|
||||
return undefined
|
||||
}
|
||||
if (data && typeof data === 'object') {
|
||||
const obj = data as Record<string, unknown>
|
||||
if (typeof obj.message === 'string') {
|
||||
return obj.message
|
||||
}
|
||||
if (
|
||||
typeof obj.error === 'object' &&
|
||||
obj.error &&
|
||||
'message' in obj.error &&
|
||||
typeof (obj.error as Record<string, unknown>).message === 'string'
|
||||
) {
|
||||
return (obj.error as Record<string, unknown>).message as string
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -155,15 +141,15 @@ function logErrorImpl(error: Error): void {
|
||||
// Enrich axios errors with request URL, status, and server message for debugging
|
||||
let context = ''
|
||||
if (axios.isAxiosError(error) && error.config?.url) {
|
||||
const parts = [`url=${error.config.url}`]
|
||||
const parts: string[] = []
|
||||
const host = summarizeUrlForLogs(error.config.url)
|
||||
if (host) {
|
||||
parts.push(`host=${host}`)
|
||||
}
|
||||
if (error.response?.status !== undefined) {
|
||||
parts.push(`status=${error.response.status}`)
|
||||
}
|
||||
const serverMessage = extractServerMessage(error.response?.data)
|
||||
if (serverMessage) {
|
||||
parts.push(`body=${serverMessage}`)
|
||||
}
|
||||
context = `[${parts.join(',')}] `
|
||||
context = parts.length > 0 ? `[${parts.join(',')}] ` : ''
|
||||
}
|
||||
|
||||
logForDebugging(`${error.name}: ${context}${errorStr}`, { level: 'error' })
|
||||
@@ -188,7 +174,6 @@ function logMCPErrorImpl(serverName: string, error: unknown): void {
|
||||
error: errorStr,
|
||||
timestamp: new Date().toISOString(),
|
||||
sessionId: getSessionId(),
|
||||
cwd: getFsImplementation().cwd(),
|
||||
}
|
||||
|
||||
getLogWriter(logFile).write(errorInfo)
|
||||
@@ -206,7 +191,6 @@ function logMCPDebugImpl(serverName: string, message: string): void {
|
||||
debug: message,
|
||||
timestamp: new Date().toISOString(),
|
||||
sessionId: getSessionId(),
|
||||
cwd: getFsImplementation().cwd(),
|
||||
}
|
||||
|
||||
getLogWriter(logFile).write(debugInfo)
|
||||
|
||||
@@ -301,7 +301,7 @@ export const setupGracefulShutdown = memoize(() => {
|
||||
process.on('uncaughtException', error => {
|
||||
logForDiagnosticsNoPII('error', 'uncaught_exception', {
|
||||
error_name: error.name,
|
||||
error_message: error.message.slice(0, 2000),
|
||||
has_message: error.message.length > 0,
|
||||
})
|
||||
logEvent('tengu_uncaught_exception', {
|
||||
error_name:
|
||||
@@ -321,10 +321,10 @@ export const setupGracefulShutdown = memoize(() => {
|
||||
reason instanceof Error
|
||||
? {
|
||||
error_name: reason.name,
|
||||
error_message: reason.message.slice(0, 2000),
|
||||
error_stack: reason.stack?.slice(0, 4000),
|
||||
has_message: reason.message.length > 0,
|
||||
has_stack: Boolean(reason.stack),
|
||||
}
|
||||
: { error_message: String(reason).slice(0, 2000) }
|
||||
: { reason_type: typeof reason }
|
||||
logForDiagnosticsNoPII('error', 'unhandled_rejection', errorInfo)
|
||||
logEvent('tengu_unhandled_rejection', {
|
||||
error_name:
|
||||
|
||||
@@ -78,13 +78,13 @@ export async function getSessionEnvironmentScript(): Promise<string | null> {
|
||||
if (envScript) {
|
||||
scripts.push(envScript)
|
||||
logForDebugging(
|
||||
`Session environment loaded from CLAUDE_ENV_FILE: ${envFile} (${envScript.length} chars)`,
|
||||
`Session environment loaded from CLAUDE_ENV_FILE (${envScript.length} chars)`,
|
||||
)
|
||||
}
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code !== 'ENOENT') {
|
||||
logForDebugging(`Failed to read CLAUDE_ENV_FILE: ${errorMessage(e)}`)
|
||||
logForDebugging('Failed to read CLAUDE_ENV_FILE')
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -109,9 +109,7 @@ export async function getSessionEnvironmentScript(): Promise<string | null> {
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code !== 'ENOENT') {
|
||||
logForDebugging(
|
||||
`Failed to read hook file ${filePath}: ${errorMessage(e)}`,
|
||||
)
|
||||
logForDebugging(`Failed to read hook env file ${file}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ function getTokenFromFileDescriptor(): string | null {
|
||||
const fd = parseInt(fdEnv, 10)
|
||||
if (Number.isNaN(fd)) {
|
||||
logForDebugging(
|
||||
`CLAUDE_CODE_WEBSOCKET_AUTH_FILE_DESCRIPTOR must be a valid file descriptor number, got: ${fdEnv}`,
|
||||
'CLAUDE_CODE_WEBSOCKET_AUTH_FILE_DESCRIPTOR must be a valid file descriptor number',
|
||||
{ level: 'error' },
|
||||
)
|
||||
setSessionIngressToken(null)
|
||||
@@ -61,7 +61,7 @@ function getTokenFromFileDescriptor(): string | null {
|
||||
setSessionIngressToken(null)
|
||||
return null
|
||||
}
|
||||
logForDebugging(`Successfully read token from file descriptor ${fd}`)
|
||||
logForDebugging('Successfully read token from file descriptor')
|
||||
setSessionIngressToken(token)
|
||||
maybePersistTokenForSubprocesses(
|
||||
CCR_SESSION_INGRESS_TOKEN_PATH,
|
||||
@@ -71,7 +71,7 @@ function getTokenFromFileDescriptor(): string | null {
|
||||
return token
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`Failed to read token from file descriptor ${fd}: ${errorMessage(error)}`,
|
||||
`Failed to read token from file descriptor: ${errorMessage(error)}`,
|
||||
{ level: 'error' },
|
||||
)
|
||||
// FD env var was set but read failed — typically a subprocess that
|
||||
|
||||
@@ -1344,7 +1344,11 @@ class Project {
|
||||
|
||||
setRemoteIngressUrl(url: string): void {
|
||||
this.remoteIngressUrl = url
|
||||
logForDebugging(`Remote persistence enabled with URL: ${url}`)
|
||||
logForDebugging(
|
||||
url
|
||||
? 'Remote persistence enabled (remote ingress configured)'
|
||||
: 'Remote persistence disabled',
|
||||
)
|
||||
if (url) {
|
||||
// If using CCR, don't delay messages by any more than 10ms.
|
||||
this.FLUSH_INTERVAL_MS = REMOTE_FLUSH_INTERVAL_MS
|
||||
|
||||
@@ -2,10 +2,9 @@ import { initializeAnalyticsSink } from '../services/analytics/sink.js'
|
||||
import { initializeErrorLogSink } from './errorLogSink.js'
|
||||
|
||||
/**
|
||||
* Attach error log and analytics sinks, draining any events queued before
|
||||
* attachment. Both inits are idempotent. Called from setup() for the default
|
||||
* command; other entrypoints (subcommands, daemon, bridge) call this directly
|
||||
* since they bypass setup().
|
||||
* Attach error log and analytics compatibility sinks. Both inits are
|
||||
* idempotent. Called from setup() for the default command; other entrypoints
|
||||
* (subcommands, daemon, bridge) call this directly since they bypass setup().
|
||||
*
|
||||
* Leaf module — kept out of setup.ts to avoid the setup → commands → bridge
|
||||
* → setup import cycle.
|
||||
|
||||
@@ -18,16 +18,10 @@
|
||||
* 6. Worker polls mailbox for responses and continues execution
|
||||
*/
|
||||
|
||||
import { mkdir, readdir, readFile, unlink, writeFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import { z } from 'zod/v4'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { getErrnoCode } from '../errors.js'
|
||||
import { lazySchema } from '../lazySchema.js'
|
||||
import * as lockfile from '../lockfile.js'
|
||||
import { logError } from '../log.js'
|
||||
import type { PermissionUpdate } from '../permissions/PermissionUpdateSchema.js'
|
||||
import { jsonParse, jsonStringify } from '../slowOperations.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
import {
|
||||
getAgentId,
|
||||
getAgentName,
|
||||
@@ -41,53 +35,44 @@ import {
|
||||
createSandboxPermissionResponseMessage,
|
||||
writeToMailbox,
|
||||
} from '../teammateMailbox.js'
|
||||
import { getTeamDir, readTeamFileAsync } from './teamHelpers.js'
|
||||
import { readTeamFileAsync } from './teamHelpers.js'
|
||||
|
||||
/**
|
||||
* Full request schema for a permission request from a worker to the leader
|
||||
*/
|
||||
export const SwarmPermissionRequestSchema = lazySchema(() =>
|
||||
z.object({
|
||||
/** Unique identifier for this request */
|
||||
id: z.string(),
|
||||
/** Worker's CLAUDE_CODE_AGENT_ID */
|
||||
workerId: z.string(),
|
||||
/** Worker's CLAUDE_CODE_AGENT_NAME */
|
||||
workerName: z.string(),
|
||||
/** Worker's CLAUDE_CODE_AGENT_COLOR */
|
||||
workerColor: z.string().optional(),
|
||||
/** Team name for routing */
|
||||
teamName: z.string(),
|
||||
/** Tool name requiring permission (e.g., "Bash", "Edit") */
|
||||
toolName: z.string(),
|
||||
/** Original toolUseID from worker's context */
|
||||
toolUseId: z.string(),
|
||||
/** Human-readable description of the tool use */
|
||||
description: z.string(),
|
||||
/** Serialized tool input */
|
||||
input: z.record(z.string(), z.unknown()),
|
||||
/** Suggested permission rules from the permission result */
|
||||
permissionSuggestions: z.array(z.unknown()),
|
||||
/** Status of the request */
|
||||
status: z.enum(['pending', 'approved', 'rejected']),
|
||||
/** Who resolved the request */
|
||||
resolvedBy: z.enum(['worker', 'leader']).optional(),
|
||||
/** Timestamp when resolved */
|
||||
resolvedAt: z.number().optional(),
|
||||
/** Rejection feedback message */
|
||||
feedback: z.string().optional(),
|
||||
/** Modified input if changed by resolver */
|
||||
updatedInput: z.record(z.string(), z.unknown()).optional(),
|
||||
/** "Always allow" rules applied during resolution */
|
||||
permissionUpdates: z.array(z.unknown()).optional(),
|
||||
/** Timestamp when request was created */
|
||||
createdAt: z.number(),
|
||||
}),
|
||||
)
|
||||
|
||||
export type SwarmPermissionRequest = z.infer<
|
||||
ReturnType<typeof SwarmPermissionRequestSchema>
|
||||
>
|
||||
export type SwarmPermissionRequest = {
|
||||
/** Unique identifier for this request */
|
||||
id: string
|
||||
/** Worker's CLAUDE_CODE_AGENT_ID */
|
||||
workerId: string
|
||||
/** Worker's CLAUDE_CODE_AGENT_NAME */
|
||||
workerName: string
|
||||
/** Worker's CLAUDE_CODE_AGENT_COLOR */
|
||||
workerColor?: string
|
||||
/** Team name for routing */
|
||||
teamName: string
|
||||
/** Tool name requiring permission (e.g., "Bash", "Edit") */
|
||||
toolName: string
|
||||
/** Original toolUseID from worker's context */
|
||||
toolUseId: string
|
||||
/** Human-readable description of the tool use */
|
||||
description: string
|
||||
/** Serialized tool input */
|
||||
input: Record<string, unknown>
|
||||
/** Suggested permission rules from the permission result */
|
||||
permissionSuggestions: unknown[]
|
||||
/** Status of the request */
|
||||
status: 'pending' | 'approved' | 'rejected'
|
||||
/** Who resolved the request */
|
||||
resolvedBy?: 'worker' | 'leader'
|
||||
/** Timestamp when resolved */
|
||||
resolvedAt?: number
|
||||
/** Rejection feedback message */
|
||||
feedback?: string
|
||||
/** Modified input if changed by resolver */
|
||||
updatedInput?: Record<string, unknown>
|
||||
/** "Always allow" rules applied during resolution */
|
||||
permissionUpdates?: unknown[]
|
||||
/** Timestamp when request was created */
|
||||
createdAt: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolution data returned when leader/worker resolves a request
|
||||
@@ -105,55 +90,6 @@ export type PermissionResolution = {
|
||||
permissionUpdates?: PermissionUpdate[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base directory for a team's permission requests
|
||||
* Path: ~/.claude/teams/{teamName}/permissions/
|
||||
*/
|
||||
export function getPermissionDir(teamName: string): string {
|
||||
return join(getTeamDir(teamName), 'permissions')
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the pending directory for a team
|
||||
*/
|
||||
function getPendingDir(teamName: string): string {
|
||||
return join(getPermissionDir(teamName), 'pending')
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the resolved directory for a team
|
||||
*/
|
||||
function getResolvedDir(teamName: string): string {
|
||||
return join(getPermissionDir(teamName), 'resolved')
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the permissions directory structure exists (async)
|
||||
*/
|
||||
async function ensurePermissionDirsAsync(teamName: string): Promise<void> {
|
||||
const permDir = getPermissionDir(teamName)
|
||||
const pendingDir = getPendingDir(teamName)
|
||||
const resolvedDir = getResolvedDir(teamName)
|
||||
|
||||
for (const dir of [permDir, pendingDir, resolvedDir]) {
|
||||
await mkdir(dir, { recursive: true })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the path to a pending request file
|
||||
*/
|
||||
function getPendingRequestPath(teamName: string, requestId: string): string {
|
||||
return join(getPendingDir(teamName), `${requestId}.json`)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the path to a resolved request file
|
||||
*/
|
||||
function getResolvedRequestPath(teamName: string, requestId: string): string {
|
||||
return join(getResolvedDir(teamName), `${requestId}.json`)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a unique request ID
|
||||
*/
|
||||
@@ -206,375 +142,6 @@ export function createPermissionRequest(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a permission request to the pending directory with file locking
|
||||
* Called by worker agents when they need permission approval from the leader
|
||||
*
|
||||
* @returns The written request
|
||||
*/
|
||||
export async function writePermissionRequest(
|
||||
request: SwarmPermissionRequest,
|
||||
): Promise<SwarmPermissionRequest> {
|
||||
await ensurePermissionDirsAsync(request.teamName)
|
||||
|
||||
const pendingPath = getPendingRequestPath(request.teamName, request.id)
|
||||
const lockDir = getPendingDir(request.teamName)
|
||||
|
||||
// Create a directory-level lock file for atomic writes
|
||||
const lockFilePath = join(lockDir, '.lock')
|
||||
await writeFile(lockFilePath, '', 'utf-8')
|
||||
|
||||
let release: (() => Promise<void>) | undefined
|
||||
try {
|
||||
release = await lockfile.lock(lockFilePath)
|
||||
|
||||
// Write the request file
|
||||
await writeFile(pendingPath, jsonStringify(request, null, 2), 'utf-8')
|
||||
|
||||
logForDebugging(
|
||||
`[PermissionSync] Wrote pending request ${request.id} from ${request.workerName} for ${request.toolName}`,
|
||||
)
|
||||
|
||||
return request
|
||||
} catch (error) {
|
||||
logForDebugging(
|
||||
`[PermissionSync] Failed to write permission request: ${error}`,
|
||||
)
|
||||
logError(error)
|
||||
throw error
|
||||
} finally {
|
||||
if (release) {
|
||||
await release()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all pending permission requests for a team
|
||||
* Called by the team leader to see what requests need attention
|
||||
*/
|
||||
export async function readPendingPermissions(
|
||||
teamName?: string,
|
||||
): Promise<SwarmPermissionRequest[]> {
|
||||
const team = teamName || getTeamName()
|
||||
if (!team) {
|
||||
logForDebugging('[PermissionSync] No team name available')
|
||||
return []
|
||||
}
|
||||
|
||||
const pendingDir = getPendingDir(team)
|
||||
|
||||
let files: string[]
|
||||
try {
|
||||
files = await readdir(pendingDir)
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code === 'ENOENT') {
|
||||
return []
|
||||
}
|
||||
logForDebugging(`[PermissionSync] Failed to read pending requests: ${e}`)
|
||||
logError(e)
|
||||
return []
|
||||
}
|
||||
|
||||
const jsonFiles = files.filter(f => f.endsWith('.json') && f !== '.lock')
|
||||
|
||||
const results = await Promise.all(
|
||||
jsonFiles.map(async file => {
|
||||
const filePath = join(pendingDir, file)
|
||||
try {
|
||||
const content = await readFile(filePath, 'utf-8')
|
||||
const parsed = SwarmPermissionRequestSchema().safeParse(
|
||||
jsonParse(content),
|
||||
)
|
||||
if (parsed.success) {
|
||||
return parsed.data
|
||||
}
|
||||
logForDebugging(
|
||||
`[PermissionSync] Invalid request file ${file}: ${parsed.error.message}`,
|
||||
)
|
||||
return null
|
||||
} catch (err) {
|
||||
logForDebugging(
|
||||
`[PermissionSync] Failed to read request file ${file}: ${err}`,
|
||||
)
|
||||
return null
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
const requests = results.filter(r => r !== null)
|
||||
|
||||
// Sort by creation time (oldest first)
|
||||
requests.sort((a, b) => a.createdAt - b.createdAt)
|
||||
|
||||
return requests
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a resolved permission request by ID
|
||||
* Called by workers to check if their request has been resolved
|
||||
*
|
||||
* @returns The resolved request, or null if not yet resolved
|
||||
*/
|
||||
export async function readResolvedPermission(
|
||||
requestId: string,
|
||||
teamName?: string,
|
||||
): Promise<SwarmPermissionRequest | null> {
|
||||
const team = teamName || getTeamName()
|
||||
if (!team) {
|
||||
return null
|
||||
}
|
||||
|
||||
const resolvedPath = getResolvedRequestPath(team, requestId)
|
||||
|
||||
try {
|
||||
const content = await readFile(resolvedPath, 'utf-8')
|
||||
const parsed = SwarmPermissionRequestSchema().safeParse(jsonParse(content))
|
||||
if (parsed.success) {
|
||||
return parsed.data
|
||||
}
|
||||
logForDebugging(
|
||||
`[PermissionSync] Invalid resolved request ${requestId}: ${parsed.error.message}`,
|
||||
)
|
||||
return null
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code === 'ENOENT') {
|
||||
return null
|
||||
}
|
||||
logForDebugging(
|
||||
`[PermissionSync] Failed to read resolved request ${requestId}: ${e}`,
|
||||
)
|
||||
logError(e)
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a permission request
|
||||
* Called by the team leader (or worker in self-resolution cases)
|
||||
*
|
||||
* Writes the resolution to resolved/, removes from pending/
|
||||
*/
|
||||
export async function resolvePermission(
|
||||
requestId: string,
|
||||
resolution: PermissionResolution,
|
||||
teamName?: string,
|
||||
): Promise<boolean> {
|
||||
const team = teamName || getTeamName()
|
||||
if (!team) {
|
||||
logForDebugging('[PermissionSync] No team name available')
|
||||
return false
|
||||
}
|
||||
|
||||
await ensurePermissionDirsAsync(team)
|
||||
|
||||
const pendingPath = getPendingRequestPath(team, requestId)
|
||||
const resolvedPath = getResolvedRequestPath(team, requestId)
|
||||
const lockFilePath = join(getPendingDir(team), '.lock')
|
||||
|
||||
await writeFile(lockFilePath, '', 'utf-8')
|
||||
|
||||
let release: (() => Promise<void>) | undefined
|
||||
try {
|
||||
release = await lockfile.lock(lockFilePath)
|
||||
|
||||
// Read the pending request
|
||||
let content: string
|
||||
try {
|
||||
content = await readFile(pendingPath, 'utf-8')
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code === 'ENOENT') {
|
||||
logForDebugging(
|
||||
`[PermissionSync] Pending request not found: ${requestId}`,
|
||||
)
|
||||
return false
|
||||
}
|
||||
throw e
|
||||
}
|
||||
|
||||
const parsed = SwarmPermissionRequestSchema().safeParse(jsonParse(content))
|
||||
if (!parsed.success) {
|
||||
logForDebugging(
|
||||
`[PermissionSync] Invalid pending request ${requestId}: ${parsed.error.message}`,
|
||||
)
|
||||
return false
|
||||
}
|
||||
|
||||
const request = parsed.data
|
||||
|
||||
// Update the request with resolution data
|
||||
const resolvedRequest: SwarmPermissionRequest = {
|
||||
...request,
|
||||
status: resolution.decision === 'approved' ? 'approved' : 'rejected',
|
||||
resolvedBy: resolution.resolvedBy,
|
||||
resolvedAt: Date.now(),
|
||||
feedback: resolution.feedback,
|
||||
updatedInput: resolution.updatedInput,
|
||||
permissionUpdates: resolution.permissionUpdates,
|
||||
}
|
||||
|
||||
// Write to resolved directory
|
||||
await writeFile(
|
||||
resolvedPath,
|
||||
jsonStringify(resolvedRequest, null, 2),
|
||||
'utf-8',
|
||||
)
|
||||
|
||||
// Remove from pending directory
|
||||
await unlink(pendingPath)
|
||||
|
||||
logForDebugging(
|
||||
`[PermissionSync] Resolved request ${requestId} with ${resolution.decision}`,
|
||||
)
|
||||
|
||||
return true
|
||||
} catch (error) {
|
||||
logForDebugging(`[PermissionSync] Failed to resolve request: ${error}`)
|
||||
logError(error)
|
||||
return false
|
||||
} finally {
|
||||
if (release) {
|
||||
await release()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up old resolved permission files
|
||||
* Called periodically to prevent file accumulation
|
||||
*
|
||||
* @param teamName - Team name
|
||||
* @param maxAgeMs - Maximum age in milliseconds (default: 1 hour)
|
||||
*/
|
||||
export async function cleanupOldResolutions(
|
||||
teamName?: string,
|
||||
maxAgeMs = 3600000,
|
||||
): Promise<number> {
|
||||
const team = teamName || getTeamName()
|
||||
if (!team) {
|
||||
return 0
|
||||
}
|
||||
|
||||
const resolvedDir = getResolvedDir(team)
|
||||
|
||||
let files: string[]
|
||||
try {
|
||||
files = await readdir(resolvedDir)
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code === 'ENOENT') {
|
||||
return 0
|
||||
}
|
||||
logForDebugging(`[PermissionSync] Failed to cleanup resolutions: ${e}`)
|
||||
logError(e)
|
||||
return 0
|
||||
}
|
||||
|
||||
const now = Date.now()
|
||||
const jsonFiles = files.filter(f => f.endsWith('.json'))
|
||||
|
||||
const cleanupResults = await Promise.all(
|
||||
jsonFiles.map(async file => {
|
||||
const filePath = join(resolvedDir, file)
|
||||
try {
|
||||
const content = await readFile(filePath, 'utf-8')
|
||||
const request = jsonParse(content) as SwarmPermissionRequest
|
||||
|
||||
// Check if the resolution is old enough to clean up
|
||||
// Use >= to handle edge case where maxAgeMs is 0 (clean up everything)
|
||||
const resolvedAt = request.resolvedAt || request.createdAt
|
||||
if (now - resolvedAt >= maxAgeMs) {
|
||||
await unlink(filePath)
|
||||
logForDebugging(`[PermissionSync] Cleaned up old resolution: ${file}`)
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
} catch {
|
||||
// If we can't parse it, clean it up anyway
|
||||
try {
|
||||
await unlink(filePath)
|
||||
return 1
|
||||
} catch {
|
||||
// Ignore deletion errors
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
const cleanedCount = cleanupResults.reduce<number>((sum, n) => sum + n, 0)
|
||||
|
||||
if (cleanedCount > 0) {
|
||||
logForDebugging(
|
||||
`[PermissionSync] Cleaned up ${cleanedCount} old resolutions`,
|
||||
)
|
||||
}
|
||||
|
||||
return cleanedCount
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy response type for worker polling
|
||||
* Used for backward compatibility with worker integration code
|
||||
*/
|
||||
export type PermissionResponse = {
|
||||
/** ID of the request this responds to */
|
||||
requestId: string
|
||||
/** Decision: approved or denied */
|
||||
decision: 'approved' | 'denied'
|
||||
/** Timestamp when response was created */
|
||||
timestamp: string
|
||||
/** Optional feedback message if denied */
|
||||
feedback?: string
|
||||
/** Optional updated input if the resolver modified it */
|
||||
updatedInput?: Record<string, unknown>
|
||||
/** Permission updates to apply (e.g., "always allow" rules) */
|
||||
permissionUpdates?: unknown[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll for a permission response (worker-side convenience function)
|
||||
* Converts the resolved request into a simpler response format
|
||||
*
|
||||
* @returns The permission response, or null if not yet resolved
|
||||
*/
|
||||
export async function pollForResponse(
|
||||
requestId: string,
|
||||
_agentName?: string,
|
||||
teamName?: string,
|
||||
): Promise<PermissionResponse | null> {
|
||||
const resolved = await readResolvedPermission(requestId, teamName)
|
||||
if (!resolved) {
|
||||
return null
|
||||
}
|
||||
|
||||
return {
|
||||
requestId: resolved.id,
|
||||
decision: resolved.status === 'approved' ? 'approved' : 'denied',
|
||||
timestamp: resolved.resolvedAt
|
||||
? new Date(resolved.resolvedAt).toISOString()
|
||||
: new Date(resolved.createdAt).toISOString(),
|
||||
feedback: resolved.feedback,
|
||||
updatedInput: resolved.updatedInput,
|
||||
permissionUpdates: resolved.permissionUpdates,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a worker's response after processing
|
||||
* This is an alias for deleteResolvedPermission for backward compatibility
|
||||
*/
|
||||
export async function removeWorkerResponse(
|
||||
requestId: string,
|
||||
_agentName?: string,
|
||||
teamName?: string,
|
||||
): Promise<void> {
|
||||
await deleteResolvedPermission(requestId, teamName)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the current agent is a team leader
|
||||
*/
|
||||
@@ -600,46 +167,6 @@ export function isSwarmWorker(): boolean {
|
||||
return !!teamName && !!agentId && !isTeamLeader()
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a resolved permission file
|
||||
* Called after a worker has processed the resolution
|
||||
*/
|
||||
export async function deleteResolvedPermission(
|
||||
requestId: string,
|
||||
teamName?: string,
|
||||
): Promise<boolean> {
|
||||
const team = teamName || getTeamName()
|
||||
if (!team) {
|
||||
return false
|
||||
}
|
||||
|
||||
const resolvedPath = getResolvedRequestPath(team, requestId)
|
||||
|
||||
try {
|
||||
await unlink(resolvedPath)
|
||||
logForDebugging(
|
||||
`[PermissionSync] Deleted resolved permission: ${requestId}`,
|
||||
)
|
||||
return true
|
||||
} catch (e: unknown) {
|
||||
const code = getErrnoCode(e)
|
||||
if (code === 'ENOENT') {
|
||||
return false
|
||||
}
|
||||
logForDebugging(
|
||||
`[PermissionSync] Failed to delete resolved permission: ${e}`,
|
||||
)
|
||||
logError(e)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit a permission request (alias for writePermissionRequest)
|
||||
* Provided for backward compatibility with worker integration code
|
||||
*/
|
||||
export const submitPermissionRequest = writePermissionRequest
|
||||
|
||||
// ============================================================================
|
||||
// Mailbox-Based Permission System
|
||||
// ============================================================================
|
||||
|
||||
@@ -66,7 +66,6 @@ export type TeamFile = {
|
||||
description?: string
|
||||
createdAt: number
|
||||
leadAgentId: string
|
||||
leadSessionId?: string // Actual session UUID of the leader (for discovery)
|
||||
hiddenPaneIds?: string[] // Pane IDs that are currently hidden from the UI
|
||||
teamAllowedPaths?: TeamAllowedPath[] // Paths all teammates can edit without asking
|
||||
members: Array<{
|
||||
@@ -74,15 +73,13 @@ export type TeamFile = {
|
||||
name: string
|
||||
agentType?: string
|
||||
model?: string
|
||||
prompt?: string
|
||||
prompt?: string // Legacy field; stripped from persisted configs
|
||||
color?: string
|
||||
planModeRequired?: boolean
|
||||
joinedAt: number
|
||||
tmuxPaneId: string
|
||||
cwd: string
|
||||
worktreePath?: string
|
||||
sessionId?: string
|
||||
subscriptions: string[]
|
||||
backendType?: BackendType
|
||||
isActive?: boolean // false when idle, undefined/true when active
|
||||
mode?: PermissionMode // Current permission mode for this teammate
|
||||
@@ -123,6 +120,42 @@ export function getTeamFilePath(teamName: string): string {
|
||||
return join(getTeamDir(teamName), 'config.json')
|
||||
}
|
||||
|
||||
function sanitizeTeamFileForPersistence(teamFile: TeamFile): TeamFile {
|
||||
return {
|
||||
name: teamFile.name,
|
||||
...(teamFile.description ? { description: teamFile.description } : {}),
|
||||
createdAt: teamFile.createdAt,
|
||||
leadAgentId: teamFile.leadAgentId,
|
||||
...(teamFile.hiddenPaneIds && teamFile.hiddenPaneIds.length > 0
|
||||
? { hiddenPaneIds: [...teamFile.hiddenPaneIds] }
|
||||
: {}),
|
||||
...(teamFile.teamAllowedPaths && teamFile.teamAllowedPaths.length > 0
|
||||
? {
|
||||
teamAllowedPaths: teamFile.teamAllowedPaths.map(path => ({
|
||||
...path,
|
||||
})),
|
||||
}
|
||||
: {}),
|
||||
members: teamFile.members.map(member => ({
|
||||
agentId: member.agentId,
|
||||
name: member.name,
|
||||
...(member.agentType ? { agentType: member.agentType } : {}),
|
||||
...(member.model ? { model: member.model } : {}),
|
||||
...(member.color ? { color: member.color } : {}),
|
||||
...(member.planModeRequired !== undefined
|
||||
? { planModeRequired: member.planModeRequired }
|
||||
: {}),
|
||||
joinedAt: member.joinedAt,
|
||||
tmuxPaneId: member.tmuxPaneId,
|
||||
cwd: member.cwd,
|
||||
...(member.worktreePath ? { worktreePath: member.worktreePath } : {}),
|
||||
...(member.backendType ? { backendType: member.backendType } : {}),
|
||||
...(member.isActive !== undefined ? { isActive: member.isActive } : {}),
|
||||
...(member.mode ? { mode: member.mode } : {}),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a team file by name (sync — for sync contexts like React render paths)
|
||||
* @internal Exported for team discovery UI
|
||||
@@ -131,7 +164,7 @@ export function getTeamFilePath(teamName: string): string {
|
||||
export function readTeamFile(teamName: string): TeamFile | null {
|
||||
try {
|
||||
const content = readFileSync(getTeamFilePath(teamName), 'utf-8')
|
||||
return jsonParse(content) as TeamFile
|
||||
return sanitizeTeamFileForPersistence(jsonParse(content) as TeamFile)
|
||||
} catch (e) {
|
||||
if (getErrnoCode(e) === 'ENOENT') return null
|
||||
logForDebugging(
|
||||
@@ -149,7 +182,7 @@ export async function readTeamFileAsync(
|
||||
): Promise<TeamFile | null> {
|
||||
try {
|
||||
const content = await readFile(getTeamFilePath(teamName), 'utf-8')
|
||||
return jsonParse(content) as TeamFile
|
||||
return sanitizeTeamFileForPersistence(jsonParse(content) as TeamFile)
|
||||
} catch (e) {
|
||||
if (getErrnoCode(e) === 'ENOENT') return null
|
||||
logForDebugging(
|
||||
@@ -166,7 +199,10 @@ export async function readTeamFileAsync(
|
||||
function writeTeamFile(teamName: string, teamFile: TeamFile): void {
|
||||
const teamDir = getTeamDir(teamName)
|
||||
mkdirSync(teamDir, { recursive: true })
|
||||
writeFileSync(getTeamFilePath(teamName), jsonStringify(teamFile, null, 2))
|
||||
writeFileSync(
|
||||
getTeamFilePath(teamName),
|
||||
jsonStringify(sanitizeTeamFileForPersistence(teamFile), null, 2),
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -178,7 +214,10 @@ export async function writeTeamFileAsync(
|
||||
): Promise<void> {
|
||||
const teamDir = getTeamDir(teamName)
|
||||
await mkdir(teamDir, { recursive: true })
|
||||
await writeFile(getTeamFilePath(teamName), jsonStringify(teamFile, null, 2))
|
||||
await writeFile(
|
||||
getTeamFilePath(teamName),
|
||||
jsonStringify(sanitizeTeamFileForPersistence(teamFile), null, 2),
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -20,7 +20,6 @@ export type TeammateStatus = {
|
||||
agentId: string
|
||||
agentType?: string
|
||||
model?: string
|
||||
prompt?: string
|
||||
status: 'running' | 'idle' | 'unknown'
|
||||
color?: string
|
||||
idleSince?: string // ISO timestamp from idle notification
|
||||
@@ -60,7 +59,6 @@ export function getTeammateStatuses(teamName: string): TeammateStatus[] {
|
||||
agentId: member.agentId,
|
||||
agentType: member.agentType,
|
||||
model: member.model,
|
||||
prompt: member.prompt,
|
||||
status,
|
||||
color: member.color,
|
||||
tmuxPaneId: member.tmuxPaneId,
|
||||
|
||||
@@ -15,7 +15,6 @@ import { PermissionModeSchema } from '../entrypoints/sdk/coreSchemas.js'
|
||||
import { SEND_MESSAGE_TOOL_NAME } from '../tools/SendMessageTool/constants.js'
|
||||
import type { Message } from '../types/message.js'
|
||||
import { generateRequestId } from './agentId.js'
|
||||
import { count } from './array.js'
|
||||
import { logForDebugging } from './debug.js'
|
||||
import { getTeamsDir } from './envUtils.js'
|
||||
import { getErrnoCode } from './errors.js'
|
||||
@@ -58,11 +57,7 @@ export function getInboxPath(agentName: string, teamName?: string): string {
|
||||
const safeTeam = sanitizePathComponent(team)
|
||||
const safeAgentName = sanitizePathComponent(agentName)
|
||||
const inboxDir = join(getTeamsDir(), safeTeam, 'inboxes')
|
||||
const fullPath = join(inboxDir, `${safeAgentName}.json`)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] getInboxPath: agent=${agentName}, team=${team}, fullPath=${fullPath}`,
|
||||
)
|
||||
return fullPath
|
||||
return join(inboxDir, `${safeAgentName}.json`)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -73,7 +68,7 @@ async function ensureInboxDir(teamName?: string): Promise<void> {
|
||||
const safeTeam = sanitizePathComponent(team)
|
||||
const inboxDir = join(getTeamsDir(), safeTeam, 'inboxes')
|
||||
await mkdir(inboxDir, { recursive: true })
|
||||
logForDebugging(`[TeammateMailbox] Ensured inbox directory: ${inboxDir}`)
|
||||
logForDebugging('[TeammateMailbox] Ensured inbox directory')
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -86,7 +81,6 @@ export async function readMailbox(
|
||||
teamName?: string,
|
||||
): Promise<TeammateMessage[]> {
|
||||
const inboxPath = getInboxPath(agentName, teamName)
|
||||
logForDebugging(`[TeammateMailbox] readMailbox: path=${inboxPath}`)
|
||||
|
||||
try {
|
||||
const content = await readFile(inboxPath, 'utf-8')
|
||||
@@ -101,7 +95,7 @@ export async function readMailbox(
|
||||
logForDebugging(`[TeammateMailbox] readMailbox: file does not exist`)
|
||||
return []
|
||||
}
|
||||
logForDebugging(`Failed to read inbox for ${agentName}: ${error}`)
|
||||
logForDebugging(`[TeammateMailbox] Failed to read inbox for ${agentName}`)
|
||||
logError(error)
|
||||
return []
|
||||
}
|
||||
@@ -142,7 +136,7 @@ export async function writeToMailbox(
|
||||
const lockFilePath = `${inboxPath}.lock`
|
||||
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] writeToMailbox: recipient=${recipientName}, from=${message.from}, path=${inboxPath}`,
|
||||
`[TeammateMailbox] writeToMailbox: recipient=${recipientName}, from=${message.from}`,
|
||||
)
|
||||
|
||||
// Ensure the inbox file exists before locking (proper-lockfile requires the file to exist)
|
||||
@@ -153,7 +147,7 @@ export async function writeToMailbox(
|
||||
const code = getErrnoCode(error)
|
||||
if (code !== 'EEXIST') {
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] writeToMailbox: failed to create inbox file: ${error}`,
|
||||
`[TeammateMailbox] writeToMailbox: failed to create inbox file`,
|
||||
)
|
||||
logError(error)
|
||||
return
|
||||
@@ -182,7 +176,9 @@ export async function writeToMailbox(
|
||||
`[TeammateMailbox] Wrote message to ${recipientName}'s inbox from ${message.from}`,
|
||||
)
|
||||
} catch (error) {
|
||||
logForDebugging(`Failed to write to inbox for ${recipientName}: ${error}`)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] Failed to write to inbox for ${recipientName}`,
|
||||
)
|
||||
logError(error)
|
||||
} finally {
|
||||
if (release) {
|
||||
@@ -192,8 +188,8 @@ export async function writeToMailbox(
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a specific message in a teammate's inbox as read by index
|
||||
* Uses file locking to prevent race conditions
|
||||
* Remove a specific processed message from a teammate's inbox by index.
|
||||
* Uses file locking to prevent race conditions.
|
||||
* @param agentName - The agent name to mark message as read for
|
||||
* @param teamName - Optional team name
|
||||
* @param messageIndex - Index of the message to mark as read
|
||||
@@ -205,7 +201,7 @@ export async function markMessageAsReadByIndex(
|
||||
): Promise<void> {
|
||||
const inboxPath = getInboxPath(agentName, teamName)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessageAsReadByIndex called: agentName=${agentName}, teamName=${teamName}, index=${messageIndex}, path=${inboxPath}`,
|
||||
`[TeammateMailbox] markMessageAsReadByIndex called: agentName=${agentName}, index=${messageIndex}`,
|
||||
)
|
||||
|
||||
const lockFilePath = `${inboxPath}.lock`
|
||||
@@ -242,22 +238,26 @@ export async function markMessageAsReadByIndex(
|
||||
return
|
||||
}
|
||||
|
||||
messages[messageIndex] = { ...message, read: true }
|
||||
const updatedMessages = messages.filter(
|
||||
(currentMessage, index) => index !== messageIndex && !currentMessage.read,
|
||||
)
|
||||
|
||||
await writeFile(inboxPath, jsonStringify(messages, null, 2), 'utf-8')
|
||||
await writeFile(
|
||||
inboxPath,
|
||||
jsonStringify(updatedMessages, null, 2),
|
||||
'utf-8',
|
||||
)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessageAsReadByIndex: marked message at index ${messageIndex} as read`,
|
||||
`[TeammateMailbox] markMessageAsReadByIndex: removed message at index ${messageIndex} from inbox`,
|
||||
)
|
||||
} catch (error) {
|
||||
const code = getErrnoCode(error)
|
||||
if (code === 'ENOENT') {
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessageAsReadByIndex: file does not exist at ${inboxPath}`,
|
||||
)
|
||||
logForDebugging(`[TeammateMailbox] markMessageAsReadByIndex: file missing`)
|
||||
return
|
||||
}
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessageAsReadByIndex FAILED for ${agentName}: ${error}`,
|
||||
`[TeammateMailbox] markMessageAsReadByIndex failed for ${agentName}`,
|
||||
)
|
||||
logError(error)
|
||||
} finally {
|
||||
@@ -270,77 +270,6 @@ export async function markMessageAsReadByIndex(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark all messages in a teammate's inbox as read
|
||||
* Uses file locking to prevent race conditions
|
||||
* @param agentName - The agent name to mark messages as read for
|
||||
* @param teamName - Optional team name
|
||||
*/
|
||||
export async function markMessagesAsRead(
|
||||
agentName: string,
|
||||
teamName?: string,
|
||||
): Promise<void> {
|
||||
const inboxPath = getInboxPath(agentName, teamName)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead called: agentName=${agentName}, teamName=${teamName}, path=${inboxPath}`,
|
||||
)
|
||||
|
||||
const lockFilePath = `${inboxPath}.lock`
|
||||
|
||||
let release: (() => Promise<void>) | undefined
|
||||
try {
|
||||
logForDebugging(`[TeammateMailbox] markMessagesAsRead: acquiring lock...`)
|
||||
release = await lockfile.lock(inboxPath, {
|
||||
lockfilePath: lockFilePath,
|
||||
...LOCK_OPTIONS,
|
||||
})
|
||||
logForDebugging(`[TeammateMailbox] markMessagesAsRead: lock acquired`)
|
||||
|
||||
// Re-read messages after acquiring lock to get the latest state
|
||||
const messages = await readMailbox(agentName, teamName)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead: read ${messages.length} messages after lock`,
|
||||
)
|
||||
|
||||
if (messages.length === 0) {
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead: no messages to mark`,
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
const unreadCount = count(messages, m => !m.read)
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead: ${unreadCount} unread of ${messages.length} total`,
|
||||
)
|
||||
|
||||
// messages comes from jsonParse — fresh, unshared objects safe to mutate
|
||||
for (const m of messages) m.read = true
|
||||
|
||||
await writeFile(inboxPath, jsonStringify(messages, null, 2), 'utf-8')
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead: WROTE ${unreadCount} message(s) as read to ${inboxPath}`,
|
||||
)
|
||||
} catch (error) {
|
||||
const code = getErrnoCode(error)
|
||||
if (code === 'ENOENT') {
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead: file does not exist at ${inboxPath}`,
|
||||
)
|
||||
return
|
||||
}
|
||||
logForDebugging(
|
||||
`[TeammateMailbox] markMessagesAsRead FAILED for ${agentName}: ${error}`,
|
||||
)
|
||||
logError(error)
|
||||
} finally {
|
||||
if (release) {
|
||||
await release()
|
||||
logForDebugging(`[TeammateMailbox] markMessagesAsRead: lock released`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear a teammate's inbox (delete all messages)
|
||||
* @param agentName - The agent name to clear inbox for
|
||||
@@ -362,7 +291,7 @@ export async function clearMailbox(
|
||||
if (code === 'ENOENT') {
|
||||
return
|
||||
}
|
||||
logForDebugging(`Failed to clear inbox for ${agentName}: ${error}`)
|
||||
logForDebugging(`[TeammateMailbox] Failed to clear inbox for ${agentName}`)
|
||||
logError(error)
|
||||
}
|
||||
}
|
||||
@@ -1095,8 +1024,8 @@ export function isStructuredProtocolMessage(messageText: string): boolean {
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks only messages matching a predicate as read, leaving others unread.
|
||||
* Uses the same file-locking mechanism as markMessagesAsRead.
|
||||
* Removes only messages matching a predicate, leaving the rest unread.
|
||||
* Uses the same file-locking mechanism as the other mailbox update helpers.
|
||||
*/
|
||||
export async function markMessagesAsReadByPredicate(
|
||||
agentName: string,
|
||||
@@ -1119,8 +1048,8 @@ export async function markMessagesAsReadByPredicate(
|
||||
return
|
||||
}
|
||||
|
||||
const updatedMessages = messages.map(m =>
|
||||
!m.read && predicate(m) ? { ...m, read: true } : m,
|
||||
const updatedMessages = messages.filter(
|
||||
m => !m.read && !predicate(m),
|
||||
)
|
||||
|
||||
await writeFile(inboxPath, jsonStringify(updatedMessages, null, 2), 'utf-8')
|
||||
@@ -1174,7 +1103,7 @@ export function getLastPeerDmSummary(messages: Message[]): string | undefined {
|
||||
const summary =
|
||||
'summary' in block.input && typeof block.input.summary === 'string'
|
||||
? block.input.summary
|
||||
: block.input.message.slice(0, 80)
|
||||
: 'sent update'
|
||||
return `[to ${to}] ${summary}`
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,105 +1,33 @@
|
||||
/**
|
||||
* Beta Session Tracing for Claude Code
|
||||
* Detailed beta tracing egress is disabled in this build.
|
||||
*
|
||||
* This module contains beta tracing features enabled when
|
||||
* ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
|
||||
*
|
||||
* For external users, tracing is enabled in SDK/headless mode, or in
|
||||
* interactive mode when the org is allowlisted via the
|
||||
* tengu_trace_lantern GrowthBook gate.
|
||||
* For ant users, tracing is enabled in all modes.
|
||||
*
|
||||
* Visibility Rules:
|
||||
* | Content | External | Ant |
|
||||
* |------------------|----------|------|
|
||||
* | System prompts | ✅ | ✅ |
|
||||
* | Model output | ✅ | ✅ |
|
||||
* | Thinking output | ❌ | ✅ |
|
||||
* | Tools | ✅ | ✅ |
|
||||
* | new_context | ✅ | ✅ |
|
||||
*
|
||||
* Features:
|
||||
* - Per-agent message tracking with hash-based deduplication
|
||||
* - System prompt logging (once per unique hash)
|
||||
* - Hook execution spans
|
||||
* - Detailed new_context attributes for LLM requests
|
||||
* The exported helpers remain for compile-time compatibility, but do not
|
||||
* retain tracing state or emit tracing attributes.
|
||||
*/
|
||||
|
||||
import type { Span } from '@opentelemetry/api'
|
||||
import { createHash } from 'crypto'
|
||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
||||
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
|
||||
import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
|
||||
import type { AssistantMessage, UserMessage } from '../../types/message.js'
|
||||
import { isEnvTruthy } from '../envUtils.js'
|
||||
import { jsonParse, jsonStringify } from '../slowOperations.js'
|
||||
import { logOTelEvent } from './events.js'
|
||||
type AttributeValue = string | number | boolean
|
||||
|
||||
// Message type for API calls (UserMessage or AssistantMessage)
|
||||
type APIMessage = UserMessage | AssistantMessage
|
||||
export interface SpanAttributeWriter {
|
||||
setAttribute?(_key: string, _value: AttributeValue): void
|
||||
setAttributes?(_attributes: Record<string, AttributeValue>): void
|
||||
}
|
||||
|
||||
/**
|
||||
* Track hashes we've already logged this session (system prompts, tools, etc).
|
||||
*
|
||||
* WHY: System prompts and tool schemas are large and rarely change within a session.
|
||||
* Sending full content on every request would be wasteful. Instead, we hash and
|
||||
* only log the full content once per unique hash.
|
||||
*/
|
||||
const seenHashes = new Set<string>()
|
||||
export interface LLMRequestNewContext {
|
||||
systemPrompt?: string
|
||||
querySource?: string
|
||||
tools?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Track the last reported message hash per querySource (agent) for incremental context.
|
||||
*
|
||||
* WHY: When debugging traces, we want to see what NEW information was added each turn,
|
||||
* not the entire conversation history (which can be huge). By tracking the last message
|
||||
* we reported per agent, we can compute and send only the delta (new messages since
|
||||
* the last request). This is tracked per-agent (querySource) because different agents
|
||||
* (main thread, subagents, warmup requests) have independent conversation contexts.
|
||||
*/
|
||||
const lastReportedMessageHash = new Map<string, string>()
|
||||
const MAX_CONTENT_SIZE = 60 * 1024
|
||||
|
||||
/**
|
||||
* Clear tracking state after compaction.
|
||||
* Old hashes are irrelevant once messages have been replaced.
|
||||
*/
|
||||
export function clearBetaTracingState(): void {
|
||||
seenHashes.clear()
|
||||
lastReportedMessageHash.clear()
|
||||
return
|
||||
}
|
||||
|
||||
const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
|
||||
|
||||
/**
|
||||
* Check if beta detailed tracing is enabled.
|
||||
* - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
|
||||
* - For external users, enabled in SDK/headless mode OR when org is
|
||||
* allowlisted via the tengu_trace_lantern GrowthBook gate
|
||||
*/
|
||||
export function isBetaTracingEnabled(): boolean {
|
||||
const baseEnabled =
|
||||
isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) &&
|
||||
Boolean(process.env.BETA_TRACING_ENDPOINT)
|
||||
|
||||
if (!baseEnabled) {
|
||||
return false
|
||||
}
|
||||
|
||||
// For external users, enable in SDK/headless mode OR when org is allowlisted.
|
||||
// Gate reads from disk cache, so first run after allowlisting returns false;
|
||||
// works from second run onward (same behavior as enhanced_telemetry_beta).
|
||||
if (process.env.USER_TYPE !== 'ant') {
|
||||
return (
|
||||
getIsNonInteractiveSession() ||
|
||||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false)
|
||||
)
|
||||
}
|
||||
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate content to fit within Honeycomb limits.
|
||||
*/
|
||||
export function truncateContent(
|
||||
content: string,
|
||||
maxSize: number = MAX_CONTENT_SIZE,
|
||||
@@ -116,376 +44,43 @@ export function truncateContent(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a short hash (first 12 hex chars of SHA-256).
|
||||
*/
|
||||
function shortHash(content: string): string {
|
||||
return createHash('sha256').update(content).digest('hex').slice(0, 12)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a hash for a system prompt.
|
||||
*/
|
||||
function hashSystemPrompt(systemPrompt: string): string {
|
||||
return `sp_${shortHash(systemPrompt)}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a hash for a message based on its content.
|
||||
*/
|
||||
function hashMessage(message: APIMessage): string {
|
||||
const content = jsonStringify(message.message.content)
|
||||
return `msg_${shortHash(content)}`
|
||||
}
|
||||
|
||||
// Regex to detect content wrapped in <system-reminder> tags
|
||||
const SYSTEM_REMINDER_REGEX =
|
||||
/^<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>$/
|
||||
|
||||
/**
|
||||
* Check if text is entirely a system reminder (wrapped in <system-reminder> tags).
|
||||
* Returns the inner content if it is, null otherwise.
|
||||
*/
|
||||
function extractSystemReminderContent(text: string): string | null {
|
||||
const match = text.trim().match(SYSTEM_REMINDER_REGEX)
|
||||
return match && match[1] ? match[1].trim() : null
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of formatting messages - separates regular content from system reminders.
|
||||
*/
|
||||
interface FormattedMessages {
|
||||
contextParts: string[]
|
||||
systemReminders: string[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Format user messages for new_context display, separating system reminders.
|
||||
* Only handles user messages (assistant messages are filtered out before this is called).
|
||||
*/
|
||||
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
|
||||
const contextParts: string[] = []
|
||||
const systemReminders: string[] = []
|
||||
|
||||
for (const message of messages) {
|
||||
const content = message.message.content
|
||||
if (typeof content === 'string') {
|
||||
const reminderContent = extractSystemReminderContent(content)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(`[USER]\n${content}`)
|
||||
}
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'text') {
|
||||
const reminderContent = extractSystemReminderContent(block.text)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(`[USER]\n${block.text}`)
|
||||
}
|
||||
} else if (block.type === 'tool_result') {
|
||||
const resultContent =
|
||||
typeof block.content === 'string'
|
||||
? block.content
|
||||
: jsonStringify(block.content)
|
||||
// Tool results can also contain system reminders (e.g., malware warning)
|
||||
const reminderContent = extractSystemReminderContent(resultContent)
|
||||
if (reminderContent) {
|
||||
systemReminders.push(reminderContent)
|
||||
} else {
|
||||
contextParts.push(
|
||||
`[TOOL RESULT: ${block.tool_use_id}]\n${resultContent}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { contextParts, systemReminders }
|
||||
}
|
||||
|
||||
export interface LLMRequestNewContext {
|
||||
/** System prompt (typically only on first request or if changed) */
|
||||
systemPrompt?: string
|
||||
/** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */
|
||||
querySource?: string
|
||||
/** Tool schemas sent with the request */
|
||||
tools?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to an interaction span.
|
||||
* Adds new_context with the user prompt.
|
||||
*/
|
||||
export function addBetaInteractionAttributes(
|
||||
span: Span,
|
||||
userPrompt: string,
|
||||
_span: SpanAttributeWriter,
|
||||
_userPrompt: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
||||
`[USER PROMPT]\n${userPrompt}`,
|
||||
)
|
||||
span.setAttributes({
|
||||
new_context: truncatedPrompt,
|
||||
...(truncated && {
|
||||
new_context_truncated: true,
|
||||
new_context_original_length: userPrompt.length,
|
||||
}),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to an LLM request span.
|
||||
* Handles system prompt logging and new_context computation.
|
||||
*/
|
||||
export function addBetaLLMRequestAttributes(
|
||||
span: Span,
|
||||
newContext?: LLMRequestNewContext,
|
||||
messagesForAPI?: APIMessage[],
|
||||
_span: SpanAttributeWriter,
|
||||
_newContext?: LLMRequestNewContext,
|
||||
_messagesForAPI?: unknown[],
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
// Add system prompt info to the span
|
||||
if (newContext?.systemPrompt) {
|
||||
const promptHash = hashSystemPrompt(newContext.systemPrompt)
|
||||
const preview = newContext.systemPrompt.slice(0, 500)
|
||||
|
||||
// Always add hash, preview, and length to the span
|
||||
span.setAttribute('system_prompt_hash', promptHash)
|
||||
span.setAttribute('system_prompt_preview', preview)
|
||||
span.setAttribute('system_prompt_length', newContext.systemPrompt.length)
|
||||
|
||||
// Log the full system prompt only once per unique hash this session
|
||||
if (!seenHashes.has(promptHash)) {
|
||||
seenHashes.add(promptHash)
|
||||
|
||||
// Truncate for the log if needed
|
||||
const { content: truncatedPrompt, truncated } = truncateContent(
|
||||
newContext.systemPrompt,
|
||||
)
|
||||
|
||||
void logOTelEvent('system_prompt', {
|
||||
system_prompt_hash: promptHash,
|
||||
system_prompt: truncatedPrompt,
|
||||
system_prompt_length: String(newContext.systemPrompt.length),
|
||||
...(truncated && { system_prompt_truncated: 'true' }),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add tools info to the span
|
||||
if (newContext?.tools) {
|
||||
try {
|
||||
const toolsArray = jsonParse(newContext.tools) as Record<
|
||||
string,
|
||||
unknown
|
||||
>[]
|
||||
|
||||
// Build array of {name, hash} for each tool
|
||||
const toolsWithHashes = toolsArray.map(tool => {
|
||||
const toolJson = jsonStringify(tool)
|
||||
const toolHash = shortHash(toolJson)
|
||||
return {
|
||||
name: typeof tool.name === 'string' ? tool.name : 'unknown',
|
||||
hash: toolHash,
|
||||
json: toolJson,
|
||||
}
|
||||
})
|
||||
|
||||
// Set span attribute with array of name/hash pairs
|
||||
span.setAttribute(
|
||||
'tools',
|
||||
jsonStringify(
|
||||
toolsWithHashes.map(({ name, hash }) => ({ name, hash })),
|
||||
),
|
||||
)
|
||||
span.setAttribute('tools_count', toolsWithHashes.length)
|
||||
|
||||
// Log each tool's full description once per unique hash
|
||||
for (const { name, hash, json } of toolsWithHashes) {
|
||||
if (!seenHashes.has(`tool_${hash}`)) {
|
||||
seenHashes.add(`tool_${hash}`)
|
||||
|
||||
const { content: truncatedTool, truncated } = truncateContent(json)
|
||||
|
||||
void logOTelEvent('tool', {
|
||||
tool_name: sanitizeToolNameForAnalytics(name),
|
||||
tool_hash: hash,
|
||||
tool: truncatedTool,
|
||||
...(truncated && { tool_truncated: 'true' }),
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// If parsing fails, log the raw tools string
|
||||
span.setAttribute('tools_parse_error', true)
|
||||
}
|
||||
}
|
||||
|
||||
// Add new_context using hash-based tracking (visible to all users)
|
||||
if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
|
||||
const querySource = newContext.querySource
|
||||
const lastHash = lastReportedMessageHash.get(querySource)
|
||||
|
||||
// Find where the last reported message is in the array
|
||||
let startIndex = 0
|
||||
if (lastHash) {
|
||||
for (let i = 0; i < messagesForAPI.length; i++) {
|
||||
const msg = messagesForAPI[i]
|
||||
if (msg && hashMessage(msg) === lastHash) {
|
||||
startIndex = i + 1 // Start after the last reported message
|
||||
break
|
||||
}
|
||||
}
|
||||
// If lastHash not found, startIndex stays 0 (send everything)
|
||||
}
|
||||
|
||||
// Get new messages (filter out assistant messages - we only want user input/tool results)
|
||||
const newMessages = messagesForAPI
|
||||
.slice(startIndex)
|
||||
.filter((m): m is UserMessage => m.type === 'user')
|
||||
|
||||
if (newMessages.length > 0) {
|
||||
// Format new messages, separating system reminders from regular content
|
||||
const { contextParts, systemReminders } =
|
||||
formatMessagesForContext(newMessages)
|
||||
|
||||
// Set new_context (regular user content and tool results)
|
||||
if (contextParts.length > 0) {
|
||||
const fullContext = contextParts.join('\n\n---\n\n')
|
||||
const { content: truncatedContext, truncated } =
|
||||
truncateContent(fullContext)
|
||||
|
||||
span.setAttributes({
|
||||
new_context: truncatedContext,
|
||||
new_context_message_count: newMessages.length,
|
||||
...(truncated && {
|
||||
new_context_truncated: true,
|
||||
new_context_original_length: fullContext.length,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
// Set system_reminders as a separate attribute
|
||||
if (systemReminders.length > 0) {
|
||||
const fullReminders = systemReminders.join('\n\n---\n\n')
|
||||
const { content: truncatedReminders, truncated: remindersTruncated } =
|
||||
truncateContent(fullReminders)
|
||||
|
||||
span.setAttributes({
|
||||
system_reminders: truncatedReminders,
|
||||
system_reminders_count: systemReminders.length,
|
||||
...(remindersTruncated && {
|
||||
system_reminders_truncated: true,
|
||||
system_reminders_original_length: fullReminders.length,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
// Update last reported hash to the last message in the array
|
||||
const lastMessage = messagesForAPI[messagesForAPI.length - 1]
|
||||
if (lastMessage) {
|
||||
lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to endLLMRequestSpan.
|
||||
* Handles model_output and thinking_output truncation.
|
||||
*/
|
||||
export function addBetaLLMResponseAttributes(
|
||||
endAttributes: Record<string, string | number | boolean>,
|
||||
metadata?: {
|
||||
_attributes: Record<string, AttributeValue>,
|
||||
_metadata?: {
|
||||
modelOutput?: string
|
||||
thinkingOutput?: string
|
||||
},
|
||||
): void {
|
||||
if (!isBetaTracingEnabled() || !metadata) {
|
||||
return
|
||||
}
|
||||
|
||||
// Add model_output (text content) - visible to all users
|
||||
if (metadata.modelOutput !== undefined) {
|
||||
const { content: modelOutput, truncated: outputTruncated } =
|
||||
truncateContent(metadata.modelOutput)
|
||||
endAttributes['response.model_output'] = modelOutput
|
||||
if (outputTruncated) {
|
||||
endAttributes['response.model_output_truncated'] = true
|
||||
endAttributes['response.model_output_original_length'] =
|
||||
metadata.modelOutput.length
|
||||
}
|
||||
}
|
||||
|
||||
// Add thinking_output - ant-only
|
||||
if (
|
||||
process.env.USER_TYPE === 'ant' &&
|
||||
metadata.thinkingOutput !== undefined
|
||||
) {
|
||||
const { content: thinkingOutput, truncated: thinkingTruncated } =
|
||||
truncateContent(metadata.thinkingOutput)
|
||||
endAttributes['response.thinking_output'] = thinkingOutput
|
||||
if (thinkingTruncated) {
|
||||
endAttributes['response.thinking_output_truncated'] = true
|
||||
endAttributes['response.thinking_output_original_length'] =
|
||||
metadata.thinkingOutput.length
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to startToolSpan.
|
||||
* Adds tool_input with the serialized tool input.
|
||||
*/
|
||||
export function addBetaToolInputAttributes(
|
||||
span: Span,
|
||||
toolName: string,
|
||||
toolInput: string,
|
||||
_span: SpanAttributeWriter,
|
||||
_toolName: string,
|
||||
_toolInput: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedInput, truncated } = truncateContent(
|
||||
`[TOOL INPUT: ${toolName}]\n${toolInput}`,
|
||||
)
|
||||
span.setAttributes({
|
||||
tool_input: truncatedInput,
|
||||
...(truncated && {
|
||||
tool_input_truncated: true,
|
||||
tool_input_original_length: toolInput.length,
|
||||
}),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Add beta attributes to endToolSpan.
|
||||
* Adds new_context with the tool result.
|
||||
*/
|
||||
export function addBetaToolResultAttributes(
|
||||
endAttributes: Record<string, string | number | boolean>,
|
||||
toolName: string | number | boolean,
|
||||
toolResult: string,
|
||||
_attributes: Record<string, AttributeValue>,
|
||||
_toolName: string | number | boolean,
|
||||
_toolResult: string,
|
||||
): void {
|
||||
if (!isBetaTracingEnabled()) {
|
||||
return
|
||||
}
|
||||
|
||||
const { content: truncatedResult, truncated } = truncateContent(
|
||||
`[TOOL RESULT: ${toolName}]\n${toolResult}`,
|
||||
)
|
||||
endAttributes['new_context'] = truncatedResult
|
||||
if (truncated) {
|
||||
endAttributes['new_context_truncated'] = true
|
||||
endAttributes['new_context_original_length'] = toolResult.length
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
import type { Attributes, HrTime } from '@opentelemetry/api'
|
||||
import { type ExportResult, ExportResultCode } from '@opentelemetry/core'
|
||||
import {
|
||||
AggregationTemporality,
|
||||
type MetricData,
|
||||
type DataPoint as OTelDataPoint,
|
||||
type PushMetricExporter,
|
||||
type ResourceMetrics,
|
||||
} from '@opentelemetry/sdk-metrics'
|
||||
import axios from 'axios'
|
||||
import { checkMetricsEnabled } from 'src/services/api/metricsOptOut.js'
|
||||
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
||||
import { getSubscriptionType, isClaudeAISubscriber } from '../auth.js'
|
||||
import { checkHasTrustDialogAccepted } from '../config.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { errorMessage, toError } from '../errors.js'
|
||||
import { getAuthHeaders } from '../http.js'
|
||||
import { logError } from '../log.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
import { getClaudeCodeUserAgent } from '../userAgent.js'
|
||||
|
||||
type DataPoint = {
|
||||
attributes: Record<string, string>
|
||||
value: number
|
||||
timestamp: string
|
||||
}
|
||||
|
||||
type Metric = {
|
||||
name: string
|
||||
description?: string
|
||||
unit?: string
|
||||
data_points: DataPoint[]
|
||||
}
|
||||
|
||||
type InternalMetricsPayload = {
|
||||
resource_attributes: Record<string, string>
|
||||
metrics: Metric[]
|
||||
}
|
||||
|
||||
export class BigQueryMetricsExporter implements PushMetricExporter {
|
||||
private readonly endpoint: string
|
||||
private readonly timeout: number
|
||||
private pendingExports: Promise<void>[] = []
|
||||
private isShutdown = false
|
||||
|
||||
constructor(options: { timeout?: number } = {}) {
|
||||
const defaultEndpoint = 'https://api.anthropic.com/api/claude_code/metrics'
|
||||
|
||||
if (
|
||||
process.env.USER_TYPE === 'ant' &&
|
||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT
|
||||
) {
|
||||
this.endpoint =
|
||||
process.env.ANT_CLAUDE_CODE_METRICS_ENDPOINT +
|
||||
'/api/claude_code/metrics'
|
||||
} else {
|
||||
this.endpoint = defaultEndpoint
|
||||
}
|
||||
|
||||
this.timeout = options.timeout || 5000
|
||||
}
|
||||
|
||||
async export(
|
||||
metrics: ResourceMetrics,
|
||||
resultCallback: (result: ExportResult) => void,
|
||||
): Promise<void> {
|
||||
if (this.isShutdown) {
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: new Error('Exporter has been shutdown'),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const exportPromise = this.doExport(metrics, resultCallback)
|
||||
this.pendingExports.push(exportPromise)
|
||||
|
||||
// Clean up completed exports
|
||||
void exportPromise.finally(() => {
|
||||
const index = this.pendingExports.indexOf(exportPromise)
|
||||
if (index > -1) {
|
||||
void this.pendingExports.splice(index, 1)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
private async doExport(
|
||||
metrics: ResourceMetrics,
|
||||
resultCallback: (result: ExportResult) => void,
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Skip if trust not established in interactive mode
|
||||
// This prevents triggering apiKeyHelper before trust dialog
|
||||
const hasTrust =
|
||||
checkHasTrustDialogAccepted() || getIsNonInteractiveSession()
|
||||
if (!hasTrust) {
|
||||
logForDebugging(
|
||||
'BigQuery metrics export: trust not established, skipping',
|
||||
)
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
return
|
||||
}
|
||||
|
||||
// Check organization-level metrics opt-out
|
||||
const metricsStatus = await checkMetricsEnabled()
|
||||
if (!metricsStatus.enabled) {
|
||||
logForDebugging('Metrics export disabled by organization setting')
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
return
|
||||
}
|
||||
|
||||
const payload = this.transformMetricsForInternal(metrics)
|
||||
|
||||
const authResult = getAuthHeaders()
|
||||
if (authResult.error) {
|
||||
logForDebugging(`Metrics export failed: ${authResult.error}`)
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: new Error(authResult.error),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
'User-Agent': getClaudeCodeUserAgent(),
|
||||
...authResult.headers,
|
||||
}
|
||||
|
||||
const response = await axios.post(this.endpoint, payload, {
|
||||
timeout: this.timeout,
|
||||
headers,
|
||||
})
|
||||
|
||||
logForDebugging('BigQuery metrics exported successfully')
|
||||
logForDebugging(
|
||||
`BigQuery API Response: ${jsonStringify(response.data, null, 2)}`,
|
||||
)
|
||||
resultCallback({ code: ExportResultCode.SUCCESS })
|
||||
} catch (error) {
|
||||
logForDebugging(`BigQuery metrics export failed: ${errorMessage(error)}`)
|
||||
logError(error)
|
||||
resultCallback({
|
||||
code: ExportResultCode.FAILED,
|
||||
error: toError(error),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
private transformMetricsForInternal(
|
||||
metrics: ResourceMetrics,
|
||||
): InternalMetricsPayload {
|
||||
const attrs = metrics.resource.attributes
|
||||
|
||||
const resourceAttributes: Record<string, string> = {
|
||||
'service.name': (attrs['service.name'] as string) || 'claude-code',
|
||||
'service.version': (attrs['service.version'] as string) || 'unknown',
|
||||
'os.type': (attrs['os.type'] as string) || 'unknown',
|
||||
'os.version': (attrs['os.version'] as string) || 'unknown',
|
||||
'host.arch': (attrs['host.arch'] as string) || 'unknown',
|
||||
'aggregation.temporality':
|
||||
this.selectAggregationTemporality() === AggregationTemporality.DELTA
|
||||
? 'delta'
|
||||
: 'cumulative',
|
||||
}
|
||||
|
||||
// Only add wsl.version if it exists (omit instead of default)
|
||||
if (attrs['wsl.version']) {
|
||||
resourceAttributes['wsl.version'] = attrs['wsl.version'] as string
|
||||
}
|
||||
|
||||
// Add customer type and subscription type
|
||||
if (isClaudeAISubscriber()) {
|
||||
resourceAttributes['user.customer_type'] = 'claude_ai'
|
||||
const subscriptionType = getSubscriptionType()
|
||||
if (subscriptionType) {
|
||||
resourceAttributes['user.subscription_type'] = subscriptionType
|
||||
}
|
||||
} else {
|
||||
resourceAttributes['user.customer_type'] = 'api'
|
||||
}
|
||||
|
||||
const transformed = {
|
||||
resource_attributes: resourceAttributes,
|
||||
metrics: metrics.scopeMetrics.flatMap(scopeMetric =>
|
||||
scopeMetric.metrics.map(metric => ({
|
||||
name: metric.descriptor.name,
|
||||
description: metric.descriptor.description,
|
||||
unit: metric.descriptor.unit,
|
||||
data_points: this.extractDataPoints(metric),
|
||||
})),
|
||||
),
|
||||
}
|
||||
|
||||
return transformed
|
||||
}
|
||||
|
||||
private extractDataPoints(metric: MetricData): DataPoint[] {
|
||||
const dataPoints = metric.dataPoints || []
|
||||
|
||||
return dataPoints
|
||||
.filter(
|
||||
(point): point is OTelDataPoint<number> =>
|
||||
typeof point.value === 'number',
|
||||
)
|
||||
.map(point => ({
|
||||
attributes: this.convertAttributes(point.attributes),
|
||||
value: point.value,
|
||||
timestamp: this.hrTimeToISOString(
|
||||
point.endTime || point.startTime || [Date.now() / 1000, 0],
|
||||
),
|
||||
}))
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
this.isShutdown = true
|
||||
await this.forceFlush()
|
||||
logForDebugging('BigQuery metrics exporter shutdown complete')
|
||||
}
|
||||
|
||||
async forceFlush(): Promise<void> {
|
||||
await Promise.all(this.pendingExports)
|
||||
logForDebugging('BigQuery metrics exporter flush complete')
|
||||
}
|
||||
|
||||
private convertAttributes(
|
||||
attributes: Attributes | undefined,
|
||||
): Record<string, string> {
|
||||
const result: Record<string, string> = {}
|
||||
if (attributes) {
|
||||
for (const [key, value] of Object.entries(attributes)) {
|
||||
if (value !== undefined && value !== null) {
|
||||
result[key] = String(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
private hrTimeToISOString(hrTime: HrTime): string {
|
||||
const [seconds, nanoseconds] = hrTime
|
||||
const date = new Date(seconds * 1000 + nanoseconds / 1000000)
|
||||
return date.toISOString()
|
||||
}
|
||||
|
||||
selectAggregationTemporality(): AggregationTemporality {
|
||||
// DO NOT CHANGE THIS TO CUMULATIVE
|
||||
// It would mess up the aggregation of metrics
|
||||
// for CC Productivity metrics dashboard
|
||||
return AggregationTemporality.DELTA
|
||||
}
|
||||
}
|
||||
@@ -1,75 +1,14 @@
|
||||
import type { Attributes } from '@opentelemetry/api'
|
||||
import { getEventLogger, getPromptId } from 'src/bootstrap/state.js'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { isEnvTruthy } from '../envUtils.js'
|
||||
import { getTelemetryAttributes } from '../telemetryAttributes.js'
|
||||
/**
|
||||
* OpenTelemetry event egress is disabled in this build.
|
||||
*/
|
||||
|
||||
// Monotonically increasing counter for ordering events within a session
|
||||
let eventSequence = 0
|
||||
|
||||
// Track whether we've already warned about a null event logger to avoid spamming
|
||||
let hasWarnedNoEventLogger = false
|
||||
|
||||
function isUserPromptLoggingEnabled() {
|
||||
return isEnvTruthy(process.env.OTEL_LOG_USER_PROMPTS)
|
||||
}
|
||||
|
||||
export function redactIfDisabled(content: string): string {
|
||||
return isUserPromptLoggingEnabled() ? content : '<REDACTED>'
|
||||
export function redactIfDisabled(_content: string): string {
|
||||
return '<REDACTED>'
|
||||
}
|
||||
|
||||
export async function logOTelEvent(
|
||||
eventName: string,
|
||||
metadata: { [key: string]: string | undefined } = {},
|
||||
_eventName: string,
|
||||
_metadata: { [key: string]: string | undefined } = {},
|
||||
): Promise<void> {
|
||||
const eventLogger = getEventLogger()
|
||||
if (!eventLogger) {
|
||||
if (!hasWarnedNoEventLogger) {
|
||||
hasWarnedNoEventLogger = true
|
||||
logForDebugging(
|
||||
`[3P telemetry] Event dropped (no event logger initialized): ${eventName}`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Skip logging in test environment
|
||||
if (process.env.NODE_ENV === 'test') {
|
||||
return
|
||||
}
|
||||
|
||||
const attributes: Attributes = {
|
||||
...getTelemetryAttributes(),
|
||||
'event.name': eventName,
|
||||
'event.timestamp': new Date().toISOString(),
|
||||
'event.sequence': eventSequence++,
|
||||
}
|
||||
|
||||
// Add prompt ID to events (but not metrics, where it would cause unbounded cardinality)
|
||||
const promptId = getPromptId()
|
||||
if (promptId) {
|
||||
attributes['prompt.id'] = promptId
|
||||
}
|
||||
|
||||
// Workspace directory from the desktop app (host path). Events only —
|
||||
// filesystem paths are too high-cardinality for metric dimensions, and
|
||||
// the BQ metrics pipeline must never see them.
|
||||
const workspaceDir = process.env.CLAUDE_CODE_WORKSPACE_HOST_PATHS
|
||||
if (workspaceDir) {
|
||||
attributes['workspace.host_paths'] = workspaceDir.split('|')
|
||||
}
|
||||
|
||||
// Add metadata as attributes - all values are already strings
|
||||
for (const [key, value] of Object.entries(metadata)) {
|
||||
if (value !== undefined) {
|
||||
attributes[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
// Emit log record as an event
|
||||
eventLogger.emit({
|
||||
body: `claude_code.${eventName}`,
|
||||
attributes,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,825 +0,0 @@
|
||||
import { DiagLogLevel, diag, trace } from '@opentelemetry/api'
|
||||
import { logs } from '@opentelemetry/api-logs'
|
||||
// OTLP/Prometheus exporters are dynamically imported inside the protocol
|
||||
// switch statements below. A process uses at most one protocol variant per
|
||||
// signal, but static imports would load all 6 (~1.2MB) on every startup.
|
||||
import {
|
||||
envDetector,
|
||||
hostDetector,
|
||||
osDetector,
|
||||
resourceFromAttributes,
|
||||
} from '@opentelemetry/resources'
|
||||
import {
|
||||
BatchLogRecordProcessor,
|
||||
ConsoleLogRecordExporter,
|
||||
LoggerProvider,
|
||||
} from '@opentelemetry/sdk-logs'
|
||||
import {
|
||||
ConsoleMetricExporter,
|
||||
MeterProvider,
|
||||
PeriodicExportingMetricReader,
|
||||
} from '@opentelemetry/sdk-metrics'
|
||||
import {
|
||||
BasicTracerProvider,
|
||||
BatchSpanProcessor,
|
||||
ConsoleSpanExporter,
|
||||
} from '@opentelemetry/sdk-trace-base'
|
||||
import {
|
||||
ATTR_SERVICE_NAME,
|
||||
ATTR_SERVICE_VERSION,
|
||||
SEMRESATTRS_HOST_ARCH,
|
||||
} from '@opentelemetry/semantic-conventions'
|
||||
import { HttpsProxyAgent } from 'https-proxy-agent'
|
||||
import {
|
||||
getLoggerProvider,
|
||||
getMeterProvider,
|
||||
getTracerProvider,
|
||||
setEventLogger,
|
||||
setLoggerProvider,
|
||||
setMeterProvider,
|
||||
setTracerProvider,
|
||||
} from 'src/bootstrap/state.js'
|
||||
import {
|
||||
getOtelHeadersFromHelper,
|
||||
getSubscriptionType,
|
||||
is1PApiCustomer,
|
||||
isClaudeAISubscriber,
|
||||
} from 'src/utils/auth.js'
|
||||
import { getPlatform, getWslVersion } from 'src/utils/platform.js'
|
||||
|
||||
import { getCACertificates } from '../caCerts.js'
|
||||
import { registerCleanup } from '../cleanupRegistry.js'
|
||||
import { getHasFormattedOutput, logForDebugging } from '../debug.js'
|
||||
import { isEnvTruthy } from '../envUtils.js'
|
||||
import { errorMessage } from '../errors.js'
|
||||
import { getMTLSConfig } from '../mtls.js'
|
||||
import { getProxyUrl, shouldBypassProxy } from '../proxy.js'
|
||||
import { getSettings_DEPRECATED } from '../settings/settings.js'
|
||||
import { jsonStringify } from '../slowOperations.js'
|
||||
import { profileCheckpoint } from '../startupProfiler.js'
|
||||
import { isBetaTracingEnabled } from './betaSessionTracing.js'
|
||||
import { BigQueryMetricsExporter } from './bigqueryExporter.js'
|
||||
import { ClaudeCodeDiagLogger } from './logger.js'
|
||||
import { initializePerfettoTracing } from './perfettoTracing.js'
|
||||
import {
|
||||
endInteractionSpan,
|
||||
isEnhancedTelemetryEnabled,
|
||||
} from './sessionTracing.js'
|
||||
|
||||
const DEFAULT_METRICS_EXPORT_INTERVAL_MS = 60000
|
||||
const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 5000
|
||||
const DEFAULT_TRACES_EXPORT_INTERVAL_MS = 5000
|
||||
|
||||
class TelemetryTimeoutError extends Error {}
|
||||
|
||||
function telemetryTimeout(ms: number, message: string): Promise<never> {
|
||||
return new Promise((_, reject) => {
|
||||
setTimeout(
|
||||
(rej: (e: Error) => void, msg: string) =>
|
||||
rej(new TelemetryTimeoutError(msg)),
|
||||
ms,
|
||||
reject,
|
||||
message,
|
||||
).unref()
|
||||
})
|
||||
}
|
||||
|
||||
export function bootstrapTelemetry() {
|
||||
if (process.env.USER_TYPE === 'ant') {
|
||||
// Read from ANT_ prefixed variables that are defined at build time
|
||||
if (process.env.ANT_OTEL_METRICS_EXPORTER) {
|
||||
process.env.OTEL_METRICS_EXPORTER = process.env.ANT_OTEL_METRICS_EXPORTER
|
||||
}
|
||||
if (process.env.ANT_OTEL_LOGS_EXPORTER) {
|
||||
process.env.OTEL_LOGS_EXPORTER = process.env.ANT_OTEL_LOGS_EXPORTER
|
||||
}
|
||||
if (process.env.ANT_OTEL_TRACES_EXPORTER) {
|
||||
process.env.OTEL_TRACES_EXPORTER = process.env.ANT_OTEL_TRACES_EXPORTER
|
||||
}
|
||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL) {
|
||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL =
|
||||
process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL
|
||||
}
|
||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT) {
|
||||
process.env.OTEL_EXPORTER_OTLP_ENDPOINT =
|
||||
process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
}
|
||||
if (process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS) {
|
||||
process.env.OTEL_EXPORTER_OTLP_HEADERS =
|
||||
process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS
|
||||
}
|
||||
}
|
||||
|
||||
// Set default tempoality to 'delta' because it's the more sane default
|
||||
if (!process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE) {
|
||||
process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = 'delta'
|
||||
}
|
||||
}
|
||||
|
||||
// Per OTEL spec, "none" means "no automatically configured exporter for this signal".
|
||||
// https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#exporter-selection
|
||||
export function parseExporterTypes(value: string | undefined): string[] {
|
||||
return (value || '')
|
||||
.trim()
|
||||
.split(',')
|
||||
.filter(Boolean)
|
||||
.map(t => t.trim())
|
||||
.filter(t => t !== 'none')
|
||||
}
|
||||
|
||||
async function getOtlpReaders() {
|
||||
const exporterTypes = parseExporterTypes(process.env.OTEL_METRICS_EXPORTER)
|
||||
const exportInterval = parseInt(
|
||||
process.env.OTEL_METRIC_EXPORT_INTERVAL ||
|
||||
DEFAULT_METRICS_EXPORT_INTERVAL_MS.toString(),
|
||||
)
|
||||
|
||||
const exporters = []
|
||||
for (const exporterType of exporterTypes) {
|
||||
if (exporterType === 'console') {
|
||||
// Custom console exporter that shows resource attributes
|
||||
const consoleExporter = new ConsoleMetricExporter()
|
||||
const originalExport = consoleExporter.export.bind(consoleExporter)
|
||||
|
||||
consoleExporter.export = (metrics, callback) => {
|
||||
// Log resource attributes once at the start
|
||||
if (metrics.resource && metrics.resource.attributes) {
|
||||
// The console exporter is for debugging, so console output is intentional here
|
||||
|
||||
logForDebugging('\n=== Resource Attributes ===')
|
||||
logForDebugging(jsonStringify(metrics.resource.attributes))
|
||||
logForDebugging('===========================\n')
|
||||
}
|
||||
|
||||
return originalExport(metrics, callback)
|
||||
}
|
||||
|
||||
exporters.push(consoleExporter)
|
||||
} else if (exporterType === 'otlp') {
|
||||
const protocol =
|
||||
process.env.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL?.trim() ||
|
||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
||||
|
||||
const httpConfig = getOTLPExporterConfig()
|
||||
|
||||
switch (protocol) {
|
||||
case 'grpc': {
|
||||
// Lazy-import to keep @grpc/grpc-js (~700KB) out of the telemetry chunk
|
||||
// when the protocol is http/protobuf (ant default) or http/json.
|
||||
const { OTLPMetricExporter } = await import(
|
||||
'@opentelemetry/exporter-metrics-otlp-grpc'
|
||||
)
|
||||
exporters.push(new OTLPMetricExporter())
|
||||
break
|
||||
}
|
||||
case 'http/json': {
|
||||
const { OTLPMetricExporter } = await import(
|
||||
'@opentelemetry/exporter-metrics-otlp-http'
|
||||
)
|
||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
case 'http/protobuf': {
|
||||
const { OTLPMetricExporter } = await import(
|
||||
'@opentelemetry/exporter-metrics-otlp-proto'
|
||||
)
|
||||
exporters.push(new OTLPMetricExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
default:
|
||||
throw new Error(
|
||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
||||
)
|
||||
}
|
||||
} else if (exporterType === 'prometheus') {
|
||||
const { PrometheusExporter } = await import(
|
||||
'@opentelemetry/exporter-prometheus'
|
||||
)
|
||||
exporters.push(new PrometheusExporter())
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unknown exporter type set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${exporterType}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return exporters.map(exporter => {
|
||||
if ('export' in exporter) {
|
||||
return new PeriodicExportingMetricReader({
|
||||
exporter,
|
||||
exportIntervalMillis: exportInterval,
|
||||
})
|
||||
}
|
||||
return exporter
|
||||
})
|
||||
}
|
||||
|
||||
async function getOtlpLogExporters() {
|
||||
const exporterTypes = parseExporterTypes(process.env.OTEL_LOGS_EXPORTER)
|
||||
|
||||
const protocol =
|
||||
process.env.OTEL_EXPORTER_OTLP_LOGS_PROTOCOL?.trim() ||
|
||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
||||
const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
|
||||
logForDebugging(
|
||||
`[3P telemetry] getOtlpLogExporters: types=${jsonStringify(exporterTypes)}, protocol=${protocol}, endpoint=${endpoint}`,
|
||||
)
|
||||
|
||||
const exporters = []
|
||||
for (const exporterType of exporterTypes) {
|
||||
if (exporterType === 'console') {
|
||||
exporters.push(new ConsoleLogRecordExporter())
|
||||
} else if (exporterType === 'otlp') {
|
||||
const httpConfig = getOTLPExporterConfig()
|
||||
|
||||
switch (protocol) {
|
||||
case 'grpc': {
|
||||
const { OTLPLogExporter } = await import(
|
||||
'@opentelemetry/exporter-logs-otlp-grpc'
|
||||
)
|
||||
exporters.push(new OTLPLogExporter())
|
||||
break
|
||||
}
|
||||
case 'http/json': {
|
||||
const { OTLPLogExporter } = await import(
|
||||
'@opentelemetry/exporter-logs-otlp-http'
|
||||
)
|
||||
exporters.push(new OTLPLogExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
case 'http/protobuf': {
|
||||
const { OTLPLogExporter } = await import(
|
||||
'@opentelemetry/exporter-logs-otlp-proto'
|
||||
)
|
||||
exporters.push(new OTLPLogExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
default:
|
||||
throw new Error(
|
||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_LOGS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unknown exporter type set in OTEL_LOGS_EXPORTER env var: ${exporterType}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return exporters
|
||||
}
|
||||
|
||||
async function getOtlpTraceExporters() {
|
||||
const exporterTypes = parseExporterTypes(process.env.OTEL_TRACES_EXPORTER)
|
||||
|
||||
const exporters = []
|
||||
for (const exporterType of exporterTypes) {
|
||||
if (exporterType === 'console') {
|
||||
exporters.push(new ConsoleSpanExporter())
|
||||
} else if (exporterType === 'otlp') {
|
||||
const protocol =
|
||||
process.env.OTEL_EXPORTER_OTLP_TRACES_PROTOCOL?.trim() ||
|
||||
process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
|
||||
|
||||
const httpConfig = getOTLPExporterConfig()
|
||||
|
||||
switch (protocol) {
|
||||
case 'grpc': {
|
||||
const { OTLPTraceExporter } = await import(
|
||||
'@opentelemetry/exporter-trace-otlp-grpc'
|
||||
)
|
||||
exporters.push(new OTLPTraceExporter())
|
||||
break
|
||||
}
|
||||
case 'http/json': {
|
||||
const { OTLPTraceExporter } = await import(
|
||||
'@opentelemetry/exporter-trace-otlp-http'
|
||||
)
|
||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
case 'http/protobuf': {
|
||||
const { OTLPTraceExporter } = await import(
|
||||
'@opentelemetry/exporter-trace-otlp-proto'
|
||||
)
|
||||
exporters.push(new OTLPTraceExporter(httpConfig))
|
||||
break
|
||||
}
|
||||
default:
|
||||
throw new Error(
|
||||
`Unknown protocol set in OTEL_EXPORTER_OTLP_TRACES_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unknown exporter type set in OTEL_TRACES_EXPORTER env var: ${exporterType}`,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return exporters
|
||||
}
|
||||
|
||||
export function isTelemetryEnabled() {
|
||||
return isEnvTruthy(process.env.CLAUDE_CODE_ENABLE_TELEMETRY)
|
||||
}
|
||||
|
||||
function getBigQueryExportingReader() {
|
||||
const bigqueryExporter = new BigQueryMetricsExporter()
|
||||
return new PeriodicExportingMetricReader({
|
||||
exporter: bigqueryExporter,
|
||||
exportIntervalMillis: 5 * 60 * 1000, // 5mins for BigQuery metrics exporter to reduce load
|
||||
})
|
||||
}
|
||||
|
||||
function isBigQueryMetricsEnabled() {
|
||||
// BigQuery metrics are enabled for:
|
||||
// 1. API customers (excluding Claude.ai subscribers and Bedrock/Vertex)
|
||||
// 2. Claude for Enterprise (C4E) users
|
||||
// 3. Claude for Teams users
|
||||
const subscriptionType = getSubscriptionType()
|
||||
const isC4EOrTeamUser =
|
||||
isClaudeAISubscriber() &&
|
||||
(subscriptionType === 'enterprise' || subscriptionType === 'team')
|
||||
|
||||
return is1PApiCustomer() || isC4EOrTeamUser
|
||||
}
|
||||
|
||||
/**
 * Initialize beta tracing - a separate code path for detailed debugging.
 * Uses BETA_TRACING_ENDPOINT instead of OTEL_EXPORTER_OTLP_ENDPOINT.
 *
 * Builds OTLP/HTTP trace and log exporters against that endpoint, installs
 * them as both the global and locally-tracked tracer/logger providers, sets
 * the event logger, and registers flush handlers on process exit. No-ops
 * when BETA_TRACING_ENDPOINT is unset.
 *
 * @param resource OTEL resource attached to all spans/log records.
 */
async function initializeBetaTracing(
  resource: ReturnType<typeof resourceFromAttributes>,
): Promise<void> {
  const endpoint = process.env.BETA_TRACING_ENDPOINT
  if (!endpoint) {
    return
  }

  // Lazy-import the HTTP exporters so they stay out of the startup path
  // when beta tracing is disabled.
  const [{ OTLPTraceExporter }, { OTLPLogExporter }] = await Promise.all([
    import('@opentelemetry/exporter-trace-otlp-http'),
    import('@opentelemetry/exporter-logs-otlp-http'),
  ])

  const httpConfig = {
    url: `${endpoint}/v1/traces`,
  }

  const logHttpConfig = {
    url: `${endpoint}/v1/logs`,
  }

  // Initialize trace exporter
  const traceExporter = new OTLPTraceExporter(httpConfig)
  const spanProcessor = new BatchSpanProcessor(traceExporter, {
    scheduledDelayMillis: DEFAULT_TRACES_EXPORT_INTERVAL_MS,
  })

  const tracerProvider = new BasicTracerProvider({
    resource,
    spanProcessors: [spanProcessor],
  })

  // Register globally (for instrumentation) and in local state (for
  // flush/shutdown bookkeeping).
  trace.setGlobalTracerProvider(tracerProvider)
  setTracerProvider(tracerProvider)

  // Initialize log exporter
  const logExporter = new OTLPLogExporter(logHttpConfig)
  const loggerProvider = new LoggerProvider({
    resource,
    processors: [
      new BatchLogRecordProcessor(logExporter, {
        scheduledDelayMillis: DEFAULT_LOGS_EXPORT_INTERVAL_MS,
      }),
    ],
  })

  logs.setGlobalLoggerProvider(loggerProvider)
  setLoggerProvider(loggerProvider)

  // Initialize event logger
  const eventLogger = logs.getLogger(
    'com.anthropic.claude_code.events',
    MACRO.VERSION,
  )
  setEventLogger(eventLogger)

  // Setup flush handlers - flush both logs AND traces.
  // 'beforeExit' allows awaiting async flushes; the synchronous 'exit'
  // handler can only kick off a best-effort flush it cannot await.
  process.on('beforeExit', async () => {
    await loggerProvider?.forceFlush()
    await tracerProvider?.forceFlush()
  })

  process.on('exit', () => {
    void loggerProvider?.forceFlush()
    void tracerProvider?.forceFlush()
  })
}
|
||||
|
||||
/**
 * One-time OpenTelemetry setup for the process: normalizes OTEL_* env vars,
 * builds the resource (service name/version, OS, host arch, env overrides),
 * wires up metric readers, log exporters, and (beta) trace exporters, and
 * registers a time-bounded shutdown hook. Returns the meter used for
 * internal metrics.
 *
 * NOTE(review): assumed to be called once at startup before any metrics are
 * recorded — confirm against the caller.
 */
export async function initializeTelemetry() {
  profileCheckpoint('telemetry_init_start')
  bootstrapTelemetry()

  // Console exporters call console.dir on a timer (5s logs/traces, 60s
  // metrics), writing pretty-printed objects to stdout. In stream-json
  // mode stdout is the SDK message channel; the first line (`{`) breaks
  // the SDK's line reader. Stripped here (not main.tsx) because init.ts
  // re-runs applyConfigEnvironmentVariables() inside initializeTelemetry-
  // AfterTrust for remote-managed-settings users, and bootstrapTelemetry
  // above copies ANT_OTEL_* for ant users — both would undo an earlier strip.
  if (getHasFormattedOutput()) {
    for (const key of [
      'OTEL_METRICS_EXPORTER',
      'OTEL_LOGS_EXPORTER',
      'OTEL_TRACES_EXPORTER',
    ] as const) {
      const v = process.env[key]
      if (v?.includes('console')) {
        process.env[key] = v
          .split(',')
          .map(s => s.trim())
          .filter(s => s !== 'console')
          .join(',')
      }
    }
  }

  // Route OTEL SDK diagnostics (errors only) into our own logger.
  diag.setLogger(new ClaudeCodeDiagLogger(), DiagLogLevel.ERROR)

  // Initialize Perfetto tracing (independent of OTEL)
  // Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
  initializePerfettoTracing()

  // Metric readers for the MeterProvider: customer OTLP (opt-in) plus the
  // internal BigQuery reader (plan-gated).
  const readers = []

  // Add customer exporters (if enabled)
  const telemetryEnabled = isTelemetryEnabled()
  logForDebugging(
    `[3P telemetry] isTelemetryEnabled=${telemetryEnabled} (CLAUDE_CODE_ENABLE_TELEMETRY=${process.env.CLAUDE_CODE_ENABLE_TELEMETRY})`,
  )
  if (telemetryEnabled) {
    readers.push(...(await getOtlpReaders()))
  }

  // Add BigQuery exporter (for API customers, C4E users, and internal users)
  if (isBigQueryMetricsEnabled()) {
    readers.push(getBigQueryExportingReader())
  }

  // Create base resource with service attributes
  const platform = getPlatform()
  const baseAttributes: Record<string, string> = {
    [ATTR_SERVICE_NAME]: 'claude-code',
    [ATTR_SERVICE_VERSION]: MACRO.VERSION,
  }

  // Add WSL-specific attributes if running on WSL
  if (platform === 'wsl') {
    const wslVersion = getWslVersion()
    if (wslVersion) {
      baseAttributes['wsl.version'] = wslVersion
    }
  }

  const baseResource = resourceFromAttributes(baseAttributes)

  // Use OpenTelemetry detectors
  const osResource = resourceFromAttributes(
    osDetector.detect().attributes || {},
  )

  // Extract only host.arch from hostDetector
  const hostDetected = hostDetector.detect()
  const hostArchAttributes = hostDetected.attributes?.[SEMRESATTRS_HOST_ARCH]
    ? {
        [SEMRESATTRS_HOST_ARCH]: hostDetected.attributes[SEMRESATTRS_HOST_ARCH],
      }
    : {}
  const hostArchResource = resourceFromAttributes(hostArchAttributes)

  const envResource = resourceFromAttributes(
    envDetector.detect().attributes || {},
  )

  // Merge resources - later resources take precedence
  const resource = baseResource
    .merge(osResource)
    .merge(hostArchResource)
    .merge(envResource)

  // Check if beta tracing is enabled - this is a separate code path
  // Available to all users who set ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
  if (isBetaTracingEnabled()) {
    // Fire-and-forget: beta tracing failure must not block startup.
    void initializeBetaTracing(resource).catch(e =>
      logForDebugging(`Beta tracing init failed: ${e}`, { level: 'error' }),
    )
    // Still set up meter provider for metrics (but skip regular logs/traces setup)
    const meterProvider = new MeterProvider({
      resource,
      views: [],
      readers,
    })
    setMeterProvider(meterProvider)

    // Register shutdown for beta tracing
    const shutdownTelemetry = async () => {
      const timeoutMs = parseInt(
        process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
      )
      try {
        endInteractionSpan()

        // Force flush + shutdown together inside the timeout. Previously forceFlush
        // was awaited unbounded BEFORE the race, blocking exit on slow OTLP endpoints.
        // Each provider's flush→shutdown is chained independently so a slow logger
        // flush doesn't delay meterProvider/tracerProvider shutdown (no waterfall).
        const loggerProvider = getLoggerProvider()
        const tracerProvider = getTracerProvider()

        const chains: Promise<void>[] = [meterProvider.shutdown()]
        if (loggerProvider) {
          chains.push(
            loggerProvider.forceFlush().then(() => loggerProvider.shutdown()),
          )
        }
        if (tracerProvider) {
          chains.push(
            tracerProvider.forceFlush().then(() => tracerProvider.shutdown()),
          )
        }

        await Promise.race([
          Promise.all(chains),
          telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
        ])
      } catch {
        // Ignore shutdown errors
      }
    }
    registerCleanup(shutdownTelemetry)

    return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
  }

  const meterProvider = new MeterProvider({
    resource,
    views: [],
    readers,
  })

  // Store reference in state for flushing
  setMeterProvider(meterProvider)

  // Initialize logs if telemetry is enabled
  if (telemetryEnabled) {
    const logExporters = await getOtlpLogExporters()
    logForDebugging(
      `[3P telemetry] Created ${logExporters.length} log exporter(s)`,
    )

    if (logExporters.length > 0) {
      const loggerProvider = new LoggerProvider({
        resource,
        // Add batch processors for each exporter
        processors: logExporters.map(
          exporter =>
            new BatchLogRecordProcessor(exporter, {
              scheduledDelayMillis: parseInt(
                process.env.OTEL_LOGS_EXPORT_INTERVAL ||
                  DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
              ),
            }),
        ),
      })

      // Register the logger provider globally
      logs.setGlobalLoggerProvider(loggerProvider)
      setLoggerProvider(loggerProvider)

      // Initialize event logger
      const eventLogger = logs.getLogger(
        'com.anthropic.claude_code.events',
        MACRO.VERSION,
      )
      setEventLogger(eventLogger)
      logForDebugging('[3P telemetry] Event logger set successfully')

      // 'beforeExit' is emitted when Node.js empties its event loop and has no additional work to schedule.
      // Unlike 'exit', it allows us to perform async operations, so it works well for letting
      // network requests complete before the process exits naturally.
      process.on('beforeExit', async () => {
        await loggerProvider?.forceFlush()
        // Also flush traces - they use BatchSpanProcessor which needs explicit flush
        const tracerProvider = getTracerProvider()
        await tracerProvider?.forceFlush()
      })

      process.on('exit', () => {
        // Final attempt to flush logs and traces
        void loggerProvider?.forceFlush()
        void getTracerProvider()?.forceFlush()
      })
    }
  }

  // Initialize tracing if enhanced telemetry is enabled (BETA)
  if (telemetryEnabled && isEnhancedTelemetryEnabled()) {
    const traceExporters = await getOtlpTraceExporters()
    if (traceExporters.length > 0) {
      // Create span processors for each exporter
      const spanProcessors = traceExporters.map(
        exporter =>
          new BatchSpanProcessor(exporter, {
            scheduledDelayMillis: parseInt(
              process.env.OTEL_TRACES_EXPORT_INTERVAL ||
                DEFAULT_TRACES_EXPORT_INTERVAL_MS.toString(),
            ),
          }),
      )

      const tracerProvider = new BasicTracerProvider({
        resource,
        spanProcessors,
      })

      // Register the tracer provider globally
      trace.setGlobalTracerProvider(tracerProvider)
      setTracerProvider(tracerProvider)
    }
  }

  // Shutdown metrics and logs on exit (flushes and closes exporters)
  const shutdownTelemetry = async () => {
    const timeoutMs = parseInt(
      process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
    )

    try {
      // End any active interaction span before shutdown
      endInteractionSpan()

      const shutdownPromises = [meterProvider.shutdown()]
      const loggerProvider = getLoggerProvider()
      if (loggerProvider) {
        shutdownPromises.push(loggerProvider.shutdown())
      }
      const tracerProvider = getTracerProvider()
      if (tracerProvider) {
        shutdownPromises.push(tracerProvider.shutdown())
      }

      await Promise.race([
        Promise.all(shutdownPromises),
        telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
      ])
    } catch (error) {
      if (error instanceof Error && error.message.includes('timeout')) {
        logForDebugging(
          `
OpenTelemetry telemetry flush timed out after ${timeoutMs}ms

To resolve this issue, you can:
1. Increase the timeout by setting CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS env var (e.g., 5000 for 5 seconds)
2. Check if your OpenTelemetry backend is experiencing scalability issues
3. Disable OpenTelemetry by unsetting CLAUDE_CODE_ENABLE_TELEMETRY env var

Current timeout: ${timeoutMs}ms
`,
          { level: 'error' },
        )
      }
      throw error
    }
  }

  // Always register shutdown (internal metrics are always enabled)
  registerCleanup(shutdownTelemetry)

  return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
}
|
||||
|
||||
/**
|
||||
* Flush all pending telemetry data immediately.
|
||||
* This should be called before logout or org switching to prevent data leakage.
|
||||
*/
|
||||
export async function flushTelemetry(): Promise<void> {
|
||||
const meterProvider = getMeterProvider()
|
||||
if (!meterProvider) {
|
||||
return
|
||||
}
|
||||
|
||||
const timeoutMs = parseInt(
|
||||
process.env.CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS || '5000',
|
||||
)
|
||||
|
||||
try {
|
||||
const flushPromises = [meterProvider.forceFlush()]
|
||||
const loggerProvider = getLoggerProvider()
|
||||
if (loggerProvider) {
|
||||
flushPromises.push(loggerProvider.forceFlush())
|
||||
}
|
||||
const tracerProvider = getTracerProvider()
|
||||
if (tracerProvider) {
|
||||
flushPromises.push(tracerProvider.forceFlush())
|
||||
}
|
||||
|
||||
await Promise.race([
|
||||
Promise.all(flushPromises),
|
||||
telemetryTimeout(timeoutMs, 'OpenTelemetry flush timeout'),
|
||||
])
|
||||
|
||||
logForDebugging('Telemetry flushed successfully')
|
||||
} catch (error) {
|
||||
if (error instanceof TelemetryTimeoutError) {
|
||||
logForDebugging(
|
||||
`Telemetry flush timed out after ${timeoutMs}ms. Some metrics may not be exported.`,
|
||||
{ level: 'warn' },
|
||||
)
|
||||
} else {
|
||||
logForDebugging(`Telemetry flush failed: ${errorMessage(error)}`, {
|
||||
level: 'error',
|
||||
})
|
||||
}
|
||||
// Don't throw - allow logout to continue even if flush fails
|
||||
}
|
||||
}
|
||||
|
||||
function parseOtelHeadersEnvVar(): Record<string, string> {
|
||||
const headers: Record<string, string> = {}
|
||||
const envHeaders = process.env.OTEL_EXPORTER_OTLP_HEADERS
|
||||
if (envHeaders) {
|
||||
for (const pair of envHeaders.split(',')) {
|
||||
const [key, ...valueParts] = pair.split('=')
|
||||
if (key && valueParts.length > 0) {
|
||||
headers[key.trim()] = valueParts.join('=').trim()
|
||||
}
|
||||
}
|
||||
}
|
||||
return headers
|
||||
}
|
||||
|
||||
/**
|
||||
* Get configuration for OTLP exporters including:
|
||||
* - HTTP agent options (proxy, mTLS)
|
||||
* - Dynamic headers via otelHeadersHelper or static headers from env var
|
||||
*/
|
||||
function getOTLPExporterConfig() {
|
||||
const proxyUrl = getProxyUrl()
|
||||
const mtlsConfig = getMTLSConfig()
|
||||
const settings = getSettings_DEPRECATED()
|
||||
|
||||
// Build base config
|
||||
const config: Record<string, unknown> = {}
|
||||
|
||||
// Parse static headers from env var once (doesn't change at runtime)
|
||||
const staticHeaders = parseOtelHeadersEnvVar()
|
||||
|
||||
// If otelHeadersHelper is configured, use async headers function for dynamic refresh
|
||||
// Otherwise just return static headers if any exist
|
||||
if (settings?.otelHeadersHelper) {
|
||||
config.headers = async (): Promise<Record<string, string>> => {
|
||||
const dynamicHeaders = getOtelHeadersFromHelper()
|
||||
return { ...staticHeaders, ...dynamicHeaders }
|
||||
}
|
||||
} else if (Object.keys(staticHeaders).length > 0) {
|
||||
config.headers = async (): Promise<Record<string, string>> => staticHeaders
|
||||
}
|
||||
|
||||
// Check if we should bypass proxy for OTEL endpoint
|
||||
const otelEndpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
if (!proxyUrl || (otelEndpoint && shouldBypassProxy(otelEndpoint))) {
|
||||
// No proxy configured or OTEL endpoint should bypass proxy
|
||||
const caCerts = getCACertificates()
|
||||
if (mtlsConfig || caCerts) {
|
||||
config.httpAgentOptions = {
|
||||
...mtlsConfig,
|
||||
...(caCerts && { ca: caCerts }),
|
||||
}
|
||||
}
|
||||
return config
|
||||
}
|
||||
|
||||
// Return an HttpAgentFactory function that creates our proxy agent
|
||||
const caCerts = getCACertificates()
|
||||
const agentFactory = (_protocol: string) => {
|
||||
// Create and return the proxy agent with mTLS and CA cert config
|
||||
const proxyAgent =
|
||||
mtlsConfig || caCerts
|
||||
? new HttpsProxyAgent(proxyUrl, {
|
||||
...(mtlsConfig && {
|
||||
cert: mtlsConfig.cert,
|
||||
key: mtlsConfig.key,
|
||||
passphrase: mtlsConfig.passphrase,
|
||||
}),
|
||||
...(caCerts && { ca: caCerts }),
|
||||
})
|
||||
: new HttpsProxyAgent(proxyUrl)
|
||||
|
||||
return proxyAgent
|
||||
}
|
||||
|
||||
config.httpAgentOptions = agentFactory
|
||||
return config
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
import type { DiagLogger } from '@opentelemetry/api'
|
||||
import { logForDebugging } from '../debug.js'
|
||||
import { logError } from '../log.js'
|
||||
export class ClaudeCodeDiagLogger implements DiagLogger {
|
||||
error(message: string, ..._: unknown[]) {
|
||||
logError(new Error(message))
|
||||
logForDebugging(`[3P telemetry] OTEL diag error: ${message}`, {
|
||||
level: 'error',
|
||||
})
|
||||
}
|
||||
warn(message: string, ..._: unknown[]) {
|
||||
logError(new Error(message))
|
||||
logForDebugging(`[3P telemetry] OTEL diag warn: ${message}`, {
|
||||
level: 'warn',
|
||||
})
|
||||
}
|
||||
info(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
debug(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
verbose(_message: string, ..._args: unknown[]) {
|
||||
return
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,28 +1,17 @@
|
||||
/**
|
||||
* Plugin telemetry helpers — shared field builders for plugin lifecycle events.
|
||||
* Legacy plugin metadata helpers shared by call sites that still assemble
|
||||
* analytics-compatible payload shapes.
|
||||
*
|
||||
* Implements the twin-column privacy pattern: every user-defined-name field
|
||||
* emits both a raw value (routed to PII-tagged _PROTO_* BQ columns) and a
|
||||
* redacted twin (real name iff marketplace ∈ allowlist, else 'third-party').
|
||||
*
|
||||
* plugin_id_hash provides an opaque per-plugin aggregation key with no privacy
|
||||
* dependency — sha256(name@marketplace + FIXED_SALT) truncated to 16 chars.
|
||||
* This answers distinct-count and per-plugin-trend questions that the
|
||||
* redacted column can't, without exposing user-defined names.
|
||||
* In this fork the downstream analytics sinks are disabled, so these helpers
|
||||
* only normalize/redact fields for local compatibility code; they do not
|
||||
* imply an active telemetry export path.
|
||||
*/
|
||||
|
||||
import { createHash } from 'crypto'
|
||||
import { sep } from 'path'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
logEvent,
|
||||
} from '../../services/analytics/index.js'
|
||||
import type {
|
||||
LoadedPlugin,
|
||||
PluginError,
|
||||
PluginManifest,
|
||||
} from '../../types/plugin.js'
|
||||
import type { PluginManifest } from '../../types/plugin.js'
|
||||
import {
|
||||
isOfficialMarketplaceName,
|
||||
parsePluginIdentifier,
|
||||
@@ -32,18 +21,14 @@ import {
|
||||
// through commands.js. Marketplace schemas.ts enforces 'builtin' is reserved.
|
||||
const BUILTIN_MARKETPLACE_NAME = 'builtin'
|
||||
|
||||
// Fixed salt for plugin_id_hash. Same constant across all repos and emission
|
||||
// sites. Not per-org, not rotated — per-org salt would defeat cross-org
|
||||
// distinct-count, rotation would break trend lines. Customers can compute the
|
||||
// same hash on their known plugin names to reverse-match their own telemetry.
|
||||
// Fixed salt for plugin_id_hash. Kept stable so legacy field shapes that still
|
||||
// use this helper continue to derive the same opaque key.
|
||||
const PLUGIN_ID_HASH_SALT = 'claude-plugin-telemetry-v1'
|
||||
|
||||
/**
|
||||
* Opaque per-plugin aggregation key. Input is the name@marketplace string as
|
||||
* it appears in enabledPlugins keys, lowercased on the marketplace suffix for
|
||||
* reproducibility. 16-char truncation keeps BQ GROUP BY cardinality manageable
|
||||
* while making collisions negligible at projected 10k-plugin scale. Name case
|
||||
* is preserved in both branches (enabledPlugins keys are case-sensitive).
|
||||
* Opaque per-plugin compatibility key derived from the name@marketplace
|
||||
* string. The 16-char truncation keeps the identifier short while preserving
|
||||
* a stable grouping key for local compatibility code.
|
||||
*/
|
||||
export function hashPluginId(name: string, marketplace?: string): string {
|
||||
const key = marketplace ? `${name}@${marketplace.toLowerCase()}` : name
|
||||
@@ -80,17 +65,6 @@ export function getTelemetryPluginScope(
|
||||
return 'user-local'
|
||||
}
|
||||
|
||||
/**
|
||||
* How a plugin arrived in the session. Splits self-selected from org-pushed
|
||||
* — plugin_scope alone doesn't (an official plugin can be user-installed OR
|
||||
* org-pushed; both are scope='official').
|
||||
*/
|
||||
export type EnabledVia =
|
||||
| 'user-install'
|
||||
| 'org-policy'
|
||||
| 'default-enable'
|
||||
| 'seed-mount'
|
||||
|
||||
/** How a skill/command invocation was triggered. */
|
||||
export type InvocationTrigger =
|
||||
| 'user-slash'
|
||||
@@ -107,28 +81,10 @@ export type InstallSource =
|
||||
| 'ui-suggestion'
|
||||
| 'deep-link'
|
||||
|
||||
export function getEnabledVia(
|
||||
plugin: LoadedPlugin,
|
||||
managedNames: Set<string> | null,
|
||||
seedDirs: string[],
|
||||
): EnabledVia {
|
||||
if (plugin.isBuiltin) return 'default-enable'
|
||||
if (managedNames?.has(plugin.name)) return 'org-policy'
|
||||
// Trailing sep: /opt/plugins must not match /opt/plugins-extra
|
||||
if (
|
||||
seedDirs.some(dir =>
|
||||
plugin.path.startsWith(dir.endsWith(sep) ? dir : dir + sep),
|
||||
)
|
||||
) {
|
||||
return 'seed-mount'
|
||||
}
|
||||
return 'user-install'
|
||||
}
|
||||
|
||||
/**
|
||||
* Common plugin telemetry fields keyed off name@marketplace. Returns the
|
||||
* hash, scope enum, and the redacted-twin columns. Callers add the raw
|
||||
* _PROTO_* fields separately (those require the PII-tagged marker type).
|
||||
* Common plugin metadata fields keyed off name@marketplace. Keeps the legacy
|
||||
* field set in one place so no-op analytics compatibility callers do not have
|
||||
* to duplicate redaction logic.
|
||||
*/
|
||||
export function buildPluginTelemetryFields(
|
||||
name: string,
|
||||
@@ -165,10 +121,7 @@ export function buildPluginTelemetryFields(
|
||||
|
||||
/**
|
||||
* Per-invocation callers (SkillTool, processSlashCommand) pass
|
||||
* managedNames=null — the session-level tengu_plugin_enabled_for_session
|
||||
* event carries the authoritative plugin_scope, and per-invocation rows can
|
||||
* join on plugin_id_hash to recover it. This keeps hot-path call sites free
|
||||
* of the extra settings read.
|
||||
* managedNames=null to keep hot-path call sites free of the extra settings read.
|
||||
*/
|
||||
export function buildPluginCommandTelemetryFields(
|
||||
pluginInfo: { pluginManifest: PluginManifest; repository: string },
|
||||
@@ -183,50 +136,7 @@ export function buildPluginCommandTelemetryFields(
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit tengu_plugin_enabled_for_session once per enabled plugin at session
|
||||
* start. Supplements tengu_skill_loaded (which still fires per-skill) — use
|
||||
* this for plugin-level aggregates instead of DISTINCT-on-prefix hacks.
|
||||
* A plugin with 5 skills emits 5 skill_loaded rows but 1 of these.
|
||||
*/
|
||||
export function logPluginsEnabledForSession(
|
||||
plugins: LoadedPlugin[],
|
||||
managedNames: Set<string> | null,
|
||||
seedDirs: string[],
|
||||
): void {
|
||||
for (const plugin of plugins) {
|
||||
const { marketplace } = parsePluginIdentifier(plugin.repository)
|
||||
|
||||
logEvent('tengu_plugin_enabled_for_session', {
|
||||
_PROTO_plugin_name:
|
||||
plugin.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
...(marketplace && {
|
||||
_PROTO_marketplace_name:
|
||||
marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
}),
|
||||
...buildPluginTelemetryFields(plugin.name, marketplace, managedNames),
|
||||
enabled_via: getEnabledVia(
|
||||
plugin,
|
||||
managedNames,
|
||||
seedDirs,
|
||||
) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
skill_path_count:
|
||||
(plugin.skillsPath ? 1 : 0) + (plugin.skillsPaths?.length ?? 0),
|
||||
command_path_count:
|
||||
(plugin.commandsPath ? 1 : 0) + (plugin.commandsPaths?.length ?? 0),
|
||||
has_mcp: plugin.manifest.mcpServers !== undefined,
|
||||
has_hooks: plugin.hooksConfig !== undefined,
|
||||
...(plugin.manifest.version && {
|
||||
version: plugin.manifest
|
||||
.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bounded-cardinality error bucket for CLI plugin operation failures.
|
||||
* Maps free-form error messages to 5 stable categories so dashboard
|
||||
* GROUP BY stays tractable.
|
||||
* Stable error buckets for CLI plugin operation failures.
|
||||
*/
|
||||
export type PluginCommandErrorCategory =
|
||||
| 'network'
|
||||
@@ -257,33 +167,3 @@ export function classifyPluginCommandError(
|
||||
}
|
||||
return 'unknown'
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit tengu_plugin_load_failed once per error surfaced by session-start
|
||||
* plugin loading. Pairs with tengu_plugin_enabled_for_session so dashboards
|
||||
* can compute a load-success rate. PluginError.type is already a bounded
|
||||
* enum — use it directly as error_category.
|
||||
*/
|
||||
export function logPluginLoadErrors(
|
||||
errors: PluginError[],
|
||||
managedNames: Set<string> | null,
|
||||
): void {
|
||||
for (const err of errors) {
|
||||
const { name, marketplace } = parsePluginIdentifier(err.source)
|
||||
// Not all PluginError variants carry a plugin name (some have pluginId,
|
||||
// some are marketplace-level). Use the 'plugin' property if present,
|
||||
// fall back to the name parsed from err.source.
|
||||
const pluginName = 'plugin' in err && err.plugin ? err.plugin : name
|
||||
logEvent('tengu_plugin_load_failed', {
|
||||
error_category:
|
||||
err.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
_PROTO_plugin_name:
|
||||
pluginName as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
...(marketplace && {
|
||||
_PROTO_marketplace_name:
|
||||
marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
}),
|
||||
...buildPluginTelemetryFields(pluginName, marketplace, managedNames),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,39 +0,0 @@
|
||||
import { getSkillToolCommands } from '../../commands.js'
|
||||
import {
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
logEvent,
|
||||
} from '../../services/analytics/index.js'
|
||||
import { getCharBudget } from '../../tools/SkillTool/prompt.js'
|
||||
|
||||
/**
|
||||
* Logs a tengu_skill_loaded event for each skill available at session startup.
|
||||
* This enables analytics on which skills are available across sessions.
|
||||
*/
|
||||
export async function logSkillsLoaded(
|
||||
cwd: string,
|
||||
contextWindowTokens: number,
|
||||
): Promise<void> {
|
||||
const skills = await getSkillToolCommands(cwd)
|
||||
const skillBudget = getCharBudget(contextWindowTokens)
|
||||
|
||||
for (const skill of skills) {
|
||||
if (skill.type !== 'prompt') continue
|
||||
|
||||
logEvent('tengu_skill_loaded', {
|
||||
// _PROTO_skill_name routes to the privileged skill_name BQ column.
|
||||
// Unredacted names don't go in additional_metadata.
|
||||
_PROTO_skill_name:
|
||||
skill.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
|
||||
skill_source:
|
||||
skill.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
skill_loaded_from:
|
||||
skill.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
skill_budget: skillBudget,
|
||||
...(skill.kind && {
|
||||
skill_kind:
|
||||
skill.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
import type { Attributes } from '@opentelemetry/api'
|
||||
import { getSessionId } from 'src/bootstrap/state.js'
|
||||
import { getOauthAccountInfo } from './auth.js'
|
||||
import { getOrCreateUserID } from './config.js'
|
||||
import { envDynamic } from './envDynamic.js'
|
||||
import { isEnvTruthy } from './envUtils.js'
|
||||
import { toTaggedId } from './taggedId.js'
|
||||
|
||||
// Default configuration for metrics cardinality
|
||||
const METRICS_CARDINALITY_DEFAULTS = {
|
||||
OTEL_METRICS_INCLUDE_SESSION_ID: true,
|
||||
OTEL_METRICS_INCLUDE_VERSION: false,
|
||||
OTEL_METRICS_INCLUDE_ACCOUNT_UUID: true,
|
||||
}
|
||||
|
||||
function shouldIncludeAttribute(
|
||||
envVar: keyof typeof METRICS_CARDINALITY_DEFAULTS,
|
||||
): boolean {
|
||||
const defaultValue = METRICS_CARDINALITY_DEFAULTS[envVar]
|
||||
const envValue = process.env[envVar]
|
||||
|
||||
if (envValue === undefined) {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
return isEnvTruthy(envValue)
|
||||
}
|
||||
|
||||
export function getTelemetryAttributes(): Attributes {
|
||||
const userId = getOrCreateUserID()
|
||||
const sessionId = getSessionId()
|
||||
|
||||
const attributes: Attributes = {
|
||||
'user.id': userId,
|
||||
}
|
||||
|
||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_SESSION_ID')) {
|
||||
attributes['session.id'] = sessionId
|
||||
}
|
||||
if (shouldIncludeAttribute('OTEL_METRICS_INCLUDE_VERSION')) {
|
||||
attributes['app.version'] = MACRO.VERSION
|
||||
}
|
||||
|
||||
// Only include OAuth account data when actively using OAuth authentication
|
||||
const oauthAccount = getOauthAccountInfo()
|
||||
if (oauthAccount) {
|
||||
const orgId = oauthAccount.organizationUuid
|
||||
const email = oauthAccount.emailAddress
|
||||
const accountUuid = oauthAccount.accountUuid
|
||||
|
||||
if (orgId) attributes['organization.id'] = orgId
|
||||
if (email) attributes['user.email'] = email
|
||||
|
||||
if (
|
||||
accountUuid &&
|
||||
shouldIncludeAttribute('OTEL_METRICS_INCLUDE_ACCOUNT_UUID')
|
||||
) {
|
||||
attributes['user.account_uuid'] = accountUuid
|
||||
attributes['user.account_id'] =
|
||||
process.env.CLAUDE_CODE_ACCOUNT_TAGGED_ID ||
|
||||
toTaggedId('user', accountUuid)
|
||||
}
|
||||
}
|
||||
|
||||
// Add terminal type if available
|
||||
if (envDynamic.terminal) {
|
||||
attributes['terminal.type'] = envDynamic.terminal
|
||||
}
|
||||
|
||||
return attributes
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user