chore: initialize recovered claude workspace

This commit is contained in:
2026-04-02 15:29:01 +08:00
commit a10efa3b4b
1940 changed files with 506426 additions and 0 deletions

539
src/bridge/bridgeApi.ts Normal file
View File

@@ -0,0 +1,539 @@
import axios from 'axios'
import { debugBody, extractErrorDetail } from './debugUtils.js'
import {
BRIDGE_LOGIN_INSTRUCTION,
type BridgeApiClient,
type BridgeConfig,
type PermissionResponseEvent,
type WorkResponse,
} from './types.js'
/**
 * Dependencies injected into createBridgeApiClient. Everything the client
 * needs from the outside world (URL, credentials, logging, auth refresh) is
 * passed in here rather than imported.
 */
type BridgeApiDeps = {
/** Prefix for every request URL, e.g. `${baseUrl}/v1/environments/bridge`. */
baseUrl: string
/**
 * Returns the current OAuth access token. When it returns a falsy value the
 * OAuth-authenticated calls throw an Error carrying BRIDGE_LOGIN_INSTRUCTION.
 */
getAccessToken: () => string | undefined
/** Sent on every request as the `x-environment-runner-version` header. */
runnerVersion: string
/** Optional sink for `[bridge:api]` debug lines; when omitted they are dropped. */
onDebug?: (msg: string) => void
/**
* Called on 401 to attempt OAuth token refresh. Returns true if refreshed,
* in which case the request is retried once. Injected because
* handleOAuth401Error from utils/auth.ts transitively pulls in config.ts →
* file.ts → permissions/filesystem.ts → sessionStorage.ts → commands.ts
* (~1300 modules). Daemon callers using env-var tokens omit this — their
* tokens don't refresh, so 401 goes straight to BridgeFatalError.
*/
onAuth401?: (staleAccessToken: string) => Promise<boolean>
/**
* Returns the trusted device token to send as X-Trusted-Device-Token on
* bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
* server (CCR v2); when the server's enforcement flag is on,
* ConnectBridgeWorker requires a trusted device at JWT-issuance.
* Optional — when absent or returning undefined, the header is omitted
* and the server falls through to its flag-off/no-op path. The CLI-side
* gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
*/
getTrustedDeviceToken?: () => string | undefined
}
/** Value sent as the `anthropic-beta` header on every bridge API request. */
const BETA_HEADER = 'environments-2025-11-01'
/**
 * Allowlist for server-provided IDs that end up in URL path segments:
 * one or more ASCII letters, digits, underscores or hyphens — nothing else.
 */
const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/

/**
 * Guard a server-provided ID before it is interpolated into a URL path.
 *
 * Rejecting anything outside the allowlist blocks path traversal
 * (e.g. `../../admin`) and injection through slashes, dots or other
 * special characters.
 *
 * @param id - The ID to check.
 * @param label - Name of the ID, used only in the error message.
 * @returns `id` unchanged when it is safe.
 * @throws Error when `id` is empty or contains a disallowed character.
 */
export function validateBridgeId(id: string, label: string): string {
  const isSafe = Boolean(id) && SAFE_ID_PATTERN.test(id)
  if (isSafe) {
    return id
  }
  throw new Error(`Invalid ${label}: contains unsafe characters`)
}
/**
 * Error for bridge failures that must not be retried (auth failures,
 * expired environments, and similar). Carries the HTTP status and, when the
 * server supplied one, its error type string.
 */
export class BridgeFatalError extends Error {
  /** HTTP status code associated with the failure. */
  readonly status: number

  /** Server-provided error type, e.g. "environment_expired"; undefined if none. */
  readonly errorType: string | undefined

  /**
   * @param message - Human-readable description of the failure.
   * @param status - HTTP status code associated with the failure.
   * @param errorType - Optional server-provided error type.
   */
  constructor(message: string, status: number, errorType?: string) {
    super(message)
    this.name = 'BridgeFatalError'
    this.status = status
    this.errorType = errorType
  }
}
/**
 * Build a BridgeApiClient backed by axios.
 *
 * All state lives in this closure: the injected `deps` plus a counter of
 * consecutive empty polls that throttles poll debug logging. Every request
 * sets `validateStatus` to accept anything below 500, so 4xx responses
 * resolve normally and are converted to errors by handleErrorStatus, while
 * 5xx responses and network failures reject from axios itself.
 *
 * Two credential styles are used:
 * - OAuth access token via withOAuthRetry (register, stop, deregister,
 *   archive, reconnect) — retried once after a successful 401 refresh.
 * - Caller-supplied secret/session token sent as the Bearer value (poll,
 *   acknowledge, heartbeat, permission response) — no refresh, no retry.
 *
 * @param deps - Injected URL, credential, logging and refresh hooks.
 * @returns The client object implementing BridgeApiClient.
 */
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
// Forward a debug line to the optional onDebug sink.
function debug(msg: string): void {
deps.onDebug?.(msg)
}
// Number of pollForWork calls in a row that returned no work. Used only to
// decide when to emit the "no work" debug line (first, then every Nth).
let consecutiveEmptyPolls = 0
const EMPTY_POLL_LOG_INTERVAL = 100
// Build the header set shared by every request. `accessToken` is whatever
// Bearer credential the caller uses (OAuth token, environment secret or
// session token). The trusted-device header is added only when the optional
// getter yields a truthy value.
function getHeaders(accessToken: string): Record<string, string> {
const headers: Record<string, string> = {
Authorization: `Bearer ${accessToken}`,
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'anthropic-beta': BETA_HEADER,
'x-environment-runner-version': deps.runnerVersion,
}
const deviceToken = deps.getTrustedDeviceToken?.()
if (deviceToken) {
headers['X-Trusted-Device-Token'] = deviceToken
}
return headers
}
// Fetch the current OAuth access token or throw a plain Error (not a
// BridgeFatalError) telling the user how to log in.
function resolveAuth(): string {
const accessToken = deps.getAccessToken()
if (!accessToken) {
throw new Error(BRIDGE_LOGIN_INSTRUCTION)
}
return accessToken
}
/**
* Execute an OAuth-authenticated request with a single retry on 401.
* On 401, attempts token refresh via handleOAuth401Error (same pattern as
* withRetry.ts for v1/messages). If refresh succeeds, retries the request
* once with the new token. If refresh fails or the retry also returns 401,
* the 401 response is returned for handleErrorStatus to throw BridgeFatalError.
*
* `fn` performs the request with the given token; `context` is used only
* as a label in debug lines.
*/
async function withOAuthRetry<T>(
fn: (accessToken: string) => Promise<{ status: number; data: T }>,
context: string,
): Promise<{ status: number; data: T }> {
const accessToken = resolveAuth()
const response = await fn(accessToken)
if (response.status !== 401) {
return response
}
if (!deps.onAuth401) {
debug(`[bridge:api] ${context}: 401 received, no refresh handler`)
return response
}
// Attempt token refresh — matches the pattern in withRetry.ts
debug(`[bridge:api] ${context}: 401 received, attempting token refresh`)
const refreshed = await deps.onAuth401(accessToken)
if (refreshed) {
debug(`[bridge:api] ${context}: Token refreshed, retrying request`)
// Re-read the token: the refresh is expected to have updated the store.
const newToken = resolveAuth()
const retryResponse = await fn(newToken)
if (retryResponse.status !== 401) {
return retryResponse
}
debug(`[bridge:api] ${context}: Retry after refresh also got 401`)
} else {
debug(`[bridge:api] ${context}: Token refresh failed`)
}
// Refresh failed — return 401 for handleErrorStatus to throw
// NOTE(review): when the retry also got 401, this returns the FIRST
// response, so any error detail comes from the pre-refresh attempt —
// confirm that is intended.
return response
}
return {
// Register this machine/directory as a bridge environment (OAuth, with
// 401 refresh). Returns the server's environment_id and secret.
async registerBridgeEnvironment(
config: BridgeConfig,
): Promise<{ environment_id: string; environment_secret: string }> {
debug(
`[bridge:api] POST /v1/environments/bridge bridgeId=${config.bridgeId}`,
)
const response = await withOAuthRetry(
(token: string) =>
axios.post<{
environment_id: string
environment_secret: string
}>(
`${deps.baseUrl}/v1/environments/bridge`,
{
machine_name: config.machineName,
directory: config.dir,
branch: config.branch,
git_repo_url: config.gitRepoUrl,
// Advertise session capacity so claude.ai/code can show
// "2/4 sessions" badges and only block the picker when
// actually at capacity. Backends that don't yet accept
// this field will silently ignore it.
max_sessions: config.maxSessions,
// worker_type lets claude.ai filter environments by origin
// (e.g. assistant picker only shows assistant-mode workers).
// Desktop cowork app sends "cowork"; we send a distinct value.
metadata: { worker_type: config.workerType },
// Idempotent re-registration: if we have a backend-issued
// environment_id from a prior session (--session-id resume),
// send it back so the backend reattaches instead of creating
// a new env. The backend may still hand back a fresh ID if
// the old one expired — callers must compare the response.
...(config.reuseEnvironmentId && {
environment_id: config.reuseEnvironmentId,
}),
},
{
headers: getHeaders(token),
timeout: 15_000,
validateStatus: status => status < 500,
},
),
'Registration',
)
handleErrorStatus(response.status, response.data, 'Registration')
debug(
`[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
)
// NOTE(review): the logged request body omits environment_id even when
// reuseEnvironmentId was sent above.
debug(
`[bridge:api] >>> ${debugBody({ machine_name: config.machineName, directory: config.dir, branch: config.branch, git_repo_url: config.gitRepoUrl, max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
return response.data
},
// Poll for a work item using the environment secret as the Bearer
// credential. Resolves to null when the response body is empty/falsy.
// `signal` is handed to axios for cancellation; `reclaimOlderThanMs`, when
// defined, is sent as the reclaim_older_than_ms query parameter.
async pollForWork(
environmentId: string,
environmentSecret: string,
signal?: AbortSignal,
reclaimOlderThanMs?: number,
): Promise<WorkResponse | null> {
validateBridgeId(environmentId, 'environmentId')
// Save and reset so errors break the "consecutive empty" streak.
// Restored below when the response is truly empty.
const prevEmptyPolls = consecutiveEmptyPolls
consecutiveEmptyPolls = 0
const response = await axios.get<WorkResponse | null>(
`${deps.baseUrl}/v1/environments/${environmentId}/work/poll`,
{
headers: getHeaders(environmentSecret),
params:
reclaimOlderThanMs !== undefined
? { reclaim_older_than_ms: reclaimOlderThanMs }
: undefined,
timeout: 10_000,
signal,
validateStatus: status => status < 500,
},
)
handleErrorStatus(response.status, response.data, 'Poll')
// Empty body or null = no work available
if (!response.data) {
consecutiveEmptyPolls = prevEmptyPolls + 1
// Log the first empty poll of a streak, then every
// EMPTY_POLL_LOG_INTERVAL-th, to keep the debug log readable.
if (
consecutiveEmptyPolls === 1 ||
consecutiveEmptyPolls % EMPTY_POLL_LOG_INTERVAL === 0
) {
debug(
`[bridge:api] GET .../work/poll -> ${response.status} (no work, ${consecutiveEmptyPolls} consecutive empty polls)`,
)
}
return null
}
debug(
`[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
return response.data
},
// Acknowledge a work item, authenticating with the session token.
async acknowledgeWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/ack`)
const response = await axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/ack`,
{},
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(response.status, response.data, 'Acknowledge')
debug(`[bridge:api] POST .../work/${workId}/ack -> ${response.status}`)
},
// Ask the server to stop a work item (OAuth, with 401 refresh). `force` is
// forwarded verbatim in the request body.
async stopWork(
environmentId: string,
workId: string,
force: boolean,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/stop force=${force}`)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/stop`,
{ force },
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'StopWork',
)
handleErrorStatus(response.status, response.data, 'StopWork')
debug(`[bridge:api] POST .../work/${workId}/stop -> ${response.status}`)
},
// Delete the bridge environment registration (OAuth, with 401 refresh).
async deregisterEnvironment(environmentId: string): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
debug(`[bridge:api] DELETE /v1/environments/bridge/${environmentId}`)
const response = await withOAuthRetry(
(token: string) =>
axios.delete(
`${deps.baseUrl}/v1/environments/bridge/${environmentId}`,
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'Deregister',
)
handleErrorStatus(response.status, response.data, 'Deregister')
debug(
`[bridge:api] DELETE /v1/environments/bridge/${environmentId} -> ${response.status}`,
)
},
// Archive a session (OAuth, with 401 refresh). A 409 response is treated
// as success and short-circuits before handleErrorStatus.
async archiveSession(sessionId: string): Promise<void> {
validateBridgeId(sessionId, 'sessionId')
debug(`[bridge:api] POST /v1/sessions/${sessionId}/archive`)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/sessions/${sessionId}/archive`,
{},
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'ArchiveSession',
)
// 409 = already archived (idempotent, not an error)
if (response.status === 409) {
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/archive -> 409 (already archived)`,
)
return
}
handleErrorStatus(response.status, response.data, 'ArchiveSession')
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/archive -> ${response.status}`,
)
},
// Ask the server to reconnect an existing session to this environment
// (OAuth, with 401 refresh).
async reconnectSession(
environmentId: string,
sessionId: string,
): Promise<void> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(sessionId, 'sessionId')
debug(
`[bridge:api] POST /v1/environments/${environmentId}/bridge/reconnect session_id=${sessionId}`,
)
const response = await withOAuthRetry(
(token: string) =>
axios.post(
`${deps.baseUrl}/v1/environments/${environmentId}/bridge/reconnect`,
{ session_id: sessionId },
{
headers: getHeaders(token),
timeout: 10_000,
validateStatus: s => s < 500,
},
),
'ReconnectSession',
)
handleErrorStatus(response.status, response.data, 'ReconnectSession')
debug(`[bridge:api] POST .../bridge/reconnect -> ${response.status}`)
},
// Send a heartbeat for a work item, authenticating with the session token.
// Returns the full response body; the declared return type exposes only
// lease_extended and state.
async heartbeatWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<{ lease_extended: boolean; state: string }> {
validateBridgeId(environmentId, 'environmentId')
validateBridgeId(workId, 'workId')
debug(`[bridge:api] POST .../work/${workId}/heartbeat`)
const response = await axios.post<{
lease_extended: boolean
state: string
last_heartbeat: string
ttl_seconds: number
}>(
`${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/heartbeat`,
{},
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(response.status, response.data, 'Heartbeat')
debug(
`[bridge:api] POST .../work/${workId}/heartbeat -> ${response.status} lease_extended=${response.data.lease_extended} state=${response.data.state}`,
)
return response.data
},
// Post a single permission-response event to a session's event stream,
// authenticating with the session token. The event is wrapped in an
// `events` array in the request body.
async sendPermissionResponseEvent(
sessionId: string,
event: PermissionResponseEvent,
sessionToken: string,
): Promise<void> {
validateBridgeId(sessionId, 'sessionId')
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/events type=${event.type}`,
)
const response = await axios.post(
`${deps.baseUrl}/v1/sessions/${sessionId}/events`,
{ events: [event] },
{
headers: getHeaders(sessionToken),
timeout: 10_000,
validateStatus: s => s < 500,
},
)
handleErrorStatus(
response.status,
response.data,
'SendPermissionResponseEvent',
)
debug(
`[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
)
debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
debug(`[bridge:api] <<< ${debugBody(response.data)}`)
},
}
}
/**
 * Turn a non-success HTTP status into a thrown error.
 *
 * Returns normally for 200 and 204. 401, 403, 404 and 410 throw
 * BridgeFatalError (not retryable); 429 and every other status throw a
 * plain Error.
 *
 * @param status - HTTP status of the response.
 * @param data - Response body, mined for an error detail and error type.
 * @param context - Operation label prefixed to most messages.
 * @throws BridgeFatalError for 401 / 403 / 404 / 410.
 * @throws Error for 429 and any other non-200/204 status.
 */
function handleErrorStatus(
  status: number,
  data: unknown,
  context: string,
): void {
  const succeeded = status === 200 || status === 204
  if (succeeded) {
    return
  }

  const detail = extractErrorDetail(data)
  const errorType = extractErrorTypeFromData(data)
  // Rendered lazily so it is only evaluated by branches that embed it.
  const detailSuffix = (): string => (detail ? `: ${detail}` : '')

  if (status === 401) {
    throw new BridgeFatalError(
      `${context}: Authentication failed (401)${detailSuffix()}. ${BRIDGE_LOGIN_INSTRUCTION}`,
      401,
      errorType,
    )
  }

  if (status === 403) {
    // An expiry-flavoured 403 gets the "session expired" wording instead of
    // the generic permissions hint.
    const message = isExpiredErrorType(errorType)
      ? 'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
      : `${context}: Access denied (403)${detailSuffix()}. Check your organization permissions.`
    throw new BridgeFatalError(message, 403, errorType)
  }

  if (status === 404) {
    // Server detail, when present, replaces the whole message.
    const message =
      detail ??
      `${context}: Not found (404). Remote Control may not be available for this organization.`
    throw new BridgeFatalError(message, 404, errorType)
  }

  if (status === 410) {
    const message =
      detail ??
      'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
    // Default the error type so callers can treat every 410 as an expiry.
    throw new BridgeFatalError(message, 410, errorType ?? 'environment_expired')
  }

  if (status === 429) {
    throw new Error(`${context}: Rate limited (429). Polling too frequently.`)
  }

  throw new Error(`${context}: Failed with status ${status}${detailSuffix()}`)
}
/**
 * Report whether a server error type denotes session/environment expiry,
 * i.e. the string contains "expired" or "lifetime" (case-sensitive).
 *
 * @param errorType - Server-provided error type, if any.
 * @returns false for undefined or empty input; otherwise whether either
 *   marker occurs anywhere in the string.
 */
export function isExpiredErrorType(errorType: string | undefined): boolean {
  return errorType ? /expired|lifetime/.test(errorType) : false
}
/**
 * Decide whether a BridgeFatalError is a 403 that can be hidden from users.
 *
 * Covers 403s whose message mentions the 'external_poll_sessions' scope or
 * the 'environments:manage' permission (e.g. StopWork failing because the
 * user's role lacks it). These don't affect core functionality.
 *
 * @param err - The fatal error to classify.
 * @returns true only for status 403 with one of the two markers in the message.
 */
export function isSuppressible403(err: BridgeFatalError): boolean {
  const suppressibleMarkers = ['external_poll_sessions', 'environments:manage']
  return (
    err.status === 403 &&
    suppressibleMarkers.some(marker => err.message.includes(marker))
  )
}
/**
 * Pull `error.type` out of a response body of the shape
 * `{ error: { type: string } }`.
 *
 * @param data - Arbitrary response body.
 * @returns The type string, or undefined when the body does not have that
 *   shape (non-object body, missing/non-object `error`, non-string `type`).
 */
function extractErrorTypeFromData(data: unknown): string | undefined {
  if (typeof data !== 'object' || data === null || !('error' in data)) {
    return undefined
  }
  const errorField = data.error
  if (
    typeof errorField !== 'object' ||
    errorField === null ||
    !('type' in errorField)
  ) {
    return undefined
  }
  return typeof errorField.type === 'string' ? errorField.type : undefined
}

View File

@@ -0,0 +1,48 @@
/**
* Shared bridge auth/URL resolution. Consolidates the ant-only
* CLAUDE_BRIDGE_* dev overrides that were previously copy-pasted across
* a dozen files — inboundAttachments, BriefTool/upload, bridgeMain,
* initReplBridge, remoteBridgeCore, daemon workers, /rename,
* /remote-control.
*
* Two layers: *Override() returns the ant-only env var (or undefined);
* the non-Override versions fall through to the real OAuth store/config.
* Callers that compose with a different auth source (e.g. daemon workers
* using IPC auth) use the Override getters directly.
*/
import { getOauthConfig } from '../constants/oauth.js'
import { getClaudeAIOAuthTokens } from '../utils/auth.js'
/**
 * Ant-only dev override for the bridge OAuth token.
 *
 * @returns CLAUDE_BRIDGE_OAUTH_TOKEN when USER_TYPE is 'ant' and the
 *   variable is non-empty; otherwise undefined.
 */
export function getBridgeTokenOverride(): string | undefined {
  const overrideToken =
    process.env.USER_TYPE === 'ant'
      ? process.env.CLAUDE_BRIDGE_OAUTH_TOKEN
      : undefined
  // Collapse the empty string to undefined so callers can use `??`.
  return overrideToken || undefined
}
/**
 * Ant-only dev override for the bridge API base URL.
 *
 * @returns CLAUDE_BRIDGE_BASE_URL when USER_TYPE is 'ant' and the variable
 *   is non-empty; otherwise undefined.
 */
export function getBridgeBaseUrlOverride(): string | undefined {
  const overrideUrl =
    process.env.USER_TYPE === 'ant'
      ? process.env.CLAUDE_BRIDGE_BASE_URL
      : undefined
  // Collapse the empty string to undefined so callers can use `??`.
  return overrideUrl || undefined
}
/**
 * Resolve the access token for bridge API calls.
 *
 * The ant-only dev override wins; otherwise the token comes from the
 * claude.ai OAuth store.
 *
 * @returns The token, or undefined meaning "not logged in".
 */
export function getBridgeAccessToken(): string | undefined {
  const devOverride = getBridgeTokenOverride()
  return devOverride ?? getClaudeAIOAuthTokens()?.accessToken
}
/**
 * Resolve the base URL for bridge API calls.
 *
 * The ant-only dev override wins; otherwise BASE_API_URL from the OAuth
 * config is used.
 *
 * @returns Always a URL string.
 */
export function getBridgeBaseUrl(): string {
  const devOverride = getBridgeBaseUrlOverride()
  return devOverride ?? getOauthConfig().BASE_API_URL
}

135
src/bridge/bridgeDebug.ts Normal file
View File

@@ -0,0 +1,135 @@
import { logForDebugging } from '../utils/debug.js'
import { BridgeFatalError } from './bridgeApi.js'
import type { BridgeApiClient } from './types.js'
/**
* Ant-only fault injection for manually testing bridge recovery paths.
*
* Real failure modes this targets (BQ 2026-03-12, 7-day window):
* poll 404 not_found_error — 147K sessions/week, dead onEnvironmentLost gate
* ws_closed 1002/1006 — 22K sessions/week, zombie poll after close
* register transient failure — residual: network blips during doReconnect
*
* Usage: /bridge-kick <subcommand> from the REPL while Remote Control is
* connected, then tail debug.log to watch the recovery machinery react.
*
* Module-level state is intentional here: one bridge per REPL process, the
* /bridge-kick slash command has no other way to reach into initBridgeCore's
* closures, and teardown clears the slot.
*/
/**
 * One-shot fault to inject on the next matching api call. Queued via
 * injectBridgeFault and consumed by the wrapper that
 * wrapApiForFaultInjection returns.
 */
type BridgeFault = {
/** Which BridgeApiClient method the fault intercepts. */
method:
| 'pollForWork'
| 'registerBridgeEnvironment'
| 'reconnectSession'
| 'heartbeatWork'
/** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
* errors surface as plain axios rejections (5xx / network). Recovery code
* distinguishes the two: fatal → teardown, transient → retry/backoff. */
kind: 'fatal' | 'transient'
/** For fatal faults, the BridgeFatalError status; for transient faults it
* only appears in the thrown Error's message. */
status: number
/** Passed to BridgeFatalError for fatal faults; unused for transient ones
* beyond logging. */
errorType?: string
/** Remaining injections. Decremented on consume; removed at 0. */
count: number
}
/**
 * Set of debug hooks stored in this module via registerBridgeDebugHandle
 * and read back via getBridgeDebugHandle.
 */
export type BridgeDebugHandle = {
/** Invoke the transport's permanent-close handler directly. Tests the
* ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
fireClose: (code: number) => void
/** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
* reachable from the slash command. */
forceReconnect: () => void
/** Queue a fault for the next N calls to the named api method. */
injectFault: (fault: BridgeFault) => void
/** Abort the at-capacity sleep so an injected poll fault lands
* immediately instead of up to 10min later. */
wakePollLoop: () => void
/** env/session IDs for the debug.log grep. */
describe: () => string
}
/** Currently registered debug handle; null until registerBridgeDebugHandle
 * is called and again after clearBridgeDebugHandle. */
let debugHandle: BridgeDebugHandle | null = null
/** Pending faults in insertion order. For each method the earliest queued
 * entry is consumed first; clearBridgeDebugHandle empties the queue. */
const faultQueue: BridgeFault[] = []
/**
 * Store `h` as the module-level debug handle, replacing any handle that was
 * registered before.
 *
 * @param h - Hooks to store.
 */
export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
  debugHandle = h
}
/**
 * Drop the registered debug handle and discard every queued fault. The
 * queue array is emptied in place, so existing references to it stay valid.
 */
export function clearBridgeDebugHandle(): void {
  debugHandle = null
  faultQueue.splice(0, faultQueue.length)
}
/**
 * Read the currently registered debug handle.
 *
 * @returns The handle, or null when none is registered.
 */
export function getBridgeDebugHandle(): BridgeDebugHandle | null {
  return debugHandle
}
/**
 * Append a fault to the queue and log what was queued.
 *
 * @param fault - Fault to inject on upcoming calls to `fault.method`.
 */
export function injectBridgeFault(fault: BridgeFault): void {
  faultQueue.push(fault)
  const errorTypeSuffix = fault.errorType ? `/${fault.errorType}` : ''
  const summary = `${fault.method} ${fault.kind}/${fault.status}${errorTypeSuffix} ×${fault.count}`
  logForDebugging(`[bridge:debug] Queued fault: ${summary}`)
}
/**
 * Decorate a BridgeApiClient so that pollForWork, registerBridgeEnvironment,
 * reconnectSession and heartbeatWork consult the fault queue before calling
 * through. When a matching fault is queued the call rejects with the
 * configured error instead of reaching the real client. All other methods
 * are passed through untouched.
 *
 * Only called when USER_TYPE === 'ant' — zero overhead in external builds.
 *
 * @param api - The real client to delegate to.
 * @returns A client with the four methods above intercepted.
 */
export function wrapApiForFaultInjection(
  api: BridgeApiClient,
): BridgeApiClient {
  /** Take one injection from the earliest queued fault for `method`, if any. */
  function takeFault(method: BridgeFault['method']): BridgeFault | null {
    const position = faultQueue.findIndex(queued => queued.method === method)
    if (position === -1) {
      return null
    }
    const fault = faultQueue[position]!
    fault.count--
    // Exhausted faults leave the queue; the caller still gets this one.
    if (fault.count <= 0) {
      faultQueue.splice(position, 1)
    }
    return fault
  }

  /** Log and throw the error a fault describes. */
  function raiseFault(fault: BridgeFault, context: string): never {
    logForDebugging(
      `[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
    )
    if (fault.kind !== 'fatal') {
      // Transient: mimic an axios rejection (5xx / network). No .status on
      // the error itself — that's how the catch blocks distinguish.
      throw new Error(`[injected transient] ${context} ${fault.status}`)
    }
    throw new BridgeFatalError(
      `[injected] ${context} ${fault.status}`,
      fault.status,
      fault.errorType,
    )
  }

  /** Throw if a fault is queued for `method`; otherwise do nothing. */
  function failIfQueued(method: BridgeFault['method'], context: string): void {
    const fault = takeFault(method)
    if (fault) {
      raiseFault(fault, context)
    }
  }

  return {
    ...api,
    async pollForWork(envId, secret, signal, reclaimMs) {
      failIfQueued('pollForWork', 'Poll')
      return api.pollForWork(envId, secret, signal, reclaimMs)
    },
    async registerBridgeEnvironment(config) {
      failIfQueued('registerBridgeEnvironment', 'Registration')
      return api.registerBridgeEnvironment(config)
    },
    async reconnectSession(envId, sessionId) {
      failIfQueued('reconnectSession', 'ReconnectSession')
      return api.reconnectSession(envId, sessionId)
    },
    async heartbeatWork(envId, workId, token) {
      failIfQueued('heartbeatWork', 'Heartbeat')
      return api.heartbeatWork(envId, workId, token)
    },
  }
}

202
src/bridge/bridgeEnabled.ts Normal file
View File

@@ -0,0 +1,202 @@
import { feature } from 'bun:bundle'
import {
checkGate_CACHED_OR_BLOCKING,
getDynamicConfig_CACHED_MAY_BE_STALE,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../services/analytics/growthbook.js'
// Namespace import breaks the bridgeEnabled → auth → config → bridgeEnabled
// cycle — authModule.foo is a live binding, so by the time the helpers below
// call it, auth.js is fully loaded. Previously used require() for the same
// deferral, but require() hits a CJS cache that diverges from the ESM
// namespace after mock.module() (daemon/auth.test.ts), breaking spyOn.
import * as authModule from '../utils/auth.js'
import { isEnvTruthy } from '../utils/envUtils.js'
import { lt } from '../utils/semver.js'
/**
* Runtime check for bridge mode entitlement.
*
* Remote Control requires a claude.ai subscription (the bridge auths to CCR
* with the claude.ai OAuth token). isClaudeAISubscriber() excludes
* Bedrock/Vertex/Foundry, apiKeyHelper/gateway deployments, env-var API keys,
* and Console API logins — none of which have the OAuth token CCR needs.
* See github.com/deshaw/anthropic-issues/issues/24.
*
* The `feature('BRIDGE_MODE')` guard ensures the GrowthBook string literal
* is only referenced when bridge mode is enabled at build time.
*
* Synchronous and non-blocking: reads the cached `tengu_ccr_bridge` value
* with a default of false, so the result may be stale.
*
* @returns false when BRIDGE_MODE is off or the user is not a claude.ai
*   subscriber; otherwise the cached flag value.
*/
export function isBridgeEnabled(): boolean {
// Positive ternary pattern — see docs/feature-gating.md.
// Negative pattern (if (!feature(...)) return) does not eliminate
// inline string literals from external builds.
return feature('BRIDGE_MODE')
? isClaudeAISubscriber() &&
getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_bridge', false)
: false
}
/**
* Blocking entitlement check for Remote Control.
*
* Returns cached `true` immediately (fast path). If the disk cache says
* `false` or is missing, awaits GrowthBook init and fetches the fresh
* server value (slow path, max ~5s), then writes it to disk.
*
* Use at entitlement gates where a stale `false` would unfairly block access.
* For user-facing error paths, prefer `getBridgeDisabledReason()` which gives
* a specific diagnostic. For render-body UI visibility checks, use
* `isBridgeEnabled()` instead.
*
* The gate is only awaited when the subscriber check passes (`&&`
* short-circuits), so non-subscribers never hit the slow path.
*
* @returns false when BRIDGE_MODE is off or the user is not a claude.ai
*   subscriber; otherwise the result of the `tengu_ccr_bridge` gate check.
*/
export async function isBridgeEnabledBlocking(): Promise<boolean> {
// Positive ternary on feature(), same shape as isBridgeEnabled.
return feature('BRIDGE_MODE')
? isClaudeAISubscriber() &&
(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))
: false
}
/**
* Diagnostic message for why Remote Control is unavailable, or null if
* it's enabled. Call this instead of a bare `isBridgeEnabledBlocking()`
* check when you need to show the user an actionable error.
*
* The GrowthBook gate targets on organizationUUID, which comes from
* config.oauthAccount — populated by /api/oauth/profile during login.
* That endpoint requires the user:profile scope. Tokens without it
* (setup-token, CLAUDE_CODE_OAUTH_TOKEN env var, or pre-scope-expansion
* logins) leave oauthAccount unpopulated, so the gate falls back to
* false and users see a dead-end "not enabled" message with no hint
* that re-login would fix it. See CC-1165 / gh-33105.
*
* Checks run in order and the first failure wins: subscription, profile
* scope, organization UUID, then the (possibly blocking) GrowthBook gate.
*
* @returns A user-facing explanation, or null when every check passes.
*/
export async function getBridgeDisabledReason(): Promise<string | null> {
if (feature('BRIDGE_MODE')) {
if (!isClaudeAISubscriber()) {
return 'Remote Control requires a claude.ai subscription. Run `claude auth login` to sign in with your claude.ai account.'
}
if (!hasProfileScope()) {
return 'Remote Control requires a full-scope login token. Long-lived tokens (from `claude setup-token` or CLAUDE_CODE_OAUTH_TOKEN) are limited to inference-only for security reasons. Run `claude auth login` to use Remote Control.'
}
if (!getOauthAccountInfo()?.organizationUuid) {
return 'Unable to determine your organization for Remote Control eligibility. Run `claude auth login` to refresh your account information.'
}
// Only reached once the local prerequisites hold, so a false here means
// the gate itself is off for this account.
if (!(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))) {
return 'Remote Control is not yet enabled for your account.'
}
return null
}
// Reached only when BRIDGE_MODE is off at build time.
return 'Remote Control is not available in this build.'
}
// Exception-safe wrapper around authModule.isClaudeAISubscriber().
//
// main.tsx:5698 calls isBridgeEnabled() while defining the Commander program,
// before enableConfigs() runs. At that point isClaudeAISubscriber() →
// getGlobalConfig() throws "Config accessed before allowed". Pre-config, no
// OAuth token can exist anyway, so false is the correct answer. This is the
// same swallow getFeatureValue_CACHED_MAY_BE_STALE already does at
// growthbook.ts:775-780.
function isClaudeAISubscriber(): boolean {
  let subscribed = false
  try {
    subscribed = authModule.isClaudeAISubscriber()
  } catch {
    // Deliberately ignored — keep the `false` default.
  }
  return subscribed
}
// Exception-safe wrapper around authModule.hasProfileScope(): any throw from
// the underlying call is reported as "no profile scope".
function hasProfileScope(): boolean {
  let scoped = false
  try {
    scoped = authModule.hasProfileScope()
  } catch {
    // Deliberately ignored — keep the `false` default.
  }
  return scoped
}
// Exception-safe wrapper around authModule.getOauthAccountInfo(): any throw
// from the underlying call is reported as undefined (no account info).
function getOauthAccountInfo(): ReturnType<
  typeof authModule.getOauthAccountInfo
> {
  let accountInfo: ReturnType<typeof authModule.getOauthAccountInfo> =
    undefined
  try {
    accountInfo = authModule.getOauthAccountInfo()
  } catch {
    // Deliberately ignored — keep the `undefined` default.
  }
  return accountInfo
}
/**
* Runtime check for the env-less (v2) REPL bridge path.
* Returns true when the GrowthBook flag `tengu_bridge_repl_v2` is enabled.
*
* This gates which implementation initReplBridge uses — NOT whether bridge
* is available at all (see isBridgeEnabled above). Daemon/print paths stay
* on the env-based implementation regardless of this gate.
*
* Reads the cached flag value (may be stale) with a default of false.
*
* @returns false when BRIDGE_MODE is off at build time; otherwise the
*   cached flag value.
*/
export function isEnvLessBridgeEnabled(): boolean {
// Positive ternary on feature(), same shape as isBridgeEnabled.
return feature('BRIDGE_MODE')
? getFeatureValue_CACHED_MAY_BE_STALE('tengu_bridge_repl_v2', false)
: false
}
/**
* Kill-switch for the `cse_*` → `session_*` client-side retag shim.
*
* The shim exists because compat/convert.go:27 validates TagSession and the
* claude.ai frontend routes on `session_*`, while v2 worker endpoints hand out
* `cse_*`. Once the server tags by environment_kind and the frontend accepts
* `cse_*` directly, flip this to false to make toCompatSessionId a no-op.
* Defaults to true — the shim stays active until explicitly disabled.
*
* @returns true when BRIDGE_MODE is off at build time; otherwise the cached
*   (possibly stale) flag value, which itself defaults to true.
*/
export function isCseShimEnabled(): boolean {
// Unlike the other gates in this file, BOTH the flag default and the
// build-flag-off branch are true.
return feature('BRIDGE_MODE')
? getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_bridge_repl_v2_cse_shim_enabled',
true,
)
: true
}
/**
* Returns an error message if the current CLI version is below the
* minimum required for the v1 (env-based) Remote Control path, or null if the
* version is fine. The v2 (env-less) path uses checkEnvLessBridgeMinVersion()
* in envLessBridgeConfig.ts instead — the two implementations have independent
* version floors.
*
* Uses cached (non-blocking) GrowthBook config. If GrowthBook hasn't
* loaded yet, the default '0.0.0' means the check passes — a safe fallback.
*
* @returns A user-facing upgrade message, or null when the version is
*   acceptable, no minimum is configured, or BRIDGE_MODE is off at build time.
*/
export function checkBridgeMinVersion(): string | null {
// Positive pattern — see docs/feature-gating.md.
// Negative pattern (if (!feature(...)) return) does not eliminate
// inline string literals from external builds.
if (feature('BRIDGE_MODE')) {
const config = getDynamicConfig_CACHED_MAY_BE_STALE<{
minVersion: string
}>('tengu_bridge_min_version', { minVersion: '0.0.0' })
// A falsy minVersion (e.g. empty string) skips the comparison entirely.
if (config.minVersion && lt(MACRO.VERSION, config.minVersion)) {
return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${config.minVersion} or higher is required. Run \`claude update\` to update.`
}
}
return null
}
/**
* Default for remoteControlAtStartup when the user hasn't explicitly set it.
* When the CCR_AUTO_CONNECT build flag is present (ant-only) and the
* tengu_cobalt_harbor GrowthBook gate is on, all sessions connect to CCR by
* default — the user can still opt out by setting remoteControlAtStartup=false
* in config (explicit settings always win over this default).
*
* Defined here rather than in config.ts to avoid a direct
* config.ts → growthbook.ts import cycle (growthbook.ts → user.ts → config.ts).
*
* @returns false when CCR_AUTO_CONNECT is off at build time; otherwise the
*   cached (possibly stale) `tengu_cobalt_harbor` value, defaulting to false.
*/
export function getCcrAutoConnectDefault(): boolean {
// Positive ternary on feature(), same shape as isBridgeEnabled.
return feature('CCR_AUTO_CONNECT')
? getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_harbor', false)
: false
}
/**
* Opt-in CCR mirror mode — every local session spawns an outbound-only
* Remote Control session that receives forwarded events. Separate from
* getCcrAutoConnectDefault (bidirectional Remote Control). Env var wins for
* local opt-in; GrowthBook controls rollout.
*
* @returns false when CCR_MIRROR is off at build time; otherwise true if
*   CLAUDE_CODE_CCR_MIRROR is truthy or the cached (possibly stale)
*   `tengu_ccr_mirror` flag is on.
*/
export function isCcrMirrorEnabled(): boolean {
// The env var is checked first, so a truthy value skips the flag lookup.
return feature('CCR_MIRROR')
? isEnvTruthy(process.env.CLAUDE_CODE_CCR_MIRROR) ||
getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_mirror', false)
: false
}

2999
src/bridge/bridgeMain.ts Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,461 @@
/**
* Shared transport-layer helpers for bridge message handling.
*
* Extracted from replBridge.ts so both the env-based core (initBridgeCore)
* and the env-less core (initEnvLessBridgeCore) can use the same ingress
* parsing, control-request handling, and echo-dedup machinery.
*
* Everything here is pure — no closure over bridge-specific state. All
* collaborators (transport, sessionId, UUID sets, callbacks) are passed
* as params.
*/
import { randomUUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type {
SDKControlRequest,
SDKControlResponse,
} from '../entrypoints/sdk/controlTypes.js'
import type { SDKResultSuccess } from '../entrypoints/sdk/coreTypes.js'
import { logEvent } from '../services/analytics/index.js'
import { EMPTY_USAGE } from '../services/api/emptyUsage.js'
import type { Message } from '../types/message.js'
import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { jsonParse } from '../utils/slowOperations.js'
import type { ReplBridgeTransport } from './replBridgeTransport.js'
// ─── Type guards ─────────────────────────────────────────────────────────────
/**
 * Type predicate for parsed WebSocket messages.
 *
 * SDKMessage is a discriminated union on `type`, so checking that the value
 * is a non-null object with a string `type` is enough here; callers narrow
 * further via the union.
 */
export function isSDKMessage(value: unknown): value is SDKMessage {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  return 'type' in value && typeof value.type === 'string'
}
/**
 * Type predicate for control_response messages from the server: a non-null
 * object whose `type` is exactly 'control_response' and which has a
 * `response` property (its value is not inspected).
 */
export function isSDKControlResponse(
  value: unknown,
): value is SDKControlResponse {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('type' in value) || value.type !== 'control_response') {
    return false
  }
  return 'response' in value
}
/**
 * Type predicate for control_request messages from the server: a non-null
 * object whose `type` is exactly 'control_request' and which has both
 * `request_id` and `request` properties (their values are not inspected).
 */
export function isSDKControlRequest(
  value: unknown,
): value is SDKControlRequest {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('type' in value) || value.type !== 'control_request') {
    return false
  }
  return 'request_id' in value && 'request' in value
}
/**
 * Decide whether a REPL message should be forwarded over the bridge
 * transport.
 *
 * Forwarded: user and assistant turns that are not virtual, plus system
 * messages with subtype 'local_command' (slash-command events). Everything
 * else (tool_result, progress, etc.) is internal REPL chatter. Virtual
 * user/assistant messages (REPL inner calls) are display-only — bridge/SDK
 * consumers see the REPL tool_use/result that summarizes the work.
 */
export function isEligibleBridgeMessage(m: Message): boolean {
  switch (m.type) {
    case 'user':
    case 'assistant':
      return !m.isVirtual
    case 'system':
      return m.subtype === 'local_command'
    default:
      return false
  }
}
/**
 * Pull the text that may be used to title a session out of a Message
 * (for onUserMessage).
 *
 * Yields undefined when the message should not title the session:
 * non-user messages, meta messages (nudges), tool results, compact
 * summaries, messages whose origin is not human (task notifications,
 * channel messages), and content that is empty once display tags
 * (<ide_opened_file>, <session-start-hook>, etc.) are stripped. For block
 * content only the first text block is considered.
 *
 * Synthetic interrupts ([Request interrupted by user]) are NOT filtered
 * here — isSyntheticMessage lives in messages.ts (heavy import, pulls the
 * command registry). The initialMessages path in initReplBridge checks it;
 * the writeMessages path seeing an interrupt as the *first* message is
 * implausible (an interrupt implies a prior prompt already flowed through).
 */
export function extractTitleText(m: Message): string | undefined {
  if (m.type !== 'user') return undefined
  if (m.isMeta || m.toolUseResult || m.isCompactSummary) return undefined
  if (m.origin && m.origin.kind !== 'human') return undefined

  const { content } = m.message
  let candidate: string | undefined
  if (typeof content === 'string') {
    candidate = content
  } else {
    // Only the first text block is a title candidate.
    for (const block of content) {
      if (block.type !== 'text') continue
      candidate = block.text
      break
    }
  }
  if (!candidate) return undefined

  // An all-tags message strips down to '' and is reported as undefined.
  return stripDisplayTagsAllowEmpty(candidate) || undefined
}
// ─── Ingress routing ─────────────────────────────────────────────────────────
/**
 * Parse an ingress WebSocket message and route it to the appropriate handler.
 * Ignores messages whose UUID is in recentPostedUUIDs (echoes of what we sent)
 * or in recentInboundUUIDs (re-deliveries we've already forwarded — e.g.
 * server replayed history after a transport swap lost the seq-num cursor).
 *
 * Never throws: parse failures and synchronous handler errors are logged via
 * logForDebugging, and a rejection from an async onInboundMessage handler is
 * caught and logged instead of escaping as an unhandled promise rejection.
 *
 * @param data Raw JSON text of one WebSocket frame.
 * @param recentPostedUUIDs UUIDs of messages this client posted (echo filter).
 * @param recentInboundUUIDs UUIDs of user messages already forwarded; a newly
 *   accepted user message's UUID is added to it.
 * @param onInboundMessage Receives accepted `user` messages; may be async.
 * @param onPermissionResponse Receives `control_response` frames.
 * @param onControlRequest Receives `control_request` frames.
 */
export function handleIngressMessage(
  data: string,
  recentPostedUUIDs: BoundedUUIDSet,
  recentInboundUUIDs: BoundedUUIDSet,
  onInboundMessage: ((msg: SDKMessage) => void | Promise<void>) | undefined,
  onPermissionResponse?: ((response: SDKControlResponse) => void) | undefined,
  onControlRequest?: ((request: SDKControlRequest) => void) | undefined,
): void {
  try {
    const parsed: unknown = normalizeControlMessageKeys(jsonParse(data))

    // control_response is not an SDKMessage — check before the type guard
    if (isSDKControlResponse(parsed)) {
      logForDebugging('[bridge:repl] Ingress message type=control_response')
      onPermissionResponse?.(parsed)
      return
    }

    // control_request from the server (initialize, set_model, can_use_tool).
    // Must respond promptly or the server kills the WS (~10-14s timeout).
    if (isSDKControlRequest(parsed)) {
      logForDebugging(
        `[bridge:repl] Inbound control_request subtype=${parsed.request.subtype}`,
      )
      onControlRequest?.(parsed)
      return
    }

    if (!isSDKMessage(parsed)) return

    // Check for UUID to detect echoes of our own messages
    const uuid =
      'uuid' in parsed && typeof parsed.uuid === 'string'
        ? parsed.uuid
        : undefined

    if (uuid && recentPostedUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring echo: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    // Defensive dedup: drop inbound prompts we've already forwarded. The
    // SSE seq-num carryover (lastTransportSequenceNum) is the primary fix
    // for history-replay; this catches edge cases where that negotiation
    // fails (server ignores from_sequence_num, transport died before
    // receiving any frames, etc).
    if (uuid && recentInboundUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring re-delivered inbound: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    logForDebugging(
      `[bridge:repl] Ingress message type=${parsed.type}${uuid ? ` uuid=${uuid}` : ''}`,
    )

    if (parsed.type === 'user') {
      if (uuid) recentInboundUUIDs.add(uuid)
      logEvent('tengu_bridge_message_received', {
        is_repl: true,
      })
      if (onInboundMessage) {
        // Fire-and-forget — handler may be async (attachment resolution).
        // The enclosing try/catch only sees synchronous throws, so attach a
        // rejection handler; otherwise a failing async handler surfaces as
        // an unhandled promise rejection.
        void Promise.resolve(onInboundMessage(parsed)).catch(
          (err: unknown) => {
            logForDebugging(
              `[bridge:repl] Inbound message handler failed: ${errorMessage(err)}`,
            )
          },
        )
      }
    } else {
      logForDebugging(
        `[bridge:repl] Ignoring non-user inbound message: type=${parsed.type}`,
      )
    }
  } catch (err) {
    logForDebugging(
      `[bridge:repl] Failed to parse ingress message: ${errorMessage(err)}`,
    )
  }
}
// ─── Server-initiated control requests ───────────────────────────────────────
/** Collaborators handed to handleServerControlRequest. */
export type ServerControlRequestHandlers = {
/**
* Transport the control_response is written to. When null, the request is
* logged and dropped without a reply.
*/
transport: ReplBridgeTransport | null
/** Attached to every outgoing control_response as `session_id`. */
sessionId: string
/**
* When true, all mutable requests (interrupt, set_model, set_permission_mode,
* set_max_thinking_tokens) reply with an error instead of false-success.
* initialize still replies success — the server kills the connection otherwise.
* Used by the outbound-only bridge mode and the SDK's /bridge subpath so claude.ai sees a
* proper error instead of "action succeeded but nothing happened locally".
*/
outboundOnly?: boolean
/** Invoked for `interrupt` requests; success is replied whether or not it is set. */
onInterrupt?: () => void
/** Invoked for `set_model` with the request's `model` value. */
onSetModel?: (model: string | undefined) => void
/** Invoked for `set_max_thinking_tokens` with the request's `max_thinking_tokens` value. */
onSetMaxThinkingTokens?: (maxTokens: number | null) => void
/**
* Invoked for `set_permission_mode`. The returned verdict selects a success
* or error reply; when this callback is omitted an error is replied.
*/
onSetPermissionMode?: (
mode: PermissionMode,
) => { ok: true } | { ok: false; error: string }
}
/**
* Error text sent in the control_response for every non-initialize
* control_request when the handlers set `outboundOnly`.
*/
const OUTBOUND_ONLY_ERROR =
'This session is outbound-only. Enable Remote Control locally to allow inbound control.'
/**
 * Answer a server-originated control_request over the bridge transport.
 *
 * The server sends these for session lifecycle events (initialize,
 * set_model) and for turn-level coordination (interrupt,
 * set_max_thinking_tokens). Every request must get a reply — otherwise the
 * server hangs and kills the WS after ~10-14s — so unknown subtypes are
 * answered with an error rather than ignored. The only case with no reply
 * is a null transport, which is logged and dropped.
 *
 * Previously a closure inside initBridgeCore's onWorkReceived; now takes
 * collaborators as params so both cores can use it.
 *
 * @param request The control_request received from the server.
 * @param handlers Transport, session id, and optional per-subtype callbacks.
 */
export function handleServerControlRequest(
  request: SDKControlRequest,
  handlers: ServerControlRequestHandlers,
): void {
  const {
    transport,
    sessionId,
    outboundOnly,
    onInterrupt,
    onSetModel,
    onSetMaxThinkingTokens,
    onSetPermissionMode,
  } = handlers

  if (!transport) {
    logForDebugging(
      '[bridge:repl] Cannot respond to control_request: transport not configured',
    )
    return
  }

  const requestId = request.request_id
  const payload = request.request

  // Builders for the two bare reply shapes used by most subtypes.
  const succeed = (): SDKControlResponse => ({
    type: 'control_response',
    response: {
      subtype: 'success',
      request_id: requestId,
    },
  })
  const fail = (error: string): SDKControlResponse => ({
    type: 'control_response',
    response: {
      subtype: 'error',
      request_id: requestId,
      error,
    },
  })

  // Outbound-only: reply error for mutable requests so claude.ai doesn't show
  // false success. initialize must still succeed (the server kills the
  // connection if it doesn't).
  if (outboundOnly && payload.subtype !== 'initialize') {
    const rejection = fail(OUTBOUND_ONLY_ERROR)
    void transport.write({ ...rejection, session_id: sessionId })
    logForDebugging(
      `[bridge:repl] Rejected ${payload.subtype} (outbound-only) request_id=${requestId}`,
    )
    return
  }

  let reply: SDKControlResponse
  switch (payload.subtype) {
    case 'initialize':
      // Respond with minimal capabilities — the REPL handles
      // commands, models, and account info itself.
      reply = {
        type: 'control_response',
        response: {
          subtype: 'success',
          request_id: requestId,
          response: {
            commands: [],
            output_style: 'normal',
            available_output_styles: ['normal'],
            models: [],
            account: {},
            pid: process.pid,
          },
        },
      }
      break
    case 'set_model':
      onSetModel?.(payload.model)
      reply = succeed()
      break
    case 'set_max_thinking_tokens':
      onSetMaxThinkingTokens?.(payload.max_thinking_tokens)
      reply = succeed()
      break
    case 'set_permission_mode': {
      // The callback returns a policy verdict so we can send an error
      // control_response without importing isAutoModeGateEnabled /
      // isBypassPermissionsModeDisabled here (bootstrap-isolation). If no
      // callback is registered (daemon context, which doesn't wire this —
      // see daemonBridge.ts), use an error verdict rather than a silent
      // false-success: the mode is never actually applied in that context,
      // so success would lie to the client.
      const verdict = onSetPermissionMode?.(payload.mode) ?? {
        ok: false,
        error:
          'set_permission_mode is not supported in this context (onSetPermissionMode callback not registered)',
      }
      reply = verdict.ok ? succeed() : fail(verdict.error)
      break
    }
    case 'interrupt':
      onInterrupt?.()
      reply = succeed()
      break
    default:
      // Unknown subtype — respond with error so the server doesn't
      // hang waiting for a reply that never comes.
      reply = fail(
        `REPL bridge does not handle control_request subtype: ${payload.subtype}`,
      )
  }

  void transport.write({ ...reply, session_id: sessionId })
  logForDebugging(
    `[bridge:repl] Sent control_response for ${payload.subtype} request_id=${requestId} result=${reply.response.subtype}`,
  )
}
// ─── Result message (for session archival on teardown) ───────────────────────
/**
 * Construct the minimal `SDKResultSuccess` event used for session archival.
 * The server needs this event before a WS close to trigger archival. All
 * counters and durations are zero, usage is a fresh copy of EMPTY_USAGE,
 * and each call gets a new random UUID.
 *
 * @param sessionId Session the result event is attributed to.
 */
export function makeResultMessage(sessionId: string): SDKResultSuccess {
  // Copy so callers can never mutate the shared EMPTY_USAGE constant.
  const usage = { ...EMPTY_USAGE }
  const archivalResult: SDKResultSuccess = {
    type: 'result',
    subtype: 'success',
    duration_ms: 0,
    duration_api_ms: 0,
    is_error: false,
    num_turns: 0,
    result: '',
    stop_reason: null,
    total_cost_usd: 0,
    usage,
    modelUsage: {},
    permission_denials: [],
    session_id: sessionId,
    uuid: randomUUID(),
  }
  return archivalResult
}
// ─── BoundedUUIDSet (echo-dedup ring buffer) ─────────────────────────────────
/**
 * FIFO-bounded set backed by a circular buffer. Evicts the oldest entry
 * when capacity is reached, keeping memory usage constant at O(capacity).
 *
 * Messages are added in chronological order, so evicted entries are always
 * the oldest. The caller relies on external ordering (the hook's
 * lastWrittenIndexRef) as the primary dedup — this set is a secondary
 * safety net for echo filtering and race-condition dedup.
 *
 * A capacity of 0 yields a set that never retains anything.
 */
export class BoundedUUIDSet {
  private readonly capacity: number
  private readonly ring: (string | undefined)[]
  private readonly set = new Set<string>()
  private writeIdx = 0

  /**
   * @param capacity Maximum number of UUIDs retained at once. Must be a
   *   valid array length (`new Array(capacity)` throws RangeError otherwise).
   */
  constructor(capacity: number) {
    this.capacity = capacity
    this.ring = new Array<string | undefined>(capacity)
  }

  /** Remember `uuid`, evicting the oldest entry if the set is full. */
  add(uuid: string): void {
    // With capacity 0 there is no slot to write into. Without this guard
    // `% this.capacity` turns writeIdx into NaN, after which nothing is
    // ever evicted and the set grows without bound.
    if (this.capacity < 1) return
    if (this.set.has(uuid)) return
    // Evict the entry at the current write position (if occupied)
    const evicted = this.ring[this.writeIdx]
    if (evicted !== undefined) {
      this.set.delete(evicted)
    }
    this.ring[this.writeIdx] = uuid
    this.set.add(uuid)
    this.writeIdx = (this.writeIdx + 1) % this.capacity
  }

  /** True when `uuid` is currently retained (added and not yet evicted). */
  has(uuid: string): boolean {
    return this.set.has(uuid)
  }

  /** Forget every entry and rewind the write position. */
  clear(): void {
    this.set.clear()
    this.ring.fill(undefined)
    this.writeIdx = 0
  }
}

View File

@@ -0,0 +1,43 @@
import type { PermissionUpdate } from '../utils/permissions/PermissionUpdateSchema.js'
/**
* Decision payload for a bridged permission prompt. `behavior` is the only
* required field and is the discriminant isBridgePermissionResponse checks.
*/
type BridgePermissionResponse = {
behavior: 'allow' | 'deny'
// NOTE(review): presumably a replacement for the tool input — confirm against the consumer.
updatedInput?: Record<string, unknown>
// NOTE(review): presumably permission rule changes to apply alongside the decision — confirm.
updatedPermissions?: PermissionUpdate[]
// NOTE(review): presumably human-readable text accompanying the decision — confirm.
message?: string
}
/**
* Callback surface for exchanging permission requests and responses over
* the bridge. Implementations are not visible in this file.
*/
type BridgePermissionCallbacks = {
/** Send a permission request identified by `requestId` for one tool use. */
sendRequest(
requestId: string,
toolName: string,
input: Record<string, unknown>,
toolUseId: string,
description: string,
permissionSuggestions?: PermissionUpdate[],
blockedPath?: string,
): void
/** Send the decision for the request identified by `requestId`. */
sendResponse(requestId: string, response: BridgePermissionResponse): void
/** Cancel a pending control_request so the web app can dismiss its prompt. */
cancelRequest(requestId: string): void
/**
* Register `handler` for responses to `requestId`. The returned function
* removes the registration.
*/
onResponse(
requestId: string,
handler: (response: BridgePermissionResponse) => void,
): () => void // returns unsubscribe
}
/**
 * Runtime guard that accepts a parsed control_response payload as a
 * BridgePermissionResponse. Only the required `behavior` discriminant is
 * validated ('allow' or 'deny'), which avoids an unsafe `as` cast at the
 * call site; optional fields are not inspected.
 */
function isBridgePermissionResponse(
  value: unknown,
): value is BridgePermissionResponse {
  if (typeof value !== 'object' || value === null) return false
  if (!('behavior' in value)) return false
  const { behavior } = value
  return behavior === 'allow' || behavior === 'deny'
}
export { isBridgePermissionResponse }
export type { BridgePermissionCallbacks, BridgePermissionResponse }

210
src/bridge/bridgePointer.ts Normal file
View File

@@ -0,0 +1,210 @@
import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
import { dirname, join } from 'path'
import { z } from 'zod/v4'
import { logForDebugging } from '../utils/debug.js'
import { isENOENT } from '../utils/errors.js'
import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
getProjectsDir,
sanitizePath,
} from '../utils/sessionStoragePortable.js'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
/**
* Upper bound on worktree fanout. git worktree list is naturally bounded
* (50 is a LOT), but this caps the parallel stat() burst and guards against
* pathological setups. Above this, --continue falls back to current-dir-only:
* readBridgePointerAcrossWorktrees logs and returns null without scanning
* any sibling worktree.
*/
const MAX_WORKTREE_FANOUT = 50
/**
* Crash-recovery pointer for Remote Control sessions.
*
* Written immediately after a bridge session is created, periodically
* refreshed during the session, and cleared on clean shutdown. If the
* process dies uncleanly (crash, kill -9, terminal closed), the pointer
* persists. On next startup, `claude remote-control` detects it and offers
* to resume via the --session-id flow from #20460.
*
* Staleness is checked against the file's mtime (not an embedded timestamp)
* so that a periodic re-write with the same content serves as a refresh —
* matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
* bridge that's been polling for 5+ hours and then crashes still has a
* fresh pointer as long as the refresh ran within the window.
*
* Scoped per working directory (alongside transcript JSONL files) so two
* concurrent bridges in different repos don't clobber each other.
*
* The value is 4 hours expressed in milliseconds; readBridgePointer treats
* a pointer whose mtime is older than this as stale.
*/
export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000
/**
* Zod schema for the on-disk pointer JSON. Wrapped in lazySchema, so it is
* obtained by calling BridgePointerSchema() — presumably to defer schema
* construction until first use (confirm in utils/lazySchema).
*/
const BridgePointerSchema = lazySchema(() =>
z.object({
sessionId: z.string(),
environmentId: z.string(),
// Which kind of bridge wrote the pointer.
source: z.enum(['standalone', 'repl']),
}),
)
/** Validated pointer shape, inferred from BridgePointerSchema. */
export type BridgePointer = z.infer<ReturnType<typeof BridgePointerSchema>>
/**
 * Location of the bridge pointer file for a working directory:
 * `<projects dir>/<sanitized dir>/bridge-pointer.json`.
 */
export function getBridgePointerPath(dir: string): string {
  const projectsRoot = getProjectsDir()
  const dirKey = sanitizePath(dir)
  return join(projectsRoot, dirKey, 'bridge-pointer.json')
}
/**
 * Persist the pointer for `dir`, creating the parent directory if needed.
 *
 * Doubles as the mtime refresh during long sessions: re-writing the same
 * IDs is a cheap no-content-change write that resets the staleness clock.
 * Best-effort — a crash-recovery file must never itself cause a crash, so
 * any failure is logged at warn level and swallowed.
 */
export async function writeBridgePointer(
  dir: string,
  pointer: BridgePointer,
): Promise<void> {
  const target = getBridgePointerPath(dir)
  try {
    const parentDir = dirname(target)
    await mkdir(parentDir, { recursive: true })
    const serialized = jsonStringify(pointer)
    await writeFile(target, serialized, 'utf8')
    logForDebugging(`[bridge:pointer] wrote ${target}`)
  } catch (err: unknown) {
    logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
  }
}
/**
 * Load the pointer for `dir` together with its age (ms since last write).
 *
 * Operates directly and handles errors — no existence check (CLAUDE.md
 * TOCTOU rule). Resolves to null on any failure: missing/unreadable file,
 * corrupted JSON, schema mismatch, or staleness (mtime older than
 * BRIDGE_POINTER_TTL_MS). Invalid and stale pointers are deleted so they
 * don't keep re-prompting after the backend has already GC'd the env.
 */
export async function readBridgePointer(
  dir: string,
): Promise<(BridgePointer & { ageMs: number }) | null> {
  const file = getBridgePointerPath(dir)

  let contents: string
  let modifiedAtMs: number
  try {
    // stat for mtime (staleness anchor), then read. Two syscalls, but both
    // are needed — mtime IS the data we return, not a TOCTOU guard.
    const info = await stat(file)
    modifiedAtMs = info.mtimeMs
    contents = await readFile(file, 'utf8')
  } catch {
    return null
  }

  const validation = BridgePointerSchema().safeParse(safeJsonParse(contents))
  if (!validation.success) {
    logForDebugging(`[bridge:pointer] invalid schema, clearing: ${file}`)
    await clearBridgePointer(dir)
    return null
  }

  // Clamp at zero so an mtime in the future does not yield a negative age.
  const ageMs = Math.max(0, Date.now() - modifiedAtMs)
  if (ageMs > BRIDGE_POINTER_TTL_MS) {
    logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${file}`)
    await clearBridgePointer(dir)
    return null
  }

  return { ...validation.data, ageMs }
}
/**
 * Worktree-aware pointer lookup for `--continue`.
 *
 * The REPL bridge writes its pointer to `getOriginalCwd()`, which
 * EnterWorktreeTool/activeWorktreeSession can mutate to a worktree path —
 * but `claude remote-control --continue` runs with `resolve('.')` = shell
 * CWD. So when `dir` itself has no pointer, this fans out across git
 * worktree siblings and picks the freshest one, matching /resume's
 * semantics.
 *
 * Fast path: `dir` is checked first and git is consulted only on a miss.
 * Fanout reads run in parallel and are skipped entirely when the worktree
 * count exceeds MAX_WORKTREE_FANOUT.
 *
 * Resolves to the pointer AND the directory it was found in (so the caller
 * can clear the right file on resume failure), or null when none is found.
 */
export async function readBridgePointerAcrossWorktrees(
  dir: string,
): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
  type Hit = { pointer: BridgePointer & { ageMs: number }; dir: string }

  // Fast path: current dir. Covers standalone bridge (always matches) and
  // REPL bridge when no worktree mutation happened.
  const local = await readBridgePointer(dir)
  if (local) return { pointer: local, dir }

  // Fanout: scan worktree siblings. getWorktreePathsPortable has a 5s
  // timeout and returns [] on any error (not a git repo, git not installed).
  const worktrees = await getWorktreePathsPortable(dir)
  const worktreeCount = worktrees.length
  if (worktreeCount <= 1) return null
  if (worktreeCount > MAX_WORKTREE_FANOUT) {
    logForDebugging(
      `[bridge:pointer] ${worktreeCount} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
    )
    return null
  }

  // Drop `dir` itself so it is not re-read. sanitizePath normalizes
  // case/separators so worktree-list output matches our fast-path key even
  // on Windows where git may emit C:/ vs stored c:/.
  const ownKey = sanitizePath(dir)
  const siblings = worktrees.filter(wt => sanitizePath(wt) !== ownKey)

  // Parallel stat+read: worktrees without a pointer fail fast on stat(), so
  // total latency is roughly that of the slowest single read.
  const hits = await Promise.all(
    siblings.map(async (wt): Promise<Hit | null> => {
      const pointer = await readBridgePointer(wt)
      return pointer ? { pointer, dir: wt } : null
    }),
  )

  // Freshest = lowest ageMs; on a tie the earlier worktree wins. The pointer
  // stores environmentId so resume reconnects to the right env regardless
  // of which worktree --continue was invoked from.
  const freshest = hits.reduce<Hit | null>((best, hit) => {
    if (!hit) return best
    if (!best || hit.pointer.ageMs < best.pointer.ageMs) return hit
    return best
  }, null)

  if (freshest) {
    logForDebugging(
      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
    )
  }
  return freshest
}
/**
 * Remove the pointer file for `dir`. Idempotent: a missing file (ENOENT)
 * is the expected outcome after a previous clean shutdown and is ignored
 * silently. Any other failure is logged at warn level and swallowed.
 */
export async function clearBridgePointer(dir: string): Promise<void> {
  const target = getBridgePointerPath(dir)
  try {
    await unlink(target)
    logForDebugging(`[bridge:pointer] cleared ${target}`)
  } catch (err: unknown) {
    if (isENOENT(err)) return
    logForDebugging(`[bridge:pointer] clear failed: ${err}`, {
      level: 'warn',
    })
  }
}
/**
 * Parse JSON without throwing: yields the parsed value, or null when
 * `raw` is not valid JSON.
 */
function safeJsonParse(raw: string): unknown {
  let value: unknown = null
  try {
    value = jsonParse(raw)
  } catch {
    // Malformed input — fall through with null.
  }
  return value
}

View File

@@ -0,0 +1,163 @@
import {
getClaudeAiBaseUrl,
getRemoteSessionUrl,
} from '../constants/product.js'
import { stringWidth } from '../ink/stringWidth.js'
import { formatDuration, truncateToWidth } from '../utils/format.js'
import { getGraphemeSegmenter } from '../utils/intl.js'
/**
* Bridge status state machine states.
*
* In bridgeUI's renderStatusLine, 'reconnecting' and 'failed' are skipped
* (they are rendered by dedicated updaters), and 'idle' selects the idle
* colors and footer text; every other state uses the active variants.
*/
export type StatusState =
| 'idle'
| 'attached'
| 'titled'
| 'reconnecting'
| 'failed'
/** How long a tool activity line stays visible after last tool_start (ms). Currently 30 seconds. */
export const TOOL_DISPLAY_EXPIRY_MS = 30_000
/** Interval for the shimmer animation tick (ms). */
export const SHIMMER_INTERVAL_MS = 150
/** Current local wall-clock time formatted as zero-padded `HH:MM:SS`. */
export function timestamp(): string {
  const now = new Date()
  return [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map(part => String(part).padStart(2, '0'))
    .join(':')
}
export { formatDuration, truncateToWidth as truncatePrompt }
/**
 * Shorten a tool activity summary for the trail display by truncating it
 * to a visual width of 30.
 */
export function abbreviateActivity(summary: string): string {
  const maxWidth = 30
  return truncateToWidth(summary, maxWidth)
}
/**
 * Build the connect URL shown when the bridge is idle:
 * `<claude.ai base>/code?bridge=<environmentId>`.
 *
 * @param environmentId Bridge environment ID placed in the `bridge` query
 *   parameter (percent-encoded).
 * @param ingressUrl Optional ingress URL forwarded to getClaudeAiBaseUrl to
 *   select the base URL.
 */
export function buildBridgeConnectUrl(
  environmentId: string,
  ingressUrl?: string,
): string {
  const baseUrl = getClaudeAiBaseUrl(undefined, ingressUrl)
  // Percent-encode the ID: it is a query-string value, so a stray `&`, `#`
  // or space must not be able to change the URL's structure. IDs made of
  // letters, digits, `_` and `-` pass through unchanged.
  return `${baseUrl}/code?bridge=${encodeURIComponent(environmentId)}`
}
/**
 * Build the session URL shown when a session is attached. Delegates to
 * getRemoteSessionUrl for the cse_→session_ prefix translation, then appends
 * the v1-specific ?bridge={environmentId} query.
 *
 * @param sessionId Session to link to.
 * @param environmentId Bridge environment ID placed in the `bridge` query
 *   parameter (percent-encoded).
 * @param ingressUrl Optional ingress URL forwarded to getRemoteSessionUrl.
 */
export function buildBridgeSessionUrl(
  sessionId: string,
  environmentId: string,
  ingressUrl?: string,
): string {
  const sessionUrl = getRemoteSessionUrl(sessionId, ingressUrl)
  // Percent-encode the ID: it is a query-string value, so a stray `&`, `#`
  // or space must not be able to change the URL's structure. IDs made of
  // letters, digits, `_` and `-` pass through unchanged.
  return `${sessionUrl}?bridge=${encodeURIComponent(environmentId)}`
}
/**
 * Column index of the shimmer highlight for a reverse-sweep animation.
 *
 * One cycle lasts `messageWidth + 20` ticks. The index starts at
 * `messageWidth + 10` and decreases by one per tick down to `-9`, then
 * wraps.
 */
export function computeGlimmerIndex(
  tick: number,
  messageWidth: number,
): number {
  const period = messageWidth + 20
  const phase = tick % period
  const startIndex = messageWidth + 10
  return startIndex - phase
}
/**
 * Partition `text` into the parts before, inside, and after the shimmer
 * band, measured in visual columns.
 *
 * The band covers columns `glimmerIndex - 1` through `glimmerIndex + 1`.
 * Text is walked grapheme by grapheme and measured with `stringWidth`, so
 * the split is correct for multi-byte characters, emoji, and CJK glyphs.
 * When the band lies entirely off-screen the whole text is returned as
 * `before`.
 *
 * Both renderers (chalk in bridgeUI.ts and React/Ink in bridge.tsx) apply
 * their own coloring to the returned `{ before, shimmer, after }` strings.
 */
export function computeShimmerSegments(
  text: string,
  glimmerIndex: number,
): { before: string; shimmer: string; after: string } {
  const totalWidth = stringWidth(text)
  const bandStart = glimmerIndex - 1
  const bandEnd = glimmerIndex + 1

  const offscreen = bandStart >= totalWidth || bandEnd < 0
  if (offscreen) {
    return { before: text, shimmer: '', after: '' }
  }

  // The band may begin left of column 0; clamp so nothing lands before it.
  const firstBandCol = Math.max(0, bandStart)
  const leading: string[] = []
  const lit: string[] = []
  const trailing: string[] = []

  let col = 0
  for (const { segment } of getGraphemeSegmenter().segment(text)) {
    const width = stringWidth(segment)
    const bucket =
      col + width <= firstBandCol ? leading : col > bandEnd ? trailing : lit
    bucket.push(segment)
    col += width
  }

  return {
    before: leading.join(''),
    shimmer: lit.join(''),
    after: trailing.join(''),
  }
}
/**
* Computed bridge status label and color from connection state.
* `label` is one of the four fixed strings getBridgeStatus can produce;
* `color` is a semantic color name rather than a concrete color value.
*/
export type BridgeStatusInfo = {
label:
| 'Remote Control failed'
| 'Remote Control reconnecting'
| 'Remote Control active'
| 'Remote Control connecting\u2026'
color: 'error' | 'warning' | 'success'
}
/**
 * Map the bridge connection state to a status label and color.
 *
 * Precedence, highest first: a non-empty `error` → failed; `reconnecting`
 * → reconnecting; `sessionActive` or `connected` → active; otherwise the
 * bridge is still connecting.
 */
export function getBridgeStatus({
  error,
  connected,
  sessionActive,
  reconnecting,
}: {
  error: string | undefined
  connected: boolean
  sessionActive: boolean
  reconnecting: boolean
}): BridgeStatusInfo {
  let status: BridgeStatusInfo
  if (error) {
    status = { label: 'Remote Control failed', color: 'error' }
  } else if (reconnecting) {
    status = { label: 'Remote Control reconnecting', color: 'warning' }
  } else if (sessionActive || connected) {
    status = { label: 'Remote Control active', color: 'success' }
  } else {
    status = { label: 'Remote Control connecting\u2026', color: 'warning' }
  }
  return status
}
/** Footer line for the idle (Ready) state, ending with the connect `url`. */
export function buildIdleFooterText(url: string): string {
  const prefix = 'Code everywhere with the Claude app or '
  return prefix + url
}
/** Footer line for the active (Connected) state, ending with the session `url`. */
export function buildActiveFooterText(url: string): string {
  const prefix = 'Continue coding in the Claude app or '
  return prefix + url
}
/**
* Footer text shown when the bridge has failed. A fixed string: unlike the
* idle and active footers it embeds no URL.
*/
export const FAILED_FOOTER_TEXT = 'Something went wrong, please try again'
/**
 * Make `text` a clickable terminal hyperlink to `url` using OSC 8 escape
 * sequences (BEL-terminated). The escapes add zero visual width:
 * strip-ansi (used by stringWidth) strips them, so countVisualLines in
 * bridgeUI.ts remains accurate.
 */
export function wrapWithOsc8Link(text: string, url: string): string {
  const openLink = '\x1b]8;;' + url + '\x07'
  const closeLink = '\x1b]8;;\x07'
  return openLink + text + closeLink
}

530
src/bridge/bridgeUI.ts Normal file
View File

@@ -0,0 +1,530 @@
import chalk from 'chalk'
import { toString as qrToString } from 'qrcode'
import {
BRIDGE_FAILED_INDICATOR,
BRIDGE_READY_INDICATOR,
BRIDGE_SPINNER_FRAMES,
} from '../constants/figures.js'
import { stringWidth } from '../ink/stringWidth.js'
import { logForDebugging } from '../utils/debug.js'
import {
buildActiveFooterText,
buildBridgeConnectUrl,
buildBridgeSessionUrl,
buildIdleFooterText,
FAILED_FOOTER_TEXT,
formatDuration,
type StatusState,
TOOL_DISPLAY_EXPIRY_MS,
timestamp,
truncatePrompt,
wrapWithOsc8Link,
} from './bridgeStatusUtil.js'
import type {
BridgeConfig,
BridgeLogger,
SessionActivity,
SpawnMode,
} from './types.js'
/**
* Options passed to qrcode's toString in generateQr: text ('utf8') output
* at error-correction level 'L'. `small: true` presumably selects the
* library's compact terminal rendering — confirm against the qrcode docs.
*/
const QR_OPTIONS = {
type: 'utf8' as const,
errorCorrectionLevel: 'L' as const,
small: true,
}
/**
 * Render `url` as a text QR code using QR_OPTIONS and return its non-empty
 * lines in order.
 */
async function generateQr(url: string): Promise<string[]> {
  const rendered = await qrToString(url, QR_OPTIONS)
  const rows: string[] = []
  for (const row of rendered.split('\n')) {
    if (row.length > 0) rows.push(row)
  }
  return rows
}
export function createBridgeLogger(options: {
verbose: boolean
write?: (s: string) => void
}): BridgeLogger {
const write = options.write ?? ((s: string) => process.stdout.write(s))
const verbose = options.verbose
// Track how many status lines are currently displayed at the bottom
let statusLineCount = 0
// Status state machine
let currentState: StatusState = 'idle'
let currentStateText = 'Ready'
let repoName = ''
let branch = ''
let debugLogPath = ''
// Connect URL (built in printBanner with correct base for staging/prod)
let connectUrl = ''
let cachedIngressUrl = ''
let cachedEnvironmentId = ''
let activeSessionUrl: string | null = null
// QR code lines for the current URL
let qrLines: string[] = []
let qrVisible = false
// Tool activity for the second status line
let lastToolSummary: string | null = null
let lastToolTime = 0
// Session count indicator (shown when multi-session mode is enabled)
let sessionActive = 0
let sessionMax = 1
// Spawn mode shown in the session-count line + gates the `w` hint
let spawnModeDisplay: 'same-dir' | 'worktree' | null = null
let spawnMode: SpawnMode = 'single-session'
// Per-session display info for the multi-session bullet list (keyed by compat sessionId)
const sessionDisplayInfo = new Map<
string,
{ title?: string; url: string; activity?: SessionActivity }
>()
// Connecting spinner state
let connectingTimer: ReturnType<typeof setInterval> | null = null
let connectingTick = 0
/**
* Count how many visual terminal rows a string occupies, accounting for
* line wrapping. Each `\n` is one row, and content wider than the terminal
* wraps to additional rows.
*/
function countVisualLines(text: string): number {
// eslint-disable-next-line custom-rules/prefer-use-terminal-size
const cols = process.stdout.columns || 80 // non-React CLI context
let count = 0
// Split on newlines to get logical lines
for (const logical of text.split('\n')) {
if (logical.length === 0) {
// Empty segment between consecutive \n — counts as 1 row
count++
continue
}
const width = stringWidth(logical)
count += Math.max(1, Math.ceil(width / cols))
}
// The trailing \n in "line\n" produces an empty last element — don't count it
// because the cursor sits at the start of the next line, not a new visual row.
if (text.endsWith('\n')) {
count--
}
return count
}
/** Write a status line and track its visual line count. */
function writeStatus(text: string): void {
write(text)
statusLineCount += countVisualLines(text)
}
/** Clear any currently displayed status lines. */
function clearStatusLines(): void {
if (statusLineCount <= 0) return
logForDebugging(`[bridge:ui] clearStatusLines count=${statusLineCount}`)
// Move cursor up to the start of the status block, then erase everything below
write(`\x1b[${statusLineCount}A`) // cursor up N lines
write('\x1b[J') // erase from cursor to end of screen
statusLineCount = 0
}
/** Print a permanent log line, clearing status first and restoring after. */
function printLog(line: string): void {
clearStatusLines()
write(line)
}
/** Regenerate the QR code with the given URL. */
function regenerateQr(url: string): void {
generateQr(url)
.then(lines => {
qrLines = lines
renderStatusLine()
})
.catch(e => {
logForDebugging(`QR code generation failed: ${e}`, { level: 'error' })
})
}
/** Render the connecting spinner line (shown before first updateIdleStatus). */
function renderConnectingLine(): void {
clearStatusLines()
const frame =
BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
let suffix = ''
if (repoName) {
suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
}
if (branch) {
suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
}
writeStatus(
`${chalk.yellow(frame)} ${chalk.yellow('Connecting')}${suffix}\n`,
)
}
/** Start the connecting spinner. Stopped by first updateIdleStatus(). */
function startConnecting(): void {
stopConnecting()
renderConnectingLine()
connectingTimer = setInterval(() => {
connectingTick++
renderConnectingLine()
}, 150)
}
/** Stop the connecting spinner. */
function stopConnecting(): void {
if (connectingTimer) {
clearInterval(connectingTimer)
connectingTimer = null
}
}
/**
 * Render and write the current status lines based on state.
 *
 * Clears the previously drawn status block, then writes, in order: the QR
 * code (when visible), the ant-only debug-log path, the indicator line with
 * its repo/branch suffix, the capacity and per-session lines, the
 * single-session tool-activity line, and finally the footer with key hints.
 *
 * Does nothing while the state is 'reconnecting' or 'failed'.
 */
function renderStatusLine(): void {
  if (currentState === 'reconnecting' || currentState === 'failed') {
    // These states are handled separately (updateReconnectingStatus /
    // updateFailedStatus). Return before clearing so callers like toggleQr
    // and setSpawnModeDisplay don't blank the display during these states.
    return
  }
  clearStatusLines()
  const isIdle = currentState === 'idle'

  // QR code above the status line
  if (qrVisible) {
    for (const line of qrLines) {
      writeStatus(`${chalk.dim(line)}\n`)
    }
  }

  // The indicator glyph and the state text share one color: green while
  // idle, cyan in every other rendered state.
  const stateColor = isIdle ? chalk.green : chalk.cyan
  const stateText = stateColor(currentStateText)

  // Build the suffix with repo and branch
  let suffix = ''
  if (repoName) {
    suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
  }
  // In worktree mode each session gets its own branch, so showing the
  // bridge's branch would be misleading.
  if (branch && spawnMode !== 'worktree') {
    suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
  }

  if (process.env.USER_TYPE === 'ant' && debugLogPath) {
    writeStatus(
      `${chalk.yellow('[ANT-ONLY] Logs:')} ${chalk.dim(debugLogPath)}\n`,
    )
  }
  writeStatus(`${stateColor(BRIDGE_READY_INDICATOR)} ${stateText}${suffix}\n`)

  // Session count and per-session list (multi-session mode only)
  if (sessionMax > 1) {
    const modeHint =
      spawnMode === 'worktree'
        ? 'New sessions will be created in an isolated worktree'
        : 'New sessions will be created in the current directory'
    writeStatus(
      ` ${chalk.dim(`Capacity: ${sessionActive}/${sessionMax} \u00b7 ${modeHint}`)}\n`,
    )
    for (const info of sessionDisplayInfo.values()) {
      const titleText = info.title
        ? truncatePrompt(info.title, 35)
        : chalk.dim('Attached')
      const titleLinked = wrapWithOsc8Link(titleText, info.url)
      const act = info.activity
      // Terminal 'result' / 'error' activities are not shown next to the title.
      const showAct = act && act.type !== 'result' && act.type !== 'error'
      const actText = showAct
        ? chalk.dim(` ${truncatePrompt(act.summary, 40)}`)
        : ''
      // Use the `\n` escape like every other status line: a raw line break
      // inside the template literal would absorb source indentation.
      writeStatus(` ${titleLinked}${actText}\n`)
    }
  }

  // Mode line for spawn modes with a single slot (or true single-session mode)
  if (sessionMax === 1) {
    const modeText =
      spawnMode === 'single-session'
        ? 'Single session \u00b7 exits when complete'
        : spawnMode === 'worktree'
          ? `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in an isolated worktree`
          : `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in the current directory`
    writeStatus(` ${chalk.dim(modeText)}\n`)
  }

  // Tool activity line for single-session mode; hidden once the cached
  // summary is older than TOOL_DISPLAY_EXPIRY_MS.
  if (
    sessionMax === 1 &&
    !isIdle &&
    lastToolSummary &&
    Date.now() - lastToolTime < TOOL_DISPLAY_EXPIRY_MS
  ) {
    writeStatus(` ${chalk.dim(truncatePrompt(lastToolSummary, 60))}\n`)
  }

  // Footer: prefer the active session URL, falling back to the connect URL.
  const url = activeSessionUrl ?? connectUrl
  if (url) {
    // Blank line separator before footer
    writeStatus('\n')
    const footerText = isIdle
      ? buildIdleFooterText(url)
      : buildActiveFooterText(url)
    const qrHint = qrVisible
      ? chalk.dim.italic('space to hide QR code')
      : chalk.dim.italic('space to show QR code')
    const toggleHint = spawnModeDisplay
      ? chalk.dim.italic(' \u00b7 w to toggle spawn mode')
      : ''
    writeStatus(`${chalk.dim(footerText)}\n`)
    writeStatus(`${qrHint}${toggleHint}\n`)
  }
}
return {
/**
 * Print the startup banner and start the connecting spinner.
 *
 * Caches the ingress URL and environment ID, derives the connect URL and
 * its QR code, then writes the banner lines. Version, spawn settings and
 * environment ID are only printed in verbose mode.
 */
printBanner(config: BridgeConfig, environmentId: string): void {
  cachedIngressUrl = config.sessionIngressUrl
  cachedEnvironmentId = environmentId
  connectUrl = buildBridgeConnectUrl(environmentId, cachedIngressUrl)
  regenerateQr(connectUrl)

  if (verbose) {
    write(chalk.dim(`Remote Control`) + ` v${MACRO.VERSION}\n`)
    // Spawn settings are only meaningful outside single-session mode.
    if (config.spawnMode !== 'single-session') {
      write(chalk.dim(`Spawn mode: `) + `${config.spawnMode}\n`)
      write(
        chalk.dim(`Max concurrent sessions: `) + `${config.maxSessions}\n`,
      )
    }
    write(chalk.dim(`Environment ID: `) + `${environmentId}\n`)
  }

  if (config.sandbox) {
    write(chalk.dim(`Sandbox: `) + `${chalk.green('Enabled')}\n`)
  }
  write('\n')

  // Start connecting spinner — first updateIdleStatus() will stop it
  startConnecting()
},
/**
 * Log the start of a session (verbose mode only).
 * The prompt is truncated to 80 characters before it is printed.
 */
logSessionStart(sessionId: string, prompt: string): void {
  if (!verbose) return
  const short = truncatePrompt(prompt, 80)
  const stamp = chalk.dim(`[${timestamp()}]`)
  printLog(
    stamp +
      ` Session started: ${chalk.white(`"${short}"`)} (${chalk.dim(sessionId)})\n`,
  )
},
/** Log a successful session completion together with its formatted duration. */
logSessionComplete(sessionId: string, durationMs: number): void {
  const stamp = chalk.dim(`[${timestamp()}]`)
  printLog(
    stamp +
      ` Session ${chalk.green('completed')} (${formatDuration(durationMs)}) ${chalk.dim(sessionId)}\n`,
  )
},
/** Log a failed session along with the error text supplied by the caller. */
logSessionFailed(sessionId: string, error: string): void {
  const stamp = chalk.dim(`[${timestamp()}]`)
  printLog(
    stamp +
      ` Session ${chalk.red('failed')}: ${error} ${chalk.dim(sessionId)}\n`,
  )
},
/** Log a timestamped status message (printed regardless of verbosity). */
logStatus(message: string): void {
  const stamp = chalk.dim(`[${timestamp()}]`)
  printLog(stamp + ` ${message}\n`)
},
/** Log a dimmed, timestamped message; silent unless verbose mode is on. */
logVerbose(message: string): void {
  if (!verbose) return
  const line = chalk.dim(`[${timestamp()}] ${message}`)
  printLog(line + '\n')
},
/** Log a timestamped error message in red. */
logError(message: string): void {
  const line = chalk.red(`[${timestamp()}] Error: ${message}`)
  printLog(line + '\n')
},
/** Log a successful reconnect and how long the bridge was disconnected. */
logReconnected(disconnectedMs: number): void {
  const stamp = chalk.dim(`[${timestamp()}]`)
  printLog(
    stamp +
      ` ${chalk.green('Reconnected')} after ${formatDuration(disconnectedMs)}\n`,
  )
},
/**
 * Record the repository and branch names shown in the status-line suffix.
 * Does not re-render; the values are picked up by the next render.
 */
setRepoInfo(repo: string, branchName: string): void {
  branch = branchName
  repoName = repo
},
/**
 * Record the debug log path. renderStatusLine prints it only when
 * USER_TYPE is 'ant'. Does not re-render.
 */
setDebugLogPath(path: string): void {
  debugLogPath = path
},
/**
 * Switch to the idle "Ready" state and redraw.
 * Stops the connecting spinner, drops cached tool activity and the active
 * session URL, and points the QR code back at the connect URL.
 */
updateIdleStatus(): void {
  stopConnecting()

  // Forget everything tied to a previously attached session.
  activeSessionUrl = null
  lastToolSummary = null
  lastToolTime = 0

  currentState = 'idle'
  currentStateText = 'Ready'

  regenerateQr(connectUrl)
  renderStatusLine()
},
/**
 * Switch to the attached "Connected" state and redraw.
 * With at most one session slot, the footer and QR code are pointed at the
 * session's own URL.
 */
setAttached(sessionId: string): void {
  stopConnecting()
  lastToolSummary = null
  lastToolTime = 0
  currentState = 'attached'
  currentStateText = 'Connected'

  // Multi-session: keep footer/QR on the environment connect URL so users
  // can spawn more sessions. Per-session links are in the bullet list.
  const hasSingleSlot = sessionMax <= 1
  if (hasSingleSlot) {
    const sessionUrl = buildBridgeSessionUrl(
      sessionId,
      cachedEnvironmentId,
      cachedIngressUrl,
    )
    activeSessionUrl = sessionUrl
    regenerateQr(sessionUrl)
  }

  renderStatusLine()
},
/**
 * Draw the "Reconnecting" display and enter the 'reconnecting' state.
 * Each call advances the spinner by one frame, so the caller's call
 * cadence drives the animation.
 */
updateReconnectingStatus(delayStr: string, elapsedStr: string): void {
  stopConnecting()
  clearStatusLines()
  currentState = 'reconnecting'

  // QR code above the status line
  if (qrVisible) {
    for (const qrLine of qrLines) {
      writeStatus(`${chalk.dim(qrLine)}\n`)
    }
  }

  // Post-increment: use the current tick for this frame, then advance.
  const frame =
    BRIDGE_SPINNER_FRAMES[connectingTick++ % BRIDGE_SPINNER_FRAMES.length]!
  writeStatus(
    `${chalk.yellow(frame)} ${chalk.yellow('Reconnecting')} ${chalk.dim('\u00b7')} ${chalk.dim(`retrying in ${delayStr}`)} ${chalk.dim('\u00b7')} ${chalk.dim(`disconnected ${elapsedStr}`)}\n`,
  )
},
/**
 * Draw the "Remote Control Failed" display and enter the 'failed' state.
 * The error line is omitted when `error` is empty.
 */
updateFailedStatus(error: string): void {
  stopConnecting()
  clearStatusLines()
  currentState = 'failed'

  // Repo and branch, each preceded by a dimmed middle-dot separator.
  const repoPart = repoName
    ? chalk.dim(' \u00b7 ') + chalk.dim(repoName)
    : ''
  const branchPart = branch ? chalk.dim(' \u00b7 ') + chalk.dim(branch) : ''
  const suffix = repoPart + branchPart

  writeStatus(
    `${chalk.red(BRIDGE_FAILED_INDICATOR)} ${chalk.red('Remote Control Failed')}${suffix}\n`,
  )
  writeStatus(`${chalk.dim(FAILED_FOOTER_TEXT)}\n`)
  if (error) {
    writeStatus(`${chalk.red(error)}\n`)
  }
},
/**
 * Record the latest activity and redraw the status lines.
 * Only 'tool_start' activities are cached (summary plus timestamp); the
 * session id, elapsed string and trail arguments are unused here.
 */
updateSessionStatus(
  _sessionId: string,
  _elapsed: string,
  activity: SessionActivity,
  _trail: string[],
): void {
  // Cache tool activity for the second status line
  if (activity.type === 'tool_start') {
    lastToolTime = Date.now()
    lastToolSummary = activity.summary
  }
  renderStatusLine()
},
/**
 * Stop the connecting spinner and erase the drawn status lines.
 * The current state itself is left unchanged.
 */
clearStatus(): void {
  stopConnecting()
  clearStatusLines()
},
/**
 * Flip QR-code visibility and redraw.
 * The redraw is skipped by renderStatusLine while reconnecting or failed,
 * but the visibility flag still flips.
 */
toggleQr(): void {
  const nextVisible = !qrVisible
  qrVisible = nextVisible
  renderStatusLine()
},
/**
 * Store the active/max session counts and the spawn mode.
 * Returns early when nothing changed, and never triggers a render.
 */
updateSessionCount(active: number, max: number, mode: SpawnMode): void {
  const changed =
    sessionActive !== active || sessionMax !== max || spawnMode !== mode
  if (!changed) return

  sessionActive = active
  sessionMax = max
  spawnMode = mode
  // Don't re-render here — the status ticker calls renderStatusLine
  // on its own cadence, and the next tick will pick up the new values.
},
/**
 * Set the spawn mode advertised by the footer toggle hint.
 * A non-null mode is also copied into spawnMode; null only hides the hint.
 * Never triggers a render.
 */
setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void {
  const unchanged = spawnModeDisplay === mode
  if (unchanged) return

  spawnModeDisplay = mode
  // Also sync the #21118-added spawnMode so the next render shows correct
  // mode hint + branch visibility. Don't render here — matches
  // updateSessionCount: called before printBanner (initial setup) and
  // again from the `w` handler (which follows with refreshDisplay).
  if (mode) {
    spawnMode = mode
  }
},
/**
 * Register a session for the per-session list, keyed by its id.
 * An existing entry with the same id is replaced by a fresh one holding
 * only the URL. Does not re-render.
 */
addSession(sessionId: string, url: string): void {
  const entry = { url }
  sessionDisplayInfo.set(sessionId, entry)
},
/**
 * Attach the latest activity to a registered session.
 * Unknown session ids are ignored. Does not re-render.
 */
updateSessionActivity(sessionId: string, activity: SessionActivity): void {
  const entry = sessionDisplayInfo.get(sessionId)
  if (entry) {
    entry.activity = activity
  }
},
/**
 * Record a session's title and redraw.
 * Unknown session ids are ignored. With exactly one session slot the
 * title (truncated to 40 characters) also becomes the main state text.
 */
setSessionTitle(sessionId: string, title: string): void {
  const entry = sessionDisplayInfo.get(sessionId)
  if (!entry) return
  entry.title = title

  // While reconnecting/failed the title is stored but the display state is
  // left alone: switching to 'titled' below would otherwise replace those
  // states and let the next render draw over the spinner/error.
  const displayLocked =
    currentState === 'reconnecting' || currentState === 'failed'
  if (displayLocked) return

  if (sessionMax === 1) {
    // Single-session: show title in the main status line too.
    currentState = 'titled'
    currentStateText = truncatePrompt(title, 40)
  }
  renderStatusLine()
},
/**
 * Drop a session from the per-session list.
 * Removing an unknown id is harmless. Does not re-render.
 */
removeSession(sessionId: string): void {
  sessionDisplayInfo.delete(sessionId)
},
/**
 * Redraw the status lines from the current state.
 * Nothing is drawn while reconnecting or failed, so the spinner/error
 * display stays in place.
 */
refreshDisplay(): void {
  // renderStatusLine has the same early return for these two states; the
  // check is kept here so this method does not depend on that detail.
  if (currentState !== 'reconnecting' && currentState !== 'failed') {
    renderStatusLine()
  }
},
}
}

View File

@@ -0,0 +1,56 @@
/**
* Shared capacity-wake primitive for bridge poll loops.
*
* Both replBridge.ts and bridgeMain.ts need to sleep while "at capacity"
* but wake early when either (a) the outer loop signal aborts (shutdown),
* or (b) capacity frees up (session done / transport lost). This module
* encapsulates the mutable wake-controller + two-signal merger that both
* poll loops previously duplicated byte-for-byte.
*/
/** A merged abort signal plus the function that detaches its listeners. */
export type CapacitySignal = { signal: AbortSignal; cleanup: () => void }

export type CapacityWake = {
  /**
   * Create a signal that aborts when either the outer loop signal or the
   * capacity-wake controller fires. Returns the merged signal and a cleanup
   * function that removes listeners when the sleep resolves normally
   * (without abort). When the merged signal does abort, the listeners are
   * detached automatically, so calling cleanup afterwards is optional and
   * harmless.
   */
  signal(): CapacitySignal
  /**
   * Abort the current at-capacity sleep and arm a fresh controller so the
   * poll loop immediately re-checks for new work.
   */
  wake(): void
}

/**
 * Build a capacity-wake primitive bound to the poll loop's outer signal.
 *
 * @param outerSignal Signal of the enclosing loop; once it aborts, every
 *   merged signal handed out (past or future) is aborted.
 * @returns `signal()` to obtain a merged signal for one sleep, and `wake()`
 *   to cut the current sleep short.
 */
export function createCapacityWake(outerSignal: AbortSignal): CapacityWake {
  let wakeController = new AbortController()

  function wake(): void {
    wakeController.abort()
    // Re-arm: signals requested after this point wait for the next wake.
    wakeController = new AbortController()
  }

  function signal(): CapacitySignal {
    const merged = new AbortController()
    if (outerSignal.aborted || wakeController.signal.aborted) {
      // Already aborted: nothing to listen for, so cleanup is a no-op.
      merged.abort()
      return { signal: merged.signal, cleanup: () => {} }
    }

    // Capture the controller's signal now; wake() swaps wakeController.
    const capSig = wakeController.signal

    function detach(): void {
      outerSignal.removeEventListener('abort', onAbort)
      capSig.removeEventListener('abort', onAbort)
    }

    function onAbort(): void {
      // `once` only removes the listener on the source that fired. Detach
      // from the other source too, otherwise every wake() would leave one
      // listener behind on the long-lived outer signal.
      detach()
      merged.abort()
    }

    outerSignal.addEventListener('abort', onAbort, { once: true })
    capSig.addEventListener('abort', onAbort, { once: true })
    return { signal: merged.signal, cleanup: detach }
  }

  return { signal, wake }
}

View File

@@ -0,0 +1,168 @@
/**
* Thin HTTP wrappers for the CCR v2 code-session API.
*
* Separate file from remoteBridgeCore.ts so the SDK /bridge subpath can
* export createCodeSession + fetchRemoteCredentials without bundling the
* heavy CLI tree (analytics, transport, etc.). Callers supply explicit
* accessToken + baseUrl — no implicit auth or config reads.
*/
import axios from 'axios'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
import { extractErrorDetail } from './debugUtils.js'
/** Value sent in the `anthropic-version` header of every request in this file. */
const ANTHROPIC_VERSION = '2023-06-01'

/**
 * Build the request headers for an OAuth-authenticated JSON call.
 *
 * @param accessToken OAuth access token, sent as a Bearer credential.
 * @returns A new, mutable header map; callers may add further headers.
 */
function oauthHeaders(accessToken: string): Record<string, string> {
  const headers: Record<string, string> = {}
  headers.Authorization = `Bearer ${accessToken}`
  headers['Content-Type'] = 'application/json'
  headers['anthropic-version'] = ANTHROPIC_VERSION
  return headers
}
/**
 * Create a code session via POST {baseUrl}/v1/code/sessions.
 *
 * Never throws for request or response problems: every failure is written
 * to the debug log and reported as null.
 *
 * @param baseUrl API origin the path is appended to.
 * @param accessToken OAuth access token for the Authorization header.
 * @param title Session title sent in the request body.
 * @param timeoutMs axios request timeout in milliseconds.
 * @param tags Optional tags; omitted from the body when absent or empty.
 * @returns The new session id (always starting with `cse_`), or null when
 *   the request fails, the status is not 200/201, or the body has no such id.
 */
export async function createCodeSession(
  baseUrl: string,
  accessToken: string,
  title: string,
  timeoutMs: number,
  tags?: string[],
): Promise<string | null> {
  const endpoint = `${baseUrl}/v1/code/sessions`
  let response
  try {
    // bridge: {} is the positive signal for the oneof runner — omitting it
    // (or sending environment_id: "") now 400s. BridgeRunner is an empty
    // message today; it's a placeholder for future bridge-specific options.
    const body = { title, bridge: {}, ...(tags?.length ? { tags } : {}) }
    response = await axios.post(endpoint, body, {
      headers: oauthHeaders(accessToken),
      timeout: timeoutMs,
      // Only 5xx rejects; 4xx responses are inspected below.
      validateStatus: s => s < 500,
    })
  } catch (err: unknown) {
    logForDebugging(
      `[code-session] Session create request failed: ${errorMessage(err)}`,
    )
    return null
  }

  const created = response.status === 200 || response.status === 201
  if (!created) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[code-session] Session create failed ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }

  // Accept the body only when it carries a string session.id tagged cse_.
  const data: unknown = response.data
  if (
    data &&
    typeof data === 'object' &&
    'session' in data &&
    data.session &&
    typeof data.session === 'object' &&
    'id' in data.session &&
    typeof data.session.id === 'string' &&
    data.session.id.startsWith('cse_')
  ) {
    return data.session.id
  }

  logForDebugging(
    `[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
  )
  return null
}
/**
 * Credentials from POST /bridge. JWT is opaque — do not decode.
 * Each /bridge call bumps worker_epoch server-side (it IS the register).
 */
export type RemoteCredentials = {
// Opaque worker JWT; treat it as a credential and never parse it.
worker_jwt: string
// Base URL returned by the server.
// NOTE(review): presumably where subsequent worker API calls go — confirm against the caller.
api_base_url: string
// Token lifetime as reported by the server.
// NOTE(review): the unit is not established in this file (likely seconds) — confirm.
expires_in: number
// Always a safe-integer number here: fetchRemoteCredentials converts the
// string form protojson uses for int64 before returning.
worker_epoch: number
}
/**
 * Fetch worker credentials via POST {baseUrl}/v1/code/sessions/{id}/bridge.
 *
 * Never throws for request or response problems: every failure is written
 * to the debug log and reported as null.
 *
 * @param sessionId Code-session id interpolated into the URL path.
 * @param baseUrl API origin the path is appended to.
 * @param accessToken OAuth access token for the Authorization header.
 * @param timeoutMs axios request timeout in milliseconds.
 * @param trustedDeviceToken Optional; sent as X-Trusted-Device-Token when
 *   non-empty, otherwise the header is omitted.
 * @returns The validated credentials, or null when the request fails, the
 *   status is not 200, or the body is malformed.
 */
export async function fetchRemoteCredentials(
  sessionId: string,
  baseUrl: string,
  accessToken: string,
  timeoutMs: number,
  trustedDeviceToken?: string,
): Promise<RemoteCredentials | null> {
  const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
  const headers = oauthHeaders(accessToken)
  if (trustedDeviceToken) {
    headers['X-Trusted-Device-Token'] = trustedDeviceToken
  }
  let response
  try {
    response = await axios.post(
      url,
      {},
      {
        headers,
        timeout: timeoutMs,
        // Only 5xx rejects; 4xx responses are inspected below.
        validateStatus: s => s < 500,
      },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[code-session] /bridge request failed: ${errorMessage(err)}`,
    )
    return null
  }
  if (response.status !== 200) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[code-session] /bridge failed ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }
  const data: unknown = response.data
  if (
    data === null ||
    typeof data !== 'object' ||
    !('worker_jwt' in data) ||
    typeof data.worker_jwt !== 'string' ||
    !('expires_in' in data) ||
    typeof data.expires_in !== 'number' ||
    !('api_base_url' in data) ||
    typeof data.api_base_url !== 'string' ||
    !('worker_epoch' in data)
  ) {
    logForDebugging(
      `[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
    )
    return null
  }
  // protojson serializes int64 as a string to avoid JS precision loss;
  // Go may also return a number depending on encoder settings.
  const rawEpoch = data.worker_epoch
  let epoch: unknown = rawEpoch
  if (typeof rawEpoch === 'string') {
    // Accept only plain decimal integers. Bare Number() would turn '' and
    // whitespace into 0 and also accept forms like '0x10' or '1e3', letting
    // a malformed epoch through validation.
    epoch = /^-?\d+$/.test(rawEpoch) ? Number(rawEpoch) : Number.NaN
  }
  // isSafeInteger also rejects NaN, ±Infinity and fractional values.
  if (typeof epoch !== 'number' || !Number.isSafeInteger(epoch)) {
    logForDebugging(
      `[code-session] /bridge worker_epoch invalid: ${jsonStringify(rawEpoch)}`,
    )
    return null
  }
  return {
    worker_jwt: data.worker_jwt,
    api_base_url: data.api_base_url,
    expires_in: data.expires_in,
    worker_epoch: epoch,
  }
}

384
src/bridge/createSession.ts Normal file
View File

@@ -0,0 +1,384 @@
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { extractErrorDetail } from './debugUtils.js'
import { toCompatSessionId } from './sessionIdCompat.js'
/**
 * Git repository entry placed in `session_context.sources` of the
 * POST /v1/sessions body built by createBridgeSession.
 */
type GitSource = {
type: 'git_repository'
// HTTPS URL of the form https://<host>/<owner>/<name>.
url: string
// The caller's branch, else the default branch; omitted when neither is known.
revision?: string
}
/**
 * Git outcome entry placed in `session_context.outcomes` of the
 * POST /v1/sessions body built by createBridgeSession.
 */
type GitOutcome = {
type: 'git_repository'
// repo is "<owner>/<name>"; createBridgeSession sends a single branch
// named claude/<branch>, or claude/task when no branch is given.
git_info: { type: 'github'; repo: string; branches: string[] }
}
// Events must be wrapped in { type: 'event', data: <sdk_message> } for the
// POST /v1/sessions endpoint (discriminated union format).
type SessionEvent = {
// Fixed discriminant for this wrapper.
type: 'event'
// The wrapped SDK message, sent as-is.
data: SDKMessage
}
/**
 * Create a session on a bridge environment via POST /v1/sessions.
 *
 * Used by both `claude remote-control` (empty session so the user has somewhere to
 * type immediately) and `/remote-control` (session pre-populated with conversation
 * history).
 *
 * When `gitRepoUrl` can be parsed, the request carries a git source and a
 * git outcome describing the repository; otherwise both lists are empty.
 * `getAccessToken` and `baseUrl` override the stored OAuth token and the
 * configured API base URL respectively.
 *
 * Returns the session ID on success, or null if creation fails (non-fatal).
 * Failures (no token, no org UUID, request error, non-200/201 status, missing
 * id in the response) are written to the debug log.
 */
export async function createBridgeSession({
  environmentId,
  title,
  events,
  gitRepoUrl,
  branch,
  signal,
  baseUrl: baseUrlOverride,
  getAccessToken,
  permissionMode,
}: {
  environmentId: string
  title?: string
  events: SessionEvent[]
  gitRepoUrl: string | null
  branch: string
  signal: AbortSignal
  baseUrl?: string
  getAccessToken?: () => string | undefined
  permissionMode?: string
}): Promise<string | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { parseGitHubRepository, parseGitRemote } = await import(
    '../utils/detectRepository.js'
  )
  const { getDefaultBranch } = await import('../utils/git.js')
  const { getMainLoopModel } = await import('../utils/model/model.js')
  const { default: axios } = await import('axios')

  const accessToken =
    getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session creation')
    return null
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session creation')
    return null
  }

  // Build git source and outcome context
  let gitSource: GitSource | null = null
  let gitOutcome: GitOutcome | null = null
  if (gitRepoUrl) {
    // Resolve the repository from either a full git remote or the
    // owner/repo shorthand; both forms feed the same payload below.
    let repoRef: { url: string; repo: string } | null = null
    const parsed = parseGitRemote(gitRepoUrl)
    if (parsed) {
      const { host, owner, name } = parsed
      repoRef = {
        url: `https://${host}/${owner}/${name}`,
        repo: `${owner}/${name}`,
      }
    } else {
      // Fallback: try parseGitHubRepository for owner/repo format
      const ownerRepo = parseGitHubRepository(gitRepoUrl)
      if (ownerRepo) {
        const [owner, name] = ownerRepo.split('/')
        if (owner && name) {
          repoRef = {
            url: `https://github.com/${owner}/${name}`,
            repo: `${owner}/${name}`,
          }
        }
      }
    }
    if (repoRef) {
      // The default branch is only looked up when a repository was
      // resolved and no branch was supplied.
      const revision = branch || (await getDefaultBranch()) || undefined
      gitSource = {
        type: 'git_repository',
        url: repoRef.url,
        revision,
      }
      gitOutcome = {
        type: 'git_repository',
        git_info: {
          type: 'github',
          repo: repoRef.repo,
          branches: [`claude/${branch || 'task'}`],
        },
      }
    }
  }

  const requestBody = {
    ...(title !== undefined && { title }),
    events,
    session_context: {
      sources: gitSource ? [gitSource] : [],
      outcomes: gitOutcome ? [gitOutcome] : [],
      model: getMainLoopModel(),
    },
    environment_id: environmentId,
    source: 'remote-control',
    ...(permissionMode && { permission_mode: permissionMode }),
  }
  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  const url = `${baseUrlOverride ?? getOauthConfig().BASE_API_URL}/v1/sessions`

  let response
  try {
    response = await axios.post(url, requestBody, {
      headers,
      signal,
      // Only 5xx rejects; 4xx responses are inspected below.
      validateStatus: s => s < 500,
    })
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session creation request failed: ${errorMessage(err)}`,
    )
    return null
  }

  const isSuccess = response.status === 200 || response.status === 201
  if (!isSuccess) {
    const detail = extractErrorDetail(response.data)
    logForDebugging(
      `[bridge] Session creation failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
    )
    return null
  }

  const sessionData: unknown = response.data
  if (
    !sessionData ||
    typeof sessionData !== 'object' ||
    !('id' in sessionData) ||
    typeof sessionData.id !== 'string'
  ) {
    logForDebugging('[bridge] No session ID in response')
    return null
  }
  return sessionData.id
}
/**
 * Fetch a bridge session via GET /v1/sessions/{id}.
 *
 * Returns the session's environment_id (for `--session-id` resume) and title.
 * Uses the same org-scoped headers as create/archive — the environments-level
 * client in bridgeApi.ts uses a different beta header and no org UUID, which
 * makes the Sessions API return 404.
 *
 * Returns null (after a debug log) when there is no access token or org
 * UUID, when the request fails, or when the status is not 200. The response
 * body is returned as received, without shape validation.
 */
export async function getBridgeSession(
  sessionId: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<{ environment_id?: string; title?: string } | null> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  // Caller-supplied token wins; otherwise use the stored OAuth token.
  const token =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!token) {
    logForDebugging('[bridge] No access token for session fetch')
    return null
  }

  const organizationId = await getOrganizationUUID()
  if (!organizationId) {
    logForDebugging('[bridge] No org UUID for session fetch')
    return null
  }

  const requestHeaders = {
    ...getOAuthHeaders(token),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': organizationId,
  }
  const apiBase = opts?.baseUrl ?? getOauthConfig().BASE_API_URL
  const endpoint = `${apiBase}/v1/sessions/${sessionId}`
  logForDebugging(`[bridge] Fetching session ${sessionId}`)

  let response
  try {
    response = await axios.get<{ environment_id?: string; title?: string }>(
      endpoint,
      {
        headers: requestHeaders,
        timeout: 10_000,
        validateStatus: s => s < 500,
      },
    )
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session fetch request failed: ${errorMessage(err)}`,
    )
    return null
  }

  if (response.status === 200) {
    return response.data
  }
  const detail = extractErrorDetail(response.data)
  logForDebugging(
    `[bridge] Session fetch failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
  )
  return null
}
/**
 * Archive a bridge session via POST /v1/sessions/{id}/archive.
 *
 * The CCR server never auto-archives sessions — archival is always an
 * explicit client action. Both `claude remote-control` (standalone bridge) and the
 * always-on `/remote-control` REPL bridge call this during shutdown to archive any
 * sessions that are still alive.
 *
 * The archive endpoint accepts sessions in any status (running, idle,
 * requires_action, pending) and returns 409 if already archived, making
 * it safe to call even if the server-side runner already archived the
 * session.
 *
 * Callers must handle errors — this function has no try/catch; 5xx,
 * timeouts, and network errors throw. Archival is best-effort during
 * cleanup; call sites wrap with .catch().
 *
 * A missing access token or org UUID is logged and the call returns
 * without sending a request. Any non-200 status below 500 is logged only.
 * The request timeout is `opts.timeoutMs`, defaulting to 10 seconds.
 */
export async function archiveBridgeSession(
  sessionId: string,
  opts?: {
    baseUrl?: string
    getAccessToken?: () => string | undefined
    timeoutMs?: number
  },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  // Caller-supplied token wins; otherwise use the stored OAuth token.
  const token =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!token) {
    logForDebugging('[bridge] No access token for session archive')
    return
  }

  const organizationId = await getOrganizationUUID()
  if (!organizationId) {
    logForDebugging('[bridge] No org UUID for session archive')
    return
  }

  const requestHeaders = {
    ...getOAuthHeaders(token),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': organizationId,
  }
  const apiBase = opts?.baseUrl ?? getOauthConfig().BASE_API_URL
  const endpoint = `${apiBase}/v1/sessions/${sessionId}/archive`
  logForDebugging(`[bridge] Archiving session ${sessionId}`)

  // Deliberately not wrapped in try/catch; see the contract above.
  const response = await axios.post(
    endpoint,
    {},
    {
      headers: requestHeaders,
      timeout: opts?.timeoutMs ?? 10_000,
      validateStatus: s => s < 500,
    },
  )

  if (response.status === 200) {
    logForDebugging(`[bridge] Session ${sessionId} archived successfully`)
    return
  }
  const detail = extractErrorDetail(response.data)
  logForDebugging(
    `[bridge] Session archive failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
  )
}
/**
 * Update the title of a bridge session via PATCH /v1/sessions/{id}.
 *
 * Called when the user renames a session via /rename while a bridge
 * connection is active, so the title stays in sync on claude.ai/code.
 *
 * Errors are swallowed — title sync is best-effort. A missing access token
 * or org UUID, a request failure, and a non-200 status are each written to
 * the debug log and nothing is thrown for them.
 *
 * @param sessionId Session id; passed through toCompatSessionId before use.
 * @param title New title sent in the request body.
 * @param opts Optional base URL and access-token overrides.
 */
export async function updateBridgeSessionTitle(
  sessionId: string,
  title: string,
  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
): Promise<void> {
  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
  const { getOrganizationUUID } = await import('../services/oauth/client.js')
  const { getOauthConfig } = await import('../constants/oauth.js')
  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
  const { default: axios } = await import('axios')

  const accessToken =
    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
  if (!accessToken) {
    logForDebugging('[bridge] No access token for session title update')
    return
  }
  const orgUUID = await getOrganizationUUID()
  if (!orgUUID) {
    logForDebugging('[bridge] No org UUID for session title update')
    return
  }

  const headers = {
    ...getOAuthHeaders(accessToken),
    'anthropic-beta': 'ccr-byoc-2025-07-29',
    'x-organization-uuid': orgUUID,
  }
  // Compat gateway only accepts session_* (compat/convert.go:27). v2 callers
  // pass raw cse_*; retag here so all callers can pass whatever they hold.
  // Idempotent for v1's session_* and bridgeMain's pre-converted compatSessionId.
  const compatId = toCompatSessionId(sessionId)
  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${compatId}`
  // Separate id and title so the two values stay distinguishable in the log.
  logForDebugging(`[bridge] Updating session title: ${compatId} -> ${title}`)

  try {
    const response = await axios.patch(
      url,
      { title },
      { headers, timeout: 10_000, validateStatus: s => s < 500 },
    )
    if (response.status === 200) {
      logForDebugging(`[bridge] Session title updated successfully`)
    } else {
      const detail = extractErrorDetail(response.data)
      logForDebugging(
        `[bridge] Session title update failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(
      `[bridge] Session title update request failed: ${errorMessage(err)}`,
    )
  }
}

141
src/bridge/debugUtils.ts Normal file
View File

@@ -0,0 +1,141 @@
import {
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
logEvent,
} from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { jsonStringify } from '../utils/slowOperations.js'
/** Maximum length of a debug message before it is truncated. */
const DEBUG_MSG_LIMIT = 2000

/** JSON field names whose string values are masked before logging. */
const SECRET_FIELD_NAMES = [
  'session_ingress_token',
  'environment_secret',
  'access_token',
  'secret',
  'token',
]

/**
 * Matches `"<field>": "<value>"` for exactly the field names above.
 * The value part follows JSON string rules: a backslash escapes the next
 * character, so an escaped quote (`\"`) does not end the value. Stopping at
 * the first quote would leave the rest of such a value unredacted.
 */
const SECRET_PATTERN = new RegExp(
  `"(${SECRET_FIELD_NAMES.join('|')})"\\s*:\\s*"((?:[^"\\\\]|\\\\[\\s\\S])*)"`,
  'g',
)

/** Values shorter than this are replaced entirely rather than abbreviated. */
const REDACT_MIN_LENGTH = 16

/**
 * Mask secret values inside JSON-like text.
 *
 * Values shorter than REDACT_MIN_LENGTH become `[REDACTED]`; longer ones
 * keep their first 8 and last 4 characters around `...` so log entries can
 * still be correlated. Whitespace around the colon is dropped in the
 * rewritten pair.
 *
 * @param s Text to scan, typically a serialized JSON body.
 * @returns The text with every matching field value masked.
 */
export function redactSecrets(s: string): string {
  return s.replace(SECRET_PATTERN, (_match, field: string, value: string) => {
    if (value.length < REDACT_MIN_LENGTH) {
      return `"${field}":"[REDACTED]"`
    }
    const redacted = `${value.slice(0, 8)}...${value.slice(-4)}`
    return `"${field}":"${redacted}"`
  })
}
/**
 * Flatten a string onto one line and cap its length for debug logging.
 * Each newline becomes the two characters `\n`. When the flattened text is
 * longer than DEBUG_MSG_LIMIT it is cut there and suffixed with the full
 * flattened length.
 */
export function debugTruncate(s: string): string {
  const singleLine = s.replace(/\n/g, '\\n')
  return singleLine.length > DEBUG_MSG_LIMIT
    ? singleLine.slice(0, DEBUG_MSG_LIMIT) + `... (${singleLine.length} chars)`
    : singleLine
}
/**
 * Serialize a value for debug logging with secrets masked.
 * Strings are used as-is and everything else goes through jsonStringify.
 * Redaction runs before truncation, and output longer than DEBUG_MSG_LIMIT
 * is cut there and suffixed with the full redacted length.
 */
export function debugBody(data: unknown): string {
  const serialized = typeof data === 'string' ? data : jsonStringify(data)
  const safe = redactSecrets(serialized)
  return safe.length > DEBUG_MSG_LIMIT
    ? safe.slice(0, DEBUG_MSG_LIMIT) + `... (${safe.length} chars)`
    : safe
}
/**
 * Extract a descriptive error message from an axios error (or any error).
 * For HTTP errors, appends the server's response body message if available,
 * since axios's default message only includes the status code.
 *
 * The body message is looked up with extractErrorDetail (`data.message`,
 * then `data.error.message`); an empty or missing detail leaves the base
 * message unchanged.
 *
 * @param err Any thrown value.
 * @returns `errorMessage(err)`, suffixed with `: <detail>` when a detail exists.
 */
export function describeAxiosError(err: unknown): string {
  const msg = errorMessage(err)
  if (err && typeof err === 'object' && 'response' in err) {
    const response = (err as { response?: { data?: unknown } }).response
    // Share the body-parsing rules with the rest of this file rather than
    // keeping a second hand-written copy of them here.
    const detail = extractErrorDetail(response?.data)
    if (detail) {
      return `${msg}: ${detail}`
    }
  }
  return msg
}
/**
 * Read the HTTP status code carried by an axios-style error.
 *
 * @param err Any thrown value.
 * @returns `err.response.status` when it is a number; undefined otherwise
 *   (e.g. network failures, which carry no response).
 */
export function extractHttpStatus(err: unknown): number | undefined {
  if (!err || typeof err !== 'object' || !('response' in err)) {
    return undefined
  }
  const response: unknown = err.response
  if (!response) {
    return undefined
  }
  const status = (response as { status?: unknown }).status
  return typeof status === 'number' ? status : undefined
}
/**
 * Find the human-readable message in an API error response body.
 * A string `data.message` wins; otherwise a string `data.error.message`
 * is used.
 *
 * @param data Parsed response body of any shape.
 * @returns The message (possibly empty), or undefined when neither field
 *   holds a string.
 */
export function extractErrorDetail(data: unknown): string | undefined {
  if (typeof data !== 'object' || data === null) return undefined

  if ('message' in data && typeof data.message === 'string') {
    return data.message
  }

  if (!('error' in data)) return undefined
  const nested = data.error
  if (typeof nested !== 'object' || nested === null) return undefined

  return 'message' in nested && typeof nested.message === 'string'
    ? nested.message
    : undefined
}
/**
 * Record that bridge init was skipped: an optional debug message plus the
 * `tengu_bridge_repl_skipped` analytics event. Keeping the event name and
 * the AnalyticsMetadata cast here spares call sites the boilerplate.
 *
 * @param reason Skip reason attached to the analytics event.
 * @param debugMsg Debug-log line; nothing is logged when absent or empty.
 * @param v2 Added to the event only when explicitly provided.
 */
export function logBridgeSkip(
  reason: string,
  debugMsg?: string,
  v2?: boolean,
): void {
  if (debugMsg) logForDebugging(debugMsg)

  // Spreads to nothing when v2 was not supplied.
  const v2Field = v2 !== undefined && { v2 }
  logEvent('tengu_bridge_repl_skipped', {
    reason:
      reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...v2Field,
  })
}

View File

@@ -0,0 +1,165 @@
import { z } from 'zod/v4'
import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import { lt } from '../utils/semver.js'
import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
/**
 * Tunables for the env-less (v2) bridge, read from the
 * `tengu_bridge_repl_v2_config` feature value. Fields ending in `_ms` are
 * durations in milliseconds; `_fraction` fields are ratios.
 */
export type EnvLessBridgeConfig = {
// withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
init_retry_max_attempts: number
init_retry_base_delay_ms: number
init_retry_jitter_fraction: number
init_retry_max_delay_ms: number
// axios timeout for POST /sessions, POST /bridge, POST /archive
http_timeout_ms: number
// BoundedUUIDSet ring size (echo + re-delivery dedup)
uuid_dedup_buffer_size: number
// CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin.
heartbeat_interval_ms: number
// ±fraction of interval — per-beat jitter to spread fleet load.
heartbeat_jitter_fraction: number
// Fire proactive JWT refresh this long before expires_in. Larger buffer =
// more frequent refresh (refresh cadence ≈ expires_in - buffer).
token_refresh_buffer_ms: number
// Archive POST timeout in teardown(). Distinct from http_timeout_ms because
// gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
// axios timeout on a slow/stalled archive burns the whole budget on a
// request that forceExit will kill anyway.
teardown_archive_timeout_ms: number
// Deadline for onConnect after transport.connect(). If neither onConnect
// nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
// — the only telemetry for the ~1% of sessions that emit `started` then
// go silent (no error, no event, just nothing).
connect_timeout_ms: number
// Semver floor for the env-less bridge path. Separate from the v1
// tengu_bridge_min_version config so a v2-specific bug can force upgrades
// without blocking v1 (env-based) clients, and vice versa.
min_version: string
// When true, tell users their claude.ai app may be too old to see v2
// sessions — lets us roll the v2 bridge before the app ships the new
// session-list query.
should_show_app_upgrade_message: boolean
}
/**
 * Built-in configuration, passed to the feature-value lookup as its
 * fallback. Each value equals the matching `.default()` in
 * envLessBridgeConfigSchema; keep the two in sync when changing either.
 */
export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
init_retry_max_attempts: 3,
init_retry_base_delay_ms: 500,
init_retry_jitter_fraction: 0.25,
init_retry_max_delay_ms: 4000,
http_timeout_ms: 10_000,
uuid_dedup_buffer_size: 2000,
heartbeat_interval_ms: 20_000,
heartbeat_jitter_fraction: 0.1,
token_refresh_buffer_ms: 300_000,
teardown_archive_timeout_ms: 1500,
connect_timeout_ms: 15_000,
// '0.0.0' as a floor. NOTE(review): presumably no real version is below it — confirm with the lt() call site.
min_version: '0.0.0',
should_show_app_upgrade_message: false,
}
// Validation schema for the remotely supplied EnvLessBridgeConfig. Every
// field has a default, so a partial object still parses to a full config.
// Floors reject the whole object on violation (fall back to DEFAULT) rather
// than partially trusting — same defense-in-depth as pollConfig.ts.
const envLessBridgeConfigSchema = lazySchema(() =>
z.object({
init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
init_retry_base_delay_ms: z.number().int().min(100).default(500),
init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
init_retry_max_delay_ms: z.number().int().min(500).default(4000),
http_timeout_ms: z.number().int().min(2000).default(10_000),
uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
// Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
heartbeat_interval_ms: z
.number()
.int()
.min(5000)
.max(30_000)
.default(20_000),
// ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
// still under the 60s TTL.
heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
// Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
// semantic inversion: ops entering expires_in-5min (the *delay until
// refresh*) instead of 5min (the *buffer before expiry*) yields
// delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
// durations so .min() alone can't distinguish; .max() catches the
// inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
token_refresh_buffer_ms: z
.number()
.int()
.min(30_000)
.max(1_800_000)
.default(300_000),
// Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
// timeout just lies to axios since forceExit kills the socket regardless.
teardown_archive_timeout_ms: z
.number()
.int()
.min(500)
.max(2000)
.default(1500),
// Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
// false-positive rate under transient slowness; cap 60s bounds how long
// a truly-stalled session stays dark.
connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
// A version string is accepted only if lt() can process it without
// throwing; the comparison result itself is ignored.
min_version: z
.string()
.refine(v => {
try {
lt(v, '0.0.0')
return true
} catch {
return false
}
})
.default('0.0.0'),
should_show_app_upgrade_message: z.boolean().default(false),
}),
)
/**
 * Load the env-less bridge config from the `tengu_bridge_repl_v2_config`
 * GrowthBook feature and validate it.
 *
 * Intended to be read once per initEnvLessBridgeCore call; the config is
 * treated as fixed for the lifetime of a bridge session.
 *
 * The blocking getter is used on purpose (rather than the
 * _CACHED_MAY_BE_STALE variant): /remote-control runs well after GrowthBook
 * init, so initializeGrowthBook() resolves instantly and there is no startup
 * cost, while the value comes from the fresh in-memory remoteEval result
 * instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
 * warns against startup-path usage, which this is not.
 *
 * @returns The validated config, or DEFAULT_ENV_LESS_BRIDGE_CONFIG when the
 *   fetched value does not satisfy envLessBridgeConfigSchema.
 */
export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
  const featureValue = await getFeatureValue_DEPRECATED<unknown>(
    'tengu_bridge_repl_v2_config',
    DEFAULT_ENV_LESS_BRIDGE_CONFIG,
  )
  const validation = envLessBridgeConfigSchema().safeParse(featureValue)
  if (!validation.success) {
    // Any violation rejects the whole payload; nothing is partially trusted.
    return DEFAULT_ENV_LESS_BRIDGE_CONFIG
  }
  return validation.data
}
/**
 * Version gate for the env-less (v2) bridge path.
 *
 * Counterpart of checkBridgeMinVersion() for v2: the floor comes from
 * `min_version` in tengu_bridge_repl_v2_config rather than from
 * tengu_bridge_min_version, so each implementation can enforce its own
 * minimum independently.
 *
 * @returns A user-facing error message when MACRO.VERSION is below the
 *   configured floor; null when the version is acceptable or no floor is set.
 */
export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
  const { min_version: requiredVersion } = await getEnvLessBridgeConfig()
  // An empty floor means "no requirement" — skip the comparison entirely.
  if (!requiredVersion) return null
  if (!lt(MACRO.VERSION, requiredVersion)) return null
  return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${requiredVersion} or higher is required. Run \`claude update\` to update.`
}
/**
 * Decide whether a starting Remote Control session should nudge the user to
 * upgrade their claude.ai app.
 *
 * The nudge requires both that the v2 (env-less) bridge is active and that
 * the `should_show_app_upgrade_message` config bit is set. This lets the v2
 * bridge roll out before the app ships the new session-list query.
 *
 * @returns true only when both conditions hold; the config is not fetched at
 *   all when the env-less bridge is disabled.
 */
export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
  if (isEnvLessBridgeEnabled()) {
    const { should_show_app_upgrade_message: showNudge } =
      await getEnvLessBridgeConfig()
    return showNudge
  }
  return false
}

71
src/bridge/flushGate.ts Normal file
View File

@@ -0,0 +1,71 @@
/**
 * Gate that holds back message writes while an initial flush is in flight.
 *
 * At bridge-session start the historical messages go to the server in a
 * single HTTP POST. Anything produced during that POST has to wait in a
 * queue, otherwise it could reach the server interleaved with the history.
 *
 * Lifecycle:
 *   start()      → gate is active; enqueue() queues items and returns true
 *   end()        → gate closes; queued items are handed back for draining
 *   drop()       → gate closes; queued items are thrown away
 *                  (permanent transport close)
 *   deactivate() → gate closes; queued items are kept
 *                  (transport replacement — the new transport will drain)
 */
export class FlushGate<T> {
  private flushing = false
  private queue: T[] = []

  /** True while a flush is in progress. */
  get active(): boolean {
    return this.flushing
  }

  /** Number of items currently waiting in the queue. */
  get pendingCount(): number {
    return this.queue.length
  }

  /** Open the gate: from now on enqueue() queues instead of declining. */
  start(): void {
    this.flushing = true
  }

  /**
   * Close the gate and hand back everything that was queued, in arrival
   * order. The internal queue is empty afterwards; sending the returned
   * items is the caller's job.
   */
  end(): T[] {
    this.flushing = false
    const drained = this.queue
    this.queue = []
    return drained
  }

  /**
   * Queue `items` when a flush is active.
   *
   * @returns true if the items were queued; false if no flush is active, in
   *   which case nothing is stored and the caller should send directly.
   */
  enqueue(...items: T[]): boolean {
    if (this.flushing) {
      for (const item of items) {
        this.queue.push(item)
      }
      return true
    }
    return false
  }

  /**
   * Close the gate and discard the queue (permanent transport close).
   *
   * @returns How many items were discarded.
   */
  drop(): number {
    this.flushing = false
    const dropped = this.queue.length
    this.queue = []
    return dropped
  }

  /**
   * Close the gate but keep the queue intact. Used when the transport is
   * replaced (onWorkReceived) — the replacement transport's flush drains
   * whatever is still pending.
   */
  deactivate(): void {
    this.flushing = false
  }
}

View File

@@ -0,0 +1,175 @@
/**
* Resolve file_uuid attachments on inbound bridge user messages.
*
* Web composer uploads via cookie-authed /api/{org}/upload, sends file_uuid
* alongside the message. Here we fetch each via GET /api/oauth/files/{uuid}/content
* (oauth-authed, same store), write to ~/.claude/uploads/{sessionId}/, and
* return @path refs to prepend. Claude's Read tool takes it from there.
*
* Best-effort: any failure (no token, network, non-2xx, disk) logs debug and
* skips that attachment. The message still reaches Claude, just without @path.
*/
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
import axios from 'axios'
import { randomUUID } from 'crypto'
import { mkdir, writeFile } from 'fs/promises'
import { basename, join } from 'path'
import { z } from 'zod/v4'
import { getSessionId } from '../bootstrap/state.js'
import { logForDebugging } from '../utils/debug.js'
import { getClaudeConfigHomeDir } from '../utils/envUtils.js'
import { lazySchema } from '../utils/lazySchema.js'
import { getBridgeAccessToken, getBridgeBaseUrl } from './bridgeConfig.js'
// Per-attachment download timeout (30 s), passed as axios `timeout` in resolveOne.
const DOWNLOAD_TIMEOUT_MS = 30_000
/** Write `msg` to the debug log, tagged with this module's prefix. */
function debug(msg: string): void {
  const tagged = '[bridge:inbound-attach] ' + msg
  logForDebugging(tagged)
}
// Shape of one entry in an inbound message's `file_attachments` array: the
// uploaded file's UUID (used to build the download URL) and its file name
// (network-supplied; sanitized by sanitizeFileName before touching disk).
const attachmentSchema = lazySchema(() =>
z.object({
file_uuid: z.string(),
file_name: z.string(),
}),
)
// Array form used by extractInboundAttachments(). Parsing is all-or-nothing:
// one entry that fails attachmentSchema fails the whole array.
const attachmentsArraySchema = lazySchema(() => z.array(attachmentSchema()))
/** One validated attachment entry: `{ file_uuid, file_name }`. */
export type InboundAttachment = z.infer<ReturnType<typeof attachmentSchema>>
/**
 * Read the `file_attachments` field off a loosely-typed inbound message.
 *
 * @param msg - Any value; only non-null objects carrying a
 *   `file_attachments` property are inspected.
 * @returns The validated attachment list, or an empty array when the field
 *   is absent or does not match the attachments schema.
 */
export function extractInboundAttachments(msg: unknown): InboundAttachment[] {
  if (typeof msg === 'object' && msg !== null && 'file_attachments' in msg) {
    const validation = attachmentsArraySchema().safeParse(msg.file_attachments)
    if (validation.success) {
      return validation.data
    }
  }
  return []
}
/**
 * Reduce a network-supplied file name to something safe to use as a single
 * path segment.
 *
 * `file_name` originates from the web composer, so it is treated as
 * untrusted: directory components are dropped via basename(), and every
 * character outside `[a-zA-Z0-9._-]` becomes `_`.
 *
 * @returns The cleaned name, or `'attachment'` when nothing is left.
 */
function sanitizeFileName(name: string): string {
  const leaf = basename(name)
  const cleaned = leaf.replace(/[^a-zA-Z0-9._-]/g, '_')
  return cleaned === '' ? 'attachment' : cleaned
}
/**
 * Directory that receives downloaded attachments for the current session:
 * `{claude config home}/uploads/{session id}`.
 */
function uploadsDir(): string {
  const configHome = getClaudeConfigHomeDir()
  const sessionId = getSessionId()
  return join(configHome, 'uploads', sessionId)
}
/**
 * Download one attachment and persist it under the session's uploads dir.
 *
 * Steps: read the bridge OAuth token, GET
 * `{baseUrl}/api/oauth/files/{file_uuid}/content` as raw bytes, then write
 * the bytes to `{uploadsDir}/{prefix}-{sanitized file_name}`.
 *
 * Never throws. A missing token, a thrown request (including a throw while
 * computing the base URL), a non-200 status, or a mkdir/write failure each
 * log a debug line and yield `undefined`.
 *
 * @param att - Attachment descriptor (`file_uuid` + `file_name`) taken from
 *   the inbound message.
 * @returns The path of the written file on success, `undefined` on any
 *   failure.
 */
async function resolveOne(att: InboundAttachment): Promise<string | undefined> {
  const token = getBridgeAccessToken()
  if (!token) {
    debug('skip: no oauth token')
    return undefined
  }

  let data: Buffer
  try {
    // getOauthConfig() (via getBridgeBaseUrl) throws on a non-allowlisted
    // CLAUDE_CODE_CUSTOM_OAUTH_URL — keep it inside the try so a bad
    // FedStart URL degrades to "no @path" instead of crashing print.ts's
    // reader loop (which has no catch around the await).
    const url = `${getBridgeBaseUrl()}/api/oauth/files/${encodeURIComponent(att.file_uuid)}/content`
    const response = await axios.get(url, {
      headers: { Authorization: `Bearer ${token}` },
      responseType: 'arraybuffer',
      timeout: DOWNLOAD_TIMEOUT_MS,
      // Never reject on status; non-200 is handled explicitly below so it
      // gets its own log line.
      validateStatus: () => true,
    })
    if (response.status !== 200) {
      debug(`fetch ${att.file_uuid} failed: status=${response.status}`)
      return undefined
    }
    data = Buffer.from(response.data)
  } catch (e) {
    debug(`fetch ${att.file_uuid} threw: ${e}`)
    return undefined
  }

  // Prefixing with the first 8 chars of the uuid keeps files with the same
  // name (across messages or within one) from overwriting each other. This
  // is collision avoidance, not a security measure. The prefix is sanitized
  // too, because file_uuid is also network-supplied.
  const safeName = sanitizeFileName(att.file_name)
  const prefix = (
    att.file_uuid.slice(0, 8) || randomUUID().slice(0, 8)
  ).replace(/[^a-zA-Z0-9_-]/g, '_')
  const dir = uploadsDir()
  const outPath = join(dir, `${prefix}-${safeName}`)

  try {
    await mkdir(dir, { recursive: true })
    await writeFile(outPath, data)
  } catch (e) {
    debug(`write ${outPath} failed: ${e}`)
    return undefined
  }

  // Separator between uuid and path: without it the two values run together
  // into a single unreadable token in the debug log.
  debug(`resolved ${att.file_uuid} → ${outPath} (${data.length} bytes)`)
  return outPath
}
/**
 * Resolve every attachment of an inbound message and build the `@path`
 * prefix string for it.
 *
 * All attachments are resolved concurrently; the ones that fail are left
 * out.
 *
 * @returns Space-separated quoted refs followed by one trailing space
 *   (e.g. `@"/a/b" @"/c/d" `), or an empty string when there were no
 *   attachments or none resolved.
 */
export async function resolveInboundAttachments(
  attachments: InboundAttachment[],
): Promise<string> {
  if (attachments.length === 0) return ''
  debug(`resolving ${attachments.length} attachment(s)`)

  const settled = await Promise.all(attachments.map(resolveOne))

  // Quoted form — extractAtMentionedFiles truncates unquoted @refs at the
  // first space, which breaks any home dir with spaces (/Users/John Smith/).
  const refs: string[] = []
  for (const resolvedPath of settled) {
    if (resolvedPath !== undefined) {
      refs.push(`@"${resolvedPath}"`)
    }
  }

  if (refs.length === 0) return ''
  return refs.join(' ') + ' '
}
/**
 * Put the `@path` prefix in front of the message text, for either content
 * form.
 *
 * - Empty prefix: `content` is returned untouched (same reference).
 * - String content: the prefix is concatenated in front.
 * - Block content: the prefix goes in front of the LAST text block's text.
 *   processUserInputBase reads inputString from
 *   processedBlocks[processedBlocks.length - 1], so refs placed in block[0]
 *   would be silently ignored for [text, image] content.
 * - Block content without any text block: a new text block holding the
 *   prefix (trailing whitespace trimmed) is appended at the end.
 *
 * The input array is never mutated; block results are new arrays.
 */
export function prependPathRefs(
  content: string | Array<ContentBlockParam>,
  prefix: string,
): string | Array<ContentBlockParam> {
  if (!prefix) return content
  if (typeof content === 'string') return prefix + content

  // Scan from the end for the last text block.
  let lastTextAt = -1
  for (let idx = content.length - 1; idx >= 0; idx--) {
    if (content[idx]!.type === 'text') {
      lastTextAt = idx
      break
    }
  }

  const target = lastTextAt === -1 ? undefined : content[lastTextAt]
  if (target?.type === 'text') {
    const before = content.slice(0, lastTextAt)
    const after = content.slice(lastTextAt + 1)
    return [...before, { ...target, text: prefix + target.text }, ...after]
  }

  // No text block — append one at the end so it's last.
  return [...content, { type: 'text', text: prefix.trimEnd() }]
}
/**
 * One-call helper: extract attachments from `msg`, resolve them to local
 * files, and prepend the resulting `@path` refs to `content`.
 *
 * Fast path: when the message yields no attachments, `content` is returned
 * as-is (same reference) and no network or disk work happens.
 */
export async function resolveAndPrepend(
  msg: unknown,
  content: string | Array<ContentBlockParam>,
): Promise<string | Array<ContentBlockParam>> {
  const found = extractInboundAttachments(msg)
  if (found.length > 0) {
    const refPrefix = await resolveInboundAttachments(found)
    return prependPathRefs(content, refPrefix)
  }
  return content
}

View File

@@ -0,0 +1,80 @@
import type {
Base64ImageSource,
ContentBlockParam,
ImageBlockParam,
} from '@anthropic-ai/sdk/resources/messages.mjs'
import type { UUID } from 'crypto'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import { detectImageFormatFromBase64 } from '../utils/imageResizer.js'
/**
 * Pull the content and UUID out of an inbound bridge message so it can be
 * enqueued.
 *
 * Content may be a plain string or a ContentBlockParam[] (e.g. a message
 * with images). Block content is passed through normalizeImageBlocks, which
 * repairs image blocks from bridge clients that send camelCase `mediaType`
 * instead of snake_case `media_type` (mobile-apps#5825).
 *
 * @returns `{ content, uuid }` for a usable user message; `undefined` when
 *   the message is not of type 'user' or its content is missing or empty.
 *   `uuid` is `undefined` when the message has no string `uuid` field.
 */
export function extractInboundMessageFields(
  msg: SDKMessage,
):
  | { content: string | Array<ContentBlockParam>; uuid: UUID | undefined }
  | undefined {
  if (msg.type !== 'user') return undefined

  const rawContent = msg.message?.content
  if (!rawContent) return undefined
  if (Array.isArray(rawContent) && rawContent.length === 0) return undefined

  let uuid: UUID | undefined
  if ('uuid' in msg && typeof msg.uuid === 'string') {
    uuid = msg.uuid as UUID
  } else {
    uuid = undefined
  }

  const content = Array.isArray(rawContent)
    ? normalizeImageBlocks(rawContent)
    : rawContent
  return { content, uuid }
}
/**
 * Repair base64 image blocks that arrive without a `media_type`.
 *
 * iOS/web bridge clients may send camelCase `mediaType` instead of
 * snake_case `media_type`, or leave the field out. An unrepaired block
 * poisons the session: every later API call fails with
 * "media_type: Field required".
 *
 * For each malformed block the media type is taken from a non-empty string
 * `mediaType` if present, otherwise detected from the base64 payload. The
 * rebuilt `source` contains only `type`, `media_type` and `data`.
 *
 * @returns The original array reference when no block needs repair (no
 *   allocation on the happy path); otherwise a new array in which only the
 *   malformed blocks are replaced.
 */
export function normalizeImageBlocks(
  blocks: Array<ContentBlockParam>,
): Array<ContentBlockParam> {
  const needsRepair = blocks.some(candidate =>
    isMalformedBase64Image(candidate),
  )
  if (!needsRepair) return blocks

  return blocks.map(block => {
    if (!isMalformedBase64Image(block)) return block

    const payload = block.source.data
    const looseSource = block.source as unknown as Record<string, unknown>
    const camelCased = looseSource.mediaType
    const mediaType =
      typeof camelCased === 'string' && camelCased
        ? camelCased
        : detectImageFormatFromBase64(payload)

    return {
      ...block,
      source: {
        type: 'base64' as const,
        media_type: mediaType as Base64ImageSource['media_type'],
        data: payload,
      },
    }
  })
}
/**
 * Type guard: true for an image block whose source is base64 but whose
 * `media_type` is missing or otherwise falsy. Non-image blocks, images
 * without a source, and non-base64 sources all yield false.
 */
function isMalformedBase64Image(
  block: ContentBlockParam,
): block is ImageBlockParam & { source: Base64ImageSource } {
  if (block.type === 'image' && block.source?.type === 'base64') {
    const looseSource = block.source as unknown as Record<string, unknown>
    return !looseSource.media_type
  }
  return false
}

View File

@@ -0,0 +1,569 @@
/**
* REPL-specific wrapper around initBridgeCore. Owns the parts that read
* bootstrap state — gates, cwd, session ID, git context, OAuth, title
* derivation — then delegates to the bootstrap-free core.
*
* Split out of replBridge.ts because the sessionStorage import
* (getCurrentSessionTitle) transitively pulls in src/commands.ts → the
* entire slash command + React component tree (~1300 modules). Keeping
* initBridgeCore in a file that doesn't touch sessionStorage lets
* daemonBridge.ts import the core without bloating the Agent SDK bundle.
*
* Called via dynamic import by useReplBridge (auto-start) and print.ts
* (SDK -p mode via query.enableRemoteControl).
*/
import { feature } from 'bun:bundle'
import { hostname } from 'os'
import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { getOrganizationUUID } from '../services/oauth/client.js'
import {
isPolicyAllowed,
waitForPolicyLimitsToLoad,
} from '../services/policyLimits/index.js'
import type { Message } from '../types/message.js'
import {
checkAndRefreshOAuthTokenIfNeeded,
getClaudeAIOAuthTokens,
handleOAuth401Error,
} from '../utils/auth.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logForDebugging } from '../utils/debug.js'
import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
import { errorMessage } from '../utils/errors.js'
import { getBranch, getRemoteUrl } from '../utils/git.js'
import { toSDKMessages } from '../utils/messages/mappers.js'
import {
getContentText,
getMessagesAfterCompactBoundary,
isSyntheticMessage,
} from '../utils/messages.js'
import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
import { getCurrentSessionTitle } from '../utils/sessionStorage.js'
import {
extractConversationText,
generateSessionTitle,
} from '../utils/sessionTitle.js'
import { generateShortWordSlug } from '../utils/words.js'
import {
getBridgeAccessToken,
getBridgeBaseUrl,
getBridgeTokenOverride,
} from './bridgeConfig.js'
import {
checkBridgeMinVersion,
isBridgeEnabledBlocking,
isCseShimEnabled,
isEnvLessBridgeEnabled,
} from './bridgeEnabled.js'
import {
archiveBridgeSession,
createBridgeSession,
updateBridgeSessionTitle,
} from './createSession.js'
import { logBridgeSkip } from './debugUtils.js'
import { checkEnvLessBridgeMinVersion } from './envLessBridgeConfig.js'
import { getPollIntervalConfig } from './pollConfig.js'
import type { BridgeState, ReplBridgeHandle } from './replBridge.js'
import { initBridgeCore } from './replBridge.js'
import { setCseShimGate } from './sessionIdCompat.js'
import type { BridgeWorkerType } from './types.js'
/**
 * Options accepted by initReplBridge(). Every field is optional;
 * initReplBridge destructures `options ?? {}`.
 */
export type InitBridgeOptions = {
// Event callbacks. initReplBridge hands these to the bridge core; when each
// one fires is decided there — NOTE(review): confirm against the core.
onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
onPermissionResponse?: (response: SDKControlResponse) => void
onInterrupt?: () => void
onSetModel?: (model: string | undefined) => void
onSetMaxThinkingTokens?: (maxTokens: number | null) => void
// Returns { ok: true } on success or { ok: false, error } with a reason.
onSetPermissionMode?: (
mode: PermissionMode,
) => { ok: true } | { ok: false; error: string }
// Bridge state notifications. initReplBridge itself reports 'failed' with a
// short hint as `detail` (e.g. '/login') when a precondition check fails.
onStateChange?: (state: BridgeState, detail?: string) => void
// Conversation at bridge start. Used to derive an initial session title when
// no explicit name or /rename title exists, and passed on to the bridge core.
initialMessages?: Message[]
// Explicit session name from `/remote-control <name>`. When set, overrides
// the title derived from the conversation or /rename.
initialName?: string
// Fresh view of the full conversation at call time. Used by onUserMessage's
// count-3 derivation to call generateSessionTitle over the full conversation.
// Optional — print.ts's SDK enableRemoteControl path has no REPL message
// array; count-3 falls back to the single message text when absent.
getMessages?: () => Message[]
// UUIDs already flushed in a prior bridge session. Messages with these
// UUIDs are excluded from the initial flush to avoid poisoning the
// server (duplicate UUIDs across sessions cause the WS to be killed).
// Mutated in place — newly flushed UUIDs are added after each flush.
previouslyFlushedUUIDs?: Set<string>
/** See BridgeCoreParams.perpetual. */
perpetual?: boolean
/**
 * When true, the bridge only forwards events outbound (no SSE inbound
 * stream). Used by CCR mirror mode — local sessions visible on claude.ai
 * without enabling inbound control.
 */
outboundOnly?: boolean
// Forwarded to the bridge core as-is; their meaning is defined there —
// NOTE(review): confirm.
tags?: string[]
}
export async function initReplBridge(
options?: InitBridgeOptions,
): Promise<ReplBridgeHandle | null> {
const {
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
initialMessages,
getMessages,
previouslyFlushedUUIDs,
initialName,
perpetual,
outboundOnly,
tags,
} = options ?? {}
// Wire the cse_ shim kill switch so toCompatSessionId respects the
// GrowthBook gate. Daemon/SDK paths skip this — shim defaults to active.
setCseShimGate(isCseShimEnabled)
// 1. Runtime gate
if (!(await isBridgeEnabledBlocking())) {
logBridgeSkip('not_enabled', '[bridge:repl] Skipping: bridge not enabled')
return null
}
// 1b. Minimum version check — deferred to after the v1/v2 branch below,
// since each implementation has its own floor (tengu_bridge_min_version
// for v1, tengu_bridge_repl_v2_config.min_version for v2).
// 2. Check OAuth — must be signed in with claude.ai. Runs before the
// policy check so console-auth users get the actionable "/login" hint
// instead of a misleading policy error from a stale/wrong-org cache.
if (!getBridgeAccessToken()) {
logBridgeSkip('no_oauth', '[bridge:repl] Skipping: no OAuth tokens')
onStateChange?.('failed', '/login')
return null
}
// 3. Check organization policy — remote control may be disabled
await waitForPolicyLimitsToLoad()
if (!isPolicyAllowed('allow_remote_control')) {
logBridgeSkip(
'policy_denied',
'[bridge:repl] Skipping: allow_remote_control policy not allowed',
)
onStateChange?.('failed', "disabled by your organization's policy")
return null
}
// When CLAUDE_BRIDGE_OAUTH_TOKEN is set (ant-only local dev), the bridge
// uses that token directly via getBridgeAccessToken() — keychain state is
// irrelevant. Skip 2b/2c to preserve that decoupling: an expired keychain
// token shouldn't block a bridge connection that doesn't use it.
if (!getBridgeTokenOverride()) {
// 2a. Cross-process backoff. If N prior processes already saw this exact
// dead token (matched by expiresAt), skip silently — no event, no refresh
// attempt. The count threshold tolerates transient refresh failures (auth
// server 5xx, lockfile errors per auth.ts:1437/1444/1485): each process
// independently retries until 3 consecutive failures prove the token dead.
// Mirrors useReplBridge's MAX_CONSECUTIVE_INIT_FAILURES for in-process.
// The expiresAt key is content-addressed: /login → new token → new expiresAt
// → this stops matching without any explicit clear.
const cfg = getGlobalConfig()
if (
cfg.bridgeOauthDeadExpiresAt != null &&
(cfg.bridgeOauthDeadFailCount ?? 0) >= 3 &&
getClaudeAIOAuthTokens()?.expiresAt === cfg.bridgeOauthDeadExpiresAt
) {
logForDebugging(
`[bridge:repl] Skipping: cross-process backoff (dead token seen ${cfg.bridgeOauthDeadFailCount} times)`,
)
return null
}
// 2b. Proactively refresh if expired. Mirrors bridgeMain.ts:2096 — the REPL
// bridge fires at useEffect mount BEFORE any v1/messages call, making this
// usually the first OAuth request of the session. Without this, ~9% of
// registrations hit the server with a >8h-expired token → 401 → withOAuthRetry
// recovers, but the server logs a 401 we can avoid. VPN egress IPs observed
// at 30:1 401:200 when many unrelated users cluster at the 8h TTL boundary.
//
// Fresh-token cost: one memoized read + one Date.now() comparison (~µs).
// checkAndRefreshOAuthTokenIfNeeded clears its own cache in every path that
// touches the keychain (refresh success, lockfile race, throw), so no
// explicit clearOAuthTokenCache() here — that would force a blocking
// keychain spawn on the 91%+ fresh-token path.
await checkAndRefreshOAuthTokenIfNeeded()
// 2c. Skip if token is still expired post-refresh-attempt. Env-var / FD
// tokens (auth.ts:894-917) have expiresAt=null → never trip this. But a
// keychain token whose refresh token is dead (password change, org left,
// token GC'd) has expiresAt<now AND refresh just failed — the client would
// otherwise loop 401 forever: withOAuthRetry → handleOAuth401Error →
// refresh fails again → retry with same stale token → 401 again.
// Datadog 2026-03-08: single IPs generating 2,879 such 401s/day. Skip the
// guaranteed-fail API call; useReplBridge surfaces the failure.
//
// Intentionally NOT using isOAuthTokenExpired here — that has a 5-minute
// proactive-refresh buffer, which is the right heuristic for "should
// refresh soon" but wrong for "provably unusable". A token with 3min left
// + transient refresh endpoint blip (5xx/timeout/wifi-reconnect) would
// falsely trip a buffered check; the still-valid token would connect fine.
// Check actual expiry instead: past-expiry AND refresh-failed → truly dead.
const tokens = getClaudeAIOAuthTokens()
if (tokens && tokens.expiresAt !== null && tokens.expiresAt <= Date.now()) {
logBridgeSkip(
'oauth_expired_unrefreshable',
'[bridge:repl] Skipping: OAuth token expired and refresh failed (re-login required)',
)
onStateChange?.('failed', '/login')
// Persist for the next process. Increments failCount when re-discovering
// the same dead token (matched by expiresAt); resets to 1 for a different
// token. Once count reaches 3, step 2a's early-return fires and this path
// is never reached again — writes are capped at 3 per dead token.
// Local const captures the narrowed type (closure loses !==null narrowing).
const deadExpiresAt = tokens.expiresAt
saveGlobalConfig(c => ({
...c,
bridgeOauthDeadExpiresAt: deadExpiresAt,
bridgeOauthDeadFailCount:
c.bridgeOauthDeadExpiresAt === deadExpiresAt
? (c.bridgeOauthDeadFailCount ?? 0) + 1
: 1,
}))
return null
}
}
// 4. Compute baseUrl — needed by both v1 (env-based) and v2 (env-less)
// paths. Hoisted above the v2 gate so both can use it.
const baseUrl = getBridgeBaseUrl()
// 5. Derive session title. Precedence: explicit initialName → /rename
// (session storage) → last meaningful user message → generated slug.
// Cosmetic only (claude.ai session list); the model never sees it.
// Two flags: `hasExplicitTitle` (initialName or /rename — never auto-
// overwrite) vs. `hasTitle` (any title, including auto-derived — blocks
// the count-1 re-derivation but not count-3). The onUserMessage callback
// (wired to both v1 and v2 below) derives from the 1st prompt and again
// from the 3rd so mobile/web show a title that reflects more context.
// The slug fallback (e.g. "remote-control-graceful-unicorn") makes
// auto-started sessions distinguishable in the claude.ai list before the
// first prompt.
let title = `remote-control-${generateShortWordSlug()}`
let hasTitle = false
let hasExplicitTitle = false
if (initialName) {
title = initialName
hasTitle = true
hasExplicitTitle = true
} else {
const sessionId = getSessionId()
const customTitle = sessionId
? getCurrentSessionTitle(sessionId)
: undefined
if (customTitle) {
title = customTitle
hasTitle = true
hasExplicitTitle = true
} else if (initialMessages && initialMessages.length > 0) {
// Find the last user message that has meaningful content. Skip meta
// (nudges), tool results, compact summaries ("This session is being
// continued…"), non-human origins (task notifications, channel pushes),
// and synthetic interrupts ([Request interrupted by user]) — none are
// human-authored. Same filter as extractTitleText + isSyntheticMessage.
for (let i = initialMessages.length - 1; i >= 0; i--) {
const msg = initialMessages[i]!
if (
msg.type !== 'user' ||
msg.isMeta ||
msg.toolUseResult ||
msg.isCompactSummary ||
(msg.origin && msg.origin.kind !== 'human') ||
isSyntheticMessage(msg)
)
continue
const rawContent = getContentText(msg.message.content)
if (!rawContent) continue
const derived = deriveTitle(rawContent)
if (!derived) continue
title = derived
hasTitle = true
break
}
}
}
// Shared by both v1 and v2 — fires on every title-worthy user message until
// it returns true. At count 1: deriveTitle placeholder immediately, then
// generateSessionTitle (Haiku, sentence-case) fire-and-forget upgrade. At
// count 3: re-generate over the full conversation. Skips entirely if the
// title is explicit (/remote-control <name> or /rename) — re-checks
// sessionStorage at call time so /rename between messages isn't clobbered.
// Skips count 1 if initialMessages already derived (that title is fresh);
// still refreshes at count 3. v2 passes cse_*; updateBridgeSessionTitle
// retags internally.
let userMessageCount = 0
let lastBridgeSessionId: string | undefined
let genSeq = 0
const patch = (
derived: string,
bridgeSessionId: string,
atCount: number,
): void => {
hasTitle = true
title = derived
logForDebugging(
`[bridge:repl] derived title from message ${atCount}: ${derived}`,
)
void updateBridgeSessionTitle(bridgeSessionId, derived, {
baseUrl,
getAccessToken: getBridgeAccessToken,
}).catch(() => {})
}
// Fire-and-forget Haiku generation with post-await guards. Re-checks /rename
// (sessionStorage), v1 env-lost (lastBridgeSessionId), and same-session
// out-of-order resolution (genSeq — count-1's Haiku resolving after count-3
// would clobber the richer title). generateSessionTitle never rejects.
const generateAndPatch = (input: string, bridgeSessionId: string): void => {
const gen = ++genSeq
const atCount = userMessageCount
void generateSessionTitle(input, AbortSignal.timeout(15_000)).then(
generated => {
if (
generated &&
gen === genSeq &&
lastBridgeSessionId === bridgeSessionId &&
!getCurrentSessionTitle(getSessionId())
) {
patch(generated, bridgeSessionId, atCount)
}
},
)
}
const onUserMessage = (text: string, bridgeSessionId: string): boolean => {
if (hasExplicitTitle || getCurrentSessionTitle(getSessionId())) {
return true
}
// v1 env-lost re-creates the session with a new ID. Reset the count so
// the new session gets its own count-3 derivation; hasTitle stays true
// (new session was created via getCurrentTitle(), which reads the count-1
// title from this closure), so count-1 of the fresh cycle correctly skips.
if (
lastBridgeSessionId !== undefined &&
lastBridgeSessionId !== bridgeSessionId
) {
userMessageCount = 0
}
lastBridgeSessionId = bridgeSessionId
userMessageCount++
if (userMessageCount === 1 && !hasTitle) {
const placeholder = deriveTitle(text)
if (placeholder) patch(placeholder, bridgeSessionId, userMessageCount)
generateAndPatch(text, bridgeSessionId)
} else if (userMessageCount === 3) {
const msgs = getMessages?.()
const input = msgs
? extractConversationText(getMessagesAfterCompactBoundary(msgs))
: text
generateAndPatch(input, bridgeSessionId)
}
// Also re-latches if v1 env-lost resets the transport's done flag past 3.
return userMessageCount >= 3
}
const initialHistoryCap = getFeatureValue_CACHED_WITH_REFRESH(
'tengu_bridge_initial_history_cap',
200,
5 * 60 * 1000,
)
// Fetch orgUUID before the v1/v2 branch — both paths need it. v1 for
// environment registration; v2 for archive (which lives at the compat
// /v1/sessions/{id}/archive, not /v1/code/sessions). Without it, v2
// archive 404s and sessions stay alive in CCR after /exit.
const orgUUID = await getOrganizationUUID()
if (!orgUUID) {
logBridgeSkip('no_org_uuid', '[bridge:repl] Skipping: no org UUID')
onStateChange?.('failed', '/login')
return null
}
// ── GrowthBook gate: env-less bridge ──────────────────────────────────
// When enabled, skips the Environments API layer entirely (no register/
// poll/ack/heartbeat) and connects directly via POST /bridge → worker_jwt.
// See server PR #292605 (renamed in #293280). REPL-only — daemon/print stay
// on env-based.
//
// NAMING: "env-less" is distinct from "CCR v2" (the /worker/* transport).
// The env-based path below can ALSO use CCR v2 via CLAUDE_CODE_USE_CCR_V2.
// tengu_bridge_repl_v2 gates env-less (no poll loop), not transport version.
//
// perpetual (assistant-mode session continuity via bridge-pointer.json) is
// env-coupled and not yet implemented here — fall back to env-based when set
// so KAIROS users don't silently lose cross-restart continuity.
if (isEnvLessBridgeEnabled() && !perpetual) {
const versionError = await checkEnvLessBridgeMinVersion()
if (versionError) {
logBridgeSkip(
'version_too_old',
`[bridge:repl] Skipping: ${versionError}`,
true,
)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
logForDebugging(
'[bridge:repl] Using env-less bridge path (tengu_bridge_repl_v2)',
)
const { initEnvLessBridgeCore } = await import('./remoteBridgeCore.js')
return initEnvLessBridgeCore({
baseUrl,
orgUUID,
title,
getAccessToken: getBridgeAccessToken,
onAuth401: handleOAuth401Error,
toSDKMessages,
initialHistoryCap,
initialMessages,
// v2 always creates a fresh server session (new cse_* id), so
// previouslyFlushedUUIDs is not passed — there's no cross-session
// UUID collision risk, and the ref persists across enable→disable→
// re-enable cycles which would cause the new session to receive zero
// history (all UUIDs already in the set from the prior enable).
// v1 handles this by calling previouslyFlushedUUIDs.clear() on fresh
// session creation (replBridge.ts:768); v2 skips the param entirely.
onInboundMessage,
onUserMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
outboundOnly,
tags,
})
}
// ── v1 path: env-based (register/poll/ack/heartbeat) ──────────────────
const versionError = checkBridgeMinVersion()
if (versionError) {
logBridgeSkip('version_too_old', `[bridge:repl] Skipping: ${versionError}`)
onStateChange?.('failed', 'run `claude update` to upgrade')
return null
}
// Gather git context — this is the bootstrap-read boundary.
// Everything from here down is passed explicitly to bridgeCore.
const branch = await getBranch()
const gitRepoUrl = await getRemoteUrl()
const sessionIngressUrl =
process.env.USER_TYPE === 'ant' &&
process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
: baseUrl
// Assistant-mode sessions advertise a distinct worker_type so the web UI
// can filter them into a dedicated picker. KAIROS guard keeps the
// assistant module out of external builds entirely.
let workerType: BridgeWorkerType = 'claude_code'
if (feature('KAIROS')) {
/* eslint-disable @typescript-eslint/no-require-imports */
const { isAssistantMode } =
require('../assistant/index.js') as typeof import('../assistant/index.js')
/* eslint-enable @typescript-eslint/no-require-imports */
if (isAssistantMode()) {
workerType = 'claude_code_assistant'
}
}
// 6. Delegate. BridgeCoreHandle is a structural superset of
// ReplBridgeHandle (adds writeSdkMessages which REPL callers don't use),
// so no adapter needed — just the narrower type on the way out.
return initBridgeCore({
dir: getOriginalCwd(),
machineName: hostname(),
branch,
gitRepoUrl,
title,
baseUrl,
sessionIngressUrl,
workerType,
getAccessToken: getBridgeAccessToken,
createSession: opts =>
createBridgeSession({
...opts,
events: [],
baseUrl,
getAccessToken: getBridgeAccessToken,
}),
archiveSession: sessionId =>
archiveBridgeSession(sessionId, {
baseUrl,
getAccessToken: getBridgeAccessToken,
// gracefulShutdown.ts:407 races runCleanupFunctions against 2s.
// Teardown also does stopWork (parallel) + deregister (sequential),
// so archive can't have the full budget. 1.5s matches v2's
// teardown_archive_timeout_ms default.
timeoutMs: 1500,
}).catch((err: unknown) => {
// archiveBridgeSession has no try/catch — 5xx/timeout/network throw
// straight through. Previously swallowed silently, making archive
// failures BQ-invisible and undiagnosable from debug logs.
logForDebugging(
`[bridge:repl] archiveBridgeSession threw: ${errorMessage(err)}`,
{ level: 'error' },
)
}),
// getCurrentTitle is read on reconnect-after-env-lost to re-title the new
// session. /rename writes to session storage; onUserMessage mutates
// `title` directly — both paths are picked up here.
getCurrentTitle: () => getCurrentSessionTitle(getSessionId()) ?? title,
onUserMessage,
toSDKMessages,
onAuth401: handleOAuth401Error,
getPollIntervalConfig,
initialHistoryCap,
initialMessages,
previouslyFlushedUUIDs,
onInboundMessage,
onPermissionResponse,
onInterrupt,
onSetModel,
onSetMaxThinkingTokens,
onSetPermissionMode,
onStateChange,
perpetual,
})
}
/** Maximum placeholder-title length, in UTF-16 code units (ellipsis included). */
const TITLE_MAX_LEN = 50
/**
 * Quick placeholder title: strip display tags, take the first sentence,
 * collapse whitespace, truncate to TITLE_MAX_LEN chars. Returns undefined if
 * the result is empty (e.g. message was only <local-command-stdout>). Replaced
 * by generateSessionTitle once Haiku resolves (~1-15s).
 *
 * @param raw - The raw user message text, possibly containing display tags.
 * @returns A single-line title of at most TITLE_MAX_LEN UTF-16 code units,
 *   or undefined when nothing printable remains.
 */
function deriveTitle(raw: string): string | undefined {
  // Strip <ide_opened_file>, <session-start-hook>, etc. — these appear in
  // user messages when IDE/hooks inject context. stripDisplayTagsAllowEmpty
  // returns '' (not the original) so pure-tag messages are skipped.
  const clean = stripDisplayTagsAllowEmpty(raw)
  // First sentence is usually the intent; rest is often context/detail.
  // Capture group instead of lookbehind — keeps YARR JIT happy.
  const firstSentence = /^(.*?[.!?])\s/.exec(clean)?.[1] ?? clean
  // Collapse newlines/tabs — titles are single-line in the claude.ai list.
  const flat = firstSentence.replace(/\s+/g, ' ').trim()
  if (!flat) return undefined
  if (flat.length <= TITLE_MAX_LEN) return flat
  // Reserve one code unit for the ellipsis. If the cut would land between the
  // two halves of a surrogate pair (emoji, astral characters), back up one
  // unit so the title never ends in a lone high surrogate.
  let cut = TITLE_MAX_LEN - 1
  const lastUnit = flat.charCodeAt(cut - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1
  return flat.slice(0, cut) + '\u2026'
}

256
src/bridge/jwtUtils.ts Normal file
View File

@@ -0,0 +1,256 @@
import { logEvent } from '../services/analytics/index.js'
import { logForDebugging } from '../utils/debug.js'
import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
import { errorMessage } from '../utils/errors.js'
import { jsonParse } from '../utils/slowOperations.js'
/**
 * Format a millisecond duration as a human-readable string (e.g. "5m 30s").
 *
 * The duration is rounded to whole seconds BEFORE being split into minutes
 * and seconds, so a value just under a minute boundary rolls over cleanly
 * ("1m", "2m") instead of producing "60s" or "1m 60s".
 *
 * @param ms - Duration in milliseconds.
 * @returns "<s>s" below one minute, otherwise "<m>m" or "<m>m <s>s".
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.round(ms / 1000)
  if (totalSeconds < 60) return `${totalSeconds}s`
  const m = Math.floor(totalSeconds / 60)
  const s = totalSeconds % 60
  return s > 0 ? `${m}m ${s}s` : `${m}m`
}
/**
 * Read the payload (middle) segment of a JWT. The signature is NOT verified.
 *
 * A leading `sk-ant-si-` session-ingress prefix is removed before parsing.
 *
 * @param token - A JWT, optionally carrying the `sk-ant-si-` prefix.
 * @returns The parsed JSON payload as `unknown`, or `null` when the token
 *   does not have exactly three dot-separated segments, the payload segment
 *   is empty, or the decoded payload is not valid JSON.
 */
export function decodeJwtPayload(token: string): unknown | null {
  const ingressPrefix = 'sk-ant-si-'
  let jwt = token
  if (token.startsWith(ingressPrefix)) {
    jwt = token.slice(ingressPrefix.length)
  }
  const segments = jwt.split('.')
  const payloadSegment = segments[1]
  if (segments.length !== 3 || !payloadSegment) {
    return null
  }
  try {
    const json = Buffer.from(payloadSegment, 'base64url').toString('utf8')
    return jsonParse(json)
  } catch {
    // Any decode/parse failure means the token is malformed for our purposes.
    return null
  }
}
/**
 * Extract the `exp` (expiry) claim from a JWT. The signature is NOT verified.
 *
 * @param token - A JWT accepted by decodeJwtPayload.
 * @returns The `exp` value in Unix seconds, or `null` when the payload cannot
 *   be decoded, is not an object, or has no numeric `exp` claim.
 */
export function decodeJwtExpiry(token: string): number | null {
  const claims = decodeJwtPayload(token)
  if (claims === null || typeof claims !== 'object') {
    return null
  }
  if (!('exp' in claims)) {
    return null
  }
  return typeof claims.exp === 'number' ? claims.exp : null
}
/** Lead time before expiry at which a new token is requested (5 minutes). */
const TOKEN_REFRESH_BUFFER_MS = 5 * 60_000
/** Follow-up refresh cadence when the new token's expiry is unknown (30 minutes). */
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60_000
/** Consecutive failed attempts tolerated before the refresh chain stops retrying. */
const MAX_REFRESH_FAILURES = 3
/** Wait before retrying after no OAuth token could be obtained (60 seconds). */
const REFRESH_RETRY_DELAY_MS = 60 * 1000
/**
 * Creates a token refresh scheduler that proactively refreshes session tokens
 * before they expire. Used by both the standalone bridge and the REPL bridge.
 *
 * When a token is about to expire, the scheduler calls `onRefresh` with the
 * session ID and the bridge's OAuth access token. The caller is responsible
 * for delivering the token to the appropriate transport (child process stdin
 * for standalone bridge, WebSocket reconnect for REPL bridge).
 *
 * @param getAccessToken - Supplies the OAuth token passed to `onRefresh`; may
 *   be sync or async. A throw or an empty result counts as a failed attempt.
 * @param onRefresh - Invoked with the session ID and the fresh OAuth token.
 *   It may call `schedule`/`cancel` synchronously; an exception it throws is
 *   logged and does not break the refresh chain.
 * @param label - Prefix used in this scheduler's debug log lines.
 * @param refreshBufferMs - How long before expiry to fire the refresh.
 * @returns `schedule` (from a JWT's `exp`), `scheduleFromExpiresIn` (from an
 *   explicit TTL), `cancel` (one session) and `cancelAll`.
 */
export function createTokenRefreshScheduler({
  getAccessToken,
  onRefresh,
  label,
  refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
  getAccessToken: () => string | undefined | Promise<string | undefined>
  onRefresh: (sessionId: string, oauthToken: string) => void
  label: string
  /** How long before expiry to fire refresh. Defaults to 5 min. */
  refreshBufferMs?: number
}): {
  schedule: (sessionId: string, token: string) => void
  scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
  cancel: (sessionId: string) => void
  cancelAll: () => void
} {
  // Pending refresh timer per session.
  const timers = new Map<string, ReturnType<typeof setTimeout>>()
  // Consecutive "no OAuth token" failures per session.
  const failureCounts = new Map<string, number>()
  // Generation counter per session — incremented by schedule() and cancel()
  // so that in-flight async doRefresh() calls can detect when they've been
  // superseded and should skip setting follow-up timers.
  const generations = new Map<string, number>()

  function nextGeneration(sessionId: string): number {
    const gen = (generations.get(sessionId) ?? 0) + 1
    generations.set(sessionId, gen)
    return gen
  }

  function schedule(sessionId: string, token: string): void {
    const expiry = decodeJwtExpiry(token)
    if (!expiry) {
      // Token is not a decodable JWT (e.g. an OAuth token passed from the
      // REPL bridge WebSocket open handler). Preserve any existing timer
      // (such as the follow-up refresh set by doRefresh) so the refresh
      // chain is not broken.
      logForDebugging(
        `[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
      )
      return
    }
    // Clear any existing refresh timer — we have a concrete expiry to replace it.
    const existing = timers.get(sessionId)
    if (existing) {
      clearTimeout(existing)
    }
    // Bump generation to invalidate any in-flight async doRefresh.
    const gen = nextGeneration(sessionId)
    const expiryDate = new Date(expiry * 1000).toISOString()
    const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
    if (delayMs <= 0) {
      logForDebugging(
        `[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
      )
      void doRefresh(sessionId, gen)
      return
    }
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Schedule refresh using an explicit TTL (seconds until expiry) rather
   * than decoding a JWT's exp claim. Used by callers whose JWT is opaque
   * (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
   */
  function scheduleFromExpiresIn(
    sessionId: string,
    expiresInSeconds: number,
  ): void {
    const existing = timers.get(sessionId)
    if (existing) clearTimeout(existing)
    const gen = nextGeneration(sessionId)
    // Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
    // (e.g. very large buffer for frequent-refresh testing, or server shortens
    // expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
    const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Fetch a fresh OAuth token, hand it to onRefresh, and arm the next timer.
   * `gen` is the generation this attempt belongs to; the attempt abandons
   * itself whenever the session's current generation no longer matches.
   */
  async function doRefresh(sessionId: string, gen: number): Promise<void> {
    let oauthToken: string | undefined
    try {
      oauthToken = await getAccessToken()
    } catch (err) {
      logForDebugging(
        `[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }
    // If the session was cancelled or rescheduled while we were awaiting,
    // the generation will have changed — bail out to avoid orphaned timers.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
      )
      return
    }
    if (!oauthToken) {
      const failures = (failureCounts.get(sessionId) ?? 0) + 1
      failureCounts.set(sessionId, failures)
      logForDebugging(
        `[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
      // Schedule a retry so the refresh chain can recover if the token
      // becomes available again (e.g. transient cache clear during refresh).
      // Cap retries to avoid spamming on genuine failures.
      if (failures < MAX_REFRESH_FAILURES) {
        const retryTimer = setTimeout(
          doRefresh,
          REFRESH_RETRY_DELAY_MS,
          sessionId,
          gen,
        )
        timers.set(sessionId, retryTimer)
      }
      return
    }
    // Reset failure counter on successful token retrieval
    failureCounts.delete(sessionId)
    logForDebugging(
      `[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
    )
    logEvent('tengu_bridge_token_refreshed', {})
    try {
      onRefresh(sessionId, oauthToken)
    } catch (err) {
      // doRefresh runs from a timer (or a `void` call), so nothing upstream
      // can observe a throw here — it would surface as an unhandled rejection
      // and silently end the refresh chain. Log it and keep the chain alive.
      logForDebugging(
        `[${label}:token] onRefresh threw for sessionId=${sessionId}: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }
    // onRefresh may have synchronously called schedule()/scheduleFromExpiresIn()
    // (which armed their own timer) or cancel(). In either case the generation
    // moved on; arming the follow-up now would overwrite or resurrect the
    // timers entry and leave an orphaned timer behind.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
      )
      return
    }
    // Schedule a follow-up refresh so long-running sessions stay authenticated.
    // Without this, the initial one-shot timer leaves the session vulnerable
    // to token expiry if it runs past the first refresh window.
    const timer = setTimeout(
      doRefresh,
      FALLBACK_REFRESH_INTERVAL_MS,
      sessionId,
      gen,
    )
    timers.set(sessionId, timer)
    logForDebugging(
      `[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
    )
  }

  function cancel(sessionId: string): void {
    // Bump generation to invalidate any in-flight async doRefresh.
    nextGeneration(sessionId)
    const timer = timers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      timers.delete(sessionId)
    }
    failureCounts.delete(sessionId)
  }

  function cancelAll(): void {
    // Bump all generations so in-flight doRefresh calls are invalidated.
    for (const sessionId of generations.keys()) {
      nextGeneration(sessionId)
    }
    for (const timer of timers.values()) {
      clearTimeout(timer)
    }
    timers.clear()
    failureCounts.clear()
  }

  return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}

110
src/bridge/pollConfig.ts Normal file
View File

@@ -0,0 +1,110 @@
import { z } from 'zod/v4'
import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
import { lazySchema } from '../utils/lazySchema.js'
import {
DEFAULT_POLL_CONFIG,
type PollIntervalConfig,
} from './pollConfigDefaults.js'
// .min(100) on the seek-work intervals restores the old Math.max(..., 100)
// defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
// clamp, Zod rejects the whole object on violation — a config with one bad
// field falls back to DEFAULT_POLL_CONFIG entirely rather than being
// partially trusted.
//
// The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
// (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
// rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
// every 10ms against the VerifyEnvironmentSecretAuth DB path.
//
// The object-level refines require at least one at-capacity liveness
// mechanism enabled: heartbeat OR the relevant poll interval. Without this,
// the hb=0, atCapMs=0 drift config (ops disables heartbeat without
// restoring at_capacity) falls through every throttle site with no sleep —
// tight-looping /poll at HTTP-round-trip speed.
/** Error payload shared by the "0 or ≥100ms" at-capacity interval refinements. */
const zeroOrAtLeast100 = {
  message: 'must be 0 (disabled) or ≥100ms',
}
/** True when an at-capacity interval is disabled (0) or at/above the 100ms floor. */
const isZeroOrAtLeast100 = (v: number): boolean => v === 0 || v >= 100
/**
 * Lazily-built Zod schema for the served poll-interval config.
 *
 * Field-level rules reject out-of-range values; the two object-level refines
 * require at least one at-capacity liveness mechanism (heartbeat or the
 * relevant at-capacity poll interval) to be enabled. Defaults for optional
 * fields are taken from DEFAULT_POLL_CONFIG so the schema and the fallback
 * config cannot drift apart.
 */
const pollIntervalConfigSchema = lazySchema(() =>
  z
    .object({
      poll_interval_ms_not_at_capacity: z.number().int().min(100),
      // 0 = no at-capacity polling. Independent of heartbeat — both can be
      // enabled (heartbeat runs, periodically breaks out to poll).
      poll_interval_ms_at_capacity: z
        .number()
        .int()
        .refine(isZeroOrAtLeast100, zeroOrAtLeast100),
      // 0 = disabled; positive value = heartbeat at this interval while at
      // capacity. Runs alongside at-capacity polling, not instead of it.
      // Named non_exclusive to distinguish from the old heartbeat_interval_ms
      // (either-or semantics in pre-#22145 clients). Has a default so existing
      // GrowthBook configs without this field parse successfully.
      non_exclusive_heartbeat_interval_ms: z
        .number()
        .int()
        .min(0)
        .default(DEFAULT_POLL_CONFIG.non_exclusive_heartbeat_interval_ms),
      // Multisession (bridgeMain.ts) intervals. Defaults match the
      // single-session values so existing configs without these fields
      // preserve current behavior.
      multisession_poll_interval_ms_not_at_capacity: z
        .number()
        .int()
        .min(100)
        .default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
        ),
      multisession_poll_interval_ms_partial_capacity: z
        .number()
        .int()
        .min(100)
        .default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
        ),
      multisession_poll_interval_ms_at_capacity: z
        .number()
        .int()
        .refine(isZeroOrAtLeast100, zeroOrAtLeast100)
        .default(DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity),
      // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
      reclaim_older_than_ms: z
        .number()
        .int()
        .min(1)
        .default(DEFAULT_POLL_CONFIG.reclaim_older_than_ms),
      session_keepalive_interval_v2_ms: z
        .number()
        .int()
        .min(0)
        .default(DEFAULT_POLL_CONFIG.session_keepalive_interval_v2_ms),
    })
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
      },
    )
    .refine(
      cfg =>
        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
        cfg.multisession_poll_interval_ms_at_capacity > 0,
      {
        message:
          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
      },
    ),
)
/**
 * Resolve the bridge poll-interval config.
 *
 * Reads the `tengu_bridge_poll_interval_config` GrowthBook value (cached,
 * with a 5-minute refresh window) and validates it against
 * pollIntervalConfigSchema. Anything that fails validation — or an absent
 * flag, which yields the supplied default — resolves to DEFAULT_POLL_CONFIG.
 *
 * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
 * can tune both poll rates fleet-wide with a single config push.
 *
 * @returns The validated served config, or DEFAULT_POLL_CONFIG.
 */
export function getPollIntervalConfig(): PollIntervalConfig {
  const served = getFeatureValue_CACHED_WITH_REFRESH<unknown>(
    'tengu_bridge_poll_interval_config',
    DEFAULT_POLL_CONFIG,
    5 * 60 * 1000,
  )
  const result = pollIntervalConfigSchema().safeParse(served)
  if (!result.success) {
    return DEFAULT_POLL_CONFIG
  }
  return result.data
}

View File

@@ -0,0 +1,82 @@
/**
* Bridge poll interval defaults. Extracted from pollConfig.ts so callers
* that don't need live GrowthBook tuning (daemon via Agent SDK) can avoid
* the growthbook.ts → config.ts → file.ts → sessionStorage.ts → commands.ts
* transitive dependency chain.
*/
/**
 * Seek-work poll cadence (no transport yet / below maxSessions): 2 seconds.
 * This bounds the user-visible "connecting…" latency on initial work pickup
 * and how quickly a re-dispatched work item is recovered.
 */
const POLL_INTERVAL_MS_NOT_AT_CAPACITY = 2 * 1000
/**
 * Poll cadence while the transport is connected: 10 minutes. Independent of
 * heartbeat — when both are enabled, the heartbeat loop breaks out to poll
 * at this interval. 0 disables at-capacity polling entirely.
 *
 * Server-side constraints that bound this value:
 * - BRIDGE_LAST_POLL_TTL = 4h (Redis key expiry → environment auto-archived)
 * - max_poll_stale_seconds = 24h (session-creation health gate, currently disabled)
 *
 * 10 minutes leaves 24× headroom on the Redis TTL while still picking up
 * server-initiated token-rotation redispatches within one poll cycle.
 * The transport auto-reconnects internally for 10 minutes on transient WS
 * failures, so poll is not the recovery path — it is strictly a liveness
 * signal plus a backstop for permanent close.
 */
const POLL_INTERVAL_MS_AT_CAPACITY = 10 * 60 * 1000
/**
 * Multisession bridge (bridgeMain.ts) poll intervals. They default to the
 * single-session values so existing GrowthBook configs without these fields
 * keep their current behavior; ops can tune them independently via the
 * tengu_bridge_poll_interval_config GB flag.
 */
const MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY =
  POLL_INTERVAL_MS_NOT_AT_CAPACITY
const MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY =
  POLL_INTERVAL_MS_NOT_AT_CAPACITY
const MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY = POLL_INTERVAL_MS_AT_CAPACITY
/**
 * Shape of the bridge poll-interval configuration. All values are in
 * milliseconds (per the `_ms` field names).
 */
export type PollIntervalConfig = {
/** Poll interval while actively seeking work (not at capacity). */
poll_interval_ms_not_at_capacity: number
/** Poll interval while the transport is connected; 0 disables at-capacity polling. */
poll_interval_ms_at_capacity: number
/** Heartbeat interval while at capacity; 0 disables the heartbeat. */
non_exclusive_heartbeat_interval_ms: number
/** Multisession bridge (bridgeMain.ts) counterpart of poll_interval_ms_not_at_capacity. */
multisession_poll_interval_ms_not_at_capacity: number
/** Multisession bridge poll interval at partial capacity. */
multisession_poll_interval_ms_partial_capacity: number
/** Multisession bridge counterpart of poll_interval_ms_at_capacity. */
multisession_poll_interval_ms_at_capacity: number
/** Poll query param: reclaim unacknowledged work items older than this. */
reclaim_older_than_ms: number
/** Interval for pushing keep_alive frames to session-ingress; 0 disables them. */
session_keepalive_interval_v2_ms: number
}
/**
 * Built-in poll configuration: the values used when no live-tuned config is
 * available or the served config fails validation.
 */
export const DEFAULT_POLL_CONFIG: PollIntervalConfig = {
poll_interval_ms_not_at_capacity: POLL_INTERVAL_MS_NOT_AT_CAPACITY,
poll_interval_ms_at_capacity: POLL_INTERVAL_MS_AT_CAPACITY,
// 0 = disabled, which is the default here. When > 0, at-capacity loops send
// per-work-item heartbeats at this interval. Independent of
// poll_interval_ms_at_capacity — both may run (heartbeat periodically yields
// to poll). If enabled, a value of 60s would give 5× headroom under the
// server's 300s heartbeat TTL. Named non_exclusive to distinguish from
// the old heartbeat_interval_ms field (either-or semantics in pre-#22145
// clients — heartbeat suppressed poll). Old clients ignore this key; ops
// can set both fields during rollout.
non_exclusive_heartbeat_interval_ms: 0,
multisession_poll_interval_ms_not_at_capacity:
MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY,
multisession_poll_interval_ms_partial_capacity:
MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY,
multisession_poll_interval_ms_at_capacity:
MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY,
// Poll query param: reclaim unacknowledged work items older than this.
// Matches the server's DEFAULT_RECLAIM_OLDER_THAN_MS (work_service.py:24).
// Enables picking up stale-pending work after JWT expiry, when the prior
// ack failed because the session_ingress_token was already stale.
reclaim_older_than_ms: 5000,
// 0 = disabled. When > 0, push a silent {type:'keep_alive'} frame to
// session-ingress at this interval so upstream proxies don't GC an idle
// remote-control session. 2 min is the default. _v2: bridge-only gate
// (pre-v2 clients read the old key, new clients ignore it).
session_keepalive_interval_v2_ms: 120_000,
}

File diff suppressed because it is too large Load Diff

2406
src/bridge/replBridge.ts Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
import type { ReplBridgeHandle } from './replBridge.js'
import { toCompatSessionId } from './sessionIdCompat.js'
/**
 * Process-wide pointer to the active REPL bridge handle. It lets code that
 * lives outside useReplBridge's React tree (tools, slash commands) call
 * handle methods such as subscribePR. Same one-bridge-per-process
 * justification as bridgeDebug.ts — the handle's closure captures the
 * sessionId and getAccessToken that created the session, and re-deriving
 * those independently (BriefTool/upload.ts pattern) risks staging/prod token
 * divergence.
 *
 * Set from useReplBridge.tsx when init completes; cleared on teardown.
 */
let handle: ReplBridgeHandle | null = null

/**
 * Install (or clear, with `null`) the active REPL bridge handle.
 *
 * Also publishes — or clears — our bridge session ID in the session record
 * so other local peers can dedup us out of their bridge list (local is
 * preferred). The publish is fire-and-forget; its failures are ignored.
 */
export function setReplBridgeHandle(h: ReplBridgeHandle | null): void {
  handle = h
  const compatId = getSelfBridgeCompatId() ?? null
  void updateSessionBridgeId(compatId).catch(() => {})
}

/** Return the active REPL bridge handle, or `null` when none is set. */
export function getReplBridgeHandle(): ReplBridgeHandle | null {
  return handle
}
/**
 * Our own bridge session ID, converted to the session_* compat format the
 * API returns in /v1/sessions responses.
 *
 * @returns The compat ID, or undefined when no bridge handle is set.
 */
export function getSelfBridgeCompatId(): string | undefined {
  const active = getReplBridgeHandle()
  if (!active) {
    return undefined
  }
  return toCompatSessionId(active.bridgeSessionId)
}

View File

@@ -0,0 +1,370 @@
import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
import { CCRClient } from '../cli/transports/ccrClient.js'
import type { HybridTransport } from '../cli/transports/HybridTransport.js'
import { SSETransport } from '../cli/transports/SSETransport.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
import type { SessionState } from '../utils/sessionState.js'
import { registerWorker } from './workSecret.js'
/**
* Transport abstraction for replBridge. Covers exactly the surface that
* replBridge.ts uses against HybridTransport so the v1/v2 choice is
* confined to the construction site.
*
* - v1: HybridTransport (WS reads + POST writes to Session-Ingress)
* - v2: SSETransport (reads) + CCRClient (writes to CCR v2 /worker/*)
*
* The v2 write path goes through CCRClient.writeEvent → SerialBatchEventUploader,
* NOT through SSETransport.write() — SSETransport.write() targets the
* Session-Ingress POST URL shape, which is wrong for CCR v2.
*/
export type ReplBridgeTransport = {
/** Send a single outbound message. */
write(message: StdoutMessage): Promise<void>
/** Send several outbound messages. */
writeBatch(messages: StdoutMessage[]): Promise<void>
/** Tear down the transport. */
close(): void
/**
* Whether the transport is connected. The v2 adapter reports
* write-readiness here rather than the state of its read stream.
*/
isConnectedStatus(): boolean
/** Short state string; the v2 adapter notes replBridge uses it only for debug logging. */
getStateLabel(): string
/** Register the callback that receives inbound data. */
setOnData(callback: (data: string) => void): void
/** Register the callback invoked on close, with an optional close code. */
setOnClose(callback: (closeCode?: number) => void): void
/** Register the callback invoked once the transport is connected. */
setOnConnect(callback: () => void): void
/** Start connecting. The v2 adapter defers all network activity to this call so callbacks can be registered first. */
connect(): void
/**
* High-water mark of the underlying read stream's event sequence numbers.
* replBridge reads this before swapping transports so the new one can
* resume from where the old one left off (otherwise the server replays
* the entire session history from seq 0).
*
* v1 returns 0 — Session-Ingress WS doesn't use SSE sequence numbers;
* replay-on-reconnect is handled by the server-side message cursor.
*/
getLastSequenceNum(): number
/**
* Monotonic count of batches dropped via maxConsecutiveFailures.
* Snapshot before writeBatch() and compare after to detect silent drops
* (writeBatch() resolves normally even when batches were dropped).
* v2 returns 0 — the v2 write path doesn't set maxConsecutiveFailures.
*/
readonly droppedBatchCount: number
/**
* PUT /worker state (v2 only; v1 is a no-op). `requires_action` tells
* the backend a permission prompt is pending — claude.ai shows the
* "waiting for input" indicator. REPL/daemon callers don't need this
* (user watches the REPL locally); multi-session worker callers do.
*/
reportState(state: SessionState): void
/** PUT /worker external_metadata (v2 only; v1 is a no-op). */
reportMetadata(metadata: Record<string, unknown>): void
/**
* POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
* CCR's processing_at/processed_at columns. `received` is auto-fired by
* CCRClient on every SSE frame and is not exposed here.
*/
reportDelivery(eventId: string, status: 'processing' | 'processed'): void
/**
* Drain the write queue before close() (v2 only; v1 resolves
* immediately — HybridTransport POSTs are already awaited per-write).
*/
flush(): Promise<void>
}
/**
 * v1 adapter. HybridTransport already exposes the whole surface replBridge
 * needs (it extends WebSocketTransport, which supplies setOnConnect and
 * getStateLabel), so every member simply forwards to it. The wrapper exists
 * only so replBridge's `transport` variable has one type for v1 and v2.
 *
 * @param hybrid - The v1 transport to wrap.
 * @returns A ReplBridgeTransport that delegates to `hybrid`; the v2-only
 *   reporting hooks are no-ops and flush() resolves immediately.
 */
export function createV1ReplTransport(
  hybrid: HybridTransport,
): ReplBridgeTransport {
  // Shared body for the v2-only hooks that have nothing to do on v1.
  const noop = (): void => {}
  return {
    write(msg) {
      return hybrid.write(msg)
    },
    writeBatch(msgs) {
      return hybrid.writeBatch(msgs)
    },
    close() {
      return hybrid.close()
    },
    isConnectedStatus() {
      return hybrid.isConnectedStatus()
    },
    getStateLabel() {
      return hybrid.getStateLabel()
    },
    setOnData(cb) {
      return hybrid.setOnData(cb)
    },
    setOnClose(cb) {
      return hybrid.setOnClose(cb)
    },
    setOnConnect(cb) {
      return hybrid.setOnConnect(cb)
    },
    connect() {
      // Whatever hybrid.connect() returns is deliberately discarded.
      void hybrid.connect()
    },
    // v1 Session-Ingress WS doesn't use SSE sequence numbers; replay
    // semantics are different. Always report 0 so the seq-num carryover
    // logic in replBridge is a no-op for v1.
    getLastSequenceNum() {
      return 0
    },
    // Live view of the underlying counter, not a snapshot.
    get droppedBatchCount() {
      return hybrid.droppedBatchCount
    },
    reportState: noop,
    reportMetadata: noop,
    reportDelivery: noop,
    flush() {
      return Promise.resolve()
    },
  }
}
/**
* v2 adapter: wrap SSETransport (reads) + CCRClient (writes, heartbeat,
* state, delivery tracking).
*
* Auth: v2 endpoints validate the JWT's session_id claim (register_worker.go:32)
* and worker role (environment_auth.py:856). OAuth tokens have neither.
* This is the inverse of the v1 replBridge path, which deliberately uses OAuth.
* The JWT is refreshed when the poll loop re-dispatches work — the caller
* invokes createV2ReplTransport again with the fresh token.
*
* Registration happens here (not in the caller) so the entire v2 handshake
* is one async step. registerWorker failure propagates — replBridge will
* catch it and stay on the poll loop.
*/
export async function createV2ReplTransport(opts: {
sessionUrl: string
ingressToken: string
sessionId: string
/**
* SSE sequence-number high-water mark from the previous transport.
* Passed to the new SSETransport so its first connect() sends
* from_sequence_num / Last-Event-ID and the server resumes from where
* the old stream left off. Without this, every transport swap asks the
* server to replay the entire session history from seq 0.
*/
initialSequenceNum?: number
/**
* Worker epoch from POST /bridge response. When provided, the server
* already bumped epoch (the /bridge call IS the register — see server
* PR #293280). When omitted (v1 CCR-v2 path via replBridge.ts poll loop),
* call registerWorker as before.
*/
epoch?: number
/** CCRClient heartbeat interval. Defaults to 20s when omitted. */
heartbeatIntervalMs?: number
/** ±fraction per-beat jitter. Defaults to 0 (no jitter) when omitted. */
heartbeatJitterFraction?: number
/**
* When true, skip opening the SSE read stream — only the CCRClient write
* path is activated. Use for mirror-mode attachments that forward events
* but never receive inbound prompts or control requests.
*/
outboundOnly?: boolean
/**
* Per-instance auth header source. When provided, CCRClient + SSETransport
* read auth from this closure instead of the process-wide
* CLAUDE_CODE_SESSION_ACCESS_TOKEN env var. Required for callers managing
* multiple concurrent sessions — the env-var path stomps across sessions.
* When omitted, falls back to the env var (single-session callers).
*/
getAuthToken?: () => string | undefined
}): Promise<ReplBridgeTransport> {
const {
sessionUrl,
ingressToken,
sessionId,
initialSequenceNum,
getAuthToken,
} = opts
// Auth header builder. If getAuthToken is provided, read from it
// (per-instance, multi-session safe). Otherwise write ingressToken to
// the process-wide env var (legacy single-session path — CCRClient's
// default getAuthHeaders reads it via getSessionIngressAuthHeaders).
let getAuthHeaders: (() => Record<string, string>) | undefined
if (getAuthToken) {
getAuthHeaders = (): Record<string, string> => {
const token = getAuthToken()
if (!token) return {}
return { Authorization: `Bearer ${token}` }
}
} else {
// CCRClient.request() and SSETransport.connect() both read auth via
// getSessionIngressAuthHeaders() → this env var. Set it before either
// touches the network.
updateSessionIngressAuthToken(ingressToken)
}
const epoch = opts.epoch ?? (await registerWorker(sessionUrl, ingressToken))
logForDebugging(
`[bridge:repl] CCR v2: worker sessionId=${sessionId} epoch=${epoch}${opts.epoch !== undefined ? ' (from /bridge)' : ' (via registerWorker)'}`,
)
// Derive SSE stream URL. Same logic as transportUtils.ts:26-33 but
// starting from an http(s) base instead of a --sdk-url that might be ws://.
const sseUrl = new URL(sessionUrl)
sseUrl.pathname = sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'
const sse = new SSETransport(
sseUrl,
{},
sessionId,
undefined,
initialSequenceNum,
getAuthHeaders,
)
let onCloseCb: ((closeCode?: number) => void) | undefined
const ccr = new CCRClient(sse, new URL(sessionUrl), {
getAuthHeaders,
heartbeatIntervalMs: opts.heartbeatIntervalMs,
heartbeatJitterFraction: opts.heartbeatJitterFraction,
// Default is process.exit(1) — correct for spawn-mode children. In-process,
// that kills the REPL. Close instead: replBridge's onClose wakes the poll
// loop, which picks up the server's re-dispatch (with fresh epoch).
onEpochMismatch: () => {
logForDebugging(
'[bridge:repl] CCR v2: epoch superseded (409) — closing for poll-loop recovery',
)
// Close resources in a try block so the throw always executes.
// If ccr.close() or sse.close() throw, we still need to unwind
// the caller (request()) — otherwise handleEpochMismatch's `never`
// return type is violated at runtime and control falls through.
try {
ccr.close()
sse.close()
onCloseCb?.(4090)
} catch (closeErr: unknown) {
logForDebugging(
`[bridge:repl] CCR v2: error during epoch-mismatch cleanup: ${errorMessage(closeErr)}`,
{ level: 'error' },
)
}
// Don't return — the calling request() code continues after the 409
// branch, so callers see the logged warning and a false return. We
// throw to unwind; the uploaders catch it as a send failure.
throw new Error('epoch superseded')
},
})
// CCRClient's constructor wired sse.setOnEvent → reportDelivery('received').
// remoteIO.ts additionally sends 'processing'/'processed' via
// setCommandLifecycleListener, which the in-process query loop fires. This
// transport's only caller (replBridge/daemonBridge) has no such wiring — the
// daemon's agent child is a separate process (ProcessTransport), and its
// notifyCommandLifecycle calls fire with listener=null in its own module
// scope. So events stay at 'received' forever, and reconnectSession re-queues
// them on every daemon restart (observed: 21→24→25 phantom prompts as
// "user sent a new message while you were working" system-reminders).
//
// Fix: ACK 'processed' immediately alongside 'received'. The window between
// SSE receipt and transcript-write is narrow (queue → SDK → child stdin →
// model); a crash there loses one prompt vs. the observed N-prompt flood on
// every restart. Overwrite the constructor's wiring to do both — setOnEvent
// replaces, not appends (SSETransport.ts:658).
sse.setOnEvent(event => {
ccr.reportDelivery(event.event_id, 'received')
ccr.reportDelivery(event.event_id, 'processed')
})
// Both sse.connect() and ccr.initialize() are deferred to connect() below.
// replBridge's calling order is newTransport → setOnConnect → setOnData →
// setOnClose → connect(), and both calls need those callbacks wired first:
// sse.connect() opens the stream (events flow to onData/onClose immediately),
// and ccr.initialize().then() fires onConnectCb.
//
// onConnect fires once ccr.initialize() resolves. Writes go via
// CCRClient HTTP POST (SerialBatchEventUploader), not SSE, so the
// write path is ready the moment workerEpoch is set. SSE.connect()
// awaits its read loop and never resolves — don't gate on it.
// The SSE stream opens in parallel (~30ms) and starts delivering
// inbound events via setOnData; outbound doesn't need to wait for it.
let onConnectCb: (() => void) | undefined
let ccrInitialized = false
let closed = false
return {
write(msg) {
return ccr.writeEvent(msg)
},
async writeBatch(msgs) {
// SerialBatchEventUploader already batches internally (maxBatchSize=100);
// sequential enqueue preserves order and the uploader coalesces.
// Check closed between writes to avoid sending partial batches after
// transport teardown (epoch mismatch, SSE drop).
for (const m of msgs) {
if (closed) break
await ccr.writeEvent(m)
}
},
close() {
closed = true
ccr.close()
sse.close()
},
isConnectedStatus() {
// Write-readiness, not read-readiness — replBridge checks this
// before calling writeBatch. SSE open state is orthogonal.
return ccrInitialized
},
getStateLabel() {
// SSETransport doesn't expose its state string; synthesize from
// what we can observe. replBridge only uses this for debug logging.
if (sse.isClosedStatus()) return 'closed'
if (sse.isConnectedStatus()) return ccrInitialized ? 'connected' : 'init'
return 'connecting'
},
setOnData(cb) {
sse.setOnData(cb)
},
setOnClose(cb) {
onCloseCb = cb
// SSE reconnect-budget exhaustion fires onClose(undefined) — map to
// 4092 so ws_closed telemetry can distinguish it from HTTP-status
// closes (SSETransport:280 passes response.status). Stop CCRClient's
// heartbeat timer before notifying replBridge. (sse.close() doesn't
// invoke this, so the epoch-mismatch path above isn't double-firing.)
sse.setOnClose(code => {
ccr.close()
cb(code ?? 4092)
})
},
setOnConnect(cb) {
onConnectCb = cb
},
getLastSequenceNum() {
return sse.getLastSequenceNum()
},
// v2 write path (CCRClient) doesn't set maxConsecutiveFailures — no drops.
droppedBatchCount: 0,
reportState(state) {
ccr.reportState(state)
},
reportMetadata(metadata) {
ccr.reportMetadata(metadata)
},
reportDelivery(eventId, status) {
ccr.reportDelivery(eventId, status)
},
flush() {
return ccr.flush()
},
connect() {
// Outbound-only: skip the SSE read stream entirely — no inbound
// events to receive, no delivery ACKs to send. Only the CCRClient
// write path (POST /worker/events) and heartbeat are needed.
if (!opts.outboundOnly) {
// Fire-and-forget — SSETransport.connect() awaits readStream()
// (the read loop) and only resolves on stream close/error. The
// spawn-mode path in remoteIO.ts does the same void discard.
void sse.connect()
}
void ccr.initialize(epoch).then(
() => {
ccrInitialized = true
logForDebugging(
`[bridge:repl] v2 transport ready for writes (epoch=${epoch}, sse=${sse.isConnectedStatus() ? 'open' : 'opening'})`,
)
onConnectCb?.()
},
(err: unknown) => {
logForDebugging(
`[bridge:repl] CCR v2 initialize failed: ${errorMessage(err)}`,
{ level: 'error' },
)
// Close transport resources and notify replBridge via onClose
// so the poll loop can retry on the next work dispatch.
// Without this callback, replBridge never learns the transport
// failed to initialize and sits with transport === null forever.
ccr.close()
sse.close()
onCloseCb?.(4091) // 4091 = init failure, distinguishable from 4090 epoch mismatch
},
)
},
}
}

View File

@@ -0,0 +1,57 @@
/**
* Session ID tag translation helpers for the CCR v2 compat layer.
*
* Lives in its own file (rather than workSecret.ts) so that sessionHandle.ts
* and replBridgeTransport.ts (bridge.mjs entry points) can import from
* workSecret.ts without pulling in these retag functions.
*
* The isCseShimEnabled kill switch is injected via setCseShimGate() to avoid
* a static import of bridgeEnabled.ts → growthbook.ts → config.ts — all
* banned from the sdk.mjs bundle (scripts/build-agent-sdk.sh). Callers that
* already import bridgeEnabled.ts register the gate; the SDK path never does,
* so the shim defaults to active (matching isCseShimEnabled()'s own default).
*/
/**
* Injected kill-switch predicate for the cse_ shim. Stays undefined until
* setCseShimGate() is called; toCompatSessionId() only consults it when it
* is set, so an unregistered gate leaves the shim active.
*/
let _isCseShimEnabled: (() => boolean) | undefined
/**
* Register the GrowthBook gate for the cse_ shim. Called from bridge
* init code that already imports bridgeEnabled.ts.
*
* @param gate Predicate stored as-is (not invoked here). A later call
* replaces the previously registered gate.
*/
export function setCseShimGate(gate: () => boolean): void {
_isCseShimEnabled = gate
}
/**
 * Swap the `cse_` tag on a session ID for `session_`, as required by the v1
 * compat API.
 *
 * The work poll hands out `cse_*` IDs, which is what the worker endpoints
 * (/v1/code/sessions/{id}/worker/*) expect. The client-facing compat
 * endpoints (/v1/sessions/{id}, /v1/sessions/{id}/archive,
 * /v1/sessions/{id}/events) expect `session_*` instead — compat/convert.go:27
 * validates TagSession. Only the tag differs; the UUID part is carried over
 * unchanged.
 *
 * bridgeMain keeps a single sessionId variable for both worker registration
 * and session-management calls, and under the compat gate that value is
 * `cse_*`, so archiveSession/fetchSessionTitle rely on this re-tag.
 *
 * @param id Session ID in either costume.
 * @returns `session_<rest>` when `id` starts with `cse_` and the shim is
 *   active; otherwise `id` unchanged. The shim counts as active when no gate
 *   has been registered. The gate is only invoked for `cse_*` IDs.
 */
export function toCompatSessionId(id: string): string {
  const infraPrefix = 'cse_'
  if (id.startsWith(infraPrefix)) {
    // No registered gate means "enabled" (the SDK path never registers one).
    const shimActive = _isCseShimEnabled ? _isCseShimEnabled() : true
    if (shimActive) {
      return 'session_' + id.slice(infraPrefix.length)
    }
  }
  return id
}
/**
 * Swap the `session_` tag on a session ID for `cse_`, for calls that sit
 * below the compat layer. This is the inverse of toCompatSessionId.
 *
 * POST /v1/environments/{id}/bridge/reconnect looks sessions up by their
 * infra tag (`cse_*`) once ccr_v2_compat_enabled is on server-side, but
 * createBridgeSession still returns `session_*` (compat/convert.go:41) and
 * that is what bridge-pointer stores. Without this re-tag, perpetual
 * reconnect sends the wrong costume and gets "Session not found" back.
 *
 * @param id Session ID in either costume.
 * @returns `cse_<rest>` when `id` starts with `session_`; otherwise `id`
 *   unchanged.
 */
export function toInfraSessionId(id: string): string {
  const compatPrefix = 'session_'
  return id.startsWith(compatPrefix)
    ? 'cse_' + id.slice(compatPrefix.length)
    : id
}

550
src/bridge/sessionRunner.ts Normal file
View File

@@ -0,0 +1,550 @@
import { type ChildProcess, spawn } from 'child_process'
import { createWriteStream, type WriteStream } from 'fs'
import { tmpdir } from 'os'
import { dirname, join } from 'path'
import { createInterface } from 'readline'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
import { debugTruncate } from './debugUtils.js'
import type {
SessionActivity,
SessionDoneStatus,
SessionHandle,
SessionSpawner,
SessionSpawnOpts,
} from './types.js'
/** Capacity of the per-session `activities` ring buffer; the oldest entry is shifted out once it is full. */
const MAX_ACTIVITIES = 10
/** Capacity of the per-session `lastStderr` ring buffer of child stderr lines. */
const MAX_STDERR_LINES = 10
/**
 * Make a session ID safe to embed in a file name.
 *
 * Every UTF-16 code unit outside `[a-zA-Z0-9_-]` is replaced by a single
 * underscore, which removes path separators, dots (so no `../`), and any
 * other character that could cause filesystem trouble. The result always has
 * the same length as the input.
 *
 * @param id Raw session ID.
 * @returns The sanitized ID.
 */
export function safeFilenameId(id: string): string {
  const allowedUnit = /^[a-zA-Z0-9_-]$/
  // split('') works on UTF-16 code units, so an astral character becomes two
  // underscores — the same as a non-unicode global regex replace.
  return id
    .split('')
    .map(unit => (allowedUnit.test(unit) ? unit : '_'))
    .join('')
}
/**
* A control_request emitted by the child CLI when it needs permission to
* execute a **specific** tool invocation (not a general capability check).
* The bridge forwards this to the server so the user can approve/deny.
*
* NOTE: the spawner in this file only checks `type === 'control_request'` and
* `request.subtype === 'can_use_tool'` before casting a parsed stdout line to
* this type; the remaining fields are not validated at runtime.
*/
export type PermissionRequest = {
// Discriminator on the NDJSON line.
type: 'control_request'
// Presumably echoed back in the matching control_response — confirm against
// PermissionResponseEvent producers.
request_id: string
request: {
/** Per-invocation permission check — "may I run this tool with these inputs?" */
subtype: 'can_use_tool'
// Name of the tool the child wants to run.
tool_name: string
// Inputs for that tool invocation; shape depends on the tool.
input: Record<string, unknown>
// Presumably the id of the tool_use block this request belongs to — TODO confirm.
tool_use_id: string
}
}
/** Dependencies injected into createSessionSpawner(). */
type SessionSpawnerDeps = {
// Executable passed to child_process.spawn() for every session.
execPath: string
/**
* Arguments that must precede the CLI flags when spawning. Empty for
* compiled binaries (where execPath is the claude binary itself); contains
* the script path (process.argv[1]) for npm installs where execPath is the
* node runtime. Without this, node sees --sdk-url as a node option and
* exits with "bad option: --sdk-url" (see anthropics/claude-code#28334).
*/
scriptArgs: string[]
// Base environment for the child; the spawner overrides/strips a few
// CLAUDE_CODE_* variables on top of it.
env: NodeJS.ProcessEnv
// Adds --verbose to the child and mirrors its stdout/stderr lines to this
// process's stderr. Also enables the auto-generated temp debug file.
verbose: boolean
// When true the child gets CLAUDE_CODE_FORCE_SANDBOX=1.
sandbox: boolean
// Base debug-file path; each session gets its own copy with the sanitized
// session ID inserted, and a transcript file is written alongside it.
debugFile?: string
// Forwarded to the child as --permission-mode when set.
permissionMode?: string
// Sink for the spawner's diagnostic messages.
onDebug: (msg: string) => void
// Invoked once per activity extracted from the child's stdout.
onActivity?: (sessionId: string, activity: SessionActivity) => void
// Invoked for each can_use_tool control_request seen on the child's stdout.
// accessToken is the token from the spawn options, not a refreshed one.
onPermissionRequest?: (
sessionId: string,
request: PermissionRequest,
accessToken: string,
) => void
}
/** Map tool names to human-readable verbs for the status display. */
const TOOL_VERBS: Record<string, string> = {
  Read: 'Reading',
  Write: 'Writing',
  Edit: 'Editing',
  MultiEdit: 'Editing',
  Bash: 'Running',
  Glob: 'Searching',
  Grep: 'Searching',
  WebFetch: 'Fetching',
  WebSearch: 'Searching',
  Task: 'Running task',
  FileReadTool: 'Reading',
  FileWriteTool: 'Writing',
  FileEditTool: 'Editing',
  GlobTool: 'Searching',
  GrepTool: 'Searching',
  BashTool: 'Running',
  NotebookEditTool: 'Editing notebook',
  LSP: 'LSP',
}

/**
 * Input keys that can name the "target" of a tool call, in priority order.
 * The first key holding a string wins.
 */
const SUMMARY_TARGET_KEYS = [
  'file_path',
  'filePath',
  'pattern',
  'command',
  'url',
  'query',
] as const

/**
 * Build the one-line status summary for a tool invocation, e.g.
 * "Reading src/foo.ts" or "Running npm test".
 *
 * @param name Tool name as reported by the child. Unknown names are used
 *   verbatim as the verb.
 * @param input Tool input parsed from the child's NDJSON. Values are not
 *   trusted to be strings; non-string candidates are treated as absent.
 * @returns `"<verb> <target>"`, or just `"<verb>"` when no non-empty target
 *   is found. `command` targets are capped at 60 characters.
 */
function toolSummary(name: string, input: Record<string, unknown>): string {
  // Own-property lookup only: a plain index would resolve inherited names
  // such as "toString" or "constructor" to Object.prototype members.
  const verb = Object.prototype.hasOwnProperty.call(TOOL_VERBS, name)
    ? (TOOL_VERBS[name] ?? name)
    : name

  let target = ''
  for (const key of SUMMARY_TARGET_KEYS) {
    const candidate = input[key]
    // Tool input is model-generated JSON; calling string methods on a
    // non-string value would throw inside the stdout line handler.
    if (typeof candidate !== 'string') continue
    // An empty string still ends the search, matching the ?? chain this
    // replaces (only null/undefined fell through to the next key).
    target = key === 'command' ? candidate.slice(0, 60) : candidate
    break
  }

  return target ? `${verb} ${target}` : verb
}
/**
 * Turn one NDJSON line from the child's stdout into zero or more status
 * activities.
 *
 * Only two message types produce activities:
 * - `assistant`: each `tool_use` content block becomes a `tool_start`
 *   activity, and each non-empty `text` block becomes a `text` activity
 *   (summary capped at 80 characters).
 * - `result`: subtype `success` becomes a `result` activity; any other
 *   non-empty subtype becomes an `error` activity summarised by the first
 *   entry of `errors`, or `Error: <subtype>` when there is none.
 *
 * Lines that are not JSON, not objects, or of any other type yield an empty
 * array. All activities from one line share the same timestamp.
 *
 * @param line Raw stdout line.
 * @param sessionId Used only in debug messages.
 * @param onDebug Receives one message per activity (and one for a `result`
 *   with no subtype).
 * @returns The activities found, in content order.
 */
function extractActivities(
  line: string,
  sessionId: string,
  onDebug: (msg: string) => void,
): SessionActivity[] {
  let decoded: unknown
  try {
    decoded = jsonParse(line)
  } catch {
    return []
  }
  if (typeof decoded !== 'object' || !decoded) {
    return []
  }

  const envelope = decoded as Record<string, unknown>
  const found: SessionActivity[] = []
  const timestamp = Date.now()
  const record = (type: SessionActivity['type'], summary: string): void => {
    found.push({ type, summary, timestamp })
  }

  if (envelope.type === 'assistant') {
    const payload = envelope.message as Record<string, unknown> | undefined
    const blocks = payload ? payload.content : undefined
    if (Array.isArray(blocks)) {
      for (const entry of blocks) {
        if (typeof entry !== 'object' || !entry) continue
        const part = entry as Record<string, unknown>

        if (part.type === 'tool_use') {
          const toolName = (part.name as string) ?? 'Tool'
          const toolInput = (part.input as Record<string, unknown>) ?? {}
          record('tool_start', toolSummary(toolName, toolInput))
          onDebug(
            `[bridge:activity] sessionId=${sessionId} tool_use name=${toolName} ${inputPreview(toolInput)}`,
          )
          continue
        }

        if (part.type !== 'text') continue
        const body = (part.text as string) ?? ''
        // Kept as a `> 0` comparison so a value without a numeric length is
        // skipped rather than sliced.
        if (body.length > 0) {
          record('text', body.slice(0, 80))
          onDebug(
            `[bridge:activity] sessionId=${sessionId} text "${body.slice(0, 100)}"`,
          )
        }
      }
    }
  } else if (envelope.type === 'result') {
    const outcome = envelope.subtype as string | undefined
    if (outcome === 'success') {
      record('result', 'Session completed')
      onDebug(
        `[bridge:activity] sessionId=${sessionId} result subtype=success`,
      )
    } else if (outcome) {
      const reported = envelope.errors as string[] | undefined
      const headline = reported?.[0] ?? `Error: ${outcome}`
      record('error', headline)
      onDebug(
        `[bridge:activity] sessionId=${sessionId} result subtype=${outcome} error="${headline}"`,
      )
    } else {
      onDebug(
        `[bridge:activity] sessionId=${sessionId} result subtype=undefined`,
      )
    }
  }

  return found
}
/**
 * Extract plain text from a replayed SDKUserMessage NDJSON line. Returns the
 * trimmed text if this looks like a real human-authored message, otherwise
 * undefined so the caller keeps waiting for the first real message.
 *
 * @param msg Parsed `type: 'user'` message from the child's stdout. Its
 *   fields are untrusted JSON and are checked at runtime.
 * @returns The trimmed text of a string `message.content`, or of the first
 *   `text` block of an array `message.content`. Returns undefined when the
 *   message is skipped, has no text, the text is not a string, or the text is
 *   blank.
 */
function extractUserMessageText(
  msg: Record<string, unknown>,
): string | undefined {
  // Skip tool-result user messages (parent_tool_use_id set — wrapped subagent
  // results), synthetic caveat messages, and messages flagged isReplay —
  // none of these is treated as the first human-authored message.
  if (msg.parent_tool_use_id != null || msg.isSynthetic || msg.isReplay) {
    return undefined
  }

  const message = msg.message as Record<string, unknown> | undefined
  const content = message?.content

  let text: unknown
  if (typeof content === 'string') {
    text = content
  } else if (Array.isArray(content)) {
    // Only the first text block is considered, even if its text is unusable.
    const firstTextBlock = content.find(
      (block: unknown): block is Record<string, unknown> =>
        typeof block === 'object' &&
        block !== null &&
        (block as Record<string, unknown>).type === 'text',
    )
    text = firstTextBlock?.text
  }

  // The payload is untrusted JSON: a text block whose `text` is not a string
  // must be ignored rather than crash the stdout line handler on .trim().
  if (typeof text !== 'string') return undefined
  const trimmed = text.trim()
  return trimmed ? trimmed : undefined
}
/**
 * Build a short preview of tool input for debug logging.
 *
 * Takes the first three string-valued entries of `input` (in property order),
 * caps each value at 100 characters, and renders them as space-separated
 * `key="value"` pairs. Non-string values are ignored.
 *
 * @param input Tool input object.
 * @returns The preview, or an empty string when there are no string values.
 */
function inputPreview(input: Record<string, unknown>): string {
  return Object.entries(input)
    .filter(
      (entry): entry is [string, string] => typeof entry[1] === 'string',
    )
    .slice(0, 3)
    .map(([field, value]) => `${field}="${value.slice(0, 100)}"`)
    .join(' ')
}
/**
 * Derive the per-session debug log path from the configured base path by
 * inserting `-<safeId>` before the file extension, or appending it when the
 * file name has no extension.
 *
 * The extension is looked for in the final path segment only, and a leading
 * dot (dot-file) does not count as an extension. Both `/` and `\` are treated
 * as separators so Windows-style paths work too.
 */
function sessionDebugFilePath(base: string, safeId: string): string {
  const lastSeparator = Math.max(base.lastIndexOf('/'), base.lastIndexOf('\\'))
  const extStart = base.lastIndexOf('.')
  // The dot must sit strictly inside the final segment's name; otherwise it
  // belongs to a directory ("/tmp/my.dir/debug", "../debug") or starts a
  // dot-file ("/tmp/.debug") and splitting there would corrupt the path.
  if (extStart > lastSeparator + 1) {
    return `${base.slice(0, extStart)}-${safeId}${base.slice(extStart)}`
  }
  return `${base}-${safeId}`
}

/**
 * Create the spawner that runs one child CLI process per bridge session.
 *
 * Each `spawn()` call starts `deps.execPath` in `dir` with stream-json
 * stdin/stdout, wires the child's stdout into activity extraction,
 * permission-request forwarding and first-user-message detection, keeps ring
 * buffers of recent activities and stderr lines, and returns a handle for
 * killing the child, writing to its stdin and refreshing its access token.
 *
 * @param deps Executable, environment, flags and callbacks shared by all
 *   sessions created by this spawner.
 * @returns A SessionSpawner whose `spawn(opts, dir)` returns the session
 *   handle; `handle.done` resolves (never rejects) with the final status.
 */
export function createSessionSpawner(deps: SessionSpawnerDeps): SessionSpawner {
  return {
    spawn(opts: SessionSpawnOpts, dir: string): SessionHandle {
      // Debug file resolution:
      // 1. If deps.debugFile is provided, use it with session ID suffix for uniqueness
      // 2. If verbose or ant build, auto-generate a temp file path
      // 3. Otherwise, no debug file
      const safeId = safeFilenameId(opts.sessionId)
      let debugFile: string | undefined
      if (deps.debugFile) {
        debugFile = sessionDebugFilePath(deps.debugFile, safeId)
      } else if (deps.verbose || process.env.USER_TYPE === 'ant') {
        debugFile = join(tmpdir(), 'claude', `bridge-session-${safeId}.log`)
      }

      // Transcript file: write raw NDJSON lines for post-hoc analysis.
      // Placed alongside the debug file when one is configured.
      let transcriptStream: WriteStream | null = null
      let transcriptPath: string | undefined
      if (deps.debugFile) {
        transcriptPath = join(
          dirname(deps.debugFile),
          `bridge-transcript-${safeId}.jsonl`,
        )
        transcriptStream = createWriteStream(transcriptPath, { flags: 'a' })
        // Transcript logging is best-effort: on failure, stop writing.
        transcriptStream.on('error', err => {
          deps.onDebug(
            `[bridge:session] Transcript write error: ${err.message}`,
          )
          transcriptStream = null
        })
        deps.onDebug(`[bridge:session] Transcript log: ${transcriptPath}`)
      }

      const args = [
        ...deps.scriptArgs,
        '--print',
        '--sdk-url',
        opts.sdkUrl,
        '--session-id',
        opts.sessionId,
        '--input-format',
        'stream-json',
        '--output-format',
        'stream-json',
        '--replay-user-messages',
        ...(deps.verbose ? ['--verbose'] : []),
        ...(debugFile ? ['--debug-file', debugFile] : []),
        ...(deps.permissionMode
          ? ['--permission-mode', deps.permissionMode]
          : []),
      ]

      const env: NodeJS.ProcessEnv = {
        ...deps.env,
        // Strip the bridge's OAuth token so the child CC process uses
        // the session access token for inference instead.
        CLAUDE_CODE_OAUTH_TOKEN: undefined,
        CLAUDE_CODE_ENVIRONMENT_KIND: 'bridge',
        ...(deps.sandbox && { CLAUDE_CODE_FORCE_SANDBOX: '1' }),
        CLAUDE_CODE_SESSION_ACCESS_TOKEN: opts.accessToken,
        // v1: HybridTransport (WS reads + POST writes) to Session-Ingress.
        // Harmless in v2 mode — transportUtils checks CLAUDE_CODE_USE_CCR_V2 first.
        CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2: '1',
        // v2: SSETransport + CCRClient to CCR's /v1/code/sessions/* endpoints.
        // Same env vars environment-manager sets in the container path.
        ...(opts.useCcrV2 && {
          CLAUDE_CODE_USE_CCR_V2: '1',
          CLAUDE_CODE_WORKER_EPOCH: String(opts.workerEpoch),
        }),
      }

      deps.onDebug(
        `[bridge:session] Spawning sessionId=${opts.sessionId} sdkUrl=${opts.sdkUrl} accessToken=${opts.accessToken ? 'present' : 'MISSING'}`,
      )
      deps.onDebug(`[bridge:session] Child args: ${args.join(' ')}`)
      if (debugFile) {
        deps.onDebug(`[bridge:session] Debug log: ${debugFile}`)
      }

      // Pipe all three streams: stdin for control, stdout for NDJSON parsing,
      // stderr for error capture and diagnostics.
      const child: ChildProcess = spawn(deps.execPath, args, {
        cwd: dir,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
        windowsHide: true,
      })
      deps.onDebug(
        `[bridge:session] sessionId=${opts.sessionId} pid=${child.pid}`,
      )

      // A write that races with the child exiting surfaces as an asynchronous
      // 'error' (e.g. EPIPE) on the stdin stream. With no listener attached
      // Node raises it as an uncaught exception, so log it instead — stdin
      // writes are best-effort (see writeStdin below).
      child.stdin?.on('error', err => {
        deps.onDebug(
          `[bridge:session] sessionId=${opts.sessionId} stdin error: ${err.message}`,
        )
      })

      const activities: SessionActivity[] = []
      let currentActivity: SessionActivity | null = null
      const lastStderr: string[] = []
      let sigkillSent = false
      let firstUserMessageSeen = false

      // Buffer stderr for error diagnostics
      if (child.stderr) {
        const stderrRl = createInterface({ input: child.stderr })
        stderrRl.on('line', line => {
          // Forward stderr to bridge's stderr in verbose mode
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }
          // Ring buffer of last N lines
          if (lastStderr.length >= MAX_STDERR_LINES) {
            lastStderr.shift()
          }
          lastStderr.push(line)
        })
      }

      // Parse NDJSON from child stdout
      if (child.stdout) {
        const rl = createInterface({ input: child.stdout })
        rl.on('line', line => {
          // Write raw NDJSON to transcript file
          if (transcriptStream) {
            transcriptStream.write(line + '\n')
          }
          // Log all messages flowing from the child CLI to the bridge
          deps.onDebug(
            `[bridge:ws] sessionId=${opts.sessionId} <<< ${debugTruncate(line)}`,
          )
          // In verbose mode, forward raw output to stderr
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }

          const extracted = extractActivities(
            line,
            opts.sessionId,
            deps.onDebug,
          )
          for (const activity of extracted) {
            // Maintain ring buffer
            if (activities.length >= MAX_ACTIVITIES) {
              activities.shift()
            }
            activities.push(activity)
            currentActivity = activity
            deps.onActivity?.(opts.sessionId, activity)
          }

          // Detect control_request and replayed user messages.
          // extractActivities parses the same line but swallows parse errors
          // and skips 'user' type — re-parse here is cheap (NDJSON lines are
          // small) and keeps each path self-contained.
          {
            let parsed: unknown
            try {
              parsed = jsonParse(line)
            } catch {
              // Non-JSON line, skip detection
            }
            if (parsed && typeof parsed === 'object') {
              const msg = parsed as Record<string, unknown>
              if (msg.type === 'control_request') {
                const request = msg.request as
                  | Record<string, unknown>
                  | undefined
                if (
                  request?.subtype === 'can_use_tool' &&
                  deps.onPermissionRequest
                ) {
                  deps.onPermissionRequest(
                    opts.sessionId,
                    parsed as PermissionRequest,
                    opts.accessToken,
                  )
                }
                // interrupt is turn-level; the child handles it internally (print.ts)
              } else if (
                msg.type === 'user' &&
                !firstUserMessageSeen &&
                opts.onFirstUserMessage
              ) {
                const text = extractUserMessageText(msg)
                if (text) {
                  firstUserMessageSeen = true
                  opts.onFirstUserMessage(text)
                }
              }
            }
          }
        })
      }

      // Resolves exactly once with the session outcome; later resolve() calls
      // (e.g. 'close' after 'error') are no-ops.
      const done = new Promise<SessionDoneStatus>(resolve => {
        child.on('close', (code, signal) => {
          // Close transcript stream on exit
          if (transcriptStream) {
            transcriptStream.end()
            transcriptStream = null
          }
          if (signal === 'SIGTERM' || signal === 'SIGINT') {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} interrupted signal=${signal} pid=${child.pid}`,
            )
            resolve('interrupted')
          } else if (code === 0) {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} completed exit_code=0 pid=${child.pid}`,
            )
            resolve('completed')
          } else {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} failed exit_code=${code} pid=${child.pid}`,
            )
            resolve('failed')
          }
        })
        child.on('error', err => {
          deps.onDebug(
            `[bridge:session] sessionId=${opts.sessionId} spawn error: ${err.message}`,
          )
          resolve('failed')
        })
      })

      const handle: SessionHandle = {
        sessionId: opts.sessionId,
        done,
        activities,
        accessToken: opts.accessToken,
        lastStderr,
        get currentActivity(): SessionActivity | null {
          return currentActivity
        },
        kill(): void {
          if (!child.killed) {
            deps.onDebug(
              `[bridge:session] Sending SIGTERM to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            // On Windows, child.kill('SIGTERM') throws; use default signal.
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGTERM')
            }
          }
        },
        forceKill(): void {
          // Use separate flag because child.killed is set when kill() is called,
          // not when the process exits. We need to send SIGKILL even after SIGTERM.
          if (!sigkillSent && child.pid) {
            sigkillSent = true
            deps.onDebug(
              `[bridge:session] Sending SIGKILL to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGKILL')
            }
          }
        },
        writeStdin(data: string): void {
          // Silently dropped once the child's stdin is gone.
          if (child.stdin && !child.stdin.destroyed) {
            deps.onDebug(
              `[bridge:ws] sessionId=${opts.sessionId} >>> ${debugTruncate(data)}`,
            )
            child.stdin.write(data)
          }
        },
        updateAccessToken(token: string): void {
          handle.accessToken = token
          // Send the fresh token to the child process via stdin. The child's
          // StructuredIO handles update_environment_variables messages by
          // setting process.env directly, so getSessionIngressAuthToken()
          // picks up the new token on the next refreshHeaders call.
          handle.writeStdin(
            jsonStringify({
              type: 'update_environment_variables',
              variables: { CLAUDE_CODE_SESSION_ACCESS_TOKEN: token },
            }) + '\n',
          )
          deps.onDebug(
            `[bridge:session] Sent token refresh via stdin for sessionId=${opts.sessionId}`,
          )
        },
      }
      return handle
    },
  }
}
export { extractActivities as _extractActivitiesForTesting }

210
src/bridge/trustedDevice.ts Normal file
View File

@@ -0,0 +1,210 @@
import axios from 'axios'
import memoize from 'lodash-es/memoize.js'
import { hostname } from 'os'
import { getOauthConfig } from '../constants/oauth.js'
import {
checkGate_CACHED_OR_BLOCKING,
getFeatureValue_CACHED_MAY_BE_STALE,
} from '../services/analytics/growthbook.js'
import { logForDebugging } from '../utils/debug.js'
import { errorMessage } from '../utils/errors.js'
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
import { getSecureStorage } from '../utils/secureStorage/index.js'
import { jsonStringify } from '../utils/slowOperations.js'
/**
* Trusted device token source for bridge (remote-control) sessions.
*
* Bridge sessions have SecurityTier=ELEVATED on the server (CCR v2).
* The server gates ConnectBridgeWorker on its own flag
* (sessions_elevated_auth_enforcement in Anthropic Main); this CLI-side
* flag controls whether the CLI sends X-Trusted-Device-Token at all.
* Two flags so rollout can be staged: flip CLI-side first (headers
* start flowing, server still no-ops), then flip server-side.
*
* Enrollment (POST /auth/trusted_devices) is gated server-side by
* account_session.created_at < 10min, so it must happen during /login.
* Token is persistent (90d rolling expiry) and stored in keychain.
*
* See anthropics/anthropic#274559 (spec), #310375 (B1b tenant RPCs),
* #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
*/
/** Name of the CLI-side GrowthBook gate that controls whether X-Trusted-Device-Token is sent. */
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'
/**
* Read the gate via the cached, possibly stale GrowthBook accessor.
* NOTE(review): the `false` argument looks like the fallback used when no
* value is cached — confirm against growthbook.ts.
*/
function isGateEnabled(): boolean {
return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
}
// Memoized token lookup. secureStorage.read() spawns a macOS `security`
// subprocess (~40ms), and bridgeApi.ts hits this from getHeaders() on every
// poll/heartbeat/ack, so the result is cached until explicitly cleared —
// after enrollment (below) and on logout (clearAuthRelatedCaches).
//
// The GrowthBook gate is deliberately NOT part of the memoized work: it is
// checked live by the caller so a gate flip after a GrowthBook refresh takes
// effect without a restart.
const readStoredToken = memoize((): string | undefined => {
  // A non-empty CLAUDE_TRUSTED_DEVICE_TOKEN wins over storage (testing/canary).
  const override = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
  return override ? override : getSecureStorage().read()?.trustedDeviceToken
})
/**
 * Token to send as X-Trusted-Device-Token, if any.
 *
 * @returns undefined while the CLI-side gate is off (storage is not touched
 *   in that case); otherwise the memoized env-var or stored token, which may
 *   itself be undefined when the device is not enrolled.
 */
export function getTrustedDeviceToken(): string | undefined {
  return isGateEnabled() ? readStoredToken() : undefined
}
/**
* Drop the memoized token so the next getTrustedDeviceToken() re-reads the
* env var / secure storage. Does not modify secure storage itself. The
* optional chaining tolerates a memoize cache without a clear() method.
*/
export function clearTrustedDeviceTokenCache(): void {
readStoredToken.cache?.clear?.()
}
/**
 * Remove the persisted trusted device token and reset the memo cache.
 *
 * Intended to run before enrollTrustedDevice() during /login: enrollment is
 * async, so without this a bridge API call made between login and enrollment
 * completion could still send the previous account's token as
 * X-Trusted-Device-Token.
 *
 * Does nothing at all (storage and cache untouched) while the CLI-side gate
 * is off. Storage failures are swallowed — login must not be blocked by an
 * inaccessible keychain — and the memo cache is cleared regardless.
 */
export function clearTrustedDeviceToken(): void {
  if (isGateEnabled()) {
    const storage = getSecureStorage()
    try {
      const stored = storage.read()
      if (stored && stored.trustedDeviceToken) {
        delete stored.trustedDeviceToken
        storage.update(stored)
      }
    } catch {
      // Best-effort — don't block login if storage is inaccessible
    }
    readStoredToken.cache?.clear?.()
  }
}
/**
 * Enroll this device with POST /auth/trusted_devices and store the returned
 * device token in the keychain.
 *
 * Best-effort by design: every failure path logs and returns, and nothing is
 * thrown, so post-login hooks calling this never block the login flow.
 *
 * Timing matters — the server only accepts enrollment while
 * account_session.created_at is under 10 minutes old, so this has to run
 * right after a fresh /login. A later attempt (e.g. lazy enrollment on a
 * /bridge 403) is rejected with 403 stale_session.
 *
 * Enrollment is skipped when the gate is off, when
 * CLAUDE_TRUSTED_DEVICE_TOKEN is set, when there is no OAuth access token, or
 * in essential-traffic-only mode.
 */
export async function enrollTrustedDevice(): Promise<void> {
  try {
    // checkGate_CACHED_OR_BLOCKING awaits any in-flight GrowthBook re-init
    // (triggered by refreshGrowthBookAfterAuthChange in login.tsx) before
    // reading the gate, so we get the post-refresh value.
    const gateOn = await checkGate_CACHED_OR_BLOCKING(TRUSTED_DEVICE_GATE)
    if (!gateOn) {
      logForDebugging(
        `[trusted-device] Gate ${TRUSTED_DEVICE_GATE} is off, skipping enrollment`,
      )
      return
    }

    // An env-var token (e.g. from an enterprise wrapper) shadows whatever is
    // in storage inside readStoredToken(), so a freshly enrolled token would
    // never be used — don't bother enrolling.
    if (process.env.CLAUDE_TRUSTED_DEVICE_TOKEN) {
      logForDebugging(
        '[trusted-device] CLAUDE_TRUSTED_DEVICE_TOKEN env var is set, skipping enrollment (env var takes precedence)',
      )
      return
    }

    // Lazy require — utils/auth.ts transitively pulls ~1300 modules
    // (config → file → permissions → sessionStorage → commands). Daemon callers
    // of getTrustedDeviceToken() don't need this; only /login does.
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { getClaudeAIOAuthTokens } =
      require('../utils/auth.js') as typeof import('../utils/auth.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    const oauthToken = getClaudeAIOAuthTokens()?.accessToken
    if (!oauthToken) {
      logForDebugging('[trusted-device] No OAuth token, skipping enrollment')
      return
    }

    // No "already enrolled" short-circuit: on /login the stored token may
    // belong to a different account (account switch without /logout), and
    // keeping it would send the old account's token on the new account's
    // bridge calls.
    const storage = getSecureStorage()
    if (isEssentialTrafficOnly()) {
      logForDebugging(
        '[trusted-device] Essential traffic only, skipping enrollment',
      )
      return
    }

    const apiBase = getOauthConfig().BASE_API_URL
    let reply
    try {
      reply = await axios.post<{
        device_token?: string
        device_id?: string
      }>(
        `${apiBase}/api/auth/trusted_devices`,
        { display_name: `Claude Code on ${hostname()} · ${process.platform}` },
        {
          headers: {
            Authorization: `Bearer ${oauthToken}`,
            'Content-Type': 'application/json',
          },
          timeout: 10_000,
          // 4xx responses resolve so they can be logged with their body
          // below; only 5xx (and transport errors) reject.
          validateStatus: status => status < 500,
        },
      )
    } catch (requestErr: unknown) {
      logForDebugging(
        `[trusted-device] Enrollment request failed: ${errorMessage(requestErr)}`,
      )
      return
    }

    const accepted = reply.status === 200 || reply.status === 201
    if (!accepted) {
      logForDebugging(
        `[trusted-device] Enrollment failed ${reply.status}: ${jsonStringify(reply.data).slice(0, 200)}`,
      )
      return
    }

    const deviceToken = reply.data?.device_token
    if (!deviceToken || typeof deviceToken !== 'string') {
      logForDebugging(
        '[trusted-device] Enrollment response missing device_token field',
      )
      return
    }

    try {
      const stored = storage.read()
      if (!stored) {
        logForDebugging(
          '[trusted-device] Cannot read storage, skipping token persist',
        )
      } else {
        stored.trustedDeviceToken = deviceToken
        const outcome = storage.update(stored)
        if (outcome.success) {
          // Make the next getTrustedDeviceToken() pick up the new token.
          readStoredToken.cache?.clear?.()
          logForDebugging(
            `[trusted-device] Enrolled device_id=${reply.data.device_id ?? 'unknown'}`,
          )
        } else {
          logForDebugging(
            `[trusted-device] Failed to persist token: ${outcome.warning ?? 'unknown'}`,
          )
        }
      }
    } catch (storageErr: unknown) {
      logForDebugging(
        `[trusted-device] Storage write failed: ${errorMessage(storageErr)}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(`[trusted-device] Enrollment error: ${errorMessage(err)}`)
  }
}

262
src/bridge/types.ts Normal file
View File

@@ -0,0 +1,262 @@
/**
* Default per-session timeout (24 hours), in milliseconds — the same unit as
* BridgeConfig.sessionTimeoutMs.
* NOTE(review): presumably the fallback when sessionTimeoutMs is unset —
* confirm at the use site.
*/
export const DEFAULT_SESSION_TIMEOUT_MS = 24 * 60 * 60 * 1000
/** Reusable login guidance appended to bridge auth errors. */
export const BRIDGE_LOGIN_INSTRUCTION =
'Remote Control is only available with claude.ai subscriptions. Please use `/login` to sign in with your claude.ai account.'
/**
* Full error printed when `claude remote-control` is run without auth.
* Composed of a fixed headline, a blank line, then BRIDGE_LOGIN_INSTRUCTION.
*/
export const BRIDGE_LOGIN_ERROR =
'Error: You must be logged in to use Remote Control.\n\n' +
BRIDGE_LOGIN_INSTRUCTION
/** Shown when the user disconnects Remote Control (via /remote-control or ultraplan launch). */
export const REMOTE_CONTROL_DISCONNECTED_MSG = 'Remote Control disconnected.'
// --- Protocol types for the environments API ---
/** Payload of a work item (the `data` field of WorkResponse). */
export type WorkData = {
// Kind of work being dispatched.
type: 'session' | 'healthcheck'
// NOTE(review): presumably the session ID for 'session' work (tagged `cse_*`
// under the v2 compat gate) — confirm against the poll handler.
id: string
}
/** A work item as returned by BridgeApiClient.pollForWork(). */
export type WorkResponse = {
// Work item ID. NOTE(review): presumably the `workId` passed to
// acknowledgeWork/stopWork/heartbeatWork — confirm.
id: string
type: 'work'
environment_id: string
// Opaque to this type; the set of possible values is not visible here.
state: string
data: WorkData
// NOTE(review): presumably decodes to a WorkSecret — confirm against the decoder.
secret: string // base64url-encoded JSON
// NOTE(review): timestamp format not visible here (likely ISO 8601) — confirm.
created_at: string
}
/**
* Per-work-item secret bundle.
* NOTE(review): presumably the decoded form of WorkResponse.secret — confirm
* against the decoder; field semantics below are hedged where this file does
* not establish them.
*/
export type WorkSecret = {
version: number
// NOTE(review): looks like the token handed to the child as its session
// access token (SessionHandle.accessToken is documented as the
// session_ingress_token) — confirm.
session_ingress_token: string
api_base_url: string
// Source descriptors; `git_info` is optional per source and may carry its own token.
sources: Array<{
type: string
git_info?: { type: string; repo: string; ref?: string; token?: string }
}>
auth: Array<{ type: string; token: string }>
// The three fields below may be absent or explicitly null.
claude_code_args?: Record<string, string> | null
mcp_config?: unknown | null
environment_variables?: Record<string, string> | null
/**
* Server-driven CCR v2 selector. Set by prepare_work_secret() when the
* session was created via the v2 compat layer (ccr_v2_compat_enabled).
* Same field the BYOC runner reads at environment-runner/sessionExecutor.ts.
*/
use_code_sessions?: boolean
}
/**
* Final outcome of a session's child process, as resolved by
* sessionRunner.ts: 'interrupted' when the child closed on SIGTERM/SIGINT,
* 'completed' on exit code 0, 'failed' for any other close or a spawn error.
*/
export type SessionDoneStatus = 'completed' | 'failed' | 'interrupted'
/**
* Kinds of activity extracted from the child's stdout by sessionRunner.ts:
* 'tool_start' for an assistant tool_use block, 'text' for a non-empty
* assistant text block, 'result' for a result message with subtype
* 'success', and 'error' for a result message with any other subtype.
*/
export type SessionActivityType = 'tool_start' | 'text' | 'result' | 'error'
/** One entry in a session's activity ring buffer. */
export type SessionActivity = {
type: SessionActivityType
summary: string // e.g. "Editing src/foo.ts", "Reading package.json"
// Date.now() (epoch milliseconds) captured when the stdout line was parsed.
timestamp: number
}
/**
* How `claude remote-control` chooses session working directories.
* - `single-session`: one session in cwd, bridge tears down when it ends
* - `worktree`: persistent server, every session gets an isolated git worktree
* - `same-dir`: persistent server, every session shares cwd (can stomp each other)
*
* Carried on BridgeConfig.spawnMode.
*/
export type SpawnMode = 'single-session' | 'worktree' | 'same-dir'
/**
* Well-known worker_type values THIS codebase produces. Sent as
* `metadata.worker_type` at environment registration so claude.ai can filter
* the session picker by origin (e.g. assistant tab only shows assistant
* workers). The backend treats this as an opaque string — desktop cowork
* sends `"cowork"`, which isn't in this union. REPL code uses this narrow
* type for its own exhaustiveness; wire-level fields accept any string.
*
* Note that BridgeConfig.workerType is typed as plain `string`, not this
* union, for that reason.
*/
export type BridgeWorkerType = 'claude_code' | 'claude_code_assistant'
/** Configuration for one bridge instance, passed to BridgeApiClient.registerBridgeEnvironment(). */
export type BridgeConfig = {
// NOTE(review): presumably the working directory the bridge serves (cwd for
// the non-worktree spawn modes) — confirm.
dir: string
// NOTE(review): presumably a human-readable name for this machine — confirm.
machineName: string
// NOTE(review): presumably the current git branch of `dir` — confirm.
branch: string
// Null when there is no repo URL to report.
gitRepoUrl: string | null
// NOTE(review): presumably the cap on concurrently running sessions — confirm.
maxSessions: number
/** How session working directories are chosen; see SpawnMode. */
spawnMode: SpawnMode
verbose: boolean
sandbox: boolean
/** Client-generated UUID identifying this bridge instance. */
bridgeId: string
/**
* Sent as metadata.worker_type so web clients can filter by origin.
* Backend treats this as opaque — any string, not just BridgeWorkerType.
*/
workerType: string
/** Client-generated UUID for idempotent environment registration. */
environmentId: string
/**
* Backend-issued environment_id to reuse on re-register. When set, the
* backend treats registration as a reconnect to the existing environment
* instead of creating a new one. Used by `claude remote-control
* --session-id` resume. Must be a backend-format ID — client UUIDs are
* rejected with 400.
*/
reuseEnvironmentId?: string
/** API base URL the bridge is connected to (used for polling). */
apiBaseUrl: string
/** Session ingress base URL for WebSocket connections (may differ from apiBaseUrl locally). */
sessionIngressUrl: string
/** Debug file path passed via --debug-file. */
debugFile?: string
/**
* Per-session timeout in milliseconds. Sessions exceeding this are killed.
* NOTE(review): presumably falls back to DEFAULT_SESSION_TIMEOUT_MS when
* omitted — confirm.
*/
sessionTimeoutMs?: number
}
// --- Dependency interfaces (for testability) ---
/**
* A control_response event sent back to a session (e.g. a permission decision).
* The `subtype` is `'success'` per the SDK protocol; the inner `response`
* carries the permission decision payload (e.g. `{ behavior: 'allow' }`).
*
* Sent via BridgeApiClient.sendPermissionResponseEvent().
*/
export type PermissionResponseEvent = {
type: 'control_response'
response: {
subtype: 'success'
// NOTE(review): presumably the request_id of the control_request being
// answered — confirm against the sender.
request_id: string
// Decision payload; its shape is not constrained by this type.
response: Record<string, unknown>
}
}
/**
* Operations the bridge performs against the backend API. Declared as a
* plain type so an alternative implementation can be substituted (this sits
* under the "Dependency interfaces (for testability)" section).
*/
export type BridgeApiClient = {
/**
* Register the bridge environment described by `config`. Resolves with the
* `environment_id` and `environment_secret` that later calls (e.g.
* `pollForWork`) take as arguments.
*/
registerBridgeEnvironment(config: BridgeConfig): Promise<{
environment_id: string
environment_secret: string
}>
/**
* Poll for a work item for the given environment. Resolves to a
* `WorkResponse`, or null (presumably when no work is available — confirm).
* `signal` lets the caller abort the request.
* NOTE(review): the exact semantics of `reclaimOlderThanMs` are not visible
* from this declaration — confirm against the implementation.
*/
pollForWork(
environmentId: string,
environmentSecret: string,
signal?: AbortSignal,
reclaimOlderThanMs?: number,
): Promise<WorkResponse | null>
/**
* Acknowledge a work item for the given environment.
* NOTE(review): `sessionToken` is presumably the session ingress token used
* to authenticate the call — confirm.
*/
acknowledgeWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<void>
/** Stop a work item via the environments API. */
stopWork(environmentId: string, workId: string, force: boolean): Promise<void>
/** Deregister/delete the bridge environment on graceful shutdown. */
deregisterEnvironment(environmentId: string): Promise<void>
/** Send a permission response (control_response) to a session via the session events API. */
sendPermissionResponseEvent(
sessionId: string,
event: PermissionResponseEvent,
sessionToken: string,
): Promise<void>
/** Archive a session so it no longer appears as active on the server. */
archiveSession(sessionId: string): Promise<void>
/**
* Force-stop stale worker instances and re-queue a session on an environment.
* Used by `--session-id` to resume a session after the original bridge died.
*/
reconnectSession(environmentId: string, sessionId: string): Promise<void>
/**
* Send a lightweight heartbeat for an active work item, extending its lease.
* Uses SessionIngressAuth (JWT, no DB hit) instead of EnvironmentSecretAuth.
* Returns the server's response with lease status.
*/
heartbeatWork(
environmentId: string,
workId: string,
sessionToken: string,
): Promise<{ lease_extended: boolean; state: string }>
}
/**
* Handle to a spawned session, as returned by `SessionSpawner.spawn`.
* Exposes the session's completion promise, termination controls, recent
* activity/stderr buffers, and a way to write to the child's stdin.
*/
export type SessionHandle = {
sessionId: string
/** Settles with the session's final `SessionDoneStatus`. */
done: Promise<SessionDoneStatus>
/** NOTE(review): presumably requests graceful termination — confirm against the spawner. */
kill(): void
/** NOTE(review): presumably terminates without waiting for a graceful exit — confirm. */
forceKill(): void
activities: SessionActivity[] // ring buffer of recent activities (last ~10)
currentActivity: SessionActivity | null // most recent
accessToken: string // session_ingress_token for API calls
lastStderr: string[] // ring buffer of last stderr lines
writeStdin(data: string): void // write directly to child stdin
/** Update the access token for a running session (e.g. after token refresh). */
updateAccessToken(token: string): void
}
/** Options passed to `SessionSpawner.spawn` when starting a session. */
export type SessionSpawnOpts = {
/** ID of the session to spawn; also exposed as `SessionHandle.sessionId`. */
sessionId: string
/** NOTE(review): presumably the SDK URL the child connects to for this session — confirm. */
sdkUrl: string
/** Initial access token for the session (cf. `SessionHandle.accessToken`). */
accessToken: string
/** When true, spawn the child with CCR v2 env vars (SSE transport + CCRClient). */
useCcrV2?: boolean
/** Required when useCcrV2 is true. Obtained from POST /worker/register. */
workerEpoch?: number
/**
* Fires once with the text of the first real user message seen on the
* child's stdout (via --replay-user-messages). Lets the caller derive a
* session title when none exists yet. Tool-result and synthetic user
* messages are skipped.
*/
onFirstUserMessage?: (text: string) => void
}
/**
* Factory for sessions: given spawn options and a directory, starts a
* session and returns its `SessionHandle` synchronously.
*/
export type SessionSpawner = {
/** NOTE(review): `dir` is presumably the working directory for the spawned child — confirm. */
spawn(opts: SessionSpawnOpts, dir: string): SessionHandle
}
/**
* Logging and live status-display surface used by the bridge. Combines
* one-shot log lines (`log*`) with methods that mutate the status display
* (`update*`, `set*`, session list management).
*/
export type BridgeLogger = {
/** Print the startup banner for the given config and environment ID. */
printBanner(config: BridgeConfig, environmentId: string): void
/** Log that a session started. NOTE(review): `prompt` is presumably the initial user prompt — confirm. */
logSessionStart(sessionId: string, prompt: string): void
/** Log that a session completed, with its duration in milliseconds. */
logSessionComplete(sessionId: string, durationMs: number): void
/** Log that a session failed, with an error description. */
logSessionFailed(sessionId: string, error: string): void
/** Log a status message. */
logStatus(message: string): void
/** Log a verbose message. NOTE(review): presumably only shown in verbose mode — confirm. */
logVerbose(message: string): void
/** Log an error message. */
logError(message: string): void
/** Log a reconnection success event after recovering from connection errors. */
logReconnected(disconnectedMs: number): void
/** Show idle status with repo/branch info and shimmer animation. */
updateIdleStatus(): void
/** Show reconnecting status in the live display. */
updateReconnectingStatus(delayStr: string, elapsedStr: string): void
/**
* Update the status shown for a session.
* NOTE(review): `elapsed` is presumably a preformatted duration string and
* `trail` a list of recent activity lines — confirm against the implementation.
*/
updateSessionStatus(
sessionId: string,
elapsed: string,
activity: SessionActivity,
trail: string[],
): void
/** Clear the status display. */
clearStatus(): void
/** Set repository info for status line display. */
setRepoInfo(repoName: string, branch: string): void
/** Set debug log glob shown above the status line (ant users). */
setDebugLogPath(path: string): void
/** Transition to "Attached" state when a session starts. */
setAttached(sessionId: string): void
/** Show failed status in the live display. */
updateFailedStatus(error: string): void
/** Toggle QR code visibility. */
toggleQr(): void
/** Update the "<n> of <m> sessions" indicator and spawn mode hint. */
updateSessionCount(active: number, max: number, mode: SpawnMode): void
/** Update the spawn mode shown in the session-count line. Pass null to hide (single-session or toggle unavailable). */
setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void
/** Register a new session for multi-session display (called after spawn succeeds). */
addSession(sessionId: string, url: string): void
/** Update the per-session activity summary (tool being run) in the multi-session list. */
updateSessionActivity(sessionId: string, activity: SessionActivity): void
/**
* Set a session's display title. In multi-session mode, updates the bullet list
* entry. In single-session mode, also shows the title in the main status line.
* Triggers a render (guarded against reconnecting/failed states).
*/
setSessionTitle(sessionId: string, title: string): void
/** Remove a session from the multi-session display when it ends. */
removeSession(sessionId: string): void
/** Force a re-render of the status display (for multi-session activity refresh). */
refreshDisplay(): void
}

127
src/bridge/workSecret.ts Normal file
View File

@@ -0,0 +1,127 @@
import axios from 'axios'
import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
import type { WorkSecret } from './types.js'
/**
 * Decode a base64url-encoded work secret and validate it.
 *
 * The secret is base64url-decoded as UTF-8, parsed as JSON, and then checked
 * for the fields this module relies on.
 *
 * @param secret - base64url-encoded JSON payload.
 * @returns The parsed payload, typed as a `WorkSecret`.
 * @throws Error if the payload is not an object with `version === 1`, if
 *   `session_ingress_token` is missing/empty/not a string, or if
 *   `api_base_url` is not a string.
 */
export function decodeWorkSecret(secret: string): WorkSecret {
  const decoded: unknown = jsonParse(
    Buffer.from(secret, 'base64url').toString('utf-8'),
  )

  // Only a non-null object can carry the fields we need; anything else is
  // treated the same as a payload with no version at all.
  const fields =
    typeof decoded === 'object' && decoded !== null
      ? (decoded as Record<string, unknown>)
      : undefined

  if (fields === undefined || !('version' in fields) || fields.version !== 1) {
    const reported =
      fields !== undefined && 'version' in fields ? fields.version : 'unknown'
    throw new Error(`Unsupported work secret version: ${reported}`)
  }

  const ingressToken = fields.session_ingress_token
  if (typeof ingressToken !== 'string' || ingressToken.length === 0) {
    throw new Error(
      'Invalid work secret: missing or empty session_ingress_token',
    )
  }

  if (typeof fields.api_base_url !== 'string') {
    throw new Error('Invalid work secret: missing api_base_url')
  }

  return decoded as WorkSecret
}
/**
 * Build a WebSocket SDK URL from the API base URL and session ID.
 * Strips the HTTP(S) protocol and trailing slashes, then constructs a
 * ws(s):// ingress URL.
 *
 * Uses ws:// and /v2/ for local targets (direct to session-ingress, no Envoy
 * rewrite) and wss:// and /v1/ for everything else (Envoy rewrites
 * /v1/ → /v2/).
 *
 * A target is local when its hostname is `localhost`, a `*.localhost`
 * subdomain, or `127.0.0.1`. The decision is made on the hostname only:
 * a substring match over the whole URL would also fire for remote hosts such
 * as `localhost.example.com` or for paths containing "localhost", silently
 * downgrading those connections to plaintext ws://.
 *
 * @param apiBaseUrl - API base URL, with or without an http(s):// prefix.
 * @param sessionId - Session ID appended as the final path segment.
 * @returns The session-ingress WebSocket URL.
 */
export function buildSdkUrl(apiBaseUrl: string, sessionId: string): string {
  const host = apiBaseUrl.replace(/^https?:\/\//, '').replace(/\/+$/, '')

  // The authority is everything before the first path/query/fragment delimiter.
  const authority = host.split(/[/?#]/, 1)[0] ?? ''
  const hostname = authority
    .slice(authority.lastIndexOf('@') + 1) // drop any userinfo
    .replace(/:\d*$/, '') // drop any port
    .toLowerCase() // hostnames are case-insensitive

  const isLocalhost =
    hostname === 'localhost' ||
    hostname.endsWith('.localhost') ||
    hostname === '127.0.0.1'

  const protocol = isLocalhost ? 'ws' : 'wss'
  const version = isLocalhost ? 'v2' : 'v1'
  return `${protocol}://${host}/${version}/session_ingress/ws/${sessionId}`
}
/**
 * Decide whether two session IDs refer to the same session, ignoring any
 * tagged-ID prefix.
 *
 * Tagged IDs look like {tag}_{body} or {tag}_staging_{body}, with the body
 * encoding a UUID. CCR v2's compat layer hands `session_*` IDs to v1 API
 * clients (compat/convert.go:41) while the infrastructure layer
 * (sandbox-gateway work queue, work poll response) uses `cse_*`
 * (compat/CLAUDE.md:13); both wrap the same underlying UUID.
 *
 * Without this comparison, replBridge rejects its own session as "foreign"
 * at the work-received check when the ccr_v2_compat_enabled gate is on.
 *
 * @param a - First session ID (tagged or bare).
 * @param b - Second session ID (tagged or bare).
 * @returns true when the IDs are identical, or when their bodies match and
 *   are at least 4 characters long.
 */
export function sameSessionId(a: string, b: string): boolean {
  if (a === b) {
    return true
  }

  // The body is whatever follows the final underscore, which covers both
  // `{tag}_{body}` and `{tag}_staging_{body}`. For an ID without any
  // underscore, lastIndexOf yields -1 and the whole string is the body.
  const bodyOf = (id: string): string => id.substring(id.lastIndexOf('_') + 1)

  const body = bodyOf(a)
  // Very short bodies (e.g. leftovers of a malformed ID) are too weak a
  // signal to call two different strings the same session.
  if (body.length < 4) {
    return false
  }
  return body === bodyOf(b)
}
/**
 * Build the CCR v2 session URL for a session.
 *
 * In contrast to buildSdkUrl, the result keeps the scheme of `apiBaseUrl`
 * (HTTP(S), not ws://) and targets /v1/code/sessions/{id}; the child CC
 * derives the SSE stream path and worker endpoints from this base.
 *
 * @param apiBaseUrl - API base URL; trailing slashes are removed.
 * @param sessionId - Session ID appended as the final path segment.
 * @returns `{apiBaseUrl}/v1/code/sessions/{sessionId}`.
 */
export function buildCCRv2SdkUrl(
  apiBaseUrl: string,
  sessionId: string,
): string {
  const trimmedBase = apiBaseUrl.replace(/\/+$/, '')
  return trimmedBase + '/v1/code/sessions/' + sessionId
}
/**
 * Register this bridge as the worker for a CCR v2 session.
 * Returns the worker_epoch, which must be passed to the child CC process
 * so its CCRClient can include it in every heartbeat/state/event request.
 *
 * Mirrors what environment-manager does in the container path
 * (api-go/environment-manager/cmd/cmd_task_run.go RegisterWorker).
 *
 * @param sessionUrl - CCR v2 session base URL; `/worker/register` is appended.
 * @param accessToken - Bearer token sent in the Authorization header.
 * @returns The worker epoch as a safe integer.
 * @throws Error if the response's `worker_epoch` is missing, blank, or not a
 *   safe integer. Errors raised by the HTTP request itself propagate as-is.
 */
export async function registerWorker(
  sessionUrl: string,
  accessToken: string,
): Promise<number> {
  const response = await axios.post(
    `${sessionUrl}/worker/register`,
    {},
    {
      headers: {
        Authorization: `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
        'anthropic-version': '2023-06-01',
      },
      timeout: 10_000,
    },
  )
  // protojson serializes int64 as a string to avoid JS number precision loss;
  // the Go side may also return a number depending on encoder settings.
  const raw: unknown = response.data?.worker_epoch
  // Number('') and Number('   ') are 0, so a blank string would otherwise be
  // accepted as epoch 0. Map blank strings to NaN so validation rejects them.
  const epoch =
    typeof raw === 'string'
      ? raw.trim() === ''
        ? Number.NaN
        : Number(raw)
      : raw
  // isSafeInteger already rejects NaN, ±Infinity and non-integers.
  if (typeof epoch !== 'number' || !Number.isSafeInteger(epoch)) {
    throw new Error(
      `registerWorker: invalid worker_epoch in response: ${jsonStringify(response.data)}`,
    )
  }
  return epoch
}