chore: initialize recovered claude workspace
This commit is contained in:
135
src/bridge/bridgeDebug.ts
Normal file
135
src/bridge/bridgeDebug.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { logForDebugging } from '../utils/debug.js'
|
||||
import { BridgeFatalError } from './bridgeApi.js'
|
||||
import type { BridgeApiClient } from './types.js'
|
||||
|
||||
/**
|
||||
* Ant-only fault injection for manually testing bridge recovery paths.
|
||||
*
|
||||
* Real failure modes this targets (BQ 2026-03-12, 7-day window):
|
||||
* poll 404 not_found_error — 147K sessions/week, dead onEnvironmentLost gate
|
||||
* ws_closed 1002/1006 — 22K sessions/week, zombie poll after close
|
||||
* register transient failure — residual: network blips during doReconnect
|
||||
*
|
||||
* Usage: /bridge-kick <subcommand> from the REPL while Remote Control is
|
||||
* connected, then tail debug.log to watch the recovery machinery react.
|
||||
*
|
||||
* Module-level state is intentional here: one bridge per REPL process, the
|
||||
* /bridge-kick slash command has no other way to reach into initBridgeCore's
|
||||
* closures, and teardown clears the slot.
|
||||
*/
|
||||
|
||||
/** One-shot fault to inject on the next matching api call. */
|
||||
type BridgeFault = {
|
||||
method:
|
||||
| 'pollForWork'
|
||||
| 'registerBridgeEnvironment'
|
||||
| 'reconnectSession'
|
||||
| 'heartbeatWork'
|
||||
/** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
|
||||
* errors surface as plain axios rejections (5xx / network). Recovery code
|
||||
* distinguishes the two: fatal → teardown, transient → retry/backoff. */
|
||||
kind: 'fatal' | 'transient'
|
||||
status: number
|
||||
errorType?: string
|
||||
/** Remaining injections. Decremented on consume; removed at 0. */
|
||||
count: number
|
||||
}
|
||||
|
||||
export type BridgeDebugHandle = {
|
||||
/** Invoke the transport's permanent-close handler directly. Tests the
|
||||
* ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
|
||||
fireClose: (code: number) => void
|
||||
/** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
|
||||
* reachable from the slash command. */
|
||||
forceReconnect: () => void
|
||||
/** Queue a fault for the next N calls to the named api method. */
|
||||
injectFault: (fault: BridgeFault) => void
|
||||
/** Abort the at-capacity sleep so an injected poll fault lands
|
||||
* immediately instead of up to 10min later. */
|
||||
wakePollLoop: () => void
|
||||
/** env/session IDs for the debug.log grep. */
|
||||
describe: () => string
|
||||
}
|
||||
|
||||
let debugHandle: BridgeDebugHandle | null = null
|
||||
const faultQueue: BridgeFault[] = []
|
||||
|
||||
export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
|
||||
debugHandle = h
|
||||
}
|
||||
|
||||
export function clearBridgeDebugHandle(): void {
|
||||
debugHandle = null
|
||||
faultQueue.length = 0
|
||||
}
|
||||
|
||||
export function getBridgeDebugHandle(): BridgeDebugHandle | null {
|
||||
return debugHandle
|
||||
}
|
||||
|
||||
export function injectBridgeFault(fault: BridgeFault): void {
|
||||
faultQueue.push(fault)
|
||||
logForDebugging(
|
||||
`[bridge:debug] Queued fault: ${fault.method} ${fault.kind}/${fault.status}${fault.errorType ? `/${fault.errorType}` : ''} ×${fault.count}`,
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap a BridgeApiClient so each call first checks the fault queue. If a
|
||||
* matching fault is queued, throw the specified error instead of calling
|
||||
* through. Delegates everything else to the real client.
|
||||
*
|
||||
* Only called when USER_TYPE === 'ant' — zero overhead in external builds.
|
||||
*/
|
||||
export function wrapApiForFaultInjection(
|
||||
api: BridgeApiClient,
|
||||
): BridgeApiClient {
|
||||
function consume(method: BridgeFault['method']): BridgeFault | null {
|
||||
const idx = faultQueue.findIndex(f => f.method === method)
|
||||
if (idx === -1) return null
|
||||
const fault = faultQueue[idx]!
|
||||
fault.count--
|
||||
if (fault.count <= 0) faultQueue.splice(idx, 1)
|
||||
return fault
|
||||
}
|
||||
|
||||
function throwFault(fault: BridgeFault, context: string): never {
|
||||
logForDebugging(
|
||||
`[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
|
||||
)
|
||||
if (fault.kind === 'fatal') {
|
||||
throw new BridgeFatalError(
|
||||
`[injected] ${context} ${fault.status}`,
|
||||
fault.status,
|
||||
fault.errorType,
|
||||
)
|
||||
}
|
||||
// Transient: mimic an axios rejection (5xx / network). No .status on
|
||||
// the error itself — that's how the catch blocks distinguish.
|
||||
throw new Error(`[injected transient] ${context} ${fault.status}`)
|
||||
}
|
||||
|
||||
return {
|
||||
...api,
|
||||
async pollForWork(envId, secret, signal, reclaimMs) {
|
||||
const f = consume('pollForWork')
|
||||
if (f) throwFault(f, 'Poll')
|
||||
return api.pollForWork(envId, secret, signal, reclaimMs)
|
||||
},
|
||||
async registerBridgeEnvironment(config) {
|
||||
const f = consume('registerBridgeEnvironment')
|
||||
if (f) throwFault(f, 'Registration')
|
||||
return api.registerBridgeEnvironment(config)
|
||||
},
|
||||
async reconnectSession(envId, sessionId) {
|
||||
const f = consume('reconnectSession')
|
||||
if (f) throwFault(f, 'ReconnectSession')
|
||||
return api.reconnectSession(envId, sessionId)
|
||||
},
|
||||
async heartbeatWork(envId, workId, token) {
|
||||
const f = consume('heartbeatWork')
|
||||
if (f) throwFault(f, 'Heartbeat')
|
||||
return api.heartbeatWork(envId, workId, token)
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user