chore: initialize recovered claude workspace

This commit is contained in:
2026-04-02 15:29:01 +08:00
commit a10efa3b4b
1940 changed files with 506426 additions and 0 deletions

View File

@@ -0,0 +1,523 @@
/**
* Download functionality for native installer
*
* Handles downloading Claude binaries from various sources:
* - Artifactory NPM packages
* - GCS bucket
*/
import { feature } from 'bun:bundle'
import axios from 'axios'
import { createHash } from 'crypto'
import { chmod, writeFile } from 'fs/promises'
import { join } from 'path'
import { logEvent } from 'src/services/analytics/index.js'
import type { ReleaseChannel } from '../config.js'
import { logForDebugging } from '../debug.js'
import { toError } from '../errors.js'
import { execFileNoThrowWithCwd } from '../execFileNoThrow.js'
import { getFsImplementation } from '../fsOperations.js'
import { logError } from '../log.js'
import { sleep } from '../sleep.js'
import { jsonStringify, writeFileSync_DEPRECATED } from '../slowOperations.js'
import { getBinaryName, getPlatform } from './installer.js'
// Base URL of the release bucket. In this file it is passed as `baseUrl` to
// getLatestVersionFromBinaryRepo / downloadVersionFromBinaryRepo whenever
// USER_TYPE is not 'ant'.
const GCS_BUCKET_URL =
'https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/claude-code-releases'
// npm registry URL handed to every `npm view` / `npm ci` invocation in this
// file through the `--registry` flag.
export const ARTIFACTORY_REGISTRY_URL =
'https://artifactory.infra.ant.dev/artifactory/api/npm/npm-all/'
/**
 * Resolves the version published under an npm dist-tag by running
 * `npm view <package>@<tag> version` against the Artifactory registry.
 *
 * Emits `tengu_version_check_success` / `tengu_version_check_failure` with the
 * measured latency.
 *
 * @param tag - npm dist-tag to resolve; defaults to `latest`.
 * @returns The trimmed stdout of `npm view`.
 * @throws Error when `npm view` exits with a non-zero code (the error is also
 *   passed to `logError`).
 */
export async function getLatestVersionFromArtifactory(
  tag: string = 'latest',
): Promise<string> {
  const startedAt = Date.now()
  const npmArgs = [
    'view',
    `${MACRO.NATIVE_PACKAGE_URL}@${tag}`,
    'version',
    '--prefer-online',
    '--registry',
    ARTIFACTORY_REGISTRY_URL,
  ]
  const npmResult = await execFileNoThrowWithCwd('npm', npmArgs, {
    timeout: 30000,
    preserveOutputOnError: true,
  })
  const elapsedMs = Date.now() - startedAt

  if (npmResult.code === 0) {
    logEvent('tengu_version_check_success', {
      latency_ms: elapsedMs,
      source_npm: true,
    })
    logForDebugging(
      `npm view ${MACRO.NATIVE_PACKAGE_URL}@${tag} version: ${npmResult.stdout}`,
    )
    return npmResult.stdout.trim()
  }

  // Non-zero exit: record the failure, then surface it to the caller.
  logEvent('tengu_version_check_failure', {
    latency_ms: elapsedMs,
    source_npm: true,
    exit_code: npmResult.code,
  })
  const failure = new Error(
    `npm view failed with code ${npmResult.code}: ${npmResult.stderr}`,
  )
  logError(failure)
  throw failure
}
/**
 * Fetches the version string stored at `${baseUrl}/${channel}` as plain text.
 *
 * Emits `tengu_version_check_success` / `tengu_version_check_failure` with the
 * measured latency (and, on failure, the HTTP status when one is available).
 *
 * @param channel - Release channel whose pointer file is fetched; defaults to `latest`.
 * @param baseUrl - Root URL of the binary repository.
 * @param authConfig - Optional extra axios config spread into the request.
 * @returns The trimmed response body.
 * @throws Error wrapping the underlying failure message (also passed to `logError`).
 */
export async function getLatestVersionFromBinaryRepo(
  channel: ReleaseChannel = 'latest',
  baseUrl: string,
  authConfig?: { auth: { username: string; password: string } },
): Promise<string> {
  const startedAt = Date.now()
  try {
    const { data } = await axios.get(`${baseUrl}/${channel}`, {
      timeout: 30000,
      responseType: 'text',
      ...authConfig,
    })
    const elapsedMs = Date.now() - startedAt
    logEvent('tengu_version_check_success', {
      latency_ms: elapsedMs,
    })
    return data.trim()
  } catch (error) {
    const elapsedMs = Date.now() - startedAt
    const reason = error instanceof Error ? error.message : String(error)
    // Only axios errors that actually received a response carry a status.
    const httpStatus: number | undefined =
      axios.isAxiosError(error) && error.response
        ? error.response.status
        : undefined
    logEvent('tengu_version_check_failure', {
      latency_ms: elapsedMs,
      http_status: httpStatus,
      is_timeout: reason.includes('timeout'),
    })
    const wrapped = new Error(
      `Failed to fetch version from ${baseUrl}/${channel}: ${reason}`,
    )
    logError(wrapped)
    throw wrapped
  }
}
/**
 * Turns a user-supplied channel name or explicit version into a concrete
 * version string.
 *
 * - An explicit version (optionally `v`-prefixed, optionally with a `-suffix`
 *   such as `1.0.30-dev.shaf4937ce`) is returned with the `v` stripped.
 * - `stable` / `latest` are resolved via Artifactory when `USER_TYPE` is
 *   `ant`, otherwise via the GCS bucket.
 *
 * @param channelOrVersion - `stable`, `latest`, or a version string.
 * @throws Error for 99.99.x versions (unless ALLOW_TEST_VERSIONS is enabled)
 *   and for any other unrecognised channel name.
 */
export async function getLatestVersion(
  channelOrVersion: string,
): Promise<string> {
  const isExplicitVersion = /^v?\d+\.\d+\.\d+(-\S+)?$/.test(channelOrVersion)

  if (!isExplicitVersion) {
    // ReleaseChannel validation
    const channel = channelOrVersion as ReleaseChannel
    if (!(channel === 'stable' || channel === 'latest')) {
      throw new Error(
        `Invalid channel: ${channelOrVersion}. Use 'stable' or 'latest'`,
      )
    }
    if (process.env.USER_TYPE === 'ant') {
      // Ant users resolve through the Artifactory npm dist-tag.
      return getLatestVersionFromArtifactory(
        channel === 'stable' ? 'stable' : 'latest',
      )
    }
    // Everyone else reads the channel pointer from GCS.
    return getLatestVersionFromBinaryRepo(channel, GCS_BUCKET_URL)
  }

  const normalized = channelOrVersion.startsWith('v')
    ? channelOrVersion.slice(1)
    : channelOrVersion
  // 99.99.x is reserved for CI smoke-test fixtures on real GCS.
  // feature() is false in all shipped builds — DCE collapses this to an
  // unconditional throw. Only `bun --feature=ALLOW_TEST_VERSIONS` (the
  // smoke test's source-level invocation) bypasses.
  if (/^99\.99\./.test(normalized) && !feature('ALLOW_TEST_VERSIONS')) {
    throw new Error(
      `Version ${normalized} is not available for installation. Use 'stable' or 'latest'.`,
    )
  }
  return normalized
}
/**
 * Downloads the native package for `version` from Artifactory by running
 * `npm ci` inside a throwaway npm project created at `stagingPath`.
 *
 * Steps: wipe `stagingPath`, look up `dist.integrity` of the platform-specific
 * package with `npm view`, write a package.json / package-lock.json pair that
 * pins that integrity value, then run `npm ci` with `stagingPath` as cwd.
 *
 * @param version - Version to install.
 * @param stagingPath - Directory that is removed and then recreated.
 * @throws Error if `npm view` or `npm ci` exits non-zero, or if the integrity
 *   output is empty.
 */
export async function downloadVersionFromArtifactory(
  version: string,
  stagingPath: string,
) {
  const fs = getFsImplementation()
  // If we get here, we own the lock and can delete a partial download
  await fs.rm(stagingPath, { recursive: true, force: true })

  // The binary ships in a per-platform companion package.
  const platform = getPlatform()
  const platformPackageName = `${MACRO.NATIVE_PACKAGE_URL}-${platform}`

  logForDebugging(
    `Fetching integrity hash for ${platformPackageName}@${version}`,
  )
  const integrityLookup = await execFileNoThrowWithCwd(
    'npm',
    [
      'view',
      `${platformPackageName}@${version}`,
      'dist.integrity',
      '--registry',
      ARTIFACTORY_REGISTRY_URL,
    ],
    {
      timeout: 30000,
      preserveOutputOnError: true,
    },
  )
  if (integrityLookup.code !== 0) {
    throw new Error(
      `npm view integrity failed with code ${integrityLookup.code}: ${integrityLookup.stderr}`,
    )
  }
  const integrity = integrityLookup.stdout.trim()
  if (integrity === '') {
    throw new Error(
      `Failed to fetch integrity hash for ${platformPackageName}@${version}`,
    )
  }
  logForDebugging(`Got integrity hash for ${platform}: ${integrity}`)

  // Create isolated npm project in staging
  await fs.mkdir(stagingPath)

  // The root project entry is identical in package.json and in the lockfile's
  // '' package, so it is built once and reused.
  const rootManifest = {
    name: 'claude-native-installer',
    version: '0.0.1',
    dependencies: {
      [MACRO.NATIVE_PACKAGE_URL!]: version,
    },
  }
  // Lockfile carrying the integrity value for the platform-specific package.
  const lockfile = {
    name: 'claude-native-installer',
    version: '0.0.1',
    lockfileVersion: 3,
    requires: true,
    packages: {
      '': rootManifest,
      [`node_modules/${MACRO.NATIVE_PACKAGE_URL}`]: {
        version,
        optionalDependencies: {
          [platformPackageName]: version,
        },
      },
      [`node_modules/${platformPackageName}`]: {
        version,
        integrity,
      },
    },
  }

  const stagedFiles: Array<[string, unknown]> = [
    ['package.json', rootManifest],
    ['package-lock.json', lockfile],
  ]
  for (const [fileName, payload] of stagedFiles) {
    writeFileSync_DEPRECATED(
      join(stagingPath, fileName),
      jsonStringify(payload, null, 2),
      { encoding: 'utf8', flush: true },
    )
  }

  // Install with npm - it will verify integrity from package-lock.json
  // Use --prefer-online to force fresh metadata checks, helping with Artifactory replication delays
  const install = await execFileNoThrowWithCwd(
    'npm',
    ['ci', '--prefer-online', '--registry', ARTIFACTORY_REGISTRY_URL],
    {
      timeout: 60000,
      preserveOutputOnError: true,
      cwd: stagingPath,
    },
  )
  if (install.code !== 0) {
    throw new Error(
      `npm ci failed with code ${install.code}: ${install.stderr}`,
    )
  }
  logForDebugging(
    `Successfully downloaded and verified ${MACRO.NATIVE_PACKAGE_URL}@${version}`,
  )
}
// Stall timeout: a download is aborted when no bytes arrive for this long.
const DEFAULT_STALL_TIMEOUT_MS = 60000 // 60 seconds
// Upper bound on download attempts (only stalls are retried).
const MAX_DOWNLOAD_RETRIES = 3

/**
 * Returns the stall timeout in milliseconds.
 *
 * CLAUDE_CODE_STALL_TIMEOUT_MS_FOR_TESTING overrides the default when it
 * parses to a non-zero number; unset, non-numeric, or zero values fall back
 * to DEFAULT_STALL_TIMEOUT_MS.
 */
function getStallTimeoutMs(): number {
  const override = Number(process.env.CLAUDE_CODE_STALL_TIMEOUT_MS_FOR_TESTING)
  return override ? override : DEFAULT_STALL_TIMEOUT_MS
}
/**
 * Error used to represent a download that was aborted by the stall timer.
 * The message text is fixed and always mentions 60 seconds.
 */
class StallTimeoutError extends Error {
  constructor() {
    const message = 'Download stalled: no data received for 60 seconds'
    super(message)
    this.name = 'StallTimeoutError'
  }
}
/**
 * Downloads a binary, checks its SHA-256 against `expectedChecksum`, writes it
 * to `binaryPath` and marks it executable (mode 0o755).
 *
 * A stall timer (see getStallTimeoutMs) is armed before the request and
 * re-armed on every progress callback; when it fires the request is aborted.
 * Only such aborted attempts are retried, up to MAX_DOWNLOAD_RETRIES attempts
 * with a one-second pause in between. Every other failure (HTTP errors,
 * checksum mismatch, write errors) is thrown immediately.
 *
 * @param binaryUrl - URL of the binary.
 * @param expectedChecksum - Expected SHA-256, compared against the hex digest.
 * @param binaryPath - Destination file path.
 * @param requestConfig - Extra axios config, spread last into the request.
 * @throws StallTimeoutError when the final attempt was aborted, otherwise the
 *   underlying error converted with `toError`.
 */
async function downloadAndVerifyBinary(
  binaryUrl: string,
  expectedChecksum: string,
  binaryPath: string,
  requestConfig: Record<string, unknown> = {},
) {
  let failure: Error | undefined
  let attempt = 0
  while (attempt < MAX_DOWNLOAD_RETRIES) {
    attempt += 1
    const abortController = new AbortController()
    let pendingTimer: ReturnType<typeof setTimeout> | undefined
    const disarm = () => {
      if (!pendingTimer) {
        return
      }
      clearTimeout(pendingTimer)
      pendingTimer = undefined
    }
    const rearm = () => {
      disarm()
      pendingTimer = setTimeout(
        c => c.abort(),
        getStallTimeoutMs(),
        abortController,
      )
    }
    try {
      // Arm before the request so a connection that never yields data is caught too.
      rearm()
      const { data } = await axios.get(binaryUrl, {
        timeout: 5 * 60000, // 5 minute total timeout
        responseType: 'arraybuffer',
        signal: abortController.signal,
        // Every received chunk pushes the stall deadline out again.
        onDownloadProgress: () => {
          rearm()
        },
        ...requestConfig,
      })
      disarm()

      // Verify checksum before anything touches the disk.
      const actualChecksum = createHash('sha256').update(data).digest('hex')
      if (actualChecksum !== expectedChecksum) {
        throw new Error(
          `Checksum mismatch: expected ${expectedChecksum}, got ${actualChecksum}`,
        )
      }

      await writeFile(binaryPath, Buffer.from(data))
      await chmod(binaryPath, 0o755)
      return
    } catch (error) {
      disarm()
      // axios reports an aborted signal as a cancellation.
      const stalled = axios.isCancel(error)
      failure = stalled ? new StallTimeoutError() : toError(error)
      // Anything that is not a stall, or a stall on the last attempt, is final.
      if (!stalled || attempt >= MAX_DOWNLOAD_RETRIES) {
        throw failure
      }
      logForDebugging(
        `Download stalled on attempt ${attempt}/${MAX_DOWNLOAD_RETRIES}, retrying...`,
      )
      // Brief pause before retry to let network recover
      await sleep(1000)
    }
  }
  // Should not reach here, but just in case
  throw failure ?? new Error('Download failed after all retries')
}
/**
 * Downloads the binary for `version` from a bucket-style repository into
 * `stagingPath`.
 *
 * Flow: wipe `stagingPath`, fetch `${baseUrl}/${version}/manifest.json`, read
 * the checksum for the current platform from it, then download
 * `${baseUrl}/${version}/${platform}/${binaryName}` and verify it via
 * downloadAndVerifyBinary. Telemetry events are emitted for the attempt, for
 * manifest/download failures, for a missing platform entry, and for success.
 *
 * @param version - Version directory to download from.
 * @param stagingPath - Directory that is removed and then recreated.
 * @param baseUrl - Root URL of the repository.
 * @param authConfig - Optional axios `auth` / `headers`, applied to both requests.
 * @throws The original request error when the manifest fetch or the download
 *   fails; Error when the manifest has no entry for the current platform
 *   (including a manifest that is not an object or lacks a `platforms` map).
 */
export async function downloadVersionFromBinaryRepo(
  version: string,
  stagingPath: string,
  baseUrl: string,
  authConfig?: {
    auth?: { username: string; password: string }
    headers?: Record<string, string>
  },
) {
  const fs = getFsImplementation()
  // If we get here, we own the lock and can delete a partial download
  await fs.rm(stagingPath, { recursive: true, force: true })
  // Get platform
  const platform = getPlatform()
  const startTime = Date.now()
  // Log download attempt start
  logEvent('tengu_binary_download_attempt', {})
  // Fetch manifest to get checksum. The payload is remote data, so its shape
  // is deliberately typed as "maybe missing" at every level.
  let manifest:
    | { platforms?: Record<string, { checksum: string } | undefined> | null }
    | null
    | undefined
  try {
    const manifestResponse = await axios.get(
      `${baseUrl}/${version}/manifest.json`,
      {
        timeout: 10000,
        responseType: 'json',
        ...authConfig,
      },
    )
    manifest = manifestResponse.data
  } catch (error) {
    const latencyMs = Date.now() - startTime
    const errorMessage = error instanceof Error ? error.message : String(error)
    let httpStatus: number | undefined
    if (axios.isAxiosError(error) && error.response) {
      httpStatus = error.response.status
    }
    logEvent('tengu_binary_manifest_fetch_failure', {
      latency_ms: latencyMs,
      http_status: httpStatus,
      is_timeout: errorMessage.includes('timeout'),
    })
    logError(
      new Error(
        `Failed to fetch manifest from ${baseUrl}/${version}/manifest.json: ${errorMessage}`,
      ),
    )
    throw error
  }
  // Guarded lookup: an empty body, an unparsed string, or a manifest without
  // `platforms` must end in the explicit "platform not found" error below
  // rather than an opaque TypeError from indexing undefined.
  const platformInfo = manifest?.platforms?.[platform]
  if (!platformInfo) {
    logEvent('tengu_binary_platform_not_found', {})
    throw new Error(
      `Platform ${platform} not found in manifest for version ${version}`,
    )
  }
  const expectedChecksum = platformInfo.checksum
  // Both GCS and generic bucket use identical layout: ${baseUrl}/${version}/${platform}/${binaryName}
  const binaryName = getBinaryName(platform)
  const binaryUrl = `${baseUrl}/${version}/${platform}/${binaryName}`
  // Write to staging
  await fs.mkdir(stagingPath)
  const binaryPath = join(stagingPath, binaryName)
  try {
    await downloadAndVerifyBinary(
      binaryUrl,
      expectedChecksum,
      binaryPath,
      authConfig || {},
    )
    const latencyMs = Date.now() - startTime
    logEvent('tengu_binary_download_success', {
      latency_ms: latencyMs,
    })
  } catch (error) {
    const latencyMs = Date.now() - startTime
    const errorMessage = error instanceof Error ? error.message : String(error)
    let httpStatus: number | undefined
    if (axios.isAxiosError(error) && error.response) {
      httpStatus = error.response.status
    }
    logEvent('tengu_binary_download_failure', {
      latency_ms: latencyMs,
      http_status: httpStatus,
      is_timeout: errorMessage.includes('timeout'),
      is_checksum_mismatch: errorMessage.includes('Checksum mismatch'),
    })
    logError(
      new Error(`Failed to download binary from ${binaryUrl}: ${errorMessage}`),
    )
    // Rethrow the original error so callers keep its type and details.
    throw error
  }
}
/**
 * Downloads `version` into `stagingPath` from the source appropriate for the
 * current user and reports which mechanism was used.
 *
 * @param version - Version to download.
 * @param stagingPath - Staging directory handed to the chosen downloader.
 * @returns `'npm'` when Artifactory was used (USER_TYPE is `ant`), otherwise
 *   `'binary'`.
 */
export async function downloadVersion(
  version: string,
  stagingPath: string,
): Promise<'npm' | 'binary'> {
  // Test-fixture versions route to the private sentinel bucket. DCE'd in all
  // shipped builds — the string 'claude-code-ci-sentinel' and the gcloud call
  // never exist in compiled binaries. Same gcloud-token pattern as
  // remoteSkillLoader.ts:175-195.
  if (feature('ALLOW_TEST_VERSIONS') && /^99\.99\./.test(version)) {
    const { stdout: accessToken } = await execFileNoThrowWithCwd('gcloud', [
      'auth',
      'print-access-token',
    ])
    await downloadVersionFromBinaryRepo(
      version,
      stagingPath,
      'https://storage.googleapis.com/claude-code-ci-sentinel',
      { headers: { Authorization: `Bearer ${accessToken.trim()}` } },
    )
    return 'binary'
  }

  const useArtifactory = process.env.USER_TYPE === 'ant'
  if (!useArtifactory) {
    // External users download straight from the GCS bucket.
    await downloadVersionFromBinaryRepo(version, stagingPath, GCS_BUCKET_URL)
    return 'binary'
  }
  // Ant users install through the Artifactory npm registry.
  await downloadVersionFromArtifactory(version, stagingPath)
  return 'npm'
}
// Exported for testing
export { StallTimeoutError, MAX_DOWNLOAD_RETRIES }
// Alias of the default stall timeout; it does not reflect the
// CLAUDE_CODE_STALL_TIMEOUT_MS_FOR_TESTING override read by getStallTimeoutMs().
export const STALL_TIMEOUT_MS = DEFAULT_STALL_TIMEOUT_MS
// Test-only handle on the otherwise module-private downloadAndVerifyBinary.
export const _downloadAndVerifyBinaryForTesting = downloadAndVerifyBinary

View File

@@ -0,0 +1,18 @@
/**
* Native Installer - Public API
*
* This is the barrel file that exports only the functions actually used by external modules.
* External modules should only import from this file.
*/
// Re-export only the functions that are actually used
export {
checkInstall,
cleanupNpmInstallations,
cleanupOldVersions,
cleanupShellAliases,
installLatest,
lockCurrentVersion,
removeInstalledSymlink,
type SetupMessage,
} from './installer.js'

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,336 @@
/**
* Package manager detection for Claude CLI
*/
import { readFile } from 'fs/promises'
import memoize from 'lodash-es/memoize.js'
import { logForDebugging } from '../debug.js'
import { execFileNoThrow } from '../execFileNoThrow.js'
import { getPlatform } from '../platform.js'
// Every value getPackageManager() can resolve to. 'unknown' is the fallback
// when none of the detectors in this file reports a match.
export type PackageManager =
| 'homebrew'
| 'winget'
| 'pacman'
| 'deb'
| 'rpm'
| 'apk'
| 'mise'
| 'asdf'
| 'unknown'
/**
 * Reads /etc/os-release and extracts the `ID` and `ID_LIKE` fields (memoized).
 *
 * `ID_LIKE` names the distro family (e.g. Ubuntu has ID_LIKE=debian), which
 * lets callers skip package-manager execs on distros that cannot have them.
 * Optional surrounding quotes are stripped; `ID_LIKE` is split on single
 * spaces. A missing field yields `''` / `[]`.
 *
 * @returns The parsed fields, or null when the file cannot be read
 *   (pre-systemd or non-standard systems); callers then fall through to the
 *   exec as a conservative fallback.
 */
export const getOsRelease = memoize(
  async (): Promise<{ id: string; idLike: string[] } | null> => {
    try {
      const text = await readFile('/etc/os-release', 'utf8')
      const id = text.match(/^ID=["']?(\S+?)["']?\s*$/m)?.[1] ?? ''
      const families =
        text.match(/^ID_LIKE=["']?(.+?)["']?\s*$/m)?.[1]?.split(' ') ?? []
      return { id, idLike: families }
    } catch {
      return null
    }
  },
)
/**
 * Tells whether a distro belongs to any of the given families, judged by its
 * own `id` or by any entry of its `idLike` list.
 *
 * @param osRelease - Parsed os-release fields.
 * @param families - Family identifiers to match against (exact string match).
 */
function isDistroFamily(
  osRelease: { id: string; idLike: string[] },
  families: string[],
): boolean {
  const candidates = [osRelease.id, ...osRelease.idLike]
  return candidates.some(name => families.includes(name))
}
/**
 * Reports whether the running executable lives inside a mise installs
 * directory (mise is a polyglot tool version manager).
 *
 * mise installs to: ~/.local/share/mise/installs/<tool>/<version>/
 * The check is a case-insensitive match for a `mise/installs/` path segment
 * pair, accepting either slash direction.
 */
export function detectMise(): boolean {
  const execPath = process.execPath || process.argv[0] || ''
  const underMise = /[/\\]mise[/\\]installs[/\\]/i.test(execPath)
  if (underMise) {
    logForDebugging(`Detected mise installation: ${execPath}`)
  }
  return underMise
}
/**
 * Reports whether the running executable lives inside an asdf installs
 * directory (asdf is another polyglot tool version manager).
 *
 * asdf installs to: ~/.asdf/installs/<tool>/<version>/
 * The check is a case-insensitive match for `asdf/installs/` (leading dot
 * optional), accepting either slash direction.
 */
export function detectAsdf(): boolean {
  const execPath = process.execPath || process.argv[0] || ''
  const underAsdf = /[/\\]\.?asdf[/\\]installs[/\\]/i.test(execPath)
  if (underAsdf) {
    logForDebugging(`Detected asdf installation: ${execPath}`)
  }
  return underAsdf
}
/**
 * Reports whether the running executable was installed as a Homebrew cask,
 * i.e. its path contains `/Caskroom/`.
 *
 * Caskroom is checked specifically because npm itself may be installed via
 * Homebrew, in which case npm-global packages also live under the Homebrew
 * prefix. The two must be told apart:
 * - Homebrew cask: /opt/homebrew/Caskroom/claude-code/...
 * - npm-global (via Homebrew's npm): /opt/homebrew/lib/node_modules/@anthropic-ai/...
 *
 * Always false unless the platform is macos, linux or wsl.
 */
export function detectHomebrew(): boolean {
  const platform = getPlatform()
  const homebrewCapable =
    platform === 'macos' || platform === 'linux' || platform === 'wsl'
  if (!homebrewCapable) {
    return false
  }
  // Path of the currently running executable
  const execPath = process.execPath || process.argv[0] || ''
  const inCaskroom = execPath.includes('/Caskroom/')
  if (inCaskroom) {
    logForDebugging(`Detected Homebrew cask installation: ${execPath}`)
  }
  return inCaskroom
}
/**
 * Reports whether the running executable sits inside a WinGet directory.
 *
 * Winget installs to:
 * - User: %LOCALAPPDATA%\Microsoft\WinGet\Packages
 * - System: C:\Program Files\WinGet\Packages
 * And creates links at: %LOCALAPPDATA%\Microsoft\WinGet\Links\
 *
 * Always false unless the platform is windows.
 */
export function detectWinget(): boolean {
  if (getPlatform() !== 'windows') {
    return false
  }
  const execPath = process.execPath || process.argv[0] || ''
  // Either slash direction is accepted; matching is case-insensitive.
  const wingetPatterns = [
    /Microsoft[/\\]WinGet[/\\]Packages/i,
    /Microsoft[/\\]WinGet[/\\]Links/i,
  ]
  const underWinget = wingetPatterns.some(pattern => pattern.test(execPath))
  if (underWinget) {
    logForDebugging(`Detected winget installation: ${execPath}`)
  }
  return underWinget
}
/**
 * Reports (memoized) whether pacman's database lists an owner for the running
 * executable, using `pacman -Qo <execPath>`.
 *
 * The exec is gated on the Arch distro family: on other distros such as
 * Ubuntu/Debian, `pacman` in PATH may resolve to the pacman game
 * (/usr/games/pacman) rather than the Arch package manager. When os-release
 * could not be read the exec is still attempted.
 *
 * Always false unless the platform is linux.
 */
export const detectPacman = memoize(async (): Promise<boolean> => {
  if (getPlatform() !== 'linux') {
    return false
  }
  const osRelease = await getOsRelease()
  const knownOtherFamily =
    osRelease !== null && !isDistroFamily(osRelease, ['arch'])
  if (knownOtherFamily) {
    return false
  }
  const execPath = process.execPath || process.argv[0] || ''
  const { code, stdout } = await execFileNoThrow('pacman', ['-Qo', execPath], {
    timeout: 5000,
    useCwd: false,
  })
  if (code !== 0 || !stdout) {
    return false
  }
  logForDebugging(`Detected pacman installation: ${stdout.trim()}`)
  return true
})
/**
 * Reports (memoized) whether dpkg's database lists an owner for the running
 * executable, using `dpkg -S <execPath>`.
 *
 * Skipped when os-release was read and the distro is not in the debian
 * family; attempted anyway when os-release could not be read.
 *
 * Always false unless the platform is linux.
 */
export const detectDeb = memoize(async (): Promise<boolean> => {
  if (getPlatform() !== 'linux') {
    return false
  }
  const osRelease = await getOsRelease()
  const knownOtherFamily =
    osRelease !== null && !isDistroFamily(osRelease, ['debian'])
  if (knownOtherFamily) {
    return false
  }
  const execPath = process.execPath || process.argv[0] || ''
  const { code, stdout } = await execFileNoThrow('dpkg', ['-S', execPath], {
    timeout: 5000,
    useCwd: false,
  })
  if (code !== 0 || !stdout) {
    return false
  }
  logForDebugging(`Detected deb installation: ${stdout.trim()}`)
  return true
})
/**
 * Reports (memoized) whether the RPM database lists an owner for the running
 * executable, using `rpm -qf <execPath>`.
 *
 * Skipped when os-release was read and the distro is not in the fedora, rhel
 * or suse family; attempted anyway when os-release could not be read.
 *
 * Always false unless the platform is linux.
 */
export const detectRpm = memoize(async (): Promise<boolean> => {
  if (getPlatform() !== 'linux') {
    return false
  }
  const osRelease = await getOsRelease()
  const knownOtherFamily =
    osRelease !== null && !isDistroFamily(osRelease, ['fedora', 'rhel', 'suse'])
  if (knownOtherFamily) {
    return false
  }
  const execPath = process.execPath || process.argv[0] || ''
  const { code, stdout } = await execFileNoThrow('rpm', ['-qf', execPath], {
    timeout: 5000,
    useCwd: false,
  })
  if (code !== 0 || !stdout) {
    return false
  }
  logForDebugging(`Detected rpm installation: ${stdout.trim()}`)
  return true
})
/**
 * Reports (memoized) whether Alpine's apk database lists an owner for the
 * running executable, using `apk info --who-owns <execPath>`.
 *
 * Skipped when os-release was read and the distro is not in the alpine
 * family; attempted anyway when os-release could not be read.
 *
 * Always false unless the platform is linux.
 */
export const detectApk = memoize(async (): Promise<boolean> => {
  if (getPlatform() !== 'linux') {
    return false
  }
  const osRelease = await getOsRelease()
  const knownOtherFamily =
    osRelease !== null && !isDistroFamily(osRelease, ['alpine'])
  if (knownOtherFamily) {
    return false
  }
  const execPath = process.execPath || process.argv[0] || ''
  const { code, stdout } = await execFileNoThrow(
    'apk',
    ['info', '--who-owns', execPath],
    {
      timeout: 5000,
      useCwd: false,
    },
  )
  if (code !== 0 || !stdout) {
    return false
  }
  logForDebugging(`Detected apk installation: ${stdout.trim()}`)
  return true
})
/**
 * Determines (memoized) which package manager installed the running Claude
 * executable.
 *
 * Detectors run in a fixed order and the first match wins: the synchronous
 * path-based checks (homebrew, winget, mise, asdf) first, then the checks
 * that query a package database (pacman, apk, deb, rpm).
 *
 * @returns The detected manager, or 'unknown' when no detector matches.
 */
export const getPackageManager = memoize(async (): Promise<PackageManager> => {
  const pathProbes: ReadonlyArray<readonly [() => boolean, PackageManager]> = [
    [detectHomebrew, 'homebrew'],
    [detectWinget, 'winget'],
    [detectMise, 'mise'],
    [detectAsdf, 'asdf'],
  ]
  for (const [probe, manager] of pathProbes) {
    if (probe()) {
      return manager
    }
  }

  const databaseProbes: ReadonlyArray<
    readonly [() => Promise<boolean>, PackageManager]
  > = [
    [detectPacman, 'pacman'],
    [detectApk, 'apk'],
    [detectDeb, 'deb'],
    [detectRpm, 'rpm'],
  ]
  // Awaited one at a time so later probes only run when earlier ones miss.
  for (const [probe, manager] of databaseProbes) {
    if (await probe()) {
      return manager
    }
  }
  return 'unknown'
})

View File

@@ -0,0 +1,433 @@
/**
* PID-Based Version Locking
*
* This module provides PID-based locking for running Claude Code versions.
* Unlike mtime-based locking (which can hold locks for 30 days after a crash),
* PID-based locking can immediately detect when a process is no longer running.
*
* Lock files contain JSON with the PID and metadata, and staleness is determined
* by checking if the process is still alive.
*/
import { basename, join } from 'path'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
import { logForDebugging } from '../debug.js'
import { isEnvDefinedFalsy, isEnvTruthy } from '../envUtils.js'
import { isENOENT, toError } from '../errors.js'
import { getFsImplementation } from '../fsOperations.js'
import { getProcessCommand } from '../genericProcessUtils.js'
import { logError } from '../log.js'
import {
jsonParse,
jsonStringify,
writeFileSync_DEPRECATED,
} from '../slowOperations.js'
/**
 * Tells whether PID-based version locking is enabled.
 * When disabled, callers fall back to mtime-based locking (30-day timeout).
 *
 * Resolution order:
 * - ENABLE_PID_BASED_VERSION_LOCKING set to a truthy value: force-enabled
 * - ENABLE_PID_BASED_VERSION_LOCKING set to a falsy value: force-disabled
 * - otherwise the GrowthBook gate `tengu_pid_based_version_locking` decides,
 *   with `false` as its default
 */
export function isPidBasedLockingEnabled(): boolean {
  const override = process.env.ENABLE_PID_BASED_VERSION_LOCKING
  // An explicit env setting always wins over the remote gate.
  const forcedOn = isEnvTruthy(override)
  const forcedOff = !forcedOn && isEnvDefinedFalsy(override)
  if (forcedOn || forcedOff) {
    return forcedOn
  }
  // GrowthBook controls gradual rollout (returns false for external users)
  return getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_pid_based_version_locking',
    false,
  )
}
/**
* Content stored in a version lock file (serialized as JSON).
* readLockContent() rejects files where `pid` is not a number or where
* `version` / `execPath` are missing or empty.
*/
export type VersionLockContent = {
// PID of the process holding the lock (tryAcquireLock writes process.pid)
pid: number
// Name of the locked version (tryAcquireLock writes basename(versionPath))
version: string
// Executable path of the lock holder (tryAcquireLock writes process.execPath)
execPath: string
acquiredAt: number // timestamp when lock was acquired
}
/**
* Information about a lock for diagnostic purposes.
* Produced by getAllLockInfo() from a lock file's content.
*/
export type LockInfo = {
version: string
pid: number
// Result of isProcessRunning(pid) at the time the info was collected
isProcessRunning: boolean
execPath: string
// The stored `acquiredAt` timestamp wrapped in a Date
acquiredAt: Date
// Full path of the lock file this entry was read from
lockFilePath: string
}
// Fallback stale timeout (2 hours) - used when PID check is inconclusive
// This is much shorter than the previous 30-day timeout but still allows
// for edge cases like network filesystems where PID check might fail
// isLockActive() compares this against the lock file's age (now - mtime).
const FALLBACK_STALE_MS = 2 * 60 * 60 * 1000
/**
 * Check if a process with the given PID is currently running.
 *
 * Uses signal 0, which performs the existence/permission check without
 * delivering a signal.
 *
 * @param pid - Process ID to probe.
 * @returns true when the process exists (including when it exists but we are
 *   not permitted to signal it); false for PIDs <= 1, non-integer PIDs, and
 *   processes that do not exist.
 */
export function isProcessRunning(pid: number): boolean {
  // PID 0 is special - it refers to the current process group, not a real process
  // PID 1 is init/systemd and is always running but shouldn't be considered for locks
  // Non-integers (NaN, fractions) can never name a process.
  if (!Number.isInteger(pid) || pid <= 1) {
    return false
  }
  try {
    process.kill(pid, 0)
    return true
  } catch (error) {
    // EPERM means the process exists but is owned by another user, so it is
    // still running. Reporting it as dead would let a live lock be treated
    // as stale. Everything else (ESRCH, invalid argument) means "not running".
    return (
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'EPERM'
    )
  }
}
/**
 * Checks that a PID belongs to a live process that looks like Claude, to
 * mitigate PID reuse.
 *
 * @param pid - PID recorded in the lock.
 * @param expectedExecPath - Executable path recorded in the lock.
 * @returns false when the process is not running or its command line contains
 *   neither 'claude' nor `expectedExecPath` (case-insensitive); true
 *   otherwise, including when the command line cannot be obtained.
 */
function isClaudeProcess(pid: number, expectedExecPath: string): boolean {
  if (!isProcessRunning(pid)) {
    return false
  }
  // Our own PID is valid by definition. This also covers test environments
  // where the command might not contain 'claude'.
  if (pid === process.pid) {
    return true
  }
  try {
    const command = getProcessCommand(pid)
    if (command) {
      const haystack = command.toLowerCase()
      const needle = expectedExecPath.toLowerCase()
      return haystack.includes('claude') || haystack.includes(needle)
    }
  } catch {
    // Fall through: a failed command lookup defers to the PID check.
  }
  // No command available - trust the PID check. This is conservative: we'd
  // rather not delete a running version.
  return true
}
/**
 * Reads a lock file and parses it as VersionLockContent.
 *
 * @param lockFilePath - Path of the lock file.
 * @returns The parsed content, or null when the file cannot be read, is empty
 *   or whitespace-only, is not valid JSON, has a non-numeric `pid`, or has a
 *   missing/empty `version` or `execPath`.
 */
export function readLockContent(
  lockFilePath: string,
): VersionLockContent | null {
  const fs = getFsImplementation()
  try {
    const raw = fs.readFileSync(lockFilePath, { encoding: 'utf8' })
    if (!raw || raw.trim() === '') {
      return null
    }
    const candidate = jsonParse(raw) as VersionLockContent
    // Required fields must all be present before the content is trusted.
    const wellFormed =
      typeof candidate.pid === 'number' &&
      Boolean(candidate.version) &&
      Boolean(candidate.execPath)
    return wellFormed ? candidate : null
  } catch {
    return null
  }
}
/**
 * Decides whether a lock file is held by a live Claude process.
 *
 * @param lockFilePath - Path of the lock file.
 * @returns false when the file has no valid content, when the recorded PID is
 *   not running, or when the process does not look like Claude; true otherwise.
 */
export function isLockActive(lockFilePath: string): boolean {
  const lock = readLockContent(lockFilePath)
  if (!lock) {
    return false
  }
  // Primary check: is the process running?
  if (!isProcessRunning(lock.pid)) {
    return false
  }
  // Secondary check against PID reuse: is it actually a Claude process?
  if (!isClaudeProcess(lock.pid, lock.execPath)) {
    logForDebugging(
      `Lock PID ${lock.pid} is running but does not appear to be Claude - treating as stale`,
    )
    return false
  }
  // Fallback for old locks (> 2 hours by mtime). As written, this only
  // repeats the liveness probe; an old lock whose process is still visible
  // stays active.
  const fs = getFsImplementation()
  try {
    const ageMs = Date.now() - fs.statSync(lockFilePath).mtimeMs
    if (ageMs > FALLBACK_STALE_MS && !isProcessRunning(lock.pid)) {
      return false
    }
  } catch {
    // If we can't stat the file, trust the PID check
  }
  return true
}
/**
 * Writes lock content by first writing a uniquely named temp file next to the
 * target (suffix `.tmp.<pid>.<timestamp>`) and then renaming it over
 * `lockFilePath`.
 *
 * @param lockFilePath - Final lock file path.
 * @param content - Lock content, serialized as indented JSON.
 * @throws Whatever the write or rename throws, after a best-effort removal of
 *   the temp file.
 */
function writeLockFile(
  lockFilePath: string,
  content: VersionLockContent,
): void {
  const fs = getFsImplementation()
  const scratchPath = `${lockFilePath}.tmp.${process.pid}.${Date.now()}`
  try {
    const serialized = jsonStringify(content, null, 2)
    writeFileSync_DEPRECATED(scratchPath, serialized, {
      encoding: 'utf8',
      flush: true,
    })
    fs.renameSync(scratchPath, lockFilePath)
  } catch (error) {
    // Best-effort removal of the temp file; ENOENT is expected when the
    // write failed before the file was created.
    try {
      fs.unlinkSync(scratchPath)
    } catch {
      // Ignore cleanup errors
    }
    throw error
  }
}
/**
 * Attempts to take the lock for a version.
 *
 * The attempt is refused when isLockActive() reports an active holder
 * (including this process). Otherwise a lock file naming this process is
 * written and read back to confirm that no other writer won the race.
 *
 * @param versionPath - Path of the version; its basename is stored as the version name.
 * @param lockFilePath - Path of the lock file.
 * @returns A release function on success; null when the lock is held, the
 *   read-back shows another PID, or writing the lock failed. The release
 *   function removes the lock file only if it still names this process and
 *   never throws.
 */
export async function tryAcquireLock(
  versionPath: string,
  lockFilePath: string,
): Promise<(() => void) | null> {
  const fs = getFsImplementation()
  const versionName = basename(versionPath)

  // isLockActive is used (rather than a bare PID check) for consistency with
  // cleanup: it also validates that the holder is a Claude process.
  if (isLockActive(lockFilePath)) {
    const holder = readLockContent(lockFilePath)
    logForDebugging(
      `Cannot acquire lock for ${versionName} - held by PID ${holder?.pid}`,
    )
    return null
  }

  const release = () => {
    try {
      // Only release if we still own the lock
      const current = readLockContent(lockFilePath)
      if (current?.pid !== process.pid) {
        return
      }
      fs.unlinkSync(lockFilePath)
      logForDebugging(`Released PID lock for ${versionName}`)
    } catch (error) {
      logForDebugging(`Failed to release lock for ${versionName}: ${error}`)
    }
  }

  const ownClaim: VersionLockContent = {
    pid: process.pid,
    version: versionName,
    execPath: process.execPath,
    acquiredAt: Date.now(),
  }
  try {
    writeLockFile(lockFilePath, ownClaim)
    // Read back: if another process wrote after us, it won the race.
    const observed = readLockContent(lockFilePath)
    if (observed?.pid !== process.pid) {
      return null
    }
    logForDebugging(`Acquired PID lock for ${versionName} (PID ${process.pid})`)
    return release
  } catch (error) {
    logForDebugging(`Failed to acquire lock for ${versionName}: ${error}`)
    return null
  }
}
/**
 * Takes a lock that is meant to be held until this process ends; used for
 * locking the currently running version.
 *
 * On success, the release function is registered as a listener for the
 * process 'exit', 'SIGINT' and 'SIGTERM' events and is not called here.
 *
 * NOTE(review): installing SIGINT/SIGTERM listeners replaces Node's default
 * terminate-on-signal behaviour; presumably another handler elsewhere exits
 * the process - confirm.
 *
 * @param versionPath - Path of the version being locked.
 * @param lockFilePath - Path of the lock file.
 * @returns true when the lock was acquired, false otherwise.
 */
export async function acquireProcessLifetimeLock(
  versionPath: string,
  lockFilePath: string,
): Promise<boolean> {
  const release = await tryAcquireLock(versionPath, lockFilePath)
  if (release === null) {
    return false
  }
  const releaseQuietly = () => {
    try {
      release()
    } catch {
      // Ignore errors during process exit
    }
  }
  process.on('exit', releaseQuietly)
  process.on('SIGINT', releaseQuietly)
  process.on('SIGTERM', releaseQuietly)
  // The lock is deliberately left in place until one of the events above fires.
  return true
}
/**
 * Runs `callback` while holding the lock and releases the lock afterwards,
 * whether the callback resolves or throws.
 *
 * @param versionPath - Path of the version being locked.
 * @param lockFilePath - Path of the lock file.
 * @param callback - Work to perform under the lock; awaited.
 * @returns true when the callback ran to completion, false when the lock
 *   could not be acquired (the callback is not invoked in that case).
 */
export async function withLock(
  versionPath: string,
  lockFilePath: string,
  callback: () => void | Promise<void>,
): Promise<boolean> {
  const release = await tryAcquireLock(versionPath, lockFilePath)
  if (!release) {
    return false
  }
  try {
    await callback()
  } finally {
    release()
  }
  return true
}
/**
 * Collects diagnostic information for every readable `.lock` file in a
 * directory.
 *
 * Entries whose content cannot be parsed by readLockContent() are skipped.
 * A missing directory yields an empty list; any other listing error is
 * logged and whatever was collected so far is returned.
 *
 * @param locksDir - Directory containing the lock files.
 */
export function getAllLockInfo(locksDir: string): LockInfo[] {
  const fs = getFsImplementation()
  const collected: LockInfo[] = []
  try {
    const entries: string[] = fs.readdirStringSync(locksDir)
    for (const entry of entries) {
      if (!entry.endsWith('.lock')) {
        continue
      }
      const lockFilePath = join(locksDir, entry)
      const content = readLockContent(lockFilePath)
      if (!content) {
        continue
      }
      const { version, pid, execPath, acquiredAt } = content
      collected.push({
        version,
        pid,
        isProcessRunning: isProcessRunning(pid),
        execPath,
        acquiredAt: new Date(acquiredAt),
        lockFilePath,
      })
    }
  } catch (error) {
    // A missing locks directory is not worth logging.
    if (!isENOENT(error)) {
      logError(toError(error))
    }
  }
  return collected
}
/**
 * Removes stale `.lock` entries from a directory.
 *
 * Two kinds of entries are handled:
 * - Directories (legacy proper-lockfile locks from mtime-based locking) are
 *   always removed.
 * - Files (PID-based locks) are removed when isLockActive() reports them
 *   inactive.
 *
 * Failures on an individual entry are ignored. A missing directory yields 0;
 * any other listing error is logged.
 *
 * @param locksDir - Directory containing the locks.
 * @returns The number of entries removed.
 */
export function cleanupStaleLocks(locksDir: string): number {
  const fs = getFsImplementation()
  let removed = 0
  try {
    const entries: string[] = fs.readdirStringSync(locksDir)
    for (const entry of entries) {
      if (!entry.endsWith('.lock')) {
        continue
      }
      const lockFilePath = join(locksDir, entry)
      try {
        if (fs.lstatSync(lockFilePath).isDirectory()) {
          // Legacy directory lock - belongs to a different locking mechanism
          fs.rmSync(lockFilePath, { recursive: true, force: true })
          removed++
          logForDebugging(`Cleaned up legacy directory lock: ${entry}`)
          continue
        }
        if (!isLockActive(lockFilePath)) {
          // PID-based file lock with no running process
          fs.unlinkSync(lockFilePath)
          removed++
          logForDebugging(`Cleaned up stale lock: ${entry}`)
        }
      } catch {
        // Ignore individual cleanup errors
      }
    }
  } catch (error) {
    if (isENOENT(error)) {
      return 0
    }
    logError(toError(error))
  }
  return removed
}