chore: initialize recovered claude workspace
This commit is contained in:
926
src/utils/git.ts
Normal file
926
src/utils/git.ts
Normal file
@@ -0,0 +1,926 @@
|
||||
import { createHash } from 'crypto'
|
||||
import { readFileSync, realpathSync, statSync } from 'fs'
|
||||
import { open, readFile, realpath, stat } from 'fs/promises'
|
||||
import memoize from 'lodash-es/memoize.js'
|
||||
import { basename, dirname, join, resolve, sep } from 'path'
|
||||
import { hasBinaryExtension, isBinaryContent } from '../constants/files.js'
|
||||
import { getCwd } from './cwd.js'
|
||||
import { logForDebugging } from './debug.js'
|
||||
import { logForDiagnosticsNoPII } from './diagLogs.js'
|
||||
import { execFileNoThrow } from './execFileNoThrow.js'
|
||||
import { getFsImplementation } from './fsOperations.js'
|
||||
import {
|
||||
getCachedBranch,
|
||||
getCachedDefaultBranch,
|
||||
getCachedHead,
|
||||
getCachedRemoteUrl,
|
||||
getWorktreeCountFromFs,
|
||||
isShallowClone as isShallowCloneFs,
|
||||
resolveGitDir,
|
||||
} from './git/gitFilesystem.js'
|
||||
import { logError } from './log.js'
|
||||
import { memoizeWithLRU } from './memoize.js'
|
||||
import { whichSync } from './which.js'
|
||||
|
||||
/** Sentinel returned by the memoized search when no enclosing `.git` entry exists. */
const GIT_ROOT_NOT_FOUND = Symbol('git-root-not-found')

/**
 * Memoized worker behind `findGitRoot`.
 *
 * Resolves `startPath` to an absolute path and probes `<dir>/.git` at that
 * directory and at each ancestor, finishing with the filesystem root. The
 * first directory whose `.git` is a directory (regular repo) or a file
 * (worktree/submodule) is returned NFC-normalized; if none matches, the
 * GIT_ROOT_NOT_FOUND sentinel is returned.
 *
 * Emits `find_git_root_started` / `find_git_root_completed` diagnostics
 * carrying the elapsed time and the number of stat attempts. Results are
 * memoized per `startPath` string via memoizeWithLRU (size argument: 50).
 */
const findGitRootImpl = memoizeWithLRU(
  (startPath: string): string | typeof GIT_ROOT_NOT_FOUND => {
    const begunAt = Date.now()
    logForDiagnosticsNoPII('info', 'find_git_root_started')

    let probes = 0

    // True when `<dir>/.git` exists as a directory or a regular file.
    // A failing stat simply means "not here".
    const containsGitEntry = (dir: string): boolean => {
      try {
        const candidate = join(dir, '.git')
        probes++
        const entry = statSync(candidate)
        return entry.isDirectory() || entry.isFile()
      } catch {
        return false
      }
    }

    // Emits the completion diagnostic with the final counters.
    const reportDone = (found: boolean): void => {
      logForDiagnosticsNoPII('info', 'find_git_root_completed', {
        duration_ms: Date.now() - begunAt,
        stat_count: probes,
        found,
      })
    }

    const origin = resolve(startPath)
    // Everything up to and including the first separator ("/" or "C:\").
    const fsRoot = origin.substring(0, origin.indexOf(sep) + 1) || sep

    for (let dir = origin; dir !== fsRoot; ) {
      if (containsGitEntry(dir)) {
        reportDone(true)
        return dir.normalize('NFC')
      }
      const above = dirname(dir)
      if (above === dir) {
        // dirname() stopped making progress before reaching fsRoot.
        break
      }
      dir = above
    }

    // The loop never inspects the root itself, so check it last.
    if (containsGitEntry(fsRoot)) {
      reportDone(true)
      return fsRoot.normalize('NFC')
    }

    reportDone(false)
    return GIT_ROOT_NOT_FOUND
  },
  path => path,
  50,
)
|
||||
|
||||
/**
 * Locate the git root for `startPath` by walking up the directory tree.
 *
 * A directory counts as a root when it contains a `.git` directory or a
 * `.git` file (worktrees and submodules use a file). Returns that directory,
 * or `null` when no ancestor qualifies.
 *
 * Lookups are memoized per `startPath` in an LRU cache (max 50 entries) so the
 * cache cannot grow without bound — gitDiff calls this with dirname(file), and
 * editing files across many directories would otherwise add entries forever.
 * The underlying cache is exposed as `findGitRoot.cache`.
 */
export const findGitRoot = createFindGitRoot()

/**
 * Builds the public `findGitRoot` callable: a thin adapter over
 * `findGitRootImpl` that maps the internal not-found sentinel to `null` and
 * re-exports the memoization cache as a `cache` property.
 */
function createFindGitRoot(): {
  (startPath: string): string | null
  cache: typeof findGitRootImpl.cache
} {
  const lookup = (startPath: string): string | null => {
    const found = findGitRootImpl(startPath)
    if (found === GIT_ROOT_NOT_FOUND) {
      return null
    }
    return found
  }
  return Object.assign(lookup, { cache: findGitRootImpl.cache })
}
|
||||
|
||||
/**
 * Map a git root to the canonical root of its main repository.
 *
 * - Regular repo (`.git` is a directory): returned unchanged, because reading
 *   `.git` as a file throws and the catch-all returns the input.
 * - Worktree: follows `.git` file → `gitdir:` → `commondir` to the shared git
 *   directory and returns the working directory that owns it.
 * - Submodule (`.git` is a file but there is no `commondir`): the read throws
 *   and the input root is returned, which is correct because submodules are
 *   separate repositories.
 *
 * Any filesystem error, or any failed validation below, yields the input
 * `gitRoot`. Memoized with a small LRU (size argument: 50) to avoid repeated
 * file reads on hot paths (permission checks, prompt building).
 */
const resolveCanonicalRoot = memoizeWithLRU(
  (gitRoot: string): string => {
    const GITDIR_PREFIX = 'gitdir:'
    const readTrimmed = (file: string): string =>
      readFileSync(file, 'utf-8').trim()

    try {
      // Worktrees store "gitdir: <path>" in a .git *file*; for a normal repo
      // .git is a directory and this read throws (EISDIR).
      const dotGit = readTrimmed(join(gitRoot, '.git'))
      if (!dotGit.startsWith(GITDIR_PREFIX)) {
        return gitRoot
      }
      const linkedGitDir = resolve(
        gitRoot,
        dotGit.slice(GITDIR_PREFIX.length).trim(),
      )

      // `commondir` is relative to the worktree's git dir and names the shared
      // .git directory. Submodules have no such file (ENOENT → catch).
      const sharedGitDir = resolve(
        linkedGitDir,
        readTrimmed(join(linkedGitDir, 'commondir')),
      )

      // SECURITY: both the .git file and commondir come from the repository
      // and are attacker-controlled in a cloned/downloaded repo. Unvalidated,
      // a malicious repo could aim commondir at any path the victim already
      // trusts, bypassing the trust dialog and running hooks from
      // .claude/settings.json on startup.
      //
      // Require the exact layout `git worktree add` produces:
      //   1. linkedGitDir is a direct child of <sharedGitDir>/worktrees/
      //      → the commondir file we read lives inside the resolved common
      //        dir, not inside the attacker's repo.
      //   2. <linkedGitDir>/gitdir points back at <gitRoot>/.git
      //      → an attacker cannot borrow a victim's existing worktree entry
      //        by guessing its path.
      // Both checks are needed: (1) alone fails when the victim has a worktree
      // of the trusted repo; (2) alone fails because the attacker chooses
      // linkedGitDir.
      const expectedParent = join(sharedGitDir, 'worktrees')
      if (resolve(dirname(linkedGitDir)) !== expectedParent) {
        return gitRoot
      }

      // Git writes the back-link with symlinks resolved, whereas gitRoot from
      // findGitRoot() is only lexically resolved. Realpath the *directory* and
      // then append '.git' so legitimate worktrees reached through a symlinked
      // path (e.g. macOS /tmp → /private/tmp) still match. Realpathing the
      // .git file itself would follow a symlinked .git and let an attacker
      // borrow a victim's back-link.
      const backlink = realpathSync(readTrimmed(join(linkedGitDir, 'gitdir')))
      const expectedBacklink = join(realpathSync(gitRoot), '.git')
      if (backlink !== expectedBacklink) {
        return gitRoot
      }

      // Bare-repo worktrees: the common dir is not inside a working directory,
      // so the common dir itself is the stable identity
      // (anthropics/claude-code#27994). Otherwise use the directory that
      // contains the shared `.git`.
      const identity =
        basename(sharedGitDir) === '.git' ? dirname(sharedGitDir) : sharedGitDir
      return identity.normalize('NFC')
    } catch {
      return gitRoot
    }
  },
  root => root,
  50,
)
|
||||
|
||||
/**
 * Find the canonical repository root for `startPath`, resolving through
 * worktrees.
 *
 * `findGitRoot` returns the worktree directory (where the `.git` file lives);
 * this function returns the main repository's working directory instead, so
 * every worktree of one repo maps to the same project identity. Returns
 * `null` when `startPath` is not inside a git repository.
 *
 * Prefer this over `findGitRoot` for project-scoped state (auto-memory,
 * project config, agent memory) so worktrees share state with the main repo.
 * The canonical-root cache is exposed as `findCanonicalGitRoot.cache`.
 */
export const findCanonicalGitRoot = createFindCanonicalGitRoot()

/**
 * Builds the public `findCanonicalGitRoot` callable by composing
 * `findGitRoot` with `resolveCanonicalRoot` and attaching the latter's cache.
 */
function createFindCanonicalGitRoot(): {
  (startPath: string): string | null
  cache: typeof resolveCanonicalRoot.cache
} {
  const lookup = (startPath: string): string | null => {
    const worktreeRoot = findGitRoot(startPath)
    return worktreeRoot ? resolveCanonicalRoot(worktreeRoot) : null
  }
  return Object.assign(lookup, { cache: resolveCanonicalRoot.cache })
}
|
||||
|
||||
/**
 * Path of the git executable, looked up once and then memoized.
 *
 * Spawning a process would otherwise repeat the PATH lookup every time.
 * Falls back to the bare command name 'git' when `whichSync` finds nothing.
 */
export const gitExe = memoize((): string => whichSync('git') || 'git')
|
||||
|
||||
/**
 * Whether the current working directory (`getCwd()`) lies inside a git
 * repository, determined with `findGitRoot`.
 *
 * Wrapped in `memoize`, so the check (and its `is_git_check_started` /
 * `is_git_check_completed` diagnostics) runs on the first call and later
 * calls reuse that promise.
 */
export const getIsGit = memoize(async (): Promise<boolean> => {
  const checkBegan = Date.now()
  logForDiagnosticsNoPII('info', 'is_git_check_started')

  const insideRepo = findGitRoot(getCwd()) !== null

  logForDiagnosticsNoPII('info', 'is_git_check_completed', {
    duration_ms: Date.now() - checkBegan,
    is_git: insideRepo,
  })
  return insideRepo
})
|
||||
|
||||
/**
 * Resolve the git directory for `cwd`.
 *
 * Thin pass-through to `resolveGitDir` from ./git/gitFilesystem.js; the
 * promise it returns (a path or `null`) is handed back unchanged.
 */
export function getGitDir(cwd: string): Promise<string | null> {
  const pendingGitDir = resolveGitDir(cwd)
  return pendingGitDir
}
|
||||
|
||||
/**
 * Whether the current working directory is itself the git root.
 *
 * Returns false when the cwd is not inside a repository. Otherwise both
 * paths are passed through `realpath` so a symlinked cwd still compares
 * equal; if either realpath call fails, the unresolved strings are compared.
 */
export async function isAtGitRoot(): Promise<boolean> {
  const workingDir = getCwd()
  const repoRoot = findGitRoot(workingDir)
  if (!repoRoot) {
    return false
  }

  try {
    const [realWorkingDir, realRepoRoot] = await Promise.all(
      [workingDir, repoRoot].map(p => realpath(p)),
    )
    return realWorkingDir === realRepoRoot
  } catch {
    // realpath failed — fall back to the lexical comparison.
    return workingDir === repoRoot
  }
}
|
||||
|
||||
/** Resolves to true when `cwd` (or one of its ancestors) contains a `.git` entry. */
export const dirIsInGitRepo = async (cwd: string): Promise<boolean> =>
  findGitRoot(cwd) !== null
|
||||
|
||||
/** Current HEAD, as reported by `getCachedHead` (./git/gitFilesystem.js). */
export const getHead = async (): Promise<string> => getCachedHead()

/** Current branch, as reported by `getCachedBranch` (./git/gitFilesystem.js). */
export const getBranch = async (): Promise<string> => getCachedBranch()

/** Default branch, as reported by `getCachedDefaultBranch` (./git/gitFilesystem.js). */
export const getDefaultBranch = async (): Promise<string> =>
  getCachedDefaultBranch()

/** Remote URL (or null), as reported by `getCachedRemoteUrl` (./git/gitFilesystem.js). */
export const getRemoteUrl = async (): Promise<string | null> =>
  getCachedRemoteUrl()
|
||||
|
||||
/**
 * Normalizes a git remote URL to a canonical form for hashing.
 * SSH and HTTP(S) URLs for the same repository converge on
 * `host/owner/repo` (lowercase, trailing `.git` removed).
 *
 * Examples:
 * - git@github.com:owner/repo.git -> github.com/owner/repo
 * - https://github.com/owner/repo.git -> github.com/owner/repo
 * - ssh://git@github.com/owner/repo -> github.com/owner/repo
 * - http://local_proxy@127.0.0.1:16583/git/owner/repo -> github.com/owner/repo
 *
 * @param url - Remote URL as configured in git; surrounding whitespace is ignored.
 * @returns The normalized identifier, or `null` when the input is empty or
 *   matches neither the scp-like SSH form nor an http/https/ssh URL.
 */
export function normalizeGitRemoteUrl(url: string): string | null {
  const trimmed = url.trim()
  if (!trimmed) return null

  // scp-like SSH form: git@host:owner/repo.git
  const sshMatch = trimmed.match(/^git@([^:]+):(.+?)(?:\.git)?$/)
  if (sshMatch && sshMatch[1] && sshMatch[2]) {
    return `${sshMatch[1]}/${sshMatch[2]}`.toLowerCase()
  }

  // URL form: https://host/owner/repo.git or ssh://git@host/owner/repo.
  // The optional userinfo must not span a "/": otherwise an "@" that appears
  // later in the *path* (https://host/team@corp/repo) would be taken as the
  // userinfo delimiter and the wrong host would be extracted.
  const urlMatch = trimmed.match(
    /^(?:https?|ssh):\/\/(?:[^@/]+@)?([^/]+)\/(.+?)(?:\.git)?$/,
  )
  if (urlMatch && urlMatch[1] && urlMatch[2]) {
    const host = urlMatch[1]
    const path = urlMatch[2]

    // CCR git proxy URLs use format:
    //   Legacy: http://...@127.0.0.1:PORT/git/owner/repo (github.com assumed)
    //   GHE:    http://...@127.0.0.1:PORT/git/ghe.host/owner/repo (host encoded in path)
    // Strip the /git/ prefix. If the first segment contains a dot, it's a
    // hostname (GitHub org names cannot contain dots). Otherwise assume github.com.
    if (isLocalHost(host) && path.startsWith('git/')) {
      const proxyPath = path.slice('git/'.length)
      const segments = proxyPath.split('/')
      // 3+ segments where the first contains a dot → host/owner/repo (GHE format)
      if (segments.length >= 3 && (segments[0] ?? '').includes('.')) {
        return proxyPath.toLowerCase()
      }
      // Otherwise → owner/repo (legacy format, assume github.com)
      return `github.com/${proxyPath}`.toLowerCase()
    }

    return `${host}/${path}`.toLowerCase()
  }

  return null
}
|
||||
|
||||
/**
 * Short, stable identifier for the repository's remote.
 *
 * Computes the SHA-256 of the normalized remote URL and keeps the first
 * 16 hex characters. Because the URL is normalized first, SSH and HTTPS
 * clones of the same repository hash identically, and the repository name
 * itself never appears in logs.
 *
 * @returns The 16-character hex prefix, or `null` when there is no remote URL
 *   or it cannot be normalized.
 */
export async function getRepoRemoteHash(): Promise<string | null> {
  const remoteUrl = await getRemoteUrl()
  const canonical = remoteUrl ? normalizeGitRemoteUrl(remoteUrl) : null
  if (!canonical) {
    return null
  }
  return createHash('sha256').update(canonical).digest('hex').slice(0, 16)
}
|
||||
|
||||
/**
 * Runs `git rev-parse @{u}` and reports whether it exited with code 0,
 * i.e. whether an upstream ref could be resolved for the current branch.
 */
export const getIsHeadOnRemote = async (): Promise<boolean> => {
  const upstreamLookup = await execFileNoThrow(
    gitExe(),
    ['rev-parse', '@{u}'],
    { preserveOutputOnError: false },
  )
  return upstreamLookup.code === 0
}
|
||||
|
||||
/**
 * Whether HEAD has commits that its upstream lacks.
 *
 * Runs `git rev-list --count @{u}..HEAD`; resolves to true only when the
 * command succeeds and the reported count is greater than zero. A failing
 * command (for example, no upstream configured) resolves to false.
 */
export const hasUnpushedCommits = async (): Promise<boolean> => {
  const counted = await execFileNoThrow(
    gitExe(),
    ['rev-list', '--count', '@{u}..HEAD'],
    { preserveOutputOnError: false },
  )
  if (counted.code !== 0) {
    return false
  }
  const aheadBy = parseInt(counted.stdout.trim(), 10)
  return aheadBy > 0
}
|
||||
|
||||
/**
 * Whether `git status --porcelain` prints nothing for the working tree.
 *
 * @param options.ignoreUntracked - When true, adds `-uno` so untracked files
 *   are left out of the status output.
 * @returns true when the (trimmed) status output is empty.
 *
 * NOTE(review): the exit code is not inspected, so a failed git invocation
 * that yields empty stdout is also reported as clean — confirm callers
 * accept that.
 */
export const getIsClean = async (options?: {
  ignoreUntracked?: boolean
}): Promise<boolean> => {
  const statusArgs = [
    '--no-optional-locks',
    'status',
    '--porcelain',
    ...(options?.ignoreUntracked ? ['-uno'] : []),
  ]
  const status = await execFileNoThrow(gitExe(), statusArgs, {
    preserveOutputOnError: false,
  })
  return status.stdout.trim() === ''
}
|
||||
|
||||
/**
 * List the paths reported by `git status --porcelain`.
 *
 * Uses the NUL-delimited form (`-z`), in which each record is
 * `XY <path>` — two status columns, one space, then the path verbatim with
 * no quoting or escaping. Parsing by fixed columns (rather than splitting on
 * spaces) keeps paths that contain spaces intact, and `-z` avoids git's
 * C-style quoting of special or non-ASCII names.
 *
 * For renames and copies the record holds the new path and is followed by an
 * extra record with the original path; only the new path is returned.
 *
 * @returns Paths as printed by git, in git's output order. Empty when there
 *   are no changes or git produced no output.
 */
export const getChangedFiles = async (): Promise<string[]> => {
  const { stdout } = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'status', '--porcelain', '-z'],
    {
      preserveOutputOnError: false,
    },
  )

  const records = stdout.split('\0')
  const changed: string[] = []
  for (let i = 0; i < records.length; i++) {
    const record = records[i] ?? ''
    // Need at least "XY " plus one path character; this also drops the
    // empty string left after the final NUL.
    if (record.length <= 3) continue

    changed.push(record.slice(3))

    // Rename/copy entries are followed by a separate record holding the
    // original path — skip it so it is not mistaken for a status record.
    const status = record.slice(0, 2)
    if (status.includes('R') || status.includes('C')) {
      i++
    }
  }
  return changed
}
|
||||
|
||||
/** Paths from `git status`, split by whether git already tracks them. */
export type GitFileStatus = {
  /** Paths whose status is anything other than `??`. */
  tracked: string[]
  /** Paths reported with status `??`. */
  untracked: string[]
}

/**
 * Classify the entries of `git status --porcelain` into tracked and
 * untracked paths.
 *
 * Uses the NUL-delimited form (`-z`): each record is `XY <path>` with the
 * path verbatim, so names containing spaces, quotes or non-ASCII characters
 * come back exactly as they exist on disk (without `-z` git wraps such names
 * in C-style quotes, which are not usable as paths). For renames and copies
 * the record carries the new path and is followed by an extra record with
 * the original path; only the new path is reported.
 *
 * @returns `{ tracked, untracked }`, each in git's output order.
 */
export const getFileStatus = async (): Promise<GitFileStatus> => {
  const { stdout } = await execFileNoThrow(
    gitExe(),
    ['--no-optional-locks', 'status', '--porcelain', '-z'],
    {
      preserveOutputOnError: false,
    },
  )

  const tracked: string[] = []
  const untracked: string[] = []

  const records = stdout.split('\0')
  for (let i = 0; i < records.length; i++) {
    const record = records[i] ?? ''
    // Need at least "XY " plus one path character; this also drops the
    // empty string left after the final NUL.
    if (record.length <= 3) continue

    const status = record.slice(0, 2)
    const filename = record.slice(3)

    if (status === '??') {
      untracked.push(filename)
    } else {
      tracked.push(filename)
    }

    // Rename/copy entries are followed by a separate record holding the
    // original path — skip it so it is not mistaken for a status record.
    if (status.includes('R') || status.includes('C')) {
      i++
    }
  }

  return { tracked, untracked }
}
|
||||
|
||||
/** Worktree count, as reported by `getWorktreeCountFromFs` (./git/gitFilesystem.js). */
export const getWorktreeCount = async (): Promise<number> =>
  getWorktreeCountFromFs()
|
||||
|
||||
/**
 * Stashes all changes (including untracked files) so that git returns to a
 * clean porcelain state.
 *
 * Untracked files are staged with `git add` first so that the subsequent
 * `git stash push` captures them instead of leaving them behind.
 *
 * @param message - Optional stash message. When omitted or empty, a
 *   timestamped "Claude Code auto-stash" message is used.
 * @returns true when every git command exited with code 0; false if staging
 *   or stashing failed or anything threw.
 */
export const stashToCleanState = async (message?: string): Promise<boolean> => {
  try {
    const stashMessage =
      message || `Claude Code auto-stash - ${new Date().toISOString()}`

    // Find untracked files; they must be added to the index before stashing.
    const { untracked } = await getFileStatus()

    if (untracked.length > 0) {
      // `--` ends option parsing, so an untracked file whose name starts
      // with "-" (e.g. "-f") is treated as a path rather than as a git flag.
      const { code: addCode } = await execFileNoThrow(
        gitExe(),
        ['add', '--', ...untracked],
        { preserveOutputOnError: false },
      )

      if (addCode !== 0) {
        return false
      }
    }

    // Stash everything (staged and unstaged changes).
    const { code } = await execFileNoThrow(
      gitExe(),
      ['stash', 'push', '--message', stashMessage],
      { preserveOutputOnError: false },
    )
    return code === 0
  } catch {
    // Best effort: any unexpected failure is reported as "not stashed".
    return false
  }
}
|
||||
|
||||
/** Snapshot of repository facts gathered by `getGitState`. */
export type GitRepoState = {
  /** Value of `getHead()`. */
  commitHash: string
  /** Value of `getBranch()`. */
  branchName: string
  /** Value of `getRemoteUrl()`; null when there is none. */
  remoteUrl: string | null
  /** Value of `getIsHeadOnRemote()`. */
  isHeadOnRemote: boolean
  /** Value of `getIsClean()` (untracked files included). */
  isClean: boolean
  /** Value of `getWorktreeCount()`. */
  worktreeCount: number
}

/**
 * Collect a `GitRepoState` snapshot, running all six lookups concurrently.
 *
 * Git state is best effort: if any lookup rejects, the error is swallowed
 * and `null` is returned.
 */
export async function getGitState(): Promise<GitRepoState | null> {
  try {
    const [
      commitHash,
      branchName,
      remoteUrl,
      isHeadOnRemote,
      isClean,
      worktreeCount,
    ] = await Promise.all([
      getHead(),
      getBranch(),
      getRemoteUrl(),
      getIsHeadOnRemote(),
      getIsClean(),
      getWorktreeCount(),
    ])

    const snapshot: GitRepoState = {
      commitHash,
      branchName,
      remoteUrl,
      isHeadOnRemote,
      isClean,
      worktreeCount,
    }
    return snapshot
  } catch {
    // Deliberately silent — callers treat null as "state unavailable".
    return null
  }
}
|
||||
|
||||
/**
 * Resolve the `owner/name` slug of the repository's remote, but only when
 * that remote is hosted on github.com.
 *
 * Callers (e.g. issue submission) assume the result names a github.com
 * repository, so any other host — or a missing/unparseable remote — yields
 * `null`. The outcome is written to the debug log either way.
 * `parseGitRemote` is imported lazily from ./detectRepository.js.
 */
export async function getGithubRepo(): Promise<string | null> {
  const { parseGitRemote } = await import('./detectRepository.js')
  const remoteUrl = await getRemoteUrl()

  const parsed = remoteUrl ? parseGitRemote(remoteUrl) : null
  if (parsed && parsed.host === 'github.com') {
    const slug = `${parsed.owner}/${parsed.name}`
    logForDebugging(`Local GitHub repo: ${slug}`)
    return slug
  }

  logForDebugging('Local GitHub repo: unknown')
  return null
}
|
||||
|
||||
/**
 * Git state preserved alongside an issue submission.
 *
 * Anchored on a remote base (e.g. origin/main): remote branches are rarely
 * force-pushed, whereas local commits can be garbage-collected after a
 * force push.
 */
export type PreservedGitState = {
  /** SHA of the merge-base between HEAD and the remote branch; null in HEAD-only mode. */
  remote_base_sha: string | null
  /** Remote branch that was used (e.g. "origin/main"); null in HEAD-only mode. */
  remote_base: string | null
  /** Diff from the merge-base (or from HEAD in HEAD-only mode) to the current state, uncommitted changes included. */
  patch: string
  /** Untracked files together with their text contents. */
  untracked_files: Array<{ path: string; content: string }>
  /**
   * `git format-patch` output for the commits between the merge-base and
   * HEAD. Lets replay containers rebuild the real commit chain (author,
   * date, message). null when there are no such commits.
   */
  format_patch: string | null
  /** Current HEAD SHA (tip of the feature branch). */
  head_sha: string | null
  /** Current branch name (e.g. "feat/my-feature"). */
  branch_name: string | null
}
|
||||
|
||||
// Limits applied while capturing untracked files.
const MAX_FILE_SIZE_BYTES = 500 * 1024 ** 2 // 500MB for any single file
const MAX_TOTAL_SIZE_BYTES = 5 * 1024 ** 3 // 5GB across all captured files
const MAX_FILE_COUNT = 20000

// Size of the first read used for binary detection. 64KB holds most source
// files completely, so a text file usually needs no second read;
// isBinaryContent() only examines its first 8KB for the heuristic, the rest
// of the buffer exists purely so the bytes can be reused as the content.
const SNIFF_BUFFER_SIZE = 64 * 1024
|
||||
|
||||
/**
 * Find the best remote branch to use as a base.
 *
 * Tried in order, returning the first hit:
 *   1. the current branch's upstream (`@{u}`),
 *   2. the "HEAD branch" reported by `git remote show origin`,
 *   3. the first of origin/main, origin/staging, origin/master that
 *      `git rev-parse --verify` accepts.
 *
 * @returns A ref name such as "origin/main", or `null` when nothing matched.
 */
export async function findRemoteBase(): Promise<string | null> {
  // 1. Upstream of the current branch.
  const upstream = await execFileNoThrow(
    gitExe(),
    ['rev-parse', '--abbrev-ref', '--symbolic-full-name', '@{u}'],
    { preserveOutputOnError: false },
  )
  if (upstream.code === 0) {
    const upstreamName = upstream.stdout.trim()
    if (upstreamName) {
      return upstreamName
    }
  }

  // 2. Default branch advertised by origin.
  const originInfo = await execFileNoThrow(
    gitExe(),
    ['remote', 'show', 'origin', '--', 'HEAD'],
    { preserveOutputOnError: false },
  )
  if (originInfo.code === 0) {
    const headBranch = originInfo.stdout.match(/HEAD branch: (\S+)/)?.[1]
    if (headBranch) {
      return `origin/${headBranch}`
    }
  }

  // 3. Well-known branch names, probed one at a time so the list order
  //    decides which one wins.
  for (const ref of ['origin/main', 'origin/staging', 'origin/master']) {
    const probe = await execFileNoThrow(
      gitExe(),
      ['rev-parse', '--verify', ref],
      { preserveOutputOnError: false },
    )
    if (probe.code === 0) {
      return ref
    }
  }

  return null
}
|
||||
|
||||
/**
 * Whether the repository is a shallow clone.
 *
 * Delegates to the filesystem-based check from ./git/gitFilesystem.js,
 * which looks for <gitDir>/shallow.
 */
function isShallowClone(): Promise<boolean> {
  const pendingAnswer = isShallowCloneFs()
  return pendingAnswer
}
|
||||
|
||||
/**
 * Capture untracked files (git diff doesn't include them).
 *
 * Lists candidates with `git ls-files --others --exclude-standard -z`. The
 * NUL-delimited form is used because it prints paths verbatim; without `-z`
 * git wraps names containing non-ASCII or special characters in C-style
 * quotes, and those quoted strings cannot be opened as paths.
 *
 * Files are skipped when they have a binary extension, look binary on a
 * content sniff, exceed MAX_FILE_SIZE_BYTES, or cannot be read. Capture
 * stops once MAX_FILE_COUNT files or MAX_TOTAL_SIZE_BYTES bytes have been
 * collected.
 *
 * @returns `{ path, content }` pairs (content decoded as UTF-8). Empty when
 *   git fails or reports no untracked files.
 */
async function captureUntrackedFiles(): Promise<
  Array<{ path: string; content: string }>
> {
  const { stdout, code } = await execFileNoThrow(
    gitExe(),
    ['ls-files', '--others', '--exclude-standard', '-z'],
    { preserveOutputOnError: false },
  )

  if (code !== 0 || !stdout) {
    return []
  }

  // Every path is NUL-terminated, so the last split element is empty.
  const files = stdout.split('\0').filter(Boolean)
  const result: Array<{ path: string; content: string }> = []
  let totalSize = 0

  for (const filePath of files) {
    // Check file count limit
    if (result.length >= MAX_FILE_COUNT) {
      logForDebugging(
        `Untracked file capture: reached max file count (${MAX_FILE_COUNT})`,
      )
      break
    }

    // Skip binary files by extension - zero I/O
    if (hasBinaryExtension(filePath)) {
      continue
    }

    try {
      const stats = await stat(filePath)
      const fileSize = stats.size

      // Skip files exceeding per-file limit
      if (fileSize > MAX_FILE_SIZE_BYTES) {
        logForDebugging(
          `Untracked file capture: skipping ${filePath} (exceeds ${MAX_FILE_SIZE_BYTES} bytes)`,
        )
        continue
      }

      // Check total size limit
      if (totalSize + fileSize > MAX_TOTAL_SIZE_BYTES) {
        logForDebugging(
          `Untracked file capture: reached total size limit (${MAX_TOTAL_SIZE_BYTES} bytes)`,
        )
        break
      }

      // Empty file - no need to open
      if (fileSize === 0) {
        result.push({ path: filePath, content: '' })
        continue
      }

      // Binary sniff on up to SNIFF_BUFFER_SIZE bytes. Caps binary-file reads
      // at SNIFF_BUFFER_SIZE even though MAX_FILE_SIZE_BYTES allows up to 500MB.
      // If the file fits in the sniff buffer we reuse it as the content; for
      // larger text files we fall back to readFile with encoding so the runtime
      // decodes to a string without materializing a full-size Buffer in JS.
      const sniffSize = Math.min(SNIFF_BUFFER_SIZE, fileSize)
      const fd = await open(filePath, 'r')
      try {
        const sniffBuf = Buffer.alloc(sniffSize)
        const { bytesRead } = await fd.read(sniffBuf, 0, sniffSize, 0)
        const sniff = sniffBuf.subarray(0, bytesRead)

        if (isBinaryContent(sniff)) {
          // The finally block below still closes the handle.
          continue
        }

        let content: string
        if (fileSize <= sniffSize) {
          // Sniff already covers the whole file
          content = sniff.toString('utf-8')
        } else {
          // readFile with encoding decodes to string directly, avoiding a
          // full-size Buffer living alongside the decoded string. The extra
          // open/close is cheaper than doubling peak memory for large files.
          content = await readFile(filePath, 'utf-8')
        }

        result.push({ path: filePath, content })
        totalSize += fileSize
      } finally {
        await fd.close()
      }
    } catch (err) {
      // Skip files we can't read
      logForDebugging(`Failed to read untracked file ${filePath}: ${err}`)
    }
  }

  return result
}
|
||||
|
||||
/**
 * Build the HEAD-only fallback state: a diff of the working tree against
 * HEAD plus the untracked files, with every remote/commit field set to null.
 */
async function captureHeadOnlyIssueState(): Promise<PreservedGitState> {
  const [{ stdout: patch }, untrackedFiles] = await Promise.all([
    execFileNoThrow(gitExe(), ['diff', 'HEAD']),
    captureUntrackedFiles(),
  ])
  return {
    remote_base_sha: null,
    remote_base: null,
    patch: patch || '',
    untracked_files: untrackedFiles,
    format_patch: null,
    head_sha: null,
    branch_name: null,
  }
}

/**
 * Preserve git state for issue submission.
 * Uses a remote base for more stable replay capability.
 *
 * Falls back to HEAD-only mode (diff against HEAD, no remote/commit fields)
 * when:
 * - the repository is a shallow clone,
 * - `findRemoteBase()` finds no remote branch to anchor on,
 * - `git merge-base HEAD <remoteBase>` fails or prints nothing.
 *
 * A detached HEAD is reported with `branch_name: null` (git prints "HEAD"
 * as the branch name in that case).
 *
 * @returns The preserved state, or `null` when the cwd is not in a git
 *   repository or an unexpected error occurred (the error is logged).
 */
export async function preserveGitStateForIssue(): Promise<PreservedGitState | null> {
  try {
    const isGit = await getIsGit()
    if (!isGit) {
      return null
    }

    // Shallow clones lack the history needed for a merge-base.
    if (await isShallowClone()) {
      logForDebugging('Shallow clone detected, using HEAD-only mode for issue')
      // `return await` keeps a rejection inside this try/catch.
      return await captureHeadOnlyIssueState()
    }

    // Find the best remote base
    const remoteBase = await findRemoteBase()
    if (!remoteBase) {
      logForDebugging('No remote found, using HEAD-only mode for issue')
      return await captureHeadOnlyIssueState()
    }

    // Get the merge-base with remote
    const { stdout: mergeBase, code: mergeBaseCode } = await execFileNoThrow(
      gitExe(),
      ['merge-base', 'HEAD', remoteBase],
      { preserveOutputOnError: false },
    )
    if (mergeBaseCode !== 0 || !mergeBase.trim()) {
      logForDebugging('Merge-base failed, using HEAD-only mode for issue')
      return await captureHeadOnlyIssueState()
    }

    const remoteBaseSha = mergeBase.trim()

    // All 5 commands below depend only on remoteBaseSha — run them in parallel.
    // ~5×90ms serial → ~90ms parallel on Bun native (used by /issue and /share).
    const [
      { stdout: patch },
      untrackedFiles,
      { stdout: formatPatchOut, code: formatPatchCode },
      { stdout: headSha },
      { stdout: branchName },
    ] = await Promise.all([
      // Patch from merge-base to current state (including staged changes)
      execFileNoThrow(gitExe(), ['diff', remoteBaseSha]),
      // Untracked files captured separately
      captureUntrackedFiles(),
      // format-patch for committed changes between merge-base and HEAD.
      // Preserves the actual commit chain (author, date, message) so replay
      // containers can reconstruct the branch with real commits instead of a
      // squashed diff. Uses --stdout to emit all patches as a single text stream.
      execFileNoThrow(gitExe(), [
        'format-patch',
        `${remoteBaseSha}..HEAD`,
        '--stdout',
      ]),
      // HEAD SHA for replay
      execFileNoThrow(gitExe(), ['rev-parse', 'HEAD']),
      // Branch name for replay
      execFileNoThrow(gitExe(), ['rev-parse', '--abbrev-ref', 'HEAD']),
    ])

    // Keep the format-patch output only when the command succeeded and
    // actually produced patches.
    let formatPatch: string | null = null
    if (formatPatchCode === 0 && formatPatchOut && formatPatchOut.trim()) {
      formatPatch = formatPatchOut
    }

    const trimmedBranch = branchName?.trim()
    return {
      remote_base_sha: remoteBaseSha,
      remote_base: remoteBase,
      patch: patch || '',
      untracked_files: untrackedFiles,
      format_patch: formatPatch,
      head_sha: headSha?.trim() || null,
      // "HEAD" means detached — there is no branch name to report.
      branch_name:
        trimmedBranch && trimmedBranch !== 'HEAD' ? trimmedBranch : null,
    }
  } catch (err) {
    logError(err)
    return null
  }
}
|
||||
|
||||
/**
 * Whether `host` (optionally carrying a `:port` suffix) names the local
 * machine: exactly `localhost`, or a dotted-quad address starting with 127.
 * Everything after the first colon is ignored.
 */
function isLocalHost(host: string): boolean {
  const colonAt = host.indexOf(':')
  const bareHost = colonAt === -1 ? host : host.slice(0, colonAt)

  if (bareHost === 'localhost') {
    return true
  }
  return /^127\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(bareHost)
}
|
||||
|
||||
/**
 * Checks whether the current working directory looks like a bare git
 * repository, or has been manipulated to look like one (a sandbox-escape
 * vector).
 *
 * SECURITY: Git's is_git_directory() (setup.c:417-455) accepts a directory
 * as a git dir when it contains:
 *   1. HEAD file - Must be a valid ref
 *   2. objects/ directory - Must exist and be accessible
 *   3. refs/ directory - Must exist and be accessible
 *
 * When all three exist directly in the cwd (not in a .git subdirectory),
 * Git treats the cwd as a bare repository and will run hooks/pre-commit and
 * other hook scripts from the cwd.
 *
 * Attack scenario:
 *   1. Attacker creates HEAD, objects/, refs/, and hooks/pre-commit in cwd
 *   2. Attacker deletes or corrupts .git/HEAD to invalidate the normal git directory
 *   3. When user runs 'git status', Git treats cwd as the git dir and runs the hook
 *
 * @returns true if the cwd looks like a bare/exploited git directory
 */
/* eslint-disable custom-rules/no-sync-fs -- sync permission-eval check */
export function isCurrentDirectoryBareGitRepo(): boolean {
  const fs = getFsImplementation()
  const cwd = getCwd()

  // Stats `target` and tests its type; any error counts as "no match".
  // Each call has its own try/catch so one failure cannot mask another.
  const existsAs = (target: string, kind: 'file' | 'dir'): boolean => {
    try {
      const info = fs.statSync(target)
      return kind === 'file' ? info.isFile() : info.isDirectory()
    } catch {
      return false
    }
  }

  // Classify .git with a single stat call.
  const dotGitPath = join(cwd, '.git')
  let dotGitKind: 'file' | 'dir' | 'other' | 'missing'
  try {
    const info = fs.statSync(dotGitPath)
    if (info.isFile()) {
      dotGitKind = 'file'
    } else if (info.isDirectory()) {
      dotGitKind = 'dir'
    } else {
      dotGitKind = 'other'
    }
  } catch {
    dotGitKind = 'missing'
  }

  if (dotGitKind === 'file') {
    // worktree/submodule — Git follows the gitdir reference
    return false
  }

  // SECURITY: require .git/HEAD to be a regular FILE. An attacker creating
  // .git/HEAD as a DIRECTORY would pass a bare existence check, but Git's
  // setup_git_directory rejects it (not a valid HEAD) and falls back to cwd
  // discovery.
  if (dotGitKind === 'dir' && existsAs(join(dotGitPath, 'HEAD'), 'file')) {
    // normal repo — .git/HEAD valid, Git won't fall back to cwd
    return false
  }

  // No valid .git/HEAD found. Be cautious: flag the cwd if ANY bare-repo
  // indicator exists without a valid .git reference. `||` preserves the
  // HEAD → objects → refs order and stops at the first hit.
  return (
    existsAs(join(cwd, 'HEAD'), 'file') ||
    existsAs(join(cwd, 'objects'), 'dir') ||
    existsAs(join(cwd, 'refs'), 'dir')
  )
}
/* eslint-enable custom-rules/no-sync-fs */
|
||||
Reference in New Issue
Block a user