chore: initialize recovered claude workspace

2026-04-02 15:29:01 +08:00
commit a10efa3b4b
1940 changed files with 506426 additions and 0 deletions
--- a/src/services/SessionMemory/prompts.ts
+++ b/src/services/SessionMemory/prompts.ts
@@ -0,0 +1,324 @@
+import { readFile } from 'fs/promises'
+import { join } from 'path'
+import { roughTokenCountEstimation } from '../../services/tokenEstimation.js'
+import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
+import { getErrnoCode, toError } from '../../utils/errors.js'
+import { logError } from '../../utils/log.js'
+
+// Per-section size cap. It is compared against roughTokenCountEstimation()
+// results in generateSectionReminders, multiplied by 4 to get a character cap
+// in truncateSessionMemoryForCompact, and interpolated into the default
+// update prompt.
+const MAX_SECTION_LENGTH = 2000
+// Budget for the whole notes file, in estimated tokens. Going over it makes
+// generateSectionReminders append a CRITICAL "condense the file" instruction.
+const MAX_TOTAL_SESSION_MEMORY_TOKENS = 12000
+
+/**
+ * Built-in markdown skeleton for the session notes file.
+ *
+ * Every section is a `# ` header line followed by one italic
+ * (underscore-wrapped) description line. loadSessionMemoryTemplate() returns
+ * this value whenever the custom template file cannot be read.
+ */
+export const DEFAULT_SESSION_MEMORY_TEMPLATE = `
+# Session Title
+_A short and distinctive 5-10 word descriptive title for the session. Super info dense, no filler_
+
+# Current State
+_What is actively being worked on right now? Pending tasks not yet completed. Immediate next steps._
+
+# Task specification
+_What did the user ask to build? Any design decisions or other explanatory context_
+
+# Files and Functions
+_What are the important files? In short, what do they contain and why are they relevant?_
+
+# Workflow
+_What bash commands are usually run and in what order? How to interpret their output if not obvious?_
+
+# Errors & Corrections
+_Errors encountered and how they were fixed. What did the user correct? What approaches failed and should not be tried again?_
+
+# Codebase and System Documentation
+_What are the important system components? How do they work/fit together?_
+
+# Learnings
+_What has worked well? What has not? What to avoid? Do not duplicate items from other sections_
+
+# Key results
+_If the user asked a specific output such as an answer to a question, a table, or other document, repeat the exact result here_
+
+# Worklog
+_Step by step, what was attempted, done? Very terse summary for each step_
+`
+
+/**
+ * Returns the built-in instruction prompt used to update the session notes file.
+ *
+ * Only MAX_SECTION_LENGTH is interpolated here. The `{{notesPath}}` and
+ * `{{currentNotes}}` markers are left in the returned text as placeholders;
+ * buildSessionMemoryUpdatePrompt() fills them in via substituteVariables().
+ */
+function getDefaultUpdatePrompt(): string {
+  return `IMPORTANT: This message and these instructions are NOT part of the actual user conversation. Do NOT include any references to "note-taking", "session notes extraction", or these update instructions in the notes content.
+
+Based on the user conversation above (EXCLUDING this note-taking instruction message as well as system prompt, claude.md entries, or any past session summaries), update the session notes file.
+
+The file {{notesPath}} has already been read for you. Here are its current contents:
+<current_notes_content>
+{{currentNotes}}
+</current_notes_content>
+
+Your ONLY task is to use the Edit tool to update the notes file, then stop. You can make multiple edits (update every section as needed) - make all Edit tool calls in parallel in a single message. Do not call any other tools.
+
+CRITICAL RULES FOR EDITING:
+- The file must maintain its exact structure with all sections, headers, and italic descriptions intact
+-- NEVER modify, delete, or add section headers (the lines starting with '#' like # Task specification)
+-- NEVER modify or delete the italic _section description_ lines (these are the lines in italics immediately following each header - they start and end with underscores)
+-- The italic _section descriptions_ are TEMPLATE INSTRUCTIONS that must be preserved exactly as-is - they guide what content belongs in each section
+-- ONLY update the actual content that appears BELOW the italic _section descriptions_ within each existing section
+-- Do NOT add any new sections, summaries, or information outside the existing structure
+- Do NOT reference this note-taking process or instructions anywhere in the notes
+- It's OK to skip updating a section if there are no substantial new insights to add. Do not add filler content like "No info yet", just leave sections blank/unedited if appropriate.
+- Write DETAILED, INFO-DENSE content for each section - include specifics like file paths, function names, error messages, exact commands, technical details, etc.
+- For "Key results", include the complete, exact output the user requested (e.g., full table, full answer, etc.)
+- Do not include information that's already in the CLAUDE.md files included in the context
+- Keep each section under ~${MAX_SECTION_LENGTH} tokens/words - if a section is approaching this limit, condense it by cycling out less important details while preserving the most critical information
+- Focus on actionable, specific information that would help someone understand or recreate the work discussed in the conversation
+- IMPORTANT: Always update "Current State" to reflect the most recent work - this is critical for continuity after compaction
+
+Use the Edit tool with file_path: {{notesPath}}
+
+STRUCTURE PRESERVATION REMINDER:
+Each section has TWO parts that must be preserved exactly as they appear in the current file:
+1. The section header (line starting with #)
+2. The italic description line (the _italicized text_ immediately after the header - this is a template instruction)
+
+You ONLY update the actual content that comes AFTER these two preserved lines. The italic description lines starting and ending with underscores are part of the template structure, NOT content to be edited or removed.
+
+REMEMBER: Use the Edit tool in parallel and stop. Do not continue after the edits. Only include insights from the actual user conversation, never from these note-taking instructions. Do not delete or change section headers or italic _section descriptions_.`
+}
+
+/**
+ * Reads the user's custom notes template from
+ * `<config home>/session-memory/config/template.md`, where the config home is
+ * whatever getClaudeConfigHomeDir() returns.
+ *
+ * @returns The file's contents, or DEFAULT_SESSION_MEMORY_TEMPLATE when the
+ *   file cannot be read. A missing file (ENOENT) falls back silently; any
+ *   other read failure is logged first.
+ */
+export async function loadSessionMemoryTemplate(): Promise<string> {
+  const customTemplateFile = join(
+    getClaudeConfigHomeDir(),
+    'session-memory',
+    'config',
+    'template.md',
+  )
+
+  let fileText: string
+  try {
+    fileText = await readFile(customTemplateFile, { encoding: 'utf-8' })
+  } catch (readFailure: unknown) {
+    // A missing file only means "no customization"; anything else gets logged.
+    if (getErrnoCode(readFailure) !== 'ENOENT') {
+      logError(toError(readFailure))
+    }
+    return DEFAULT_SESSION_MEMORY_TEMPLATE
+  }
+  return fileText
+}
+
+/**
+ * Reads the user's custom update prompt from
+ * `<config home>/session-memory/config/prompt.md`, where the config home is
+ * whatever getClaudeConfigHomeDir() returns.
+ *
+ * Custom prompts may use `{{variableName}}` placeholders (for example
+ * `{{currentNotes}}` and `{{notesPath}}`); they are filled in later by
+ * substituteVariables().
+ *
+ * @returns The file's contents, or the built-in prompt from
+ *   getDefaultUpdatePrompt() when the file cannot be read. A missing file
+ *   (ENOENT) falls back silently; any other read failure is logged first.
+ */
+export async function loadSessionMemoryPrompt(): Promise<string> {
+  const customPromptFile = join(
+    getClaudeConfigHomeDir(),
+    'session-memory',
+    'config',
+    'prompt.md',
+  )
+
+  let fileText: string
+  try {
+    fileText = await readFile(customPromptFile, { encoding: 'utf-8' })
+  } catch (readFailure: unknown) {
+    // A missing file only means "no customization"; anything else gets logged.
+    if (getErrnoCode(readFailure) !== 'ENOENT') {
+      logError(toError(readFailure))
+    }
+    return getDefaultUpdatePrompt()
+  }
+  return fileText
+}
+
+/**
+ * Splits the notes file on lines that start with `# ` and estimates the token
+ * size of each section body.
+ *
+ * @param content - Full text of the session notes file.
+ * @returns A map from header line (including the leading `# `) to the
+ *   estimated token count of the trimmed text beneath it. Text before the
+ *   first header is ignored, a header with no lines after it is omitted, and
+ *   a repeated header keeps the size of its last occurrence.
+ */
+function analyzeSectionSizes(content: string): Record<string, number> {
+  const sizeByHeader: Record<string, number> = {}
+  let activeHeader = ''
+  let bodyLines: string[] = []
+
+  // Records the section collected so far, if there is one with any lines.
+  const commitActiveSection = (): void => {
+    if (!activeHeader || bodyLines.length === 0) {
+      return
+    }
+    const bodyText = bodyLines.join('\n').trim()
+    sizeByHeader[activeHeader] = roughTokenCountEstimation(bodyText)
+  }
+
+  for (const rawLine of content.split('\n')) {
+    if (!rawLine.startsWith('# ')) {
+      bodyLines.push(rawLine)
+      continue
+    }
+    // A new header closes the previous section and opens the next one.
+    commitActiveSection()
+    activeHeader = rawLine
+    bodyLines = []
+  }
+
+  commitActiveSection()
+  return sizeByHeader
+}
+
+/**
+ * Builds the warning text appended to the update prompt when the notes file
+ * or any of its sections is too large.
+ *
+ * @param sectionSizes - Estimated tokens per section, keyed by header line.
+ * @param totalTokens - Estimated tokens for the whole notes file.
+ * @returns An empty string when nothing is over limit. Otherwise a CRITICAL
+ *   paragraph (file over MAX_TOTAL_SESSION_MEMORY_TOKENS) and/or a list of
+ *   sections over MAX_SECTION_LENGTH, largest first. Each part starts with a
+ *   blank line so it can be concatenated directly onto the prompt.
+ */
+function generateSectionReminders(
+  sectionSizes: Record<string, number>,
+  totalTokens: number,
+): string {
+  const fileOverBudget = totalTokens > MAX_TOTAL_SESSION_MEMORY_TOKENS
+
+  const offenders = Object.entries(sectionSizes).filter(
+    entry => entry[1] > MAX_SECTION_LENGTH,
+  )
+  offenders.sort((left, right) => right[1] - left[1])
+  const bulletLines = offenders.map(
+    ([header, size]) =>
+      `- "${header}" is ~${size} tokens (limit: ${MAX_SECTION_LENGTH})`,
+  )
+
+  let reminder = ''
+
+  if (fileOverBudget) {
+    reminder += `\n\nCRITICAL: The session memory file is currently ~${totalTokens} tokens, which exceeds the maximum of ${MAX_TOTAL_SESSION_MEMORY_TOKENS} tokens. You MUST condense the file to fit within this budget. Aggressively shorten oversized sections by removing less important details, merging related items, and summarizing older entries. Prioritize keeping "Current State" and "Errors & Corrections" accurate and detailed.`
+  }
+
+  if (bulletLines.length > 0) {
+    // The list heading is softer when the CRITICAL paragraph already precedes it.
+    const heading = fileOverBudget
+      ? 'Oversized sections to condense'
+      : 'IMPORTANT: The following sections exceed the per-section limit and MUST be condensed'
+    reminder += `\n\n${heading}:\n${bulletLines.join('\n')}`
+  }
+
+  return reminder
+}
+
+/**
+ * Fills `{{name}}` placeholders in a prompt template.
+ *
+ * @param template - Text containing zero or more `{{name}}` placeholders.
+ * @param variables - Values keyed by placeholder name.
+ * @returns The template with every known placeholder replaced. Placeholders
+ *   whose name is not an own property of `variables` are left untouched.
+ */
+function substituteVariables(
+  template: string,
+  variables: Record<string, string>,
+): string {
+  const isKnown = (name: string): boolean =>
+    Object.prototype.hasOwnProperty.call(variables, name)
+
+  // One pass with a replacer function: `$` sequences in a value are inserted
+  // literally, and placeholder-looking text inside a value is never expanded
+  // a second time.
+  return template.replace(
+    /\{\{(\w+)\}\}/g,
+    (placeholder: string, name: string) => {
+      if (!isKnown(name)) {
+        return placeholder
+      }
+      return variables[name]!
+    },
+  )
+}
+
+/**
+ * Reports whether the notes file still holds nothing but the template, i.e.
+ * no content has been extracted into it yet. Callers use this to fall back to
+ * the legacy compact behavior.
+ *
+ * @param content - Current text of the session notes file.
+ * @returns True when `content` equals the loaded template, ignoring leading
+ *   and trailing whitespace on both.
+ */
+export async function isSessionMemoryEmpty(content: string): Promise<boolean> {
+  const normalizedTemplate = (await loadSessionMemoryTemplate()).trim()
+  const normalizedContent = content.trim()
+  return normalizedContent === normalizedTemplate
+}
+
+/**
+ * Assembles the user prompt sent to the note-taking agent.
+ *
+ * @param currentNotes - Current text of the session notes file.
+ * @param notesPath - Path of the notes file, substituted for `{{notesPath}}`.
+ * @returns The loaded prompt template with its placeholders filled in,
+ *   followed by size warnings when the file or a section is over its limit.
+ */
+export async function buildSessionMemoryUpdatePrompt(
+  currentNotes: string,
+  notesPath: string,
+): Promise<string> {
+  const template = await loadSessionMemoryPrompt()
+
+  // Size warnings are computed from the notes themselves, not from the prompt.
+  const perSectionTokens = analyzeSectionSizes(currentNotes)
+  const wholeFileTokens = roughTokenCountEstimation(currentNotes)
+  const sizeWarnings = generateSectionReminders(
+    perSectionTokens,
+    wholeFileTokens,
+  )
+
+  const filledPrompt = substituteVariables(template, {
+    currentNotes,
+    notesPath,
+  })
+
+  return `${filledPrompt}${sizeWarnings}`
+}
+
+/**
+ * Caps every section of the session notes at the per-section limit before the
+ * notes are inserted into compact messages, so oversized notes cannot consume
+ * the whole post-compact token budget.
+ *
+ * Sections are delimited by lines starting with `# `. Text before the first
+ * header is passed through unchanged.
+ *
+ * @param content - Full text of the session notes file.
+ * @returns The possibly shortened text and a flag telling whether any section
+ *   was cut.
+ */
+export function truncateSessionMemoryForCompact(content: string): {
+  truncatedContent: string
+  wasTruncated: boolean
+} {
+  // roughTokenCountEstimation uses length/4, so the character cap is tokens * 4.
+  const perSectionCharCap = MAX_SECTION_LENGTH * 4
+  const rendered: string[] = []
+  let anySectionCut = false
+  let pendingHeader = ''
+  let pendingBody: string[] = []
+
+  // Emits the section gathered so far and remembers whether it was shortened.
+  const emitPending = (): void => {
+    const flushed = flushSessionSection(
+      pendingHeader,
+      pendingBody,
+      perSectionCharCap,
+    )
+    rendered.push(...flushed.lines)
+    if (flushed.wasTruncated) {
+      anySectionCut = true
+    }
+  }
+
+  for (const rawLine of content.split('\n')) {
+    if (!rawLine.startsWith('# ')) {
+      pendingBody.push(rawLine)
+      continue
+    }
+    emitPending()
+    pendingHeader = rawLine
+    pendingBody = []
+  }
+
+  // The final section has no following header to trigger its flush.
+  emitPending()
+
+  return {
+    truncatedContent: rendered.join('\n'),
+    wasTruncated: anySectionCut,
+  }
+}
+
+/**
+ * Renders one section (header plus body lines), cutting the body at a line
+ * boundary when it is longer than the character cap.
+ *
+ * @param sectionHeader - The `# ` header line, or '' for text that precedes
+ *   the first header (returned untouched).
+ * @param sectionLines - Body lines of the section, without the header.
+ * @param maxCharsPerSection - Maximum length of the body once joined with
+ *   newlines.
+ * @returns The lines to output and whether the body was shortened. A
+ *   shortened section ends with a truncation marker.
+ */
+function flushSessionSection(
+  sectionHeader: string,
+  sectionLines: string[],
+  maxCharsPerSection: number,
+): { lines: string[]; wasTruncated: boolean } {
+  if (sectionHeader === '') {
+    return { lines: sectionLines, wasTruncated: false }
+  }
+
+  const fitsWithinCap = sectionLines.join('\n').length <= maxCharsPerSection
+  if (fitsWithinCap) {
+    return { lines: [sectionHeader, ...sectionLines], wasTruncated: false }
+  }
+
+  // Count how many leading lines fit; each line costs its length plus one
+  // for the newline that joins it to the next.
+  let budgetUsed = 0
+  let keepCount = 0
+  while (keepCount < sectionLines.length) {
+    const lineCost = sectionLines[keepCount]!.length + 1
+    if (budgetUsed + lineCost > maxCharsPerSection) {
+      break
+    }
+    budgetUsed += lineCost
+    keepCount += 1
+  }
+
+  return {
+    lines: [
+      sectionHeader,
+      ...sectionLines.slice(0, keepCount),
+      '\n[... section truncated for length ...]',
+    ],
+    wasTruncated: true,
+  }
+}
--- a/src/services/SessionMemory/sessionMemory.ts
+++ b/src/services/SessionMemory/sessionMemory.ts
@@ -0,0 +1,495 @@
+/**
+ * Session Memory automatically maintains a markdown file with notes about the current conversation.
+ * It runs periodically in the background using a forked subagent to extract key information
+ * without interrupting the main conversation flow.
+ */
+
+import { writeFile } from 'fs/promises'
+import memoize from 'lodash-es/memoize.js'
+import { getIsRemoteMode } from '../../bootstrap/state.js'
+import { getSystemPrompt } from '../../constants/prompts.js'
+import { getSystemContext, getUserContext } from '../../context.js'
+import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
+import type { Tool, ToolUseContext } from '../../Tool.js'
+import { FILE_EDIT_TOOL_NAME } from '../../tools/FileEditTool/constants.js'
+import {
+  FileReadTool,
+  type Output as FileReadToolOutput,
+} from '../../tools/FileReadTool/FileReadTool.js'
+import type { Message } from '../../types/message.js'
+import { count } from '../../utils/array.js'
+import {
+  createCacheSafeParams,
+  createSubagentContext,
+  runForkedAgent,
+} from '../../utils/forkedAgent.js'
+import { getFsImplementation } from '../../utils/fsOperations.js'
+import {
+  type REPLHookContext,
+  registerPostSamplingHook,
+} from '../../utils/hooks/postSamplingHooks.js'
+import {
+  createUserMessage,
+  hasToolCallsInLastAssistantTurn,
+} from '../../utils/messages.js'
+import {
+  getSessionMemoryDir,
+  getSessionMemoryPath,
+} from '../../utils/permissions/filesystem.js'
+import { sequential } from '../../utils/sequential.js'
+import { asSystemPrompt } from '../../utils/systemPromptType.js'
+import { getTokenUsage, tokenCountWithEstimation } from '../../utils/tokens.js'
+import { logEvent } from '../analytics/index.js'
+import { isAutoCompactEnabled } from '../compact/autoCompact.js'
+import {
+  buildSessionMemoryUpdatePrompt,
+  loadSessionMemoryTemplate,
+} from './prompts.js'
+import {
+  DEFAULT_SESSION_MEMORY_CONFIG,
+  getSessionMemoryConfig,
+  getToolCallsBetweenUpdates,
+  hasMetInitializationThreshold,
+  hasMetUpdateThreshold,
+  isSessionMemoryInitialized,
+  markExtractionCompleted,
+  markExtractionStarted,
+  markSessionMemoryInitialized,
+  recordExtractionTokenCount,
+  type SessionMemoryConfig,
+  setLastSummarizedMessageId,
+  setSessionMemoryConfig,
+} from './sessionMemoryUtils.js'
+
+// ============================================================================
+// Feature Gate and Config (Cached - Non-blocking)
+// ============================================================================
+// These functions return cached values from disk immediately without blocking
+// on GrowthBook initialization. Values may be stale but are updated in background.
+
+import { errorMessage, getErrnoCode } from '../../utils/errors.js'
+import {
+  getDynamicConfig_CACHED_MAY_BE_STALE,
+  getFeatureValue_CACHED_MAY_BE_STALE,
+} from '../analytics/growthbook.js'
+
+/**
+ * Check if session memory feature is enabled.
+ * Uses cached gate value - returns immediately without blocking.
+ *
+ * Reads the 'tengu_session_memory' feature value and passes `false` as the
+ * fallback argument.
+ */
+function isSessionMemoryGateEnabled(): boolean {
+  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_session_memory', false)
+}
+
+/**
+ * Get session memory config from cache.
+ * Returns immediately without blocking - value may be stale.
+ *
+ * Reads the 'tengu_sm_config' dynamic config and passes `{}` as the fallback
+ * argument; every field of the result is optional, so callers must supply
+ * their own defaults.
+ */
+function getSessionMemoryRemoteConfig(): Partial<SessionMemoryConfig> {
+  return getDynamicConfig_CACHED_MAY_BE_STALE<Partial<SessionMemoryConfig>>(
+    'tengu_sm_config',
+    {},
+  )
+}
+
+// ============================================================================
+// Module State
+// ============================================================================
+
+// Uuid of the last message in the conversation at the moment
+// shouldExtractMemory() most recently returned true. countToolCallsSince()
+// counts tool calls after this message. Undefined until the first trigger.
+let lastMemoryMessageUuid: string | undefined
+
+/**
+ * Reset the last memory message UUID (for testing)
+ */
+export function resetLastMemoryMessageUuid(): void {
+  lastMemoryMessageUuid = undefined
+}
+
+/**
+ * Counts `tool_use` blocks in assistant messages that come after an anchor
+ * message.
+ *
+ * @param messages - Conversation messages, in order.
+ * @param sinceUuid - Uuid of the anchor message. Counting starts with the
+ *   message after its first occurrence. When it is undefined, every message
+ *   is counted.
+ * @returns The number of `tool_use` blocks found.
+ *
+ * If the anchor is not present in `messages`, the whole array is counted,
+ * exactly as when no anchor is given. Without this fallback a missing anchor
+ * yields 0 on every call, so a caller comparing the result against a
+ * threshold would never see it met. NOTE(review): a missing anchor presumably
+ * happens when the caller passes a rebuilt message list - confirm.
+ */
+function countToolCallsSince(
+  messages: Message[],
+  sinceUuid: string | undefined,
+): number {
+  // -1 means "no anchor" or "anchor not found"; both start counting at index 0.
+  const anchorIndex =
+    sinceUuid == null
+      ? -1
+      : messages.findIndex(message => message.uuid === sinceUuid)
+
+  let toolCallCount = 0
+  for (const message of messages.slice(anchorIndex + 1)) {
+    if (message.type !== 'assistant') {
+      continue
+    }
+    const content = message.message.content
+    if (Array.isArray(content)) {
+      toolCallCount += count(content, block => block.type === 'tool_use')
+    }
+  }
+
+  return toolCallCount
+}
+
+/**
+ * Decides whether a session memory extraction should run now.
+ *
+ * The token threshold is always required. On top of it, either the tool-call
+ * threshold must be met or the last assistant turn must contain no tool calls
+ * (a natural conversation break).
+ *
+ * Side effects: marks session memory as initialized the first time the
+ * initialization threshold is met, and on a positive decision records the
+ * uuid of the last message as the new anchor for tool-call counting.
+ *
+ * @param messages - Current conversation messages.
+ * @returns True when extraction should be triggered.
+ */
+export function shouldExtractMemory(messages: Message[]): boolean {
+  // Total context window tokens, the same measure autocompact uses.
+  const contextTokens = tokenCountWithEstimation(messages)
+
+  // Nothing happens until the conversation is large enough to initialize.
+  if (!isSessionMemoryInitialized()) {
+    if (!hasMetInitializationThreshold(contextTokens)) {
+      return false
+    }
+    markSessionMemoryInitialized()
+  }
+
+  // Context growth since the last extraction.
+  const enoughNewTokens = hasMetUpdateThreshold(contextTokens)
+
+  const toolCallsSoFar = countToolCallsSince(messages, lastMemoryMessageUuid)
+  const enoughToolCalls = toolCallsSoFar >= getToolCallsBetweenUpdates()
+
+  // No tool calls in the last assistant turn means it is safe to extract.
+  const atNaturalBreak = !hasToolCallsInLastAssistantTurn(messages)
+
+  // Tokens are mandatory; tool calls or a natural break supply the second
+  // condition. This keeps extractions from firing too often.
+  if (!enoughNewTokens || !(enoughToolCalls || atNaturalBreak)) {
+    return false
+  }
+
+  const newestUuid = messages[messages.length - 1]?.uuid
+  if (newestUuid) {
+    lastMemoryMessageUuid = newestUuid
+  }
+  return true
+}
+
+/**
+ * Ensures the session memory directory and file exist, then reads the file's
+ * current contents through FileReadTool.
+ *
+ * A newly created file is seeded with the loaded template. An existing file
+ * is left as it is.
+ *
+ * @param toolUseContext - Context handed to FileReadTool; its readFileState
+ *   entry for the memory file is dropped and then repopulated by the read.
+ * @returns The memory file path and its current text ('' when the read
+ *   result is not of type 'text').
+ * @throws Any file creation error other than EEXIST.
+ */
+async function setupSessionMemoryFile(
+  toolUseContext: ToolUseContext,
+): Promise<{ memoryPath: string; currentMemory: string }> {
+  const fs = getFsImplementation()
+
+  // Set up directory and file
+  // NOTE(review): mkdir is called without `recursive`; presumably the fs
+  // implementation tolerates an already existing directory - confirm.
+  const sessionMemoryDir = getSessionMemoryDir()
+  await fs.mkdir(sessionMemoryDir, { mode: 0o700 })
+
+  const memoryPath = getSessionMemoryPath()
+
+  // Create the memory file if it doesn't exist (wx = O_CREAT|O_EXCL)
+  try {
+    await writeFile(memoryPath, '', {
+      encoding: 'utf-8',
+      mode: 0o600,
+      flag: 'wx',
+    })
+    // Only load template if file was just created
+    const template = await loadSessionMemoryTemplate()
+    await writeFile(memoryPath, template, {
+      encoding: 'utf-8',
+      mode: 0o600,
+    })
+  } catch (e: unknown) {
+    // EEXIST means the file is already there, which is the normal case after
+    // the first extraction; every other failure is propagated.
+    const code = getErrnoCode(e)
+    if (code !== 'EEXIST') {
+      throw e
+    }
+  }
+
+  // Drop any cached entry so FileReadTool's dedup doesn't return a
+  // file_unchanged stub — we need the actual content. The Read repopulates it.
+  toolUseContext.readFileState.delete(memoryPath)
+  const result = await FileReadTool.call(
+    { file_path: memoryPath },
+    toolUseContext,
+  )
+  let currentMemory = ''
+
+  // Only a 'text' result carries file content; anything else leaves the
+  // notes as the empty string.
+  const output = result.data as FileReadToolOutput
+  if (output.type === 'text') {
+    currentMemory = output.file.content
+  }
+
+  logEvent('tengu_session_memory_file_read', {
+    content_length: currentMemory.length,
+  })
+
+  return { memoryPath, currentMemory }
+}
+
+/**
+ * Lazily applies the remote session memory config on top of the defaults.
+ * Wrapped in memoize, so the body runs once and later calls return at once.
+ * The remote values come from a cache and may be stale.
+ */
+const initSessionMemoryConfigIfNeeded = memoize((): void => {
+  const remote = getSessionMemoryRemoteConfig()
+  const defaults = DEFAULT_SESSION_MEMORY_CONFIG
+
+  // A remote value is honored only when it is set and greater than zero, so
+  // a missing or zero value can never override a sensible default.
+  const positiveOr = (
+    candidate: number | undefined,
+    fallback: number,
+  ): number => (candidate && candidate > 0 ? candidate : fallback)
+
+  const merged: SessionMemoryConfig = {
+    minimumMessageTokensToInit: positiveOr(
+      remote.minimumMessageTokensToInit,
+      defaults.minimumMessageTokensToInit,
+    ),
+    minimumTokensBetweenUpdate: positiveOr(
+      remote.minimumTokensBetweenUpdate,
+      defaults.minimumTokensBetweenUpdate,
+    ),
+    toolCallsBetweenUpdates: positiveOr(
+      remote.toolCallsBetweenUpdates,
+      defaults.toolCallsBetweenUpdates,
+    ),
+  }
+  setSessionMemoryConfig(merged)
+})
+
+// Track if we've logged the gate check failure this session (to avoid spam)
+let hasLoggedGateFailure = false
+
+/**
+ * Session memory post-sampling hook that extracts and updates session notes.
+ *
+ * Registered by initSessionMemory(). It returns without doing anything when
+ * the query does not come from the main REPL thread, when the feature gate is
+ * off, or when shouldExtractMemory() says it is not time yet. Otherwise it
+ * prepares the notes file, runs a forked agent that may only edit that file,
+ * and records analytics and bookkeeping.
+ *
+ * The in-progress marker set by markExtractionStarted() is always cleared in
+ * a `finally` block, matching manuallyExtractSessionMemory(). A failure during
+ * setup or in the forked agent therefore does not leave the extraction
+ * flagged as running. The error itself still propagates to the caller.
+ *
+ * NOTE(review): the function is wrapped in sequential(), presumably so that
+ * overlapping invocations run one at a time - confirm against utils/sequential.
+ */
+const extractSessionMemory = sequential(async function (
+  context: REPLHookContext,
+): Promise<void> {
+  const { messages, toolUseContext, querySource } = context
+
+  // Only run session memory on main REPL thread
+  if (querySource !== 'repl_main_thread') {
+    // Don't log this - it's expected for subagents, teammates, etc.
+    return
+  }
+
+  // Check gate lazily when hook runs (cached, non-blocking)
+  if (!isSessionMemoryGateEnabled()) {
+    // Log gate failure once per session (ant-only)
+    if (process.env.USER_TYPE === 'ant' && !hasLoggedGateFailure) {
+      hasLoggedGateFailure = true
+      logEvent('tengu_session_memory_gate_disabled', {})
+    }
+    return
+  }
+
+  // Initialize config from remote (lazy, only once)
+  initSessionMemoryConfigIfNeeded()
+
+  if (!shouldExtractMemory(messages)) {
+    return
+  }
+
+  markExtractionStarted()
+
+  try {
+    // Create isolated context for setup to avoid polluting parent's cache
+    const setupContext = createSubagentContext(toolUseContext)
+
+    // Set up file system and read current state with isolated context
+    const { memoryPath, currentMemory } =
+      await setupSessionMemoryFile(setupContext)
+
+    // Create extraction message
+    const userPrompt = await buildSessionMemoryUpdatePrompt(
+      currentMemory,
+      memoryPath,
+    )
+
+    // Run session memory extraction using runForkedAgent for prompt caching
+    // runForkedAgent creates an isolated context to prevent mutation of parent state
+    // Pass setupContext.readFileState so the forked agent can edit the memory file
+    await runForkedAgent({
+      promptMessages: [createUserMessage({ content: userPrompt })],
+      cacheSafeParams: createCacheSafeParams(context),
+      canUseTool: createMemoryFileCanUseTool(memoryPath),
+      querySource: 'session_memory',
+      forkLabel: 'session_memory',
+      overrides: { readFileState: setupContext.readFileState },
+    })
+
+    // Log extraction event for tracking frequency
+    // Use the token usage from the last message in the conversation
+    const lastMessage = messages[messages.length - 1]
+    const usage = lastMessage ? getTokenUsage(lastMessage) : undefined
+    const config = getSessionMemoryConfig()
+    logEvent('tengu_session_memory_extraction', {
+      input_tokens: usage?.input_tokens,
+      output_tokens: usage?.output_tokens,
+      cache_read_input_tokens: usage?.cache_read_input_tokens ?? undefined,
+      cache_creation_input_tokens:
+        usage?.cache_creation_input_tokens ?? undefined,
+      config_min_message_tokens_to_init: config.minimumMessageTokensToInit,
+      config_min_tokens_between_update: config.minimumTokensBetweenUpdate,
+      config_tool_calls_between_updates: config.toolCallsBetweenUpdates,
+    })
+
+    // Record the context size at extraction for tracking minimumTokensBetweenUpdate
+    recordExtractionTokenCount(tokenCountWithEstimation(messages))
+
+    // Update lastSummarizedMessageId after successful completion
+    updateLastSummarizedMessageIdIfSafe(messages)
+  } finally {
+    // Clear the in-progress marker on success and on failure alike.
+    markExtractionCompleted()
+  }
+})
+
+/**
+ * Registers the session memory post-sampling hook.
+ *
+ * Does nothing in remote mode or when auto-compact is disabled (session
+ * memory feeds compaction, so it follows that setting). Registration is
+ * synchronous to avoid race conditions during startup. The feature gate and
+ * remote config are evaluated lazily, when the hook actually runs.
+ */
+export function initSessionMemory(): void {
+  if (getIsRemoteMode()) {
+    return
+  }
+
+  const compactionOn = isAutoCompactEnabled()
+
+  // Initialization state is logged for ant users only, to keep external logs quiet.
+  if (process.env.USER_TYPE === 'ant') {
+    logEvent('tengu_session_memory_init', {
+      auto_compact_enabled: compactionOn,
+    })
+  }
+
+  if (compactionOn) {
+    // The hook is registered unconditionally; it checks the gate itself.
+    registerPostSamplingHook(extractSessionMemory)
+  }
+}
+
+/**
+ * Outcome of manuallyExtractSessionMemory().
+ */
+export type ManualExtractionResult = {
+  // True when the extraction ran to completion.
+  success: boolean
+  // Path of the notes file; set on success.
+  memoryPath?: string
+  // Human-readable failure description; set when success is false.
+  error?: string
+}
+
+/**
+ * Runs a session memory extraction right away, skipping the threshold checks
+ * that gate the automatic hook. Used by the /summary command.
+ *
+ * @param messages - Conversation to summarize. An empty array short-circuits
+ *   with an error result.
+ * @param toolUseContext - Parent context. Setup and the forked agent work on
+ *   a subagent context derived from it, so the parent's cache is not touched.
+ * @returns `{ success: true, memoryPath }` on completion, otherwise
+ *   `{ success: false, error }`. Failures are reported in the result and are
+ *   not thrown.
+ */
+export async function manuallyExtractSessionMemory(
+  messages: Message[],
+  toolUseContext: ToolUseContext,
+): Promise<ManualExtractionResult> {
+  if (messages.length === 0) {
+    return { success: false, error: 'No messages to summarize' }
+  }
+
+  markExtractionStarted()
+  try {
+    // Isolated context: keeps file reads out of the parent's cache.
+    const isolatedContext = createSubagentContext(toolUseContext)
+    const notesFile = await setupSessionMemoryFile(isolatedContext)
+    const updatePrompt = await buildSessionMemoryUpdatePrompt(
+      notesFile.currentMemory,
+      notesFile.memoryPath,
+    )
+
+    // The forked agent needs the same system prompt and contexts as the
+    // main loop for its cache-safe params.
+    const { tools, mainLoopModel } = toolUseContext.options
+    const [promptParts, userContext, systemContext] = await Promise.all([
+      getSystemPrompt(tools, mainLoopModel),
+      getUserContext(),
+      getSystemContext(),
+    ])
+    const systemPrompt = asSystemPrompt(promptParts)
+
+    await runForkedAgent({
+      promptMessages: [createUserMessage({ content: updatePrompt })],
+      cacheSafeParams: {
+        systemPrompt,
+        userContext,
+        systemContext,
+        toolUseContext: isolatedContext,
+        forkContextMessages: messages,
+      },
+      canUseTool: createMemoryFileCanUseTool(notesFile.memoryPath),
+      querySource: 'session_memory',
+      forkLabel: 'session_memory_manual',
+      overrides: { readFileState: isolatedContext.readFileState },
+    })
+
+    logEvent('tengu_session_memory_manual_extraction', {})
+
+    // Remember the context size so minimumTokensBetweenUpdate is measured
+    // from this extraction.
+    recordExtractionTokenCount(tokenCountWithEstimation(messages))
+    updateLastSummarizedMessageIdIfSafe(messages)
+
+    return { success: true, memoryPath: notesFile.memoryPath }
+  } catch (failure) {
+    return { success: false, error: errorMessage(failure) }
+  } finally {
+    markExtractionCompleted()
+  }
+}
+
+// Helper functions
+
+/**
+ * Builds the permission callback used by the note-taking agent: the only
+ * permitted call is the file edit tool with `file_path` exactly equal to the
+ * memory file.
+ *
+ * @param memoryPath - Path of the session memory file.
+ * @returns A canUseTool function that answers 'allow' (input passed through
+ *   unchanged) for that one call and 'deny' with an explanatory message for
+ *   everything else.
+ */
+export function createMemoryFileCanUseTool(memoryPath: string): CanUseToolFn {
+  return async (tool: Tool, input: unknown) => {
+    if (
+      tool.name === FILE_EDIT_TOOL_NAME &&
+      typeof input === 'object' &&
+      input !== null &&
+      'file_path' in input &&
+      input.file_path === memoryPath
+    ) {
+      return { behavior: 'allow' as const, updatedInput: input }
+    }
+
+    // The same text serves as the message and as the recorded decision reason.
+    const denialText = `only ${FILE_EDIT_TOOL_NAME} on ${memoryPath} is allowed`
+    return {
+      behavior: 'deny' as const,
+      message: denialText,
+      decisionReason: {
+        type: 'other' as const,
+        reason: denialText,
+      },
+    }
+  }
+}
+
+/**
+ * Records the last message as "summarized" after an extraction.
+ *
+ * Skipped when the last assistant turn contains tool calls, to avoid
+ * orphaned tool_results, and when the last message has no uuid.
+ *
+ * @param messages - Conversation messages the extraction was run on.
+ */
+function updateLastSummarizedMessageIdIfSafe(messages: Message[]): void {
+  if (hasToolCallsInLastAssistantTurn(messages)) {
+    return
+  }
+
+  const tailUuid = messages[messages.length - 1]?.uuid
+  if (tailUuid) {
+    setLastSummarizedMessageId(tailUuid)
+  }
+}
--- a/src/services/SessionMemory/sessionMemoryUtils.ts
+++ b/src/services/SessionMemory/sessionMemoryUtils.ts
@@ -0,0 +1,207 @@
+/**
+ * Session Memory utility functions that can be imported without circular dependencies.
+ * These are separate from the main sessionMemory.ts to avoid importing runAgent.
+ */
+
+import { isFsInaccessible } from '../../utils/errors.js'
+import { getFsImplementation } from '../../utils/fsOperations.js'
+import { getSessionMemoryPath } from '../../utils/permissions/filesystem.js'
+import { sleep } from '../../utils/sleep.js'
+import { logEvent } from '../analytics/index.js'
+
+// Maximum time waitForSessionMemoryExtraction() keeps polling before it
+// gives up and returns anyway (15 seconds).
+const EXTRACTION_WAIT_TIMEOUT_MS = 15000
+// An extraction whose start timestamp is older than this is treated as stale
+// by waitForSessionMemoryExtraction() and is not waited on.
+const EXTRACTION_STALE_THRESHOLD_MS = 60000 // 1 minute
+
+/**
+ * Configuration for session memory extraction thresholds.
+ * The active values are read through getSessionMemoryConfig() and changed
+ * through setSessionMemoryConfig().
+ */
+export type SessionMemoryConfig = {
+  /** Minimum context window tokens before initializing session memory.
+   * Uses the same token counting as autocompact (input + output + cache tokens)
+   * to ensure consistent behavior between the two features. */
+  minimumMessageTokensToInit: number
+  /** Minimum context window growth (in tokens) between session memory updates.
+   * Uses the same token counting as autocompact (tokenCountWithEstimation)
+   * to measure actual context growth, not cumulative API usage. */
+  minimumTokensBetweenUpdate: number
+  /** Number of tool calls between session memory updates */
+  toolCallsBetweenUpdates: number
+}
+
+// Default configuration values. Also the values restored by
+// resetSessionMemoryState().
+export const DEFAULT_SESSION_MEMORY_CONFIG: SessionMemoryConfig = {
+  minimumMessageTokensToInit: 10000,
+  minimumTokensBetweenUpdate: 5000,
+  toolCallsBetweenUpdates: 3,
+}
+
+// Current session memory configuration. Starts as a shallow copy of the
+// defaults so later updates never touch DEFAULT_SESSION_MEMORY_CONFIG itself.
+let sessionMemoryConfig: SessionMemoryConfig = {
+  ...DEFAULT_SESSION_MEMORY_CONFIG,
+}
+
+// Track the last summarized message ID (shared state).
+// undefined until setLastSummarizedMessageId() stores a value.
+let lastSummarizedMessageId: string | undefined
+
+// Track extraction state with timestamp (set by sessionMemory.ts).
+// Holds the Date.now() value captured by markExtractionStarted(); undefined
+// means no extraction is currently marked as running.
+let extractionStartedAt: number | undefined
+
+// Track context size at last memory extraction (for minimumTokensBetweenUpdate).
+// Written by recordExtractionTokenCount(), read by hasMetUpdateThreshold().
+let tokensAtLastExtraction = 0
+
+// Track whether session memory has been initialized (met minimumMessageTokensToInit).
+// Flipped to true by markSessionMemoryInitialized().
+let sessionMemoryInitialized = false
+
+/**
+ * Returns the ID of the message up to which the session memory is current,
+ * or undefined when no such ID has been recorded.
+ */
+export function getLastSummarizedMessageId(): string | undefined {
+  const summarizedUpTo = lastSummarizedMessageId
+  return summarizedUpTo
+}
+
+/**
+ * Stores the ID of the last summarized message (called from sessionMemory.ts).
+ *
+ * @param messageId - The new marker; passing undefined clears it.
+ */
+export function setLastSummarizedMessageId(
+  messageId: string | undefined,
+): void {
+  // Overwrites unconditionally, including with undefined.
+  lastSummarizedMessageId = messageId
+}
+
+/**
+ * Flags an extraction as in progress by recording the current wall-clock
+ * time as its start (called from sessionMemory.ts).
+ */
+export function markExtractionStarted(): void {
+  const startedAt = Date.now()
+  extractionStartedAt = startedAt
+}
+
+/**
+ * Clears the in-progress marker so that waiters stop polling
+ * (called from sessionMemory.ts).
+ */
+export function markExtractionCompleted(): void {
+  // undefined is the "no extraction running" state.
+  extractionStartedAt = undefined
+}
+
+/**
+ * Wait for any in-progress session memory extraction to complete.
+ *
+ * Resolves as soon as one of the following holds:
+ * - no extraction is marked as running,
+ * - the running extraction started more than EXTRACTION_STALE_THRESHOLD_MS ago
+ *   (it is considered stale and is not waited on),
+ * - this call has already waited longer than EXTRACTION_WAIT_TIMEOUT_MS.
+ *
+ * The state is polled between sleep(1000) calls, so the timeout is only
+ * checked at that granularity. Never rejects on timeout.
+ */
+export async function waitForSessionMemoryExtraction(): Promise<void> {
+  const waitStartedAt = Date.now()
+
+  // Compare against undefined instead of relying on truthiness: a start
+  // timestamp of 0 (e.g. under a mocked clock) still means "in progress".
+  while (extractionStartedAt !== undefined) {
+    const now = Date.now()
+
+    if (now - extractionStartedAt > EXTRACTION_STALE_THRESHOLD_MS) {
+      // Extraction is stale, don't wait
+      return
+    }
+
+    if (now - waitStartedAt > EXTRACTION_WAIT_TIMEOUT_MS) {
+      // Timeout - continue anyway
+      return
+    }
+
+    await sleep(1000)
+  }
+}
+
+/**
+ * Reads the session memory file as UTF-8 text.
+ *
+ * @returns The file content, or null when the read fails with an error that
+ *   isFsInaccessible() recognizes.
+ * @throws Any other error raised while reading is rethrown unchanged.
+ */
+export async function getSessionMemoryContent(): Promise<string | null> {
+  const fsImpl = getFsImplementation()
+  const notesPath = getSessionMemoryPath()
+
+  try {
+    const text = await fsImpl.readFile(notesPath, { encoding: 'utf-8' })
+
+    // Report the size of what was loaded.
+    logEvent('tengu_session_memory_loaded', {
+      content_length: text.length,
+    })
+
+    return text
+  } catch (err: unknown) {
+    if (!isFsInaccessible(err)) {
+      throw err
+    }
+    return null
+  }
+}
+
+/**
+ * Merge the given fields into the current session memory configuration.
+ *
+ * Fields that are absent from `config`, or present with an explicit
+ * `undefined` value, leave the current setting untouched. Without this guard
+ * an `undefined` would overwrite a valid number, and the threshold
+ * comparisons against that field would then always be false.
+ *
+ * @param config - Subset of configuration fields to override.
+ */
+export function setSessionMemoryConfig(
+  config: Partial<SessionMemoryConfig>,
+): void {
+  const next: SessionMemoryConfig = { ...sessionMemoryConfig }
+
+  for (const key of Object.keys(config) as (keyof SessionMemoryConfig)[]) {
+    const value = config[key]
+    // Partial<> permits explicit undefined; never let it clobber a setting.
+    if (value !== undefined) {
+      next[key] = value
+    }
+  }
+
+  sessionMemoryConfig = next
+}
+
+/**
+ * Returns a shallow copy of the current session memory configuration, so
+ * mutating the returned object does not affect the module's own settings.
+ */
+export function getSessionMemoryConfig(): SessionMemoryConfig {
+  const snapshot: SessionMemoryConfig = { ...sessionMemoryConfig }
+  return snapshot
+}
+
+/**
+ * Remembers the context size observed at extraction time.
+ * hasMetUpdateThreshold() later uses it as the baseline for measuring context
+ * growth against minimumTokensBetweenUpdate.
+ *
+ * @param currentTokenCount - Context size (in tokens) at the extraction.
+ */
+export function recordExtractionTokenCount(currentTokenCount: number): void {
+  // Replaces the previous baseline; nothing is accumulated.
+  tokensAtLastExtraction = currentTokenCount
+}
+
+/**
+ * Reports whether session memory has been marked as initialized
+ * (i.e. the minimumMessageTokensToInit threshold was met).
+ */
+export function isSessionMemoryInitialized(): boolean {
+  const initialized = sessionMemoryInitialized
+  return initialized
+}
+
+/**
+ * Flags session memory as initialized. The flag stays set until
+ * resetSessionMemoryState() clears it.
+ */
+export function markSessionMemoryInitialized(): void {
+  sessionMemoryInitialized = true
+}
+
+/**
+ * Tells whether the context is large enough to initialize session memory.
+ * Uses total context window tokens (same as autocompact) for consistent behavior.
+ *
+ * @param currentTokenCount - Current context size in tokens.
+ * @returns true when the count is at least minimumMessageTokensToInit.
+ */
+export function hasMetInitializationThreshold(
+  currentTokenCount: number,
+): boolean {
+  const { minimumMessageTokensToInit } = sessionMemoryConfig
+  return currentTokenCount >= minimumMessageTokensToInit
+}
+
+/**
+ * Tells whether the context has grown enough since the last extraction to
+ * warrant another update (same metric as autocompact and the initialization
+ * threshold).
+ *
+ * NOTE(review): if currentTokenCount is below the recorded baseline the growth
+ * is negative and this returns false for any positive threshold — confirm the
+ * baseline is re-recorded whenever the context can shrink.
+ *
+ * @param currentTokenCount - Current context size in tokens.
+ * @returns true when growth is at least minimumTokensBetweenUpdate.
+ */
+export function hasMetUpdateThreshold(currentTokenCount: number): boolean {
+  const { minimumTokensBetweenUpdate } = sessionMemoryConfig
+  const growth = currentTokenCount - tokensAtLastExtraction
+  return growth >= minimumTokensBetweenUpdate
+}
+
+/**
+ * Returns the currently configured number of tool calls between session
+ * memory updates.
+ */
+export function getToolCallsBetweenUpdates(): number {
+  const { toolCallsBetweenUpdates } = sessionMemoryConfig
+  return toolCallsBetweenUpdates
+}
+
+/**
+ * Restores every piece of module-level session memory state to its initial
+ * value (useful for testing): the configuration goes back to a copy of the
+ * defaults and all tracking variables are cleared.
+ */
+export function resetSessionMemoryState(): void {
+  // Tracking state
+  extractionStartedAt = undefined
+  lastSummarizedMessageId = undefined
+  sessionMemoryInitialized = false
+  tokensAtLastExtraction = 0
+
+  // Configuration
+  sessionMemoryConfig = { ...DEFAULT_SESSION_MEMORY_CONFIG }
+}