From a7e17f16ec72295283616ddb22cdb729dce79fed Mon Sep 17 00:00:00 2001 From: Mufeed VH Date: Wed, 2 Jul 2025 18:17:05 +0530 Subject: [PATCH] fix(agents): improve process termination with multi-layer kill strategy Resolves #87 and #9 by implementing a robust three-tier process termination approach: 1. ProcessRegistry kill - primary method using run_id tracking 2. ClaudeProcessState kill - fallback via stored process handle 3. System kill command - last resort using PID and OS commands Key improvements: - Enhanced logging throughout termination flow for better debugging - Graceful fallback between termination methods - Proper UI state management even when backend termination fails - Track run_id in AgentExecution component for targeted process killing - Comprehensive error handling with user-friendly feedback - Consistent event emission for UI synchronization This ensures agents can be properly stopped without requiring application restart, addressing the core issue where STOP requests were ignored and processes continued running. --- src-tauri/src/commands/claude.rs | 104 ++++++++++++++++++++------- src-tauri/src/process/registry.rs | 18 ++++- src/components/AgentExecution.tsx | 78 +++++++++++++++----- src/components/ClaudeCodeSession.tsx | 20 +++++- 4 files changed, 174 insertions(+), 46 deletions(-) diff --git a/src-tauri/src/commands/claude.rs b/src-tauri/src/commands/claude.rs index 3424cb5..b643227 100644 --- a/src-tauri/src/commands/claude.rs +++ b/src-tauri/src/commands/claude.rs @@ -917,25 +917,41 @@ pub async fn cancel_claude_execution( session_id ); - let killed = if let Some(sid) = &session_id { - // Try to find and kill via ProcessRegistry first - let registry = app.state::(); - if let Ok(Some(process_info)) = registry.0.get_claude_session_by_id(sid) { - match registry.0.kill_process(process_info.run_id).await { - Ok(success) => success, - Err(e) => { - log::warn!("Failed to kill via registry: {}", e); - false - } - } - } else { - false - } - } else { - false - }; + let mut killed = false; + let mut attempted_methods = Vec::new(); - // If registry kill didn't work, try the legacy approach + // Method 1: Try to find and kill via ProcessRegistry using session ID + if let Some(sid) = &session_id { + let registry = app.state::(); + match registry.0.get_claude_session_by_id(sid) { + Ok(Some(process_info)) => { + log::info!("Found process in registry for session {}: run_id={}, PID={}", + sid, process_info.run_id, process_info.pid); + match registry.0.kill_process(process_info.run_id).await { + Ok(success) => { + if success { + log::info!("Successfully killed process via registry"); + killed = true; + } else { + log::warn!("Registry kill returned false"); + } + } + Err(e) => { + log::warn!("Failed to kill via registry: {}", e); + } + } + attempted_methods.push("registry"); + } + Ok(None) => { + log::warn!("Session {} not found in ProcessRegistry", sid); + } + Err(e) => { + log::error!("Error querying ProcessRegistry: {}", e); + } + } + } + + // Method 2: Try the legacy approach via ClaudeProcessState if !killed { let claude_state = app.state::(); let mut current_process = claude_state.current_process.lock().await; @@ -943,24 +959,57 @@ pub async fn cancel_claude_execution( if let Some(mut child) = current_process.take() { // Try to get the PID before killing let pid = child.id(); - log::info!("Attempting to kill Claude process with PID: {:?}", pid); + log::info!("Attempting to kill Claude process via ClaudeProcessState with PID: {:?}", pid); // Kill the process match child.kill().await { Ok(_) => { - log::info!("Successfully killed Claude process"); + log::info!("Successfully killed Claude process via ClaudeProcessState"); + killed = true; } Err(e) => { - log::error!("Failed to kill Claude process: {}", e); - return Err(format!("Failed to kill Claude process: {}", e)); + log::error!("Failed to kill Claude process via ClaudeProcessState: {}", e); + + // Method 3: If we have a PID, try system kill as last resort + if let Some(pid) = pid { + log::info!("Attempting system kill as last resort for PID: {}", pid); + let kill_result = if cfg!(target_os = "windows") { + std::process::Command::new("taskkill") + .args(["/F", "/PID", &pid.to_string()]) + .output() + } else { + std::process::Command::new("kill") + .args(["-KILL", &pid.to_string()]) + .output() + }; + + match kill_result { + Ok(output) if output.status.success() => { + log::info!("Successfully killed process via system command"); + killed = true; + } + Ok(output) => { + let stderr = String::from_utf8_lossy(&output.stderr); + log::error!("System kill failed: {}", stderr); + } + Err(e) => { + log::error!("Failed to execute system kill command: {}", e); + } + } + } } } + attempted_methods.push("claude_state"); } else { - log::warn!("No active Claude process to cancel"); + log::warn!("No active Claude process in ClaudeProcessState"); } } - // Emit cancellation events + if !killed && attempted_methods.is_empty() { + log::warn!("No active Claude process found to cancel"); + } + + // Always emit cancellation events for UI consistency if let Some(sid) = session_id { let _ = app.emit(&format!("claude-cancelled:{}", sid), true); tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; @@ -972,6 +1021,12 @@ pub async fn cancel_claude_execution( tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; let _ = app.emit("claude-complete", false); + if killed { + log::info!("Claude process cancellation completed successfully"); + } else if !attempted_methods.is_empty() { + log::warn!("Claude process cancellation attempted but process may have already exited. Attempted methods: {:?}", attempted_methods); + } + Ok(()) } @@ -2063,3 +2118,4 @@ pub async fn track_session_messages( } Ok(()) } + diff --git a/src-tauri/src/process/registry.rs b/src-tauri/src/process/registry.rs index eb1257e..6423b90 100644 --- a/src-tauri/src/process/registry.rs +++ b/src-tauri/src/process/registry.rs @@ -213,6 +213,7 @@ impl ProcessRegistry { if let Some(handle) = processes.get(&run_id) { (handle.info.pid, handle.child.clone()) } else { + warn!("Process {} not found in registry", run_id); return Ok(false); // Process not found } }; @@ -233,16 +234,25 @@ impl ProcessRegistry { } Err(e) => { error!("Failed to send kill signal to process {}: {}", run_id, e); - return Err(format!("Failed to kill process: {}", e)); + // Don't return error here, try fallback method + false } } } else { - false // Process already killed + warn!("No child handle available for process {} (PID: {}), attempting system kill", run_id, pid); + false // Process handle not available, try fallback } }; + // If direct kill didn't work, try system command as fallback if !kill_sent { - return Ok(false); + info!("Attempting fallback kill for process {} (PID: {})", run_id, pid); + match self.kill_process_by_pid(run_id, pid) { + Ok(true) => return Ok(true), + Ok(false) => warn!("Fallback kill also failed for process {} (PID: {})", run_id, pid), + Err(e) => error!("Error during fallback kill: {}", e), + } + // Continue with the rest of the cleanup even if fallback failed } // Wait for the process to exit (with timeout) @@ -297,6 +307,8 @@ impl ProcessRegistry { if let Ok(mut child_guard) = child_arc.lock() { *child_guard = None; } + // One more attempt with system kill + let _ = self.kill_process_by_pid(run_id, pid); } } diff --git a/src/components/AgentExecution.tsx b/src/components/AgentExecution.tsx index de05488..8a515c6 100644 --- a/src/components/AgentExecution.tsx +++ b/src/components/AgentExecution.tsx @@ -92,6 +92,7 @@ export const AgentExecution: React.FC = ({ const fullscreenMessagesEndRef = useRef(null); const unlistenRefs = useRef([]); const elapsedTimeIntervalRef = useRef(null); + const [runId, setRunId] = useState(null); // Filter out messages that shouldn't be displayed const displayableMessages = React.useMemo(() => { @@ -266,24 +267,24 @@ export const AgentExecution: React.FC = ({ }; const handleExecute = async () => { - if (!projectPath || !task.trim()) return; - - let runId: number | null = null; - try { setIsRunning(true); - setError(null); + setExecutionStartTime(Date.now()); setMessages([]); setRawJsonlOutput([]); - setExecutionStartTime(Date.now()); - setElapsedTime(0); - setTotalTokens(0); - - // Execute the agent with model override and get run ID - runId = await api.executeAgent(agent.id!, projectPath, task, model); + setRunId(null); + + // Clear any existing listeners + unlistenRefs.current.forEach(unlisten => unlisten()); + unlistenRefs.current = []; + + // Execute the agent and get the run ID + const executionRunId = await api.executeAgent(agent.id!, projectPath, task, model); + console.log("Agent execution started with run ID:", executionRunId); + setRunId(executionRunId); // Set up event listeners with run ID isolation - const outputUnlisten = await listen(`agent-output:${runId}`, (event) => { + const outputUnlisten = await listen(`agent-output:${executionRunId}`, (event) => { try { // Store raw JSONL setRawJsonlOutput(prev => [...prev, event.payload]); @@ -296,12 +297,12 @@ export const AgentExecution: React.FC = ({ } }); - const errorUnlisten = await listen(`agent-error:${runId}`, (event) => { + const errorUnlisten = await listen(`agent-error:${executionRunId}`, (event) => { console.error("Agent error:", event.payload); setError(event.payload); }); - const completeUnlisten = await listen(`agent-complete:${runId}`, (event) => { + const completeUnlisten = await listen(`agent-complete:${executionRunId}`, (event) => { setIsRunning(false); setExecutionStartTime(null); if (!event.payload) { @@ -309,7 +310,7 @@ export const AgentExecution: React.FC = ({ } }); - const cancelUnlisten = await listen(`agent-cancelled:${runId}`, () => { + const cancelUnlisten = await listen(`agent-cancelled:${executionRunId}`, () => { setIsRunning(false); setExecutionStartTime(null); setError("Agent execution was cancelled"); @@ -318,16 +319,41 @@ export const AgentExecution: React.FC = ({ unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten]; } catch (err) { console.error("Failed to execute agent:", err); - setError("Failed to execute agent"); setIsRunning(false); setExecutionStartTime(null); + setRunId(null); + // Show error in messages + setMessages(prev => [...prev, { + type: "result", + subtype: "error", + is_error: true, + result: `Failed to execute agent: ${err instanceof Error ? err.message : 'Unknown error'}`, + duration_ms: 0, + usage: { + input_tokens: 0, + output_tokens: 0 + } + }]); } }; const handleStop = async () => { try { - // TODO: Implement actual stop functionality via API - // For now, just update the UI state + if (!runId) { + console.error("No run ID available to stop"); + return; + } + + // Call the API to kill the agent session + const success = await api.killAgentSession(runId); + + if (success) { + console.log(`Successfully stopped agent session ${runId}`); + } else { + console.warn(`Failed to stop agent session ${runId} - it may have already finished`); + } + + // Update UI state setIsRunning(false); setExecutionStartTime(null); @@ -349,6 +375,22 @@ export const AgentExecution: React.FC = ({ }]); } catch (err) { console.error("Failed to stop agent:", err); + // Still update UI state even if the backend call failed + setIsRunning(false); + setExecutionStartTime(null); + + // Show error message + setMessages(prev => [...prev, { + type: "result", + subtype: "error", + is_error: true, + result: `Failed to stop execution: ${err instanceof Error ? err.message : 'Unknown error'}`, + duration_ms: elapsedTime * 1000, + usage: { + input_tokens: totalTokens, + output_tokens: 0 + } + }]); } }; diff --git a/src/components/ClaudeCodeSession.tsx b/src/components/ClaudeCodeSession.tsx index 660358c..67a5b20 100644 --- a/src/components/ClaudeCodeSession.tsx +++ b/src/components/ClaudeCodeSession.tsx @@ -606,7 +606,25 @@ export const ClaudeCodeSession: React.FC = ({ setError(null); } catch (err) { console.error("Failed to cancel execution:", err); - setError("Failed to cancel execution"); + + // Even if backend fails, we should update UI to reflect stopped state + // Add error message but still stop the UI loading state + const errorMessage: ClaudeStreamMessage = { + type: "system", + subtype: "error", + result: `Failed to cancel execution: ${err instanceof Error ? err.message : 'Unknown error'}. The process may still be running in the background.`, + timestamp: new Date().toISOString() + }; + setMessages(prev => [...prev, errorMessage]); + + // Clean up listeners anyway + unlistenRefs.current.forEach(unlisten => unlisten()); + unlistenRefs.current = []; + + // Reset states to allow user to continue + setIsLoading(false); + hasActiveSessionRef.current = false; + setError(null); } finally { setIsCancelling(false); }