From a7e17f16ec72295283616ddb22cdb729dce79fed Mon Sep 17 00:00:00 2001
From: Mufeed VH <mufeedvh@gmail.com>
Date: Wed, 2 Jul 2025 18:17:05 +0530
Subject: [PATCH] fix(agents): improve process termination with multi-layer
 kill strategy

Resolves #87 and #9 by implementing a robust three-tier process
termination approach:

1. ProcessRegistry kill - primary method using run_id tracking
2. ClaudeProcessState kill - fallback via stored process handle
3. System kill command - last resort using PID and OS commands

Key improvements:
- Enhanced logging throughout termination flow for better debugging
- Graceful fallback between termination methods
- Proper UI state management even when backend termination fails
- Track run_id in AgentExecution component for targeted process killing
- Comprehensive error handling with user-friendly feedback
- Consistent event emission for UI synchronization

This ensures agents can be properly stopped without requiring
application restart, addressing the core issue where STOP
requests were ignored and processes continued running.
---
 src-tauri/src/commands/claude.rs     | 104 ++++++++++++++++++++-------
 src-tauri/src/process/registry.rs    |  18 ++++-
 src/components/AgentExecution.tsx    |  78 +++++++++++++++-----
 src/components/ClaudeCodeSession.tsx |  20 +++++-
 4 files changed, 174 insertions(+), 46 deletions(-)

diff --git a/src-tauri/src/commands/claude.rs b/src-tauri/src/commands/claude.rs
index 3424cb5..b643227 100644
--- a/src-tauri/src/commands/claude.rs
+++ b/src-tauri/src/commands/claude.rs
@@ -917,25 +917,41 @@ pub async fn cancel_claude_execution(
         session_id
     );
 
-    let killed = if let Some(sid) = &session_id {
-        // Try to find and kill via ProcessRegistry first
-        let registry = app.state::<crate::process::ProcessRegistryState>();
-        if let Ok(Some(process_info)) = registry.0.get_claude_session_by_id(sid) {
-            match registry.0.kill_process(process_info.run_id).await {
-                Ok(success) => success,
-                Err(e) => {
-                    log::warn!("Failed to kill via registry: {}", e);
-                    false
-                }
-            }
-        } else {
-            false
-        }
-    } else {
-        false
-    };
+    let mut killed = false;
+    let mut attempted_methods = Vec::new();
 
-    // If registry kill didn't work, try the legacy approach
+    // Method 1: Try to find and kill via ProcessRegistry using session ID
+    if let Some(sid) = &session_id {
+        let registry = app.state::<crate::process::ProcessRegistryState>();
+        match registry.0.get_claude_session_by_id(sid) {
+            Ok(Some(process_info)) => {
+                log::info!("Found process in registry for session {}: run_id={}, PID={}", 
+                    sid, process_info.run_id, process_info.pid);
+                match registry.0.kill_process(process_info.run_id).await {
+                    Ok(success) => {
+                        if success {
+                            log::info!("Successfully killed process via registry");
+                            killed = true;
+                        } else {
+                            log::warn!("Registry kill returned false");
+                        }
+                    }
+                    Err(e) => {
+                        log::warn!("Failed to kill via registry: {}", e);
+                    }
+                }
+                attempted_methods.push("registry");
+            }
+            Ok(None) => {
+                log::warn!("Session {} not found in ProcessRegistry", sid);
+            }
+            Err(e) => {
+                log::error!("Error querying ProcessRegistry: {}", e);
+            }
+        }
+    }
+
+    // Method 2: Try the legacy approach via ClaudeProcessState
     if !killed {
         let claude_state = app.state::<ClaudeProcessState>();
         let mut current_process = claude_state.current_process.lock().await;
@@ -943,24 +959,57 @@ pub async fn cancel_claude_execution(
         if let Some(mut child) = current_process.take() {
             // Try to get the PID before killing
             let pid = child.id();
-            log::info!("Attempting to kill Claude process with PID: {:?}", pid);
+            log::info!("Attempting to kill Claude process via ClaudeProcessState with PID: {:?}", pid);
 
             // Kill the process
             match child.kill().await {
                 Ok(_) => {
-                    log::info!("Successfully killed Claude process");
+                    log::info!("Successfully killed Claude process via ClaudeProcessState");
+                    killed = true;
                 }
                 Err(e) => {
-                    log::error!("Failed to kill Claude process: {}", e);
-                    return Err(format!("Failed to kill Claude process: {}", e));
+                    log::error!("Failed to kill Claude process via ClaudeProcessState: {}", e);
+                    
+                    // Method 3: If we have a PID, try system kill as last resort
+                    if let Some(pid) = pid {
+                        log::info!("Attempting system kill as last resort for PID: {}", pid);
+                        let kill_result = if cfg!(target_os = "windows") {
+                            std::process::Command::new("taskkill")
+                                .args(["/F", "/PID", &pid.to_string()])
+                                .output()
+                        } else {
+                            std::process::Command::new("kill")
+                                .args(["-KILL", &pid.to_string()])
+                                .output()
+                        };
+                        
+                        match kill_result {
+                            Ok(output) if output.status.success() => {
+                                log::info!("Successfully killed process via system command");
+                                killed = true;
+                            }
+                            Ok(output) => {
+                                let stderr = String::from_utf8_lossy(&output.stderr);
+                                log::error!("System kill failed: {}", stderr);
+                            }
+                            Err(e) => {
+                                log::error!("Failed to execute system kill command: {}", e);
+                            }
+                        }
+                    }
                 }
             }
+            attempted_methods.push("claude_state");
         } else {
-            log::warn!("No active Claude process to cancel");
+            log::warn!("No active Claude process in ClaudeProcessState");
         }
     }
 
-    // Emit cancellation events
+    if !killed && attempted_methods.is_empty() {
+        log::warn!("No active Claude process found to cancel");
+    }
+
+    // Always emit cancellation events for UI consistency
     if let Some(sid) = session_id {
         let _ = app.emit(&format!("claude-cancelled:{}", sid), true);
         tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
@@ -972,6 +1021,12 @@ pub async fn cancel_claude_execution(
     tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
     let _ = app.emit("claude-complete", false);
     
+    if killed {
+        log::info!("Claude process cancellation completed successfully");
+    } else if !attempted_methods.is_empty() {
+        log::warn!("Claude process cancellation attempted but process may have already exited. Attempted methods: {:?}", attempted_methods);
+    }
+    
     Ok(())
 }
 
@@ -2063,3 +2118,4 @@ pub async fn track_session_messages(
     }
     Ok(())
 }
+
diff --git a/src-tauri/src/process/registry.rs b/src-tauri/src/process/registry.rs
index eb1257e..6423b90 100644
--- a/src-tauri/src/process/registry.rs
+++ b/src-tauri/src/process/registry.rs
@@ -213,6 +213,7 @@ impl ProcessRegistry {
             if let Some(handle) = processes.get(&run_id) {
                 (handle.info.pid, handle.child.clone())
             } else {
+                warn!("Process {} not found in registry", run_id);
                 return Ok(false); // Process not found
             }
         };
@@ -233,16 +234,25 @@ impl ProcessRegistry {
                     }
                     Err(e) => {
                         error!("Failed to send kill signal to process {}: {}", run_id, e);
-                        return Err(format!("Failed to kill process: {}", e));
+                        // Don't return error here, try fallback method
+                        false
                     }
                 }
             } else {
-                false // Process already killed
+                warn!("No child handle available for process {} (PID: {}), attempting system kill", run_id, pid);
+                false // Process handle not available, try fallback
             }
         };
 
+        // If direct kill didn't work, try system command as fallback
         if !kill_sent {
-            return Ok(false);
+            info!("Attempting fallback kill for process {} (PID: {})", run_id, pid);
+            match self.kill_process_by_pid(run_id, pid) {
+                Ok(true) => return Ok(true),
+                Ok(false) => warn!("Fallback kill also failed for process {} (PID: {})", run_id, pid),
+                Err(e) => error!("Error during fallback kill: {}", e),
+            }
+            // Continue with the rest of the cleanup even if fallback failed
         }
 
         // Wait for the process to exit (with timeout)
@@ -297,6 +307,8 @@ impl ProcessRegistry {
                 if let Ok(mut child_guard) = child_arc.lock() {
                     *child_guard = None;
                 }
+                // One more attempt with system kill
+                let _ = self.kill_process_by_pid(run_id, pid);
             }
         }
 
diff --git a/src/components/AgentExecution.tsx b/src/components/AgentExecution.tsx
index de05488..8a515c6 100644
--- a/src/components/AgentExecution.tsx
+++ b/src/components/AgentExecution.tsx
@@ -92,6 +92,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
   const fullscreenMessagesEndRef = useRef<HTMLDivElement>(null);
   const unlistenRefs = useRef<UnlistenFn[]>([]);
   const elapsedTimeIntervalRef = useRef<NodeJS.Timeout | null>(null);
+  const [runId, setRunId] = useState<number | null>(null);
 
   // Filter out messages that shouldn't be displayed
   const displayableMessages = React.useMemo(() => {
@@ -266,24 +267,24 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
   };
 
   const handleExecute = async () => {
-    if (!projectPath || !task.trim()) return;
-
-    let runId: number | null = null;
-    
     try {
       setIsRunning(true);
-      setError(null);
+      setExecutionStartTime(Date.now());
       setMessages([]);
       setRawJsonlOutput([]);
-      setExecutionStartTime(Date.now());
-      setElapsedTime(0);
-      setTotalTokens(0);
-
-      // Execute the agent with model override and get run ID
-      runId = await api.executeAgent(agent.id!, projectPath, task, model);
+      setRunId(null);
+      
+      // Clear any existing listeners
+      unlistenRefs.current.forEach(unlisten => unlisten());
+      unlistenRefs.current = [];
+      
+      // Execute the agent and get the run ID
+      const executionRunId = await api.executeAgent(agent.id!, projectPath, task, model);
+      console.log("Agent execution started with run ID:", executionRunId);
+      setRunId(executionRunId);
       
       // Set up event listeners with run ID isolation
-      const outputUnlisten = await listen<string>(`agent-output:${runId}`, (event) => {
+      const outputUnlisten = await listen<string>(`agent-output:${executionRunId}`, (event) => {
         try {
           // Store raw JSONL
           setRawJsonlOutput(prev => [...prev, event.payload]);
@@ -296,12 +297,12 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
         }
       });
 
-      const errorUnlisten = await listen<string>(`agent-error:${runId}`, (event) => {
+      const errorUnlisten = await listen<string>(`agent-error:${executionRunId}`, (event) => {
         console.error("Agent error:", event.payload);
         setError(event.payload);
       });
 
-      const completeUnlisten = await listen<boolean>(`agent-complete:${runId}`, (event) => {
+      const completeUnlisten = await listen<boolean>(`agent-complete:${executionRunId}`, (event) => {
         setIsRunning(false);
         setExecutionStartTime(null);
         if (!event.payload) {
@@ -309,7 +310,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
         }
       });
 
-      const cancelUnlisten = await listen<boolean>(`agent-cancelled:${runId}`, () => {
+      const cancelUnlisten = await listen<boolean>(`agent-cancelled:${executionRunId}`, () => {
         setIsRunning(false);
         setExecutionStartTime(null);
         setError("Agent execution was cancelled");
@@ -318,16 +319,41 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
       unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten];
     } catch (err) {
       console.error("Failed to execute agent:", err);
-      setError("Failed to execute agent");
       setIsRunning(false);
       setExecutionStartTime(null);
+      setRunId(null);
+      // Show error in messages
+      setMessages(prev => [...prev, {
+        type: "result",
+        subtype: "error",
+        is_error: true,
+        result: `Failed to execute agent: ${err instanceof Error ? err.message : 'Unknown error'}`,
+        duration_ms: 0,
+        usage: {
+          input_tokens: 0,
+          output_tokens: 0
+        }
+      }]);
     }
   };
 
   const handleStop = async () => {
     try {
-      // TODO: Implement actual stop functionality via API
-      // For now, just update the UI state
+      if (!runId) {
+        console.error("No run ID available to stop");
+        return;
+      }
+
+      // Call the API to kill the agent session
+      const success = await api.killAgentSession(runId);
+      
+      if (success) {
+        console.log(`Successfully stopped agent session ${runId}`);
+      } else {
+        console.warn(`Failed to stop agent session ${runId} - it may have already finished`);
+      }
+      
+      // Update UI state
       setIsRunning(false);
       setExecutionStartTime(null);
       
@@ -349,6 +375,22 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
       }]);
     } catch (err) {
       console.error("Failed to stop agent:", err);
+      // Still update UI state even if the backend call failed
+      setIsRunning(false);
+      setExecutionStartTime(null);
+      
+      // Show error message
+      setMessages(prev => [...prev, {
+        type: "result",
+        subtype: "error",
+        is_error: true,
+        result: `Failed to stop execution: ${err instanceof Error ? err.message : 'Unknown error'}`,
+        duration_ms: elapsedTime * 1000,
+        usage: {
+          input_tokens: totalTokens,
+          output_tokens: 0
+        }
+      }]);
     }
   };
 
diff --git a/src/components/ClaudeCodeSession.tsx b/src/components/ClaudeCodeSession.tsx
index 660358c..67a5b20 100644
--- a/src/components/ClaudeCodeSession.tsx
+++ b/src/components/ClaudeCodeSession.tsx
@@ -606,7 +606,25 @@ export const ClaudeCodeSession: React.FC<ClaudeCodeSessionProps> = ({
       setError(null);
     } catch (err) {
       console.error("Failed to cancel execution:", err);
-      setError("Failed to cancel execution");
+      
+      // Even if backend fails, we should update UI to reflect stopped state
+      // Add error message but still stop the UI loading state
+      const errorMessage: ClaudeStreamMessage = {
+        type: "system",
+        subtype: "error",
+        result: `Failed to cancel execution: ${err instanceof Error ? err.message : 'Unknown error'}. The process may still be running in the background.`,
+        timestamp: new Date().toISOString()
+      };
+      setMessages(prev => [...prev, errorMessage]);
+      
+      // Clean up listeners anyway
+      unlistenRefs.current.forEach(unlisten => unlisten());
+      unlistenRefs.current = [];
+      
+      // Reset states to allow user to continue
+      setIsLoading(false);
+      hasActiveSessionRef.current = false;
+      setError(null);
     } finally {
       setIsCancelling(false);
     }