fix(agents): improve process termination with multi-layer kill strategy

Resolves #87 and #9 by implementing a robust three-tier process
termination approach:

1. ProcessRegistry kill - primary method using run_id tracking
2. ClaudeProcessState kill - fallback via stored process handle
3. System kill command - last resort using PID and OS commands

Key improvements:
- Enhanced logging throughout termination flow for better debugging
- Graceful fallback between termination methods
- Proper UI state management even when backend termination fails
- Track run_id in AgentExecution component for targeted process killing
- Comprehensive error handling with user-friendly feedback
- Consistent event emission for UI synchronization

This ensures agents can be properly stopped without requiring
application restart, addressing the core issue where STOP
requests were ignored and processes continued running.
This commit is contained in:
Mufeed VH
2025-07-02 18:17:05 +05:30
parent e8c54d7fad
commit a7e17f16ec
4 changed files with 174 additions and 46 deletions

View File

@@ -917,25 +917,41 @@ pub async fn cancel_claude_execution(
session_id
);
let killed = if let Some(sid) = &session_id {
// Try to find and kill via ProcessRegistry first
let registry = app.state::<crate::process::ProcessRegistryState>();
if let Ok(Some(process_info)) = registry.0.get_claude_session_by_id(sid) {
match registry.0.kill_process(process_info.run_id).await {
Ok(success) => success,
Err(e) => {
log::warn!("Failed to kill via registry: {}", e);
false
}
}
} else {
false
}
} else {
false
};
let mut killed = false;
let mut attempted_methods = Vec::new();
// If registry kill didn't work, try the legacy approach
// Method 1: Try to find and kill via ProcessRegistry using session ID
if let Some(sid) = &session_id {
let registry = app.state::<crate::process::ProcessRegistryState>();
match registry.0.get_claude_session_by_id(sid) {
Ok(Some(process_info)) => {
log::info!("Found process in registry for session {}: run_id={}, PID={}",
sid, process_info.run_id, process_info.pid);
match registry.0.kill_process(process_info.run_id).await {
Ok(success) => {
if success {
log::info!("Successfully killed process via registry");
killed = true;
} else {
log::warn!("Registry kill returned false");
}
}
Err(e) => {
log::warn!("Failed to kill via registry: {}", e);
}
}
attempted_methods.push("registry");
}
Ok(None) => {
log::warn!("Session {} not found in ProcessRegistry", sid);
}
Err(e) => {
log::error!("Error querying ProcessRegistry: {}", e);
}
}
}
// Method 2: Try the legacy approach via ClaudeProcessState
if !killed {
let claude_state = app.state::<ClaudeProcessState>();
let mut current_process = claude_state.current_process.lock().await;
@@ -943,24 +959,57 @@ pub async fn cancel_claude_execution(
if let Some(mut child) = current_process.take() {
// Try to get the PID before killing
let pid = child.id();
log::info!("Attempting to kill Claude process with PID: {:?}", pid);
log::info!("Attempting to kill Claude process via ClaudeProcessState with PID: {:?}", pid);
// Kill the process
match child.kill().await {
Ok(_) => {
log::info!("Successfully killed Claude process");
log::info!("Successfully killed Claude process via ClaudeProcessState");
killed = true;
}
Err(e) => {
log::error!("Failed to kill Claude process: {}", e);
return Err(format!("Failed to kill Claude process: {}", e));
log::error!("Failed to kill Claude process via ClaudeProcessState: {}", e);
// Method 3: If we have a PID, try system kill as last resort
if let Some(pid) = pid {
log::info!("Attempting system kill as last resort for PID: {}", pid);
let kill_result = if cfg!(target_os = "windows") {
std::process::Command::new("taskkill")
.args(["/F", "/PID", &pid.to_string()])
.output()
} else {
std::process::Command::new("kill")
.args(["-KILL", &pid.to_string()])
.output()
};
match kill_result {
Ok(output) if output.status.success() => {
log::info!("Successfully killed process via system command");
killed = true;
}
Ok(output) => {
let stderr = String::from_utf8_lossy(&output.stderr);
log::error!("System kill failed: {}", stderr);
}
Err(e) => {
log::error!("Failed to execute system kill command: {}", e);
}
}
}
}
}
attempted_methods.push("claude_state");
} else {
log::warn!("No active Claude process to cancel");
log::warn!("No active Claude process in ClaudeProcessState");
}
}
// Emit cancellation events
if !killed && attempted_methods.is_empty() {
log::warn!("No active Claude process found to cancel");
}
// Always emit cancellation events for UI consistency
if let Some(sid) = session_id {
let _ = app.emit(&format!("claude-cancelled:{}", sid), true);
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
@@ -972,6 +1021,12 @@ pub async fn cancel_claude_execution(
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
let _ = app.emit("claude-complete", false);
if killed {
log::info!("Claude process cancellation completed successfully");
} else if !attempted_methods.is_empty() {
log::warn!("Claude process cancellation attempted but process may have already exited. Attempted methods: {:?}", attempted_methods);
}
Ok(())
}
@@ -2063,3 +2118,4 @@ pub async fn track_session_messages(
}
Ok(())
}

View File

@@ -213,6 +213,7 @@ impl ProcessRegistry {
if let Some(handle) = processes.get(&run_id) {
(handle.info.pid, handle.child.clone())
} else {
warn!("Process {} not found in registry", run_id);
return Ok(false); // Process not found
}
};
@@ -233,16 +234,25 @@ impl ProcessRegistry {
}
Err(e) => {
error!("Failed to send kill signal to process {}: {}", run_id, e);
return Err(format!("Failed to kill process: {}", e));
// Don't return error here, try fallback method
false
}
}
} else {
false // Process already killed
warn!("No child handle available for process {} (PID: {}), attempting system kill", run_id, pid);
false // Process handle not available, try fallback
}
};
// If direct kill didn't work, try system command as fallback
if !kill_sent {
return Ok(false);
info!("Attempting fallback kill for process {} (PID: {})", run_id, pid);
match self.kill_process_by_pid(run_id, pid) {
Ok(true) => return Ok(true),
Ok(false) => warn!("Fallback kill also failed for process {} (PID: {})", run_id, pid),
Err(e) => error!("Error during fallback kill: {}", e),
}
// Continue with the rest of the cleanup even if fallback failed
}
// Wait for the process to exit (with timeout)
@@ -297,6 +307,8 @@ impl ProcessRegistry {
if let Ok(mut child_guard) = child_arc.lock() {
*child_guard = None;
}
// One more attempt with system kill
let _ = self.kill_process_by_pid(run_id, pid);
}
}

View File

@@ -92,6 +92,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
const fullscreenMessagesEndRef = useRef<HTMLDivElement>(null);
const unlistenRefs = useRef<UnlistenFn[]>([]);
const elapsedTimeIntervalRef = useRef<NodeJS.Timeout | null>(null);
const [runId, setRunId] = useState<number | null>(null);
// Filter out messages that shouldn't be displayed
const displayableMessages = React.useMemo(() => {
@@ -266,24 +267,24 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
};
const handleExecute = async () => {
if (!projectPath || !task.trim()) return;
let runId: number | null = null;
try {
setIsRunning(true);
setError(null);
setExecutionStartTime(Date.now());
setMessages([]);
setRawJsonlOutput([]);
setExecutionStartTime(Date.now());
setElapsedTime(0);
setTotalTokens(0);
// Execute the agent with model override and get run ID
runId = await api.executeAgent(agent.id!, projectPath, task, model);
setRunId(null);
// Clear any existing listeners
unlistenRefs.current.forEach(unlisten => unlisten());
unlistenRefs.current = [];
// Execute the agent and get the run ID
const executionRunId = await api.executeAgent(agent.id!, projectPath, task, model);
console.log("Agent execution started with run ID:", executionRunId);
setRunId(executionRunId);
// Set up event listeners with run ID isolation
const outputUnlisten = await listen<string>(`agent-output:${runId}`, (event) => {
const outputUnlisten = await listen<string>(`agent-output:${executionRunId}`, (event) => {
try {
// Store raw JSONL
setRawJsonlOutput(prev => [...prev, event.payload]);
@@ -296,12 +297,12 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
}
});
const errorUnlisten = await listen<string>(`agent-error:${runId}`, (event) => {
const errorUnlisten = await listen<string>(`agent-error:${executionRunId}`, (event) => {
console.error("Agent error:", event.payload);
setError(event.payload);
});
const completeUnlisten = await listen<boolean>(`agent-complete:${runId}`, (event) => {
const completeUnlisten = await listen<boolean>(`agent-complete:${executionRunId}`, (event) => {
setIsRunning(false);
setExecutionStartTime(null);
if (!event.payload) {
@@ -309,7 +310,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
}
});
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${runId}`, () => {
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${executionRunId}`, () => {
setIsRunning(false);
setExecutionStartTime(null);
setError("Agent execution was cancelled");
@@ -318,16 +319,41 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten];
} catch (err) {
console.error("Failed to execute agent:", err);
setError("Failed to execute agent");
setIsRunning(false);
setExecutionStartTime(null);
setRunId(null);
// Show error in messages
setMessages(prev => [...prev, {
type: "result",
subtype: "error",
is_error: true,
result: `Failed to execute agent: ${err instanceof Error ? err.message : 'Unknown error'}`,
duration_ms: 0,
usage: {
input_tokens: 0,
output_tokens: 0
}
}]);
}
};
const handleStop = async () => {
try {
// TODO: Implement actual stop functionality via API
// For now, just update the UI state
if (!runId) {
console.error("No run ID available to stop");
return;
}
// Call the API to kill the agent session
const success = await api.killAgentSession(runId);
if (success) {
console.log(`Successfully stopped agent session ${runId}`);
} else {
console.warn(`Failed to stop agent session ${runId} - it may have already finished`);
}
// Update UI state
setIsRunning(false);
setExecutionStartTime(null);
@@ -349,6 +375,22 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
}]);
} catch (err) {
console.error("Failed to stop agent:", err);
// Still update UI state even if the backend call failed
setIsRunning(false);
setExecutionStartTime(null);
// Show error message
setMessages(prev => [...prev, {
type: "result",
subtype: "error",
is_error: true,
result: `Failed to stop execution: ${err instanceof Error ? err.message : 'Unknown error'}`,
duration_ms: elapsedTime * 1000,
usage: {
input_tokens: totalTokens,
output_tokens: 0
}
}]);
}
};

View File

@@ -606,7 +606,25 @@ export const ClaudeCodeSession: React.FC<ClaudeCodeSessionProps> = ({
setError(null);
} catch (err) {
console.error("Failed to cancel execution:", err);
setError("Failed to cancel execution");
// Even if backend fails, we should update UI to reflect stopped state
// Add error message but still stop the UI loading state
const errorMessage: ClaudeStreamMessage = {
type: "system",
subtype: "error",
result: `Failed to cancel execution: ${err instanceof Error ? err.message : 'Unknown error'}. The process may still be running in the background.`,
timestamp: new Date().toISOString()
};
setMessages(prev => [...prev, errorMessage]);
// Clean up listeners anyway
unlistenRefs.current.forEach(unlisten => unlisten());
unlistenRefs.current = [];
// Reset states to allow user to continue
setIsLoading(false);
hasActiveSessionRef.current = false;
setError(null);
} finally {
setIsCancelling(false);
}