fix(agents): improve process termination with multi-layer kill strategy
Resolves #87 and #9 by implementing a robust three-tier process termination approach: 1. ProcessRegistry kill - primary method using run_id tracking 2. ClaudeProcessState kill - fallback via stored process handle 3. System kill command - last resort using PID and OS commands Key improvements: - Enhanced logging throughout termination flow for better debugging - Graceful fallback between termination methods - Proper UI state management even when backend termination fails - Track run_id in AgentExecution component for targeted process killing - Comprehensive error handling with user-friendly feedback - Consistent event emission for UI synchronization This ensures agents can be properly stopped without requiring application restart, addressing the core issue where STOP requests were ignored and processes continued running.
This commit is contained in:
@@ -917,25 +917,41 @@ pub async fn cancel_claude_execution(
|
|||||||
session_id
|
session_id
|
||||||
);
|
);
|
||||||
|
|
||||||
let killed = if let Some(sid) = &session_id {
|
let mut killed = false;
|
||||||
// Try to find and kill via ProcessRegistry first
|
let mut attempted_methods = Vec::new();
|
||||||
let registry = app.state::<crate::process::ProcessRegistryState>();
|
|
||||||
if let Ok(Some(process_info)) = registry.0.get_claude_session_by_id(sid) {
|
|
||||||
match registry.0.kill_process(process_info.run_id).await {
|
|
||||||
Ok(success) => success,
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!("Failed to kill via registry: {}", e);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
|
|
||||||
// If registry kill didn't work, try the legacy approach
|
// Method 1: Try to find and kill via ProcessRegistry using session ID
|
||||||
|
if let Some(sid) = &session_id {
|
||||||
|
let registry = app.state::<crate::process::ProcessRegistryState>();
|
||||||
|
match registry.0.get_claude_session_by_id(sid) {
|
||||||
|
Ok(Some(process_info)) => {
|
||||||
|
log::info!("Found process in registry for session {}: run_id={}, PID={}",
|
||||||
|
sid, process_info.run_id, process_info.pid);
|
||||||
|
match registry.0.kill_process(process_info.run_id).await {
|
||||||
|
Ok(success) => {
|
||||||
|
if success {
|
||||||
|
log::info!("Successfully killed process via registry");
|
||||||
|
killed = true;
|
||||||
|
} else {
|
||||||
|
log::warn!("Registry kill returned false");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Failed to kill via registry: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
attempted_methods.push("registry");
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
log::warn!("Session {} not found in ProcessRegistry", sid);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Error querying ProcessRegistry: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method 2: Try the legacy approach via ClaudeProcessState
|
||||||
if !killed {
|
if !killed {
|
||||||
let claude_state = app.state::<ClaudeProcessState>();
|
let claude_state = app.state::<ClaudeProcessState>();
|
||||||
let mut current_process = claude_state.current_process.lock().await;
|
let mut current_process = claude_state.current_process.lock().await;
|
||||||
@@ -943,24 +959,57 @@ pub async fn cancel_claude_execution(
|
|||||||
if let Some(mut child) = current_process.take() {
|
if let Some(mut child) = current_process.take() {
|
||||||
// Try to get the PID before killing
|
// Try to get the PID before killing
|
||||||
let pid = child.id();
|
let pid = child.id();
|
||||||
log::info!("Attempting to kill Claude process with PID: {:?}", pid);
|
log::info!("Attempting to kill Claude process via ClaudeProcessState with PID: {:?}", pid);
|
||||||
|
|
||||||
// Kill the process
|
// Kill the process
|
||||||
match child.kill().await {
|
match child.kill().await {
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
log::info!("Successfully killed Claude process");
|
log::info!("Successfully killed Claude process via ClaudeProcessState");
|
||||||
|
killed = true;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::error!("Failed to kill Claude process: {}", e);
|
log::error!("Failed to kill Claude process via ClaudeProcessState: {}", e);
|
||||||
return Err(format!("Failed to kill Claude process: {}", e));
|
|
||||||
|
// Method 3: If we have a PID, try system kill as last resort
|
||||||
|
if let Some(pid) = pid {
|
||||||
|
log::info!("Attempting system kill as last resort for PID: {}", pid);
|
||||||
|
let kill_result = if cfg!(target_os = "windows") {
|
||||||
|
std::process::Command::new("taskkill")
|
||||||
|
.args(["/F", "/PID", &pid.to_string()])
|
||||||
|
.output()
|
||||||
|
} else {
|
||||||
|
std::process::Command::new("kill")
|
||||||
|
.args(["-KILL", &pid.to_string()])
|
||||||
|
.output()
|
||||||
|
};
|
||||||
|
|
||||||
|
match kill_result {
|
||||||
|
Ok(output) if output.status.success() => {
|
||||||
|
log::info!("Successfully killed process via system command");
|
||||||
|
killed = true;
|
||||||
|
}
|
||||||
|
Ok(output) => {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
log::error!("System kill failed: {}", stderr);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("Failed to execute system kill command: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
attempted_methods.push("claude_state");
|
||||||
} else {
|
} else {
|
||||||
log::warn!("No active Claude process to cancel");
|
log::warn!("No active Claude process in ClaudeProcessState");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit cancellation events
|
if !killed && attempted_methods.is_empty() {
|
||||||
|
log::warn!("No active Claude process found to cancel");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always emit cancellation events for UI consistency
|
||||||
if let Some(sid) = session_id {
|
if let Some(sid) = session_id {
|
||||||
let _ = app.emit(&format!("claude-cancelled:{}", sid), true);
|
let _ = app.emit(&format!("claude-cancelled:{}", sid), true);
|
||||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||||
@@ -972,6 +1021,12 @@ pub async fn cancel_claude_execution(
|
|||||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||||
let _ = app.emit("claude-complete", false);
|
let _ = app.emit("claude-complete", false);
|
||||||
|
|
||||||
|
if killed {
|
||||||
|
log::info!("Claude process cancellation completed successfully");
|
||||||
|
} else if !attempted_methods.is_empty() {
|
||||||
|
log::warn!("Claude process cancellation attempted but process may have already exited. Attempted methods: {:?}", attempted_methods);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2063,3 +2118,4 @@ pub async fn track_session_messages(
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -213,6 +213,7 @@ impl ProcessRegistry {
|
|||||||
if let Some(handle) = processes.get(&run_id) {
|
if let Some(handle) = processes.get(&run_id) {
|
||||||
(handle.info.pid, handle.child.clone())
|
(handle.info.pid, handle.child.clone())
|
||||||
} else {
|
} else {
|
||||||
|
warn!("Process {} not found in registry", run_id);
|
||||||
return Ok(false); // Process not found
|
return Ok(false); // Process not found
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -233,16 +234,25 @@ impl ProcessRegistry {
|
|||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to send kill signal to process {}: {}", run_id, e);
|
error!("Failed to send kill signal to process {}: {}", run_id, e);
|
||||||
return Err(format!("Failed to kill process: {}", e));
|
// Don't return error here, try fallback method
|
||||||
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
false // Process already killed
|
warn!("No child handle available for process {} (PID: {}), attempting system kill", run_id, pid);
|
||||||
|
false // Process handle not available, try fallback
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// If direct kill didn't work, try system command as fallback
|
||||||
if !kill_sent {
|
if !kill_sent {
|
||||||
return Ok(false);
|
info!("Attempting fallback kill for process {} (PID: {})", run_id, pid);
|
||||||
|
match self.kill_process_by_pid(run_id, pid) {
|
||||||
|
Ok(true) => return Ok(true),
|
||||||
|
Ok(false) => warn!("Fallback kill also failed for process {} (PID: {})", run_id, pid),
|
||||||
|
Err(e) => error!("Error during fallback kill: {}", e),
|
||||||
|
}
|
||||||
|
// Continue with the rest of the cleanup even if fallback failed
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the process to exit (with timeout)
|
// Wait for the process to exit (with timeout)
|
||||||
@@ -297,6 +307,8 @@ impl ProcessRegistry {
|
|||||||
if let Ok(mut child_guard) = child_arc.lock() {
|
if let Ok(mut child_guard) = child_arc.lock() {
|
||||||
*child_guard = None;
|
*child_guard = None;
|
||||||
}
|
}
|
||||||
|
// One more attempt with system kill
|
||||||
|
let _ = self.kill_process_by_pid(run_id, pid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -92,6 +92,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
const fullscreenMessagesEndRef = useRef<HTMLDivElement>(null);
|
const fullscreenMessagesEndRef = useRef<HTMLDivElement>(null);
|
||||||
const unlistenRefs = useRef<UnlistenFn[]>([]);
|
const unlistenRefs = useRef<UnlistenFn[]>([]);
|
||||||
const elapsedTimeIntervalRef = useRef<NodeJS.Timeout | null>(null);
|
const elapsedTimeIntervalRef = useRef<NodeJS.Timeout | null>(null);
|
||||||
|
const [runId, setRunId] = useState<number | null>(null);
|
||||||
|
|
||||||
// Filter out messages that shouldn't be displayed
|
// Filter out messages that shouldn't be displayed
|
||||||
const displayableMessages = React.useMemo(() => {
|
const displayableMessages = React.useMemo(() => {
|
||||||
@@ -266,24 +267,24 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
};
|
};
|
||||||
|
|
||||||
const handleExecute = async () => {
|
const handleExecute = async () => {
|
||||||
if (!projectPath || !task.trim()) return;
|
|
||||||
|
|
||||||
let runId: number | null = null;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
setIsRunning(true);
|
setIsRunning(true);
|
||||||
setError(null);
|
setExecutionStartTime(Date.now());
|
||||||
setMessages([]);
|
setMessages([]);
|
||||||
setRawJsonlOutput([]);
|
setRawJsonlOutput([]);
|
||||||
setExecutionStartTime(Date.now());
|
setRunId(null);
|
||||||
setElapsedTime(0);
|
|
||||||
setTotalTokens(0);
|
// Clear any existing listeners
|
||||||
|
unlistenRefs.current.forEach(unlisten => unlisten());
|
||||||
// Execute the agent with model override and get run ID
|
unlistenRefs.current = [];
|
||||||
runId = await api.executeAgent(agent.id!, projectPath, task, model);
|
|
||||||
|
// Execute the agent and get the run ID
|
||||||
|
const executionRunId = await api.executeAgent(agent.id!, projectPath, task, model);
|
||||||
|
console.log("Agent execution started with run ID:", executionRunId);
|
||||||
|
setRunId(executionRunId);
|
||||||
|
|
||||||
// Set up event listeners with run ID isolation
|
// Set up event listeners with run ID isolation
|
||||||
const outputUnlisten = await listen<string>(`agent-output:${runId}`, (event) => {
|
const outputUnlisten = await listen<string>(`agent-output:${executionRunId}`, (event) => {
|
||||||
try {
|
try {
|
||||||
// Store raw JSONL
|
// Store raw JSONL
|
||||||
setRawJsonlOutput(prev => [...prev, event.payload]);
|
setRawJsonlOutput(prev => [...prev, event.payload]);
|
||||||
@@ -296,12 +297,12 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const errorUnlisten = await listen<string>(`agent-error:${runId}`, (event) => {
|
const errorUnlisten = await listen<string>(`agent-error:${executionRunId}`, (event) => {
|
||||||
console.error("Agent error:", event.payload);
|
console.error("Agent error:", event.payload);
|
||||||
setError(event.payload);
|
setError(event.payload);
|
||||||
});
|
});
|
||||||
|
|
||||||
const completeUnlisten = await listen<boolean>(`agent-complete:${runId}`, (event) => {
|
const completeUnlisten = await listen<boolean>(`agent-complete:${executionRunId}`, (event) => {
|
||||||
setIsRunning(false);
|
setIsRunning(false);
|
||||||
setExecutionStartTime(null);
|
setExecutionStartTime(null);
|
||||||
if (!event.payload) {
|
if (!event.payload) {
|
||||||
@@ -309,7 +310,7 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${runId}`, () => {
|
const cancelUnlisten = await listen<boolean>(`agent-cancelled:${executionRunId}`, () => {
|
||||||
setIsRunning(false);
|
setIsRunning(false);
|
||||||
setExecutionStartTime(null);
|
setExecutionStartTime(null);
|
||||||
setError("Agent execution was cancelled");
|
setError("Agent execution was cancelled");
|
||||||
@@ -318,16 +319,41 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten];
|
unlistenRefs.current = [outputUnlisten, errorUnlisten, completeUnlisten, cancelUnlisten];
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Failed to execute agent:", err);
|
console.error("Failed to execute agent:", err);
|
||||||
setError("Failed to execute agent");
|
|
||||||
setIsRunning(false);
|
setIsRunning(false);
|
||||||
setExecutionStartTime(null);
|
setExecutionStartTime(null);
|
||||||
|
setRunId(null);
|
||||||
|
// Show error in messages
|
||||||
|
setMessages(prev => [...prev, {
|
||||||
|
type: "result",
|
||||||
|
subtype: "error",
|
||||||
|
is_error: true,
|
||||||
|
result: `Failed to execute agent: ${err instanceof Error ? err.message : 'Unknown error'}`,
|
||||||
|
duration_ms: 0,
|
||||||
|
usage: {
|
||||||
|
input_tokens: 0,
|
||||||
|
output_tokens: 0
|
||||||
|
}
|
||||||
|
}]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleStop = async () => {
|
const handleStop = async () => {
|
||||||
try {
|
try {
|
||||||
// TODO: Implement actual stop functionality via API
|
if (!runId) {
|
||||||
// For now, just update the UI state
|
console.error("No run ID available to stop");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the API to kill the agent session
|
||||||
|
const success = await api.killAgentSession(runId);
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
console.log(`Successfully stopped agent session ${runId}`);
|
||||||
|
} else {
|
||||||
|
console.warn(`Failed to stop agent session ${runId} - it may have already finished`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update UI state
|
||||||
setIsRunning(false);
|
setIsRunning(false);
|
||||||
setExecutionStartTime(null);
|
setExecutionStartTime(null);
|
||||||
|
|
||||||
@@ -349,6 +375,22 @@ export const AgentExecution: React.FC<AgentExecutionProps> = ({
|
|||||||
}]);
|
}]);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Failed to stop agent:", err);
|
console.error("Failed to stop agent:", err);
|
||||||
|
// Still update UI state even if the backend call failed
|
||||||
|
setIsRunning(false);
|
||||||
|
setExecutionStartTime(null);
|
||||||
|
|
||||||
|
// Show error message
|
||||||
|
setMessages(prev => [...prev, {
|
||||||
|
type: "result",
|
||||||
|
subtype: "error",
|
||||||
|
is_error: true,
|
||||||
|
result: `Failed to stop execution: ${err instanceof Error ? err.message : 'Unknown error'}`,
|
||||||
|
duration_ms: elapsedTime * 1000,
|
||||||
|
usage: {
|
||||||
|
input_tokens: totalTokens,
|
||||||
|
output_tokens: 0
|
||||||
|
}
|
||||||
|
}]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -606,7 +606,25 @@ export const ClaudeCodeSession: React.FC<ClaudeCodeSessionProps> = ({
|
|||||||
setError(null);
|
setError(null);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Failed to cancel execution:", err);
|
console.error("Failed to cancel execution:", err);
|
||||||
setError("Failed to cancel execution");
|
|
||||||
|
// Even if backend fails, we should update UI to reflect stopped state
|
||||||
|
// Add error message but still stop the UI loading state
|
||||||
|
const errorMessage: ClaudeStreamMessage = {
|
||||||
|
type: "system",
|
||||||
|
subtype: "error",
|
||||||
|
result: `Failed to cancel execution: ${err instanceof Error ? err.message : 'Unknown error'}. The process may still be running in the background.`,
|
||||||
|
timestamp: new Date().toISOString()
|
||||||
|
};
|
||||||
|
setMessages(prev => [...prev, errorMessage]);
|
||||||
|
|
||||||
|
// Clean up listeners anyway
|
||||||
|
unlistenRefs.current.forEach(unlisten => unlisten());
|
||||||
|
unlistenRefs.current = [];
|
||||||
|
|
||||||
|
// Reset states to allow user to continue
|
||||||
|
setIsLoading(false);
|
||||||
|
hasActiveSessionRef.current = false;
|
||||||
|
setError(null);
|
||||||
} finally {
|
} finally {
|
||||||
setIsCancelling(false);
|
setIsCancelling(false);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user