claudia-old/src-tauri/src/checkpoint/storage.rs
Commit bcffce0a08 by Mufeed VH (2025-06-25 03:45:59 +05:30)

style: apply cargo fmt across entire Rust codebase

- Remove Rust formatting check from CI workflow since formatting is now applied
- Standardize import ordering and organization throughout codebase
- Fix indentation, spacing, and line breaks for consistency
- Clean up trailing whitespace and formatting inconsistencies
- Apply rustfmt to all Rust source files including checkpoint, sandbox, commands, and test modules

This establishes a consistent code style baseline for the project.


use anyhow::{Context, Result};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use uuid::Uuid;
use zstd::stream::{decode_all, encode_all};

use super::{
    Checkpoint, CheckpointPaths, CheckpointResult, FileSnapshot, SessionTimeline, TimelineNode,
};
/// Manages checkpoint storage operations
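///
/// # Example
///
/// A minimal usage sketch (illustrative only: the directory and identifiers
/// below are placeholders rather than values used elsewhere in the project):
///
/// ```ignore
/// let storage = CheckpointStorage::new(PathBuf::from("/home/user/.claude"));
/// storage.init_storage("my-project", "my-session")?;
///
/// let hash = CheckpointStorage::calculate_file_hash("fn main() {}");
/// assert_eq!(hash.len(), 64); // SHA-256 digests render as 64 hex characters
/// ```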
pub struct CheckpointStorage {
    pub claude_dir: PathBuf,
    compression_level: i32,
}

impl CheckpointStorage {
    /// Create a new checkpoint storage instance
    pub fn new(claude_dir: PathBuf) -> Self {
        Self {
            claude_dir,
            compression_level: 3, // Default zstd compression level
        }
    }

    /// Initialize checkpoint storage for a session
    pub fn init_storage(&self, project_id: &str, session_id: &str) -> Result<()> {
        let paths = CheckpointPaths::new(&self.claude_dir, project_id, session_id);

        // Create directory structure
        fs::create_dir_all(&paths.checkpoints_dir)
            .context("Failed to create checkpoints directory")?;
        fs::create_dir_all(&paths.files_dir).context("Failed to create files directory")?;

        // Initialize empty timeline if it doesn't exist
        if !paths.timeline_file.exists() {
            let timeline = SessionTimeline::new(session_id.to_string());
            self.save_timeline(&paths.timeline_file, &timeline)?;
        }

        Ok(())
    }

    /// Save a checkpoint to disk
    pub fn save_checkpoint(
        &self,
        project_id: &str,
        session_id: &str,
        checkpoint: &Checkpoint,
        file_snapshots: Vec<FileSnapshot>,
        messages: &str, // JSONL content up to checkpoint
    ) -> Result<CheckpointResult> {
        let paths = CheckpointPaths::new(&self.claude_dir, project_id, session_id);
        let checkpoint_dir = paths.checkpoint_dir(&checkpoint.id);

        // Create checkpoint directory
        fs::create_dir_all(&checkpoint_dir).context("Failed to create checkpoint directory")?;

        // Save checkpoint metadata
        let metadata_path = paths.checkpoint_metadata_file(&checkpoint.id);
        let metadata_json = serde_json::to_string_pretty(checkpoint)
            .context("Failed to serialize checkpoint metadata")?;
        fs::write(&metadata_path, metadata_json).context("Failed to write checkpoint metadata")?;

        // Save messages (compressed)
        let messages_path = paths.checkpoint_messages_file(&checkpoint.id);
        let compressed_messages = encode_all(messages.as_bytes(), self.compression_level)
            .context("Failed to compress messages")?;
        fs::write(&messages_path, compressed_messages)
            .context("Failed to write compressed messages")?;

        // Save file snapshots
        let mut warnings = Vec::new();
        let mut files_processed = 0;
        for snapshot in &file_snapshots {
            match self.save_file_snapshot(&paths, snapshot) {
                Ok(_) => files_processed += 1,
                Err(e) => warnings.push(format!(
                    "Failed to save {}: {}",
                    snapshot.file_path.display(),
                    e
                )),
            }
        }

        // Update timeline
        self.update_timeline_with_checkpoint(&paths.timeline_file, checkpoint, &file_snapshots)?;

        Ok(CheckpointResult {
            checkpoint: checkpoint.clone(),
            files_processed,
            warnings,
        })
    }

    /// Save a single file snapshot
    fn save_file_snapshot(&self, paths: &CheckpointPaths, snapshot: &FileSnapshot) -> Result<()> {
        // Use content-addressable storage: store files by their hash
        // This prevents duplication of identical file content across checkpoints
        let content_pool_dir = paths.files_dir.join("content_pool");
        fs::create_dir_all(&content_pool_dir).context("Failed to create content pool directory")?;

        // Store the actual content in the content pool
        let content_file = content_pool_dir.join(&snapshot.hash);

        // Only write the content if it doesn't already exist
        if !content_file.exists() {
            // Compress and save file content
            let compressed_content =
                encode_all(snapshot.content.as_bytes(), self.compression_level)
                    .context("Failed to compress file content")?;
            fs::write(&content_file, compressed_content)
                .context("Failed to write file content to pool")?;
        }

        // Create a reference in the checkpoint-specific directory
        let checkpoint_refs_dir = paths.files_dir.join("refs").join(&snapshot.checkpoint_id);
        fs::create_dir_all(&checkpoint_refs_dir)
            .context("Failed to create checkpoint refs directory")?;

        // Save file metadata with reference to content
        let ref_metadata = serde_json::json!({
            "path": snapshot.file_path,
            "hash": snapshot.hash,
            "is_deleted": snapshot.is_deleted,
            "permissions": snapshot.permissions,
            "size": snapshot.size,
        });

        // Use a sanitized filename for the reference
        let safe_filename = snapshot
            .file_path
            .to_string_lossy()
            .replace('/', "_")
            .replace('\\', "_");
        let ref_path = checkpoint_refs_dir.join(format!("{}.json", safe_filename));
        fs::write(&ref_path, serde_json::to_string_pretty(&ref_metadata)?)
            .context("Failed to write file reference")?;

        Ok(())
    }
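
    // For reference, `save_file_snapshot` (above) and `load_file_snapshots`
    // (below) share an on-disk layout under `paths.files_dir` that looks
    // roughly like this (directory names come from the code; the hash and the
    // reference file name are illustrative):
    //
    //   <files_dir>/
    //     content_pool/
    //       3fa85f64...           zstd-compressed content, keyed by SHA-256 hash
    //     refs/
    //       <checkpoint-id>/
    //         src_main.rs.json    per-file metadata pointing at a content_pool entry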
    /// Load a checkpoint from disk
    pub fn load_checkpoint(
        &self,
        project_id: &str,
        session_id: &str,
        checkpoint_id: &str,
    ) -> Result<(Checkpoint, Vec<FileSnapshot>, String)> {
        let paths = CheckpointPaths::new(&self.claude_dir, project_id, session_id);

        // Load checkpoint metadata
        let metadata_path = paths.checkpoint_metadata_file(checkpoint_id);
        let metadata_json =
            fs::read_to_string(&metadata_path).context("Failed to read checkpoint metadata")?;
        let checkpoint: Checkpoint =
            serde_json::from_str(&metadata_json).context("Failed to parse checkpoint metadata")?;

        // Load messages
        let messages_path = paths.checkpoint_messages_file(checkpoint_id);
        let compressed_messages =
            fs::read(&messages_path).context("Failed to read compressed messages")?;
        let messages = String::from_utf8(
            decode_all(&compressed_messages[..]).context("Failed to decompress messages")?,
        )
        .context("Invalid UTF-8 in messages")?;

        // Load file snapshots
        let file_snapshots = self.load_file_snapshots(&paths, checkpoint_id)?;

        Ok((checkpoint, file_snapshots, messages))
    }

    /// Load all file snapshots for a checkpoint
    fn load_file_snapshots(
        &self,
        paths: &CheckpointPaths,
        checkpoint_id: &str,
    ) -> Result<Vec<FileSnapshot>> {
        let refs_dir = paths.files_dir.join("refs").join(checkpoint_id);
        if !refs_dir.exists() {
            return Ok(Vec::new());
        }

        let content_pool_dir = paths.files_dir.join("content_pool");
        let mut snapshots = Vec::new();

        // Read all reference files
        for entry in fs::read_dir(&refs_dir)? {
            let entry = entry?;
            let path = entry.path();

            // Skip non-JSON files
            if path.extension().and_then(|e| e.to_str()) != Some("json") {
                continue;
            }

            // Load reference metadata
            let ref_json = fs::read_to_string(&path).context("Failed to read file reference")?;
            let ref_metadata: serde_json::Value =
                serde_json::from_str(&ref_json).context("Failed to parse file reference")?;
            let hash = ref_metadata["hash"]
                .as_str()
                .ok_or_else(|| anyhow::anyhow!("Missing hash in reference"))?;

            // Load content from pool
            let content_file = content_pool_dir.join(hash);
            let content = if content_file.exists() {
                let compressed_content =
                    fs::read(&content_file).context("Failed to read file content from pool")?;
                String::from_utf8(
                    decode_all(&compressed_content[..])
                        .context("Failed to decompress file content")?,
                )
                .context("Invalid UTF-8 in file content")?
            } else {
                // Handle missing content gracefully
                log::warn!("Content file missing for hash: {}", hash);
                String::new()
            };

            snapshots.push(FileSnapshot {
                checkpoint_id: checkpoint_id.to_string(),
                file_path: PathBuf::from(ref_metadata["path"].as_str().unwrap_or("")),
                content,
                hash: hash.to_string(),
                is_deleted: ref_metadata["is_deleted"].as_bool().unwrap_or(false),
                permissions: ref_metadata["permissions"].as_u64().map(|p| p as u32),
                size: ref_metadata["size"].as_u64().unwrap_or(0),
            });
        }

        Ok(snapshots)
    }

    /// Save timeline to disk
    pub fn save_timeline(&self, timeline_path: &Path, timeline: &SessionTimeline) -> Result<()> {
        let timeline_json =
            serde_json::to_string_pretty(timeline).context("Failed to serialize timeline")?;
        fs::write(timeline_path, timeline_json).context("Failed to write timeline")?;
        Ok(())
    }

    /// Load timeline from disk
    pub fn load_timeline(&self, timeline_path: &Path) -> Result<SessionTimeline> {
        let timeline_json = fs::read_to_string(timeline_path).context("Failed to read timeline")?;
        let timeline: SessionTimeline =
            serde_json::from_str(&timeline_json).context("Failed to parse timeline")?;
        Ok(timeline)
    }

    /// Update timeline with a new checkpoint
    fn update_timeline_with_checkpoint(
        &self,
        timeline_path: &Path,
        checkpoint: &Checkpoint,
        file_snapshots: &[FileSnapshot],
    ) -> Result<()> {
        let mut timeline = self.load_timeline(timeline_path)?;

        let new_node = TimelineNode {
            checkpoint: checkpoint.clone(),
            children: Vec::new(),
            file_snapshot_ids: file_snapshots.iter().map(|s| s.hash.clone()).collect(),
        };

        // If this is the first checkpoint
        if timeline.root_node.is_none() {
            timeline.root_node = Some(new_node);
            timeline.current_checkpoint_id = Some(checkpoint.id.clone());
        } else if let Some(parent_id) = &checkpoint.parent_checkpoint_id {
            // Check if parent exists before modifying
            let parent_exists = timeline.find_checkpoint(parent_id).is_some();
            if parent_exists {
                if let Some(root) = &mut timeline.root_node {
                    Self::add_child_to_node(root, parent_id, new_node)?;
                    timeline.current_checkpoint_id = Some(checkpoint.id.clone());
                }
            } else {
                anyhow::bail!("Parent checkpoint not found: {}", parent_id);
            }
        }

        timeline.total_checkpoints += 1;
        self.save_timeline(timeline_path, &timeline)?;

        Ok(())
    }
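
    // Note that the timeline forms a tree rather than a flat list: a new
    // checkpoint whose parent already has a child simply becomes an additional
    // branch under that parent, e.g. (IDs abbreviated for illustration):
    //
    //   root
    //    └─ c1
    //        ├─ c2
    //        └─ c3   (a second branch forked from c1)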
    /// Recursively add a child node to the timeline tree
    fn add_child_to_node(
        node: &mut TimelineNode,
        parent_id: &str,
        child: TimelineNode,
    ) -> Result<()> {
        if node.checkpoint.id == parent_id {
            node.children.push(child);
            return Ok(());
        }

        for child_node in &mut node.children {
            if Self::add_child_to_node(child_node, parent_id, child.clone()).is_ok() {
                return Ok(());
            }
        }

        anyhow::bail!("Parent checkpoint not found: {}", parent_id)
    }

    /// Calculate hash of file content
    pub fn calculate_file_hash(content: &str) -> String {
        let mut hasher = Sha256::new();
        hasher.update(content.as_bytes());
        format!("{:x}", hasher.finalize())
    }

    /// Generate a new checkpoint ID
    pub fn generate_checkpoint_id() -> String {
        Uuid::new_v4().to_string()
    }

    /// Estimate storage size for a checkpoint
    pub fn estimate_checkpoint_size(messages: &str, file_snapshots: &[FileSnapshot]) -> u64 {
        let messages_size = messages.len() as u64;
        let files_size: u64 = file_snapshots.iter().map(|s| s.content.len() as u64).sum();

        // Estimate compressed size (typically 20-30% of original for text)
        (messages_size + files_size) / 4
    }

    /// Clean up old checkpoints based on retention policy
    pub fn cleanup_old_checkpoints(
        &self,
        project_id: &str,
        session_id: &str,
        keep_count: usize,
    ) -> Result<usize> {
        let paths = CheckpointPaths::new(&self.claude_dir, project_id, session_id);
        let timeline = self.load_timeline(&paths.timeline_file)?;

        // Collect all checkpoint IDs in chronological order
        let mut all_checkpoints = Vec::new();
        if let Some(root) = &timeline.root_node {
            Self::collect_checkpoints(root, &mut all_checkpoints);
        }

        // Sort by timestamp (oldest first)
        all_checkpoints.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));

        // Keep only the most recent checkpoints
        let to_remove = all_checkpoints.len().saturating_sub(keep_count);
        let mut removed_count = 0;
        for checkpoint in all_checkpoints.into_iter().take(to_remove) {
            if self.remove_checkpoint(&paths, &checkpoint.id).is_ok() {
                removed_count += 1;
            }
        }

        // Run garbage collection to clean up orphaned content
        if removed_count > 0 {
            match self.garbage_collect_content(project_id, session_id) {
                Ok(gc_count) => {
                    log::info!("Garbage collected {} orphaned content files", gc_count);
                }
                Err(e) => {
                    log::warn!("Failed to garbage collect content: {}", e);
                }
            }
        }

        Ok(removed_count)
    }

    /// Collect all checkpoints from the tree in order
    fn collect_checkpoints(node: &TimelineNode, checkpoints: &mut Vec<Checkpoint>) {
        checkpoints.push(node.checkpoint.clone());
        for child in &node.children {
            Self::collect_checkpoints(child, checkpoints);
        }
    }

    /// Remove a checkpoint and its associated files
    fn remove_checkpoint(&self, paths: &CheckpointPaths, checkpoint_id: &str) -> Result<()> {
        // Remove checkpoint metadata directory
        let checkpoint_dir = paths.checkpoint_dir(checkpoint_id);
        if checkpoint_dir.exists() {
            fs::remove_dir_all(&checkpoint_dir).context("Failed to remove checkpoint directory")?;
        }

        // Remove file references for this checkpoint
        let refs_dir = paths.files_dir.join("refs").join(checkpoint_id);
        if refs_dir.exists() {
            fs::remove_dir_all(&refs_dir).context("Failed to remove file references")?;
        }

        // Note: We don't remove content from the pool here as it might be
        // referenced by other checkpoints. Use garbage_collect_content() for that.
        Ok(())
    }

    /// Garbage collect unreferenced content from the content pool
    pub fn garbage_collect_content(&self, project_id: &str, session_id: &str) -> Result<usize> {
        let paths = CheckpointPaths::new(&self.claude_dir, project_id, session_id);
        let content_pool_dir = paths.files_dir.join("content_pool");
        let refs_dir = paths.files_dir.join("refs");

        if !content_pool_dir.exists() {
            return Ok(0);
        }

        // Collect all referenced hashes
        let mut referenced_hashes = std::collections::HashSet::new();
        if refs_dir.exists() {
            for checkpoint_entry in fs::read_dir(&refs_dir)? {
                let checkpoint_dir = checkpoint_entry?.path();
                if checkpoint_dir.is_dir() {
                    for ref_entry in fs::read_dir(&checkpoint_dir)? {
                        let ref_path = ref_entry?.path();
                        if ref_path.extension().and_then(|e| e.to_str()) == Some("json") {
                            if let Ok(ref_json) = fs::read_to_string(&ref_path) {
                                if let Ok(ref_metadata) =
                                    serde_json::from_str::<serde_json::Value>(&ref_json)
                                {
                                    if let Some(hash) = ref_metadata["hash"].as_str() {
                                        referenced_hashes.insert(hash.to_string());
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // Remove unreferenced content
        let mut removed_count = 0;
        for entry in fs::read_dir(&content_pool_dir)? {
            let content_file = entry?.path();
            if content_file.is_file() {
                if let Some(hash) = content_file.file_name().and_then(|n| n.to_str()) {
                    if !referenced_hashes.contains(hash) {
                        if fs::remove_file(&content_file).is_ok() {
                            removed_count += 1;
                        }
                    }
                }
            }
        }

        Ok(removed_count)
    }
}
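
// Illustrative unit tests for the pure helpers above; this is a sketch of how
// the hashing and ID helpers behave, not an exhaustive test suite.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn file_hashes_are_deterministic_hex_digests() {
        let a = CheckpointStorage::calculate_file_hash("fn main() {}");
        let b = CheckpointStorage::calculate_file_hash("fn main() {}");
        assert_eq!(a, b);
        assert_eq!(a.len(), 64); // SHA-256 rendered as lowercase hex
        assert!(a.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn checkpoint_ids_are_unique() {
        assert_ne!(
            CheckpointStorage::generate_checkpoint_id(),
            CheckpointStorage::generate_checkpoint_id()
        );
    }
}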