agpm_cli/cache/mod.rs
1//! Git repository cache management with worktree-based parallel operations
2//!
3//! This module provides a sophisticated caching system for Git repositories that enables
//! safe parallel resource installation through Git worktrees. The cache system was
//! redesigned in AGPM v0.3.0 for better concurrency, a simpler architecture, and
//! improved performance.
7//!
8//! # Architecture Overview
9//!
10//! The cache system implements a multi-layered architecture:
11//! - [`Cache`] struct: Core repository management and worktree orchestration
12//! - [`CacheLock`]: File-based locking for process-safe concurrent access
13//! - `WorktreeState`: Instance-level caching for worktree lifecycle management
14//! - Bare repositories: Optimized Git storage for efficient worktree creation
15//!
16//! # Platform-Specific Cache Locations
17//!
//! The cache location follows platform conventions:
//! - **Linux/macOS**: `~/.agpm/cache/`
//! - **Windows**: `%LOCALAPPDATA%\agpm\cache\` (the local application data directory)
21//! - **Environment Override**: Set `AGPM_CACHE_DIR` for custom locations
22//!
23//! # Cache Directory Structure
24//!
25//! The cache is organized for optimal parallel access patterns:
//! ```text
//! ~/.agpm/cache/
//! ├── sources/                            # Bare repositories optimized for worktrees
//! │   ├── github_owner_repo.git/          # Bare repo with all Git objects
//! │   └── gitlab_org_project.git/         # URL-parsed directory naming
//! ├── worktrees/                          # SHA-based worktrees for maximum deduplication
//! │   ├── github_owner_repo_abc12345/     # First 8 chars of commit SHA
//! │   ├── github_owner_repo_def67890/     # Each unique commit gets one worktree
//! │   ├── .state.json                     # Persistent worktree registry
//! │   └── github_owner_repo_456789ab/     # Multiple refs to same SHA share one worktree
//! └── .locks/                             # Fine-grained locking infrastructure
//!     ├── github_owner_repo.lock          # Repository-level locks
//!     └── worktree-owner-repo-abc12345.lock  # Worktree creation locks
//! ```
40//!
41//! # Enhanced Concurrency Architecture
42//!
43//! The v0.3.2+ cache implements SHA-based worktree optimization with advanced concurrency:
44//! - **SHA-based deduplication**: Worktrees keyed by commit SHA, not version reference
45//! - **Centralized resolution**: `VersionResolver` handles batch SHA resolution upfront
46//! - **Maximum reuse**: Multiple tags/branches pointing to same commit share one worktree
47//! - **Instance-level caching**: `WorktreeState` tracks creation across threads
48//! - **Per-worktree file locking**: Fine-grained locks prevent creation conflicts
49//! - **Direct parallelism control**: `--max-parallel` flag controls concurrency
50//! - **Command-instance fetch caching**: Single fetch per repository per command
51//! - **Atomic state transitions**: Pending → Ready state coordination
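//!
//! For illustration, a sketch (with a hypothetical commit SHA) of how two references
//! that resolve to the same commit map to a single worktree path:
//!
//! ```rust,no_run
//! use agpm_cli::cache::Cache;
//!
//! # fn example() -> anyhow::Result<()> {
//! let cache = Cache::new()?;
//! // Suppose both "v1.0.0" and "main" resolve to this commit during resolution.
//! let sha = "abc1234567890def1234567890abcdef12345678";
//! let url = "https://github.com/example/repo.git";
//! // The worktree path depends only on the repository and the SHA, so both
//! // references share the same directory.
//! assert_eq!(cache.get_worktree_path(url, sha)?, cache.get_worktree_path(url, sha)?);
//! # Ok(())
//! # }
//! ```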
52//!
53//! ## Locking Strategy
54//!
//! ```text
//! Process A: acquire("source1") ───┐
//!                                  ├─── BLOCKS: same source
//! Process B: acquire("source1") ───┘
//!
//! Process C: acquire("source2") ───── CONCURRENT: different source
//! ```
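//!
//! A minimal sketch of this behavior using [`CacheLock`] directly, assuming the
//! `CacheLock::acquire(cache_dir, name)` form used elsewhere in this module
//! (the lock names here are illustrative):
//!
//! ```rust,no_run
//! use agpm_cli::cache::{Cache, CacheLock};
//!
//! # async fn example() -> anyhow::Result<()> {
//! let cache = Cache::new()?;
//! // Locks are keyed by the cache directory plus a name: the same name
//! // serializes access, while different names proceed concurrently.
//! let _lock_a = CacheLock::acquire(cache.cache_dir(), "source1").await?;
//! let _lock_c = CacheLock::acquire(cache.cache_dir(), "source2").await?; // does not block
//! # Ok(())
//! # }
//! ```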
62//!
63//! # Cache Operations
64//!
65//! ## Repository Management
66//! - **Clone**: Initial repository cloning from remote URLs
67//! - **Update**: Fetch latest changes from remote (git fetch)
68//! - **Checkout**: Switch to specific versions (tags, branches, commits)
69//! - **Cleanup**: Remove unused repositories to reclaim disk space
70//!
71//! ## Resource Installation
72//! - **Copy-based**: Files copied from cache to project directories
73//! - **Path resolution**: Handles relative paths within repositories
74//! - **Directory creation**: Automatically creates parent directories
//! - **Overwrite behavior**: Existing files are replaced without prompting
76//!
77//! # Performance Characteristics
78//!
79//! The cache is optimized for common AGPM workflows:
80//! - **First install**: Clone repository once, reuse for all resources
81//! - **Subsequent installs**: Copy from local cache (fast file operations)
82//! - **Version switching**: Git checkout within cached repository
83//! - **Parallel operations**: Multiple sources can be processed concurrently
84//!
85//! ## Disk Space Management
86//!
87//! - **Size calculation**: Recursive directory size calculation
88//! - **Unused cleanup**: Remove repositories no longer referenced
89//! - **Complete cleanup**: Clear entire cache when needed
90//! - **Selective removal**: Keep active sources, remove only unused ones
91//!
92//! # Error Handling and Recovery
93//!
94//! The cache provides comprehensive error handling:
95//! - **Lock timeouts**: Graceful handling of concurrent access
96//! - **Clone failures**: Network and authentication error reporting
97//! - **Version errors**: Clear messages for invalid tags/branches/commits
98//! - **File system errors**: Detailed context for permission and space issues
99//!
100//! # Security Considerations
101//!
102//! - **Path validation**: Prevents directory traversal attacks
103//! - **Lock file isolation**: Prevents lock file manipulation
104//! - **Safe file operations**: Atomic operations prevent corruption
105//! - **Permission handling**: Respects file system permissions
106//!
107//! # Usage Examples
108//!
109//! ## Basic Cache Operations
110//!
//! ```rust,no_run
//! use agpm_cli::cache::Cache;
//! use std::path::PathBuf;
//!
//! # async fn example() -> anyhow::Result<()> {
//! // Initialize cache with default location
//! let cache = Cache::new()?;
//!
//! // Ensure the bare repository for a source is present in the cache
//! let _bare_repo = cache.get_or_clone_source(
//!     "community",
//!     "https://github.com/example/agpm-community.git",
//!     None
//! ).await?;
//!
//! // Create (or reuse) a worktree for a resolved commit SHA
//! let worktree = cache.get_or_create_worktree_for_sha(
//!     "community",
//!     "https://github.com/example/agpm-community.git",
//!     "abc1234567890def1234567890abcdef12345678", // pre-resolved SHA
//!     None
//! ).await?;
//!
//! // Copy a resource from the worktree to the project
//! cache.copy_resource(
//!     &worktree,
//!     "agents/helper.md",                    // Source path in repository
//!     &PathBuf::from("./agents/helper.md")   // Destination in project
//! ).await?;
//! # Ok(())
//! # }
//! ```
135//!
136//! ## Cache Maintenance
137//!
138//! ```rust,no_run
139//! use agpm_cli::cache::Cache;
140//!
141//! # #[tokio::main]
142//! # async fn main() -> anyhow::Result<()> {
143//! let cache = Cache::new()?;
144//!
145//! // Check cache size
146//! let size_bytes = cache.get_cache_size().await?;
147//! println!("Cache size: {} MB", size_bytes / 1024 / 1024);
148//!
149//! // Clean unused repositories
150//! let active_sources = vec!["community".to_string(), "work".to_string()];
151//! let removed_count = cache.clean_unused(&active_sources).await?;
152//! println!("Removed {} unused repositories", removed_count);
153//!
154//! // Complete cache cleanup
155//! cache.clear_all().await?;
156//! # Ok(())
157//! # }
158//! ```
159//!
160//! ## Custom Cache Location
161//!
162//! ```rust,no_run
163//! use agpm_cli::cache::Cache;
164//! use std::path::PathBuf;
165//!
166//! # fn custom_location() -> anyhow::Result<()> {
167//! // Use custom cache directory (useful for testing or special setups)
168//! let custom_dir = PathBuf::from("/tmp/my-agpm-cache");
169//! let cache = Cache::with_dir(custom_dir)?;
170//!
171//! println!("Using cache at: {}", cache.get_cache_location().display());
172//! # Ok(())
173//! # }
174//! ```
175//!
176//! # Integration with AGPM Workflow
177//!
178//! The cache module integrates seamlessly with AGPM's dependency management:
179//! 1. **Manifest parsing**: Source URLs extracted from `agpm.toml`
180//! 2. **Dependency resolution**: Version constraints resolved to specific commits
181//! 3. **Cache population**: Repositories cloned and checked out as needed
182//! 4. **Resource installation**: Files copied from cache to project directories
183//! 5. **Lockfile generation**: Installed resources tracked in `agpm.lock`
184//!
185//! See [`crate::manifest`] for manifest handling and [`crate::lockfile`] for
186//! lockfile management.
187
188use crate::core::error::AgpmError;
189use crate::core::file_error::{FileOperation, FileResultExt};
190use crate::git::GitRepo;
191use crate::git::command_builder::GitCommand;
192use crate::utils::fs;
193use crate::utils::security::validate_path_security;
194use anyhow::{Context, Result};
195use dashmap::DashMap;
196use serde::{Deserialize, Serialize};
197use std::collections::{HashMap, HashSet};
198use std::path::{Path, PathBuf};
199use std::sync::Arc;
200use std::time::{Duration, SystemTime, UNIX_EPOCH};
201use tokio::fs as async_fs;
202use tokio::sync::{Mutex, RwLock};
203
204// Concurrency Architecture:
205// - Direct control approach: Command parallelism (--max-parallel) + per-worktree file locking
206// - Instance-level caching: Worktrees and fetch operations cached per Cache instance
207// - Command-level control: --max-parallel flag controls dependency processing parallelism
208// - Fetch caching: Network operations cached for 5 minutes to reduce redundancy
209
210/// State of a worktree in the instance-level cache for concurrent coordination.
211///
212/// This enum implements a sophisticated state machine for worktree lifecycle management
213/// that enables safe concurrent access across multiple threads without race conditions.
214/// The cache uses this state to coordinate between threads that might request the same
215/// worktree simultaneously, eliminating the need for global synchronization bottlenecks.
216///
217/// # State Transitions
218///
219/// - **Initial**: No entry exists in cache (implicit state)
220/// - [`Pending`](WorktreeState::Pending): One thread is creating the worktree
221/// - [`Ready`](WorktreeState::Ready): Worktree exists and is ready for all threads
222///
223/// # Concurrency Coordination Pattern
224///
225/// The worktree creation process follows this coordinated pattern:
226/// 1. **Reservation**: First thread reserves slot by setting state to `Pending`
227/// 2. **Creation**: Reserved thread performs actual worktree creation with file lock
228/// 3. **Notification**: Creator updates state to `Ready(path)` when complete
229/// 4. **Reuse**: Subsequent threads immediately use the ready worktree path
230/// 5. **Validation**: All threads verify worktree still exists before use
231///
/// # Cache Key Format
///
/// Worktrees are uniquely identified by composite keys:
/// ```text
/// "{cache_dir_hash}:{owner}_{repo}:{sha}"
/// ```
///
/// Components:
/// - `cache_dir_hash`: First 8 hex chars of the cache directory path hash
/// - `owner_repo`: Parsed from the Git URL (e.g., "`github_owner_project`")
/// - `sha`: Full commit SHA the worktree is checked out at
///
/// This format ensures isolation between:
/// - Different cache instances (via the hash)
/// - Different repositories (via owner/repo)
/// - Different commits (via the SHA)
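///
/// A sketch of how such a key is assembled (mirroring the hashing used by
/// `get_or_create_worktree_for_sha` later in this module; the path and SHA
/// below are illustrative):
///
/// ```rust,no_run
/// use std::collections::hash_map::DefaultHasher;
/// use std::hash::{Hash, Hasher};
/// use std::path::PathBuf;
///
/// let cache_dir = PathBuf::from("/home/user/.agpm/cache");
/// let mut hasher = DefaultHasher::new();
/// cache_dir.hash(&mut hasher);
/// // First 8 hex characters of the cache directory hash
/// let cache_dir_hash = format!("{:x}", hasher.finish())[..8].to_string();
/// let key = format!("{cache_dir_hash}:github_example:abc1234567890def1234567890abcdef12345678");
/// ```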
248///
249/// # Memory Management
250///
251/// The instance-level cache persists for the lifetime of the `Cache` instance,
252/// but worktrees are validated on each access to handle external deletion.
253#[derive(Debug, Clone)]
254enum WorktreeState {
255 /// Another thread is currently creating this worktree.
256 ///
257 /// When threads encounter this state, they should wait briefly and retry
258 /// rather than attempting concurrent worktree creation which would fail.
259 Pending,
260
261 /// Worktree is fully created and ready to use.
262 ///
263 /// The `PathBuf` contains the filesystem path to the working directory.
264 /// This path should be validated before use as the worktree may have been
265 /// externally deleted.
266 Ready(PathBuf),
267}
268
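/// Persistent on-disk registry of worktrees, serialized to
/// `worktrees/.state.json` under the cache directory. Entries record the
/// source, version, worktree path, and a last-used timestamp so cached
/// worktrees can be validated and cleaned up across AGPM runs.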
269#[derive(Debug, Clone, Serialize, Deserialize, Default)]
270struct WorktreeRegistry {
271 entries: HashMap<String, WorktreeRecord>,
272}
273
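/// A single registry entry describing one cached worktree and when it was last used.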
274#[derive(Debug, Clone, Serialize, Deserialize)]
275struct WorktreeRecord {
276 source: String,
277 version: String,
278 path: PathBuf,
279 last_used: u64,
280}
281
282impl WorktreeRegistry {
283 fn load(path: &Path) -> Self {
284 match std::fs::read(path) {
285 Ok(data) => serde_json::from_slice(&data).unwrap_or_default(),
286 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Self::default(),
287 Err(err) => {
288 tracing::warn!("Failed to load worktree registry from {}: {}", path.display(), err);
289 Self::default()
290 }
291 }
292 }
293
294 fn update(&mut self, key: String, source: String, version: String, path: PathBuf) {
295 let timestamp = SystemTime::now()
296 .duration_since(UNIX_EPOCH)
297 .unwrap_or_else(|_| Duration::from_secs(0))
298 .as_secs();
299
300 self.entries.insert(
301 key,
302 WorktreeRecord {
303 source,
304 version,
305 path,
306 last_used: timestamp,
307 },
308 );
309 }
310
311 fn remove_by_path(&mut self, target: &Path) -> bool {
312 if let Some(key) = self.entries.iter().find_map(|(k, record)| {
313 if record.path == target {
314 Some(k.clone())
315 } else {
316 None
317 }
318 }) {
319 self.entries.remove(&key);
320 true
321 } else {
322 false
323 }
324 }
325
326 async fn persist(&self, path: &Path) -> Result<()> {
327 if let Some(parent) = path.parent() {
328 async_fs::create_dir_all(parent).await?;
329 }
330
331 let data = serde_json::to_vec_pretty(self)?;
332 async_fs::write(path, data).await?;
333 Ok(())
334 }
335}
336
337/// File-based locking mechanism for cache operations
338///
339/// This module provides thread-safe and process-safe locking for cache
340/// operations through OS-level file locks, ensuring data consistency
341/// when multiple AGPM processes access the same cache directory.
342pub mod lock;
343pub use lock::CacheLock;
344
345/// Git repository cache for efficient resource management
346///
347/// The `Cache` struct provides the primary interface for managing Git repository
348/// caching in AGPM. It handles repository cloning, updating, version management,
349/// and resource file copying operations.
350///
/// # Thread Safety
///
/// `Cache` is cheaply cloneable: its mutable state lives behind `Arc`, so clones
/// share one set of caches and can be used concurrently across async tasks.
/// Multiple processes can also operate on the same cache directory safely through
/// the file-based locking mechanism provided by [`CacheLock`].
356///
357/// # Platform Compatibility
358///
359/// The cache automatically handles platform-specific differences:
360/// - **Path separators**: Uses [`std::path`] for cross-platform compatibility
361/// - **Cache location**: Follows platform conventions for app data storage
362/// - **File locking**: Uses [`fs4`] crate for cross-platform file locking
363/// - **Directory creation**: Handles permissions and long paths on Windows
364///
365/// # Examples
366///
367/// Create a cache with default platform-specific location:
368///
369/// ```rust,no_run
370/// use agpm_cli::cache::Cache;
371///
372/// # fn example() -> anyhow::Result<()> {
373/// let cache = Cache::new()?;
374/// println!("Cache location: {}", cache.get_cache_location().display());
375/// # Ok(())
376/// # }
377/// ```
378///
379/// Create a cache with custom location (useful for testing):
380///
381/// ```rust,no_run
382/// use agpm_cli::cache::Cache;
383/// use std::path::PathBuf;
384///
385/// # fn example() -> anyhow::Result<()> {
386/// let custom_dir = PathBuf::from("/tmp/test-cache");
387/// let cache = Cache::with_dir(custom_dir)?;
388/// # Ok(())
389/// # }
390/// ```
391pub struct Cache {
392 /// The root directory where all cached repositories are stored
393 dir: PathBuf,
394
395 /// Instance-level cache for worktrees to avoid redundant checkouts.
396 ///
397 /// This cache maps worktree identifiers to their creation state, enabling
398 /// safe concurrent access. Multiple threads can request the same worktree
399 /// without conflicts - the first thread creates it while others wait.
400 ///
    /// **Key format**: `"{cache_dir_hash}:{owner}_{repo}:{sha}"`
402 ///
403 /// The cache directory hash ensures isolation between different Cache instances,
404 /// preventing conflicts when multiple instances operate on different cache roots.
405 worktree_cache: Arc<RwLock<HashMap<String, WorktreeState>>>,
406
407 /// Per-repository async locks that serialize fetch operations across
408 /// concurrent tasks. This prevents redundant `git fetch` runs when
409 /// multiple dependencies target the same repository simultaneously.
410 fetch_locks: Arc<DashMap<PathBuf, Arc<Mutex<()>>>>,
411
412 /// Command-instance fetch cache to track which repositories have been fetched
413 /// during this command execution. This ensures we only fetch once per repository
414 /// per command instance, dramatically reducing network operations for multi-dependency
415 /// installations.
416 ///
417 /// Contains bare repository paths that have been fetched in this command instance.
418 /// Works in conjunction with `VersionResolver` to minimize Git network operations.
419 fetched_repos: Arc<RwLock<HashSet<PathBuf>>>,
420
421 /// Persistent registry of worktrees stored on disk for reuse across
422 /// AGPM runs. Tracks last-used timestamps and paths so we can validate
423 /// and clean up cached worktrees without recreating them unnecessarily.
424 worktree_registry: Arc<Mutex<WorktreeRegistry>>,
425}
426
427impl Clone for Cache {
428 fn clone(&self) -> Self {
429 Self {
430 dir: self.dir.clone(),
431 worktree_cache: Arc::clone(&self.worktree_cache),
432 fetch_locks: Arc::clone(&self.fetch_locks),
433 fetched_repos: Arc::clone(&self.fetched_repos),
434 worktree_registry: Arc::clone(&self.worktree_registry),
435 }
436 }
437}
438
439impl Cache {
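    /// Location of the persistent worktree registry (`worktrees/.state.json`)
    /// for a given cache directory.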
440 fn registry_path_for(cache_dir: &Path) -> PathBuf {
441 cache_dir.join("worktrees").join(".state.json")
442 }
443
444 fn registry_path(&self) -> PathBuf {
445 Self::registry_path_for(&self.dir)
446 }
447
448 /// Verify that a worktree directory is fully accessible with actual content.
449 ///
450 /// This function ensures that a newly created worktree is fully accessible
451 /// before it's marked as ready. This prevents race conditions in parallel
452 /// operations where `git worktree add` returns but the filesystem hasn't
453 /// finished writing all files yet.
454 ///
455 /// # Implementation
456 ///
457 /// Uses tokio-retry with exponential backoff to handle filesystem sync delays.
458 ///
459 /// Verification uses `git diff-index --quiet HEAD` which provides a comprehensive
460 /// check that:
461 /// - The worktree directory and .git marker exist
462 /// - The git index is readable
463 /// - ALL files from the commit are present and match HEAD
464 /// - Git recognizes the worktree as valid
465 ///
466 /// This single command provides stronger guarantees than multi-level checks,
467 /// as it verifies complete checkout rather than partial availability.
468 ///
469 /// # Parameters
470 ///
471 /// * `worktree_path` - Path to the worktree directory to verify
472 /// * `sha` - The commit SHA being checked out (for logging)
473 ///
474 /// # Errors
475 ///
476 /// Returns an error if the worktree is not accessible after all retries.
477 async fn verify_worktree_accessible(worktree_path: &Path, sha: &str) -> Result<()> {
478 use tokio_retry::Retry;
479 use tokio_retry::strategy::{ExponentialBackoff, jitter};
480
481 // Retry strategy with jitter for concurrent operations
482 let retry_strategy = ExponentialBackoff::from_millis(50)
483 .max_delay(std::time::Duration::from_secs(2))
484 .take(10)
485 .map(jitter);
486
487 let worktree_path = worktree_path.to_path_buf();
488 let sha_short = &sha[..8];
489
490 tracing::debug!(
491 target: "git::worktree",
492 "Verifying worktree at {} for SHA {}",
493 worktree_path.display(),
494 sha_short
495 );
496
497 Retry::spawn(retry_strategy, || async {
498 // Verify working tree matches HEAD (all files checked out)
499 // This verifies the worktree structure is valid and all files are present.
500 // Cache coherency (making files visible to the parent process) is now
501 // handled at the point of actual file read in installer/mod.rs and resolver/mod.rs
502 // via read_with_cache_retry functions.
503 crate::git::command_builder::GitCommand::new()
504 .args(["diff-index", "--quiet", "HEAD"])
505 .current_dir(&worktree_path)
506 .execute_success()
507 .await
508 .map_err(|_| "Working tree doesn't match HEAD (checkout incomplete)".to_string())?;
509
510 tracing::debug!(
511 target: "git::worktree",
512 "Worktree verification passed for {}",
513 worktree_path.display()
514 );
515
516 Ok::<(), String>(())
517 })
518 .await
519 .map_err(|e| {
520 anyhow::anyhow!(
521 "Worktree not fully initialized after retries: {} @ {} - {}",
522 worktree_path.display(),
523 sha_short,
524 e
525 )
526 })
527 }
528
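    /// Records (or refreshes) a registry entry for `worktree_path` under
    /// `registry_key` and persists the registry to disk.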
529 async fn record_worktree_usage(
530 &self,
531 registry_key: &str,
532 source_name: &str,
533 version_key: &str,
534 worktree_path: &Path,
535 ) -> Result<()> {
536 let mut registry = self.worktree_registry.lock().await;
537 registry.update(
538 registry_key.to_string(),
539 source_name.to_string(),
540 version_key.to_string(),
541 worktree_path.to_path_buf(),
542 );
543 registry.persist(&self.registry_path()).await?;
544 Ok(())
545 }
546
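    /// Removes the registry entry matching `worktree_path`, if any, and
    /// persists the updated registry.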
547 async fn remove_worktree_record_by_path(&self, worktree_path: &Path) -> Result<()> {
548 let mut registry = self.worktree_registry.lock().await;
549 if registry.remove_by_path(worktree_path) {
550 registry.persist(&self.registry_path()).await?;
551 }
552 Ok(())
553 }
554
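    /// Applies per-repository Git configuration intended to speed up fetches
    /// (HTTP/2, a larger post buffer, and disabled compression). Errors are
    /// deliberately ignored: these settings are an optimization only.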
555 async fn configure_connection_pooling(path: &Path) -> Result<()> {
556 let commands = [
557 ("http.version", "HTTP/2"),
558 ("http.postBuffer", "524288000"),
559 ("core.compression", "0"),
560 ];
561
562 for (key, value) in commands {
563 GitCommand::new()
564 .args(["config", key, value])
565 .current_dir(path)
566 .execute_success()
567 .await
568 .ok();
569 }
570
571 Ok(())
572 }
573
574 /// Creates a new `Cache` instance using the default platform-specific cache directory.
575 ///
576 /// The cache directory is determined based on the current platform:
577 /// - **Linux/macOS**: `~/.agpm/cache/`
578 /// - **Windows**: `%LOCALAPPDATA%\agpm\cache\`
579 ///
580 /// # Environment Variable Override
581 ///
582 /// The cache location can be overridden by setting the `AGPM_CACHE_DIR`
583 /// environment variable. This is particularly useful for:
584 /// - Testing with isolated cache directories
585 /// - CI/CD environments with specific cache locations
586 /// - Custom deployment scenarios
587 ///
588 /// # Errors
589 ///
590 /// Returns an error if:
591 /// - Unable to determine the home/local data directory
592 /// - The resolved path is invalid or inaccessible
593 ///
594 /// # Examples
595 ///
596 /// ```rust,no_run
597 /// use agpm_cli::cache::Cache;
598 ///
599 /// # fn example() -> anyhow::Result<()> {
600 /// let cache = Cache::new()?;
601 /// println!("Using cache at: {}", cache.get_cache_location().display());
602 /// # Ok(())
603 /// # }
604 /// ```
605 pub fn new() -> Result<Self> {
606 let dir = crate::config::get_cache_dir()?;
607 let registry_path = Self::registry_path_for(&dir);
        let registry = WorktreeRegistry::load(&registry_path);
609 Ok(Self {
610 dir,
611 worktree_cache: Arc::new(RwLock::new(HashMap::new())),
612 fetch_locks: Arc::new(DashMap::new()),
613 fetched_repos: Arc::new(RwLock::new(HashSet::new())),
614 worktree_registry: Arc::new(Mutex::new(registry)),
615 })
616 }
617
618 /// Creates a new `Cache` instance using a custom cache directory.
619 ///
620 /// This constructor allows you to specify exactly where the cache should be
621 /// stored, overriding platform defaults. The directory will be created if
622 /// it doesn't exist when cache operations are performed.
623 ///
624 /// # Use Cases
625 ///
626 /// - **Testing**: Use temporary directories for isolated test environments
627 /// - **Development**: Use project-local cache directories
628 /// - **Deployment**: Use specific paths in containerized environments
629 /// - **Multi-user systems**: Use user-specific cache locations
630 ///
631 /// # Parameters
632 ///
633 /// * `cache_dir` - The absolute path where cache data should be stored
634 ///
    /// # Errors
    ///
    /// Currently this constructor does not fail in practice: loading the worktree
    /// registry falls back to an empty registry on error. `Result` is returned for
    /// API consistency with [`Cache::new`].
639 ///
640 /// # Examples
641 ///
642 /// ```rust,no_run
643 /// use agpm_cli::cache::Cache;
644 /// use std::path::PathBuf;
645 ///
646 /// # fn example() -> anyhow::Result<()> {
647 /// // Use a project-local cache
648 /// let project_cache = Cache::with_dir(PathBuf::from("./cache"))?;
649 ///
650 /// // Use a system-wide cache
651 /// let system_cache = Cache::with_dir(PathBuf::from("/var/cache/agpm"))?;
652 ///
653 /// // Use a temporary cache for testing
654 /// let temp_cache = Cache::with_dir(std::env::temp_dir().join("agpm-test"))?;
655 /// # Ok(())
656 /// # }
657 /// ```
658 pub fn with_dir(dir: PathBuf) -> Result<Self> {
659 let registry_path = Self::registry_path_for(&dir);
        let registry = WorktreeRegistry::load(&registry_path);
661 Ok(Self {
662 dir,
663 worktree_cache: Arc::new(RwLock::new(HashMap::new())),
664 fetch_locks: Arc::new(DashMap::new()),
665 fetched_repos: Arc::new(RwLock::new(HashSet::new())),
666 worktree_registry: Arc::new(Mutex::new(registry)),
667 })
668 }
669
670 /// Ensures the cache directory exists, creating it if necessary.
671 ///
672 /// This method creates the cache directory and all necessary parent directories
673 /// if they don't already exist. It's safe to call multiple times - it will
674 /// not error if the directory already exists.
675 ///
676 /// # Platform Considerations
677 ///
678 /// - **Windows**: Handles long path names (>260 characters) correctly
679 /// - **Unix**: Respects umask settings for directory permissions
680 /// - **All platforms**: Creates intermediate directories as needed
681 ///
682 /// # Errors
683 ///
684 /// Returns an error if:
685 /// - Insufficient permissions to create the directory
686 /// - Disk space is exhausted
687 /// - Path contains invalid characters for the platform
688 /// - A file exists at the target path (not a directory)
689 ///
690 /// # Examples
691 ///
692 /// ```rust,no_run
693 /// use agpm_cli::cache::Cache;
694 ///
695 /// # async fn example() -> anyhow::Result<()> {
696 /// let cache = Cache::new()?;
697 ///
698 /// // Ensure cache directory exists before operations
699 /// cache.ensure_cache_dir().await?;
700 ///
701 /// // Safe to call multiple times
702 /// cache.ensure_cache_dir().await?; // No error
703 /// # Ok(())
704 /// # }
705 /// ```
706 pub async fn ensure_cache_dir(&self) -> Result<()> {
707 if !self.dir.exists() {
708 async_fs::create_dir_all(&self.dir).await.with_file_context(
709 FileOperation::CreateDir,
710 &self.dir,
711 "creating cache directory",
712 "cache::ensure_cache_dir",
713 )?;
714 }
715 Ok(())
716 }
717
718 /// Returns the path to the cache directory.
719 ///
720 /// This is useful for operations that need direct access to the cache directory,
721 /// such as lock file cleanup or cache size calculations.
722 ///
723 /// # Example
724 ///
725 /// ```rust,no_run
726 /// use agpm_cli::cache::Cache;
727 ///
728 /// # fn example() -> anyhow::Result<()> {
729 /// let cache = Cache::new()?;
730 /// let cache_dir = cache.cache_dir();
731 /// println!("Cache directory: {}", cache_dir.display());
732 /// # Ok(())
733 /// # }
734 /// ```
735 #[must_use]
736 pub fn cache_dir(&self) -> &Path {
737 &self.dir
738 }
739
740 /// Get the worktree path for a specific URL and commit SHA.
741 ///
742 /// This method constructs the expected worktree directory path based on the cache's
743 /// naming scheme. It does NOT check if the worktree exists or create it - use
744 /// `get_or_create_worktree_for_sha` for that.
745 ///
746 /// # Arguments
747 ///
748 /// * `url` - Git repository URL
749 /// * `sha` - Full commit SHA (will be shortened to first 8 characters)
750 ///
751 /// # Returns
752 ///
753 /// Path to the worktree directory (may not exist yet)
754 ///
755 /// # Errors
756 ///
757 /// Returns an error if:
758 /// - Invalid Git URL format
759 ///
760 /// # Example
761 ///
762 /// ```rust,no_run
763 /// use agpm_cli::cache::Cache;
764 ///
765 /// # fn example() -> anyhow::Result<()> {
766 /// let cache = Cache::new()?;
767 /// let path = cache.get_worktree_path(
768 /// "https://github.com/owner/repo.git",
769 /// "abc1234567890def"
770 /// )?;
771 /// println!("Worktree path: {}", path.display());
772 /// # Ok(())
773 /// # }
774 /// ```
775 pub fn get_worktree_path(&self, url: &str, sha: &str) -> Result<PathBuf> {
776 let (owner, repo) =
777 crate::git::parse_git_url(url).map_err(|e| anyhow::anyhow!("Invalid Git URL: {e}"))?;
778 let sha_short = &sha[..8.min(sha.len())];
779 Ok(self.dir.join("worktrees").join(format!("{owner}_{repo}_{sha_short}")))
780 }
781
782 /// Gets or clones a source repository, ensuring it's available in the cache.
783 ///
784 /// This is the primary method for source repository management. It handles both
785 /// initial cloning of new repositories and updating existing cached repositories.
786 /// The operation is atomic and thread-safe through file-based locking.
787 ///
    /// # Operation Flow
    ///
    /// 1. **Lock acquisition**: Acquires an exclusive lock for the source name
    /// 2. **Directory check**: Determines if the bare repository already exists in the cache
    /// 3. **Clone or fetch**: Either clones a new bare repository or fetches updates
    ///    (at most once per repository per command)
    /// 4. **Path return**: Returns the path to the cached bare repository
795 ///
796 /// # Concurrency Behavior
797 ///
798 /// - **Same source**: Concurrent calls with the same `name` will block
799 /// - **Different sources**: Concurrent calls with different `name` run in parallel
800 /// - **Process safety**: Safe across multiple AGPM processes
801 ///
802 /// # Version Handling
803 ///
804 /// The `version` parameter accepts various Git reference types:
805 /// - **Tags**: `"v1.0.0"`, `"release-2023"` (most common for releases)
806 /// - **Branches**: `"main"`, `"develop"`, `"feature/new-agents"`
807 /// - **Commits**: `"abc123def"` (full or short SHA hashes)
808 /// - **None**: Uses repository's default branch (typically `main` or `master`)
809 ///
810 /// # Parameters
811 ///
812 /// * `name` - Unique source identifier (used for cache directory and locking)
813 /// * `url` - Git repository URL (HTTPS, SSH, or local paths)
814 /// * `version` - Optional version constraint (tag, branch, or commit)
815 ///
816 /// # Returns
817 ///
    /// Returns the [`PathBuf`] to the cached bare repository directory, which
    /// contains the full Git object database and is used to create worktrees.
820 ///
821 /// # Errors
822 ///
823 /// Returns an error if:
824 /// - **Network issues**: Unable to clone or fetch from remote repository
825 /// - **Authentication**: Invalid credentials for private repositories
826 /// - **Version issues**: Specified version doesn't exist in repository
827 /// - **Lock timeout**: Unable to acquire exclusive lock (rare)
828 /// - **File system**: Permission or disk space issues
829 /// - **Git errors**: Repository corruption or invalid Git operations
830 ///
831 /// # Performance Notes
832 ///
    /// - **First call**: Performs a full bare clone (slower)
    /// - **Subsequent calls**: Only fetches updates, at most once per command (faster)
    /// - **Parallel sources**: Multiple sources are processed concurrently
837 ///
838 /// # Examples
839 ///
840 /// Clone a public repository with specific version:
841 ///
842 /// ```rust,no_run
843 /// use agpm_cli::cache::Cache;
844 ///
845 /// # async fn example() -> anyhow::Result<()> {
846 /// let cache = Cache::new()?;
847 ///
848 /// let repo_path = cache.get_or_clone_source(
849 /// "community",
850 /// "https://github.com/example/agpm-community.git",
851 /// Some("v1.2.0")
852 /// ).await?;
853 ///
854 /// println!("Repository cached at: {}", repo_path.display());
855 /// # Ok(())
856 /// # }
857 /// ```
858 ///
859 /// Use latest version from default branch:
860 ///
861 /// ```rust,no_run
862 /// use agpm_cli::cache::Cache;
863 ///
864 /// # async fn example() -> anyhow::Result<()> {
865 /// let cache = Cache::new()?;
866 ///
867 /// let repo_path = cache.get_or_clone_source(
868 /// "dev-tools",
869 /// "https://github.com/myorg/dev-tools.git",
870 /// None // Use default branch
871 /// ).await?;
872 /// # Ok(())
873 /// # }
874 /// ```
875 ///
876 /// Work with development branch:
877 ///
878 /// ```rust,no_run
879 /// use agpm_cli::cache::Cache;
880 ///
881 /// # async fn example() -> anyhow::Result<()> {
882 /// let cache = Cache::new()?;
883 ///
884 /// let repo_path = cache.get_or_clone_source(
885 /// "experimental",
886 /// "https://github.com/myorg/experimental.git",
887 /// Some("develop")
888 /// ).await?;
889 /// # Ok(())
890 /// # }
891 /// ```
892 pub async fn get_or_clone_source(
893 &self,
894 name: &str,
895 url: &str,
896 version: Option<&str>,
897 ) -> Result<PathBuf> {
898 self.get_or_clone_source_impl(name, url, version).await
899 }
900
901 /// Clean up a worktree after use (fast version).
902 ///
903 /// This just removes the worktree directory without calling git.
904 /// Git will clean up its internal references when `git worktree prune` is called.
905 ///
906 /// # Parameters
907 ///
908 /// * `worktree_path` - The path to the worktree to clean up
909 ///
910 /// # Errors
911 ///
912 /// Returns an error if:
913 /// - Unable to remove worktree directory
914 /// - Unable to update worktree registry
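    ///
    /// # Example
    ///
    /// A minimal sketch (the SHA and path are illustrative; in practice the path
    /// would come from a prior worktree creation call):
    ///
    /// ```rust,no_run
    /// use agpm_cli::cache::Cache;
    ///
    /// # async fn example() -> anyhow::Result<()> {
    /// let cache = Cache::new()?;
    /// let worktree = cache.get_worktree_path(
    ///     "https://github.com/owner/repo.git",
    ///     "abc1234567890def1234567890abcdef12345678",
    /// )?;
    /// cache.cleanup_worktree(&worktree).await?;
    /// # Ok(())
    /// # }
    /// ```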
915 pub async fn cleanup_worktree(&self, worktree_path: &Path) -> Result<()> {
916 // Just remove the directory - don't call git worktree remove
917 // This is much faster and git will clean up its references later
918 if worktree_path.exists() {
919 tokio::fs::remove_dir_all(worktree_path).await.with_file_context(
920 FileOperation::Write, // Using Write as it's the closest to directory modification
921 worktree_path,
922 "removing worktree directory",
923 "cache::cleanup_worktree",
924 )?;
925 self.remove_worktree_record_by_path(worktree_path).await?;
926 }
927 Ok(())
928 }
929
930 /// Clean up all worktrees in the cache.
931 ///
932 /// This is useful for cleaning up after batch operations or on cache clear.
933 ///
934 /// # Errors
935 ///
936 /// Returns an error if:
937 /// - Unable to remove worktrees directory
938 /// - Unable to prune worktree references from bare repositories
939 /// - Unable to update worktree registry
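    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use agpm_cli::cache::Cache;
    ///
    /// # async fn example() -> anyhow::Result<()> {
    /// let cache = Cache::new()?;
    /// cache.cleanup_all_worktrees().await?;
    /// # Ok(())
    /// # }
    /// ```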
940 pub async fn cleanup_all_worktrees(&self) -> Result<()> {
941 let worktrees_dir = self.dir.join("worktrees");
942
943 if !worktrees_dir.exists() {
944 return Ok(());
945 }
946
947 // Remove the entire worktrees directory
948 tokio::fs::remove_dir_all(&worktrees_dir).await.with_file_context(
949 FileOperation::Write,
950 &worktrees_dir,
951 "cleaning up worktrees directory",
952 "cache_module",
953 )?;
954
955 // Also prune worktree references from all bare repos
956 let sources_dir = self.dir.join("sources");
957 if sources_dir.exists() {
958 let mut entries = tokio::fs::read_dir(&sources_dir).await.with_file_context(
959 FileOperation::Read,
960 &sources_dir,
961 "reading sources directory",
962 "cache_module",
963 )?;
964 while let Some(entry) = entries.next_entry().await? {
965 let path = entry.path();
966 if path.extension().and_then(|s| s.to_str()) == Some("git") {
967 let bare_repo = GitRepo::new(&path);
968 bare_repo.prune_worktrees().await.ok();
969 }
970 }
971 }
972
973 {
974 let mut registry = self.worktree_registry.lock().await;
975 if !registry.entries.is_empty() {
976 registry.entries.clear();
977 registry.persist(&self.registry_path()).await?;
978 }
979 }
980
981 Ok(())
982 }
983
984 /// Get or create a worktree for a specific commit SHA.
985 ///
986 /// This method is the cornerstone of AGPM's optimized dependency resolution.
987 /// By using commit SHAs as the primary key for worktrees, we ensure:
988 /// - Maximum worktree reuse (same SHA = same worktree)
989 /// - Deterministic installations (SHA uniquely identifies content)
990 /// - Reduced disk usage (no duplicate worktrees for same commit)
991 ///
992 /// # SHA-Based Caching Strategy
993 ///
994 /// Unlike version-based worktrees that create separate directories for
995 /// "v1.0.0" and "release-1.0" even if they point to the same commit,
996 /// SHA-based worktrees ensure a single worktree per unique commit.
997 ///
998 /// # Parameters
999 ///
1000 /// * `name` - Source name from manifest
1001 /// * `url` - Git repository URL
1002 /// * `sha` - Full 40-character commit SHA (must be pre-resolved)
1003 /// * `context` - Optional context for logging
1004 ///
1005 /// # Returns
1006 ///
1007 /// Path to the worktree containing the exact commit specified by SHA.
1008 ///
1009 /// # Example
1010 ///
1011 /// ```no_run
1012 /// # use agpm_cli::cache::Cache;
1013 /// # async fn example() -> anyhow::Result<()> {
1014 /// let cache = Cache::new()?;
1015 ///
1016 /// // First resolve version to SHA
1017 /// let sha = "abc1234567890def1234567890abcdef12345678";
1018 ///
1019 /// // Get worktree for that specific commit
1020 /// let worktree = cache.get_or_create_worktree_for_sha(
1021 /// "community",
1022 /// "https://github.com/example/repo.git",
1023 /// sha,
1024 /// Some("my-agent")
1025 /// ).await?;
1026 /// # Ok(())
1027 /// # }
1028 /// ```
1029 #[allow(clippy::too_many_lines)]
1030 pub async fn get_or_create_worktree_for_sha(
1031 &self,
1032 name: &str,
1033 url: &str,
1034 sha: &str,
1035 context: Option<&str>,
1036 ) -> Result<PathBuf> {
1037 // Validate SHA format
1038 if sha.len() != 40 || !sha.chars().all(|c| c.is_ascii_hexdigit()) {
1039 return Err(anyhow::anyhow!(
1040 "Invalid SHA format: expected 40 hex characters, got '{sha}'"
1041 ));
1042 }
1043
1044 // Check if this is a local path
1045 let is_local_path = crate::utils::is_local_path(url);
1046 if is_local_path {
1047 // Local paths don't use worktrees
1048 return self.get_or_clone_source(name, url, None).await;
1049 }
1050
1051 self.ensure_cache_dir().await?;
1052
1053 // Parse URL for cache structure
1054 let (owner, repo) =
1055 crate::git::parse_git_url(url).unwrap_or(("direct".to_string(), "repo".to_string()));
1056
1057 // Create SHA-based cache key
1058 // Using first 8 chars of SHA for directory name (like Git does)
1059 let sha_short = &sha[..8];
1060 let cache_dir_hash = {
1061 use std::collections::hash_map::DefaultHasher;
1062 use std::hash::{Hash, Hasher};
1063 let mut hasher = DefaultHasher::new();
1064 self.dir.hash(&mut hasher);
1065 format!("{:x}", hasher.finish())[..8].to_string()
1066 };
1067 let cache_key = format!("{cache_dir_hash}:{owner}_{repo}:{sha}");
1068
1069 // Check if we already have a worktree for this SHA
1070 let mut should_create_worktree = false;
1071 while !should_create_worktree {
1072 {
1073 let cache_read = self.worktree_cache.read().await;
1074 match cache_read.get(&cache_key) {
1075 Some(WorktreeState::Ready(cached_path)) => {
1076 if cached_path.exists() {
1077 let cached_path = cached_path.clone();
1078 drop(cache_read);
1079 self.record_worktree_usage(&cache_key, name, sha_short, &cached_path)
1080 .await?;
1081
1082 if let Some(ctx) = context {
1083 tracing::debug!(
1084 target: "git",
1085 "({}) Reusing SHA-based worktree for {} @ {}",
1086 ctx,
1087 url.split('/').next_back().unwrap_or(url),
1088 sha_short
1089 );
1090 }
1091 return Ok(cached_path);
1092 }
1093 should_create_worktree = true;
1094 }
1095 Some(WorktreeState::Pending) => {
1096 if let Some(ctx) = context {
1097 tracing::debug!(
1098 target: "git",
1099 "({}) Waiting for SHA worktree creation for {} @ {}",
1100 ctx,
1101 url.split('/').next_back().unwrap_or(url),
1102 sha_short
1103 );
1104 }
1105 drop(cache_read);
1106 tokio::time::sleep(Duration::from_millis(100)).await;
1107 }
1108 None => {
1109 should_create_worktree = true;
1110 }
1111 }
1112 }
1113 }
1114
1115 // Reserve the cache slot
1116 let mut reservation_successful = false;
1117 while !reservation_successful {
1118 let mut cache_write = self.worktree_cache.write().await;
1119 match cache_write.get(&cache_key) {
1120 Some(WorktreeState::Ready(cached_path)) if cached_path.exists() => {
1121 return Ok(cached_path.clone());
1122 }
1123 Some(WorktreeState::Pending) => {
1124 drop(cache_write);
1125 tokio::time::sleep(Duration::from_millis(50)).await;
1126 }
1127 _ => {
1128 cache_write.insert(cache_key.clone(), WorktreeState::Pending);
1129 reservation_successful = true;
1130 }
1131 }
1132 }
1133
1134 // Get bare repository (fetches if needed)
1135 let bare_repo_dir = self.dir.join("sources").join(format!("{owner}_{repo}.git"));
1136
1137 if bare_repo_dir.exists() {
1138 // Fetch to ensure we have the SHA
1139 self.fetch_with_hybrid_lock(&bare_repo_dir, context).await?;
1140 } else {
1141 let lock_name = format!("{owner}_{repo}");
1142 let _lock = CacheLock::acquire(&self.dir, &lock_name).await?;
1143
1144 if let Some(parent) = bare_repo_dir.parent() {
1145 tokio::fs::create_dir_all(parent).await.with_file_context(
1146 FileOperation::CreateDir,
1147 parent,
1148 "creating cache parent directory",
1149 "cache_module",
1150 )?;
1151 }
1152
1153 if !bare_repo_dir.exists() {
1154 if let Some(ctx) = context {
1155 tracing::debug!("📦 ({ctx}) Cloning repository {url}...");
1156 } else {
1157 tracing::debug!("📦 Cloning repository {url} to cache...");
1158 }
1159
1160 GitRepo::clone_bare_with_context(url, &bare_repo_dir, context).await?;
1161 Self::configure_connection_pooling(&bare_repo_dir).await.ok();
1162 }
1163 }
1164
1165 let bare_repo = GitRepo::new(&bare_repo_dir);
1166
1167 // Create worktree path using SHA
1168 let worktree_path = self.dir.join("worktrees").join(format!("{owner}_{repo}_{sha_short}"));
1169
1170 // Acquire worktree creation lock
1171 let worktree_lock_name = format!("worktree-{owner}-{repo}-{sha_short}");
1172 let _worktree_lock = CacheLock::acquire(&self.dir, &worktree_lock_name).await?;
1173
1174 // Re-check after lock
1175 if worktree_path.exists() {
1176 let mut cache_write = self.worktree_cache.write().await;
1177 cache_write.insert(cache_key.clone(), WorktreeState::Ready(worktree_path.clone()));
1178 self.record_worktree_usage(&cache_key, name, sha_short, &worktree_path).await?;
1179 return Ok(worktree_path);
1180 }
1181
1182 // Prune stale worktrees if needed
1183 if !worktree_path.exists() {
1184 let _ = bare_repo.prune_worktrees().await;
1185 }
1186
1187 // Create worktree at specific SHA
1188 if let Some(ctx) = context {
1189 tracing::debug!(
1190 target: "git",
1191 "({}) Creating SHA-based worktree: {} @ {}",
1192 ctx,
1193 url.split('/').next_back().unwrap_or(url),
1194 sha_short
1195 );
1196 }
1197
1198 // Lock bare repo for worktree creation
1199 // Hold the lock through cache update to prevent git state corruption
1200 // when multiple worktrees are created concurrently for the same repo
1201 let bare_repo_lock_name = format!("bare-repo-{owner}_{repo}");
1202 let _bare_repo_lock = CacheLock::acquire(&self.dir, &bare_repo_lock_name).await?;
1203
1204 // Create worktree using SHA directly
1205 let worktree_result =
1206 bare_repo.create_worktree_with_context(&worktree_path, Some(sha), context).await;
1207
1208 // Keep lock held until cache is updated to ensure git state is fully settled
1209 match worktree_result {
1210 Ok(_) => {
1211 // Verify worktree is fully accessible before marking as Ready
1212 // This prevents race conditions where git worktree add returns
1213 // but filesystem hasn't finished writing all files yet
1214 Self::verify_worktree_accessible(&worktree_path, sha).await?;
1215
1216 let mut cache_write = self.worktree_cache.write().await;
1217 cache_write.insert(cache_key.clone(), WorktreeState::Ready(worktree_path.clone()));
1218 self.record_worktree_usage(&cache_key, name, sha_short, &worktree_path).await?;
1219 // Lock automatically dropped here
1220 Ok(worktree_path)
1221 }
1222 Err(e) => {
1223 let mut cache_write = self.worktree_cache.write().await;
1224 cache_write.remove(&cache_key);
1225 // Lock automatically dropped here
1226 Err(e)
1227 }
1228 }
1229 }
1230
    /// Get or clone a source repository as a bare repository in the cache.
    ///
    /// This is the implementation behind [`get_or_clone_source`](Self::get_or_clone_source).
    /// It creates bare repositories that can be shared by all operations
    /// (resolution, installation, etc.).
1236 ///
1237 /// # Parameters
1238 ///
1239 /// * `name` - The name of the source (used for cache directory naming)
1240 /// * `url` - The Git repository URL or local path
    /// * `version` - Optional version hint; the bare repository is never checked out,
    ///   so this is only used to warn when a version is supplied for a local path
1243 ///
1244 /// # Returns
1245 ///
1246 /// Returns the path to the cached bare repository directory
1247 async fn get_or_clone_source_impl(
1248 &self,
1249 name: &str,
1250 url: &str,
1251 version: Option<&str>,
1252 ) -> Result<PathBuf> {
1253 // Check if this is a local path (not a git repository URL)
1254 let is_local_path = crate::utils::is_local_path(url);
1255
1256 if is_local_path {
1257 // For local paths (directories), validate and return the secure path
1258 // No cloning or version management needed
1259
1260 // Resolve path securely with validation
1261 let resolved_path = crate::utils::platform::resolve_path(url)?;
1262
1263 // Canonicalize to get the real path and prevent symlink attacks
1264 let canonical_path = crate::utils::safe_canonicalize(&resolved_path)
1265 .map_err(|_| anyhow::anyhow!("Local path is not accessible or does not exist"))?;
1266
1267 // Security check: Validate path against blacklist and symlinks
1268 validate_path_security(&canonical_path, true)?;
1269
1270 // For local paths, versions don't apply. Suppress warning for internal sentinel values.
1271 if let Some(ver) = version
1272 && ver != "local"
1273 {
1274 eprintln!("Warning: Version constraints are ignored for local paths");
1275 }
1276
1277 return Ok(canonical_path);
1278 }
1279
1280 self.ensure_cache_dir().await?;
1281
1282 // Acquire lock for this source to prevent concurrent access
1283 let _lock = CacheLock::acquire(&self.dir, name)
1284 .await
1285 .with_context(|| format!("Failed to acquire lock for source: {name}"))?;
1286
1287 // Use the same cache directory structure as worktrees - bare repos with .git suffix
1288 // This ensures we have ONE repository that's shared by all operations
1289 let (owner, repo) =
1290 crate::git::parse_git_url(url).unwrap_or(("direct".to_string(), "repo".to_string()));
1291 let source_dir = self.dir.join("sources").join(format!("{owner}_{repo}.git")); // Always use .git suffix for bare repos
1292
1293 // Ensure parent directory exists
1294 if let Some(parent) = source_dir.parent() {
1295 tokio::fs::create_dir_all(parent).await.with_file_context(
1296 FileOperation::CreateDir,
1297 parent,
1298 "creating cache directory",
1299 "cache_module",
1300 )?;
1301 }
1302
1303 if source_dir.exists() {
1304 // Use existing cache - fetch to ensure we have latest refs
1305 // Skip fetch for local paths as they don't have remotes
1306 // For Git URLs, always fetch to get the latest refs (especially important for branches)
1307 if crate::utils::is_git_url(url) {
1308 // Check if we've already fetched this repo in this command instance
1309 let already_fetched = {
1310 let fetched = self.fetched_repos.read().await;
1311 fetched.contains(&source_dir)
1312 };
1313
1314 if already_fetched {
1315 tracing::debug!(
1316 target: "agpm::cache",
1317 "Skipping fetch for {} (already fetched in this command)",
1318 name
1319 );
1320 } else {
1321 tracing::debug!(
1322 target: "agpm::cache",
1323 "Fetching updates for {} from {}",
1324 name,
1325 url
1326 );
1327 let repo = crate::git::GitRepo::new(&source_dir);
1328 if let Err(e) = repo.fetch(None).await {
1329 tracing::warn!(
1330 target: "agpm::cache",
1331 "Failed to fetch updates for {}: {}",
1332 name,
1333 e
1334 );
1335 } else {
1336 // Mark this repo as fetched for this command execution
1337 let mut fetched = self.fetched_repos.write().await;
1338 fetched.insert(source_dir.clone());
1339 tracing::debug!(
1340 target: "agpm::cache",
1341 "Successfully fetched updates for {}",
1342 name
1343 );
1344 }
1345 }
1346 } else {
1347 tracing::debug!(
1348 target: "agpm::cache",
1349 "Skipping fetch for local path: {}",
1350 url
1351 );
1352 }
1353 } else {
1354 // Directory doesn't exist - clone fresh as bare repo
1355 self.clone_source(url, &source_dir).await?;
1356 }
1357
1358 Ok(source_dir)
1359 }
1360
1361 /// Clones a Git repository to the specified target directory as a bare repository.
1362 ///
1363 /// This internal method performs the initial clone operation for repositories
1364 /// that are not yet present in the cache. It creates a bare repository which
1365 /// is optimal for serving and allows multiple worktrees to be created from it.
1366 ///
1367 /// # Why Bare Repositories
1368 ///
1369 /// Bare repositories are used because:
1370 /// - **No working directory conflicts**: Multiple worktrees can be created safely
1371 /// - **Optimized for serving**: Like GitHub/GitLab, designed for fetch operations
1372 /// - **Space efficient**: No checkout of files in the main repository
1373 /// - **Thread-safe**: Multiple processes can fetch from it simultaneously
1374 ///
1375 /// # Authentication
1376 ///
1377 /// Repository authentication is handled through:
1378 /// - **SSH keys**: For `git@github.com:` URLs (user's SSH configuration)
1379 /// - **HTTPS tokens**: For private repositories (from global config)
1380 /// - **Public repos**: No authentication required
1381 ///
1382 /// # Parameters
1383 ///
1384 /// * `url` - Git repository URL to clone from
1385 /// * `target` - Local directory path where bare repository should be created
1386 ///
1387 /// # Errors
1388 ///
1389 /// Returns an error if:
1390 /// - Repository URL is invalid or unreachable
1391 /// - Authentication fails for private repositories
1392 /// - Target directory cannot be created or written to
1393 /// - Network connectivity issues
1394 /// - Git command is not available in PATH
1395 async fn clone_source(&self, url: &str, target: &Path) -> Result<()> {
1396 tracing::debug!("📦 Cloning {} to cache...", url);
1397
1398 // Clone as a bare repository for better concurrency and worktree support
1399 GitRepo::clone_bare(url, target)
1400 .await
1401 .with_context(|| format!("Failed to clone repository from {url}"))?;
1402
1403 // Debug: List what was cloned
1404 if cfg!(test)
1405 && let Ok(entries) = std::fs::read_dir(target)
1406 {
1407 tracing::debug!(
1408 target: "agpm::cache",
1409 "Cloned bare repo to {}, contents:",
1410 target.display()
1411 );
1412 for entry in entries.flatten() {
1413 tracing::debug!(
1414 target: "agpm::cache",
1415 " - {}",
1416 entry.path().display()
1417 );
1418 }
1419 }
1420
1421 Ok(())
1422 }
1423
1424 /// Copies a resource file from cached repository to project directory.
1425 ///
1426 /// This method performs the core resource installation operation by copying
1427 /// files from the cached Git repository to the project's local directory.
1428 /// It provides a simple interface for resource installation without output.
1429 ///
1430 /// # Copy Strategy
1431 ///
1432 /// The method uses a copy-based approach rather than symlinks for:
1433 /// - **Cross-platform compatibility**: Works identically on all platforms
1434 /// - **Git integration**: Real files can be tracked and committed
1435 /// - **Editor support**: No symlink confusion in IDEs and editors
1436 /// - **User flexibility**: Local files can be modified if needed
1437 ///
1438 /// # Path Resolution
1439 ///
1440 /// - **Source path**: Relative to the repository root directory
1441 /// - **Target path**: Absolute path where file should be installed
1442 /// - **Directory creation**: Parent directories created automatically
1443 /// - **Path normalization**: Handles platform-specific path separators
1444 ///
1445 /// # Parameters
1446 ///
1447 /// * `source_dir` - Path to the cached repository directory
1448 /// * `source_path` - Relative path to the resource file within the repository
1449 /// * `target_path` - Absolute path where the resource should be installed
1450 ///
1451 /// # Errors
1452 ///
1453 /// Returns an error if:
1454 /// - Source file doesn't exist in the repository
1455 /// - Target directory cannot be created (permissions)
1456 /// - File copy operation fails (disk space, permissions)
1457 /// - Source path attempts directory traversal (security)
1458 ///
1459 /// # Examples
1460 ///
1461 /// Copy a single resource file:
1462 ///
1463 /// ```rust,no_run
1464 /// use agpm_cli::cache::Cache;
1465 /// use std::path::PathBuf;
1466 ///
1467 /// # async fn example() -> anyhow::Result<()> {
1468 /// let cache = Cache::new()?;
1469 ///
1470 /// // Get cached repository
1471 /// let repo_path = cache.get_or_clone_source(
1472 /// "community",
1473 /// "https://github.com/example/repo.git",
1474 /// Some("v1.0.0")
1475 /// ).await?;
1476 ///
1477 /// // Copy resource to project
1478 /// cache.copy_resource(
1479 /// &repo_path,
1480 /// "agents/helper.md", // Source: agents/helper.md in repository
1481 /// &PathBuf::from("./my-agents/helper.md") // Target: project location
1482 /// ).await?;
1483 /// # Ok(())
1484 /// # }
1485 /// ```
1486 ///
1487 /// Copy nested resource:
1488 ///
1489 /// ```rust,no_run
1490 /// use agpm_cli::cache::Cache;
1491 /// use std::path::PathBuf;
1492 ///
1493 /// # async fn example() -> anyhow::Result<()> {
1494 /// let cache = Cache::new()?;
1495 /// let repo_path = PathBuf::from("/cache/community");
1496 ///
1497 /// cache.copy_resource(
1498 /// &repo_path,
1499 /// "tools/generators/api-client.md", // Nested source path
1500 /// &PathBuf::from("./tools/api-client.md") // Flattened target
1501 /// ).await?;
1502 /// # Ok(())
1503 /// # }
1504 /// ```
1505 pub async fn copy_resource(
1506 &self,
1507 source_dir: &Path,
1508 source_path: &str,
1509 target_path: &Path,
1510 ) -> Result<()> {
1511 self.copy_resource_with_output(source_dir, source_path, target_path, false).await
1512 }
1513
1514 /// Copies a resource file with optional installation output messages.
1515 ///
1516 /// This is the full-featured resource copying method that provides control
1517 /// over whether installation progress is displayed to the user. It handles
1518 /// all the details of safe file copying including directory creation,
1519 /// error handling, and atomic operations.
1520 ///
1521 /// # Operation Details
1522 ///
1523 /// 1. **Source validation**: Verifies the source file exists in repository
1524 /// 2. **Directory creation**: Creates target parent directories if needed
1525 /// 3. **Atomic copy**: Performs file copy operation safely
1526 /// 4. **Progress output**: Optionally displays installation confirmation
1527 ///
1528 /// # File Safety
1529 ///
    /// - **Overwrites**: Existing files are replaced without prompting
    /// - **Copy-based**: Uses the platform file copy; parent directories are created first
1532 /// - **Permission preservation**: Maintains reasonable file permissions
1533 /// - **Path validation**: Prevents directory traversal attacks
1534 ///
1535 /// # Output Control
1536 ///
1537 /// When `show_output` is `true`, displays user-friendly installation messages:
1538 /// ```text
1539 /// ✅ Installed ./agents/helper.md
1540 /// ✅ Installed ./snippets/docker-compose.md
1541 /// ```
1542 ///
1543 /// # Parameters
1544 ///
1545 /// * `source_dir` - Path to the cached repository directory
1546 /// * `source_path` - Relative path to resource file within repository
1547 /// * `target_path` - Absolute path where resource should be installed
1548 /// * `show_output` - Whether to display installation progress messages
1549 ///
1550 /// # Errors
1551 ///
1552 /// Returns specific error types for different failure modes:
1553 /// - [`AgpmError::ResourceFileNotFound`]: Source file doesn't exist
1554 /// - File system errors: Permission, disk space, invalid paths
1555 /// - Directory creation errors: Parent directory creation failures
1556 ///
1557 /// # Examples
1558 ///
1559 /// Silent installation (for batch operations):
1560 ///
1561 /// ```rust,no_run
1562 /// use agpm_cli::cache::Cache;
1563 /// use std::path::PathBuf;
1564 ///
1565 /// # async fn example() -> anyhow::Result<()> {
1566 /// let cache = Cache::new()?;
1567 /// let repo_path = PathBuf::from("/cache/community");
1568 ///
1569 /// cache.copy_resource_with_output(
1570 /// &repo_path,
1571 /// "agents/helper.md",
1572 /// &PathBuf::from("./agents/helper.md"),
1573 /// false // No output
1574 /// ).await?;
1575 /// # Ok(())
1576 /// # }
1577 /// ```
1578 ///
1579 /// Interactive installation (with progress):
1580 ///
1581 /// ```rust,no_run
1582 /// use agpm_cli::cache::Cache;
1583 /// use std::path::PathBuf;
1584 ///
1585 /// # async fn example() -> anyhow::Result<()> {
1586 /// let cache = Cache::new()?;
1587 /// let repo_path = PathBuf::from("/cache/community");
1588 ///
1589 /// cache.copy_resource_with_output(
1590 /// &repo_path,
1591 /// "snippets/deployment.md",
1592 /// &PathBuf::from("./snippets/deployment.md"),
1593 /// true // Show "✅ Installed" message
1594 /// ).await?;
1595 /// # Ok(())
1596 /// # }
1597 /// ```
1598 pub async fn copy_resource_with_output(
1599 &self,
1600 source_dir: &Path,
1601 source_path: &str,
1602 target_path: &Path,
1603 show_output: bool,
1604 ) -> Result<()> {
1605 let source_file = source_dir.join(source_path);
1606
1607 if !source_file.exists() {
1608 return Err(AgpmError::ResourceFileNotFound {
1609 path: source_path.to_string(),
1610 source_name: source_dir
1611 .file_name()
1612 .and_then(|n| n.to_str())
1613 .unwrap_or("unknown")
1614 .to_string(),
1615 }
1616 .into());
1617 }
1618
1619 if let Some(parent) = target_path.parent() {
1620 async_fs::create_dir_all(parent)
1621 .await
1622 .with_context(|| format!("Failed to create directory: {}", parent.display()))?;
1623 }
1624
1625 async_fs::copy(&source_file, target_path).await.with_context(|| {
1626 format!("Failed to copy {} to {}", source_file.display(), target_path.display())
1627 })?;
1628
1629 if show_output {
1630 println!(" ✅ Installed {}", target_path.display());
1631 }
1632
1633 Ok(())
1634 }
1635
1636 /// Removes unused cached repositories to reclaim disk space.
1637 ///
1638 /// This method performs selective cache cleanup by removing repositories
1639 /// that are no longer referenced by any active source configurations.
1640 /// It's a safe operation that preserves repositories currently in use.
1641 ///
1642 /// # Cleanup Strategy
1643 ///
1644 /// 1. **Directory scanning**: Enumerates all cached repository directories
1645 /// 2. **Active comparison**: Checks each directory against active sources list
1646 /// 3. **Safe removal**: Removes only unused directories; loose files in the cache root are preserved
1647 /// 4. **Progress reporting**: Displays removal progress for user feedback
1648 ///
1649 /// # Safety Guarantees
1650 ///
1651 /// - **Active protection**: Never removes repositories listed in active sources
1652 /// - **Directory-only**: Only removes directories, preserves any loose files
1653 /// - **Complete removal**: Each unused directory is removed in its entirety
1654 /// - **Lock awareness**: Respects file locks but doesn't acquire them
1655 ///
1656 /// # Performance Considerations
1657 ///
1658 /// - **I/O intensive**: Scans entire cache directory structure
1659 /// - **Disk space recovery**: Can free significant space for large repositories
1660 /// - **Network cost**: Removed repositories must be re-cloned if they are needed again
1661 /// - **Concurrent safe**: Can run while other cache operations are in progress
1662 ///
1663 /// # Parameters
1664 ///
1665 /// * `active_sources` - List of source names that should be preserved in cache
1666 ///
1667 /// # Returns
1668 ///
1669 /// Returns the number of repository directories that were successfully removed.
1670 ///
1671 /// # Errors
1672 ///
1673 /// Returns an error if:
1674 /// - Cache directory cannot be read (permissions)
1675 /// - Unable to remove a directory (file locks, permissions)
1676 /// - File system errors during directory traversal
1677 ///
1678 /// # Output Messages
1679 ///
1680 /// Displays progress messages for each removed repository:
1681 /// ```text
1682 /// 🗑️ Removing unused cache: old-project
1683 /// 🗑️ Removing unused cache: deprecated-tools
1684 /// ```
1685 ///
1686 /// # Examples
1687 ///
1688 /// Clean cache based on current manifest sources:
1689 ///
1690 /// ```rust,no_run
1691 /// use agpm_cli::cache::Cache;
1692 ///
1693 /// # async fn example() -> anyhow::Result<()> {
1694 /// let cache = Cache::new()?;
1695 ///
1696 /// // Active sources from current agpm.toml
1697 /// let active_sources = vec![
1698 /// "community".to_string(),
1699 /// "work-tools".to_string(),
1700 /// "personal".to_string(),
1701 /// ];
1702 ///
1703 /// let removed = cache.clean_unused(&active_sources).await?;
1704 /// println!("Cleaned {} unused repositories", removed);
1705 /// # Ok(())
1706 /// # }
1707 /// ```
1708 ///
1709 /// Clean all cached repositories:
1710 ///
1711 /// ```rust,no_run
1712 /// use agpm_cli::cache::Cache;
1713 ///
1714 /// # async fn example() -> anyhow::Result<()> {
1715 /// let cache = Cache::new()?;
1716 ///
1717 /// // Empty active list removes everything
1718 /// let removed = cache.clean_unused(&[]).await?;
1719 /// println!("Removed all {} cached repositories", removed);
1720 /// # Ok(())
1721 /// # }
1722 /// ```
1723 pub async fn clean_unused(&self, active_sources: &[String]) -> Result<usize> {
1724 self.ensure_cache_dir().await?;
1725
1726 let mut removed_count = 0;
1727 let mut entries = async_fs::read_dir(&self.dir)
1728 .await
1729 .with_context(|| "Failed to read cache directory")?;
1730
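// Scan top-level cache entries; only directories whose names are not in `active_sources` are removed.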
1731 while let Some(entry) =
1732 entries.next_entry().await.with_context(|| "Failed to read directory entry")?
1733 {
1734 let path = entry.path();
1735 if path.is_dir() {
1736 let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1737
1738 if !active_sources.contains(&dir_name.to_string()) {
1739 println!("🗑️ Removing unused cache: {dir_name}");
1740 async_fs::remove_dir_all(&path).await.with_context(|| {
1741 format!("Failed to remove cache directory: {}", path.display())
1742 })?;
1743 removed_count += 1;
1744 }
1745 }
1746 }
1747
1748 Ok(removed_count)
1749 }
1750
1751 /// Calculates the total size of the cache directory in bytes.
1752 ///
1753 /// This method recursively calculates the disk space used by all cached
1754 /// repositories and supporting files. It's useful for cache size monitoring,
1755 /// cleanup decisions, and storage management.
1756 ///
1757 /// # Calculation Method
1758 ///
1759 /// - **Recursive traversal**: Includes all subdirectories and files
1760 /// - **Actual file sizes**: Sums logical file sizes rather than allocated disk blocks
1761 /// - **All file types**: Includes Git objects, working files, and lock files
1762 /// - **Cross-platform**: Consistent behavior across different file systems
1763 ///
1764 /// # Performance Notes
1765 ///
1766 /// - **I/O intensive**: May be slow for very large caches
1767 /// - **File system dependent**: Performance varies by underlying storage
1768 /// - **Concurrent safe**: Can run during other cache operations
1769 /// - **Memory efficient**: Streams directory traversal without loading all paths
1770 ///
1771 /// # Returns
1772 ///
1773 /// Returns the total size in bytes. For a non-existent cache directory,
1774 /// returns `0` without error.
1775 ///
1776 /// # Errors
1777 ///
1778 /// Returns an error if:
1779 /// - Permission denied reading cache directory or subdirectories
1780 /// - File system errors during directory traversal
1781 /// - Symbolic link cycles (rare, but possible)
1782 ///
1783 /// # Examples
1784 ///
1785 /// Check current cache size:
1786 ///
1787 /// ```rust,no_run
1788 /// use agpm_cli::cache::Cache;
1789 ///
1790 /// # async fn example() -> anyhow::Result<()> {
1791 /// let cache = Cache::new()?;
1792 ///
1793 /// let size_bytes = cache.get_cache_size().await?;
1794 /// let size_mb = size_bytes / 1024 / 1024;
1795 ///
1796 /// println!("Cache size: {} MB ({} bytes)", size_mb, size_bytes);
1797 /// # Ok(())
1798 /// # }
1799 /// ```
1800 ///
1801 /// Display human-readable sizes:
1802 ///
1803 /// ```rust,no_run
1804 /// use agpm_cli::cache::Cache;
1805 ///
1806 /// # async fn example() -> anyhow::Result<()> {
1807 /// let cache = Cache::new()?;
1808 /// let size_bytes = cache.get_cache_size().await?;
1809 ///
1810 /// let (size, unit) = match size_bytes {
1811 /// s if s < 1024 => (s, "B"),
1812 /// s if s < 1024 * 1024 => (s / 1024, "KB"),
1813 /// s if s < 1024 * 1024 * 1024 => (s / 1024 / 1024, "MB"),
1814 /// s => (s / 1024 / 1024 / 1024, "GB"),
1815 /// };
1816 ///
1817 /// println!("Cache size: {}{}", size, unit);
1818 /// # Ok(())
1819 /// # }
1820 /// ```
1821 pub async fn get_cache_size(&self) -> Result<u64> {
1822 if !self.dir.exists() {
1823 return Ok(0);
1824 }
1825
1826 let size = fs::get_directory_size(&self.dir).await?;
1827 Ok(size)
1828 }
1829
1830 /// Returns the path to the cache directory.
1831 ///
1832 /// This method provides access to the cache directory path for inspection,
1833 /// logging, or integration with other tools. The path represents where
1834 /// all cached repositories and supporting files are stored.
1835 ///
1836 /// # Return Value
1837 ///
1838 /// Returns a reference to the [`Path`] representing the cache directory.
1839 /// The path may or may not exist on the file system; use [`ensure_cache_dir`]
1840 /// to create it if needed.
1841 ///
1842 /// # Thread Safety
1843 ///
1844 /// This method is safe to call from multiple threads as it only returns
1845 /// a reference to the immutable path stored in the `Cache` instance.
1846 ///
1847 /// # Examples
1848 ///
1849 /// Display cache location:
1850 ///
1851 /// ```rust,no_run
1852 /// use agpm_cli::cache::Cache;
1853 ///
1854 /// # fn example() -> anyhow::Result<()> {
1855 /// let cache = Cache::new()?;
1856 /// println!("Cache stored at: {}", cache.get_cache_location().display());
1857 /// # Ok(())
1858 /// # }
1859 /// ```
1860 ///
1861 /// Check if cache exists:
1862 ///
1863 /// ```rust,no_run
1864 /// use agpm_cli::cache::Cache;
1865 ///
1866 /// # fn example() -> anyhow::Result<()> {
1867 /// let cache = Cache::new()?;
1868 /// let location = cache.get_cache_location();
1869 ///
1870 /// if location.exists() {
1871 /// println!("Cache directory exists at: {}", location.display());
1872 /// } else {
1873 /// println!("Cache directory not yet created: {}", location.display());
1874 /// }
1875 /// # Ok(())
1876 /// # }
1877 /// ```
1878 ///
1879 /// [`ensure_cache_dir`]: Cache::ensure_cache_dir
1880 #[must_use]
1881 pub fn get_cache_location(&self) -> &Path {
1882 &self.dir
1883 }
1884
1885 /// Completely removes the entire cache directory and all its contents.
1886 ///
1887 /// This is a destructive operation that removes all cached repositories,
1888 /// lock files, and any other cache-related data. Use with caution as
1889 /// this will require re-cloning all repositories on the next operation.
1890 ///
1891 /// # Operation Details
1892 ///
1893 /// - **Complete removal**: Deletes the entire cache directory tree
1894 /// - **Recursive deletion**: Removes all subdirectories and files
1895 /// - **Lock files**: Also removes .locks directory and all lock files
1896 /// - **Best effort**: A failure partway through deletion may leave a partially removed cache
1897 ///
1898 /// # Recovery Impact
1899 ///
1900 /// After calling this method:
1901 /// - All repositories must be re-cloned on next use
1902 /// - Network bandwidth will be required for repository downloads
1903 /// - Disk space is immediately reclaimed
1904 /// - Cache directory will be recreated automatically on next operation
1905 ///
1906 /// # Safety Considerations
1907 ///
1908 /// - **No confirmation**: This method doesn't ask for confirmation
1909 /// - **Irreversible**: Cannot undo the deletion operation
1910 /// - **Concurrent operations**: May interfere with running cache operations
1911 /// - **No lock coordination**: Doesn't wait for cache locks and may fail if repositories are in use by other processes
1912 ///
1913 /// # Errors
1914 ///
1915 /// Returns an error if:
1916 /// - Permission denied for cache directory or contents
1917 /// - Files are locked by other processes
1918 /// - File system errors during deletion
1919 /// - Cache directory is in use by another process
1920 ///
1921 /// # Output Messages
1922 ///
1923 /// Displays confirmation message on successful completion:
1924 /// ```text
1925 /// 🗑️ Cleared all cache
1926 /// ```
1927 ///
1928 /// # Examples
1929 ///
1930 /// Clear cache for fresh start:
1931 ///
1932 /// ```rust,no_run
1933 /// use agpm_cli::cache::Cache;
1934 ///
1935 /// # async fn example() -> anyhow::Result<()> {
1936 /// let cache = Cache::new()?;
1937 ///
1938 /// // Check size before clearing
1939 /// let size_before = cache.get_cache_size().await?;
1940 /// println!("Cache size before: {} bytes", size_before);
1941 ///
1942 /// // Clear everything
1943 /// cache.clear_all().await?;
1944 ///
1945 /// // Verify cache is empty
1946 /// let size_after = cache.get_cache_size().await?;
1947 /// println!("Cache size after: {} bytes", size_after); // Should be 0
1948 /// # Ok(())
1949 /// # }
1950 /// ```
1951 ///
1952 /// Clear cache with error handling:
1953 ///
1954 /// ```rust,no_run
1955 /// use agpm_cli::cache::Cache;
1956 ///
1957 /// # async fn example() -> anyhow::Result<()> {
1958 /// let cache = Cache::new()?;
1959 ///
1960 /// match cache.clear_all().await {
1961 /// Ok(()) => println!("Cache cleared successfully"),
1962 /// Err(e) => {
1963 /// eprintln!("Failed to clear cache: {}", e);
1964 /// eprintln!("Some files may be in use by other processes");
1965 /// }
1966 /// }
1967 /// # Ok(())
1968 /// # }
1969 /// ```
1970 pub async fn clear_all(&self) -> Result<()> {
1971 if self.dir.exists() {
1972 async_fs::remove_dir_all(&self.dir).await.with_context(|| "Failed to clear cache")?;
1973 println!("🗑️ Cleared all cache");
1974 }
1975 Ok(())
1976 }
1977
1978 /// Perform a fetch operation with hybrid locking (in-process and cross-process).
1979 ///
1980 /// This method implements a two-level locking strategy:
1981 /// 1. In-process locks (`Arc<Mutex>`) for fast coordination within the same process
1982 /// 2. File-based locks for cross-process coordination
1983 ///
1984 /// The fetch will only happen once per repository per command execution.
1985 ///
1986 /// # Parameters
1987 ///
1988 /// * `bare_repo_path` - Path to the bare repository
1989 /// * `context` - Optional context string for logging
1990 ///
1991 /// # Returns
1992 ///
1993 /// Returns `Ok(())` if the fetch completed successfully, or if it was skipped because this repository was already fetched during the current command execution.
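///
/// # Example
///
/// A minimal sketch of the two-level pattern, shown for illustration only; the
/// helper name and signature below are hypothetical, and the actual logic lives
/// in this method:
///
/// ```rust,ignore
/// use std::path::Path;
/// use std::sync::Arc;
/// use fs4::fs_std::FileExt;
/// use tokio::sync::Mutex;
///
/// // Illustrative sketch; not the exact implementation of `fetch_with_hybrid_lock`.
/// async fn fetch_once(memory_lock: Arc<Mutex<()>>, lock_path: &Path) -> anyhow::Result<()> {
///     // Level 1: serialize tasks within this process.
///     let _guard = memory_lock.lock().await;
///
///     // Level 2: serialize against other processes with an exclusive file lock.
///     let file = std::fs::OpenOptions::new().create(true).write(true).open(lock_path)?;
///     file.lock_exclusive()?;
///
///     // ... check the "already fetched" set, then run `git fetch` at most once ...
///
///     Ok(()) // the file lock is released when `file` is dropped
/// }
/// ```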
1994 async fn fetch_with_hybrid_lock(
1995 &self,
1996 bare_repo_path: &Path,
1997 context: Option<&str>,
1998 ) -> Result<()> {
1999 use fs4::fs_std::FileExt;
2000
2001 // Level 1: In-process lock (fast path)
2002 let memory_lock = self
2003 .fetch_locks
2004 .entry(bare_repo_path.to_path_buf())
2005 .or_insert_with(|| Arc::new(Mutex::new(())))
2006 .clone();
2007 let _memory_guard = memory_lock.lock().await;
2008
2009 // Level 2: File-based lock (cross-process)
2010 let safe_name = bare_repo_path
2011 .file_name()
2012 .and_then(|s| s.to_str())
2013 .unwrap_or("unknown")
2014 .replace(['/', '\\', ':'], "_");
2015
2016 let lock_path = self.dir.join(".locks").join(format!("{safe_name}.fetch.lock"));
2017
2018 // Ensure lock directory exists
2019 if let Some(parent) = lock_path.parent() {
2020 tokio::fs::create_dir_all(parent).await.with_file_context(
2021 FileOperation::CreateDir,
2022 parent,
2023 "creating lock directory",
2024 "cache_module",
2025 )?;
2026 }
2027
2028 // Create/open lock file
2029 let lock_file = tokio::fs::OpenOptions::new()
2030 .create(true)
2031 .write(true)
2032 .truncate(false)
2033 .open(&lock_path)
2034 .await?;
2035
2036 // Convert to std::fs::File for fs4
2037 let std_file = lock_file.into_std().await;
2038
2039 // Acquire exclusive lock (blocks until available)
2040 if let Some(ctx) = context {
2041 tracing::debug!(
2042 target: "agpm::git",
2043 "({}) Acquiring file lock for {}",
2044 ctx,
2045 bare_repo_path.display()
2046 );
2047 }
2048 std_file.lock_exclusive()?;
2049
2050 if let Some(ctx) = context {
2051 tracing::debug!(
2052 target: "agpm::git",
2053 "({}) Acquired file lock for {}",
2054 ctx,
2055 bare_repo_path.display()
2056 );
2057 }
2058
2059 // Now check if we've already fetched this repo in this command execution
2060 // This happens AFTER acquiring the lock to prevent race conditions
2061 let already_fetched = {
2062 let fetched = self.fetched_repos.read().await;
2063 let is_fetched = fetched.contains(bare_repo_path);
2064 if let Some(ctx) = context {
2065 tracing::debug!(
2066 target: "agpm::git",
2067 "({}) Checking if already fetched: {} - Result: {} (total fetched: {}, hashset addr: {:p})",
2068 ctx,
2069 bare_repo_path.display(),
2070 is_fetched,
2071 fetched.len(),
2072 &raw const *fetched
2073 );
2074 }
2075 is_fetched
2076 };
2077
2078 if already_fetched {
2079 if let Some(ctx) = context {
2080 tracing::debug!(
2081 target: "agpm::git",
2082 "({}) Skipping fetch (already fetched in this command): {}",
2083 ctx,
2084 bare_repo_path.display()
2085 );
2086 }
2087 // Release the file lock and return
2088 return Ok(());
2089 }
2090
2091 // Now safe to fetch
2092 let repo = GitRepo::new(bare_repo_path);
2093
2094 if let Some(ctx) = context {
2095 tracing::debug!(
2096 target: "agpm::git",
2097 "({}) Fetching updates for {}",
2098 ctx,
2099 bare_repo_path.display()
2100 );
2101 }
2102
2103 repo.fetch(None).await?;
2104
2105 // Mark this repo as fetched for this command execution
2106 {
2107 let mut fetched = self.fetched_repos.write().await;
2108 fetched.insert(bare_repo_path.to_path_buf());
2109 if let Some(ctx) = context {
2110 tracing::debug!(
2111 target: "agpm::git",
2112 "({}) Marked as fetched: {} (total fetched: {}, hashset addr: {:p})",
2113 ctx,
2114 bare_repo_path.display(),
2115 fetched.len(),
2116 &raw const *fetched
2117 );
2118 }
2119 }
2120
2121 // File lock automatically released when std_file is dropped
2122 Ok(())
2123 }
2124}
2125
2126#[cfg(test)]
2127mod tests {
2128 use super::*;
2129 use tempfile::TempDir;
2130
2131 #[tokio::test]
2132 async fn test_cache_dir_creation() {
2133 let temp_dir = TempDir::new().unwrap();
2134 let cache_dir = temp_dir.path().join("cache");
2135
2136 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2137 cache.ensure_cache_dir().await.unwrap();
2138
2139 assert!(cache_dir.exists());
2140 }
2141
2142 #[tokio::test]
2143 async fn test_cache_location() {
2144 let temp_dir = TempDir::new().unwrap();
2145 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2146 let location = cache.get_cache_location();
2147 assert_eq!(location, temp_dir.path());
2148 }
2149
2150 #[tokio::test]
2151 async fn test_cache_size_empty() {
2152 let temp_dir = TempDir::new().unwrap();
2153 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2154
2155 cache.ensure_cache_dir().await.unwrap();
2156 let size = cache.get_cache_size().await.unwrap();
2157 assert_eq!(size, 0);
2158 }
2159
2160 #[tokio::test]
2161 async fn test_cache_size_with_content() {
2162 let temp_dir = TempDir::new().unwrap();
2163 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2164
2165 cache.ensure_cache_dir().await.unwrap();
2166
2167 // Create some test content
2168 let test_file = temp_dir.path().join("test.txt");
2169 std::fs::write(&test_file, "test content").unwrap();
2170
2171 let size = cache.get_cache_size().await.unwrap();
2172 assert!(size > 0);
2173 assert_eq!(size, 12); // "test content" is 12 bytes
2174 }
2175
2176 #[tokio::test]
2177 async fn test_clean_unused_empty_cache() {
2178 let temp_dir = TempDir::new().unwrap();
2179 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2180
2181 cache.ensure_cache_dir().await.unwrap();
2182
2183 let removed = cache.clean_unused(&["active".to_string()]).await.unwrap();
2184 assert_eq!(removed, 0);
2185 }
2186
2187 #[tokio::test]
2188 async fn test_clean_unused_removes_correct_dirs() {
2189 let temp_dir = TempDir::new().unwrap();
2190 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2191
2192 cache.ensure_cache_dir().await.unwrap();
2193
2194 // Create some test directories
2195 let active_dir = temp_dir.path().join("active");
2196 let unused_dir = temp_dir.path().join("unused");
2197 let another_unused = temp_dir.path().join("another_unused");
2198
2199 std::fs::create_dir_all(&active_dir).unwrap();
2200 std::fs::create_dir_all(&unused_dir).unwrap();
2201 std::fs::create_dir_all(&another_unused).unwrap();
2202
2203 // Add some content to verify directories are removed completely
2204 std::fs::write(active_dir.join("file.txt"), "keep").unwrap();
2205 std::fs::write(unused_dir.join("file.txt"), "remove").unwrap();
2206 std::fs::write(another_unused.join("file.txt"), "remove").unwrap();
2207
2208 let removed = cache.clean_unused(&["active".to_string()]).await.unwrap();
2209
2210 assert_eq!(removed, 2);
2211 assert!(active_dir.exists());
2212 assert!(!unused_dir.exists());
2213 assert!(!another_unused.exists());
2214 }
2215
2216 #[tokio::test]
2217 async fn test_clear_all_removes_entire_cache() {
2218 let temp_dir = TempDir::new().unwrap();
2219 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2220
2221 cache.ensure_cache_dir().await.unwrap();
2222
2223 // Create some content
2224 let subdir = temp_dir.path().join("subdir");
2225 std::fs::create_dir_all(&subdir).unwrap();
2226 std::fs::write(subdir.join("file.txt"), "content").unwrap();
2227
2228 assert!(temp_dir.path().exists());
2229 assert!(subdir.exists());
2230
2231 cache.clear_all().await.unwrap();
2232
2233 assert!(!temp_dir.path().exists());
2234 }
2235
2236 #[tokio::test]
2237 async fn test_copy_resource() {
2238 let temp_dir = TempDir::new().unwrap();
2239 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2240
2241 // Create source file
2242 let source_dir = temp_dir.path().join("source");
2243 std::fs::create_dir_all(&source_dir).unwrap();
2244 let source_file = source_dir.join("resource.md");
2245 std::fs::write(&source_file, "# Test Resource\nContent").unwrap();
2246
2247 // Copy resource
2248 let dest = temp_dir.path().join("dest.md");
2249 cache.copy_resource(&source_dir, "resource.md", &dest).await.unwrap();
2250
2251 assert!(dest.exists());
2252 let content = std::fs::read_to_string(&dest).unwrap();
2253 assert_eq!(content, "# Test Resource\nContent");
2254 }
2255
2256 #[tokio::test]
2257 async fn test_copy_resource_nested_path() {
2258 let temp_dir = TempDir::new().unwrap();
2259 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2260
2261 // Create source file in nested directory
2262 let source_dir = temp_dir.path().join("source");
2263 let nested_dir = source_dir.join("nested").join("path");
2264 std::fs::create_dir_all(&nested_dir).unwrap();
2265 let source_file = nested_dir.join("resource.md");
2266 std::fs::write(&source_file, "# Nested Resource").unwrap();
2267
2268 // Copy resource using relative path from source_dir
2269 let dest = temp_dir.path().join("dest.md");
2270 cache.copy_resource(&source_dir, "nested/path/resource.md", &dest).await.unwrap();
2271
2272 assert!(dest.exists());
2273 let content = std::fs::read_to_string(&dest).unwrap();
2274 assert_eq!(content, "# Nested Resource");
2275 }
2276
2277 #[tokio::test]
2278 async fn test_copy_resource_invalid_path() {
2279 let temp_dir = TempDir::new().unwrap();
2280 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2281
2282 let source_dir = temp_dir.path().join("source");
2283 std::fs::create_dir_all(&source_dir).unwrap();
2284
2285 // Try to copy non-existent resource
2286 let dest = temp_dir.path().join("dest.md");
2287 let result = cache.copy_resource(&source_dir, "nonexistent.md", &dest).await;
2288
2289 assert!(result.is_err());
2290 assert!(!dest.exists());
2291 }
2292
2293 #[tokio::test]
2294 async fn test_ensure_cache_dir_idempotent() {
2295 let temp_dir = TempDir::new().unwrap();
2296 let cache_dir = temp_dir.path().join("cache");
2297 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2298
2299 // Call ensure_cache_dir multiple times
2300 cache.ensure_cache_dir().await.unwrap();
2301 assert!(cache_dir.exists());
2302
2303 cache.ensure_cache_dir().await.unwrap();
2304 assert!(cache_dir.exists());
2305
2306 // Add a file and ensure it's preserved
2307 std::fs::write(cache_dir.join("test.txt"), "content").unwrap();
2308
2309 cache.ensure_cache_dir().await.unwrap();
2310 assert!(cache_dir.exists());
2311 assert!(cache_dir.join("test.txt").exists());
2312 }
2313
2314 #[tokio::test]
2315 async fn test_copy_resource_creates_parent_directories() {
2316 let temp_dir = TempDir::new().unwrap();
2317 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2318
2319 // Create source file
2320 let source_dir = temp_dir.path().join("source");
2321 std::fs::create_dir_all(&source_dir).unwrap();
2322 std::fs::write(source_dir.join("file.md"), "content").unwrap();
2323
2324 // Copy to a destination with non-existent parent directories
2325 let dest = temp_dir.path().join("deep").join("nested").join("dest.md");
2326 cache.copy_resource(&source_dir, "file.md", &dest).await.unwrap();
2327
2328 assert!(dest.exists());
2329 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "content");
2330 }
2331
2332 #[tokio::test]
2333 async fn test_copy_resource_with_output_flag() {
2334 let temp_dir = TempDir::new().unwrap();
2335 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2336
2337 // Create source file
2338 let source_dir = temp_dir.path().join("source");
2339 std::fs::create_dir_all(&source_dir).unwrap();
2340 std::fs::write(source_dir.join("file.md"), "content").unwrap();
2341
2342 // Test with output flag false
2343 let dest1 = temp_dir.path().join("dest1.md");
2344 cache.copy_resource_with_output(&source_dir, "file.md", &dest1, false).await.unwrap();
2345 assert!(dest1.exists());
2346
2347 // Test with output flag true
2348 let dest2 = temp_dir.path().join("dest2.md");
2349 cache.copy_resource_with_output(&source_dir, "file.md", &dest2, true).await.unwrap();
2350 assert!(dest2.exists());
2351 }
2352
2353 #[tokio::test]
2354 async fn test_cache_size_nonexistent_dir() {
2355 let temp_dir = TempDir::new().unwrap();
2356 let nonexistent = temp_dir.path().join("nonexistent");
2357 let cache = Cache::with_dir(nonexistent).unwrap();
2358
2359 let size = cache.get_cache_size().await.unwrap();
2360 assert_eq!(size, 0);
2361 }
2362
2363 #[tokio::test]
2364 async fn test_clear_all_nonexistent_cache() {
2365 let temp_dir = TempDir::new().unwrap();
2366 let nonexistent = temp_dir.path().join("nonexistent");
2367 let cache = Cache::with_dir(nonexistent).unwrap();
2368
2369 // Should not error when clearing non-existent cache
2370 cache.clear_all().await.unwrap();
2371 }
2372
2373 #[tokio::test]
2374 async fn test_clean_unused_with_files_and_dirs() {
2375 let temp_dir = TempDir::new().unwrap();
2376 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2377
2378 cache.ensure_cache_dir().await.unwrap();
2379
2380 // Create directories
2381 std::fs::create_dir_all(temp_dir.path().join("keep")).unwrap();
2382 std::fs::create_dir_all(temp_dir.path().join("remove")).unwrap();
2383
2384 // Create a file (not a directory)
2385 std::fs::write(temp_dir.path().join("file.txt"), "content").unwrap();
2386
2387 let removed = cache.clean_unused(&["keep".to_string()]).await.unwrap();
2388
2389 // Should only remove the "remove" directory, not the file
2390 assert_eq!(removed, 1);
2391 assert!(temp_dir.path().join("keep").exists());
2392 assert!(!temp_dir.path().join("remove").exists());
2393 assert!(temp_dir.path().join("file.txt").exists());
2394 }
2395
2396 #[tokio::test]
2397 async fn test_copy_resource_overwrites_existing() {
2398 let temp_dir = TempDir::new().unwrap();
2399 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2400
2401 // Create source file
2402 let source_dir = temp_dir.path().join("source");
2403 std::fs::create_dir_all(&source_dir).unwrap();
2404 std::fs::write(source_dir.join("file.md"), "new content").unwrap();
2405
2406 // Create existing destination file
2407 let dest = temp_dir.path().join("dest.md");
2408 std::fs::write(&dest, "old content").unwrap();
2409
2410 // Copy should overwrite
2411 cache.copy_resource(&source_dir, "file.md", &dest).await.unwrap();
2412
2413 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "new content");
2414 }
2415
2416 #[tokio::test]
2417 async fn test_copy_resource_special_characters() {
2418 let temp_dir = TempDir::new().unwrap();
2419 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2420
2421 // Create source file with special characters
2422 let source_dir = temp_dir.path().join("source");
2423 std::fs::create_dir_all(&source_dir).unwrap();
2424 let special_name = "file with spaces & special-chars.md";
2425 std::fs::write(source_dir.join(special_name), "content").unwrap();
2426
2427 // Copy resource
2428 let dest = temp_dir.path().join("dest.md");
2429 cache.copy_resource(&source_dir, special_name, &dest).await.unwrap();
2430
2431 assert!(dest.exists());
2432 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "content");
2433 }
2434
2435 #[tokio::test]
2436 async fn test_cache_location_consistency() {
2437 let temp_dir = TempDir::new().unwrap();
2438 let cache_dir = temp_dir.path().join("my_cache");
2439 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2440
2441 // Get location multiple times
2442 let loc1 = cache.get_cache_location();
2443 let loc2 = cache.get_cache_location();
2444
2445 assert_eq!(loc1, loc2);
2446 assert_eq!(loc1, cache_dir.as_path());
2447 }
2448
2449 #[tokio::test]
2450 async fn test_clean_unused_empty_active_list() {
2451 let temp_dir = TempDir::new().unwrap();
2452 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2453
2454 cache.ensure_cache_dir().await.unwrap();
2455
2456 // Create some directories
2457 std::fs::create_dir_all(temp_dir.path().join("source1")).unwrap();
2458 std::fs::create_dir_all(temp_dir.path().join("source2")).unwrap();
2459
2460 // Empty active list should remove all
2461 let removed = cache.clean_unused(&[]).await.unwrap();
2462
2463 assert_eq!(removed, 2);
2464 assert!(!temp_dir.path().join("source1").exists());
2465 assert!(!temp_dir.path().join("source2").exists());
2466 }
2467
2468 #[tokio::test]
2469 async fn test_copy_resource_with_relative_paths() {
2470 let temp_dir = TempDir::new().unwrap();
2471 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2472
2473 // Create source with subdirectories
2474 let source_dir = temp_dir.path().join("source");
2475 let sub_dir = source_dir.join("agents");
2476 std::fs::create_dir_all(&sub_dir).unwrap();
2477 std::fs::write(sub_dir.join("helper.md"), "# Helper Agent").unwrap();
2478
2479 // Copy using relative path
2480 let dest = temp_dir.path().join("my-agent.md");
2481 cache.copy_resource(&source_dir, "agents/helper.md", &dest).await.unwrap();
2482
2483 assert!(dest.exists());
2484 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "# Helper Agent");
2485 }
2486
2487 #[tokio::test]
2488 async fn test_cache_size_with_subdirectories() {
2489 let temp_dir = TempDir::new().unwrap();
2490 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2491
2492 cache.ensure_cache_dir().await.unwrap();
2493
2494 // Create nested structure with files
2495 let sub1 = temp_dir.path().join("sub1");
2496 let sub2 = sub1.join("sub2");
2497 std::fs::create_dir_all(&sub2).unwrap();
2498
2499 std::fs::write(temp_dir.path().join("file1.txt"), "12345").unwrap(); // 5 bytes
2500 std::fs::write(sub1.join("file2.txt"), "1234567890").unwrap(); // 10 bytes
2501 std::fs::write(sub2.join("file3.txt"), "abc").unwrap(); // 3 bytes
2502
2503 let size = cache.get_cache_size().await.unwrap();
2504 assert_eq!(size, 18); // 5 + 10 + 3
2505 }
2506}