agpm_cli/cache/mod.rs
1//! Git repository cache management with worktree-based parallel operations
2//!
3//! This module provides a sophisticated caching system for Git repositories that enables
4//! safe parallel resource installation through Git worktrees. The cache system has been
5//! redesigned for optimal concurrency, simplified architecture, and enhanced performance
6//! in AGPM v0.3.0.
7//!
8//! # Architecture Overview
9//!
10//! The cache system implements a multi-layered architecture:
11//! - [`Cache`] struct: Core repository management and worktree orchestration
12//! - [`CacheLock`]: File-based locking for process-safe concurrent access
13//! - `WorktreeState`: Instance-level caching for worktree lifecycle management
14//! - Bare repositories: Optimized Git storage for efficient worktree creation
15//!
16//! # Platform-Specific Cache Locations
17//!
18//! The cache follows platform conventions for optimal performance:
19//! - **Linux/macOS**: `~/.agpm/cache/` (following XDG standards)
20//! - **Windows**: `%LOCALAPPDATA%\agpm\cache\` (using Windows cache directory)
21//! - **Environment Override**: Set `AGPM_CACHE_DIR` for custom locations
22//!
23//! # Cache Directory Structure
24//!
25//! The cache is organized for optimal parallel access patterns:
26//! ```text
27//! ~/.agpm/cache/
28//! ├── sources/ # Bare repositories optimized for worktrees
29//! │ ├── github_owner_repo.git/ # Bare repo with all Git objects
30//! │ └── gitlab_org_project.git/ # URL-parsed directory naming
31//! ├── worktrees/ # SHA-based worktrees for maximum deduplication
32//! │ ├── github_owner_repo_abc12345/ # First 8 chars of commit SHA
33//! │ ├── github_owner_repo_def67890/ # Each unique commit gets one worktree
34//! │ ├── .state.json # Persistent worktree registry
35//! │ └── github_owner_repo_456789ab/ # Multiple refs to same SHA share worktree
36//! └── .locks/ # Fine-grained locking infrastructure
37//! ├── github_owner_repo.lock # Repository-level locks
38//! └── worktree-owner_repo-v1.lock # Worktree creation locks
39//! ```
40//!
41//! # Enhanced Concurrency Architecture
42//!
43//! The v0.3.2+ cache implements SHA-based worktree optimization with advanced concurrency:
44//! - **SHA-based deduplication**: Worktrees keyed by commit SHA, not version reference
45//! - **Centralized resolution**: `VersionResolver` handles batch SHA resolution upfront
46//! - **Maximum reuse**: Multiple tags/branches pointing to same commit share one worktree
47//! - **Instance-level caching**: `WorktreeState` tracks creation across threads
48//! - **Per-worktree file locking**: Fine-grained locks prevent creation conflicts
49//! - **Direct parallelism control**: `--max-parallel` flag controls concurrency
50//! - **Command-instance fetch caching**: Single fetch per repository per command
51//! - **Atomic state transitions**: Pending → Ready state coordination
52//!
53//! ## Locking Strategy
54//!
55//! ```text
56//! Process A: acquire("source1") ───┐
57//! ├─── BLOCKS: same source
58//! Process B: acquire("source1") ───┘
59//!
60//! Process C: acquire("source2") ───── CONCURRENT: different source
61//! ```
62//!
63//! # Cache Operations
64//!
65//! ## Repository Management
66//! - **Clone**: Initial repository cloning from remote URLs
67//! - **Update**: Fetch latest changes from remote (git fetch)
68//! - **Checkout**: Switch to specific versions (tags, branches, commits)
69//! - **Cleanup**: Remove unused repositories to reclaim disk space
70//!
71//! ## Resource Installation
72//! - **Copy-based**: Files copied from cache to project directories
73//! - **Path resolution**: Handles relative paths within repositories
74//! - **Directory creation**: Automatically creates parent directories
75//! - **Overwrite safety**: Replaces existing files atomically
76//!
77//! # Performance Characteristics
78//!
79//! The cache is optimized for common AGPM workflows:
80//! - **First install**: Clone repository once, reuse for all resources
81//! - **Subsequent installs**: Copy from local cache (fast file operations)
82//! - **Version switching**: Git checkout within cached repository
83//! - **Parallel operations**: Multiple sources can be processed concurrently
84//!
85//! ## Disk Space Management
86//!
87//! - **Size calculation**: Recursive directory size calculation
88//! - **Unused cleanup**: Remove repositories no longer referenced
89//! - **Complete cleanup**: Clear entire cache when needed
90//! - **Selective removal**: Keep active sources, remove only unused ones
91//!
92//! # Error Handling and Recovery
93//!
94//! The cache provides comprehensive error handling:
95//! - **Lock timeouts**: Graceful handling of concurrent access
96//! - **Clone failures**: Network and authentication error reporting
97//! - **Version errors**: Clear messages for invalid tags/branches/commits
98//! - **File system errors**: Detailed context for permission and space issues
99//!
100//! # Security Considerations
101//!
102//! - **Path validation**: Prevents directory traversal attacks
103//! - **Lock file isolation**: Prevents lock file manipulation
104//! - **Safe file operations**: Atomic operations prevent corruption
105//! - **Permission handling**: Respects file system permissions
106//!
107//! # Usage Examples
108//!
109//! ## Basic Cache Operations
110//!
111//! ```rust,no_run
112//! use agpm_cli::cache::Cache;
113//! use std::path::PathBuf;
114//!
115//! # async fn example() -> anyhow::Result<()> {
116//! // Initialize cache with default location
117//! let cache = Cache::new()?;
118//!
119//! // Get or clone a source repository
120//! let repo_path = cache.get_or_clone_source(
121//! "community",
122//! "https://github.com/example/agpm-community.git",
123//! Some("v1.0.0") // Specific version
124//! ).await?;
125//!
126//! // Copy a resource from cache to project
127//! cache.copy_resource(
128//! &repo_path,
129//! "agents/helper.md", // Source path in repository
130//! &PathBuf::from("./agents/helper.md") // Destination in project
131//! ).await?;
132//! # Ok(())
133//! # }
134//! ```
135//!
136//! ## Cache Maintenance
137//!
138//! ```rust,no_run
139//! use agpm_cli::cache::Cache;
140//!
141//! # #[tokio::main]
142//! # async fn main() -> anyhow::Result<()> {
143//! let cache = Cache::new()?;
144//!
145//! // Check cache size
146//! let size_bytes = cache.get_cache_size().await?;
147//! println!("Cache size: {} MB", size_bytes / 1024 / 1024);
148//!
149//! // Clean unused repositories
150//! let active_sources = vec!["community".to_string(), "work".to_string()];
151//! let removed_count = cache.clean_unused(&active_sources).await?;
152//! println!("Removed {} unused repositories", removed_count);
153//!
154//! // Complete cache cleanup
155//! cache.clear_all().await?;
156//! # Ok(())
157//! # }
158//! ```
159//!
160//! ## Custom Cache Location
161//!
162//! ```rust,no_run
163//! use agpm_cli::cache::Cache;
164//! use std::path::PathBuf;
165//!
166//! # fn custom_location() -> anyhow::Result<()> {
167//! // Use custom cache directory (useful for testing or special setups)
168//! let custom_dir = PathBuf::from("/tmp/my-agpm-cache");
169//! let cache = Cache::with_dir(custom_dir)?;
170//!
171//! println!("Using cache at: {}", cache.get_cache_location().display());
172//! # Ok(())
173//! # }
174//! ```
175//!
176//! # Integration with AGPM Workflow
177//!
178//! The cache module integrates seamlessly with AGPM's dependency management:
179//! 1. **Manifest parsing**: Source URLs extracted from `agpm.toml`
180//! 2. **Dependency resolution**: Version constraints resolved to specific commits
181//! 3. **Cache population**: Repositories cloned and checked out as needed
182//! 4. **Resource installation**: Files copied from cache to project directories
183//! 5. **Lockfile generation**: Installed resources tracked in `agpm.lock`
184//!
185//! See [`crate::manifest`] for manifest handling and [`crate::lockfile`] for
186//! lockfile management.
187
188use crate::core::error::AgpmError;
189use crate::git::GitRepo;
190use crate::git::command_builder::GitCommand;
191use crate::utils::fs;
192use crate::utils::security::validate_path_security;
193use anyhow::{Context, Result};
194use dashmap::DashMap;
195use serde::{Deserialize, Serialize};
196use std::collections::{HashMap, HashSet};
197use std::path::{Path, PathBuf};
198use std::sync::Arc;
199use std::time::{Duration, SystemTime, UNIX_EPOCH};
200use tokio::fs as async_fs;
201use tokio::sync::{Mutex, RwLock};
202
203// Concurrency Architecture:
204// - Direct control approach: Command parallelism (--max-parallel) + per-worktree file locking
205// - Instance-level caching: Worktrees and fetch operations cached per Cache instance
206// - Command-level control: --max-parallel flag controls dependency processing parallelism
207// - Fetch caching: Network operations cached for 5 minutes to reduce redundancy
208
209/// State of a worktree in the instance-level cache for concurrent coordination.
210///
211/// This enum implements a sophisticated state machine for worktree lifecycle management
212/// that enables safe concurrent access across multiple threads without race conditions.
213/// The cache uses this state to coordinate between threads that might request the same
214/// worktree simultaneously, eliminating the need for global synchronization bottlenecks.
215///
216/// # State Transitions
217///
218/// - **Initial**: No entry exists in cache (implicit state)
219/// - [`Pending`](WorktreeState::Pending): One thread is creating the worktree
220/// - [`Ready`](WorktreeState::Ready): Worktree exists and is ready for all threads
221///
222/// # Concurrency Coordination Pattern
223///
224/// The worktree creation process follows this coordinated pattern:
225/// 1. **Reservation**: First thread reserves slot by setting state to `Pending`
226/// 2. **Creation**: Reserved thread performs actual worktree creation with file lock
227/// 3. **Notification**: Creator updates state to `Ready(path)` when complete
228/// 4. **Reuse**: Subsequent threads immediately use the ready worktree path
229/// 5. **Validation**: All threads verify worktree still exists before use
230///
231/// # Cache Key Format
232///
233/// Worktrees are uniquely identified by composite keys:
234/// ```text
235/// "{cache_dir_hash}:{owner}_{repo}:{version}"
236/// ```
237///
238/// Components:
239/// - `cache_dir_hash`: First 8 hex chars of cache directory path hash
240/// - `owner_repo`: Parsed from Git URL (e.g., "`github_owner_project`")
241/// - `version`: Git reference (tag, branch, commit, or "HEAD")
242///
243/// This format ensures isolation between:
244/// - Different cache instances (via hash)
245/// - Different repositories (via owner/repo)
246/// - Different versions (via version string)
247///
248/// # Memory Management
249///
250/// The instance-level cache persists for the lifetime of the `Cache` instance,
251/// but worktrees are validated on each access to handle external deletion.
#[derive(Debug, Clone)]
enum WorktreeState {
    /// Another thread is currently creating this worktree.
    ///
    /// Set by the thread that reserved the cache slot before it begins the
    /// actual creation. When threads encounter this state, they should wait
    /// briefly and retry rather than attempting concurrent worktree creation
    /// which would fail.
    Pending,

    /// Worktree is fully created and ready to use.
    ///
    /// The `PathBuf` contains the filesystem path to the working directory.
    /// This path should be validated before use as the worktree may have been
    /// externally deleted.
    Ready(PathBuf),
}
267
/// Persistent on-disk registry of worktrees, serialized as JSON to
/// `worktrees/.state.json` inside the cache directory.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct WorktreeRegistry {
    // Entries keyed by an opaque registry key supplied by the caller
    // (see `record_worktree_usage`).
    entries: HashMap<String, WorktreeRecord>,
}
272
/// A single persisted worktree entry in the [`WorktreeRegistry`].
#[derive(Debug, Clone, Serialize, Deserialize)]
struct WorktreeRecord {
    // Source name the worktree was created for.
    source: String,
    // Version reference recorded for the worktree (tag/branch/SHA string).
    version: String,
    // Filesystem path of the worktree directory.
    path: PathBuf,
    // Unix timestamp in seconds of the last recorded use
    // (set from SystemTime in `WorktreeRegistry::update`).
    last_used: u64,
}
280
281impl WorktreeRegistry {
282 fn load(path: &Path) -> Self {
283 match std::fs::read(path) {
284 Ok(data) => serde_json::from_slice(&data).unwrap_or_default(),
285 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Self::default(),
286 Err(err) => {
287 tracing::warn!("Failed to load worktree registry from {}: {}", path.display(), err);
288 Self::default()
289 }
290 }
291 }
292
293 fn update(&mut self, key: String, source: String, version: String, path: PathBuf) {
294 let timestamp = SystemTime::now()
295 .duration_since(UNIX_EPOCH)
296 .unwrap_or_else(|_| Duration::from_secs(0))
297 .as_secs();
298
299 self.entries.insert(
300 key,
301 WorktreeRecord {
302 source,
303 version,
304 path,
305 last_used: timestamp,
306 },
307 );
308 }
309
310 fn remove_by_path(&mut self, target: &Path) -> bool {
311 if let Some(key) = self.entries.iter().find_map(|(k, record)| {
312 if record.path == target {
313 Some(k.clone())
314 } else {
315 None
316 }
317 }) {
318 self.entries.remove(&key);
319 true
320 } else {
321 false
322 }
323 }
324
325 async fn persist(&self, path: &Path) -> Result<()> {
326 if let Some(parent) = path.parent() {
327 async_fs::create_dir_all(parent).await?;
328 }
329
330 let data = serde_json::to_vec_pretty(self)?;
331 async_fs::write(path, data).await?;
332 Ok(())
333 }
334}
335
/// File-based locking mechanism for cache operations
///
/// This module provides thread-safe and process-safe locking for cache
/// operations through OS-level file locks, ensuring data consistency
/// when multiple AGPM processes access the same cache directory.
pub mod lock;
/// Re-exported so callers can refer to `cache::CacheLock` directly.
pub use lock::CacheLock;
343
/// Git repository cache for efficient resource management
///
/// The `Cache` struct provides the primary interface for managing Git repository
/// caching in AGPM. It handles repository cloning, updating, version management,
/// and resource file copying operations.
///
/// # Thread Safety
///
/// All shared state is held behind `Arc`s with async-aware locks, and the
/// manual [`Clone`] implementation shares those `Arc`s, so cloned handles
/// coordinate through the same worktree cache, fetch locks, and registry.
/// Separate instances (and separate processes) can additionally operate on
/// the same cache directory safely through the file-based locking mechanism
/// provided by [`CacheLock`].
///
/// # Platform Compatibility
///
/// The cache automatically handles platform-specific differences:
/// - **Path separators**: Uses [`std::path`] for cross-platform compatibility
/// - **Cache location**: Follows platform conventions for app data storage
/// - **File locking**: Uses [`fs4`] crate for cross-platform file locking
/// - **Directory creation**: Handles permissions and long paths on Windows
///
/// # Examples
///
/// Create a cache with default platform-specific location:
///
/// ```rust,no_run
/// use agpm_cli::cache::Cache;
///
/// # fn example() -> anyhow::Result<()> {
/// let cache = Cache::new()?;
/// println!("Cache location: {}", cache.get_cache_location().display());
/// # Ok(())
/// # }
/// ```
///
/// Create a cache with custom location (useful for testing):
///
/// ```rust,no_run
/// use agpm_cli::cache::Cache;
/// use std::path::PathBuf;
///
/// # fn example() -> anyhow::Result<()> {
/// let custom_dir = PathBuf::from("/tmp/test-cache");
/// let cache = Cache::with_dir(custom_dir)?;
/// # Ok(())
/// # }
/// ```
pub struct Cache {
    /// The root directory where all cached repositories are stored
    cache_dir: PathBuf,

    /// Instance-level cache for worktrees to avoid redundant checkouts.
    ///
    /// This cache maps worktree identifiers to their creation state, enabling
    /// safe concurrent access. Multiple threads can request the same worktree
    /// without conflicts - the first thread creates it while others wait.
    ///
    /// **Key format**: `"{cache_dir_hash}:{owner}_{repo}:{version}"`
    ///
    /// The cache directory hash ensures isolation between different Cache instances,
    /// preventing conflicts when multiple instances operate on different cache roots.
    worktree_cache: Arc<RwLock<HashMap<String, WorktreeState>>>,

    /// Per-repository async locks that serialize fetch operations across
    /// concurrent tasks. This prevents redundant `git fetch` runs when
    /// multiple dependencies target the same repository simultaneously.
    fetch_locks: Arc<DashMap<PathBuf, Arc<Mutex<()>>>>,

    /// Command-instance fetch cache to track which repositories have been fetched
    /// during this command execution. This ensures we only fetch once per repository
    /// per command instance, dramatically reducing network operations for multi-dependency
    /// installations.
    ///
    /// Contains bare repository paths that have been fetched in this command instance.
    /// Works in conjunction with `VersionResolver` to minimize Git network operations.
    fetched_repos: Arc<RwLock<HashSet<PathBuf>>>,

    /// Persistent registry of worktrees stored on disk for reuse across
    /// AGPM runs. Tracks last-used timestamps and paths so we can validate
    /// and clean up cached worktrees without recreating them unnecessarily.
    worktree_registry: Arc<Mutex<WorktreeRegistry>>,
}
425
impl Clone for Cache {
    /// Produce a handle that shares this cache's internal state.
    ///
    /// Every `Arc`-wrapped field is a reference-counted clone, so the new
    /// handle observes the same worktree cache, fetch locks, fetched-repo set,
    /// and registry as the original; only `cache_dir` is deep-copied.
    fn clone(&self) -> Self {
        Self {
            cache_dir: self.cache_dir.clone(),
            worktree_cache: Arc::clone(&self.worktree_cache),
            fetch_locks: Arc::clone(&self.fetch_locks),
            fetched_repos: Arc::clone(&self.fetched_repos),
            worktree_registry: Arc::clone(&self.worktree_registry),
        }
    }
}
437
438impl Cache {
439 fn registry_path_for(cache_dir: &Path) -> PathBuf {
440 cache_dir.join("worktrees").join(".state.json")
441 }
442
    /// Path to this cache instance's persistent worktree registry file.
    fn registry_path(&self) -> PathBuf {
        Self::registry_path_for(&self.cache_dir)
    }
446
447 /// Verify that a worktree directory is fully accessible with actual content.
448 ///
449 /// This function ensures that a newly created worktree is fully accessible
450 /// before it's marked as ready. This prevents race conditions in parallel
451 /// operations where `git worktree add` returns but the filesystem hasn't
452 /// finished writing all files yet.
453 ///
454 /// # Implementation
455 ///
456 /// Uses tokio-retry with exponential backoff to handle filesystem sync delays.
457 ///
458 /// Verification uses `git diff-index --quiet HEAD` which provides a comprehensive
459 /// check that:
460 /// - The worktree directory and .git marker exist
461 /// - The git index is readable
462 /// - ALL files from the commit are present and match HEAD
463 /// - Git recognizes the worktree as valid
464 ///
465 /// This single command provides stronger guarantees than multi-level checks,
466 /// as it verifies complete checkout rather than partial availability.
467 ///
468 /// # Parameters
469 ///
470 /// * `worktree_path` - Path to the worktree directory to verify
471 /// * `sha` - The commit SHA being checked out (for logging)
472 ///
473 /// # Errors
474 ///
475 /// Returns an error if the worktree is not accessible after all retries.
476 async fn verify_worktree_accessible(worktree_path: &Path, sha: &str) -> Result<()> {
477 use tokio_retry::Retry;
478 use tokio_retry::strategy::{ExponentialBackoff, jitter};
479
480 // Retry strategy with jitter for concurrent operations
481 let retry_strategy = ExponentialBackoff::from_millis(50)
482 .max_delay(std::time::Duration::from_secs(2))
483 .take(10)
484 .map(jitter);
485
486 let worktree_path = worktree_path.to_path_buf();
487 let sha_short = &sha[..8];
488
489 tracing::debug!(
490 target: "git::worktree",
491 "Verifying worktree at {} for SHA {}",
492 worktree_path.display(),
493 sha_short
494 );
495
496 Retry::spawn(retry_strategy, || async {
497 // Verify working tree matches HEAD (all files checked out)
498 // This verifies the worktree structure is valid and all files are present.
499 // Cache coherency (making files visible to the parent process) is now
500 // handled at the point of actual file read in installer.rs and resolver/mod.rs
501 // via read_with_cache_retry functions.
502 crate::git::command_builder::GitCommand::new()
503 .args(["diff-index", "--quiet", "HEAD"])
504 .current_dir(&worktree_path)
505 .execute_success()
506 .await
507 .map_err(|_| "Working tree doesn't match HEAD (checkout incomplete)".to_string())?;
508
509 tracing::debug!(
510 target: "git::worktree",
511 "Worktree verification passed for {}",
512 worktree_path.display()
513 );
514
515 Ok::<(), String>(())
516 })
517 .await
518 .map_err(|e| {
519 anyhow::anyhow!(
520 "Worktree not fully initialized after retries: {} @ {} - {}",
521 worktree_path.display(),
522 sha_short,
523 e
524 )
525 })
526 }
527
528 async fn record_worktree_usage(
529 &self,
530 registry_key: &str,
531 source_name: &str,
532 version_key: &str,
533 worktree_path: &Path,
534 ) -> Result<()> {
535 let mut registry = self.worktree_registry.lock().await;
536 registry.update(
537 registry_key.to_string(),
538 source_name.to_string(),
539 version_key.to_string(),
540 worktree_path.to_path_buf(),
541 );
542 registry.persist(&self.registry_path()).await?;
543 Ok(())
544 }
545
546 async fn remove_worktree_record_by_path(&self, worktree_path: &Path) -> Result<()> {
547 let mut registry = self.worktree_registry.lock().await;
548 if registry.remove_by_path(worktree_path) {
549 registry.persist(&self.registry_path()).await?;
550 }
551 Ok(())
552 }
553
554 async fn configure_connection_pooling(path: &Path) -> Result<()> {
555 let commands = [
556 ("http.version", "HTTP/2"),
557 ("http.postBuffer", "524288000"),
558 ("core.compression", "0"),
559 ];
560
561 for (key, value) in commands {
562 GitCommand::new()
563 .args(["config", key, value])
564 .current_dir(path)
565 .execute_success()
566 .await
567 .ok();
568 }
569
570 Ok(())
571 }
572
573 /// Creates a new `Cache` instance using the default platform-specific cache directory.
574 ///
575 /// The cache directory is determined based on the current platform:
576 /// - **Linux/macOS**: `~/.agpm/cache/`
577 /// - **Windows**: `%LOCALAPPDATA%\agpm\cache\`
578 ///
579 /// # Environment Variable Override
580 ///
581 /// The cache location can be overridden by setting the `AGPM_CACHE_DIR`
582 /// environment variable. This is particularly useful for:
583 /// - Testing with isolated cache directories
584 /// - CI/CD environments with specific cache locations
585 /// - Custom deployment scenarios
586 ///
587 /// # Errors
588 ///
589 /// Returns an error if:
590 /// - Unable to determine the home/local data directory
591 /// - The resolved path is invalid or inaccessible
592 ///
593 /// # Examples
594 ///
595 /// ```rust,no_run
596 /// use agpm_cli::cache::Cache;
597 ///
598 /// # fn example() -> anyhow::Result<()> {
599 /// let cache = Cache::new()?;
600 /// println!("Using cache at: {}", cache.get_cache_location().display());
601 /// # Ok(())
602 /// # }
603 /// ```
604 pub fn new() -> Result<Self> {
605 let cache_dir = crate::config::get_cache_dir()?;
606 let registry_path = Self::registry_path_for(&cache_dir);
607 let registry = WorktreeRegistry::load(®istry_path);
608 Ok(Self {
609 cache_dir,
610 worktree_cache: Arc::new(RwLock::new(HashMap::new())),
611 fetch_locks: Arc::new(DashMap::new()),
612 fetched_repos: Arc::new(RwLock::new(HashSet::new())),
613 worktree_registry: Arc::new(Mutex::new(registry)),
614 })
615 }
616
617 /// Creates a new `Cache` instance using a custom cache directory.
618 ///
619 /// This constructor allows you to specify exactly where the cache should be
620 /// stored, overriding platform defaults. The directory will be created if
621 /// it doesn't exist when cache operations are performed.
622 ///
623 /// # Use Cases
624 ///
625 /// - **Testing**: Use temporary directories for isolated test environments
626 /// - **Development**: Use project-local cache directories
627 /// - **Deployment**: Use specific paths in containerized environments
628 /// - **Multi-user systems**: Use user-specific cache locations
629 ///
630 /// # Parameters
631 ///
632 /// * `cache_dir` - The absolute path where cache data should be stored
633 ///
634 /// # Examples
635 ///
636 /// ```rust,no_run
637 /// use agpm_cli::cache::Cache;
638 /// use std::path::PathBuf;
639 ///
640 /// # fn example() -> anyhow::Result<()> {
641 /// // Use a project-local cache
642 /// let project_cache = Cache::with_dir(PathBuf::from("./cache"))?;
643 ///
644 /// // Use a system-wide cache
645 /// let system_cache = Cache::with_dir(PathBuf::from("/var/cache/agpm"))?;
646 ///
647 /// // Use a temporary cache for testing
648 /// let temp_cache = Cache::with_dir(std::env::temp_dir().join("agpm-test"))?;
649 /// # Ok(())
650 /// # }
651 /// ```
652 pub fn with_dir(cache_dir: PathBuf) -> Result<Self> {
653 let registry_path = Self::registry_path_for(&cache_dir);
654 let registry = WorktreeRegistry::load(®istry_path);
655 Ok(Self {
656 cache_dir,
657 worktree_cache: Arc::new(RwLock::new(HashMap::new())),
658 fetch_locks: Arc::new(DashMap::new()),
659 fetched_repos: Arc::new(RwLock::new(HashSet::new())),
660 worktree_registry: Arc::new(Mutex::new(registry)),
661 })
662 }
663
664 /// Ensures the cache directory exists, creating it if necessary.
665 ///
666 /// This method creates the cache directory and all necessary parent directories
667 /// if they don't already exist. It's safe to call multiple times - it will
668 /// not error if the directory already exists.
669 ///
670 /// # Platform Considerations
671 ///
672 /// - **Windows**: Handles long path names (>260 characters) correctly
673 /// - **Unix**: Respects umask settings for directory permissions
674 /// - **All platforms**: Creates intermediate directories as needed
675 ///
676 /// # Errors
677 ///
678 /// Returns an error if:
679 /// - Insufficient permissions to create the directory
680 /// - Disk space is exhausted
681 /// - Path contains invalid characters for the platform
682 /// - A file exists at the target path (not a directory)
683 ///
684 /// # Examples
685 ///
686 /// ```rust,no_run
687 /// use agpm_cli::cache::Cache;
688 ///
689 /// # async fn example() -> anyhow::Result<()> {
690 /// let cache = Cache::new()?;
691 ///
692 /// // Ensure cache directory exists before operations
693 /// cache.ensure_cache_dir().await?;
694 ///
695 /// // Safe to call multiple times
696 /// cache.ensure_cache_dir().await?; // No error
697 /// # Ok(())
698 /// # }
699 /// ```
700 pub async fn ensure_cache_dir(&self) -> Result<()> {
701 if !self.cache_dir.exists() {
702 async_fs::create_dir_all(&self.cache_dir).await.with_context(|| {
703 format!("Failed to create cache directory at {}", self.cache_dir.display())
704 })?;
705 }
706 Ok(())
707 }
708
709 /// Returns the path to the cache directory.
710 ///
711 /// This is useful for operations that need direct access to the cache directory,
712 /// such as lock file cleanup or cache size calculations.
713 ///
714 /// # Example
715 ///
716 /// ```rust,no_run
717 /// use agpm_cli::cache::Cache;
718 ///
719 /// # fn example() -> anyhow::Result<()> {
720 /// let cache = Cache::new()?;
721 /// let cache_dir = cache.cache_dir();
722 /// println!("Cache directory: {}", cache_dir.display());
723 /// # Ok(())
724 /// # }
725 /// ```
    pub fn cache_dir(&self) -> &Path {
        // Borrowed view of the cache root; callers clone if they need ownership.
        &self.cache_dir
    }
729
730 /// Get the worktree path for a specific URL and commit SHA.
731 ///
732 /// This method constructs the expected worktree directory path based on the cache's
733 /// naming scheme. It does NOT check if the worktree exists or create it - use
734 /// `get_or_create_worktree_for_sha` for that.
735 ///
736 /// # Arguments
737 ///
738 /// * `url` - Git repository URL
739 /// * `sha` - Full commit SHA (will be shortened to first 8 characters)
740 ///
741 /// # Returns
742 ///
743 /// Path to the worktree directory (may not exist yet)
744 ///
745 /// # Example
746 ///
747 /// ```rust,no_run
748 /// use agpm_cli::cache::Cache;
749 ///
750 /// # fn example() -> anyhow::Result<()> {
751 /// let cache = Cache::new()?;
752 /// let path = cache.get_worktree_path(
753 /// "https://github.com/owner/repo.git",
754 /// "abc1234567890def"
755 /// )?;
756 /// println!("Worktree path: {}", path.display());
757 /// # Ok(())
758 /// # }
759 /// ```
760 pub fn get_worktree_path(&self, url: &str, sha: &str) -> Result<PathBuf> {
761 let (owner, repo) = crate::git::parse_git_url(url)
762 .map_err(|e| anyhow::anyhow!("Invalid Git URL: {}", e))?;
763 let sha_short = &sha[..8.min(sha.len())];
764 Ok(self.cache_dir.join("worktrees").join(format!("{owner}_{repo}_{sha_short}")))
765 }
766
767 /// Gets or clones a source repository, ensuring it's available in the cache.
768 ///
769 /// This is the primary method for source repository management. It handles both
770 /// initial cloning of new repositories and updating existing cached repositories.
771 /// The operation is atomic and thread-safe through file-based locking.
772 ///
773 /// # Operation Flow
774 ///
775 /// 1. **Lock acquisition**: Acquires exclusive lock for the source name
776 /// 2. **Directory check**: Determines if repository already exists in cache
777 /// 3. **Clone or update**: Either clones new repository or fetches updates
778 /// 4. **Version checkout**: Switches to requested version if specified
779 /// 5. **Path return**: Returns path to cached repository
780 ///
781 /// # Concurrency Behavior
782 ///
783 /// - **Same source**: Concurrent calls with the same `name` will block
784 /// - **Different sources**: Concurrent calls with different `name` run in parallel
785 /// - **Process safety**: Safe across multiple AGPM processes
786 ///
787 /// # Version Handling
788 ///
789 /// The `version` parameter accepts various Git reference types:
790 /// - **Tags**: `"v1.0.0"`, `"release-2023"` (most common for releases)
791 /// - **Branches**: `"main"`, `"develop"`, `"feature/new-agents"`
792 /// - **Commits**: `"abc123def"` (full or short SHA hashes)
793 /// - **None**: Uses repository's default branch (typically `main` or `master`)
794 ///
795 /// # Parameters
796 ///
797 /// * `name` - Unique source identifier (used for cache directory and locking)
798 /// * `url` - Git repository URL (HTTPS, SSH, or local paths)
799 /// * `version` - Optional version constraint (tag, branch, or commit)
800 ///
801 /// # Returns
802 ///
803 /// Returns the [`PathBuf`] to the cached repository directory, which contains
804 /// the full Git repository structure and can be used for resource file access.
805 ///
806 /// # Errors
807 ///
808 /// Returns an error if:
809 /// - **Network issues**: Unable to clone or fetch from remote repository
810 /// - **Authentication**: Invalid credentials for private repositories
811 /// - **Version issues**: Specified version doesn't exist in repository
812 /// - **Lock timeout**: Unable to acquire exclusive lock (rare)
813 /// - **File system**: Permission or disk space issues
814 /// - **Git errors**: Repository corruption or invalid Git operations
815 ///
816 /// # Performance Notes
817 ///
818 /// - **First call**: Performs full repository clone (slower)
819 /// - **Subsequent calls**: Only fetches updates (faster)
820 /// - **Version switching**: Uses Git checkout (very fast)
821 /// - **Parallel sources**: Multiple sources processed concurrently
822 ///
823 /// # Examples
824 ///
825 /// Clone a public repository with specific version:
826 ///
827 /// ```rust,no_run
828 /// use agpm_cli::cache::Cache;
829 ///
830 /// # async fn example() -> anyhow::Result<()> {
831 /// let cache = Cache::new()?;
832 ///
833 /// let repo_path = cache.get_or_clone_source(
834 /// "community",
835 /// "https://github.com/example/agpm-community.git",
836 /// Some("v1.2.0")
837 /// ).await?;
838 ///
839 /// println!("Repository cached at: {}", repo_path.display());
840 /// # Ok(())
841 /// # }
842 /// ```
843 ///
844 /// Use latest version from default branch:
845 ///
846 /// ```rust,no_run
847 /// use agpm_cli::cache::Cache;
848 ///
849 /// # async fn example() -> anyhow::Result<()> {
850 /// let cache = Cache::new()?;
851 ///
852 /// let repo_path = cache.get_or_clone_source(
853 /// "dev-tools",
854 /// "https://github.com/myorg/dev-tools.git",
855 /// None // Use default branch
856 /// ).await?;
857 /// # Ok(())
858 /// # }
859 /// ```
860 ///
861 /// Work with development branch:
862 ///
863 /// ```rust,no_run
864 /// use agpm_cli::cache::Cache;
865 ///
866 /// # async fn example() -> anyhow::Result<()> {
867 /// let cache = Cache::new()?;
868 ///
869 /// let repo_path = cache.get_or_clone_source(
870 /// "experimental",
871 /// "https://github.com/myorg/experimental.git",
872 /// Some("develop")
873 /// ).await?;
874 /// # Ok(())
875 /// # }
876 /// ```
    pub async fn get_or_clone_source(
        &self,
        name: &str,
        url: &str,
        version: Option<&str>,
    ) -> Result<PathBuf> {
        // Thin public wrapper: all clone/fetch/locking logic lives in
        // `get_or_clone_source_impl` so internal call sites can share it.
        self.get_or_clone_source_impl(name, url, version).await
    }
885
886 /// Clean up a worktree after use (fast version).
887 ///
888 /// This just removes the worktree directory without calling git.
889 /// Git will clean up its internal references when `git worktree prune` is called.
890 ///
891 /// # Parameters
892 ///
893 /// * `worktree_path` - The path to the worktree to clean up
894 pub async fn cleanup_worktree(&self, worktree_path: &Path) -> Result<()> {
895 // Just remove the directory - don't call git worktree remove
896 // This is much faster and git will clean up its references later
897 if worktree_path.exists() {
898 tokio::fs::remove_dir_all(worktree_path).await.with_context(|| {
899 format!("Failed to remove worktree directory: {worktree_path:?}")
900 })?;
901 self.remove_worktree_record_by_path(worktree_path).await?;
902 }
903 Ok(())
904 }
905
906 /// Clean up all worktrees in the cache.
907 ///
908 /// This is useful for cleaning up after batch operations or on cache clear.
909 pub async fn cleanup_all_worktrees(&self) -> Result<()> {
910 let worktrees_dir = self.cache_dir.join("worktrees");
911
912 if !worktrees_dir.exists() {
913 return Ok(());
914 }
915
916 // Remove the entire worktrees directory
917 tokio::fs::remove_dir_all(&worktrees_dir)
918 .await
919 .with_context(|| "Failed to clean up worktrees")?;
920
921 // Also prune worktree references from all bare repos
922 let sources_dir = self.cache_dir.join("sources");
923 if sources_dir.exists() {
924 let mut entries = tokio::fs::read_dir(&sources_dir).await?;
925 while let Some(entry) = entries.next_entry().await? {
926 let path = entry.path();
927 if path.extension().and_then(|s| s.to_str()) == Some("git") {
928 let bare_repo = GitRepo::new(&path);
929 bare_repo.prune_worktrees().await.ok();
930 }
931 }
932 }
933
934 {
935 let mut registry = self.worktree_registry.lock().await;
936 if !registry.entries.is_empty() {
937 registry.entries.clear();
938 registry.persist(&self.registry_path()).await?;
939 }
940 }
941
942 Ok(())
943 }
944
945 /// Get or create a worktree for a specific commit SHA.
946 ///
947 /// This method is the cornerstone of AGPM's optimized dependency resolution.
948 /// By using commit SHAs as the primary key for worktrees, we ensure:
949 /// - Maximum worktree reuse (same SHA = same worktree)
950 /// - Deterministic installations (SHA uniquely identifies content)
951 /// - Reduced disk usage (no duplicate worktrees for same commit)
952 ///
953 /// # SHA-Based Caching Strategy
954 ///
955 /// Unlike version-based worktrees that create separate directories for
956 /// "v1.0.0" and "release-1.0" even if they point to the same commit,
957 /// SHA-based worktrees ensure a single worktree per unique commit.
958 ///
959 /// # Parameters
960 ///
961 /// * `name` - Source name from manifest
962 /// * `url` - Git repository URL
963 /// * `sha` - Full 40-character commit SHA (must be pre-resolved)
964 /// * `context` - Optional context for logging
965 ///
966 /// # Returns
967 ///
968 /// Path to the worktree containing the exact commit specified by SHA.
969 ///
970 /// # Example
971 ///
972 /// ```no_run
973 /// # use agpm_cli::cache::Cache;
974 /// # async fn example() -> anyhow::Result<()> {
975 /// let cache = Cache::new()?;
976 ///
977 /// // First resolve version to SHA
978 /// let sha = "abc1234567890def1234567890abcdef12345678";
979 ///
980 /// // Get worktree for that specific commit
981 /// let worktree = cache.get_or_create_worktree_for_sha(
982 /// "community",
983 /// "https://github.com/example/repo.git",
984 /// sha,
985 /// Some("my-agent")
986 /// ).await?;
987 /// # Ok(())
988 /// # }
989 /// ```
990 pub async fn get_or_create_worktree_for_sha(
991 &self,
992 name: &str,
993 url: &str,
994 sha: &str,
995 context: Option<&str>,
996 ) -> Result<PathBuf> {
997 // Validate SHA format
998 if sha.len() != 40 || !sha.chars().all(|c| c.is_ascii_hexdigit()) {
999 return Err(anyhow::anyhow!(
1000 "Invalid SHA format: expected 40 hex characters, got '{sha}'"
1001 ));
1002 }
1003
1004 // Check if this is a local path
1005 let is_local_path = crate::utils::is_local_path(url);
1006 if is_local_path {
1007 // Local paths don't use worktrees
1008 return self.get_or_clone_source(name, url, None).await;
1009 }
1010
1011 self.ensure_cache_dir().await?;
1012
1013 // Parse URL for cache structure
1014 let (owner, repo) =
1015 crate::git::parse_git_url(url).unwrap_or(("direct".to_string(), "repo".to_string()));
1016
1017 // Create SHA-based cache key
1018 // Using first 8 chars of SHA for directory name (like Git does)
1019 let sha_short = &sha[..8];
1020 let cache_dir_hash = {
1021 use std::collections::hash_map::DefaultHasher;
1022 use std::hash::{Hash, Hasher};
1023 let mut hasher = DefaultHasher::new();
1024 self.cache_dir.hash(&mut hasher);
1025 format!("{:x}", hasher.finish())[..8].to_string()
1026 };
1027 let cache_key = format!("{cache_dir_hash}:{owner}_{repo}:{sha}");
1028
1029 // Check if we already have a worktree for this SHA
1030 let mut should_create_worktree = false;
1031 while !should_create_worktree {
1032 {
1033 let cache_read = self.worktree_cache.read().await;
1034 match cache_read.get(&cache_key) {
1035 Some(WorktreeState::Ready(cached_path)) => {
1036 if cached_path.exists() {
1037 let cached_path = cached_path.clone();
1038 drop(cache_read);
1039 self.record_worktree_usage(&cache_key, name, sha_short, &cached_path)
1040 .await?;
1041
1042 if let Some(ctx) = context {
1043 tracing::debug!(
1044 target: "git",
1045 "({}) Reusing SHA-based worktree for {} @ {}",
1046 ctx,
1047 url.split('/').next_back().unwrap_or(url),
1048 sha_short
1049 );
1050 }
1051 return Ok(cached_path);
1052 }
1053 should_create_worktree = true;
1054 }
1055 Some(WorktreeState::Pending) => {
1056 if let Some(ctx) = context {
1057 tracing::debug!(
1058 target: "git",
1059 "({}) Waiting for SHA worktree creation for {} @ {}",
1060 ctx,
1061 url.split('/').next_back().unwrap_or(url),
1062 sha_short
1063 );
1064 }
1065 drop(cache_read);
1066 tokio::time::sleep(Duration::from_millis(100)).await;
1067 }
1068 None => {
1069 should_create_worktree = true;
1070 }
1071 }
1072 }
1073 }
1074
1075 // Reserve the cache slot
1076 let mut reservation_successful = false;
1077 while !reservation_successful {
1078 let mut cache_write = self.worktree_cache.write().await;
1079 match cache_write.get(&cache_key) {
1080 Some(WorktreeState::Ready(cached_path)) if cached_path.exists() => {
1081 return Ok(cached_path.clone());
1082 }
1083 Some(WorktreeState::Pending) => {
1084 drop(cache_write);
1085 tokio::time::sleep(Duration::from_millis(50)).await;
1086 }
1087 _ => {
1088 cache_write.insert(cache_key.clone(), WorktreeState::Pending);
1089 reservation_successful = true;
1090 }
1091 }
1092 }
1093
1094 // Get bare repository (fetches if needed)
1095 let bare_repo_dir = self.cache_dir.join("sources").join(format!("{owner}_{repo}.git"));
1096
1097 if bare_repo_dir.exists() {
1098 // Fetch to ensure we have the SHA
1099 self.fetch_with_hybrid_lock(&bare_repo_dir, context).await?;
1100 } else {
1101 let lock_name = format!("{owner}_{repo}");
1102 let _lock = CacheLock::acquire(&self.cache_dir, &lock_name).await?;
1103
1104 if let Some(parent) = bare_repo_dir.parent() {
1105 tokio::fs::create_dir_all(parent).await?;
1106 }
1107
1108 if !bare_repo_dir.exists() {
1109 if let Some(ctx) = context {
1110 tracing::debug!("📦 ({ctx}) Cloning repository {url}...");
1111 } else {
1112 tracing::debug!("📦 Cloning repository {url} to cache...");
1113 }
1114
1115 GitRepo::clone_bare_with_context(url, &bare_repo_dir, context).await?;
1116 Self::configure_connection_pooling(&bare_repo_dir).await.ok();
1117 }
1118 }
1119
1120 let bare_repo = GitRepo::new(&bare_repo_dir);
1121
1122 // Create worktree path using SHA
1123 let worktree_path =
1124 self.cache_dir.join("worktrees").join(format!("{owner}_{repo}_{sha_short}"));
1125
1126 // Acquire worktree creation lock
1127 let worktree_lock_name = format!("worktree-{owner}-{repo}-{sha_short}");
1128 let _worktree_lock = CacheLock::acquire(&self.cache_dir, &worktree_lock_name).await?;
1129
1130 // Re-check after lock
1131 if worktree_path.exists() {
1132 let mut cache_write = self.worktree_cache.write().await;
1133 cache_write.insert(cache_key.clone(), WorktreeState::Ready(worktree_path.clone()));
1134 self.record_worktree_usage(&cache_key, name, sha_short, &worktree_path).await?;
1135 return Ok(worktree_path);
1136 }
1137
1138 // Prune stale worktrees if needed
1139 if !worktree_path.exists() {
1140 let _ = bare_repo.prune_worktrees().await;
1141 }
1142
1143 // Create worktree at specific SHA
1144 if let Some(ctx) = context {
1145 tracing::debug!(
1146 target: "git",
1147 "({}) Creating SHA-based worktree: {} @ {}",
1148 ctx,
1149 url.split('/').next_back().unwrap_or(url),
1150 sha_short
1151 );
1152 }
1153
1154 // Lock bare repo for worktree creation
1155 // Hold the lock through cache update to prevent git state corruption
1156 // when multiple worktrees are created concurrently for the same repo
1157 let bare_repo_lock_name = format!("bare-repo-{owner}_{repo}");
1158 let _bare_repo_lock = CacheLock::acquire(&self.cache_dir, &bare_repo_lock_name).await?;
1159
1160 // Create worktree using SHA directly
1161 let worktree_result =
1162 bare_repo.create_worktree_with_context(&worktree_path, Some(sha), context).await;
1163
1164 // Keep lock held until cache is updated to ensure git state is fully settled
1165 match worktree_result {
1166 Ok(_) => {
1167 // Verify worktree is fully accessible before marking as Ready
1168 // This prevents race conditions where git worktree add returns
1169 // but filesystem hasn't finished writing all files yet
1170 Self::verify_worktree_accessible(&worktree_path, sha).await?;
1171
1172 let mut cache_write = self.worktree_cache.write().await;
1173 cache_write.insert(cache_key.clone(), WorktreeState::Ready(worktree_path.clone()));
1174 self.record_worktree_usage(&cache_key, name, sha_short, &worktree_path).await?;
1175 // Lock automatically dropped here
1176 Ok(worktree_path)
1177 }
1178 Err(e) => {
1179 let mut cache_write = self.worktree_cache.write().await;
1180 cache_write.remove(&cache_key);
1181 // Lock automatically dropped here
1182 Err(e)
1183 }
1184 }
1185 }
1186
    /// Get or clone a source repository with options to control cache behavior.
    ///
    /// This method provides the core functionality for repository access with
    /// additional control over cache behavior. Creates bare repositories that
    /// can be shared by all operations (resolution, installation, etc).
    ///
    /// # Parameters
    ///
    /// * `name` - The name of the source (used for cache directory naming)
    /// * `url` - The Git repository URL or local path
    /// * `version` - Only consulted for local paths, where it triggers a
    ///   warning that version constraints are ignored (the internal sentinel
    ///   value `"local"` is exempt). For Git URLs the bare repository holds
    ///   all refs, so version resolution happens later.
    ///
    /// # Returns
    ///
    /// Returns the path to the cached bare repository directory, or — for a
    /// local path — the canonicalized, security-validated directory itself.
    ///
    /// # Errors
    ///
    /// Returns an error if a local path fails resolution/canonicalization or
    /// the security check, if the per-source cache lock cannot be acquired,
    /// if the cache directory cannot be created, or if a fresh clone fails.
    /// Note that a failed *fetch* of an existing cached repository is only
    /// logged as a warning, not returned as an error.
    async fn get_or_clone_source_impl(
        &self,
        name: &str,
        url: &str,
        version: Option<&str>,
    ) -> Result<PathBuf> {
        // Check if this is a local path (not a git repository URL)
        let is_local_path = crate::utils::is_local_path(url);

        if is_local_path {
            // For local paths (directories), validate and return the secure path
            // No cloning or version management needed

            // Resolve path securely with validation
            let resolved_path = crate::utils::platform::resolve_path(url)?;

            // Canonicalize to get the real path and prevent symlink attacks
            let canonical_path = crate::utils::safe_canonicalize(&resolved_path)
                .map_err(|_| anyhow::anyhow!("Local path is not accessible or does not exist"))?;

            // Security check: Validate path against blacklist and symlinks
            validate_path_security(&canonical_path, true)?;

            // For local paths, versions don't apply. Suppress warning for internal sentinel values.
            if let Some(ver) = version
                && ver != "local"
            {
                eprintln!("Warning: Version constraints are ignored for local paths");
            }

            return Ok(canonical_path);
        }

        self.ensure_cache_dir().await?;

        // Acquire lock for this source to prevent concurrent access
        let _lock = CacheLock::acquire(&self.cache_dir, name)
            .await
            .with_context(|| format!("Failed to acquire lock for source: {name}"))?;

        // Use the same cache directory structure as worktrees - bare repos with .git suffix
        // This ensures we have ONE repository that's shared by all operations
        let (owner, repo) =
            crate::git::parse_git_url(url).unwrap_or(("direct".to_string(), "repo".to_string()));
        let source_dir = self.cache_dir.join("sources").join(format!("{owner}_{repo}.git")); // Always use .git suffix for bare repos

        // Ensure parent directory exists
        if let Some(parent) = source_dir.parent() {
            tokio::fs::create_dir_all(parent)
                .await
                .with_context(|| format!("Failed to create cache directory: {parent:?}"))?;
        }

        if source_dir.exists() {
            // Use existing cache - fetch to ensure we have latest refs
            // Skip fetch for local paths as they don't have remotes
            // For Git URLs, always fetch to get the latest refs (especially important for branches)
            if crate::utils::is_git_url(url) {
                // Check if we've already fetched this repo in this command instance
                let already_fetched = {
                    let fetched = self.fetched_repos.read().await;
                    fetched.contains(&source_dir)
                };

                if already_fetched {
                    tracing::debug!(
                        target: "agpm::cache",
                        "Skipping fetch for {} (already fetched in this command)",
                        name
                    );
                } else {
                    tracing::debug!(
                        target: "agpm::cache",
                        "Fetching updates for {} from {}",
                        name,
                        url
                    );
                    let repo = crate::git::GitRepo::new(&source_dir);
                    if let Err(e) = repo.fetch(None).await {
                        // Fetch failures are deliberately non-fatal: the refs
                        // already present in the cache may still satisfy the
                        // requested version (e.g. when offline).
                        tracing::warn!(
                            target: "agpm::cache",
                            "Failed to fetch updates for {}: {}",
                            name,
                            e
                        );
                    } else {
                        // Mark this repo as fetched for this command execution
                        let mut fetched = self.fetched_repos.write().await;
                        fetched.insert(source_dir.clone());
                        tracing::debug!(
                            target: "agpm::cache",
                            "Successfully fetched updates for {}",
                            name
                        );
                    }
                }
            } else {
                tracing::debug!(
                    target: "agpm::cache",
                    "Skipping fetch for local path: {}",
                    url
                );
            }
        } else {
            // Directory doesn't exist - clone fresh as bare repo
            self.clone_source(url, &source_dir).await?;
        }

        Ok(source_dir)
    }
1313
1314 /// Clones a Git repository to the specified target directory as a bare repository.
1315 ///
1316 /// This internal method performs the initial clone operation for repositories
1317 /// that are not yet present in the cache. It creates a bare repository which
1318 /// is optimal for serving and allows multiple worktrees to be created from it.
1319 ///
1320 /// # Why Bare Repositories
1321 ///
1322 /// Bare repositories are used because:
1323 /// - **No working directory conflicts**: Multiple worktrees can be created safely
1324 /// - **Optimized for serving**: Like GitHub/GitLab, designed for fetch operations
1325 /// - **Space efficient**: No checkout of files in the main repository
1326 /// - **Thread-safe**: Multiple processes can fetch from it simultaneously
1327 ///
1328 /// # Authentication
1329 ///
1330 /// Repository authentication is handled through:
1331 /// - **SSH keys**: For `git@github.com:` URLs (user's SSH configuration)
1332 /// - **HTTPS tokens**: For private repositories (from global config)
1333 /// - **Public repos**: No authentication required
1334 ///
1335 /// # Parameters
1336 ///
1337 /// * `url` - Git repository URL to clone from
1338 /// * `target` - Local directory path where bare repository should be created
1339 ///
1340 /// # Errors
1341 ///
1342 /// Returns an error if:
1343 /// - Repository URL is invalid or unreachable
1344 /// - Authentication fails for private repositories
1345 /// - Target directory cannot be created or written to
1346 /// - Network connectivity issues
1347 /// - Git command is not available in PATH
1348 async fn clone_source(&self, url: &str, target: &Path) -> Result<()> {
1349 tracing::debug!("📦 Cloning {} to cache...", url);
1350
1351 // Clone as a bare repository for better concurrency and worktree support
1352 GitRepo::clone_bare(url, target)
1353 .await
1354 .with_context(|| format!("Failed to clone repository from {url}"))?;
1355
1356 // Debug: List what was cloned
1357 if cfg!(test)
1358 && let Ok(entries) = std::fs::read_dir(target)
1359 {
1360 tracing::debug!(
1361 target: "agpm::cache",
1362 "Cloned bare repo to {}, contents:",
1363 target.display()
1364 );
1365 for entry in entries.flatten() {
1366 tracing::debug!(
1367 target: "agpm::cache",
1368 " - {}",
1369 entry.path().display()
1370 );
1371 }
1372 }
1373
1374 Ok(())
1375 }
1376
1377 /// Copies a resource file from cached repository to project directory.
1378 ///
1379 /// This method performs the core resource installation operation by copying
1380 /// files from the cached Git repository to the project's local directory.
1381 /// It provides a simple interface for resource installation without output.
1382 ///
1383 /// # Copy Strategy
1384 ///
1385 /// The method uses a copy-based approach rather than symlinks for:
1386 /// - **Cross-platform compatibility**: Works identically on all platforms
1387 /// - **Git integration**: Real files can be tracked and committed
1388 /// - **Editor support**: No symlink confusion in IDEs and editors
1389 /// - **User flexibility**: Local files can be modified if needed
1390 ///
1391 /// # Path Resolution
1392 ///
1393 /// - **Source path**: Relative to the repository root directory
1394 /// - **Target path**: Absolute path where file should be installed
1395 /// - **Directory creation**: Parent directories created automatically
1396 /// - **Path normalization**: Handles platform-specific path separators
1397 ///
1398 /// # Parameters
1399 ///
1400 /// * `source_dir` - Path to the cached repository directory
1401 /// * `source_path` - Relative path to the resource file within the repository
1402 /// * `target_path` - Absolute path where the resource should be installed
1403 ///
1404 /// # Errors
1405 ///
1406 /// Returns an error if:
1407 /// - Source file doesn't exist in the repository
1408 /// - Target directory cannot be created (permissions)
1409 /// - File copy operation fails (disk space, permissions)
1410 /// - Source path attempts directory traversal (security)
1411 ///
1412 /// # Examples
1413 ///
1414 /// Copy a single resource file:
1415 ///
1416 /// ```rust,no_run
1417 /// use agpm_cli::cache::Cache;
1418 /// use std::path::PathBuf;
1419 ///
1420 /// # async fn example() -> anyhow::Result<()> {
1421 /// let cache = Cache::new()?;
1422 ///
1423 /// // Get cached repository
1424 /// let repo_path = cache.get_or_clone_source(
1425 /// "community",
1426 /// "https://github.com/example/repo.git",
1427 /// Some("v1.0.0")
1428 /// ).await?;
1429 ///
1430 /// // Copy resource to project
1431 /// cache.copy_resource(
1432 /// &repo_path,
1433 /// "agents/helper.md", // Source: agents/helper.md in repository
1434 /// &PathBuf::from("./my-agents/helper.md") // Target: project location
1435 /// ).await?;
1436 /// # Ok(())
1437 /// # }
1438 /// ```
1439 ///
1440 /// Copy nested resource:
1441 ///
1442 /// ```rust,no_run
1443 /// use agpm_cli::cache::Cache;
1444 /// use std::path::PathBuf;
1445 ///
1446 /// # async fn example() -> anyhow::Result<()> {
1447 /// let cache = Cache::new()?;
1448 /// let repo_path = PathBuf::from("/cache/community");
1449 ///
1450 /// cache.copy_resource(
1451 /// &repo_path,
1452 /// "tools/generators/api-client.md", // Nested source path
1453 /// &PathBuf::from("./tools/api-client.md") // Flattened target
1454 /// ).await?;
1455 /// # Ok(())
1456 /// # }
1457 /// ```
1458 pub async fn copy_resource(
1459 &self,
1460 source_dir: &Path,
1461 source_path: &str,
1462 target_path: &Path,
1463 ) -> Result<()> {
1464 self.copy_resource_with_output(source_dir, source_path, target_path, false).await
1465 }
1466
1467 /// Copies a resource file with optional installation output messages.
1468 ///
1469 /// This is the full-featured resource copying method that provides control
1470 /// over whether installation progress is displayed to the user. It handles
1471 /// all the details of safe file copying including directory creation,
1472 /// error handling, and atomic operations.
1473 ///
1474 /// # Operation Details
1475 ///
1476 /// 1. **Source validation**: Verifies the source file exists in repository
1477 /// 2. **Directory creation**: Creates target parent directories if needed
1478 /// 3. **Atomic copy**: Performs file copy operation safely
1479 /// 4. **Progress output**: Optionally displays installation confirmation
1480 ///
1481 /// # File Safety
1482 ///
1483 /// - **Overwrite protection**: Will overwrite existing files without warning
1484 /// - **Atomic operations**: Uses system copy operations for atomicity
1485 /// - **Permission preservation**: Maintains reasonable file permissions
1486 /// - **Path validation**: Prevents directory traversal attacks
1487 ///
1488 /// # Output Control
1489 ///
1490 /// When `show_output` is `true`, displays user-friendly installation messages:
1491 /// ```text
1492 /// ✅ Installed ./agents/helper.md
1493 /// ✅ Installed ./snippets/docker-compose.md
1494 /// ```
1495 ///
1496 /// # Parameters
1497 ///
1498 /// * `source_dir` - Path to the cached repository directory
1499 /// * `source_path` - Relative path to resource file within repository
1500 /// * `target_path` - Absolute path where resource should be installed
1501 /// * `show_output` - Whether to display installation progress messages
1502 ///
1503 /// # Errors
1504 ///
1505 /// Returns specific error types for different failure modes:
1506 /// - [`AgpmError::ResourceFileNotFound`]: Source file doesn't exist
1507 /// - File system errors: Permission, disk space, invalid paths
1508 /// - Directory creation errors: Parent directory creation failures
1509 ///
1510 /// # Examples
1511 ///
1512 /// Silent installation (for batch operations):
1513 ///
1514 /// ```rust,no_run
1515 /// use agpm_cli::cache::Cache;
1516 /// use std::path::PathBuf;
1517 ///
1518 /// # async fn example() -> anyhow::Result<()> {
1519 /// let cache = Cache::new()?;
1520 /// let repo_path = PathBuf::from("/cache/community");
1521 ///
1522 /// cache.copy_resource_with_output(
1523 /// &repo_path,
1524 /// "agents/helper.md",
1525 /// &PathBuf::from("./agents/helper.md"),
1526 /// false // No output
1527 /// ).await?;
1528 /// # Ok(())
1529 /// # }
1530 /// ```
1531 ///
1532 /// Interactive installation (with progress):
1533 ///
1534 /// ```rust,no_run
1535 /// use agpm_cli::cache::Cache;
1536 /// use std::path::PathBuf;
1537 ///
1538 /// # async fn example() -> anyhow::Result<()> {
1539 /// let cache = Cache::new()?;
1540 /// let repo_path = PathBuf::from("/cache/community");
1541 ///
1542 /// cache.copy_resource_with_output(
1543 /// &repo_path,
1544 /// "snippets/deployment.md",
1545 /// &PathBuf::from("./snippets/deployment.md"),
1546 /// true // Show "✅ Installed" message
1547 /// ).await?;
1548 /// # Ok(())
1549 /// # }
1550 /// ```
1551 pub async fn copy_resource_with_output(
1552 &self,
1553 source_dir: &Path,
1554 source_path: &str,
1555 target_path: &Path,
1556 show_output: bool,
1557 ) -> Result<()> {
1558 let source_file = source_dir.join(source_path);
1559
1560 if !source_file.exists() {
1561 return Err(AgpmError::ResourceFileNotFound {
1562 path: source_path.to_string(),
1563 source_name: source_dir
1564 .file_name()
1565 .and_then(|n| n.to_str())
1566 .unwrap_or("unknown")
1567 .to_string(),
1568 }
1569 .into());
1570 }
1571
1572 if let Some(parent) = target_path.parent() {
1573 async_fs::create_dir_all(parent)
1574 .await
1575 .with_context(|| format!("Failed to create directory: {}", parent.display()))?;
1576 }
1577
1578 async_fs::copy(&source_file, target_path).await.with_context(|| {
1579 format!("Failed to copy {} to {}", source_file.display(), target_path.display())
1580 })?;
1581
1582 if show_output {
1583 println!(" ✅ Installed {}", target_path.display());
1584 }
1585
1586 Ok(())
1587 }
1588
1589 /// Removes unused cached repositories to reclaim disk space.
1590 ///
1591 /// This method performs selective cache cleanup by removing repositories
1592 /// that are no longer referenced by any active source configurations.
1593 /// It's a safe operation that preserves repositories currently in use.
1594 ///
1595 /// # Cleanup Strategy
1596 ///
1597 /// 1. **Directory scanning**: Enumerates all cached repository directories
1598 /// 2. **Active comparison**: Checks each directory against active sources list
1599 /// 3. **Safe removal**: Removes only unused directories, preserving files
1600 /// 4. **Progress reporting**: Displays removal progress for user feedback
1601 ///
1602 /// # Safety Guarantees
1603 ///
1604 /// - **Active protection**: Never removes repositories listed in active sources
1605 /// - **Directory-only**: Only removes directories, preserves any loose files
1606 /// - **Atomic removal**: Each directory is removed completely or not at all
1607 /// - **Lock awareness**: Respects file locks but doesn't acquire them
1608 ///
1609 /// # Performance Considerations
1610 ///
1611 /// - **I/O intensive**: Scans entire cache directory structure
1612 /// - **Disk space recovery**: Can free significant space for large repositories
1613 /// - **Network savings**: Removed repositories will need re-cloning if used again
1614 /// - **Concurrent safe**: Can run while other cache operations are in progress
1615 ///
1616 /// # Parameters
1617 ///
1618 /// * `active_sources` - List of source names that should be preserved in cache
1619 ///
1620 /// # Returns
1621 ///
1622 /// Returns the number of repository directories that were successfully removed.
1623 ///
1624 /// # Errors
1625 ///
1626 /// Returns an error if:
1627 /// - Cache directory cannot be read (permissions)
1628 /// - Unable to remove a directory (file locks, permissions)
1629 /// - File system errors during directory traversal
1630 ///
1631 /// # Output Messages
1632 ///
1633 /// Displays progress messages for each removed repository:
1634 /// ```text
1635 /// 🗑️ Removing unused cache: old-project
1636 /// 🗑️ Removing unused cache: deprecated-tools
1637 /// ```
1638 ///
1639 /// # Examples
1640 ///
1641 /// Clean cache based on current manifest sources:
1642 ///
1643 /// ```rust,no_run
1644 /// use agpm_cli::cache::Cache;
1645 ///
1646 /// # async fn example() -> anyhow::Result<()> {
1647 /// let cache = Cache::new()?;
1648 ///
1649 /// // Active sources from current agpm.toml
1650 /// let active_sources = vec![
1651 /// "community".to_string(),
1652 /// "work-tools".to_string(),
1653 /// "personal".to_string(),
1654 /// ];
1655 ///
1656 /// let removed = cache.clean_unused(&active_sources).await?;
1657 /// println!("Cleaned {} unused repositories", removed);
1658 /// # Ok(())
1659 /// # }
1660 /// ```
1661 ///
1662 /// Clean all cached repositories:
1663 ///
1664 /// ```rust,no_run
1665 /// use agpm_cli::cache::Cache;
1666 ///
1667 /// # async fn example() -> anyhow::Result<()> {
1668 /// let cache = Cache::new()?;
1669 ///
1670 /// // Empty active list removes everything
1671 /// let removed = cache.clean_unused(&[]).await?;
1672 /// println!("Removed all {} cached repositories", removed);
1673 /// # Ok(())
1674 /// # }
1675 /// ```
1676 pub async fn clean_unused(&self, active_sources: &[String]) -> Result<usize> {
1677 self.ensure_cache_dir().await?;
1678
1679 let mut removed_count = 0;
1680 let mut entries = async_fs::read_dir(&self.cache_dir)
1681 .await
1682 .with_context(|| "Failed to read cache directory")?;
1683
1684 while let Some(entry) =
1685 entries.next_entry().await.with_context(|| "Failed to read directory entry")?
1686 {
1687 let path = entry.path();
1688 if path.is_dir() {
1689 let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
1690
1691 if !active_sources.contains(&dir_name.to_string()) {
1692 println!("🗑️ Removing unused cache: {dir_name}");
1693 async_fs::remove_dir_all(&path).await.with_context(|| {
1694 format!("Failed to remove cache directory: {}", path.display())
1695 })?;
1696 removed_count += 1;
1697 }
1698 }
1699 }
1700
1701 Ok(removed_count)
1702 }
1703
1704 /// Calculates the total size of the cache directory in bytes.
1705 ///
1706 /// This method recursively calculates the disk space used by all cached
1707 /// repositories and supporting files. It's useful for cache size monitoring,
1708 /// cleanup decisions, and storage management.
1709 ///
1710 /// # Calculation Method
1711 ///
1712 /// - **Recursive traversal**: Includes all subdirectories and files
1713 /// - **Actual file sizes**: Reports real disk usage, not allocated blocks
1714 /// - **All file types**: Includes Git objects, working files, and lock files
1715 /// - **Cross-platform**: Consistent behavior across different file systems
1716 ///
1717 /// # Performance Notes
1718 ///
1719 /// - **I/O intensive**: May be slow for very large caches
1720 /// - **File system dependent**: Performance varies by underlying storage
1721 /// - **Concurrent safe**: Can run during other cache operations
1722 /// - **Memory efficient**: Streams directory traversal without loading all paths
1723 ///
1724 /// # Returns
1725 ///
1726 /// Returns the total size in bytes. For a non-existent cache directory,
1727 /// returns `0` without error.
1728 ///
1729 /// # Errors
1730 ///
1731 /// Returns an error if:
1732 /// - Permission denied reading cache directory or subdirectories
1733 /// - File system errors during directory traversal
1734 /// - Symbolic link cycles (rare, but possible)
1735 ///
1736 /// # Examples
1737 ///
1738 /// Check current cache size:
1739 ///
1740 /// ```rust,no_run
1741 /// use agpm_cli::cache::Cache;
1742 ///
1743 /// # async fn example() -> anyhow::Result<()> {
1744 /// let cache = Cache::new()?;
1745 ///
1746 /// let size_bytes = cache.get_cache_size().await?;
1747 /// let size_mb = size_bytes / 1024 / 1024;
1748 ///
1749 /// println!("Cache size: {} MB ({} bytes)", size_mb, size_bytes);
1750 /// # Ok(())
1751 /// # }
1752 /// ```
1753 ///
1754 /// Display human-readable sizes:
1755 ///
1756 /// ```rust,no_run
1757 /// use agpm_cli::cache::Cache;
1758 ///
1759 /// # async fn example() -> anyhow::Result<()> {
1760 /// let cache = Cache::new()?;
1761 /// let size_bytes = cache.get_cache_size().await?;
1762 ///
1763 /// let (size, unit) = match size_bytes {
1764 /// s if s < 1024 => (s, "B"),
1765 /// s if s < 1024 * 1024 => (s / 1024, "KB"),
1766 /// s if s < 1024 * 1024 * 1024 => (s / 1024 / 1024, "MB"),
1767 /// s => (s / 1024 / 1024 / 1024, "GB"),
1768 /// };
1769 ///
1770 /// println!("Cache size: {}{}", size, unit);
1771 /// # Ok(())
1772 /// # }
1773 /// ```
1774 pub async fn get_cache_size(&self) -> Result<u64> {
1775 if !self.cache_dir.exists() {
1776 return Ok(0);
1777 }
1778
1779 let size = fs::get_directory_size(&self.cache_dir).await?;
1780 Ok(size)
1781 }
1782
1783 /// Returns the path to the cache directory.
1784 ///
1785 /// This method provides access to the cache directory path for inspection,
1786 /// logging, or integration with other tools. The path represents where
1787 /// all cached repositories and supporting files are stored.
1788 ///
1789 /// # Return Value
1790 ///
1791 /// Returns a reference to the [`Path`] representing the cache directory.
1792 /// The path may or may not exist on the file system - use [`ensure_cache_dir`]
1793 /// to create it if needed.
1794 ///
1795 /// # Thread Safety
1796 ///
1797 /// This method is safe to call from multiple threads as it only returns
1798 /// a reference to the immutable path stored in the `Cache` instance.
1799 ///
1800 /// # Examples
1801 ///
1802 /// Display cache location:
1803 ///
1804 /// ```rust,no_run
1805 /// use agpm_cli::cache::Cache;
1806 ///
1807 /// # fn example() -> anyhow::Result<()> {
1808 /// let cache = Cache::new()?;
1809 /// println!("Cache stored at: {}", cache.get_cache_location().display());
1810 /// # Ok(())
1811 /// # }
1812 /// ```
1813 ///
1814 /// Check if cache exists:
1815 ///
1816 /// ```rust,no_run
1817 /// use agpm_cli::cache::Cache;
1818 ///
1819 /// # fn example() -> anyhow::Result<()> {
1820 /// let cache = Cache::new()?;
1821 /// let location = cache.get_cache_location();
1822 ///
1823 /// if location.exists() {
1824 /// println!("Cache directory exists at: {}", location.display());
1825 /// } else {
1826 /// println!("Cache directory not yet created: {}", location.display());
1827 /// }
1828 /// # Ok(())
1829 /// # }
1830 /// ```
1831 ///
1832 /// [`ensure_cache_dir`]: Cache::ensure_cache_dir
1833 #[must_use]
1834 pub fn get_cache_location(&self) -> &Path {
1835 &self.cache_dir
1836 }
1837
1838 /// Completely removes the entire cache directory and all its contents.
1839 ///
1840 /// This is a destructive operation that removes all cached repositories,
1841 /// lock files, and any other cache-related data. Use with caution as
1842 /// this will require re-cloning all repositories on the next operation.
1843 ///
1844 /// # Operation Details
1845 ///
1846 /// - **Complete removal**: Deletes the entire cache directory tree
1847 /// - **Recursive deletion**: Removes all subdirectories and files
1848 /// - **Lock files**: Also removes .locks directory and all lock files
1849 /// - **Atomic operation**: Either succeeds completely or leaves cache intact
1850 ///
1851 /// # Recovery Impact
1852 ///
1853 /// After calling this method:
1854 /// - All repositories must be re-cloned on next use
1855 /// - Network bandwidth will be required for repository downloads
1856 /// - Disk space is immediately reclaimed
1857 /// - Cache directory will be recreated automatically on next operation
1858 ///
1859 /// # Safety Considerations
1860 ///
1861 /// - **No confirmation**: This method doesn't ask for confirmation
1862 /// - **Irreversible**: Cannot undo the deletion operation
1863 /// - **Concurrent operations**: May interfere with running cache operations
1864 /// - **Lock respect**: Doesn't wait for locks, may fail if repositories are in use
1865 ///
1866 /// # Errors
1867 ///
1868 /// Returns an error if:
1869 /// - Permission denied for cache directory or contents
1870 /// - Files are locked by other processes
1871 /// - File system errors during deletion
1872 /// - Cache directory is in use by another process
1873 ///
1874 /// # Output Messages
1875 ///
1876 /// Displays confirmation message on successful completion:
1877 /// ```text
1878 /// 🗑️ Cleared all cache
1879 /// ```
1880 ///
1881 /// # Examples
1882 ///
1883 /// Clear cache for fresh start:
1884 ///
1885 /// ```rust,no_run
1886 /// use agpm_cli::cache::Cache;
1887 ///
1888 /// # async fn example() -> anyhow::Result<()> {
1889 /// let cache = Cache::new()?;
1890 ///
1891 /// // Check size before clearing
1892 /// let size_before = cache.get_cache_size().await?;
1893 /// println!("Cache size before: {} bytes", size_before);
1894 ///
1895 /// // Clear everything
1896 /// cache.clear_all().await?;
1897 ///
1898 /// // Verify cache is empty
1899 /// let size_after = cache.get_cache_size().await?;
1900 /// println!("Cache size after: {} bytes", size_after); // Should be 0
1901 /// # Ok(())
1902 /// # }
1903 /// ```
1904 ///
1905 /// Clear cache with error handling:
1906 ///
1907 /// ```rust,no_run
1908 /// use agpm_cli::cache::Cache;
1909 ///
1910 /// # async fn example() -> anyhow::Result<()> {
1911 /// let cache = Cache::new()?;
1912 ///
1913 /// match cache.clear_all().await {
1914 /// Ok(()) => println!("Cache cleared successfully"),
1915 /// Err(e) => {
1916 /// eprintln!("Failed to clear cache: {}", e);
1917 /// eprintln!("Some files may be in use by other processes");
1918 /// }
1919 /// }
1920 /// # Ok(())
1921 /// # }
1922 /// ```
1923 pub async fn clear_all(&self) -> Result<()> {
1924 if self.cache_dir.exists() {
1925 async_fs::remove_dir_all(&self.cache_dir)
1926 .await
1927 .with_context(|| "Failed to clear cache")?;
1928 println!("🗑️ Cleared all cache");
1929 }
1930 Ok(())
1931 }
1932
1933 /// Perform a fetch operation with hybrid locking (in-process and cross-process).
1934 ///
1935 /// This method implements a two-level locking strategy:
1936 /// 1. In-process locks (Arc<Mutex>) for fast coordination within the same process
1937 /// 2. File-based locks for cross-process coordination
1938 ///
1939 /// The fetch will only happen once per repository per command execution.
1940 ///
1941 /// # Parameters
1942 ///
1943 /// * `bare_repo_path` - Path to the bare repository
1944 /// * `context` - Optional context string for logging
1945 ///
1946 /// # Returns
1947 ///
1948 /// Returns Ok(()) if the fetch was successful or skipped.
    async fn fetch_with_hybrid_lock(
        &self,
        bare_repo_path: &Path,
        context: Option<&str>,
    ) -> Result<()> {
        // Level 1: In-process lock (fast path).
        // One Mutex per repository path, created lazily; the guard is held for
        // the rest of this function so fetches of the same repo within this
        // process are fully serialized.
        let memory_lock = self
            .fetch_locks
            .entry(bare_repo_path.to_path_buf())
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone();
        let _memory_guard = memory_lock.lock().await;

        // Level 2: File-based lock (cross-process).
        // Derive a filesystem-safe lock-file name from the repo directory name;
        // path separators and drive colons are flattened to underscores.
        let safe_name = bare_repo_path
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("unknown")
            .replace(['/', '\\', ':'], "_");

        let lock_path = self.cache_dir.join(".locks").join(format!("{safe_name}.fetch.lock"));

        // Ensure lock directory exists before attempting to create the lock file.
        if let Some(parent) = lock_path.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }

        // Create/open lock file. `truncate(false)` keeps any existing file
        // content intact; only the lock itself matters, not the contents.
        let lock_file = tokio::fs::OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(false)
            .open(&lock_path)
            .await?;

        // Convert to std::fs::File because fs4's locking API works on std files.
        let std_file = lock_file.into_std().await;

        // Acquire exclusive lock (blocks until available).
        // NOTE(review): `lock_exclusive()` is a blocking syscall executed on an
        // async worker thread — TODO confirm this is acceptable here (contention
        // is expected to be brief) or move it to `spawn_blocking`.
        use fs4::fs_std::FileExt;
        if let Some(ctx) = context {
            tracing::debug!(
                target: "agpm::git",
                "({}) Acquiring file lock for {}",
                ctx,
                bare_repo_path.display()
            );
        }
        std_file.lock_exclusive()?;

        if let Some(ctx) = context {
            tracing::debug!(
                target: "agpm::git",
                "({}) Acquired file lock for {}",
                ctx,
                bare_repo_path.display()
            );
        }

        // Now check if we've already fetched this repo in this command execution.
        // This check happens AFTER acquiring both locks so a concurrent fetcher
        // that completed while we were waiting is observed here (no lost update).
        let already_fetched = {
            let fetched = self.fetched_repos.read().await;
            let is_fetched = fetched.contains(bare_repo_path);
            if let Some(ctx) = context {
                tracing::debug!(
                    target: "agpm::git",
                    "({}) Checking if already fetched: {} - Result: {} (total fetched: {}, hashset addr: {:p})",
                    ctx,
                    bare_repo_path.display(),
                    is_fetched,
                    fetched.len(),
                    &raw const *fetched
                );
            }
            is_fetched
        };

        if already_fetched {
            if let Some(ctx) = context {
                tracing::debug!(
                    target: "agpm::git",
                    "({}) Skipping fetch (already fetched in this command): {}",
                    ctx,
                    bare_repo_path.display()
                );
            }
            // Release the file lock (via drop of `std_file`) and return early.
            return Ok(());
        }

        // Now safe to fetch: we hold both locks and the repo is not yet fetched.
        let repo = GitRepo::new(bare_repo_path);

        if let Some(ctx) = context {
            tracing::debug!(
                target: "agpm::git",
                "({}) Fetching updates for {}",
                ctx,
                bare_repo_path.display()
            );
        }

        repo.fetch(None).await?;

        // Mark this repo as fetched for this command execution. Done in a scoped
        // block so the write guard is dropped before we return.
        {
            let mut fetched = self.fetched_repos.write().await;
            fetched.insert(bare_repo_path.to_path_buf());
            if let Some(ctx) = context {
                tracing::debug!(
                    target: "agpm::git",
                    "({}) Marked as fetched: {} (total fetched: {}, hashset addr: {:p})",
                    ctx,
                    bare_repo_path.display(),
                    fetched.len(),
                    &raw const *fetched
                );
            }
        }

        // File lock automatically released when std_file is dropped.
        Ok(())
    }
2073}
2074
2075#[cfg(test)]
2076mod tests {
2077 use super::*;
2078 use tempfile::TempDir;
2079
2080 #[tokio::test]
2081 async fn test_cache_dir_creation() {
2082 let temp_dir = TempDir::new().unwrap();
2083 let cache_dir = temp_dir.path().join("cache");
2084
2085 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2086 cache.ensure_cache_dir().await.unwrap();
2087
2088 assert!(cache_dir.exists());
2089 }
2090
2091 #[tokio::test]
2092 async fn test_cache_location() {
2093 let temp_dir = TempDir::new().unwrap();
2094 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2095 let location = cache.get_cache_location();
2096 assert_eq!(location, temp_dir.path());
2097 }
2098
2099 #[tokio::test]
2100 async fn test_cache_size_empty() {
2101 let temp_dir = TempDir::new().unwrap();
2102 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2103
2104 cache.ensure_cache_dir().await.unwrap();
2105 let size = cache.get_cache_size().await.unwrap();
2106 assert_eq!(size, 0);
2107 }
2108
2109 #[tokio::test]
2110 async fn test_cache_size_with_content() {
2111 let temp_dir = TempDir::new().unwrap();
2112 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2113
2114 cache.ensure_cache_dir().await.unwrap();
2115
2116 // Create some test content
2117 let test_file = temp_dir.path().join("test.txt");
2118 std::fs::write(&test_file, "test content").unwrap();
2119
2120 let size = cache.get_cache_size().await.unwrap();
2121 assert!(size > 0);
2122 assert_eq!(size, 12); // "test content" is 12 bytes
2123 }
2124
2125 #[tokio::test]
2126 async fn test_clean_unused_empty_cache() {
2127 let temp_dir = TempDir::new().unwrap();
2128 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2129
2130 cache.ensure_cache_dir().await.unwrap();
2131
2132 let removed = cache.clean_unused(&["active".to_string()]).await.unwrap();
2133 assert_eq!(removed, 0);
2134 }
2135
2136 #[tokio::test]
2137 async fn test_clean_unused_removes_correct_dirs() {
2138 let temp_dir = TempDir::new().unwrap();
2139 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2140
2141 cache.ensure_cache_dir().await.unwrap();
2142
2143 // Create some test directories
2144 let active_dir = temp_dir.path().join("active");
2145 let unused_dir = temp_dir.path().join("unused");
2146 let another_unused = temp_dir.path().join("another_unused");
2147
2148 std::fs::create_dir_all(&active_dir).unwrap();
2149 std::fs::create_dir_all(&unused_dir).unwrap();
2150 std::fs::create_dir_all(&another_unused).unwrap();
2151
2152 // Add some content to verify directories are removed completely
2153 std::fs::write(active_dir.join("file.txt"), "keep").unwrap();
2154 std::fs::write(unused_dir.join("file.txt"), "remove").unwrap();
2155 std::fs::write(another_unused.join("file.txt"), "remove").unwrap();
2156
2157 let removed = cache.clean_unused(&["active".to_string()]).await.unwrap();
2158
2159 assert_eq!(removed, 2);
2160 assert!(active_dir.exists());
2161 assert!(!unused_dir.exists());
2162 assert!(!another_unused.exists());
2163 }
2164
2165 #[tokio::test]
2166 async fn test_clear_all_removes_entire_cache() {
2167 let temp_dir = TempDir::new().unwrap();
2168 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2169
2170 cache.ensure_cache_dir().await.unwrap();
2171
2172 // Create some content
2173 let subdir = temp_dir.path().join("subdir");
2174 std::fs::create_dir_all(&subdir).unwrap();
2175 std::fs::write(subdir.join("file.txt"), "content").unwrap();
2176
2177 assert!(temp_dir.path().exists());
2178 assert!(subdir.exists());
2179
2180 cache.clear_all().await.unwrap();
2181
2182 assert!(!temp_dir.path().exists());
2183 }
2184
2185 #[tokio::test]
2186 async fn test_copy_resource() {
2187 let temp_dir = TempDir::new().unwrap();
2188 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2189
2190 // Create source file
2191 let source_dir = temp_dir.path().join("source");
2192 std::fs::create_dir_all(&source_dir).unwrap();
2193 let source_file = source_dir.join("resource.md");
2194 std::fs::write(&source_file, "# Test Resource\nContent").unwrap();
2195
2196 // Copy resource
2197 let dest = temp_dir.path().join("dest.md");
2198 cache.copy_resource(&source_dir, "resource.md", &dest).await.unwrap();
2199
2200 assert!(dest.exists());
2201 let content = std::fs::read_to_string(&dest).unwrap();
2202 assert_eq!(content, "# Test Resource\nContent");
2203 }
2204
2205 #[tokio::test]
2206 async fn test_copy_resource_nested_path() {
2207 let temp_dir = TempDir::new().unwrap();
2208 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2209
2210 // Create source file in nested directory
2211 let source_dir = temp_dir.path().join("source");
2212 let nested_dir = source_dir.join("nested").join("path");
2213 std::fs::create_dir_all(&nested_dir).unwrap();
2214 let source_file = nested_dir.join("resource.md");
2215 std::fs::write(&source_file, "# Nested Resource").unwrap();
2216
2217 // Copy resource using relative path from source_dir
2218 let dest = temp_dir.path().join("dest.md");
2219 cache.copy_resource(&source_dir, "nested/path/resource.md", &dest).await.unwrap();
2220
2221 assert!(dest.exists());
2222 let content = std::fs::read_to_string(&dest).unwrap();
2223 assert_eq!(content, "# Nested Resource");
2224 }
2225
2226 #[tokio::test]
2227 async fn test_copy_resource_invalid_path() {
2228 let temp_dir = TempDir::new().unwrap();
2229 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2230
2231 let source_dir = temp_dir.path().join("source");
2232 std::fs::create_dir_all(&source_dir).unwrap();
2233
2234 // Try to copy non-existent resource
2235 let dest = temp_dir.path().join("dest.md");
2236 let result = cache.copy_resource(&source_dir, "nonexistent.md", &dest).await;
2237
2238 assert!(result.is_err());
2239 assert!(!dest.exists());
2240 }
2241
2242 #[tokio::test]
2243 async fn test_ensure_cache_dir_idempotent() {
2244 let temp_dir = TempDir::new().unwrap();
2245 let cache_dir = temp_dir.path().join("cache");
2246 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2247
2248 // Call ensure_cache_dir multiple times
2249 cache.ensure_cache_dir().await.unwrap();
2250 assert!(cache_dir.exists());
2251
2252 cache.ensure_cache_dir().await.unwrap();
2253 assert!(cache_dir.exists());
2254
2255 // Add a file and ensure it's preserved
2256 std::fs::write(cache_dir.join("test.txt"), "content").unwrap();
2257
2258 cache.ensure_cache_dir().await.unwrap();
2259 assert!(cache_dir.exists());
2260 assert!(cache_dir.join("test.txt").exists());
2261 }
2262
2263 #[tokio::test]
2264 async fn test_copy_resource_creates_parent_directories() {
2265 let temp_dir = TempDir::new().unwrap();
2266 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2267
2268 // Create source file
2269 let source_dir = temp_dir.path().join("source");
2270 std::fs::create_dir_all(&source_dir).unwrap();
2271 std::fs::write(source_dir.join("file.md"), "content").unwrap();
2272
2273 // Copy to a destination with non-existent parent directories
2274 let dest = temp_dir.path().join("deep").join("nested").join("dest.md");
2275 cache.copy_resource(&source_dir, "file.md", &dest).await.unwrap();
2276
2277 assert!(dest.exists());
2278 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "content");
2279 }
2280
2281 #[tokio::test]
2282 async fn test_copy_resource_with_output_flag() {
2283 let temp_dir = TempDir::new().unwrap();
2284 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2285
2286 // Create source file
2287 let source_dir = temp_dir.path().join("source");
2288 std::fs::create_dir_all(&source_dir).unwrap();
2289 std::fs::write(source_dir.join("file.md"), "content").unwrap();
2290
2291 // Test with output flag false
2292 let dest1 = temp_dir.path().join("dest1.md");
2293 cache.copy_resource_with_output(&source_dir, "file.md", &dest1, false).await.unwrap();
2294 assert!(dest1.exists());
2295
2296 // Test with output flag true
2297 let dest2 = temp_dir.path().join("dest2.md");
2298 cache.copy_resource_with_output(&source_dir, "file.md", &dest2, true).await.unwrap();
2299 assert!(dest2.exists());
2300 }
2301
2302 #[tokio::test]
2303 async fn test_cache_size_nonexistent_dir() {
2304 let temp_dir = TempDir::new().unwrap();
2305 let nonexistent = temp_dir.path().join("nonexistent");
2306 let cache = Cache::with_dir(nonexistent).unwrap();
2307
2308 let size = cache.get_cache_size().await.unwrap();
2309 assert_eq!(size, 0);
2310 }
2311
2312 #[tokio::test]
2313 async fn test_clear_all_nonexistent_cache() {
2314 let temp_dir = TempDir::new().unwrap();
2315 let nonexistent = temp_dir.path().join("nonexistent");
2316 let cache = Cache::with_dir(nonexistent).unwrap();
2317
2318 // Should not error when clearing non-existent cache
2319 cache.clear_all().await.unwrap();
2320 }
2321
2322 #[tokio::test]
2323 async fn test_clean_unused_with_files_and_dirs() {
2324 let temp_dir = TempDir::new().unwrap();
2325 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2326
2327 cache.ensure_cache_dir().await.unwrap();
2328
2329 // Create directories
2330 std::fs::create_dir_all(temp_dir.path().join("keep")).unwrap();
2331 std::fs::create_dir_all(temp_dir.path().join("remove")).unwrap();
2332
2333 // Create a file (not a directory)
2334 std::fs::write(temp_dir.path().join("file.txt"), "content").unwrap();
2335
2336 let removed = cache.clean_unused(&["keep".to_string()]).await.unwrap();
2337
2338 // Should only remove the "remove" directory, not the file
2339 assert_eq!(removed, 1);
2340 assert!(temp_dir.path().join("keep").exists());
2341 assert!(!temp_dir.path().join("remove").exists());
2342 assert!(temp_dir.path().join("file.txt").exists());
2343 }
2344
2345 #[tokio::test]
2346 async fn test_copy_resource_overwrites_existing() {
2347 let temp_dir = TempDir::new().unwrap();
2348 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2349
2350 // Create source file
2351 let source_dir = temp_dir.path().join("source");
2352 std::fs::create_dir_all(&source_dir).unwrap();
2353 std::fs::write(source_dir.join("file.md"), "new content").unwrap();
2354
2355 // Create existing destination file
2356 let dest = temp_dir.path().join("dest.md");
2357 std::fs::write(&dest, "old content").unwrap();
2358
2359 // Copy should overwrite
2360 cache.copy_resource(&source_dir, "file.md", &dest).await.unwrap();
2361
2362 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "new content");
2363 }
2364
2365 #[tokio::test]
2366 async fn test_copy_resource_special_characters() {
2367 let temp_dir = TempDir::new().unwrap();
2368 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2369
2370 // Create source file with special characters
2371 let source_dir = temp_dir.path().join("source");
2372 std::fs::create_dir_all(&source_dir).unwrap();
2373 let special_name = "file with spaces & special-chars.md";
2374 std::fs::write(source_dir.join(special_name), "content").unwrap();
2375
2376 // Copy resource
2377 let dest = temp_dir.path().join("dest.md");
2378 cache.copy_resource(&source_dir, special_name, &dest).await.unwrap();
2379
2380 assert!(dest.exists());
2381 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "content");
2382 }
2383
2384 #[tokio::test]
2385 async fn test_cache_location_consistency() {
2386 let temp_dir = TempDir::new().unwrap();
2387 let cache_dir = temp_dir.path().join("my_cache");
2388 let cache = Cache::with_dir(cache_dir.clone()).unwrap();
2389
2390 // Get location multiple times
2391 let loc1 = cache.get_cache_location();
2392 let loc2 = cache.get_cache_location();
2393
2394 assert_eq!(loc1, loc2);
2395 assert_eq!(loc1, cache_dir.as_path());
2396 }
2397
2398 #[tokio::test]
2399 async fn test_clean_unused_empty_active_list() {
2400 let temp_dir = TempDir::new().unwrap();
2401 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2402
2403 cache.ensure_cache_dir().await.unwrap();
2404
2405 // Create some directories
2406 std::fs::create_dir_all(temp_dir.path().join("source1")).unwrap();
2407 std::fs::create_dir_all(temp_dir.path().join("source2")).unwrap();
2408
2409 // Empty active list should remove all
2410 let removed = cache.clean_unused(&[]).await.unwrap();
2411
2412 assert_eq!(removed, 2);
2413 assert!(!temp_dir.path().join("source1").exists());
2414 assert!(!temp_dir.path().join("source2").exists());
2415 }
2416
2417 #[tokio::test]
2418 async fn test_copy_resource_with_relative_paths() {
2419 let temp_dir = TempDir::new().unwrap();
2420 let cache = Cache::with_dir(temp_dir.path().join("cache")).unwrap();
2421
2422 // Create source with subdirectories
2423 let source_dir = temp_dir.path().join("source");
2424 let sub_dir = source_dir.join("agents");
2425 std::fs::create_dir_all(&sub_dir).unwrap();
2426 std::fs::write(sub_dir.join("helper.md"), "# Helper Agent").unwrap();
2427
2428 // Copy using relative path
2429 let dest = temp_dir.path().join("my-agent.md");
2430 cache.copy_resource(&source_dir, "agents/helper.md", &dest).await.unwrap();
2431
2432 assert!(dest.exists());
2433 assert_eq!(std::fs::read_to_string(&dest).unwrap(), "# Helper Agent");
2434 }
2435
2436 #[tokio::test]
2437 async fn test_cache_size_with_subdirectories() {
2438 let temp_dir = TempDir::new().unwrap();
2439 let cache = Cache::with_dir(temp_dir.path().to_path_buf()).unwrap();
2440
2441 cache.ensure_cache_dir().await.unwrap();
2442
2443 // Create nested structure with files
2444 let sub1 = temp_dir.path().join("sub1");
2445 let sub2 = sub1.join("sub2");
2446 std::fs::create_dir_all(&sub2).unwrap();
2447
2448 std::fs::write(temp_dir.path().join("file1.txt"), "12345").unwrap(); // 5 bytes
2449 std::fs::write(sub1.join("file2.txt"), "1234567890").unwrap(); // 10 bytes
2450 std::fs::write(sub2.join("file3.txt"), "abc").unwrap(); // 3 bytes
2451
2452 let size = cache.get_cache_size().await.unwrap();
2453 assert_eq!(size, 18); // 5 + 10 + 3
2454 }
2455}