agpm_cli/resolver/
version_resolver.rs

1//! Centralized version resolution module for AGPM
2//!
3//! This module implements the core version-to-SHA resolution strategy that ensures
4//! deterministic and efficient dependency management. By resolving all version
5//! specifications to commit SHAs upfront, we enable:
6//!
7//! - **SHA-based worktree caching**: Reuse worktrees for identical commits
8//! - **Reduced network operations**: Single fetch per repository
9//! - **Deterministic installations**: Same SHA always produces same result
10//! - **Efficient deduplication**: Multiple refs to same commit share one worktree
11//!
12//! # Architecture
13//!
14//! The `VersionResolver` operates in two phases:
15//! 1. **Collection Phase**: Gather all unique (source, version) pairs
16//! 2. **Resolution Phase**: Batch resolve all versions to SHAs
17//!
18//! This design minimizes Git operations and enables parallel resolution.
19
20use anyhow::{Context, Result};
21use std::collections::HashMap;
22use std::path::PathBuf;
23
24use crate::cache::Cache;
25use crate::git::GitRepo;
26
/// One `(source, version)` request tracked by the resolver, plus the slots
/// that hold the outcome of resolving it.
///
/// Equality compares every field, so two entries are equal only when both the
/// request and its resolution state match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VersionEntry {
    /// Source name from the manifest
    pub source: String,
    /// Source URL (Git repository) or local path for the source
    pub url: String,
    /// Version specification (tag, branch, commit, or constraint); `None` means
    /// no version was requested
    pub version: Option<String>,
    /// Resolved commit SHA; `None` until resolution has filled it in
    pub resolved_sha: Option<String>,
    /// Reference the specification resolved to (e.g., "latest" -> "v2.0.0");
    /// `None` until resolution has filled it in
    pub resolved_version: Option<String>,
}
41
42/// Centralized version resolver for efficient SHA resolution
43///
44/// The `VersionResolver` is responsible for resolving all dependency versions
45/// to their corresponding Git commit SHAs before any worktree operations.
46/// This ensures maximum efficiency and deduplication.
47///
48/// # Example
49///
50/// ```no_run
51/// # use agpm_cli::resolver::version_resolver::{VersionResolver, VersionEntry};
52/// # use agpm_cli::cache::Cache;
53/// # async fn example() -> anyhow::Result<()> {
54/// let cache = Cache::new()?;
55/// let mut resolver = VersionResolver::new(cache);
56///
57/// // Add versions to resolve
58/// resolver.add_version("community", "https://github.com/example/repo.git", Some("v1.0.0"));
59/// resolver.add_version("community", "https://github.com/example/repo.git", Some("main"));
60///
61/// // Batch resolve all versions to SHAs
62/// resolver.resolve_all().await?;
63///
64/// // Get resolved SHA for a specific version
65/// let sha = resolver.get_resolved_sha("community", "v1.0.0");
66/// # Ok(())
67/// # }
68/// ```
69/// Resolved version information
70#[derive(Debug, Clone)]
71pub struct ResolvedVersion {
72    /// The resolved SHA-1 hash
73    pub sha: String,
74    /// The resolved version (e.g., "latest" -> "v2.0.0")
75    /// If no constraint resolution happened, this will be the same as input
76    pub resolved_ref: String,
77}
78
79/// Centralized version resolver for batch SHA resolution.
80///
81/// The `VersionResolver` manages the collection and resolution of all dependency
82/// versions in a single batch operation, enabling optimal Git repository access
83/// patterns and maximum worktree reuse.
84pub struct VersionResolver {
85    /// Cache instance for repository access
86    cache: Cache,
87    /// Collection of versions to resolve, keyed by (source, version)
88    entries: HashMap<(String, String), VersionEntry>,
89    /// Resolved SHA cache, keyed by (source, version)
90    resolved: HashMap<(String, String), ResolvedVersion>,
91    /// Bare repository paths, keyed by source name
92    bare_repos: HashMap<String, PathBuf>,
93}
94
95impl VersionResolver {
96    /// Creates a new version resolver with the given cache
97    pub fn new(cache: Cache) -> Self {
98        Self {
99            cache,
100            entries: HashMap::new(),
101            resolved: HashMap::new(),
102            bare_repos: HashMap::new(),
103        }
104    }
105
106    /// Adds a version to be resolved
107    ///
108    /// Multiple calls with the same (source, version) pair will be deduplicated.
109    ///
110    /// # Arguments
111    ///
112    /// * `source` - Source name from manifest
113    /// * `url` - Git repository URL
114    /// * `version` - Version specification (tag, branch, commit, or None for HEAD)
115    pub fn add_version(&mut self, source: &str, url: &str, version: Option<&str>) {
116        let version_key = version.unwrap_or("HEAD").to_string();
117        let key = (source.to_string(), version_key);
118
119        // Only add if not already present (deduplication)
120        self.entries.entry(key).or_insert_with(|| VersionEntry {
121            source: source.to_string(),
122            url: url.to_string(),
123            version: version.map(std::string::ToString::to_string),
124            resolved_sha: None,
125            resolved_version: None,
126        });
127    }
128
129    /// Resolves all collected versions to their commit SHAs using cached repositories.
130    ///
131    /// This method implements the second phase of AGPM's two-phase resolution architecture.
132    /// It processes all version entries collected via `add_version()` calls and resolves
133    /// them to concrete commit SHAs using locally cached Git repositories.
134    ///
135    /// # Prerequisites
136    ///
137    /// **CRITICAL**: `pre_sync_sources()` must be called before this method. The resolver
138    /// requires all repositories to be pre-synced to the cache, and will return an error
139    /// if any required repository is missing from the `bare_repos` map.
140    ///
141    /// # Resolution Process
142    ///
143    /// The method performs the following steps:
144    /// 1. **Source Grouping**: Groups entries by source to minimize repository operations
145    /// 2. **Repository Access**: Uses pre-synced repositories from `pre_sync_sources()`
146    /// 3. **Version Constraint Resolution**: Handles semver constraints (`^1.0`, `~2.1`)
147    /// 4. **SHA Resolution**: Resolves all versions to SHAs using `git rev-parse`
148    /// 5. **Result Caching**: Stores resolved SHAs for quick retrieval
149    ///
150    /// # Version Resolution Strategy
151    ///
152    /// The resolver handles different version types:
153    /// - **Exact SHAs**: Used directly without resolution
154    /// - **Semantic Versions**: Resolved using semver constraint matching
155    /// - **Tags**: Resolved to their commit SHAs
156    /// - **Branch Names**: Resolved to current HEAD commit
157    /// - **Latest/None**: Defaults to the repository's default branch
158    ///
159    /// # Performance Characteristics
160    ///
161    /// - **Time Complexity**: O(n·log(t)) where n = entries, t = tags per repo
162    /// - **Space Complexity**: O(n) for storing resolved results
163    /// - **Network I/O**: Zero (operates on cached repositories only)
164    /// - **Parallelization**: Single-threaded but optimized for batch operations
165    ///
166    /// # Example
167    ///
168    /// ```ignore
169    /// # use agpm_cli::resolver::version_resolver::VersionResolver;
170    /// # use agpm_cli::cache::Cache;
171    /// # async fn example() -> anyhow::Result<()> {
172    /// let cache = Cache::new()?;
173    /// let mut resolver = VersionResolver::new(cache);
174    ///
175    /// // Add various version types
176    /// resolver.add_version("source", "https://github.com/org/repo.git", Some("v1.2.3"));
177    /// resolver.add_version("source", "https://github.com/org/repo.git", Some("^1.0"));
178    /// resolver.add_version("source", "https://github.com/org/repo.git", Some("main"));
179    /// resolver.add_version("source", "https://github.com/org/repo.git", None); // latest
180    ///
181    /// // Phase 1: Sync repositories
182    /// resolver.pre_sync_sources().await?;
183    ///
184    /// // Phase 2: Resolve versions to SHAs (this method)
185    /// resolver.resolve_all().await?;
186    ///
187    /// // Access resolved SHAs
188    /// if resolver.is_resolved("source", "v1.2.3") {
189    ///     println!("v1.2.3 resolved successfully");
190    /// }
191    /// # Ok(())
192    /// # }
193    /// ```
194    ///
195    /// # Error Handling
196    ///
197    /// The method uses fail-fast behavior - if any version resolution fails,
198    /// the entire operation is aborted. This ensures consistency and prevents
199    /// partial resolution states.
200    ///
201    /// # Errors
202    ///
203    /// Returns an error if:
204    /// - **Pre-sync Required**: Repository was not pre-synced (call `pre_sync_sources()` first)
205    /// - **Version Not Found**: Specified version/tag/branch doesn't exist in repository
206    /// - **Constraint Resolution**: Semver constraint cannot be satisfied by available tags
207    /// - **Git Operations**: `git rev-parse` or other Git commands fail
208    /// - **Repository Access**: Cached repository is corrupted or inaccessible
209    pub async fn resolve_all(&mut self) -> Result<()> {
210        // Group entries by source for efficient processing
211        let mut by_source: HashMap<String, Vec<(String, VersionEntry)>> = HashMap::new();
212
213        for (key, entry) in &self.entries {
214            by_source.entry(entry.source.clone()).or_default().push((key.1.clone(), entry.clone()));
215        }
216
217        // Process each source
218        for (source, versions) in by_source {
219            // Repository must have been pre-synced
220            let repo_path = self
221                .bare_repos
222                .get(&source)
223                .ok_or_else(|| {
224                    anyhow::anyhow!("Repository for source '{source}' was not pre-synced. Call pre_sync_sources() first.")
225                })?
226                .clone();
227
228            let repo = GitRepo::new(&repo_path);
229
230            // Resolve each version for this source
231            for (version_str, mut entry) in versions {
232                // Check if this is a local directory source (not a Git repository)
233                let is_local = crate::utils::is_local_path(&entry.url);
234
235                // For local directory sources, we don't resolve versions - just use "local"
236                let resolved_ref = if is_local {
237                    "local".to_string()
238                } else if let Some(ref version) = entry.version {
239                    // First check if this is a version constraint
240                    if crate::resolver::version_resolution::is_version_constraint(version) {
241                        // Resolve constraint to actual tag first
242                        // Note: get_or_clone_source already fetched, so tags should be available
243                        let tags = repo.list_tags().await.unwrap_or_default();
244
245                        if tags.is_empty() {
246                            return Err(anyhow::anyhow!(
247                                "No tags found in repository for constraint '{version}'"
248                            ));
249                        }
250
251                        // Find best matching tag
252                        crate::resolver::version_resolution::find_best_matching_tag(version, tags)
253                            .with_context(|| format!("Failed to resolve version constraint '{version}' for source '{source}'"))?
254                    } else {
255                        // Not a constraint, use as-is
256                        version.clone()
257                    }
258                } else {
259                    // No version specified for Git source, resolve HEAD to actual branch name
260                    repo.get_default_branch().await.unwrap_or_else(|_| "main".to_string())
261                };
262
263                // For local sources, don't resolve SHA. For Git sources, resolve ref to actual SHA
264                let sha = if is_local {
265                    // Local directories don't have commit SHAs
266                    None
267                } else {
268                    // Resolve the actual ref to SHA for Git repositories
269                    Some(repo.resolve_to_sha(Some(&resolved_ref)).await.with_context(|| {
270                        format!("Failed to resolve version '{version_str}' for source '{source}'")
271                    })?)
272                };
273
274                // Store the resolved SHA and version
275                entry.resolved_sha = sha.clone();
276                entry.resolved_version = Some(resolved_ref.clone());
277                let key = (source.clone(), version_str);
278                // Only insert into resolved map if we have a SHA (Git sources only)
279                if let Some(sha_value) = sha {
280                    self.resolved.insert(
281                        key,
282                        ResolvedVersion {
283                            sha: sha_value,
284                            resolved_ref,
285                        },
286                    );
287                }
288            }
289        }
290
291        Ok(())
292    }
293
294    /// Resolves a single version to SHA without affecting the batch
295    ///
296    /// This is useful for incremental resolution or testing.
297    pub async fn resolve_single(
298        &mut self,
299        source: &str,
300        url: &str,
301        version: Option<&str>,
302    ) -> Result<String> {
303        // Get or clone the repository
304        let repo_path = self
305            .cache
306            .get_or_clone_source(source, url, None)
307            .await
308            .with_context(|| format!("Failed to prepare repository for source '{source}'"))?;
309
310        let repo = GitRepo::new(&repo_path);
311
312        // Resolve the version to SHA
313        let sha = repo.resolve_to_sha(version).await.with_context(|| {
314            format!(
315                "Failed to resolve version '{}' for source '{}'",
316                version.unwrap_or("HEAD"),
317                source
318            )
319        })?;
320
321        // Determine the resolved reference name
322        let resolved_ref = if let Some(v) = version {
323            v.to_string()
324        } else {
325            // When no version is specified, resolve HEAD to the actual branch name
326            repo.get_default_branch().await.unwrap_or_else(|_| "main".to_string())
327        };
328
329        // Cache the result
330        let version_key = version.unwrap_or("HEAD").to_string();
331        let key = (source.to_string(), version_key);
332        self.resolved.insert(
333            key,
334            ResolvedVersion {
335                sha: sha.clone(),
336                resolved_ref,
337            },
338        );
339
340        Ok(sha)
341    }
342
343    /// Gets the resolved SHA for a given source and version
344    ///
345    /// Returns None if the version hasn't been resolved yet.
346    ///
347    /// # Arguments
348    ///
349    /// * `source` - Source name
350    /// * `version` - Version specification (use "HEAD" for None)
351    pub fn get_resolved_sha(&self, source: &str, version: &str) -> Option<String> {
352        let key = (source.to_string(), version.to_string());
353        self.resolved.get(&key).map(|rv| rv.sha.clone())
354    }
355
356    /// Gets all resolved SHAs as a `HashMap`
357    ///
358    /// Useful for bulk operations or debugging.
359    pub fn get_all_resolved(&self) -> HashMap<(String, String), String> {
360        self.resolved.iter().map(|(k, v)| (k.clone(), v.sha.clone())).collect()
361    }
362
363    /// Gets all resolved versions with both SHA and resolved reference
364    ///
365    /// Returns a `HashMap` with (source, version) -> `ResolvedVersion`
366    pub const fn get_all_resolved_full(&self) -> &HashMap<(String, String), ResolvedVersion> {
367        &self.resolved
368    }
369
370    /// Checks if a specific version has been resolved
371    pub fn is_resolved(&self, source: &str, version: &str) -> bool {
372        let key = (source.to_string(), version.to_string());
373        self.resolved.contains_key(&key)
374    }
375
376    /// Pre-syncs all unique sources to ensure repositories are cloned/fetched.
377    ///
378    /// This method implements the first phase of AGPM's two-phase resolution architecture.
379    /// It is designed to be called during the "Syncing sources" phase to perform all
380    /// Git network operations upfront, before version resolution occurs.
381    ///
382    /// The method processes all entries in the resolver, groups them by unique source URLs,
383    /// and ensures each repository is cloned to the cache with the latest refs fetched.
384    /// This enables the subsequent `resolve_all()` method to work purely with local
385    /// cached data, providing better performance and progress reporting.
386    ///
387    /// # Post-Execution State
388    ///
389    /// After this method completes successfully:
390    /// - All required repositories will be cloned to `~/.agpm/cache/sources/`
391    /// - All repositories will have their latest refs fetched from remote
392    /// - The internal `bare_repos` map will be populated with repository paths
393    /// - `resolve_all()` can proceed without any network operations
394    ///
395    /// This separation provides several benefits:
396    /// - **Clear progress phases**: Network operations vs. local resolution
397    /// - **Better error handling**: Network failures separated from resolution logic
398    /// - **Batch optimization**: Single clone/fetch per unique repository
399    /// - **Parallelization potential**: Multiple repositories can be synced concurrently
400    ///
401    /// # Example
402    ///
403    /// ```ignore
404    /// use agpm_cli::resolver::version_resolver::VersionResolver;
405    /// use agpm_cli::cache::Cache;
406    ///
407    /// # async fn example() -> anyhow::Result<()> {
408    /// let cache = Cache::new()?;
409    /// let mut version_resolver = VersionResolver::new(cache);
410    ///
411    /// // Add versions to resolve across multiple sources
412    /// version_resolver.add_version(
413    ///     "community",
414    ///     "https://github.com/org/agpm-community.git",
415    ///     Some("v1.0.0"),
416    /// );
417    /// version_resolver.add_version(
418    ///     "community",
419    ///     "https://github.com/org/agpm-community.git",
420    ///     Some("v2.0.0"),
421    /// );
422    /// version_resolver.add_version(
423    ///     "private-tools",
424    ///     "https://github.com/company/private-agpm.git",
425    ///     Some("main"),
426    /// );
427    ///
428    /// // Phase 1: Pre-sync all repositories (network operations)
429    /// version_resolver.pre_sync_sources().await?;
430    ///
431    /// // Phase 2: Resolve all versions to SHAs (local operations only)
432    /// version_resolver.resolve_all().await?;
433    ///
434    /// // Access resolved data
435    /// if version_resolver.is_resolved("community", "v1.0.0") {
436    ///     println!("Successfully resolved community v1.0.0");
437    /// }
438    /// # Ok(())
439    /// # }
440    /// ```
441    ///
442    /// # Deduplication
443    ///
444    /// The method automatically deduplicates by source URL - if multiple entries
445    /// reference the same repository, only one clone/fetch operation is performed.
446    /// This is particularly efficient when resolving multiple versions from the
447    /// same source.
448    ///
449    /// # Errors
450    ///
451    /// Returns an error if:
452    /// - Repository cloning fails (network issues, authentication, invalid URL)
453    /// - Fetching latest refs fails (network connectivity, permission issues)
454    /// - Authentication fails for private repositories
455    /// - Disk space is insufficient for cloning repositories
456    /// - Repository is corrupted and cannot be accessed
457    pub async fn pre_sync_sources(&mut self) -> Result<()> {
458        // Group entries by source to get unique sources
459        let mut unique_sources: HashMap<String, String> = HashMap::new();
460
461        for entry in self.entries.values() {
462            unique_sources.insert(entry.source.clone(), entry.url.clone());
463        }
464
465        // Pre-sync each unique source
466        for (source, url) in unique_sources {
467            // Clone or update the repository (this does the actual Git operations)
468            let repo_path = self
469                .cache
470                .get_or_clone_source(&source, &url, None)
471                .await
472                .with_context(|| format!("Failed to sync repository for source '{source}'"))?;
473
474            // Store bare repo path for later use in resolve_all
475            self.bare_repos.insert(source.clone(), repo_path);
476        }
477
478        Ok(())
479    }
480
481    /// Gets the bare repository path for a source
482    ///
483    /// Returns None if the source hasn't been processed yet.
484    pub fn get_bare_repo_path(&self, source: &str) -> Option<&PathBuf> {
485        self.bare_repos.get(source)
486    }
487
488    /// Clears all resolved versions and cached data
489    ///
490    /// Useful for testing or when starting a fresh resolution.
491    pub fn clear(&mut self) {
492        self.entries.clear();
493        self.resolved.clear();
494        self.bare_repos.clear();
495    }
496
497    /// Returns the number of unique versions to resolve
498    pub fn pending_count(&self) -> usize {
499        self.entries.len()
500    }
501
502    /// Checks if the resolver has any entries to resolve.
503    ///
504    /// This is a convenience method to determine if the resolver has been populated
505    /// with version entries via `add_version()` calls. It's useful for conditional
506    /// logic to avoid unnecessary operations when no versions need resolution.
507    ///
508    /// # Returns
509    ///
510    /// Returns `true` if there are entries that need resolution, `false` if the
511    /// resolver is empty.
512    ///
513    /// # Example
514    ///
515    /// ```
516    /// # use agpm_cli::resolver::version_resolver::VersionResolver;
517    /// # use agpm_cli::cache::Cache;
518    /// # let cache = Cache::new().unwrap();
519    /// let mut resolver = VersionResolver::new(cache);
520    /// assert!(!resolver.has_entries()); // Initially empty
521    ///
522    /// resolver.add_version("source", "https://github.com/org/repo.git", Some("v1.0.0"));
523    /// assert!(resolver.has_entries()); // Now has entries
524    /// ```
525    pub fn has_entries(&self) -> bool {
526        !self.entries.is_empty()
527    }
528
529    /// Returns the number of successfully resolved versions
530    pub fn resolved_count(&self) -> usize {
531        self.resolved.len()
532    }
533}
534
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a resolver whose cache lives in a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the resolver so the directory stays
    /// alive for the duration of the test.
    fn fresh_resolver() -> (TempDir, VersionResolver) {
        let dir = TempDir::new().unwrap();
        let cache = Cache::with_dir(dir.path().to_path_buf()).unwrap();
        (dir, VersionResolver::new(cache))
    }

    /// Registering the same (source, version) repeatedly yields a single entry.
    #[tokio::test]
    async fn test_version_resolver_deduplication() {
        let (_dir, mut resolver) = fresh_resolver();

        for _ in 0..3 {
            resolver.add_version("source1", "https://example.com/repo.git", Some("v1.0.0"));
        }

        assert_eq!(resolver.pending_count(), 1);
    }

    /// Sanity-checks the shape of a full-length SHA string.
    #[tokio::test]
    async fn test_sha_optimization() {
        let (_dir, _resolver) = fresh_resolver();

        let full_sha = "a".repeat(40);
        assert_eq!(full_sha.len(), 40);
        assert!(full_sha.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// A manually recorded result is visible through the lookup helpers.
    #[tokio::test]
    async fn test_resolved_retrieval() {
        let (_dir, mut resolver) = fresh_resolver();

        let sha = "1234567890abcdef1234567890abcdef12345678";
        let record = ResolvedVersion {
            sha: sha.to_string(),
            resolved_ref: "v1.0.0".to_string(),
        };
        resolver.resolved.insert(("test_source".to_string(), "v1.0.0".to_string()), record);

        assert!(resolver.is_resolved("test_source", "v1.0.0"));
        assert_eq!(resolver.get_resolved_sha("test_source", "v1.0.0"), Some(sha.to_string()));
        assert!(!resolver.is_resolved("test_source", "v2.0.0"));
    }
}