Skip to main content

loom_core/registry/
mod.rs

1pub mod url;
2
3use std::collections::HashSet;
4use std::path::{Path, PathBuf};
5
6pub use url::{CanonicalUrl, normalize_url};
7
/// One git repository found by [`discover_repos`].
///
/// Derives `PartialEq`/`Eq` so entries (and collections of them) can be
/// compared directly, e.g. with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoEntry {
    /// Short name of the repository: the directory basename, or `org/repo`
    /// when another discovered repository shares the same basename.
    pub name: String,
    /// Path of the repository's parent directory relative to the scan root.
    /// Empty for flat layouts; may contain path separators for deep layouts
    /// such as `github.com/org`.
    pub org: String,
    /// Path to the repository root, as produced by joining the canonicalized
    /// scan root with the directory names walked to reach it.
    pub path: PathBuf,
    /// Remote URL reported for the repository, when one could be read.
    pub remote_url: Option<String>,
}
21
22impl RepoEntry {
23    /// Display name: bare name for flat layout, org/name for grouped layout.
24    pub fn display_name(&self) -> String {
25        if self.org.is_empty() {
26            self.name.clone()
27        } else {
28            format!("{}/{}", self.org, self.name)
29        }
30    }
31
32    /// Check if this entry matches a query string (bare name or org-qualified name).
33    pub fn matches_name(&self, query: &str) -> bool {
34        self.name == query || self.display_name() == query
35    }
36}
37
38/// Discover git repositories under the given scan roots.
39///
40/// Recursively scans up to `scan_depth` levels deep. At any level, if a directory
41/// contains `.git`, it's treated as a repo and not recursed into.
42///
43/// - depth 1: flat (`root/repo`)
44/// - depth 2: org-grouped (`root/org/repo`, default)
45/// - depth 3: `root/host/org/repo`
46/// - depth 4: max
47///
48/// Deduplicates across overlapping scan_roots using canonical paths.
49/// Excludes directories under `workspace_root` (avoids scanning worktrees).
50pub fn discover_repos(
51    scan_roots: &[PathBuf],
52    workspace_root: Option<&Path>,
53    scan_depth: u8,
54) -> Vec<RepoEntry> {
55    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
56    let mut entries = Vec::new();
57
58    // Canonicalize workspace root for exclusion comparison
59    let ws_canonical = workspace_root.and_then(|p| std::fs::canonicalize(p).ok());
60
61    for scan_root in scan_roots {
62        let root = match std::fs::canonicalize(scan_root) {
63            Ok(p) => p,
64            Err(_) => continue, // Skip non-existent roots
65        };
66
67        scan_recursive(
68            &root,
69            &root,
70            scan_depth,
71            0,
72            &ws_canonical,
73            &mut seen_paths,
74            &mut entries,
75        );
76    }
77
78    // Handle name collisions: disambiguate repos with the same basename
79    disambiguate_names(&mut entries);
80
81    // Sort by (org, name) for consistent ordering
82    entries.sort_by(|a, b| (&a.org, &a.name).cmp(&(&b.org, &b.name)));
83
84    entries
85}
86
87fn scan_recursive(
88    root: &Path,
89    current: &Path,
90    max_depth: u8,
91    current_depth: u8,
92    ws_canonical: &Option<PathBuf>,
93    seen_paths: &mut HashSet<PathBuf>,
94    entries: &mut Vec<RepoEntry>,
95) {
96    let dir_entries = match std::fs::read_dir(current) {
97        Ok(entries) => entries,
98        Err(_) => return,
99    };
100
101    for entry in dir_entries.flatten() {
102        let path = entry.path();
103        if !path.is_dir() {
104            continue;
105        }
106
107        let name = entry.file_name().to_string_lossy().to_string();
108
109        // Skip hidden directories
110        if name.starts_with('.') {
111            continue;
112        }
113
114        // Skip workspace root to avoid scanning worktrees as repos
115        if let Some(ws) = ws_canonical
116            && let Ok(canonical) = std::fs::canonicalize(&path)
117            && (canonical.starts_with(ws) || ws.starts_with(&canonical))
118        {
119            continue;
120        }
121
122        // Check if this is a git repo
123        if path.join(".git").exists() {
124            // Found a repo — compute org from path relative to root
125            let relative = path.strip_prefix(root).unwrap_or(&path);
126            let org = relative
127                .parent()
128                .map(|p| p.to_string_lossy().to_string())
129                .unwrap_or_default();
130
131            // Deduplicate by canonical path
132            let canonical = match std::fs::canonicalize(&path) {
133                Ok(p) => p,
134                Err(_) => continue,
135            };
136            if !seen_paths.insert(canonical) {
137                continue;
138            }
139
140            // Get remote URL (best effort)
141            let remote_url = crate::git::GitRepo::new(&path).remote_url().ok().flatten();
142
143            entries.push(RepoEntry {
144                name,
145                org,
146                path,
147                remote_url,
148            });
149        } else if current_depth + 1 < max_depth {
150            // Not a repo — recurse deeper if within depth limit
151            scan_recursive(
152                root,
153                &path,
154                max_depth,
155                current_depth + 1,
156                ws_canonical,
157                seen_paths,
158                entries,
159            );
160        }
161    }
162}
163
164/// Find a local repo matching a remote URL via canonical URL comparison.
165pub fn match_by_url<'a>(repos: &'a [RepoEntry], url: &str) -> Option<&'a RepoEntry> {
166    let target = normalize_url(url)?;
167    repos
168        .iter()
169        .find(|r| r.remote_url.as_deref().and_then(normalize_url).as_ref() == Some(&target))
170}
171
172/// Disambiguate repos with the same basename by prefixing with `org/`.
173fn disambiguate_names(entries: &mut [RepoEntry]) {
174    // Count occurrences of each name
175    let mut name_counts: std::collections::HashMap<String, usize> =
176        std::collections::HashMap::new();
177    for entry in entries.iter() {
178        *name_counts.entry(entry.name.clone()).or_insert(0) += 1;
179    }
180
181    // Disambiguate duplicates
182    for entry in entries.iter_mut() {
183        if name_counts.get(&entry.name).copied().unwrap_or(0) > 1 {
184            entry.name = format!("{}/{}", entry.org, entry.name);
185        }
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Create `path` (including parents) and run `git init` inside it.
    ///
    /// Panics when `git` cannot be spawned or exits unsuccessfully. Without
    /// this check a failed `git init` would go unnoticed and the tests that
    /// expect *no* repositories would pass for the wrong reason.
    fn init_repo(path: &Path) {
        std::fs::create_dir_all(path).unwrap();
        let output = std::process::Command::new("git")
            .arg("init")
            .arg(path)
            .env("LC_ALL", "C")
            .output()
            .expect("failed to spawn `git init`");
        assert!(
            output.status.success(),
            "`git init {}` failed: {}",
            path.display(),
            String::from_utf8_lossy(&output.stderr)
        );
    }

    #[test]
    fn test_discover_repos_basic() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // org/repo layout with an initialized repository
        init_repo(&root.join("myorg").join("myrepo"));

        let entries = discover_repos(&[root.to_path_buf()], None, 2);
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "myrepo");
        assert_eq!(entries[0].org, "myorg");
    }

    #[test]
    fn test_discover_repos_skips_non_git() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // org/repo layout WITHOUT `git init`
        let repo_path = root.join("myorg").join("not-a-repo");
        std::fs::create_dir_all(&repo_path).unwrap();

        let entries = discover_repos(&[root.to_path_buf()], None, 2);
        assert_eq!(entries.len(), 0);
    }

    #[test]
    fn test_discover_repos_skips_hidden() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // Repository under a hidden org directory
        init_repo(&root.join(".hidden").join("repo"));

        let entries = discover_repos(&[root.to_path_buf()], None, 2);
        assert_eq!(entries.len(), 0);
    }

    #[test]
    fn test_discover_repos_deduplicates() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        init_repo(&root.join("myorg").join("myrepo"));

        // Scanning the same root twice must still report the repo once
        let entries = discover_repos(&[root.to_path_buf(), root.to_path_buf()], None, 2);
        assert_eq!(entries.len(), 1);
    }

    #[test]
    fn test_discover_repos_excludes_workspace_root() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // Repository located inside the workspace root
        let ws_root = root.join("loom-workspaces");
        init_repo(&ws_root.join("myorg").join("myrepo"));

        // Repos under the workspace root must be excluded
        let entries = discover_repos(&[root.to_path_buf()], Some(&ws_root), 2);
        assert_eq!(entries.len(), 0);
    }

    #[test]
    fn test_disambiguate_names() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // Two repos with the same basename under different orgs
        for org in &["org-a", "org-b"] {
            init_repo(&root.join(org).join("shared-name"));
        }

        let entries = discover_repos(&[root.to_path_buf()], None, 2);
        assert_eq!(entries.len(), 2);

        // Both should carry the org prefix
        let names: Vec<&str> = entries.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"org-a/shared-name"));
        assert!(names.contains(&"org-b/shared-name"));
    }

    #[test]
    fn test_match_by_url_ssh_to_https() {
        let entries = vec![RepoEntry {
            name: "repo".to_string(),
            org: "org".to_string(),
            path: PathBuf::from("/code/org/repo"),
            remote_url: Some("git@github.com:org/repo.git".to_string()),
        }];

        let found = match_by_url(&entries, "https://github.com/org/repo");
        assert!(found.is_some());
        assert_eq!(found.unwrap().name, "repo");
    }

    #[test]
    fn test_match_by_url_no_match() {
        let entries = vec![RepoEntry {
            name: "repo".to_string(),
            org: "org".to_string(),
            path: PathBuf::from("/code/org/repo"),
            remote_url: Some("git@github.com:org/repo.git".to_string()),
        }];

        let found = match_by_url(&entries, "https://github.com/other-org/other-repo");
        assert!(found.is_none());
    }
}