Skip to main content

git_same/
discovery.rs

//! Discovery orchestration module.
//!
//! This module coordinates repository discovery across providers
//! and plans the clone and sync actions that follow from it.
5
6use crate::config::FilterOptions;
7use crate::domain::RepoPathTemplate;
8use crate::git::GitOperations;
9use crate::operations::sync::LocalRepo;
10use crate::provider::{DiscoveryOptions, DiscoveryProgress, Provider};
11use crate::types::{ActionPlan, OwnedRepo};
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
15/// Mutable context for directory scanning (keeps `scan_dir` under Clippy’s argument limit).
16struct ScanDirContext<'a, G: GitOperations> {
17    base_path: &'a Path,
18    git: &'a G,
19    repos: &'a mut Vec<(PathBuf, String, String)>,
20    visited_dirs: &'a mut HashSet<PathBuf>,
21    seen_repos: &'a mut HashSet<PathBuf>,
22    max_depth: usize,
23}
24
25/// Orchestrates repository discovery.
26pub struct DiscoveryOrchestrator {
27    /// Filter options
28    pub(crate) filters: FilterOptions,
29    /// Directory structure template
30    pub(crate) structure: String,
31}
32
33impl DiscoveryOrchestrator {
34    /// Creates a new discovery orchestrator.
35    pub fn new(filters: FilterOptions, structure: String) -> Self {
36        Self { filters, structure }
37    }
38
39    /// Converts filter options to discovery options.
40    pub fn to_discovery_options(&self) -> DiscoveryOptions {
41        DiscoveryOptions::new()
42            .with_archived(self.filters.include_archived)
43            .with_forks(self.filters.include_forks)
44            .with_orgs(self.filters.orgs.clone())
45            .with_exclusions(self.filters.exclude_repos.clone())
46    }
47
48    /// Discovers repositories from a provider.
49    pub async fn discover(
50        &self,
51        provider: &dyn Provider,
52        progress: &dyn DiscoveryProgress,
53    ) -> Result<Vec<OwnedRepo>, crate::errors::ProviderError> {
54        let options = self.to_discovery_options();
55        provider.discover_repos(&options, progress).await
56    }
57
58    /// Computes the local path for a repository.
59    pub fn compute_path(&self, base_path: &Path, repo: &OwnedRepo, provider: &str) -> PathBuf {
60        RepoPathTemplate::new(self.structure.clone()).render_owned_repo(base_path, repo, provider)
61    }
62
63    /// Creates an action plan by comparing discovered repos with local filesystem.
64    pub fn plan_clone<G: GitOperations>(
65        &self,
66        base_path: &Path,
67        repos: Vec<OwnedRepo>,
68        provider: &str,
69        git: &G,
70    ) -> ActionPlan {
71        let mut plan = ActionPlan::new();
72
73        for repo in repos {
74            let local_path = self.compute_path(base_path, &repo, provider);
75
76            if local_path.exists() {
77                if git.is_repo(&local_path) {
78                    // Existing repo - add to sync
79                    plan.add_sync(repo);
80                } else {
81                    // Directory exists but not a repo
82                    plan.add_skipped(repo, "directory exists but is not a git repository");
83                }
84            } else {
85                // New repo - add to clone
86                plan.add_clone(repo);
87            }
88        }
89
90        plan
91    }
92
93    /// Creates a sync plan for existing local repositories.
94    pub fn plan_sync<G: GitOperations>(
95        &self,
96        base_path: &Path,
97        repos: Vec<OwnedRepo>,
98        provider: &str,
99        git: &G,
100        skip_uncommitted: bool,
101    ) -> (Vec<LocalRepo>, Vec<(OwnedRepo, String)>) {
102        let mut to_sync = Vec::new();
103        let mut skipped = Vec::new();
104
105        for repo in repos {
106            let local_path = self.compute_path(base_path, &repo, provider);
107
108            if !local_path.exists() {
109                skipped.push((repo, "not cloned locally".to_string()));
110                continue;
111            }
112
113            if !git.is_repo(&local_path) {
114                skipped.push((repo, "not a git repository".to_string()));
115                continue;
116            }
117
118            if skip_uncommitted {
119                match git.status(&local_path) {
120                    Ok(status) => {
121                        if status.is_uncommitted || status.has_untracked {
122                            skipped.push((repo, "uncommitted changes".to_string()));
123                            continue;
124                        }
125                    }
126                    Err(err) => {
127                        skipped.push((repo, format!("failed to get status: {}", err)));
128                        continue;
129                    }
130                }
131            }
132
133            to_sync.push(LocalRepo::new(repo, local_path));
134        }
135
136        (to_sync, skipped)
137    }
138
139    /// Scans local filesystem for cloned repositories.
140    pub fn scan_local<G: GitOperations>(
141        &self,
142        base_path: &Path,
143        git: &G,
144    ) -> Vec<(PathBuf, String, String)> {
145        let mut repos = Vec::new();
146        let mut visited_dirs = HashSet::new();
147        let mut seen_repos = HashSet::new();
148
149        // Determine scan depth based on structure
150        // {org}/{repo} -> 2 levels
151        // {provider}/{org}/{repo} -> 3 levels
152        let depth = RepoPathTemplate::new(self.structure.clone()).scan_depth();
153        let mut ctx = ScanDirContext {
154            base_path,
155            git,
156            repos: &mut repos,
157            visited_dirs: &mut visited_dirs,
158            seen_repos: &mut seen_repos,
159            max_depth: depth,
160        };
161        self.scan_dir(base_path, 0, &mut ctx);
162
163        repos
164    }
165
166    /// Recursively scans directories for git repos.
167    fn scan_dir<G: GitOperations>(
168        &self,
169        path: &Path,
170        current_depth: usize,
171        ctx: &mut ScanDirContext<'_, G>,
172    ) {
173        if current_depth >= ctx.max_depth {
174            return;
175        }
176
177        let canonical_path = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
178        if !ctx.visited_dirs.insert(canonical_path.clone()) {
179            return;
180        }
181
182        let entries = match std::fs::read_dir(&canonical_path) {
183            Ok(e) => e,
184            Err(_) => return,
185        };
186
187        for entry in entries.flatten() {
188            // Avoid traversing symlinks to directories.
189            let Ok(file_type) = entry.file_type() else {
190                continue;
191            };
192            if !file_type.is_dir() {
193                continue;
194            }
195
196            let entry_path = entry.path();
197
198            // Skip hidden directories
199            if entry.file_name().to_string_lossy().starts_with('.') {
200                continue;
201            }
202
203            if current_depth + 1 == ctx.max_depth && ctx.git.is_repo(&entry_path) {
204                let canonical_repo =
205                    std::fs::canonicalize(&entry_path).unwrap_or(entry_path.clone());
206                if !ctx.seen_repos.insert(canonical_repo.clone()) {
207                    continue;
208                }
209
210                // This is a repo at the expected depth
211                let rel_path = canonical_repo
212                    .strip_prefix(ctx.base_path)
213                    .unwrap_or(&canonical_repo);
214                let parts: Vec<_> = rel_path.components().collect();
215
216                if parts.len() >= 2 {
217                    let org = parts[parts.len() - 2]
218                        .as_os_str()
219                        .to_string_lossy()
220                        .to_string();
221                    let repo = parts[parts.len() - 1]
222                        .as_os_str()
223                        .to_string_lossy()
224                        .to_string();
225                    ctx.repos.push((canonical_repo, org, repo));
226                }
227            } else {
228                // Recurse into subdirectory
229                self.scan_dir(&entry_path, current_depth + 1, ctx);
230            }
231        }
232    }
233}
234
235/// Merges discovered repos from multiple providers.
236pub fn merge_repos(repos_by_provider: Vec<(String, Vec<OwnedRepo>)>) -> Vec<(String, OwnedRepo)> {
237    let mut result = Vec::new();
238
239    for (provider, repos) in repos_by_provider {
240        for repo in repos {
241            result.push((provider.clone(), repo));
242        }
243    }
244
245    result
246}
247
248/// Deduplicates repos by full name, preferring first occurrence.
249pub fn deduplicate_repos(repos: Vec<(String, OwnedRepo)>) -> Vec<(String, OwnedRepo)> {
250    let mut seen = HashSet::new();
251    let mut result = Vec::new();
252
253    for (provider, repo) in repos {
254        let key = repo.full_name().to_string();
255        if !seen.contains(&key) {
256            seen.insert(key);
257            result.push((provider, repo));
258        }
259    }
260
261    result
262}
263
264#[cfg(test)]
265#[path = "discovery_tests.rs"]
266mod tests;