1use crate::config::FilterOptions;
7use crate::domain::RepoPathTemplate;
8use crate::git::GitOperations;
9use crate::operations::sync::LocalRepo;
10use crate::provider::{DiscoveryOptions, DiscoveryProgress, Provider};
11use crate::types::{ActionPlan, OwnedRepo};
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
15struct ScanDirContext<'a, G: GitOperations> {
17 base_path: &'a Path,
18 git: &'a G,
19 repos: &'a mut Vec<(PathBuf, String, String)>,
20 visited_dirs: &'a mut HashSet<PathBuf>,
21 seen_repos: &'a mut HashSet<PathBuf>,
22 max_depth: usize,
23}
24
25pub struct DiscoveryOrchestrator {
27 pub(crate) filters: FilterOptions,
29 pub(crate) structure: String,
31}
32
33impl DiscoveryOrchestrator {
34 pub fn new(filters: FilterOptions, structure: String) -> Self {
36 Self { filters, structure }
37 }
38
39 pub fn to_discovery_options(&self) -> DiscoveryOptions {
41 DiscoveryOptions::new()
42 .with_archived(self.filters.include_archived)
43 .with_forks(self.filters.include_forks)
44 .with_orgs(self.filters.orgs.clone())
45 .with_exclusions(self.filters.exclude_repos.clone())
46 }
47
48 pub async fn discover(
50 &self,
51 provider: &dyn Provider,
52 progress: &dyn DiscoveryProgress,
53 ) -> Result<Vec<OwnedRepo>, crate::errors::ProviderError> {
54 let options = self.to_discovery_options();
55 provider.discover_repos(&options, progress).await
56 }
57
58 pub fn compute_path(&self, base_path: &Path, repo: &OwnedRepo, provider: &str) -> PathBuf {
60 RepoPathTemplate::new(self.structure.clone()).render_owned_repo(base_path, repo, provider)
61 }
62
63 pub fn plan_clone<G: GitOperations>(
65 &self,
66 base_path: &Path,
67 repos: Vec<OwnedRepo>,
68 provider: &str,
69 git: &G,
70 ) -> ActionPlan {
71 let mut plan = ActionPlan::new();
72
73 for repo in repos {
74 let local_path = self.compute_path(base_path, &repo, provider);
75
76 if local_path.exists() {
77 if git.is_repo(&local_path) {
78 plan.add_sync(repo);
80 } else {
81 plan.add_skipped(repo, "directory exists but is not a git repository");
83 }
84 } else {
85 plan.add_clone(repo);
87 }
88 }
89
90 plan
91 }
92
93 pub fn plan_sync<G: GitOperations>(
95 &self,
96 base_path: &Path,
97 repos: Vec<OwnedRepo>,
98 provider: &str,
99 git: &G,
100 skip_uncommitted: bool,
101 ) -> (Vec<LocalRepo>, Vec<(OwnedRepo, String)>) {
102 let mut to_sync = Vec::new();
103 let mut skipped = Vec::new();
104
105 for repo in repos {
106 let local_path = self.compute_path(base_path, &repo, provider);
107
108 if !local_path.exists() {
109 skipped.push((repo, "not cloned locally".to_string()));
110 continue;
111 }
112
113 if !git.is_repo(&local_path) {
114 skipped.push((repo, "not a git repository".to_string()));
115 continue;
116 }
117
118 if skip_uncommitted {
119 match git.status(&local_path) {
120 Ok(status) => {
121 if status.is_uncommitted || status.has_untracked {
122 skipped.push((repo, "uncommitted changes".to_string()));
123 continue;
124 }
125 }
126 Err(err) => {
127 skipped.push((repo, format!("failed to get status: {}", err)));
128 continue;
129 }
130 }
131 }
132
133 to_sync.push(LocalRepo::new(repo, local_path));
134 }
135
136 (to_sync, skipped)
137 }
138
139 pub fn scan_local<G: GitOperations>(
141 &self,
142 base_path: &Path,
143 git: &G,
144 ) -> Vec<(PathBuf, String, String)> {
145 let mut repos = Vec::new();
146 let mut visited_dirs = HashSet::new();
147 let mut seen_repos = HashSet::new();
148
149 let depth = RepoPathTemplate::new(self.structure.clone()).scan_depth();
153 let mut ctx = ScanDirContext {
154 base_path,
155 git,
156 repos: &mut repos,
157 visited_dirs: &mut visited_dirs,
158 seen_repos: &mut seen_repos,
159 max_depth: depth,
160 };
161 self.scan_dir(base_path, 0, &mut ctx);
162
163 repos
164 }
165
166 fn scan_dir<G: GitOperations>(
168 &self,
169 path: &Path,
170 current_depth: usize,
171 ctx: &mut ScanDirContext<'_, G>,
172 ) {
173 if current_depth >= ctx.max_depth {
174 return;
175 }
176
177 let canonical_path = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
178 if !ctx.visited_dirs.insert(canonical_path.clone()) {
179 return;
180 }
181
182 let entries = match std::fs::read_dir(&canonical_path) {
183 Ok(e) => e,
184 Err(_) => return,
185 };
186
187 for entry in entries.flatten() {
188 let Ok(file_type) = entry.file_type() else {
190 continue;
191 };
192 if !file_type.is_dir() {
193 continue;
194 }
195
196 let entry_path = entry.path();
197
198 if entry.file_name().to_string_lossy().starts_with('.') {
200 continue;
201 }
202
203 if current_depth + 1 == ctx.max_depth && ctx.git.is_repo(&entry_path) {
204 let canonical_repo =
205 std::fs::canonicalize(&entry_path).unwrap_or(entry_path.clone());
206 if !ctx.seen_repos.insert(canonical_repo.clone()) {
207 continue;
208 }
209
210 let rel_path = canonical_repo
212 .strip_prefix(ctx.base_path)
213 .unwrap_or(&canonical_repo);
214 let parts: Vec<_> = rel_path.components().collect();
215
216 if parts.len() >= 2 {
217 let org = parts[parts.len() - 2]
218 .as_os_str()
219 .to_string_lossy()
220 .to_string();
221 let repo = parts[parts.len() - 1]
222 .as_os_str()
223 .to_string_lossy()
224 .to_string();
225 ctx.repos.push((canonical_repo, org, repo));
226 }
227 } else {
228 self.scan_dir(&entry_path, current_depth + 1, ctx);
230 }
231 }
232 }
233}
234
235pub fn merge_repos(repos_by_provider: Vec<(String, Vec<OwnedRepo>)>) -> Vec<(String, OwnedRepo)> {
237 let mut result = Vec::new();
238
239 for (provider, repos) in repos_by_provider {
240 for repo in repos {
241 result.push((provider.clone(), repo));
242 }
243 }
244
245 result
246}
247
248pub fn deduplicate_repos(repos: Vec<(String, OwnedRepo)>) -> Vec<(String, OwnedRepo)> {
250 let mut seen = HashSet::new();
251 let mut result = Vec::new();
252
253 for (provider, repo) in repos {
254 let key = repo.full_name().to_string();
255 if !seen.contains(&key) {
256 seen.insert(key);
257 result.push((provider, repo));
258 }
259 }
260
261 result
262}
263
264#[cfg(test)]
265#[path = "discovery_tests.rs"]
266mod tests;