atomcode_core/setup/
scan.rs1use crate::setup::state::compute_signals_hash;
5use crate::setup::types::*;
6use std::path::{Path, PathBuf};
7
/// Maximum number of README bytes kept in `ProjectSignals::readme_head` (2 KiB).
const README_HEAD_BYTES: usize = 2 * 1024;
/// Maximum number of root-level entries kept in `ProjectSignals::root_tree`.
const ROOT_TREE_MAX_ENTRIES: usize = 50;
10
11pub fn scan(project_root: &Path) -> ProjectSignals {
12 let mut s = ProjectSignals::empty(project_root.to_path_buf());
13 s.markers = collect_markers(project_root);
14 s.stacks = derive_stacks(&s.markers);
15 s.frameworks = derive_frameworks(project_root, &s.markers);
16 s.package_mgrs = derive_pkg_mgrs(project_root, &s.markers);
17 s.vcs = derive_vcs(project_root);
18 s.ci = derive_ci(project_root);
19 s.containerized = s
20 .markers
21 .iter()
22 .any(|m| m.kind == MarkerKind::Dockerfile || m.kind == MarkerKind::K8sManifest);
23 s.test_frameworks = derive_test_frameworks(project_root, &s.markers);
24 s.root_tree = collect_root_tree(project_root);
25 s.readme_head = read_readme_head(project_root);
26 s.signals_hash = compute_signals_hash(
27 &s.markers.iter().map(|m| m.path.clone()).collect::<Vec<_>>(),
28 );
29 s
30}
31
32fn collect_markers(root: &Path) -> Vec<Marker> {
33 let probes: &[(&str, MarkerKind)] = &[
34 ("Cargo.toml", MarkerKind::CargoToml),
35 ("package.json", MarkerKind::PackageJson),
36 ("pom.xml", MarkerKind::PomXml),
37 ("build.gradle", MarkerKind::BuildGradle),
38 ("build.gradle.kts", MarkerKind::BuildGradle),
39 ("pyproject.toml", MarkerKind::PyprojectToml),
40 ("requirements.txt", MarkerKind::RequirementsTxt),
41 ("go.mod", MarkerKind::GoMod),
42 ("Dockerfile", MarkerKind::Dockerfile),
43 (".eslintrc.js", MarkerKind::EslintConfig),
44 (".eslintrc.json", MarkerKind::EslintConfig),
45 (".eslintrc.yml", MarkerKind::EslintConfig),
46 ("rustfmt.toml", MarkerKind::RustfmtToml),
47 ("clippy.toml", MarkerKind::ClippyToml),
48 ("tsconfig.json", MarkerKind::TsConfig),
49 ];
50 let mut found = vec![];
51 for (name, kind) in probes {
52 let p = root.join(name);
53 if p.exists() {
54 found.push(Marker { path: p, kind: *kind });
55 }
56 }
57 if root.join(".git").is_dir() {
58 found.push(Marker { path: root.join(".git"), kind: MarkerKind::GitDir });
59 }
60 if root.join(".github/workflows").is_dir() {
61 found.push(Marker {
62 path: root.join(".github/workflows"),
63 kind: MarkerKind::GhActionsDir,
64 });
65 }
66 if root.join("prisma").is_dir() {
67 found.push(Marker { path: root.join("prisma"), kind: MarkerKind::PrismaDir });
68 }
69 if root.join("k8s").is_dir() || root.join("helm").is_dir() {
71 let path = if root.join("k8s").is_dir() {
72 root.join("k8s")
73 } else {
74 root.join("helm")
75 };
76 found.push(Marker { path, kind: MarkerKind::K8sManifest });
77 }
78 found
79}
80
81fn derive_stacks(markers: &[Marker]) -> Vec<Stack> {
82 let mut s = vec![];
83 let has = |k: MarkerKind| markers.iter().any(|m| m.kind == k);
84 if has(MarkerKind::CargoToml) {
85 s.push(Stack::Rust);
86 }
87 if has(MarkerKind::PackageJson) {
88 s.push(Stack::Node);
89 }
90 if has(MarkerKind::PomXml) || has(MarkerKind::BuildGradle) {
91 s.push(Stack::Java);
92 }
93 if has(MarkerKind::PyprojectToml) || has(MarkerKind::RequirementsTxt) {
94 s.push(Stack::Python);
95 }
96 if has(MarkerKind::GoMod) {
97 s.push(Stack::Go);
98 }
99 s
100}
101
102fn derive_frameworks(root: &Path, markers: &[Marker]) -> Vec<Framework> {
103 let mut f = vec![];
104 if markers.iter().any(|m| m.kind == MarkerKind::PackageJson) {
105 if let Ok(raw) = std::fs::read_to_string(root.join("package.json")) {
106 if raw.contains("\"react\"") {
107 f.push(Framework::React);
108 }
109 if raw.contains("\"vue\"") {
110 f.push(Framework::Vue);
111 }
112 if raw.contains("\"next\"") {
113 f.push(Framework::Next);
114 }
115 }
116 }
117 if markers.iter().any(|m| m.kind == MarkerKind::CargoToml) {
118 if let Ok(raw) = std::fs::read_to_string(root.join("Cargo.toml")) {
119 if raw.contains("tokio") {
120 f.push(Framework::Tokio);
121 }
122 }
123 }
124 if markers.iter().any(|m| m.kind == MarkerKind::PomXml) {
125 if let Ok(raw) = std::fs::read_to_string(root.join("pom.xml")) {
126 if raw.contains("spring-boot-starter") {
127 f.push(Framework::Spring);
128 }
129 }
130 }
131 for fname in &["pyproject.toml", "requirements.txt"] {
132 if let Ok(raw) = std::fs::read_to_string(root.join(fname)) {
133 let lower = raw.to_lowercase();
134 if lower.contains("django") && !f.contains(&Framework::Django) {
135 f.push(Framework::Django);
136 }
137 if lower.contains("flask") && !f.contains(&Framework::Flask) {
138 f.push(Framework::Flask);
139 }
140 }
141 }
142 f
143}
144
145fn derive_pkg_mgrs(root: &Path, markers: &[Marker]) -> Vec<PkgMgr> {
146 let mut p = vec![];
147 let has = |k: MarkerKind| markers.iter().any(|m| m.kind == k);
148 if has(MarkerKind::CargoToml) {
149 p.push(PkgMgr::Cargo);
150 }
151 if has(MarkerKind::GoMod) {
152 p.push(PkgMgr::GoMod);
153 }
154 if has(MarkerKind::PomXml) {
155 p.push(PkgMgr::Maven);
156 }
157 if has(MarkerKind::BuildGradle) {
158 p.push(PkgMgr::Gradle);
159 }
160 if has(MarkerKind::PyprojectToml) {
161 if root.join("poetry.lock").exists() {
162 p.push(PkgMgr::Poetry);
163 } else {
164 p.push(PkgMgr::Pip);
165 }
166 } else if has(MarkerKind::RequirementsTxt) {
167 p.push(PkgMgr::Pip);
168 }
169 if has(MarkerKind::PackageJson) {
170 if root.join("pnpm-lock.yaml").exists() {
171 p.push(PkgMgr::Pnpm);
172 } else if root.join("yarn.lock").exists() {
173 p.push(PkgMgr::Yarn);
174 } else {
175 p.push(PkgMgr::Npm);
176 }
177 }
178 p
179}
180
181fn derive_vcs(root: &Path) -> VcsInfo {
182 if !root.join(".git").exists() {
183 return VcsInfo::None;
184 }
185 let remote = std::fs::read_to_string(root.join(".git/config")).ok().and_then(|cfg| {
186 cfg.lines()
187 .find(|l| l.trim().starts_with("url"))
188 .and_then(|l| l.split('=').nth(1))
189 .map(|s| s.trim().to_string())
190 });
191 VcsInfo::Git { remote }
192}
193
194fn derive_ci(root: &Path) -> CiInfo {
195 let workflows = root.join(".github/workflows");
196 if workflows.is_dir() {
197 let count = std::fs::read_dir(&workflows)
198 .map(|it| it.filter_map(|e| e.ok()).count())
199 .unwrap_or(0);
200 return CiInfo::GhActions { workflow_count: count };
201 }
202 if root.join(".gitlab-ci.yml").exists() {
203 return CiInfo::GitLab;
204 }
205 if root.join(".circleci").is_dir() || root.join("Jenkinsfile").exists() {
206 return CiInfo::Other;
207 }
208 CiInfo::None
209}
210
211fn derive_test_frameworks(root: &Path, markers: &[Marker]) -> Vec<TestFw> {
212 let mut tfs = vec![];
213 if markers.iter().any(|m| m.kind == MarkerKind::CargoToml) {
214 tfs.push(TestFw::CargoTest);
215 }
216 if let Ok(raw) = std::fs::read_to_string(root.join("package.json")) {
217 if raw.contains("\"jest\"") {
218 tfs.push(TestFw::Jest);
219 }
220 if raw.contains("\"vitest\"") {
221 tfs.push(TestFw::Vitest);
222 }
223 }
224 if root.join("pytest.ini").exists()
225 || std::fs::read_to_string(root.join("pyproject.toml"))
226 .ok()
227 .map_or(false, |s| s.contains("[tool.pytest"))
228 {
229 tfs.push(TestFw::Pytest);
230 }
231 if root.join("pom.xml").exists()
232 && std::fs::read_to_string(root.join("pom.xml"))
233 .ok()
234 .map_or(false, |s| s.contains("junit"))
235 {
236 tfs.push(TestFw::JUnit);
237 }
238 tfs
239}
240
241fn collect_root_tree(root: &Path) -> Vec<PathBuf> {
242 let mut entries: Vec<PathBuf> = std::fs::read_dir(root)
243 .ok()
244 .into_iter()
245 .flatten()
246 .filter_map(|e| e.ok())
247 .map(|e| e.path())
248 .filter(|p| {
249 let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
250 !matches!(
251 name,
252 "node_modules" | "target" | ".git" | "dist" | "build" | ".next"
253 )
254 })
255 .collect();
256 entries.sort();
257 entries.truncate(ROOT_TREE_MAX_ENTRIES);
258 entries
259}
260
261fn read_readme_head(root: &Path) -> Option<String> {
262 for name in ["README.md", "README.rst", "README.txt", "README"] {
263 if let Ok(bytes) = std::fs::read(root.join(name)) {
264 let head = if bytes.len() > README_HEAD_BYTES {
265 &bytes[..README_HEAD_BYTES]
266 } else {
267 &bytes
268 };
269 return Some(String::from_utf8_lossy(head).to_string());
270 }
271 }
272 None
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a temporary directory populated with `(relative path, content)`
    /// files, creating parent directories as needed.
    fn setup_dir(files: &[(&str, &str)]) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        for (name, content) in files {
            let p = dir.path().join(name);
            if let Some(parent) = p.parent() {
                std::fs::create_dir_all(parent).unwrap();
            }
            std::fs::write(&p, content).unwrap();
        }
        dir
    }

    #[test]
    fn scan_empty_dir_returns_empty_signals() {
        let dir = tempfile::tempdir().unwrap();
        let s = scan(dir.path());
        assert!(s.markers.is_empty());
        assert!(s.stacks.is_empty());
        assert!(matches!(s.vcs, VcsInfo::None));
    }

    #[test]
    fn scan_rust_project_detects_cargo_and_stack() {
        let dir = setup_dir(&[("Cargo.toml", "[package]\nname = \"x\"")]);
        let s = scan(dir.path());
        assert!(s.markers.iter().any(|m| m.kind == MarkerKind::CargoToml));
        assert_eq!(s.stacks, vec![Stack::Rust]);
        assert!(s.package_mgrs.contains(&PkgMgr::Cargo));
    }

    #[test]
    fn scan_react_project_detects_framework() {
        let dir = setup_dir(&[("package.json", r#"{"dependencies":{"react":"^18"}}"#)]);
        let s = scan(dir.path());
        assert!(s.frameworks.contains(&Framework::React));
        assert!(s.package_mgrs.contains(&PkgMgr::Npm));
    }

    #[test]
    fn scan_with_git_dir_marks_vcs() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(dir.path().join(".git")).unwrap();
        std::fs::write(
            dir.path().join(".git/config"),
            "[remote \"origin\"]\n\turl = git@x.com:a/b\n",
        )
        .unwrap();
        let s = scan(dir.path());
        match s.vcs {
            VcsInfo::Git { remote } => assert!(remote.unwrap().contains("a/b")),
            _ => panic!("expected Git"),
        }
    }

    #[test]
    fn scan_docker_marks_containerized() {
        let dir = setup_dir(&[("Dockerfile", "FROM rust:1.80")]);
        let s = scan(dir.path());
        assert!(s.containerized);
    }

    #[test]
    fn scan_truncates_root_tree_to_50() {
        let dir = tempfile::tempdir().unwrap();
        for i in 0..100 {
            std::fs::write(dir.path().join(format!("file{i}.txt")), "x").unwrap();
        }
        let s = scan(dir.path());
        // Exact equality: `<=` would also pass if the tree came back empty.
        assert_eq!(s.root_tree.len(), ROOT_TREE_MAX_ENTRIES);
    }

    #[test]
    fn scan_reads_readme_head_2kb_max() {
        let big = "x".repeat(5000);
        let dir = setup_dir(&[("README.md", big.as_str())]);
        let s = scan(dir.path());
        let head = s.readme_head.unwrap();
        // ASCII input is cut at exactly the limit; `<=` would accept an empty head.
        assert_eq!(head.len(), README_HEAD_BYTES);
        assert!(head.bytes().all(|b| b == b'x'));
    }

    #[test]
    fn scan_signals_hash_changes_when_marker_content_changes() {
        let dir = setup_dir(&[("Cargo.toml", "v1")]);
        let h1 = scan(dir.path()).signals_hash;
        std::fs::write(dir.path().join("Cargo.toml"), "v2").unwrap();
        let h2 = scan(dir.path()).signals_hash;
        assert_ne!(h1, h2);
    }
}
367}