Skip to main content

fallow_core/discover/
infrastructure.rs

1use std::path::{Path, PathBuf};
2
3use fallow_types::discover::{EntryPoint, EntryPointSource};
4
5use super::entry_points::resolve_entry_path;
6use super::parse_scripts::{extract_script_file_refs, looks_like_script_file};
7
8/// Discover entry points from infrastructure config files (Dockerfile, Procfile, fly.toml).
9///
10/// These files reference source files as entry points for processes that run outside
11/// the main JS/TS build pipeline (workers, migrations, cron jobs, etc.).
12pub fn discover_infrastructure_entry_points(root: &Path) -> Vec<EntryPoint> {
13    let _span = tracing::info_span!("discover_infrastructure_entry_points").entered();
14    let file_refs = collect_infrastructure_file_refs(root);
15
16    if file_refs.is_empty() {
17        return Vec::new();
18    }
19
20    let entries = resolve_infrastructure_file_refs(root, &file_refs);
21    log_infrastructure_entries(&entries);
22    entries
23}
24
25fn collect_infrastructure_file_refs(root: &Path) -> Vec<String> {
26    let mut file_refs = Vec::new();
27    collect_dockerfile_refs(root, &mut file_refs);
28    collect_procfile_refs(root, &mut file_refs);
29    collect_fly_toml_refs(root, &mut file_refs);
30    file_refs
31}
32
33fn collect_dockerfile_refs(root: &Path, file_refs: &mut Vec<String>) {
34    for dir in infrastructure_search_dirs(root) {
35        for entry in std::fs::read_dir(dir).into_iter().flatten().flatten() {
36            let name = entry.file_name();
37            let name_str = name.to_string_lossy();
38            if is_dockerfile(&name_str)
39                && let Ok(content) = std::fs::read_to_string(entry.path())
40            {
41                file_refs.extend(extract_dockerfile_file_refs(&content));
42            }
43        }
44    }
45}
46
/// List the directories that are searched for Dockerfiles.
///
/// Candidates are `root` itself followed by its `config`, `docker`, `deploy`, and
/// `.docker` children, in that order. Only candidates that exist as directories are
/// returned.
fn infrastructure_search_dirs(root: &Path) -> Vec<PathBuf> {
    const SUBDIRS: [&str; 4] = ["config", "docker", "deploy", ".docker"];

    let mut candidates = Vec::with_capacity(SUBDIRS.len() + 1);
    candidates.push(root.to_path_buf());
    candidates.extend(SUBDIRS.iter().map(|sub| root.join(sub)));
    candidates.retain(|candidate| candidate.is_dir());
    candidates
}
57
58fn collect_procfile_refs(root: &Path, file_refs: &mut Vec<String>) {
59    if let Ok(content) = std::fs::read_to_string(root.join("Procfile")) {
60        file_refs.extend(extract_procfile_file_refs(&content));
61    }
62}
63
64fn collect_fly_toml_refs(root: &Path, file_refs: &mut Vec<String>) {
65    for entry in std::fs::read_dir(root).into_iter().flatten().flatten() {
66        let name = entry.file_name();
67        let name_str = name.to_string_lossy();
68        if (name_str == "fly.toml" || (name_str.starts_with("fly.") && name_str.ends_with(".toml")))
69            && let Ok(content) = std::fs::read_to_string(entry.path())
70        {
71            file_refs.extend(extract_fly_toml_file_refs(&content));
72        }
73    }
74}
75
76fn resolve_infrastructure_file_refs(root: &Path, file_refs: &[String]) -> Vec<EntryPoint> {
77    let canonical_root = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
78    let mut entries: Vec<EntryPoint> = file_refs
79        .iter()
80        .filter_map(|file_ref| {
81            resolve_entry_path(
82                root,
83                file_ref,
84                &canonical_root,
85                EntryPointSource::InfrastructureConfig,
86            )
87        })
88        .collect();
89
90    entries.sort_by(|a, b| a.path.cmp(&b.path));
91    entries.dedup_by(|a, b| a.path == b.path);
92    entries
93}
94
95fn log_infrastructure_entries(entries: &[EntryPoint]) {
96    if !entries.is_empty() {
97        tracing::info!(
98            count = entries.len(),
99            "infrastructure entry points discovered"
100        );
101    }
102}
103
/// Decide whether a file name denotes a Dockerfile.
///
/// Accepted: exactly `Dockerfile`, any name ending in `.Dockerfile`, and names starting
/// with `Dockerfile.` unless they end in `.dockerignore`.
fn is_dockerfile(name: &str) -> bool {
    if name == "Dockerfile" || name.ends_with(".Dockerfile") {
        return true;
    }
    // `Dockerfile.<variant>` counts, but `Dockerfile.*.dockerignore` is an ignore file.
    name.starts_with("Dockerfile.") && !name.ends_with(".dockerignore")
}
110
111/// Extract file path references from Dockerfile RUN/CMD/ENTRYPOINT instructions.
112///
113/// Handles both shell form (`CMD node file.js`) and exec form (`CMD ["node", "file.js"]`).
114/// Multi-line commands with `\` continuation are joined.
115fn extract_dockerfile_file_refs(content: &str) -> Vec<String> {
116    let mut refs = Vec::new();
117    let lines: Vec<&str> = content.lines().collect();
118    let mut i = 0;
119
120    while i < lines.len() {
121        let line = lines[i].trim();
122
123        if line.is_empty() || line.starts_with('#') {
124            i += 1;
125            continue;
126        }
127
128        let Some(instruction_end) = strip_dockerfile_instruction(line) else {
129            i += 1;
130            continue;
131        };
132
133        let mut full_cmd = instruction_end.to_string();
134        while full_cmd.ends_with('\\') {
135            full_cmd.truncate(full_cmd.len() - 1);
136            i += 1;
137            if i >= lines.len() {
138                break;
139            }
140            full_cmd.push(' ');
141            full_cmd.push_str(lines[i].trim());
142        }
143
144        let cmd_str = full_cmd.trim();
145        let command = if cmd_str.starts_with('[') {
146            parse_exec_form(cmd_str)
147        } else {
148            cmd_str.to_string()
149        };
150
151        refs.extend(extract_script_file_refs(&command));
152        refs.extend(extract_flag_value_file_refs(&command));
153        i += 1;
154    }
155
156    refs
157}
158
159/// Extract file path references from flag values like `--alias:name=./path.ts`.
160///
161/// Build tools (esbuild, webpack, etc.) use flag values that reference source files.
162/// This extracts paths from `--key=value` patterns where the value looks like a source file.
163fn extract_flag_value_file_refs(command: &str) -> Vec<String> {
164    let mut refs = Vec::new();
165    for token in command.split_whitespace() {
166        if !token.starts_with('-') {
167            continue;
168        }
169        if let Some((_key, value)) = token.split_once('=')
170            && looks_like_script_file(value)
171        {
172            refs.push(value.to_string());
173        }
174    }
175    refs
176}
177
/// Strip a Dockerfile instruction keyword (RUN, CMD, ENTRYPOINT) and return the rest.
///
/// The keyword is matched case-insensitively at the very start of `line` and must be
/// followed by a space or a tab; exactly that one separator character is removed.
/// Returns `None` for any other line.
///
/// The prefix is taken with `str::get`, so a line whose bytes at the keyword length
/// fall inside a multi-byte character (e.g. `LABEL ab="été"`) yields `None` instead of
/// panicking on a non-boundary slice.
fn strip_dockerfile_instruction(line: &str) -> Option<&str> {
    const KEYWORDS: [&str; 3] = ["RUN", "CMD", "ENTRYPOINT"];

    KEYWORDS.iter().find_map(|keyword| {
        // `get` returns `None` when the line is too short or the cut is mid-character.
        let head = line.get(..keyword.len())?;
        if !head.eq_ignore_ascii_case(keyword) {
            return None;
        }
        // Safe to slice: `get` above proved `keyword.len()` is a char boundary.
        line[keyword.len()..].strip_prefix(|sep: char| sep == ' ' || sep == '\t')
    })
}
187
/// Parse Docker/TOML exec form `["cmd", "arg1", "arg2"]` into a shell-like command string.
///
/// Elements are split on commas that sit outside quotes, trimmed, and joined with a
/// single space; empty elements are dropped.
///
/// Quoting rules:
/// - A string is closed only by the same quote character that opened it, so
///   `"echo 'a,b'"` stays one element and keeps its inner apostrophes and comma.
/// - Inside double quotes, `\"` and `\\` are unescaped to `"` and `\`; any other
///   backslash sequence is copied through unchanged.
/// - Single-quoted strings are taken literally.
fn parse_exec_form(s: &str) -> String {
    let inner = s.trim().trim_start_matches('[').trim_end_matches(']');

    let mut parts: Vec<String> = Vec::new();
    let mut current = String::new();
    // The quote character of the string currently being read, if any.
    let mut open_quote: Option<char> = None;

    // `while let` rather than `for`: escape handling consumes the following character.
    let mut chars = inner.chars();
    while let Some(ch) = chars.next() {
        match (open_quote, ch) {
            (Some('"'), '\\') => match chars.next() {
                Some(escaped @ ('"' | '\\')) => current.push(escaped),
                Some(other) => {
                    current.push('\\');
                    current.push(other);
                }
                None => current.push('\\'),
            },
            (Some(quote), closing) if closing == quote => open_quote = None,
            (None, '"' | '\'') => open_quote = Some(ch),
            (None, ',') => {
                let element = current.trim();
                if !element.is_empty() {
                    parts.push(element.to_string());
                }
                current.clear();
            }
            _ => current.push(ch),
        }
    }

    let element = current.trim();
    if !element.is_empty() {
        parts.push(element.to_string());
    }
    parts.join(" ")
}
215
216/// Extract file path references from a Procfile.
217///
218/// Format: `process_type: command`
219fn extract_procfile_file_refs(content: &str) -> Vec<String> {
220    let mut refs = Vec::new();
221    for line in content.lines() {
222        let line = line.trim();
223        if line.is_empty() || line.starts_with('#') {
224            continue;
225        }
226        if let Some((_process_type, command)) = line.split_once(':') {
227            refs.extend(extract_script_file_refs(command.trim()));
228        }
229    }
230    refs
231}
232
233/// Extract file path references from fly.toml.
234///
235/// Parses `release_command`, `cmd` at any level, and all keys under `[processes]`.
236fn extract_fly_toml_file_refs(content: &str) -> Vec<String> {
237    let mut refs = Vec::new();
238    let mut in_processes_section = false;
239
240    for line in content.lines() {
241        let line = line.trim();
242        if line.is_empty() || line.starts_with('#') {
243            continue;
244        }
245
246        if line.starts_with('[') {
247            in_processes_section =
248                line.trim_start_matches('[').trim_end_matches(']').trim() == "processes";
249            continue;
250        }
251
252        if let Some((key, value)) = line.split_once('=') {
253            let key = key.trim();
254            let value = value.trim().trim_matches('"').trim_matches('\'');
255
256            if matches!(key, "release_command" | "cmd") || in_processes_section {
257                let command = if value.starts_with('[') {
258                    parse_exec_form(value)
259                } else {
260                    value.to_string()
261                };
262                refs.extend(extract_script_file_refs(&command));
263            }
264        }
265    }
266
267    refs
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    // --- Dockerfile name detection -------------------------------------------------

    #[test]
    fn dockerfile_detection() {
        for accepted in [
            "Dockerfile",
            "Dockerfile.worker",
            "Dockerfile.dev",
            "app.Dockerfile",
        ] {
            assert!(is_dockerfile(accepted));
        }
        for rejected in ["Dockerfile.dockerignore", "README.md", "docker-compose.yml"] {
            assert!(!is_dockerfile(rejected));
        }
    }

    // --- Dockerfile contents -------------------------------------------------------

    #[test]
    fn dockerfile_run_node() {
        let found = extract_dockerfile_file_refs("RUN node scripts/db-migrate.mjs");
        assert_eq!(found, ["scripts/db-migrate.mjs"]);
    }

    #[test]
    fn dockerfile_cmd_shell_form() {
        let found = extract_dockerfile_file_refs("CMD node dist/server.js");
        assert_eq!(found, ["dist/server.js"]);
    }

    #[test]
    fn dockerfile_cmd_exec_form() {
        let found = extract_dockerfile_file_refs(r#"CMD ["node", "scripts/server.js"]"#);
        assert_eq!(found, ["scripts/server.js"]);
    }

    #[test]
    fn dockerfile_entrypoint_exec_form() {
        let found = extract_dockerfile_file_refs(r#"ENTRYPOINT ["node", "src/index.ts"]"#);
        assert_eq!(found, ["src/index.ts"]);
    }

    #[test]
    fn dockerfile_run_esbuild() {
        let instruction =
            "RUN npx esbuild src/server/jobs/worker.ts --outfile=dist-worker/worker.mjs --bundle";
        let found = extract_dockerfile_file_refs(instruction);
        assert_eq!(
            found,
            ["src/server/jobs/worker.ts", "dist-worker/worker.mjs"]
        );
    }

    #[test]
    fn dockerfile_multiline_run() {
        let instruction = "RUN node \\\n  scripts/db-migrate.mjs \\\n  --verbose";
        let found = extract_dockerfile_file_refs(instruction);
        assert_eq!(found, ["scripts/db-migrate.mjs"]);
    }

    #[test]
    fn dockerfile_skips_comments_and_other_instructions() {
        let dockerfile =
            "FROM node:20\n# This is a comment\nCOPY . .\nRUN node scripts/seed.ts\nEXPOSE 3000";
        let found = extract_dockerfile_file_refs(dockerfile);
        assert_eq!(found, ["scripts/seed.ts"]);
    }

    #[test]
    fn dockerfile_case_insensitive() {
        let found = extract_dockerfile_file_refs("run node scripts/migrate.ts");
        assert_eq!(found, ["scripts/migrate.ts"]);
    }

    #[test]
    fn dockerfile_run_tsx_runner() {
        let found = extract_dockerfile_file_refs("RUN tsx src/worker.ts");
        assert_eq!(found, ["src/worker.ts"]);
    }

    #[test]
    fn dockerfile_no_file_refs() {
        let dockerfile =
            "FROM node:20\nRUN npm install\nRUN npm run build\nCMD [\"npm\", \"start\"]";
        assert!(extract_dockerfile_file_refs(dockerfile).is_empty());
    }

    // --- Procfile ------------------------------------------------------------------

    #[test]
    fn procfile_basic() {
        let found = extract_procfile_file_refs("web: node server.js\nworker: node worker.js");
        assert_eq!(found, ["server.js", "worker.js"]);
    }

    #[test]
    fn procfile_with_comments() {
        let found = extract_procfile_file_refs("# comment\nweb: node src/index.ts");
        assert_eq!(found, ["src/index.ts"]);
    }

    #[test]
    fn procfile_empty() {
        assert!(extract_procfile_file_refs("").is_empty());
    }

    // --- fly.toml ------------------------------------------------------------------

    #[test]
    fn fly_toml_release_command() {
        let found =
            extract_fly_toml_file_refs(r#"release_command = "node scripts/db-migrate.mjs""#);
        assert_eq!(found, ["scripts/db-migrate.mjs"]);
    }

    #[test]
    fn fly_toml_process_commands() {
        let config =
            "[processes]\nweb = \"node dist/server.js\"\nworker = \"node src/worker.ts\"";
        let found = extract_fly_toml_file_refs(config);
        assert_eq!(found, ["dist/server.js", "src/worker.ts"]);
    }

    #[test]
    fn fly_toml_cmd() {
        let found = extract_fly_toml_file_refs(r#"cmd = "node src/index.js""#);
        assert_eq!(found, ["src/index.js"]);
    }

    #[test]
    fn fly_toml_ignores_non_process_keys() {
        assert!(extract_fly_toml_file_refs(r#"app = "my-app""#).is_empty());
    }

    #[test]
    fn fly_toml_comments_and_sections() {
        let config = "# deploy config\n[deploy]\nrelease_command = \"node scripts/migrate.mjs\"";
        let found = extract_fly_toml_file_refs(config);
        assert_eq!(found, ["scripts/migrate.mjs"]);
    }

    // --- exec form -----------------------------------------------------------------

    #[test]
    fn exec_form_basic() {
        let joined = parse_exec_form(r#"["node", "server.js"]"#);
        assert_eq!(joined, "node server.js");
    }

    #[test]
    fn exec_form_with_flags() {
        let joined = parse_exec_form(r#"["node", "--max-old-space-size=4096", "server.js"]"#);
        assert_eq!(joined, "node --max-old-space-size=4096 server.js");
    }

    #[test]
    fn exec_form_with_commas_in_args() {
        let joined = parse_exec_form(r#"["node", "--require=a,b", "server.js"]"#);
        assert_eq!(joined, "node --require=a,b server.js");
    }

    #[test]
    fn fly_toml_arbitrary_process_name() {
        let config = "[processes]\nmigrations = \"node scripts/migrate.mjs\"";
        let found = extract_fly_toml_file_refs(config);
        assert_eq!(found, ["scripts/migrate.mjs"]);
    }

    #[test]
    fn fly_toml_exec_form_array() {
        let found = extract_fly_toml_file_refs(r#"cmd = ["node", "src/index.js"]"#);
        assert_eq!(found, ["src/index.js"]);
    }

    #[test]
    fn fly_toml_section_switching() {
        let config =
            "[processes]\nworker = \"node src/worker.ts\"\n[env]\nNODE_ENV = \"production\"";
        let found = extract_fly_toml_file_refs(config);
        assert_eq!(found, ["src/worker.ts"]);
    }

    // --- instruction keyword stripping ---------------------------------------------

    #[test]
    fn strip_instruction_run() {
        let rest = strip_dockerfile_instruction("RUN node server.js");
        assert_eq!(rest, Some("node server.js"));
    }

    #[test]
    fn strip_instruction_cmd() {
        let rest = strip_dockerfile_instruction("CMD node server.js");
        assert_eq!(rest, Some("node server.js"));
    }

    #[test]
    fn strip_instruction_entrypoint() {
        let rest = strip_dockerfile_instruction("ENTRYPOINT node server.js");
        assert_eq!(rest, Some("node server.js"));
    }

    #[test]
    fn strip_instruction_case_insensitive() {
        for lowercase in ["run node server.js", "cmd node server.js"] {
            assert_eq!(
                strip_dockerfile_instruction(lowercase),
                Some("node server.js")
            );
        }
    }

    #[test]
    fn strip_instruction_non_matching() {
        for other in ["FROM node:20", "COPY . .", "EXPOSE 3000", "ENV FOO=bar"] {
            assert_eq!(strip_dockerfile_instruction(other), None);
        }
    }

    // --- flag values ---------------------------------------------------------------

    #[test]
    fn flag_value_file_refs_esbuild_outfile() {
        let found = extract_flag_value_file_refs("npx esbuild src/entry.ts --outfile=dist/out.js");
        assert_eq!(found, ["dist/out.js"]);
    }

    #[test]
    fn flag_value_file_refs_alias() {
        let found = extract_flag_value_file_refs("node --alias:helper=./src/helper.ts app.js");
        assert_eq!(found, ["./src/helper.ts"]);
    }

    #[test]
    fn flag_value_file_refs_no_flags() {
        let found = extract_flag_value_file_refs("node src/server.js");
        assert!(found.is_empty(), "non-flag tokens should not match");
    }

    #[test]
    fn flag_value_file_refs_flag_without_file() {
        let found = extract_flag_value_file_refs("node --max-old-space-size=4096 server.js");
        assert!(
            found.is_empty(),
            "flag values that are not file paths should not match"
        );
    }

    #[test]
    fn exec_form_single_element() {
        assert_eq!(parse_exec_form(r#"["node"]"#), "node");
    }

    #[test]
    fn exec_form_empty() {
        assert_eq!(parse_exec_form("[]"), "");
    }

    #[test]
    fn exec_form_single_quotes() {
        assert_eq!(parse_exec_form("['node', 'server.js']"), "node server.js");
    }

    /// End-to-end tests that run discovery against a real temporary project directory.
    mod integration {
        use super::*;

        #[test]
        fn discovers_dockerfile_cmd_entry_point() {
            let project = tempfile::tempdir().expect("create temp dir");
            let root = project.path();

            let src_dir = root.join("src");
            std::fs::create_dir_all(&src_dir).unwrap();
            std::fs::write(src_dir.join("server.ts"), "export const s = 1;").unwrap();
            std::fs::write(
                root.join("Dockerfile"),
                "FROM node:20\nCOPY . .\nCMD node src/server.ts",
            )
            .unwrap();

            let entries = discover_infrastructure_entry_points(root);
            assert_eq!(entries.len(), 1);

            let only = &entries[0];
            assert!(only.path.ends_with("src/server.ts"));
            assert!(matches!(
                only.source,
                EntryPointSource::InfrastructureConfig
            ));
        }

        #[test]
        fn discovers_procfile_entry_points() {
            let project = tempfile::tempdir().expect("create temp dir");
            let root = project.path();

            std::fs::write(root.join("server.js"), "// server").unwrap();
            std::fs::write(root.join("worker.js"), "// worker").unwrap();
            std::fs::write(
                root.join("Procfile"),
                "web: node server.js\nworker: node worker.js",
            )
            .unwrap();

            let entries = discover_infrastructure_entry_points(root);
            assert_eq!(entries.len(), 2);

            let file_names: Vec<String> = entries
                .iter()
                .map(|entry| {
                    entry
                        .path
                        .file_name()
                        .unwrap()
                        .to_string_lossy()
                        .into_owned()
                })
                .collect();
            assert!(file_names.iter().any(|name| name == "server.js"));
            assert!(file_names.iter().any(|name| name == "worker.js"));
        }

        #[test]
        fn no_infrastructure_files_returns_empty() {
            let project = tempfile::tempdir().expect("create temp dir");
            assert!(discover_infrastructure_entry_points(project.path()).is_empty());
        }

        #[test]
        fn discovers_variant_dockerfile_names() {
            let project = tempfile::tempdir().expect("create temp dir");
            let root = project.path();

            let scripts_dir = root.join("scripts");
            std::fs::create_dir_all(&scripts_dir).unwrap();
            std::fs::write(scripts_dir.join("migrate.ts"), "// migrate").unwrap();
            std::fs::write(
                root.join("Dockerfile.worker"),
                "FROM node:20\nRUN node scripts/migrate.ts",
            )
            .unwrap();

            let entries = discover_infrastructure_entry_points(root);
            assert_eq!(entries.len(), 1);
            assert!(entries[0].path.ends_with("scripts/migrate.ts"));
        }

        #[test]
        fn deduplicates_entry_points() {
            let project = tempfile::tempdir().expect("create temp dir");
            let root = project.path();

            std::fs::write(root.join("server.js"), "// server").unwrap();
            // The same script is referenced from two different infrastructure files.
            std::fs::write(root.join("Dockerfile"), "FROM node:20\nCMD node server.js").unwrap();
            std::fs::write(root.join("Procfile"), "web: node server.js").unwrap();

            let entries = discover_infrastructure_entry_points(root);
            assert_eq!(
                entries.len(),
                1,
                "duplicate entry points should be deduplicated"
            );
        }
    }
}