Skip to main content

fallow_core/scripts/
ci.rs

1//! CI config file scanner for dependency usage detection.
2//!
3//! Extracts shell commands from `.gitlab-ci.yml` and `.github/workflows/*.yml`
4//! files, parses them for binary invocations (especially `npx`), and maps
5//! binaries to npm package names. This prevents false "unused dependency"
6//! reports for packages only used in CI pipelines.
7
8use std::path::Path;
9
10use rustc_hash::FxHashSet;
11
12use super::{parse_script, resolve_binary_to_package};
13
14/// Analyze CI config files for package binary invocations.
15///
16/// Scans GitLab CI and GitHub Actions workflow files for shell commands,
17/// extracts binary names, and returns the set of npm package names used.
18pub fn analyze_ci_files(root: &Path) -> FxHashSet<String> {
19    let _span = tracing::info_span!("analyze_ci_files").entered();
20    let mut used_packages = FxHashSet::default();
21
22    // GitLab CI
23    let gitlab_ci = root.join(".gitlab-ci.yml");
24    if let Ok(content) = std::fs::read_to_string(&gitlab_ci) {
25        extract_ci_packages(&content, root, &mut used_packages);
26    }
27
28    // GitHub Actions workflows
29    let workflows_dir = root.join(".github/workflows");
30    if let Ok(entries) = std::fs::read_dir(&workflows_dir) {
31        for entry in entries.flatten() {
32            let name = entry.file_name();
33            let name_str = name.to_string_lossy();
34            if (name_str.ends_with(".yml") || name_str.ends_with(".yaml"))
35                && let Ok(content) = std::fs::read_to_string(entry.path())
36            {
37                extract_ci_packages(&content, root, &mut used_packages);
38            }
39        }
40    }
41
42    used_packages
43}
44
45/// Extract package names from shell commands found in a CI config file.
46///
47/// Uses line-based heuristics to find shell command lines in YAML CI configs.
48/// This intentionally avoids a full YAML parser to keep dependencies minimal.
49/// Since results only mark packages as "used" (never as "unused"), false
50/// positives from non-command YAML lines are safe — they only reduce
51/// false positive unused dependency reports.
52fn extract_ci_packages(content: &str, root: &Path, packages: &mut FxHashSet<String>) {
53    for command in extract_ci_commands(content) {
54        let parsed = parse_script(&command);
55        for cmd in parsed {
56            if !cmd.binary.is_empty() && !super::is_builtin_command(&cmd.binary) {
57                let pkg = resolve_binary_to_package(&cmd.binary, root);
58                packages.insert(pkg);
59            }
60        }
61    }
62}
63
64/// Extract shell command strings from a CI config file.
65///
66/// Recognizes:
67/// - YAML list items in script blocks: `  - npx tool --flag`
68/// - GitHub Actions run fields: `  run: command`
69/// - Multi-line run blocks: `  run: |` followed by indented lines
70fn extract_ci_commands(content: &str) -> Vec<String> {
71    let mut commands = Vec::new();
72    let mut in_multiline_run = false;
73    let mut multiline_indent = 0;
74
75    for line in content.lines() {
76        let trimmed = line.trim();
77
78        // Skip comments and empty lines
79        if trimmed.is_empty() || trimmed.starts_with('#') {
80            continue;
81        }
82
83        // Track multi-line `run: |` blocks (GitHub Actions)
84        if in_multiline_run {
85            let indent = line.len() - line.trim_start().len();
86            if indent > multiline_indent && !trimmed.is_empty() {
87                commands.push(trimmed.to_string());
88                continue;
89            }
90            in_multiline_run = false;
91            // Fall through to re-classify this line normally
92        }
93
94        // GitHub Actions: `run: |` or `- run: command` (multi-line or inline)
95        // Check both bare `run:` and list-item `- run:` forms
96        let run_value = strip_yaml_key(trimmed, "run")
97            .or_else(|| {
98                trimmed
99                    .strip_prefix("- ")
100                    .and_then(|rest| strip_yaml_key(rest.trim(), "run"))
101            })
102            .map(str::trim);
103
104        if let Some(rest) = run_value {
105            if rest == "|" || rest == "|-" || rest == "|+" {
106                in_multiline_run = true;
107                multiline_indent = line.len() - line.trim_start().len();
108            } else if !rest.is_empty() {
109                // Inline run: `run: npm test` or `- run: npm test`
110                commands.push(rest.to_string());
111            }
112            continue;
113        }
114
115        // YAML list items in script/before_script/after_script blocks
116        // GitLab CI: `  - npx @cyclonedx/cyclonedx-npm --output-file sbom.json`
117        // These are the most common form of CI commands
118        if let Some(rest) = trimmed.strip_prefix("- ") {
119            let rest = rest.trim();
120            // Skip YAML mappings (key: value), image references, and other non-commands
121            if !rest.is_empty()
122                && !rest.starts_with('{')
123                && !rest.starts_with('[')
124                && !is_yaml_mapping(rest)
125            {
126                commands.push(rest.to_string());
127            }
128        }
129    }
130
131    commands
132}
133
/// Return whatever follows `key:` when `line` begins with exactly that key.
///
/// The returned slice starts immediately after the colon and is not trimmed,
/// so `key: value` yields `" value"` and a bare `key:` yields `""`. Returns
/// `None` when `line` does not start with `key` directly followed by `:`.
fn strip_yaml_key<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    line.strip_prefix(key)
        .and_then(|after_key| after_key.strip_prefix(':'))
}
141
/// Decide whether `s` reads as a YAML `key: value` entry instead of a shell command.
///
/// Heuristic: only the first whitespace-delimited token is inspected. The
/// string is treated as a mapping when that token ends with `:` and does not
/// begin with `http` or `ftp` (so protocol-like tokens are not mistaken for
/// keys). An empty or all-whitespace string is not a mapping.
fn is_yaml_mapping(s: &str) -> bool {
    match s.split_whitespace().next() {
        Some(token) => {
            let protocol_like = token.starts_with("http") || token.starts_with("ftp");
            token.ends_with(':') && !protocol_like
        }
        None => false,
    }
}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `commands` contains exactly the command line `expected`.
    fn has_command(commands: &[String], expected: &str) -> bool {
        commands.iter().any(|command| command == expected)
    }

    /// Run `extract_ci_packages` on `yaml` against a project root that does
    /// not exist and return the packages it collected.
    fn packages_in(yaml: &str) -> FxHashSet<String> {
        let mut found = FxHashSet::default();
        extract_ci_packages(yaml, Path::new("/nonexistent"), &mut found);
        found
    }

    // ── extract_ci_commands tests ──────────────────────────────────

    #[test]
    fn gitlab_ci_script_items() {
        let yaml = r"
stages:
  - build
  - test

build:
  stage: build
  script:
    - npm ci
    - npx @cyclonedx/cyclonedx-npm --output-file sbom.json
    - npm run build
";
        let found = extract_ci_commands(yaml);
        assert!(has_command(&found, "npm ci"));
        assert!(has_command(
            &found,
            "npx @cyclonedx/cyclonedx-npm --output-file sbom.json"
        ));
        assert!(has_command(&found, "npm run build"));
    }

    #[test]
    fn github_actions_inline_run() {
        let yaml = r"
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: npm ci
      - run: npx eslint src
";
        let found = extract_ci_commands(yaml);
        assert!(has_command(&found, "npm ci"));
        assert!(has_command(&found, "npx eslint src"));
    }

    #[test]
    fn github_actions_multiline_run() {
        let yaml = r"
jobs:
  build:
    steps:
      - run: |
          npm ci
          npx @cyclonedx/cyclonedx-npm --output sbom.json
          npm run build
";
        let found = extract_ci_commands(yaml);
        assert!(has_command(&found, "npm ci"));
        assert!(has_command(
            &found,
            "npx @cyclonedx/cyclonedx-npm --output sbom.json"
        ));
        assert!(has_command(&found, "npm run build"));
    }

    #[test]
    fn yaml_mappings_filtered() {
        let yaml = r"
image: node:18
stages:
  - build
variables:
  NODE_ENV: production
build:
  script:
    - npm ci
";
        let found = extract_ci_commands(yaml);
        // "node:18" and "NODE_ENV: production" should NOT be treated as commands
        assert!(found.iter().all(|command| !command.contains("node:18")));
        assert!(found.iter().all(|command| !command.contains("NODE_ENV")));
        assert!(has_command(&found, "npm ci"));
    }

    #[test]
    fn comments_and_empty_lines_skipped() {
        let yaml = r"
# This is a comment
  # Indented comment

build:
  script:
    - npm ci
";
        let found = extract_ci_commands(yaml);
        assert_eq!(found, vec!["npm ci"]);
    }

    // ── extract_ci_packages tests ──────────────────────────────────

    #[test]
    fn npx_package_extracted() {
        let packages = packages_in(
            r"
build:
  script:
    - npx @cyclonedx/cyclonedx-npm --output-file sbom.json
",
        );
        assert!(
            packages.contains("@cyclonedx/cyclonedx-npm"),
            "packages: {packages:?}"
        );
    }

    #[test]
    fn multiple_binaries_extracted() {
        let packages = packages_in(
            r"
build:
  script:
    - npx eslint src
    - npx prettier --check .
    - tsc --noEmit
",
        );
        assert!(packages.contains("eslint"));
        assert!(packages.contains("prettier"));
        // `tsc` is expected to resolve to the `typescript` package.
        assert!(packages.contains("typescript"));
    }

    #[test]
    fn builtin_commands_not_extracted() {
        let packages = packages_in(
            r"
build:
  script:
    - echo 'hello'
    - mkdir -p dist
    - cp -r build/* dist/
",
        );
        assert!(
            packages.is_empty(),
            "should not extract built-in commands: {packages:?}"
        );
    }

    #[test]
    fn github_actions_npx_extracted() {
        let packages = packages_in(
            r"
jobs:
  sbom:
    steps:
      - run: npx @cyclonedx/cyclonedx-npm --output-file sbom.json
",
        );
        assert!(packages.contains("@cyclonedx/cyclonedx-npm"));
    }

    // ── helper tests ───────────────────────────────────────────────

    #[test]
    fn strip_yaml_key_basic() {
        let cases = [
            ("run: npm test", Some(" npm test")),
            ("run:", Some("")),
            ("other: value", None),
        ];
        for (line, expected) in cases {
            assert_eq!(strip_yaml_key(line, "run"), expected);
        }
    }

    #[test]
    fn is_yaml_mapping_basic() {
        for mapping in ["NODE_ENV: production", "image: node:18"] {
            assert!(is_yaml_mapping(mapping));
        }
        for command in ["npm ci", "npx eslint src", "https://example.com"] {
            assert!(!is_yaml_mapping(command));
        }
    }
}
330}