use std::path::Path;
use rustc_hash::{FxHashMap, FxHashSet};
use super::{parse_script, resolve_binary_to_package};
/// Collects the packages referenced by commands in the project's CI configuration.
///
/// Two locations under `root` are inspected: the `.gitlab-ci.yml` file and every
/// entry of `.github/workflows` whose file name ends in `.yml` or `.yaml`.
/// Files or directories that cannot be read are skipped silently, so a project
/// without CI configuration simply yields an empty set.
///
/// `bin_map` is passed through unchanged to the binary-to-package resolution.
pub fn analyze_ci_files(root: &Path, bin_map: &FxHashMap<String, String>) -> FxHashSet<String> {
    let _span = tracing::info_span!("analyze_ci_files").entered();
    let mut used_packages = FxHashSet::default();

    // Single place that reads a CI file and feeds its commands into the result set.
    let mut scan_file = |path: &Path| {
        if let Ok(text) = std::fs::read_to_string(path) {
            extract_ci_packages(&text, root, bin_map, &mut used_packages);
        }
    };

    scan_file(&root.join(".gitlab-ci.yml"));

    if let Ok(dir) = std::fs::read_dir(root.join(".github/workflows")) {
        for entry in dir.flatten() {
            let file_name = entry.file_name();
            let file_name = file_name.to_string_lossy();
            // Check the name first so non-YAML entries are never opened.
            if file_name.ends_with(".yml") || file_name.ends_with(".yaml") {
                scan_file(&entry.path());
            }
        }
    }

    used_packages
}
/// Resolves every command found in `content` to a package name and records it.
///
/// Each command line extracted from the CI YAML is handed to `parse_script`;
/// for every parsed invocation whose binary is non-empty and is not a built-in
/// command, the result of `resolve_binary_to_package` is inserted into `packages`.
fn extract_ci_packages(
    content: &str,
    root: &Path,
    bin_map: &FxHashMap<String, String>,
    packages: &mut FxHashSet<String>,
) {
    for line in extract_ci_commands(content) {
        for invocation in parse_script(&line) {
            let binary = &invocation.binary;
            // Nothing to resolve for empty binaries or built-in commands.
            if binary.is_empty() || super::is_builtin_command(binary) {
                continue;
            }
            packages.insert(resolve_binary_to_package(binary, root, bin_map));
        }
    }
}
/// Extracts shell command lines from CI configuration YAML with a line-based heuristic.
///
/// Commands are taken from:
/// - inline `run: <cmd>` values, with or without a leading `- `;
/// - block scalars opened by a `run:` key or by a bare list item (`- |`, `- >-`, ...):
///   every more-indented line of a literal (`|`) block is one command, while the
///   more-indented lines of a folded (`>`) block are joined with single spaces into
///   one command (a blank line starts a new one);
/// - any other `- <item>` list entry that is not a flow collection (`{`/`[`) and
///   does not look like a `key: value` mapping.
///
/// Blank lines and lines whose first non-space character is `#` are never
/// reported. Commands are returned in document order, trimmed of surrounding
/// whitespace.
fn extract_ci_commands(content: &str) -> Vec<String> {
    let mut commands = Vec::new();
    // Currently open block scalar: indentation of its header line and its style.
    let mut block: Option<(usize, BlockStyle)> = None;
    // Lines of a folded block collected so far; together they form one command.
    let mut folded = String::new();

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            // Inside a folded block a blank line is a real line break, i.e. a
            // command boundary. Outside of one `folded` is empty and this is a no-op.
            flush_folded(&mut folded, &mut commands);
            continue;
        }
        if trimmed.starts_with('#') {
            continue;
        }

        let indent = line.len() - line.trim_start().len();

        if let Some((header_indent, style)) = block {
            if indent > header_indent {
                match style {
                    BlockStyle::Literal => commands.push(trimmed.to_string()),
                    BlockStyle::Folded => {
                        if !folded.is_empty() {
                            folded.push(' ');
                        }
                        folded.push_str(trimmed);
                    }
                }
                continue;
            }
            // Dedent: the block is over and this line is ordinary YAML again.
            block = None;
            flush_folded(&mut folded, &mut commands);
        }

        let list_item = trimmed.strip_prefix("- ").map(str::trim);
        let run_value = strip_yaml_key(trimmed, "run")
            .or_else(|| list_item.and_then(|item| strip_yaml_key(item, "run")))
            .map(str::trim);

        let (value, from_run_key) = if let Some(value) = run_value {
            (value, true)
        } else if let Some(item) = list_item {
            (item, false)
        } else {
            continue;
        };

        if let Some(style) = block_scalar_style(value) {
            block = Some((indent, style));
            continue;
        }

        // A `run:` value is always a command; a bare list item only when it is
        // neither a flow collection nor a nested `key: value` mapping.
        let is_command = from_run_key
            || !(value.starts_with('{') || value.starts_with('[') || is_yaml_mapping(value));
        if !value.is_empty() && is_command {
            commands.push(value.to_string());
        }
    }

    // A folded block may run to the end of the file.
    flush_folded(&mut folded, &mut commands);
    commands
}

/// Style of a YAML block scalar: `|` keeps line breaks, `>` folds them into spaces.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BlockStyle {
    Literal,
    Folded,
}

/// Parses a block scalar header such as `|`, `>-`, `|+` or `|2-`.
///
/// Returns `None` when `value` is anything else, including shell fragments that
/// merely start with `>` or `|` (e.g. `> out.txt`, `||`). A trailing `# comment`
/// after the header is accepted.
fn block_scalar_style(value: &str) -> Option<BlockStyle> {
    let mut tokens = value.split_whitespace();
    let header = tokens.next()?;
    if tokens.next().is_some_and(|token| !token.starts_with('#')) {
        return None;
    }

    let mut chars = header.chars();
    let style = match chars.next()? {
        '|' => BlockStyle::Literal,
        '>' => BlockStyle::Folded,
        _ => return None,
    };

    // At most one chomping indicator and one indentation digit may follow.
    let indicators = chars.as_str();
    let well_formed = indicators.len() <= 2
        && indicators
            .chars()
            .all(|c| matches!(c, '+' | '-' | '1'..='9'));
    well_formed.then_some(style)
}

/// Moves a pending folded command, if any, into `commands`.
fn flush_folded(folded: &mut String, commands: &mut Vec<String>) {
    if !folded.is_empty() {
        commands.push(std::mem::take(folded));
    }
}
/// Returns the text following `key:` when `line` starts with exactly that key.
///
/// The returned slice begins right after the colon, so any leading whitespace
/// of the value is preserved. Yields `None` when `line` does not start with
/// `key` or when `key` is not immediately followed by `:`.
fn strip_yaml_key<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    line.strip_prefix(key)
        .and_then(|after_key| after_key.strip_prefix(':'))
}
/// Heuristically decides whether `s` looks like a YAML `key: value` entry.
///
/// Only the first whitespace-delimited word is examined: it must end with `:`
/// and must not start with `http` or `ftp`. Empty or all-whitespace input is
/// not a mapping.
fn is_yaml_mapping(s: &str) -> bool {
    match s.split_whitespace().next() {
        Some(token) => {
            token.ends_with(':') && !(token.starts_with("http") || token.starts_with("ftp"))
        }
        None => false,
    }
}
// Unit tests for the CI command extraction and package resolution helpers.
#[cfg(test)]
mod tests {
use super::*;
// GitLab-style `script:` list items are each reported as a command.
#[test]
fn gitlab_ci_script_items() {
let content = r"
stages:
- build
- test
build:
stage: build
script:
- npm ci
- npx @cyclonedx/cyclonedx-npm --output-file sbom.json
- npm run build
";
let commands = extract_ci_commands(content);
assert!(commands.contains(&"npm ci".to_string()));
assert!(
commands.contains(&"npx @cyclonedx/cyclonedx-npm --output-file sbom.json".to_string())
);
assert!(commands.contains(&"npm run build".to_string()));
}
// GitHub Actions `- run: <cmd>` steps yield the inline command text.
#[test]
fn github_actions_inline_run() {
let content = r"
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: npm ci
- run: npx eslint src
";
let commands = extract_ci_commands(content);
assert!(commands.contains(&"npm ci".to_string()));
assert!(commands.contains(&"npx eslint src".to_string()));
}
// A `run: |` block yields one command per line of the block body.
// NOTE(review): the extractor only treats lines indented deeper than the
// `- run: |` line as block content, so this fixture depends on the body lines
// being indented inside the raw string — confirm the indentation is intact.
#[test]
fn github_actions_multiline_run() {
let content = r"
jobs:
build:
steps:
- run: |
npm ci
npx @cyclonedx/cyclonedx-npm --output sbom.json
npm run build
";
let commands = extract_ci_commands(content);
assert!(commands.contains(&"npm ci".to_string()));
assert!(commands.contains(&"npx @cyclonedx/cyclonedx-npm --output sbom.json".to_string()));
assert!(commands.contains(&"npm run build".to_string()));
}
// Plain `key: value` lines must not leak into the command list.
#[test]
fn yaml_mappings_filtered() {
let content = r"
image: node:18
stages:
- build
variables:
NODE_ENV: production
build:
script:
- npm ci
";
let commands = extract_ci_commands(content);
assert!(!commands.iter().any(|c| c.contains("node:18")));
assert!(!commands.iter().any(|c| c.contains("NODE_ENV")));
assert!(commands.contains(&"npm ci".to_string()));
}
// `#` comment lines and blank lines produce no commands at all.
#[test]
fn comments_and_empty_lines_skipped() {
let content = r"
# This is a comment
# Indented comment
build:
script:
- npm ci
";
let commands = extract_ci_commands(content);
assert_eq!(commands, vec!["npm ci"]);
}
// An `npx <package>` command resolves to the package named after `npx`.
// The root path does not exist and the binary map is empty, so the result
// comes from `resolve_binary_to_package` alone.
#[test]
fn npx_package_extracted() {
let content = r"
build:
script:
- npx @cyclonedx/cyclonedx-npm --output-file sbom.json
";
let mut packages = FxHashSet::default();
extract_ci_packages(
content,
Path::new("/nonexistent"),
&FxHashMap::default(),
&mut packages,
);
assert!(
packages.contains("@cyclonedx/cyclonedx-npm"),
"packages: {packages:?}"
);
}
// Several script lines each contribute their own package; the bare `tsc`
// binary is expected to resolve to the `typescript` package.
#[test]
fn multiple_binaries_extracted() {
let content = r"
build:
script:
- npx eslint src
- npx prettier --check .
- tsc --noEmit
";
let mut packages = FxHashSet::default();
extract_ci_packages(
content,
Path::new("/nonexistent"),
&FxHashMap::default(),
&mut packages,
);
assert!(packages.contains("eslint"));
assert!(packages.contains("prettier"));
assert!(packages.contains("typescript")); }
// Commands such as `echo`, `mkdir` and `cp` are expected to be classified as
// built-ins and therefore contribute no packages.
#[test]
fn builtin_commands_not_extracted() {
let content = r"
build:
script:
- echo 'hello'
- mkdir -p dist
- cp -r build/* dist/
";
let mut packages = FxHashSet::default();
extract_ci_packages(
content,
Path::new("/nonexistent"),
&FxHashMap::default(),
&mut packages,
);
assert!(
packages.is_empty(),
"should not extract built-in commands: {packages:?}"
);
}
// The `- run:` form goes through the same package resolution as `script:` items.
#[test]
fn github_actions_npx_extracted() {
let content = r"
jobs:
sbom:
steps:
- run: npx @cyclonedx/cyclonedx-npm --output-file sbom.json
";
let mut packages = FxHashSet::default();
extract_ci_packages(
content,
Path::new("/nonexistent"),
&FxHashMap::default(),
&mut packages,
);
assert!(packages.contains("@cyclonedx/cyclonedx-npm"));
}
// `strip_yaml_key` returns everything after `key:` (leading space included)
// and `None` when the line starts with a different key.
#[test]
fn strip_yaml_key_basic() {
assert_eq!(strip_yaml_key("run: npm test", "run"), Some(" npm test"));
assert_eq!(strip_yaml_key("run:", "run"), Some(""));
assert_eq!(strip_yaml_key("other: value", "run"), None);
}
// `is_yaml_mapping` looks only at the first word: a trailing `:` marks a
// mapping, while plain commands and URLs do not qualify.
#[test]
fn is_yaml_mapping_basic() {
assert!(is_yaml_mapping("NODE_ENV: production"));
assert!(is_yaml_mapping("image: node:18"));
assert!(!is_yaml_mapping("npm ci"));
assert!(!is_yaml_mapping("npx eslint src"));
assert!(!is_yaml_mapping("https://example.com"));
}
}