Skip to main content

provenant/parsers/
bazel.rs

//! Bazel BUILD and MODULE.bazel file parsers
//!
//! Extracts package metadata from Bazel files written in Starlark (Python-like) syntax.
//!
//! ## Features
//! - Parses Starlark syntax using `starlark_syntax`
//! - Extracts build rules whose names end with "binary" or "library" (e.g., cc_binary, cc_library)
//! - Extracts the `name` and `licenses` fields from rule arguments
//! - Falls back to the parent directory name if no matching rules are found or parsing fails
//! - **Supports multiple packages**: `extract_packages()` returns all rules (100% parity)
//! - Reads `MODULE.bazel` files: `module()` metadata, `bazel_dep()` dependencies, and overrides
//!
//! ## Usage
//! - `extract_first_package()` - Returns the first package (convenience method)
//! - `extract_packages()` - Returns ALL packages (recommended for BUILD files)
//!
//! ## Reference
//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py` (BazelBuildHandler)
18
19use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
20use packageurl::PackageUrl;
21use serde_json::{Map as JsonMap, Value as JsonValue};
22use std::path::Path;
23
24use crate::parser_warn as warn;
25use starlark_syntax::syntax::ast;
26use starlark_syntax::syntax::module::AstModuleFields;
27use starlark_syntax::syntax::{AstModule, Dialect};
28
29use super::PackageParser;
30
/// Argument list of a Starlark call expression in the payload-free AST.
type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;
/// Key written into a package's `extra_data` to record whether every top-level
/// expression statement of the BUILD file is a direct (identifier) call.
const SCANCODE_SIMPLE_TOP_LEVEL_KEY: &str = "scancode_simple_top_level";
33
34struct StarlarkCall<'a> {
35    func: &'a ast::AstExpr,
36    args: &'a StarlarkCallArgs,
37}
38
39pub struct BazelBuildParser;
40
41impl PackageParser for BazelBuildParser {
42    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
43
44    fn is_match(path: &Path) -> bool {
45        path.file_name()
46            .and_then(|name| name.to_str())
47            .is_some_and(|name| name == "BUILD")
48    }
49
50    fn extract_packages(path: &Path) -> Vec<PackageData> {
51        match parse_bazel_build(path) {
52            Ok(packages) if !packages.is_empty() => packages,
53            Ok(_) => vec![fallback_package_data(path)],
54            Err(e) => {
55                warn!("Failed to parse Bazel BUILD file {:?}: {}", path, e);
56                vec![fallback_package_data(path)]
57            }
58        }
59    }
60}
61
62/// Parse a Bazel BUILD file and extract all package data
63fn parse_bazel_build(path: &Path) -> Result<Vec<PackageData>, String> {
64    let content =
65        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
66    let module = parse_starlark_module("<BUILD>", content)?;
67    let scancode_simple_top_level = is_scancode_simple_top_level_module(&module);
68
69    let mut packages = Vec::new();
70
71    for statement in top_level_statements(&module) {
72        if let Some(mut package_data) = extract_package_from_statement(statement) {
73            set_scancode_simple_top_level(&mut package_data, scancode_simple_top_level);
74            packages.push(package_data);
75        }
76    }
77
78    Ok(packages)
79}
80
81/// Extract package data from a single AST statement
82fn extract_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
83    let call = extract_call(statement)?;
84    let rule_name = extract_call_name(&call)?;
85
86    if !check_rule_name_ending(rule_name) {
87        return None;
88    }
89
90    let name = extract_string_kwarg(&call, "name")?;
91    let licenses = extract_string_list_kwarg(&call, "licenses");
92    let purl = build_bazel_purl(&name, None);
93
94    Some(PackageData {
95        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
96        name: Some(name),
97        extracted_license_statement: licenses.map(|licenses| licenses.join(", ")),
98        datasource_id: Some(DatasourceId::BazelBuild),
99        purl,
100        ..Default::default()
101    })
102}
103
/// Returns `true` when `rule_name` ends with one of the recognised rule
/// suffixes, "binary" or "library" (e.g. `cc_binary`, `py_library`).
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .iter()
        .any(|suffix| rule_name.ends_with(suffix))
}
108
109/// Create fallback package data using parent directory name
110fn fallback_package_data(path: &Path) -> PackageData {
111    let name = path
112        .parent()
113        .and_then(|p| p.file_name())
114        .and_then(|n| n.to_str())
115        .map(|s| s.to_string());
116
117    PackageData {
118        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
119        purl: name
120            .as_deref()
121            .and_then(|name| build_bazel_purl(name, None)),
122        name,
123        datasource_id: Some(DatasourceId::BazelBuild),
124        ..Default::default()
125    }
126}
127
128fn set_scancode_simple_top_level(package_data: &mut PackageData, enabled: bool) {
129    let extra_data = package_data.extra_data.get_or_insert_with(Default::default);
130    extra_data.insert(
131        SCANCODE_SIMPLE_TOP_LEVEL_KEY.to_string(),
132        JsonValue::Bool(enabled),
133    );
134}
135
136fn is_scancode_simple_top_level_module(module: &AstModule) -> bool {
137    top_level_statements(module)
138        .iter()
139        .all(is_scancode_simple_top_level_statement)
140}
141
142fn is_scancode_simple_top_level_statement(statement: &ast::AstStmt) -> bool {
143    match &statement.node {
144        ast::StmtP::Expression(expr) => {
145            matches!(&expr.node, ast::ExprP::Call(func, _) if matches!(&func.node, ast::ExprP::Identifier(_)))
146        }
147        _ => true,
148    }
149}
150
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PackageType;
    use std::path::PathBuf;

    /// Parses `source` as a BUILD file (panicking on syntax errors) and
    /// reports whether the module is classified as "simple top level".
    fn is_simple_build(source: &str) -> bool {
        let module =
            parse_starlark_module("<BUILD>", source.to_string()).expect("parse BUILD");
        is_scancode_simple_top_level_module(&module)
    }

    #[test]
    fn test_is_match() {
        for accepted in ["BUILD", "path/to/BUILD"] {
            assert!(BazelBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD.bazel", "build", "BUCK"] {
            assert!(!BazelBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    #[test]
    fn test_check_rule_name_ending() {
        for rule in ["cc_binary", "cc_library", "java_binary", "py_library"] {
            assert!(check_rule_name_ending(rule));
        }
        for rule in ["filegroup", "load", "cc_test"] {
            assert!(!check_rule_name_ending(rule));
        }
    }

    #[test]
    fn test_fallback_package_data() {
        let build_file = PathBuf::from("/path/to/myproject/BUILD");
        let package = fallback_package_data(&build_file);

        assert_eq!(package.package_type, Some(PackageType::Bazel));
        assert_eq!(package.name, Some("myproject".to_string()));
        assert_eq!(package.purl.as_deref(), Some("pkg:bazel/myproject"));
    }

    #[test]
    fn test_scancode_simple_top_level_allows_direct_calls() {
        assert!(is_simple_build(
            "cc_library(name = \"demo\")\npy_binary(name = \"tool\")\n"
        ));
    }

    #[test]
    fn test_scancode_simple_top_level_rejects_attribute_calls() {
        assert!(!is_simple_build(
            "selects.config_setting_group(name = \"demo\")\ncc_library(name = \"demo\")\n"
        ));
    }

    #[test]
    fn test_scancode_simple_top_level_rejects_non_call_expressions() {
        assert!(!is_simple_build("[(cc_binary(name = \"demo\"),)]\n"));
    }
}
218
// Registers metadata for the BUILD parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the macro definition is not visible here; the arguments are
// presumably description, glob patterns, package type, primary language
// (empty), and documentation URL — confirm against the macro.
crate::register_parser!(
    "Bazel BUILD file",
    &["**/BUILD"],
    "bazel",
    "",
    Some("https://bazel.build/"),
);
226
227pub struct BazelModuleParser;
228
229impl PackageParser for BazelModuleParser {
230    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
231
232    fn is_match(path: &Path) -> bool {
233        path.file_name()
234            .and_then(|name| name.to_str())
235            .is_some_and(|name| name == "MODULE.bazel")
236    }
237
238    fn extract_packages(path: &Path) -> Vec<PackageData> {
239        match parse_bazel_module(path) {
240            Ok(package) => vec![package],
241            Err(e) => {
242                warn!("Failed to parse Bazel MODULE.bazel {:?}: {}", path, e);
243                vec![default_bazel_module_package_data()]
244            }
245        }
246    }
247}
248
249fn parse_bazel_module(path: &Path) -> Result<PackageData, String> {
250    let content =
251        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
252    let module = parse_starlark_module("<MODULE.bazel>", content)?;
253
254    let mut package = default_bazel_module_package_data();
255    let mut extra_data = JsonMap::new();
256    let mut dependencies = Vec::new();
257    let mut overrides = Vec::new();
258
259    for statement in top_level_statements(&module) {
260        let Some(call) = extract_call(statement) else {
261            continue;
262        };
263
264        let Some(function_name) = extract_call_name(&call) else {
265            continue;
266        };
267
268        match function_name {
269            "module" => {
270                package.name = extract_string_kwarg(&call, "name");
271                package.version = extract_string_kwarg(&call, "version");
272                package.purl = package
273                    .name
274                    .as_deref()
275                    .and_then(|name| build_bazel_purl(name, package.version.as_deref()));
276
277                if let Some(repo_name) = extract_string_kwarg(&call, "repo_name") {
278                    extra_data.insert("repo_name".to_string(), JsonValue::String(repo_name));
279                }
280                if let Some(compatibility_level) = extract_int_kwarg(&call, "compatibility_level") {
281                    extra_data.insert(
282                        "compatibility_level".to_string(),
283                        JsonValue::Number(compatibility_level.into()),
284                    );
285                }
286                if let Some(bazel_compatibility) = extract_kwarg_json(&call, "bazel_compatibility")
287                {
288                    extra_data.insert("bazel_compatibility".to_string(), bazel_compatibility);
289                }
290            }
291            "bazel_dep" => {
292                if let Some(dep) = extract_bazel_dependency(&call) {
293                    dependencies.push(dep);
294                }
295            }
296            "archive_override"
297            | "git_override"
298            | "local_path_override"
299            | "single_version_override"
300            | "multiple_version_override" => {
301                overrides.push(extract_override(function_name, &call));
302            }
303            _ => {}
304        }
305    }
306
307    if package.name.is_none() {
308        return Ok(default_bazel_module_package_data());
309    }
310
311    if !overrides.is_empty() {
312        extra_data.insert("overrides".to_string(), JsonValue::Array(overrides));
313    }
314
315    package.dependencies = dependencies;
316    package.extra_data = (!extra_data.is_empty()).then(|| extra_data.into_iter().collect());
317    Ok(package)
318}
319
320fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
321    let dialect = Dialect {
322        enable_top_level_stmt: true,
323        ..Dialect::Standard
324    };
325    AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
326}
327
328fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
329    match &module.statement().node {
330        ast::StmtP::Statements(statements) => statements,
331        _ => std::slice::from_ref(module.statement()),
332    }
333}
334
335fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
336    match &statement.node {
337        ast::StmtP::Expression(expr) => extract_call_expr(expr),
338        ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
339        _ => None,
340    }
341}
342
343fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
344    match &expr.node {
345        ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
346        _ => None,
347    }
348}
349
350fn extract_call_name<'a>(call: &'a StarlarkCall<'_>) -> Option<&'a str> {
351    match &call.func.node {
352        ast::ExprP::Identifier(identifier) => Some(identifier.node.ident.as_str()),
353        _ => None,
354    }
355}
356
357fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
358    call.args
359        .args
360        .iter()
361        .find_map(|argument| match &argument.node {
362            ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
363            _ => None,
364        })
365}
366
367fn extract_string_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
368    extract_named_kwarg(call, key).and_then(expr_as_string)
369}
370
371fn extract_string_list_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
372    let expr = extract_named_kwarg(call, key)?;
373    let items = match &expr.node {
374        ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
375        _ => return None,
376    };
377    let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
378    (!values.is_empty()).then_some(values)
379}
380
381fn extract_bool_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<bool> {
382    extract_named_kwarg(call, key).and_then(expr_as_bool)
383}
384
385fn extract_int_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<i64> {
386    extract_named_kwarg(call, key).and_then(expr_as_i64)
387}
388
389fn extract_kwarg_json(call: &StarlarkCall<'_>, key: &str) -> Option<JsonValue> {
390    extract_named_kwarg(call, key).and_then(expr_to_json)
391}
392
393fn extract_bazel_dependency(call: &StarlarkCall<'_>) -> Option<Dependency> {
394    let name = extract_string_kwarg(call, "name")?;
395    let version = extract_string_kwarg(call, "version");
396    let is_dev = extract_bool_kwarg(call, "dev_dependency").unwrap_or(false);
397    let mut extra_data = JsonMap::new();
398
399    for field in ["repo_name", "max_compatibility_level", "registry"] {
400        if let Some(value) = extract_kwarg_json(call, field) {
401            extra_data.insert(field.to_string(), value);
402        }
403    }
404
405    Some(Dependency {
406        purl: build_bazel_purl(&name, version.as_deref()),
407        extracted_requirement: version.clone(),
408        scope: Some(if is_dev { "dev" } else { "dependencies" }.to_string()),
409        is_runtime: Some(!is_dev),
410        is_optional: Some(is_dev),
411        is_pinned: Some(version.is_some()),
412        is_direct: Some(true),
413        resolved_package: None,
414        extra_data: (!extra_data.is_empty()).then(|| extra_data.into_iter().collect()),
415    })
416}
417
418fn extract_override(kind: &str, call: &StarlarkCall<'_>) -> JsonValue {
419    let mut override_map = JsonMap::new();
420    override_map.insert("kind".to_string(), JsonValue::String(kind.to_string()));
421    for argument in &call.args.args {
422        if let ast::ArgumentP::Named(name, value) = &argument.node
423            && let Some(value) = expr_to_json(value)
424        {
425            override_map.insert(name.node.clone(), value);
426        }
427    }
428    JsonValue::Object(override_map)
429}
430
431fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
432    match &expr.node {
433        ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
434        _ => None,
435    }
436}
437
438fn expr_as_bool(expr: &ast::AstExpr) -> Option<bool> {
439    match &expr.node {
440        ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
441            "True" => Some(true),
442            "False" => Some(false),
443            _ => None,
444        },
445        _ => None,
446    }
447}
448
449fn expr_as_i64(expr: &ast::AstExpr) -> Option<i64> {
450    match &expr.node {
451        ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value.node.to_string().parse().ok(),
452        _ => None,
453    }
454}
455
456fn expr_to_json(expr: &ast::AstExpr) -> Option<JsonValue> {
457    match &expr.node {
458        ast::ExprP::Literal(ast::AstLiteral::String(value)) => {
459            Some(JsonValue::String(value.node.clone()))
460        }
461        ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value
462            .node
463            .to_string()
464            .parse::<i64>()
465            .ok()
466            .map(|value| JsonValue::Number(value.into()))
467            .or_else(|| Some(JsonValue::String(value.node.to_string()))),
468        ast::ExprP::Literal(ast::AstLiteral::Float(value)) => {
469            serde_json::Number::from_f64(value.node).map(JsonValue::Number)
470        }
471        ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
472            "True" => Some(JsonValue::Bool(true)),
473            "False" => Some(JsonValue::Bool(false)),
474            "None" => Some(JsonValue::Null),
475            _ => None,
476        },
477        ast::ExprP::List(elts) | ast::ExprP::Tuple(elts) => Some(JsonValue::Array(
478            elts.iter().filter_map(expr_to_json).collect(),
479        )),
480        ast::ExprP::Dict(items) => {
481            let mut map = JsonMap::new();
482            for (key, value) in items {
483                let Some(key) = expr_as_string(key) else {
484                    continue;
485                };
486                if let Some(value) = expr_to_json(value) {
487                    map.insert(key, value);
488                }
489            }
490            Some(JsonValue::Object(map))
491        }
492        _ => None,
493    }
494}
495
496fn build_bazel_purl(name: &str, version: Option<&str>) -> Option<String> {
497    let mut purl = PackageUrl::new("bazel", name).ok()?;
498    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
499        purl.with_version(version).ok()?;
500    }
501    Some(purl.to_string())
502}
503
504fn default_bazel_module_package_data() -> PackageData {
505    PackageData {
506        package_type: Some(BazelModuleParser::PACKAGE_TYPE),
507        datasource_id: Some(DatasourceId::BazelModule),
508        ..Default::default()
509    }
510}
511
// Registers metadata for the MODULE.bazel parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the macro definition is not visible here; the arguments are
// presumably description, glob patterns, package type, primary language
// (empty), and documentation URL — confirm against the macro.
crate::register_parser!(
    "Bazel MODULE.bazel file",
    &["**/MODULE.bazel"],
    "bazel",
    "",
    Some("https://bazel.build/external/module"),
);