Skip to main content

provenant/parsers/
bazel.rs

//! Bazel BUILD and MODULE.bazel file parsers
//!
//! Extracts package metadata from Bazel `BUILD` and `MODULE.bazel` files, both of which use Starlark (Python-like) syntax.
4//!
5//! ## Features
6//! - Parses Starlark syntax using starlark_syntax
7//! - Extracts build rules ending with "binary" or "library" (e.g., cc_binary, cc_library)
8//! - Extracts name and licenses fields from rule arguments
9//! - Falls back to parent directory name if no rules found
10//! - **Supports multiple packages**: `extract_packages()` returns all rules (100% parity)
11//!
12//! ## Usage
13//! - `extract_first_package()` - Returns first package (convenience method)
14//! - `extract_packages()` - Returns ALL packages (recommended for BUILD files)
15//!
16//! ## Reference
17//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py` (BazelBuildHandler)
18
19use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
20use packageurl::PackageUrl;
21use serde_json::{Map as JsonMap, Value as JsonValue};
22use std::path::Path;
23
24use crate::parser_warn as warn;
25use starlark_syntax::syntax::ast;
26use starlark_syntax::syntax::module::AstModuleFields;
27use starlark_syntax::syntax::{AstModule, Dialect};
28
29use super::PackageParser;
30
/// Call-argument node of the Starlark AST, instantiated with `ast::AstNoPayload`.
type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;

/// Borrowed view of a Starlark call expression, as produced by `extract_call_expr`.
struct StarlarkCall<'a> {
    /// Expression being called (the callee of the `ExprP::Call` node).
    func: &'a ast::AstExpr,
    /// Arguments supplied to the call.
    args: &'a StarlarkCallArgs,
}
37
38pub struct BazelBuildParser;
39
40impl PackageParser for BazelBuildParser {
41    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
42
43    fn is_match(path: &Path) -> bool {
44        path.file_name()
45            .and_then(|name| name.to_str())
46            .is_some_and(|name| name == "BUILD")
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        match parse_bazel_build(path) {
51            Ok(packages) if !packages.is_empty() => packages,
52            Ok(_) => vec![fallback_package_data(path)],
53            Err(e) => {
54                warn!("Failed to parse Bazel BUILD file {:?}: {}", path, e);
55                vec![fallback_package_data(path)]
56            }
57        }
58    }
59}
60
61/// Parse a Bazel BUILD file and extract all package data
62fn parse_bazel_build(path: &Path) -> Result<Vec<PackageData>, String> {
63    let content =
64        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
65    let module = parse_starlark_module("<BUILD>", content)?;
66
67    let mut packages = Vec::new();
68
69    for statement in top_level_statements(&module) {
70        if let Some(package_data) = extract_package_from_statement(statement) {
71            packages.push(package_data);
72        }
73    }
74
75    Ok(packages)
76}
77
78/// Extract package data from a single AST statement
79fn extract_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
80    let call = extract_call(statement)?;
81    let rule_name = extract_call_name(&call)?;
82
83    if !check_rule_name_ending(rule_name) {
84        return None;
85    }
86
87    let name = extract_string_kwarg(&call, "name")?;
88    let licenses = extract_string_list_kwarg(&call, "licenses");
89
90    Some(PackageData {
91        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
92        name: Some(name),
93        extracted_license_statement: licenses.map(|licenses| licenses.join(", ")),
94        datasource_id: Some(DatasourceId::BazelBuild),
95        ..Default::default()
96    })
97}
98
/// Report whether a rule name has the suffix "binary" or "library".
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .iter()
        .any(|&suffix| rule_name.ends_with(suffix))
}
103
104/// Create fallback package data using parent directory name
105fn fallback_package_data(path: &Path) -> PackageData {
106    let name = path
107        .parent()
108        .and_then(|p| p.file_name())
109        .and_then(|n| n.to_str())
110        .map(|s| s.to_string());
111
112    PackageData {
113        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
114        name,
115        datasource_id: Some(DatasourceId::BazelBuild),
116        ..Default::default()
117    }
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PackageType;
    use std::path::PathBuf;

    /// Only a final path component of exactly `BUILD` is accepted.
    #[test]
    fn test_is_match() {
        for accepted in ["BUILD", "path/to/BUILD"] {
            assert!(
                BazelBuildParser::is_match(&PathBuf::from(accepted)),
                "expected {accepted:?} to match"
            );
        }
        for rejected in ["BUILD.bazel", "build", "BUCK"] {
            assert!(
                !BazelBuildParser::is_match(&PathBuf::from(rejected)),
                "expected {rejected:?} not to match"
            );
        }
    }

    /// Rule names are selected purely by their "binary"/"library" suffix.
    #[test]
    fn test_check_rule_name_ending() {
        for matching in ["cc_binary", "cc_library", "java_binary", "py_library"] {
            assert!(
                check_rule_name_ending(matching),
                "expected {matching:?} to be selected"
            );
        }
        for other in ["filegroup", "load", "cc_test"] {
            assert!(
                !check_rule_name_ending(other),
                "expected {other:?} to be ignored"
            );
        }
    }

    /// The fallback package takes its name from the parent directory.
    #[test]
    fn test_fallback_package_data() {
        let build_file = PathBuf::from("/path/to/myproject/BUILD");
        let package = fallback_package_data(&build_file);
        assert_eq!(package.name, Some("myproject".to_string()));
        assert_eq!(package.package_type, Some(PackageType::Bazel));
    }
}
154
// Invokes `crate::register_parser!` for the Bazel BUILD parser.
// NOTE(review): the macro is defined elsewhere; the arguments look like a
// description, path globs, a package type, an (empty) fourth field, and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Bazel BUILD file",
    &["**/BUILD"],
    "bazel",
    "",
    Some("https://bazel.build/"),
);
162
163pub struct BazelModuleParser;
164
165impl PackageParser for BazelModuleParser {
166    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
167
168    fn is_match(path: &Path) -> bool {
169        path.file_name()
170            .and_then(|name| name.to_str())
171            .is_some_and(|name| name == "MODULE.bazel")
172    }
173
174    fn extract_packages(path: &Path) -> Vec<PackageData> {
175        match parse_bazel_module(path) {
176            Ok(package) => vec![package],
177            Err(e) => {
178                warn!("Failed to parse Bazel MODULE.bazel {:?}: {}", path, e);
179                vec![default_bazel_module_package_data()]
180            }
181        }
182    }
183}
184
185fn parse_bazel_module(path: &Path) -> Result<PackageData, String> {
186    let content =
187        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
188    let module = parse_starlark_module("<MODULE.bazel>", content)?;
189
190    let mut package = default_bazel_module_package_data();
191    let mut extra_data = JsonMap::new();
192    let mut dependencies = Vec::new();
193    let mut overrides = Vec::new();
194
195    for statement in top_level_statements(&module) {
196        let Some(call) = extract_call(statement) else {
197            continue;
198        };
199
200        let Some(function_name) = extract_call_name(&call) else {
201            continue;
202        };
203
204        match function_name {
205            "module" => {
206                package.name = extract_string_kwarg(&call, "name");
207                package.version = extract_string_kwarg(&call, "version");
208                package.purl = package
209                    .name
210                    .as_deref()
211                    .and_then(|name| build_bazel_purl(name, package.version.as_deref()));
212
213                if let Some(repo_name) = extract_string_kwarg(&call, "repo_name") {
214                    extra_data.insert("repo_name".to_string(), JsonValue::String(repo_name));
215                }
216                if let Some(compatibility_level) = extract_int_kwarg(&call, "compatibility_level") {
217                    extra_data.insert(
218                        "compatibility_level".to_string(),
219                        JsonValue::Number(compatibility_level.into()),
220                    );
221                }
222                if let Some(bazel_compatibility) = extract_kwarg_json(&call, "bazel_compatibility")
223                {
224                    extra_data.insert("bazel_compatibility".to_string(), bazel_compatibility);
225                }
226            }
227            "bazel_dep" => {
228                if let Some(dep) = extract_bazel_dependency(&call) {
229                    dependencies.push(dep);
230                }
231            }
232            "archive_override"
233            | "git_override"
234            | "local_path_override"
235            | "single_version_override"
236            | "multiple_version_override" => {
237                overrides.push(extract_override(function_name, &call));
238            }
239            _ => {}
240        }
241    }
242
243    if package.name.is_none() {
244        return Ok(default_bazel_module_package_data());
245    }
246
247    if !overrides.is_empty() {
248        extra_data.insert("overrides".to_string(), JsonValue::Array(overrides));
249    }
250
251    package.dependencies = dependencies;
252    package.extra_data = (!extra_data.is_empty()).then(|| extra_data.into_iter().collect());
253    Ok(package)
254}
255
256fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
257    let dialect = Dialect {
258        enable_top_level_stmt: true,
259        ..Dialect::Standard
260    };
261    AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
262}
263
264fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
265    match &module.statement().node {
266        ast::StmtP::Statements(statements) => statements,
267        _ => std::slice::from_ref(module.statement()),
268    }
269}
270
271fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
272    match &statement.node {
273        ast::StmtP::Expression(expr) => extract_call_expr(expr),
274        ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
275        _ => None,
276    }
277}
278
279fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
280    match &expr.node {
281        ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
282        _ => None,
283    }
284}
285
286fn extract_call_name<'a>(call: &'a StarlarkCall<'_>) -> Option<&'a str> {
287    match &call.func.node {
288        ast::ExprP::Identifier(identifier) => Some(identifier.node.ident.as_str()),
289        _ => None,
290    }
291}
292
293fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
294    call.args
295        .args
296        .iter()
297        .find_map(|argument| match &argument.node {
298            ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
299            _ => None,
300        })
301}
302
303fn extract_string_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
304    extract_named_kwarg(call, key).and_then(expr_as_string)
305}
306
307fn extract_string_list_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
308    let expr = extract_named_kwarg(call, key)?;
309    let items = match &expr.node {
310        ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
311        _ => return None,
312    };
313    let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
314    (!values.is_empty()).then_some(values)
315}
316
317fn extract_bool_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<bool> {
318    extract_named_kwarg(call, key).and_then(expr_as_bool)
319}
320
321fn extract_int_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<i64> {
322    extract_named_kwarg(call, key).and_then(expr_as_i64)
323}
324
325fn extract_kwarg_json(call: &StarlarkCall<'_>, key: &str) -> Option<JsonValue> {
326    extract_named_kwarg(call, key).and_then(expr_to_json)
327}
328
329fn extract_bazel_dependency(call: &StarlarkCall<'_>) -> Option<Dependency> {
330    let name = extract_string_kwarg(call, "name")?;
331    let version = extract_string_kwarg(call, "version");
332    let is_dev = extract_bool_kwarg(call, "dev_dependency").unwrap_or(false);
333    let mut extra_data = JsonMap::new();
334
335    for field in ["repo_name", "max_compatibility_level", "registry"] {
336        if let Some(value) = extract_kwarg_json(call, field) {
337            extra_data.insert(field.to_string(), value);
338        }
339    }
340
341    Some(Dependency {
342        purl: build_bazel_purl(&name, version.as_deref()),
343        extracted_requirement: version.clone(),
344        scope: Some(if is_dev { "dev" } else { "dependencies" }.to_string()),
345        is_runtime: Some(!is_dev),
346        is_optional: Some(is_dev),
347        is_pinned: Some(version.is_some()),
348        is_direct: Some(true),
349        resolved_package: None,
350        extra_data: (!extra_data.is_empty()).then(|| extra_data.into_iter().collect()),
351    })
352}
353
354fn extract_override(kind: &str, call: &StarlarkCall<'_>) -> JsonValue {
355    let mut override_map = JsonMap::new();
356    override_map.insert("kind".to_string(), JsonValue::String(kind.to_string()));
357    for argument in &call.args.args {
358        if let ast::ArgumentP::Named(name, value) = &argument.node
359            && let Some(value) = expr_to_json(value)
360        {
361            override_map.insert(name.node.clone(), value);
362        }
363    }
364    JsonValue::Object(override_map)
365}
366
367fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
368    match &expr.node {
369        ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
370        _ => None,
371    }
372}
373
374fn expr_as_bool(expr: &ast::AstExpr) -> Option<bool> {
375    match &expr.node {
376        ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
377            "True" => Some(true),
378            "False" => Some(false),
379            _ => None,
380        },
381        _ => None,
382    }
383}
384
385fn expr_as_i64(expr: &ast::AstExpr) -> Option<i64> {
386    match &expr.node {
387        ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value.node.to_string().parse().ok(),
388        _ => None,
389    }
390}
391
392fn expr_to_json(expr: &ast::AstExpr) -> Option<JsonValue> {
393    match &expr.node {
394        ast::ExprP::Literal(ast::AstLiteral::String(value)) => {
395            Some(JsonValue::String(value.node.clone()))
396        }
397        ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value
398            .node
399            .to_string()
400            .parse::<i64>()
401            .ok()
402            .map(|value| JsonValue::Number(value.into()))
403            .or_else(|| Some(JsonValue::String(value.node.to_string()))),
404        ast::ExprP::Literal(ast::AstLiteral::Float(value)) => {
405            serde_json::Number::from_f64(value.node).map(JsonValue::Number)
406        }
407        ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
408            "True" => Some(JsonValue::Bool(true)),
409            "False" => Some(JsonValue::Bool(false)),
410            "None" => Some(JsonValue::Null),
411            _ => None,
412        },
413        ast::ExprP::List(elts) | ast::ExprP::Tuple(elts) => Some(JsonValue::Array(
414            elts.iter().filter_map(expr_to_json).collect(),
415        )),
416        ast::ExprP::Dict(items) => {
417            let mut map = JsonMap::new();
418            for (key, value) in items {
419                let Some(key) = expr_as_string(key) else {
420                    continue;
421                };
422                if let Some(value) = expr_to_json(value) {
423                    map.insert(key, value);
424                }
425            }
426            Some(JsonValue::Object(map))
427        }
428        _ => None,
429    }
430}
431
432fn build_bazel_purl(name: &str, version: Option<&str>) -> Option<String> {
433    let mut purl = PackageUrl::new("bazel", name).ok()?;
434    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
435        purl.with_version(version).ok()?;
436    }
437    Some(purl.to_string())
438}
439
440fn default_bazel_module_package_data() -> PackageData {
441    PackageData {
442        package_type: Some(BazelModuleParser::PACKAGE_TYPE),
443        datasource_id: Some(DatasourceId::BazelModule),
444        ..Default::default()
445    }
446}
447
// Invokes `crate::register_parser!` for the Bazel MODULE.bazel parser.
// NOTE(review): the macro is defined elsewhere; the arguments look like a
// description, path globs, a package type, an (empty) fourth field, and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Bazel MODULE.bazel file",
    &["**/MODULE.bazel"],
    "bazel",
    "",
    Some("https://bazel.build/external/module"),
);