Skip to main content

provenant/parsers/
bazel.rs

1//! Bazel BUILD file parser
2//!
3//! Extracts package metadata from Bazel BUILD files using Starlark (Python-like) syntax.
4//!
5//! ## Features
6//! - Parses Starlark syntax using rustpython_parser
7//! - Extracts build rules ending with "binary" or "library" (e.g., cc_binary, cc_library)
8//! - Extracts name and licenses fields from rule arguments
9//! - Falls back to parent directory name if no rules found
10//! - **Supports multiple packages**: `extract_packages()` returns all rules (100% parity)
11//!
12//! ## Usage
13//! - `extract_first_package()` - Returns first package (convenience method)
14//! - `extract_packages()` - Returns ALL packages (recommended for BUILD files)
15//!
16//! ## Reference
17//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py` (BazelBuildHandler)
18
19use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
20use packageurl::PackageUrl;
21use serde_json::{Map as JsonMap, Value as JsonValue};
22use std::path::Path;
23
24use log::warn;
25use rustpython_parser::{Parse, ast};
26
27use super::PackageParser;
28
29pub struct BazelBuildParser;
30
31impl PackageParser for BazelBuildParser {
32    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
33
34    fn is_match(path: &Path) -> bool {
35        path.file_name()
36            .and_then(|name| name.to_str())
37            .is_some_and(|name| name == "BUILD")
38    }
39
40    fn extract_packages(path: &Path) -> Vec<PackageData> {
41        match parse_bazel_build(path) {
42            Ok(packages) if !packages.is_empty() => packages,
43            Ok(_) => vec![fallback_package_data(path)],
44            Err(e) => {
45                warn!("Failed to parse Bazel BUILD file {:?}: {}", path, e);
46                vec![fallback_package_data(path)]
47            }
48        }
49    }
50}
51
52/// Parse a Bazel BUILD file and extract all package data
53fn parse_bazel_build(path: &Path) -> Result<Vec<PackageData>, String> {
54    let content =
55        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
56
57    let module = ast::Suite::parse(&content, "<BUILD>")
58        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
59
60    let mut packages = Vec::new();
61
62    for statement in &module {
63        if let Some(package_data) = extract_from_statement(statement) {
64            packages.push(package_data);
65        }
66    }
67
68    Ok(packages)
69}
70
71/// Extract package data from a single AST statement
72fn extract_from_statement(statement: &ast::Stmt) -> Option<PackageData> {
73    match statement {
74        // Direct function call: cc_binary(name="foo", ...)
75        ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
76            if let ast::Expr::Call(call) = value.as_ref() {
77                return extract_from_call(call);
78            }
79        }
80        // Assignment to function call: x = cc_binary(name="foo", ...)
81        ast::Stmt::Assign(ast::StmtAssign { value, .. }) => {
82            if let ast::Expr::Call(call) = value.as_ref() {
83                return extract_from_call(call);
84            }
85        }
86        _ => {}
87    }
88    None
89}
90
91/// Extract package data from a function call
92fn extract_from_call(call: &ast::ExprCall) -> Option<PackageData> {
93    // Get the function name
94    let rule_name = match call.func.as_ref() {
95        ast::Expr::Name(ast::ExprName { id, .. }) => id.as_str(),
96        _ => return None,
97    };
98
99    // Check if rule name ends with "binary" or "library"
100    if !check_rule_name_ending(rule_name) {
101        return None;
102    }
103
104    // Extract arguments
105    let mut name: Option<String> = None;
106    let mut licenses: Option<Vec<String>> = None;
107
108    for keyword in &call.keywords {
109        let arg_name = keyword.arg.as_ref()?.as_str();
110
111        match arg_name {
112            "name" => {
113                if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = &keyword.value
114                    && let ast::Constant::Str(s) = value
115                {
116                    name = Some(s.clone());
117                }
118            }
119            "licenses" => {
120                if let ast::Expr::List(ast::ExprList { elts, .. }) = &keyword.value {
121                    let mut license_list = Vec::new();
122                    for elt in elts {
123                        if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = elt
124                            && let ast::Constant::Str(s) = value
125                        {
126                            license_list.push(s.clone());
127                        }
128                    }
129                    if !license_list.is_empty() {
130                        licenses = Some(license_list);
131                    }
132                }
133            }
134            _ => {}
135        }
136    }
137
138    // Must have a name to create a package
139    let package_name = name?;
140
141    Some(PackageData {
142        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
143        name: Some(package_name),
144        extracted_license_statement: licenses.map(|l| l.join(", ")),
145        datasource_id: Some(DatasourceId::BazelBuild),
146        ..Default::default()
147    })
148}
149
/// Returns `true` when `rule_name` has the suffix "binary" or "library".
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .iter()
        .any(|suffix| rule_name.ends_with(*suffix))
}
154
155/// Create fallback package data using parent directory name
156fn fallback_package_data(path: &Path) -> PackageData {
157    let name = path
158        .parent()
159        .and_then(|p| p.file_name())
160        .and_then(|n| n.to_str())
161        .map(|s| s.to_string());
162
163    PackageData {
164        package_type: Some(BazelBuildParser::PACKAGE_TYPE),
165        name,
166        datasource_id: Some(DatasourceId::BazelBuild),
167        ..Default::default()
168    }
169}
170
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PackageType;
    use std::path::PathBuf;

    /// Only a file named exactly `BUILD` is accepted.
    #[test]
    fn test_is_match() {
        for accepted in ["BUILD", "path/to/BUILD"] {
            assert!(
                BazelBuildParser::is_match(&PathBuf::from(accepted)),
                "{accepted} should match"
            );
        }
        for rejected in ["BUILD.bazel", "build", "BUCK"] {
            assert!(
                !BazelBuildParser::is_match(&PathBuf::from(rejected)),
                "{rejected} should not match"
            );
        }
    }

    /// Rule names are accepted purely by their "binary"/"library" suffix.
    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["cc_binary", "cc_library", "java_binary", "py_library"] {
            assert!(check_rule_name_ending(accepted), "{accepted} should pass");
        }
        for rejected in ["filegroup", "load", "cc_test"] {
            assert!(!check_rule_name_ending(rejected), "{rejected} should fail");
        }
    }

    /// The fallback package takes its name from the parent directory.
    #[test]
    fn test_fallback_package_data() {
        let build_file = PathBuf::from("/path/to/myproject/BUILD");
        let package = fallback_package_data(&build_file);
        assert_eq!(package.name.as_deref(), Some("myproject"));
        assert_eq!(package.package_type, Some(PackageType::Bazel));
    }
}
205
// Invokes the crate-level `register_parser!` macro with metadata describing
// Bazel BUILD files.
// NOTE(review): the positional arguments look like (description, path globs,
// package type, primary language, documentation URL) — confirm against the
// macro definition, which is not visible from this file.
206crate::register_parser!(
207    "Bazel BUILD file",
208    &["**/BUILD"],
209    "bazel",
210    "",
211    Some("https://bazel.build/"),
212);
213
214pub struct BazelModuleParser;
215
216impl PackageParser for BazelModuleParser {
217    const PACKAGE_TYPE: PackageType = PackageType::Bazel;
218
219    fn is_match(path: &Path) -> bool {
220        path.file_name()
221            .and_then(|name| name.to_str())
222            .is_some_and(|name| name == "MODULE.bazel")
223    }
224
225    fn extract_packages(path: &Path) -> Vec<PackageData> {
226        match parse_bazel_module(path) {
227            Ok(package) => vec![package],
228            Err(e) => {
229                warn!("Failed to parse Bazel MODULE.bazel {:?}: {}", path, e);
230                vec![default_bazel_module_package_data()]
231            }
232        }
233    }
234}
235
236fn parse_bazel_module(path: &Path) -> Result<PackageData, String> {
237    let content =
238        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
239    let module = ast::Suite::parse(&content, "<MODULE.bazel>")
240        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
241
242    let mut package = default_bazel_module_package_data();
243    let mut extra_data = JsonMap::new();
244    let mut dependencies = Vec::new();
245    let mut overrides = Vec::new();
246
247    for statement in &module {
248        let Some(call) = extract_call(statement) else {
249            continue;
250        };
251
252        let Some(function_name) = extract_call_name(call) else {
253            continue;
254        };
255
256        match function_name {
257            "module" => {
258                package.name = extract_string_kwarg(call, "name");
259                package.version = extract_string_kwarg(call, "version");
260                package.purl = package
261                    .name
262                    .as_deref()
263                    .and_then(|name| build_bazel_purl(name, package.version.as_deref()));
264
265                if let Some(repo_name) = extract_string_kwarg(call, "repo_name") {
266                    extra_data.insert("repo_name".to_string(), JsonValue::String(repo_name));
267                }
268                if let Some(compatibility_level) = extract_int_kwarg(call, "compatibility_level") {
269                    extra_data.insert(
270                        "compatibility_level".to_string(),
271                        JsonValue::Number(compatibility_level.into()),
272                    );
273                }
274                if let Some(bazel_compatibility) = extract_kwarg_json(call, "bazel_compatibility") {
275                    extra_data.insert("bazel_compatibility".to_string(), bazel_compatibility);
276                }
277            }
278            "bazel_dep" => {
279                if let Some(dep) = extract_bazel_dependency(call) {
280                    dependencies.push(dep);
281                }
282            }
283            "archive_override"
284            | "git_override"
285            | "local_path_override"
286            | "single_version_override"
287            | "multiple_version_override" => {
288                overrides.push(extract_override(function_name, call));
289            }
290            _ => {}
291        }
292    }
293
294    if package.name.is_none() {
295        return Ok(default_bazel_module_package_data());
296    }
297
298    if !overrides.is_empty() {
299        extra_data.insert("overrides".to_string(), JsonValue::Array(overrides));
300    }
301
302    package.dependencies = dependencies;
303    package.extra_data = (!extra_data.is_empty()).then(|| extra_data.into_iter().collect());
304    Ok(package)
305}
306
307fn extract_call(statement: &ast::Stmt) -> Option<&ast::ExprCall> {
308    match statement {
309        ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
310            if let ast::Expr::Call(call) = value.as_ref() {
311                Some(call)
312            } else {
313                None
314            }
315        }
316        ast::Stmt::Assign(ast::StmtAssign { value, .. }) => {
317            if let ast::Expr::Call(call) = value.as_ref() {
318                Some(call)
319            } else {
320                None
321            }
322        }
323        _ => None,
324    }
325}
326
327fn extract_call_name(call: &ast::ExprCall) -> Option<&str> {
328    match call.func.as_ref() {
329        ast::Expr::Name(ast::ExprName { id, .. }) => Some(id.as_str()),
330        _ => None,
331    }
332}
333
334fn extract_string_kwarg(call: &ast::ExprCall, key: &str) -> Option<String> {
335    call.keywords.iter().find_map(|keyword| {
336        let arg_name = keyword.arg.as_ref()?.as_str();
337        if arg_name != key {
338            return None;
339        }
340        match &keyword.value {
341            ast::Expr::Constant(ast::ExprConstant {
342                value: ast::Constant::Str(value),
343                ..
344            }) => Some(value.clone()),
345            _ => None,
346        }
347    })
348}
349
350fn extract_bool_kwarg(call: &ast::ExprCall, key: &str) -> Option<bool> {
351    call.keywords.iter().find_map(|keyword| {
352        let arg_name = keyword.arg.as_ref()?.as_str();
353        if arg_name != key {
354            return None;
355        }
356        match &keyword.value {
357            ast::Expr::Constant(ast::ExprConstant {
358                value: ast::Constant::Bool(value),
359                ..
360            }) => Some(*value),
361            _ => None,
362        }
363    })
364}
365
366fn extract_int_kwarg(call: &ast::ExprCall, key: &str) -> Option<i64> {
367    call.keywords.iter().find_map(|keyword| {
368        let arg_name = keyword.arg.as_ref()?.as_str();
369        if arg_name != key {
370            return None;
371        }
372        match &keyword.value {
373            ast::Expr::Constant(ast::ExprConstant {
374                value: ast::Constant::Int(value),
375                ..
376            }) => value.to_string().parse::<i64>().ok(),
377            _ => None,
378        }
379    })
380}
381
382fn extract_kwarg_json(call: &ast::ExprCall, key: &str) -> Option<JsonValue> {
383    call.keywords.iter().find_map(|keyword| {
384        let arg_name = keyword.arg.as_ref()?.as_str();
385        if arg_name != key {
386            return None;
387        }
388        expr_to_json(&keyword.value)
389    })
390}
391
392fn extract_bazel_dependency(call: &ast::ExprCall) -> Option<Dependency> {
393    let name = extract_string_kwarg(call, "name")?;
394    let version = extract_string_kwarg(call, "version");
395    let is_dev = extract_bool_kwarg(call, "dev_dependency").unwrap_or(false);
396    let mut extra_data = JsonMap::new();
397
398    for field in ["repo_name", "max_compatibility_level", "registry"] {
399        if let Some(value) = extract_kwarg_json(call, field) {
400            extra_data.insert(field.to_string(), value);
401        }
402    }
403
404    Some(Dependency {
405        purl: build_bazel_purl(&name, version.as_deref()),
406        extracted_requirement: version.clone(),
407        scope: Some(if is_dev { "dev" } else { "dependencies" }.to_string()),
408        is_runtime: Some(!is_dev),
409        is_optional: Some(is_dev),
410        is_pinned: Some(version.is_some()),
411        is_direct: Some(true),
412        resolved_package: None,
413        extra_data: (!extra_data.is_empty()).then(|| extra_data.into_iter().collect()),
414    })
415}
416
417fn extract_override(kind: &str, call: &ast::ExprCall) -> JsonValue {
418    let mut override_map = JsonMap::new();
419    override_map.insert("kind".to_string(), JsonValue::String(kind.to_string()));
420    for keyword in &call.keywords {
421        if let Some(arg_name) = keyword.arg.as_ref().map(|arg| arg.to_string())
422            && let Some(value) = expr_to_json(&keyword.value)
423        {
424            override_map.insert(arg_name, value);
425        }
426    }
427    JsonValue::Object(override_map)
428}
429
430fn expr_to_json(expr: &ast::Expr) -> Option<JsonValue> {
431    match expr {
432        ast::Expr::Constant(ast::ExprConstant { value, .. }) => match value {
433            ast::Constant::Str(value) => Some(JsonValue::String(value.clone())),
434            ast::Constant::Bool(value) => Some(JsonValue::Bool(*value)),
435            ast::Constant::Int(value) => value
436                .to_string()
437                .parse::<i64>()
438                .ok()
439                .map(|value| JsonValue::Number(value.into()))
440                .or_else(|| Some(JsonValue::String(value.to_string()))),
441            ast::Constant::None => Some(JsonValue::Null),
442            _ => None,
443        },
444        ast::Expr::List(ast::ExprList { elts, .. })
445        | ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => Some(JsonValue::Array(
446            elts.iter().filter_map(expr_to_json).collect(),
447        )),
448        ast::Expr::Dict(ast::ExprDict { keys, values, .. }) => {
449            let mut map = JsonMap::new();
450            for (key, value) in keys.iter().zip(values.iter()) {
451                let Some(ast::Expr::Constant(ast::ExprConstant {
452                    value: ast::Constant::Str(key),
453                    ..
454                })) = key
455                else {
456                    continue;
457                };
458                if let Some(value) = expr_to_json(value) {
459                    map.insert(key.clone(), value);
460                }
461            }
462            Some(JsonValue::Object(map))
463        }
464        _ => None,
465    }
466}
467
468fn build_bazel_purl(name: &str, version: Option<&str>) -> Option<String> {
469    let mut purl = PackageUrl::new("bazel", name).ok()?;
470    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
471        purl.with_version(version).ok()?;
472    }
473    Some(purl.to_string())
474}
475
476fn default_bazel_module_package_data() -> PackageData {
477    PackageData {
478        package_type: Some(BazelModuleParser::PACKAGE_TYPE),
479        datasource_id: Some(DatasourceId::BazelModule),
480        ..Default::default()
481    }
482}
483
// Invokes the crate-level `register_parser!` macro with metadata describing
// Bazel MODULE.bazel files.
// NOTE(review): the positional arguments look like (description, path globs,
// package type, primary language, documentation URL) — confirm against the
// macro definition, which is not visible from this file.
484crate::register_parser!(
485    "Bazel MODULE.bazel file",
486    &["**/MODULE.bazel"],
487    "bazel",
488    "",
489    Some("https://bazel.build/external/module"),
490);