1use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
20use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
21use packageurl::PackageUrl;
22use serde_json::{Map as JsonMap, Value as JsonValue};
23use std::path::Path;
24
25use crate::parser_warn as warn;
26use starlark_syntax::syntax::ast;
27use starlark_syntax::syntax::module::AstModuleFields;
28use starlark_syntax::syntax::{AstModule, Dialect};
29
30use super::PackageParser;
31
32type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;
33const SCANCODE_SIMPLE_TOP_LEVEL_KEY: &str = "scancode_simple_top_level";
34const MAX_RECURSION_DEPTH: usize = 50;
35
36struct StarlarkCall<'a> {
37 func: &'a ast::AstExpr,
38 args: &'a StarlarkCallArgs,
39}
40
41pub struct BazelBuildParser;
42
43impl PackageParser for BazelBuildParser {
44 const PACKAGE_TYPE: PackageType = PackageType::Bazel;
45
46 fn is_match(path: &Path) -> bool {
47 path.file_name()
48 .and_then(|name| name.to_str())
49 .is_some_and(|name| name == "BUILD")
50 }
51
52 fn extract_packages(path: &Path) -> Vec<PackageData> {
53 match parse_bazel_build(path) {
54 Ok(packages) if !packages.is_empty() => packages,
55 Ok(_) => vec![fallback_package_data(path)],
56 Err(e) => {
57 warn!("Failed to parse Bazel BUILD file {:?}: {}", path, e);
58 vec![fallback_package_data(path)]
59 }
60 }
61 }
62}
63
64fn parse_bazel_build(path: &Path) -> Result<Vec<PackageData>, String> {
66 let content =
67 crate::parsers::utils::read_file_to_string(path, None).map_err(|e| e.to_string())?;
68 let module = parse_starlark_module("<BUILD>", content)?;
69 let scancode_simple_top_level = is_scancode_simple_top_level_module(&module);
70
71 let mut packages = Vec::new();
72
73 for statement in top_level_statements(&module)
74 .iter()
75 .take(MAX_ITERATION_COUNT)
76 {
77 if let Some(mut package_data) = extract_package_from_statement(statement) {
78 set_scancode_simple_top_level(&mut package_data, scancode_simple_top_level);
79 packages.push(package_data);
80 }
81 }
82
83 Ok(packages)
84}
85
86fn extract_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
88 let call = extract_call(statement)?;
89 let rule_name = extract_call_name(&call)?;
90
91 if !check_rule_name_ending(rule_name) {
92 return None;
93 }
94
95 let name = extract_string_kwarg(&call, "name")?;
96 let licenses = extract_string_list_kwarg(&call, "licenses");
97 let purl = build_bazel_purl(&name, None).map(truncate_field);
98
99 Some(PackageData {
100 package_type: Some(BazelBuildParser::PACKAGE_TYPE),
101 name: Some(truncate_field(name)),
102 extracted_license_statement: licenses.map(|licenses| truncate_field(licenses.join(", "))),
103 datasource_id: Some(DatasourceId::BazelBuild),
104 purl,
105 ..Default::default()
106 })
107}
108
/// Returns `true` when `rule_name` ends with `binary` or `library`
/// (for example `cc_binary` or `py_library`).
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .into_iter()
        .any(|suffix| rule_name.ends_with(suffix))
}
113
114fn fallback_package_data(path: &Path) -> PackageData {
116 let name = path
117 .parent()
118 .and_then(|p| p.file_name())
119 .and_then(|n| n.to_str())
120 .map(|s| truncate_field(s.to_string()));
121
122 PackageData {
123 package_type: Some(BazelBuildParser::PACKAGE_TYPE),
124 purl: name
125 .as_deref()
126 .and_then(|name| build_bazel_purl(name, None))
127 .map(truncate_field),
128 name,
129 datasource_id: Some(DatasourceId::BazelBuild),
130 ..Default::default()
131 }
132}
133
134fn set_scancode_simple_top_level(package_data: &mut PackageData, enabled: bool) {
135 let extra_data = package_data.extra_data.get_or_insert_with(Default::default);
136 extra_data.insert(
137 SCANCODE_SIMPLE_TOP_LEVEL_KEY.to_string(),
138 JsonValue::Bool(enabled),
139 );
140}
141
142fn is_scancode_simple_top_level_module(module: &AstModule) -> bool {
143 top_level_statements(module)
144 .iter()
145 .all(is_scancode_simple_top_level_statement)
146}
147
148fn is_scancode_simple_top_level_statement(statement: &ast::AstStmt) -> bool {
149 match &statement.node {
150 ast::StmtP::Expression(expr) => {
151 matches!(&expr.node, ast::ExprP::Call(func, _) if matches!(&func.node, ast::ExprP::Identifier(_)))
152 }
153 _ => true,
154 }
155}
156
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::PackageType;
    use std::path::PathBuf;

    /// Parses an inline BUILD snippet and reports whether the module is made
    /// up solely of "simple" top-level statements.
    fn is_simple_build(source: &str) -> bool {
        let module =
            parse_starlark_module("<BUILD>", source.to_string()).expect("parse BUILD");
        is_scancode_simple_top_level_module(&module)
    }

    #[test]
    fn test_is_match() {
        for accepted in ["BUILD", "path/to/BUILD"] {
            assert!(BazelBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD.bazel", "build", "BUCK"] {
            assert!(!BazelBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["cc_binary", "cc_library", "java_binary", "py_library"] {
            assert!(check_rule_name_ending(accepted));
        }
        for rejected in ["filegroup", "load", "cc_test"] {
            assert!(!check_rule_name_ending(rejected));
        }
    }

    #[test]
    fn test_fallback_package_data() {
        let package = fallback_package_data(&PathBuf::from("/path/to/myproject/BUILD"));

        assert_eq!(package.package_type, Some(PackageType::Bazel));
        assert_eq!(package.name, Some("myproject".to_string()));
        assert_eq!(package.purl.as_deref(), Some("pkg:bazel/myproject"));
    }

    #[test]
    fn test_scancode_simple_top_level_allows_direct_calls() {
        assert!(is_simple_build(
            "cc_library(name = \"demo\")\npy_binary(name = \"tool\")\n"
        ));
    }

    #[test]
    fn test_scancode_simple_top_level_rejects_attribute_calls() {
        assert!(!is_simple_build(
            "selects.config_setting_group(name = \"demo\")\ncc_library(name = \"demo\")\n"
        ));
    }

    #[test]
    fn test_scancode_simple_top_level_rejects_non_call_expressions() {
        assert!(!is_simple_build("[(cc_binary(name = \"demo\"),)]\n"));
    }
}
224
// Registers the Bazel BUILD parser through the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; judging by the values, the
// arguments appear to be a description, path glob patterns, a package type,
// a (here empty) language field and a documentation URL — confirm against
// the macro definition.
225crate::register_parser!(
226 "Bazel BUILD file",
227 &["**/BUILD"],
228 "bazel",
229 "",
230 Some("https://bazel.build/"),
231);
232
233pub struct BazelModuleParser;
234
235impl PackageParser for BazelModuleParser {
236 const PACKAGE_TYPE: PackageType = PackageType::Bazel;
237
238 fn is_match(path: &Path) -> bool {
239 path.file_name()
240 .and_then(|name| name.to_str())
241 .is_some_and(|name| name == "MODULE.bazel")
242 }
243
244 fn extract_packages(path: &Path) -> Vec<PackageData> {
245 match parse_bazel_module(path) {
246 Ok(package) => vec![package],
247 Err(e) => {
248 warn!("Failed to parse Bazel MODULE.bazel {:?}: {}", path, e);
249 vec![default_bazel_module_package_data()]
250 }
251 }
252 }
253}
254
255fn parse_bazel_module(path: &Path) -> Result<PackageData, String> {
256 let content =
257 crate::parsers::utils::read_file_to_string(path, None).map_err(|e| e.to_string())?;
258 let module = parse_starlark_module("<MODULE.bazel>", content)?;
259
260 let mut package = default_bazel_module_package_data();
261 let mut extra_data = JsonMap::new();
262 let mut dependencies = Vec::new();
263 let mut overrides = Vec::new();
264
265 for statement in top_level_statements(&module)
266 .iter()
267 .take(MAX_ITERATION_COUNT)
268 {
269 let Some(call) = extract_call(statement) else {
270 continue;
271 };
272
273 let Some(function_name) = extract_call_name(&call) else {
274 continue;
275 };
276
277 match function_name {
278 "module" => {
279 package.name = extract_string_kwarg(&call, "name").map(truncate_field);
280 package.version = extract_string_kwarg(&call, "version").map(truncate_field);
281 package.purl = package
282 .name
283 .as_deref()
284 .and_then(|name| build_bazel_purl(name, package.version.as_deref()))
285 .map(truncate_field);
286
287 if let Some(repo_name) =
288 extract_string_kwarg(&call, "repo_name").map(truncate_field)
289 {
290 extra_data.insert("repo_name".to_string(), JsonValue::String(repo_name));
291 }
292 if let Some(compatibility_level) = extract_int_kwarg(&call, "compatibility_level") {
293 extra_data.insert(
294 "compatibility_level".to_string(),
295 JsonValue::Number(compatibility_level.into()),
296 );
297 }
298 if let Some(bazel_compatibility) = extract_kwarg_json(&call, "bazel_compatibility")
299 {
300 extra_data.insert("bazel_compatibility".to_string(), bazel_compatibility);
301 }
302 }
303 "bazel_dep" => {
304 if let Some(dep) = extract_bazel_dependency(&call) {
305 dependencies.push(dep);
306 }
307 }
308 "archive_override"
309 | "git_override"
310 | "local_path_override"
311 | "single_version_override"
312 | "multiple_version_override" => {
313 overrides.push(extract_override(function_name, &call));
314 }
315 _ => {}
316 }
317 }
318
319 if package.name.is_none() {
320 return Ok(default_bazel_module_package_data());
321 }
322
323 if !overrides.is_empty() {
324 extra_data.insert("overrides".to_string(), JsonValue::Array(overrides));
325 }
326
327 package.dependencies = dependencies;
328 package.extra_data = (!extra_data.is_empty()).then(|| extra_data.into_iter().collect());
329 Ok(package)
330}
331
332fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
333 let dialect = Dialect {
334 enable_top_level_stmt: true,
335 ..Dialect::Standard
336 };
337 AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
338}
339
340fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
341 match &module.statement().node {
342 ast::StmtP::Statements(statements) => statements,
343 _ => std::slice::from_ref(module.statement()),
344 }
345}
346
347fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
348 match &statement.node {
349 ast::StmtP::Expression(expr) => extract_call_expr(expr),
350 ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
351 _ => None,
352 }
353}
354
355fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
356 match &expr.node {
357 ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
358 _ => None,
359 }
360}
361
362fn extract_call_name<'a>(call: &'a StarlarkCall<'_>) -> Option<&'a str> {
363 match &call.func.node {
364 ast::ExprP::Identifier(identifier) => Some(identifier.node.ident.as_str()),
365 _ => None,
366 }
367}
368
369fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
370 call.args
371 .args
372 .iter()
373 .find_map(|argument| match &argument.node {
374 ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
375 _ => None,
376 })
377}
378
379fn extract_string_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
380 extract_named_kwarg(call, key).and_then(expr_as_string)
381}
382
383fn extract_string_list_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
384 let expr = extract_named_kwarg(call, key)?;
385 let items = match &expr.node {
386 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
387 _ => return None,
388 };
389 let values: Vec<_> = items
390 .iter()
391 .take(MAX_ITERATION_COUNT)
392 .filter_map(expr_as_string)
393 .collect();
394 (!values.is_empty()).then_some(values)
395}
396
397fn extract_bool_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<bool> {
398 extract_named_kwarg(call, key).and_then(expr_as_bool)
399}
400
401fn extract_int_kwarg(call: &StarlarkCall<'_>, key: &str) -> Option<i64> {
402 extract_named_kwarg(call, key).and_then(expr_as_i64)
403}
404
405fn extract_kwarg_json(call: &StarlarkCall<'_>, key: &str) -> Option<JsonValue> {
406 extract_named_kwarg(call, key).and_then(|expr| expr_to_json(expr, 0))
407}
408
409fn extract_bazel_dependency(call: &StarlarkCall<'_>) -> Option<Dependency> {
410 let name = extract_string_kwarg(call, "name").map(truncate_field)?;
411 let version = extract_string_kwarg(call, "version").map(truncate_field);
412 let is_dev = extract_bool_kwarg(call, "dev_dependency").unwrap_or(false);
413 let mut extra_data = JsonMap::new();
414
415 for field in ["repo_name", "max_compatibility_level", "registry"]
416 .iter()
417 .take(MAX_ITERATION_COUNT)
418 {
419 if let Some(value) = extract_kwarg_json(call, field) {
420 extra_data.insert(field.to_string(), value);
421 }
422 }
423
424 Some(Dependency {
425 purl: build_bazel_purl(&name, version.as_deref()).map(truncate_field),
426 extracted_requirement: version.clone(),
427 scope: Some(if is_dev { "dev" } else { "dependencies" }.to_string()),
428 is_runtime: Some(!is_dev),
429 is_optional: Some(is_dev),
430 is_pinned: Some(version.is_some()),
431 is_direct: Some(true),
432 resolved_package: None,
433 extra_data: (!extra_data.is_empty()).then(|| extra_data.into_iter().collect()),
434 })
435}
436
437fn extract_override(kind: &str, call: &StarlarkCall<'_>) -> JsonValue {
438 let mut override_map = JsonMap::new();
439 override_map.insert("kind".to_string(), JsonValue::String(kind.to_string()));
440 for argument in call.args.args.iter().take(MAX_ITERATION_COUNT) {
441 if let ast::ArgumentP::Named(name, value) = &argument.node
442 && let Some(value) = expr_to_json(value, 0)
443 {
444 override_map.insert(name.node.clone(), value);
445 }
446 }
447 JsonValue::Object(override_map)
448}
449
450fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
451 match &expr.node {
452 ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
453 _ => None,
454 }
455}
456
457fn expr_as_bool(expr: &ast::AstExpr) -> Option<bool> {
458 match &expr.node {
459 ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
460 "True" => Some(true),
461 "False" => Some(false),
462 _ => None,
463 },
464 _ => None,
465 }
466}
467
468fn expr_as_i64(expr: &ast::AstExpr) -> Option<i64> {
469 match &expr.node {
470 ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value.node.to_string().parse().ok(),
471 _ => None,
472 }
473}
474
475fn expr_to_json(expr: &ast::AstExpr, depth: usize) -> Option<JsonValue> {
476 if depth > MAX_RECURSION_DEPTH {
477 return None;
478 }
479 match &expr.node {
480 ast::ExprP::Literal(ast::AstLiteral::String(value)) => {
481 Some(JsonValue::String(value.node.clone()))
482 }
483 ast::ExprP::Literal(ast::AstLiteral::Int(value)) => value
484 .node
485 .to_string()
486 .parse::<i64>()
487 .ok()
488 .map(|value| JsonValue::Number(value.into()))
489 .or_else(|| Some(JsonValue::String(value.node.to_string()))),
490 ast::ExprP::Literal(ast::AstLiteral::Float(value)) => {
491 serde_json::Number::from_f64(value.node).map(JsonValue::Number)
492 }
493 ast::ExprP::Identifier(identifier) => match identifier.node.ident.as_str() {
494 "True" => Some(JsonValue::Bool(true)),
495 "False" => Some(JsonValue::Bool(false)),
496 "None" => Some(JsonValue::Null),
497 _ => None,
498 },
499 ast::ExprP::List(elts) | ast::ExprP::Tuple(elts) => Some(JsonValue::Array(
500 elts.iter()
501 .take(MAX_ITERATION_COUNT)
502 .filter_map(|e| expr_to_json(e, depth + 1))
503 .collect(),
504 )),
505 ast::ExprP::Dict(items) => {
506 let mut map = JsonMap::new();
507 for (key, value) in items.iter().take(MAX_ITERATION_COUNT) {
508 let Some(key) = expr_as_string(key) else {
509 continue;
510 };
511 if let Some(value) = expr_to_json(value, depth + 1) {
512 map.insert(key, value);
513 }
514 }
515 Some(JsonValue::Object(map))
516 }
517 _ => None,
518 }
519}
520
521fn build_bazel_purl(name: &str, version: Option<&str>) -> Option<String> {
522 let mut purl = PackageUrl::new("bazel", name).ok()?;
523 if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
524 purl.with_version(version).ok()?;
525 }
526 Some(purl.to_string())
527}
528
529fn default_bazel_module_package_data() -> PackageData {
530 PackageData {
531 package_type: Some(BazelModuleParser::PACKAGE_TYPE),
532 datasource_id: Some(DatasourceId::BazelModule),
533 ..Default::default()
534 }
535}
536
// Registers the Bazel MODULE.bazel parser through the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; judging by the values, the
// arguments appear to be a description, path glob patterns, a package type,
// a (here empty) language field and a documentation URL — confirm against
// the macro definition.
537crate::register_parser!(
538 "Bazel MODULE.bazel file",
539 &["**/MODULE.bazel"],
540 "bazel",
541 "",
542 Some("https://bazel.build/external/module"),
543);