1use std::collections::HashMap;
19use std::path::Path;
20
21use crate::parser_warn as warn;
22use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
23use packageurl::PackageUrl;
24use starlark_syntax::syntax::ast;
25use starlark_syntax::syntax::module::AstModuleFields;
26use starlark_syntax::syntax::{AstModule, Dialect};
27
28use crate::models::{DatasourceId, PackageData, PackageType, Party, Sha1Digest};
29
30use super::PackageParser;
31
/// Argument list of a Starlark call expression, in the parser's no-payload AST flavour.
type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;

/// Borrowed view of a Starlark call expression: the callee plus its arguments.
struct StarlarkCall<'a> {
    /// Expression being called (for a rule invocation this is the rule identifier).
    func: &'a ast::AstExpr,
    /// Arguments passed to the call.
    args: &'a StarlarkCallArgs,
}
38
39pub struct BuckBuildParser;
41
42impl PackageParser for BuckBuildParser {
43 const PACKAGE_TYPE: PackageType = PackageType::Buck;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name()
47 .and_then(|name| name.to_str())
48 .is_some_and(|name| name == "BUCK")
49 }
50
51 fn extract_packages(path: &Path) -> Vec<PackageData> {
52 match parse_buck_build(path) {
53 Ok(packages) if !packages.is_empty() => packages,
54 Ok(_) => vec![fallback_package_data(path)],
55 Err(e) => {
56 warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
57 vec![fallback_package_data(path)]
58 }
59 }
60 }
61}
62
63pub struct BuckMetadataBzlParser;
65
66impl PackageParser for BuckMetadataBzlParser {
67 const PACKAGE_TYPE: PackageType = PackageType::Buck;
68
69 fn is_match(path: &Path) -> bool {
70 path.file_name()
71 .and_then(|name| name.to_str())
72 .is_some_and(|name| name == "METADATA.bzl")
73 }
74
75 fn extract_packages(path: &Path) -> Vec<PackageData> {
76 vec![match parse_metadata_bzl(path) {
77 Ok(pkg) => pkg,
78 Err(e) => {
79 warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
80 PackageData {
81 package_type: Some(Self::PACKAGE_TYPE),
82 datasource_id: Some(DatasourceId::BuckMetadata),
83 ..Default::default()
84 }
85 }
86 }]
87 }
88}
89
90fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
92 let content = read_file_to_string(path, None).map_err(|e| e.to_string())?;
93 let module = parse_starlark_module("<BUCK>", content)?;
94
95 let mut packages = Vec::new();
96
97 for statement in top_level_statements(&module)
98 .iter()
99 .take(MAX_ITERATION_COUNT)
100 {
101 if let Some(package_data) = extract_build_package_from_statement(statement) {
102 packages.push(package_data);
103 }
104 }
105
106 Ok(packages)
107}
108
109fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
111 let content = read_file_to_string(path, None).map_err(|e| e.to_string())?;
112 let module = parse_starlark_module("<METADATA.bzl>", content)?;
113
114 for statement in top_level_statements(&module)
115 .iter()
116 .take(MAX_ITERATION_COUNT)
117 {
118 if let Some(dict) = extract_metadata_assignment_dict(statement) {
119 return Ok(extract_metadata_dict(dict));
120 }
121 }
122
123 Ok(PackageData {
125 package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
126 datasource_id: Some(DatasourceId::BuckMetadata),
127 ..Default::default()
128 })
129}
130
131fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
132 let content = preprocess_starlark_content(&content);
133 let dialect = Dialect {
134 enable_top_level_stmt: true,
135 ..Dialect::Standard
136 };
137 AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
138}
139
/// Rewrites `@oss-disable` / `@oss-enable` guarded lines so that only the open-source variant
/// remains.
///
/// * A comment line (first non-blank character `#`) containing `@oss-disable` is dropped, and
///   its indentation is remembered for the line that immediately follows.
/// * A line containing `# @oss-enable` keeps only the code before the marker. If the previous
///   line was an `@oss-disable` comment, the kept code is re-indented to that comment's
///   indentation. A line with nothing before the marker is dropped.
/// * Every other line is copied unchanged and clears any remembered indentation.
///
/// Output lines are joined with `\n`; a trailing newline is emitted only if `content` had one.
fn preprocess_starlark_content(content: &str) -> String {
    const DISABLE_TAG: &str = "@oss-disable";
    const ENABLE_MARKER: &str = "# @oss-enable";

    let mut output = String::with_capacity(content.len());
    let mut disabled_indent: Option<String> = None;

    for line in content.lines() {
        let body = line.trim_start();

        if body.starts_with('#') && body.contains(DISABLE_TAG) {
            // Whatever precedes the first non-blank character is the indentation.
            let indent = &line[..line.len() - body.len()];
            disabled_indent = Some(indent.to_string());
            continue;
        }

        match line.find(ENABLE_MARKER) {
            Some(marker_at) => {
                let code = line[..marker_at].trim_end();
                if code.is_empty() {
                    // Marker-only line: nothing to emit, remembered indentation is untouched.
                    continue;
                }
                match disabled_indent.take() {
                    Some(indent) => {
                        output.push_str(&indent);
                        output.push_str(code.trim_start());
                    }
                    None => output.push_str(code),
                }
                output.push('\n');
            }
            None => {
                disabled_indent = None;
                output.push_str(line);
                output.push('\n');
            }
        }
    }

    // Mirror the input: do not invent a trailing newline it did not have.
    if output.ends_with('\n') && !content.ends_with('\n') {
        output.pop();
    }

    output
}
179
180fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
181 match &module.statement().node {
182 ast::StmtP::Statements(statements) => statements,
183 _ => std::slice::from_ref(module.statement()),
184 }
185}
186
187fn extract_metadata_assignment_dict(
188 statement: &ast::AstStmt,
189) -> Option<&[(ast::AstExpr, ast::AstExpr)]> {
190 let ast::StmtP::Assign(assign) = &statement.node else {
191 return None;
192 };
193 let ast::AssignTargetP::Identifier(target) = &assign.lhs.node else {
194 return None;
195 };
196 if target.node.ident != "METADATA" {
197 return None;
198 }
199 match &assign.rhs.node {
200 ast::ExprP::Dict(items) => Some(items.as_slice()),
201 _ => None,
202 }
203}
204
205fn extract_metadata_dict(dict: &[(ast::AstExpr, ast::AstExpr)]) -> PackageData {
207 let mut fields: HashMap<String, MetadataValue> = HashMap::new();
208
209 for (key, value) in dict.iter().take(MAX_ITERATION_COUNT) {
210 let Some(key_name) = expr_as_string(key) else {
211 continue;
212 };
213 let Some(metadata_value) = metadata_value_from_expr(value) else {
214 continue;
215 };
216
217 fields.insert(key_name, metadata_value);
218 }
219
220 build_package_from_metadata(fields)
221}
222
223fn get_metadata_string(fields: &HashMap<String, MetadataValue>, keys: &[&str]) -> Option<String> {
224 keys.iter().find_map(|key| match fields.get(*key) {
225 Some(MetadataValue::String(value)) => Some(value.clone()),
226 _ => None,
227 })
228}
229
230fn get_metadata_list(
231 fields: &HashMap<String, MetadataValue>,
232 keys: &[&str],
233) -> Option<Vec<String>> {
234 keys.iter().find_map(|key| match fields.get(*key) {
235 Some(MetadataValue::List(values)) => Some(values.clone()),
236 _ => None,
237 })
238}
239
/// A value extracted from a `METADATA` dict entry.
///
/// Only string literals and list/tuple literals of strings are representable; other Starlark
/// expressions are dropped before a `MetadataValue` is built.
#[derive(Debug, Clone, PartialEq, Eq)]
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string literals found in a list or tuple literal.
    List(Vec<String>),
}
245
246fn split_buck_license_values(values: &[String]) -> (Vec<String>, Vec<String>) {
247 let mut statements = Vec::new();
248 let mut references = Vec::new();
249
250 for value in values {
251 if is_probable_local_license_reference(value) {
252 references.push(value.clone());
253 } else {
254 statements.push(value.clone());
255 }
256 }
257
258 (statements, references)
259}
260
/// Heuristically decides whether a `licenses` value names a local licence file rather than a
/// licence identifier or expression.
///
/// After trimming, and ignoring ASCII case, a value is treated as a file reference when it
///
/// * contains a path separator (`/` or `\`),
/// * ends with `.txt`, `.md`, `.rst` or `.html`, or
/// * starts with `license`, `licence`, `copying`, `notice` or `copyright`.
///
/// SPDX user-defined identifiers (`LicenseRef-...`) are exempt from the name-prefix rule: they
/// start with "license" but are licence identifiers, not files. They still count as
/// references when a separator or file extension is present. Blank input is never a reference.
fn is_probable_local_license_reference(value: &str) -> bool {
    const FILE_NAME_PREFIXES: [&str; 5] = ["license", "licence", "copying", "notice", "copyright"];
    const FILE_EXTENSIONS: [&str; 4] = [".txt", ".md", ".rst", ".html"];
    const SPDX_CUSTOM_ID_PREFIX: &str = "licenseref-";

    let trimmed = value.trim();
    if trimmed.is_empty() {
        return false;
    }

    let lower = trimmed.to_ascii_lowercase();

    if lower.contains(|c: char| c == '/' || c == '\\') {
        return true;
    }
    if FILE_EXTENSIONS.iter().any(|ext| lower.ends_with(*ext)) {
        return true;
    }

    // `LicenseRef-foo` would otherwise match the "license" prefix and be mistaken for a file.
    !lower.starts_with(SPDX_CUSTOM_ID_PREFIX)
        && FILE_NAME_PREFIXES
            .iter()
            .any(|prefix| lower.starts_with(*prefix))
}
280
281fn insert_license_reference_extra_data(
282 extra_data: &mut HashMap<String, serde_json::Value>,
283 references: &[String],
284) {
285 match references {
286 [] => {}
287 [reference] => {
288 extra_data.insert(
289 "license_file".to_string(),
290 serde_json::Value::String(reference.clone()),
291 );
292 }
293 _ => {
294 extra_data.insert(
295 "license_files".to_string(),
296 serde_json::Value::Array(
297 references
298 .iter()
299 .cloned()
300 .map(serde_json::Value::String)
301 .collect(),
302 ),
303 );
304 }
305 }
306}
307
308fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
310 let mut pkg = PackageData {
311 package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
312 datasource_id: Some(DatasourceId::BuckMetadata),
313 ..Default::default()
314 };
315 let mut license_references = Vec::new();
316
317 if let Some(name) = get_metadata_string(&fields, &["name"]) {
319 pkg.name = Some(truncate_field(name));
320 }
321
322 if let Some(version) = get_metadata_string(&fields, &["version"]) {
324 pkg.version = Some(truncate_field(version));
325 }
326
327 if let Some(namespace) = get_metadata_string(&fields, &["namespace"]) {
329 pkg.namespace = Some(truncate_field(namespace));
330 }
331
332 if let Some(ecosystem) = get_metadata_string(&fields, &["ecosystem", "type", "package_type"])
335 && let Ok(package_type) = ecosystem.parse::<PackageType>()
336 {
337 pkg.package_type = Some(package_type);
338 }
339
340 if let Some(licenses) = get_metadata_list(&fields, &["licenses"]) {
342 let (license_statements, references) = split_buck_license_values(&licenses);
343 license_references = references;
344 let extracted_license_statement = if !license_statements.is_empty() {
345 Some(license_statements.join(", "))
346 } else if !license_references.is_empty() {
347 Some(license_references.join(", "))
348 } else {
349 None
350 };
351 pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
352 } else if let Some(license_expression) = get_metadata_string(&fields, &["license_expression"]) {
353 pkg.extracted_license_statement = Some(truncate_field(license_expression));
354 }
355
356 if let Some(copyright) = get_metadata_list(&fields, &["copyrights"]) {
357 if !copyright.is_empty() {
358 pkg.copyright = Some(truncate_field(copyright.join("\n")));
359 }
360 } else if let Some(copyright) = get_metadata_string(&fields, &["copyright"]) {
361 pkg.copyright = Some(truncate_field(copyright));
362 }
363
364 if let Some(homepage_url) = get_metadata_string(
366 &fields,
367 &["upstream_address", "upstream_url", "homepage_url"],
368 ) {
369 pkg.homepage_url = Some(truncate_field(homepage_url));
370 }
371
372 if let Some(download_url) = get_metadata_string(&fields, &["download_url"]) {
374 pkg.download_url = Some(truncate_field(download_url));
375 }
376
377 if let Some(vcs_url) = get_metadata_string(&fields, &["vcs_url"]) {
379 pkg.vcs_url = Some(truncate_field(vcs_url));
380 }
381
382 if let Some(sha1) = get_metadata_string(&fields, &["download_archive_sha1"]) {
384 pkg.sha1 = Sha1Digest::from_hex(&sha1).ok();
385 }
386
387 if let Some(maintainers) = get_metadata_list(&fields, &["maintainers"]) {
389 pkg.parties.extend(maintainers.iter().map(|name| Party {
390 r#type: Some("organization".to_string()),
391 name: Some(name.clone()),
392 role: Some("maintainer".to_string()),
393 email: None,
394 url: None,
395 organization: None,
396 organization_url: None,
397 timezone: None,
398 }));
399 }
400
401 if let Some(vendor) = get_metadata_string(&fields, &["vendor", "publisher"]) {
402 pkg.parties.push(Party {
403 r#type: None,
404 name: Some(vendor),
405 role: Some("publisher".to_string()),
406 email: None,
407 url: None,
408 organization: None,
409 organization_url: None,
410 timezone: None,
411 });
412 }
413
414 let mut extra_data = HashMap::new();
416 if let Some(vcs_commit_hash) = get_metadata_string(&fields, &["vcs_commit_hash"]) {
417 extra_data.insert(
418 "vcs_commit_hash".to_string(),
419 serde_json::Value::String(vcs_commit_hash),
420 );
421 }
422 if let Some(upstream_hash) =
423 get_metadata_string(&fields, &["upstream_hash", "upstream_commit_hash"])
424 {
425 extra_data.insert(
426 "upstream_hash".to_string(),
427 serde_json::Value::String(upstream_hash),
428 );
429 }
430 if let Some(upstream_branch) = get_metadata_string(&fields, &["upstream_branch"]) {
431 extra_data.insert(
432 "upstream_branch".to_string(),
433 serde_json::Value::String(upstream_branch),
434 );
435 }
436 insert_license_reference_extra_data(&mut extra_data, &license_references);
437 if !extra_data.is_empty() {
438 pkg.extra_data = Some(extra_data);
439 }
440
441 if let Some(purl_str) = get_metadata_string(&fields, &["package_url"])
443 && let Ok(purl) = purl_str.parse::<PackageUrl>()
444 {
445 pkg.purl = Some(truncate_field(purl.to_string()));
446
447 if let Ok(package_type) = purl.ty().parse::<PackageType>() {
448 pkg.package_type = Some(package_type);
449 }
450 if let Some(ns) = purl.namespace() {
451 pkg.namespace = Some(truncate_field(ns.to_string()));
452 }
453 pkg.name = Some(truncate_field(purl.name().to_string()));
454 if let Some(ver) = purl.version() {
455 pkg.version = Some(truncate_field(ver.to_string()));
456 }
457 if !purl.qualifiers().is_empty() {
459 let quals: HashMap<String, String> = purl
460 .qualifiers()
461 .iter()
462 .map(|(k, v)| (k.to_string(), v.to_string()))
463 .collect();
464 pkg.qualifiers = Some(quals);
465 }
466 if let Some(sp) = purl.subpath() {
468 pkg.subpath = Some(sp.to_string());
469 }
470 }
471
472 pkg
473}
474
475fn metadata_value_from_expr(expr: &ast::AstExpr) -> Option<MetadataValue> {
476 if let Some(string) = expr_as_string(expr) {
477 return Some(MetadataValue::String(string));
478 }
479
480 let items = match &expr.node {
481 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
482 _ => return None,
483 };
484 let values: Vec<_> = items
485 .iter()
486 .take(MAX_ITERATION_COUNT)
487 .filter_map(expr_as_string)
488 .collect();
489 (!values.is_empty()).then_some(MetadataValue::List(values))
490}
491
492fn extract_build_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
494 let call = extract_call(statement)?;
495 let rule_name = match &call.func.node {
496 ast::ExprP::Identifier(identifier) => identifier.node.ident.as_str(),
497 _ => return None,
498 };
499
500 if !check_rule_name_ending(rule_name) {
501 return None;
502 }
503
504 let name = extract_named_kwarg_string(&call, "name");
505 let licenses = extract_named_kwarg_string_list(&call, "licenses");
506
507 let package_name = name?;
508 let (license_statements, license_references) = licenses
509 .as_deref()
510 .map(split_buck_license_values)
511 .unwrap_or_default();
512 let extracted_license_statement = if !license_statements.is_empty() {
513 Some(truncate_field(license_statements.join(", ")))
514 } else if !license_references.is_empty() {
515 Some(truncate_field(license_references.join(", ")))
516 } else {
517 None
518 };
519 let mut extra_data = HashMap::new();
520 insert_license_reference_extra_data(&mut extra_data, &license_references);
521
522 Some(PackageData {
523 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
524 name: Some(truncate_field(package_name)),
525 extracted_license_statement,
526 extra_data: (!extra_data.is_empty()).then_some(extra_data),
527 datasource_id: Some(DatasourceId::BuckFile),
528 ..Default::default()
529 })
530}
531
532fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
533 match &statement.node {
534 ast::StmtP::Expression(expr) => extract_call_expr(expr),
535 ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
536 _ => None,
537 }
538}
539
540fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
541 match &expr.node {
542 ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
543 _ => None,
544 }
545}
546
547fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
548 call.args
549 .args
550 .iter()
551 .find_map(|argument| match &argument.node {
552 ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
553 _ => None,
554 })
555}
556
557fn extract_named_kwarg_string(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
558 extract_named_kwarg(call, key).and_then(expr_as_string)
559}
560
561fn extract_named_kwarg_string_list(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
562 let expr = extract_named_kwarg(call, key)?;
563 let items = match &expr.node {
564 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
565 _ => return None,
566 };
567 let values: Vec<_> = items
568 .iter()
569 .take(MAX_ITERATION_COUNT)
570 .filter_map(expr_as_string)
571 .collect();
572 (!values.is_empty()).then_some(values)
573}
574
575fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
576 match &expr.node {
577 ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
578 _ => None,
579 }
580}
581
/// Returns `true` when `rule_name` ends with `binary` or `library` (e.g. `java_binary`,
/// `android_library`). The match is case-sensitive.
fn check_rule_name_ending(rule_name: &str) -> bool {
    const PACKAGE_RULE_SUFFIXES: [&str; 2] = ["binary", "library"];

    PACKAGE_RULE_SUFFIXES
        .iter()
        .any(|suffix| rule_name.ends_with(*suffix))
}
586
587fn fallback_package_data(path: &Path) -> PackageData {
589 let name = path
590 .parent()
591 .and_then(|p| p.file_name())
592 .and_then(|n| n.to_str())
593 .map(|s| s.to_string());
594
595 PackageData {
596 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
597 name,
598 datasource_id: Some(DatasourceId::BuckFile),
599 ..Default::default()
600 }
601}
602
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// `BUCK` is matched by exact, case-sensitive file name, with or without a directory.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// `METADATA.bzl` is matched by exact, case-sensitive file name, extension included.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// Only rule names ending in `binary` or `library` describe packages.
    #[test]
    fn test_check_rule_name_ending() {
        for rule in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(rule));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }

    /// Marker comments disappear, the enabled alternative stays, and code following an
    /// `@oss-disable` comment takes over that comment's indentation.
    #[test]
    fn test_preprocess_starlark_content_handles_oss_guarded_alternatives() {
        let source = r#"# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_unittest.bzl", "rust_unittest")
prelude = native

# @oss-disable: rust_unittest(
 rust_test( # @oss-enable
 name = "test",
 )

platform_utils = None # @oss-enable
"#;

        let output = preprocess_starlark_content(source);

        for marker in ["@oss-disable", "@oss-enable"] {
            assert!(!output.contains(marker));
        }
        for kept in ["rust_test(", "platform_utils = None"] {
            assert!(output.contains(kept));
        }
        assert!(!output.contains(" rust_test("));
    }

    /// A BUCK file using OSS guards still parses, and only the `*_library` rule is reported.
    #[test]
    fn test_parse_buck_build_with_oss_guarded_rule() {
        let source = r#"# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_library.bzl", "rust_library")
# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_unittest.bzl", "rust_unittest")

oncall("build_infra")

rust_library(
 name = "library",
 srcs = ["src/lib.rs"],
)

# @oss-disable: rust_unittest(
 rust_test( # @oss-enable
 name = "test",
 srcs = ["tests/test.rs"],
)
"#;

        let workspace = tempfile::tempdir().unwrap();
        let buck_file = workspace.path().join("BUCK");
        std::fs::write(&buck_file, source).unwrap();

        let packages = parse_buck_build(&buck_file).expect("BUCK file should parse");

        assert_eq!(packages.len(), 1);
        let only = &packages[0];
        assert_eq!(only.package_type, Some(PackageType::Buck));
        assert_eq!(only.name.as_deref(), Some("library"));
    }
}
690
// Registers this module's parsers with the crate-wide parser registry: a human-readable
// description, the glob patterns of the files handled, and a documentation URL.
// NOTE(review): the meaning of the `"buck"` and `""` arguments is defined by the
// `register_parser!` macro, which is not visible here — confirm against the macro.
crate::register_parser!(
    "Buck build file and METADATA.bzl",
    &["**/BUCK", "**/METADATA.bzl"],
    "buck",
    "",
    Some("https://buck.build/"),
);