1use std::collections::HashMap;
22use std::path::Path;
23
24use crate::parser_warn as warn;
25use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
26use packageurl::PackageUrl;
27use starlark_syntax::syntax::ast;
28use starlark_syntax::syntax::module::AstModuleFields;
29use starlark_syntax::syntax::{AstModule, Dialect};
30
31use crate::models::{DatasourceId, PackageData, PackageType, Party, Sha1Digest};
32
33use super::PackageParser;
34use super::metadata::ParserMetadata;
35
/// Shorthand for the call-argument node of the Starlark AST, instantiated with the
/// `AstNoPayload` payload type.
type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;
37
/// Borrowed view of one Starlark call expression: the callee plus its arguments.
struct StarlarkCall<'a> {
    /// Expression in callee position of the call.
    func: &'a ast::AstExpr,
    /// Arguments passed to the call.
    args: &'a StarlarkCallArgs,
}
42
43pub struct BuckBuildParser;
45
46impl PackageParser for BuckBuildParser {
47 const PACKAGE_TYPE: PackageType = PackageType::Buck;
48
49 fn metadata() -> Vec<ParserMetadata> {
50 vec![ParserMetadata {
51 description: "Buck build file and METADATA.bzl",
52 file_patterns: &["**/BUCK", "**/METADATA.bzl"],
53 package_type: "buck",
54 primary_language: "",
55 documentation_url: Some("https://buck.build/"),
56 }]
57 }
58
59 fn is_match(path: &Path) -> bool {
60 path.file_name()
61 .and_then(|name| name.to_str())
62 .is_some_and(|name| name == "BUCK")
63 }
64
65 fn extract_packages(path: &Path) -> Vec<PackageData> {
66 match parse_buck_build(path) {
67 Ok(packages) if !packages.is_empty() => packages,
68 Ok(_) => vec![fallback_package_data(path)],
69 Err(e) => {
70 warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
71 vec![fallback_package_data(path)]
72 }
73 }
74 }
75}
76
77pub struct BuckMetadataBzlParser;
79
80impl PackageParser for BuckMetadataBzlParser {
81 const PACKAGE_TYPE: PackageType = PackageType::Buck;
82
83 fn is_match(path: &Path) -> bool {
84 path.file_name()
85 .and_then(|name| name.to_str())
86 .is_some_and(|name| name == "METADATA.bzl")
87 }
88
89 fn extract_packages(path: &Path) -> Vec<PackageData> {
90 vec![match parse_metadata_bzl(path) {
91 Ok(pkg) => pkg,
92 Err(e) => {
93 warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
94 PackageData {
95 datasource_id: Some(DatasourceId::BuckMetadata),
96 ..Default::default()
97 }
98 }
99 }]
100 }
101}
102
103fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
105 let content = read_file_to_string(path, None).map_err(|e| e.to_string())?;
106 let module = parse_starlark_module("<BUCK>", content)?;
107
108 let mut packages = Vec::new();
109
110 for statement in top_level_statements(&module)
111 .iter()
112 .take(MAX_ITERATION_COUNT)
113 {
114 if let Some(package_data) = extract_build_package_from_statement(statement) {
115 packages.push(package_data);
116 }
117 }
118
119 Ok(packages)
120}
121
122fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
124 let content = read_file_to_string(path, None).map_err(|e| e.to_string())?;
125 let module = parse_starlark_module("<METADATA.bzl>", content)?;
126
127 for statement in top_level_statements(&module)
128 .iter()
129 .take(MAX_ITERATION_COUNT)
130 {
131 if let Some(dict) = extract_metadata_assignment_dict(statement) {
132 return Ok(extract_metadata_dict(dict));
133 }
134 }
135
136 Ok(PackageData {
138 datasource_id: Some(DatasourceId::BuckMetadata),
139 ..Default::default()
140 })
141}
142
143fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
144 let content = preprocess_starlark_content(&content);
145 let dialect = Dialect {
146 enable_top_level_stmt: true,
147 ..Dialect::Standard
148 };
149 AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
150}
151
/// Normalises `@oss-disable` / `@oss-enable` guard annotations before Starlark parsing.
///
/// Line by line:
/// * a comment line mentioning `@oss-disable` is dropped, and its indentation remembered;
/// * a line containing `# @oss-enable` keeps only the code before that marker (the line
///   is dropped when nothing precedes it); if it directly follows a dropped
///   `@oss-disable` comment, the code is re-indented to that comment's indentation;
/// * any other line is copied through and clears the remembered indentation.
///
/// Line endings are emitted as `\n`, and the result ends with a newline only when
/// `content` did.
fn preprocess_starlark_content(content: &str) -> String {
    const DISABLE_TAG: &str = "@oss-disable";
    const ENABLE_MARKER: &str = "# @oss-enable";

    let mut output = String::with_capacity(content.len());
    // Indentation of the most recent `@oss-disable` comment, until consumed or cleared.
    let mut disabled_indent: Option<String> = None;

    for line in content.lines() {
        let body = line.trim_start();

        if body.starts_with('#') && body.contains(DISABLE_TAG) {
            let indent = &line[..line.len() - body.len()];
            disabled_indent = Some(indent.to_string());
            continue;
        }

        match line.find(ENABLE_MARKER) {
            Some(marker_at) => {
                let code = line[..marker_at].trim_end();
                if code.is_empty() {
                    // Marker-only line: nothing to keep, remembered indentation untouched.
                    continue;
                }
                match disabled_indent.take() {
                    Some(indent) => {
                        output.push_str(&indent);
                        output.push_str(code.trim_start());
                    }
                    None => output.push_str(code),
                }
            }
            None => {
                disabled_indent = None;
                output.push_str(line);
            }
        }
        output.push('\n');
    }

    // Every emitted line got a '\n'; undo the last one if the input had no final newline.
    if !content.ends_with('\n') && output.ends_with('\n') {
        output.pop();
    }

    output
}
191
192fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
193 match &module.statement().node {
194 ast::StmtP::Statements(statements) => statements,
195 _ => std::slice::from_ref(module.statement()),
196 }
197}
198
199fn extract_metadata_assignment_dict(
200 statement: &ast::AstStmt,
201) -> Option<&[(ast::AstExpr, ast::AstExpr)]> {
202 let ast::StmtP::Assign(assign) = &statement.node else {
203 return None;
204 };
205 let ast::AssignTargetP::Identifier(target) = &assign.lhs.node else {
206 return None;
207 };
208 if target.node.ident != "METADATA" {
209 return None;
210 }
211 match &assign.rhs.node {
212 ast::ExprP::Dict(items) => Some(items.as_slice()),
213 _ => None,
214 }
215}
216
217fn extract_metadata_dict(dict: &[(ast::AstExpr, ast::AstExpr)]) -> PackageData {
219 let mut fields: HashMap<String, MetadataValue> = HashMap::new();
220
221 for (key, value) in dict.iter().take(MAX_ITERATION_COUNT) {
222 let Some(key_name) = expr_as_string(key) else {
223 continue;
224 };
225 let Some(metadata_value) = metadata_value_from_expr(value) else {
226 continue;
227 };
228
229 fields.insert(key_name, metadata_value);
230 }
231
232 build_package_from_metadata(fields)
233}
234
235fn get_metadata_string(fields: &HashMap<String, MetadataValue>, keys: &[&str]) -> Option<String> {
236 keys.iter().find_map(|key| match fields.get(*key) {
237 Some(MetadataValue::String(value)) => Some(value.clone()),
238 _ => None,
239 })
240}
241
242fn get_metadata_list(
243 fields: &HashMap<String, MetadataValue>,
244 keys: &[&str],
245) -> Option<Vec<String>> {
246 keys.iter().find_map(|key| match fields.get(*key) {
247 Some(MetadataValue::List(values)) => Some(values.clone()),
248 _ => None,
249 })
250}
251
/// Value of a `METADATA` dict entry that the parser understands.
///
/// The derives let values be debug-printed, copied and compared, e.g. in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string-literal elements of a list or tuple.
    List(Vec<String>),
}
257
258fn split_buck_license_values(values: &[String]) -> (Vec<String>, Vec<String>) {
259 let mut statements = Vec::new();
260 let mut references = Vec::new();
261
262 for value in values {
263 if is_probable_local_license_reference(value) {
264 references.push(value.clone());
265 } else {
266 statements.push(value.clone());
267 }
268 }
269
270 (statements, references)
271}
272
/// Heuristically decides whether a license value names a local file rather than a license.
///
/// After trimming, and ignoring ASCII case, a value counts as a file reference when it
/// * contains a path separator (`/` or `\`), or
/// * starts with a conventional legal-file name (`license`, `licence`, `copying`,
///   `notice`, `copyright`), or
/// * ends with a common document extension (`.txt`, `.md`, `.rst`, `.html`).
///
/// SPDX user-defined identifiers (`LicenseRef-...`) without a path separator are license
/// identifiers, not files, so they are excluded even though they start with `license`.
/// Empty or whitespace-only values are never references.
fn is_probable_local_license_reference(value: &str) -> bool {
    const NAME_PREFIXES: [&str; 5] = ["license", "licence", "copying", "notice", "copyright"];
    const FILE_SUFFIXES: [&str; 4] = [".txt", ".md", ".rst", ".html"];
    const SPDX_LICENSE_REF_PREFIX: &str = "licenseref-";

    let lower = value.trim().to_ascii_lowercase();
    if lower.is_empty() {
        return false;
    }

    // A path separator is decisive, whatever the name looks like.
    if lower.contains('/') || lower.contains('\\') {
        return true;
    }

    // Must run before the prefix test below, which would otherwise match "licenseref-".
    if lower.starts_with(SPDX_LICENSE_REF_PREFIX) {
        return false;
    }

    NAME_PREFIXES.iter().any(|prefix| lower.starts_with(*prefix))
        || FILE_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix))
}
292
293fn insert_license_reference_extra_data(
294 extra_data: &mut HashMap<String, serde_json::Value>,
295 references: &[String],
296) {
297 match references {
298 [] => {}
299 [reference] => {
300 extra_data.insert(
301 "license_file".to_string(),
302 serde_json::Value::String(reference.clone()),
303 );
304 }
305 _ => {
306 extra_data.insert(
307 "license_files".to_string(),
308 serde_json::Value::Array(
309 references
310 .iter()
311 .cloned()
312 .map(serde_json::Value::String)
313 .collect(),
314 ),
315 );
316 }
317 }
318}
319
320fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
322 let mut pkg = PackageData {
323 datasource_id: Some(DatasourceId::BuckMetadata),
324 ..Default::default()
325 };
326 let mut license_references = Vec::new();
327
328 if let Some(name) = get_metadata_string(&fields, &["name"]) {
330 pkg.name = Some(truncate_field(name));
331 }
332
333 if let Some(version) = get_metadata_string(&fields, &["version"]) {
335 pkg.version = Some(truncate_field(version));
336 }
337
338 if let Some(namespace) = get_metadata_string(&fields, &["namespace"]) {
340 pkg.namespace = Some(truncate_field(namespace));
341 }
342
343 if let Some(ecosystem) = get_metadata_string(&fields, &["ecosystem", "type", "package_type"])
346 && let Ok(package_type) = ecosystem.parse::<PackageType>()
347 {
348 pkg.package_type = Some(package_type);
349 }
350
351 if let Some(licenses) = get_metadata_list(&fields, &["licenses"]) {
353 let (license_statements, references) = split_buck_license_values(&licenses);
354 license_references = references;
355 let extracted_license_statement = if !license_statements.is_empty() {
356 Some(license_statements.join(", "))
357 } else if !license_references.is_empty() {
358 Some(license_references.join(", "))
359 } else {
360 None
361 };
362 pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
363 } else if let Some(license_expression) = get_metadata_string(&fields, &["license_expression"]) {
364 pkg.extracted_license_statement = Some(truncate_field(license_expression));
365 }
366
367 if let Some(copyright) = get_metadata_list(&fields, &["copyrights"]) {
368 if !copyright.is_empty() {
369 pkg.copyright = Some(truncate_field(copyright.join("\n")));
370 }
371 } else if let Some(copyright) = get_metadata_string(&fields, &["copyright"]) {
372 pkg.copyright = Some(truncate_field(copyright));
373 }
374
375 if let Some(homepage_url) = get_metadata_string(
377 &fields,
378 &["upstream_address", "upstream_url", "homepage_url"],
379 ) {
380 pkg.homepage_url = Some(truncate_field(homepage_url));
381 }
382
383 if let Some(download_url) = get_metadata_string(&fields, &["download_url"]) {
385 pkg.download_url = Some(truncate_field(download_url));
386 }
387
388 if let Some(vcs_url) = get_metadata_string(&fields, &["vcs_url"]) {
390 pkg.vcs_url = Some(truncate_field(vcs_url));
391 }
392
393 if let Some(sha1) = get_metadata_string(&fields, &["download_archive_sha1"]) {
395 pkg.sha1 = Sha1Digest::from_hex(&sha1).ok();
396 }
397
398 if let Some(maintainers) = get_metadata_list(&fields, &["maintainers"]) {
400 pkg.parties.extend(maintainers.iter().map(|name| Party {
401 r#type: Some("organization".to_string()),
402 name: Some(name.clone()),
403 role: Some("maintainer".to_string()),
404 email: None,
405 url: None,
406 organization: None,
407 organization_url: None,
408 timezone: None,
409 }));
410 }
411
412 if let Some(vendor) = get_metadata_string(&fields, &["vendor", "publisher"]) {
413 pkg.parties.push(Party {
414 r#type: None,
415 name: Some(vendor),
416 role: Some("publisher".to_string()),
417 email: None,
418 url: None,
419 organization: None,
420 organization_url: None,
421 timezone: None,
422 });
423 }
424
425 let mut extra_data = HashMap::new();
427 if let Some(vcs_commit_hash) = get_metadata_string(&fields, &["vcs_commit_hash"]) {
428 extra_data.insert(
429 "vcs_commit_hash".to_string(),
430 serde_json::Value::String(vcs_commit_hash),
431 );
432 }
433 if let Some(upstream_hash) =
434 get_metadata_string(&fields, &["upstream_hash", "upstream_commit_hash"])
435 {
436 extra_data.insert(
437 "upstream_hash".to_string(),
438 serde_json::Value::String(upstream_hash),
439 );
440 }
441 if let Some(upstream_branch) = get_metadata_string(&fields, &["upstream_branch"]) {
442 extra_data.insert(
443 "upstream_branch".to_string(),
444 serde_json::Value::String(upstream_branch),
445 );
446 }
447 insert_license_reference_extra_data(&mut extra_data, &license_references);
448 if !extra_data.is_empty() {
449 pkg.extra_data = Some(extra_data);
450 }
451
452 if let Some(purl_str) = get_metadata_string(&fields, &["package_url"])
454 && let Ok(purl) = purl_str.parse::<PackageUrl>()
455 {
456 pkg.purl = Some(truncate_field(purl.to_string()));
457
458 if let Ok(package_type) = purl.ty().parse::<PackageType>() {
459 pkg.package_type = Some(package_type);
460 }
461 if let Some(ns) = purl.namespace() {
462 pkg.namespace = Some(truncate_field(ns.to_string()));
463 }
464 pkg.name = Some(truncate_field(purl.name().to_string()));
465 if let Some(ver) = purl.version() {
466 pkg.version = Some(truncate_field(ver.to_string()));
467 }
468 if !purl.qualifiers().is_empty() {
470 let quals: HashMap<String, String> = purl
471 .qualifiers()
472 .iter()
473 .map(|(k, v)| (k.to_string(), v.to_string()))
474 .collect();
475 pkg.qualifiers = Some(quals);
476 }
477 if let Some(sp) = purl.subpath() {
479 pkg.subpath = Some(sp.to_string());
480 }
481 }
482
483 pkg
484}
485
486fn metadata_value_from_expr(expr: &ast::AstExpr) -> Option<MetadataValue> {
487 if let Some(string) = expr_as_string(expr) {
488 return Some(MetadataValue::String(string));
489 }
490
491 let items = match &expr.node {
492 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
493 _ => return None,
494 };
495 let values: Vec<_> = items
496 .iter()
497 .take(MAX_ITERATION_COUNT)
498 .filter_map(expr_as_string)
499 .collect();
500 (!values.is_empty()).then_some(MetadataValue::List(values))
501}
502
503fn extract_build_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
505 let call = extract_call(statement)?;
506 let rule_name = match &call.func.node {
507 ast::ExprP::Identifier(identifier) => identifier.node.ident.as_str(),
508 _ => return None,
509 };
510
511 if !check_rule_name_ending(rule_name) {
512 return None;
513 }
514
515 let name = extract_named_kwarg_string(&call, "name");
516 let licenses = extract_named_kwarg_string_list(&call, "licenses");
517
518 let package_name = name?;
519 let (license_statements, license_references) = licenses
520 .as_deref()
521 .map(split_buck_license_values)
522 .unwrap_or_default();
523 let extracted_license_statement = if !license_statements.is_empty() {
524 Some(truncate_field(license_statements.join(", ")))
525 } else if !license_references.is_empty() {
526 Some(truncate_field(license_references.join(", ")))
527 } else {
528 None
529 };
530 let mut extra_data = HashMap::new();
531 insert_license_reference_extra_data(&mut extra_data, &license_references);
532
533 Some(PackageData {
534 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
535 name: Some(truncate_field(package_name)),
536 extracted_license_statement,
537 extra_data: (!extra_data.is_empty()).then_some(extra_data),
538 datasource_id: Some(DatasourceId::BuckFile),
539 ..Default::default()
540 })
541}
542
543fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
544 match &statement.node {
545 ast::StmtP::Expression(expr) => extract_call_expr(expr),
546 ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
547 _ => None,
548 }
549}
550
551fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
552 match &expr.node {
553 ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
554 _ => None,
555 }
556}
557
558fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
559 call.args
560 .args
561 .iter()
562 .find_map(|argument| match &argument.node {
563 ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
564 _ => None,
565 })
566}
567
568fn extract_named_kwarg_string(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
569 extract_named_kwarg(call, key).and_then(expr_as_string)
570}
571
572fn extract_named_kwarg_string_list(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
573 let expr = extract_named_kwarg(call, key)?;
574 let items = match &expr.node {
575 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
576 _ => return None,
577 };
578 let values: Vec<_> = items
579 .iter()
580 .take(MAX_ITERATION_COUNT)
581 .filter_map(expr_as_string)
582 .collect();
583 (!values.is_empty()).then_some(values)
584}
585
586fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
587 match &expr.node {
588 ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
589 _ => None,
590 }
591}
592
/// Returns `true` when `rule_name` ends with `binary` or `library`.
fn check_rule_name_ending(rule_name: &str) -> bool {
    const ACCEPTED_SUFFIXES: [&str; 2] = ["binary", "library"];

    ACCEPTED_SUFFIXES
        .iter()
        .any(|suffix| rule_name.ends_with(*suffix))
}
597
598fn fallback_package_data(path: &Path) -> PackageData {
600 let name = path
601 .parent()
602 .and_then(|p| p.file_name())
603 .and_then(|n| n.to_str())
604 .map(|s| s.to_string());
605
606 PackageData {
607 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
608 name,
609 datasource_id: Some(DatasourceId::BuckFile),
610 ..Default::default()
611 }
612}
613
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Only a final path component spelled exactly `BUCK` is accepted.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// Only a final path component spelled exactly `METADATA.bzl` is accepted.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(&PathBuf::from(rejected)));
        }
    }

    #[test]
    fn test_check_rule_name_ending() {
        for rule in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(rule));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }

    /// Guard comments disappear and the enabled alternative takes the disabled line's
    /// indentation.
    #[test]
    fn test_preprocess_starlark_content_handles_oss_guarded_alternatives() {
        let source = r#"# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_unittest.bzl", "rust_unittest")
prelude = native

# @oss-disable: rust_unittest(
 rust_test( # @oss-enable
 name = "test",
 )

platform_utils = None # @oss-enable
"#;

        let output = preprocess_starlark_content(source);

        for absent in ["@oss-disable", "@oss-enable", " rust_test("] {
            assert!(!output.contains(absent));
        }
        for present in ["rust_test(", "platform_utils = None"] {
            assert!(output.contains(present));
        }
    }

    /// A guarded rule must not break parsing; only the `rust_library` rule is reported.
    #[test]
    fn test_parse_buck_build_with_oss_guarded_rule() {
        let source = r#"# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_library.bzl", "rust_library")
# @oss-disable[end= ]: load("@fbsource//tools/build_defs:rust_unittest.bzl", "rust_unittest")

oncall("build_infra")

rust_library(
 name = "library",
 srcs = ["src/lib.rs"],
)

# @oss-disable: rust_unittest(
 rust_test( # @oss-enable
 name = "test",
 srcs = ["tests/test.rs"],
)
"#;

        let workspace = tempfile::tempdir().unwrap();
        let buck_file = workspace.path().join("BUCK");
        std::fs::write(&buck_file, source).unwrap();

        let packages = parse_buck_build(&buck_file).expect("BUCK file should parse");

        assert_eq!(packages.len(), 1);
        let library = &packages[0];
        assert_eq!(library.package_type, Some(PackageType::Buck));
        assert_eq!(library.name.as_deref(), Some("library"));
    }
}