1use std::collections::HashMap;
19use std::path::Path;
20
21use crate::parser_warn as warn;
22use packageurl::PackageUrl;
23use starlark_syntax::syntax::ast;
24use starlark_syntax::syntax::module::AstModuleFields;
25use starlark_syntax::syntax::{AstModule, Dialect};
26
27use crate::models::{DatasourceId, PackageData, PackageType, Party, Sha1Digest};
28
29use super::PackageParser;
30
/// Argument list of a Starlark call expression, using the payload-free AST.
type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;

/// Borrowed view of a single Starlark call expression.
struct StarlarkCall<'a> {
    /// Expression in callee position of the call.
    func: &'a ast::AstExpr,
    /// Arguments supplied to the call.
    args: &'a StarlarkCallArgs,
}
37
38pub struct BuckBuildParser;
40
41impl PackageParser for BuckBuildParser {
42 const PACKAGE_TYPE: PackageType = PackageType::Buck;
43
44 fn is_match(path: &Path) -> bool {
45 path.file_name()
46 .and_then(|name| name.to_str())
47 .is_some_and(|name| name == "BUCK")
48 }
49
50 fn extract_packages(path: &Path) -> Vec<PackageData> {
51 match parse_buck_build(path) {
52 Ok(packages) if !packages.is_empty() => packages,
53 Ok(_) => vec![fallback_package_data(path)],
54 Err(e) => {
55 warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
56 vec![fallback_package_data(path)]
57 }
58 }
59 }
60}
61
62pub struct BuckMetadataBzlParser;
64
65impl PackageParser for BuckMetadataBzlParser {
66 const PACKAGE_TYPE: PackageType = PackageType::Buck;
67
68 fn is_match(path: &Path) -> bool {
69 path.file_name()
70 .and_then(|name| name.to_str())
71 .is_some_and(|name| name == "METADATA.bzl")
72 }
73
74 fn extract_packages(path: &Path) -> Vec<PackageData> {
75 vec![match parse_metadata_bzl(path) {
76 Ok(pkg) => pkg,
77 Err(e) => {
78 warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
79 PackageData {
80 package_type: Some(Self::PACKAGE_TYPE),
81 datasource_id: Some(DatasourceId::BuckMetadata),
82 ..Default::default()
83 }
84 }
85 }]
86 }
87}
88
89fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
91 let content =
92 std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
93 let module = parse_starlark_module("<BUCK>", content)?;
94
95 let mut packages = Vec::new();
96
97 for statement in top_level_statements(&module) {
98 if let Some(package_data) = extract_build_package_from_statement(statement) {
99 packages.push(package_data);
100 }
101 }
102
103 Ok(packages)
104}
105
106fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
108 let content =
109 std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
110 let module = parse_starlark_module("<METADATA.bzl>", content)?;
111
112 for statement in top_level_statements(&module) {
114 if let Some(dict) = extract_metadata_assignment_dict(statement) {
115 return Ok(extract_metadata_dict(dict));
116 }
117 }
118
119 Ok(PackageData {
121 package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
122 datasource_id: Some(DatasourceId::BuckMetadata),
123 ..Default::default()
124 })
125}
126
127fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
128 let dialect = Dialect {
129 enable_top_level_stmt: true,
130 ..Dialect::Standard
131 };
132 AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
133}
134
135fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
136 match &module.statement().node {
137 ast::StmtP::Statements(statements) => statements,
138 _ => std::slice::from_ref(module.statement()),
139 }
140}
141
142fn extract_metadata_assignment_dict(
143 statement: &ast::AstStmt,
144) -> Option<&[(ast::AstExpr, ast::AstExpr)]> {
145 let ast::StmtP::Assign(assign) = &statement.node else {
146 return None;
147 };
148 let ast::AssignTargetP::Identifier(target) = &assign.lhs.node else {
149 return None;
150 };
151 if target.node.ident != "METADATA" {
152 return None;
153 }
154 match &assign.rhs.node {
155 ast::ExprP::Dict(items) => Some(items.as_slice()),
156 _ => None,
157 }
158}
159
160fn extract_metadata_dict(dict: &[(ast::AstExpr, ast::AstExpr)]) -> PackageData {
162 let mut fields: HashMap<String, MetadataValue> = HashMap::new();
163
164 for (key, value) in dict {
165 let Some(key_name) = expr_as_string(key) else {
166 continue;
167 };
168 let Some(metadata_value) = metadata_value_from_expr(value) else {
169 continue;
170 };
171
172 fields.insert(key_name, metadata_value);
173 }
174
175 build_package_from_metadata(fields)
176}
177
178fn get_metadata_string(fields: &HashMap<String, MetadataValue>, keys: &[&str]) -> Option<String> {
179 keys.iter().find_map(|key| match fields.get(*key) {
180 Some(MetadataValue::String(value)) => Some(value.clone()),
181 _ => None,
182 })
183}
184
185fn get_metadata_list(
186 fields: &HashMap<String, MetadataValue>,
187 keys: &[&str],
188) -> Option<Vec<String>> {
189 keys.iter().find_map(|key| match fields.get(*key) {
190 Some(MetadataValue::List(values)) => Some(values.clone()),
191 _ => None,
192 })
193}
194
/// Value of a `METADATA` dict entry, as retained by this parser.
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string literals collected from a list or tuple expression.
    List(Vec<String>),
}
200
201fn split_buck_license_values(values: &[String]) -> (Vec<String>, Vec<String>) {
202 let mut statements = Vec::new();
203 let mut references = Vec::new();
204
205 for value in values {
206 if is_probable_local_license_reference(value) {
207 references.push(value.clone());
208 } else {
209 statements.push(value.clone());
210 }
211 }
212
213 (statements, references)
214}
215
/// Heuristically decides whether a `licenses` entry names a local license file
/// rather than a license identifier or expression.
///
/// After trimming and ASCII-lowercasing, a value counts as a file reference when it
///
/// - contains a path separator (`/` or `\`),
/// - ends with `.txt`, `.md`, `.rst` or `.html`, or
/// - starts with `license`, `licence`, `copying`, `notice` or `copyright`.
///
/// SPDX user-defined identifiers (`LicenseRef-...`) also start with "license" but
/// are license names, not files. They are therefore only treated as references
/// when they additionally look like a path or a document file. Blank values are
/// never references.
fn is_probable_local_license_reference(value: &str) -> bool {
    const FILE_NAME_PREFIXES: [&str; 5] = ["license", "licence", "copying", "notice", "copyright"];
    const DOCUMENT_SUFFIXES: [&str; 4] = [".txt", ".md", ".rst", ".html"];
    const SPDX_LICENSE_REF_PREFIX: &str = "licenseref-";

    let lower = value.trim().to_ascii_lowercase();
    if lower.is_empty() {
        return false;
    }

    let looks_like_path = lower.contains('/')
        || lower.contains('\\')
        || DOCUMENT_SUFFIXES
            .iter()
            .any(|&suffix| lower.ends_with(suffix));
    if looks_like_path {
        return true;
    }

    // Checked before the generic "license" prefix, which would otherwise swallow
    // every SPDX `LicenseRef-` identifier.
    if lower.starts_with(SPDX_LICENSE_REF_PREFIX) {
        return false;
    }

    FILE_NAME_PREFIXES
        .iter()
        .any(|&prefix| lower.starts_with(prefix))
}
235
236fn insert_license_reference_extra_data(
237 extra_data: &mut HashMap<String, serde_json::Value>,
238 references: &[String],
239) {
240 match references {
241 [] => {}
242 [reference] => {
243 extra_data.insert(
244 "license_file".to_string(),
245 serde_json::Value::String(reference.clone()),
246 );
247 }
248 _ => {
249 extra_data.insert(
250 "license_files".to_string(),
251 serde_json::Value::Array(
252 references
253 .iter()
254 .cloned()
255 .map(serde_json::Value::String)
256 .collect(),
257 ),
258 );
259 }
260 }
261}
262
263fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
265 let mut pkg = PackageData {
266 package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
267 datasource_id: Some(DatasourceId::BuckMetadata),
268 ..Default::default()
269 };
270 let mut license_references = Vec::new();
271
272 if let Some(name) = get_metadata_string(&fields, &["name"]) {
274 pkg.name = Some(name);
275 }
276
277 if let Some(version) = get_metadata_string(&fields, &["version"]) {
279 pkg.version = Some(version);
280 }
281
282 if let Some(namespace) = get_metadata_string(&fields, &["namespace"]) {
284 pkg.namespace = Some(namespace);
285 }
286
287 if let Some(ecosystem) = get_metadata_string(&fields, &["ecosystem", "type", "package_type"])
290 && let Ok(package_type) = ecosystem.parse::<PackageType>()
291 {
292 pkg.package_type = Some(package_type);
293 }
294
295 if let Some(licenses) = get_metadata_list(&fields, &["licenses"]) {
297 let (license_statements, references) = split_buck_license_values(&licenses);
298 license_references = references;
299 let extracted_license_statement = if !license_statements.is_empty() {
300 Some(license_statements.join(", "))
301 } else if !license_references.is_empty() {
302 Some(license_references.join(", "))
303 } else {
304 None
305 };
306 pkg.extracted_license_statement = extracted_license_statement;
307 } else if let Some(license_expression) = get_metadata_string(&fields, &["license_expression"]) {
308 pkg.extracted_license_statement = Some(license_expression);
309 }
310
311 if let Some(copyright) = get_metadata_list(&fields, &["copyrights"]) {
312 if !copyright.is_empty() {
313 pkg.copyright = Some(copyright.join("\n"));
314 }
315 } else if let Some(copyright) = get_metadata_string(&fields, &["copyright"]) {
316 pkg.copyright = Some(copyright);
317 }
318
319 if let Some(homepage_url) = get_metadata_string(
321 &fields,
322 &["upstream_address", "upstream_url", "homepage_url"],
323 ) {
324 pkg.homepage_url = Some(homepage_url);
325 }
326
327 if let Some(download_url) = get_metadata_string(&fields, &["download_url"]) {
329 pkg.download_url = Some(download_url);
330 }
331
332 if let Some(vcs_url) = get_metadata_string(&fields, &["vcs_url"]) {
334 pkg.vcs_url = Some(vcs_url);
335 }
336
337 if let Some(sha1) = get_metadata_string(&fields, &["download_archive_sha1"]) {
339 pkg.sha1 = Sha1Digest::from_hex(&sha1).ok();
340 }
341
342 if let Some(maintainers) = get_metadata_list(&fields, &["maintainers"]) {
344 pkg.parties.extend(maintainers.iter().map(|name| Party {
345 r#type: Some("organization".to_string()),
346 name: Some(name.clone()),
347 role: Some("maintainer".to_string()),
348 email: None,
349 url: None,
350 organization: None,
351 organization_url: None,
352 timezone: None,
353 }));
354 }
355
356 if let Some(vendor) = get_metadata_string(&fields, &["vendor", "publisher"]) {
357 pkg.parties.push(Party {
358 r#type: None,
359 name: Some(vendor),
360 role: Some("publisher".to_string()),
361 email: None,
362 url: None,
363 organization: None,
364 organization_url: None,
365 timezone: None,
366 });
367 }
368
369 let mut extra_data = HashMap::new();
371 if let Some(vcs_commit_hash) = get_metadata_string(&fields, &["vcs_commit_hash"]) {
372 extra_data.insert(
373 "vcs_commit_hash".to_string(),
374 serde_json::Value::String(vcs_commit_hash),
375 );
376 }
377 if let Some(upstream_hash) =
378 get_metadata_string(&fields, &["upstream_hash", "upstream_commit_hash"])
379 {
380 extra_data.insert(
381 "upstream_hash".to_string(),
382 serde_json::Value::String(upstream_hash),
383 );
384 }
385 if let Some(upstream_branch) = get_metadata_string(&fields, &["upstream_branch"]) {
386 extra_data.insert(
387 "upstream_branch".to_string(),
388 serde_json::Value::String(upstream_branch),
389 );
390 }
391 insert_license_reference_extra_data(&mut extra_data, &license_references);
392 if !extra_data.is_empty() {
393 pkg.extra_data = Some(extra_data);
394 }
395
396 if let Some(purl_str) = get_metadata_string(&fields, &["package_url"])
398 && let Ok(purl) = purl_str.parse::<PackageUrl>()
399 {
400 pkg.purl = Some(purl.to_string());
401
402 if let Ok(package_type) = purl.ty().parse::<PackageType>() {
404 pkg.package_type = Some(package_type);
405 }
406 if let Some(ns) = purl.namespace() {
407 pkg.namespace = Some(ns.to_string());
408 }
409 pkg.name = Some(purl.name().to_string());
410 if let Some(ver) = purl.version() {
411 pkg.version = Some(ver.to_string());
412 }
413 if !purl.qualifiers().is_empty() {
415 let quals: HashMap<String, String> = purl
416 .qualifiers()
417 .iter()
418 .map(|(k, v)| (k.to_string(), v.to_string()))
419 .collect();
420 pkg.qualifiers = Some(quals);
421 }
422 if let Some(sp) = purl.subpath() {
424 pkg.subpath = Some(sp.to_string());
425 }
426 }
427
428 pkg
429}
430
431fn metadata_value_from_expr(expr: &ast::AstExpr) -> Option<MetadataValue> {
432 if let Some(string) = expr_as_string(expr) {
433 return Some(MetadataValue::String(string));
434 }
435
436 let items = match &expr.node {
437 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
438 _ => return None,
439 };
440 let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
441 (!values.is_empty()).then_some(MetadataValue::List(values))
442}
443
444fn extract_build_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
446 let call = extract_call(statement)?;
447 let rule_name = match &call.func.node {
448 ast::ExprP::Identifier(identifier) => identifier.node.ident.as_str(),
449 _ => return None,
450 };
451
452 if !check_rule_name_ending(rule_name) {
453 return None;
454 }
455
456 let name = extract_named_kwarg_string(&call, "name");
457 let licenses = extract_named_kwarg_string_list(&call, "licenses");
458
459 let package_name = name?;
460 let (license_statements, license_references) = licenses
461 .as_deref()
462 .map(split_buck_license_values)
463 .unwrap_or_default();
464 let extracted_license_statement = if !license_statements.is_empty() {
465 Some(license_statements.join(", "))
466 } else if !license_references.is_empty() {
467 Some(license_references.join(", "))
468 } else {
469 None
470 };
471 let mut extra_data = HashMap::new();
472 insert_license_reference_extra_data(&mut extra_data, &license_references);
473
474 Some(PackageData {
475 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
476 name: Some(package_name),
477 extracted_license_statement,
478 extra_data: (!extra_data.is_empty()).then_some(extra_data),
479 datasource_id: Some(DatasourceId::BuckFile),
480 ..Default::default()
481 })
482}
483
484fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
485 match &statement.node {
486 ast::StmtP::Expression(expr) => extract_call_expr(expr),
487 ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
488 _ => None,
489 }
490}
491
492fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
493 match &expr.node {
494 ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
495 _ => None,
496 }
497}
498
499fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
500 call.args
501 .args
502 .iter()
503 .find_map(|argument| match &argument.node {
504 ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
505 _ => None,
506 })
507}
508
509fn extract_named_kwarg_string(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
510 extract_named_kwarg(call, key).and_then(expr_as_string)
511}
512
513fn extract_named_kwarg_string_list(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
514 let expr = extract_named_kwarg(call, key)?;
515 let items = match &expr.node {
516 ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
517 _ => return None,
518 };
519 let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
520 (!values.is_empty()).then_some(values)
521}
522
523fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
524 match &expr.node {
525 ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
526 _ => None,
527 }
528}
529
/// Reports whether a rule name ends in `binary` or `library`.
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .iter()
        .any(|&suffix| rule_name.ends_with(suffix))
}
534
535fn fallback_package_data(path: &Path) -> PackageData {
537 let name = path
538 .parent()
539 .and_then(|p| p.file_name())
540 .and_then(|n| n.to_str())
541 .map(|s| s.to_string());
542
543 PackageData {
544 package_type: Some(BuckBuildParser::PACKAGE_TYPE),
545 name,
546 datasource_id: Some(DatasourceId::BuckFile),
547 ..Default::default()
548 }
549}
550
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// `BUCK` is matched by exact, case-sensitive file name.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(Path::new(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(Path::new(rejected)));
        }
    }

    /// `METADATA.bzl` is matched by exact, case-sensitive file name.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(Path::new(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(Path::new(rejected)));
        }
    }

    /// Only rule names ending in `binary` or `library` are accepted.
    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(accepted));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }
}
586
// Invokes the crate-level `register_parser!` macro for this module.
// NOTE(review): the positional arguments appear to be a human-readable
// description, the path globs handled (they mirror the two `is_match`
// implementations in this file), a package-type identifier, a string left empty
// here, and a documentation URL. Confirm against the macro definition.
crate::register_parser!(
    "Buck build file and METADATA.bzl",
    &["**/BUCK", "**/METADATA.bzl"],
    "buck",
    "",
    Some("https://buck.build/"),
);