Skip to main content

provenant/parsers/
buck.rs

//! Buck BUILD and METADATA.bzl parsers
//!
//! Extracts package metadata from Buck build system files using Starlark (Python-like) syntax.
//!
//! ## Features
//! - **BuckBuildParser**: Parses BUCK files with multiple package support
//! - **BuckMetadataBzlParser**: Parses METADATA.bzl dictionary assignments with package_url support
//!
//! ## Usage
//! - `BuckBuildParser::extract_packages()` - Returns ALL packages from BUCK file
//! - `BuckMetadataBzlParser::extract_first_package()` - Returns single package from METADATA.bzl
//!
//! ## Reference
//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py`
//! - BuckPackageHandler (lines 310-325)
//! - BuckMetadataBzlHandler (lines 328-432)
17
18use std::collections::HashMap;
19use std::path::Path;
20
21use crate::parser_warn as warn;
22use packageurl::PackageUrl;
23use starlark_syntax::syntax::ast;
24use starlark_syntax::syntax::module::AstModuleFields;
25use starlark_syntax::syntax::{AstModule, Dialect};
26
27use crate::models::{DatasourceId, PackageData, PackageType, Party, Sha1Digest};
28
29use super::PackageParser;
30
31type StarlarkCallArgs = ast::CallArgsP<ast::AstNoPayload>;
32
33struct StarlarkCall<'a> {
34    func: &'a ast::AstExpr,
35    args: &'a StarlarkCallArgs,
36}
37
38/// Parser for Buck BUCK files (build rules)
39pub struct BuckBuildParser;
40
41impl PackageParser for BuckBuildParser {
42    const PACKAGE_TYPE: PackageType = PackageType::Buck;
43
44    fn is_match(path: &Path) -> bool {
45        path.file_name()
46            .and_then(|name| name.to_str())
47            .is_some_and(|name| name == "BUCK")
48    }
49
50    fn extract_packages(path: &Path) -> Vec<PackageData> {
51        match parse_buck_build(path) {
52            Ok(packages) if !packages.is_empty() => packages,
53            Ok(_) => vec![fallback_package_data(path)],
54            Err(e) => {
55                warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
56                vec![fallback_package_data(path)]
57            }
58        }
59    }
60}
61
62/// Parser for Buck METADATA.bzl files (metadata dictionaries)
63pub struct BuckMetadataBzlParser;
64
65impl PackageParser for BuckMetadataBzlParser {
66    const PACKAGE_TYPE: PackageType = PackageType::Buck;
67
68    fn is_match(path: &Path) -> bool {
69        path.file_name()
70            .and_then(|name| name.to_str())
71            .is_some_and(|name| name == "METADATA.bzl")
72    }
73
74    fn extract_packages(path: &Path) -> Vec<PackageData> {
75        vec![match parse_metadata_bzl(path) {
76            Ok(pkg) => pkg,
77            Err(e) => {
78                warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
79                PackageData {
80                    package_type: Some(Self::PACKAGE_TYPE),
81                    datasource_id: Some(DatasourceId::BuckMetadata),
82                    ..Default::default()
83                }
84            }
85        }]
86    }
87}
88
89/// Parse a Buck BUCK file (same logic as Bazel BUILD)
90fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
91    let content =
92        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
93    let module = parse_starlark_module("<BUCK>", content)?;
94
95    let mut packages = Vec::new();
96
97    for statement in top_level_statements(&module) {
98        if let Some(package_data) = extract_build_package_from_statement(statement) {
99            packages.push(package_data);
100        }
101    }
102
103    Ok(packages)
104}
105
106/// Parse a Buck METADATA.bzl file
107fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
108    let content =
109        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
110    let module = parse_starlark_module("<METADATA.bzl>", content)?;
111
112    // Look for METADATA = {...} assignment
113    for statement in top_level_statements(&module) {
114        if let Some(dict) = extract_metadata_assignment_dict(statement) {
115            return Ok(extract_metadata_dict(dict));
116        }
117    }
118
119    // No METADATA found
120    Ok(PackageData {
121        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
122        datasource_id: Some(DatasourceId::BuckMetadata),
123        ..Default::default()
124    })
125}
126
127fn parse_starlark_module(filename: &str, content: String) -> Result<AstModule, String> {
128    let dialect = Dialect {
129        enable_top_level_stmt: true,
130        ..Dialect::Standard
131    };
132    AstModule::parse(filename, content, &dialect).map_err(|error| error.to_string())
133}
134
135fn top_level_statements(module: &AstModule) -> &[ast::AstStmt] {
136    match &module.statement().node {
137        ast::StmtP::Statements(statements) => statements,
138        _ => std::slice::from_ref(module.statement()),
139    }
140}
141
142fn extract_metadata_assignment_dict(
143    statement: &ast::AstStmt,
144) -> Option<&[(ast::AstExpr, ast::AstExpr)]> {
145    let ast::StmtP::Assign(assign) = &statement.node else {
146        return None;
147    };
148    let ast::AssignTargetP::Identifier(target) = &assign.lhs.node else {
149        return None;
150    };
151    if target.node.ident != "METADATA" {
152        return None;
153    }
154    match &assign.rhs.node {
155        ast::ExprP::Dict(items) => Some(items.as_slice()),
156        _ => None,
157    }
158}
159
160/// Extract metadata from a dictionary AST node
161fn extract_metadata_dict(dict: &[(ast::AstExpr, ast::AstExpr)]) -> PackageData {
162    let mut fields: HashMap<String, MetadataValue> = HashMap::new();
163
164    for (key, value) in dict {
165        let Some(key_name) = expr_as_string(key) else {
166            continue;
167        };
168        let Some(metadata_value) = metadata_value_from_expr(value) else {
169            continue;
170        };
171
172        fields.insert(key_name, metadata_value);
173    }
174
175    build_package_from_metadata(fields)
176}
177
/// A value read from a `METADATA` dictionary entry.
///
/// Derives `Debug`, `Clone` and equality so values can be logged, copied and
/// compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string literals found in a list or tuple.
    List(Vec<String>),
}
183
184fn split_buck_license_values(values: &[String]) -> (Vec<String>, Vec<String>) {
185    let mut statements = Vec::new();
186    let mut references = Vec::new();
187
188    for value in values {
189        if is_probable_local_license_reference(value) {
190            references.push(value.clone());
191        } else {
192            statements.push(value.clone());
193        }
194    }
195
196    (statements, references)
197}
198
/// Heuristically decide whether a `licenses` entry names a local license file
/// rather than a license identifier or expression.
///
/// After trimming and ASCII-lowercasing, a value is treated as a file
/// reference when it:
/// - contains a path separator (`/` or `\`), or
/// - ends with a common documentation extension (`.txt`, `.md`, `.rst`,
///   `.html`), or
/// - starts with a conventional license file name (`license`, `licence`,
///   `copying`, `notice`, `copyright`).
///
/// SPDX user-defined identifiers (`LicenseRef-...`) also start with
/// "license" but are license statements, not files, so they are exempt from
/// the file-name-prefix rule. The separator and extension rules still apply
/// to them.
///
/// Empty or whitespace-only input is never a reference.
fn is_probable_local_license_reference(value: &str) -> bool {
    const FILE_NAME_PREFIXES: [&str; 5] = ["license", "licence", "copying", "notice", "copyright"];
    const FILE_NAME_SUFFIXES: [&str; 4] = [".txt", ".md", ".rst", ".html"];
    const SPDX_LICENSE_REF_PREFIX: &str = "licenseref-";

    let lower = value.trim().to_ascii_lowercase();
    if lower.is_empty() {
        return false;
    }

    let looks_like_path = lower.contains(['/', '\\']);
    let has_doc_extension = FILE_NAME_SUFFIXES
        .iter()
        .any(|&suffix| lower.ends_with(suffix));
    if looks_like_path || has_doc_extension {
        return true;
    }

    // The prefix rule is the only one that collides with `LicenseRef-*`.
    !lower.starts_with(SPDX_LICENSE_REF_PREFIX)
        && FILE_NAME_PREFIXES
            .iter()
            .any(|&prefix| lower.starts_with(prefix))
}
218
219fn insert_license_reference_extra_data(
220    extra_data: &mut HashMap<String, serde_json::Value>,
221    references: &[String],
222) {
223    match references {
224        [] => {}
225        [reference] => {
226            extra_data.insert(
227                "license_file".to_string(),
228                serde_json::Value::String(reference.clone()),
229            );
230        }
231        _ => {
232            extra_data.insert(
233                "license_files".to_string(),
234                serde_json::Value::Array(
235                    references
236                        .iter()
237                        .cloned()
238                        .map(serde_json::Value::String)
239                        .collect(),
240                ),
241            );
242        }
243    }
244}
245
246/// Build PackageData from extracted metadata fields
247fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
248    let mut pkg = PackageData {
249        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
250        datasource_id: Some(DatasourceId::BuckMetadata),
251        ..Default::default()
252    };
253    let mut license_references = Vec::new();
254
255    // Extract name
256    if let Some(MetadataValue::String(s)) = fields.get("name") {
257        pkg.name = Some(s.clone());
258    }
259
260    // Extract version
261    if let Some(MetadataValue::String(s)) = fields.get("version") {
262        pkg.version = Some(s.clone());
263    }
264
265    // Extract package type (upstream_type or package_type)
266    if let Some(MetadataValue::String(s)) = fields.get("upstream_type") {
267        pkg.package_type = s.parse::<PackageType>().ok();
268    } else if let Some(MetadataValue::String(s)) = fields.get("package_type") {
269        pkg.package_type = s.parse::<PackageType>().ok();
270    }
271
272    // Extract licenses (licenses or license_expression)
273    if let Some(MetadataValue::List(licenses)) = fields.get("licenses") {
274        let (license_statements, references) = split_buck_license_values(licenses);
275        license_references = references;
276        let extracted_license_statement = if !license_statements.is_empty() {
277            Some(license_statements.join(", "))
278        } else if !license_references.is_empty() {
279            Some(license_references.join(", "))
280        } else {
281            None
282        };
283        pkg.extracted_license_statement = extracted_license_statement;
284    } else if let Some(MetadataValue::String(s)) = fields.get("license_expression") {
285        pkg.extracted_license_statement = Some(s.clone());
286    }
287
288    // Extract homepage (upstream_address or homepage_url)
289    if let Some(MetadataValue::String(s)) = fields.get("upstream_address") {
290        pkg.homepage_url = Some(s.clone());
291    } else if let Some(MetadataValue::String(s)) = fields.get("homepage_url") {
292        pkg.homepage_url = Some(s.clone());
293    }
294
295    // Extract download_url
296    if let Some(MetadataValue::String(s)) = fields.get("download_url") {
297        pkg.download_url = Some(s.clone());
298    }
299
300    // Extract vcs_url
301    if let Some(MetadataValue::String(s)) = fields.get("vcs_url") {
302        pkg.vcs_url = Some(s.clone());
303    }
304
305    // Extract sha1 (download_archive_sha1)
306    if let Some(MetadataValue::String(s)) = fields.get("download_archive_sha1") {
307        pkg.sha1 = Sha1Digest::from_hex(s).ok();
308    }
309
310    // Extract maintainers
311    if let Some(MetadataValue::List(maintainers)) = fields.get("maintainers") {
312        pkg.parties = maintainers
313            .iter()
314            .map(|name| Party {
315                r#type: Some("organization".to_string()),
316                name: Some(name.clone()),
317                role: Some("maintainer".to_string()),
318                email: None,
319                url: None,
320                organization: None,
321                organization_url: None,
322                timezone: None,
323            })
324            .collect();
325    }
326
327    // Extract extra_data fields
328    let mut extra_data = HashMap::new();
329    if let Some(MetadataValue::String(s)) = fields.get("vcs_commit_hash") {
330        extra_data.insert(
331            "vcs_commit_hash".to_string(),
332            serde_json::Value::String(s.clone()),
333        );
334    }
335    if let Some(MetadataValue::String(s)) = fields.get("upstream_hash") {
336        extra_data.insert(
337            "upstream_hash".to_string(),
338            serde_json::Value::String(s.clone()),
339        );
340    }
341    insert_license_reference_extra_data(&mut extra_data, &license_references);
342    if !extra_data.is_empty() {
343        pkg.extra_data = Some(extra_data);
344    }
345
346    // Parse package_url if present and update package fields
347    if let Some(MetadataValue::String(purl_str)) = fields.get("package_url")
348        && let Ok(purl) = purl_str.parse::<PackageUrl>()
349    {
350        // Override package fields with purl data
351        pkg.package_type = purl.ty().parse::<PackageType>().ok();
352        if let Some(ns) = purl.namespace() {
353            pkg.namespace = Some(ns.to_string());
354        }
355        pkg.name = Some(purl.name().to_string());
356        if let Some(ver) = purl.version() {
357            pkg.version = Some(ver.to_string());
358        }
359        // Qualifiers
360        if !purl.qualifiers().is_empty() {
361            let quals: HashMap<String, String> = purl
362                .qualifiers()
363                .iter()
364                .map(|(k, v)| (k.to_string(), v.to_string()))
365                .collect();
366            pkg.qualifiers = Some(quals);
367        }
368        // Subpath
369        if let Some(sp) = purl.subpath() {
370            pkg.subpath = Some(sp.to_string());
371        }
372    }
373
374    pkg
375}
376
377fn metadata_value_from_expr(expr: &ast::AstExpr) -> Option<MetadataValue> {
378    if let Some(string) = expr_as_string(expr) {
379        return Some(MetadataValue::String(string));
380    }
381
382    let items = match &expr.node {
383        ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
384        _ => return None,
385    };
386    let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
387    (!values.is_empty()).then_some(MetadataValue::List(values))
388}
389
390/// Extract package data from a single AST statement (for BUCK files)
391fn extract_build_package_from_statement(statement: &ast::AstStmt) -> Option<PackageData> {
392    let call = extract_call(statement)?;
393    let rule_name = match &call.func.node {
394        ast::ExprP::Identifier(identifier) => identifier.node.ident.as_str(),
395        _ => return None,
396    };
397
398    if !check_rule_name_ending(rule_name) {
399        return None;
400    }
401
402    let name = extract_named_kwarg_string(&call, "name");
403    let licenses = extract_named_kwarg_string_list(&call, "licenses");
404
405    let package_name = name?;
406    let (license_statements, license_references) = licenses
407        .as_deref()
408        .map(split_buck_license_values)
409        .unwrap_or_default();
410    let extracted_license_statement = if !license_statements.is_empty() {
411        Some(license_statements.join(", "))
412    } else if !license_references.is_empty() {
413        Some(license_references.join(", "))
414    } else {
415        None
416    };
417    let mut extra_data = HashMap::new();
418    insert_license_reference_extra_data(&mut extra_data, &license_references);
419
420    Some(PackageData {
421        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
422        name: Some(package_name),
423        extracted_license_statement,
424        extra_data: (!extra_data.is_empty()).then_some(extra_data),
425        datasource_id: Some(DatasourceId::BuckFile),
426        ..Default::default()
427    })
428}
429
430fn extract_call(statement: &ast::AstStmt) -> Option<StarlarkCall<'_>> {
431    match &statement.node {
432        ast::StmtP::Expression(expr) => extract_call_expr(expr),
433        ast::StmtP::Assign(assign) => extract_call_expr(&assign.rhs),
434        _ => None,
435    }
436}
437
438fn extract_call_expr(expr: &ast::AstExpr) -> Option<StarlarkCall<'_>> {
439    match &expr.node {
440        ast::ExprP::Call(func, args) => Some(StarlarkCall { func, args }),
441        _ => None,
442    }
443}
444
445fn extract_named_kwarg<'a>(call: &'a StarlarkCall<'_>, key: &str) -> Option<&'a ast::AstExpr> {
446    call.args
447        .args
448        .iter()
449        .find_map(|argument| match &argument.node {
450            ast::ArgumentP::Named(name, value) if name.node == key => Some(value),
451            _ => None,
452        })
453}
454
455fn extract_named_kwarg_string(call: &StarlarkCall<'_>, key: &str) -> Option<String> {
456    extract_named_kwarg(call, key).and_then(expr_as_string)
457}
458
459fn extract_named_kwarg_string_list(call: &StarlarkCall<'_>, key: &str) -> Option<Vec<String>> {
460    let expr = extract_named_kwarg(call, key)?;
461    let items = match &expr.node {
462        ast::ExprP::List(items) | ast::ExprP::Tuple(items) => items,
463        _ => return None,
464    };
465    let values: Vec<_> = items.iter().filter_map(expr_as_string).collect();
466    (!values.is_empty()).then_some(values)
467}
468
469fn expr_as_string(expr: &ast::AstExpr) -> Option<String> {
470    match &expr.node {
471        ast::ExprP::Literal(ast::AstLiteral::String(value)) => Some(value.node.clone()),
472        _ => None,
473    }
474}
475
/// Report whether `rule_name` ends in one of the recognised rule suffixes,
/// `binary` or `library` (case-sensitive).
fn check_rule_name_ending(rule_name: &str) -> bool {
    const RULE_SUFFIXES: [&str; 2] = ["binary", "library"];

    RULE_SUFFIXES
        .iter()
        .any(|&suffix| rule_name.ends_with(suffix))
}
480
481/// Create fallback package data using parent directory name
482fn fallback_package_data(path: &Path) -> PackageData {
483    let name = path
484        .parent()
485        .and_then(|p| p.file_name())
486        .and_then(|n| n.to_str())
487        .map(|s| s.to_string());
488
489    PackageData {
490        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
491        name,
492        datasource_id: Some(DatasourceId::BuckFile),
493        ..Default::default()
494    }
495}
496
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// `BUCK` matches by exact, case-sensitive file name in any directory.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// `METADATA.bzl` matches by exact, case-sensitive file name in any directory.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// Only rule names ending in `binary` or `library` are accepted.
    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(accepted));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }
}
532
// Invokes the crate-level `register_parser!` macro for this module.
// The two glob patterns correspond to the file names accepted by
// `BuckBuildParser::is_match` (`BUCK`) and `BuckMetadataBzlParser::is_match`
// (`METADATA.bzl`).
// NOTE(review): the macro is defined outside this file; the remaining
// positional arguments look like a description, a package-type label, an
// empty placeholder and a documentation URL — confirm against the macro
// definition before relying on that reading.
533crate::register_parser!(
534    "Buck build file and METADATA.bzl",
535    &["**/BUCK", "**/METADATA.bzl"],
536    "buck",
537    "",
538    Some("https://buck.build/"),
539);