Skip to main content

provenant/parsers/
buck.rs

//! Buck BUILD and METADATA.bzl parsers
//!
//! Extracts package metadata from Buck build system files using Starlark (Python-like) syntax.
//!
//! ## Features
//! - **BuckBuildParser**: Parses BUCK files with multiple package support
//! - **BuckMetadataBzlParser**: Parses METADATA.bzl dictionary assignments with package_url support
//!
//! ## Usage
//! - `BuckBuildParser::extract_packages()` - Returns ALL packages from a BUCK file
//! - `BuckMetadataBzlParser::extract_first_package()` - Returns a single package from METADATA.bzl
//!
//! ## Reference
//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py`
//! - BuckPackageHandler (lines 310-325)
//! - BuckMetadataBzlHandler (lines 328-432)
17
18use std::collections::HashMap;
19use std::path::Path;
20
21use crate::parser_warn as warn;
22use packageurl::PackageUrl;
23use rustpython_parser::{Parse, ast};
24
25use crate::models::{DatasourceId, PackageData, PackageType, Party};
26
27use super::PackageParser;
28
29/// Parser for Buck BUCK files (build rules)
30pub struct BuckBuildParser;
31
32impl PackageParser for BuckBuildParser {
33    const PACKAGE_TYPE: PackageType = PackageType::Buck;
34
35    fn is_match(path: &Path) -> bool {
36        path.file_name()
37            .and_then(|name| name.to_str())
38            .is_some_and(|name| name == "BUCK")
39    }
40
41    fn extract_packages(path: &Path) -> Vec<PackageData> {
42        match parse_buck_build(path) {
43            Ok(packages) if !packages.is_empty() => packages,
44            Ok(_) => vec![fallback_package_data(path)],
45            Err(e) => {
46                warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
47                vec![fallback_package_data(path)]
48            }
49        }
50    }
51}
52
53/// Parser for Buck METADATA.bzl files (metadata dictionaries)
54pub struct BuckMetadataBzlParser;
55
56impl PackageParser for BuckMetadataBzlParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Buck;
58
59    fn is_match(path: &Path) -> bool {
60        path.file_name()
61            .and_then(|name| name.to_str())
62            .is_some_and(|name| name == "METADATA.bzl")
63    }
64
65    fn extract_packages(path: &Path) -> Vec<PackageData> {
66        vec![match parse_metadata_bzl(path) {
67            Ok(pkg) => pkg,
68            Err(e) => {
69                warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
70                PackageData {
71                    package_type: Some(Self::PACKAGE_TYPE),
72                    datasource_id: Some(DatasourceId::BuckMetadata),
73                    ..Default::default()
74                }
75            }
76        }]
77    }
78}
79
80/// Parse a Buck BUCK file (same logic as Bazel BUILD)
81fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
82    let content =
83        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
84
85    let module = ast::Suite::parse(&content, "<BUCK>")
86        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
87
88    let mut packages = Vec::new();
89
90    for statement in &module {
91        if let Some(package_data) = extract_from_statement(statement) {
92            packages.push(package_data);
93        }
94    }
95
96    Ok(packages)
97}
98
99/// Parse a Buck METADATA.bzl file
100fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
101    let content =
102        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
103
104    let module = ast::Suite::parse(&content, "<METADATA.bzl>")
105        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
106
107    // Look for METADATA = {...} assignment
108    for statement in &module {
109        if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = statement {
110            // Check if assigning to variable named "METADATA"
111            for target in targets {
112                if let ast::Expr::Name(ast::ExprName { id, .. }) = target
113                    && id.as_str() == "METADATA"
114                {
115                    // Extract dictionary contents
116                    if let ast::Expr::Dict(dict) = value.as_ref() {
117                        return Ok(extract_metadata_dict(dict));
118                    }
119                }
120            }
121        }
122    }
123
124    // No METADATA found
125    Ok(PackageData {
126        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
127        datasource_id: Some(DatasourceId::BuckMetadata),
128        ..Default::default()
129    })
130}
131
132/// Extract metadata from a dictionary AST node
133fn extract_metadata_dict(dict: &ast::ExprDict) -> PackageData {
134    let mut fields: HashMap<String, MetadataValue> = HashMap::new();
135
136    for (key, value) in dict.keys.iter().zip(dict.values.iter()) {
137        // Extract key name
138        let key_name = match key {
139            Some(ast::Expr::Constant(ast::ExprConstant { value, .. })) => {
140                if let ast::Constant::Str(s) = value {
141                    s.clone()
142                } else {
143                    continue;
144                }
145            }
146            _ => continue,
147        };
148
149        // Extract value
150        let metadata_value = match value {
151            ast::Expr::Constant(ast::ExprConstant {
152                value: ast::Constant::Str(s),
153                ..
154            }) => MetadataValue::String(s.clone()),
155            ast::Expr::Constant(_) => continue,
156            ast::Expr::List(ast::ExprList { elts, .. }) => {
157                let mut list_values = Vec::new();
158                for elt in elts {
159                    if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = elt
160                        && let ast::Constant::Str(s) = value
161                    {
162                        list_values.push(s.clone());
163                    }
164                }
165                MetadataValue::List(list_values)
166            }
167            _ => continue,
168        };
169
170        fields.insert(key_name, metadata_value);
171    }
172
173    build_package_from_metadata(fields)
174}
175
/// A value extracted from a `METADATA` dictionary entry.
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string-literal elements of a list literal, in source order.
    List(Vec<String>),
}
181
182fn split_buck_license_values(values: &[String]) -> (Vec<String>, Vec<String>) {
183    let mut statements = Vec::new();
184    let mut references = Vec::new();
185
186    for value in values {
187        if is_probable_local_license_reference(value) {
188            references.push(value.clone());
189        } else {
190            statements.push(value.clone());
191        }
192    }
193
194    (statements, references)
195}
196
/// Heuristically decide whether a license value names a local license file
/// rather than a license identifier or expression.
///
/// The value is trimmed and compared case-insensitively (ASCII). It is
/// treated as a file reference when it:
/// - contains a path separator (`/` or `\`), or
/// - ends with `.txt`, `.md`, `.rst` or `.html`, or
/// - starts with `license`, `licence`, `copying`, `notice` or `copyright`.
///
/// SPDX user-defined identifiers (`LicenseRef-…`) also start with `license`
/// but are license identifiers, not files, so they are only treated as a
/// reference when they additionally look like a path or carry one of the
/// file extensions above.
///
/// Empty or whitespace-only values are never references.
fn is_probable_local_license_reference(value: &str) -> bool {
    const FILE_EXTENSIONS: [&str; 4] = [".txt", ".md", ".rst", ".html"];
    const FILE_NAME_PREFIXES: [&str; 5] = ["license", "licence", "copying", "notice", "copyright"];

    let trimmed = value.trim();
    if trimmed.is_empty() {
        return false;
    }

    let lower = trimmed.to_ascii_lowercase();

    let looks_like_path = lower.contains('/')
        || lower.contains('\\')
        || FILE_EXTENSIONS.iter().any(|ext| lower.ends_with(ext));
    if looks_like_path {
        return true;
    }

    // `LicenseRef-foo` is an SPDX license identifier, not a LICENSE file.
    if lower.starts_with("licenseref-") {
        return false;
    }

    FILE_NAME_PREFIXES
        .iter()
        .any(|prefix| lower.starts_with(prefix))
}
216
217fn insert_license_reference_extra_data(
218    extra_data: &mut HashMap<String, serde_json::Value>,
219    references: &[String],
220) {
221    match references {
222        [] => {}
223        [reference] => {
224            extra_data.insert(
225                "license_file".to_string(),
226                serde_json::Value::String(reference.clone()),
227            );
228        }
229        _ => {
230            extra_data.insert(
231                "license_files".to_string(),
232                serde_json::Value::Array(
233                    references
234                        .iter()
235                        .cloned()
236                        .map(serde_json::Value::String)
237                        .collect(),
238                ),
239            );
240        }
241    }
242}
243
244/// Build PackageData from extracted metadata fields
245fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
246    let mut pkg = PackageData {
247        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
248        datasource_id: Some(DatasourceId::BuckMetadata),
249        ..Default::default()
250    };
251    let mut license_references = Vec::new();
252
253    // Extract name
254    if let Some(MetadataValue::String(s)) = fields.get("name") {
255        pkg.name = Some(s.clone());
256    }
257
258    // Extract version
259    if let Some(MetadataValue::String(s)) = fields.get("version") {
260        pkg.version = Some(s.clone());
261    }
262
263    // Extract package type (upstream_type or package_type)
264    if let Some(MetadataValue::String(s)) = fields.get("upstream_type") {
265        pkg.package_type = s.parse::<PackageType>().ok();
266    } else if let Some(MetadataValue::String(s)) = fields.get("package_type") {
267        pkg.package_type = s.parse::<PackageType>().ok();
268    }
269
270    // Extract licenses (licenses or license_expression)
271    if let Some(MetadataValue::List(licenses)) = fields.get("licenses") {
272        let (license_statements, references) = split_buck_license_values(licenses);
273        license_references = references;
274        let extracted_license_statement = if !license_statements.is_empty() {
275            Some(license_statements.join(", "))
276        } else if !license_references.is_empty() {
277            Some(license_references.join(", "))
278        } else {
279            None
280        };
281        pkg.extracted_license_statement = extracted_license_statement;
282    } else if let Some(MetadataValue::String(s)) = fields.get("license_expression") {
283        pkg.extracted_license_statement = Some(s.clone());
284    }
285
286    // Extract homepage (upstream_address or homepage_url)
287    if let Some(MetadataValue::String(s)) = fields.get("upstream_address") {
288        pkg.homepage_url = Some(s.clone());
289    } else if let Some(MetadataValue::String(s)) = fields.get("homepage_url") {
290        pkg.homepage_url = Some(s.clone());
291    }
292
293    // Extract download_url
294    if let Some(MetadataValue::String(s)) = fields.get("download_url") {
295        pkg.download_url = Some(s.clone());
296    }
297
298    // Extract vcs_url
299    if let Some(MetadataValue::String(s)) = fields.get("vcs_url") {
300        pkg.vcs_url = Some(s.clone());
301    }
302
303    // Extract sha1 (download_archive_sha1)
304    if let Some(MetadataValue::String(s)) = fields.get("download_archive_sha1") {
305        pkg.sha1 = Some(s.clone());
306    }
307
308    // Extract maintainers
309    if let Some(MetadataValue::List(maintainers)) = fields.get("maintainers") {
310        pkg.parties = maintainers
311            .iter()
312            .map(|name| Party {
313                r#type: Some("organization".to_string()),
314                name: Some(name.clone()),
315                role: Some("maintainer".to_string()),
316                email: None,
317                url: None,
318                organization: None,
319                organization_url: None,
320                timezone: None,
321            })
322            .collect();
323    }
324
325    // Extract extra_data fields
326    let mut extra_data = HashMap::new();
327    if let Some(MetadataValue::String(s)) = fields.get("vcs_commit_hash") {
328        extra_data.insert(
329            "vcs_commit_hash".to_string(),
330            serde_json::Value::String(s.clone()),
331        );
332    }
333    if let Some(MetadataValue::String(s)) = fields.get("upstream_hash") {
334        extra_data.insert(
335            "upstream_hash".to_string(),
336            serde_json::Value::String(s.clone()),
337        );
338    }
339    insert_license_reference_extra_data(&mut extra_data, &license_references);
340    if !extra_data.is_empty() {
341        pkg.extra_data = Some(extra_data);
342    }
343
344    // Parse package_url if present and update package fields
345    if let Some(MetadataValue::String(purl_str)) = fields.get("package_url")
346        && let Ok(purl) = purl_str.parse::<PackageUrl>()
347    {
348        // Override package fields with purl data
349        pkg.package_type = purl.ty().parse::<PackageType>().ok();
350        if let Some(ns) = purl.namespace() {
351            pkg.namespace = Some(ns.to_string());
352        }
353        pkg.name = Some(purl.name().to_string());
354        if let Some(ver) = purl.version() {
355            pkg.version = Some(ver.to_string());
356        }
357        // Qualifiers
358        if !purl.qualifiers().is_empty() {
359            let quals: HashMap<String, String> = purl
360                .qualifiers()
361                .iter()
362                .map(|(k, v)| (k.to_string(), v.to_string()))
363                .collect();
364            pkg.qualifiers = Some(quals);
365        }
366        // Subpath
367        if let Some(sp) = purl.subpath() {
368            pkg.subpath = Some(sp.to_string());
369        }
370    }
371
372    pkg
373}
374
375/// Extract package data from a single AST statement (for BUCK files)
376fn extract_from_statement(statement: &ast::Stmt) -> Option<PackageData> {
377    match statement {
378        ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
379            if let ast::Expr::Call(call) = value.as_ref() {
380                return extract_from_call(call);
381            }
382        }
383        ast::Stmt::Assign(ast::StmtAssign { value, .. }) => {
384            if let ast::Expr::Call(call) = value.as_ref() {
385                return extract_from_call(call);
386            }
387        }
388        _ => {}
389    }
390    None
391}
392
393/// Extract package data from a function call (for BUCK files)
394fn extract_from_call(call: &ast::ExprCall) -> Option<PackageData> {
395    let rule_name = match call.func.as_ref() {
396        ast::Expr::Name(ast::ExprName { id, .. }) => id.as_str(),
397        _ => return None,
398    };
399
400    if !check_rule_name_ending(rule_name) {
401        return None;
402    }
403
404    let mut name: Option<String> = None;
405    let mut licenses: Option<Vec<String>> = None;
406
407    for keyword in &call.keywords {
408        let arg_name = keyword.arg.as_ref()?.as_str();
409
410        match arg_name {
411            "name" => {
412                if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = &keyword.value
413                    && let ast::Constant::Str(s) = value
414                {
415                    name = Some(s.clone());
416                }
417            }
418            "licenses" => {
419                if let ast::Expr::List(ast::ExprList { elts, .. }) = &keyword.value {
420                    let mut license_list = Vec::new();
421                    for elt in elts {
422                        if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = elt
423                            && let ast::Constant::Str(s) = value
424                        {
425                            license_list.push(s.clone());
426                        }
427                    }
428                    if !license_list.is_empty() {
429                        licenses = Some(license_list);
430                    }
431                }
432            }
433            _ => {}
434        }
435    }
436
437    let package_name = name?;
438    let (license_statements, license_references) = licenses
439        .as_deref()
440        .map(split_buck_license_values)
441        .unwrap_or_default();
442    let extracted_license_statement = if !license_statements.is_empty() {
443        Some(license_statements.join(", "))
444    } else if !license_references.is_empty() {
445        Some(license_references.join(", "))
446    } else {
447        None
448    };
449    let mut extra_data = HashMap::new();
450    insert_license_reference_extra_data(&mut extra_data, &license_references);
451
452    Some(PackageData {
453        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
454        name: Some(package_name),
455        extracted_license_statement,
456        extra_data: (!extra_data.is_empty()).then_some(extra_data),
457        datasource_id: Some(DatasourceId::BuckFile),
458        ..Default::default()
459    })
460}
461
/// Returns `true` when `rule_name` ends with `binary` or `library`.
fn check_rule_name_ending(rule_name: &str) -> bool {
    ["binary", "library"]
        .iter()
        .any(|suffix| rule_name.ends_with(suffix))
}
466
467/// Create fallback package data using parent directory name
468fn fallback_package_data(path: &Path) -> PackageData {
469    let name = path
470        .parent()
471        .and_then(|p| p.file_name())
472        .and_then(|n| n.to_str())
473        .map(|s| s.to_string());
474
475    PackageData {
476        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
477        name,
478        datasource_id: Some(DatasourceId::BuckFile),
479        ..Default::default()
480    }
481}
482
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// `BUCK` must match by exact, case-sensitive file name.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(Path::new(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(Path::new(rejected)));
        }
    }

    /// `METADATA.bzl` must match by exact, case-sensitive file name.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(Path::new(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(Path::new(rejected)));
        }
    }

    /// Only rule names ending in `binary` or `library` are accepted.
    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(accepted));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }
}
518
519crate::register_parser!(
520    "Buck build file and METADATA.bzl",
521    &["**/BUCK", "**/METADATA.bzl"],
522    "buck",
523    "",
524    Some("https://buck.build/"),
525);