Skip to main content

provenant/parsers/
buck.rs

1//! Buck BUILD and METADATA.bzl parsers
2//!
3//! Extracts package metadata from Buck build system files using Starlark (Python-like) syntax.
4//!
5//! ## Features
6//! - **BuckBuildParser**: Parses BUCK files with multiple package support
7//! - **BuckMetadataBzlParser**: Parses METADATA.bzl dictionary assignments with package_url support
8//!
9//! ## Usage
10//! - `BuckBuildParser::extract_packages()` - Returns ALL packages from BUCK file
11//! - `BuckMetadataBzlParser::extract_first_package()` - Returns single package from METADATA.bzl
12//!
13//! ## Reference
14//! Python implementation: `reference/scancode-toolkit/src/packagedcode/build.py`
15//! - BuckPackageHandler (lines 310-325)
16//! - BuckMetadataBzlHandler (lines 328-432)
17
18use std::collections::HashMap;
19use std::path::Path;
20
21use log::warn;
22use packageurl::PackageUrl;
23use rustpython_parser::{Parse, ast};
24
25use crate::models::{DatasourceId, PackageData, PackageType, Party};
26
27use super::PackageParser;
28
29/// Parser for Buck BUCK files (build rules)
30pub struct BuckBuildParser;
31
32impl PackageParser for BuckBuildParser {
33    const PACKAGE_TYPE: PackageType = PackageType::Buck;
34
35    fn is_match(path: &Path) -> bool {
36        path.file_name()
37            .and_then(|name| name.to_str())
38            .is_some_and(|name| name == "BUCK")
39    }
40
41    fn extract_packages(path: &Path) -> Vec<PackageData> {
42        match parse_buck_build(path) {
43            Ok(packages) if !packages.is_empty() => packages,
44            Ok(_) => vec![fallback_package_data(path)],
45            Err(e) => {
46                warn!("Failed to parse Buck BUCK file {:?}: {}", path, e);
47                vec![fallback_package_data(path)]
48            }
49        }
50    }
51}
52
53/// Parser for Buck METADATA.bzl files (metadata dictionaries)
54pub struct BuckMetadataBzlParser;
55
56impl PackageParser for BuckMetadataBzlParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Buck;
58
59    fn is_match(path: &Path) -> bool {
60        path.file_name()
61            .and_then(|name| name.to_str())
62            .is_some_and(|name| name == "METADATA.bzl")
63    }
64
65    fn extract_packages(path: &Path) -> Vec<PackageData> {
66        vec![match parse_metadata_bzl(path) {
67            Ok(pkg) => pkg,
68            Err(e) => {
69                warn!("Failed to parse Buck METADATA.bzl {:?}: {}", path, e);
70                PackageData {
71                    package_type: Some(Self::PACKAGE_TYPE),
72                    datasource_id: Some(DatasourceId::BuckMetadata),
73                    ..Default::default()
74                }
75            }
76        }]
77    }
78}
79
80/// Parse a Buck BUCK file (same logic as Bazel BUILD)
81fn parse_buck_build(path: &Path) -> Result<Vec<PackageData>, String> {
82    let content =
83        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
84
85    let module = ast::Suite::parse(&content, "<BUCK>")
86        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
87
88    let mut packages = Vec::new();
89
90    for statement in &module {
91        if let Some(package_data) = extract_from_statement(statement) {
92            packages.push(package_data);
93        }
94    }
95
96    Ok(packages)
97}
98
99/// Parse a Buck METADATA.bzl file
100fn parse_metadata_bzl(path: &Path) -> Result<PackageData, String> {
101    let content =
102        std::fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
103
104    let module = ast::Suite::parse(&content, "<METADATA.bzl>")
105        .map_err(|e| format!("Failed to parse Starlark: {}", e))?;
106
107    // Look for METADATA = {...} assignment
108    for statement in &module {
109        if let ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) = statement {
110            // Check if assigning to variable named "METADATA"
111            for target in targets {
112                if let ast::Expr::Name(ast::ExprName { id, .. }) = target
113                    && id.as_str() == "METADATA"
114                {
115                    // Extract dictionary contents
116                    if let ast::Expr::Dict(dict) = value.as_ref() {
117                        return Ok(extract_metadata_dict(dict));
118                    }
119                }
120            }
121        }
122    }
123
124    // No METADATA found
125    Ok(PackageData {
126        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
127        datasource_id: Some(DatasourceId::BuckMetadata),
128        ..Default::default()
129    })
130}
131
132/// Extract metadata from a dictionary AST node
133fn extract_metadata_dict(dict: &ast::ExprDict) -> PackageData {
134    let mut fields: HashMap<String, MetadataValue> = HashMap::new();
135
136    for (key, value) in dict.keys.iter().zip(dict.values.iter()) {
137        // Extract key name
138        let key_name = match key {
139            Some(ast::Expr::Constant(ast::ExprConstant { value, .. })) => {
140                if let ast::Constant::Str(s) = value {
141                    s.clone()
142                } else {
143                    continue;
144                }
145            }
146            _ => continue,
147        };
148
149        // Extract value
150        let metadata_value = match value {
151            ast::Expr::Constant(ast::ExprConstant {
152                value: ast::Constant::Str(s),
153                ..
154            }) => MetadataValue::String(s.clone()),
155            ast::Expr::Constant(_) => continue,
156            ast::Expr::List(ast::ExprList { elts, .. }) => {
157                let mut list_values = Vec::new();
158                for elt in elts {
159                    if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = elt
160                        && let ast::Constant::Str(s) = value
161                    {
162                        list_values.push(s.clone());
163                    }
164                }
165                MetadataValue::List(list_values)
166            }
167            _ => continue,
168        };
169
170        fields.insert(key_name, metadata_value);
171    }
172
173    build_package_from_metadata(fields)
174}
175
/// A value taken from the `METADATA` dictionary, reduced to the two shapes
/// the parser understands.
enum MetadataValue {
    /// A single string literal.
    String(String),
    /// The string literals found inside a list literal.
    List(Vec<String>),
}
181
182/// Build PackageData from extracted metadata fields
183fn build_package_from_metadata(fields: HashMap<String, MetadataValue>) -> PackageData {
184    let mut pkg = PackageData {
185        package_type: Some(BuckMetadataBzlParser::PACKAGE_TYPE),
186        datasource_id: Some(DatasourceId::BuckMetadata),
187        ..Default::default()
188    };
189
190    // Extract name
191    if let Some(MetadataValue::String(s)) = fields.get("name") {
192        pkg.name = Some(s.clone());
193    }
194
195    // Extract version
196    if let Some(MetadataValue::String(s)) = fields.get("version") {
197        pkg.version = Some(s.clone());
198    }
199
200    // Extract package type (upstream_type or package_type)
201    if let Some(MetadataValue::String(s)) = fields.get("upstream_type") {
202        pkg.package_type = s.parse::<PackageType>().ok();
203    } else if let Some(MetadataValue::String(s)) = fields.get("package_type") {
204        pkg.package_type = s.parse::<PackageType>().ok();
205    }
206
207    // Extract licenses (licenses or license_expression)
208    if let Some(MetadataValue::List(licenses)) = fields.get("licenses") {
209        pkg.extracted_license_statement = Some(licenses.join(", "));
210    } else if let Some(MetadataValue::String(s)) = fields.get("license_expression") {
211        pkg.extracted_license_statement = Some(s.clone());
212    }
213
214    // Extract homepage (upstream_address or homepage_url)
215    if let Some(MetadataValue::String(s)) = fields.get("upstream_address") {
216        pkg.homepage_url = Some(s.clone());
217    } else if let Some(MetadataValue::String(s)) = fields.get("homepage_url") {
218        pkg.homepage_url = Some(s.clone());
219    }
220
221    // Extract download_url
222    if let Some(MetadataValue::String(s)) = fields.get("download_url") {
223        pkg.download_url = Some(s.clone());
224    }
225
226    // Extract vcs_url
227    if let Some(MetadataValue::String(s)) = fields.get("vcs_url") {
228        pkg.vcs_url = Some(s.clone());
229    }
230
231    // Extract sha1 (download_archive_sha1)
232    if let Some(MetadataValue::String(s)) = fields.get("download_archive_sha1") {
233        pkg.sha1 = Some(s.clone());
234    }
235
236    // Extract maintainers
237    if let Some(MetadataValue::List(maintainers)) = fields.get("maintainers") {
238        pkg.parties = maintainers
239            .iter()
240            .map(|name| Party {
241                r#type: Some("organization".to_string()),
242                name: Some(name.clone()),
243                role: Some("maintainer".to_string()),
244                email: None,
245                url: None,
246                organization: None,
247                organization_url: None,
248                timezone: None,
249            })
250            .collect();
251    }
252
253    // Extract extra_data fields
254    let mut extra_data = HashMap::new();
255    if let Some(MetadataValue::String(s)) = fields.get("vcs_commit_hash") {
256        extra_data.insert(
257            "vcs_commit_hash".to_string(),
258            serde_json::Value::String(s.clone()),
259        );
260    }
261    if let Some(MetadataValue::String(s)) = fields.get("upstream_hash") {
262        extra_data.insert(
263            "upstream_hash".to_string(),
264            serde_json::Value::String(s.clone()),
265        );
266    }
267    if !extra_data.is_empty() {
268        pkg.extra_data = Some(extra_data);
269    }
270
271    // Parse package_url if present and update package fields
272    if let Some(MetadataValue::String(purl_str)) = fields.get("package_url")
273        && let Ok(purl) = purl_str.parse::<PackageUrl>()
274    {
275        // Override package fields with purl data
276        pkg.package_type = purl.ty().parse::<PackageType>().ok();
277        if let Some(ns) = purl.namespace() {
278            pkg.namespace = Some(ns.to_string());
279        }
280        pkg.name = Some(purl.name().to_string());
281        if let Some(ver) = purl.version() {
282            pkg.version = Some(ver.to_string());
283        }
284        // Qualifiers
285        if !purl.qualifiers().is_empty() {
286            let quals: HashMap<String, String> = purl
287                .qualifiers()
288                .iter()
289                .map(|(k, v)| (k.to_string(), v.to_string()))
290                .collect();
291            pkg.qualifiers = Some(quals);
292        }
293        // Subpath
294        if let Some(sp) = purl.subpath() {
295            pkg.subpath = Some(sp.to_string());
296        }
297    }
298
299    pkg
300}
301
302/// Extract package data from a single AST statement (for BUCK files)
303fn extract_from_statement(statement: &ast::Stmt) -> Option<PackageData> {
304    match statement {
305        ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
306            if let ast::Expr::Call(call) = value.as_ref() {
307                return extract_from_call(call);
308            }
309        }
310        ast::Stmt::Assign(ast::StmtAssign { value, .. }) => {
311            if let ast::Expr::Call(call) = value.as_ref() {
312                return extract_from_call(call);
313            }
314        }
315        _ => {}
316    }
317    None
318}
319
320/// Extract package data from a function call (for BUCK files)
321fn extract_from_call(call: &ast::ExprCall) -> Option<PackageData> {
322    let rule_name = match call.func.as_ref() {
323        ast::Expr::Name(ast::ExprName { id, .. }) => id.as_str(),
324        _ => return None,
325    };
326
327    if !check_rule_name_ending(rule_name) {
328        return None;
329    }
330
331    let mut name: Option<String> = None;
332    let mut licenses: Option<Vec<String>> = None;
333
334    for keyword in &call.keywords {
335        let arg_name = keyword.arg.as_ref()?.as_str();
336
337        match arg_name {
338            "name" => {
339                if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = &keyword.value
340                    && let ast::Constant::Str(s) = value
341                {
342                    name = Some(s.clone());
343                }
344            }
345            "licenses" => {
346                if let ast::Expr::List(ast::ExprList { elts, .. }) = &keyword.value {
347                    let mut license_list = Vec::new();
348                    for elt in elts {
349                        if let ast::Expr::Constant(ast::ExprConstant { value, .. }) = elt
350                            && let ast::Constant::Str(s) = value
351                        {
352                            license_list.push(s.clone());
353                        }
354                    }
355                    if !license_list.is_empty() {
356                        licenses = Some(license_list);
357                    }
358                }
359            }
360            _ => {}
361        }
362    }
363
364    let package_name = name?;
365
366    Some(PackageData {
367        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
368        name: Some(package_name),
369        extracted_license_statement: licenses.map(|l| l.join(", ")),
370        datasource_id: Some(DatasourceId::BuckFile),
371        ..Default::default()
372    })
373}
374
/// Report whether `rule_name` ends with one of the suffixes `binary` or
/// `library` (case-sensitive).
fn check_rule_name_ending(rule_name: &str) -> bool {
    const ACCEPTED_SUFFIXES: [&str; 2] = ["binary", "library"];

    ACCEPTED_SUFFIXES
        .iter()
        .any(|suffix| rule_name.ends_with(*suffix))
}
379
380/// Create fallback package data using parent directory name
381fn fallback_package_data(path: &Path) -> PackageData {
382    let name = path
383        .parent()
384        .and_then(|p| p.file_name())
385        .and_then(|n| n.to_str())
386        .map(|s| s.to_string());
387
388    PackageData {
389        package_type: Some(BuckBuildParser::PACKAGE_TYPE),
390        name,
391        datasource_id: Some(DatasourceId::BuckFile),
392        ..Default::default()
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// `BUCK` must match by exact, case-sensitive file name only.
    #[test]
    fn test_buck_build_is_match() {
        for accepted in ["BUCK", "path/to/BUCK"] {
            assert!(BuckBuildParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["BUILD", "buck"] {
            assert!(!BuckBuildParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// `METADATA.bzl` must match by exact, case-sensitive file name only.
    #[test]
    fn test_metadata_bzl_is_match() {
        for accepted in ["METADATA.bzl", "path/to/METADATA.bzl"] {
            assert!(BuckMetadataBzlParser::is_match(&PathBuf::from(accepted)));
        }
        for rejected in ["metadata.bzl", "METADATA"] {
            assert!(!BuckMetadataBzlParser::is_match(&PathBuf::from(rejected)));
        }
    }

    /// Rule names qualify only when they end in `binary` or `library`.
    #[test]
    fn test_check_rule_name_ending() {
        for accepted in ["android_binary", "android_library", "java_binary"] {
            assert!(check_rule_name_ending(accepted));
        }
        assert!(!check_rule_name_ending("filegroup"));
    }
}
431
432crate::register_parser!(
433    "Buck build file and METADATA.bzl",
434    &["**/BUCK", "**/METADATA.bzl"],
435    "buck",
436    "",
437    Some("https://buck.build/"),
438);