Skip to main content

provenant/parsers/
chef.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Chef cookbook metadata files (JSON and Ruby).
5//!
6//! Extracts package metadata, dependencies, and maintainer information from
7//! Chef cookbook metadata files used by the Chef configuration management tool.
8//!
9//! # Supported Formats
10//! - metadata.json (Chef cookbook metadata in JSON format)
11//! - metadata.rb (Chef cookbook metadata in Ruby DSL format)
12//!
13//! # Key Features
14//! - Maintainer party extraction from maintainer/maintainer_email fields
15//! - Dependency extraction from both `dependencies` and `depends` fields (merged)
16//! - URL construction for Chef Supermarket (download, homepage, API)
17//! - dist-info guard to prevent false positives with Python wheel metadata.json
18//!
19//! # Implementation Notes
20//! - JSON parser uses serde_json for JSON parsing
21//! - Ruby parser uses line-based token extraction (not a full Ruby parser)
22//! - Description from `description` or fallback to `long_description`
23//! - Graceful error handling: logs warnings and returns default on parse failure
24//! - IO.read(...) expressions in Ruby files are skipped (cannot evaluate Ruby code)
25
26use std::collections::HashMap;
27use std::fs::{self, File};
28use std::io::{BufRead, BufReader};
29use std::path::Path;
30use std::sync::LazyLock;
31
32use crate::parser_warn as warn;
33use packageurl::PackageUrl;
34use regex::Regex;
35use serde_json::Value;
36
37use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
38
39use super::PackageParser;
40use super::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, read_file_to_string, truncate_field};
41
// Names of the metadata keys that `ChefMetadataJsonParser` looks up on the
// parsed metadata.json document.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// `description` is preferred; `long_description` is only used as a fallback.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LONG_DESCRIPTION: &str = "long_description";
const FIELD_LICENSE: &str = "license";
const FIELD_MAINTAINER: &str = "maintainer";
const FIELD_MAINTAINER_EMAIL: &str = "maintainer_email";
const FIELD_SOURCE_URL: &str = "source_url";
const FIELD_ISSUES_URL: &str = "issues_url";
// Both dependency keys are read as JSON objects and merged into one map.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEPENDS: &str = "depends";
53
/// Matches a `key 'value'` / `key "value"` line of the Ruby DSL.
///
/// Capture 1 is the key word, capture 2 the text between the quotes.
///
/// NOTE(review): the opening and closing quote are matched independently and
/// capture 2 is lazy, so it ends at the first quote of *either* kind
/// (`description "Bob's app"` captures `Bob`). Escaped quotes are not handled.
static RE_FIELD: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"^\s*(\w+)\s+['"](.+?)['"]"#).expect("valid regex"));
/// Matches a `depends 'name'` line with an optional second quoted argument.
///
/// Capture 1 is the dependency name; capture 2, when present, is used as the
/// version constraint.
static RE_DEPENDS: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"^\s*depends\s+['"](.+?)['"](?:\s*,\s*['"](.+?)['"])?"#).expect("valid regex")
});
/// Detects `IO.read(` anywhere in a line; the metadata.rb parser skips such
/// lines because the expression cannot be evaluated without running Ruby.
static RE_IO_READ: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"IO\.read\(").expect("valid regex"));
61
/// Format-independent bundle of cookbook fields that both parsers fill in and
/// hand to `build_package`.
struct ChefPackageFields {
    /// Identifies which manifest flavour (metadata.json or metadata.rb) the
    /// values were read from.
    datasource_id: DatasourceId,
    /// Cookbook name (`name` key).
    name: Option<String>,
    /// Cookbook version (`version` key).
    version: Option<String>,
    /// `description`, or `long_description` when the former is missing/empty.
    description: Option<String>,
    /// Raw `license` value as written in the manifest.
    extracted_license_statement: Option<String>,
    /// `maintainer` value; becomes the name of the "maintainer" party.
    maintainer_name: Option<String>,
    /// `maintainer_email` value; becomes the email of the "maintainer" party.
    maintainer_email: Option<String>,
    /// `source_url` value.
    code_view_url: Option<String>,
    /// `issues_url` value.
    bug_tracking_url: Option<String>,
    /// Dependency name mapped to its optional version constraint.
    deps: HashMap<String, Option<String>>,
}
74
75/// Chef metadata.json parser for Chef cookbook manifests.
76///
77/// Extracts metadata from Chef cookbook metadata.json files, including
78/// dependencies from both `dependencies` and `depends` fields.
79pub struct ChefMetadataJsonParser;
80
81impl PackageParser for ChefMetadataJsonParser {
82    const PACKAGE_TYPE: PackageType = PackageType::Chef;
83
84    fn is_match(path: &Path) -> bool {
85        if path.file_name().is_some_and(|name| name == "metadata.json") {
86            // Check parent directory doesn't end with "dist-info"
87            // to prevent false positives with Python wheel metadata.json files
88            if let Some(parent) = path.parent()
89                && let Some(parent_name) = parent.file_name().and_then(|n| n.to_str())
90            {
91                return !parent_name.ends_with("dist-info");
92            }
93            return true;
94        }
95        false
96    }
97
98    fn extract_packages(path: &Path) -> Vec<PackageData> {
99        let json_content = match read_json_file(path) {
100            Ok(content) => content,
101            Err(e) => {
102                warn!("Failed to read metadata.json at {:?}: {}", path, e);
103                return vec![default_package_data(DatasourceId::ChefCookbookMetadataJson)];
104            }
105        };
106
107        let name = json_content
108            .get(FIELD_NAME)
109            .and_then(|v| v.as_str())
110            .map(|s| s.trim().to_string())
111            .filter(|s| !s.is_empty())
112            .map(truncate_field);
113
114        let version = json_content
115            .get(FIELD_VERSION)
116            .and_then(|v| v.as_str())
117            .map(|s| s.trim().to_string())
118            .filter(|s| !s.is_empty())
119            .map(truncate_field);
120
121        let description = extract_description(&json_content).map(truncate_field);
122
123        let extracted_license_statement = json_content
124            .get(FIELD_LICENSE)
125            .and_then(|v| v.as_str())
126            .map(|s| s.trim().to_string())
127            .filter(|s| !s.is_empty())
128            .map(truncate_field);
129
130        let maintainer_name = json_content
131            .get(FIELD_MAINTAINER)
132            .and_then(|v| v.as_str())
133            .map(|s| s.trim().to_string())
134            .filter(|s| !s.is_empty())
135            .map(truncate_field);
136
137        let maintainer_email = json_content
138            .get(FIELD_MAINTAINER_EMAIL)
139            .and_then(|v| v.as_str())
140            .map(|s| s.trim().to_string())
141            .filter(|s| !s.is_empty())
142            .map(truncate_field);
143
144        let code_view_url = json_content
145            .get(FIELD_SOURCE_URL)
146            .and_then(|v| v.as_str())
147            .map(|s| s.trim().to_string())
148            .filter(|s| !s.is_empty())
149            .map(truncate_field);
150
151        let bug_tracking_url = json_content
152            .get(FIELD_ISSUES_URL)
153            .and_then(|v| v.as_str())
154            .map(|s| s.trim().to_string())
155            .filter(|s| !s.is_empty())
156            .map(truncate_field);
157
158        let mut deps: HashMap<String, Option<String>> = HashMap::new();
159
160        if let Some(deps_obj) = json_content
161            .get(FIELD_DEPENDENCIES)
162            .and_then(|v| v.as_object())
163        {
164            for (dep_name, dep_version) in deps_obj.iter().take(MAX_ITERATION_COUNT) {
165                let version_constraint = dep_version
166                    .as_str()
167                    .map(|s| s.trim().to_string())
168                    .filter(|s| !s.is_empty())
169                    .map(truncate_field);
170                deps.insert(
171                    truncate_field(dep_name.trim().to_string()),
172                    version_constraint,
173                );
174            }
175        }
176
177        if let Some(depends_obj) = json_content.get(FIELD_DEPENDS).and_then(|v| v.as_object()) {
178            for (dep_name, dep_version) in depends_obj.iter().take(MAX_ITERATION_COUNT) {
179                let version_constraint = dep_version
180                    .as_str()
181                    .map(|s| s.trim().to_string())
182                    .filter(|s| !s.is_empty())
183                    .map(truncate_field);
184                deps.insert(
185                    truncate_field(dep_name.trim().to_string()),
186                    version_constraint,
187                );
188            }
189        }
190
191        vec![build_package(ChefPackageFields {
192            datasource_id: DatasourceId::ChefCookbookMetadataJson,
193            name,
194            version,
195            description,
196            extracted_license_statement,
197            maintainer_name,
198            maintainer_email,
199            code_view_url,
200            bug_tracking_url,
201            deps,
202        })]
203    }
204}
205
206fn read_json_file(path: &Path) -> Result<Value, String> {
207    let contents = read_file_to_string(path, None).map_err(|e| e.to_string())?;
208    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
209}
210
211fn default_package_data(datasource_id: DatasourceId) -> PackageData {
212    PackageData {
213        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
214        datasource_id: Some(datasource_id),
215        ..Default::default()
216    }
217}
218
219fn extract_description(json: &Value) -> Option<String> {
220    // Try description first, then long_description
221    json.get(FIELD_DESCRIPTION)
222        .and_then(|v| v.as_str())
223        .map(|s| s.trim().to_string())
224        .filter(|s| !s.is_empty())
225        .or_else(|| {
226            json.get(FIELD_LONG_DESCRIPTION)
227                .and_then(|v| v.as_str())
228                .map(|s| s.trim().to_string())
229                .filter(|s| !s.is_empty())
230        })
231}
232
233/// Chef metadata.rb parser for Chef cookbook manifests in Ruby DSL format.
234///
235/// Uses line-based token extraction to parse Ruby DSL without executing Ruby code.
236pub struct ChefMetadataRbParser;
237
238impl PackageParser for ChefMetadataRbParser {
239    const PACKAGE_TYPE: PackageType = PackageType::Chef;
240
241    fn is_match(path: &Path) -> bool {
242        path.file_name().is_some_and(|name| name == "metadata.rb")
243    }
244
245    fn extract_packages(path: &Path) -> Vec<PackageData> {
246        if let Ok(metadata) = fs::metadata(path)
247            && metadata.len() > MAX_MANIFEST_SIZE
248        {
249            warn!(
250                "File {:?} is {} bytes, exceeding the {} byte limit",
251                path,
252                metadata.len(),
253                MAX_MANIFEST_SIZE
254            );
255            return vec![default_package_data(DatasourceId::ChefCookbookMetadataRb)];
256        }
257
258        let file = match File::open(path) {
259            Ok(f) => f,
260            Err(e) => {
261                warn!("Failed to open metadata.rb at {:?}: {}", path, e);
262                return vec![default_package_data(DatasourceId::ChefCookbookMetadataRb)];
263            }
264        };
265
266        let reader = BufReader::new(file);
267        let mut fields: HashMap<String, String> = HashMap::new();
268        let mut deps: HashMap<String, Option<String>> = HashMap::new();
269
270        for line in reader.lines().take(MAX_ITERATION_COUNT) {
271            let line = match line {
272                Ok(l) => l,
273                Err(e) => {
274                    warn!("Skipping non-UTF-8 line in {:?}: {}", path, e);
275                    continue;
276                }
277            };
278
279            let trimmed = line.trim();
280
281            if trimmed.is_empty() || trimmed.starts_with('#') {
282                continue;
283            }
284
285            if RE_IO_READ.is_match(&line) {
286                continue;
287            }
288
289            if let Some(caps) = RE_DEPENDS.captures(&line) {
290                let dep_name = caps
291                    .get(1)
292                    .map(|m| m.as_str().to_string())
293                    .unwrap_or_default();
294                let dep_version = caps.get(2).map(|m| m.as_str().to_string());
295                if !dep_name.is_empty() {
296                    deps.insert(dep_name, dep_version);
297                }
298                continue;
299            }
300
301            if let Some(caps) = RE_FIELD.captures(&line) {
302                let key = caps
303                    .get(1)
304                    .map(|m| m.as_str().to_string())
305                    .unwrap_or_default();
306                let value = caps
307                    .get(2)
308                    .map(|m| m.as_str().to_string())
309                    .unwrap_or_default();
310
311                if !key.is_empty() && !value.is_empty() {
312                    match key.as_str() {
313                        "name" | "version" | "description" | "long_description" | "license"
314                        | "maintainer" | "maintainer_email" | "source_url" | "issues_url" => {
315                            fields.insert(key, value);
316                        }
317                        _ => {}
318                    }
319                }
320            }
321        }
322
323        let name = fields
324            .get("name")
325            .map(|s| s.trim().to_string())
326            .filter(|s| !s.is_empty())
327            .map(truncate_field);
328
329        let version = fields
330            .get("version")
331            .map(|s| s.trim().to_string())
332            .filter(|s| !s.is_empty())
333            .map(truncate_field);
334
335        let description = fields
336            .get("description")
337            .map(|s| s.trim().to_string())
338            .filter(|s| !s.is_empty())
339            .map(truncate_field)
340            .or_else(|| {
341                fields
342                    .get("long_description")
343                    .map(|s| s.trim().to_string())
344                    .filter(|s| !s.is_empty())
345                    .map(truncate_field)
346            });
347
348        let extracted_license_statement = fields
349            .get("license")
350            .map(|s| s.trim().to_string())
351            .filter(|s| !s.is_empty())
352            .map(truncate_field);
353
354        let maintainer_name = fields
355            .get("maintainer")
356            .map(|s| s.trim().to_string())
357            .filter(|s| !s.is_empty())
358            .map(truncate_field);
359
360        let maintainer_email = fields
361            .get("maintainer_email")
362            .map(|s| s.trim().to_string())
363            .filter(|s| !s.is_empty())
364            .map(truncate_field);
365
366        let code_view_url = fields
367            .get("source_url")
368            .map(|s| s.trim().to_string())
369            .filter(|s| !s.is_empty())
370            .map(truncate_field);
371
372        let bug_tracking_url = fields
373            .get("issues_url")
374            .map(|s| s.trim().to_string())
375            .filter(|s| !s.is_empty())
376            .map(truncate_field);
377
378        vec![build_package(ChefPackageFields {
379            datasource_id: DatasourceId::ChefCookbookMetadataRb,
380            name,
381            version,
382            description,
383            extracted_license_statement,
384            maintainer_name,
385            maintainer_email,
386            code_view_url,
387            bug_tracking_url,
388            deps,
389        })]
390    }
391}
392
393fn build_package(fields: ChefPackageFields) -> PackageData {
394    let ChefPackageFields {
395        datasource_id,
396        name,
397        version,
398        description,
399        extracted_license_statement,
400        maintainer_name,
401        maintainer_email,
402        code_view_url,
403        bug_tracking_url,
404        deps,
405    } = fields;
406    let parties = if maintainer_name.is_some() || maintainer_email.is_some() {
407        vec![Party {
408            r#type: None,
409            role: Some(truncate_field("maintainer".to_string())),
410            name: maintainer_name.map(truncate_field),
411            email: maintainer_email.map(truncate_field),
412            url: None,
413            organization: None,
414            organization_url: None,
415            timezone: None,
416        }]
417    } else {
418        Vec::new()
419    };
420
421    let mut dependencies: Vec<Dependency> = deps
422        .into_iter()
423        .map(|(dep_name, version_constraint)| {
424            let purl = PackageUrl::new("chef", &dep_name)
425                .map(|p| truncate_field(p.to_string()))
426                .ok();
427            Dependency {
428                purl,
429                extracted_requirement: version_constraint.map(truncate_field),
430                scope: Some(truncate_field("dependencies".to_string())),
431                is_runtime: Some(true),
432                is_optional: Some(false),
433                is_pinned: None,
434                is_direct: None,
435                resolved_package: None,
436                extra_data: None,
437            }
438        })
439        .collect();
440
441    dependencies.sort_by(|a, b| {
442        let name_a = a.purl.as_deref().unwrap_or("");
443        let name_b = b.purl.as_deref().unwrap_or("");
444        name_a.cmp(name_b)
445    });
446
447    let (download_url, repository_download_url, repository_homepage_url, api_data_url) =
448        if let (Some(n), Some(v)) = (&name, &version) {
449            let download = truncate_field(format!(
450                "https://supermarket.chef.io/cookbooks/{}/versions/{}/download",
451                n, v
452            ));
453            let homepage = truncate_field(format!(
454                "https://supermarket.chef.io/cookbooks/{}/versions/{}/",
455                n, v
456            ));
457            let api = truncate_field(format!(
458                "https://supermarket.chef.io/api/v1/cookbooks/{}/versions/{}",
459                n, v
460            ));
461            (
462                Some(download.clone()),
463                Some(download),
464                Some(homepage),
465                Some(api),
466            )
467        } else {
468            (None, None, None, None)
469        };
470
471    let purl = match (name.as_deref(), version.as_deref()) {
472        (Some(name), Some(version)) => PackageUrl::new("chef", name)
473            .map(|mut p| {
474                let _ = p.with_version(version);
475                truncate_field(p.to_string())
476            })
477            .ok(),
478        _ => None,
479    };
480
481    PackageData {
482        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
483        datasource_id: Some(datasource_id),
484        name,
485        version,
486        description,
487        extracted_license_statement,
488        parties,
489        code_view_url,
490        bug_tracking_url,
491        dependencies,
492        download_url,
493        repository_download_url,
494        repository_homepage_url,
495        api_data_url,
496        purl,
497        primary_language: Some(truncate_field("Ruby".to_string())),
498        ..Default::default()
499    }
500}
501
// Registers the Chef parsers under one entry covering both manifest file names.
// NOTE(review): the macro definition is not visible here; the arguments look
// like (description, path globs, package type, primary language, docs URL) —
// confirm against `register_parser!`.
crate::register_parser!(
    "Chef cookbook metadata",
    &["**/metadata.json", "**/metadata.rb"],
    "chef",
    "Ruby",
    Some("https://docs.chef.io/config_rb_metadata/"),
);