Skip to main content

provenant/parsers/
chef.rs

1//! Parser for Chef cookbook metadata files (JSON and Ruby).
2//!
3//! Extracts package metadata, dependencies, and maintainer information from
4//! Chef cookbook metadata files used by the Chef configuration management tool.
5//!
6//! # Supported Formats
7//! - metadata.json (Chef cookbook metadata in JSON format)
8//! - metadata.rb (Chef cookbook metadata in Ruby DSL format)
9//!
10//! # Key Features
11//! - Maintainer party extraction from maintainer/maintainer_email fields
12//! - Dependency extraction from both `dependencies` and `depends` fields (merged)
13//! - URL construction for Chef Supermarket (download, homepage, API)
14//! - dist-info guard to prevent false positives with Python wheel metadata.json
15//!
16//! # Implementation Notes
17//! - JSON parser uses serde_json for JSON parsing
18//! - Ruby parser uses line-based token extraction (not a full Ruby parser)
19//! - Description from `description` or fallback to `long_description`
20//! - Graceful error handling: logs warnings and returns default on parse failure
21//! - IO.read(...) expressions in Ruby files are skipped (cannot evaluate Ruby code)
22
23use std::collections::HashMap;
24use std::fs::{self, File};
25use std::io::{BufRead, BufReader};
26use std::path::Path;
27use std::sync::LazyLock;
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use regex::Regex;
32use serde_json::Value;
33
34use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
35
36use super::PackageParser;
37use super::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, read_file_to_string, truncate_field};
38
// Keys of the Chef cookbook metadata entries this module reads.
// The JSON parser uses them to index the parsed `metadata.json` document.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// `description` is preferred; `long_description` is only consulted as a fallback.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LONG_DESCRIPTION: &str = "long_description";
const FIELD_LICENSE: &str = "license";
// Maintainer name and e-mail together form the single "maintainer" party.
const FIELD_MAINTAINER: &str = "maintainer";
const FIELD_MAINTAINER_EMAIL: &str = "maintainer_email";
// `source_url` feeds `code_view_url`; `issues_url` feeds `bug_tracking_url`.
const FIELD_SOURCE_URL: &str = "source_url";
const FIELD_ISSUES_URL: &str = "issues_url";
// Both keys are read as objects mapping a cookbook name to a version constraint.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEPENDS: &str = "depends";
50
51static RE_FIELD: LazyLock<Regex> =
52    LazyLock::new(|| Regex::new(r#"^\s*(\w+)\s+['"](.+?)['"]"#).expect("valid regex"));
53static RE_DEPENDS: LazyLock<Regex> = LazyLock::new(|| {
54    Regex::new(r#"^\s*depends\s+['"](.+?)['"](?:\s*,\s*['"](.+?)['"])?"#).expect("valid regex")
55});
56static RE_IO_READ: LazyLock<Regex> =
57    LazyLock::new(|| Regex::new(r"IO\.read\(").expect("valid regex"));
58
/// Values collected by either Chef parser, handed to `build_package` to
/// assemble the final `PackageData`.
struct ChefPackageFields {
    /// Identifies which manifest flavour (JSON or Ruby) the values came from.
    datasource_id: DatasourceId,
    /// Cookbook name, if one was found.
    name: Option<String>,
    /// Cookbook version, if one was found.
    version: Option<String>,
    /// `description`, or `long_description` when the former is missing or blank.
    description: Option<String>,
    /// Raw value of the `license` field.
    extracted_license_statement: Option<String>,
    /// Value of the `maintainer` field.
    maintainer_name: Option<String>,
    /// Value of the `maintainer_email` field.
    maintainer_email: Option<String>,
    /// Value of the `source_url` field.
    code_view_url: Option<String>,
    /// Value of the `issues_url` field.
    bug_tracking_url: Option<String>,
    /// Dependency cookbook name mapped to its optional version constraint.
    deps: HashMap<String, Option<String>>,
}
71
72/// Chef metadata.json parser for Chef cookbook manifests.
73///
74/// Extracts metadata from Chef cookbook metadata.json files, including
75/// dependencies from both `dependencies` and `depends` fields.
76pub struct ChefMetadataJsonParser;
77
78impl PackageParser for ChefMetadataJsonParser {
79    const PACKAGE_TYPE: PackageType = PackageType::Chef;
80
81    fn is_match(path: &Path) -> bool {
82        if path.file_name().is_some_and(|name| name == "metadata.json") {
83            // Check parent directory doesn't end with "dist-info"
84            // to prevent false positives with Python wheel metadata.json files
85            if let Some(parent) = path.parent()
86                && let Some(parent_name) = parent.file_name().and_then(|n| n.to_str())
87            {
88                return !parent_name.ends_with("dist-info");
89            }
90            return true;
91        }
92        false
93    }
94
95    fn extract_packages(path: &Path) -> Vec<PackageData> {
96        let json_content = match read_json_file(path) {
97            Ok(content) => content,
98            Err(e) => {
99                warn!("Failed to read metadata.json at {:?}: {}", path, e);
100                return vec![default_package_data(DatasourceId::ChefCookbookMetadataJson)];
101            }
102        };
103
104        let name = json_content
105            .get(FIELD_NAME)
106            .and_then(|v| v.as_str())
107            .map(|s| s.trim().to_string())
108            .filter(|s| !s.is_empty())
109            .map(truncate_field);
110
111        let version = json_content
112            .get(FIELD_VERSION)
113            .and_then(|v| v.as_str())
114            .map(|s| s.trim().to_string())
115            .filter(|s| !s.is_empty())
116            .map(truncate_field);
117
118        let description = extract_description(&json_content).map(truncate_field);
119
120        let extracted_license_statement = json_content
121            .get(FIELD_LICENSE)
122            .and_then(|v| v.as_str())
123            .map(|s| s.trim().to_string())
124            .filter(|s| !s.is_empty())
125            .map(truncate_field);
126
127        let maintainer_name = json_content
128            .get(FIELD_MAINTAINER)
129            .and_then(|v| v.as_str())
130            .map(|s| s.trim().to_string())
131            .filter(|s| !s.is_empty())
132            .map(truncate_field);
133
134        let maintainer_email = json_content
135            .get(FIELD_MAINTAINER_EMAIL)
136            .and_then(|v| v.as_str())
137            .map(|s| s.trim().to_string())
138            .filter(|s| !s.is_empty())
139            .map(truncate_field);
140
141        let code_view_url = json_content
142            .get(FIELD_SOURCE_URL)
143            .and_then(|v| v.as_str())
144            .map(|s| s.trim().to_string())
145            .filter(|s| !s.is_empty())
146            .map(truncate_field);
147
148        let bug_tracking_url = json_content
149            .get(FIELD_ISSUES_URL)
150            .and_then(|v| v.as_str())
151            .map(|s| s.trim().to_string())
152            .filter(|s| !s.is_empty())
153            .map(truncate_field);
154
155        let mut deps: HashMap<String, Option<String>> = HashMap::new();
156
157        if let Some(deps_obj) = json_content
158            .get(FIELD_DEPENDENCIES)
159            .and_then(|v| v.as_object())
160        {
161            for (dep_name, dep_version) in deps_obj.iter().take(MAX_ITERATION_COUNT) {
162                let version_constraint = dep_version
163                    .as_str()
164                    .map(|s| s.trim().to_string())
165                    .filter(|s| !s.is_empty())
166                    .map(truncate_field);
167                deps.insert(
168                    truncate_field(dep_name.trim().to_string()),
169                    version_constraint,
170                );
171            }
172        }
173
174        if let Some(depends_obj) = json_content.get(FIELD_DEPENDS).and_then(|v| v.as_object()) {
175            for (dep_name, dep_version) in depends_obj.iter().take(MAX_ITERATION_COUNT) {
176                let version_constraint = dep_version
177                    .as_str()
178                    .map(|s| s.trim().to_string())
179                    .filter(|s| !s.is_empty())
180                    .map(truncate_field);
181                deps.insert(
182                    truncate_field(dep_name.trim().to_string()),
183                    version_constraint,
184                );
185            }
186        }
187
188        vec![build_package(ChefPackageFields {
189            datasource_id: DatasourceId::ChefCookbookMetadataJson,
190            name,
191            version,
192            description,
193            extracted_license_statement,
194            maintainer_name,
195            maintainer_email,
196            code_view_url,
197            bug_tracking_url,
198            deps,
199        })]
200    }
201}
202
203fn read_json_file(path: &Path) -> Result<Value, String> {
204    let contents = read_file_to_string(path, None).map_err(|e| e.to_string())?;
205    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
206}
207
208fn default_package_data(datasource_id: DatasourceId) -> PackageData {
209    PackageData {
210        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
211        datasource_id: Some(datasource_id),
212        ..Default::default()
213    }
214}
215
216fn extract_description(json: &Value) -> Option<String> {
217    // Try description first, then long_description
218    json.get(FIELD_DESCRIPTION)
219        .and_then(|v| v.as_str())
220        .map(|s| s.trim().to_string())
221        .filter(|s| !s.is_empty())
222        .or_else(|| {
223            json.get(FIELD_LONG_DESCRIPTION)
224                .and_then(|v| v.as_str())
225                .map(|s| s.trim().to_string())
226                .filter(|s| !s.is_empty())
227        })
228}
229
230/// Chef metadata.rb parser for Chef cookbook manifests in Ruby DSL format.
231///
232/// Uses line-based token extraction to parse Ruby DSL without executing Ruby code.
233pub struct ChefMetadataRbParser;
234
235impl PackageParser for ChefMetadataRbParser {
236    const PACKAGE_TYPE: PackageType = PackageType::Chef;
237
238    fn is_match(path: &Path) -> bool {
239        path.file_name().is_some_and(|name| name == "metadata.rb")
240    }
241
242    fn extract_packages(path: &Path) -> Vec<PackageData> {
243        if let Ok(metadata) = fs::metadata(path)
244            && metadata.len() > MAX_MANIFEST_SIZE
245        {
246            warn!(
247                "File {:?} is {} bytes, exceeding the {} byte limit",
248                path,
249                metadata.len(),
250                MAX_MANIFEST_SIZE
251            );
252            return vec![default_package_data(DatasourceId::ChefCookbookMetadataRb)];
253        }
254
255        let file = match File::open(path) {
256            Ok(f) => f,
257            Err(e) => {
258                warn!("Failed to open metadata.rb at {:?}: {}", path, e);
259                return vec![default_package_data(DatasourceId::ChefCookbookMetadataRb)];
260            }
261        };
262
263        let reader = BufReader::new(file);
264        let mut fields: HashMap<String, String> = HashMap::new();
265        let mut deps: HashMap<String, Option<String>> = HashMap::new();
266
267        for line in reader.lines().take(MAX_ITERATION_COUNT) {
268            let line = match line {
269                Ok(l) => l,
270                Err(e) => {
271                    warn!("Skipping non-UTF-8 line in {:?}: {}", path, e);
272                    continue;
273                }
274            };
275
276            let trimmed = line.trim();
277
278            if trimmed.is_empty() || trimmed.starts_with('#') {
279                continue;
280            }
281
282            if RE_IO_READ.is_match(&line) {
283                continue;
284            }
285
286            if let Some(caps) = RE_DEPENDS.captures(&line) {
287                let dep_name = caps
288                    .get(1)
289                    .map(|m| m.as_str().to_string())
290                    .unwrap_or_default();
291                let dep_version = caps.get(2).map(|m| m.as_str().to_string());
292                if !dep_name.is_empty() {
293                    deps.insert(dep_name, dep_version);
294                }
295                continue;
296            }
297
298            if let Some(caps) = RE_FIELD.captures(&line) {
299                let key = caps
300                    .get(1)
301                    .map(|m| m.as_str().to_string())
302                    .unwrap_or_default();
303                let value = caps
304                    .get(2)
305                    .map(|m| m.as_str().to_string())
306                    .unwrap_or_default();
307
308                if !key.is_empty() && !value.is_empty() {
309                    match key.as_str() {
310                        "name" | "version" | "description" | "long_description" | "license"
311                        | "maintainer" | "maintainer_email" | "source_url" | "issues_url" => {
312                            fields.insert(key, value);
313                        }
314                        _ => {}
315                    }
316                }
317            }
318        }
319
320        let name = fields
321            .get("name")
322            .map(|s| s.trim().to_string())
323            .filter(|s| !s.is_empty())
324            .map(truncate_field);
325
326        let version = fields
327            .get("version")
328            .map(|s| s.trim().to_string())
329            .filter(|s| !s.is_empty())
330            .map(truncate_field);
331
332        let description = fields
333            .get("description")
334            .map(|s| s.trim().to_string())
335            .filter(|s| !s.is_empty())
336            .map(truncate_field)
337            .or_else(|| {
338                fields
339                    .get("long_description")
340                    .map(|s| s.trim().to_string())
341                    .filter(|s| !s.is_empty())
342                    .map(truncate_field)
343            });
344
345        let extracted_license_statement = fields
346            .get("license")
347            .map(|s| s.trim().to_string())
348            .filter(|s| !s.is_empty())
349            .map(truncate_field);
350
351        let maintainer_name = fields
352            .get("maintainer")
353            .map(|s| s.trim().to_string())
354            .filter(|s| !s.is_empty())
355            .map(truncate_field);
356
357        let maintainer_email = fields
358            .get("maintainer_email")
359            .map(|s| s.trim().to_string())
360            .filter(|s| !s.is_empty())
361            .map(truncate_field);
362
363        let code_view_url = fields
364            .get("source_url")
365            .map(|s| s.trim().to_string())
366            .filter(|s| !s.is_empty())
367            .map(truncate_field);
368
369        let bug_tracking_url = fields
370            .get("issues_url")
371            .map(|s| s.trim().to_string())
372            .filter(|s| !s.is_empty())
373            .map(truncate_field);
374
375        vec![build_package(ChefPackageFields {
376            datasource_id: DatasourceId::ChefCookbookMetadataRb,
377            name,
378            version,
379            description,
380            extracted_license_statement,
381            maintainer_name,
382            maintainer_email,
383            code_view_url,
384            bug_tracking_url,
385            deps,
386        })]
387    }
388}
389
390fn build_package(fields: ChefPackageFields) -> PackageData {
391    let ChefPackageFields {
392        datasource_id,
393        name,
394        version,
395        description,
396        extracted_license_statement,
397        maintainer_name,
398        maintainer_email,
399        code_view_url,
400        bug_tracking_url,
401        deps,
402    } = fields;
403    let parties = if maintainer_name.is_some() || maintainer_email.is_some() {
404        vec![Party {
405            r#type: None,
406            role: Some(truncate_field("maintainer".to_string())),
407            name: maintainer_name.map(truncate_field),
408            email: maintainer_email.map(truncate_field),
409            url: None,
410            organization: None,
411            organization_url: None,
412            timezone: None,
413        }]
414    } else {
415        Vec::new()
416    };
417
418    let mut dependencies: Vec<Dependency> = deps
419        .into_iter()
420        .map(|(dep_name, version_constraint)| {
421            let purl = PackageUrl::new("chef", &dep_name)
422                .map(|p| truncate_field(p.to_string()))
423                .ok();
424            Dependency {
425                purl,
426                extracted_requirement: version_constraint.map(truncate_field),
427                scope: Some(truncate_field("dependencies".to_string())),
428                is_runtime: Some(true),
429                is_optional: Some(false),
430                is_pinned: None,
431                is_direct: None,
432                resolved_package: None,
433                extra_data: None,
434            }
435        })
436        .collect();
437
438    dependencies.sort_by(|a, b| {
439        let name_a = a.purl.as_deref().unwrap_or("");
440        let name_b = b.purl.as_deref().unwrap_or("");
441        name_a.cmp(name_b)
442    });
443
444    let (download_url, repository_download_url, repository_homepage_url, api_data_url) =
445        if let (Some(n), Some(v)) = (&name, &version) {
446            let download = truncate_field(format!(
447                "https://supermarket.chef.io/cookbooks/{}/versions/{}/download",
448                n, v
449            ));
450            let homepage = truncate_field(format!(
451                "https://supermarket.chef.io/cookbooks/{}/versions/{}/",
452                n, v
453            ));
454            let api = truncate_field(format!(
455                "https://supermarket.chef.io/api/v1/cookbooks/{}/versions/{}",
456                n, v
457            ));
458            (
459                Some(download.clone()),
460                Some(download),
461                Some(homepage),
462                Some(api),
463            )
464        } else {
465            (None, None, None, None)
466        };
467
468    let purl = match (name.as_deref(), version.as_deref()) {
469        (Some(name), Some(version)) => PackageUrl::new("chef", name)
470            .map(|mut p| {
471                let _ = p.with_version(version);
472                truncate_field(p.to_string())
473            })
474            .ok(),
475        _ => None,
476    };
477
478    PackageData {
479        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
480        datasource_id: Some(datasource_id),
481        name,
482        version,
483        description,
484        extracted_license_statement,
485        parties,
486        code_view_url,
487        bug_tracking_url,
488        dependencies,
489        download_url,
490        repository_download_url,
491        repository_homepage_url,
492        api_data_url,
493        purl,
494        primary_language: Some(truncate_field("Ruby".to_string())),
495        ..Default::default()
496    }
497}
498
// Registration entry for the Chef parsers, covering both manifest flavours
// via the two path globs below.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be a description, path globs, the package type, the
// primary language and a documentation URL — confirm against the macro.
crate::register_parser!(
    "Chef cookbook metadata",
    &["**/metadata.json", "**/metadata.rb"],
    "chef",
    "Ruby",
    Some("https://docs.chef.io/config_rb_metadata/"),
);