Skip to main content

provenant/parsers/
chef.rs

1//! Parser for Chef cookbook metadata files (JSON and Ruby).
2//!
3//! Extracts package metadata, dependencies, and maintainer information from
4//! Chef cookbook metadata files used by the Chef configuration management tool.
5//!
6//! # Supported Formats
7//! - metadata.json (Chef cookbook metadata in JSON format)
8//! - metadata.rb (Chef cookbook metadata in Ruby DSL format)
9//!
10//! # Key Features
11//! - Maintainer party extraction from maintainer/maintainer_email fields
12//! - Dependency extraction from both `dependencies` and `depends` fields (merged)
13//! - URL construction for Chef Supermarket (download, homepage, API)
14//! - dist-info guard to prevent false positives with Python wheel metadata.json
15//!
16//! # Implementation Notes
17//! - JSON parser uses serde_json for JSON parsing
18//! - Ruby parser uses line-based token extraction (not a full Ruby parser)
19//! - Description from `description` or fallback to `long_description`
20//! - Graceful error handling: logs warnings and returns default on parse failure
21//! - IO.read(...) expressions in Ruby files are skipped (cannot evaluate Ruby code)
22
23use std::collections::HashMap;
24use std::fs::File;
25use std::io::{BufRead, BufReader, Read};
26use std::path::Path;
27
28use log::warn;
29use packageurl::PackageUrl;
30use regex::Regex;
31use serde_json::Value;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
34
35use super::PackageParser;
36
// Keys looked up in a cookbook's `metadata.json` document.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// `description` is preferred; `long_description` is only used as a fallback.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_LONG_DESCRIPTION: &str = "long_description";
const FIELD_LICENSE: &str = "license";
const FIELD_MAINTAINER: &str = "maintainer";
const FIELD_MAINTAINER_EMAIL: &str = "maintainer_email";
const FIELD_SOURCE_URL: &str = "source_url";
const FIELD_ISSUES_URL: &str = "issues_url";
// Both keys are read as objects mapping a cookbook name to a version
// constraint; their entries are merged into a single dependency map.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEPENDS: &str = "depends";
48
/// Format-independent bundle of values extracted from a cookbook manifest.
///
/// Both the JSON and the Ruby parser fill one of these and hand it to
/// `build_package`, which turns it into a `PackageData`.
struct ChefPackageFields {
    /// Identifies which manifest flavour (JSON or Ruby) the values came from.
    datasource_id: DatasourceId,
    /// Cookbook name (`name`).
    name: Option<String>,
    /// Cookbook version (`version`).
    version: Option<String>,
    /// `description`, or `long_description` when the former is missing or blank.
    description: Option<String>,
    /// Raw value of the `license` field.
    extracted_license_statement: Option<String>,
    /// Value of the `maintainer` field.
    maintainer_name: Option<String>,
    /// Value of the `maintainer_email` field.
    maintainer_email: Option<String>,
    /// Value of the `source_url` field.
    code_view_url: Option<String>,
    /// Value of the `issues_url` field.
    bug_tracking_url: Option<String>,
    /// Dependency name mapped to its optional version constraint.
    deps: HashMap<String, Option<String>>,
}
61
62/// Chef metadata.json parser for Chef cookbook manifests.
63///
64/// Extracts metadata from Chef cookbook metadata.json files, including
65/// dependencies from both `dependencies` and `depends` fields.
66pub struct ChefMetadataJsonParser;
67
68impl PackageParser for ChefMetadataJsonParser {
69    const PACKAGE_TYPE: PackageType = PackageType::Chef;
70
71    fn is_match(path: &Path) -> bool {
72        if path.file_name().is_some_and(|name| name == "metadata.json") {
73            // Check parent directory doesn't end with "dist-info"
74            // to prevent false positives with Python wheel metadata.json files
75            if let Some(parent) = path.parent()
76                && let Some(parent_name) = parent.file_name().and_then(|n| n.to_str())
77            {
78                return !parent_name.ends_with("dist-info");
79            }
80            return true;
81        }
82        false
83    }
84
85    fn extract_packages(path: &Path) -> Vec<PackageData> {
86        let json_content = match read_json_file(path) {
87            Ok(content) => content,
88            Err(e) => {
89                warn!("Failed to read metadata.json at {:?}: {}", path, e);
90                return vec![default_package_data()];
91            }
92        };
93
94        let name = json_content
95            .get(FIELD_NAME)
96            .and_then(|v| v.as_str())
97            .map(|s| s.trim().to_string())
98            .filter(|s| !s.is_empty());
99
100        let version = json_content
101            .get(FIELD_VERSION)
102            .and_then(|v| v.as_str())
103            .map(|s| s.trim().to_string())
104            .filter(|s| !s.is_empty());
105
106        let description = extract_description(&json_content);
107
108        let extracted_license_statement = json_content
109            .get(FIELD_LICENSE)
110            .and_then(|v| v.as_str())
111            .map(|s| s.trim().to_string())
112            .filter(|s| !s.is_empty());
113
114        let maintainer_name = json_content
115            .get(FIELD_MAINTAINER)
116            .and_then(|v| v.as_str())
117            .map(|s| s.trim().to_string())
118            .filter(|s| !s.is_empty());
119
120        let maintainer_email = json_content
121            .get(FIELD_MAINTAINER_EMAIL)
122            .and_then(|v| v.as_str())
123            .map(|s| s.trim().to_string())
124            .filter(|s| !s.is_empty());
125
126        let code_view_url = json_content
127            .get(FIELD_SOURCE_URL)
128            .and_then(|v| v.as_str())
129            .map(|s| s.trim().to_string())
130            .filter(|s| !s.is_empty());
131
132        let bug_tracking_url = json_content
133            .get(FIELD_ISSUES_URL)
134            .and_then(|v| v.as_str())
135            .map(|s| s.trim().to_string())
136            .filter(|s| !s.is_empty());
137
138        let mut deps: HashMap<String, Option<String>> = HashMap::new();
139
140        if let Some(deps_obj) = json_content
141            .get(FIELD_DEPENDENCIES)
142            .and_then(|v| v.as_object())
143        {
144            for (dep_name, dep_version) in deps_obj {
145                let version_constraint = dep_version
146                    .as_str()
147                    .map(|s| s.trim().to_string())
148                    .filter(|s| !s.is_empty());
149                deps.insert(dep_name.trim().to_string(), version_constraint);
150            }
151        }
152
153        if let Some(depends_obj) = json_content.get(FIELD_DEPENDS).and_then(|v| v.as_object()) {
154            for (dep_name, dep_version) in depends_obj {
155                let version_constraint = dep_version
156                    .as_str()
157                    .map(|s| s.trim().to_string())
158                    .filter(|s| !s.is_empty());
159                deps.insert(dep_name.trim().to_string(), version_constraint);
160            }
161        }
162
163        vec![build_package(ChefPackageFields {
164            datasource_id: DatasourceId::ChefCookbookMetadataJson,
165            name,
166            version,
167            description,
168            extracted_license_statement,
169            maintainer_name,
170            maintainer_email,
171            code_view_url,
172            bug_tracking_url,
173            deps,
174        })]
175    }
176}
177
178fn read_json_file(path: &Path) -> Result<Value, String> {
179    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
180    let mut contents = String::new();
181    file.read_to_string(&mut contents)
182        .map_err(|e| format!("Failed to read file: {}", e))?;
183    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
184}
185
186fn default_package_data() -> PackageData {
187    PackageData {
188        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
189        datasource_id: Some(DatasourceId::ChefCookbookMetadataJson),
190        ..Default::default()
191    }
192}
193
194fn extract_description(json: &Value) -> Option<String> {
195    // Try description first, then long_description
196    json.get(FIELD_DESCRIPTION)
197        .and_then(|v| v.as_str())
198        .map(|s| s.trim().to_string())
199        .filter(|s| !s.is_empty())
200        .or_else(|| {
201            json.get(FIELD_LONG_DESCRIPTION)
202                .and_then(|v| v.as_str())
203                .map(|s| s.trim().to_string())
204                .filter(|s| !s.is_empty())
205        })
206}
207
208/// Chef metadata.rb parser for Chef cookbook manifests in Ruby DSL format.
209///
210/// Uses line-based token extraction to parse Ruby DSL without executing Ruby code.
211pub struct ChefMetadataRbParser;
212
213impl PackageParser for ChefMetadataRbParser {
214    const PACKAGE_TYPE: PackageType = PackageType::Chef;
215
216    fn is_match(path: &Path) -> bool {
217        path.file_name().is_some_and(|name| name == "metadata.rb")
218    }
219
220    fn extract_packages(path: &Path) -> Vec<PackageData> {
221        let file = match File::open(path) {
222            Ok(f) => f,
223            Err(e) => {
224                warn!("Failed to open metadata.rb at {:?}: {}", path, e);
225                return vec![default_package_data()];
226            }
227        };
228
229        let reader = BufReader::new(file);
230        let mut fields: HashMap<String, String> = HashMap::new();
231        let mut deps: HashMap<String, Option<String>> = HashMap::new();
232
233        let field_pattern = Regex::new(r#"^\s*(\w+)\s+['"](.+?)['"]"#).unwrap();
234        let depends_pattern =
235            Regex::new(r#"^\s*depends\s+['"](.+?)['"](?:\s*,\s*['"](.+?)['"])?"#).unwrap();
236        let io_read_pattern = Regex::new(r"IO\.read\(").unwrap();
237
238        for line in reader.lines() {
239            let line = match line {
240                Ok(l) => l,
241                Err(_) => continue,
242            };
243
244            let trimmed = line.trim();
245
246            if trimmed.is_empty() || trimmed.starts_with('#') {
247                continue;
248            }
249
250            if io_read_pattern.is_match(&line) {
251                continue;
252            }
253
254            if let Some(caps) = depends_pattern.captures(&line) {
255                let dep_name = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
256                let dep_version = caps.get(2).map(|m| m.as_str().to_string());
257                deps.insert(dep_name, dep_version);
258                continue;
259            }
260
261            if let Some(caps) = field_pattern.captures(&line) {
262                let key = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
263                let value = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
264
265                match key.as_str() {
266                    "name" | "version" | "description" | "long_description" | "license"
267                    | "maintainer" | "maintainer_email" | "source_url" | "issues_url" => {
268                        fields.insert(key, value);
269                    }
270                    _ => {}
271                }
272            }
273        }
274
275        let name = fields
276            .get("name")
277            .map(|s| s.trim().to_string())
278            .filter(|s| !s.is_empty());
279
280        let version = fields
281            .get("version")
282            .map(|s| s.trim().to_string())
283            .filter(|s| !s.is_empty());
284
285        let description = fields
286            .get("description")
287            .map(|s| s.trim().to_string())
288            .filter(|s| !s.is_empty())
289            .or_else(|| {
290                fields
291                    .get("long_description")
292                    .map(|s| s.trim().to_string())
293                    .filter(|s| !s.is_empty())
294            });
295
296        let extracted_license_statement = fields
297            .get("license")
298            .map(|s| s.trim().to_string())
299            .filter(|s| !s.is_empty());
300
301        let maintainer_name = fields
302            .get("maintainer")
303            .map(|s| s.trim().to_string())
304            .filter(|s| !s.is_empty());
305
306        let maintainer_email = fields
307            .get("maintainer_email")
308            .map(|s| s.trim().to_string())
309            .filter(|s| !s.is_empty());
310
311        let code_view_url = fields
312            .get("source_url")
313            .map(|s| s.trim().to_string())
314            .filter(|s| !s.is_empty());
315
316        let bug_tracking_url = fields
317            .get("issues_url")
318            .map(|s| s.trim().to_string())
319            .filter(|s| !s.is_empty());
320
321        vec![build_package(ChefPackageFields {
322            datasource_id: DatasourceId::ChefCookbookMetadataRb,
323            name,
324            version,
325            description,
326            extracted_license_statement,
327            maintainer_name,
328            maintainer_email,
329            code_view_url,
330            bug_tracking_url,
331            deps,
332        })]
333    }
334}
335
336fn build_package(fields: ChefPackageFields) -> PackageData {
337    let ChefPackageFields {
338        datasource_id,
339        name,
340        version,
341        description,
342        extracted_license_statement,
343        maintainer_name,
344        maintainer_email,
345        code_view_url,
346        bug_tracking_url,
347        deps,
348    } = fields;
349    let parties = if maintainer_name.is_some() || maintainer_email.is_some() {
350        vec![Party {
351            r#type: None,
352            role: Some("maintainer".to_string()),
353            name: maintainer_name,
354            email: maintainer_email,
355            url: None,
356            organization: None,
357            organization_url: None,
358            timezone: None,
359        }]
360    } else {
361        Vec::new()
362    };
363
364    let mut dependencies: Vec<Dependency> = deps
365        .into_iter()
366        .map(|(dep_name, version_constraint)| {
367            let purl = PackageUrl::new("chef", &dep_name)
368                .map(|p| p.to_string())
369                .ok();
370            Dependency {
371                purl,
372                extracted_requirement: version_constraint,
373                scope: Some("dependencies".to_string()),
374                is_runtime: Some(true),
375                is_optional: Some(false),
376                is_pinned: None,
377                is_direct: None,
378                resolved_package: None,
379                extra_data: None,
380            }
381        })
382        .collect();
383
384    dependencies.sort_by(|a, b| {
385        let name_a = a.purl.as_deref().unwrap_or("");
386        let name_b = b.purl.as_deref().unwrap_or("");
387        name_a.cmp(name_b)
388    });
389
390    let (download_url, repository_download_url, repository_homepage_url, api_data_url) =
391        if let (Some(n), Some(v)) = (&name, &version) {
392            let download = format!(
393                "https://supermarket.chef.io/cookbooks/{}/versions/{}/download",
394                n, v
395            );
396            let homepage = format!(
397                "https://supermarket.chef.io/cookbooks/{}/versions/{}/",
398                n, v
399            );
400            let api = format!(
401                "https://supermarket.chef.io/api/v1/cookbooks/{}/versions/{}",
402                n, v
403            );
404            (
405                Some(download.clone()),
406                Some(download),
407                Some(homepage),
408                Some(api),
409            )
410        } else {
411            (None, None, None, None)
412        };
413
414    let purl = match (name.as_deref(), version.as_deref()) {
415        (Some(name), Some(version)) => PackageUrl::new("chef", name)
416            .map(|mut p| {
417                let _ = p.with_version(version);
418                p.to_string()
419            })
420            .ok(),
421        _ => None,
422    };
423
424    PackageData {
425        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
426        datasource_id: Some(datasource_id),
427        name,
428        version,
429        description,
430        extracted_license_statement,
431        parties,
432        code_view_url,
433        bug_tracking_url,
434        dependencies,
435        download_url,
436        repository_download_url,
437        repository_homepage_url,
438        api_data_url,
439        purl,
440        primary_language: Some("Ruby".to_string()),
441        ..Default::default()
442    }
443}
444
// Registers the Chef cookbook parsers through the crate-level
// `register_parser!` macro.
// NOTE(review): the macro definition is not visible from this file; the
// arguments look like (description, path globs, package type, primary
// language, documentation URL) — confirm against the macro.
crate::register_parser!(
    "Chef cookbook metadata",
    &["**/metadata.json", "**/metadata.rb"],
    "chef",
    "Ruby",
    Some("https://docs.chef.io/config_rb_metadata/"),
);