Skip to main content

provenant/parsers/
chef.rs

//! Parser for Chef cookbook metadata files (JSON and Ruby).
//!
//! Extracts package metadata, dependencies, and maintainer information from
//! Chef cookbook metadata files used by the Chef configuration management tool.
//!
//! # Supported Formats
//! - metadata.json (Chef cookbook metadata in JSON format)
//! - metadata.rb (Chef cookbook metadata in Ruby DSL format)
//!
//! # Key Features
//! - Maintainer party extraction from maintainer/maintainer_email fields
//! - Dependency extraction from both `dependencies` and `depends` fields (merged)
//! - URL construction for Chef Supermarket (download, homepage, API)
//! - dist-info guard to prevent false positives with Python wheel metadata.json
//!
//! # Implementation Notes
//! - JSON parser uses serde_json for JSON parsing
//! - Ruby parser uses line-based token extraction (not a full Ruby parser)
//! - Description is taken from `description`, falling back to `long_description`
//! - Graceful error handling: logs warnings and returns default on parse failure
//! - IO.read(...) expressions in Ruby files are skipped (cannot evaluate Ruby code)
22
23use std::collections::HashMap;
24use std::fs::File;
25use std::io::{BufRead, BufReader, Read};
26use std::path::Path;
27
28use log::warn;
29use packageurl::PackageUrl;
30use regex::Regex;
31use serde_json::Value;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
34
35use super::PackageParser;
36
// Keys looked up in Chef cookbook metadata.

/// Cookbook name.
const FIELD_NAME: &str = "name";
/// Cookbook version.
const FIELD_VERSION: &str = "version";
/// Short description; preferred over `long_description`.
const FIELD_DESCRIPTION: &str = "description";
/// Long description; used only when `description` is missing or blank.
const FIELD_LONG_DESCRIPTION: &str = "long_description";
/// License statement, kept as written in the metadata.
const FIELD_LICENSE: &str = "license";
/// Maintainer display name.
const FIELD_MAINTAINER: &str = "maintainer";
/// Maintainer e-mail address.
const FIELD_MAINTAINER_EMAIL: &str = "maintainer_email";
/// Source URL; surfaced as the package's code-view URL.
const FIELD_SOURCE_URL: &str = "source_url";
/// Issue tracker URL; surfaced as the package's bug-tracking URL.
const FIELD_ISSUES_URL: &str = "issues_url";
/// Object mapping dependency names to version constraints.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Second dependency key, merged with `dependencies`.
const FIELD_DEPENDS: &str = "depends";
48
/// Raw cookbook fields gathered by a parser before they are turned into a
/// `PackageData` by `build_package`.
///
/// Every scalar is `None` when the metadata did not provide a usable
/// (non-blank) value.
#[derive(Debug, Default)]
struct ChefPackageFields {
    /// Cookbook name.
    name: Option<String>,
    /// Cookbook version.
    version: Option<String>,
    /// Description (already resolved between short and long description).
    description: Option<String>,
    /// License statement exactly as written in the metadata.
    extracted_license_statement: Option<String>,
    /// Maintainer display name.
    maintainer_name: Option<String>,
    /// Maintainer e-mail address.
    maintainer_email: Option<String>,
    /// Value of the cookbook's `source_url`.
    code_view_url: Option<String>,
    /// Value of the cookbook's `issues_url`.
    bug_tracking_url: Option<String>,
    /// Dependency name mapped to its optional version constraint.
    deps: HashMap<String, Option<String>>,
}
60
61/// Chef metadata.json parser for Chef cookbook manifests.
62///
63/// Extracts metadata from Chef cookbook metadata.json files, including
64/// dependencies from both `dependencies` and `depends` fields.
65pub struct ChefMetadataJsonParser;
66
67impl PackageParser for ChefMetadataJsonParser {
68    const PACKAGE_TYPE: PackageType = PackageType::Chef;
69
70    fn is_match(path: &Path) -> bool {
71        if path.file_name().is_some_and(|name| name == "metadata.json") {
72            // Check parent directory doesn't end with "dist-info"
73            // to prevent false positives with Python wheel metadata.json files
74            if let Some(parent) = path.parent()
75                && let Some(parent_name) = parent.file_name().and_then(|n| n.to_str())
76            {
77                return !parent_name.ends_with("dist-info");
78            }
79            return true;
80        }
81        false
82    }
83
84    fn extract_packages(path: &Path) -> Vec<PackageData> {
85        let json_content = match read_json_file(path) {
86            Ok(content) => content,
87            Err(e) => {
88                warn!("Failed to read metadata.json at {:?}: {}", path, e);
89                return vec![default_package_data()];
90            }
91        };
92
93        let name = json_content
94            .get(FIELD_NAME)
95            .and_then(|v| v.as_str())
96            .map(|s| s.trim().to_string())
97            .filter(|s| !s.is_empty());
98
99        let version = json_content
100            .get(FIELD_VERSION)
101            .and_then(|v| v.as_str())
102            .map(|s| s.trim().to_string())
103            .filter(|s| !s.is_empty());
104
105        let description = extract_description(&json_content);
106
107        let extracted_license_statement = json_content
108            .get(FIELD_LICENSE)
109            .and_then(|v| v.as_str())
110            .map(|s| s.trim().to_string())
111            .filter(|s| !s.is_empty());
112
113        let maintainer_name = json_content
114            .get(FIELD_MAINTAINER)
115            .and_then(|v| v.as_str())
116            .map(|s| s.trim().to_string())
117            .filter(|s| !s.is_empty());
118
119        let maintainer_email = json_content
120            .get(FIELD_MAINTAINER_EMAIL)
121            .and_then(|v| v.as_str())
122            .map(|s| s.trim().to_string())
123            .filter(|s| !s.is_empty());
124
125        let code_view_url = json_content
126            .get(FIELD_SOURCE_URL)
127            .and_then(|v| v.as_str())
128            .map(|s| s.trim().to_string())
129            .filter(|s| !s.is_empty());
130
131        let bug_tracking_url = json_content
132            .get(FIELD_ISSUES_URL)
133            .and_then(|v| v.as_str())
134            .map(|s| s.trim().to_string())
135            .filter(|s| !s.is_empty());
136
137        let mut deps: HashMap<String, Option<String>> = HashMap::new();
138
139        if let Some(deps_obj) = json_content
140            .get(FIELD_DEPENDENCIES)
141            .and_then(|v| v.as_object())
142        {
143            for (dep_name, dep_version) in deps_obj {
144                let version_constraint = dep_version
145                    .as_str()
146                    .map(|s| s.trim().to_string())
147                    .filter(|s| !s.is_empty());
148                deps.insert(dep_name.trim().to_string(), version_constraint);
149            }
150        }
151
152        if let Some(depends_obj) = json_content.get(FIELD_DEPENDS).and_then(|v| v.as_object()) {
153            for (dep_name, dep_version) in depends_obj {
154                let version_constraint = dep_version
155                    .as_str()
156                    .map(|s| s.trim().to_string())
157                    .filter(|s| !s.is_empty());
158                deps.insert(dep_name.trim().to_string(), version_constraint);
159            }
160        }
161
162        vec![build_package(ChefPackageFields {
163            name,
164            version,
165            description,
166            extracted_license_statement,
167            maintainer_name,
168            maintainer_email,
169            code_view_url,
170            bug_tracking_url,
171            deps,
172        })]
173    }
174}
175
176fn read_json_file(path: &Path) -> Result<Value, String> {
177    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
178    let mut contents = String::new();
179    file.read_to_string(&mut contents)
180        .map_err(|e| format!("Failed to read file: {}", e))?;
181    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
182}
183
184fn default_package_data() -> PackageData {
185    PackageData {
186        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
187        datasource_id: Some(DatasourceId::ChefCookbookMetadataJson),
188        ..Default::default()
189    }
190}
191
192fn extract_description(json: &Value) -> Option<String> {
193    // Try description first, then long_description
194    json.get(FIELD_DESCRIPTION)
195        .and_then(|v| v.as_str())
196        .map(|s| s.trim().to_string())
197        .filter(|s| !s.is_empty())
198        .or_else(|| {
199            json.get(FIELD_LONG_DESCRIPTION)
200                .and_then(|v| v.as_str())
201                .map(|s| s.trim().to_string())
202                .filter(|s| !s.is_empty())
203        })
204}
205
206/// Chef metadata.rb parser for Chef cookbook manifests in Ruby DSL format.
207///
208/// Uses line-based token extraction to parse Ruby DSL without executing Ruby code.
209pub struct ChefMetadataRbParser;
210
211impl PackageParser for ChefMetadataRbParser {
212    const PACKAGE_TYPE: PackageType = PackageType::Chef;
213
214    fn is_match(path: &Path) -> bool {
215        path.file_name().is_some_and(|name| name == "metadata.rb")
216    }
217
218    fn extract_packages(path: &Path) -> Vec<PackageData> {
219        let file = match File::open(path) {
220            Ok(f) => f,
221            Err(e) => {
222                warn!("Failed to open metadata.rb at {:?}: {}", path, e);
223                return vec![default_package_data()];
224            }
225        };
226
227        let reader = BufReader::new(file);
228        let mut fields: HashMap<String, String> = HashMap::new();
229        let mut deps: HashMap<String, Option<String>> = HashMap::new();
230
231        let field_pattern = Regex::new(r#"^\s*(\w+)\s+['"](.+?)['"]"#).unwrap();
232        let depends_pattern =
233            Regex::new(r#"^\s*depends\s+['"](.+?)['"](?:\s*,\s*['"](.+?)['"])?"#).unwrap();
234        let io_read_pattern = Regex::new(r"IO\.read\(").unwrap();
235
236        for line in reader.lines() {
237            let line = match line {
238                Ok(l) => l,
239                Err(_) => continue,
240            };
241
242            let trimmed = line.trim();
243
244            if trimmed.is_empty() || trimmed.starts_with('#') {
245                continue;
246            }
247
248            if io_read_pattern.is_match(&line) {
249                continue;
250            }
251
252            if let Some(caps) = depends_pattern.captures(&line) {
253                let dep_name = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
254                let dep_version = caps.get(2).map(|m| m.as_str().to_string());
255                deps.insert(dep_name, dep_version);
256                continue;
257            }
258
259            if let Some(caps) = field_pattern.captures(&line) {
260                let key = caps.get(1).map(|m| m.as_str().to_string()).unwrap();
261                let value = caps.get(2).map(|m| m.as_str().to_string()).unwrap();
262
263                match key.as_str() {
264                    "name" | "version" | "description" | "long_description" | "license"
265                    | "maintainer" | "maintainer_email" | "source_url" | "issues_url" => {
266                        fields.insert(key, value);
267                    }
268                    _ => {}
269                }
270            }
271        }
272
273        let name = fields
274            .get("name")
275            .map(|s| s.trim().to_string())
276            .filter(|s| !s.is_empty());
277
278        let version = fields
279            .get("version")
280            .map(|s| s.trim().to_string())
281            .filter(|s| !s.is_empty());
282
283        let description = fields
284            .get("description")
285            .map(|s| s.trim().to_string())
286            .filter(|s| !s.is_empty())
287            .or_else(|| {
288                fields
289                    .get("long_description")
290                    .map(|s| s.trim().to_string())
291                    .filter(|s| !s.is_empty())
292            });
293
294        let extracted_license_statement = fields
295            .get("license")
296            .map(|s| s.trim().to_string())
297            .filter(|s| !s.is_empty());
298
299        let maintainer_name = fields
300            .get("maintainer")
301            .map(|s| s.trim().to_string())
302            .filter(|s| !s.is_empty());
303
304        let maintainer_email = fields
305            .get("maintainer_email")
306            .map(|s| s.trim().to_string())
307            .filter(|s| !s.is_empty());
308
309        let code_view_url = fields
310            .get("source_url")
311            .map(|s| s.trim().to_string())
312            .filter(|s| !s.is_empty());
313
314        let bug_tracking_url = fields
315            .get("issues_url")
316            .map(|s| s.trim().to_string())
317            .filter(|s| !s.is_empty());
318
319        vec![build_package(ChefPackageFields {
320            name,
321            version,
322            description,
323            extracted_license_statement,
324            maintainer_name,
325            maintainer_email,
326            code_view_url,
327            bug_tracking_url,
328            deps,
329        })]
330    }
331}
332
333fn build_package(fields: ChefPackageFields) -> PackageData {
334    let ChefPackageFields {
335        name,
336        version,
337        description,
338        extracted_license_statement,
339        maintainer_name,
340        maintainer_email,
341        code_view_url,
342        bug_tracking_url,
343        deps,
344    } = fields;
345    let parties = if maintainer_name.is_some() || maintainer_email.is_some() {
346        vec![Party {
347            r#type: None,
348            role: Some("maintainer".to_string()),
349            name: maintainer_name,
350            email: maintainer_email,
351            url: None,
352            organization: None,
353            organization_url: None,
354            timezone: None,
355        }]
356    } else {
357        Vec::new()
358    };
359
360    let mut dependencies: Vec<Dependency> = deps
361        .into_iter()
362        .map(|(dep_name, version_constraint)| {
363            let purl = PackageUrl::new("chef", &dep_name)
364                .map(|p| p.to_string())
365                .ok();
366            Dependency {
367                purl,
368                extracted_requirement: version_constraint,
369                scope: Some("dependencies".to_string()),
370                is_runtime: Some(true),
371                is_optional: Some(false),
372                is_pinned: None,
373                is_direct: None,
374                resolved_package: None,
375                extra_data: None,
376            }
377        })
378        .collect();
379
380    dependencies.sort_by(|a, b| {
381        let name_a = a.purl.as_deref().unwrap_or("");
382        let name_b = b.purl.as_deref().unwrap_or("");
383        name_a.cmp(name_b)
384    });
385
386    let (download_url, repository_download_url, repository_homepage_url, api_data_url) =
387        if let (Some(n), Some(v)) = (&name, &version) {
388            let download = format!(
389                "https://supermarket.chef.io/cookbooks/{}/versions/{}/download",
390                n, v
391            );
392            let homepage = format!(
393                "https://supermarket.chef.io/cookbooks/{}/versions/{}/",
394                n, v
395            );
396            let api = format!(
397                "https://supermarket.chef.io/api/v1/cookbooks/{}/versions/{}",
398                n, v
399            );
400            (
401                Some(download.clone()),
402                Some(download),
403                Some(homepage),
404                Some(api),
405            )
406        } else {
407            (None, None, None, None)
408        };
409
410    PackageData {
411        package_type: Some(ChefMetadataJsonParser::PACKAGE_TYPE),
412        datasource_id: Some(DatasourceId::ChefCookbookMetadataRb),
413        name,
414        version,
415        description,
416        extracted_license_statement,
417        parties,
418        code_view_url,
419        bug_tracking_url,
420        dependencies,
421        download_url,
422        repository_download_url,
423        repository_homepage_url,
424        api_data_url,
425        primary_language: Some("Ruby".to_string()),
426        ..Default::default()
427    }
428}
429
430crate::register_parser!(
431    "Chef cookbook metadata",
432    &["**/metadata.json", "**/metadata.rb"],
433    "chef",
434    "Ruby",
435    Some("https://docs.chef.io/config_rb_metadata/"),
436);