Skip to main content

provenant/parsers/
haxe.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Haxe package manifests (haxelib.json).
5//!
6//! Extracts package metadata and dependencies from Haxe haxelib.json files.
7//!
8//! # Supported Formats
9//! - haxelib.json (Haxe package manifest)
10//!
11//! # Key Features
12//! - Dependency extraction with pinned/unpinned version tracking
13//! - Contributor extraction with haxelib.org profile URLs
14//! - License statement extraction
15//! - Package URL (purl) generation
16//!
17//! # Implementation Notes
18//! - Dependencies with empty string value mean unpinned (latest version)
19//! - License must be one of: GPL, LGPL, BSD, Public, MIT, Apache
20//! - All fields are extracted with graceful error handling
21
22use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
23use crate::parser_warn as warn;
24use packageurl::PackageUrl;
25use serde::{Deserialize, Serialize};
26use std::collections::HashMap;
27use std::path::Path;
28
29use super::PackageParser;
30use super::license_normalization::{
31    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
32    empty_declared_license_data,
33};
34use super::metadata::ParserMetadata;
35use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
36
/// Parser for Haxe `haxelib.json` package manifests.
///
/// This is a stateless unit type; its behaviour comes from the
/// [`PackageParser`] implementation, which turns a manifest into package
/// metadata, dependencies, and contributor parties.
pub struct HaxeParser;
42
43impl PackageParser for HaxeParser {
44    const PACKAGE_TYPE: PackageType = PackageType::Haxe;
45
46    fn metadata() -> Vec<ParserMetadata> {
47        vec![ParserMetadata {
48            description: "Haxe haxelib.json package manifest",
49            file_patterns: &["**/haxelib.json"],
50            package_type: "haxe",
51            primary_language: "Haxe",
52            documentation_url: Some(
53                "https://lib.haxe.org/documentation/creating-a-haxelib-package/",
54            ),
55        }]
56    }
57
58    fn is_match(path: &Path) -> bool {
59        path.file_name().is_some_and(|name| name == "haxelib.json")
60    }
61
62    fn extract_packages(path: &Path) -> Vec<PackageData> {
63        let json_content = match read_haxelib_json(path) {
64            Ok(content) => content,
65            Err(e) => {
66                warn!("Failed to read or parse haxelib.json at {:?}: {}", path, e);
67                return vec![default_package_data()];
68            }
69        };
70
71        let name = json_content.name.map(truncate_field);
72        let version = json_content.version.map(truncate_field);
73
74        // Generate PURL
75        let purl = create_package_url(&name, &version);
76        let extracted_license_statement = json_content.license.map(truncate_field);
77        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
78            normalize_haxe_declared_license(extracted_license_statement.as_deref());
79
80        // Generate URLs
81        let (repository_homepage_url, download_url, repository_download_url) =
82            if let Some(ref n) = name {
83                let home = format!("https://lib.haxe.org/p/{}", n);
84                if let Some(ref v) = version {
85                    let dl = format!("https://lib.haxe.org/p/{}/{}/download/", n, v);
86                    (Some(home), Some(dl.clone()), Some(dl))
87                } else {
88                    (Some(home), None, None)
89                }
90            } else {
91                (None, None, None)
92            };
93
94        // Extract dependencies (maintain insertion order by sorting)
95        let mut dependencies = Vec::new();
96        let mut deps_list: Vec<_> = json_content
97            .dependencies
98            .into_iter()
99            .take(MAX_ITERATION_COUNT)
100            .collect();
101        deps_list.sort_by(|a, b| a.0.cmp(&b.0));
102
103        for (dep_name, dep_version) in deps_list {
104            let is_pinned = !dep_version.is_empty();
105            let dep_purl = create_dep_package_url(&dep_name, &dep_version, is_pinned);
106
107            dependencies.push(Dependency {
108                purl: dep_purl,
109                extracted_requirement: None,
110                scope: None,
111                is_runtime: Some(true),
112                is_optional: Some(false),
113                is_pinned: Some(is_pinned),
114                is_direct: Some(true),
115                resolved_package: None,
116                extra_data: None,
117            });
118        }
119
120        // Extract contributors as parties
121        let mut parties = Vec::new();
122        for contrib in json_content
123            .contributors
124            .into_iter()
125            .take(MAX_ITERATION_COUNT)
126        {
127            parties.push(Party {
128                r#type: Some("person".to_string()),
129                role: Some("contributor".to_string()),
130                name: Some(truncate_field(contrib.clone())),
131                email: None,
132                url: Some(format!("https://lib.haxe.org/u/{}", contrib)),
133                organization: None,
134                organization_url: None,
135                timezone: None,
136            });
137        }
138
139        vec![PackageData {
140            package_type: Some(Self::PACKAGE_TYPE),
141            namespace: None,
142            name,
143            version,
144            qualifiers: None,
145            subpath: None,
146            primary_language: Some("Haxe".to_string()),
147            description: json_content.description.map(truncate_field),
148            release_date: None,
149            parties,
150            keywords: json_content
151                .tags
152                .into_iter()
153                .take(MAX_ITERATION_COUNT)
154                .map(truncate_field)
155                .collect(),
156            homepage_url: json_content.url.map(truncate_field),
157            download_url,
158            size: None,
159            sha1: None,
160            md5: None,
161            sha256: None,
162            sha512: None,
163            bug_tracking_url: None,
164            code_view_url: None,
165            vcs_url: None,
166            copyright: None,
167            holder: None,
168            declared_license_expression,
169            declared_license_expression_spdx,
170            license_detections,
171            other_license_expression: None,
172            other_license_expression_spdx: None,
173            other_license_detections: Vec::new(),
174            extracted_license_statement,
175            notice_text: None,
176            source_packages: Vec::new(),
177            file_references: Vec::new(),
178            is_private: false,
179            is_virtual: false,
180            extra_data: None,
181            dependencies,
182            repository_homepage_url,
183            repository_download_url,
184            api_data_url: None,
185            datasource_id: Some(DatasourceId::HaxelibJson),
186            purl,
187        }]
188    }
189}
190
191/// Internal structure for deserializing haxelib.json files.
192#[derive(Debug, Deserialize, Serialize)]
193struct HaxelibJson {
194    #[serde(default)]
195    name: Option<String>,
196    #[serde(default)]
197    version: Option<String>,
198    #[serde(default)]
199    license: Option<String>,
200    #[serde(default)]
201    url: Option<String>,
202    #[serde(default)]
203    description: Option<String>,
204    #[serde(default)]
205    tags: Vec<String>,
206    #[serde(default)]
207    contributors: Vec<String>,
208    #[serde(default)]
209    dependencies: HashMap<String, String>,
210}
211
212/// Read and parse a haxelib.json file.
213fn read_haxelib_json(path: &Path) -> Result<HaxelibJson, String> {
214    let content =
215        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
216
217    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
218}
219
220/// Create a package URL for a Haxe package.
221fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
222    name.as_ref().and_then(|name| {
223        let mut package_url = match PackageUrl::new("haxe", name) {
224            Ok(p) => p,
225            Err(e) => {
226                warn!(
227                    "Failed to create PackageUrl for haxe package '{}': {}",
228                    name, e
229                );
230                return None;
231            }
232        };
233
234        if let Some(v) = version
235            && let Err(e) = package_url.with_version(v)
236        {
237            warn!(
238                "Failed to set version '{}' for haxe package '{}': {}",
239                v, name, e
240            );
241            return None;
242        }
243
244        Some(package_url.to_string())
245    })
246}
247
248/// Create a package URL for a Haxe dependency.
249fn create_dep_package_url(name: &str, version: &str, is_pinned: bool) -> Option<String> {
250    let mut package_url = match PackageUrl::new("haxe", name) {
251        Ok(p) => p,
252        Err(e) => {
253            warn!(
254                "Failed to create PackageUrl for haxe dependency '{}': {}",
255                name, e
256            );
257            return None;
258        }
259    };
260
261    if is_pinned && let Err(e) = package_url.with_version(version) {
262        warn!(
263            "Failed to set version '{}' for haxe dependency '{}': {}",
264            version, name, e
265        );
266        return None;
267    }
268
269    Some(package_url.to_string())
270}
271
272fn default_package_data() -> PackageData {
273    PackageData {
274        package_type: Some(HaxeParser::PACKAGE_TYPE),
275        primary_language: Some("Haxe".to_string()),
276        datasource_id: Some(DatasourceId::HaxelibJson),
277        ..Default::default()
278    }
279}
280
281fn normalize_haxe_declared_license(
282    statement: Option<&str>,
283) -> (
284    Option<String>,
285    Option<String>,
286    Vec<crate::models::LicenseDetection>,
287) {
288    match statement.map(str::trim).filter(|value| !value.is_empty()) {
289        Some("MIT") => build_declared_license_data_from_pair(
290            "mit",
291            "MIT",
292            DeclaredLicenseMatchMetadata::single_line("MIT"),
293        ),
294        _ => empty_declared_license_data(),
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::DatasourceId;
    use std::path::PathBuf;

    /// Only a file named exactly `haxelib.json` is recognised.
    #[test]
    fn test_is_match() {
        let valid_path = PathBuf::from("/some/path/haxelib.json");
        let invalid_path = PathBuf::from("/some/path/not_haxelib.json");

        assert!(HaxeParser::is_match(&valid_path));
        assert!(!HaxeParser::is_match(&invalid_path));
    }

    /// Core metadata, derived URLs, purl and contributors of the basic fixture.
    #[test]
    fn test_extract_from_testdata_basic() {
        let manifest = PathBuf::from("testdata/haxe/basic/haxelib.json");
        let pkg = HaxeParser::extract_first_package(&manifest);

        assert_eq!(pkg.package_type, Some(PackageType::Haxe));
        assert_eq!(pkg.name.as_deref(), Some("haxelib"));
        assert_eq!(pkg.version.as_deref(), Some("3.4.0"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://lib.haxe.org/documentation/")
        );
        assert_eq!(
            pkg.download_url.as_deref(),
            Some("https://lib.haxe.org/p/haxelib/3.4.0/download/")
        );
        assert_eq!(
            pkg.repository_homepage_url.as_deref(),
            Some("https://lib.haxe.org/p/haxelib")
        );
        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("GPL"));

        // Package URL.
        assert_eq!(pkg.purl.as_deref(), Some("pkg:haxe/haxelib@3.4.0"));

        // Contributors are surfaced as parties.
        assert_eq!(pkg.parties.len(), 6);
        let has_contributor =
            |wanted: &str| pkg.parties.iter().any(|p| p.name.as_deref() == Some(wanted));
        assert!(has_contributor("back2dos"));
        assert!(has_contributor("ncannasse"));
    }

    /// One pinned and one unpinned dependency are reported for the deps fixture.
    #[test]
    fn test_extract_with_dependencies() {
        let manifest = PathBuf::from("testdata/haxe/deps/haxelib.json");
        let pkg = HaxeParser::extract_first_package(&manifest);

        assert_eq!(pkg.name.as_deref(), Some("selecthxml"));
        assert_eq!(pkg.version.as_deref(), Some("0.5.1"));

        // tink_core is unpinned, tink_macro is pinned to 3.23.
        assert_eq!(pkg.dependencies.len(), 2);

        let mut pinned = pkg
            .dependencies
            .iter()
            .filter(|d| d.is_pinned == Some(true));
        let only_pinned = pinned.next().expect("expected one pinned dependency");
        assert!(pinned.next().is_none());
        assert!(
            only_pinned
                .purl
                .as_deref()
                .expect("pinned dependency should have a purl")
                .contains("@3.23")
        );

        let unpinned_count = pkg
            .dependencies
            .iter()
            .filter(|d| d.is_pinned == Some(false))
            .count();
        assert_eq!(unpinned_count, 1);
    }

    /// Manifest tags become package keywords, in manifest order.
    #[test]
    fn test_extract_with_tags() {
        let manifest = PathBuf::from("testdata/haxe/tags/haxelib.json");
        let pkg = HaxeParser::extract_first_package(&manifest);

        assert_eq!(pkg.name.as_deref(), Some("tink_core"));
        assert_eq!(pkg.version.as_deref(), Some("1.18.0"));
        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));

        let expected_keywords = [
            "tink",
            "cross",
            "utility",
            "reactive",
            "functional",
            "async",
            "lazy",
            "signal",
            "event",
        ];
        assert_eq!(pkg.keywords, expected_keywords);
    }

    /// A missing file yields the placeholder record rather than a panic.
    #[test]
    fn test_invalid_file() {
        let missing = PathBuf::from("testdata/haxe/nonexistent/haxelib.json");
        let pkg = HaxeParser::extract_first_package(&missing);

        // Placeholder data still carries the type and datasource.
        assert_eq!(pkg.package_type, Some(PackageType::Haxe));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::HaxelibJson));
        assert!(pkg.name.is_none());
    }
}