Skip to main content

provenant/parsers/
julia.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Julia Project.toml and Manifest.toml files.
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! Julia package manager (Pkg.jl) manifest files.
8//!
9//! # Supported Formats
10//! - Project.toml (package metadata)
11//! - Manifest.toml (resolved dependency tree)
12//!
13//! # Key Features
14//! - Dependency extraction with UUID tracking
//! - `is_pinned` analysis based on Project.toml `[compat]` version specifiers
16//! - Package URL (purl) generation
17//! - Compat section version constraint extraction
18//!
19//! # Implementation Notes
20//! - Uses toml crate for parsing
21//! - Julia packages are identified by UUID
22//! - Project.toml `[deps]` lists direct dependencies by name → UUID
//! - Manifest.toml `[[deps.<Name>]]` entries contain resolved version + tree SHA
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26use crate::parser_warn as warn;
27use crate::parsers::utils::{
28    MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field,
29};
30use packageurl::PackageUrl;
31use std::path::Path;
32use toml::Value;
33
34use super::PackageParser;
35use super::license_normalization::{
36    DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
37    normalize_spdx_expression,
38};
39use super::metadata::ParserMetadata;
40
// TOML keys looked up by the parsers in this file, grouped by purpose.

// Package identity.
const FIELD_NAME: &str = "name";
const FIELD_UUID: &str = "uuid";
const FIELD_VERSION: &str = "version";

// Descriptive metadata.
const FIELD_LICENSE: &str = "license";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";

// Author information: both the plural and the singular key are read.
const FIELD_AUTHORS: &str = "authors";
const FIELD_AUTHOR: &str = "author";

// Dependency-related tables.
const FIELD_DEPS: &str = "deps";
const FIELD_COMPAT: &str = "compat";
const FIELD_TARGETS: &str = "targets";
52
53pub struct JuliaProjectTomlParser;
54
55impl PackageParser for JuliaProjectTomlParser {
56    const PACKAGE_TYPE: PackageType = PackageType::Julia;
57
58    fn metadata() -> Vec<ParserMetadata> {
59        vec![ParserMetadata {
60            description: "Julia Project.toml manifest",
61            file_patterns: &["**/Project.toml"],
62            package_type: "julia",
63            primary_language: "Julia",
64            documentation_url: Some("https://pkgdocs.julialang.org/v1/toml-files/"),
65        }]
66    }
67
68    fn extract_packages(path: &Path) -> Vec<PackageData> {
69        let toml_content = match read_julia_toml(path) {
70            Ok(content) => content,
71            Err(e) => {
72                warn!("Failed to read or parse Project.toml at {:?}: {}", path, e);
73                return vec![default_project_package_data()];
74            }
75        };
76
77        let name = toml_content
78            .get(FIELD_NAME)
79            .and_then(|v| v.as_str())
80            .map(|s| truncate_field(s.to_string()));
81
82        let _uuid = toml_content
83            .get(FIELD_UUID)
84            .and_then(|v| v.as_str())
85            .map(String::from);
86
87        let version = toml_content
88            .get(FIELD_VERSION)
89            .and_then(|v| v.as_str())
90            .map(|s| truncate_field(s.to_string()));
91
92        let raw_license = toml_content
93            .get(FIELD_LICENSE)
94            .and_then(|v| v.as_str())
95            .map(|s| truncate_field(s.to_string()));
96
97        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
98            raw_license
99                .as_deref()
100                .and_then(normalize_spdx_expression)
101                .map(|normalized| {
102                    build_declared_license_data(
103                        normalized,
104                        DeclaredLicenseMatchMetadata::single_line(
105                            raw_license.as_deref().unwrap_or_default(),
106                        ),
107                    )
108                })
109                .unwrap_or_else(empty_declared_license_data);
110
111        let extracted_license_statement = raw_license.clone().map(truncate_field);
112
113        let dependencies = extract_project_dependencies(&toml_content);
114
115        let purl = create_package_url(&name, &version);
116
117        let repository_url = toml_content
118            .get(FIELD_REPOSITORY)
119            .and_then(|v| v.as_str())
120            .map(|s| truncate_field(s.to_string()));
121
122        let homepage_url = toml_content
123            .get(FIELD_HOMEPAGE)
124            .and_then(|v| v.as_str())
125            .map(|s| truncate_field(s.to_string()));
126
127        let description = None;
128
129        let extra_data = extract_project_extra_data(&toml_content);
130
131        let is_private = false;
132
133        vec![PackageData {
134            package_type: Some(Self::PACKAGE_TYPE),
135            namespace: None,
136            name,
137            version,
138            qualifiers: None,
139            subpath: None,
140            primary_language: Some("Julia".to_string()),
141            description,
142            release_date: None,
143            parties: extract_parties(&toml_content),
144            keywords: Vec::new(),
145            homepage_url,
146            download_url: None,
147            size: None,
148            sha1: None,
149            md5: None,
150            sha256: None,
151            sha512: None,
152            bug_tracking_url: None,
153            code_view_url: None,
154            vcs_url: repository_url,
155            copyright: None,
156            holder: None,
157            declared_license_expression,
158            declared_license_expression_spdx,
159            license_detections,
160            other_license_expression: None,
161            other_license_expression_spdx: None,
162            other_license_detections: Vec::new(),
163            extracted_license_statement,
164            notice_text: None,
165            source_packages: Vec::new(),
166            file_references: Vec::new(),
167            is_private,
168            is_virtual: false,
169            extra_data,
170            dependencies,
171            repository_homepage_url: None,
172            repository_download_url: None,
173            api_data_url: None,
174            datasource_id: Some(DatasourceId::JuliaProjectToml),
175            purl,
176        }]
177    }
178
179    fn is_match(path: &Path) -> bool {
180        path.file_name()
181            .and_then(|name| name.to_str())
182            .is_some_and(|name| name.eq_ignore_ascii_case("Project.toml"))
183    }
184}
185
186pub struct JuliaManifestTomlParser;
187
188impl PackageParser for JuliaManifestTomlParser {
189    const PACKAGE_TYPE: PackageType = PackageType::Julia;
190
191    fn metadata() -> Vec<ParserMetadata> {
192        vec![ParserMetadata {
193            description: "Julia Manifest.toml resolved dependencies",
194            file_patterns: &["**/Manifest.toml"],
195            package_type: "julia",
196            primary_language: "Julia",
197            documentation_url: Some("https://pkgdocs.julialang.org/v1/toml-files/"),
198        }]
199    }
200
201    fn extract_packages(path: &Path) -> Vec<PackageData> {
202        let toml_content = match read_julia_toml(path) {
203            Ok(content) => content,
204            Err(e) => {
205                warn!("Failed to read or parse Manifest.toml at {:?}: {}", path, e);
206                return vec![];
207            }
208        };
209
210        extract_manifest_packages(&toml_content)
211    }
212
213    fn is_match(path: &Path) -> bool {
214        path.file_name()
215            .and_then(|name| name.to_str())
216            .is_some_and(|name| name.eq_ignore_ascii_case("Manifest.toml"))
217    }
218}
219
220fn read_julia_toml(path: &Path) -> Result<Value, String> {
221    let content =
222        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
223    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
224}
225
226fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
227    name.as_ref().and_then(|name| {
228        let mut package_url = match PackageUrl::new(PackageType::Julia.as_str(), name) {
229            Ok(p) => p,
230            Err(e) => {
231                warn!(
232                    "Failed to create PackageUrl for julia package '{}': {}",
233                    name, e
234                );
235                return None;
236            }
237        };
238
239        if let Some(v) = version
240            && let Err(e) = package_url.with_version(v)
241        {
242            warn!(
243                "Failed to set version '{}' for julia package '{}': {}",
244                v, name, e
245            );
246            return None;
247        }
248
249        Some(truncate_field(package_url.to_string()))
250    })
251}
252
253fn extract_parties(toml_content: &Value) -> Vec<Party> {
254    use std::collections::HashSet;
255
256    let mut parties = Vec::new();
257    let mut seen = HashSet::new();
258
259    if let Some(authors) = toml_content.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
260        for author in authors.iter().take(MAX_ITERATION_COUNT) {
261            push_author_party(author, &mut parties, &mut seen);
262        }
263    }
264
265    if let Some(author_value) = toml_content.get(FIELD_AUTHOR) {
266        match author_value {
267            Value::Array(authors) => {
268                for author in authors.iter().take(MAX_ITERATION_COUNT) {
269                    push_author_party(author, &mut parties, &mut seen);
270                }
271            }
272            other => push_author_party(other, &mut parties, &mut seen),
273        }
274    }
275
276    parties
277}
278
279fn push_author_party(
280    value: &Value,
281    parties: &mut Vec<Party>,
282    seen: &mut std::collections::HashSet<String>,
283) {
284    let Some(author_str) = value.as_str() else {
285        return;
286    };
287
288    let author_name = truncate_field(author_str.trim().to_string());
289    if author_name.is_empty() || !seen.insert(author_name.clone()) {
290        return;
291    }
292
293    parties.push(Party {
294        r#type: None,
295        role: Some("author".to_string()),
296        name: Some(author_name),
297        email: None,
298        url: None,
299        organization: None,
300        organization_url: None,
301        timezone: None,
302    });
303}
304
305fn extract_project_dependencies(toml_content: &Value) -> Vec<Dependency> {
306    let mut dependencies = Vec::new();
307
308    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
309        Some(table) => table,
310        None => return dependencies,
311    };
312
313    let compat_table = toml_content.get(FIELD_COMPAT).and_then(|v| v.as_table());
314
315    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
316        let uuid = dep_value.as_str().map(String::from);
317
318        let extracted_requirement = compat_table
319            .and_then(|ct| ct.get(dep_name))
320            .and_then(|v| v.as_str())
321            .map(|s| truncate_field(s.to_string()));
322
323        let is_pinned = extracted_requirement
324            .as_deref()
325            .is_some_and(is_julia_version_pinned);
326
327        let purl = match PackageUrl::new(PackageType::Julia.as_str(), dep_name) {
328            Ok(p) => truncate_field(p.to_string()),
329            Err(e) => {
330                warn!(
331                    "Failed to create PackageUrl for julia dependency '{}': {}",
332                    dep_name, e
333                );
334                continue;
335            }
336        };
337
338        let mut extra_data_map = std::collections::HashMap::new();
339        if let Some(ref uuid_val) = uuid {
340            extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
341        }
342
343        dependencies.push(Dependency {
344            purl: Some(purl),
345            extracted_requirement,
346            scope: Some("dependencies".to_string()),
347            is_runtime: Some(true),
348            is_optional: None,
349            is_pinned: Some(is_pinned),
350            is_direct: Some(true),
351            resolved_package: None,
352            extra_data: if extra_data_map.is_empty() {
353                None
354            } else {
355                Some(extra_data_map)
356            },
357        });
358    }
359
360    dependencies
361}
362
363fn extract_manifest_packages(toml_content: &Value) -> Vec<PackageData> {
364    let mut packages = Vec::new();
365
366    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
367        Some(table) => table,
368        None => return packages,
369    };
370
371    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
372        let dep_entries = match dep_value.as_array() {
373            Some(entries) => entries,
374            None => continue,
375        };
376
377        for dep_entry in dep_entries.iter().take(MAX_ITERATION_COUNT) {
378            let name = Some(truncate_field(dep_name.clone()));
379
380            let uuid = dep_entry
381                .get(FIELD_UUID)
382                .and_then(|v| v.as_str())
383                .map(String::from);
384
385            let version = dep_entry
386                .get(FIELD_VERSION)
387                .and_then(|v| v.as_str())
388                .map(|s| truncate_field(s.to_string()));
389
390            let purl = create_package_url(&name, &version);
391
392            let tree_hash = dep_entry
393                .get("git-tree-sha1")
394                .and_then(|v| v.as_str())
395                .map(String::from);
396
397            let source_url = dep_entry
398                .get("url")
399                .and_then(|v| v.as_str())
400                .map(|s| truncate_field(s.to_string()));
401
402            let mut extra_data_map = std::collections::HashMap::new();
403            if let Some(ref uuid_val) = uuid {
404                extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
405            }
406            if let Some(ref tree_hash_val) = tree_hash {
407                extra_data_map.insert("tree_hash".to_string(), serde_json::json!(tree_hash_val));
408            }
409            if let Some(ref source_url_val) = source_url {
410                extra_data_map.insert("url".to_string(), serde_json::json!(source_url_val));
411            }
412
413            packages.push(PackageData {
414                package_type: Some(PackageType::Julia),
415                namespace: None,
416                name,
417                version,
418                qualifiers: None,
419                subpath: None,
420                primary_language: Some("Julia".to_string()),
421                description: None,
422                release_date: None,
423                parties: Vec::new(),
424                keywords: Vec::new(),
425                homepage_url: None,
426                download_url: None,
427                size: None,
428                sha1: None,
429                md5: None,
430                sha256: None,
431                sha512: None,
432                bug_tracking_url: None,
433                code_view_url: None,
434                vcs_url: source_url,
435                copyright: None,
436                holder: None,
437                declared_license_expression: None,
438                declared_license_expression_spdx: None,
439                license_detections: Vec::new(),
440                other_license_expression: None,
441                other_license_expression_spdx: None,
442                other_license_detections: Vec::new(),
443                extracted_license_statement: None,
444                notice_text: None,
445                source_packages: Vec::new(),
446                file_references: Vec::new(),
447                is_private: false,
448                is_virtual: false,
449                extra_data: if extra_data_map.is_empty() {
450                    None
451                } else {
452                    Some(extra_data_map)
453                },
454                dependencies: Vec::new(),
455                repository_homepage_url: None,
456                repository_download_url: None,
457                api_data_url: None,
458                datasource_id: Some(DatasourceId::JuliaManifestToml),
459                purl,
460            });
461        }
462    }
463
464    packages
465}
466
467fn extract_project_extra_data(
468    toml_content: &Value,
469) -> Option<std::collections::HashMap<String, serde_json::Value>> {
470    use serde_json::json;
471    let mut extra_data = std::collections::HashMap::new();
472
473    if let Some(uuid) = toml_content.get(FIELD_UUID).and_then(|v| v.as_str()) {
474        extra_data.insert("uuid".to_string(), json!(uuid));
475    }
476
477    if let Some(targets) = toml_content.get(FIELD_TARGETS) {
478        extra_data.insert("targets".to_string(), toml_to_json(targets));
479    }
480
481    if let Some(compat) = toml_content.get(FIELD_COMPAT) {
482        extra_data.insert("compat".to_string(), toml_to_json(compat));
483    }
484
485    if let Some(deps) = toml_content.get(FIELD_DEPS) {
486        extra_data.insert("deps".to_string(), toml_to_json(deps));
487    }
488
489    if let Some(extras) = toml_content.get("extras") {
490        extra_data.insert("extras".to_string(), toml_to_json(extras));
491    }
492
493    if let Some(sources) = toml_content.get("sources") {
494        extra_data.insert("sources".to_string(), toml_to_json(sources));
495    }
496
497    if extra_data.is_empty() {
498        None
499    } else {
500        Some(extra_data)
501    }
502}
503
504fn toml_to_json(value: &toml::Value) -> serde_json::Value {
505    toml_to_json_inner(value, &mut RecursionGuard::depth_only())
506}
507
508fn toml_to_json_inner(value: &toml::Value, guard: &mut RecursionGuard<()>) -> serde_json::Value {
509    if guard.descend() {
510        warn!("Recursion depth exceeded in toml_to_json, returning Null");
511        return serde_json::Value::Null;
512    }
513
514    let result = match value {
515        toml::Value::String(s) => serde_json::json!(s),
516        toml::Value::Integer(i) => serde_json::json!(i),
517        toml::Value::Float(f) => serde_json::json!(f),
518        toml::Value::Boolean(b) => serde_json::json!(b),
519        toml::Value::Array(a) => {
520            serde_json::Value::Array(a.iter().map(|v| toml_to_json_inner(v, guard)).collect())
521        }
522        toml::Value::Table(t) => {
523            let map: serde_json::Map<String, serde_json::Value> = t
524                .iter()
525                .map(|(k, v)| (k.clone(), toml_to_json_inner(v, guard)))
526                .collect();
527            serde_json::Value::Object(map)
528        }
529        toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
530    };
531    guard.ascend();
532    result
533}
534
535fn default_project_package_data() -> PackageData {
536    PackageData {
537        package_type: Some(PackageType::Julia),
538        datasource_id: Some(DatasourceId::JuliaProjectToml),
539        ..Default::default()
540    }
541}
542
/// Reports whether a Julia `[compat]` entry pins a dependency to exactly one
/// version.
///
/// In Pkg.jl's compat syntax a bare version such as `"1.2.3"` is a caret
/// specifier (a range), comma-separated entries form a union, and
/// `"a - b"` is a hyphen range. Only the equality specifier `=X.Y.Z` names a
/// single version, so that is the only form reported as pinned.
///
/// Returns `true` only for `=` followed by exactly three dot-separated
/// numeric components. Whitespace around the whole string and directly after
/// the `=` is tolerated.
fn is_julia_version_pinned(version_str: &str) -> bool {
    // Anything that does not start with `=` is a range specifier or is empty.
    let Some(exact) = version_str.trim().strip_prefix('=') else {
        return false;
    };

    // Requiring every component to be purely numeric also rejects unions
    // ("=1.2.3, =1.2.5"), hyphen ranges and stray operators, because the
    // extra characters end up inside one of the components.
    let mut component_count = 0usize;
    for component in exact.trim_start().split('.') {
        if component.is_empty() || !component.bytes().all(|b| b.is_ascii_digit()) {
            return false;
        }
        component_count += 1;
    }

    component_count == 3
}