Skip to main content

provenant/parsers/
julia.rs

1//! Parser for Julia Project.toml and Manifest.toml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Julia package manager (Pkg.jl) manifest files.
5//!
6//! # Supported Formats
7//! - Project.toml (package metadata)
8//! - Manifest.toml (resolved dependency tree)
9//!
10//! # Key Features
11//! - Dependency extraction with UUID tracking
//! - `is_pinned` analysis based on the Project.toml `[compat]` version specifier of each dependency
13//! - Package URL (purl) generation
14//! - Compat section version constraint extraction
15//!
16//! # Implementation Notes
17//! - Uses toml crate for parsing
18//! - Julia packages are identified by UUID
19//! - Project.toml `[deps]` lists direct dependencies by name → UUID
//! - Manifest.toml `[[deps.<Name>]]` entries contain resolved version + tree SHA
21
22use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
23use crate::parser_warn as warn;
24use crate::parsers::utils::{
25    MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field,
26};
27use packageurl::PackageUrl;
28use std::path::Path;
29use toml::Value;
30
31use super::PackageParser;
32use super::license_normalization::{
33    DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
34    normalize_spdx_expression,
35};
36
// TOML key names looked up by the parsers in this file.
//
// `FIELD_UUID`, `FIELD_VERSION` and `FIELD_DEPS` are read from both Project.toml
// (top level) and Manifest.toml (per-package entries / the `deps` table); the
// remaining keys are only read from the top level of Project.toml.
const FIELD_NAME: &str = "name";
const FIELD_UUID: &str = "uuid";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_AUTHORS: &str = "authors";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_DEPS: &str = "deps";
const FIELD_COMPAT: &str = "compat";
const FIELD_TARGETS: &str = "targets";
const FIELD_HOMEPAGE: &str = "homepage";
47
48pub struct JuliaProjectTomlParser;
49
50impl PackageParser for JuliaProjectTomlParser {
51    const PACKAGE_TYPE: PackageType = PackageType::Julia;
52
53    fn extract_packages(path: &Path) -> Vec<PackageData> {
54        let toml_content = match read_julia_toml(path) {
55            Ok(content) => content,
56            Err(e) => {
57                warn!("Failed to read or parse Project.toml at {:?}: {}", path, e);
58                return vec![default_project_package_data()];
59            }
60        };
61
62        let name = toml_content
63            .get(FIELD_NAME)
64            .and_then(|v| v.as_str())
65            .map(|s| truncate_field(s.to_string()));
66
67        let _uuid = toml_content
68            .get(FIELD_UUID)
69            .and_then(|v| v.as_str())
70            .map(String::from);
71
72        let version = toml_content
73            .get(FIELD_VERSION)
74            .and_then(|v| v.as_str())
75            .map(|s| truncate_field(s.to_string()));
76
77        let raw_license = toml_content
78            .get(FIELD_LICENSE)
79            .and_then(|v| v.as_str())
80            .map(|s| truncate_field(s.to_string()));
81
82        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
83            raw_license
84                .as_deref()
85                .and_then(normalize_spdx_expression)
86                .map(|normalized| {
87                    build_declared_license_data(
88                        normalized,
89                        DeclaredLicenseMatchMetadata::single_line(
90                            raw_license.as_deref().unwrap_or_default(),
91                        ),
92                    )
93                })
94                .unwrap_or_else(empty_declared_license_data);
95
96        let extracted_license_statement = raw_license.clone().map(truncate_field);
97
98        let dependencies = extract_project_dependencies(&toml_content);
99
100        let purl = create_package_url(&name, &version);
101
102        let repository_url = toml_content
103            .get(FIELD_REPOSITORY)
104            .and_then(|v| v.as_str())
105            .map(|s| truncate_field(s.to_string()));
106
107        let homepage_url = toml_content
108            .get(FIELD_HOMEPAGE)
109            .and_then(|v| v.as_str())
110            .map(|s| truncate_field(s.to_string()));
111
112        let description = None;
113
114        let extra_data = extract_project_extra_data(&toml_content);
115
116        let is_private = false;
117
118        vec![PackageData {
119            package_type: Some(Self::PACKAGE_TYPE),
120            namespace: None,
121            name,
122            version,
123            qualifiers: None,
124            subpath: None,
125            primary_language: Some("Julia".to_string()),
126            description,
127            release_date: None,
128            parties: extract_parties(&toml_content),
129            keywords: Vec::new(),
130            homepage_url,
131            download_url: None,
132            size: None,
133            sha1: None,
134            md5: None,
135            sha256: None,
136            sha512: None,
137            bug_tracking_url: None,
138            code_view_url: None,
139            vcs_url: repository_url,
140            copyright: None,
141            holder: None,
142            declared_license_expression,
143            declared_license_expression_spdx,
144            license_detections,
145            other_license_expression: None,
146            other_license_expression_spdx: None,
147            other_license_detections: Vec::new(),
148            extracted_license_statement,
149            notice_text: None,
150            source_packages: Vec::new(),
151            file_references: Vec::new(),
152            is_private,
153            is_virtual: false,
154            extra_data,
155            dependencies,
156            repository_homepage_url: None,
157            repository_download_url: None,
158            api_data_url: None,
159            datasource_id: Some(DatasourceId::JuliaProjectToml),
160            purl,
161        }]
162    }
163
164    fn is_match(path: &Path) -> bool {
165        path.file_name()
166            .and_then(|name| name.to_str())
167            .is_some_and(|name| name.eq_ignore_ascii_case("Project.toml"))
168    }
169}
170
171pub struct JuliaManifestTomlParser;
172
173impl PackageParser for JuliaManifestTomlParser {
174    const PACKAGE_TYPE: PackageType = PackageType::Julia;
175
176    fn extract_packages(path: &Path) -> Vec<PackageData> {
177        let toml_content = match read_julia_toml(path) {
178            Ok(content) => content,
179            Err(e) => {
180                warn!("Failed to read or parse Manifest.toml at {:?}: {}", path, e);
181                return vec![];
182            }
183        };
184
185        extract_manifest_packages(&toml_content)
186    }
187
188    fn is_match(path: &Path) -> bool {
189        path.file_name()
190            .and_then(|name| name.to_str())
191            .is_some_and(|name| name.eq_ignore_ascii_case("Manifest.toml"))
192    }
193}
194
195fn read_julia_toml(path: &Path) -> Result<Value, String> {
196    let content =
197        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
198    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
199}
200
201fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
202    name.as_ref().and_then(|name| {
203        let mut package_url = match PackageUrl::new(PackageType::Julia.as_str(), name) {
204            Ok(p) => p,
205            Err(e) => {
206                warn!(
207                    "Failed to create PackageUrl for julia package '{}': {}",
208                    name, e
209                );
210                return None;
211            }
212        };
213
214        if let Some(v) = version
215            && let Err(e) = package_url.with_version(v)
216        {
217            warn!(
218                "Failed to set version '{}' for julia package '{}': {}",
219                v, name, e
220            );
221            return None;
222        }
223
224        Some(truncate_field(package_url.to_string()))
225    })
226}
227
228fn extract_parties(toml_content: &Value) -> Vec<Party> {
229    let mut parties = Vec::new();
230
231    if let Some(authors) = toml_content.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
232        for author in authors.iter().take(MAX_ITERATION_COUNT) {
233            if let Some(author_str) = author.as_str() {
234                parties.push(Party {
235                    r#type: None,
236                    role: Some("author".to_string()),
237                    name: Some(truncate_field(author_str.trim().to_string())),
238                    email: None,
239                    url: None,
240                    organization: None,
241                    organization_url: None,
242                    timezone: None,
243                });
244            }
245        }
246    }
247
248    parties
249}
250
251fn extract_project_dependencies(toml_content: &Value) -> Vec<Dependency> {
252    let mut dependencies = Vec::new();
253
254    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
255        Some(table) => table,
256        None => return dependencies,
257    };
258
259    let compat_table = toml_content.get(FIELD_COMPAT).and_then(|v| v.as_table());
260
261    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
262        let uuid = dep_value.as_str().map(String::from);
263
264        let extracted_requirement = compat_table
265            .and_then(|ct| ct.get(dep_name))
266            .and_then(|v| v.as_str())
267            .map(|s| truncate_field(s.to_string()));
268
269        let is_pinned = extracted_requirement
270            .as_deref()
271            .is_some_and(is_julia_version_pinned);
272
273        let purl = match PackageUrl::new(PackageType::Julia.as_str(), dep_name) {
274            Ok(p) => truncate_field(p.to_string()),
275            Err(e) => {
276                warn!(
277                    "Failed to create PackageUrl for julia dependency '{}': {}",
278                    dep_name, e
279                );
280                continue;
281            }
282        };
283
284        let mut extra_data_map = std::collections::HashMap::new();
285        if let Some(ref uuid_val) = uuid {
286            extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
287        }
288
289        dependencies.push(Dependency {
290            purl: Some(purl),
291            extracted_requirement,
292            scope: Some("dependencies".to_string()),
293            is_runtime: Some(true),
294            is_optional: None,
295            is_pinned: Some(is_pinned),
296            is_direct: Some(true),
297            resolved_package: None,
298            extra_data: if extra_data_map.is_empty() {
299                None
300            } else {
301                Some(extra_data_map)
302            },
303        });
304    }
305
306    dependencies
307}
308
309fn extract_manifest_packages(toml_content: &Value) -> Vec<PackageData> {
310    let mut packages = Vec::new();
311
312    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
313        Some(table) => table,
314        None => return packages,
315    };
316
317    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
318        let dep_entries = match dep_value.as_array() {
319            Some(entries) => entries,
320            None => continue,
321        };
322
323        for dep_entry in dep_entries.iter().take(MAX_ITERATION_COUNT) {
324            let name = Some(truncate_field(dep_name.clone()));
325
326            let uuid = dep_entry
327                .get(FIELD_UUID)
328                .and_then(|v| v.as_str())
329                .map(String::from);
330
331            let version = dep_entry
332                .get(FIELD_VERSION)
333                .and_then(|v| v.as_str())
334                .map(|s| truncate_field(s.to_string()));
335
336            let purl = create_package_url(&name, &version);
337
338            let tree_hash = dep_entry
339                .get("git-tree-sha1")
340                .and_then(|v| v.as_str())
341                .map(String::from);
342
343            let source_url = dep_entry
344                .get("url")
345                .and_then(|v| v.as_str())
346                .map(|s| truncate_field(s.to_string()));
347
348            let mut extra_data_map = std::collections::HashMap::new();
349            if let Some(ref uuid_val) = uuid {
350                extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
351            }
352            if let Some(ref tree_hash_val) = tree_hash {
353                extra_data_map.insert("tree_hash".to_string(), serde_json::json!(tree_hash_val));
354            }
355            if let Some(ref source_url_val) = source_url {
356                extra_data_map.insert("url".to_string(), serde_json::json!(source_url_val));
357            }
358
359            packages.push(PackageData {
360                package_type: Some(PackageType::Julia),
361                namespace: None,
362                name,
363                version,
364                qualifiers: None,
365                subpath: None,
366                primary_language: Some("Julia".to_string()),
367                description: None,
368                release_date: None,
369                parties: Vec::new(),
370                keywords: Vec::new(),
371                homepage_url: None,
372                download_url: None,
373                size: None,
374                sha1: None,
375                md5: None,
376                sha256: None,
377                sha512: None,
378                bug_tracking_url: None,
379                code_view_url: None,
380                vcs_url: source_url,
381                copyright: None,
382                holder: None,
383                declared_license_expression: None,
384                declared_license_expression_spdx: None,
385                license_detections: Vec::new(),
386                other_license_expression: None,
387                other_license_expression_spdx: None,
388                other_license_detections: Vec::new(),
389                extracted_license_statement: None,
390                notice_text: None,
391                source_packages: Vec::new(),
392                file_references: Vec::new(),
393                is_private: false,
394                is_virtual: false,
395                extra_data: if extra_data_map.is_empty() {
396                    None
397                } else {
398                    Some(extra_data_map)
399                },
400                dependencies: Vec::new(),
401                repository_homepage_url: None,
402                repository_download_url: None,
403                api_data_url: None,
404                datasource_id: Some(DatasourceId::JuliaManifestToml),
405                purl,
406            });
407        }
408    }
409
410    packages
411}
412
413fn extract_project_extra_data(
414    toml_content: &Value,
415) -> Option<std::collections::HashMap<String, serde_json::Value>> {
416    use serde_json::json;
417    let mut extra_data = std::collections::HashMap::new();
418
419    if let Some(uuid) = toml_content.get(FIELD_UUID).and_then(|v| v.as_str()) {
420        extra_data.insert("uuid".to_string(), json!(uuid));
421    }
422
423    if let Some(targets) = toml_content.get(FIELD_TARGETS) {
424        extra_data.insert("targets".to_string(), toml_to_json(targets));
425    }
426
427    if let Some(compat) = toml_content.get(FIELD_COMPAT) {
428        extra_data.insert("compat".to_string(), toml_to_json(compat));
429    }
430
431    if let Some(deps) = toml_content.get(FIELD_DEPS) {
432        extra_data.insert("deps".to_string(), toml_to_json(deps));
433    }
434
435    if let Some(extras) = toml_content.get("extras") {
436        extra_data.insert("extras".to_string(), toml_to_json(extras));
437    }
438
439    if let Some(sources) = toml_content.get("sources") {
440        extra_data.insert("sources".to_string(), toml_to_json(sources));
441    }
442
443    if extra_data.is_empty() {
444        None
445    } else {
446        Some(extra_data)
447    }
448}
449
450fn toml_to_json(value: &toml::Value) -> serde_json::Value {
451    toml_to_json_inner(value, &mut RecursionGuard::depth_only())
452}
453
454fn toml_to_json_inner(value: &toml::Value, guard: &mut RecursionGuard<()>) -> serde_json::Value {
455    if guard.descend() {
456        warn!("Recursion depth exceeded in toml_to_json, returning Null");
457        return serde_json::Value::Null;
458    }
459
460    let result = match value {
461        toml::Value::String(s) => serde_json::json!(s),
462        toml::Value::Integer(i) => serde_json::json!(i),
463        toml::Value::Float(f) => serde_json::json!(f),
464        toml::Value::Boolean(b) => serde_json::json!(b),
465        toml::Value::Array(a) => {
466            serde_json::Value::Array(a.iter().map(|v| toml_to_json_inner(v, guard)).collect())
467        }
468        toml::Value::Table(t) => {
469            let map: serde_json::Map<String, serde_json::Value> = t
470                .iter()
471                .map(|(k, v)| (k.clone(), toml_to_json_inner(v, guard)))
472                .collect();
473            serde_json::Value::Object(map)
474        }
475        toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
476    };
477    guard.ascend();
478    result
479}
480
481fn default_project_package_data() -> PackageData {
482    PackageData {
483        package_type: Some(PackageType::Julia),
484        datasource_id: Some(DatasourceId::JuliaProjectToml),
485        ..Default::default()
486    }
487}
488
/// Returns `true` when a Julia `[compat]` entry pins a dependency to exactly one version.
///
/// In Pkg.jl a specifier without an operator (e.g. `"1.2.3"`) is a caret range,
/// not an exact version, so it is *not* pinned. Only a single equality
/// specifier with all three components — `"=1.2.3"`, optionally written with
/// whitespace after `=` or a `v` prefix — counts as pinned. Comma-separated
/// unions, caret/tilde/inequality specifiers, hyphen ranges, and empty or
/// malformed input all return `false`.
fn is_julia_version_pinned(version_str: &str) -> bool {
    let spec = version_str.trim();

    // A comma-separated list is a union of specifiers, never a single pin.
    if spec.contains(',') {
        return false;
    }

    // Everything except the equality operator describes a range of versions.
    let Some(exact) = spec.strip_prefix('=') else {
        return false;
    };
    let exact = exact.trim_start();
    let exact = exact.strip_prefix('v').unwrap_or(exact);

    // Require a full `major.minor.patch` made of plain decimal numbers.
    let mut components = 0usize;
    for part in exact.split('.') {
        if part.is_empty() || !part.bytes().all(|b| b.is_ascii_digit()) {
            return false;
        }
        components += 1;
    }
    components == 3
}
504
// Registry entry for the Project.toml parser.
// NOTE(review): the argument meanings (description, path glob patterns, package
// type, primary language, documentation URL) are presumed from the values passed —
// confirm against the `register_parser!` definition.
crate::register_parser!(
    "Julia Project.toml manifest",
    &["**/Project.toml"],
    "julia",
    "Julia",
    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
);

// Registry entry for the Manifest.toml parser; same argument layout as above.
crate::register_parser!(
    "Julia Manifest.toml resolved dependencies",
    &["**/Manifest.toml"],
    "julia",
    "Julia",
    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
);