Skip to main content

provenant/parsers/
julia.rs

1//! Parser for Julia Project.toml and Manifest.toml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Julia package manager (Pkg.jl) manifest files.
5//!
6//! # Supported Formats
7//! - Project.toml (package metadata)
8//! - Manifest.toml (resolved dependency tree)
9//!
10//! # Key Features
11//! - Dependency extraction with UUID tracking
12//! - `is_pinned` analysis based on Project.toml `[compat]` version specifiers
13//! - Package URL (purl) generation
14//! - Compat section version constraint extraction
15//!
16//! # Implementation Notes
17//! - Uses toml crate for parsing
18//! - Julia packages are identified by UUID
19//! - Project.toml `[deps]` lists direct dependencies by name → UUID
20//! - Manifest.toml `[[deps.<Name>]]` entries contain resolved version + tree SHA
21
22use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
23use crate::parser_warn as warn;
24use crate::parsers::utils::{
25    MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field,
26};
27use packageurl::PackageUrl;
28use std::path::Path;
29use toml::Value;
30
31use super::PackageParser;
32use super::license_normalization::{
33    DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
34    normalize_spdx_expression,
35};
36
// Scalar keys read from the top level of `Project.toml`. `FIELD_UUID` and
// `FIELD_VERSION` are also looked up inside individual `Manifest.toml` entries.
const FIELD_NAME: &str = "name";
const FIELD_UUID: &str = "uuid";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_HOMEPAGE: &str = "homepage";

// Author keys; both the singular and the plural spelling are consulted.
const FIELD_AUTHOR: &str = "author";
const FIELD_AUTHORS: &str = "authors";

// Table-valued keys. `FIELD_DEPS` is read from both file kinds.
const FIELD_DEPS: &str = "deps";
const FIELD_COMPAT: &str = "compat";
const FIELD_TARGETS: &str = "targets";
48
49pub struct JuliaProjectTomlParser;
50
51impl PackageParser for JuliaProjectTomlParser {
52    const PACKAGE_TYPE: PackageType = PackageType::Julia;
53
54    fn extract_packages(path: &Path) -> Vec<PackageData> {
55        let toml_content = match read_julia_toml(path) {
56            Ok(content) => content,
57            Err(e) => {
58                warn!("Failed to read or parse Project.toml at {:?}: {}", path, e);
59                return vec![default_project_package_data()];
60            }
61        };
62
63        let name = toml_content
64            .get(FIELD_NAME)
65            .and_then(|v| v.as_str())
66            .map(|s| truncate_field(s.to_string()));
67
68        let _uuid = toml_content
69            .get(FIELD_UUID)
70            .and_then(|v| v.as_str())
71            .map(String::from);
72
73        let version = toml_content
74            .get(FIELD_VERSION)
75            .and_then(|v| v.as_str())
76            .map(|s| truncate_field(s.to_string()));
77
78        let raw_license = toml_content
79            .get(FIELD_LICENSE)
80            .and_then(|v| v.as_str())
81            .map(|s| truncate_field(s.to_string()));
82
83        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
84            raw_license
85                .as_deref()
86                .and_then(normalize_spdx_expression)
87                .map(|normalized| {
88                    build_declared_license_data(
89                        normalized,
90                        DeclaredLicenseMatchMetadata::single_line(
91                            raw_license.as_deref().unwrap_or_default(),
92                        ),
93                    )
94                })
95                .unwrap_or_else(empty_declared_license_data);
96
97        let extracted_license_statement = raw_license.clone().map(truncate_field);
98
99        let dependencies = extract_project_dependencies(&toml_content);
100
101        let purl = create_package_url(&name, &version);
102
103        let repository_url = toml_content
104            .get(FIELD_REPOSITORY)
105            .and_then(|v| v.as_str())
106            .map(|s| truncate_field(s.to_string()));
107
108        let homepage_url = toml_content
109            .get(FIELD_HOMEPAGE)
110            .and_then(|v| v.as_str())
111            .map(|s| truncate_field(s.to_string()));
112
113        let description = None;
114
115        let extra_data = extract_project_extra_data(&toml_content);
116
117        let is_private = false;
118
119        vec![PackageData {
120            package_type: Some(Self::PACKAGE_TYPE),
121            namespace: None,
122            name,
123            version,
124            qualifiers: None,
125            subpath: None,
126            primary_language: Some("Julia".to_string()),
127            description,
128            release_date: None,
129            parties: extract_parties(&toml_content),
130            keywords: Vec::new(),
131            homepage_url,
132            download_url: None,
133            size: None,
134            sha1: None,
135            md5: None,
136            sha256: None,
137            sha512: None,
138            bug_tracking_url: None,
139            code_view_url: None,
140            vcs_url: repository_url,
141            copyright: None,
142            holder: None,
143            declared_license_expression,
144            declared_license_expression_spdx,
145            license_detections,
146            other_license_expression: None,
147            other_license_expression_spdx: None,
148            other_license_detections: Vec::new(),
149            extracted_license_statement,
150            notice_text: None,
151            source_packages: Vec::new(),
152            file_references: Vec::new(),
153            is_private,
154            is_virtual: false,
155            extra_data,
156            dependencies,
157            repository_homepage_url: None,
158            repository_download_url: None,
159            api_data_url: None,
160            datasource_id: Some(DatasourceId::JuliaProjectToml),
161            purl,
162        }]
163    }
164
165    fn is_match(path: &Path) -> bool {
166        path.file_name()
167            .and_then(|name| name.to_str())
168            .is_some_and(|name| name.eq_ignore_ascii_case("Project.toml"))
169    }
170}
171
172pub struct JuliaManifestTomlParser;
173
174impl PackageParser for JuliaManifestTomlParser {
175    const PACKAGE_TYPE: PackageType = PackageType::Julia;
176
177    fn extract_packages(path: &Path) -> Vec<PackageData> {
178        let toml_content = match read_julia_toml(path) {
179            Ok(content) => content,
180            Err(e) => {
181                warn!("Failed to read or parse Manifest.toml at {:?}: {}", path, e);
182                return vec![];
183            }
184        };
185
186        extract_manifest_packages(&toml_content)
187    }
188
189    fn is_match(path: &Path) -> bool {
190        path.file_name()
191            .and_then(|name| name.to_str())
192            .is_some_and(|name| name.eq_ignore_ascii_case("Manifest.toml"))
193    }
194}
195
196fn read_julia_toml(path: &Path) -> Result<Value, String> {
197    let content =
198        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
199    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
200}
201
202fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
203    name.as_ref().and_then(|name| {
204        let mut package_url = match PackageUrl::new(PackageType::Julia.as_str(), name) {
205            Ok(p) => p,
206            Err(e) => {
207                warn!(
208                    "Failed to create PackageUrl for julia package '{}': {}",
209                    name, e
210                );
211                return None;
212            }
213        };
214
215        if let Some(v) = version
216            && let Err(e) = package_url.with_version(v)
217        {
218            warn!(
219                "Failed to set version '{}' for julia package '{}': {}",
220                v, name, e
221            );
222            return None;
223        }
224
225        Some(truncate_field(package_url.to_string()))
226    })
227}
228
229fn extract_parties(toml_content: &Value) -> Vec<Party> {
230    use std::collections::HashSet;
231
232    let mut parties = Vec::new();
233    let mut seen = HashSet::new();
234
235    if let Some(authors) = toml_content.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
236        for author in authors.iter().take(MAX_ITERATION_COUNT) {
237            push_author_party(author, &mut parties, &mut seen);
238        }
239    }
240
241    if let Some(author_value) = toml_content.get(FIELD_AUTHOR) {
242        match author_value {
243            Value::Array(authors) => {
244                for author in authors.iter().take(MAX_ITERATION_COUNT) {
245                    push_author_party(author, &mut parties, &mut seen);
246                }
247            }
248            other => push_author_party(other, &mut parties, &mut seen),
249        }
250    }
251
252    parties
253}
254
255fn push_author_party(
256    value: &Value,
257    parties: &mut Vec<Party>,
258    seen: &mut std::collections::HashSet<String>,
259) {
260    let Some(author_str) = value.as_str() else {
261        return;
262    };
263
264    let author_name = truncate_field(author_str.trim().to_string());
265    if author_name.is_empty() || !seen.insert(author_name.clone()) {
266        return;
267    }
268
269    parties.push(Party {
270        r#type: None,
271        role: Some("author".to_string()),
272        name: Some(author_name),
273        email: None,
274        url: None,
275        organization: None,
276        organization_url: None,
277        timezone: None,
278    });
279}
280
281fn extract_project_dependencies(toml_content: &Value) -> Vec<Dependency> {
282    let mut dependencies = Vec::new();
283
284    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
285        Some(table) => table,
286        None => return dependencies,
287    };
288
289    let compat_table = toml_content.get(FIELD_COMPAT).and_then(|v| v.as_table());
290
291    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
292        let uuid = dep_value.as_str().map(String::from);
293
294        let extracted_requirement = compat_table
295            .and_then(|ct| ct.get(dep_name))
296            .and_then(|v| v.as_str())
297            .map(|s| truncate_field(s.to_string()));
298
299        let is_pinned = extracted_requirement
300            .as_deref()
301            .is_some_and(is_julia_version_pinned);
302
303        let purl = match PackageUrl::new(PackageType::Julia.as_str(), dep_name) {
304            Ok(p) => truncate_field(p.to_string()),
305            Err(e) => {
306                warn!(
307                    "Failed to create PackageUrl for julia dependency '{}': {}",
308                    dep_name, e
309                );
310                continue;
311            }
312        };
313
314        let mut extra_data_map = std::collections::HashMap::new();
315        if let Some(ref uuid_val) = uuid {
316            extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
317        }
318
319        dependencies.push(Dependency {
320            purl: Some(purl),
321            extracted_requirement,
322            scope: Some("dependencies".to_string()),
323            is_runtime: Some(true),
324            is_optional: None,
325            is_pinned: Some(is_pinned),
326            is_direct: Some(true),
327            resolved_package: None,
328            extra_data: if extra_data_map.is_empty() {
329                None
330            } else {
331                Some(extra_data_map)
332            },
333        });
334    }
335
336    dependencies
337}
338
339fn extract_manifest_packages(toml_content: &Value) -> Vec<PackageData> {
340    let mut packages = Vec::new();
341
342    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
343        Some(table) => table,
344        None => return packages,
345    };
346
347    for (dep_name, dep_value) in deps_table.iter().take(MAX_ITERATION_COUNT) {
348        let dep_entries = match dep_value.as_array() {
349            Some(entries) => entries,
350            None => continue,
351        };
352
353        for dep_entry in dep_entries.iter().take(MAX_ITERATION_COUNT) {
354            let name = Some(truncate_field(dep_name.clone()));
355
356            let uuid = dep_entry
357                .get(FIELD_UUID)
358                .and_then(|v| v.as_str())
359                .map(String::from);
360
361            let version = dep_entry
362                .get(FIELD_VERSION)
363                .and_then(|v| v.as_str())
364                .map(|s| truncate_field(s.to_string()));
365
366            let purl = create_package_url(&name, &version);
367
368            let tree_hash = dep_entry
369                .get("git-tree-sha1")
370                .and_then(|v| v.as_str())
371                .map(String::from);
372
373            let source_url = dep_entry
374                .get("url")
375                .and_then(|v| v.as_str())
376                .map(|s| truncate_field(s.to_string()));
377
378            let mut extra_data_map = std::collections::HashMap::new();
379            if let Some(ref uuid_val) = uuid {
380                extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
381            }
382            if let Some(ref tree_hash_val) = tree_hash {
383                extra_data_map.insert("tree_hash".to_string(), serde_json::json!(tree_hash_val));
384            }
385            if let Some(ref source_url_val) = source_url {
386                extra_data_map.insert("url".to_string(), serde_json::json!(source_url_val));
387            }
388
389            packages.push(PackageData {
390                package_type: Some(PackageType::Julia),
391                namespace: None,
392                name,
393                version,
394                qualifiers: None,
395                subpath: None,
396                primary_language: Some("Julia".to_string()),
397                description: None,
398                release_date: None,
399                parties: Vec::new(),
400                keywords: Vec::new(),
401                homepage_url: None,
402                download_url: None,
403                size: None,
404                sha1: None,
405                md5: None,
406                sha256: None,
407                sha512: None,
408                bug_tracking_url: None,
409                code_view_url: None,
410                vcs_url: source_url,
411                copyright: None,
412                holder: None,
413                declared_license_expression: None,
414                declared_license_expression_spdx: None,
415                license_detections: Vec::new(),
416                other_license_expression: None,
417                other_license_expression_spdx: None,
418                other_license_detections: Vec::new(),
419                extracted_license_statement: None,
420                notice_text: None,
421                source_packages: Vec::new(),
422                file_references: Vec::new(),
423                is_private: false,
424                is_virtual: false,
425                extra_data: if extra_data_map.is_empty() {
426                    None
427                } else {
428                    Some(extra_data_map)
429                },
430                dependencies: Vec::new(),
431                repository_homepage_url: None,
432                repository_download_url: None,
433                api_data_url: None,
434                datasource_id: Some(DatasourceId::JuliaManifestToml),
435                purl,
436            });
437        }
438    }
439
440    packages
441}
442
443fn extract_project_extra_data(
444    toml_content: &Value,
445) -> Option<std::collections::HashMap<String, serde_json::Value>> {
446    use serde_json::json;
447    let mut extra_data = std::collections::HashMap::new();
448
449    if let Some(uuid) = toml_content.get(FIELD_UUID).and_then(|v| v.as_str()) {
450        extra_data.insert("uuid".to_string(), json!(uuid));
451    }
452
453    if let Some(targets) = toml_content.get(FIELD_TARGETS) {
454        extra_data.insert("targets".to_string(), toml_to_json(targets));
455    }
456
457    if let Some(compat) = toml_content.get(FIELD_COMPAT) {
458        extra_data.insert("compat".to_string(), toml_to_json(compat));
459    }
460
461    if let Some(deps) = toml_content.get(FIELD_DEPS) {
462        extra_data.insert("deps".to_string(), toml_to_json(deps));
463    }
464
465    if let Some(extras) = toml_content.get("extras") {
466        extra_data.insert("extras".to_string(), toml_to_json(extras));
467    }
468
469    if let Some(sources) = toml_content.get("sources") {
470        extra_data.insert("sources".to_string(), toml_to_json(sources));
471    }
472
473    if extra_data.is_empty() {
474        None
475    } else {
476        Some(extra_data)
477    }
478}
479
480fn toml_to_json(value: &toml::Value) -> serde_json::Value {
481    toml_to_json_inner(value, &mut RecursionGuard::depth_only())
482}
483
484fn toml_to_json_inner(value: &toml::Value, guard: &mut RecursionGuard<()>) -> serde_json::Value {
485    if guard.descend() {
486        warn!("Recursion depth exceeded in toml_to_json, returning Null");
487        return serde_json::Value::Null;
488    }
489
490    let result = match value {
491        toml::Value::String(s) => serde_json::json!(s),
492        toml::Value::Integer(i) => serde_json::json!(i),
493        toml::Value::Float(f) => serde_json::json!(f),
494        toml::Value::Boolean(b) => serde_json::json!(b),
495        toml::Value::Array(a) => {
496            serde_json::Value::Array(a.iter().map(|v| toml_to_json_inner(v, guard)).collect())
497        }
498        toml::Value::Table(t) => {
499            let map: serde_json::Map<String, serde_json::Value> = t
500                .iter()
501                .map(|(k, v)| (k.clone(), toml_to_json_inner(v, guard)))
502                .collect();
503            serde_json::Value::Object(map)
504        }
505        toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
506    };
507    guard.ascend();
508    result
509}
510
511fn default_project_package_data() -> PackageData {
512    PackageData {
513        package_type: Some(PackageType::Julia),
514        datasource_id: Some(DatasourceId::JuliaProjectToml),
515        ..Default::default()
516    }
517}
518
/// Reports whether a `[compat]` entry pins a dependency to one exact version.
///
/// In Pkg.jl's compat syntax a bare version such as `1.2.3` is shorthand for the
/// caret specifier `^1.2.3`, which is a range. Only a single equality specifier
/// of the form `=X.Y.Z` (three numeric components) therefore counts as pinned.
/// Bare versions, caret/tilde specifiers, inequalities, hyphen ranges,
/// comma-separated unions, partial versions such as `=1.2`, and blank input all
/// return `false`.
fn is_julia_version_pinned(version_str: &str) -> bool {
    let Some(exact) = version_str.trim().strip_prefix('=') else {
        return false;
    };
    // Whitespace between the operator and the version is tolerated.
    let exact = exact.trim_start();

    // Requiring purely numeric components also rejects unions and ranges, whose
    // separators (`,`, ` - `, further operators) would end up inside a component.
    let mut component_count = 0usize;
    for component in exact.split('.') {
        if component.is_empty() || !component.bytes().all(|b| b.is_ascii_digit()) {
            return false;
        }
        component_count += 1;
    }
    component_count == 3
}
534
// Registration entry for the Project.toml parser.
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path glob patterns, package type, primary language, docs URL) —
// confirm against the macro definition.
535crate::register_parser!(
536    "Julia Project.toml manifest",
537    &["**/Project.toml"],
538    "julia",
539    "Julia",
540    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
541);
542
// Registration entry for the Manifest.toml parser; same argument layout as above
// (presumed — confirm against the macro definition).
543crate::register_parser!(
544    "Julia Manifest.toml resolved dependencies",
545    &["**/Manifest.toml"],
546    "julia",
547    "Julia",
548    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
549);