Skip to main content

provenant/parsers/
julia.rs

//! Parser for Julia Project.toml and Manifest.toml files.
//!
//! Extracts package metadata, dependencies, and license information from
//! Julia package manager (Pkg.jl) manifest files.
//!
//! # Supported Formats
//! - Project.toml (package metadata)
//! - Manifest.toml (resolved dependency tree)
//!
//! # Key Features
//! - Dependency extraction with UUID tracking
//! - `is_pinned` analysis based on the Project.toml `[compat]` version specifiers
//! - Package URL (purl) generation
//! - Compat section version constraint extraction
//!
//! # Implementation Notes
//! - Uses the `toml` crate for parsing
//! - Julia packages are identified by UUID
//! - Project.toml `[deps]` lists direct dependencies by name → UUID
//! - Manifest.toml `[[deps.<Name>]]` entries contain resolved version + tree SHA
21
22use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
23use crate::parser_warn as warn;
24use packageurl::PackageUrl;
25use std::fs::File;
26use std::io::Read;
27use std::path::Path;
28use toml::Value;
29
30use super::PackageParser;
31use super::license_normalization::{
32    DeclaredLicenseMatchMetadata, build_declared_license_data, empty_declared_license_data,
33    normalize_spdx_expression,
34};
35
// TOML keys looked up by the parsers in this module.
// Top-level Project.toml keys; `name`/`uuid`/`version` are also read from
// individual Manifest.toml dependency entries.
const FIELD_NAME: &str = "name";
const FIELD_UUID: &str = "uuid";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
// Array of author strings; each string becomes one `Party`.
const FIELD_AUTHORS: &str = "authors";
// Mapped to `vcs_url` on the resulting package.
const FIELD_REPOSITORY: &str = "repository";
// Dependency table, read from both Project.toml and Manifest.toml.
const FIELD_DEPS: &str = "deps";
// Per-dependency version requirement table in Project.toml.
const FIELD_COMPAT: &str = "compat";
// Copied verbatim into the package's `extra_data`.
const FIELD_TARGETS: &str = "targets";
const FIELD_HOMEPAGE: &str = "homepage";
46
47pub struct JuliaProjectTomlParser;
48
49impl PackageParser for JuliaProjectTomlParser {
50    const PACKAGE_TYPE: PackageType = PackageType::Julia;
51
52    fn extract_packages(path: &Path) -> Vec<PackageData> {
53        let toml_content = match read_julia_toml(path) {
54            Ok(content) => content,
55            Err(e) => {
56                warn!("Failed to read or parse Project.toml at {:?}: {}", path, e);
57                return vec![default_project_package_data()];
58            }
59        };
60
61        let name = toml_content
62            .get(FIELD_NAME)
63            .and_then(|v| v.as_str())
64            .map(String::from);
65
66        let _uuid = toml_content
67            .get(FIELD_UUID)
68            .and_then(|v| v.as_str())
69            .map(String::from);
70
71        let version = toml_content
72            .get(FIELD_VERSION)
73            .and_then(|v| v.as_str())
74            .map(String::from);
75
76        let raw_license = toml_content
77            .get(FIELD_LICENSE)
78            .and_then(|v| v.as_str())
79            .map(String::from);
80
81        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
82            raw_license
83                .as_deref()
84                .and_then(normalize_spdx_expression)
85                .map(|normalized| {
86                    build_declared_license_data(
87                        normalized,
88                        DeclaredLicenseMatchMetadata::single_line(
89                            raw_license.as_deref().unwrap_or_default(),
90                        ),
91                    )
92                })
93                .unwrap_or_else(empty_declared_license_data);
94
95        let extracted_license_statement = raw_license.clone();
96
97        let dependencies = extract_project_dependencies(&toml_content);
98
99        let purl = create_package_url(&name, &version);
100
101        let repository_url = toml_content
102            .get(FIELD_REPOSITORY)
103            .and_then(|v| v.as_str())
104            .map(String::from);
105
106        let homepage_url = toml_content
107            .get(FIELD_HOMEPAGE)
108            .and_then(|v| v.as_str())
109            .map(String::from);
110
111        let description = None;
112
113        let extra_data = extract_project_extra_data(&toml_content);
114
115        let is_private = false;
116
117        vec![PackageData {
118            package_type: Some(Self::PACKAGE_TYPE),
119            namespace: None,
120            name,
121            version,
122            qualifiers: None,
123            subpath: None,
124            primary_language: Some("Julia".to_string()),
125            description,
126            release_date: None,
127            parties: extract_parties(&toml_content),
128            keywords: Vec::new(),
129            homepage_url,
130            download_url: None,
131            size: None,
132            sha1: None,
133            md5: None,
134            sha256: None,
135            sha512: None,
136            bug_tracking_url: None,
137            code_view_url: None,
138            vcs_url: repository_url,
139            copyright: None,
140            holder: None,
141            declared_license_expression,
142            declared_license_expression_spdx,
143            license_detections,
144            other_license_expression: None,
145            other_license_expression_spdx: None,
146            other_license_detections: Vec::new(),
147            extracted_license_statement,
148            notice_text: None,
149            source_packages: Vec::new(),
150            file_references: Vec::new(),
151            is_private,
152            is_virtual: false,
153            extra_data,
154            dependencies,
155            repository_homepage_url: None,
156            repository_download_url: None,
157            api_data_url: None,
158            datasource_id: Some(DatasourceId::JuliaProjectToml),
159            purl,
160        }]
161    }
162
163    fn is_match(path: &Path) -> bool {
164        path.file_name()
165            .and_then(|name| name.to_str())
166            .is_some_and(|name| name.eq_ignore_ascii_case("Project.toml"))
167    }
168}
169
170pub struct JuliaManifestTomlParser;
171
172impl PackageParser for JuliaManifestTomlParser {
173    const PACKAGE_TYPE: PackageType = PackageType::Julia;
174
175    fn extract_packages(path: &Path) -> Vec<PackageData> {
176        let toml_content = match read_julia_toml(path) {
177            Ok(content) => content,
178            Err(e) => {
179                warn!("Failed to read or parse Manifest.toml at {:?}: {}", path, e);
180                return vec![];
181            }
182        };
183
184        extract_manifest_packages(&toml_content)
185    }
186
187    fn is_match(path: &Path) -> bool {
188        path.file_name()
189            .and_then(|name| name.to_str())
190            .is_some_and(|name| name.eq_ignore_ascii_case("Manifest.toml"))
191    }
192}
193
194fn read_julia_toml(path: &Path) -> Result<Value, String> {
195    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
196    let mut content = String::new();
197    file.read_to_string(&mut content)
198        .map_err(|e| format!("Error reading file: {}", e))?;
199    toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
200}
201
202fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
203    name.as_ref().and_then(|name| {
204        let mut package_url = match PackageUrl::new(PackageType::Julia.as_str(), name) {
205            Ok(p) => p,
206            Err(e) => {
207                warn!(
208                    "Failed to create PackageUrl for julia package '{}': {}",
209                    name, e
210                );
211                return None;
212            }
213        };
214
215        if let Some(v) = version
216            && let Err(e) = package_url.with_version(v)
217        {
218            warn!(
219                "Failed to set version '{}' for julia package '{}': {}",
220                v, name, e
221            );
222            return None;
223        }
224
225        Some(package_url.to_string())
226    })
227}
228
229fn extract_parties(toml_content: &Value) -> Vec<Party> {
230    let mut parties = Vec::new();
231
232    if let Some(authors) = toml_content.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
233        for author in authors {
234            if let Some(author_str) = author.as_str() {
235                parties.push(Party {
236                    r#type: None,
237                    role: Some("author".to_string()),
238                    name: Some(author_str.trim().to_string()),
239                    email: None,
240                    url: None,
241                    organization: None,
242                    organization_url: None,
243                    timezone: None,
244                });
245            }
246        }
247    }
248
249    parties
250}
251
252fn extract_project_dependencies(toml_content: &Value) -> Vec<Dependency> {
253    let mut dependencies = Vec::new();
254
255    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
256        Some(table) => table,
257        None => return dependencies,
258    };
259
260    let compat_table = toml_content.get(FIELD_COMPAT).and_then(|v| v.as_table());
261
262    for (dep_name, dep_value) in deps_table {
263        let uuid = dep_value.as_str().map(String::from);
264
265        let extracted_requirement = compat_table
266            .and_then(|ct| ct.get(dep_name))
267            .and_then(|v| v.as_str())
268            .map(String::from);
269
270        let is_pinned = extracted_requirement
271            .as_deref()
272            .is_some_and(is_julia_version_pinned);
273
274        let purl = match PackageUrl::new(PackageType::Julia.as_str(), dep_name) {
275            Ok(p) => p.to_string(),
276            Err(e) => {
277                warn!(
278                    "Failed to create PackageUrl for julia dependency '{}': {}",
279                    dep_name, e
280                );
281                continue;
282            }
283        };
284
285        let mut extra_data_map = std::collections::HashMap::new();
286        if let Some(ref uuid_val) = uuid {
287            extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
288        }
289
290        dependencies.push(Dependency {
291            purl: Some(purl),
292            extracted_requirement,
293            scope: Some("dependencies".to_string()),
294            is_runtime: Some(true),
295            is_optional: None,
296            is_pinned: Some(is_pinned),
297            is_direct: Some(true),
298            resolved_package: None,
299            extra_data: if extra_data_map.is_empty() {
300                None
301            } else {
302                Some(extra_data_map)
303            },
304        });
305    }
306
307    dependencies
308}
309
310fn extract_manifest_packages(toml_content: &Value) -> Vec<PackageData> {
311    let mut packages = Vec::new();
312
313    let deps_table = match toml_content.get(FIELD_DEPS).and_then(|v| v.as_table()) {
314        Some(table) => table,
315        None => return packages,
316    };
317
318    for (dep_name, dep_value) in deps_table {
319        let dep_entries = match dep_value.as_array() {
320            Some(entries) => entries,
321            None => continue,
322        };
323
324        for dep_entry in dep_entries {
325            let name = Some(dep_name.clone());
326
327            let uuid = dep_entry
328                .get(FIELD_UUID)
329                .and_then(|v| v.as_str())
330                .map(String::from);
331
332            let version = dep_entry
333                .get(FIELD_VERSION)
334                .and_then(|v| v.as_str())
335                .map(String::from);
336
337            let purl = create_package_url(&name, &version);
338
339            let tree_hash = dep_entry
340                .get("git-tree-sha1")
341                .and_then(|v| v.as_str())
342                .map(String::from);
343
344            let source_url = dep_entry
345                .get("url")
346                .and_then(|v| v.as_str())
347                .map(String::from);
348
349            let mut extra_data_map = std::collections::HashMap::new();
350            if let Some(ref uuid_val) = uuid {
351                extra_data_map.insert("uuid".to_string(), serde_json::json!(uuid_val));
352            }
353            if let Some(ref tree_hash_val) = tree_hash {
354                extra_data_map.insert("tree_hash".to_string(), serde_json::json!(tree_hash_val));
355            }
356            if let Some(ref source_url_val) = source_url {
357                extra_data_map.insert("url".to_string(), serde_json::json!(source_url_val));
358            }
359
360            packages.push(PackageData {
361                package_type: Some(PackageType::Julia),
362                namespace: None,
363                name,
364                version,
365                qualifiers: None,
366                subpath: None,
367                primary_language: Some("Julia".to_string()),
368                description: None,
369                release_date: None,
370                parties: Vec::new(),
371                keywords: Vec::new(),
372                homepage_url: None,
373                download_url: None,
374                size: None,
375                sha1: None,
376                md5: None,
377                sha256: None,
378                sha512: None,
379                bug_tracking_url: None,
380                code_view_url: None,
381                vcs_url: source_url,
382                copyright: None,
383                holder: None,
384                declared_license_expression: None,
385                declared_license_expression_spdx: None,
386                license_detections: Vec::new(),
387                other_license_expression: None,
388                other_license_expression_spdx: None,
389                other_license_detections: Vec::new(),
390                extracted_license_statement: None,
391                notice_text: None,
392                source_packages: Vec::new(),
393                file_references: Vec::new(),
394                is_private: false,
395                is_virtual: false,
396                extra_data: if extra_data_map.is_empty() {
397                    None
398                } else {
399                    Some(extra_data_map)
400                },
401                dependencies: Vec::new(),
402                repository_homepage_url: None,
403                repository_download_url: None,
404                api_data_url: None,
405                datasource_id: Some(DatasourceId::JuliaManifestToml),
406                purl,
407            });
408        }
409    }
410
411    packages
412}
413
414fn extract_project_extra_data(
415    toml_content: &Value,
416) -> Option<std::collections::HashMap<String, serde_json::Value>> {
417    use serde_json::json;
418    let mut extra_data = std::collections::HashMap::new();
419
420    if let Some(uuid) = toml_content.get(FIELD_UUID).and_then(|v| v.as_str()) {
421        extra_data.insert("uuid".to_string(), json!(uuid));
422    }
423
424    if let Some(targets) = toml_content.get(FIELD_TARGETS) {
425        extra_data.insert("targets".to_string(), toml_to_json(targets));
426    }
427
428    if let Some(compat) = toml_content.get(FIELD_COMPAT) {
429        extra_data.insert("compat".to_string(), toml_to_json(compat));
430    }
431
432    if let Some(deps) = toml_content.get(FIELD_DEPS) {
433        extra_data.insert("deps".to_string(), toml_to_json(deps));
434    }
435
436    if let Some(extras) = toml_content.get("extras") {
437        extra_data.insert("extras".to_string(), toml_to_json(extras));
438    }
439
440    if let Some(sources) = toml_content.get("sources") {
441        extra_data.insert("sources".to_string(), toml_to_json(sources));
442    }
443
444    if extra_data.is_empty() {
445        None
446    } else {
447        Some(extra_data)
448    }
449}
450
451fn toml_to_json(value: &toml::Value) -> serde_json::Value {
452    match value {
453        toml::Value::String(s) => serde_json::json!(s),
454        toml::Value::Integer(i) => serde_json::json!(i),
455        toml::Value::Float(f) => serde_json::json!(f),
456        toml::Value::Boolean(b) => serde_json::json!(b),
457        toml::Value::Array(a) => serde_json::Value::Array(a.iter().map(toml_to_json).collect()),
458        toml::Value::Table(t) => {
459            let map: serde_json::Map<String, serde_json::Value> = t
460                .iter()
461                .map(|(k, v)| (k.clone(), toml_to_json(v)))
462                .collect();
463            serde_json::Value::Object(map)
464        }
465        toml::Value::Datetime(d) => serde_json::json!(d.to_string()),
466    }
467}
468
469fn default_project_package_data() -> PackageData {
470    PackageData {
471        package_type: Some(PackageType::Julia),
472        datasource_id: Some(DatasourceId::JuliaProjectToml),
473        ..Default::default()
474    }
475}
476
/// Heuristically decides whether a `[compat]` specifier pins one exact version.
///
/// Returns `true` only for a single specifier that spells out a full
/// `major.minor.patch` version (at least two dots) and contains no range
/// syntax. The following are never considered pinned:
/// - empty or whitespace-only strings,
/// - caret / tilde / inequality / wildcard specifiers (`^`, `~`, `>`, `<`,
///   `*`, and the Unicode forms `≥`, `≤`),
/// - comma-separated unions such as `"0.1, 0.2"` or `"1.2.3, 2.0.0"`,
/// - hyphen ranges such as `"1.2.3 - 4.5.6"`.
///
/// NOTE(review): Pkg.jl documents a bare `"1.2.3"` as a caret range, with only
/// `"=1.2.3"` being an exact pin. Bare full versions are still reported as
/// pinned here so existing results do not change — confirm whether that is
/// the intended contract.
fn is_julia_version_pinned(version_str: &str) -> bool {
    // Any of these characters means the specifier describes a range or a union.
    const RANGE_MARKERS: &[char] = &['^', '~', '>', '<', '*', ',', '≥', '≤'];

    let spec = version_str.trim();
    if spec.is_empty() {
        return false;
    }

    // Hyphen ranges are matched with their surrounding spaces, so a hyphen
    // embedded in a version string is not mistaken for a range.
    if spec.contains(RANGE_MARKERS) || spec.contains(" - ") {
        return false;
    }

    // With unions ruled out, two dots can only come from one full version.
    spec.matches('.').count() >= 2
}
492
// Registration metadata for the Project.toml parser.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments look like (description, path glob patterns, package type,
// primary language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Julia Project.toml manifest",
    &["**/Project.toml"],
    "julia",
    "Julia",
    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
);

// Registration metadata for the Manifest.toml parser; same argument layout as
// the Project.toml registration (see the review note there about the macro).
crate::register_parser!(
    "Julia Manifest.toml resolved dependencies",
    &["**/Manifest.toml"],
    "julia",
    "Julia",
    Some("https://pkgdocs.julialang.org/v1/toml-files/"),
);