Skip to main content

provenant/parsers/
gradle_module.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9use packageurl::PackageUrl;
10use serde_json::{Map as JsonMap, Value};
11
12use crate::models::{
13    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
14    Sha256Digest, Sha512Digest,
15};
16
17use super::PackageParser;
18
// JSON keys looked up in a Gradle module metadata document.

/// Top-level key whose string value is required for a document to be recognised.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
/// Top-level object providing the `group`, `module`, and `version` strings.
const FIELD_COMPONENT: &str = "component";
/// Top-level object whose nested `gradle` object supplies `version` and `buildId`.
const FIELD_CREATED_BY: &str = "createdBy";
/// Top-level array of variant objects.
const FIELD_VARIANTS: &str = "variants";
/// Attribute object read from the component, from each variant, and from each dependency.
const FIELD_ATTRIBUTES: &str = "attributes";
/// Per-variant array of dependency objects.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Per-variant value copied verbatim into the variant metadata.
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
/// Per-variant array of file objects.
const FIELD_FILES: &str = "files";
/// Per-variant value copied verbatim into the variant metadata.
const FIELD_AVAILABLE_AT: &str = "available-at";
28
/// Size and digests read from one file object, in the order
/// `(size, sha1, md5, sha256, sha512)`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
36
/// Result of walking the variants array, in the order
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
43
/// Parser for Gradle module metadata (`*.module`) JSON files.
pub struct GradleModuleParser;
45
/// Accumulator for one dependency while it is merged across variants.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    /// Maven package URL built from the dependency's group, module, and requirement.
    purl: Option<String>,
    /// Version requirement string taken from the dependency's `version` value.
    extracted_requirement: Option<String>,
    /// Scope label of the winning variant (`compile`, `runtime`, `test`, or none).
    scope: Option<String>,
    /// `Some(true)` for compile/runtime scopes, `Some(false)` for test, `None` otherwise.
    is_runtime: Option<bool>,
    /// Optional flag; the extraction code currently only ever stores `None` here.
    is_optional: Option<bool>,
    /// Whether the requirement was judged to be a single exact version.
    is_pinned: Option<bool>,
    /// Merged per-variant details and pass-through fields of the dependency object.
    extra_data: Option<HashMap<String, Value>>,
    /// Rank of the recorded scope; a variant with a lower rank replaces the recorded scope.
    precedence: u8,
}
57
58impl PackageParser for GradleModuleParser {
59    const PACKAGE_TYPE: PackageType = PackageType::Maven;
60
61    fn is_match(path: &Path) -> bool {
62        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
63            return false;
64        }
65
66        let Ok(content) = read_file_to_string(path, None) else {
67            return false;
68        };
69
70        let Ok(value) = serde_json::from_str(&content) else {
71            return false;
72        };
73
74        is_gradle_module_json(&value)
75    }
76
77    fn extract_packages(path: &Path) -> Vec<PackageData> {
78        let content = match read_file_to_string(path, None) {
79            Ok(content) => content,
80            Err(e) => {
81                warn!("Failed to read Gradle module file at {:?}: {}", path, e);
82                return vec![default_package_data()];
83            }
84        };
85
86        let json: Value = match serde_json::from_str(&content) {
87            Ok(json) => json,
88            Err(e) => {
89                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
90                return vec![default_package_data()];
91            }
92        };
93
94        if !is_gradle_module_json(&json) {
95            warn!("File at {:?} is not valid Gradle module metadata", path);
96            return vec![default_package_data()];
97        }
98
99        vec![parse_gradle_module(&json)]
100    }
101}
102
103fn is_gradle_module_json(json: &Value) -> bool {
104    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
105        return false;
106    };
107
108    json.get(FIELD_FORMAT_VERSION)
109        .and_then(Value::as_str)
110        .is_some()
111        && component.get("group").and_then(Value::as_str).is_some()
112        && component.get("module").and_then(Value::as_str).is_some()
113        && component.get("version").and_then(Value::as_str).is_some()
114}
115
116fn parse_gradle_module(json: &Value) -> PackageData {
117    let component = json
118        .get(FIELD_COMPONENT)
119        .and_then(Value::as_object)
120        .cloned()
121        .unwrap_or_default();
122
123    let namespace = component
124        .get("group")
125        .and_then(Value::as_str)
126        .map(|value| truncate_field(value.to_string()));
127    let name = component
128        .get("module")
129        .and_then(Value::as_str)
130        .map(|value| truncate_field(value.to_string()));
131    let version = component
132        .get("version")
133        .and_then(Value::as_str)
134        .map(|value| truncate_field(value.to_string()));
135
136    let (dependencies, file_references, top_level_artifact, variant_metadata) =
137        extract_variant_data(json.get(FIELD_VARIANTS).and_then(Value::as_array));
138
139    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
140        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
141        _ => None,
142    };
143
144    let mut extra_data = HashMap::new();
145    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
146        extra_data.insert(
147            "format_version".to_string(),
148            Value::String(truncate_field(format_version.to_string())),
149        );
150    }
151
152    if let Some(gradle_object) = json
153        .get(FIELD_CREATED_BY)
154        .and_then(Value::as_object)
155        .and_then(|created_by| created_by.get("gradle"))
156        .and_then(Value::as_object)
157    {
158        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
159            extra_data.insert(
160                "gradle_version".to_string(),
161                Value::String(truncate_field(gradle_version.to_string())),
162            );
163        }
164        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
165            extra_data.insert(
166                "build_id".to_string(),
167                Value::String(truncate_field(build_id.to_string())),
168            );
169        }
170    }
171
172    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
173        && !attributes.is_empty()
174    {
175        extra_data.insert(
176            "component_attributes".to_string(),
177            Value::Object(attributes.clone()),
178        );
179    }
180
181    if !variant_metadata.is_empty() {
182        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
183    }
184
185    let (size, sha1, md5, sha256, sha512) = top_level_artifact
186        .as_ref()
187        .map(extract_file_hashes)
188        .unwrap_or((None, None, None, None, None));
189
190    PackageData {
191        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
192        namespace,
193        name,
194        version,
195        qualifiers: None,
196        subpath: None,
197        primary_language: Some("Java".to_string()),
198        description: None,
199        release_date: None,
200        parties: Vec::new(),
201        keywords: Vec::new(),
202        homepage_url: None,
203        download_url: None,
204        size,
205        sha1,
206        md5,
207        sha256,
208        sha512,
209        bug_tracking_url: None,
210        code_view_url: None,
211        vcs_url: None,
212        copyright: None,
213        holder: None,
214        declared_license_expression: None,
215        declared_license_expression_spdx: None,
216        license_detections: Vec::new(),
217        other_license_expression: None,
218        other_license_expression_spdx: None,
219        other_license_detections: Vec::new(),
220        extracted_license_statement: None,
221        notice_text: None,
222        source_packages: Vec::new(),
223        file_references,
224        is_private: false,
225        is_virtual: false,
226        extra_data: (!extra_data.is_empty()).then_some(extra_data),
227        dependencies,
228        repository_homepage_url: None,
229        repository_download_url: None,
230        api_data_url: None,
231        datasource_id: Some(DatasourceId::GradleModule),
232        purl,
233    }
234}
235
236fn extract_variant_data(variants: Option<&Vec<Value>>) -> ExtractedVariantData {
237    let mut dependencies = Vec::new();
238    let mut file_references = Vec::new();
239    let mut variant_metadata = Vec::new();
240    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
241        HashMap::new();
242    let mut seen_files: HashSet<String> = HashSet::new();
243    let mut top_level_artifact: Option<JsonMap<String, Value>> = None;
244
245    for variant in variants
246        .into_iter()
247        .flatten()
248        .filter_map(Value::as_object)
249        .take(MAX_ITERATION_COUNT)
250    {
251        let category = variant
252            .get(FIELD_ATTRIBUTES)
253            .and_then(Value::as_object)
254            .and_then(|attrs| attrs.get("org.gradle.category"))
255            .and_then(Value::as_str)
256            .unwrap_or_default();
257        let is_documentation = category == "documentation";
258
259        let variant_name = truncate_field(
260            variant
261                .get("name")
262                .and_then(Value::as_str)
263                .unwrap_or_default()
264                .to_string(),
265        );
266        let scope = classify_variant_scope(variant);
267        let precedence = scope_precedence(scope.as_deref());
268        let is_runtime = match scope.as_deref() {
269            Some("compile") | Some("runtime") => Some(true),
270            Some("test") => Some(false),
271            _ => None,
272        };
273        let is_optional = None;
274
275        let mut variant_entry = JsonMap::new();
276        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
277        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
278            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
279        }
280        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
281            variant_entry.insert("available_at".to_string(), available_at.clone());
282        }
283        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
284            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
285        }
286        variant_metadata.push(Value::Object(variant_entry));
287
288        if !is_documentation {
289            if top_level_artifact.is_none() {
290                top_level_artifact = variant
291                    .get(FIELD_FILES)
292                    .and_then(Value::as_array)
293                    .and_then(|files| files.first())
294                    .and_then(Value::as_object)
295                    .cloned();
296            }
297
298            if let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array) {
299                for file in files
300                    .iter()
301                    .filter_map(Value::as_object)
302                    .take(MAX_ITERATION_COUNT)
303                {
304                    let file_path = truncate_field(
305                        file.get("url")
306                            .and_then(Value::as_str)
307                            .or_else(|| file.get("name").and_then(Value::as_str))
308                            .unwrap_or_default()
309                            .to_string(),
310                    );
311                    if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
312                        continue;
313                    }
314                    let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
315                    let mut extra_data = HashMap::new();
316                    if let Some(name) = file.get("name").and_then(Value::as_str) {
317                        extra_data.insert(
318                            "name".to_string(),
319                            Value::String(truncate_field(name.to_string())),
320                        );
321                    }
322                    file_references.push(FileReference {
323                        path: file_path,
324                        size,
325                        sha1,
326                        md5,
327                        sha256,
328                        sha512,
329                        extra_data: (!extra_data.is_empty()).then_some(extra_data),
330                    });
331                }
332            }
333        }
334
335        if is_documentation {
336            continue;
337        }
338
339        for dependency in variant
340            .get(FIELD_DEPENDENCIES)
341            .and_then(Value::as_array)
342            .into_iter()
343            .flatten()
344            .filter_map(Value::as_object)
345            .take(MAX_ITERATION_COUNT)
346        {
347            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
348                continue;
349            };
350            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
351                continue;
352            };
353
354            let requirement = extract_dependency_requirement(dependency.get("version"));
355            let key = (group.to_string(), module.to_string(), requirement.clone());
356            let purl = build_maven_purl(group, module, requirement.as_deref());
357            let dep_extra_data =
358                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
359
360            let entry = seen_dependencies.entry(key).or_default();
361            if precedence < entry.precedence || entry.scope.is_none() {
362                entry.scope = scope.clone();
363                entry.is_runtime = is_runtime;
364                entry.is_optional = is_optional;
365                entry.precedence = precedence;
366            }
367            entry.purl = purl.map(truncate_field);
368            entry.extracted_requirement = requirement.clone();
369            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
370            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
371        }
372    }
373
374    for dep in seen_dependencies.into_values() {
375        dependencies.push(Dependency {
376            purl: dep.purl,
377            extracted_requirement: dep.extracted_requirement,
378            scope: dep.scope,
379            is_runtime: dep.is_runtime,
380            is_optional: dep.is_optional,
381            is_pinned: dep.is_pinned,
382            is_direct: Some(true),
383            resolved_package: None,
384            extra_data: dep.extra_data,
385        });
386    }
387
388    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
389    file_references.sort_by(|left, right| left.path.cmp(&right.path));
390
391    (
392        dependencies,
393        file_references,
394        top_level_artifact,
395        variant_metadata,
396    )
397}
398
399fn build_dependency_extra_data(
400    dependency: &JsonMap<String, Value>,
401    variant_name: &str,
402    scope: Option<&str>,
403) -> Option<HashMap<String, Value>> {
404    let mut extra = HashMap::new();
405    extra.insert(
406        "variant_names".to_string(),
407        Value::Array(vec![Value::String(variant_name.to_string())]),
408    );
409    if let Some(scope) = scope {
410        extra.insert(
411            "variant_scopes".to_string(),
412            Value::Array(vec![Value::String(scope.to_string())]),
413        );
414    }
415
416    let mut variant_entry = JsonMap::new();
417    variant_entry.insert(
418        "variant_name".to_string(),
419        Value::String(variant_name.to_string()),
420    );
421    if let Some(scope) = scope {
422        variant_entry.insert(
423            "variant_scope".to_string(),
424            Value::String(scope.to_string()),
425        );
426    }
427    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
428    extra.insert(
429        "variant_dependency_entries".to_string(),
430        Value::Array(vec![Value::Object(variant_entry)]),
431    );
432
433    for field in [
434        FIELD_ATTRIBUTES,
435        "reason",
436        "requestedCapabilities",
437        "excludes",
438        "endorseStrictVersions",
439        "thirdPartyCompatibility",
440        "version",
441    ] {
442        if let Some(value) = dependency.get(field) {
443            extra.insert(field.to_string(), value.clone());
444        }
445    }
446    (!extra.is_empty()).then_some(extra)
447}
448
449fn merge_dependency_extra_data(
450    current: Option<HashMap<String, Value>>,
451    next: Option<HashMap<String, Value>>,
452) -> Option<HashMap<String, Value>> {
453    match (current, next) {
454        (None, None) => None,
455        (Some(map), None) | (None, Some(map)) => Some(map),
456        (Some(mut current), Some(mut next)) => {
457            merge_string_arrays(&mut current, &mut next, "variant_names");
458            merge_string_arrays(&mut current, &mut next, "variant_scopes");
459            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
460            for (key, value) in next {
461                current.entry(key).or_insert(value);
462            }
463            Some(current)
464        }
465    }
466}
467
468fn merge_object_arrays(
469    current: &mut HashMap<String, Value>,
470    next: &mut HashMap<String, Value>,
471    key: &str,
472) {
473    let existing = current
474        .remove(key)
475        .and_then(|value| value.as_array().cloned());
476    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
477
478    let mut values = Vec::new();
479    for array in [existing, incoming].into_iter().flatten() {
480        for value in array {
481            if !values.contains(&value) {
482                values.push(value);
483            }
484        }
485    }
486
487    if !values.is_empty() {
488        current.insert(key.to_string(), Value::Array(values));
489    }
490}
491
492fn merge_string_arrays(
493    current: &mut HashMap<String, Value>,
494    next: &mut HashMap<String, Value>,
495    key: &str,
496) {
497    let existing = current
498        .remove(key)
499        .and_then(|value| value.as_array().cloned());
500    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
501
502    let mut values = Vec::new();
503    for array in [existing, incoming].into_iter().flatten() {
504        for value in array
505            .into_iter()
506            .filter_map(|value| value.as_str().map(|s| s.to_string()))
507        {
508            if !values.contains(&value) {
509                values.push(value);
510            }
511        }
512    }
513
514    if !values.is_empty() {
515        current.insert(
516            key.to_string(),
517            Value::Array(values.into_iter().map(Value::String).collect()),
518        );
519    }
520}
521
522fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
523    let attributes = variant.get(FIELD_ATTRIBUTES).and_then(Value::as_object);
524
525    let category = attributes
526        .and_then(|attributes| attributes.get("org.gradle.category"))
527        .and_then(Value::as_str)
528        .unwrap_or_default()
529        .to_ascii_lowercase();
530
531    if category == "verification" {
532        return Some("test".to_string());
533    }
534
535    let usage = attributes
536        .and_then(|attributes| attributes.get("org.gradle.usage"))
537        .and_then(Value::as_str)
538        .unwrap_or_default()
539        .to_ascii_lowercase();
540
541    if usage.contains("api") {
542        return Some("compile".to_string());
543    }
544    if usage.contains("runtime") {
545        return Some("runtime".to_string());
546    }
547
548    None
549}
550
/// Ranks a scope label for merging; lower numbers win.
///
/// `compile` < `runtime` < `test`; a missing or unrecognised scope ranks last (3).
fn scope_precedence(scope: Option<&str>) -> u8 {
    const RANKED_SCOPES: [(&str, u8); 3] = [("compile", 0), ("runtime", 1), ("test", 2)];
    const UNRANKED: u8 = 3;

    let Some(label) = scope else {
        return UNRANKED;
    };

    RANKED_SCOPES
        .iter()
        .find(|(known, _)| *known == label)
        .map_or(UNRANKED, |(_, rank)| *rank)
}
559
560fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
561    match version_value {
562        Some(Value::String(version)) => Some(truncate_field(version.to_string())),
563        Some(Value::Object(version)) => version
564            .get("strictly")
565            .or_else(|| version.get("requires"))
566            .or_else(|| version.get("prefers"))
567            .and_then(Value::as_str)
568            .map(|value| truncate_field(value.to_string())),
569        _ => None,
570    }
571}
572
573fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
574    let sha256 = file
575        .get("sha256")
576        .and_then(Value::as_str)
577        .and_then(|value| Sha256Digest::from_hex(value).ok());
578
579    let sha512_field = file.get("sha512").and_then(Value::as_str);
580    let (sha256, sha512) = match sha512_field {
581        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
582            let misassigned = Sha256Digest::from_hex(hex).ok();
583            (sha256.or(misassigned), None)
584        }
585        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
586        None => (sha256, None),
587    };
588
589    (
590        file.get("size").and_then(Value::as_u64),
591        file.get("sha1")
592            .and_then(Value::as_str)
593            .and_then(|value| Sha1Digest::from_hex(value).ok()),
594        file.get("md5")
595            .and_then(Value::as_str)
596            .and_then(|value| Md5Digest::from_hex(value).ok()),
597        sha256,
598        sha512,
599    )
600}
601
602fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
603    let mut purl = PackageUrl::new("maven", name).ok()?;
604    if !namespace.trim().is_empty() {
605        purl.with_namespace(namespace).ok()?;
606    }
607    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
608        purl.with_version(version).ok()?;
609    }
610    Some(purl.to_string())
611}
612
/// Reports whether a version requirement names exactly one version.
///
/// Returns `false` for:
/// - an empty or whitespace-only requirement (there is nothing to pin to);
/// - Gradle `latest.<status>` selectors such as `latest.release`;
/// - anything containing range brackets, commas, `+`/`*` wildcards, comparison operators,
///   or a space.
fn is_exact_version(version: &str) -> bool {
    // Characters that only occur in ranges, prefix/wildcard matches, or constraint lists.
    const DYNAMIC_MARKERS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    let trimmed = version.trim();
    if trimmed.is_empty() || trimmed.starts_with("latest.") {
        return false;
    }

    !version.contains(&DYNAMIC_MARKERS[..])
}
625
626fn default_package_data() -> PackageData {
627    PackageData {
628        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
629        datasource_id: Some(DatasourceId::GradleModule),
630        ..Default::default()
631    }
632}
633
// Registers this parser via the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; the arguments look like a description, path
// globs, package type, primary language, and a documentation URL — confirm against the macro.
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);