Skip to main content

provenant/parsers/
gradle_module.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9use packageurl::PackageUrl;
10use serde_json::{Map as JsonMap, Value};
11
12use crate::models::{
13    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
14    Sha256Digest, Sha512Digest,
15};
16
17use super::PackageParser;
18
// JSON keys read from Gradle module metadata documents by this parser.
/// Top-level key whose string value is stored as `format_version` in the package extra data.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
/// Top-level key of the object holding `group`, `module`, `version` and optional attributes.
const FIELD_COMPONENT: &str = "component";
/// Top-level key of the object whose `gradle` entry supplies `version` and `buildId`.
const FIELD_CREATED_BY: &str = "createdBy";
/// Top-level key of the array of variant objects.
const FIELD_VARIANTS: &str = "variants";
/// Key of the attributes object; read on the component, on variants and on dependencies.
const FIELD_ATTRIBUTES: &str = "attributes";
/// Variant-level key of the array of dependency objects.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Variant-level key copied verbatim into the variant metadata as `dependency_constraints`.
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
/// Variant-level key of the array of published file objects.
const FIELD_FILES: &str = "files";
/// Variant-level key copied verbatim into the variant metadata as `available_at`.
const FIELD_AVAILABLE_AT: &str = "available-at";
28
/// Size and digests read from one file entry, in the order
/// `(size, sha1, md5, sha256, sha512)` as produced by `extract_file_hashes`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
36
/// Result of `extract_variant_data`, in the order
/// `(dependencies, file references, best-scoring file object, per-variant metadata entries)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
43
/// Parser for Gradle module metadata files (`*.module` JSON documents).
pub struct GradleModuleParser;
45
/// Accumulator for one dependency while the same (group, module, requirement) is merged
/// across several variants; converted into a `Dependency` at the end of extraction.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    /// Maven package URL built from group, module and requirement, if it could be built.
    purl: Option<String>,
    /// Version requirement string taken from the dependency's `version` field.
    extracted_requirement: Option<String>,
    /// Scope of the winning variant (`compile`, `runtime`, `test`) or `None` if unclassified.
    scope: Option<String>,
    /// Runtime flag derived from the winning variant's scope.
    is_runtime: Option<bool>,
    /// Optional flag; the extraction code currently always stores `None`.
    is_optional: Option<bool>,
    /// Whether the requirement looks like an exact version.
    is_pinned: Option<bool>,
    /// Merged per-variant details about this dependency.
    extra_data: Option<HashMap<String, Value>>,
    /// Rank of the recorded scope as returned by `scope_precedence`; lower values win.
    precedence: u8,
}
57
58impl PackageParser for GradleModuleParser {
59    const PACKAGE_TYPE: PackageType = PackageType::Maven;
60
61    fn is_match(path: &Path) -> bool {
62        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
63            return false;
64        }
65
66        let Ok(content) = read_file_to_string(path, None) else {
67            return false;
68        };
69
70        let Ok(value) = serde_json::from_str(&content) else {
71            return false;
72        };
73
74        is_gradle_module_json(&value)
75    }
76
77    fn extract_packages(path: &Path) -> Vec<PackageData> {
78        let content = match read_file_to_string(path, None) {
79            Ok(content) => content,
80            Err(e) => {
81                warn!("Failed to read Gradle module file at {:?}: {}", path, e);
82                return vec![default_package_data()];
83            }
84        };
85
86        let json: Value = match serde_json::from_str(&content) {
87            Ok(json) => json,
88            Err(e) => {
89                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
90                return vec![default_package_data()];
91            }
92        };
93
94        if !is_gradle_module_json(&json) {
95            warn!("File at {:?} is not valid Gradle module metadata", path);
96            return vec![default_package_data()];
97        }
98
99        vec![parse_gradle_module(&json)]
100    }
101}
102
103fn is_gradle_module_json(json: &Value) -> bool {
104    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
105        return false;
106    };
107
108    json.get(FIELD_FORMAT_VERSION)
109        .and_then(Value::as_str)
110        .is_some()
111        && component.get("group").and_then(Value::as_str).is_some()
112        && component.get("module").and_then(Value::as_str).is_some()
113        && component.get("version").and_then(Value::as_str).is_some()
114}
115
116fn parse_gradle_module(json: &Value) -> PackageData {
117    let component = json
118        .get(FIELD_COMPONENT)
119        .and_then(Value::as_object)
120        .cloned()
121        .unwrap_or_default();
122
123    let namespace = component
124        .get("group")
125        .and_then(Value::as_str)
126        .map(|value| truncate_field(value.to_string()));
127    let name = component
128        .get("module")
129        .and_then(Value::as_str)
130        .map(|value| truncate_field(value.to_string()));
131    let version = component
132        .get("version")
133        .and_then(Value::as_str)
134        .map(|value| truncate_field(value.to_string()));
135
136    let (dependencies, file_references, top_level_artifact, variant_metadata) =
137        extract_variant_data(
138            json.get(FIELD_VARIANTS).and_then(Value::as_array),
139            name.as_deref(),
140            version.as_deref(),
141        );
142
143    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
144        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
145        _ => None,
146    };
147
148    let mut extra_data = HashMap::new();
149    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
150        extra_data.insert(
151            "format_version".to_string(),
152            Value::String(truncate_field(format_version.to_string())),
153        );
154    }
155
156    if let Some(gradle_object) = json
157        .get(FIELD_CREATED_BY)
158        .and_then(Value::as_object)
159        .and_then(|created_by| created_by.get("gradle"))
160        .and_then(Value::as_object)
161    {
162        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
163            extra_data.insert(
164                "gradle_version".to_string(),
165                Value::String(truncate_field(gradle_version.to_string())),
166            );
167        }
168        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
169            extra_data.insert(
170                "build_id".to_string(),
171                Value::String(truncate_field(build_id.to_string())),
172            );
173        }
174    }
175
176    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
177        && !attributes.is_empty()
178    {
179        extra_data.insert(
180            "component_attributes".to_string(),
181            Value::Object(attributes.clone()),
182        );
183    }
184
185    if !variant_metadata.is_empty() {
186        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
187    }
188
189    let (size, sha1, md5, sha256, sha512) = top_level_artifact
190        .as_ref()
191        .map(extract_file_hashes)
192        .unwrap_or((None, None, None, None, None));
193
194    PackageData {
195        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
196        namespace,
197        name,
198        version,
199        qualifiers: None,
200        subpath: None,
201        primary_language: Some("Java".to_string()),
202        description: None,
203        release_date: None,
204        parties: Vec::new(),
205        keywords: Vec::new(),
206        homepage_url: None,
207        download_url: None,
208        size,
209        sha1,
210        md5,
211        sha256,
212        sha512,
213        bug_tracking_url: None,
214        code_view_url: None,
215        vcs_url: None,
216        copyright: None,
217        holder: None,
218        declared_license_expression: None,
219        declared_license_expression_spdx: None,
220        license_detections: Vec::new(),
221        other_license_expression: None,
222        other_license_expression_spdx: None,
223        other_license_detections: Vec::new(),
224        extracted_license_statement: None,
225        notice_text: None,
226        source_packages: Vec::new(),
227        file_references,
228        is_private: false,
229        is_virtual: false,
230        extra_data: (!extra_data.is_empty()).then_some(extra_data),
231        dependencies,
232        repository_homepage_url: None,
233        repository_download_url: None,
234        api_data_url: None,
235        datasource_id: Some(DatasourceId::GradleModule),
236        purl,
237    }
238}
239
240fn extract_variant_data(
241    variants: Option<&Vec<Value>>,
242    module_name: Option<&str>,
243    version: Option<&str>,
244) -> ExtractedVariantData {
245    let mut dependencies = Vec::new();
246    let mut file_references = Vec::new();
247    let mut variant_metadata = Vec::new();
248    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
249        HashMap::new();
250    let mut seen_files: HashSet<String> = HashSet::new();
251    let mut top_level_artifact: Option<(i32, JsonMap<String, Value>)> = None;
252
253    for variant in variants
254        .into_iter()
255        .flatten()
256        .filter_map(Value::as_object)
257        .take(MAX_ITERATION_COUNT)
258    {
259        let category = variant
260            .get(FIELD_ATTRIBUTES)
261            .and_then(Value::as_object)
262            .and_then(|attrs| attrs.get("org.gradle.category"))
263            .and_then(Value::as_str)
264            .unwrap_or_default();
265        let is_documentation = category == "documentation";
266
267        let variant_name = truncate_field(
268            variant
269                .get("name")
270                .and_then(Value::as_str)
271                .unwrap_or_default()
272                .to_string(),
273        );
274        let scope = classify_variant_scope(variant);
275        let precedence = scope_precedence(scope.as_deref());
276        let is_runtime = match scope.as_deref() {
277            Some("compile") | Some("runtime") => Some(true),
278            Some("test") => Some(false),
279            _ => None,
280        };
281        let is_optional = None;
282
283        let mut variant_entry = JsonMap::new();
284        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
285        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
286            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
287        }
288        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
289            variant_entry.insert("available_at".to_string(), available_at.clone());
290        }
291        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
292            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
293        }
294        variant_metadata.push(Value::Object(variant_entry));
295
296        if !is_documentation && let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array)
297        {
298            for file in files
299                .iter()
300                .filter_map(Value::as_object)
301                .take(MAX_ITERATION_COUNT)
302            {
303                let artifact_score =
304                    score_top_level_artifact(file, module_name, version, scope.as_deref());
305                if top_level_artifact
306                    .as_ref()
307                    .is_none_or(|(best_score, _)| artifact_score > *best_score)
308                {
309                    top_level_artifact = Some((artifact_score, file.clone()));
310                }
311
312                let file_path = truncate_field(
313                    file.get("url")
314                        .and_then(Value::as_str)
315                        .or_else(|| file.get("name").and_then(Value::as_str))
316                        .unwrap_or_default()
317                        .to_string(),
318                );
319                if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
320                    continue;
321                }
322                let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
323                let mut extra_data = HashMap::new();
324                if let Some(name) = file.get("name").and_then(Value::as_str) {
325                    extra_data.insert(
326                        "name".to_string(),
327                        Value::String(truncate_field(name.to_string())),
328                    );
329                }
330                file_references.push(FileReference {
331                    path: file_path,
332                    size,
333                    sha1,
334                    md5,
335                    sha256,
336                    sha512,
337                    extra_data: (!extra_data.is_empty()).then_some(extra_data),
338                });
339            }
340        }
341
342        if is_documentation {
343            continue;
344        }
345
346        for dependency in variant
347            .get(FIELD_DEPENDENCIES)
348            .and_then(Value::as_array)
349            .into_iter()
350            .flatten()
351            .filter_map(Value::as_object)
352            .take(MAX_ITERATION_COUNT)
353        {
354            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
355                continue;
356            };
357            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
358                continue;
359            };
360
361            let requirement = extract_dependency_requirement(dependency.get("version"));
362            let key = (group.to_string(), module.to_string(), requirement.clone());
363            let purl = build_maven_purl(group, module, requirement.as_deref());
364            let dep_extra_data =
365                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
366
367            let entry = seen_dependencies.entry(key).or_default();
368            if precedence < entry.precedence || entry.scope.is_none() {
369                entry.scope = scope.clone();
370                entry.is_runtime = is_runtime;
371                entry.is_optional = is_optional;
372                entry.precedence = precedence;
373            }
374            entry.purl = purl.map(truncate_field);
375            entry.extracted_requirement = requirement.clone();
376            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
377            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
378        }
379    }
380
381    for dep in seen_dependencies.into_values() {
382        dependencies.push(Dependency {
383            purl: dep.purl,
384            extracted_requirement: dep.extracted_requirement,
385            scope: dep.scope,
386            is_runtime: dep.is_runtime,
387            is_optional: dep.is_optional,
388            is_pinned: dep.is_pinned,
389            is_direct: Some(true),
390            resolved_package: None,
391            extra_data: dep.extra_data,
392        });
393    }
394
395    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
396    file_references.sort_by(|left, right| left.path.cmp(&right.path));
397
398    (
399        dependencies,
400        file_references,
401        top_level_artifact.map(|(_, artifact)| artifact),
402        variant_metadata,
403    )
404}
405
406fn score_top_level_artifact(
407    file: &JsonMap<String, Value>,
408    module_name: Option<&str>,
409    version: Option<&str>,
410    scope: Option<&str>,
411) -> i32 {
412    let file_name = file
413        .get("name")
414        .and_then(Value::as_str)
415        .or_else(|| file.get("url").and_then(Value::as_str))
416        .unwrap_or_default()
417        .rsplit('/')
418        .next()
419        .unwrap_or_default()
420        .to_ascii_lowercase();
421
422    let mut score = 0;
423
424    if matches!(scope, Some("runtime")) {
425        score += 10;
426    }
427
428    if file_name.ends_with(".jar")
429        || file_name.ends_with(".aar")
430        || file_name.ends_with(".war")
431        || file_name.ends_with(".zip")
432    {
433        score += 30;
434    }
435
436    if file_name.ends_with(".pom") || file_name.ends_with(".module") {
437        score -= 20;
438    }
439
440    if file_name.contains("-sources")
441        || file_name.contains("-source")
442        || file_name.contains("-javadoc")
443        || file_name.contains("-docs")
444        || file_name.contains("-doc")
445        || file_name.contains("-kdoc")
446    {
447        score -= 100;
448    }
449
450    if let (Some(module_name), Some(version)) = (module_name, version) {
451        let module_name = module_name.to_ascii_lowercase();
452        let version = version.to_ascii_lowercase();
453        if file_name.starts_with(&format!("{}-{}.", module_name, version)) {
454            score += 50;
455        }
456    }
457
458    score
459}
460
461fn build_dependency_extra_data(
462    dependency: &JsonMap<String, Value>,
463    variant_name: &str,
464    scope: Option<&str>,
465) -> Option<HashMap<String, Value>> {
466    let mut extra = HashMap::new();
467    extra.insert(
468        "variant_names".to_string(),
469        Value::Array(vec![Value::String(variant_name.to_string())]),
470    );
471    if let Some(scope) = scope {
472        extra.insert(
473            "variant_scopes".to_string(),
474            Value::Array(vec![Value::String(scope.to_string())]),
475        );
476    }
477
478    let mut variant_entry = JsonMap::new();
479    variant_entry.insert(
480        "variant_name".to_string(),
481        Value::String(variant_name.to_string()),
482    );
483    if let Some(scope) = scope {
484        variant_entry.insert(
485            "variant_scope".to_string(),
486            Value::String(scope.to_string()),
487        );
488    }
489    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
490    extra.insert(
491        "variant_dependency_entries".to_string(),
492        Value::Array(vec![Value::Object(variant_entry)]),
493    );
494
495    for field in [
496        FIELD_ATTRIBUTES,
497        "reason",
498        "requestedCapabilities",
499        "excludes",
500        "endorseStrictVersions",
501        "thirdPartyCompatibility",
502        "version",
503    ] {
504        if let Some(value) = dependency.get(field) {
505            extra.insert(field.to_string(), value.clone());
506        }
507    }
508    (!extra.is_empty()).then_some(extra)
509}
510
511fn merge_dependency_extra_data(
512    current: Option<HashMap<String, Value>>,
513    next: Option<HashMap<String, Value>>,
514) -> Option<HashMap<String, Value>> {
515    match (current, next) {
516        (None, None) => None,
517        (Some(map), None) | (None, Some(map)) => Some(map),
518        (Some(mut current), Some(mut next)) => {
519            merge_string_arrays(&mut current, &mut next, "variant_names");
520            merge_string_arrays(&mut current, &mut next, "variant_scopes");
521            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
522            for (key, value) in next {
523                current.entry(key).or_insert(value);
524            }
525            Some(current)
526        }
527    }
528}
529
530fn merge_object_arrays(
531    current: &mut HashMap<String, Value>,
532    next: &mut HashMap<String, Value>,
533    key: &str,
534) {
535    let existing = current
536        .remove(key)
537        .and_then(|value| value.as_array().cloned());
538    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
539
540    let mut values = Vec::new();
541    for array in [existing, incoming].into_iter().flatten() {
542        for value in array {
543            if !values.contains(&value) {
544                values.push(value);
545            }
546        }
547    }
548
549    if !values.is_empty() {
550        current.insert(key.to_string(), Value::Array(values));
551    }
552}
553
554fn merge_string_arrays(
555    current: &mut HashMap<String, Value>,
556    next: &mut HashMap<String, Value>,
557    key: &str,
558) {
559    let existing = current
560        .remove(key)
561        .and_then(|value| value.as_array().cloned());
562    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
563
564    let mut values = Vec::new();
565    for array in [existing, incoming].into_iter().flatten() {
566        for value in array
567            .into_iter()
568            .filter_map(|value| value.as_str().map(|s| s.to_string()))
569        {
570            if !values.contains(&value) {
571                values.push(value);
572            }
573        }
574    }
575
576    if !values.is_empty() {
577        current.insert(
578            key.to_string(),
579            Value::Array(values.into_iter().map(Value::String).collect()),
580        );
581    }
582}
583
584fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
585    let attributes = variant.get(FIELD_ATTRIBUTES).and_then(Value::as_object);
586
587    let category = attributes
588        .and_then(|attributes| attributes.get("org.gradle.category"))
589        .and_then(Value::as_str)
590        .unwrap_or_default()
591        .to_ascii_lowercase();
592
593    if category == "verification" {
594        return Some("test".to_string());
595    }
596
597    let usage = attributes
598        .and_then(|attributes| attributes.get("org.gradle.usage"))
599        .and_then(Value::as_str)
600        .unwrap_or_default()
601        .to_ascii_lowercase();
602
603    if usage.contains("api") {
604        return Some("compile".to_string());
605    }
606    if usage.contains("runtime") {
607        return Some("runtime".to_string());
608    }
609
610    None
611}
612
/// Ranks a scope for merging: `compile` (0) beats `runtime` (1) beats `test` (2);
/// a missing or unrecognised scope ranks last (3).
fn scope_precedence(scope: Option<&str>) -> u8 {
    let Some(name) = scope else {
        return 3;
    };

    if name == "compile" {
        0
    } else if name == "runtime" {
        1
    } else if name == "test" {
        2
    } else {
        3
    }
}
621
622fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
623    match version_value {
624        Some(Value::String(version)) => Some(truncate_field(version.to_string())),
625        Some(Value::Object(version)) => version
626            .get("strictly")
627            .or_else(|| version.get("requires"))
628            .or_else(|| version.get("prefers"))
629            .and_then(Value::as_str)
630            .map(|value| truncate_field(value.to_string())),
631        _ => None,
632    }
633}
634
635fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
636    let sha256 = file
637        .get("sha256")
638        .and_then(Value::as_str)
639        .and_then(|value| Sha256Digest::from_hex(value).ok());
640
641    let sha512_field = file.get("sha512").and_then(Value::as_str);
642    let (sha256, sha512) = match sha512_field {
643        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
644            let misassigned = Sha256Digest::from_hex(hex).ok();
645            (sha256.or(misassigned), None)
646        }
647        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
648        None => (sha256, None),
649    };
650
651    (
652        file.get("size").and_then(Value::as_u64),
653        file.get("sha1")
654            .and_then(Value::as_str)
655            .and_then(|value| Sha1Digest::from_hex(value).ok()),
656        file.get("md5")
657            .and_then(Value::as_str)
658            .and_then(|value| Md5Digest::from_hex(value).ok()),
659        sha256,
660        sha512,
661    )
662}
663
664fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
665    let mut purl = PackageUrl::new("maven", name).ok()?;
666    if !namespace.trim().is_empty() {
667        purl.with_namespace(namespace).ok()?;
668    }
669    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
670        purl.with_version(version).ok()?;
671    }
672    Some(purl.to_string())
673}
674
/// Returns `true` when `version` names one exact version rather than a range or a
/// dynamic selector.
///
/// Rejected as not exact:
/// * empty or blank strings (there is nothing to pin to);
/// * Gradle `latest.<status>` selectors such as `latest.release`;
/// * anything containing range, wildcard or comparison characters
///   (`[ ] ( ) , + * > <`) or a space.
fn is_exact_version(version: &str) -> bool {
    // Characters that only occur in ranges, prefix wildcards or comparisons.
    const NON_EXACT_CHARS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    if version.trim().is_empty() || version.starts_with("latest.") {
        return false;
    }

    !version.contains(NON_EXACT_CHARS)
}
687
688fn default_package_data() -> PackageData {
689    PackageData {
690        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
691        datasource_id: Some(DatasourceId::GradleModule),
692        ..Default::default()
693    }
694}
695
// Registers this parser with the crate. The arguments appear to be: description,
// path glob patterns, package type, primary language and a documentation URL
// (NOTE(review): argument meaning inferred from the values; confirm against the
// `register_parser!` macro definition).
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);