Skip to main content

provenant/parsers/
gradle_module.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9use packageurl::PackageUrl;
10use serde_json::{Map as JsonMap, Value};
11
12use crate::models::{
13    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
14    Sha256Digest, Sha512Digest,
15};
16
17use super::PackageParser;
18use super::metadata::ParserMetadata;
19
/// Top-level JSON key whose string value is recorded as the metadata format version.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
/// Top-level JSON key of the object holding the `group`/`module`/`version` coordinates.
const FIELD_COMPONENT: &str = "component";
/// Top-level JSON key of the object describing the producing tool (its `gradle` entry is read).
const FIELD_CREATED_BY: &str = "createdBy";
/// Top-level JSON key of the array of variants.
const FIELD_VARIANTS: &str = "variants";
/// JSON key of an attribute map; read on the component, on variants and on dependencies.
const FIELD_ATTRIBUTES: &str = "attributes";
/// Variant-level JSON key of the array of dependency objects.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Variant-level JSON key of dependency constraints; copied verbatim into variant metadata.
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
/// Variant-level JSON key of the array of published file objects.
const FIELD_FILES: &str = "files";
/// Variant-level JSON key copied verbatim into variant metadata under `available_at`.
const FIELD_AVAILABLE_AT: &str = "available-at";
29
/// Size and checksums read from one file entry, in the order
/// `(size, sha1, md5, sha256, sha512)` as produced by `extract_file_hashes`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
37
/// Result of `extract_variant_data`, in the order
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
44
/// Parser for Gradle module metadata (`*.module`) files; reports packages as Maven packages.
pub struct GradleModuleParser;
46
/// Accumulator for one dependency while it is merged across variants.
///
/// The fields other than `precedence` are copied one-to-one into the emitted `Dependency`.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    purl: Option<String>,
    extracted_requirement: Option<String>,
    scope: Option<String>,
    is_runtime: Option<bool>,
    is_optional: Option<bool>,
    is_pinned: Option<bool>,
    extra_data: Option<HashMap<String, Value>>,
    // Rank of the scope currently stored in `scope`; a variant with a lower rank replaces it.
    precedence: u8,
}
58
59impl PackageParser for GradleModuleParser {
60    const PACKAGE_TYPE: PackageType = PackageType::Maven;
61
62    fn metadata() -> Vec<ParserMetadata> {
63        vec![ParserMetadata {
64            description: "Gradle module metadata",
65            file_patterns: &["**/*.module"],
66            package_type: "maven",
67            primary_language: "Java",
68            documentation_url: Some(
69                "https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html",
70            ),
71        }]
72    }
73
74    fn is_match(path: &Path) -> bool {
75        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
76            return false;
77        }
78
79        let Ok(content) = read_file_to_string(path, None) else {
80            return false;
81        };
82
83        let Ok(value) = serde_json::from_str(&content) else {
84            return false;
85        };
86
87        is_gradle_module_json(&value)
88    }
89
90    fn extract_packages(path: &Path) -> Vec<PackageData> {
91        let content = match read_file_to_string(path, None) {
92            Ok(content) => content,
93            Err(e) => {
94                warn!("Failed to read Gradle module file at {:?}: {}", path, e);
95                return vec![default_package_data()];
96            }
97        };
98
99        let json: Value = match serde_json::from_str(&content) {
100            Ok(json) => json,
101            Err(e) => {
102                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
103                return vec![default_package_data()];
104            }
105        };
106
107        if !is_gradle_module_json(&json) {
108            warn!("File at {:?} is not valid Gradle module metadata", path);
109            return vec![default_package_data()];
110        }
111
112        vec![parse_gradle_module(&json)]
113    }
114}
115
116fn is_gradle_module_json(json: &Value) -> bool {
117    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
118        return false;
119    };
120
121    json.get(FIELD_FORMAT_VERSION)
122        .and_then(Value::as_str)
123        .is_some()
124        && component.get("group").and_then(Value::as_str).is_some()
125        && component.get("module").and_then(Value::as_str).is_some()
126        && component.get("version").and_then(Value::as_str).is_some()
127}
128
129fn parse_gradle_module(json: &Value) -> PackageData {
130    let component = json
131        .get(FIELD_COMPONENT)
132        .and_then(Value::as_object)
133        .cloned()
134        .unwrap_or_default();
135
136    let namespace = component
137        .get("group")
138        .and_then(Value::as_str)
139        .map(|value| truncate_field(value.to_string()));
140    let name = component
141        .get("module")
142        .and_then(Value::as_str)
143        .map(|value| truncate_field(value.to_string()));
144    let version = component
145        .get("version")
146        .and_then(Value::as_str)
147        .map(|value| truncate_field(value.to_string()));
148
149    let (dependencies, file_references, top_level_artifact, variant_metadata) =
150        extract_variant_data(
151            json.get(FIELD_VARIANTS).and_then(Value::as_array),
152            name.as_deref(),
153            version.as_deref(),
154        );
155
156    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
157        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
158        _ => None,
159    };
160
161    let mut extra_data = HashMap::new();
162    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
163        extra_data.insert(
164            "format_version".to_string(),
165            Value::String(truncate_field(format_version.to_string())),
166        );
167    }
168
169    if let Some(gradle_object) = json
170        .get(FIELD_CREATED_BY)
171        .and_then(Value::as_object)
172        .and_then(|created_by| created_by.get("gradle"))
173        .and_then(Value::as_object)
174    {
175        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
176            extra_data.insert(
177                "gradle_version".to_string(),
178                Value::String(truncate_field(gradle_version.to_string())),
179            );
180        }
181        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
182            extra_data.insert(
183                "build_id".to_string(),
184                Value::String(truncate_field(build_id.to_string())),
185            );
186        }
187    }
188
189    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
190        && !attributes.is_empty()
191    {
192        extra_data.insert(
193            "component_attributes".to_string(),
194            Value::Object(attributes.clone()),
195        );
196    }
197
198    if !variant_metadata.is_empty() {
199        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
200    }
201
202    let (size, sha1, md5, sha256, sha512) = top_level_artifact
203        .as_ref()
204        .map(extract_file_hashes)
205        .unwrap_or((None, None, None, None, None));
206
207    PackageData {
208        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
209        namespace,
210        name,
211        version,
212        qualifiers: None,
213        subpath: None,
214        primary_language: Some("Java".to_string()),
215        description: None,
216        release_date: None,
217        parties: Vec::new(),
218        keywords: Vec::new(),
219        homepage_url: None,
220        download_url: None,
221        size,
222        sha1,
223        md5,
224        sha256,
225        sha512,
226        bug_tracking_url: None,
227        code_view_url: None,
228        vcs_url: None,
229        copyright: None,
230        holder: None,
231        declared_license_expression: None,
232        declared_license_expression_spdx: None,
233        license_detections: Vec::new(),
234        other_license_expression: None,
235        other_license_expression_spdx: None,
236        other_license_detections: Vec::new(),
237        extracted_license_statement: None,
238        notice_text: None,
239        source_packages: Vec::new(),
240        file_references,
241        is_private: false,
242        is_virtual: false,
243        extra_data: (!extra_data.is_empty()).then_some(extra_data),
244        dependencies,
245        repository_homepage_url: None,
246        repository_download_url: None,
247        api_data_url: None,
248        datasource_id: Some(DatasourceId::GradleModule),
249        purl,
250    }
251}
252
253fn extract_variant_data(
254    variants: Option<&Vec<Value>>,
255    module_name: Option<&str>,
256    version: Option<&str>,
257) -> ExtractedVariantData {
258    let mut dependencies = Vec::new();
259    let mut file_references = Vec::new();
260    let mut variant_metadata = Vec::new();
261    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
262        HashMap::new();
263    let mut seen_files: HashSet<String> = HashSet::new();
264    let mut top_level_artifact: Option<(i32, JsonMap<String, Value>)> = None;
265
266    for variant in variants
267        .into_iter()
268        .flatten()
269        .filter_map(Value::as_object)
270        .take(MAX_ITERATION_COUNT)
271    {
272        let category = variant
273            .get(FIELD_ATTRIBUTES)
274            .and_then(Value::as_object)
275            .and_then(|attrs| attrs.get("org.gradle.category"))
276            .and_then(Value::as_str)
277            .unwrap_or_default();
278        let is_documentation = category == "documentation";
279
280        let variant_name = truncate_field(
281            variant
282                .get("name")
283                .and_then(Value::as_str)
284                .unwrap_or_default()
285                .to_string(),
286        );
287        let scope = classify_variant_scope(variant);
288        let precedence = scope_precedence(scope.as_deref());
289        let is_runtime = match scope.as_deref() {
290            Some("compile") | Some("runtime") => Some(true),
291            Some("test") => Some(false),
292            _ => None,
293        };
294        let is_optional = None;
295
296        let mut variant_entry = JsonMap::new();
297        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
298        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
299            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
300        }
301        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
302            variant_entry.insert("available_at".to_string(), available_at.clone());
303        }
304        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
305            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
306        }
307        variant_metadata.push(Value::Object(variant_entry));
308
309        if !is_documentation && let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array)
310        {
311            for file in files
312                .iter()
313                .filter_map(Value::as_object)
314                .take(MAX_ITERATION_COUNT)
315            {
316                let artifact_score =
317                    score_top_level_artifact(file, module_name, version, scope.as_deref());
318                if top_level_artifact
319                    .as_ref()
320                    .is_none_or(|(best_score, _)| artifact_score > *best_score)
321                {
322                    top_level_artifact = Some((artifact_score, file.clone()));
323                }
324
325                let file_path = truncate_field(
326                    file.get("url")
327                        .and_then(Value::as_str)
328                        .or_else(|| file.get("name").and_then(Value::as_str))
329                        .unwrap_or_default()
330                        .to_string(),
331                );
332                if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
333                    continue;
334                }
335                let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
336                let mut extra_data = HashMap::new();
337                if let Some(name) = file.get("name").and_then(Value::as_str) {
338                    extra_data.insert(
339                        "name".to_string(),
340                        Value::String(truncate_field(name.to_string())),
341                    );
342                }
343                file_references.push(FileReference {
344                    path: file_path,
345                    size,
346                    sha1,
347                    md5,
348                    sha256,
349                    sha512,
350                    extra_data: (!extra_data.is_empty()).then_some(extra_data),
351                });
352            }
353        }
354
355        if is_documentation {
356            continue;
357        }
358
359        for dependency in variant
360            .get(FIELD_DEPENDENCIES)
361            .and_then(Value::as_array)
362            .into_iter()
363            .flatten()
364            .filter_map(Value::as_object)
365            .take(MAX_ITERATION_COUNT)
366        {
367            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
368                continue;
369            };
370            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
371                continue;
372            };
373
374            let requirement = extract_dependency_requirement(dependency.get("version"));
375            let key = (group.to_string(), module.to_string(), requirement.clone());
376            let purl = build_maven_purl(group, module, requirement.as_deref());
377            let dep_extra_data =
378                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
379
380            let entry = seen_dependencies.entry(key).or_default();
381            if precedence < entry.precedence || entry.scope.is_none() {
382                entry.scope = scope.clone();
383                entry.is_runtime = is_runtime;
384                entry.is_optional = is_optional;
385                entry.precedence = precedence;
386            }
387            entry.purl = purl.map(truncate_field);
388            entry.extracted_requirement = requirement.clone();
389            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
390            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
391        }
392    }
393
394    for dep in seen_dependencies.into_values() {
395        dependencies.push(Dependency {
396            purl: dep.purl,
397            extracted_requirement: dep.extracted_requirement,
398            scope: dep.scope,
399            is_runtime: dep.is_runtime,
400            is_optional: dep.is_optional,
401            is_pinned: dep.is_pinned,
402            is_direct: Some(true),
403            resolved_package: None,
404            extra_data: dep.extra_data,
405        });
406    }
407
408    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
409    file_references.sort_by(|left, right| left.path.cmp(&right.path));
410
411    (
412        dependencies,
413        file_references,
414        top_level_artifact.map(|(_, artifact)| artifact),
415        variant_metadata,
416    )
417}
418
419fn score_top_level_artifact(
420    file: &JsonMap<String, Value>,
421    module_name: Option<&str>,
422    version: Option<&str>,
423    scope: Option<&str>,
424) -> i32 {
425    let file_name = file
426        .get("name")
427        .and_then(Value::as_str)
428        .or_else(|| file.get("url").and_then(Value::as_str))
429        .unwrap_or_default()
430        .rsplit('/')
431        .next()
432        .unwrap_or_default()
433        .to_ascii_lowercase();
434
435    let mut score = 0;
436
437    if matches!(scope, Some("runtime")) {
438        score += 10;
439    }
440
441    if file_name.ends_with(".jar")
442        || file_name.ends_with(".aar")
443        || file_name.ends_with(".war")
444        || file_name.ends_with(".zip")
445    {
446        score += 30;
447    }
448
449    if file_name.ends_with(".pom") || file_name.ends_with(".module") {
450        score -= 20;
451    }
452
453    if file_name.contains("-sources")
454        || file_name.contains("-source")
455        || file_name.contains("-javadoc")
456        || file_name.contains("-docs")
457        || file_name.contains("-doc")
458        || file_name.contains("-kdoc")
459    {
460        score -= 100;
461    }
462
463    if let (Some(module_name), Some(version)) = (module_name, version) {
464        let module_name = module_name.to_ascii_lowercase();
465        let version = version.to_ascii_lowercase();
466        if file_name.starts_with(&format!("{}-{}.", module_name, version)) {
467            score += 50;
468        }
469    }
470
471    score
472}
473
474fn build_dependency_extra_data(
475    dependency: &JsonMap<String, Value>,
476    variant_name: &str,
477    scope: Option<&str>,
478) -> Option<HashMap<String, Value>> {
479    let mut extra = HashMap::new();
480    extra.insert(
481        "variant_names".to_string(),
482        Value::Array(vec![Value::String(variant_name.to_string())]),
483    );
484    if let Some(scope) = scope {
485        extra.insert(
486            "variant_scopes".to_string(),
487            Value::Array(vec![Value::String(scope.to_string())]),
488        );
489    }
490
491    let mut variant_entry = JsonMap::new();
492    variant_entry.insert(
493        "variant_name".to_string(),
494        Value::String(variant_name.to_string()),
495    );
496    if let Some(scope) = scope {
497        variant_entry.insert(
498            "variant_scope".to_string(),
499            Value::String(scope.to_string()),
500        );
501    }
502    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
503    extra.insert(
504        "variant_dependency_entries".to_string(),
505        Value::Array(vec![Value::Object(variant_entry)]),
506    );
507
508    for field in [
509        FIELD_ATTRIBUTES,
510        "reason",
511        "requestedCapabilities",
512        "excludes",
513        "endorseStrictVersions",
514        "thirdPartyCompatibility",
515        "version",
516    ] {
517        if let Some(value) = dependency.get(field) {
518            extra.insert(field.to_string(), value.clone());
519        }
520    }
521    (!extra.is_empty()).then_some(extra)
522}
523
524fn merge_dependency_extra_data(
525    current: Option<HashMap<String, Value>>,
526    next: Option<HashMap<String, Value>>,
527) -> Option<HashMap<String, Value>> {
528    match (current, next) {
529        (None, None) => None,
530        (Some(map), None) | (None, Some(map)) => Some(map),
531        (Some(mut current), Some(mut next)) => {
532            merge_string_arrays(&mut current, &mut next, "variant_names");
533            merge_string_arrays(&mut current, &mut next, "variant_scopes");
534            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
535            for (key, value) in next {
536                current.entry(key).or_insert(value);
537            }
538            Some(current)
539        }
540    }
541}
542
543fn merge_object_arrays(
544    current: &mut HashMap<String, Value>,
545    next: &mut HashMap<String, Value>,
546    key: &str,
547) {
548    let existing = current
549        .remove(key)
550        .and_then(|value| value.as_array().cloned());
551    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
552
553    let mut values = Vec::new();
554    for array in [existing, incoming].into_iter().flatten() {
555        for value in array {
556            if !values.contains(&value) {
557                values.push(value);
558            }
559        }
560    }
561
562    if !values.is_empty() {
563        current.insert(key.to_string(), Value::Array(values));
564    }
565}
566
567fn merge_string_arrays(
568    current: &mut HashMap<String, Value>,
569    next: &mut HashMap<String, Value>,
570    key: &str,
571) {
572    let existing = current
573        .remove(key)
574        .and_then(|value| value.as_array().cloned());
575    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
576
577    let mut values = Vec::new();
578    for array in [existing, incoming].into_iter().flatten() {
579        for value in array
580            .into_iter()
581            .filter_map(|value| value.as_str().map(|s| s.to_string()))
582        {
583            if !values.contains(&value) {
584                values.push(value);
585            }
586        }
587    }
588
589    if !values.is_empty() {
590        current.insert(
591            key.to_string(),
592            Value::Array(values.into_iter().map(Value::String).collect()),
593        );
594    }
595}
596
597fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
598    let attributes = variant.get(FIELD_ATTRIBUTES).and_then(Value::as_object);
599
600    let category = attributes
601        .and_then(|attributes| attributes.get("org.gradle.category"))
602        .and_then(Value::as_str)
603        .unwrap_or_default()
604        .to_ascii_lowercase();
605
606    if category == "verification" {
607        return Some("test".to_string());
608    }
609
610    let usage = attributes
611        .and_then(|attributes| attributes.get("org.gradle.usage"))
612        .and_then(Value::as_str)
613        .unwrap_or_default()
614        .to_ascii_lowercase();
615
616    if usage.contains("api") {
617        return Some("compile".to_string());
618    }
619    if usage.contains("runtime") {
620        return Some("runtime".to_string());
621    }
622
623    None
624}
625
/// Ranks a scope for dependency merging; a lower rank wins.
///
/// `compile` is 0, `runtime` is 1, `test` is 2, and anything else
/// (including no scope) is 3.
fn scope_precedence(scope: Option<&str>) -> u8 {
    // Position in this list is the rank.
    const RANKED_SCOPES: [&str; 3] = ["compile", "runtime", "test"];
    const UNRANKED: u8 = 3;

    let Some(scope) = scope else {
        return UNRANKED;
    };
    match RANKED_SCOPES.iter().position(|ranked| *ranked == scope) {
        // The index is at most 2, so the narrowing cast cannot truncate.
        Some(rank) => rank as u8,
        None => UNRANKED,
    }
}
634
635fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
636    match version_value {
637        Some(Value::String(version)) => Some(truncate_field(version.to_string())),
638        Some(Value::Object(version)) => version
639            .get("strictly")
640            .or_else(|| version.get("requires"))
641            .or_else(|| version.get("prefers"))
642            .and_then(Value::as_str)
643            .map(|value| truncate_field(value.to_string())),
644        _ => None,
645    }
646}
647
648fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
649    let sha256 = file
650        .get("sha256")
651        .and_then(Value::as_str)
652        .and_then(|value| Sha256Digest::from_hex(value).ok());
653
654    let sha512_field = file.get("sha512").and_then(Value::as_str);
655    let (sha256, sha512) = match sha512_field {
656        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
657            let misassigned = Sha256Digest::from_hex(hex).ok();
658            (sha256.or(misassigned), None)
659        }
660        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
661        None => (sha256, None),
662    };
663
664    (
665        file.get("size").and_then(Value::as_u64),
666        file.get("sha1")
667            .and_then(Value::as_str)
668            .and_then(|value| Sha1Digest::from_hex(value).ok()),
669        file.get("md5")
670            .and_then(Value::as_str)
671            .and_then(|value| Md5Digest::from_hex(value).ok()),
672        sha256,
673        sha512,
674    )
675}
676
677fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
678    let mut purl = PackageUrl::new("maven", name).ok()?;
679    if !namespace.trim().is_empty() {
680        purl.with_namespace(namespace).ok()?;
681    }
682    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
683        purl.with_version(version).ok()?;
684    }
685    Some(purl.to_string())
686}
687
/// Returns `true` when `version` names a single concrete version.
///
/// Returns `false` for:
/// * empty or whitespace-only strings (no version was given);
/// * anything containing a range or wildcard character
///   (`[ ] ( ) , + * > <` or a space);
/// * Gradle `latest.<status>` selectors such as `latest.release`, which
///   resolve to a different version over time.
fn is_exact_version(version: &str) -> bool {
    const RANGE_MARKERS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    if version.trim().is_empty() {
        return false;
    }
    if version.starts_with("latest.") {
        return false;
    }
    !version
        .chars()
        .any(|character| RANGE_MARKERS.contains(&character))
}
700
701fn default_package_data() -> PackageData {
702    PackageData {
703        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
704        datasource_id: Some(DatasourceId::GradleModule),
705        ..Default::default()
706    }
707}