Skip to main content

provenant/parsers/
gradle_module.rs

1use std::collections::{HashMap, HashSet};
2use std::fs::File;
3use std::io::BufReader;
4use std::path::Path;
5
6use crate::parser_warn as warn;
7use packageurl::PackageUrl;
8use serde_json::{Map as JsonMap, Value};
9
10use crate::models::{
11    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
12    Sha256Digest, Sha512Digest,
13};
14
15use super::PackageParser;
16
// JSON keys looked up in the Gradle module metadata document.
//
// `formatVersion`, `component`, `createdBy` and `variants` are read from the
// top-level object; `attributes` is read from the component, from each variant
// and from each dependency; the remaining keys are read from variant objects.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
const FIELD_COMPONENT: &str = "component";
const FIELD_CREATED_BY: &str = "createdBy";
const FIELD_VARIANTS: &str = "variants";
const FIELD_ATTRIBUTES: &str = "attributes";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
const FIELD_FILES: &str = "files";
const FIELD_AVAILABLE_AT: &str = "available-at";
26
/// Size and digests read from one `files` entry, in the order
/// `(size, sha1, md5, sha256, sha512)` as produced by `extract_file_hashes`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
34
/// Result of `extract_variant_data`, in the order
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
41
/// Parser for Gradle module metadata files (`*.module` JSON documents).
pub struct GradleModuleParser;
43
/// Accumulator for one dependency while it is merged across variants.
///
/// Entries are keyed by `(group, module, requirement)` in
/// `extract_variant_data` and converted into `Dependency` values at the end.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    /// Maven package URL built from group, module and requirement.
    purl: Option<String>,
    /// Version requirement string taken from the dependency's `version` field.
    extracted_requirement: Option<String>,
    /// Scope derived from the variant that currently has the best precedence.
    scope: Option<String>,
    /// `Some(true)` unless the selected scope is `"test"`.
    is_runtime: Option<bool>,
    /// `Some(true)` only when the selected scope is `"test"`.
    is_optional: Option<bool>,
    /// Whether the requirement looks like a single exact version.
    is_pinned: Option<bool>,
    /// Per-variant details merged via `merge_dependency_extra_data`.
    extra_data: Option<HashMap<String, Value>>,
    /// Rank from `scope_precedence` of the selected scope; lower values win.
    precedence: u8,
}
55
56impl PackageParser for GradleModuleParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Maven;
58
59    fn is_match(path: &Path) -> bool {
60        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
61            return false;
62        }
63
64        let Ok(file) = File::open(path) else {
65            return false;
66        };
67
68        let Ok(value) = serde_json::from_reader::<_, Value>(BufReader::new(file)) else {
69            return false;
70        };
71
72        is_gradle_module_json(&value)
73    }
74
75    fn extract_packages(path: &Path) -> Vec<PackageData> {
76        let file = match File::open(path) {
77            Ok(file) => file,
78            Err(e) => {
79                warn!("Failed to open Gradle module file at {:?}: {}", path, e);
80                return vec![default_package_data()];
81            }
82        };
83
84        let json: Value = match serde_json::from_reader(BufReader::new(file)) {
85            Ok(json) => json,
86            Err(e) => {
87                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
88                return vec![default_package_data()];
89            }
90        };
91
92        if !is_gradle_module_json(&json) {
93            warn!("File at {:?} is not valid Gradle module metadata", path);
94            return vec![default_package_data()];
95        }
96
97        vec![parse_gradle_module(&json)]
98    }
99}
100
101fn is_gradle_module_json(json: &Value) -> bool {
102    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
103        return false;
104    };
105
106    json.get(FIELD_FORMAT_VERSION)
107        .and_then(Value::as_str)
108        .is_some()
109        && component.get("group").and_then(Value::as_str).is_some()
110        && component.get("module").and_then(Value::as_str).is_some()
111        && component.get("version").and_then(Value::as_str).is_some()
112}
113
114fn parse_gradle_module(json: &Value) -> PackageData {
115    let component = json
116        .get(FIELD_COMPONENT)
117        .and_then(Value::as_object)
118        .cloned()
119        .unwrap_or_default();
120
121    let namespace = component
122        .get("group")
123        .and_then(Value::as_str)
124        .map(|value| value.to_string());
125    let name = component
126        .get("module")
127        .and_then(Value::as_str)
128        .map(|value| value.to_string());
129    let version = component
130        .get("version")
131        .and_then(Value::as_str)
132        .map(|value| value.to_string());
133
134    let (dependencies, file_references, top_level_artifact, variant_metadata) =
135        extract_variant_data(json.get(FIELD_VARIANTS).and_then(Value::as_array));
136
137    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
138        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
139        _ => None,
140    };
141
142    let mut extra_data = HashMap::new();
143    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
144        extra_data.insert(
145            "format_version".to_string(),
146            Value::String(format_version.to_string()),
147        );
148    }
149
150    if let Some(gradle_object) = json
151        .get(FIELD_CREATED_BY)
152        .and_then(Value::as_object)
153        .and_then(|created_by| created_by.get("gradle"))
154        .and_then(Value::as_object)
155    {
156        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
157            extra_data.insert(
158                "gradle_version".to_string(),
159                Value::String(gradle_version.to_string()),
160            );
161        }
162        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
163            extra_data.insert("build_id".to_string(), Value::String(build_id.to_string()));
164        }
165    }
166
167    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
168        && !attributes.is_empty()
169    {
170        extra_data.insert(
171            "component_attributes".to_string(),
172            Value::Object(attributes.clone()),
173        );
174    }
175
176    if !variant_metadata.is_empty() {
177        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
178    }
179
180    let (size, sha1, md5, sha256, sha512) = top_level_artifact
181        .as_ref()
182        .map(extract_file_hashes)
183        .unwrap_or((None, None, None, None, None));
184
185    PackageData {
186        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
187        namespace,
188        name,
189        version,
190        qualifiers: None,
191        subpath: None,
192        primary_language: Some("Java".to_string()),
193        description: None,
194        release_date: None,
195        parties: Vec::new(),
196        keywords: Vec::new(),
197        homepage_url: None,
198        download_url: None,
199        size,
200        sha1,
201        md5,
202        sha256,
203        sha512,
204        bug_tracking_url: None,
205        code_view_url: None,
206        vcs_url: None,
207        copyright: None,
208        holder: None,
209        declared_license_expression: None,
210        declared_license_expression_spdx: None,
211        license_detections: Vec::new(),
212        other_license_expression: None,
213        other_license_expression_spdx: None,
214        other_license_detections: Vec::new(),
215        extracted_license_statement: None,
216        notice_text: None,
217        source_packages: Vec::new(),
218        file_references,
219        is_private: false,
220        is_virtual: false,
221        extra_data: (!extra_data.is_empty()).then_some(extra_data),
222        dependencies,
223        repository_homepage_url: None,
224        repository_download_url: None,
225        api_data_url: None,
226        datasource_id: Some(DatasourceId::GradleModule),
227        purl,
228    }
229}
230
231fn extract_variant_data(variants: Option<&Vec<Value>>) -> ExtractedVariantData {
232    let mut dependencies = Vec::new();
233    let mut file_references = Vec::new();
234    let mut variant_metadata = Vec::new();
235    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
236        HashMap::new();
237    let mut seen_files: HashSet<String> = HashSet::new();
238    let mut top_level_artifact: Option<JsonMap<String, Value>> = None;
239
240    for variant in variants.into_iter().flatten().filter_map(Value::as_object) {
241        let category = variant
242            .get(FIELD_ATTRIBUTES)
243            .and_then(Value::as_object)
244            .and_then(|attrs| attrs.get("org.gradle.category"))
245            .and_then(Value::as_str)
246            .unwrap_or_default();
247        let is_documentation = category == "documentation";
248
249        let variant_name = variant
250            .get("name")
251            .and_then(Value::as_str)
252            .unwrap_or_default()
253            .to_string();
254        let scope = classify_variant_scope(variant);
255        let precedence = scope_precedence(scope.as_deref());
256        let is_runtime = Some(scope.as_deref() != Some("test"));
257        let is_optional = Some(scope.as_deref() == Some("test"));
258
259        let mut variant_entry = JsonMap::new();
260        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
261        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
262            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
263        }
264        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
265            variant_entry.insert("available_at".to_string(), available_at.clone());
266        }
267        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
268            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
269        }
270        variant_metadata.push(Value::Object(variant_entry));
271
272        if !is_documentation {
273            if top_level_artifact.is_none() {
274                top_level_artifact = variant
275                    .get(FIELD_FILES)
276                    .and_then(Value::as_array)
277                    .and_then(|files| files.first())
278                    .and_then(Value::as_object)
279                    .cloned();
280            }
281
282            if let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array) {
283                for file in files.iter().filter_map(Value::as_object) {
284                    let file_path = file
285                        .get("url")
286                        .and_then(Value::as_str)
287                        .or_else(|| file.get("name").and_then(Value::as_str))
288                        .unwrap_or_default()
289                        .to_string();
290                    if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
291                        continue;
292                    }
293                    let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
294                    let mut extra_data = HashMap::new();
295                    if let Some(name) = file.get("name").and_then(Value::as_str) {
296                        extra_data.insert("name".to_string(), Value::String(name.to_string()));
297                    }
298                    file_references.push(FileReference {
299                        path: file_path,
300                        size,
301                        sha1,
302                        md5,
303                        sha256,
304                        sha512,
305                        extra_data: (!extra_data.is_empty()).then_some(extra_data),
306                    });
307                }
308            }
309        }
310
311        if is_documentation {
312            continue;
313        }
314
315        for dependency in variant
316            .get(FIELD_DEPENDENCIES)
317            .and_then(Value::as_array)
318            .into_iter()
319            .flatten()
320            .filter_map(Value::as_object)
321        {
322            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
323                continue;
324            };
325            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
326                continue;
327            };
328
329            let requirement = extract_dependency_requirement(dependency.get("version"));
330            let key = (group.to_string(), module.to_string(), requirement.clone());
331            let purl = build_maven_purl(group, module, requirement.as_deref());
332            let dep_extra_data =
333                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
334
335            let entry = seen_dependencies.entry(key).or_default();
336            if precedence < entry.precedence || entry.scope.is_none() {
337                entry.scope = scope.clone();
338                entry.is_runtime = is_runtime;
339                entry.is_optional = is_optional;
340                entry.precedence = precedence;
341            }
342            entry.purl = purl;
343            entry.extracted_requirement = requirement.clone();
344            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
345            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
346        }
347    }
348
349    for dep in seen_dependencies.into_values() {
350        dependencies.push(Dependency {
351            purl: dep.purl,
352            extracted_requirement: dep.extracted_requirement,
353            scope: dep.scope,
354            is_runtime: dep.is_runtime,
355            is_optional: dep.is_optional,
356            is_pinned: dep.is_pinned,
357            is_direct: Some(true),
358            resolved_package: None,
359            extra_data: dep.extra_data,
360        });
361    }
362
363    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
364    file_references.sort_by(|left, right| left.path.cmp(&right.path));
365
366    (
367        dependencies,
368        file_references,
369        top_level_artifact,
370        variant_metadata,
371    )
372}
373
374fn build_dependency_extra_data(
375    dependency: &JsonMap<String, Value>,
376    variant_name: &str,
377    scope: Option<&str>,
378) -> Option<HashMap<String, Value>> {
379    let mut extra = HashMap::new();
380    extra.insert(
381        "variant_names".to_string(),
382        Value::Array(vec![Value::String(variant_name.to_string())]),
383    );
384    if let Some(scope) = scope {
385        extra.insert(
386            "variant_scopes".to_string(),
387            Value::Array(vec![Value::String(scope.to_string())]),
388        );
389    }
390
391    let mut variant_entry = JsonMap::new();
392    variant_entry.insert(
393        "variant_name".to_string(),
394        Value::String(variant_name.to_string()),
395    );
396    if let Some(scope) = scope {
397        variant_entry.insert(
398            "variant_scope".to_string(),
399            Value::String(scope.to_string()),
400        );
401    }
402    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
403    extra.insert(
404        "variant_dependency_entries".to_string(),
405        Value::Array(vec![Value::Object(variant_entry)]),
406    );
407
408    for field in [
409        FIELD_ATTRIBUTES,
410        "reason",
411        "requestedCapabilities",
412        "excludes",
413        "endorseStrictVersions",
414        "thirdPartyCompatibility",
415        "version",
416    ] {
417        if let Some(value) = dependency.get(field) {
418            extra.insert(field.to_string(), value.clone());
419        }
420    }
421    (!extra.is_empty()).then_some(extra)
422}
423
424fn merge_dependency_extra_data(
425    current: Option<HashMap<String, Value>>,
426    next: Option<HashMap<String, Value>>,
427) -> Option<HashMap<String, Value>> {
428    match (current, next) {
429        (None, None) => None,
430        (Some(map), None) | (None, Some(map)) => Some(map),
431        (Some(mut current), Some(mut next)) => {
432            merge_string_arrays(&mut current, &mut next, "variant_names");
433            merge_string_arrays(&mut current, &mut next, "variant_scopes");
434            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
435            for (key, value) in next {
436                current.entry(key).or_insert(value);
437            }
438            Some(current)
439        }
440    }
441}
442
443fn merge_object_arrays(
444    current: &mut HashMap<String, Value>,
445    next: &mut HashMap<String, Value>,
446    key: &str,
447) {
448    let existing = current
449        .remove(key)
450        .and_then(|value| value.as_array().cloned());
451    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
452
453    let mut values = Vec::new();
454    for array in [existing, incoming].into_iter().flatten() {
455        for value in array {
456            if !values.contains(&value) {
457                values.push(value);
458            }
459        }
460    }
461
462    if !values.is_empty() {
463        current.insert(key.to_string(), Value::Array(values));
464    }
465}
466
467fn merge_string_arrays(
468    current: &mut HashMap<String, Value>,
469    next: &mut HashMap<String, Value>,
470    key: &str,
471) {
472    let existing = current
473        .remove(key)
474        .and_then(|value| value.as_array().cloned());
475    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
476
477    let mut values = Vec::new();
478    for array in [existing, incoming].into_iter().flatten() {
479        for value in array
480            .into_iter()
481            .filter_map(|value| value.as_str().map(|s| s.to_string()))
482        {
483            if !values.contains(&value) {
484                values.push(value);
485            }
486        }
487    }
488
489    if !values.is_empty() {
490        current.insert(
491            key.to_string(),
492            Value::Array(values.into_iter().map(Value::String).collect()),
493        );
494    }
495}
496
497fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
498    let variant_name = variant
499        .get("name")
500        .and_then(Value::as_str)
501        .unwrap_or_default()
502        .to_ascii_lowercase();
503
504    if variant_name.contains("test") {
505        return Some("test".to_string());
506    }
507
508    let usage = variant
509        .get(FIELD_ATTRIBUTES)
510        .and_then(Value::as_object)
511        .and_then(|attributes| attributes.get("org.gradle.usage"))
512        .and_then(Value::as_str)
513        .unwrap_or_default()
514        .to_ascii_lowercase();
515
516    if usage.contains("api") || (variant_name.contains("api") && !variant_name.contains("runtime"))
517    {
518        return Some("compile".to_string());
519    }
520    if usage.contains("runtime") || variant_name.contains("runtime") {
521        return Some("runtime".to_string());
522    }
523
524    (!variant_name.is_empty()).then_some(variant_name)
525}
526
/// Ranks a scope for merging purposes; lower values take priority.
///
/// `compile` -> 0, `runtime` -> 1, `test` -> 2, anything else (including
/// `None`) -> 3.
fn scope_precedence(scope: Option<&str>) -> u8 {
    const RANKED_SCOPES: [(&str, u8); 3] = [("compile", 0), ("runtime", 1), ("test", 2)];
    const UNRANKED: u8 = 3;

    let Some(scope) = scope else {
        return UNRANKED;
    };
    RANKED_SCOPES
        .iter()
        .find(|(name, _)| *name == scope)
        .map_or(UNRANKED, |&(_, rank)| rank)
}
535
536fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
537    match version_value {
538        Some(Value::String(version)) => Some(version.to_string()),
539        Some(Value::Object(version)) => version
540            .get("strictly")
541            .or_else(|| version.get("requires"))
542            .or_else(|| version.get("prefers"))
543            .and_then(Value::as_str)
544            .map(|value| value.to_string()),
545        _ => None,
546    }
547}
548
549fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
550    let sha256 = file
551        .get("sha256")
552        .and_then(Value::as_str)
553        .and_then(|value| Sha256Digest::from_hex(value).ok());
554
555    let sha512_field = file.get("sha512").and_then(Value::as_str);
556    let (sha256, sha512) = match sha512_field {
557        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
558            let misassigned = Sha256Digest::from_hex(hex).ok();
559            (sha256.or(misassigned), None)
560        }
561        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
562        None => (sha256, None),
563    };
564
565    (
566        file.get("size").and_then(Value::as_u64),
567        file.get("sha1")
568            .and_then(Value::as_str)
569            .and_then(|value| Sha1Digest::from_hex(value).ok()),
570        file.get("md5")
571            .and_then(Value::as_str)
572            .and_then(|value| Md5Digest::from_hex(value).ok()),
573        sha256,
574        sha512,
575    )
576}
577
578fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
579    let mut purl = PackageUrl::new("maven", name).ok()?;
580    if !namespace.trim().is_empty() {
581        purl.with_namespace(namespace).ok()?;
582    }
583    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
584        purl.with_version(version).ok()?;
585    }
586    Some(purl.to_string())
587}
588
/// Returns `true` when `version` names a single exact version.
///
/// A version is not exact when it is empty, when it contains range or
/// wildcard syntax (`[`, `]`, `(`, `)`, `,`, `+`, `*`, `>`, `<`, or a space),
/// or when it is a Gradle dynamic selector of the form `latest.<status>`
/// (for example `latest.release` or `latest.integration`).
fn is_exact_version(version: &str) -> bool {
    const DYNAMIC_MARKERS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    !version.is_empty()
        && !version.starts_with("latest.")
        && !version.contains(DYNAMIC_MARKERS.as_slice())
}
601
602fn default_package_data() -> PackageData {
603    PackageData {
604        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
605        datasource_id: Some(DatasourceId::GradleModule),
606        ..Default::default()
607    }
608}
609
// Registers this parser with the crate-wide parser registry.
// NOTE(review): the arguments appear to be, in order, a human-readable
// description, the path glob(s) handled, the package type, the primary
// language and a documentation URL — confirm against the `register_parser!`
// macro definition.
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);