Skip to main content

provenant/parsers/
gradle_module.rs

1use std::collections::{HashMap, HashSet};
2use std::fs::File;
3use std::io::BufReader;
4use std::path::Path;
5
6use crate::parser_warn as warn;
7use packageurl::PackageUrl;
8use serde_json::{Map as JsonMap, Value};
9
10use crate::models::{
11    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
12    Sha256Digest, Sha512Digest,
13};
14
15use super::PackageParser;
16
// JSON key names looked up in the parsed Gradle module metadata document.

// Top-level key; must hold a string for the file to be accepted.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
// Top-level object carrying the `group`, `module` and `version` strings.
const FIELD_COMPONENT: &str = "component";
// Top-level object; its `gradle` entry is read for `version` and `buildId`.
const FIELD_CREATED_BY: &str = "createdBy";
// Top-level array of variant objects.
const FIELD_VARIANTS: &str = "variants";
// Attribute object; read on the component, on variants, and copied from dependencies.
const FIELD_ATTRIBUTES: &str = "attributes";
// Per-variant array of dependency objects.
const FIELD_DEPENDENCIES: &str = "dependencies";
// Per-variant value copied verbatim into the recorded variant metadata.
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
// Per-variant array of file objects.
const FIELD_FILES: &str = "files";
// Per-variant value copied verbatim into the recorded variant metadata.
const FIELD_AVAILABLE_AT: &str = "available-at";
26
/// Size and digests read from one file object, in the order
/// `(size, sha1, md5, sha256, sha512)` as produced by `extract_file_hashes`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
34
/// Result of `extract_variant_data`, in the order
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
41
/// Parser for `.module` files that contain Gradle module metadata JSON.
pub struct GradleModuleParser;
43
/// Accumulator for one deduplicated dependency while variants are being walked.
///
/// It is later converted field-for-field into a `Dependency`; `precedence` is
/// bookkeeping only and is not carried over.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    // Maven purl built from group, module and the extracted requirement.
    purl: Option<String>,
    // Version requirement string as extracted from the dependency's `version`.
    extracted_requirement: Option<String>,
    // Scope of the best-ranked variant that declared this dependency.
    scope: Option<String>,
    is_runtime: Option<bool>,
    is_optional: Option<bool>,
    // True when the requirement passes `is_exact_version`.
    is_pinned: Option<bool>,
    // Merged per-variant details about this dependency.
    extra_data: Option<HashMap<String, Value>>,
    // Rank from `scope_precedence`; a lower value replaces the stored scope.
    precedence: u8,
}
55
56impl PackageParser for GradleModuleParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Maven;
58
59    fn is_match(path: &Path) -> bool {
60        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
61            return false;
62        }
63
64        let Ok(file) = File::open(path) else {
65            return false;
66        };
67
68        let Ok(value) = serde_json::from_reader::<_, Value>(BufReader::new(file)) else {
69            return false;
70        };
71
72        is_gradle_module_json(&value)
73    }
74
75    fn extract_packages(path: &Path) -> Vec<PackageData> {
76        let file = match File::open(path) {
77            Ok(file) => file,
78            Err(e) => {
79                warn!("Failed to open Gradle module file at {:?}: {}", path, e);
80                return vec![default_package_data()];
81            }
82        };
83
84        let json: Value = match serde_json::from_reader(BufReader::new(file)) {
85            Ok(json) => json,
86            Err(e) => {
87                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
88                return vec![default_package_data()];
89            }
90        };
91
92        if !is_gradle_module_json(&json) {
93            warn!("File at {:?} is not valid Gradle module metadata", path);
94            return vec![default_package_data()];
95        }
96
97        vec![parse_gradle_module(&json)]
98    }
99}
100
101fn is_gradle_module_json(json: &Value) -> bool {
102    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
103        return false;
104    };
105
106    json.get(FIELD_FORMAT_VERSION)
107        .and_then(Value::as_str)
108        .is_some()
109        && component.get("group").and_then(Value::as_str).is_some()
110        && component.get("module").and_then(Value::as_str).is_some()
111        && component.get("version").and_then(Value::as_str).is_some()
112}
113
114fn parse_gradle_module(json: &Value) -> PackageData {
115    let component = json
116        .get(FIELD_COMPONENT)
117        .and_then(Value::as_object)
118        .cloned()
119        .unwrap_or_default();
120
121    let namespace = component
122        .get("group")
123        .and_then(Value::as_str)
124        .map(|value| value.to_string());
125    let name = component
126        .get("module")
127        .and_then(Value::as_str)
128        .map(|value| value.to_string());
129    let version = component
130        .get("version")
131        .and_then(Value::as_str)
132        .map(|value| value.to_string());
133
134    let (dependencies, file_references, top_level_artifact, variant_metadata) =
135        extract_variant_data(json.get(FIELD_VARIANTS).and_then(Value::as_array));
136
137    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
138        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
139        _ => None,
140    };
141
142    let mut extra_data = HashMap::new();
143    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
144        extra_data.insert(
145            "format_version".to_string(),
146            Value::String(format_version.to_string()),
147        );
148    }
149
150    if let Some(gradle_object) = json
151        .get(FIELD_CREATED_BY)
152        .and_then(Value::as_object)
153        .and_then(|created_by| created_by.get("gradle"))
154        .and_then(Value::as_object)
155    {
156        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
157            extra_data.insert(
158                "gradle_version".to_string(),
159                Value::String(gradle_version.to_string()),
160            );
161        }
162        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
163            extra_data.insert("build_id".to_string(), Value::String(build_id.to_string()));
164        }
165    }
166
167    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
168        && !attributes.is_empty()
169    {
170        extra_data.insert(
171            "component_attributes".to_string(),
172            Value::Object(attributes.clone()),
173        );
174    }
175
176    if !variant_metadata.is_empty() {
177        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
178    }
179
180    let (size, sha1, md5, sha256, sha512) = top_level_artifact
181        .as_ref()
182        .map(extract_file_hashes)
183        .unwrap_or((None, None, None, None, None));
184
185    PackageData {
186        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
187        namespace,
188        name,
189        version,
190        qualifiers: None,
191        subpath: None,
192        primary_language: Some("Java".to_string()),
193        description: None,
194        release_date: None,
195        parties: Vec::new(),
196        keywords: Vec::new(),
197        homepage_url: None,
198        download_url: None,
199        size,
200        sha1,
201        md5,
202        sha256,
203        sha512,
204        bug_tracking_url: None,
205        code_view_url: None,
206        vcs_url: None,
207        copyright: None,
208        holder: None,
209        declared_license_expression: None,
210        declared_license_expression_spdx: None,
211        license_detections: Vec::new(),
212        other_license_expression: None,
213        other_license_expression_spdx: None,
214        other_license_detections: Vec::new(),
215        extracted_license_statement: None,
216        notice_text: None,
217        source_packages: Vec::new(),
218        file_references,
219        is_private: false,
220        is_virtual: false,
221        extra_data: (!extra_data.is_empty()).then_some(extra_data),
222        dependencies,
223        repository_homepage_url: None,
224        repository_download_url: None,
225        api_data_url: None,
226        datasource_id: Some(DatasourceId::GradleModule),
227        purl,
228    }
229}
230
231fn extract_variant_data(variants: Option<&Vec<Value>>) -> ExtractedVariantData {
232    let mut dependencies = Vec::new();
233    let mut file_references = Vec::new();
234    let mut variant_metadata = Vec::new();
235    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
236        HashMap::new();
237    let mut seen_files: HashSet<String> = HashSet::new();
238    let mut top_level_artifact: Option<JsonMap<String, Value>> = None;
239
240    for variant in variants.into_iter().flatten().filter_map(Value::as_object) {
241        let category = variant
242            .get(FIELD_ATTRIBUTES)
243            .and_then(Value::as_object)
244            .and_then(|attrs| attrs.get("org.gradle.category"))
245            .and_then(Value::as_str)
246            .unwrap_or_default();
247        let is_documentation = category == "documentation";
248
249        let variant_name = variant
250            .get("name")
251            .and_then(Value::as_str)
252            .unwrap_or_default()
253            .to_string();
254        let scope = classify_variant_scope(variant);
255        let precedence = scope_precedence(scope.as_deref());
256        let is_runtime = match scope.as_deref() {
257            Some("compile") | Some("runtime") => Some(true),
258            Some("test") => Some(false),
259            _ => None,
260        };
261        let is_optional = None;
262
263        let mut variant_entry = JsonMap::new();
264        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
265        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
266            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
267        }
268        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
269            variant_entry.insert("available_at".to_string(), available_at.clone());
270        }
271        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
272            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
273        }
274        variant_metadata.push(Value::Object(variant_entry));
275
276        if !is_documentation {
277            if top_level_artifact.is_none() {
278                top_level_artifact = variant
279                    .get(FIELD_FILES)
280                    .and_then(Value::as_array)
281                    .and_then(|files| files.first())
282                    .and_then(Value::as_object)
283                    .cloned();
284            }
285
286            if let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array) {
287                for file in files.iter().filter_map(Value::as_object) {
288                    let file_path = file
289                        .get("url")
290                        .and_then(Value::as_str)
291                        .or_else(|| file.get("name").and_then(Value::as_str))
292                        .unwrap_or_default()
293                        .to_string();
294                    if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
295                        continue;
296                    }
297                    let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
298                    let mut extra_data = HashMap::new();
299                    if let Some(name) = file.get("name").and_then(Value::as_str) {
300                        extra_data.insert("name".to_string(), Value::String(name.to_string()));
301                    }
302                    file_references.push(FileReference {
303                        path: file_path,
304                        size,
305                        sha1,
306                        md5,
307                        sha256,
308                        sha512,
309                        extra_data: (!extra_data.is_empty()).then_some(extra_data),
310                    });
311                }
312            }
313        }
314
315        if is_documentation {
316            continue;
317        }
318
319        for dependency in variant
320            .get(FIELD_DEPENDENCIES)
321            .and_then(Value::as_array)
322            .into_iter()
323            .flatten()
324            .filter_map(Value::as_object)
325        {
326            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
327                continue;
328            };
329            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
330                continue;
331            };
332
333            let requirement = extract_dependency_requirement(dependency.get("version"));
334            let key = (group.to_string(), module.to_string(), requirement.clone());
335            let purl = build_maven_purl(group, module, requirement.as_deref());
336            let dep_extra_data =
337                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
338
339            let entry = seen_dependencies.entry(key).or_default();
340            if precedence < entry.precedence || entry.scope.is_none() {
341                entry.scope = scope.clone();
342                entry.is_runtime = is_runtime;
343                entry.is_optional = is_optional;
344                entry.precedence = precedence;
345            }
346            entry.purl = purl;
347            entry.extracted_requirement = requirement.clone();
348            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
349            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
350        }
351    }
352
353    for dep in seen_dependencies.into_values() {
354        dependencies.push(Dependency {
355            purl: dep.purl,
356            extracted_requirement: dep.extracted_requirement,
357            scope: dep.scope,
358            is_runtime: dep.is_runtime,
359            is_optional: dep.is_optional,
360            is_pinned: dep.is_pinned,
361            is_direct: Some(true),
362            resolved_package: None,
363            extra_data: dep.extra_data,
364        });
365    }
366
367    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
368    file_references.sort_by(|left, right| left.path.cmp(&right.path));
369
370    (
371        dependencies,
372        file_references,
373        top_level_artifact,
374        variant_metadata,
375    )
376}
377
378fn build_dependency_extra_data(
379    dependency: &JsonMap<String, Value>,
380    variant_name: &str,
381    scope: Option<&str>,
382) -> Option<HashMap<String, Value>> {
383    let mut extra = HashMap::new();
384    extra.insert(
385        "variant_names".to_string(),
386        Value::Array(vec![Value::String(variant_name.to_string())]),
387    );
388    if let Some(scope) = scope {
389        extra.insert(
390            "variant_scopes".to_string(),
391            Value::Array(vec![Value::String(scope.to_string())]),
392        );
393    }
394
395    let mut variant_entry = JsonMap::new();
396    variant_entry.insert(
397        "variant_name".to_string(),
398        Value::String(variant_name.to_string()),
399    );
400    if let Some(scope) = scope {
401        variant_entry.insert(
402            "variant_scope".to_string(),
403            Value::String(scope.to_string()),
404        );
405    }
406    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
407    extra.insert(
408        "variant_dependency_entries".to_string(),
409        Value::Array(vec![Value::Object(variant_entry)]),
410    );
411
412    for field in [
413        FIELD_ATTRIBUTES,
414        "reason",
415        "requestedCapabilities",
416        "excludes",
417        "endorseStrictVersions",
418        "thirdPartyCompatibility",
419        "version",
420    ] {
421        if let Some(value) = dependency.get(field) {
422            extra.insert(field.to_string(), value.clone());
423        }
424    }
425    (!extra.is_empty()).then_some(extra)
426}
427
428fn merge_dependency_extra_data(
429    current: Option<HashMap<String, Value>>,
430    next: Option<HashMap<String, Value>>,
431) -> Option<HashMap<String, Value>> {
432    match (current, next) {
433        (None, None) => None,
434        (Some(map), None) | (None, Some(map)) => Some(map),
435        (Some(mut current), Some(mut next)) => {
436            merge_string_arrays(&mut current, &mut next, "variant_names");
437            merge_string_arrays(&mut current, &mut next, "variant_scopes");
438            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
439            for (key, value) in next {
440                current.entry(key).or_insert(value);
441            }
442            Some(current)
443        }
444    }
445}
446
447fn merge_object_arrays(
448    current: &mut HashMap<String, Value>,
449    next: &mut HashMap<String, Value>,
450    key: &str,
451) {
452    let existing = current
453        .remove(key)
454        .and_then(|value| value.as_array().cloned());
455    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
456
457    let mut values = Vec::new();
458    for array in [existing, incoming].into_iter().flatten() {
459        for value in array {
460            if !values.contains(&value) {
461                values.push(value);
462            }
463        }
464    }
465
466    if !values.is_empty() {
467        current.insert(key.to_string(), Value::Array(values));
468    }
469}
470
471fn merge_string_arrays(
472    current: &mut HashMap<String, Value>,
473    next: &mut HashMap<String, Value>,
474    key: &str,
475) {
476    let existing = current
477        .remove(key)
478        .and_then(|value| value.as_array().cloned());
479    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
480
481    let mut values = Vec::new();
482    for array in [existing, incoming].into_iter().flatten() {
483        for value in array
484            .into_iter()
485            .filter_map(|value| value.as_str().map(|s| s.to_string()))
486        {
487            if !values.contains(&value) {
488                values.push(value);
489            }
490        }
491    }
492
493    if !values.is_empty() {
494        current.insert(
495            key.to_string(),
496            Value::Array(values.into_iter().map(Value::String).collect()),
497        );
498    }
499}
500
501fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
502    let attributes = variant.get(FIELD_ATTRIBUTES).and_then(Value::as_object);
503
504    let category = attributes
505        .and_then(|attributes| attributes.get("org.gradle.category"))
506        .and_then(Value::as_str)
507        .unwrap_or_default()
508        .to_ascii_lowercase();
509
510    if category == "verification" {
511        return Some("test".to_string());
512    }
513
514    let usage = attributes
515        .and_then(|attributes| attributes.get("org.gradle.usage"))
516        .and_then(Value::as_str)
517        .unwrap_or_default()
518        .to_ascii_lowercase();
519
520    if usage.contains("api") {
521        return Some("compile".to_string());
522    }
523    if usage.contains("runtime") {
524        return Some("runtime".to_string());
525    }
526
527    None
528}
529
/// Ranks a scope for deduplication; a lower rank wins.
///
/// `compile` is 0, `runtime` is 1, `test` is 2, and everything else
/// (including `None`) is 3.
fn scope_precedence(scope: Option<&str>) -> u8 {
    const RANKED_SCOPES: [(&str, u8); 3] = [("compile", 0), ("runtime", 1), ("test", 2)];
    const UNRANKED: u8 = 3;

    scope
        .and_then(|wanted| RANKED_SCOPES.iter().find(|entry| entry.0 == wanted))
        .map_or(UNRANKED, |entry| entry.1)
}
538
539fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
540    match version_value {
541        Some(Value::String(version)) => Some(version.to_string()),
542        Some(Value::Object(version)) => version
543            .get("strictly")
544            .or_else(|| version.get("requires"))
545            .or_else(|| version.get("prefers"))
546            .and_then(Value::as_str)
547            .map(|value| value.to_string()),
548        _ => None,
549    }
550}
551
552fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
553    let sha256 = file
554        .get("sha256")
555        .and_then(Value::as_str)
556        .and_then(|value| Sha256Digest::from_hex(value).ok());
557
558    let sha512_field = file.get("sha512").and_then(Value::as_str);
559    let (sha256, sha512) = match sha512_field {
560        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
561            let misassigned = Sha256Digest::from_hex(hex).ok();
562            (sha256.or(misassigned), None)
563        }
564        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
565        None => (sha256, None),
566    };
567
568    (
569        file.get("size").and_then(Value::as_u64),
570        file.get("sha1")
571            .and_then(Value::as_str)
572            .and_then(|value| Sha1Digest::from_hex(value).ok()),
573        file.get("md5")
574            .and_then(Value::as_str)
575            .and_then(|value| Md5Digest::from_hex(value).ok()),
576        sha256,
577        sha512,
578    )
579}
580
581fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
582    let mut purl = PackageUrl::new("maven", name).ok()?;
583    if !namespace.trim().is_empty() {
584        purl.with_namespace(namespace).ok()?;
585    }
586    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
587        purl.with_version(version).ok()?;
588    }
589    Some(purl.to_string())
590}
591
/// Returns `true` when `version` names a single concrete version.
///
/// A version is not exact when it is empty, is a Gradle `latest.<status>`
/// selector (such as `latest.release` or `latest.integration`), or contains
/// any range, wildcard, comparison or whitespace marker
/// (`[ ] ( ) , + * > <` or a space).
fn is_exact_version(version: &str) -> bool {
    // Characters that only appear in ranges, prefix or wildcard selectors, or
    // multi-part requirement expressions.
    const DYNAMIC_MARKERS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    !version.is_empty()
        && !version.starts_with("latest.")
        && !version.contains(&DYNAMIC_MARKERS[..])
}
604
/// Returns a placeholder `PackageData` with only `package_type` and
/// `datasource_id` set; every other field takes its `Default` value.
///
/// `extract_packages` returns this when the file cannot be opened, cannot be
/// parsed as JSON, or is not recognised as Gradle module metadata.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
        datasource_id: Some(DatasourceId::GradleModule),
        ..Default::default()
    }
}
612
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the arguments look like a description, path globs, a package
// type, a primary language and a documentation URL. This is inferred from the
// values only; confirm against the macro definition.
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);