Skip to main content

provenant/parsers/
gradle_module.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
6use packageurl::PackageUrl;
7use serde_json::{Map as JsonMap, Value};
8
9use crate::models::{
10    DatasourceId, Dependency, FileReference, Md5Digest, PackageData, PackageType, Sha1Digest,
11    Sha256Digest, Sha512Digest,
12};
13
14use super::PackageParser;
15
// Keys looked up in the Gradle module metadata JSON document.

// Top-level keys.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
const FIELD_COMPONENT: &str = "component";
// The nested `gradle` object under this key is read for `version` and `buildId`.
const FIELD_CREATED_BY: &str = "createdBy";
const FIELD_VARIANTS: &str = "variants";
// Read on the component object, on each variant, and copied from each dependency.
const FIELD_ATTRIBUTES: &str = "attributes";
// Per-variant keys.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
const FIELD_FILES: &str = "files";
const FIELD_AVAILABLE_AT: &str = "available-at";
25
/// File size and digests in the order produced by `extract_file_hashes`:
/// `(size, sha1, md5, sha256, sha512)`.
type ArtifactHashes = (
    Option<u64>,
    Option<Sha1Digest>,
    Option<Md5Digest>,
    Option<Sha256Digest>,
    Option<Sha512Digest>,
);
33
/// Result of `extract_variant_data`, in order:
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
40
/// Parser for Gradle module metadata (`*.module`) files; the behavior lives in
/// its `PackageParser` implementation.
pub struct GradleModuleParser;
42
/// Accumulator for one dependency while `extract_variant_data` merges the
/// occurrences of the same `(group, module, requirement)` across variants.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    // Package URL string from `build_maven_purl`, passed through `truncate_field`.
    purl: Option<String>,
    // Version requirement as returned by `extract_dependency_requirement`.
    extracted_requirement: Option<String>,
    // Scope of the variant that currently "wins" for this dependency.
    scope: Option<String>,
    is_runtime: Option<bool>,
    is_optional: Option<bool>,
    // Set from `is_exact_version` applied to the requirement.
    is_pinned: Option<bool>,
    // Merged across variants by `merge_dependency_extra_data`.
    extra_data: Option<HashMap<String, Value>>,
    // `scope_precedence` of the winning variant; a strictly lower value replaces it.
    precedence: u8,
}
54
55impl PackageParser for GradleModuleParser {
56    const PACKAGE_TYPE: PackageType = PackageType::Maven;
57
58    fn is_match(path: &Path) -> bool {
59        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
60            return false;
61        }
62
63        let Ok(content) = read_file_to_string(path, None) else {
64            return false;
65        };
66
67        let Ok(value) = serde_json::from_str(&content) else {
68            return false;
69        };
70
71        is_gradle_module_json(&value)
72    }
73
74    fn extract_packages(path: &Path) -> Vec<PackageData> {
75        let content = match read_file_to_string(path, None) {
76            Ok(content) => content,
77            Err(e) => {
78                warn!("Failed to read Gradle module file at {:?}: {}", path, e);
79                return vec![default_package_data()];
80            }
81        };
82
83        let json: Value = match serde_json::from_str(&content) {
84            Ok(json) => json,
85            Err(e) => {
86                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
87                return vec![default_package_data()];
88            }
89        };
90
91        if !is_gradle_module_json(&json) {
92            warn!("File at {:?} is not valid Gradle module metadata", path);
93            return vec![default_package_data()];
94        }
95
96        vec![parse_gradle_module(&json)]
97    }
98}
99
100fn is_gradle_module_json(json: &Value) -> bool {
101    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
102        return false;
103    };
104
105    json.get(FIELD_FORMAT_VERSION)
106        .and_then(Value::as_str)
107        .is_some()
108        && component.get("group").and_then(Value::as_str).is_some()
109        && component.get("module").and_then(Value::as_str).is_some()
110        && component.get("version").and_then(Value::as_str).is_some()
111}
112
113fn parse_gradle_module(json: &Value) -> PackageData {
114    let component = json
115        .get(FIELD_COMPONENT)
116        .and_then(Value::as_object)
117        .cloned()
118        .unwrap_or_default();
119
120    let namespace = component
121        .get("group")
122        .and_then(Value::as_str)
123        .map(|value| truncate_field(value.to_string()));
124    let name = component
125        .get("module")
126        .and_then(Value::as_str)
127        .map(|value| truncate_field(value.to_string()));
128    let version = component
129        .get("version")
130        .and_then(Value::as_str)
131        .map(|value| truncate_field(value.to_string()));
132
133    let (dependencies, file_references, top_level_artifact, variant_metadata) =
134        extract_variant_data(json.get(FIELD_VARIANTS).and_then(Value::as_array));
135
136    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
137        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
138        _ => None,
139    };
140
141    let mut extra_data = HashMap::new();
142    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
143        extra_data.insert(
144            "format_version".to_string(),
145            Value::String(truncate_field(format_version.to_string())),
146        );
147    }
148
149    if let Some(gradle_object) = json
150        .get(FIELD_CREATED_BY)
151        .and_then(Value::as_object)
152        .and_then(|created_by| created_by.get("gradle"))
153        .and_then(Value::as_object)
154    {
155        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
156            extra_data.insert(
157                "gradle_version".to_string(),
158                Value::String(truncate_field(gradle_version.to_string())),
159            );
160        }
161        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
162            extra_data.insert(
163                "build_id".to_string(),
164                Value::String(truncate_field(build_id.to_string())),
165            );
166        }
167    }
168
169    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
170        && !attributes.is_empty()
171    {
172        extra_data.insert(
173            "component_attributes".to_string(),
174            Value::Object(attributes.clone()),
175        );
176    }
177
178    if !variant_metadata.is_empty() {
179        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
180    }
181
182    let (size, sha1, md5, sha256, sha512) = top_level_artifact
183        .as_ref()
184        .map(extract_file_hashes)
185        .unwrap_or((None, None, None, None, None));
186
187    PackageData {
188        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
189        namespace,
190        name,
191        version,
192        qualifiers: None,
193        subpath: None,
194        primary_language: Some("Java".to_string()),
195        description: None,
196        release_date: None,
197        parties: Vec::new(),
198        keywords: Vec::new(),
199        homepage_url: None,
200        download_url: None,
201        size,
202        sha1,
203        md5,
204        sha256,
205        sha512,
206        bug_tracking_url: None,
207        code_view_url: None,
208        vcs_url: None,
209        copyright: None,
210        holder: None,
211        declared_license_expression: None,
212        declared_license_expression_spdx: None,
213        license_detections: Vec::new(),
214        other_license_expression: None,
215        other_license_expression_spdx: None,
216        other_license_detections: Vec::new(),
217        extracted_license_statement: None,
218        notice_text: None,
219        source_packages: Vec::new(),
220        file_references,
221        is_private: false,
222        is_virtual: false,
223        extra_data: (!extra_data.is_empty()).then_some(extra_data),
224        dependencies,
225        repository_homepage_url: None,
226        repository_download_url: None,
227        api_data_url: None,
228        datasource_id: Some(DatasourceId::GradleModule),
229        purl,
230    }
231}
232
233fn extract_variant_data(variants: Option<&Vec<Value>>) -> ExtractedVariantData {
234    let mut dependencies = Vec::new();
235    let mut file_references = Vec::new();
236    let mut variant_metadata = Vec::new();
237    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
238        HashMap::new();
239    let mut seen_files: HashSet<String> = HashSet::new();
240    let mut top_level_artifact: Option<JsonMap<String, Value>> = None;
241
242    for variant in variants
243        .into_iter()
244        .flatten()
245        .filter_map(Value::as_object)
246        .take(MAX_ITERATION_COUNT)
247    {
248        let category = variant
249            .get(FIELD_ATTRIBUTES)
250            .and_then(Value::as_object)
251            .and_then(|attrs| attrs.get("org.gradle.category"))
252            .and_then(Value::as_str)
253            .unwrap_or_default();
254        let is_documentation = category == "documentation";
255
256        let variant_name = truncate_field(
257            variant
258                .get("name")
259                .and_then(Value::as_str)
260                .unwrap_or_default()
261                .to_string(),
262        );
263        let scope = classify_variant_scope(variant);
264        let precedence = scope_precedence(scope.as_deref());
265        let is_runtime = match scope.as_deref() {
266            Some("compile") | Some("runtime") => Some(true),
267            Some("test") => Some(false),
268            _ => None,
269        };
270        let is_optional = None;
271
272        let mut variant_entry = JsonMap::new();
273        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
274        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
275            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
276        }
277        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
278            variant_entry.insert("available_at".to_string(), available_at.clone());
279        }
280        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
281            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
282        }
283        variant_metadata.push(Value::Object(variant_entry));
284
285        if !is_documentation {
286            if top_level_artifact.is_none() {
287                top_level_artifact = variant
288                    .get(FIELD_FILES)
289                    .and_then(Value::as_array)
290                    .and_then(|files| files.first())
291                    .and_then(Value::as_object)
292                    .cloned();
293            }
294
295            if let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array) {
296                for file in files
297                    .iter()
298                    .filter_map(Value::as_object)
299                    .take(MAX_ITERATION_COUNT)
300                {
301                    let file_path = truncate_field(
302                        file.get("url")
303                            .and_then(Value::as_str)
304                            .or_else(|| file.get("name").and_then(Value::as_str))
305                            .unwrap_or_default()
306                            .to_string(),
307                    );
308                    if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
309                        continue;
310                    }
311                    let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
312                    let mut extra_data = HashMap::new();
313                    if let Some(name) = file.get("name").and_then(Value::as_str) {
314                        extra_data.insert(
315                            "name".to_string(),
316                            Value::String(truncate_field(name.to_string())),
317                        );
318                    }
319                    file_references.push(FileReference {
320                        path: file_path,
321                        size,
322                        sha1,
323                        md5,
324                        sha256,
325                        sha512,
326                        extra_data: (!extra_data.is_empty()).then_some(extra_data),
327                    });
328                }
329            }
330        }
331
332        if is_documentation {
333            continue;
334        }
335
336        for dependency in variant
337            .get(FIELD_DEPENDENCIES)
338            .and_then(Value::as_array)
339            .into_iter()
340            .flatten()
341            .filter_map(Value::as_object)
342            .take(MAX_ITERATION_COUNT)
343        {
344            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
345                continue;
346            };
347            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
348                continue;
349            };
350
351            let requirement = extract_dependency_requirement(dependency.get("version"));
352            let key = (group.to_string(), module.to_string(), requirement.clone());
353            let purl = build_maven_purl(group, module, requirement.as_deref());
354            let dep_extra_data =
355                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
356
357            let entry = seen_dependencies.entry(key).or_default();
358            if precedence < entry.precedence || entry.scope.is_none() {
359                entry.scope = scope.clone();
360                entry.is_runtime = is_runtime;
361                entry.is_optional = is_optional;
362                entry.precedence = precedence;
363            }
364            entry.purl = purl.map(truncate_field);
365            entry.extracted_requirement = requirement.clone();
366            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
367            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
368        }
369    }
370
371    for dep in seen_dependencies.into_values() {
372        dependencies.push(Dependency {
373            purl: dep.purl,
374            extracted_requirement: dep.extracted_requirement,
375            scope: dep.scope,
376            is_runtime: dep.is_runtime,
377            is_optional: dep.is_optional,
378            is_pinned: dep.is_pinned,
379            is_direct: Some(true),
380            resolved_package: None,
381            extra_data: dep.extra_data,
382        });
383    }
384
385    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
386    file_references.sort_by(|left, right| left.path.cmp(&right.path));
387
388    (
389        dependencies,
390        file_references,
391        top_level_artifact,
392        variant_metadata,
393    )
394}
395
396fn build_dependency_extra_data(
397    dependency: &JsonMap<String, Value>,
398    variant_name: &str,
399    scope: Option<&str>,
400) -> Option<HashMap<String, Value>> {
401    let mut extra = HashMap::new();
402    extra.insert(
403        "variant_names".to_string(),
404        Value::Array(vec![Value::String(variant_name.to_string())]),
405    );
406    if let Some(scope) = scope {
407        extra.insert(
408            "variant_scopes".to_string(),
409            Value::Array(vec![Value::String(scope.to_string())]),
410        );
411    }
412
413    let mut variant_entry = JsonMap::new();
414    variant_entry.insert(
415        "variant_name".to_string(),
416        Value::String(variant_name.to_string()),
417    );
418    if let Some(scope) = scope {
419        variant_entry.insert(
420            "variant_scope".to_string(),
421            Value::String(scope.to_string()),
422        );
423    }
424    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
425    extra.insert(
426        "variant_dependency_entries".to_string(),
427        Value::Array(vec![Value::Object(variant_entry)]),
428    );
429
430    for field in [
431        FIELD_ATTRIBUTES,
432        "reason",
433        "requestedCapabilities",
434        "excludes",
435        "endorseStrictVersions",
436        "thirdPartyCompatibility",
437        "version",
438    ] {
439        if let Some(value) = dependency.get(field) {
440            extra.insert(field.to_string(), value.clone());
441        }
442    }
443    (!extra.is_empty()).then_some(extra)
444}
445
446fn merge_dependency_extra_data(
447    current: Option<HashMap<String, Value>>,
448    next: Option<HashMap<String, Value>>,
449) -> Option<HashMap<String, Value>> {
450    match (current, next) {
451        (None, None) => None,
452        (Some(map), None) | (None, Some(map)) => Some(map),
453        (Some(mut current), Some(mut next)) => {
454            merge_string_arrays(&mut current, &mut next, "variant_names");
455            merge_string_arrays(&mut current, &mut next, "variant_scopes");
456            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
457            for (key, value) in next {
458                current.entry(key).or_insert(value);
459            }
460            Some(current)
461        }
462    }
463}
464
465fn merge_object_arrays(
466    current: &mut HashMap<String, Value>,
467    next: &mut HashMap<String, Value>,
468    key: &str,
469) {
470    let existing = current
471        .remove(key)
472        .and_then(|value| value.as_array().cloned());
473    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
474
475    let mut values = Vec::new();
476    for array in [existing, incoming].into_iter().flatten() {
477        for value in array {
478            if !values.contains(&value) {
479                values.push(value);
480            }
481        }
482    }
483
484    if !values.is_empty() {
485        current.insert(key.to_string(), Value::Array(values));
486    }
487}
488
489fn merge_string_arrays(
490    current: &mut HashMap<String, Value>,
491    next: &mut HashMap<String, Value>,
492    key: &str,
493) {
494    let existing = current
495        .remove(key)
496        .and_then(|value| value.as_array().cloned());
497    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
498
499    let mut values = Vec::new();
500    for array in [existing, incoming].into_iter().flatten() {
501        for value in array
502            .into_iter()
503            .filter_map(|value| value.as_str().map(|s| s.to_string()))
504        {
505            if !values.contains(&value) {
506                values.push(value);
507            }
508        }
509    }
510
511    if !values.is_empty() {
512        current.insert(
513            key.to_string(),
514            Value::Array(values.into_iter().map(Value::String).collect()),
515        );
516    }
517}
518
519fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
520    let attributes = variant.get(FIELD_ATTRIBUTES).and_then(Value::as_object);
521
522    let category = attributes
523        .and_then(|attributes| attributes.get("org.gradle.category"))
524        .and_then(Value::as_str)
525        .unwrap_or_default()
526        .to_ascii_lowercase();
527
528    if category == "verification" {
529        return Some("test".to_string());
530    }
531
532    let usage = attributes
533        .and_then(|attributes| attributes.get("org.gradle.usage"))
534        .and_then(Value::as_str)
535        .unwrap_or_default()
536        .to_ascii_lowercase();
537
538    if usage.contains("api") {
539        return Some("compile".to_string());
540    }
541    if usage.contains("runtime") {
542        return Some("runtime".to_string());
543    }
544
545    None
546}
547
/// Ranks a scope for merging; lower values win. `compile` is 0, `runtime` 1,
/// `test` 2, and anything else (including no scope) 3.
fn scope_precedence(scope: Option<&str>) -> u8 {
    const RANKED_SCOPES: [(&str, u8); 3] = [("compile", 0), ("runtime", 1), ("test", 2)];
    const UNRANKED: u8 = 3;

    let Some(scope) = scope else {
        return UNRANKED;
    };

    RANKED_SCOPES
        .iter()
        .find(|(name, _)| *name == scope)
        .map_or(UNRANKED, |(_, rank)| *rank)
}
556
557fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
558    match version_value {
559        Some(Value::String(version)) => Some(truncate_field(version.to_string())),
560        Some(Value::Object(version)) => version
561            .get("strictly")
562            .or_else(|| version.get("requires"))
563            .or_else(|| version.get("prefers"))
564            .and_then(Value::as_str)
565            .map(|value| truncate_field(value.to_string())),
566        _ => None,
567    }
568}
569
570fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
571    let sha256 = file
572        .get("sha256")
573        .and_then(Value::as_str)
574        .and_then(|value| Sha256Digest::from_hex(value).ok());
575
576    let sha512_field = file.get("sha512").and_then(Value::as_str);
577    let (sha256, sha512) = match sha512_field {
578        Some(hex) if hex.len() == 64 && hex::decode(hex).is_ok() => {
579            let misassigned = Sha256Digest::from_hex(hex).ok();
580            (sha256.or(misassigned), None)
581        }
582        Some(hex) => (sha256, Sha512Digest::from_hex(hex).ok()),
583        None => (sha256, None),
584    };
585
586    (
587        file.get("size").and_then(Value::as_u64),
588        file.get("sha1")
589            .and_then(Value::as_str)
590            .and_then(|value| Sha1Digest::from_hex(value).ok()),
591        file.get("md5")
592            .and_then(Value::as_str)
593            .and_then(|value| Md5Digest::from_hex(value).ok()),
594        sha256,
595        sha512,
596    )
597}
598
599fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
600    let mut purl = PackageUrl::new("maven", name).ok()?;
601    if !namespace.trim().is_empty() {
602        purl.with_namespace(namespace).ok()?;
603    }
604    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
605        purl.with_version(version).ok()?;
606    }
607    Some(purl.to_string())
608}
609
/// Returns `true` when `version` names one concrete version.
///
/// Returns `false` for:
/// - the empty string (there is nothing to pin to);
/// - Gradle's dynamic `latest.<status>` selectors such as `latest.release`
///   and `latest.integration`;
/// - anything containing range, wildcard or comparison syntax
///   (`[ ] ( ) , + * > <`) or a space.
fn is_exact_version(version: &str) -> bool {
    // Characters that only occur in ranges, prefix/wildcard versions, or
    // comparison expressions.
    const NON_EXACT_CHARS: [char; 10] = ['[', ']', '(', ')', ',', '+', '*', '>', '<', ' '];

    if version.is_empty() || version.starts_with("latest.") {
        return false;
    }

    !version.contains(&NON_EXACT_CHARS[..])
}
622
623fn default_package_data() -> PackageData {
624    PackageData {
625        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
626        datasource_id: Some(DatasourceId::GradleModule),
627        ..Default::default()
628    }
629}
630
// Registration entry for this parser.
// NOTE(review): the macro definition is not visible here; the arguments look
// like (description, path globs, package type, primary language, docs URL) —
// confirm against `register_parser!`.
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);