Skip to main content

provenant/parsers/
gradle_module.rs

1use std::collections::{HashMap, HashSet};
2use std::fs::File;
3use std::io::BufReader;
4use std::path::Path;
5
6use log::warn;
7use packageurl::PackageUrl;
8use serde_json::{Map as JsonMap, Value};
9
10use crate::models::{DatasourceId, Dependency, FileReference, PackageData, PackageType};
11
12use super::PackageParser;
13
// JSON object keys looked up in the parsed module document by the functions in this file.

// Top-level key; must hold a string for the document to be accepted.
const FIELD_FORMAT_VERSION: &str = "formatVersion";
// Top-level object holding the `group` / `module` / `version` strings.
const FIELD_COMPONENT: &str = "component";
// Top-level object; its nested `gradle` object is read for `version` and `buildId`.
const FIELD_CREATED_BY: &str = "createdBy";
// Top-level array of variant objects.
const FIELD_VARIANTS: &str = "variants";
// Attribute object; read on the component, on each variant and on each dependency.
const FIELD_ATTRIBUTES: &str = "attributes";
// Per-variant array of dependency objects.
const FIELD_DEPENDENCIES: &str = "dependencies";
// Per-variant value copied verbatim into the variant metadata.
const FIELD_DEPENDENCY_CONSTRAINTS: &str = "dependencyConstraints";
// Per-variant array of file objects.
const FIELD_FILES: &str = "files";
// Per-variant value copied verbatim into the variant metadata (stored as `available_at`).
const FIELD_AVAILABLE_AT: &str = "available-at";
23
/// Size and checksums read from one file object, in the order
/// `(size, sha1, md5, sha256, sha512)` as produced by `extract_file_hashes`.
/// Each slot is `None` when the corresponding key is missing or has the wrong JSON type.
type ArtifactHashes = (
    Option<u64>,
    Option<String>,
    Option<String>,
    Option<String>,
    Option<String>,
);
31
/// Result of `extract_variant_data`, in the order
/// `(dependencies, file_references, top_level_artifact, variant_metadata)`.
type ExtractedVariantData = (
    Vec<Dependency>,
    Vec<FileReference>,
    Option<JsonMap<String, Value>>,
    Vec<Value>,
);
38
/// Parser for Gradle module metadata files (`*.module` JSON documents).
///
/// Carries no state; all behaviour lives in its `PackageParser` implementation.
pub struct GradleModuleParser;
40
/// Accumulator for one dependency while the variants of a module are being scanned.
///
/// Entries are keyed by `(group, module, requirement)` in `extract_variant_data` and are
/// turned into `Dependency` values once every variant has been visited.
#[derive(Clone, Debug, Default)]
struct ExtractedDependency {
    // Package URL built from group, module and requirement; `None` if it could not be built.
    purl: Option<String>,
    // Version requirement string taken from the dependency's `version` value.
    extracted_requirement: Option<String>,
    // Scope of the winning variant (see `precedence`).
    scope: Option<String>,
    // `Some(true)` unless the winning scope is "test".
    is_runtime: Option<bool>,
    // `Some(true)` only when the winning scope is "test".
    is_optional: Option<bool>,
    // Whether the requirement looks like a single exact version.
    is_pinned: Option<bool>,
    // Merged per-variant details for this dependency.
    extra_data: Option<HashMap<String, Value>>,
    // Rank of `scope` as returned by `scope_precedence`; a lower rank replaces a higher one.
    precedence: u8,
}
52
53impl PackageParser for GradleModuleParser {
54    const PACKAGE_TYPE: PackageType = PackageType::Maven;
55
56    fn is_match(path: &Path) -> bool {
57        if path.extension().and_then(|ext| ext.to_str()) != Some("module") {
58            return false;
59        }
60
61        let Ok(file) = File::open(path) else {
62            return false;
63        };
64
65        let Ok(value) = serde_json::from_reader::<_, Value>(BufReader::new(file)) else {
66            return false;
67        };
68
69        is_gradle_module_json(&value)
70    }
71
72    fn extract_packages(path: &Path) -> Vec<PackageData> {
73        let file = match File::open(path) {
74            Ok(file) => file,
75            Err(e) => {
76                warn!("Failed to open Gradle module file at {:?}: {}", path, e);
77                return vec![default_package_data()];
78            }
79        };
80
81        let json: Value = match serde_json::from_reader(BufReader::new(file)) {
82            Ok(json) => json,
83            Err(e) => {
84                warn!("Failed to parse Gradle module JSON at {:?}: {}", path, e);
85                return vec![default_package_data()];
86            }
87        };
88
89        if !is_gradle_module_json(&json) {
90            warn!("File at {:?} is not valid Gradle module metadata", path);
91            return vec![default_package_data()];
92        }
93
94        vec![parse_gradle_module(&json)]
95    }
96}
97
98fn is_gradle_module_json(json: &Value) -> bool {
99    let Some(component) = json.get(FIELD_COMPONENT).and_then(Value::as_object) else {
100        return false;
101    };
102
103    json.get(FIELD_FORMAT_VERSION)
104        .and_then(Value::as_str)
105        .is_some()
106        && component.get("group").and_then(Value::as_str).is_some()
107        && component.get("module").and_then(Value::as_str).is_some()
108        && component.get("version").and_then(Value::as_str).is_some()
109}
110
111fn parse_gradle_module(json: &Value) -> PackageData {
112    let component = json
113        .get(FIELD_COMPONENT)
114        .and_then(Value::as_object)
115        .cloned()
116        .unwrap_or_default();
117
118    let namespace = component
119        .get("group")
120        .and_then(Value::as_str)
121        .map(|value| value.to_string());
122    let name = component
123        .get("module")
124        .and_then(Value::as_str)
125        .map(|value| value.to_string());
126    let version = component
127        .get("version")
128        .and_then(Value::as_str)
129        .map(|value| value.to_string());
130
131    let (dependencies, file_references, top_level_artifact, variant_metadata) =
132        extract_variant_data(json.get(FIELD_VARIANTS).and_then(Value::as_array));
133
134    let purl = match (namespace.as_deref(), name.as_deref(), version.as_deref()) {
135        (Some(namespace), Some(name), version) => build_maven_purl(namespace, name, version),
136        _ => None,
137    };
138
139    let mut extra_data = HashMap::new();
140    if let Some(format_version) = json.get(FIELD_FORMAT_VERSION).and_then(Value::as_str) {
141        extra_data.insert(
142            "format_version".to_string(),
143            Value::String(format_version.to_string()),
144        );
145    }
146
147    if let Some(gradle_object) = json
148        .get(FIELD_CREATED_BY)
149        .and_then(Value::as_object)
150        .and_then(|created_by| created_by.get("gradle"))
151        .and_then(Value::as_object)
152    {
153        if let Some(gradle_version) = gradle_object.get("version").and_then(Value::as_str) {
154            extra_data.insert(
155                "gradle_version".to_string(),
156                Value::String(gradle_version.to_string()),
157            );
158        }
159        if let Some(build_id) = gradle_object.get("buildId").and_then(Value::as_str) {
160            extra_data.insert("build_id".to_string(), Value::String(build_id.to_string()));
161        }
162    }
163
164    if let Some(attributes) = component.get(FIELD_ATTRIBUTES).and_then(Value::as_object)
165        && !attributes.is_empty()
166    {
167        extra_data.insert(
168            "component_attributes".to_string(),
169            Value::Object(attributes.clone()),
170        );
171    }
172
173    if !variant_metadata.is_empty() {
174        extra_data.insert("variants".to_string(), Value::Array(variant_metadata));
175    }
176
177    let (size, sha1, md5, sha256, sha512) = top_level_artifact
178        .as_ref()
179        .map(extract_file_hashes)
180        .unwrap_or((None, None, None, None, None));
181
182    PackageData {
183        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
184        namespace,
185        name,
186        version,
187        qualifiers: None,
188        subpath: None,
189        primary_language: Some("Java".to_string()),
190        description: None,
191        release_date: None,
192        parties: Vec::new(),
193        keywords: Vec::new(),
194        homepage_url: None,
195        download_url: None,
196        size,
197        sha1,
198        md5,
199        sha256,
200        sha512,
201        bug_tracking_url: None,
202        code_view_url: None,
203        vcs_url: None,
204        copyright: None,
205        holder: None,
206        declared_license_expression: None,
207        declared_license_expression_spdx: None,
208        license_detections: Vec::new(),
209        other_license_expression: None,
210        other_license_expression_spdx: None,
211        other_license_detections: Vec::new(),
212        extracted_license_statement: None,
213        notice_text: None,
214        source_packages: Vec::new(),
215        file_references,
216        is_private: false,
217        is_virtual: false,
218        extra_data: (!extra_data.is_empty()).then_some(extra_data),
219        dependencies,
220        repository_homepage_url: None,
221        repository_download_url: None,
222        api_data_url: None,
223        datasource_id: Some(DatasourceId::GradleModule),
224        purl,
225    }
226}
227
228fn extract_variant_data(variants: Option<&Vec<Value>>) -> ExtractedVariantData {
229    let mut dependencies = Vec::new();
230    let mut file_references = Vec::new();
231    let mut variant_metadata = Vec::new();
232    let mut seen_dependencies: HashMap<(String, String, Option<String>), ExtractedDependency> =
233        HashMap::new();
234    let mut seen_files: HashSet<String> = HashSet::new();
235    let mut top_level_artifact: Option<JsonMap<String, Value>> = None;
236
237    for variant in variants.into_iter().flatten().filter_map(Value::as_object) {
238        let category = variant
239            .get(FIELD_ATTRIBUTES)
240            .and_then(Value::as_object)
241            .and_then(|attrs| attrs.get("org.gradle.category"))
242            .and_then(Value::as_str)
243            .unwrap_or_default();
244        let is_documentation = category == "documentation";
245
246        let variant_name = variant
247            .get("name")
248            .and_then(Value::as_str)
249            .unwrap_or_default()
250            .to_string();
251        let scope = classify_variant_scope(variant);
252        let precedence = scope_precedence(scope.as_deref());
253        let is_runtime = Some(scope.as_deref() != Some("test"));
254        let is_optional = Some(scope.as_deref() == Some("test"));
255
256        let mut variant_entry = JsonMap::new();
257        variant_entry.insert("name".to_string(), Value::String(variant_name.clone()));
258        if let Some(attributes) = variant.get(FIELD_ATTRIBUTES) {
259            variant_entry.insert(FIELD_ATTRIBUTES.to_string(), attributes.clone());
260        }
261        if let Some(available_at) = variant.get(FIELD_AVAILABLE_AT) {
262            variant_entry.insert("available_at".to_string(), available_at.clone());
263        }
264        if let Some(constraints) = variant.get(FIELD_DEPENDENCY_CONSTRAINTS) {
265            variant_entry.insert("dependency_constraints".to_string(), constraints.clone());
266        }
267        variant_metadata.push(Value::Object(variant_entry));
268
269        if !is_documentation {
270            if top_level_artifact.is_none() {
271                top_level_artifact = variant
272                    .get(FIELD_FILES)
273                    .and_then(Value::as_array)
274                    .and_then(|files| files.first())
275                    .and_then(Value::as_object)
276                    .cloned();
277            }
278
279            if let Some(files) = variant.get(FIELD_FILES).and_then(Value::as_array) {
280                for file in files.iter().filter_map(Value::as_object) {
281                    let file_path = file
282                        .get("url")
283                        .and_then(Value::as_str)
284                        .or_else(|| file.get("name").and_then(Value::as_str))
285                        .unwrap_or_default()
286                        .to_string();
287                    if file_path.is_empty() || !seen_files.insert(file_path.clone()) {
288                        continue;
289                    }
290                    let (size, sha1, md5, sha256, sha512) = extract_file_hashes(file);
291                    let mut extra_data = HashMap::new();
292                    if let Some(name) = file.get("name").and_then(Value::as_str) {
293                        extra_data.insert("name".to_string(), Value::String(name.to_string()));
294                    }
295                    file_references.push(FileReference {
296                        path: file_path,
297                        size,
298                        sha1,
299                        md5,
300                        sha256,
301                        sha512,
302                        extra_data: (!extra_data.is_empty()).then_some(extra_data),
303                    });
304                }
305            }
306        }
307
308        if is_documentation {
309            continue;
310        }
311
312        for dependency in variant
313            .get(FIELD_DEPENDENCIES)
314            .and_then(Value::as_array)
315            .into_iter()
316            .flatten()
317            .filter_map(Value::as_object)
318        {
319            let Some(group) = dependency.get("group").and_then(Value::as_str) else {
320                continue;
321            };
322            let Some(module) = dependency.get("module").and_then(Value::as_str) else {
323                continue;
324            };
325
326            let requirement = extract_dependency_requirement(dependency.get("version"));
327            let key = (group.to_string(), module.to_string(), requirement.clone());
328            let purl = build_maven_purl(group, module, requirement.as_deref());
329            let dep_extra_data =
330                build_dependency_extra_data(dependency, &variant_name, scope.as_deref());
331
332            let entry = seen_dependencies.entry(key).or_default();
333            if precedence < entry.precedence || entry.scope.is_none() {
334                entry.scope = scope.clone();
335                entry.is_runtime = is_runtime;
336                entry.is_optional = is_optional;
337                entry.precedence = precedence;
338            }
339            entry.purl = purl;
340            entry.extracted_requirement = requirement.clone();
341            entry.is_pinned = Some(requirement.as_deref().is_some_and(is_exact_version));
342            entry.extra_data = merge_dependency_extra_data(entry.extra_data.take(), dep_extra_data);
343        }
344    }
345
346    for dep in seen_dependencies.into_values() {
347        dependencies.push(Dependency {
348            purl: dep.purl,
349            extracted_requirement: dep.extracted_requirement,
350            scope: dep.scope,
351            is_runtime: dep.is_runtime,
352            is_optional: dep.is_optional,
353            is_pinned: dep.is_pinned,
354            is_direct: Some(true),
355            resolved_package: None,
356            extra_data: dep.extra_data,
357        });
358    }
359
360    dependencies.sort_by(|left, right| left.purl.cmp(&right.purl));
361    file_references.sort_by(|left, right| left.path.cmp(&right.path));
362
363    (
364        dependencies,
365        file_references,
366        top_level_artifact,
367        variant_metadata,
368    )
369}
370
371fn build_dependency_extra_data(
372    dependency: &JsonMap<String, Value>,
373    variant_name: &str,
374    scope: Option<&str>,
375) -> Option<HashMap<String, Value>> {
376    let mut extra = HashMap::new();
377    extra.insert(
378        "variant_names".to_string(),
379        Value::Array(vec![Value::String(variant_name.to_string())]),
380    );
381    if let Some(scope) = scope {
382        extra.insert(
383            "variant_scopes".to_string(),
384            Value::Array(vec![Value::String(scope.to_string())]),
385        );
386    }
387
388    let mut variant_entry = JsonMap::new();
389    variant_entry.insert(
390        "variant_name".to_string(),
391        Value::String(variant_name.to_string()),
392    );
393    if let Some(scope) = scope {
394        variant_entry.insert(
395            "variant_scope".to_string(),
396            Value::String(scope.to_string()),
397        );
398    }
399    variant_entry.insert("dependency".to_string(), Value::Object(dependency.clone()));
400    extra.insert(
401        "variant_dependency_entries".to_string(),
402        Value::Array(vec![Value::Object(variant_entry)]),
403    );
404
405    for field in [
406        FIELD_ATTRIBUTES,
407        "reason",
408        "requestedCapabilities",
409        "excludes",
410        "endorseStrictVersions",
411        "thirdPartyCompatibility",
412        "version",
413    ] {
414        if let Some(value) = dependency.get(field) {
415            extra.insert(field.to_string(), value.clone());
416        }
417    }
418    (!extra.is_empty()).then_some(extra)
419}
420
421fn merge_dependency_extra_data(
422    current: Option<HashMap<String, Value>>,
423    next: Option<HashMap<String, Value>>,
424) -> Option<HashMap<String, Value>> {
425    match (current, next) {
426        (None, None) => None,
427        (Some(map), None) | (None, Some(map)) => Some(map),
428        (Some(mut current), Some(mut next)) => {
429            merge_string_arrays(&mut current, &mut next, "variant_names");
430            merge_string_arrays(&mut current, &mut next, "variant_scopes");
431            merge_object_arrays(&mut current, &mut next, "variant_dependency_entries");
432            for (key, value) in next {
433                current.entry(key).or_insert(value);
434            }
435            Some(current)
436        }
437    }
438}
439
440fn merge_object_arrays(
441    current: &mut HashMap<String, Value>,
442    next: &mut HashMap<String, Value>,
443    key: &str,
444) {
445    let existing = current
446        .remove(key)
447        .and_then(|value| value.as_array().cloned());
448    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
449
450    let mut values = Vec::new();
451    for array in [existing, incoming].into_iter().flatten() {
452        for value in array {
453            if !values.contains(&value) {
454                values.push(value);
455            }
456        }
457    }
458
459    if !values.is_empty() {
460        current.insert(key.to_string(), Value::Array(values));
461    }
462}
463
464fn merge_string_arrays(
465    current: &mut HashMap<String, Value>,
466    next: &mut HashMap<String, Value>,
467    key: &str,
468) {
469    let existing = current
470        .remove(key)
471        .and_then(|value| value.as_array().cloned());
472    let incoming = next.remove(key).and_then(|value| value.as_array().cloned());
473
474    let mut values = Vec::new();
475    for array in [existing, incoming].into_iter().flatten() {
476        for value in array
477            .into_iter()
478            .filter_map(|value| value.as_str().map(|s| s.to_string()))
479        {
480            if !values.contains(&value) {
481                values.push(value);
482            }
483        }
484    }
485
486    if !values.is_empty() {
487        current.insert(
488            key.to_string(),
489            Value::Array(values.into_iter().map(Value::String).collect()),
490        );
491    }
492}
493
494fn classify_variant_scope(variant: &JsonMap<String, Value>) -> Option<String> {
495    let variant_name = variant
496        .get("name")
497        .and_then(Value::as_str)
498        .unwrap_or_default()
499        .to_ascii_lowercase();
500
501    if variant_name.contains("test") {
502        return Some("test".to_string());
503    }
504
505    let usage = variant
506        .get(FIELD_ATTRIBUTES)
507        .and_then(Value::as_object)
508        .and_then(|attributes| attributes.get("org.gradle.usage"))
509        .and_then(Value::as_str)
510        .unwrap_or_default()
511        .to_ascii_lowercase();
512
513    if usage.contains("api") || (variant_name.contains("api") && !variant_name.contains("runtime"))
514    {
515        return Some("compile".to_string());
516    }
517    if usage.contains("runtime") || variant_name.contains("runtime") {
518        return Some("runtime".to_string());
519    }
520
521    (!variant_name.is_empty()).then_some(variant_name)
522}
523
/// Ranks a scope label; a lower rank takes priority when the same dependency is declared by
/// several variants.
///
/// `"compile"` is 0, `"runtime"` is 1, `"test"` is 2; any other label, or no label, is 3.
fn scope_precedence(scope: Option<&str>) -> u8 {
    const RANKED_SCOPES: [(&str, u8); 3] = [("compile", 0), ("runtime", 1), ("test", 2)];
    const UNRANKED: u8 = 3;

    let Some(label) = scope else {
        return UNRANKED;
    };
    RANKED_SCOPES
        .iter()
        .find(|(known, _)| *known == label)
        .map_or(UNRANKED, |(_, rank)| *rank)
}
532
533fn extract_dependency_requirement(version_value: Option<&Value>) -> Option<String> {
534    match version_value {
535        Some(Value::String(version)) => Some(version.to_string()),
536        Some(Value::Object(version)) => version
537            .get("strictly")
538            .or_else(|| version.get("requires"))
539            .or_else(|| version.get("prefers"))
540            .and_then(Value::as_str)
541            .map(|value| value.to_string()),
542        _ => None,
543    }
544}
545
546fn extract_file_hashes(file: &JsonMap<String, Value>) -> ArtifactHashes {
547    (
548        file.get("size").and_then(Value::as_u64),
549        file.get("sha1")
550            .and_then(Value::as_str)
551            .map(|value| value.to_string()),
552        file.get("md5")
553            .and_then(Value::as_str)
554            .map(|value| value.to_string()),
555        file.get("sha256")
556            .and_then(Value::as_str)
557            .map(|value| value.to_string()),
558        file.get("sha512")
559            .and_then(Value::as_str)
560            .map(|value| value.to_string()),
561    )
562}
563
564fn build_maven_purl(namespace: &str, name: &str, version: Option<&str>) -> Option<String> {
565    let mut purl = PackageUrl::new("maven", name).ok()?;
566    if !namespace.trim().is_empty() {
567        purl.with_namespace(namespace).ok()?;
568    }
569    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
570        purl.with_version(version).ok()?;
571    }
572    Some(purl.to_string())
573}
574
/// Reports whether `version` names one exact version rather than a range or dynamic selector.
///
/// Returns `false` for:
/// * an empty or whitespace-only string (no version at all, matching how `build_maven_purl`
///   treats blank versions);
/// * `latest.<status>` selectors such as `latest.release` or `latest.integration`;
/// * anything containing range or wildcard syntax: `[ ] ( ) , + * > <` or a space.
fn is_exact_version(version: &str) -> bool {
    let trimmed = version.trim();
    if trimmed.is_empty() || trimmed.starts_with("latest.") {
        return false;
    }

    !version.chars().any(|symbol| {
        matches!(
            symbol,
            '[' | ']' | '(' | ')' | ',' | '+' | '*' | '>' | '<' | ' '
        )
    })
}
587
588fn default_package_data() -> PackageData {
589    PackageData {
590        package_type: Some(GradleModuleParser::PACKAGE_TYPE),
591        datasource_id: Some(DatasourceId::GradleModule),
592        ..Default::default()
593    }
594}
595
// Registers descriptive metadata for this parser through the crate-level `register_parser!`
// macro. The macro is defined outside this file; judging by the values, the arguments are
// presumably a description, path glob patterns, the package type, the primary language and
// a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Gradle module metadata",
    &["**/*.module"],
    "maven",
    "Java",
    Some("https://docs.gradle.org/current/userguide/publishing_gradle_module_metadata.html"),
);