Skip to main content

provenant/parsers/
maven.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Apache Maven pom.xml files.
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! Maven Project Object Model (POM) files.
8//!
9//! # Supported Formats
10//! - pom.xml (Project Object Model)
11//! - pom.properties
12//! - MANIFEST.MF (JAR manifest)
13//!
14//! # Key Features
15//! - Property value substitution (`${project.version}`)
16//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
17//! - Dependency scope handling (compile, test, provided, runtime, system)
18//! - Package URL (purl) generation
19//! - Multiple license support (combined with " OR ")
20//!
21//! # Implementation Notes
22//! - Uses quick-xml for XML parsing
23//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
24//! - Property substitution limited to prevent infinite loops
25//! - Direct dependencies: all in pom.xml are direct
26
27use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30use quick_xml::Reader;
31use quick_xml::events::Event;
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::path::Path;
35
36use super::PackageParser;
37use super::license_normalization::{
38    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
39    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
40};
41
/// Text values collected for one Maven `<dependency>` entry.
///
/// Every field is optional and starts out as `None` (via `Default`); values
/// are kept as raw strings so property placeholders can be resolved later.
#[derive(Default, Clone)]
struct MavenDependencyData {
    /// Dependency `groupId`.
    group_id: Option<String>,
    /// Dependency `artifactId`.
    artifact_id: Option<String>,
    /// Version requirement exactly as written (may be a range).
    version: Option<String>,
    /// Optional artifact classifier.
    classifier: Option<String>,
    /// Artifact type; the trailing underscore avoids the `type` keyword.
    type_: Option<String>,
    /// Dependency scope as written.
    scope: Option<String>,
    /// Raw `optional` flag text; interpreted by `parse_maven_bool`.
    optional: Option<String>,
    /// System path text, surfaced through the dependency's extra data.
    system_path: Option<String>,
    /// Free-form message text, surfaced through the dependency's extra data.
    message: Option<String>,
}
54
/// Text values collected for one license entry of a POM.
///
/// All fields are optional and default to `None`.
#[derive(Default, Clone)]
struct MavenLicenseEntry {
    /// License name as written.
    name: Option<String>,
    /// License URL as written.
    url: Option<String>,
    /// Free-form comments attached to the license entry.
    comments: Option<String>,
}
61
/// Expands Maven `${property.name}` placeholders while guarding against
/// cycles and pathological inputs.
///
/// Property values may themselves contain placeholders, so resolution is
/// recursive. The resolver:
/// - handles nested placeholders such as `${outer.${inner}}`
/// - detects circular references such as `${a}` → `${b}` → `${a}`
/// - bounds recursion depth, substitutions per text, and output length
///
/// # Bookkeeping
///
/// Placeholder extraction works on the UTF-8 bytes of the input. State kept
/// between calls:
/// - `resolving_set` answers "is this key already being resolved?"
/// - `resolving_stack` records the resolution path for cycle warnings
/// - `cache` memoizes resolved values so each key is expanded once
struct PropertyResolver {
    /// Properties consulted first when looking up a key.
    raw: HashMap<String, String>,
    /// Fallback properties used when a key is absent from `raw`.
    builtins: HashMap<String, String>,
    /// Resolved values keyed by property name.
    cache: HashMap<String, String>,
    /// Keys whose resolution is currently in progress.
    resolving_set: HashSet<String>,
    /// The in-progress keys in the order they were entered.
    resolving_stack: Vec<String>,
    /// Depth at which resolution stops and gives up.
    max_depth: usize,
    /// Upper bound, in bytes, on the expanded output of one text.
    max_output_len: usize,
    /// Upper bound on placeholders expanded while processing one text.
    max_substitutions: usize,
    /// `kind:key` tokens for warnings that have already been emitted.
    warned_keys: HashSet<String>,
}
87
88impl PropertyResolver {
89    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
90        Self {
91            raw,
92            builtins,
93            cache: HashMap::new(),
94            resolving_set: HashSet::new(),
95            resolving_stack: Vec::new(),
96            max_depth: 10,
97            max_output_len: 100_000,
98            max_substitutions: 1000,
99            warned_keys: HashSet::new(),
100        }
101    }
102
103    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
104        if let Some(value) = self.cache.get(key) {
105            return Some(value.clone());
106        }
107
108        if depth >= self.max_depth {
109            self.warn_once(
110                "depth",
111                key,
112                format!("Maven property depth limit hit resolving {key}"),
113            );
114            return None;
115        }
116
117        if self.resolving_set.contains(key) {
118            if self
119                .resolving_stack
120                .last()
121                .is_some_and(|current| current == key)
122            {
123                return None;
124            }
125
126            self.warn_once(
127                "cycle",
128                key,
129                format!(
130                    "Maven property cycle detected at {key}: {:?}",
131                    self.resolving_stack
132                ),
133            );
134            return None;
135        }
136
137        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
138            value.clone()
139        } else {
140            return None;
141        };
142
143        self.resolving_set.insert(key.to_string());
144        self.resolving_stack.push(key.to_string());
145
146        let resolved = self.resolve_text(&raw_val, depth + 1);
147
148        self.resolving_stack.pop();
149        self.resolving_set.remove(key);
150
151        self.cache.insert(key.to_string(), resolved.clone());
152        Some(resolved)
153    }
154
155    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
156        if !text.contains("${") {
157            return text.to_string();
158        }
159
160        if depth >= self.max_depth {
161            warn!("Maven property depth limit hit resolving text");
162            return text.to_string();
163        }
164
165        let bytes = text.as_bytes();
166        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
167        let mut index = 0;
168        let mut substitutions = 0;
169
170        while index < bytes.len() {
171            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
172                if substitutions >= self.max_substitutions {
173                    warn!("Maven property substitution limit hit resolving {text}");
174                    return text.to_string();
175                }
176
177                let placeholder_start = index;
178                let Some((content, closing_index)) =
179                    self.parse_placeholder_content(text, index + 2)
180                else {
181                    warn!("Maven property malformed placeholder in {text}");
182                    return text.to_string();
183                };
184
185                substitutions += 1;
186                let resolved_key = if content.contains("${") {
187                    self.resolve_text(content, depth + 1)
188                } else {
189                    content.to_string()
190                };
191
192                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
193                    if output.len() + resolved.len() > self.max_output_len {
194                        warn!("Maven property output length limit hit resolving {text}");
195                        return text.to_string();
196                    }
197                    output.extend_from_slice(resolved.as_bytes());
198                } else {
199                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
200                    if output.len() + placeholder_bytes.len() > self.max_output_len {
201                        warn!("Maven property output length limit hit resolving {text}");
202                        return text.to_string();
203                    }
204                    output.extend_from_slice(placeholder_bytes);
205                }
206
207                index = closing_index + 1;
208                continue;
209            }
210
211            if output.len() + 1 > self.max_output_len {
212                warn!("Maven property output length limit hit resolving {text}");
213                return text.to_string();
214            }
215
216            output.push(bytes[index]);
217            index += 1;
218        }
219
220        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
221    }
222
223    fn parse_placeholder_content<'a>(
224        &self,
225        text: &'a str,
226        start_index: usize,
227    ) -> Option<(&'a str, usize)> {
228        let bytes = text.as_bytes();
229        let mut index = start_index;
230        let mut depth = 0;
231
232        while index < bytes.len() {
233            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
234                depth += 1;
235                index += 2;
236                continue;
237            }
238
239            if bytes[index] == b'}' {
240                if depth == 0 {
241                    return Some((&text[start_index..index], index));
242                }
243                depth -= 1;
244            }
245
246            index += 1;
247        }
248
249        None
250    }
251
252    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
253        let token = format!("{kind}:{key}");
254        if self.warned_keys.insert(token) {
255            warn!("{message}");
256        }
257    }
258}
259
/// Blanks out `<% ... %>` template directives so the remainder can be parsed
/// as XML.
///
/// Each character of a directive (delimiters included) is replaced by a
/// single space, except `\n` and `\r`, which are kept so line structure is
/// preserved. Input without any `<%` is returned borrowed. If a directive is
/// opened but never closed, the original content is returned untouched.
fn sanitize_template_directives(content: &str) -> Cow<'_, str> {
    const OPEN: &str = "<%";
    const CLOSE: &str = "%>";

    if content.find(OPEN).is_none() {
        return Cow::Borrowed(content);
    }

    let mut sanitized = String::with_capacity(content.len());
    let mut rest = content;

    while let Some(open_at) = rest.find(OPEN) {
        sanitized.push_str(&rest[..open_at]);
        let tail = &rest[open_at..];

        let directive_len = match tail.find(CLOSE) {
            Some(close_at) => close_at + CLOSE.len(),
            // Unterminated directive: give up and hand back the input as-is.
            None => return Cow::Borrowed(content),
        };

        sanitized.extend(tail[..directive_len].chars().map(|ch| match ch {
            '\n' | '\r' => ch,
            _ => ' ',
        }));

        rest = &tail[directive_len..];
    }

    sanitized.push_str(rest);
    Cow::Owned(sanitized)
}
291
292fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
293    if let Some(current) = value.clone() {
294        *value = Some(resolver.resolve_text(&current, 0));
295    }
296}
297
298fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
299    for value in values.iter_mut() {
300        *value = resolver.resolve_text(value, 0);
301    }
302}
303
304fn resolve_map_strings(
305    resolver: &mut PropertyResolver,
306    values: &mut serde_json::Map<String, serde_json::Value>,
307) {
308    for value in values.values_mut() {
309        if let serde_json::Value::String(current) = value {
310            let resolved = resolver.resolve_text(current, 0);
311            *current = resolved;
312        }
313    }
314}
315
316fn resolve_maps(
317    resolver: &mut PropertyResolver,
318    values: &mut [serde_json::Map<String, serde_json::Value>],
319) {
320    for value in values.iter_mut() {
321        resolve_map_strings(resolver, value);
322    }
323}
324
325fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
326    resolve_option(resolver, &mut dependency.group_id);
327    resolve_option(resolver, &mut dependency.artifact_id);
328    resolve_option(resolver, &mut dependency.version);
329    resolve_option(resolver, &mut dependency.classifier);
330    resolve_option(resolver, &mut dependency.type_);
331    resolve_option(resolver, &mut dependency.scope);
332    resolve_option(resolver, &mut dependency.optional);
333    resolve_option(resolver, &mut dependency.system_path);
334    resolve_option(resolver, &mut dependency.message);
335}
336
/// Interprets an optional Maven boolean string.
///
/// Returns `true` only when the value, ignoring surrounding whitespace and
/// ASCII case, is `true`; anything else (including `None`) is `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    matches!(value, Some(text) if text.trim().eq_ignore_ascii_case("true"))
}
340
/// Normalizes a Maven packaging/type value.
///
/// Returns `None` for a missing or blank value, the trimmed value when it is
/// one of the recognized packaging names, and `"jar"` for anything else.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const RECOGNIZED: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging.map(str::trim).filter(|text| !text.is_empty())?;
    if RECOGNIZED.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
351
352fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
353    resolve_option(resolver, &mut license.name);
354    resolve_option(resolver, &mut license.url);
355    resolve_option(resolver, &mut license.comments);
356}
357
358fn build_maven_qualifiers(
359    classifier: Option<&str>,
360    packaging: Option<&str>,
361) -> Option<HashMap<String, String>> {
362    let mut qualifiers = HashMap::new();
363
364    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
365        qualifiers.insert("classifier".to_string(), classifier.to_string());
366    }
367
368    if let Some(packaging) = normalize_maven_packaging(packaging)
369        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
370    {
371        qualifiers.insert("type".to_string(), packaging.to_string());
372    }
373
374    (!qualifiers.is_empty()).then_some(qualifiers)
375}
376
377fn build_maven_purl(
378    group_id: &str,
379    artifact_id: &str,
380    version: Option<&str>,
381    classifier: Option<&str>,
382    packaging: Option<&str>,
383) -> String {
384    let mut purl = format!(
385        "pkg:maven/{}/{}",
386        percent_encode_purl_component(group_id),
387        percent_encode_purl_component(artifact_id)
388    );
389
390    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
391        purl.push('@');
392        purl.push_str(&percent_encode_purl_component(version));
393    }
394
395    let qualifiers = build_maven_qualifiers(classifier, packaging);
396    if let Some(qualifiers) = qualifiers {
397        let mut query_parts = Vec::new();
398        if let Some(classifier) = qualifiers.get("classifier") {
399            query_parts.push(format!(
400                "classifier={}",
401                percent_encode_purl_component(classifier)
402            ));
403        }
404        if let Some(type_) = qualifiers.get("type") {
405            query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
406        }
407
408        if !query_parts.is_empty() {
409            purl.push('?');
410            purl.push_str(&query_parts.join("&"));
411        }
412    }
413
414    purl
415}
416
/// Percent-encodes `value` for use inside a package URL.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` pass through unchanged; every
/// other byte of the UTF-8 encoding becomes `%XX` with uppercase hex digits.
fn percent_encode_purl_component(value: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(value.len());

    for byte in value.bytes() {
        let unreserved = byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~');
        if unreserved {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }

    encoded
}
431
432fn build_maven_download_url(
433    group_id: &str,
434    artifact_id: &str,
435    version: &str,
436    classifier: Option<&str>,
437    packaging: Option<&str>,
438) -> String {
439    const BASE_URL: &str = "https://repo1.maven.org/maven2";
440    let group_path = group_id.replace('.', "/");
441    let extension = normalize_maven_packaging(packaging)
442        .filter(|value| *value != "pom")
443        .unwrap_or("jar");
444    let classifier_suffix = classifier
445        .map(str::trim)
446        .filter(|value| !value.is_empty())
447        .map(|value| format!("-{value}"))
448        .unwrap_or_default();
449
450    format!(
451        "{}/{}/{}/{}/{}-{}{}.{}",
452        BASE_URL,
453        group_path,
454        artifact_id,
455        version,
456        artifact_id,
457        version,
458        classifier_suffix,
459        extension
460    )
461}
462
463fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
464    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
465}
466
467fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
468    let rendered_entries: Vec<String> = licenses
469        .iter()
470        .filter_map(|license| {
471            let mut lines = Vec::new();
472
473            if let Some(name) = license
474                .name
475                .as_ref()
476                .filter(|value| !value.trim().is_empty())
477            {
478                lines.push(format!("    name: {name}"));
479            }
480            if let Some(url) = license
481                .url
482                .as_ref()
483                .filter(|value| !value.trim().is_empty())
484            {
485                lines.push(format!("    url: {url}"));
486            }
487            if let Some(comments) = license
488                .comments
489                .as_ref()
490                .filter(|value| !value.trim().is_empty())
491            {
492                lines.push(format!("    comments: {comments}"));
493            }
494
495            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
496        })
497        .collect();
498
499    if rendered_entries.is_empty() {
500        None
501    } else {
502        Some(format!("{}\n", rendered_entries.join("\n")))
503    }
504}
505
/// Heuristically decides whether an XML comment talks about licensing.
///
/// The comparison is ASCII case-insensitive. Most markers are matched as
/// substrings. `mit` is only matched as a standalone word, because as a
/// substring it also fires on unrelated words such as "commit", "submit",
/// "limit" or "permit".
fn is_license_like_comment(comment: &str) -> bool {
    // Markers distinctive enough to be matched anywhere in the text.
    const SUBSTRING_MARKERS: [&str; 10] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];
    // Markers that must appear as a whole word.
    const WORD_MARKERS: [&str; 1] = ["mit"];

    let lowered = comment.to_ascii_lowercase();

    if SUBSTRING_MARKERS
        .iter()
        .any(|marker| lowered.contains(marker))
    {
        return true;
    }

    lowered
        .split(|ch: char| !ch.is_alphanumeric())
        .any(|word| WORD_MARKERS.contains(&word))
}
524
525fn dependency_extra_data(
526    dependency: &MavenDependencyData,
527) -> Option<HashMap<String, serde_json::Value>> {
528    let mut extra_data = HashMap::new();
529
530    if let Some(classifier) = dependency
531        .classifier
532        .as_ref()
533        .filter(|value| !value.trim().is_empty())
534    {
535        extra_data.insert(
536            "classifier".to_string(),
537            serde_json::Value::String(classifier.clone()),
538        );
539    }
540    if let Some(type_) = dependency
541        .type_
542        .as_ref()
543        .filter(|value| !value.trim().is_empty())
544    {
545        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
546    }
547    if let Some(system_path) = dependency
548        .system_path
549        .as_ref()
550        .filter(|value| !value.trim().is_empty())
551    {
552        extra_data.insert(
553            "system_path".to_string(),
554            serde_json::Value::String(system_path.clone()),
555        );
556    }
557    if let Some(message) = dependency
558        .message
559        .as_ref()
560        .filter(|value| !value.trim().is_empty())
561    {
562        extra_data.insert(
563            "message".to_string(),
564            serde_json::Value::String(message.clone()),
565        );
566    }
567
568    (!extra_data.is_empty()).then_some(extra_data)
569}
570
571fn dependency_management_entry_to_value(
572    dependency: &MavenDependencyData,
573) -> serde_json::Map<String, serde_json::Value> {
574    let mut dep_obj = serde_json::Map::new();
575
576    if let Some(group_id) = dependency.group_id.as_ref() {
577        dep_obj.insert(
578            "groupId".to_string(),
579            serde_json::Value::String(group_id.clone()),
580        );
581    }
582    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
583        dep_obj.insert(
584            "artifactId".to_string(),
585            serde_json::Value::String(artifact_id.clone()),
586        );
587    }
588    if let Some(version) = dependency.version.as_ref() {
589        dep_obj.insert(
590            "version".to_string(),
591            serde_json::Value::String(version.clone()),
592        );
593    }
594    if let Some(scope) = dependency.scope.as_ref() {
595        dep_obj.insert(
596            "scope".to_string(),
597            serde_json::Value::String(scope.clone()),
598        );
599    }
600    if let Some(type_) = dependency.type_.as_ref() {
601        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
602    }
603    if let Some(classifier) = dependency.classifier.as_ref() {
604        dep_obj.insert(
605            "classifier".to_string(),
606            serde_json::Value::String(classifier.clone()),
607        );
608    }
609    if let Some(optional) = dependency.optional.as_deref() {
610        dep_obj.insert(
611            "optional".to_string(),
612            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
613        );
614    }
615    if let Some(message) = dependency.message.as_ref() {
616        dep_obj.insert(
617            "message".to_string(),
618            serde_json::Value::String(message.clone()),
619        );
620    }
621
622    dep_obj
623}
624
625fn maven_dependency_to_dependency(
626    dependency_data: &MavenDependencyData,
627    fallback_scope: Option<&str>,
628    force_non_runtime: bool,
629) -> Option<Dependency> {
630    let group_id = dependency_data.group_id.as_ref()?;
631    let artifact_id = dependency_data.artifact_id.as_ref()?;
632    let version = dependency_data.version.clone();
633    let scope = dependency_data
634        .scope
635        .clone()
636        .or_else(|| fallback_scope.map(str::to_string));
637    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
638
639    let (is_runtime, is_optional) = if force_non_runtime {
640        (Some(false), Some(explicit_optional))
641    } else {
642        match scope.as_deref() {
643            Some("test") | Some("provided") => (Some(false), Some(true)),
644            Some(_) => (Some(true), Some(explicit_optional)),
645            None => (None, Some(explicit_optional)),
646        }
647    };
648
649    Some(Dependency {
650        purl: Some(build_maven_purl(
651            group_id,
652            artifact_id,
653            version.as_deref(),
654            dependency_data.classifier.as_deref(),
655            dependency_data.type_.as_deref(),
656        )),
657        extracted_requirement: version.clone(),
658        scope,
659        is_runtime,
660        is_optional,
661        is_pinned: version.as_deref().map(is_maven_version_pinned),
662        is_direct: Some(true),
663        resolved_package: None,
664        extra_data: dependency_extra_data(dependency_data),
665    })
666}
667
/// Reports whether a Maven version specifier names one exact version.
///
/// After trimming, a specifier is *not* pinned when it is empty, contains any
/// range delimiter (`[`, `]`, `(`, `)`), or is one of the dynamic keywords
/// `LATEST` / `RELEASE` (ASCII case-insensitive). Everything else is pinned.
///
/// - Pinned: `"1.0.0"`, `"1.2.3"`
/// - Not pinned: `"[1.0.0,2.0.0)"`, `"[1.0.0,)"`, `"LATEST"`, `"RELEASE"`
fn is_maven_version_pinned(version_str: &str) -> bool {
    const RANGE_DELIMITERS: [char; 4] = ['[', ']', '(', ')'];
    const DYNAMIC_KEYWORDS: [&str; 2] = ["LATEST", "RELEASE"];

    let candidate = version_str.trim();

    let uses_range_syntax = candidate.contains(|ch: char| RANGE_DELIMITERS.contains(&ch));
    let is_dynamic_keyword = DYNAMIC_KEYWORDS
        .iter()
        .any(|keyword| candidate.eq_ignore_ascii_case(keyword));

    !(candidate.is_empty() || uses_range_syntax || is_dynamic_keyword)
}
699
/// Borrowed project and parent coordinates from which the built-in
/// `project.*` / `pom.*` / `parent.*` properties are derived.
struct MavenBuiltinPropertyInputs<'a> {
    /// The project's own group id, if declared.
    namespace: &'a Option<String>,
    /// The project's artifact id.
    name: &'a Option<String>,
    /// The project's own version, if declared.
    version: &'a Option<String>,
    /// Group id of the parent, if any.
    parent_group_id: &'a Option<String>,
    /// Artifact id of the parent, if any.
    parent_artifact_id: &'a Option<String>,
    /// Version of the parent, if any.
    parent_version: &'a Option<String>,
    /// Human-readable project name.
    project_name: &'a Option<String>,
    /// Project packaging.
    project_packaging: &'a Option<String>,
}

/// Builds the map of built-in properties available to placeholder resolution.
///
/// - `project.groupId` / `pom.groupId` use the project's group id, falling
///   back to the parent's.
/// - `project.version` / `pom.version` use the project's version, falling
///   back to the parent's.
/// - `project.artifactId` / `pom.artifactId` use the project's artifact id.
/// - Parent coordinates are exposed as `project.parent.*`, `pom.parent.*` and
///   `parent.*` for group id, artifact id and version alike.
/// - `project.packaging` and `project.name` are set when known.
///
/// Inputs that are `None` contribute no entries.
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    /// Registers `value` under every key in `keys`; does nothing for `None`.
    fn insert_aliases(
        builtins: &mut HashMap<String, String>,
        keys: &[&str],
        value: Option<&str>,
    ) {
        let Some(value) = value else {
            return;
        };
        for key in keys {
            builtins.insert((*key).to_string(), value.to_string());
        }
    }

    let parent_group_id = inputs.parent_group_id.as_deref();
    let parent_version = inputs.parent_version.as_deref();

    // A project without its own groupId/version inherits the parent's.
    let effective_group_id = inputs.namespace.as_deref().or(parent_group_id);
    let effective_version = inputs.version.as_deref().or(parent_version);

    let mut builtins = HashMap::new();

    insert_aliases(
        &mut builtins,
        &["project.groupId", "pom.groupId"],
        effective_group_id,
    );
    insert_aliases(
        &mut builtins,
        &["project.artifactId", "pom.artifactId"],
        inputs.name.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.version", "pom.version"],
        effective_version,
    );

    // All three parent coordinates get the same set of aliases.
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.groupId",
            "pom.parent.groupId",
            "parent.groupId",
        ],
        parent_group_id,
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.artifactId",
            "pom.parent.artifactId",
            "parent.artifactId",
        ],
        inputs.parent_artifact_id.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.version",
            "pom.parent.version",
            "parent.version",
        ],
        parent_version,
    );

    insert_aliases(
        &mut builtins,
        &["project.packaging"],
        inputs.project_packaging.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.name"],
        inputs.project_name.as_deref(),
    );

    builtins
}
763
/// Package parser for Maven metadata files: `pom.xml`, `pom.properties` and
/// `MANIFEST.MF`.
///
/// `${property.name}` placeholders found in POM values are expanded with
/// cycle detection and depth limits; `PropertyResolver` documents the
/// substitution algorithm.
pub struct MavenParser;
769
770impl PackageParser for MavenParser {
771    const PACKAGE_TYPE: PackageType = PackageType::Maven;
772
773    fn extract_packages(path: &Path) -> Vec<PackageData> {
774        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
775            if filename == "pom.properties" {
776                return vec![parse_pom_properties(path)];
777            } else if filename == "MANIFEST.MF" {
778                return vec![parse_manifest_mf(path)];
779            }
780        }
781
782        let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
783            Ok(content) => content,
784            Err(e) => {
785                warn!("Failed to open pom.xml at {:?}: {}", path, e);
786                return vec![default_package_data(DatasourceId::MavenPom)];
787            }
788        };
789
790        let sanitized_content = sanitize_template_directives(&content);
791        let mut reader = Reader::from_str(sanitized_content.as_ref());
792        reader.config_mut().trim_text(true);
793
794        let mut buf = Vec::new();
795        let mut package_data = default_package_data(DatasourceId::MavenPom);
796        package_data.package_type = Some(Self::PACKAGE_TYPE);
797        package_data.primary_language = Some("Java".to_string());
798        package_data.datasource_id = Some(DatasourceId::MavenPom);
799
800        let mut current_element = Vec::new();
801        let mut in_dependencies = false;
802        let mut current_dependency: Option<Dependency> = None;
803        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
804        let mut current_dependency_data: Option<MavenDependencyData> = None;
805
806        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
807        let mut xml_license_comments: Vec<String> = Vec::new();
808        let mut current_license: Option<MavenLicenseEntry> = None;
809        let mut inception_year = None;
810        let mut scm_connection = None;
811        let mut scm_developer_connection = None;
812        let mut scm_url = None;
813        let mut scm_tag = None;
814        let mut organization_name = None;
815        let mut organization_url = None;
816        let mut in_developers = false;
817        let mut in_contributors = false;
818        let mut current_party: Option<Party> = None;
819        let mut issue_management_system = None;
820        let mut issue_management_url = None;
821        let mut ci_management_system = None;
822        let mut ci_management_url = None;
823        let mut in_distribution_management = false;
824        let mut in_dist_repository = false;
825        let mut in_dist_snapshot_repository = false;
826        let mut in_dist_site = false;
827        let mut dist_download_url = None;
828        let mut dist_repository_id = None;
829        let mut dist_repository_name = None;
830        let mut dist_repository_url = None;
831        let mut dist_repository_layout = None;
832        let mut dist_snapshot_repository_id = None;
833        let mut dist_snapshot_repository_name = None;
834        let mut dist_snapshot_repository_url = None;
835        let mut dist_snapshot_repository_layout = None;
836        let mut dist_site_id = None;
837        let mut dist_site_name = None;
838        let mut dist_site_url = None;
839        let mut in_repositories = false;
840        let mut in_plugin_repositories = false;
841        let mut in_repository = false;
842        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
843        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
844        let mut current_repository_id = None;
845        let mut current_repository_name = None;
846        let mut current_repository_url = None;
847        let mut in_modules = false;
848        let mut modules: Vec<String> = Vec::new();
849        let mut in_mailing_lists = false;
850        let mut in_mailing_list = false;
851        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
852        let mut current_mailing_list_name = None;
853        let mut current_mailing_list_subscribe = None;
854        let mut current_mailing_list_unsubscribe = None;
855        let mut current_mailing_list_post = None;
856        let mut current_mailing_list_archive = None;
857        let mut in_dependency_management = false;
858        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
859        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
860        let mut in_dep_mgmt_dependency = false;
861        let mut in_parent = false;
862        let mut parent_group_id = None;
863        let mut parent_artifact_id = None;
864        let mut parent_version = None;
865        let mut parent_relative_path = None;
866        let mut in_properties = false;
867        let mut properties: HashMap<String, String> = HashMap::new();
868        let mut project_name = None;
869        let mut project_description = None;
870        let mut project_packaging = None;
871        let mut project_classifier = None;
872        let mut in_relocation = false;
873        let mut relocation = MavenDependencyData::default();
874
875        let mut iteration_count: usize = 0;
876        loop {
877            iteration_count += 1;
878            if iteration_count > MAX_ITERATION_COUNT {
879                warn!(
880                    "Exceeded MAX_ITERATION_COUNT ({}) parsing pom.xml at {:?}; stopping early",
881                    MAX_ITERATION_COUNT, path
882                );
883                break;
884            }
885            match reader.read_event_into(&mut buf) {
886                Ok(Event::Start(e)) => {
887                    let element_name = e.name().as_ref().to_vec();
888                    current_element.push(element_name.clone());
889
890                    match element_name.as_slice() {
891                        b"parent" => in_parent = true,
892                        b"dependencyManagement" => in_dependency_management = true,
893                        b"dependencies" if in_dependency_management => {}
894                        b"dependencies" => in_dependencies = true,
895                        b"dependency" if in_dependency_management => {
896                            in_dep_mgmt_dependency = true;
897                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
898                        }
899                        b"dependency" if in_dependencies => {
900                            current_dependency = Some(Dependency {
901                                purl: None,
902                                extracted_requirement: None,
903                                scope: None,
904                                is_runtime: None,
905                                is_optional: Some(false),
906                                is_pinned: None,
907                                is_direct: Some(true),
908                                resolved_package: None,
909                                extra_data: None,
910                            });
911                            current_dependency_data = Some(MavenDependencyData::default());
912                        }
913                        b"properties" => in_properties = true,
914                        b"developers" => in_developers = true,
915                        b"developer" if in_developers => {
916                            current_party = Some(Party {
917                                r#type: Some("person".to_string()),
918                                role: Some("developer".to_string()),
919                                name: None,
920                                email: None,
921                                url: None,
922                                organization: None,
923                                organization_url: None,
924                                timezone: None,
925                            });
926                        }
927                        b"contributors" => in_contributors = true,
928                        b"contributor" if in_contributors => {
929                            current_party = Some(Party {
930                                r#type: Some("person".to_string()),
931                                role: Some("contributor".to_string()),
932                                name: None,
933                                email: None,
934                                url: None,
935                                organization: None,
936                                organization_url: None,
937                                timezone: None,
938                            });
939                        }
940                        b"license" => current_license = Some(MavenLicenseEntry::default()),
941                        b"distributionManagement" => in_distribution_management = true,
942                        b"relocation" if in_distribution_management => {
943                            in_relocation = true;
944                            relocation = MavenDependencyData::default();
945                        }
946                        b"repository" if in_distribution_management => in_dist_repository = true,
947                        b"snapshotRepository" if in_distribution_management => {
948                            in_dist_snapshot_repository = true
949                        }
950                        b"site" if in_distribution_management => in_dist_site = true,
951                        b"repositories" => in_repositories = true,
952                        b"pluginRepositories" => in_plugin_repositories = true,
953                        b"repository" if in_repositories && !in_distribution_management => {
954                            in_repository = true;
955                            current_repository_id = None;
956                            current_repository_name = None;
957                            current_repository_url = None;
958                        }
959                        b"pluginRepository" if in_plugin_repositories => {
960                            in_repository = true;
961                            current_repository_id = None;
962                            current_repository_name = None;
963                            current_repository_url = None;
964                        }
965                        b"modules" => in_modules = true,
966                        b"mailingLists" => in_mailing_lists = true,
967                        b"mailingList" if in_mailing_lists => {
968                            in_mailing_list = true;
969                            current_mailing_list_name = None;
970                            current_mailing_list_subscribe = None;
971                            current_mailing_list_unsubscribe = None;
972                            current_mailing_list_post = None;
973                            current_mailing_list_archive = None;
974                        }
975                        _ => {}
976                    }
977                }
978                Ok(Event::Text(e)) => {
979                    let text = match e.decode() {
980                        Ok(Cow::Borrowed(s)) => s.to_string(),
981                        Ok(Cow::Owned(s)) => s,
982                        Err(_) => {
983                            warn!(
984                                "Invalid UTF-8 in XML text content in {:?}; using lossy conversion",
985                                path
986                            );
987                            String::from_utf8_lossy(e.as_ref()).into_owned()
988                        }
989                    };
990                    let current_path = current_element.last().map(|v| v.as_slice());
991                    let current_parent = current_element
992                        .len()
993                        .checked_sub(2)
994                        .map(|index| current_element[index].as_slice());
995
996                    if in_properties
997                        && current_element.len() >= 2
998                        && current_element[current_element.len() - 2] == b"properties"
999                    {
1000                        if let Some(property_name) = current_element
1001                            .last()
1002                            .and_then(|name| std::str::from_utf8(name).ok())
1003                        {
1004                            properties.insert(property_name.to_string(), truncate_field(text));
1005                        } else {
1006                            warn!("Failed to decode Maven property name in {:?}", path);
1007                        }
1008                    } else if in_dep_mgmt_dependency {
1009                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
1010                            match current_path {
1011                                Some(b"groupId") if current_parent == Some(b"dependency") => {
1012                                    dep_mgmt.group_id = Some(text)
1013                                }
1014                                Some(b"artifactId") if current_parent == Some(b"dependency") => {
1015                                    dep_mgmt.artifact_id = Some(text)
1016                                }
1017                                Some(b"version") if current_parent == Some(b"dependency") => {
1018                                    dep_mgmt.version = Some(text)
1019                                }
1020                                Some(b"scope") if current_parent == Some(b"dependency") => {
1021                                    dep_mgmt.scope = Some(text)
1022                                }
1023                                Some(b"type") if current_parent == Some(b"dependency") => {
1024                                    dep_mgmt.type_ = Some(text)
1025                                }
1026                                Some(b"classifier") if current_parent == Some(b"dependency") => {
1027                                    dep_mgmt.classifier = Some(text)
1028                                }
1029                                Some(b"optional") if current_parent == Some(b"dependency") => {
1030                                    dep_mgmt.optional = Some(text)
1031                                }
1032                                _ => {}
1033                            }
1034                        }
1035                    } else if let Some(license) = &mut current_license {
1036                        match current_path {
1037                            Some(b"name") => license.name = Some(text),
1038                            Some(b"url") => license.url = Some(text),
1039                            Some(b"comments") => license.comments = Some(text),
1040                            _ => {}
1041                        }
1042                    } else if let Some(party) = &mut current_party {
1043                        match current_path {
1044                            Some(b"name") => party.name = Some(text),
1045                            Some(b"email") => party.email = Some(text),
1046                            Some(b"url") => party.url = Some(text),
1047                            Some(b"organization") => party.organization = Some(text),
1048                            Some(b"organizationUrl") => party.organization_url = Some(text),
1049                            Some(b"timezone") => party.timezone = Some(text),
1050                            _ => {}
1051                        }
1052                    } else if let Some(dep) = &mut current_dependency {
1053                        match current_path {
1054                            Some(b"groupId") => {
1055                                if current_parent == Some(b"dependency")
1056                                    && let Some(coords) = current_dependency_data.as_mut()
1057                                {
1058                                    coords.group_id = Some(text);
1059                                }
1060                            }
1061                            Some(b"artifactId") => {
1062                                if current_parent == Some(b"dependency")
1063                                    && let Some(coords) = current_dependency_data.as_mut()
1064                                {
1065                                    coords.artifact_id = Some(text);
1066                                }
1067                            }
1068                            Some(b"version") => {
1069                                if current_parent == Some(b"dependency")
1070                                    && let Some(coords) = current_dependency_data.as_mut()
1071                                {
1072                                    coords.version = Some(text);
1073                                }
1074                            }
1075                            Some(b"scope") => {
1076                                if current_parent == Some(b"dependency") {
1077                                    dep.scope = Some(text.clone());
1078                                    dep.is_optional = Some(text == "test" || text == "provided");
1079                                    dep.is_runtime = Some(text != "test" && text != "provided");
1080                                }
1081                                if current_parent == Some(b"dependency")
1082                                    && let Some(coords) = current_dependency_data.as_mut()
1083                                {
1084                                    coords.scope = Some(text);
1085                                }
1086                            }
1087                            Some(b"optional") => {
1088                                if current_parent == Some(b"dependency")
1089                                    && let Some(coords) = current_dependency_data.as_mut()
1090                                {
1091                                    coords.optional = Some(text);
1092                                }
1093                            }
1094                            Some(b"type") => {
1095                                if current_parent == Some(b"dependency")
1096                                    && let Some(coords) = current_dependency_data.as_mut()
1097                                {
1098                                    coords.type_ = Some(text);
1099                                }
1100                            }
1101                            Some(b"classifier") => {
1102                                if current_parent == Some(b"dependency")
1103                                    && let Some(coords) = current_dependency_data.as_mut()
1104                                {
1105                                    coords.classifier = Some(text);
1106                                }
1107                            }
1108                            Some(b"systemPath") => {
1109                                if current_parent == Some(b"dependency")
1110                                    && let Some(coords) = current_dependency_data.as_mut()
1111                                {
1112                                    coords.system_path = Some(text);
1113                                }
1114                            }
1115                            _ => {}
1116                        }
1117                    } else if in_relocation {
1118                        match current_path {
1119                            Some(b"groupId") => relocation.group_id = Some(text),
1120                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1121                            Some(b"version") => relocation.version = Some(text),
1122                            Some(b"classifier") => relocation.classifier = Some(text),
1123                            Some(b"type") => relocation.type_ = Some(text),
1124                            Some(b"message") => relocation.message = Some(text),
1125                            _ => {}
1126                        }
1127                    } else if in_parent {
1128                        match current_path {
1129                            Some(b"groupId") => {
1130                                parent_group_id = Some(text);
1131                            }
1132                            Some(b"artifactId") => {
1133                                parent_artifact_id = Some(text);
1134                            }
1135                            Some(b"version") => {
1136                                parent_version = Some(text);
1137                            }
1138                            Some(b"relativePath") => {
1139                                parent_relative_path = Some(text);
1140                            }
1141                            _ => {}
1142                        }
1143                    } else {
1144                        match current_path {
1145                            Some(b"groupId") if current_element.len() == 2 => {
1146                                package_data.namespace = Some(text)
1147                            }
1148                            Some(b"artifactId") if current_element.len() == 2 => {
1149                                package_data.name = Some(text)
1150                            }
1151                            Some(b"version") if current_element.len() == 2 => {
1152                                package_data.version = Some(text)
1153                            }
1154                            Some(b"name") if current_element.len() == 2 => {
1155                                project_name = Some(text)
1156                            }
1157                            Some(b"description") if current_element.len() == 2 => {
1158                                project_description = Some(text)
1159                            }
1160                            Some(b"packaging") if current_element.len() == 2 => {
1161                                project_packaging = Some(text)
1162                            }
1163                            Some(b"classifier") if current_element.len() == 2 => {
1164                                project_classifier = Some(text)
1165                            }
1166                            Some(b"url") if current_element.len() == 2 => {
1167                                package_data.homepage_url = Some(text)
1168                            }
1169                            Some(b"inceptionYear") if current_element.len() == 2 => {
1170                                inception_year = Some(text)
1171                            }
1172                            Some(b"connection")
1173                                if current_element.len() >= 3
1174                                    && current_element[current_element.len() - 2] == b"scm" =>
1175                            {
1176                                scm_connection = if text.starts_with("scm:git:") {
1177                                    Some(text.replacen("scm:git:", "git+", 1))
1178                                } else if text.starts_with("scm:") {
1179                                    Some(text.replacen("scm:", "", 1))
1180                                } else {
1181                                    Some(text)
1182                                };
1183                            }
1184                            Some(b"developerConnection")
1185                                if current_element.len() >= 3
1186                                    && current_element[current_element.len() - 2] == b"scm" =>
1187                            {
1188                                scm_developer_connection = if text.starts_with("scm:git:") {
1189                                    Some(text.replacen("scm:git:", "git+", 1))
1190                                } else if text.starts_with("scm:") {
1191                                    Some(text.replacen("scm:", "", 1))
1192                                } else {
1193                                    Some(text)
1194                                };
1195                            }
1196                            Some(b"url")
1197                                if current_element.len() >= 3
1198                                    && current_element[current_element.len() - 2] == b"scm" =>
1199                            {
1200                                scm_url = Some(text);
1201                            }
1202                            Some(b"tag")
1203                                if current_element.len() >= 3
1204                                    && current_element[current_element.len() - 2] == b"scm" =>
1205                            {
1206                                scm_tag = Some(text);
1207                            }
1208                            Some(b"name")
1209                                if current_element.len() >= 2
1210                                    && current_element[current_element.len() - 2]
1211                                        == b"organization" =>
1212                            {
1213                                organization_name = Some(text);
1214                            }
1215                            Some(b"url")
1216                                if current_element.len() >= 2
1217                                    && current_element[current_element.len() - 2]
1218                                        == b"organization" =>
1219                            {
1220                                organization_url = Some(text);
1221                            }
1222                            Some(b"system")
1223                                if current_element.len() >= 2
1224                                    && current_element[current_element.len() - 2]
1225                                        == b"issueManagement" =>
1226                            {
1227                                issue_management_system = Some(text);
1228                            }
1229                            Some(b"url")
1230                                if current_element.len() >= 2
1231                                    && current_element[current_element.len() - 2]
1232                                        == b"issueManagement" =>
1233                            {
1234                                issue_management_url = Some(text);
1235                            }
1236                            Some(b"system")
1237                                if current_element.len() >= 2
1238                                    && current_element[current_element.len() - 2]
1239                                        == b"ciManagement" =>
1240                            {
1241                                ci_management_system = Some(text);
1242                            }
1243                            Some(b"url")
1244                                if current_element.len() >= 2
1245                                    && current_element[current_element.len() - 2]
1246                                        == b"ciManagement" =>
1247                            {
1248                                ci_management_url = Some(text);
1249                            }
1250                            Some(b"downloadUrl")
1251                                if current_element.len() >= 2
1252                                    && current_element[current_element.len() - 2]
1253                                        == b"distributionManagement" =>
1254                            {
1255                                dist_download_url = Some(text);
1256                            }
1257                            Some(b"id") if in_dist_repository => {
1258                                dist_repository_id = Some(text);
1259                            }
1260                            Some(b"name") if in_dist_repository => {
1261                                dist_repository_name = Some(text);
1262                            }
1263                            Some(b"url") if in_dist_repository => {
1264                                dist_repository_url = Some(text);
1265                            }
1266                            Some(b"layout") if in_dist_repository => {
1267                                dist_repository_layout = Some(text);
1268                            }
1269                            Some(b"id") if in_dist_snapshot_repository => {
1270                                dist_snapshot_repository_id = Some(text);
1271                            }
1272                            Some(b"name") if in_dist_snapshot_repository => {
1273                                dist_snapshot_repository_name = Some(text);
1274                            }
1275                            Some(b"url") if in_dist_snapshot_repository => {
1276                                dist_snapshot_repository_url = Some(text);
1277                            }
1278                            Some(b"layout") if in_dist_snapshot_repository => {
1279                                dist_snapshot_repository_layout = Some(text);
1280                            }
1281                            Some(b"id") if in_dist_site => {
1282                                dist_site_id = Some(text);
1283                            }
1284                            Some(b"name") if in_dist_site => {
1285                                dist_site_name = Some(text);
1286                            }
1287                            Some(b"url") if in_dist_site => {
1288                                dist_site_url = Some(text);
1289                            }
1290                            Some(b"id") if in_repository => {
1291                                current_repository_id = Some(text);
1292                            }
1293                            Some(b"name") if in_repository => {
1294                                current_repository_name = Some(text);
1295                            }
1296                            Some(b"url") if in_repository => {
1297                                current_repository_url = Some(text);
1298                            }
1299                            Some(b"module") if in_modules => {
1300                                modules.push(text);
1301                            }
1302                            Some(b"name") if in_mailing_list => {
1303                                current_mailing_list_name = Some(text);
1304                            }
1305                            Some(b"subscribe") if in_mailing_list => {
1306                                current_mailing_list_subscribe = Some(text);
1307                            }
1308                            Some(b"unsubscribe") if in_mailing_list => {
1309                                current_mailing_list_unsubscribe = Some(text);
1310                            }
1311                            Some(b"post") if in_mailing_list => {
1312                                current_mailing_list_post = Some(text);
1313                            }
1314                            Some(b"archive") if in_mailing_list => {
1315                                current_mailing_list_archive = Some(text);
1316                            }
1317                            _ => {}
1318                        }
1319                    }
1320                }
1321                Ok(Event::Comment(e)) => {
1322                    let comment = match e.decode() {
1323                        Ok(Cow::Borrowed(s)) => s.trim().to_string(),
1324                        Ok(Cow::Owned(s)) => s.trim().to_string(),
1325                        Err(_) => {
1326                            warn!(
1327                                "Invalid UTF-8 in XML comment in {:?}; using lossy conversion",
1328                                path
1329                            );
1330                            String::from_utf8_lossy(e.as_ref())
1331                                .into_owned()
1332                                .trim()
1333                                .to_string()
1334                        }
1335                    };
1336                    if current_element.is_empty()
1337                        && !comment.is_empty()
1338                        && is_license_like_comment(&comment)
1339                    {
1340                        xml_license_comments.push(comment);
1341                    }
1342                }
1343                Ok(Event::End(e)) => {
1344                    if !current_element.is_empty() {
1345                        current_element.pop();
1346                    }
1347
1348                    match e.name().as_ref() {
1349                        b"parent" => in_parent = false,
1350                        b"dependencyManagement" => in_dependency_management = false,
1351                        b"dependencies" => in_dependencies = false,
1352                        b"dependency" if in_dep_mgmt_dependency => {
1353                            in_dep_mgmt_dependency = false;
1354                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1355                                && (dep_mgmt.group_id.is_some()
1356                                    || dep_mgmt.artifact_id.is_some()
1357                                    || dep_mgmt.version.is_some())
1358                            {
1359                                dependency_management_entries.push(dep_mgmt);
1360                            }
1361                        }
1362                        b"dependency" => {
1363                            if let (Some(dep), Some(coords)) =
1364                                (current_dependency.take(), current_dependency_data.take())
1365                            {
1366                                package_data.dependencies.push(dep);
1367                                dependency_data.push(coords);
1368                            } else if let Some(dep) = current_dependency.take() {
1369                                package_data.dependencies.push(dep);
1370                            }
1371                        }
1372                        b"license" => {
1373                            if let Some(license) = current_license.take()
1374                                && (license.name.is_some()
1375                                    || license.url.is_some()
1376                                    || license.comments.is_some())
1377                            {
1378                                licenses.push(license);
1379                            }
1380                        }
1381                        b"developers" => in_developers = false,
1382                        b"developer" => {
1383                            if let Some(party) = current_party.take() {
1384                                package_data.parties.push(party);
1385                            }
1386                        }
1387                        b"contributors" => in_contributors = false,
1388                        b"contributor" => {
1389                            if let Some(party) = current_party.take() {
1390                                package_data.parties.push(party);
1391                            }
1392                        }
1393                        b"distributionManagement" => in_distribution_management = false,
1394                        b"relocation" => in_relocation = false,
1395                        b"repository" if !in_dependencies && in_distribution_management => {
1396                            in_dist_repository = false
1397                        }
1398                        b"repository" if !in_dependencies && in_repositories => {
1399                            in_repository = false;
1400                            if current_repository_id.is_some()
1401                                || current_repository_name.is_some()
1402                                || current_repository_url.is_some()
1403                            {
1404                                let mut repo = serde_json::Map::new();
1405                                if let Some(id) = current_repository_id.take() {
1406                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1407                                }
1408                                if let Some(name) = current_repository_name.take() {
1409                                    repo.insert(
1410                                        "name".to_string(),
1411                                        serde_json::Value::String(name),
1412                                    );
1413                                }
1414                                if let Some(url) = current_repository_url.take() {
1415                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1416                                }
1417                                repositories.push(repo);
1418                            }
1419                        }
1420                        b"pluginRepository" if in_plugin_repositories => {
1421                            in_repository = false;
1422                            if current_repository_id.is_some()
1423                                || current_repository_name.is_some()
1424                                || current_repository_url.is_some()
1425                            {
1426                                let mut repo = serde_json::Map::new();
1427                                if let Some(id) = current_repository_id.take() {
1428                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1429                                }
1430                                if let Some(name) = current_repository_name.take() {
1431                                    repo.insert(
1432                                        "name".to_string(),
1433                                        serde_json::Value::String(name),
1434                                    );
1435                                }
1436                                if let Some(url) = current_repository_url.take() {
1437                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1438                                }
1439                                plugin_repositories.push(repo);
1440                            }
1441                        }
1442                        b"repositories" => in_repositories = false,
1443                        b"properties" => in_properties = false,
1444                        b"pluginRepositories" => in_plugin_repositories = false,
1445                        b"modules" => in_modules = false,
1446                        b"mailingLists" => in_mailing_lists = false,
1447                        b"mailingList" => {
1448                            in_mailing_list = false;
1449                            if current_mailing_list_name.is_some()
1450                                || current_mailing_list_subscribe.is_some()
1451                                || current_mailing_list_unsubscribe.is_some()
1452                                || current_mailing_list_post.is_some()
1453                                || current_mailing_list_archive.is_some()
1454                            {
1455                                let mut ml = serde_json::Map::new();
1456                                if let Some(name) = current_mailing_list_name.take() {
1457                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1458                                }
1459                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1460                                    ml.insert(
1461                                        "subscribe".to_string(),
1462                                        serde_json::Value::String(subscribe),
1463                                    );
1464                                }
1465                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1466                                    ml.insert(
1467                                        "unsubscribe".to_string(),
1468                                        serde_json::Value::String(unsubscribe),
1469                                    );
1470                                }
1471                                if let Some(post) = current_mailing_list_post.take() {
1472                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1473                                }
1474                                if let Some(archive) = current_mailing_list_archive.take() {
1475                                    ml.insert(
1476                                        "archive".to_string(),
1477                                        serde_json::Value::String(archive),
1478                                    );
1479                                }
1480                                mailing_lists.push(ml);
1481                            }
1482                        }
1483                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1484                        b"site" => in_dist_site = false,
1485                        _ => {}
1486                    }
1487                }
1488                Ok(Event::Eof) => break,
1489                Err(e) => {
1490                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1491                    return vec![package_data];
1492                }
1493                _ => {}
1494            }
1495            buf.clear();
1496        }
1497
1498        let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1499            namespace: &package_data.namespace,
1500            name: &package_data.name,
1501            version: &package_data.version,
1502            parent_group_id: &parent_group_id,
1503            parent_artifact_id: &parent_artifact_id,
1504            parent_version: &parent_version,
1505            project_name: &project_name,
1506            project_packaging: &project_packaging,
1507        });
1508        let mut resolver = PropertyResolver::new(properties, builtins);
1509
1510        resolve_option(&mut resolver, &mut package_data.namespace);
1511        resolve_option(&mut resolver, &mut package_data.name);
1512        resolve_option(&mut resolver, &mut package_data.version);
1513        resolve_option(&mut resolver, &mut package_data.homepage_url);
1514        resolve_option(&mut resolver, &mut inception_year);
1515        resolve_option(&mut resolver, &mut scm_connection);
1516        resolve_option(&mut resolver, &mut scm_developer_connection);
1517        resolve_option(&mut resolver, &mut scm_url);
1518        resolve_option(&mut resolver, &mut scm_tag);
1519        resolve_option(&mut resolver, &mut organization_name);
1520        resolve_option(&mut resolver, &mut organization_url);
1521        resolve_option(&mut resolver, &mut issue_management_system);
1522        resolve_option(&mut resolver, &mut issue_management_url);
1523        resolve_option(&mut resolver, &mut ci_management_system);
1524        resolve_option(&mut resolver, &mut ci_management_url);
1525        resolve_option(&mut resolver, &mut dist_download_url);
1526        resolve_option(&mut resolver, &mut dist_repository_id);
1527        resolve_option(&mut resolver, &mut dist_repository_name);
1528        resolve_option(&mut resolver, &mut dist_repository_url);
1529        resolve_option(&mut resolver, &mut dist_repository_layout);
1530        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1531        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1532        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1533        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1534        resolve_option(&mut resolver, &mut dist_site_id);
1535        resolve_option(&mut resolver, &mut dist_site_name);
1536        resolve_option(&mut resolver, &mut dist_site_url);
1537        resolve_option(&mut resolver, &mut parent_group_id);
1538        resolve_option(&mut resolver, &mut parent_artifact_id);
1539        resolve_option(&mut resolver, &mut parent_version);
1540        resolve_option(&mut resolver, &mut parent_relative_path);
1541        resolve_option(&mut resolver, &mut project_name);
1542        resolve_option(&mut resolver, &mut project_description);
1543        resolve_option(&mut resolver, &mut project_packaging);
1544        resolve_option(&mut resolver, &mut project_classifier);
1545        resolve_vec(&mut resolver, &mut modules);
1546        resolve_maps(&mut resolver, &mut repositories);
1547        resolve_maps(&mut resolver, &mut plugin_repositories);
1548        resolve_maps(&mut resolver, &mut mailing_lists);
1549        for comment in &mut xml_license_comments {
1550            *comment = resolver.resolve_text(comment, 0);
1551        }
1552        for dependency in &mut dependency_management_entries {
1553            resolve_dependency_data(&mut resolver, dependency);
1554        }
1555        resolve_dependency_data(&mut resolver, &mut relocation);
1556        for license in &mut licenses {
1557            resolve_license_entry(&mut resolver, license);
1558        }
1559        for comment in xml_license_comments {
1560            if !comment.trim().is_empty() {
1561                licenses.push(MavenLicenseEntry {
1562                    comments: Some(comment),
1563                    ..Default::default()
1564                });
1565            }
1566        }
1567
1568        for (dependency, coords) in package_data
1569            .dependencies
1570            .iter_mut()
1571            .zip(dependency_data.iter_mut())
1572        {
1573            resolve_dependency_data(&mut resolver, coords);
1574            dependency.scope = coords.scope.clone();
1575            dependency.extracted_requirement = coords.version.clone();
1576            dependency.extra_data = dependency_extra_data(coords);
1577            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1578
1579            match dependency.scope.as_deref() {
1580                Some("test") | Some("provided") => {
1581                    dependency.is_runtime = Some(false);
1582                    dependency.is_optional = Some(true);
1583                }
1584                Some(_) => {
1585                    dependency.is_runtime = Some(true);
1586                }
1587                None => {
1588                    dependency.is_runtime = None;
1589                }
1590            }
1591
1592            if let Some(version) = &coords.version {
1593                dependency.is_pinned = Some(is_maven_version_pinned(version));
1594            }
1595
1596            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1597                dependency.purl = Some(build_maven_purl(
1598                    group_id,
1599                    artifact_id,
1600                    coords.version.as_deref(),
1601                    coords.classifier.as_deref(),
1602                    coords.type_.as_deref(),
1603                ));
1604            }
1605        }
1606
1607        if package_data.namespace.is_none() {
1608            package_data.namespace = parent_group_id.clone();
1609        }
1610        if package_data.version.is_none() {
1611            package_data.version = parent_version.clone();
1612        }
1613
1614        package_data.qualifiers =
1615            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1616
1617        package_data.description = match (
1618            project_name.as_deref().filter(|value| !value.is_empty()),
1619            project_description
1620                .as_deref()
1621                .filter(|value| !value.is_empty()),
1622        ) {
1623            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1624            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1625            (Some(name), None) => Some(name.to_string()),
1626            (None, Some(description)) => Some(description.to_string()),
1627            (None, None) => None,
1628        };
1629
1630        if path.to_string_lossy().contains("META-INF/maven/") {
1631            let path_str = path.to_string_lossy();
1632            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1633                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1634                let parts: Vec<&str> = after_maven.split('/').collect();
1635                if parts.len() >= 2 {
1636                    if package_data.namespace.is_none() {
1637                        package_data.namespace = Some(parts[0].to_string());
1638                    }
1639                    if package_data.name.is_none() {
1640                        package_data.name = Some(parts[1].to_string());
1641                    }
1642                }
1643            }
1644        }
1645
1646        // Construct PURL from parsed data
1647        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1648            &package_data.namespace,
1649            &package_data.name,
1650            &package_data.version,
1651        ) {
1652            package_data.purl = Some(build_maven_purl(
1653                group_id,
1654                artifact_id,
1655                Some(version),
1656                project_classifier.as_deref(),
1657                project_packaging.as_deref(),
1658            ));
1659            if project_classifier.is_none() {
1660                package_data
1661                    .source_packages
1662                    .push(build_maven_source_package(group_id, artifact_id, version));
1663            }
1664        }
1665
1666        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1667            package_data.repository_homepage_url = build_maven_url(
1668                &package_data.namespace,
1669                &package_data.name,
1670                &package_data.version,
1671                None,
1672            );
1673
1674            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1675                build_maven_download_url(
1676                    group_id,
1677                    artifact_id,
1678                    ver,
1679                    project_classifier.as_deref(),
1680                    project_packaging.as_deref(),
1681                )
1682            });
1683
1684            if let Some(ver) = &package_data.version {
1685                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1686                package_data.api_data_url = build_maven_url(
1687                    &package_data.namespace,
1688                    &package_data.name,
1689                    &package_data.version,
1690                    Some(&pom_filename),
1691                );
1692            }
1693        }
1694
1695        package_data.vcs_url = scm_connection
1696            .or_else(|| scm_developer_connection.clone())
1697            .or_else(|| scm_url.clone());
1698
1699        // Set code_view_url from scm/url (human-browseable URL)
1700        if let Some(url) = &scm_url {
1701            package_data.code_view_url = Some(url.clone());
1702        }
1703
1704        // Set bug_tracking_url from issueManagement/url
1705        if let Some(url) = &issue_management_url {
1706            package_data.bug_tracking_url = Some(url.clone());
1707        }
1708
1709        // Map downloadUrl to download_url field
1710        if let Some(url) = &dist_download_url {
1711            package_data.download_url = Some(url.clone());
1712        }
1713
1714        if organization_name.is_some() || organization_url.is_some() {
1715            package_data.parties.push(Party {
1716                r#type: Some("organization".to_string()),
1717                role: Some("owner".to_string()),
1718                name: organization_name.clone(),
1719                email: None,
1720                url: organization_url.clone(),
1721                organization: None,
1722                organization_url: None,
1723                timezone: None,
1724            });
1725        }
1726
1727        for dependency in &dependency_management_entries {
1728            if dependency.scope.as_deref() == Some("import")
1729                && let Some(import_dependency) =
1730                    maven_dependency_to_dependency(dependency, Some("import"), true)
1731            {
1732                package_data.dependencies.push(import_dependency);
1733            }
1734
1735            // Import-scoped BOMs carry two distinct facts in the declared POM:
1736            // this project explicitly imports that BOM, and the imported BOM
1737            // contributes managed constraints. Keep both normalized rows in the
1738            // ordinary dependency stream so generic dependency consumers can see
1739            // BOM provenance (`scope=import`) without inspecting Maven-specific
1740            // extra_data, while still getting the managed-constraint view
1741            // (`scope=dependencymanagement`).
1742            let mut dependency_management_copy = dependency.clone();
1743            dependency_management_copy.scope = Some("dependencymanagement".to_string());
1744
1745            if let Some(converted) = maven_dependency_to_dependency(
1746                &dependency_management_copy,
1747                Some("dependencymanagement"),
1748                true,
1749            ) {
1750                package_data.dependencies.push(converted);
1751            }
1752        }
1753
1754        if (relocation.group_id.is_some()
1755            || relocation.artifact_id.is_some()
1756            || relocation.version.is_some())
1757            && let Some(converted) =
1758                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1759        {
1760            package_data.dependencies.push(converted);
1761        }
1762
1763        if inception_year.is_some()
1764            || organization_name.is_some()
1765            || organization_url.is_some()
1766            || scm_tag.is_some()
1767            || scm_developer_connection.is_some()
1768            || issue_management_system.is_some()
1769            || ci_management_system.is_some()
1770            || ci_management_url.is_some()
1771            || dist_download_url.is_some()
1772            || dist_repository_id.is_some()
1773            || dist_snapshot_repository_id.is_some()
1774            || dist_site_id.is_some()
1775            || !repositories.is_empty()
1776            || !plugin_repositories.is_empty()
1777            || !modules.is_empty()
1778            || !mailing_lists.is_empty()
1779            || !dependency_management_entries.is_empty()
1780            || parent_group_id.is_some()
1781            || relocation.group_id.is_some()
1782            || relocation.artifact_id.is_some()
1783            || relocation.version.is_some()
1784            || relocation.message.is_some()
1785        {
1786            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1787            if let Some(year) = inception_year {
1788                extra_data.insert(
1789                    "inception_year".to_string(),
1790                    serde_json::Value::String(year),
1791                );
1792            }
1793            if let Some(name) = organization_name {
1794                extra_data.insert(
1795                    "organization_name".to_string(),
1796                    serde_json::Value::String(name),
1797                );
1798            }
1799            if let Some(url) = organization_url {
1800                extra_data.insert(
1801                    "organization_url".to_string(),
1802                    serde_json::Value::String(url),
1803                );
1804            }
1805            if let Some(tag) = scm_tag {
1806                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1807            }
1808            if let Some(dev_conn) = scm_developer_connection {
1809                extra_data.insert(
1810                    "scm_developer_connection".to_string(),
1811                    serde_json::Value::String(dev_conn),
1812                );
1813            }
1814            if let Some(system) = issue_management_system {
1815                extra_data.insert(
1816                    "issue_tracking_system".to_string(),
1817                    serde_json::Value::String(system),
1818                );
1819            }
1820            if let Some(system) = ci_management_system {
1821                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1822            }
1823            if let Some(url) = ci_management_url {
1824                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1825            }
1826
1827            // Add distribution management data
1828            if let Some(url) = dist_download_url {
1829                extra_data.insert(
1830                    "distribution_download_url".to_string(),
1831                    serde_json::Value::String(url),
1832                );
1833            }
1834
1835            // Build repository object
1836            if dist_repository_id.is_some()
1837                || dist_repository_name.is_some()
1838                || dist_repository_url.is_some()
1839                || dist_repository_layout.is_some()
1840            {
1841                let mut repo = serde_json::Map::new();
1842                if let Some(id) = dist_repository_id {
1843                    repo.insert("id".to_string(), serde_json::Value::String(id));
1844                }
1845                if let Some(name) = dist_repository_name {
1846                    repo.insert("name".to_string(), serde_json::Value::String(name));
1847                }
1848                if let Some(url) = dist_repository_url {
1849                    repo.insert("url".to_string(), serde_json::Value::String(url));
1850                }
1851                if let Some(layout) = dist_repository_layout {
1852                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1853                }
1854                extra_data.insert(
1855                    "distribution_repository".to_string(),
1856                    serde_json::Value::Object(repo),
1857                );
1858            }
1859
1860            // Build snapshotRepository object
1861            if dist_snapshot_repository_id.is_some()
1862                || dist_snapshot_repository_name.is_some()
1863                || dist_snapshot_repository_url.is_some()
1864                || dist_snapshot_repository_layout.is_some()
1865            {
1866                let mut repo = serde_json::Map::new();
1867                if let Some(id) = dist_snapshot_repository_id {
1868                    repo.insert("id".to_string(), serde_json::Value::String(id));
1869                }
1870                if let Some(name) = dist_snapshot_repository_name {
1871                    repo.insert("name".to_string(), serde_json::Value::String(name));
1872                }
1873                if let Some(url) = dist_snapshot_repository_url {
1874                    repo.insert("url".to_string(), serde_json::Value::String(url));
1875                }
1876                if let Some(layout) = dist_snapshot_repository_layout {
1877                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1878                }
1879                extra_data.insert(
1880                    "distribution_snapshot_repository".to_string(),
1881                    serde_json::Value::Object(repo),
1882                );
1883            }
1884
1885            // Build site object
1886            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1887                let mut site = serde_json::Map::new();
1888                if let Some(id) = dist_site_id {
1889                    site.insert("id".to_string(), serde_json::Value::String(id));
1890                }
1891                if let Some(name) = dist_site_name {
1892                    site.insert("name".to_string(), serde_json::Value::String(name));
1893                }
1894                if let Some(url) = dist_site_url {
1895                    site.insert("url".to_string(), serde_json::Value::String(url));
1896                }
1897                extra_data.insert(
1898                    "distribution_site".to_string(),
1899                    serde_json::Value::Object(site),
1900                );
1901            }
1902
1903            if !repositories.is_empty() {
1904                extra_data.insert(
1905                    "repositories".to_string(),
1906                    serde_json::Value::Array(
1907                        repositories
1908                            .into_iter()
1909                            .map(serde_json::Value::Object)
1910                            .collect(),
1911                    ),
1912                );
1913            }
1914
1915            if !plugin_repositories.is_empty() {
1916                extra_data.insert(
1917                    "plugin_repositories".to_string(),
1918                    serde_json::Value::Array(
1919                        plugin_repositories
1920                            .into_iter()
1921                            .map(serde_json::Value::Object)
1922                            .collect(),
1923                    ),
1924                );
1925            }
1926
1927            if !modules.is_empty() {
1928                extra_data.insert(
1929                    "modules".to_string(),
1930                    serde_json::Value::Array(
1931                        modules.into_iter().map(serde_json::Value::String).collect(),
1932                    ),
1933                );
1934            }
1935
1936            if !mailing_lists.is_empty() {
1937                extra_data.insert(
1938                    "mailing_lists".to_string(),
1939                    serde_json::Value::Array(
1940                        mailing_lists
1941                            .into_iter()
1942                            .map(serde_json::Value::Object)
1943                            .collect(),
1944                    ),
1945                );
1946            }
1947
1948            if !dependency_management_entries.is_empty() {
1949                extra_data.insert(
1950                    "dependency_management".to_string(),
1951                    serde_json::Value::Array(
1952                        dependency_management_entries
1953                            .into_iter()
1954                            .map(|dependency| {
1955                                serde_json::Value::Object(dependency_management_entry_to_value(
1956                                    &dependency,
1957                                ))
1958                            })
1959                            .collect(),
1960                    ),
1961                );
1962            }
1963
1964            if relocation.group_id.is_some()
1965                || relocation.artifact_id.is_some()
1966                || relocation.version.is_some()
1967                || relocation.message.is_some()
1968            {
1969                extra_data.insert(
1970                    "relocation".to_string(),
1971                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1972                );
1973            }
1974
1975            if parent_group_id.is_some()
1976                || parent_artifact_id.is_some()
1977                || parent_version.is_some()
1978                || parent_relative_path.is_some()
1979            {
1980                let mut parent_obj = serde_json::Map::new();
1981                if let Some(group_id) = parent_group_id {
1982                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1983                }
1984                if let Some(artifact_id) = parent_artifact_id {
1985                    parent_obj.insert(
1986                        "artifactId".to_string(),
1987                        serde_json::Value::String(artifact_id),
1988                    );
1989                }
1990                if let Some(version) = parent_version {
1991                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1992                }
1993                if let Some(relative_path) = parent_relative_path {
1994                    parent_obj.insert(
1995                        "relativePath".to_string(),
1996                        serde_json::Value::String(relative_path),
1997                    );
1998                }
1999                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
2000            }
2001
2002            package_data.extra_data = Some(extra_data);
2003        }
2004
2005        package_data.extracted_license_statement =
2006            build_license_statement(&licenses).map(truncate_field);
2007        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
2008            build_maven_declared_license_data(
2009                &licenses,
2010                package_data.extracted_license_statement.as_deref(),
2011            );
2012        package_data.declared_license_expression = declared_license_expression;
2013        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
2014        package_data.license_detections = license_detections;
2015
2016        package_data.namespace = package_data.namespace.map(truncate_field);
2017        package_data.name = package_data.name.map(truncate_field);
2018        package_data.version = package_data.version.map(truncate_field);
2019        package_data.description = package_data.description.map(truncate_field);
2020        package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2021        package_data.vcs_url = package_data.vcs_url.map(truncate_field);
2022        package_data.purl = package_data.purl.map(truncate_field);
2023        package_data.code_view_url = package_data.code_view_url.map(truncate_field);
2024        package_data.bug_tracking_url = package_data.bug_tracking_url.map(truncate_field);
2025        package_data.download_url = package_data.download_url.map(truncate_field);
2026        package_data.repository_homepage_url =
2027            package_data.repository_homepage_url.map(truncate_field);
2028        package_data.repository_download_url =
2029            package_data.repository_download_url.map(truncate_field);
2030        package_data.api_data_url = package_data.api_data_url.map(truncate_field);
2031        for dep in &mut package_data.dependencies {
2032            dep.purl = dep.purl.take().map(truncate_field);
2033            dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2034        }
2035
2036        vec![package_data]
2037    }
2038
2039    fn is_match(path: &Path) -> bool {
2040        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
2041            filename == "pom.xml"
2042                || filename.ends_with(".pom.xml")
2043                || filename.ends_with("-pom.xml")
2044                || filename == "pom.properties"
2045                || filename == "MANIFEST.MF"
2046                || filename.ends_with(".pom")
2047        } else {
2048            false
2049        }
2050    }
2051}
2052
/// Builds a URL under `https://repo1.maven.org/maven2` for the given coordinates.
///
/// The group id has its dots replaced by `/`. The resulting URL has the shape
/// `BASE/<group path>/<artifact>/<version>/<filename>`, where the version
/// segment is omitted when `version` is `None` and a missing `filename` is
/// treated as the empty string (so the URL then ends with a trailing `/`).
///
/// Returns `None` when either `group_id` or `artifact_id` is `None`.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (Some(group), Some(artifact)) = (group_id.as_deref(), artifact_id.as_deref()) else {
        return None;
    };

    let group_path = group.replace('.', "/");

    // Collect the path segments in order and join them once at the end.
    let mut segments: Vec<&str> = vec![BASE_URL, group_path.as_str(), artifact];
    if let Some(ver) = version.as_deref() {
        segments.push(ver);
    }
    // An absent filename still contributes an empty final segment.
    segments.push(filename.unwrap_or(""));

    Some(segments.join("/"))
}
2081
2082fn build_maven_declared_license_data(
2083    licenses: &[MavenLicenseEntry],
2084    matched_text: Option<&str>,
2085) -> (
2086    Option<String>,
2087    Option<String>,
2088    Vec<crate::models::LicenseDetection>,
2089) {
2090    let normalized: Vec<_> = licenses
2091        .iter()
2092        .filter_map(|license| license.name.as_deref())
2093        .filter_map(normalize_maven_license_name)
2094        .collect();
2095
2096    if normalized.is_empty() {
2097        return empty_declared_license_data();
2098    }
2099
2100    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2101        return empty_declared_license_data();
2102    };
2103
2104    build_declared_license_data(
2105        combined,
2106        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2107    )
2108}
2109
2110fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2111    match name.trim() {
2112        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2113            "public-domain",
2114            "LicenseRef-provenant-public-domain",
2115        )),
2116        other => normalize_declared_license_key(other),
2117    }
2118}
2119
2120/// Parse pom.properties file (Java properties format)
2121fn parse_pom_properties(path: &Path) -> PackageData {
2122    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2123        Ok(content) => content,
2124        Err(e) => {
2125            warn!("Failed to read pom.properties at {:?}: {}", path, e);
2126            return PackageData {
2127                package_type: Some(PackageType::Maven),
2128                primary_language: Some("Java".to_string()),
2129                datasource_id: Some(DatasourceId::MavenPomProperties),
2130                ..Default::default()
2131            };
2132        }
2133    };
2134
2135    let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2136    package_data.package_type = Some(PackageType::Maven);
2137    package_data.primary_language = Some("Java".to_string());
2138    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2139
2140    let mut group_id: Option<String> = None;
2141    let mut artifact_id: Option<String> = None;
2142    let mut version: Option<String> = None;
2143
2144    // Parse Java properties format
2145    let mut continuation = String::new();
2146
2147    for line in content.lines() {
2148        let current_line = if continuation.is_empty() {
2149            line.to_string()
2150        } else {
2151            format!("{}{}", continuation, line)
2152        };
2153        continuation.clear();
2154
2155        // Check for line continuation (backslash at end)
2156        if current_line.ends_with('\\') {
2157            continuation = current_line[..current_line.len() - 1].to_string();
2158            continue;
2159        }
2160
2161        // Skip comments and empty lines
2162        let trimmed = current_line.trim();
2163        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2164            continue;
2165        }
2166
2167        // Parse key=value
2168        if let Some(eq_pos) = current_line.find('=') {
2169            let key = current_line[..eq_pos].trim();
2170            let value = current_line[eq_pos + 1..].trim();
2171
2172            match key {
2173                "groupId" => group_id = Some(value.to_string()),
2174                "artifactId" => artifact_id = Some(value.to_string()),
2175                "version" => version = Some(value.to_string()),
2176                _ => {}
2177            }
2178        }
2179    }
2180
2181    package_data.namespace = group_id.map(truncate_field);
2182    package_data.name = artifact_id.map(truncate_field);
2183    package_data.version = version.map(truncate_field);
2184
2185    // Generate PURL
2186    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2187        &package_data.namespace,
2188        &package_data.name,
2189        &package_data.version,
2190    ) {
2191        package_data.purl = Some(truncate_field(format!(
2192            "pkg:maven/{}/{}@{}",
2193            group_id, artifact_id, version
2194        )));
2195    }
2196
2197    package_data
2198}
2199
2200/// Parse MANIFEST.MF file (JAR manifest format)
2201///
2202/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2203/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2204/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2205/// dependencies.
2206fn parse_manifest_mf(path: &Path) -> PackageData {
2207    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2208        Ok(content) => content,
2209        Err(e) => {
2210            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2211            return default_package_data(DatasourceId::JavaJarManifest);
2212        }
2213    };
2214
2215    let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2216
2217    // Parse manifest headers (RFC822-style with space continuations)
2218    let mut headers: Vec<(String, String)> = Vec::new();
2219    let mut current_key: Option<String> = None;
2220    let mut current_value = String::new();
2221
2222    for line in content.lines() {
2223        if line.starts_with(' ') || line.starts_with('\t') {
2224            // Continuation line
2225            current_value.push_str(line.trim());
2226        } else if let Some(colon_pos) = line.find(':') {
2227            // Save previous header
2228            if let Some(key) = current_key.take() {
2229                headers.push((key, current_value.trim().to_string()));
2230                current_value.clear();
2231            }
2232
2233            // Start new header
2234            let key = line[..colon_pos].trim().to_string();
2235            let value = line[colon_pos + 1..].trim().to_string();
2236            current_key = Some(key);
2237            current_value = value;
2238        }
2239    }
2240
2241    // Save last header
2242    if let Some(key) = current_key {
2243        headers.push((key, current_value.trim().to_string()));
2244    }
2245
2246    // Convert headers to HashMap for easier lookup
2247    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2248
2249    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2250    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2251    let is_osgi = bundle_symbolic_name.is_some();
2252
2253    if is_osgi {
2254        // OSGi bundle - extract OSGi-specific metadata
2255        package_data.package_type = Some(PackageType::Osgi);
2256        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2257
2258        // Bundle-SymbolicName is the canonical name for OSGi bundles
2259        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2260        if let Some(bsn) = bundle_symbolic_name {
2261            let name = if let Some(semicolon_pos) = bsn.find(';') {
2262                bsn[..semicolon_pos].trim().to_string()
2263            } else {
2264                bsn.clone()
2265            };
2266            package_data.name = Some(name);
2267        }
2268
2269        // Bundle-Version
2270        package_data.version = headers_map.get("Bundle-Version").cloned();
2271
2272        // Bundle-Description takes priority over Bundle-Name for description
2273        if let Some(desc) = headers_map.get("Bundle-Description") {
2274            package_data.description = Some(desc.clone());
2275        } else if let Some(name) = headers_map.get("Bundle-Name") {
2276            package_data.description = Some(name.clone());
2277        }
2278
2279        // Bundle-Vendor
2280        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2281            package_data.parties.push(Party {
2282                r#type: Some("organization".to_string()),
2283                role: Some("vendor".to_string()),
2284                name: Some(vendor.clone()),
2285                email: None,
2286                url: None,
2287                organization: None,
2288                organization_url: None,
2289                timezone: None,
2290            });
2291        }
2292
2293        // Bundle-DocURL
2294        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2295
2296        // Bundle-License
2297        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2298
2299        // Import-Package -> dependencies with scope "import"
2300        if let Some(import_pkg) = headers_map.get("Import-Package") {
2301            let deps = parse_osgi_package_list(import_pkg, "import");
2302            package_data.dependencies.extend(deps);
2303        }
2304
2305        // Require-Bundle -> dependencies with scope "require-bundle"
2306        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2307            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2308            package_data.dependencies.extend(deps);
2309        }
2310
2311        // Export-Package -> store in extra_data
2312        if let Some(export_pkg) = headers_map.get("Export-Package") {
2313            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2314            extra_data.insert(
2315                "export_packages".to_string(),
2316                serde_json::Value::String(export_pkg.clone()),
2317            );
2318            package_data.extra_data = Some(extra_data);
2319        }
2320
2321        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2322        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2323            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2324        }
2325    } else {
2326        // Regular JAR manifest
2327        package_data.package_type = Some(PackageType::Maven);
2328        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2329
2330        // Extract fields with priority order for non-OSGi JARs
2331        let mut name: Option<String> = None;
2332        let mut version: Option<String> = None;
2333        let mut vendor: Option<String> = None;
2334
2335        for (key, value) in &headers {
2336            match key.as_str() {
2337                "Bundle-Name" if name.is_none() => {
2338                    name = Some(value.clone());
2339                }
2340                "Implementation-Title" if name.is_none() => {
2341                    name = Some(value.clone());
2342                }
2343                "Bundle-Version" if version.is_none() => {
2344                    version = Some(value.clone());
2345                }
2346                "Implementation-Version" if version.is_none() => {
2347                    version = Some(value.clone());
2348                }
2349                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2350                    vendor = Some(value.clone());
2351                }
2352                _ => {}
2353            }
2354        }
2355
2356        package_data.name = name;
2357        package_data.version = version;
2358
2359        // Add vendor to parties if present
2360        if let Some(vendor_name) = vendor {
2361            package_data.parties.push(Party {
2362                r#type: Some("organization".to_string()),
2363                role: Some("vendor".to_string()),
2364                name: Some(vendor_name),
2365                email: None,
2366                url: None,
2367                organization: None,
2368                organization_url: None,
2369                timezone: None,
2370            });
2371        }
2372
2373        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2374        if let Some(path_str) = path.to_str()
2375            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2376        {
2377            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2378            let parts: Vec<&str> = after_maven.split('/').collect();
2379            if parts.len() >= 2 {
2380                package_data.namespace = Some(parts[0].to_string());
2381            }
2382        }
2383
2384        // Generate Maven PURL if we have enough information
2385        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2386            &package_data.namespace,
2387            &package_data.name,
2388            &package_data.version,
2389        ) {
2390            package_data.purl = Some(format!(
2391                "pkg:maven/{}/{}@{}",
2392                group_id, artifact_id, version
2393            ));
2394        } else if package_data.name.is_none() && package_data.version.is_none() {
2395            // A bare MANIFEST.MF without Maven coordinates or implementation
2396            // identity is only evidence of a generic JAR manifest, not a Maven
2397            // package. Keep the Java manifest datasource so assembly can still
2398            // merge richer sibling metadata when present.
2399            package_data.package_type = Some(PackageType::Jar);
2400        }
2401    }
2402
2403    package_data.name = package_data.name.map(truncate_field);
2404    package_data.version = package_data.version.map(truncate_field);
2405    package_data.namespace = package_data.namespace.map(truncate_field);
2406    package_data.description = package_data.description.map(truncate_field);
2407    package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2408    package_data.extracted_license_statement =
2409        package_data.extracted_license_statement.map(truncate_field);
2410    package_data.purl = package_data.purl.map(truncate_field);
2411    for dep in &mut package_data.dependencies {
2412        dep.purl = dep.purl.take().map(truncate_field);
2413        dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2414    }
2415
2416    package_data
2417}
2418
2419/// Parse OSGi Import-Package header into dependencies.
2420///
2421/// Format: comma-separated list of packages with optional directives:
2422/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2423pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2424    let mut dependencies = Vec::new();
2425
2426    // Split by comma, but be careful not to split within quoted strings
2427    for package_entry in split_osgi_list(package_list)
2428        .into_iter()
2429        .take(MAX_ITERATION_COUNT)
2430    {
2431        let package_entry = package_entry.trim();
2432        if package_entry.is_empty() {
2433            continue;
2434        }
2435
2436        // Extract package name (before first semicolon)
2437        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2438            package_entry[..semicolon_pos].trim()
2439        } else {
2440            package_entry
2441        };
2442
2443        if package_name.is_empty() {
2444            continue;
2445        }
2446
2447        // Extract version directive if present
2448        let version_requirement = extract_osgi_version(package_entry);
2449        let is_optional = package_entry.contains("resolution:=optional");
2450
2451        dependencies.push(Dependency {
2452            purl: Some(format!("pkg:osgi/{}", package_name)),
2453            extracted_requirement: version_requirement,
2454            scope: Some(scope.to_string()),
2455            is_runtime: Some(true),
2456            is_optional: Some(is_optional),
2457            is_pinned: None,
2458            is_direct: Some(true),
2459            resolved_package: None,
2460            extra_data: None,
2461        });
2462    }
2463
2464    dependencies
2465}
2466
2467/// Parse OSGi Require-Bundle header into dependencies.
2468///
2469/// Format: comma-separated list of bundle symbolic names with optional directives:
2470/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2471pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2472    let mut dependencies = Vec::new();
2473
2474    for bundle_entry in split_osgi_list(bundle_list)
2475        .into_iter()
2476        .take(MAX_ITERATION_COUNT)
2477    {
2478        let bundle_entry = bundle_entry.trim();
2479        if bundle_entry.is_empty() {
2480            continue;
2481        }
2482
2483        // Extract bundle symbolic name (before first semicolon)
2484        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2485            bundle_entry[..semicolon_pos].trim()
2486        } else {
2487            bundle_entry
2488        };
2489
2490        if bundle_name.is_empty() {
2491            continue;
2492        }
2493
2494        // Extract bundle-version directive if present
2495        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2496
2497        // Check if optional
2498        let is_optional = bundle_entry.contains("resolution:=optional");
2499
2500        dependencies.push(Dependency {
2501            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2502            extracted_requirement: version_requirement,
2503            scope: Some(scope.to_string()),
2504            is_runtime: Some(!is_optional),
2505            is_optional: Some(is_optional),
2506            is_pinned: None,
2507            is_direct: Some(true),
2508            resolved_package: None,
2509            extra_data: None,
2510        });
2511    }
2512
2513    dependencies
2514}
2515
/// Split an OSGi header value on commas that are outside double quotes.
///
/// Quoted strings may themselves contain commas, as in
/// "foo;version=\"[1.0,2.0)\",bar;version=\"3.0\"", and those commas do not
/// separate entries. Each returned entry is trimmed, and entries that are
/// empty after trimming are dropped. Quote characters are kept in the output.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut entries = Vec::new();
    let mut inside_quotes = false;
    let mut segment_start = 0;

    for (idx, ch) in list.char_indices() {
        if ch == '"' {
            inside_quotes = !inside_quotes;
        } else if ch == ',' && !inside_quotes {
            let segment = list[segment_start..idx].trim();
            if !segment.is_empty() {
                entries.push(segment.to_string());
            }
            // A comma is one byte wide, so the next segment starts right after it.
            segment_start = idx + 1;
        }
    }

    let tail = list[segment_start..].trim();
    if !tail.is_empty() {
        entries.push(tail.to_string());
    }

    entries
}
2549
/// Extract the value of the parameter named `directive` from one OSGi
/// header entry such as `org.foo;version="[1.0,2.0)";resolution:=optional`.
///
/// The entry is split into parameters on semicolons that are outside double
/// quotes, and the first parameter whose name (the text before its first
/// `=`, trimmed) equals `directive` exactly is used. Matching on the whole
/// name matters: a plain substring search for `version=` would also hit
/// `bundle-version=` and return the wrong value.
///
/// A quoted value is returned without its quotes; an unquoted value is
/// returned trimmed. Returns `None` when no parameter has that name or when
/// the matching value opens a quote that is never closed.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Byte offsets where a parameter ends: every unquoted `;`, then the end
    // of the entry.
    let mut in_quotes = false;
    let mut boundaries: Vec<usize> = Vec::new();
    for (idx, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => boundaries.push(idx),
            _ => {}
        }
    }
    boundaries.push(entry.len());

    let mut start = 0;
    for end in boundaries {
        let parameter = &entry[start..end];
        // Skip the one-byte `;` separator for the next parameter.
        start = end + 1;

        let Some((name, value)) = parameter.split_once('=') else {
            continue;
        };
        if name.trim() != directive {
            continue;
        }

        let value = value.trim();
        return match value.strip_prefix('"') {
            Some(quoted) => quoted.find('"').map(|close| quoted[..close].to_string()),
            None => Some(value.to_string()),
        };
    }

    None
}
2562
2563pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2564    extract_osgi_directive(entry, "version")
2565}
2566
2567pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2568    extract_osgi_directive(entry, "bundle-version")
2569}
2570
2571fn default_package_data(datasource_id: DatasourceId) -> PackageData {
2572    PackageData {
2573        package_type: Some(PackageType::Maven),
2574        datasource_id: Some(datasource_id),
2575        ..Default::default()
2576    }
2577}
2578
// Unit tests for POM metadata extraction. Each test writes a small pom.xml
// into a temporary directory, parses it with
// `MavenParser::extract_first_package`, and checks the resulting fields.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// `<organization>` name and url are exposed in `extra_data` as
    /// `organization_name` and `organization_url`, alongside the basic
    /// coordinates.
    #[test]
    fn test_organization_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        // Basic coordinates
        assert_eq!(package_data.name, Some("my-app".to_string()));
        assert_eq!(package_data.namespace, Some("com.example".to_string()));
        assert_eq!(package_data.version, Some("1.0.0".to_string()));

        // Organization details live in extra_data
        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("organization_name"),
            Some(&serde_json::Value::String(
                "Example Corporation".to_string()
            ))
        );
        assert_eq!(
            extra_data.get("organization_url"),
            Some(&serde_json::Value::String(
                "https://example.com".to_string()
            ))
        );
    }

    /// `<scm>` maps to `code_view_url` (url), `vcs_url` (connection, with the
    /// `scm:git:` prefix turned into `git+`), and `extra_data` entries for the
    /// tag and the developer connection.
    #[test]
    fn test_scm_metadata_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        // Basic coordinates (the POM declares the Maven namespace here)
        assert_eq!(
            package_data.name,
            Some("spring-boot-starter-web".to_string())
        );
        assert_eq!(
            package_data.namespace,
            Some("org.springframework.boot".to_string())
        );
        assert_eq!(package_data.version, Some("3.0.0".to_string()));

        // <scm><url> becomes the code view URL
        assert_eq!(
            package_data.code_view_url,
            Some("https://github.com/spring-projects/spring-boot".to_string())
        );

        // vcs_url prefers connection over developerConnection
        assert_eq!(
            package_data.vcs_url,
            Some("git+https://github.com/spring-projects/spring-boot.git".to_string())
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("scm_tag"),
            Some(&serde_json::Value::String("v3.0.0".to_string()))
        );
        // developerConnection stored separately in extra_data
        assert_eq!(
            extra_data.get("scm_developer_connection"),
            Some(&serde_json::Value::String(
                "git+git@github.com:spring-projects/spring-boot.git".to_string()
            ))
        );
    }

    /// `<developers>` and `<contributors>` become person parties, developers
    /// first and in document order, with role "developer" or "contributor".
    #[test]
    fn test_developers_and_contributors_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        // Two developers plus one contributor
        assert_eq!(package_data.name, Some("test-app".to_string()));
        assert_eq!(package_data.parties.len(), 3);

        // First developer: every optional field is populated
        let dev1 = &package_data.parties[0];
        assert_eq!(dev1.r#type, Some("person".to_string()));
        assert_eq!(dev1.role, Some("developer".to_string()));
        assert_eq!(dev1.name, Some("John Doe".to_string()));
        assert_eq!(dev1.email, Some("john@example.com".to_string()));
        assert_eq!(dev1.url, Some("https://example.com/jdoe".to_string()));
        assert_eq!(dev1.organization, Some("Example Corp".to_string()));
        assert_eq!(
            dev1.organization_url,
            Some("https://example.com".to_string())
        );
        assert_eq!(dev1.timezone, Some("America/New_York".to_string()));

        // Second developer: only name and email were given
        let dev2 = &package_data.parties[1];
        assert_eq!(dev2.r#type, Some("person".to_string()));
        assert_eq!(dev2.role, Some("developer".to_string()));
        assert_eq!(dev2.name, Some("Jane Smith".to_string()));
        assert_eq!(dev2.email, Some("jane@example.com".to_string()));

        // Contributor follows the developers
        let contrib = &package_data.parties[2];
        assert_eq!(contrib.r#type, Some("person".to_string()));
        assert_eq!(contrib.role, Some("contributor".to_string()));
        assert_eq!(contrib.name, Some("Bob Wilson".to_string()));
        assert_eq!(contrib.email, Some("bob@example.com".to_string()));
        assert_eq!(contrib.url, Some("https://example.com/bob".to_string()));
    }

    /// `<issueManagement>` url becomes `bug_tracking_url`; the system name is
    /// stored in `extra_data` as `issue_tracking_system`.
    #[test]
    fn test_issue_management_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name, Some("test-app".to_string()));
        assert_eq!(
            package_data.bug_tracking_url,
            Some("https://github.com/example/test-app/issues".to_string())
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("issue_tracking_system"),
            Some(&serde_json::Value::String("GitHub".to_string()))
        );
    }

    /// `<ciManagement>` system and url are stored in `extra_data` as
    /// `ci_system` and `ci_url`.
    #[test]
    fn test_ci_management_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name, Some("test-app".to_string()));

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("ci_system"),
            Some(&serde_json::Value::String("Jenkins".to_string()))
        );
        assert_eq!(
            extra_data.get("ci_url"),
            Some(&serde_json::Value::String(
                "https://ci.example.com/job/test-app".to_string()
            ))
        );
    }

    /// `<distributionManagement>`: the download URL is set on the package and
    /// mirrored in `extra_data`; repository, snapshot repository and site are
    /// stored in `extra_data` as JSON objects keyed by their child elements.
    #[test]
    fn test_distribution_management_extraction() {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");

        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#;

        fs::write(&pom_path, pom_content).unwrap();

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name, Some("test-app".to_string()));
        assert_eq!(
            package_data.download_url,
            Some("https://example.com/downloads".to_string())
        );

        let extra_data = package_data.extra_data.unwrap();

        // The download URL is also mirrored into extra_data
        assert_eq!(
            extra_data.get("distribution_download_url"),
            Some(&serde_json::Value::String(
                "https://example.com/downloads".to_string()
            ))
        );

        // <repository>
        let repo = extra_data
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(
            repo.get("id"),
            Some(&serde_json::Value::String("releases".to_string()))
        );
        assert_eq!(
            repo.get("name"),
            Some(&serde_json::Value::String("Release Repository".to_string()))
        );
        assert_eq!(
            repo.get("url"),
            Some(&serde_json::Value::String(
                "https://repo.example.com/releases".to_string()
            ))
        );
        assert_eq!(
            repo.get("layout"),
            Some(&serde_json::Value::String("default".to_string()))
        );

        // <snapshotRepository>
        let snapshot_repo = extra_data
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(
            snapshot_repo.get("id"),
            Some(&serde_json::Value::String("snapshots".to_string()))
        );
        assert_eq!(
            snapshot_repo.get("name"),
            Some(&serde_json::Value::String(
                "Snapshot Repository".to_string()
            ))
        );
        assert_eq!(
            snapshot_repo.get("url"),
            Some(&serde_json::Value::String(
                "https://repo.example.com/snapshots".to_string()
            ))
        );
        assert_eq!(
            snapshot_repo.get("layout"),
            Some(&serde_json::Value::String("default".to_string()))
        );

        // <site> (no layout element in the fixture)
        let site = extra_data
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(
            site.get("id"),
            Some(&serde_json::Value::String("site-deploy".to_string()))
        );
        assert_eq!(
            site.get("name"),
            Some(&serde_json::Value::String("Project Site".to_string()))
        );
        assert_eq!(
            site.get("url"),
            Some(&serde_json::Value::String(
                "https://example.com/site".to_string()
            ))
        );
    }
}
2955
// Invokes the crate-level `register_parser!` macro with the Maven parser's
// metadata.
//
// The glob list covers standalone `*.pom` files plus the three formats named
// in this module's header docs: `pom.xml`, `pom.properties`, and the JAR
// manifest at `META-INF/MANIFEST.MF`. Each pattern starts with `**/`.
//
// NOTE(review): the positional arguments look like (display name, path globs,
// package type, primary language, optional reference-documentation URL) —
// confirm against the `register_parser!` definition before reordering or
// adding arguments.
2956crate::register_parser!(
2957    "Apache Maven POM",
2958    &[
2959        "**/*.pom",
2960        "**/pom.xml",
2961        "**/pom.properties",
2962        "**/META-INF/MANIFEST.MF"
2963    ],
2964    "maven",
2965    "Java",
2966    Some("https://maven.apache.org/pom.html"),
2967);