Skip to main content

provenant/parsers/
maven.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Apache Maven pom.xml files.
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! Maven Project Object Model (POM) files.
8//!
9//! # Supported Formats
10//! - pom.xml (Project Object Model)
11//! - pom.properties
12//! - MANIFEST.MF (JAR manifest)
13//!
14//! # Key Features
15//! - Property value substitution (`${project.version}`)
16//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
17//! - Dependency scope handling (compile, test, provided, runtime, system)
18//! - Package URL (purl) generation
19//! - Multiple license support (combined with " OR ")
20//!
21//! # Implementation Notes
22//! - Uses quick-xml for XML parsing
23//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
24//! - Property substitution limited to prevent infinite loops
25//! - Direct dependencies: all in pom.xml are direct
26
27use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30use quick_xml::Reader;
31use quick_xml::events::Event;
32use std::borrow::Cow;
33use std::collections::{HashMap, HashSet};
34use std::path::Path;
35
36use super::PackageParser;
37use super::license_normalization::{
38    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
39    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
40};
41
/// Raw, string-typed fields collected for one Maven `<dependency>`-shaped entry.
///
/// Every field is optional and kept as text so that `${...}` placeholders can
/// be substituted (see `resolve_dependency_data`) before the entry is turned
/// into a `Dependency` or a JSON object.
#[derive(Clone, Default)]
struct MavenDependencyData {
    /// Group coordinate; required by `maven_dependency_to_dependency`.
    group_id: Option<String>,
    /// Artifact coordinate; required by `maven_dependency_to_dependency`.
    artifact_id: Option<String>,
    /// Version requirement text (exact version, range, or placeholder).
    version: Option<String>,
    /// Classifier; becomes the purl `classifier` qualifier when non-blank.
    classifier: Option<String>,
    /// Dependency type; passed as the packaging when building the purl.
    type_: Option<String>,
    /// Declared scope text, compared against `"test"` / `"provided"`.
    scope: Option<String>,
    /// Raw optional flag text; interpreted with `parse_maven_bool`.
    optional: Option<String>,
    /// System path text; surfaced only through `dependency_extra_data`.
    system_path: Option<String>,
    /// Free-form message; surfaced through extra data and the
    /// dependency-management JSON.
    // NOTE(review): presumably the relocation `<message>` — confirm against the parser.
    message: Option<String>,
}
54
/// Raw text fields collected for one license entry.
///
/// Rendered into the declared-license statement by `build_license_statement`;
/// blank fields are skipped there.
#[derive(Clone, Default)]
struct MavenLicenseEntry {
    /// License name text.
    name: Option<String>,
    /// License URL text.
    url: Option<String>,
    /// Free-form comments attached to the license entry.
    comments: Option<String>,
}
61
/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
///
/// Maven properties can reference other properties, creating dependency graphs. This resolver:
/// - Resolves nested placeholders: `${outer.${inner}}`
/// - Detects circular references: `${a}` → `${b}` → `${a}`
/// - Enforces depth limits to prevent stack overflow
/// - Enforces substitution limits to prevent DoS on pathological inputs
///
/// # Algorithm
///
/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
/// - `resolving_set`: For cycle detection (hash set lookup)
/// - `resolving_stack`: For error reporting (preserves path)
/// - `cache`: Memoizes resolved values to avoid redundant work
struct PropertyResolver {
    /// User-declared properties; consulted before `builtins` on lookup.
    raw: HashMap<String, String>,
    /// Built-in project properties; used only when `raw` has no entry.
    builtins: HashMap<String, String>,
    /// Fully processed values keyed by property name.
    cache: HashMap<String, String>,
    /// Keys currently being resolved (membership test for cycles).
    resolving_set: HashSet<String>,
    /// Same keys as `resolving_set`, in resolution order, for diagnostics.
    resolving_stack: Vec<String>,
    /// Maximum recursion depth accepted by `resolve_key` / `resolve_text`.
    max_depth: usize,
    /// Maximum number of bytes a single `resolve_text` call may produce.
    max_output_len: usize,
    /// Maximum number of placeholders processed by a single `resolve_text` call.
    max_substitutions: usize,
    /// `kind:key` tokens for warnings already emitted by `warn_once`.
    warned_keys: HashSet<String>,
}
87
impl PropertyResolver {
    /// Creates a resolver over user properties (`raw`) and built-in project
    /// properties (`builtins`) with empty caches and the default limits:
    /// depth 10, 100 000 output bytes, 1000 substitutions per text.
    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
        Self {
            raw,
            builtins,
            cache: HashMap::new(),
            resolving_set: HashSet::new(),
            resolving_stack: Vec::new(),
            max_depth: 10,
            max_output_len: 100_000,
            max_substitutions: 1000,
            warned_keys: HashSet::new(),
        }
    }

    /// Resolves a single property name to its substituted value.
    ///
    /// Returns `None` when the key is unknown, when the depth limit is hit,
    /// or when the key is already being resolved (a cycle). Successful
    /// results are memoized in `cache`.
    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
        // Fast path: this key was already resolved earlier.
        if let Some(value) = self.cache.get(key) {
            return Some(value.clone());
        }

        if depth >= self.max_depth {
            self.warn_once(
                "depth",
                key,
                format!("Maven property depth limit hit resolving {key}"),
            );
            return None;
        }

        if self.resolving_set.contains(key) {
            // A property that directly references itself (the key is on top of
            // the stack) is left unresolved without emitting a warning.
            if self
                .resolving_stack
                .last()
                .is_some_and(|current| current == key)
            {
                return None;
            }

            // Indirect cycle: report it once together with the resolution path.
            self.warn_once(
                "cycle",
                key,
                format!(
                    "Maven property cycle detected at {key}: {:?}",
                    self.resolving_stack
                ),
            );
            return None;
        }

        // User-declared properties take precedence over built-ins.
        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
            value.clone()
        } else {
            return None;
        };

        // Mark the key as in-flight while its value is expanded so nested
        // references back to it are detected above.
        self.resolving_set.insert(key.to_string());
        self.resolving_stack.push(key.to_string());

        let resolved = self.resolve_text(&raw_val, depth + 1);

        self.resolving_stack.pop();
        self.resolving_set.remove(key);

        self.cache.insert(key.to_string(), resolved.clone());
        Some(resolved)
    }

    /// Substitutes every `${...}` placeholder in `text`.
    ///
    /// Placeholders whose key cannot be resolved are copied to the output
    /// verbatim. If the depth limit, the substitution limit or the output
    /// length limit is hit, or a placeholder is not terminated, the original
    /// `text` is returned unchanged.
    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
        // Fast path: nothing to substitute.
        if !text.contains("${") {
            return text.to_string();
        }

        if depth >= self.max_depth {
            warn!("Maven property depth limit hit resolving text");
            return text.to_string();
        }

        let bytes = text.as_bytes();
        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
        let mut index = 0;
        // Counts placeholders handled by this call only (not shared with
        // recursive calls).
        let mut substitutions = 0;

        while index < bytes.len() {
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                if substitutions >= self.max_substitutions {
                    warn!("Maven property substitution limit hit resolving {text}");
                    return text.to_string();
                }

                let placeholder_start = index;
                // `closing_index` is the byte offset of the matching `}`.
                let Some((content, closing_index)) =
                    self.parse_placeholder_content(text, index + 2)
                else {
                    warn!("Maven property malformed placeholder in {text}");
                    return text.to_string();
                };

                substitutions += 1;
                // A nested placeholder inside the key is expanded first so the
                // final key name can be looked up.
                let resolved_key = if content.contains("${") {
                    self.resolve_text(content, depth + 1)
                } else {
                    content.to_string()
                };

                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
                    if output.len() + resolved.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(resolved.as_bytes());
                } else {
                    // Unresolvable: keep the original `${...}` text, braces included.
                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
                    if output.len() + placeholder_bytes.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(placeholder_bytes);
                }

                index = closing_index + 1;
                continue;
            }

            if output.len() + 1 > self.max_output_len {
                warn!("Maven property output length limit hit resolving {text}");
                return text.to_string();
            }

            // Ordinary byte: copy through unchanged.
            output.push(bytes[index]);
            index += 1;
        }

        // Falls back to the input if the assembled bytes are not valid UTF-8.
        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
    }

    /// Finds the end of the placeholder whose content starts at `start_index`
    /// (the byte just after `${`).
    ///
    /// Returns the content slice (without the closing brace) and the byte
    /// index of the matching `}`, or `None` if the placeholder is never
    /// closed. Nested `${` openers are counted so their `}` is not mistaken
    /// for the outer terminator.
    fn parse_placeholder_content<'a>(
        &self,
        text: &'a str,
        start_index: usize,
    ) -> Option<(&'a str, usize)> {
        let bytes = text.as_bytes();
        let mut index = start_index;
        // Number of nested `${` openers still waiting for their `}`.
        let mut depth = 0;

        while index < bytes.len() {
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                depth += 1;
                index += 2;
                continue;
            }

            if bytes[index] == b'}' {
                if depth == 0 {
                    return Some((&text[start_index..index], index));
                }
                depth -= 1;
            }

            index += 1;
        }

        None
    }

    /// Emits `message` as a warning unless a warning with the same
    /// `kind` and `key` has already been emitted by this resolver.
    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
        let token = format!("{kind}:{key}");
        // `insert` returns true only the first time the token is seen.
        if self.warned_keys.insert(token) {
            warn!("{message}");
        }
    }
}
259
/// Blanks out `<% ... %>` template directives so the remainder can be parsed as XML.
///
/// Every character of a directive (delimiters included) is replaced by a
/// space, except `\n` and `\r`, which are kept so line structure is preserved.
/// Input without any `<%` is returned borrowed. If a directive is never
/// closed, the original input is returned borrowed and untouched.
fn sanitize_template_directives(content: &str) -> Cow<'_, str> {
    const OPEN: &str = "<%";
    const CLOSE: &str = "%>";

    let Some(mut open_at) = content.find(OPEN) else {
        return Cow::Borrowed(content);
    };

    let mut blanked = String::with_capacity(content.len());
    let mut tail = content;

    loop {
        // Text before the directive is copied through unchanged.
        blanked.push_str(&tail[..open_at]);
        let from_open = &tail[open_at..];

        let directive_len = match from_open.find(CLOSE) {
            Some(close_at) => close_at + CLOSE.len(),
            // Unterminated directive: give up and hand back the input as-is.
            None => return Cow::Borrowed(content),
        };

        blanked.extend(from_open[..directive_len].chars().map(|ch| match ch {
            '\n' | '\r' => ch,
            _ => ' ',
        }));

        tail = &from_open[directive_len..];
        match tail.find(OPEN) {
            Some(next_open) => open_at = next_open,
            None => break,
        }
    }

    blanked.push_str(tail);
    Cow::Owned(blanked)
}
291
292fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
293    if let Some(current) = value.clone() {
294        *value = Some(resolver.resolve_text(&current, 0));
295    }
296}
297
298fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
299    for value in values.iter_mut() {
300        *value = resolver.resolve_text(value, 0);
301    }
302}
303
304fn resolve_map_strings(
305    resolver: &mut PropertyResolver,
306    values: &mut serde_json::Map<String, serde_json::Value>,
307) {
308    for value in values.values_mut() {
309        if let serde_json::Value::String(current) = value {
310            let resolved = resolver.resolve_text(current, 0);
311            *current = resolved;
312        }
313    }
314}
315
316fn resolve_maps(
317    resolver: &mut PropertyResolver,
318    values: &mut [serde_json::Map<String, serde_json::Value>],
319) {
320    for value in values.iter_mut() {
321        resolve_map_strings(resolver, value);
322    }
323}
324
325fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
326    resolve_option(resolver, &mut dependency.group_id);
327    resolve_option(resolver, &mut dependency.artifact_id);
328    resolve_option(resolver, &mut dependency.version);
329    resolve_option(resolver, &mut dependency.classifier);
330    resolve_option(resolver, &mut dependency.type_);
331    resolve_option(resolver, &mut dependency.scope);
332    resolve_option(resolver, &mut dependency.optional);
333    resolve_option(resolver, &mut dependency.system_path);
334    resolve_option(resolver, &mut dependency.message);
335}
336
/// Interprets optional flag text as a boolean.
///
/// Only the word `true` (ignoring ASCII case and surrounding whitespace) is
/// truthy; everything else, including `None`, is `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    match value {
        Some(raw) => raw.trim().eq_ignore_ascii_case("true"),
        None => false,
    }
}
340
/// Maps a packaging / type string onto the set of recognised packagings.
///
/// - `None` or blank input yields `None`.
/// - A recognised packaging is returned trimmed.
/// - Any other non-blank value is normalised to `"jar"`.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const RECOGNISED: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if RECOGNISED.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
351
352fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
353    resolve_option(resolver, &mut license.name);
354    resolve_option(resolver, &mut license.url);
355    resolve_option(resolver, &mut license.comments);
356}
357
358fn build_maven_qualifiers(
359    classifier: Option<&str>,
360    packaging: Option<&str>,
361) -> Option<HashMap<String, String>> {
362    let mut qualifiers = HashMap::new();
363
364    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
365        qualifiers.insert("classifier".to_string(), classifier.to_string());
366    }
367
368    if let Some(packaging) = normalize_maven_packaging(packaging)
369        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
370    {
371        qualifiers.insert("type".to_string(), packaging.to_string());
372    }
373
374    (!qualifiers.is_empty()).then_some(qualifiers)
375}
376
377fn build_maven_purl(
378    group_id: &str,
379    artifact_id: &str,
380    version: Option<&str>,
381    classifier: Option<&str>,
382    packaging: Option<&str>,
383) -> String {
384    let mut purl = format!(
385        "pkg:maven/{}/{}",
386        percent_encode_purl_component(group_id),
387        percent_encode_purl_component(artifact_id)
388    );
389
390    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
391        purl.push('@');
392        purl.push_str(&percent_encode_purl_component(version));
393    }
394
395    let qualifiers = build_maven_qualifiers(classifier, packaging);
396    if let Some(qualifiers) = qualifiers {
397        let mut query_parts = Vec::new();
398        if let Some(classifier) = qualifiers.get("classifier") {
399            query_parts.push(format!(
400                "classifier={}",
401                percent_encode_purl_component(classifier)
402            ));
403        }
404        if let Some(type_) = qualifiers.get("type") {
405            query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
406        }
407
408        if !query_parts.is_empty() {
409            purl.push('?');
410            purl.push_str(&query_parts.join("&"));
411        }
412    }
413
414    purl
415}
416
/// Percent-encodes a purl component byte by byte.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` pass through unchanged; every
/// other byte (including each byte of a multi-byte UTF-8 sequence) becomes
/// `%XX` with upper-case hexadecimal digits.
fn percent_encode_purl_component(value: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(value.len());

    for &byte in value.as_bytes() {
        let passes_through =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~');

        if passes_through {
            out.push(char::from(byte));
        } else {
            out.push('%');
            out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }

    out
}
431
432fn build_maven_download_url(
433    group_id: &str,
434    artifact_id: &str,
435    version: &str,
436    classifier: Option<&str>,
437    packaging: Option<&str>,
438) -> String {
439    const BASE_URL: &str = "https://repo1.maven.org/maven2";
440    let group_path = group_id.replace('.', "/");
441    let extension = normalize_maven_packaging(packaging)
442        .filter(|value| *value != "pom")
443        .unwrap_or("jar");
444    let classifier_suffix = classifier
445        .map(str::trim)
446        .filter(|value| !value.is_empty())
447        .map(|value| format!("-{value}"))
448        .unwrap_or_default();
449
450    format!(
451        "{}/{}/{}/{}/{}-{}{}.{}",
452        BASE_URL,
453        group_path,
454        artifact_id,
455        version,
456        artifact_id,
457        version,
458        classifier_suffix,
459        extension
460    )
461}
462
463fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
464    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
465}
466
467fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
468    let rendered_entries: Vec<String> = licenses
469        .iter()
470        .filter_map(|license| {
471            let mut lines = Vec::new();
472
473            if let Some(name) = license
474                .name
475                .as_ref()
476                .filter(|value| !value.trim().is_empty())
477            {
478                lines.push(format!("    name: {name}"));
479            }
480            if let Some(url) = license
481                .url
482                .as_ref()
483                .filter(|value| !value.trim().is_empty())
484            {
485                lines.push(format!("    url: {url}"));
486            }
487            if let Some(comments) = license
488                .comments
489                .as_ref()
490                .filter(|value| !value.trim().is_empty())
491            {
492                lines.push(format!("    comments: {comments}"));
493            }
494
495            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
496        })
497        .collect();
498
499    if rendered_entries.is_empty() {
500        None
501    } else {
502        Some(format!("{}\n", rendered_entries.join("\n")))
503    }
504}
505
/// Heuristically decides whether a comment looks like license text.
///
/// The check is case-insensitive (ASCII). Most markers are matched as
/// substrings. The short marker `mit` is matched as a whole word only,
/// because as a substring it also occurs in ordinary words such as
/// "commit", "limit", "permit" or "submit" and would misclassify unrelated
/// comments.
fn is_license_like_comment(comment: &str) -> bool {
    // Distinctive enough to be safe as plain substrings.
    const SUBSTRING_MARKERS: [&str; 10] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];
    // Too short/common to match inside other words.
    const WHOLE_WORD_MARKERS: [&str; 1] = ["mit"];

    let lowered = comment.to_ascii_lowercase();

    if SUBSTRING_MARKERS
        .iter()
        .any(|marker| lowered.contains(marker))
    {
        return true;
    }

    lowered
        .split(|ch: char| !ch.is_alphanumeric())
        .any(|word| WHOLE_WORD_MARKERS.contains(&word))
}
524
525fn dependency_extra_data(
526    dependency: &MavenDependencyData,
527) -> Option<HashMap<String, serde_json::Value>> {
528    let mut extra_data = HashMap::new();
529
530    if let Some(classifier) = dependency
531        .classifier
532        .as_ref()
533        .filter(|value| !value.trim().is_empty())
534    {
535        extra_data.insert(
536            "classifier".to_string(),
537            serde_json::Value::String(classifier.clone()),
538        );
539    }
540    if let Some(type_) = dependency
541        .type_
542        .as_ref()
543        .filter(|value| !value.trim().is_empty())
544    {
545        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
546    }
547    if let Some(system_path) = dependency
548        .system_path
549        .as_ref()
550        .filter(|value| !value.trim().is_empty())
551    {
552        extra_data.insert(
553            "system_path".to_string(),
554            serde_json::Value::String(system_path.clone()),
555        );
556    }
557    if let Some(message) = dependency
558        .message
559        .as_ref()
560        .filter(|value| !value.trim().is_empty())
561    {
562        extra_data.insert(
563            "message".to_string(),
564            serde_json::Value::String(message.clone()),
565        );
566    }
567
568    (!extra_data.is_empty()).then_some(extra_data)
569}
570
571fn dependency_management_entry_to_value(
572    dependency: &MavenDependencyData,
573) -> serde_json::Map<String, serde_json::Value> {
574    let mut dep_obj = serde_json::Map::new();
575
576    if let Some(group_id) = dependency.group_id.as_ref() {
577        dep_obj.insert(
578            "groupId".to_string(),
579            serde_json::Value::String(group_id.clone()),
580        );
581    }
582    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
583        dep_obj.insert(
584            "artifactId".to_string(),
585            serde_json::Value::String(artifact_id.clone()),
586        );
587    }
588    if let Some(version) = dependency.version.as_ref() {
589        dep_obj.insert(
590            "version".to_string(),
591            serde_json::Value::String(version.clone()),
592        );
593    }
594    if let Some(scope) = dependency.scope.as_ref() {
595        dep_obj.insert(
596            "scope".to_string(),
597            serde_json::Value::String(scope.clone()),
598        );
599    }
600    if let Some(type_) = dependency.type_.as_ref() {
601        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
602    }
603    if let Some(classifier) = dependency.classifier.as_ref() {
604        dep_obj.insert(
605            "classifier".to_string(),
606            serde_json::Value::String(classifier.clone()),
607        );
608    }
609    if let Some(optional) = dependency.optional.as_deref() {
610        dep_obj.insert(
611            "optional".to_string(),
612            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
613        );
614    }
615    if let Some(message) = dependency.message.as_ref() {
616        dep_obj.insert(
617            "message".to_string(),
618            serde_json::Value::String(message.clone()),
619        );
620    }
621
622    dep_obj
623}
624
625fn maven_dependency_to_dependency(
626    dependency_data: &MavenDependencyData,
627    fallback_scope: Option<&str>,
628    force_non_runtime: bool,
629) -> Option<Dependency> {
630    let group_id = dependency_data.group_id.as_ref()?;
631    let artifact_id = dependency_data.artifact_id.as_ref()?;
632    let version = dependency_data.version.clone();
633    let scope = dependency_data
634        .scope
635        .clone()
636        .or_else(|| fallback_scope.map(str::to_string));
637    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
638
639    let (is_runtime, is_optional) = if force_non_runtime {
640        (Some(false), Some(explicit_optional))
641    } else {
642        match scope.as_deref() {
643            Some("test") | Some("provided") => (Some(false), Some(true)),
644            Some(_) => (Some(true), Some(explicit_optional)),
645            None => (None, Some(explicit_optional)),
646        }
647    };
648
649    Some(Dependency {
650        purl: Some(build_maven_purl(
651            group_id,
652            artifact_id,
653            version.as_deref(),
654            dependency_data.classifier.as_deref(),
655            dependency_data.type_.as_deref(),
656        )),
657        extracted_requirement: version.clone(),
658        scope,
659        is_runtime,
660        is_optional,
661        is_pinned: version.as_deref().map(is_maven_version_pinned),
662        is_direct: Some(true),
663        resolved_package: None,
664        extra_data: dependency_extra_data(dependency_data),
665    })
666}
667
/// Determines if a Maven version specifier is pinned to an exact version.
///
/// A version is considered pinned if it names exactly one concrete version:
/// - Pinned: `"1.0.0"`, `"1.2.3"`, and the hard exact requirement `"[1.0]"`
/// - NOT pinned: `"[1.0.0,2.0.0)"` (range), `"[1.0.0,)"` (open-ended),
///   `"[1.0],[2.0]"` (multiple sets), `"LATEST"`, `"RELEASE"`, blank text,
///   and anything still containing an unresolved `${...}` placeholder
fn is_maven_version_pinned(version_str: &str) -> bool {
    let trimmed = version_str.trim();

    // Empty version is not pinned
    if trimmed.is_empty() {
        return false;
    }

    // An unresolved property placeholder does not name a concrete version.
    if trimmed.contains("${") {
        return false;
    }

    // `[x]` with a single version and no comma is Maven's exact (hard)
    // requirement; unwrap it so it is judged like a plain version.
    let candidate = match trimmed
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
    {
        Some(inner) => {
            let inner = inner.trim();
            if inner.is_empty() || inner.contains(',') {
                return false;
            }
            inner
        }
        None => trimmed,
    };

    // Any remaining bracket or parenthesis means range syntax.
    if candidate
        .chars()
        .any(|ch| matches!(ch, '[' | ']' | '(' | ')'))
    {
        return false;
    }

    // Dynamic version keywords are resolved at build time, so never pinned.
    !(candidate.eq_ignore_ascii_case("LATEST") || candidate.eq_ignore_ascii_case("RELEASE"))
}
699
/// Borrowed project and parent coordinates used to seed the built-in
/// (`project.*`, `pom.*`, `parent.*`) properties.
struct MavenBuiltinPropertyInputs<'a> {
    /// The project's own group id, if declared.
    namespace: &'a Option<String>,
    /// The project's artifact id.
    name: &'a Option<String>,
    /// The project's own version, if declared.
    version: &'a Option<String>,
    /// Group id of the parent; also the fallback for the project group id.
    parent_group_id: &'a Option<String>,
    /// Artifact id of the parent.
    parent_artifact_id: &'a Option<String>,
    /// Version of the parent; also the fallback for the project version.
    parent_version: &'a Option<String>,
    /// Display name of the project.
    project_name: &'a Option<String>,
    /// Packaging of the project.
    project_packaging: &'a Option<String>,
}

/// Builds the map of built-in properties available for `${...}` substitution.
///
/// - `project.groupId` / `pom.groupId` use the project's group id, falling
///   back to the parent's.
/// - `project.version` / `pom.version` use the project's version, falling
///   back to the parent's.
/// - `project.artifactId` / `pom.artifactId` use the project's artifact id.
/// - Each parent coordinate (groupId, artifactId, version) is exposed under
///   the three spellings `project.parent.*`, `pom.parent.*` and `parent.*`.
/// - `project.packaging` and `project.name` are exposed when present.
///
/// Keys whose source value is absent are not inserted.
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    // Registers the same value under every alias in `keys`.
    fn insert_aliases(target: &mut HashMap<String, String>, keys: &[&str], value: &str) {
        for key in keys {
            target.insert((*key).to_string(), value.to_string());
        }
    }

    let mut builtins = HashMap::new();

    let effective_group_id = inputs
        .namespace
        .as_deref()
        .or(inputs.parent_group_id.as_deref());
    let effective_version = inputs
        .version
        .as_deref()
        .or(inputs.parent_version.as_deref());

    if let Some(group_id) = effective_group_id {
        insert_aliases(&mut builtins, &["project.groupId", "pom.groupId"], group_id);
    }

    if let Some(artifact_id) = inputs.name.as_deref() {
        insert_aliases(
            &mut builtins,
            &["project.artifactId", "pom.artifactId"],
            artifact_id,
        );
    }

    if let Some(version) = effective_version {
        insert_aliases(&mut builtins, &["project.version", "pom.version"], version);
    }

    // The parent group id gets the same three aliases as the parent artifact
    // id and version, so `${parent.groupId}` / `${pom.parent.groupId}` resolve.
    if let Some(group_id) = inputs.parent_group_id.as_deref() {
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.groupId",
                "pom.parent.groupId",
                "parent.groupId",
            ],
            group_id,
        );
    }

    if let Some(artifact_id) = inputs.parent_artifact_id.as_deref() {
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.artifactId",
                "pom.parent.artifactId",
                "parent.artifactId",
            ],
            artifact_id,
        );
    }

    if let Some(version) = inputs.parent_version.as_deref() {
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.version",
                "pom.parent.version",
                "parent.version",
            ],
            version,
        );
    }

    if let Some(packaging) = inputs.project_packaging.as_deref() {
        insert_aliases(&mut builtins, &["project.packaging"], packaging);
    }

    if let Some(name) = inputs.project_name.as_deref() {
        insert_aliases(&mut builtins, &["project.name"], name);
    }

    builtins
}
763
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// Dispatches on the file name: `pom.properties` and `MANIFEST.MF` are handed
/// to their dedicated parsers, and every other path is read and parsed as a
/// POM XML document.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
pub struct MavenParser;
769
770impl PackageParser for MavenParser {
771    const PACKAGE_TYPE: PackageType = PackageType::Maven;
772
773    fn extract_packages(path: &Path) -> Vec<PackageData> {
774        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
775            if filename == "pom.properties" {
776                return vec![parse_pom_properties(path)];
777            } else if filename == "MANIFEST.MF" {
778                return vec![parse_manifest_mf(path)];
779            }
780        }
781
782        let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
783            Ok(content) => content,
784            Err(e) => {
785                warn!("Failed to open pom.xml at {:?}: {}", path, e);
786                return vec![default_package_data(DatasourceId::MavenPom)];
787            }
788        };
789
790        let sanitized_content = sanitize_template_directives(&content);
791        let mut reader = Reader::from_str(sanitized_content.as_ref());
792        reader.config_mut().trim_text(true);
793
794        let mut buf = Vec::new();
795        let mut package_data = default_package_data(DatasourceId::MavenPom);
796        package_data.package_type = Some(Self::PACKAGE_TYPE);
797        package_data.primary_language = Some("Java".to_string());
798        package_data.datasource_id = Some(DatasourceId::MavenPom);
799
800        let mut current_element = Vec::new();
801        let mut in_dependencies = false;
802        let mut current_dependency: Option<Dependency> = None;
803        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
804        let mut current_dependency_data: Option<MavenDependencyData> = None;
805
806        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
807        let mut xml_license_comments: Vec<String> = Vec::new();
808        let mut current_license: Option<MavenLicenseEntry> = None;
809        let mut inception_year = None;
810        let mut scm_connection = None;
811        let mut scm_developer_connection = None;
812        let mut scm_url = None;
813        let mut scm_tag = None;
814        let mut organization_name = None;
815        let mut organization_url = None;
816        let mut in_developers = false;
817        let mut in_contributors = false;
818        let mut current_party: Option<Party> = None;
819        let mut issue_management_system = None;
820        let mut issue_management_url = None;
821        let mut ci_management_system = None;
822        let mut ci_management_url = None;
823        let mut in_distribution_management = false;
824        let mut in_dist_repository = false;
825        let mut in_dist_snapshot_repository = false;
826        let mut in_dist_site = false;
827        let mut dist_download_url = None;
828        let mut dist_repository_id = None;
829        let mut dist_repository_name = None;
830        let mut dist_repository_url = None;
831        let mut dist_repository_layout = None;
832        let mut dist_snapshot_repository_id = None;
833        let mut dist_snapshot_repository_name = None;
834        let mut dist_snapshot_repository_url = None;
835        let mut dist_snapshot_repository_layout = None;
836        let mut dist_site_id = None;
837        let mut dist_site_name = None;
838        let mut dist_site_url = None;
839        let mut in_repositories = false;
840        let mut in_plugin_repositories = false;
841        let mut in_repository = false;
842        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
843        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
844        let mut current_repository_id = None;
845        let mut current_repository_name = None;
846        let mut current_repository_url = None;
847        let mut in_modules = false;
848        let mut modules: Vec<String> = Vec::new();
849        let mut in_mailing_lists = false;
850        let mut in_mailing_list = false;
851        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
852        let mut current_mailing_list_name = None;
853        let mut current_mailing_list_subscribe = None;
854        let mut current_mailing_list_unsubscribe = None;
855        let mut current_mailing_list_post = None;
856        let mut current_mailing_list_archive = None;
857        let mut in_dependency_management = false;
858        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
859        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
860        let mut in_dep_mgmt_dependency = false;
861        let mut in_parent = false;
862        let mut parent_group_id = None;
863        let mut parent_artifact_id = None;
864        let mut parent_version = None;
865        let mut parent_relative_path = None;
866        let mut in_properties = false;
867        let mut properties: HashMap<String, String> = HashMap::new();
868        let mut project_name = None;
869        let mut project_description = None;
870        let mut project_packaging = None;
871        let mut project_classifier = None;
872        let mut in_relocation = false;
873        let mut relocation = MavenDependencyData::default();
874
875        let mut iteration_count: usize = 0;
876        loop {
877            iteration_count += 1;
878            if iteration_count > MAX_ITERATION_COUNT {
879                warn!(
880                    "Exceeded MAX_ITERATION_COUNT ({}) parsing pom.xml at {:?}; stopping early",
881                    MAX_ITERATION_COUNT, path
882                );
883                break;
884            }
885            match reader.read_event_into(&mut buf) {
886                Ok(Event::Start(e)) => {
887                    let element_name = e.name().as_ref().to_vec();
888                    current_element.push(element_name.clone());
889
890                    match element_name.as_slice() {
891                        b"parent" => in_parent = true,
892                        b"dependencyManagement" => in_dependency_management = true,
893                        b"dependencies" if in_dependency_management => {}
894                        b"dependencies" => in_dependencies = true,
895                        b"dependency" if in_dependency_management => {
896                            in_dep_mgmt_dependency = true;
897                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
898                        }
899                        b"dependency" if in_dependencies => {
900                            current_dependency = Some(Dependency {
901                                purl: None,
902                                extracted_requirement: None,
903                                scope: None,
904                                is_runtime: None,
905                                is_optional: Some(false),
906                                is_pinned: None,
907                                is_direct: Some(true),
908                                resolved_package: None,
909                                extra_data: None,
910                            });
911                            current_dependency_data = Some(MavenDependencyData::default());
912                        }
913                        b"properties" => in_properties = true,
914                        b"developers" => in_developers = true,
915                        b"developer" if in_developers => {
916                            current_party = Some(Party {
917                                r#type: Some("person".to_string()),
918                                role: Some("developer".to_string()),
919                                name: None,
920                                email: None,
921                                url: None,
922                                organization: None,
923                                organization_url: None,
924                                timezone: None,
925                            });
926                        }
927                        b"contributors" => in_contributors = true,
928                        b"contributor" if in_contributors => {
929                            current_party = Some(Party {
930                                r#type: Some("person".to_string()),
931                                role: Some("contributor".to_string()),
932                                name: None,
933                                email: None,
934                                url: None,
935                                organization: None,
936                                organization_url: None,
937                                timezone: None,
938                            });
939                        }
940                        b"license" => current_license = Some(MavenLicenseEntry::default()),
941                        b"distributionManagement" => in_distribution_management = true,
942                        b"relocation" if in_distribution_management => {
943                            in_relocation = true;
944                            relocation = MavenDependencyData::default();
945                        }
946                        b"repository" if in_distribution_management => in_dist_repository = true,
947                        b"snapshotRepository" if in_distribution_management => {
948                            in_dist_snapshot_repository = true
949                        }
950                        b"site" if in_distribution_management => in_dist_site = true,
951                        b"repositories" => in_repositories = true,
952                        b"pluginRepositories" => in_plugin_repositories = true,
953                        b"repository" if in_repositories && !in_distribution_management => {
954                            in_repository = true;
955                            current_repository_id = None;
956                            current_repository_name = None;
957                            current_repository_url = None;
958                        }
959                        b"pluginRepository" if in_plugin_repositories => {
960                            in_repository = true;
961                            current_repository_id = None;
962                            current_repository_name = None;
963                            current_repository_url = None;
964                        }
965                        b"modules" => in_modules = true,
966                        b"mailingLists" => in_mailing_lists = true,
967                        b"mailingList" if in_mailing_lists => {
968                            in_mailing_list = true;
969                            current_mailing_list_name = None;
970                            current_mailing_list_subscribe = None;
971                            current_mailing_list_unsubscribe = None;
972                            current_mailing_list_post = None;
973                            current_mailing_list_archive = None;
974                        }
975                        _ => {}
976                    }
977                }
978                Ok(Event::Text(e)) => {
979                    let text = match e.decode() {
980                        Ok(Cow::Borrowed(s)) => s.to_string(),
981                        Ok(Cow::Owned(s)) => s,
982                        Err(_) => {
983                            warn!(
984                                "Invalid UTF-8 in XML text content in {:?}; using lossy conversion",
985                                path
986                            );
987                            String::from_utf8_lossy(e.as_ref()).into_owned()
988                        }
989                    };
990                    let current_path = current_element.last().map(|v| v.as_slice());
991                    let current_parent = current_element
992                        .len()
993                        .checked_sub(2)
994                        .map(|index| current_element[index].as_slice());
995
996                    if in_properties
997                        && current_element.len() >= 2
998                        && current_element[current_element.len() - 2] == b"properties"
999                    {
1000                        if let Some(property_name) = current_element
1001                            .last()
1002                            .and_then(|name| std::str::from_utf8(name).ok())
1003                        {
1004                            properties.insert(property_name.to_string(), truncate_field(text));
1005                        } else {
1006                            warn!("Failed to decode Maven property name in {:?}", path);
1007                        }
1008                    } else if in_dep_mgmt_dependency {
1009                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
1010                            match current_path {
1011                                Some(b"groupId") if current_parent == Some(b"dependency") => {
1012                                    dep_mgmt.group_id = Some(text)
1013                                }
1014                                Some(b"artifactId") if current_parent == Some(b"dependency") => {
1015                                    dep_mgmt.artifact_id = Some(text)
1016                                }
1017                                Some(b"version") if current_parent == Some(b"dependency") => {
1018                                    dep_mgmt.version = Some(text)
1019                                }
1020                                Some(b"scope") if current_parent == Some(b"dependency") => {
1021                                    dep_mgmt.scope = Some(text)
1022                                }
1023                                Some(b"type") if current_parent == Some(b"dependency") => {
1024                                    dep_mgmt.type_ = Some(text)
1025                                }
1026                                Some(b"classifier") if current_parent == Some(b"dependency") => {
1027                                    dep_mgmt.classifier = Some(text)
1028                                }
1029                                Some(b"optional") if current_parent == Some(b"dependency") => {
1030                                    dep_mgmt.optional = Some(text)
1031                                }
1032                                _ => {}
1033                            }
1034                        }
1035                    } else if let Some(license) = &mut current_license {
1036                        match current_path {
1037                            Some(b"name") => license.name = Some(text),
1038                            Some(b"url") => license.url = Some(text),
1039                            Some(b"comments") => license.comments = Some(text),
1040                            _ => {}
1041                        }
1042                    } else if let Some(party) = &mut current_party {
1043                        match current_path {
1044                            Some(b"name") => party.name = Some(text),
1045                            Some(b"email") => party.email = Some(text),
1046                            Some(b"url") => party.url = Some(text),
1047                            Some(b"organization") => party.organization = Some(text),
1048                            Some(b"organizationUrl") => party.organization_url = Some(text),
1049                            Some(b"timezone") => party.timezone = Some(text),
1050                            _ => {}
1051                        }
1052                    } else if let Some(dep) = &mut current_dependency {
1053                        match current_path {
1054                            Some(b"groupId") => {
1055                                if current_parent == Some(b"dependency")
1056                                    && let Some(coords) = current_dependency_data.as_mut()
1057                                {
1058                                    coords.group_id = Some(text);
1059                                }
1060                            }
1061                            Some(b"artifactId") => {
1062                                if current_parent == Some(b"dependency")
1063                                    && let Some(coords) = current_dependency_data.as_mut()
1064                                {
1065                                    coords.artifact_id = Some(text);
1066                                }
1067                            }
1068                            Some(b"version") => {
1069                                if current_parent == Some(b"dependency")
1070                                    && let Some(coords) = current_dependency_data.as_mut()
1071                                {
1072                                    coords.version = Some(text);
1073                                }
1074                            }
1075                            Some(b"scope") => {
1076                                if current_parent == Some(b"dependency") {
1077                                    dep.scope = Some(text.clone());
1078                                    dep.is_optional = Some(text == "test" || text == "provided");
1079                                    dep.is_runtime = Some(text != "test" && text != "provided");
1080                                }
1081                                if current_parent == Some(b"dependency")
1082                                    && let Some(coords) = current_dependency_data.as_mut()
1083                                {
1084                                    coords.scope = Some(text);
1085                                }
1086                            }
1087                            Some(b"optional") => {
1088                                if current_parent == Some(b"dependency")
1089                                    && let Some(coords) = current_dependency_data.as_mut()
1090                                {
1091                                    coords.optional = Some(text);
1092                                }
1093                            }
1094                            Some(b"type") => {
1095                                if current_parent == Some(b"dependency")
1096                                    && let Some(coords) = current_dependency_data.as_mut()
1097                                {
1098                                    coords.type_ = Some(text);
1099                                }
1100                            }
1101                            Some(b"classifier") => {
1102                                if current_parent == Some(b"dependency")
1103                                    && let Some(coords) = current_dependency_data.as_mut()
1104                                {
1105                                    coords.classifier = Some(text);
1106                                }
1107                            }
1108                            Some(b"systemPath") => {
1109                                if current_parent == Some(b"dependency")
1110                                    && let Some(coords) = current_dependency_data.as_mut()
1111                                {
1112                                    coords.system_path = Some(text);
1113                                }
1114                            }
1115                            _ => {}
1116                        }
1117                    } else if in_relocation {
1118                        match current_path {
1119                            Some(b"groupId") => relocation.group_id = Some(text),
1120                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1121                            Some(b"version") => relocation.version = Some(text),
1122                            Some(b"classifier") => relocation.classifier = Some(text),
1123                            Some(b"type") => relocation.type_ = Some(text),
1124                            Some(b"message") => relocation.message = Some(text),
1125                            _ => {}
1126                        }
1127                    } else if in_parent {
1128                        match current_path {
1129                            Some(b"groupId") => {
1130                                parent_group_id = Some(text);
1131                            }
1132                            Some(b"artifactId") => {
1133                                parent_artifact_id = Some(text);
1134                            }
1135                            Some(b"version") => {
1136                                parent_version = Some(text);
1137                            }
1138                            Some(b"relativePath") => {
1139                                parent_relative_path = Some(text);
1140                            }
1141                            _ => {}
1142                        }
1143                    } else {
1144                        match current_path {
1145                            Some(b"groupId") if current_element.len() == 2 => {
1146                                package_data.namespace = Some(text)
1147                            }
1148                            Some(b"artifactId") if current_element.len() == 2 => {
1149                                package_data.name = Some(text)
1150                            }
1151                            Some(b"version") if current_element.len() == 2 => {
1152                                package_data.version = Some(text)
1153                            }
1154                            Some(b"name") if current_element.len() == 2 => {
1155                                project_name = Some(text)
1156                            }
1157                            Some(b"description") if current_element.len() == 2 => {
1158                                project_description = Some(text)
1159                            }
1160                            Some(b"packaging") if current_element.len() == 2 => {
1161                                project_packaging = Some(text)
1162                            }
1163                            Some(b"classifier") if current_element.len() == 2 => {
1164                                project_classifier = Some(text)
1165                            }
1166                            Some(b"url") if current_element.len() == 2 => {
1167                                package_data.homepage_url = Some(text)
1168                            }
1169                            Some(b"inceptionYear") if current_element.len() == 2 => {
1170                                inception_year = Some(text)
1171                            }
1172                            Some(b"connection")
1173                                if current_element.len() >= 3
1174                                    && current_element[current_element.len() - 2] == b"scm" =>
1175                            {
1176                                scm_connection = if text.starts_with("scm:git:") {
1177                                    Some(text.replacen("scm:git:", "git+", 1))
1178                                } else if text.starts_with("scm:") {
1179                                    Some(text.replacen("scm:", "", 1))
1180                                } else {
1181                                    Some(text)
1182                                };
1183                            }
1184                            Some(b"developerConnection")
1185                                if current_element.len() >= 3
1186                                    && current_element[current_element.len() - 2] == b"scm" =>
1187                            {
1188                                scm_developer_connection = if text.starts_with("scm:git:") {
1189                                    Some(text.replacen("scm:git:", "git+", 1))
1190                                } else if text.starts_with("scm:") {
1191                                    Some(text.replacen("scm:", "", 1))
1192                                } else {
1193                                    Some(text)
1194                                };
1195                            }
1196                            Some(b"url")
1197                                if current_element.len() >= 3
1198                                    && current_element[current_element.len() - 2] == b"scm" =>
1199                            {
1200                                scm_url = Some(text);
1201                            }
1202                            Some(b"tag")
1203                                if current_element.len() >= 3
1204                                    && current_element[current_element.len() - 2] == b"scm" =>
1205                            {
1206                                scm_tag = Some(text);
1207                            }
1208                            Some(b"name")
1209                                if current_element.len() >= 2
1210                                    && current_element[current_element.len() - 2]
1211                                        == b"organization" =>
1212                            {
1213                                organization_name = Some(text);
1214                            }
1215                            Some(b"url")
1216                                if current_element.len() >= 2
1217                                    && current_element[current_element.len() - 2]
1218                                        == b"organization" =>
1219                            {
1220                                organization_url = Some(text);
1221                            }
1222                            Some(b"system")
1223                                if current_element.len() >= 2
1224                                    && current_element[current_element.len() - 2]
1225                                        == b"issueManagement" =>
1226                            {
1227                                issue_management_system = Some(text);
1228                            }
1229                            Some(b"url")
1230                                if current_element.len() >= 2
1231                                    && current_element[current_element.len() - 2]
1232                                        == b"issueManagement" =>
1233                            {
1234                                issue_management_url = Some(text);
1235                            }
1236                            Some(b"system")
1237                                if current_element.len() >= 2
1238                                    && current_element[current_element.len() - 2]
1239                                        == b"ciManagement" =>
1240                            {
1241                                ci_management_system = Some(text);
1242                            }
1243                            Some(b"url")
1244                                if current_element.len() >= 2
1245                                    && current_element[current_element.len() - 2]
1246                                        == b"ciManagement" =>
1247                            {
1248                                ci_management_url = Some(text);
1249                            }
1250                            Some(b"downloadUrl")
1251                                if current_element.len() >= 2
1252                                    && current_element[current_element.len() - 2]
1253                                        == b"distributionManagement" =>
1254                            {
1255                                dist_download_url = Some(text);
1256                            }
1257                            Some(b"id") if in_dist_repository => {
1258                                dist_repository_id = Some(text);
1259                            }
1260                            Some(b"name") if in_dist_repository => {
1261                                dist_repository_name = Some(text);
1262                            }
1263                            Some(b"url") if in_dist_repository => {
1264                                dist_repository_url = Some(text);
1265                            }
1266                            Some(b"layout") if in_dist_repository => {
1267                                dist_repository_layout = Some(text);
1268                            }
1269                            Some(b"id") if in_dist_snapshot_repository => {
1270                                dist_snapshot_repository_id = Some(text);
1271                            }
1272                            Some(b"name") if in_dist_snapshot_repository => {
1273                                dist_snapshot_repository_name = Some(text);
1274                            }
1275                            Some(b"url") if in_dist_snapshot_repository => {
1276                                dist_snapshot_repository_url = Some(text);
1277                            }
1278                            Some(b"layout") if in_dist_snapshot_repository => {
1279                                dist_snapshot_repository_layout = Some(text);
1280                            }
1281                            Some(b"id") if in_dist_site => {
1282                                dist_site_id = Some(text);
1283                            }
1284                            Some(b"name") if in_dist_site => {
1285                                dist_site_name = Some(text);
1286                            }
1287                            Some(b"url") if in_dist_site => {
1288                                dist_site_url = Some(text);
1289                            }
1290                            Some(b"id") if in_repository => {
1291                                current_repository_id = Some(text);
1292                            }
1293                            Some(b"name") if in_repository => {
1294                                current_repository_name = Some(text);
1295                            }
1296                            Some(b"url") if in_repository => {
1297                                current_repository_url = Some(text);
1298                            }
1299                            Some(b"module") if in_modules => {
1300                                modules.push(text);
1301                            }
1302                            Some(b"name") if in_mailing_list => {
1303                                current_mailing_list_name = Some(text);
1304                            }
1305                            Some(b"subscribe") if in_mailing_list => {
1306                                current_mailing_list_subscribe = Some(text);
1307                            }
1308                            Some(b"unsubscribe") if in_mailing_list => {
1309                                current_mailing_list_unsubscribe = Some(text);
1310                            }
1311                            Some(b"post") if in_mailing_list => {
1312                                current_mailing_list_post = Some(text);
1313                            }
1314                            Some(b"archive") if in_mailing_list => {
1315                                current_mailing_list_archive = Some(text);
1316                            }
1317                            _ => {}
1318                        }
1319                    }
1320                }
1321                Ok(Event::Comment(e)) => {
1322                    let comment = match e.decode() {
1323                        Ok(Cow::Borrowed(s)) => s.trim().to_string(),
1324                        Ok(Cow::Owned(s)) => s.trim().to_string(),
1325                        Err(_) => {
1326                            warn!(
1327                                "Invalid UTF-8 in XML comment in {:?}; using lossy conversion",
1328                                path
1329                            );
1330                            String::from_utf8_lossy(e.as_ref())
1331                                .into_owned()
1332                                .trim()
1333                                .to_string()
1334                        }
1335                    };
1336                    if current_element.is_empty()
1337                        && !comment.is_empty()
1338                        && is_license_like_comment(&comment)
1339                    {
1340                        xml_license_comments.push(comment);
1341                    }
1342                }
1343                Ok(Event::End(e)) => {
1344                    if !current_element.is_empty() {
1345                        current_element.pop();
1346                    }
1347
1348                    match e.name().as_ref() {
1349                        b"parent" => in_parent = false,
1350                        b"dependencyManagement" => in_dependency_management = false,
1351                        b"dependencies" => in_dependencies = false,
1352                        b"dependency" if in_dep_mgmt_dependency => {
1353                            in_dep_mgmt_dependency = false;
1354                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1355                                && (dep_mgmt.group_id.is_some()
1356                                    || dep_mgmt.artifact_id.is_some()
1357                                    || dep_mgmt.version.is_some())
1358                            {
1359                                dependency_management_entries.push(dep_mgmt);
1360                            }
1361                        }
1362                        b"dependency" => {
1363                            if let (Some(dep), Some(coords)) =
1364                                (current_dependency.take(), current_dependency_data.take())
1365                            {
1366                                package_data.dependencies.push(dep);
1367                                dependency_data.push(coords);
1368                            } else if let Some(dep) = current_dependency.take() {
1369                                package_data.dependencies.push(dep);
1370                            }
1371                        }
1372                        b"license" => {
1373                            if let Some(license) = current_license.take()
1374                                && (license.name.is_some()
1375                                    || license.url.is_some()
1376                                    || license.comments.is_some())
1377                            {
1378                                licenses.push(license);
1379                            }
1380                        }
1381                        b"developers" => in_developers = false,
1382                        b"developer" => {
1383                            if let Some(party) = current_party.take() {
1384                                package_data.parties.push(party);
1385                            }
1386                        }
1387                        b"contributors" => in_contributors = false,
1388                        b"contributor" => {
1389                            if let Some(party) = current_party.take() {
1390                                package_data.parties.push(party);
1391                            }
1392                        }
1393                        b"distributionManagement" => in_distribution_management = false,
1394                        b"relocation" => in_relocation = false,
1395                        b"repository" if !in_dependencies && in_distribution_management => {
1396                            in_dist_repository = false
1397                        }
1398                        b"repository" if !in_dependencies && in_repositories => {
1399                            in_repository = false;
1400                            if current_repository_id.is_some()
1401                                || current_repository_name.is_some()
1402                                || current_repository_url.is_some()
1403                            {
1404                                let mut repo = serde_json::Map::new();
1405                                if let Some(id) = current_repository_id.take() {
1406                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1407                                }
1408                                if let Some(name) = current_repository_name.take() {
1409                                    repo.insert(
1410                                        "name".to_string(),
1411                                        serde_json::Value::String(name),
1412                                    );
1413                                }
1414                                if let Some(url) = current_repository_url.take() {
1415                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1416                                }
1417                                repositories.push(repo);
1418                            }
1419                        }
1420                        b"pluginRepository" if in_plugin_repositories => {
1421                            in_repository = false;
1422                            if current_repository_id.is_some()
1423                                || current_repository_name.is_some()
1424                                || current_repository_url.is_some()
1425                            {
1426                                let mut repo = serde_json::Map::new();
1427                                if let Some(id) = current_repository_id.take() {
1428                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1429                                }
1430                                if let Some(name) = current_repository_name.take() {
1431                                    repo.insert(
1432                                        "name".to_string(),
1433                                        serde_json::Value::String(name),
1434                                    );
1435                                }
1436                                if let Some(url) = current_repository_url.take() {
1437                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1438                                }
1439                                plugin_repositories.push(repo);
1440                            }
1441                        }
1442                        b"repositories" => in_repositories = false,
1443                        b"properties" => in_properties = false,
1444                        b"pluginRepositories" => in_plugin_repositories = false,
1445                        b"modules" => in_modules = false,
1446                        b"mailingLists" => in_mailing_lists = false,
1447                        b"mailingList" => {
1448                            in_mailing_list = false;
1449                            if current_mailing_list_name.is_some()
1450                                || current_mailing_list_subscribe.is_some()
1451                                || current_mailing_list_unsubscribe.is_some()
1452                                || current_mailing_list_post.is_some()
1453                                || current_mailing_list_archive.is_some()
1454                            {
1455                                let mut ml = serde_json::Map::new();
1456                                if let Some(name) = current_mailing_list_name.take() {
1457                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1458                                }
1459                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1460                                    ml.insert(
1461                                        "subscribe".to_string(),
1462                                        serde_json::Value::String(subscribe),
1463                                    );
1464                                }
1465                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1466                                    ml.insert(
1467                                        "unsubscribe".to_string(),
1468                                        serde_json::Value::String(unsubscribe),
1469                                    );
1470                                }
1471                                if let Some(post) = current_mailing_list_post.take() {
1472                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1473                                }
1474                                if let Some(archive) = current_mailing_list_archive.take() {
1475                                    ml.insert(
1476                                        "archive".to_string(),
1477                                        serde_json::Value::String(archive),
1478                                    );
1479                                }
1480                                mailing_lists.push(ml);
1481                            }
1482                        }
1483                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1484                        b"site" => in_dist_site = false,
1485                        _ => {}
1486                    }
1487                }
1488                Ok(Event::Eof) => break,
1489                Err(e) => {
1490                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1491                    return vec![package_data];
1492                }
1493                _ => {}
1494            }
1495            buf.clear();
1496        }
1497
1498        let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1499            namespace: &package_data.namespace,
1500            name: &package_data.name,
1501            version: &package_data.version,
1502            parent_group_id: &parent_group_id,
1503            parent_artifact_id: &parent_artifact_id,
1504            parent_version: &parent_version,
1505            project_name: &project_name,
1506            project_packaging: &project_packaging,
1507        });
1508        let mut resolver = PropertyResolver::new(properties, builtins);
1509
1510        resolve_option(&mut resolver, &mut package_data.namespace);
1511        resolve_option(&mut resolver, &mut package_data.name);
1512        resolve_option(&mut resolver, &mut package_data.version);
1513        resolve_option(&mut resolver, &mut package_data.homepage_url);
1514        resolve_option(&mut resolver, &mut inception_year);
1515        resolve_option(&mut resolver, &mut scm_connection);
1516        resolve_option(&mut resolver, &mut scm_developer_connection);
1517        resolve_option(&mut resolver, &mut scm_url);
1518        resolve_option(&mut resolver, &mut scm_tag);
1519        resolve_option(&mut resolver, &mut organization_name);
1520        resolve_option(&mut resolver, &mut organization_url);
1521        resolve_option(&mut resolver, &mut issue_management_system);
1522        resolve_option(&mut resolver, &mut issue_management_url);
1523        resolve_option(&mut resolver, &mut ci_management_system);
1524        resolve_option(&mut resolver, &mut ci_management_url);
1525        resolve_option(&mut resolver, &mut dist_download_url);
1526        resolve_option(&mut resolver, &mut dist_repository_id);
1527        resolve_option(&mut resolver, &mut dist_repository_name);
1528        resolve_option(&mut resolver, &mut dist_repository_url);
1529        resolve_option(&mut resolver, &mut dist_repository_layout);
1530        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1531        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1532        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1533        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1534        resolve_option(&mut resolver, &mut dist_site_id);
1535        resolve_option(&mut resolver, &mut dist_site_name);
1536        resolve_option(&mut resolver, &mut dist_site_url);
1537        resolve_option(&mut resolver, &mut parent_group_id);
1538        resolve_option(&mut resolver, &mut parent_artifact_id);
1539        resolve_option(&mut resolver, &mut parent_version);
1540        resolve_option(&mut resolver, &mut parent_relative_path);
1541        resolve_option(&mut resolver, &mut project_name);
1542        resolve_option(&mut resolver, &mut project_description);
1543        resolve_option(&mut resolver, &mut project_packaging);
1544        resolve_option(&mut resolver, &mut project_classifier);
1545        resolve_vec(&mut resolver, &mut modules);
1546        resolve_maps(&mut resolver, &mut repositories);
1547        resolve_maps(&mut resolver, &mut plugin_repositories);
1548        resolve_maps(&mut resolver, &mut mailing_lists);
1549        for comment in &mut xml_license_comments {
1550            *comment = resolver.resolve_text(comment, 0);
1551        }
1552        for dependency in &mut dependency_management_entries {
1553            resolve_dependency_data(&mut resolver, dependency);
1554        }
1555        resolve_dependency_data(&mut resolver, &mut relocation);
1556        for license in &mut licenses {
1557            resolve_license_entry(&mut resolver, license);
1558        }
1559        for comment in xml_license_comments {
1560            if !comment.trim().is_empty() {
1561                licenses.push(MavenLicenseEntry {
1562                    comments: Some(comment),
1563                    ..Default::default()
1564                });
1565            }
1566        }
1567
1568        for (dependency, coords) in package_data
1569            .dependencies
1570            .iter_mut()
1571            .zip(dependency_data.iter_mut())
1572        {
1573            resolve_dependency_data(&mut resolver, coords);
1574            dependency.scope = coords.scope.clone();
1575            dependency.extracted_requirement = coords.version.clone();
1576            dependency.extra_data = dependency_extra_data(coords);
1577            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1578
1579            match dependency.scope.as_deref() {
1580                Some("test") | Some("provided") => {
1581                    dependency.is_runtime = Some(false);
1582                    dependency.is_optional = Some(true);
1583                }
1584                Some(_) => {
1585                    dependency.is_runtime = Some(true);
1586                }
1587                None => {
1588                    dependency.is_runtime = None;
1589                }
1590            }
1591
1592            if let Some(version) = &coords.version {
1593                dependency.is_pinned = Some(is_maven_version_pinned(version));
1594            }
1595
1596            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1597                dependency.purl = Some(build_maven_purl(
1598                    group_id,
1599                    artifact_id,
1600                    coords.version.as_deref(),
1601                    coords.classifier.as_deref(),
1602                    coords.type_.as_deref(),
1603                ));
1604            }
1605        }
1606
1607        if package_data.namespace.is_none() {
1608            package_data.namespace = parent_group_id.clone();
1609        }
1610        if package_data.version.is_none() {
1611            package_data.version = parent_version.clone();
1612        }
1613
1614        package_data.qualifiers =
1615            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1616
1617        package_data.description = match (
1618            project_name.as_deref().filter(|value| !value.is_empty()),
1619            project_description
1620                .as_deref()
1621                .filter(|value| !value.is_empty()),
1622        ) {
1623            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1624            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1625            (Some(name), None) => Some(name.to_string()),
1626            (None, Some(description)) => Some(description.to_string()),
1627            (None, None) => None,
1628        };
1629
1630        if path.to_string_lossy().contains("META-INF/maven/") {
1631            let path_str = path.to_string_lossy();
1632            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1633                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1634                let parts: Vec<&str> = after_maven.split('/').collect();
1635                if parts.len() >= 2 {
1636                    if package_data.namespace.is_none() {
1637                        package_data.namespace = Some(parts[0].to_string());
1638                    }
1639                    if package_data.name.is_none() {
1640                        package_data.name = Some(parts[1].to_string());
1641                    }
1642                }
1643            }
1644        }
1645
1646        // Construct PURL from parsed data
1647        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1648            &package_data.namespace,
1649            &package_data.name,
1650            &package_data.version,
1651        ) {
1652            package_data.purl = Some(build_maven_purl(
1653                group_id,
1654                artifact_id,
1655                Some(version),
1656                project_classifier.as_deref(),
1657                project_packaging.as_deref(),
1658            ));
1659            if project_classifier.is_none() {
1660                package_data
1661                    .source_packages
1662                    .push(build_maven_source_package(group_id, artifact_id, version));
1663            }
1664        }
1665
1666        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1667            package_data.repository_homepage_url = build_maven_url(
1668                &package_data.namespace,
1669                &package_data.name,
1670                &package_data.version,
1671                None,
1672            );
1673
1674            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1675                build_maven_download_url(
1676                    group_id,
1677                    artifact_id,
1678                    ver,
1679                    project_classifier.as_deref(),
1680                    project_packaging.as_deref(),
1681                )
1682            });
1683
1684            if let Some(ver) = &package_data.version {
1685                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1686                package_data.api_data_url = build_maven_url(
1687                    &package_data.namespace,
1688                    &package_data.name,
1689                    &package_data.version,
1690                    Some(&pom_filename),
1691                );
1692            }
1693        }
1694
1695        package_data.vcs_url = scm_connection
1696            .or_else(|| scm_developer_connection.clone())
1697            .or_else(|| scm_url.clone());
1698
1699        // Set code_view_url from scm/url (human-browseable URL)
1700        if let Some(url) = &scm_url {
1701            package_data.code_view_url = Some(url.clone());
1702        }
1703
1704        // Set bug_tracking_url from issueManagement/url
1705        if let Some(url) = &issue_management_url {
1706            package_data.bug_tracking_url = Some(url.clone());
1707        }
1708
1709        // Map downloadUrl to download_url field
1710        if let Some(url) = &dist_download_url {
1711            package_data.download_url = Some(url.clone());
1712        }
1713
1714        if organization_name.is_some() || organization_url.is_some() {
1715            package_data.parties.push(Party {
1716                r#type: Some("organization".to_string()),
1717                role: Some("owner".to_string()),
1718                name: organization_name.clone(),
1719                email: None,
1720                url: organization_url.clone(),
1721                organization: None,
1722                organization_url: None,
1723                timezone: None,
1724            });
1725        }
1726
1727        for dependency in &dependency_management_entries {
1728            if dependency.scope.as_deref() == Some("import")
1729                && let Some(import_dependency) =
1730                    maven_dependency_to_dependency(dependency, Some("import"), true)
1731            {
1732                package_data.dependencies.push(import_dependency);
1733            }
1734
1735            // Import-scoped BOMs carry two distinct facts in the declared POM:
1736            // this project explicitly imports that BOM, and the imported BOM
1737            // contributes managed constraints. Keep both normalized rows in the
1738            // ordinary dependency stream so generic dependency consumers can see
1739            // BOM provenance (`scope=import`) without inspecting Maven-specific
1740            // extra_data, while still getting the managed-constraint view
1741            // (`scope=dependencymanagement`).
1742            let mut dependency_management_copy = dependency.clone();
1743            dependency_management_copy.scope = Some("dependencymanagement".to_string());
1744
1745            if let Some(converted) = maven_dependency_to_dependency(
1746                &dependency_management_copy,
1747                Some("dependencymanagement"),
1748                true,
1749            ) {
1750                package_data.dependencies.push(converted);
1751            }
1752        }
1753
1754        if (relocation.group_id.is_some()
1755            || relocation.artifact_id.is_some()
1756            || relocation.version.is_some())
1757            && let Some(converted) =
1758                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1759        {
1760            package_data.dependencies.push(converted);
1761        }
1762
1763        if inception_year.is_some()
1764            || organization_name.is_some()
1765            || organization_url.is_some()
1766            || scm_tag.is_some()
1767            || scm_developer_connection.is_some()
1768            || issue_management_system.is_some()
1769            || ci_management_system.is_some()
1770            || ci_management_url.is_some()
1771            || dist_download_url.is_some()
1772            || dist_repository_id.is_some()
1773            || dist_snapshot_repository_id.is_some()
1774            || dist_site_id.is_some()
1775            || !repositories.is_empty()
1776            || !plugin_repositories.is_empty()
1777            || !modules.is_empty()
1778            || !mailing_lists.is_empty()
1779            || !dependency_management_entries.is_empty()
1780            || parent_group_id.is_some()
1781            || relocation.group_id.is_some()
1782            || relocation.artifact_id.is_some()
1783            || relocation.version.is_some()
1784            || relocation.message.is_some()
1785        {
1786            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1787            if let Some(year) = inception_year {
1788                extra_data.insert(
1789                    "inception_year".to_string(),
1790                    serde_json::Value::String(year),
1791                );
1792            }
1793            if let Some(name) = organization_name {
1794                extra_data.insert(
1795                    "organization_name".to_string(),
1796                    serde_json::Value::String(name),
1797                );
1798            }
1799            if let Some(url) = organization_url {
1800                extra_data.insert(
1801                    "organization_url".to_string(),
1802                    serde_json::Value::String(url),
1803                );
1804            }
1805            if let Some(tag) = scm_tag {
1806                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1807            }
1808            if let Some(dev_conn) = scm_developer_connection {
1809                extra_data.insert(
1810                    "scm_developer_connection".to_string(),
1811                    serde_json::Value::String(dev_conn),
1812                );
1813            }
1814            if let Some(system) = issue_management_system {
1815                extra_data.insert(
1816                    "issue_tracking_system".to_string(),
1817                    serde_json::Value::String(system),
1818                );
1819            }
1820            if let Some(system) = ci_management_system {
1821                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1822            }
1823            if let Some(url) = ci_management_url {
1824                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1825            }
1826
1827            // Add distribution management data
1828            if let Some(url) = dist_download_url {
1829                extra_data.insert(
1830                    "distribution_download_url".to_string(),
1831                    serde_json::Value::String(url),
1832                );
1833            }
1834
1835            // Build repository object
1836            if dist_repository_id.is_some()
1837                || dist_repository_name.is_some()
1838                || dist_repository_url.is_some()
1839                || dist_repository_layout.is_some()
1840            {
1841                let mut repo = serde_json::Map::new();
1842                if let Some(id) = dist_repository_id {
1843                    repo.insert("id".to_string(), serde_json::Value::String(id));
1844                }
1845                if let Some(name) = dist_repository_name {
1846                    repo.insert("name".to_string(), serde_json::Value::String(name));
1847                }
1848                if let Some(url) = dist_repository_url {
1849                    repo.insert("url".to_string(), serde_json::Value::String(url));
1850                }
1851                if let Some(layout) = dist_repository_layout {
1852                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1853                }
1854                extra_data.insert(
1855                    "distribution_repository".to_string(),
1856                    serde_json::Value::Object(repo),
1857                );
1858            }
1859
1860            // Build snapshotRepository object
1861            if dist_snapshot_repository_id.is_some()
1862                || dist_snapshot_repository_name.is_some()
1863                || dist_snapshot_repository_url.is_some()
1864                || dist_snapshot_repository_layout.is_some()
1865            {
1866                let mut repo = serde_json::Map::new();
1867                if let Some(id) = dist_snapshot_repository_id {
1868                    repo.insert("id".to_string(), serde_json::Value::String(id));
1869                }
1870                if let Some(name) = dist_snapshot_repository_name {
1871                    repo.insert("name".to_string(), serde_json::Value::String(name));
1872                }
1873                if let Some(url) = dist_snapshot_repository_url {
1874                    repo.insert("url".to_string(), serde_json::Value::String(url));
1875                }
1876                if let Some(layout) = dist_snapshot_repository_layout {
1877                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1878                }
1879                extra_data.insert(
1880                    "distribution_snapshot_repository".to_string(),
1881                    serde_json::Value::Object(repo),
1882                );
1883            }
1884
1885            // Build site object
1886            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1887                let mut site = serde_json::Map::new();
1888                if let Some(id) = dist_site_id {
1889                    site.insert("id".to_string(), serde_json::Value::String(id));
1890                }
1891                if let Some(name) = dist_site_name {
1892                    site.insert("name".to_string(), serde_json::Value::String(name));
1893                }
1894                if let Some(url) = dist_site_url {
1895                    site.insert("url".to_string(), serde_json::Value::String(url));
1896                }
1897                extra_data.insert(
1898                    "distribution_site".to_string(),
1899                    serde_json::Value::Object(site),
1900                );
1901            }
1902
1903            if !repositories.is_empty() {
1904                extra_data.insert(
1905                    "repositories".to_string(),
1906                    serde_json::Value::Array(
1907                        repositories
1908                            .into_iter()
1909                            .map(serde_json::Value::Object)
1910                            .collect(),
1911                    ),
1912                );
1913            }
1914
1915            if !plugin_repositories.is_empty() {
1916                extra_data.insert(
1917                    "plugin_repositories".to_string(),
1918                    serde_json::Value::Array(
1919                        plugin_repositories
1920                            .into_iter()
1921                            .map(serde_json::Value::Object)
1922                            .collect(),
1923                    ),
1924                );
1925            }
1926
1927            if !modules.is_empty() {
1928                extra_data.insert(
1929                    "modules".to_string(),
1930                    serde_json::Value::Array(
1931                        modules.into_iter().map(serde_json::Value::String).collect(),
1932                    ),
1933                );
1934            }
1935
1936            if !mailing_lists.is_empty() {
1937                extra_data.insert(
1938                    "mailing_lists".to_string(),
1939                    serde_json::Value::Array(
1940                        mailing_lists
1941                            .into_iter()
1942                            .map(serde_json::Value::Object)
1943                            .collect(),
1944                    ),
1945                );
1946            }
1947
1948            if !dependency_management_entries.is_empty() {
1949                extra_data.insert(
1950                    "dependency_management".to_string(),
1951                    serde_json::Value::Array(
1952                        dependency_management_entries
1953                            .into_iter()
1954                            .map(|dependency| {
1955                                serde_json::Value::Object(dependency_management_entry_to_value(
1956                                    &dependency,
1957                                ))
1958                            })
1959                            .collect(),
1960                    ),
1961                );
1962            }
1963
1964            if relocation.group_id.is_some()
1965                || relocation.artifact_id.is_some()
1966                || relocation.version.is_some()
1967                || relocation.message.is_some()
1968            {
1969                extra_data.insert(
1970                    "relocation".to_string(),
1971                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1972                );
1973            }
1974
1975            if parent_group_id.is_some()
1976                || parent_artifact_id.is_some()
1977                || parent_version.is_some()
1978                || parent_relative_path.is_some()
1979            {
1980                let mut parent_obj = serde_json::Map::new();
1981                if let Some(group_id) = parent_group_id {
1982                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1983                }
1984                if let Some(artifact_id) = parent_artifact_id {
1985                    parent_obj.insert(
1986                        "artifactId".to_string(),
1987                        serde_json::Value::String(artifact_id),
1988                    );
1989                }
1990                if let Some(version) = parent_version {
1991                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1992                }
1993                if let Some(relative_path) = parent_relative_path {
1994                    parent_obj.insert(
1995                        "relativePath".to_string(),
1996                        serde_json::Value::String(relative_path),
1997                    );
1998                }
1999                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
2000            }
2001
2002            package_data.extra_data = Some(extra_data);
2003        }
2004
2005        package_data.extracted_license_statement =
2006            build_license_statement(&licenses).map(truncate_field);
2007        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
2008            build_maven_declared_license_data(
2009                &licenses,
2010                package_data.extracted_license_statement.as_deref(),
2011            );
2012        package_data.declared_license_expression = declared_license_expression;
2013        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
2014        package_data.license_detections = license_detections;
2015
2016        package_data.namespace = package_data.namespace.map(truncate_field);
2017        package_data.name = package_data.name.map(truncate_field);
2018        package_data.version = package_data.version.map(truncate_field);
2019        package_data.description = package_data.description.map(truncate_field);
2020        package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2021        package_data.vcs_url = package_data.vcs_url.map(truncate_field);
2022        package_data.purl = package_data.purl.map(truncate_field);
2023        package_data.code_view_url = package_data.code_view_url.map(truncate_field);
2024        package_data.bug_tracking_url = package_data.bug_tracking_url.map(truncate_field);
2025        package_data.download_url = package_data.download_url.map(truncate_field);
2026        package_data.repository_homepage_url =
2027            package_data.repository_homepage_url.map(truncate_field);
2028        package_data.repository_download_url =
2029            package_data.repository_download_url.map(truncate_field);
2030        package_data.api_data_url = package_data.api_data_url.map(truncate_field);
2031        for dep in &mut package_data.dependencies {
2032            dep.purl = dep.purl.take().map(truncate_field);
2033            dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2034        }
2035
2036        vec![package_data]
2037    }
2038
2039    fn is_match(path: &Path) -> bool {
2040        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
2041            filename == "pom.xml"
2042                || filename.ends_with(".pom.xml")
2043                || filename.ends_with("-pom.xml")
2044                || filename.ends_with("_pom.xml")
2045                || filename == "pom.properties"
2046                || filename == "MANIFEST.MF"
2047                || filename.ends_with(".pom")
2048        } else {
2049            false
2050        }
2051    }
2052}
2053
/// Builds a URL below `https://repo1.maven.org/maven2` for the given coordinates.
///
/// The path is `<group path>/<artifact>/[<version>/]<filename>`, where the
/// group path is `group_id` with every `.` replaced by `/`. The version
/// segment is emitted only when `version` is `Some`. When `filename` is
/// `None` an empty final segment is used, so the URL ends with a trailing `/`.
///
/// Returns `None` when either `group_id` or `artifact_id` is `None`.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let group = group_id.as_deref()?;
    let artifact = artifact_id.as_deref()?;

    // Start with the part common to both shapes, then append the optional
    // version directory and finally the (possibly empty) file name.
    let mut url = format!("{BASE_URL}/{}/{artifact}/", group.replace('.', "/"));
    if let Some(ver) = version.as_deref() {
        url.push_str(ver);
        url.push('/');
    }
    url.push_str(filename.unwrap_or_default());

    Some(url)
}
2082
2083fn build_maven_declared_license_data(
2084    licenses: &[MavenLicenseEntry],
2085    matched_text: Option<&str>,
2086) -> (
2087    Option<String>,
2088    Option<String>,
2089    Vec<crate::models::LicenseDetection>,
2090) {
2091    let normalized: Vec<_> = licenses
2092        .iter()
2093        .filter_map(|license| license.name.as_deref())
2094        .filter_map(normalize_maven_license_name)
2095        .collect();
2096
2097    if normalized.is_empty() {
2098        return empty_declared_license_data();
2099    }
2100
2101    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2102        return empty_declared_license_data();
2103    };
2104
2105    build_declared_license_data(
2106        combined,
2107        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2108    )
2109}
2110
2111fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2112    match name.trim() {
2113        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2114            "public-domain",
2115            "LicenseRef-provenant-public-domain",
2116        )),
2117        other => normalize_declared_license_key(other),
2118    }
2119}
2120
2121/// Parse pom.properties file (Java properties format)
2122fn parse_pom_properties(path: &Path) -> PackageData {
2123    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2124        Ok(content) => content,
2125        Err(e) => {
2126            warn!("Failed to read pom.properties at {:?}: {}", path, e);
2127            return PackageData {
2128                package_type: Some(PackageType::Maven),
2129                primary_language: Some("Java".to_string()),
2130                datasource_id: Some(DatasourceId::MavenPomProperties),
2131                ..Default::default()
2132            };
2133        }
2134    };
2135
2136    let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2137    package_data.package_type = Some(PackageType::Maven);
2138    package_data.primary_language = Some("Java".to_string());
2139    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2140
2141    let mut group_id: Option<String> = None;
2142    let mut artifact_id: Option<String> = None;
2143    let mut version: Option<String> = None;
2144
2145    // Parse Java properties format
2146    let mut continuation = String::new();
2147
2148    for line in content.lines() {
2149        let current_line = if continuation.is_empty() {
2150            line.to_string()
2151        } else {
2152            format!("{}{}", continuation, line)
2153        };
2154        continuation.clear();
2155
2156        // Check for line continuation (backslash at end)
2157        if current_line.ends_with('\\') {
2158            continuation = current_line[..current_line.len() - 1].to_string();
2159            continue;
2160        }
2161
2162        // Skip comments and empty lines
2163        let trimmed = current_line.trim();
2164        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2165            continue;
2166        }
2167
2168        // Parse key=value
2169        if let Some(eq_pos) = current_line.find('=') {
2170            let key = current_line[..eq_pos].trim();
2171            let value = current_line[eq_pos + 1..].trim();
2172
2173            match key {
2174                "groupId" => group_id = Some(value.to_string()),
2175                "artifactId" => artifact_id = Some(value.to_string()),
2176                "version" => version = Some(value.to_string()),
2177                _ => {}
2178            }
2179        }
2180    }
2181
2182    package_data.namespace = group_id.map(truncate_field);
2183    package_data.name = artifact_id.map(truncate_field);
2184    package_data.version = version.map(truncate_field);
2185
2186    // Generate PURL
2187    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2188        &package_data.namespace,
2189        &package_data.name,
2190        &package_data.version,
2191    ) {
2192        package_data.purl = Some(truncate_field(format!(
2193            "pkg:maven/{}/{}@{}",
2194            group_id, artifact_id, version
2195        )));
2196    }
2197
2198    package_data
2199}
2200
2201/// Parse MANIFEST.MF file (JAR manifest format)
2202///
2203/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2204/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2205/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2206/// dependencies.
2207fn parse_manifest_mf(path: &Path) -> PackageData {
2208    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2209        Ok(content) => content,
2210        Err(e) => {
2211            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2212            return default_package_data(DatasourceId::JavaJarManifest);
2213        }
2214    };
2215
2216    let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2217
2218    // Parse manifest headers (RFC822-style with space continuations)
2219    let mut headers: Vec<(String, String)> = Vec::new();
2220    let mut current_key: Option<String> = None;
2221    let mut current_value = String::new();
2222
2223    for line in content.lines() {
2224        if line.starts_with(' ') || line.starts_with('\t') {
2225            // Continuation line
2226            current_value.push_str(line.trim());
2227        } else if let Some(colon_pos) = line.find(':') {
2228            // Save previous header
2229            if let Some(key) = current_key.take() {
2230                headers.push((key, current_value.trim().to_string()));
2231                current_value.clear();
2232            }
2233
2234            // Start new header
2235            let key = line[..colon_pos].trim().to_string();
2236            let value = line[colon_pos + 1..].trim().to_string();
2237            current_key = Some(key);
2238            current_value = value;
2239        }
2240    }
2241
2242    // Save last header
2243    if let Some(key) = current_key {
2244        headers.push((key, current_value.trim().to_string()));
2245    }
2246
2247    // Convert headers to HashMap for easier lookup
2248    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2249
2250    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2251    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2252    let is_osgi = bundle_symbolic_name.is_some();
2253
2254    if is_osgi {
2255        // OSGi bundle - extract OSGi-specific metadata
2256        package_data.package_type = Some(PackageType::Osgi);
2257        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2258
2259        // Bundle-SymbolicName is the canonical name for OSGi bundles
2260        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2261        if let Some(bsn) = bundle_symbolic_name {
2262            let name = if let Some(semicolon_pos) = bsn.find(';') {
2263                bsn[..semicolon_pos].trim().to_string()
2264            } else {
2265                bsn.clone()
2266            };
2267            package_data.name = Some(name);
2268        }
2269
2270        // Bundle-Version
2271        package_data.version = headers_map.get("Bundle-Version").cloned();
2272
2273        // Bundle-Description takes priority over Bundle-Name for description
2274        if let Some(desc) = headers_map.get("Bundle-Description") {
2275            package_data.description = Some(desc.clone());
2276        } else if let Some(name) = headers_map.get("Bundle-Name") {
2277            package_data.description = Some(name.clone());
2278        }
2279
2280        // Bundle-Vendor
2281        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2282            package_data.parties.push(Party {
2283                r#type: Some("organization".to_string()),
2284                role: Some("vendor".to_string()),
2285                name: Some(vendor.clone()),
2286                email: None,
2287                url: None,
2288                organization: None,
2289                organization_url: None,
2290                timezone: None,
2291            });
2292        }
2293
2294        // Bundle-DocURL
2295        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2296
2297        // Bundle-License
2298        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2299
2300        // Import-Package -> dependencies with scope "import"
2301        if let Some(import_pkg) = headers_map.get("Import-Package") {
2302            let deps = parse_osgi_package_list(import_pkg, "import");
2303            package_data.dependencies.extend(deps);
2304        }
2305
2306        // Require-Bundle -> dependencies with scope "require-bundle"
2307        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2308            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2309            package_data.dependencies.extend(deps);
2310        }
2311
2312        // Export-Package -> store in extra_data
2313        if let Some(export_pkg) = headers_map.get("Export-Package") {
2314            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2315            extra_data.insert(
2316                "export_packages".to_string(),
2317                serde_json::Value::String(export_pkg.clone()),
2318            );
2319            package_data.extra_data = Some(extra_data);
2320        }
2321
2322        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2323        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2324            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2325        }
2326    } else {
2327        // Regular JAR manifest
2328        package_data.package_type = Some(PackageType::Maven);
2329        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2330
2331        // Extract fields with priority order for non-OSGi JARs
2332        let mut name: Option<String> = None;
2333        let mut version: Option<String> = None;
2334        let mut vendor: Option<String> = None;
2335
2336        for (key, value) in &headers {
2337            match key.as_str() {
2338                "Bundle-Name" if name.is_none() => {
2339                    name = Some(value.clone());
2340                }
2341                "Implementation-Title" if name.is_none() => {
2342                    name = Some(value.clone());
2343                }
2344                "Bundle-Version" if version.is_none() => {
2345                    version = Some(value.clone());
2346                }
2347                "Implementation-Version" if version.is_none() => {
2348                    version = Some(value.clone());
2349                }
2350                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2351                    vendor = Some(value.clone());
2352                }
2353                _ => {}
2354            }
2355        }
2356
2357        package_data.name = name;
2358        package_data.version = version;
2359
2360        // Add vendor to parties if present
2361        if let Some(vendor_name) = vendor {
2362            package_data.parties.push(Party {
2363                r#type: Some("organization".to_string()),
2364                role: Some("vendor".to_string()),
2365                name: Some(vendor_name),
2366                email: None,
2367                url: None,
2368                organization: None,
2369                organization_url: None,
2370                timezone: None,
2371            });
2372        }
2373
2374        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2375        if let Some(path_str) = path.to_str()
2376            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2377        {
2378            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2379            let parts: Vec<&str> = after_maven.split('/').collect();
2380            if parts.len() >= 2 {
2381                package_data.namespace = Some(parts[0].to_string());
2382            }
2383        }
2384
2385        // Generate Maven PURL if we have enough information
2386        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2387            &package_data.namespace,
2388            &package_data.name,
2389            &package_data.version,
2390        ) {
2391            package_data.purl = Some(format!(
2392                "pkg:maven/{}/{}@{}",
2393                group_id, artifact_id, version
2394            ));
2395        } else if package_data.name.is_none() && package_data.version.is_none() {
2396            // A bare MANIFEST.MF without Maven coordinates or implementation
2397            // identity is only evidence of a generic JAR manifest, not a Maven
2398            // package. Keep the Java manifest datasource so assembly can still
2399            // merge richer sibling metadata when present.
2400            package_data.package_type = Some(PackageType::Jar);
2401        }
2402    }
2403
2404    package_data.name = package_data.name.map(truncate_field);
2405    package_data.version = package_data.version.map(truncate_field);
2406    package_data.namespace = package_data.namespace.map(truncate_field);
2407    package_data.description = package_data.description.map(truncate_field);
2408    package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2409    package_data.extracted_license_statement =
2410        package_data.extracted_license_statement.map(truncate_field);
2411    package_data.purl = package_data.purl.map(truncate_field);
2412    for dep in &mut package_data.dependencies {
2413        dep.purl = dep.purl.take().map(truncate_field);
2414        dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2415    }
2416
2417    package_data
2418}
2419
2420/// Parse OSGi Import-Package header into dependencies.
2421///
2422/// Format: comma-separated list of packages with optional directives:
2423/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2424pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2425    let mut dependencies = Vec::new();
2426
2427    // Split by comma, but be careful not to split within quoted strings
2428    for package_entry in split_osgi_list(package_list)
2429        .into_iter()
2430        .take(MAX_ITERATION_COUNT)
2431    {
2432        let package_entry = package_entry.trim();
2433        if package_entry.is_empty() {
2434            continue;
2435        }
2436
2437        // Extract package name (before first semicolon)
2438        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2439            package_entry[..semicolon_pos].trim()
2440        } else {
2441            package_entry
2442        };
2443
2444        if package_name.is_empty() {
2445            continue;
2446        }
2447
2448        // Extract version directive if present
2449        let version_requirement = extract_osgi_version(package_entry);
2450        let is_optional = package_entry.contains("resolution:=optional");
2451
2452        dependencies.push(Dependency {
2453            purl: Some(format!("pkg:osgi/{}", package_name)),
2454            extracted_requirement: version_requirement,
2455            scope: Some(scope.to_string()),
2456            is_runtime: Some(true),
2457            is_optional: Some(is_optional),
2458            is_pinned: None,
2459            is_direct: Some(true),
2460            resolved_package: None,
2461            extra_data: None,
2462        });
2463    }
2464
2465    dependencies
2466}
2467
2468/// Parse OSGi Require-Bundle header into dependencies.
2469///
2470/// Format: comma-separated list of bundle symbolic names with optional directives:
2471/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2472pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2473    let mut dependencies = Vec::new();
2474
2475    for bundle_entry in split_osgi_list(bundle_list)
2476        .into_iter()
2477        .take(MAX_ITERATION_COUNT)
2478    {
2479        let bundle_entry = bundle_entry.trim();
2480        if bundle_entry.is_empty() {
2481            continue;
2482        }
2483
2484        // Extract bundle symbolic name (before first semicolon)
2485        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2486            bundle_entry[..semicolon_pos].trim()
2487        } else {
2488            bundle_entry
2489        };
2490
2491        if bundle_name.is_empty() {
2492            continue;
2493        }
2494
2495        // Extract bundle-version directive if present
2496        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2497
2498        // Check if optional
2499        let is_optional = bundle_entry.contains("resolution:=optional");
2500
2501        dependencies.push(Dependency {
2502            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2503            extracted_requirement: version_requirement,
2504            scope: Some(scope.to_string()),
2505            is_runtime: Some(!is_optional),
2506            is_optional: Some(is_optional),
2507            is_pinned: None,
2508            is_direct: Some(true),
2509            resolved_package: None,
2510            extra_data: None,
2511        });
2512    }
2513
2514    dependencies
2515}
2516
/// Split a comma-separated OSGi header value into its entries.
///
/// Commas inside double-quoted strings do not split, so version ranges stay
/// intact:
/// `foo;version="[1.0,2.0)",bar;version="3.0"` yields two entries.
///
/// Each entry is trimmed, and entries that are empty after trimming are
/// dropped. Quote characters are kept in the returned entries.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut entries: Vec<String> = Vec::new();
    let mut push_entry = |segment: &str| {
        let cleaned = segment.trim();
        if !cleaned.is_empty() {
            entries.push(cleaned.to_string());
        }
    };

    let mut inside_quotes = false;
    let mut segment_start = 0;

    for (idx, ch) in list.char_indices() {
        if ch == '"' {
            inside_quotes = !inside_quotes;
        } else if ch == ',' && !inside_quotes {
            push_entry(&list[segment_start..idx]);
            // ',' is one byte wide, so the next segment starts right after it.
            segment_start = idx + 1;
        }
    }

    // Whatever follows the last separator is the final entry.
    push_entry(&list[segment_start..]);

    entries
}
2550
/// Extract the value of the parameter `directive` (written `directive=value`)
/// from a single OSGi header entry such as
/// `org.foo;bundle-version="1.0";version="[2.0,3)"`.
///
/// A match only counts when it starts a parameter, i.e. when it sits at the
/// very beginning of the entry or directly after a `;` (whitespace in between
/// is allowed). This keeps a lookup for `version` from picking up the value of
/// `bundle-version` or `specification-version`.
///
/// A value starting with `"` is returned without its quotes; `None` is
/// returned if the closing quote is missing. An unquoted value runs up to the
/// next `;` (or the end of the entry) and is trimmed.
///
/// Returns `None` when the entry has no such parameter.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    let needle = format!("{}=", directive);
    let mut search_from = 0;

    while let Some(offset) = entry[search_from..].find(&needle) {
        let start = search_from + offset;
        let value_start = start + needle.len();

        // Reject matches that are merely the tail of a longer parameter name.
        let preceding = entry[..start].trim_end();
        if preceding.is_empty() || preceding.ends_with(';') {
            let after_value = &entry[value_start..];

            return if let Some(stripped) = after_value.strip_prefix('"') {
                stripped.find('"').map(|end| stripped[..end].to_string())
            } else {
                let end = after_value.find(';').unwrap_or(after_value.len());
                Some(after_value[..end].trim().to_string())
            };
        }

        search_from = value_start;
    }

    None
}
2563
2564pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2565    extract_osgi_directive(entry, "version")
2566}
2567
2568pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2569    extract_osgi_directive(entry, "bundle-version")
2570}
2571
2572fn default_package_data(datasource_id: DatasourceId) -> PackageData {
2573    PackageData {
2574        package_type: Some(PackageType::Maven),
2575        datasource_id: Some(datasource_id),
2576        ..Default::default()
2577    }
2578}
2579
2580#[cfg(test)]
2581mod tests {
2582    use super::*;
2583    use std::fs;
2584    use std::path::PathBuf;
2585    use tempfile::TempDir;
2586
2587    #[test]
2588    fn test_maven_parser_matches_underscore_pom_suffix() {
2589        assert!(MavenParser::is_match(&PathBuf::from("dbwebx_pom.xml")));
2590    }
2591
2592    #[test]
2593    fn test_maven_parser_extracts_parent_derived_identity_from_underscore_pom_fixture() {
2594        let fixture = PathBuf::from(
2595            "reference/scancode-toolkit/tests/packagedcode/data/maven2/dbwebx_pom/dbwebx_pom.xml",
2596        );
2597        if !fixture.exists() {
2598            return;
2599        }
2600
2601        let package_data = MavenParser::extract_first_package(&fixture);
2602
2603        assert_eq!(package_data.namespace.as_deref(), Some("org.dbwebx"));
2604        assert_eq!(package_data.name.as_deref(), Some("tools"));
2605        assert_eq!(package_data.version.as_deref(), Some("0.0.1.SNAPSHOT"));
2606        assert_eq!(
2607            package_data.purl.as_deref(),
2608            Some("pkg:maven/org.dbwebx/tools@0.0.1.SNAPSHOT")
2609        );
2610    }
2611
2612    #[test]
2613    fn test_organization_extraction() {
2614        let temp_dir = TempDir::new().unwrap();
2615        let pom_path = temp_dir.path().join("pom.xml");
2616
2617        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2618<project>
2619    <modelVersion>4.0.0</modelVersion>
2620    <groupId>com.example</groupId>
2621    <artifactId>my-app</artifactId>
2622    <version>1.0.0</version>
2623    <organization>
2624        <name>Example Corporation</name>
2625        <url>https://example.com</url>
2626    </organization>
2627</project>"#;
2628
2629        fs::write(&pom_path, pom_content).unwrap();
2630
2631        let package_data = MavenParser::extract_first_package(&pom_path);
2632
2633        assert_eq!(package_data.name, Some("my-app".to_string()));
2634        assert_eq!(package_data.namespace, Some("com.example".to_string()));
2635        assert_eq!(package_data.version, Some("1.0.0".to_string()));
2636
2637        let extra_data = package_data.extra_data.unwrap();
2638        assert_eq!(
2639            extra_data.get("organization_name"),
2640            Some(&serde_json::Value::String(
2641                "Example Corporation".to_string()
2642            ))
2643        );
2644        assert_eq!(
2645            extra_data.get("organization_url"),
2646            Some(&serde_json::Value::String(
2647                "https://example.com".to_string()
2648            ))
2649        );
2650    }
2651
2652    #[test]
2653    fn test_scm_metadata_extraction() {
2654        let temp_dir = TempDir::new().unwrap();
2655        let pom_path = temp_dir.path().join("pom.xml");
2656
2657        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2658<project xmlns="http://maven.apache.org/POM/4.0.0"
2659         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2660         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2661    <modelVersion>4.0.0</modelVersion>
2662    <groupId>org.springframework.boot</groupId>
2663    <artifactId>spring-boot-starter-web</artifactId>
2664    <version>3.0.0</version>
2665    <scm>
2666        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
2667        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
2668        <url>https://github.com/spring-projects/spring-boot</url>
2669        <tag>v3.0.0</tag>
2670    </scm>
2671</project>"#;
2672
2673        fs::write(&pom_path, pom_content).unwrap();
2674
2675        let package_data = MavenParser::extract_first_package(&pom_path);
2676
2677        assert_eq!(
2678            package_data.name,
2679            Some("spring-boot-starter-web".to_string())
2680        );
2681        assert_eq!(
2682            package_data.namespace,
2683            Some("org.springframework.boot".to_string())
2684        );
2685        assert_eq!(package_data.version, Some("3.0.0".to_string()));
2686
2687        assert_eq!(
2688            package_data.code_view_url,
2689            Some("https://github.com/spring-projects/spring-boot".to_string())
2690        );
2691
2692        // vcs_url prefers connection over developerConnection
2693        assert_eq!(
2694            package_data.vcs_url,
2695            Some("git+https://github.com/spring-projects/spring-boot.git".to_string())
2696        );
2697
2698        let extra_data = package_data.extra_data.unwrap();
2699        assert_eq!(
2700            extra_data.get("scm_tag"),
2701            Some(&serde_json::Value::String("v3.0.0".to_string()))
2702        );
2703        // developerConnection stored separately in extra_data
2704        assert_eq!(
2705            extra_data.get("scm_developer_connection"),
2706            Some(&serde_json::Value::String(
2707                "git+git@github.com:spring-projects/spring-boot.git".to_string()
2708            ))
2709        );
2710    }
2711
2712    #[test]
2713    fn test_developers_and_contributors_extraction() {
2714        let temp_dir = TempDir::new().unwrap();
2715        let pom_path = temp_dir.path().join("pom.xml");
2716
2717        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2718<project xmlns="http://maven.apache.org/POM/4.0.0"
2719         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2720         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2721    <modelVersion>4.0.0</modelVersion>
2722    <groupId>com.example</groupId>
2723    <artifactId>test-app</artifactId>
2724    <version>1.0.0</version>
2725    <developers>
2726        <developer>
2727            <id>jdoe</id>
2728            <name>John Doe</name>
2729            <email>john@example.com</email>
2730            <url>https://example.com/jdoe</url>
2731            <organization>Example Corp</organization>
2732            <organizationUrl>https://example.com</organizationUrl>
2733            <timezone>America/New_York</timezone>
2734        </developer>
2735        <developer>
2736            <name>Jane Smith</name>
2737            <email>jane@example.com</email>
2738        </developer>
2739    </developers>
2740    <contributors>
2741        <contributor>
2742            <name>Bob Wilson</name>
2743            <email>bob@example.com</email>
2744            <url>https://example.com/bob</url>
2745        </contributor>
2746    </contributors>
2747</project>"#;
2748
2749        fs::write(&pom_path, pom_content).unwrap();
2750
2751        let package_data = MavenParser::extract_first_package(&pom_path);
2752
2753        assert_eq!(package_data.name, Some("test-app".to_string()));
2754        assert_eq!(package_data.parties.len(), 3);
2755
2756        let dev1 = &package_data.parties[0];
2757        assert_eq!(dev1.r#type, Some("person".to_string()));
2758        assert_eq!(dev1.role, Some("developer".to_string()));
2759        assert_eq!(dev1.name, Some("John Doe".to_string()));
2760        assert_eq!(dev1.email, Some("john@example.com".to_string()));
2761        assert_eq!(dev1.url, Some("https://example.com/jdoe".to_string()));
2762        assert_eq!(dev1.organization, Some("Example Corp".to_string()));
2763        assert_eq!(
2764            dev1.organization_url,
2765            Some("https://example.com".to_string())
2766        );
2767        assert_eq!(dev1.timezone, Some("America/New_York".to_string()));
2768
2769        let dev2 = &package_data.parties[1];
2770        assert_eq!(dev2.r#type, Some("person".to_string()));
2771        assert_eq!(dev2.role, Some("developer".to_string()));
2772        assert_eq!(dev2.name, Some("Jane Smith".to_string()));
2773        assert_eq!(dev2.email, Some("jane@example.com".to_string()));
2774
2775        let contrib = &package_data.parties[2];
2776        assert_eq!(contrib.r#type, Some("person".to_string()));
2777        assert_eq!(contrib.role, Some("contributor".to_string()));
2778        assert_eq!(contrib.name, Some("Bob Wilson".to_string()));
2779        assert_eq!(contrib.email, Some("bob@example.com".to_string()));
2780        assert_eq!(contrib.url, Some("https://example.com/bob".to_string()));
2781    }
2782
2783    #[test]
2784    fn test_issue_management_extraction() {
2785        let temp_dir = TempDir::new().unwrap();
2786        let pom_path = temp_dir.path().join("pom.xml");
2787
2788        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2789<project xmlns="http://maven.apache.org/POM/4.0.0"
2790         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2791         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2792    <modelVersion>4.0.0</modelVersion>
2793    <groupId>com.example</groupId>
2794    <artifactId>test-app</artifactId>
2795    <version>1.0.0</version>
2796    <issueManagement>
2797        <system>GitHub</system>
2798        <url>https://github.com/example/test-app/issues</url>
2799    </issueManagement>
2800</project>"#;
2801
2802        fs::write(&pom_path, pom_content).unwrap();
2803
2804        let package_data = MavenParser::extract_first_package(&pom_path);
2805
2806        assert_eq!(package_data.name, Some("test-app".to_string()));
2807        assert_eq!(
2808            package_data.bug_tracking_url,
2809            Some("https://github.com/example/test-app/issues".to_string())
2810        );
2811
2812        let extra_data = package_data.extra_data.unwrap();
2813        assert_eq!(
2814            extra_data.get("issue_tracking_system"),
2815            Some(&serde_json::Value::String("GitHub".to_string()))
2816        );
2817    }
2818
2819    #[test]
2820    fn test_ci_management_extraction() {
2821        let temp_dir = TempDir::new().unwrap();
2822        let pom_path = temp_dir.path().join("pom.xml");
2823
2824        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2825<project xmlns="http://maven.apache.org/POM/4.0.0"
2826         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2827         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2828    <modelVersion>4.0.0</modelVersion>
2829    <groupId>com.example</groupId>
2830    <artifactId>test-app</artifactId>
2831    <version>1.0.0</version>
2832    <ciManagement>
2833        <system>Jenkins</system>
2834        <url>https://ci.example.com/job/test-app</url>
2835    </ciManagement>
2836</project>"#;
2837
2838        fs::write(&pom_path, pom_content).unwrap();
2839
2840        let package_data = MavenParser::extract_first_package(&pom_path);
2841
2842        assert_eq!(package_data.name, Some("test-app".to_string()));
2843
2844        let extra_data = package_data.extra_data.unwrap();
2845        assert_eq!(
2846            extra_data.get("ci_system"),
2847            Some(&serde_json::Value::String("Jenkins".to_string()))
2848        );
2849        assert_eq!(
2850            extra_data.get("ci_url"),
2851            Some(&serde_json::Value::String(
2852                "https://ci.example.com/job/test-app".to_string()
2853            ))
2854        );
2855    }
2856
2857    #[test]
2858    fn test_distribution_management_extraction() {
2859        let temp_dir = TempDir::new().unwrap();
2860        let pom_path = temp_dir.path().join("pom.xml");
2861
2862        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2863<project xmlns="http://maven.apache.org/POM/4.0.0"
2864         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2865         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2866    <modelVersion>4.0.0</modelVersion>
2867    <groupId>com.example</groupId>
2868    <artifactId>test-app</artifactId>
2869    <version>1.0.0</version>
2870    <distributionManagement>
2871        <downloadUrl>https://example.com/downloads</downloadUrl>
2872        <repository>
2873            <id>releases</id>
2874            <name>Release Repository</name>
2875            <url>https://repo.example.com/releases</url>
2876            <layout>default</layout>
2877        </repository>
2878        <snapshotRepository>
2879            <id>snapshots</id>
2880            <name>Snapshot Repository</name>
2881            <url>https://repo.example.com/snapshots</url>
2882            <layout>default</layout>
2883        </snapshotRepository>
2884        <site>
2885            <id>site-deploy</id>
2886            <name>Project Site</name>
2887            <url>https://example.com/site</url>
2888        </site>
2889    </distributionManagement>
2890</project>"#;
2891
2892        fs::write(&pom_path, pom_content).unwrap();
2893
2894        let package_data = MavenParser::extract_first_package(&pom_path);
2895
2896        assert_eq!(package_data.name, Some("test-app".to_string()));
2897        assert_eq!(
2898            package_data.download_url,
2899            Some("https://example.com/downloads".to_string())
2900        );
2901
2902        let extra_data = package_data.extra_data.unwrap();
2903
2904        assert_eq!(
2905            extra_data.get("distribution_download_url"),
2906            Some(&serde_json::Value::String(
2907                "https://example.com/downloads".to_string()
2908            ))
2909        );
2910
2911        let repo = extra_data
2912            .get("distribution_repository")
2913            .unwrap()
2914            .as_object()
2915            .unwrap();
2916        assert_eq!(
2917            repo.get("id"),
2918            Some(&serde_json::Value::String("releases".to_string()))
2919        );
2920        assert_eq!(
2921            repo.get("name"),
2922            Some(&serde_json::Value::String("Release Repository".to_string()))
2923        );
2924        assert_eq!(
2925            repo.get("url"),
2926            Some(&serde_json::Value::String(
2927                "https://repo.example.com/releases".to_string()
2928            ))
2929        );
2930        assert_eq!(
2931            repo.get("layout"),
2932            Some(&serde_json::Value::String("default".to_string()))
2933        );
2934
2935        let snapshot_repo = extra_data
2936            .get("distribution_snapshot_repository")
2937            .unwrap()
2938            .as_object()
2939            .unwrap();
2940        assert_eq!(
2941            snapshot_repo.get("id"),
2942            Some(&serde_json::Value::String("snapshots".to_string()))
2943        );
2944        assert_eq!(
2945            snapshot_repo.get("name"),
2946            Some(&serde_json::Value::String(
2947                "Snapshot Repository".to_string()
2948            ))
2949        );
2950        assert_eq!(
2951            snapshot_repo.get("url"),
2952            Some(&serde_json::Value::String(
2953                "https://repo.example.com/snapshots".to_string()
2954            ))
2955        );
2956        assert_eq!(
2957            snapshot_repo.get("layout"),
2958            Some(&serde_json::Value::String("default".to_string()))
2959        );
2960
2961        let site = extra_data
2962            .get("distribution_site")
2963            .unwrap()
2964            .as_object()
2965            .unwrap();
2966        assert_eq!(
2967            site.get("id"),
2968            Some(&serde_json::Value::String("site-deploy".to_string()))
2969        );
2970        assert_eq!(
2971            site.get("name"),
2972            Some(&serde_json::Value::String("Project Site".to_string()))
2973        );
2974        assert_eq!(
2975            site.get("url"),
2976            Some(&serde_json::Value::String(
2977                "https://example.com/site".to_string()
2978            ))
2979        );
2980    }
2981}
2982
// Invokes the crate-level `register_parser!` macro for the Maven parser.
//
// NOTE(review): the macro definition is not visible from this file, so the role
// of each positional argument is inferred from its value — confirm against the
// macro: a display description, the file glob patterns this parser applies to
// (`.pom` files, `pom.xml`, `pom.properties`, and JAR `META-INF/MANIFEST.MF`),
// what looks like the package type ("maven"), the primary language ("Java"),
// and an optional documentation URL.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);