Skip to main content

provenant/parsers/
maven.rs

1//! Parser for Apache Maven pom.xml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Maven Project Object Model (POM) files.
5//!
6//! # Supported Formats
7//! - pom.xml (Project Object Model)
8//! - pom.properties
9//! - MANIFEST.MF (JAR manifest)
10//!
11//! # Key Features
12//! - Property value substitution (`${project.version}`)
13//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
14//! - Dependency scope handling (compile, test, provided, runtime, system)
15//! - Package URL (purl) generation
16//! - Multiple license support (combined with " OR ")
17//!
18//! # Implementation Notes
19//! - Uses quick-xml for XML parsing
20//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
21//! - Property substitution limited to prevent infinite loops
22//! - Direct dependencies: all in pom.xml are direct
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::read_file_to_string;
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::borrow::Cow;
30use std::collections::{HashMap, HashSet};
31use std::path::Path;
32
33use super::PackageParser;
34use super::license_normalization::{
35    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
36    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
37};
38
/// Raw, still-textual fields collected for one dependency-like POM entry.
///
/// The same shape is used for regular dependencies, `dependencyManagement`
/// entries and the relocation record. Every field is optional and kept as the
/// string that was read; interpretation happens when the entry is converted.
#[derive(Clone, Default)]
struct MavenDependencyData {
    group_id: Option<String>,
    artifact_id: Option<String>,
    version: Option<String>,
    classifier: Option<String>,
    // Trailing underscore because `type` is a Rust keyword.
    type_: Option<String>,
    scope: Option<String>,
    // Kept as text; turned into a bool by `parse_maven_bool` on conversion.
    optional: Option<String>,
    // Surfaced through `dependency_extra_data` under the key "system_path".
    system_path: Option<String>,
    // Surfaced through `dependency_extra_data` under the key "message".
    message: Option<String>,
}
51
/// One license record with its optional name, URL and free-form comments.
///
/// All fields are plain text; `build_license_statement` renders the non-blank
/// ones into the extracted license statement.
#[derive(Clone, Default)]
struct MavenLicenseEntry {
    name: Option<String>,
    url: Option<String>,
    comments: Option<String>,
}
58
/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
///
/// Maven properties can reference other properties, creating dependency graphs. This resolver:
/// - Resolves nested placeholders: `${outer.${inner}}`
/// - Detects circular references: `${a}` → `${b}` → `${a}`
/// - Enforces depth limits to prevent stack overflow
/// - Enforces substitution limits to prevent DoS on pathological inputs
///
/// # Algorithm
///
/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
/// - `resolving_set`: For cycle detection (hash set lookup)
/// - `resolving_stack`: For error reporting (preserves path)
/// - `cache`: Memoizes resolved values to avoid redundant work
struct PropertyResolver {
    // Looked up first when resolving a key.
    // NOTE(review): presumably the POM's own `<properties>` entries; confirm at the call site.
    raw: HashMap<String, String>,
    // Fallback table consulted only when a key is missing from `raw`.
    builtins: HashMap<String, String>,
    // Already-resolved values, keyed by property name.
    cache: HashMap<String, String>,
    // Keys whose resolution is currently in progress.
    resolving_set: HashSet<String>,
    // Same keys as `resolving_set`, in the order they were entered.
    resolving_stack: Vec<String>,
    // Recursion limit shared by `resolve_key` and `resolve_text` (`new` sets 10).
    max_depth: usize,
    // Upper bound, in bytes, on the output of one `resolve_text` call (`new` sets 100_000).
    max_output_len: usize,
    // Upper bound on placeholders processed by one `resolve_text` call (`new` sets 1000).
    max_substitutions: usize,
    // "kind:key" tokens for warnings that were already emitted by `warn_once`.
    warned_keys: HashSet<String>,
}
84
85impl PropertyResolver {
86    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
87        Self {
88            raw,
89            builtins,
90            cache: HashMap::new(),
91            resolving_set: HashSet::new(),
92            resolving_stack: Vec::new(),
93            max_depth: 10,
94            max_output_len: 100_000,
95            max_substitutions: 1000,
96            warned_keys: HashSet::new(),
97        }
98    }
99
100    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
101        if let Some(value) = self.cache.get(key) {
102            return Some(value.clone());
103        }
104
105        if depth >= self.max_depth {
106            self.warn_once(
107                "depth",
108                key,
109                format!("Maven property depth limit hit resolving {key}"),
110            );
111            return None;
112        }
113
114        if self.resolving_set.contains(key) {
115            if self
116                .resolving_stack
117                .last()
118                .is_some_and(|current| current == key)
119            {
120                return None;
121            }
122
123            self.warn_once(
124                "cycle",
125                key,
126                format!(
127                    "Maven property cycle detected at {key}: {:?}",
128                    self.resolving_stack
129                ),
130            );
131            return None;
132        }
133
134        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
135            value.clone()
136        } else {
137            return None;
138        };
139
140        self.resolving_set.insert(key.to_string());
141        self.resolving_stack.push(key.to_string());
142
143        let resolved = self.resolve_text(&raw_val, depth + 1);
144
145        self.resolving_stack.pop();
146        self.resolving_set.remove(key);
147
148        self.cache.insert(key.to_string(), resolved.clone());
149        Some(resolved)
150    }
151
152    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
153        if !text.contains("${") {
154            return text.to_string();
155        }
156
157        if depth >= self.max_depth {
158            warn!("Maven property depth limit hit resolving text");
159            return text.to_string();
160        }
161
162        let bytes = text.as_bytes();
163        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
164        let mut index = 0;
165        let mut substitutions = 0;
166
167        while index < bytes.len() {
168            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
169                if substitutions >= self.max_substitutions {
170                    warn!("Maven property substitution limit hit resolving {text}");
171                    return text.to_string();
172                }
173
174                let placeholder_start = index;
175                let Some((content, closing_index)) =
176                    self.parse_placeholder_content(text, index + 2)
177                else {
178                    warn!("Maven property malformed placeholder in {text}");
179                    return text.to_string();
180                };
181
182                substitutions += 1;
183                let resolved_key = if content.contains("${") {
184                    self.resolve_text(content, depth + 1)
185                } else {
186                    content.to_string()
187                };
188
189                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
190                    if output.len() + resolved.len() > self.max_output_len {
191                        warn!("Maven property output length limit hit resolving {text}");
192                        return text.to_string();
193                    }
194                    output.extend_from_slice(resolved.as_bytes());
195                } else {
196                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
197                    if output.len() + placeholder_bytes.len() > self.max_output_len {
198                        warn!("Maven property output length limit hit resolving {text}");
199                        return text.to_string();
200                    }
201                    output.extend_from_slice(placeholder_bytes);
202                }
203
204                index = closing_index + 1;
205                continue;
206            }
207
208            if output.len() + 1 > self.max_output_len {
209                warn!("Maven property output length limit hit resolving {text}");
210                return text.to_string();
211            }
212
213            output.push(bytes[index]);
214            index += 1;
215        }
216
217        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
218    }
219
220    fn parse_placeholder_content<'a>(
221        &self,
222        text: &'a str,
223        start_index: usize,
224    ) -> Option<(&'a str, usize)> {
225        let bytes = text.as_bytes();
226        let mut index = start_index;
227        let mut depth = 0;
228
229        while index < bytes.len() {
230            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
231                depth += 1;
232                index += 2;
233                continue;
234            }
235
236            if bytes[index] == b'}' {
237                if depth == 0 {
238                    return Some((&text[start_index..index], index));
239                }
240                depth -= 1;
241            }
242
243            index += 1;
244        }
245
246        None
247    }
248
249    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
250        let token = format!("{kind}:{key}");
251        if self.warned_keys.insert(token) {
252            warn!("{message}");
253        }
254    }
255}
256
/// Blanks out `<% ... %>` template directives so the remainder can be parsed.
///
/// Each character of a directive becomes a space, except `\n` and `\r`, which
/// are kept so line numbers stay stable. Input without any `<%` is returned
/// borrowed. If a `<%` has no closing `%>`, the whole input is returned
/// unchanged (borrowed), including directives that were already matched.
fn sanitize_template_directives(content: &str) -> Cow<'_, str> {
    let Some(first_open) = content.find("<%") else {
        return Cow::Borrowed(content);
    };

    let mut blanked = String::with_capacity(content.len());
    let mut tail = content;
    let mut next_open = Some(first_open);

    while let Some(open) = next_open {
        blanked.push_str(&tail[..open]);

        // The search for `%>` starts at the `<` itself, so `<%>` counts as a
        // complete three-character directive.
        let from_open = &tail[open..];
        let directive_len = match from_open.find("%>") {
            Some(close) => close + 2,
            None => return Cow::Borrowed(content),
        };

        blanked.extend(from_open[..directive_len].chars().map(|ch| {
            if ch == '\n' || ch == '\r' {
                ch
            } else {
                ' '
            }
        }));

        tail = &from_open[directive_len..];
        next_open = tail.find("<%");
    }

    blanked.push_str(tail);
    Cow::Owned(blanked)
}
288
289fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
290    if let Some(current) = value.clone() {
291        *value = Some(resolver.resolve_text(&current, 0));
292    }
293}
294
295fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
296    for value in values.iter_mut() {
297        *value = resolver.resolve_text(value, 0);
298    }
299}
300
301fn resolve_map_strings(
302    resolver: &mut PropertyResolver,
303    values: &mut serde_json::Map<String, serde_json::Value>,
304) {
305    for value in values.values_mut() {
306        if let serde_json::Value::String(current) = value {
307            let resolved = resolver.resolve_text(current, 0);
308            *current = resolved;
309        }
310    }
311}
312
313fn resolve_maps(
314    resolver: &mut PropertyResolver,
315    values: &mut [serde_json::Map<String, serde_json::Value>],
316) {
317    for value in values.iter_mut() {
318        resolve_map_strings(resolver, value);
319    }
320}
321
322fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
323    resolve_option(resolver, &mut dependency.group_id);
324    resolve_option(resolver, &mut dependency.artifact_id);
325    resolve_option(resolver, &mut dependency.version);
326    resolve_option(resolver, &mut dependency.classifier);
327    resolve_option(resolver, &mut dependency.type_);
328    resolve_option(resolver, &mut dependency.scope);
329    resolve_option(resolver, &mut dependency.optional);
330    resolve_option(resolver, &mut dependency.system_path);
331    resolve_option(resolver, &mut dependency.message);
332}
333
/// Interprets an optional textual flag: only `true` (any ASCII case, surrounding
/// whitespace ignored) counts as `true`; everything else, including `None`, is `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    matches!(value, Some(raw) if raw.trim().eq_ignore_ascii_case("true"))
}
337
/// Maps a packaging/type string to the value used for qualifiers and file extensions.
///
/// Blank or missing input yields `None`. A recognised value is returned trimmed;
/// any other non-blank value collapses to `"jar"`. Matching is case-sensitive.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const RECOGNISED: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if RECOGNISED.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
348
349fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
350    resolve_option(resolver, &mut license.name);
351    resolve_option(resolver, &mut license.url);
352    resolve_option(resolver, &mut license.comments);
353}
354
355fn build_maven_qualifiers(
356    classifier: Option<&str>,
357    packaging: Option<&str>,
358) -> Option<HashMap<String, String>> {
359    let mut qualifiers = HashMap::new();
360
361    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
362        qualifiers.insert("classifier".to_string(), classifier.to_string());
363    }
364
365    if let Some(packaging) = normalize_maven_packaging(packaging)
366        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
367    {
368        qualifiers.insert("type".to_string(), packaging.to_string());
369    }
370
371    (!qualifiers.is_empty()).then_some(qualifiers)
372}
373
374fn build_maven_purl(
375    group_id: &str,
376    artifact_id: &str,
377    version: Option<&str>,
378    classifier: Option<&str>,
379    packaging: Option<&str>,
380) -> String {
381    let mut purl = format!(
382        "pkg:maven/{}/{}",
383        percent_encode_purl_component(group_id),
384        percent_encode_purl_component(artifact_id)
385    );
386
387    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
388        purl.push('@');
389        purl.push_str(&percent_encode_purl_component(version));
390    }
391
392    let qualifiers = build_maven_qualifiers(classifier, packaging);
393    if let Some(qualifiers) = qualifiers {
394        let mut query_parts = Vec::new();
395        if let Some(classifier) = qualifiers.get("classifier") {
396            query_parts.push(format!(
397                "classifier={}",
398                percent_encode_purl_component(classifier)
399            ));
400        }
401        if let Some(type_) = qualifiers.get("type") {
402            query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
403        }
404
405        if !query_parts.is_empty() {
406            purl.push('?');
407            purl.push_str(&query_parts.join("&"));
408        }
409    }
410
411    purl
412}
413
/// Percent-encodes a purl component byte by byte.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` pass through; every other byte
/// (including each byte of a multi-byte UTF-8 character) becomes `%XX` with
/// upper-case hex digits.
fn percent_encode_purl_component(value: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(value.len());

    for byte in value.bytes() {
        let is_unreserved = byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~');
        if is_unreserved {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }

    encoded
}
428
429fn build_maven_download_url(
430    group_id: &str,
431    artifact_id: &str,
432    version: &str,
433    classifier: Option<&str>,
434    packaging: Option<&str>,
435) -> String {
436    const BASE_URL: &str = "https://repo1.maven.org/maven2";
437    let group_path = group_id.replace('.', "/");
438    let extension = normalize_maven_packaging(packaging)
439        .filter(|value| *value != "pom")
440        .unwrap_or("jar");
441    let classifier_suffix = classifier
442        .map(str::trim)
443        .filter(|value| !value.is_empty())
444        .map(|value| format!("-{value}"))
445        .unwrap_or_default();
446
447    format!(
448        "{}/{}/{}/{}/{}-{}{}.{}",
449        BASE_URL,
450        group_path,
451        artifact_id,
452        version,
453        artifact_id,
454        version,
455        classifier_suffix,
456        extension
457    )
458}
459
460fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
461    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
462}
463
464fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
465    let rendered_entries: Vec<String> = licenses
466        .iter()
467        .filter_map(|license| {
468            let mut lines = Vec::new();
469
470            if let Some(name) = license
471                .name
472                .as_ref()
473                .filter(|value| !value.trim().is_empty())
474            {
475                lines.push(format!("    name: {name}"));
476            }
477            if let Some(url) = license
478                .url
479                .as_ref()
480                .filter(|value| !value.trim().is_empty())
481            {
482                lines.push(format!("    url: {url}"));
483            }
484            if let Some(comments) = license
485                .comments
486                .as_ref()
487                .filter(|value| !value.trim().is_empty())
488            {
489                lines.push(format!("    comments: {comments}"));
490            }
491
492            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
493        })
494        .collect();
495
496    if rendered_entries.is_empty() {
497        None
498    } else {
499        Some(format!("{}\n", rendered_entries.join("\n")))
500    }
501}
502
/// Heuristically decides whether a comment looks like it talks about licensing.
///
/// The check is ASCII-case-insensitive. Most markers are matched as substrings
/// (so `GPLv2` or `Apache-2.0` are recognised). `MIT` is the exception: it is
/// only accepted as a standalone word, because as a substring it also matches
/// unrelated words such as "commit", "submit", "limit" or "permitted".
fn is_license_like_comment(comment: &str) -> bool {
    const SUBSTRING_MARKERS: [&str; 10] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];
    const WHOLE_WORD_MARKERS: [&str; 1] = ["mit"];

    let lowered = comment.to_ascii_lowercase();

    if SUBSTRING_MARKERS
        .iter()
        .any(|marker| lowered.contains(marker))
    {
        return true;
    }

    // Words are maximal runs of ASCII alphanumerics, so "MIT-style" and
    // "(MIT)" still count while "commit" does not.
    lowered
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .any(|word| WHOLE_WORD_MARKERS.contains(&word))
}
521
522fn dependency_extra_data(
523    dependency: &MavenDependencyData,
524) -> Option<HashMap<String, serde_json::Value>> {
525    let mut extra_data = HashMap::new();
526
527    if let Some(classifier) = dependency
528        .classifier
529        .as_ref()
530        .filter(|value| !value.trim().is_empty())
531    {
532        extra_data.insert(
533            "classifier".to_string(),
534            serde_json::Value::String(classifier.clone()),
535        );
536    }
537    if let Some(type_) = dependency
538        .type_
539        .as_ref()
540        .filter(|value| !value.trim().is_empty())
541    {
542        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
543    }
544    if let Some(system_path) = dependency
545        .system_path
546        .as_ref()
547        .filter(|value| !value.trim().is_empty())
548    {
549        extra_data.insert(
550            "system_path".to_string(),
551            serde_json::Value::String(system_path.clone()),
552        );
553    }
554    if let Some(message) = dependency
555        .message
556        .as_ref()
557        .filter(|value| !value.trim().is_empty())
558    {
559        extra_data.insert(
560            "message".to_string(),
561            serde_json::Value::String(message.clone()),
562        );
563    }
564
565    (!extra_data.is_empty()).then_some(extra_data)
566}
567
568fn dependency_management_entry_to_value(
569    dependency: &MavenDependencyData,
570) -> serde_json::Map<String, serde_json::Value> {
571    let mut dep_obj = serde_json::Map::new();
572
573    if let Some(group_id) = dependency.group_id.as_ref() {
574        dep_obj.insert(
575            "groupId".to_string(),
576            serde_json::Value::String(group_id.clone()),
577        );
578    }
579    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
580        dep_obj.insert(
581            "artifactId".to_string(),
582            serde_json::Value::String(artifact_id.clone()),
583        );
584    }
585    if let Some(version) = dependency.version.as_ref() {
586        dep_obj.insert(
587            "version".to_string(),
588            serde_json::Value::String(version.clone()),
589        );
590    }
591    if let Some(scope) = dependency.scope.as_ref() {
592        dep_obj.insert(
593            "scope".to_string(),
594            serde_json::Value::String(scope.clone()),
595        );
596    }
597    if let Some(type_) = dependency.type_.as_ref() {
598        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
599    }
600    if let Some(classifier) = dependency.classifier.as_ref() {
601        dep_obj.insert(
602            "classifier".to_string(),
603            serde_json::Value::String(classifier.clone()),
604        );
605    }
606    if let Some(optional) = dependency.optional.as_deref() {
607        dep_obj.insert(
608            "optional".to_string(),
609            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
610        );
611    }
612    if let Some(message) = dependency.message.as_ref() {
613        dep_obj.insert(
614            "message".to_string(),
615            serde_json::Value::String(message.clone()),
616        );
617    }
618
619    dep_obj
620}
621
622fn maven_dependency_to_dependency(
623    dependency_data: &MavenDependencyData,
624    fallback_scope: Option<&str>,
625    force_non_runtime: bool,
626) -> Option<Dependency> {
627    let group_id = dependency_data.group_id.as_ref()?;
628    let artifact_id = dependency_data.artifact_id.as_ref()?;
629    let version = dependency_data.version.clone();
630    let scope = dependency_data
631        .scope
632        .clone()
633        .or_else(|| fallback_scope.map(str::to_string));
634    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
635
636    let (is_runtime, is_optional) = if force_non_runtime {
637        (Some(false), Some(explicit_optional))
638    } else {
639        match scope.as_deref() {
640            Some("test") | Some("provided") => (Some(false), Some(true)),
641            Some(_) => (Some(true), Some(explicit_optional)),
642            None => (None, Some(explicit_optional)),
643        }
644    };
645
646    Some(Dependency {
647        purl: Some(build_maven_purl(
648            group_id,
649            artifact_id,
650            version.as_deref(),
651            dependency_data.classifier.as_deref(),
652            dependency_data.type_.as_deref(),
653        )),
654        extracted_requirement: version.clone(),
655        scope,
656        is_runtime,
657        is_optional,
658        is_pinned: version.as_deref().map(is_maven_version_pinned),
659        is_direct: Some(true),
660        resolved_package: None,
661        extra_data: dependency_extra_data(dependency_data),
662    })
663}
664
/// Reports whether a Maven version string names one exact version.
///
/// After trimming, a version is *not* pinned when it is empty, contains any
/// range delimiter (`[`, `]`, `(`, `)`), or equals the dynamic keywords
/// `LATEST` / `RELEASE` (ASCII case-insensitive). Everything else is pinned.
///
/// - Pinned: `"1.0.0"`, `"1.2.3"`
/// - Not pinned: `"[1.0.0,2.0.0)"`, `"[1.0.0,)"`, `"LATEST"`, `"RELEASE"`
fn is_maven_version_pinned(version_str: &str) -> bool {
    let candidate = version_str.trim();

    let has_range_delimiter = candidate
        .chars()
        .any(|ch| matches!(ch, '[' | ']' | '(' | ')'));
    let is_dynamic_keyword = ["LATEST", "RELEASE"]
        .iter()
        .any(|keyword| candidate.eq_ignore_ascii_case(keyword));

    !(candidate.is_empty() || has_range_delimiter || is_dynamic_keyword)
}
696
/// Borrowed project and parent coordinates from which the built-in
/// (`project.*`, `pom.*`, `parent.*`) properties are derived.
struct MavenBuiltinPropertyInputs<'a> {
    /// The project's own group id (falls back to the parent's when absent).
    namespace: &'a Option<String>,
    /// The project's artifact id.
    name: &'a Option<String>,
    /// The project's own version (falls back to the parent's when absent).
    version: &'a Option<String>,
    parent_group_id: &'a Option<String>,
    parent_artifact_id: &'a Option<String>,
    parent_version: &'a Option<String>,
    /// Value exposed as `project.name`.
    project_name: &'a Option<String>,
    /// Value exposed as `project.packaging`.
    project_packaging: &'a Option<String>,
}

/// Builds the table of built-in properties available to `${...}` placeholders.
///
/// Keys produced (each only when the underlying value is present):
/// - `project.groupId`, `pom.groupId`: own group id, else the parent's
/// - `project.artifactId`, `pom.artifactId`
/// - `project.version`, `pom.version`: own version, else the parent's
/// - `project.parent.X`, `pom.parent.X`, `parent.X` for `groupId`,
///   `artifactId` and `version`
/// - `project.packaging`, `project.name`
///
/// The parent group id is registered under the same three prefixes as the
/// parent artifact id and version, so `${parent.groupId}` and
/// `${pom.parent.groupId}` resolve like their sibling coordinates.
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    /// Registers `value` (when present) under every alias in `keys`.
    fn insert_aliases(
        builtins: &mut HashMap<String, String>,
        keys: &[&str],
        value: Option<&str>,
    ) {
        if let Some(value) = value {
            for key in keys {
                builtins.insert((*key).to_string(), value.to_string());
            }
        }
    }

    let mut builtins = HashMap::new();

    // A child POM inherits group id and version from its parent when it does
    // not declare them itself.
    let effective_group_id = inputs
        .namespace
        .as_deref()
        .or(inputs.parent_group_id.as_deref());
    let effective_version = inputs
        .version
        .as_deref()
        .or(inputs.parent_version.as_deref());

    insert_aliases(
        &mut builtins,
        &["project.groupId", "pom.groupId"],
        effective_group_id,
    );
    insert_aliases(
        &mut builtins,
        &["project.artifactId", "pom.artifactId"],
        inputs.name.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.version", "pom.version"],
        effective_version,
    );

    insert_aliases(
        &mut builtins,
        &[
            "project.parent.groupId",
            "pom.parent.groupId",
            "parent.groupId",
        ],
        inputs.parent_group_id.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.artifactId",
            "pom.parent.artifactId",
            "parent.artifactId",
        ],
        inputs.parent_artifact_id.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.version",
            "pom.parent.version",
            "parent.version",
        ],
        inputs.parent_version.as_deref(),
    );

    insert_aliases(
        &mut builtins,
        &["project.packaging"],
        inputs.project_packaging.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.name"],
        inputs.project_name.as_deref(),
    );

    builtins
}
760
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
///
/// `extract_packages` dispatches on the file name: `pom.properties` and
/// `MANIFEST.MF` are handed to their dedicated parsers, and any other path is
/// read and parsed as POM XML.
pub struct MavenParser;
766
767impl PackageParser for MavenParser {
768    const PACKAGE_TYPE: PackageType = PackageType::Maven;
769
770    fn extract_packages(path: &Path) -> Vec<PackageData> {
771        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
772            if filename == "pom.properties" {
773                return vec![parse_pom_properties(path)];
774            } else if filename == "MANIFEST.MF" {
775                return vec![parse_manifest_mf(path)];
776            }
777        }
778
779        let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
780            Ok(content) => content,
781            Err(e) => {
782                warn!("Failed to open pom.xml at {:?}: {}", path, e);
783                return vec![default_package_data(DatasourceId::MavenPom)];
784            }
785        };
786
787        let sanitized_content = sanitize_template_directives(&content);
788        let mut reader = Reader::from_str(sanitized_content.as_ref());
789        reader.config_mut().trim_text(true);
790
791        let mut buf = Vec::new();
792        let mut package_data = default_package_data(DatasourceId::MavenPom);
793        package_data.package_type = Some(Self::PACKAGE_TYPE);
794        package_data.primary_language = Some("Java".to_string());
795        package_data.datasource_id = Some(DatasourceId::MavenPom);
796
797        let mut current_element = Vec::new();
798        let mut in_dependencies = false;
799        let mut current_dependency: Option<Dependency> = None;
800        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
801        let mut current_dependency_data: Option<MavenDependencyData> = None;
802
803        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
804        let mut xml_license_comments: Vec<String> = Vec::new();
805        let mut current_license: Option<MavenLicenseEntry> = None;
806        let mut inception_year = None;
807        let mut scm_connection = None;
808        let mut scm_developer_connection = None;
809        let mut scm_url = None;
810        let mut scm_tag = None;
811        let mut organization_name = None;
812        let mut organization_url = None;
813        let mut in_developers = false;
814        let mut in_contributors = false;
815        let mut current_party: Option<Party> = None;
816        let mut issue_management_system = None;
817        let mut issue_management_url = None;
818        let mut ci_management_system = None;
819        let mut ci_management_url = None;
820        let mut in_distribution_management = false;
821        let mut in_dist_repository = false;
822        let mut in_dist_snapshot_repository = false;
823        let mut in_dist_site = false;
824        let mut dist_download_url = None;
825        let mut dist_repository_id = None;
826        let mut dist_repository_name = None;
827        let mut dist_repository_url = None;
828        let mut dist_repository_layout = None;
829        let mut dist_snapshot_repository_id = None;
830        let mut dist_snapshot_repository_name = None;
831        let mut dist_snapshot_repository_url = None;
832        let mut dist_snapshot_repository_layout = None;
833        let mut dist_site_id = None;
834        let mut dist_site_name = None;
835        let mut dist_site_url = None;
836        let mut in_repositories = false;
837        let mut in_plugin_repositories = false;
838        let mut in_repository = false;
839        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
840        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
841        let mut current_repository_id = None;
842        let mut current_repository_name = None;
843        let mut current_repository_url = None;
844        let mut in_modules = false;
845        let mut modules: Vec<String> = Vec::new();
846        let mut in_mailing_lists = false;
847        let mut in_mailing_list = false;
848        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
849        let mut current_mailing_list_name = None;
850        let mut current_mailing_list_subscribe = None;
851        let mut current_mailing_list_unsubscribe = None;
852        let mut current_mailing_list_post = None;
853        let mut current_mailing_list_archive = None;
854        let mut in_dependency_management = false;
855        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
856        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
857        let mut in_dep_mgmt_dependency = false;
858        let mut in_parent = false;
859        let mut parent_group_id = None;
860        let mut parent_artifact_id = None;
861        let mut parent_version = None;
862        let mut parent_relative_path = None;
863        let mut in_properties = false;
864        let mut properties: HashMap<String, String> = HashMap::new();
865        let mut project_name = None;
866        let mut project_description = None;
867        let mut project_packaging = None;
868        let mut project_classifier = None;
869        let mut in_relocation = false;
870        let mut relocation = MavenDependencyData::default();
871
872        loop {
873            match reader.read_event_into(&mut buf) {
874                Ok(Event::Start(e)) => {
875                    let element_name = e.name().as_ref().to_vec();
876                    current_element.push(element_name.clone());
877
878                    match element_name.as_slice() {
879                        b"parent" => in_parent = true,
880                        b"dependencyManagement" => in_dependency_management = true,
881                        b"dependencies" if in_dependency_management => {}
882                        b"dependencies" => in_dependencies = true,
883                        b"dependency" if in_dependency_management => {
884                            in_dep_mgmt_dependency = true;
885                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
886                        }
887                        b"dependency" if in_dependencies => {
888                            current_dependency = Some(Dependency {
889                                purl: None,
890                                extracted_requirement: None,
891                                scope: None,
892                                is_runtime: None,
893                                is_optional: Some(false),
894                                is_pinned: None,
895                                is_direct: Some(true),
896                                resolved_package: None,
897                                extra_data: None,
898                            });
899                            current_dependency_data = Some(MavenDependencyData::default());
900                        }
901                        b"properties" => in_properties = true,
902                        b"developers" => in_developers = true,
903                        b"developer" if in_developers => {
904                            current_party = Some(Party {
905                                r#type: Some("person".to_string()),
906                                role: Some("developer".to_string()),
907                                name: None,
908                                email: None,
909                                url: None,
910                                organization: None,
911                                organization_url: None,
912                                timezone: None,
913                            });
914                        }
915                        b"contributors" => in_contributors = true,
916                        b"contributor" if in_contributors => {
917                            current_party = Some(Party {
918                                r#type: Some("person".to_string()),
919                                role: Some("contributor".to_string()),
920                                name: None,
921                                email: None,
922                                url: None,
923                                organization: None,
924                                organization_url: None,
925                                timezone: None,
926                            });
927                        }
928                        b"license" => current_license = Some(MavenLicenseEntry::default()),
929                        b"distributionManagement" => in_distribution_management = true,
930                        b"relocation" if in_distribution_management => {
931                            in_relocation = true;
932                            relocation = MavenDependencyData::default();
933                        }
934                        b"repository" if in_distribution_management => in_dist_repository = true,
935                        b"snapshotRepository" if in_distribution_management => {
936                            in_dist_snapshot_repository = true
937                        }
938                        b"site" if in_distribution_management => in_dist_site = true,
939                        b"repositories" => in_repositories = true,
940                        b"pluginRepositories" => in_plugin_repositories = true,
941                        b"repository" if in_repositories && !in_distribution_management => {
942                            in_repository = true;
943                            current_repository_id = None;
944                            current_repository_name = None;
945                            current_repository_url = None;
946                        }
947                        b"pluginRepository" if in_plugin_repositories => {
948                            in_repository = true;
949                            current_repository_id = None;
950                            current_repository_name = None;
951                            current_repository_url = None;
952                        }
953                        b"modules" => in_modules = true,
954                        b"mailingLists" => in_mailing_lists = true,
955                        b"mailingList" if in_mailing_lists => {
956                            in_mailing_list = true;
957                            current_mailing_list_name = None;
958                            current_mailing_list_subscribe = None;
959                            current_mailing_list_unsubscribe = None;
960                            current_mailing_list_post = None;
961                            current_mailing_list_archive = None;
962                        }
963                        _ => {}
964                    }
965                }
966                Ok(Event::Text(e)) => {
967                    let text = e.decode().unwrap_or_default().to_string();
968                    let current_path = current_element.last().map(|v| v.as_slice());
969                    let current_parent = current_element
970                        .len()
971                        .checked_sub(2)
972                        .map(|index| current_element[index].as_slice());
973
974                    if in_properties
975                        && current_element.len() >= 2
976                        && current_element[current_element.len() - 2] == b"properties"
977                    {
978                        if let Some(property_name) = current_element
979                            .last()
980                            .and_then(|name| std::str::from_utf8(name).ok())
981                        {
982                            properties.insert(property_name.to_string(), text);
983                        } else {
984                            warn!("Failed to decode Maven property name in {:?}", path);
985                        }
986                    } else if in_dep_mgmt_dependency {
987                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
988                            match current_path {
989                                Some(b"groupId") if current_parent == Some(b"dependency") => {
990                                    dep_mgmt.group_id = Some(text)
991                                }
992                                Some(b"artifactId") if current_parent == Some(b"dependency") => {
993                                    dep_mgmt.artifact_id = Some(text)
994                                }
995                                Some(b"version") if current_parent == Some(b"dependency") => {
996                                    dep_mgmt.version = Some(text)
997                                }
998                                Some(b"scope") if current_parent == Some(b"dependency") => {
999                                    dep_mgmt.scope = Some(text)
1000                                }
1001                                Some(b"type") if current_parent == Some(b"dependency") => {
1002                                    dep_mgmt.type_ = Some(text)
1003                                }
1004                                Some(b"classifier") if current_parent == Some(b"dependency") => {
1005                                    dep_mgmt.classifier = Some(text)
1006                                }
1007                                Some(b"optional") if current_parent == Some(b"dependency") => {
1008                                    dep_mgmt.optional = Some(text)
1009                                }
1010                                _ => {}
1011                            }
1012                        }
1013                    } else if let Some(license) = &mut current_license {
1014                        match current_path {
1015                            Some(b"name") => license.name = Some(text),
1016                            Some(b"url") => license.url = Some(text),
1017                            Some(b"comments") => license.comments = Some(text),
1018                            _ => {}
1019                        }
1020                    } else if let Some(party) = &mut current_party {
1021                        match current_path {
1022                            Some(b"name") => party.name = Some(text),
1023                            Some(b"email") => party.email = Some(text),
1024                            Some(b"url") => party.url = Some(text),
1025                            Some(b"organization") => party.organization = Some(text),
1026                            Some(b"organizationUrl") => party.organization_url = Some(text),
1027                            Some(b"timezone") => party.timezone = Some(text),
1028                            _ => {}
1029                        }
1030                    } else if let Some(dep) = &mut current_dependency {
1031                        match current_path {
1032                            Some(b"groupId") => {
1033                                if current_parent == Some(b"dependency")
1034                                    && let Some(coords) = current_dependency_data.as_mut()
1035                                {
1036                                    coords.group_id = Some(text);
1037                                }
1038                            }
1039                            Some(b"artifactId") => {
1040                                if current_parent == Some(b"dependency")
1041                                    && let Some(coords) = current_dependency_data.as_mut()
1042                                {
1043                                    coords.artifact_id = Some(text);
1044                                }
1045                            }
1046                            Some(b"version") => {
1047                                if current_parent == Some(b"dependency")
1048                                    && let Some(coords) = current_dependency_data.as_mut()
1049                                {
1050                                    coords.version = Some(text);
1051                                }
1052                            }
1053                            Some(b"scope") => {
1054                                if current_parent == Some(b"dependency") {
1055                                    dep.scope = Some(text.clone());
1056                                    dep.is_optional = Some(text == "test" || text == "provided");
1057                                    dep.is_runtime = Some(text != "test" && text != "provided");
1058                                }
1059                                if current_parent == Some(b"dependency")
1060                                    && let Some(coords) = current_dependency_data.as_mut()
1061                                {
1062                                    coords.scope = Some(text);
1063                                }
1064                            }
1065                            Some(b"optional") => {
1066                                if current_parent == Some(b"dependency")
1067                                    && let Some(coords) = current_dependency_data.as_mut()
1068                                {
1069                                    coords.optional = Some(text);
1070                                }
1071                            }
1072                            Some(b"type") => {
1073                                if current_parent == Some(b"dependency")
1074                                    && let Some(coords) = current_dependency_data.as_mut()
1075                                {
1076                                    coords.type_ = Some(text);
1077                                }
1078                            }
1079                            Some(b"classifier") => {
1080                                if current_parent == Some(b"dependency")
1081                                    && let Some(coords) = current_dependency_data.as_mut()
1082                                {
1083                                    coords.classifier = Some(text);
1084                                }
1085                            }
1086                            Some(b"systemPath") => {
1087                                if current_parent == Some(b"dependency")
1088                                    && let Some(coords) = current_dependency_data.as_mut()
1089                                {
1090                                    coords.system_path = Some(text);
1091                                }
1092                            }
1093                            _ => {}
1094                        }
1095                    } else if in_relocation {
1096                        match current_path {
1097                            Some(b"groupId") => relocation.group_id = Some(text),
1098                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1099                            Some(b"version") => relocation.version = Some(text),
1100                            Some(b"classifier") => relocation.classifier = Some(text),
1101                            Some(b"type") => relocation.type_ = Some(text),
1102                            Some(b"message") => relocation.message = Some(text),
1103                            _ => {}
1104                        }
1105                    } else if in_parent {
1106                        match current_path {
1107                            Some(b"groupId") => {
1108                                parent_group_id = Some(text);
1109                            }
1110                            Some(b"artifactId") => {
1111                                parent_artifact_id = Some(text);
1112                            }
1113                            Some(b"version") => {
1114                                parent_version = Some(text);
1115                            }
1116                            Some(b"relativePath") => {
1117                                parent_relative_path = Some(text);
1118                            }
1119                            _ => {}
1120                        }
1121                    } else {
1122                        match current_path {
1123                            Some(b"groupId") if current_element.len() == 2 => {
1124                                package_data.namespace = Some(text)
1125                            }
1126                            Some(b"artifactId") if current_element.len() == 2 => {
1127                                package_data.name = Some(text)
1128                            }
1129                            Some(b"version") if current_element.len() == 2 => {
1130                                package_data.version = Some(text)
1131                            }
1132                            Some(b"name") if current_element.len() == 2 => {
1133                                project_name = Some(text)
1134                            }
1135                            Some(b"description") if current_element.len() == 2 => {
1136                                project_description = Some(text)
1137                            }
1138                            Some(b"packaging") if current_element.len() == 2 => {
1139                                project_packaging = Some(text)
1140                            }
1141                            Some(b"classifier") if current_element.len() == 2 => {
1142                                project_classifier = Some(text)
1143                            }
1144                            Some(b"url") if current_element.len() == 2 => {
1145                                package_data.homepage_url = Some(text)
1146                            }
1147                            Some(b"inceptionYear") if current_element.len() == 2 => {
1148                                inception_year = Some(text)
1149                            }
1150                            Some(b"connection")
1151                                if current_element.len() >= 3
1152                                    && current_element[current_element.len() - 2] == b"scm" =>
1153                            {
1154                                scm_connection = if text.starts_with("scm:git:") {
1155                                    Some(text.replacen("scm:git:", "git+", 1))
1156                                } else if text.starts_with("scm:") {
1157                                    Some(text.replacen("scm:", "", 1))
1158                                } else {
1159                                    Some(text)
1160                                };
1161                            }
1162                            Some(b"developerConnection")
1163                                if current_element.len() >= 3
1164                                    && current_element[current_element.len() - 2] == b"scm" =>
1165                            {
1166                                scm_developer_connection = if text.starts_with("scm:git:") {
1167                                    Some(text.replacen("scm:git:", "git+", 1))
1168                                } else if text.starts_with("scm:") {
1169                                    Some(text.replacen("scm:", "", 1))
1170                                } else {
1171                                    Some(text)
1172                                };
1173                            }
1174                            Some(b"url")
1175                                if current_element.len() >= 3
1176                                    && current_element[current_element.len() - 2] == b"scm" =>
1177                            {
1178                                scm_url = Some(text);
1179                            }
1180                            Some(b"tag")
1181                                if current_element.len() >= 3
1182                                    && current_element[current_element.len() - 2] == b"scm" =>
1183                            {
1184                                scm_tag = Some(text);
1185                            }
1186                            Some(b"name")
1187                                if current_element.len() >= 2
1188                                    && current_element[current_element.len() - 2]
1189                                        == b"organization" =>
1190                            {
1191                                organization_name = Some(text);
1192                            }
1193                            Some(b"url")
1194                                if current_element.len() >= 2
1195                                    && current_element[current_element.len() - 2]
1196                                        == b"organization" =>
1197                            {
1198                                organization_url = Some(text);
1199                            }
1200                            Some(b"system")
1201                                if current_element.len() >= 2
1202                                    && current_element[current_element.len() - 2]
1203                                        == b"issueManagement" =>
1204                            {
1205                                issue_management_system = Some(text);
1206                            }
1207                            Some(b"url")
1208                                if current_element.len() >= 2
1209                                    && current_element[current_element.len() - 2]
1210                                        == b"issueManagement" =>
1211                            {
1212                                issue_management_url = Some(text);
1213                            }
1214                            Some(b"system")
1215                                if current_element.len() >= 2
1216                                    && current_element[current_element.len() - 2]
1217                                        == b"ciManagement" =>
1218                            {
1219                                ci_management_system = Some(text);
1220                            }
1221                            Some(b"url")
1222                                if current_element.len() >= 2
1223                                    && current_element[current_element.len() - 2]
1224                                        == b"ciManagement" =>
1225                            {
1226                                ci_management_url = Some(text);
1227                            }
1228                            Some(b"downloadUrl")
1229                                if current_element.len() >= 2
1230                                    && current_element[current_element.len() - 2]
1231                                        == b"distributionManagement" =>
1232                            {
1233                                dist_download_url = Some(text);
1234                            }
1235                            Some(b"id") if in_dist_repository => {
1236                                dist_repository_id = Some(text);
1237                            }
1238                            Some(b"name") if in_dist_repository => {
1239                                dist_repository_name = Some(text);
1240                            }
1241                            Some(b"url") if in_dist_repository => {
1242                                dist_repository_url = Some(text);
1243                            }
1244                            Some(b"layout") if in_dist_repository => {
1245                                dist_repository_layout = Some(text);
1246                            }
1247                            Some(b"id") if in_dist_snapshot_repository => {
1248                                dist_snapshot_repository_id = Some(text);
1249                            }
1250                            Some(b"name") if in_dist_snapshot_repository => {
1251                                dist_snapshot_repository_name = Some(text);
1252                            }
1253                            Some(b"url") if in_dist_snapshot_repository => {
1254                                dist_snapshot_repository_url = Some(text);
1255                            }
1256                            Some(b"layout") if in_dist_snapshot_repository => {
1257                                dist_snapshot_repository_layout = Some(text);
1258                            }
1259                            Some(b"id") if in_dist_site => {
1260                                dist_site_id = Some(text);
1261                            }
1262                            Some(b"name") if in_dist_site => {
1263                                dist_site_name = Some(text);
1264                            }
1265                            Some(b"url") if in_dist_site => {
1266                                dist_site_url = Some(text);
1267                            }
1268                            Some(b"id") if in_repository => {
1269                                current_repository_id = Some(text);
1270                            }
1271                            Some(b"name") if in_repository => {
1272                                current_repository_name = Some(text);
1273                            }
1274                            Some(b"url") if in_repository => {
1275                                current_repository_url = Some(text);
1276                            }
1277                            Some(b"module") if in_modules => {
1278                                modules.push(text);
1279                            }
1280                            Some(b"name") if in_mailing_list => {
1281                                current_mailing_list_name = Some(text);
1282                            }
1283                            Some(b"subscribe") if in_mailing_list => {
1284                                current_mailing_list_subscribe = Some(text);
1285                            }
1286                            Some(b"unsubscribe") if in_mailing_list => {
1287                                current_mailing_list_unsubscribe = Some(text);
1288                            }
1289                            Some(b"post") if in_mailing_list => {
1290                                current_mailing_list_post = Some(text);
1291                            }
1292                            Some(b"archive") if in_mailing_list => {
1293                                current_mailing_list_archive = Some(text);
1294                            }
1295                            _ => {}
1296                        }
1297                    }
1298                }
1299                Ok(Event::Comment(e)) => {
1300                    let comment = e.decode().unwrap_or_default().trim().to_string();
1301                    if current_element.is_empty()
1302                        && !comment.is_empty()
1303                        && is_license_like_comment(&comment)
1304                    {
1305                        xml_license_comments.push(comment);
1306                    }
1307                }
1308                Ok(Event::End(e)) => {
1309                    if !current_element.is_empty() {
1310                        current_element.pop();
1311                    }
1312
1313                    match e.name().as_ref() {
1314                        b"parent" => in_parent = false,
1315                        b"dependencyManagement" => in_dependency_management = false,
1316                        b"dependencies" => in_dependencies = false,
1317                        b"dependency" if in_dep_mgmt_dependency => {
1318                            in_dep_mgmt_dependency = false;
1319                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1320                                && (dep_mgmt.group_id.is_some()
1321                                    || dep_mgmt.artifact_id.is_some()
1322                                    || dep_mgmt.version.is_some())
1323                            {
1324                                dependency_management_entries.push(dep_mgmt);
1325                            }
1326                        }
1327                        b"dependency" => {
1328                            if let (Some(dep), Some(coords)) =
1329                                (current_dependency.take(), current_dependency_data.take())
1330                            {
1331                                package_data.dependencies.push(dep);
1332                                dependency_data.push(coords);
1333                            } else if let Some(dep) = current_dependency.take() {
1334                                package_data.dependencies.push(dep);
1335                            }
1336                        }
1337                        b"license" => {
1338                            if let Some(license) = current_license.take()
1339                                && (license.name.is_some()
1340                                    || license.url.is_some()
1341                                    || license.comments.is_some())
1342                            {
1343                                licenses.push(license);
1344                            }
1345                        }
1346                        b"developers" => in_developers = false,
1347                        b"developer" => {
1348                            if let Some(party) = current_party.take() {
1349                                package_data.parties.push(party);
1350                            }
1351                        }
1352                        b"contributors" => in_contributors = false,
1353                        b"contributor" => {
1354                            if let Some(party) = current_party.take() {
1355                                package_data.parties.push(party);
1356                            }
1357                        }
1358                        b"distributionManagement" => in_distribution_management = false,
1359                        b"relocation" => in_relocation = false,
1360                        b"repository" if !in_dependencies && in_distribution_management => {
1361                            in_dist_repository = false
1362                        }
1363                        b"repository" if !in_dependencies && in_repositories => {
1364                            in_repository = false;
1365                            if current_repository_id.is_some()
1366                                || current_repository_name.is_some()
1367                                || current_repository_url.is_some()
1368                            {
1369                                let mut repo = serde_json::Map::new();
1370                                if let Some(id) = current_repository_id.take() {
1371                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1372                                }
1373                                if let Some(name) = current_repository_name.take() {
1374                                    repo.insert(
1375                                        "name".to_string(),
1376                                        serde_json::Value::String(name),
1377                                    );
1378                                }
1379                                if let Some(url) = current_repository_url.take() {
1380                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1381                                }
1382                                repositories.push(repo);
1383                            }
1384                        }
1385                        b"pluginRepository" if in_plugin_repositories => {
1386                            in_repository = false;
1387                            if current_repository_id.is_some()
1388                                || current_repository_name.is_some()
1389                                || current_repository_url.is_some()
1390                            {
1391                                let mut repo = serde_json::Map::new();
1392                                if let Some(id) = current_repository_id.take() {
1393                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1394                                }
1395                                if let Some(name) = current_repository_name.take() {
1396                                    repo.insert(
1397                                        "name".to_string(),
1398                                        serde_json::Value::String(name),
1399                                    );
1400                                }
1401                                if let Some(url) = current_repository_url.take() {
1402                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1403                                }
1404                                plugin_repositories.push(repo);
1405                            }
1406                        }
1407                        b"repositories" => in_repositories = false,
1408                        b"properties" => in_properties = false,
1409                        b"pluginRepositories" => in_plugin_repositories = false,
1410                        b"modules" => in_modules = false,
1411                        b"mailingLists" => in_mailing_lists = false,
1412                        b"mailingList" => {
1413                            in_mailing_list = false;
1414                            if current_mailing_list_name.is_some()
1415                                || current_mailing_list_subscribe.is_some()
1416                                || current_mailing_list_unsubscribe.is_some()
1417                                || current_mailing_list_post.is_some()
1418                                || current_mailing_list_archive.is_some()
1419                            {
1420                                let mut ml = serde_json::Map::new();
1421                                if let Some(name) = current_mailing_list_name.take() {
1422                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1423                                }
1424                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1425                                    ml.insert(
1426                                        "subscribe".to_string(),
1427                                        serde_json::Value::String(subscribe),
1428                                    );
1429                                }
1430                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1431                                    ml.insert(
1432                                        "unsubscribe".to_string(),
1433                                        serde_json::Value::String(unsubscribe),
1434                                    );
1435                                }
1436                                if let Some(post) = current_mailing_list_post.take() {
1437                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1438                                }
1439                                if let Some(archive) = current_mailing_list_archive.take() {
1440                                    ml.insert(
1441                                        "archive".to_string(),
1442                                        serde_json::Value::String(archive),
1443                                    );
1444                                }
1445                                mailing_lists.push(ml);
1446                            }
1447                        }
1448                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1449                        b"site" => in_dist_site = false,
1450                        _ => {}
1451                    }
1452                }
1453                Ok(Event::Eof) => break,
1454                Err(e) => {
1455                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1456                    return vec![package_data];
1457                }
1458                _ => {}
1459            }
1460            buf.clear();
1461        }
1462
1463        let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1464            namespace: &package_data.namespace,
1465            name: &package_data.name,
1466            version: &package_data.version,
1467            parent_group_id: &parent_group_id,
1468            parent_artifact_id: &parent_artifact_id,
1469            parent_version: &parent_version,
1470            project_name: &project_name,
1471            project_packaging: &project_packaging,
1472        });
1473        let mut resolver = PropertyResolver::new(properties, builtins);
1474
1475        resolve_option(&mut resolver, &mut package_data.namespace);
1476        resolve_option(&mut resolver, &mut package_data.name);
1477        resolve_option(&mut resolver, &mut package_data.version);
1478        resolve_option(&mut resolver, &mut package_data.homepage_url);
1479        resolve_option(&mut resolver, &mut inception_year);
1480        resolve_option(&mut resolver, &mut scm_connection);
1481        resolve_option(&mut resolver, &mut scm_developer_connection);
1482        resolve_option(&mut resolver, &mut scm_url);
1483        resolve_option(&mut resolver, &mut scm_tag);
1484        resolve_option(&mut resolver, &mut organization_name);
1485        resolve_option(&mut resolver, &mut organization_url);
1486        resolve_option(&mut resolver, &mut issue_management_system);
1487        resolve_option(&mut resolver, &mut issue_management_url);
1488        resolve_option(&mut resolver, &mut ci_management_system);
1489        resolve_option(&mut resolver, &mut ci_management_url);
1490        resolve_option(&mut resolver, &mut dist_download_url);
1491        resolve_option(&mut resolver, &mut dist_repository_id);
1492        resolve_option(&mut resolver, &mut dist_repository_name);
1493        resolve_option(&mut resolver, &mut dist_repository_url);
1494        resolve_option(&mut resolver, &mut dist_repository_layout);
1495        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1496        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1497        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1498        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1499        resolve_option(&mut resolver, &mut dist_site_id);
1500        resolve_option(&mut resolver, &mut dist_site_name);
1501        resolve_option(&mut resolver, &mut dist_site_url);
1502        resolve_option(&mut resolver, &mut parent_group_id);
1503        resolve_option(&mut resolver, &mut parent_artifact_id);
1504        resolve_option(&mut resolver, &mut parent_version);
1505        resolve_option(&mut resolver, &mut parent_relative_path);
1506        resolve_option(&mut resolver, &mut project_name);
1507        resolve_option(&mut resolver, &mut project_description);
1508        resolve_option(&mut resolver, &mut project_packaging);
1509        resolve_option(&mut resolver, &mut project_classifier);
1510        resolve_vec(&mut resolver, &mut modules);
1511        resolve_maps(&mut resolver, &mut repositories);
1512        resolve_maps(&mut resolver, &mut plugin_repositories);
1513        resolve_maps(&mut resolver, &mut mailing_lists);
1514        for comment in &mut xml_license_comments {
1515            *comment = resolver.resolve_text(comment, 0);
1516        }
1517        for dependency in &mut dependency_management_entries {
1518            resolve_dependency_data(&mut resolver, dependency);
1519        }
1520        resolve_dependency_data(&mut resolver, &mut relocation);
1521        for license in &mut licenses {
1522            resolve_license_entry(&mut resolver, license);
1523        }
1524        for comment in xml_license_comments {
1525            if !comment.trim().is_empty() {
1526                licenses.push(MavenLicenseEntry {
1527                    comments: Some(comment),
1528                    ..Default::default()
1529                });
1530            }
1531        }
1532
1533        for (dependency, coords) in package_data
1534            .dependencies
1535            .iter_mut()
1536            .zip(dependency_data.iter_mut())
1537        {
1538            resolve_dependency_data(&mut resolver, coords);
1539            dependency.scope = coords.scope.clone();
1540            dependency.extracted_requirement = coords.version.clone();
1541            dependency.extra_data = dependency_extra_data(coords);
1542            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1543
1544            match dependency.scope.as_deref() {
1545                Some("test") | Some("provided") => {
1546                    dependency.is_runtime = Some(false);
1547                    dependency.is_optional = Some(true);
1548                }
1549                Some(_) => {
1550                    dependency.is_runtime = Some(true);
1551                }
1552                None => {
1553                    dependency.is_runtime = None;
1554                }
1555            }
1556
1557            if let Some(version) = &coords.version {
1558                dependency.is_pinned = Some(is_maven_version_pinned(version));
1559            }
1560
1561            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1562                dependency.purl = Some(build_maven_purl(
1563                    group_id,
1564                    artifact_id,
1565                    coords.version.as_deref(),
1566                    coords.classifier.as_deref(),
1567                    coords.type_.as_deref(),
1568                ));
1569            }
1570        }
1571
1572        if package_data.namespace.is_none() {
1573            package_data.namespace = parent_group_id.clone();
1574        }
1575        if package_data.version.is_none() {
1576            package_data.version = parent_version.clone();
1577        }
1578
1579        package_data.qualifiers =
1580            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1581
1582        package_data.description = match (
1583            project_name.as_deref().filter(|value| !value.is_empty()),
1584            project_description
1585                .as_deref()
1586                .filter(|value| !value.is_empty()),
1587        ) {
1588            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1589            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1590            (Some(name), None) => Some(name.to_string()),
1591            (None, Some(description)) => Some(description.to_string()),
1592            (None, None) => None,
1593        };
1594
1595        if path.to_string_lossy().contains("META-INF/maven/") {
1596            let path_str = path.to_string_lossy();
1597            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1598                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1599                let parts: Vec<&str> = after_maven.split('/').collect();
1600                if parts.len() >= 2 {
1601                    if package_data.namespace.is_none() {
1602                        package_data.namespace = Some(parts[0].to_string());
1603                    }
1604                    if package_data.name.is_none() {
1605                        package_data.name = Some(parts[1].to_string());
1606                    }
1607                }
1608            }
1609        }
1610
1611        // Construct PURL from parsed data
1612        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1613            &package_data.namespace,
1614            &package_data.name,
1615            &package_data.version,
1616        ) {
1617            package_data.purl = Some(build_maven_purl(
1618                group_id,
1619                artifact_id,
1620                Some(version),
1621                project_classifier.as_deref(),
1622                project_packaging.as_deref(),
1623            ));
1624            if project_classifier.is_none() {
1625                package_data
1626                    .source_packages
1627                    .push(build_maven_source_package(group_id, artifact_id, version));
1628            }
1629        }
1630
1631        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1632            package_data.repository_homepage_url = build_maven_url(
1633                &package_data.namespace,
1634                &package_data.name,
1635                &package_data.version,
1636                None,
1637            );
1638
1639            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1640                build_maven_download_url(
1641                    group_id,
1642                    artifact_id,
1643                    ver,
1644                    project_classifier.as_deref(),
1645                    project_packaging.as_deref(),
1646                )
1647            });
1648
1649            if let Some(ver) = &package_data.version {
1650                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1651                package_data.api_data_url = build_maven_url(
1652                    &package_data.namespace,
1653                    &package_data.name,
1654                    &package_data.version,
1655                    Some(&pom_filename),
1656                );
1657            }
1658        }
1659
1660        package_data.vcs_url = scm_connection
1661            .or_else(|| scm_developer_connection.clone())
1662            .or_else(|| scm_url.clone());
1663
1664        // Set code_view_url from scm/url (human-browseable URL)
1665        if let Some(url) = &scm_url {
1666            package_data.code_view_url = Some(url.clone());
1667        }
1668
1669        // Set bug_tracking_url from issueManagement/url
1670        if let Some(url) = &issue_management_url {
1671            package_data.bug_tracking_url = Some(url.clone());
1672        }
1673
1674        // Map downloadUrl to download_url field
1675        if let Some(url) = &dist_download_url {
1676            package_data.download_url = Some(url.clone());
1677        }
1678
1679        if organization_name.is_some() || organization_url.is_some() {
1680            package_data.parties.push(Party {
1681                r#type: Some("organization".to_string()),
1682                role: Some("owner".to_string()),
1683                name: organization_name.clone(),
1684                email: None,
1685                url: organization_url.clone(),
1686                organization: None,
1687                organization_url: None,
1688                timezone: None,
1689            });
1690        }
1691
1692        for dependency in &dependency_management_entries {
1693            let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1694                Some("import")
1695            } else {
1696                Some("dependencymanagement")
1697            };
1698
1699            if let Some(converted) =
1700                maven_dependency_to_dependency(dependency, fallback_scope, true)
1701            {
1702                package_data.dependencies.push(converted);
1703            }
1704        }
1705
1706        if (relocation.group_id.is_some()
1707            || relocation.artifact_id.is_some()
1708            || relocation.version.is_some())
1709            && let Some(converted) =
1710                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1711        {
1712            package_data.dependencies.push(converted);
1713        }
1714
1715        if inception_year.is_some()
1716            || organization_name.is_some()
1717            || organization_url.is_some()
1718            || scm_tag.is_some()
1719            || scm_developer_connection.is_some()
1720            || issue_management_system.is_some()
1721            || ci_management_system.is_some()
1722            || ci_management_url.is_some()
1723            || dist_download_url.is_some()
1724            || dist_repository_id.is_some()
1725            || dist_snapshot_repository_id.is_some()
1726            || dist_site_id.is_some()
1727            || !repositories.is_empty()
1728            || !plugin_repositories.is_empty()
1729            || !modules.is_empty()
1730            || !mailing_lists.is_empty()
1731            || !dependency_management_entries.is_empty()
1732            || parent_group_id.is_some()
1733            || relocation.group_id.is_some()
1734            || relocation.artifact_id.is_some()
1735            || relocation.version.is_some()
1736            || relocation.message.is_some()
1737        {
1738            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1739            if let Some(year) = inception_year {
1740                extra_data.insert(
1741                    "inception_year".to_string(),
1742                    serde_json::Value::String(year),
1743                );
1744            }
1745            if let Some(name) = organization_name {
1746                extra_data.insert(
1747                    "organization_name".to_string(),
1748                    serde_json::Value::String(name),
1749                );
1750            }
1751            if let Some(url) = organization_url {
1752                extra_data.insert(
1753                    "organization_url".to_string(),
1754                    serde_json::Value::String(url),
1755                );
1756            }
1757            if let Some(tag) = scm_tag {
1758                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1759            }
1760            if let Some(dev_conn) = scm_developer_connection {
1761                extra_data.insert(
1762                    "scm_developer_connection".to_string(),
1763                    serde_json::Value::String(dev_conn),
1764                );
1765            }
1766            if let Some(system) = issue_management_system {
1767                extra_data.insert(
1768                    "issue_tracking_system".to_string(),
1769                    serde_json::Value::String(system),
1770                );
1771            }
1772            if let Some(system) = ci_management_system {
1773                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1774            }
1775            if let Some(url) = ci_management_url {
1776                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1777            }
1778
1779            // Add distribution management data
1780            if let Some(url) = dist_download_url {
1781                extra_data.insert(
1782                    "distribution_download_url".to_string(),
1783                    serde_json::Value::String(url),
1784                );
1785            }
1786
1787            // Build repository object
1788            if dist_repository_id.is_some()
1789                || dist_repository_name.is_some()
1790                || dist_repository_url.is_some()
1791                || dist_repository_layout.is_some()
1792            {
1793                let mut repo = serde_json::Map::new();
1794                if let Some(id) = dist_repository_id {
1795                    repo.insert("id".to_string(), serde_json::Value::String(id));
1796                }
1797                if let Some(name) = dist_repository_name {
1798                    repo.insert("name".to_string(), serde_json::Value::String(name));
1799                }
1800                if let Some(url) = dist_repository_url {
1801                    repo.insert("url".to_string(), serde_json::Value::String(url));
1802                }
1803                if let Some(layout) = dist_repository_layout {
1804                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1805                }
1806                extra_data.insert(
1807                    "distribution_repository".to_string(),
1808                    serde_json::Value::Object(repo),
1809                );
1810            }
1811
1812            // Build snapshotRepository object
1813            if dist_snapshot_repository_id.is_some()
1814                || dist_snapshot_repository_name.is_some()
1815                || dist_snapshot_repository_url.is_some()
1816                || dist_snapshot_repository_layout.is_some()
1817            {
1818                let mut repo = serde_json::Map::new();
1819                if let Some(id) = dist_snapshot_repository_id {
1820                    repo.insert("id".to_string(), serde_json::Value::String(id));
1821                }
1822                if let Some(name) = dist_snapshot_repository_name {
1823                    repo.insert("name".to_string(), serde_json::Value::String(name));
1824                }
1825                if let Some(url) = dist_snapshot_repository_url {
1826                    repo.insert("url".to_string(), serde_json::Value::String(url));
1827                }
1828                if let Some(layout) = dist_snapshot_repository_layout {
1829                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1830                }
1831                extra_data.insert(
1832                    "distribution_snapshot_repository".to_string(),
1833                    serde_json::Value::Object(repo),
1834                );
1835            }
1836
1837            // Build site object
1838            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1839                let mut site = serde_json::Map::new();
1840                if let Some(id) = dist_site_id {
1841                    site.insert("id".to_string(), serde_json::Value::String(id));
1842                }
1843                if let Some(name) = dist_site_name {
1844                    site.insert("name".to_string(), serde_json::Value::String(name));
1845                }
1846                if let Some(url) = dist_site_url {
1847                    site.insert("url".to_string(), serde_json::Value::String(url));
1848                }
1849                extra_data.insert(
1850                    "distribution_site".to_string(),
1851                    serde_json::Value::Object(site),
1852                );
1853            }
1854
1855            if !repositories.is_empty() {
1856                extra_data.insert(
1857                    "repositories".to_string(),
1858                    serde_json::Value::Array(
1859                        repositories
1860                            .into_iter()
1861                            .map(serde_json::Value::Object)
1862                            .collect(),
1863                    ),
1864                );
1865            }
1866
1867            if !plugin_repositories.is_empty() {
1868                extra_data.insert(
1869                    "plugin_repositories".to_string(),
1870                    serde_json::Value::Array(
1871                        plugin_repositories
1872                            .into_iter()
1873                            .map(serde_json::Value::Object)
1874                            .collect(),
1875                    ),
1876                );
1877            }
1878
1879            if !modules.is_empty() {
1880                extra_data.insert(
1881                    "modules".to_string(),
1882                    serde_json::Value::Array(
1883                        modules.into_iter().map(serde_json::Value::String).collect(),
1884                    ),
1885                );
1886            }
1887
1888            if !mailing_lists.is_empty() {
1889                extra_data.insert(
1890                    "mailing_lists".to_string(),
1891                    serde_json::Value::Array(
1892                        mailing_lists
1893                            .into_iter()
1894                            .map(serde_json::Value::Object)
1895                            .collect(),
1896                    ),
1897                );
1898            }
1899
1900            if !dependency_management_entries.is_empty() {
1901                extra_data.insert(
1902                    "dependency_management".to_string(),
1903                    serde_json::Value::Array(
1904                        dependency_management_entries
1905                            .into_iter()
1906                            .map(|dependency| {
1907                                serde_json::Value::Object(dependency_management_entry_to_value(
1908                                    &dependency,
1909                                ))
1910                            })
1911                            .collect(),
1912                    ),
1913                );
1914            }
1915
1916            if relocation.group_id.is_some()
1917                || relocation.artifact_id.is_some()
1918                || relocation.version.is_some()
1919                || relocation.message.is_some()
1920            {
1921                extra_data.insert(
1922                    "relocation".to_string(),
1923                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1924                );
1925            }
1926
1927            if parent_group_id.is_some()
1928                || parent_artifact_id.is_some()
1929                || parent_version.is_some()
1930                || parent_relative_path.is_some()
1931            {
1932                let mut parent_obj = serde_json::Map::new();
1933                if let Some(group_id) = parent_group_id {
1934                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1935                }
1936                if let Some(artifact_id) = parent_artifact_id {
1937                    parent_obj.insert(
1938                        "artifactId".to_string(),
1939                        serde_json::Value::String(artifact_id),
1940                    );
1941                }
1942                if let Some(version) = parent_version {
1943                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1944                }
1945                if let Some(relative_path) = parent_relative_path {
1946                    parent_obj.insert(
1947                        "relativePath".to_string(),
1948                        serde_json::Value::String(relative_path),
1949                    );
1950                }
1951                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1952            }
1953
1954            package_data.extra_data = Some(extra_data);
1955        }
1956
1957        package_data.extracted_license_statement = build_license_statement(&licenses);
1958        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1959            build_maven_declared_license_data(
1960                &licenses,
1961                package_data.extracted_license_statement.as_deref(),
1962            );
1963        package_data.declared_license_expression = declared_license_expression;
1964        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
1965        package_data.license_detections = license_detections;
1966
1967        vec![package_data]
1968    }
1969
1970    fn is_match(path: &Path) -> bool {
1971        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
1972            filename == "pom.xml"
1973                || filename.ends_with(".pom.xml")
1974                || filename.ends_with("-pom.xml")
1975                || filename == "pom.properties"
1976                || filename == "MANIFEST.MF"
1977                || filename.ends_with(".pom")
1978        } else {
1979            false
1980        }
1981    }
1982}
1983
/// Builds a URL under `https://repo1.maven.org/maven2` for the given coordinates.
///
/// The group id has its dots replaced by `/`, followed by the artifact id,
/// the version (when present) and finally `filename`. When `filename` is
/// `None` an empty last segment is used, so the URL ends with a trailing `/`.
///
/// Returns `None` when either `group_id` or `artifact_id` is missing.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (Some(group), Some(artifact)) = (group_id.as_deref(), artifact_id.as_deref()) else {
        return None;
    };

    let group_path = group.replace('.', "/");

    // Collect the path segments in order, then join them with `/`.
    let mut segments = vec![BASE_URL, group_path.as_str(), artifact];
    if let Some(ver) = version.as_deref() {
        segments.push(ver);
    }
    // A missing filename contributes an empty segment, i.e. a trailing slash.
    segments.push(filename.unwrap_or_default());

    Some(segments.join("/"))
}
2012
2013fn build_maven_declared_license_data(
2014    licenses: &[MavenLicenseEntry],
2015    matched_text: Option<&str>,
2016) -> (
2017    Option<String>,
2018    Option<String>,
2019    Vec<crate::models::LicenseDetection>,
2020) {
2021    let normalized: Vec<_> = licenses
2022        .iter()
2023        .filter_map(|license| license.name.as_deref())
2024        .filter_map(normalize_maven_license_name)
2025        .collect();
2026
2027    if normalized.is_empty() {
2028        return empty_declared_license_data();
2029    }
2030
2031    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2032        return empty_declared_license_data();
2033    };
2034
2035    build_declared_license_data(
2036        combined,
2037        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2038    )
2039}
2040
2041fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2042    match name.trim() {
2043        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2044            "public-domain",
2045            "LicenseRef-provenant-public-domain",
2046        )),
2047        other => normalize_declared_license_key(other),
2048    }
2049}
2050
2051/// Parse pom.properties file (Java properties format)
2052fn parse_pom_properties(path: &Path) -> PackageData {
2053    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
2054        Ok(content) => content,
2055        Err(e) => {
2056            warn!("Failed to read pom.properties at {:?}: {}", path, e);
2057            return PackageData {
2058                package_type: Some(PackageType::Maven),
2059                primary_language: Some("Java".to_string()),
2060                datasource_id: Some(DatasourceId::MavenPomProperties),
2061                ..Default::default()
2062            };
2063        }
2064    };
2065
2066    let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2067    package_data.package_type = Some(PackageType::Maven);
2068    package_data.primary_language = Some("Java".to_string());
2069    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2070
2071    let mut group_id: Option<String> = None;
2072    let mut artifact_id: Option<String> = None;
2073    let mut version: Option<String> = None;
2074
2075    // Parse Java properties format
2076    let mut continuation = String::new();
2077
2078    for line in content.lines() {
2079        let current_line = if continuation.is_empty() {
2080            line.to_string()
2081        } else {
2082            format!("{}{}", continuation, line)
2083        };
2084        continuation.clear();
2085
2086        // Check for line continuation (backslash at end)
2087        if current_line.ends_with('\\') {
2088            continuation = current_line[..current_line.len() - 1].to_string();
2089            continue;
2090        }
2091
2092        // Skip comments and empty lines
2093        let trimmed = current_line.trim();
2094        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2095            continue;
2096        }
2097
2098        // Parse key=value
2099        if let Some(eq_pos) = current_line.find('=') {
2100            let key = current_line[..eq_pos].trim();
2101            let value = current_line[eq_pos + 1..].trim();
2102
2103            match key {
2104                "groupId" => group_id = Some(value.to_string()),
2105                "artifactId" => artifact_id = Some(value.to_string()),
2106                "version" => version = Some(value.to_string()),
2107                _ => {}
2108            }
2109        }
2110    }
2111
2112    package_data.namespace = group_id.clone();
2113    package_data.name = artifact_id.clone();
2114    package_data.version = version.clone();
2115
2116    // Generate PURL
2117    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2118        &package_data.namespace,
2119        &package_data.name,
2120        &package_data.version,
2121    ) {
2122        package_data.purl = Some(format!(
2123            "pkg:maven/{}/{}@{}",
2124            group_id, artifact_id, version
2125        ));
2126    }
2127
2128    package_data
2129}
2130
2131/// Parse MANIFEST.MF file (JAR manifest format)
2132///
2133/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2134/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2135/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2136/// dependencies.
2137fn parse_manifest_mf(path: &Path) -> PackageData {
2138    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
2139        Ok(content) => content,
2140        Err(e) => {
2141            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2142            return default_package_data(DatasourceId::JavaJarManifest);
2143        }
2144    };
2145
2146    let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2147
2148    // Parse manifest headers (RFC822-style with space continuations)
2149    let mut headers: Vec<(String, String)> = Vec::new();
2150    let mut current_key: Option<String> = None;
2151    let mut current_value = String::new();
2152
2153    for line in content.lines() {
2154        if line.starts_with(' ') || line.starts_with('\t') {
2155            // Continuation line
2156            current_value.push_str(line.trim());
2157        } else if let Some(colon_pos) = line.find(':') {
2158            // Save previous header
2159            if let Some(key) = current_key.take() {
2160                headers.push((key, current_value.trim().to_string()));
2161                current_value.clear();
2162            }
2163
2164            // Start new header
2165            let key = line[..colon_pos].trim().to_string();
2166            let value = line[colon_pos + 1..].trim().to_string();
2167            current_key = Some(key);
2168            current_value = value;
2169        }
2170    }
2171
2172    // Save last header
2173    if let Some(key) = current_key {
2174        headers.push((key, current_value.trim().to_string()));
2175    }
2176
2177    // Convert headers to HashMap for easier lookup
2178    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2179
2180    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2181    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2182    let is_osgi = bundle_symbolic_name.is_some();
2183
2184    if is_osgi {
2185        // OSGi bundle - extract OSGi-specific metadata
2186        package_data.package_type = Some(PackageType::Osgi);
2187        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2188
2189        // Bundle-SymbolicName is the canonical name for OSGi bundles
2190        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2191        if let Some(bsn) = bundle_symbolic_name {
2192            let name = if let Some(semicolon_pos) = bsn.find(';') {
2193                bsn[..semicolon_pos].trim().to_string()
2194            } else {
2195                bsn.clone()
2196            };
2197            package_data.name = Some(name);
2198        }
2199
2200        // Bundle-Version
2201        package_data.version = headers_map.get("Bundle-Version").cloned();
2202
2203        // Bundle-Description takes priority over Bundle-Name for description
2204        if let Some(desc) = headers_map.get("Bundle-Description") {
2205            package_data.description = Some(desc.clone());
2206        } else if let Some(name) = headers_map.get("Bundle-Name") {
2207            package_data.description = Some(name.clone());
2208        }
2209
2210        // Bundle-Vendor
2211        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2212            package_data.parties.push(Party {
2213                r#type: Some("organization".to_string()),
2214                role: Some("vendor".to_string()),
2215                name: Some(vendor.clone()),
2216                email: None,
2217                url: None,
2218                organization: None,
2219                organization_url: None,
2220                timezone: None,
2221            });
2222        }
2223
2224        // Bundle-DocURL
2225        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2226
2227        // Bundle-License
2228        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2229
2230        // Import-Package -> dependencies with scope "import"
2231        if let Some(import_pkg) = headers_map.get("Import-Package") {
2232            let deps = parse_osgi_package_list(import_pkg, "import");
2233            package_data.dependencies.extend(deps);
2234        }
2235
2236        // Require-Bundle -> dependencies with scope "require-bundle"
2237        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2238            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2239            package_data.dependencies.extend(deps);
2240        }
2241
2242        // Export-Package -> store in extra_data
2243        if let Some(export_pkg) = headers_map.get("Export-Package") {
2244            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2245            extra_data.insert(
2246                "export_packages".to_string(),
2247                serde_json::Value::String(export_pkg.clone()),
2248            );
2249            package_data.extra_data = Some(extra_data);
2250        }
2251
2252        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2253        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2254            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2255        }
2256    } else {
2257        // Regular JAR manifest
2258        package_data.package_type = Some(PackageType::Maven);
2259        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2260
2261        // Extract fields with priority order for non-OSGi JARs
2262        let mut name: Option<String> = None;
2263        let mut version: Option<String> = None;
2264        let mut vendor: Option<String> = None;
2265
2266        for (key, value) in &headers {
2267            match key.as_str() {
2268                "Bundle-Name" if name.is_none() => {
2269                    name = Some(value.clone());
2270                }
2271                "Implementation-Title" if name.is_none() => {
2272                    name = Some(value.clone());
2273                }
2274                "Bundle-Version" if version.is_none() => {
2275                    version = Some(value.clone());
2276                }
2277                "Implementation-Version" if version.is_none() => {
2278                    version = Some(value.clone());
2279                }
2280                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2281                    vendor = Some(value.clone());
2282                }
2283                _ => {}
2284            }
2285        }
2286
2287        package_data.name = name;
2288        package_data.version = version;
2289
2290        // Add vendor to parties if present
2291        if let Some(vendor_name) = vendor {
2292            package_data.parties.push(Party {
2293                r#type: Some("organization".to_string()),
2294                role: Some("vendor".to_string()),
2295                name: Some(vendor_name),
2296                email: None,
2297                url: None,
2298                organization: None,
2299                organization_url: None,
2300                timezone: None,
2301            });
2302        }
2303
2304        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2305        if let Some(path_str) = path.to_str()
2306            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2307        {
2308            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2309            let parts: Vec<&str> = after_maven.split('/').collect();
2310            if parts.len() >= 2 {
2311                package_data.namespace = Some(parts[0].to_string());
2312            }
2313        }
2314
2315        // Generate Maven PURL if we have enough information
2316        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2317            &package_data.namespace,
2318            &package_data.name,
2319            &package_data.version,
2320        ) {
2321            package_data.purl = Some(format!(
2322                "pkg:maven/{}/{}@{}",
2323                group_id, artifact_id, version
2324            ));
2325        } else if package_data.name.is_none() && package_data.version.is_none() {
2326            // A bare MANIFEST.MF without Maven coordinates or implementation
2327            // identity is only evidence of a generic JAR manifest, not a Maven
2328            // package. Keep the Java manifest datasource so assembly can still
2329            // merge richer sibling metadata when present.
2330            package_data.package_type = Some(PackageType::Jar);
2331        }
2332    }
2333
2334    package_data
2335}
2336
2337/// Parse OSGi Import-Package header into dependencies.
2338///
2339/// Format: comma-separated list of packages with optional directives:
2340/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2341pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2342    let mut dependencies = Vec::new();
2343
2344    // Split by comma, but be careful not to split within quoted strings
2345    for package_entry in split_osgi_list(package_list) {
2346        let package_entry = package_entry.trim();
2347        if package_entry.is_empty() {
2348            continue;
2349        }
2350
2351        // Extract package name (before first semicolon)
2352        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2353            package_entry[..semicolon_pos].trim()
2354        } else {
2355            package_entry
2356        };
2357
2358        if package_name.is_empty() {
2359            continue;
2360        }
2361
2362        // Extract version directive if present
2363        let version_requirement = extract_osgi_version(package_entry);
2364        let is_optional = package_entry.contains("resolution:=optional");
2365
2366        dependencies.push(Dependency {
2367            purl: Some(format!("pkg:osgi/{}", package_name)),
2368            extracted_requirement: version_requirement,
2369            scope: Some(scope.to_string()),
2370            is_runtime: Some(true),
2371            is_optional: Some(is_optional),
2372            is_pinned: None,
2373            is_direct: Some(true),
2374            resolved_package: None,
2375            extra_data: None,
2376        });
2377    }
2378
2379    dependencies
2380}
2381
2382/// Parse OSGi Require-Bundle header into dependencies.
2383///
2384/// Format: comma-separated list of bundle symbolic names with optional directives:
2385/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2386pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2387    let mut dependencies = Vec::new();
2388
2389    for bundle_entry in split_osgi_list(bundle_list) {
2390        let bundle_entry = bundle_entry.trim();
2391        if bundle_entry.is_empty() {
2392            continue;
2393        }
2394
2395        // Extract bundle symbolic name (before first semicolon)
2396        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2397            bundle_entry[..semicolon_pos].trim()
2398        } else {
2399            bundle_entry
2400        };
2401
2402        if bundle_name.is_empty() {
2403            continue;
2404        }
2405
2406        // Extract bundle-version directive if present
2407        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2408
2409        // Check if optional
2410        let is_optional = bundle_entry.contains("resolution:=optional");
2411
2412        dependencies.push(Dependency {
2413            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2414            extracted_requirement: version_requirement,
2415            scope: Some(scope.to_string()),
2416            is_runtime: Some(!is_optional),
2417            is_optional: Some(is_optional),
2418            is_pinned: None,
2419            is_direct: Some(true),
2420            resolved_package: None,
2421            extra_data: None,
2422        });
2423    }
2424
2425    dependencies
2426}
2427
/// Break an OSGi header value into its comma-separated clauses.
///
/// Commas inside double-quoted sections do not split, so quoted version
/// ranges survive intact:
/// "foo;version=\"[1.0,2.0)\",bar;version=\"3.0\""
///
/// Each returned clause is trimmed; clauses that are empty after trimming are
/// dropped.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut clauses = Vec::new();
    let mut clause_start = 0;
    let mut quoted = false;

    for (offset, ch) in list.char_indices() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ',' && !quoted {
            let clause = list[clause_start..offset].trim();
            if !clause.is_empty() {
                clauses.push(clause.to_string());
            }
            // ',' is a single byte, so the next clause starts right after it.
            clause_start = offset + 1;
        }
    }

    // Whatever follows the last separator is the final clause.
    let tail = list[clause_start..].trim();
    if !tail.is_empty() {
        clauses.push(tail.to_string());
    }

    clauses
}
2461
/// Extract the value of the parameter named `directive` from one OSGi header
/// clause such as `org.foo;version="[1.0,2.0)";resolution:=optional`.
///
/// The clause is split into its `;`-separated parameters (semicolons inside
/// double quotes do not split) and the first parameter whose name is exactly
/// `directive` is used. Matching on the whole name matters: a plain substring
/// search for `version=` would also hit `bundle-version=` or
/// `specification-version=` and return the wrong value.
///
/// Returns the text between the quotes for a quoted value, or the trimmed raw
/// text for an unquoted one. Returns `None` when no parameter has that name or
/// when a quoted value is missing its closing quote.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split into parameters on ';' while leaving quoted sections untouched.
    let mut parameters: Vec<&str> = Vec::new();
    let mut parameter_start = 0;
    let mut in_quotes = false;
    for (offset, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                parameters.push(&entry[parameter_start..offset]);
                parameter_start = offset + 1;
            }
            _ => {}
        }
    }
    parameters.push(&entry[parameter_start..]);

    parameters.into_iter().find_map(|parameter| {
        // Parameters without '=' (e.g. the leading package/bundle name) are skipped.
        let (name, raw_value) = parameter.split_once('=')?;
        // `name:=value` directives yield the name "name:" here and therefore never
        // match an attribute lookup such as "version".
        if name.trim() != directive {
            return None;
        }

        let raw_value = raw_value.trim();
        match raw_value.strip_prefix('"') {
            Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
            None => Some(raw_value.to_string()),
        }
    })
}
2474
2475pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2476    extract_osgi_directive(entry, "version")
2477}
2478
2479pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2480    extract_osgi_directive(entry, "bundle-version")
2481}
2482
2483fn default_package_data(datasource_id: DatasourceId) -> PackageData {
2484    PackageData {
2485        package_type: Some(PackageType::Maven),
2486        datasource_id: Some(datasource_id),
2487        ..Default::default()
2488    }
2489}
2490
/// Unit tests covering metadata extraction from `pom.xml` files.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `contents` to a `pom.xml` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the path so the caller
    /// can keep the directory alive while the file is being parsed.
    fn write_pom(contents: &str) -> (TempDir, std::path::PathBuf) {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");
        fs::write(&pom_path, contents).unwrap();
        (temp_dir, pom_path)
    }

    /// Shorthand for the JSON string values compared against `extra_data`.
    fn json_str(text: &str) -> serde_json::Value {
        serde_json::Value::String(text.to_string())
    }

    #[test]
    fn test_organization_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("my-app"));
        assert_eq!(package_data.namespace.as_deref(), Some("com.example"));
        assert_eq!(package_data.version.as_deref(), Some("1.0.0"));

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("organization_name"),
            Some(&json_str("Example Corporation"))
        );
        assert_eq!(
            extra_data.get("organization_url"),
            Some(&json_str("https://example.com"))
        );
    }

    #[test]
    fn test_scm_metadata_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(
            package_data.name.as_deref(),
            Some("spring-boot-starter-web")
        );
        assert_eq!(
            package_data.namespace.as_deref(),
            Some("org.springframework.boot")
        );
        assert_eq!(package_data.version.as_deref(), Some("3.0.0"));

        assert_eq!(
            package_data.code_view_url.as_deref(),
            Some("https://github.com/spring-projects/spring-boot")
        );

        // The <connection> element, not <developerConnection>, feeds vcs_url.
        assert_eq!(
            package_data.vcs_url.as_deref(),
            Some("git+https://github.com/spring-projects/spring-boot.git")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(extra_data.get("scm_tag"), Some(&json_str("v3.0.0")));
        // The developer connection is kept on its own key in extra_data.
        assert_eq!(
            extra_data.get("scm_developer_connection"),
            Some(&json_str(
                "git+git@github.com:spring-projects/spring-boot.git"
            ))
        );
    }

    #[test]
    fn test_developers_and_contributors_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(package_data.parties.len(), 3);

        let first_developer = &package_data.parties[0];
        assert_eq!(first_developer.r#type.as_deref(), Some("person"));
        assert_eq!(first_developer.role.as_deref(), Some("developer"));
        assert_eq!(first_developer.name.as_deref(), Some("John Doe"));
        assert_eq!(first_developer.email.as_deref(), Some("john@example.com"));
        assert_eq!(
            first_developer.url.as_deref(),
            Some("https://example.com/jdoe")
        );
        assert_eq!(
            first_developer.organization.as_deref(),
            Some("Example Corp")
        );
        assert_eq!(
            first_developer.organization_url.as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            first_developer.timezone.as_deref(),
            Some("America/New_York")
        );

        let second_developer = &package_data.parties[1];
        assert_eq!(second_developer.r#type.as_deref(), Some("person"));
        assert_eq!(second_developer.role.as_deref(), Some("developer"));
        assert_eq!(second_developer.name.as_deref(), Some("Jane Smith"));
        assert_eq!(second_developer.email.as_deref(), Some("jane@example.com"));

        let contributor = &package_data.parties[2];
        assert_eq!(contributor.r#type.as_deref(), Some("person"));
        assert_eq!(contributor.role.as_deref(), Some("contributor"));
        assert_eq!(contributor.name.as_deref(), Some("Bob Wilson"));
        assert_eq!(contributor.email.as_deref(), Some("bob@example.com"));
        assert_eq!(
            contributor.url.as_deref(),
            Some("https://example.com/bob")
        );
    }

    #[test]
    fn test_issue_management_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.bug_tracking_url.as_deref(),
            Some("https://github.com/example/test-app/issues")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("issue_tracking_system"),
            Some(&json_str("GitHub"))
        );
    }

    #[test]
    fn test_ci_management_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(extra_data.get("ci_system"), Some(&json_str("Jenkins")));
        assert_eq!(
            extra_data.get("ci_url"),
            Some(&json_str("https://ci.example.com/job/test-app"))
        );
    }

    #[test]
    fn test_distribution_management_extraction() {
        let (_temp_dir, pom_path) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#,
        );

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.download_url.as_deref(),
            Some("https://example.com/downloads")
        );

        let extra_data = package_data.extra_data.unwrap();

        assert_eq!(
            extra_data.get("distribution_download_url"),
            Some(&json_str("https://example.com/downloads"))
        );

        // Release repository
        let release_repo = extra_data
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(release_repo.get("id"), Some(&json_str("releases")));
        assert_eq!(
            release_repo.get("name"),
            Some(&json_str("Release Repository"))
        );
        assert_eq!(
            release_repo.get("url"),
            Some(&json_str("https://repo.example.com/releases"))
        );
        assert_eq!(release_repo.get("layout"), Some(&json_str("default")));

        // Snapshot repository
        let snapshot_repo = extra_data
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(snapshot_repo.get("id"), Some(&json_str("snapshots")));
        assert_eq!(
            snapshot_repo.get("name"),
            Some(&json_str("Snapshot Repository"))
        );
        assert_eq!(
            snapshot_repo.get("url"),
            Some(&json_str("https://repo.example.com/snapshots"))
        );
        assert_eq!(snapshot_repo.get("layout"), Some(&json_str("default")));

        // Site deployment target
        let site = extra_data
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(site.get("id"), Some(&json_str("site-deploy")));
        assert_eq!(site.get("name"), Some(&json_str("Project Site")));
        assert_eq!(
            site.get("url"),
            Some(&json_str("https://example.com/site"))
        );
    }
}
2867
// Registration entry for this parser. The arguments are, in order: a
// human-readable description, the glob patterns of the files handled here
// (POM files, pom.properties and JAR manifests under META-INF), the string
// "maven", the string "Java", and a link to the upstream POM reference.
// NOTE(review): the exact meaning of each positional argument is defined by
// `register_parser!`, which is not visible from this file — the third and
// fourth arguments are presumably the package type and primary language;
// confirm against the macro definition.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);