Skip to main content

provenant/parsers/
maven.rs

//! Parser for Apache Maven pom.xml files.
//!
//! Extracts package metadata, dependencies, and license information from
//! Maven Project Object Model (POM) files.
//!
//! # Supported Formats
//! - pom.xml (Project Object Model)
//! - pom.properties
//! - MANIFEST.MF (JAR manifest)
//!
//! # Key Features
//! - Property value substitution (`${project.version}`)
//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
//! - Dependency scope handling (compile, test, provided, runtime, system)
//! - Package URL (purl) generation
//! - Multiple license support (combined with " OR ")
//!
//! # Implementation Notes
//! - Uses quick-xml for XML parsing
//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
//! - Property substitution is bounded (depth, substitution count, output size) to prevent infinite loops
//! - Direct dependencies: every dependency declared in pom.xml is treated as direct
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::borrow::Cow;
30use std::collections::{HashMap, HashSet};
31use std::path::Path;
32
33use super::PackageParser;
34use super::license_normalization::{
35    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
36    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
37};
38
/// Text fields collected for one dependency-shaped POM element.
///
/// The same shape is used for regular dependencies, `dependencyManagement`
/// entries and the relocation record. Values are stored as read; placeholder
/// expansion is applied afterwards by `resolve_dependency_data`.
#[derive(Clone, Debug, Default)]
struct MavenDependencyData {
    /// Group coordinate (serialized as `groupId`).
    group_id: Option<String>,
    /// Artifact coordinate (serialized as `artifactId`).
    artifact_id: Option<String>,
    /// Version or version range exactly as written.
    version: Option<String>,
    /// Optional artifact classifier.
    classifier: Option<String>,
    /// Artifact type (serialized as `type`); trailing underscore avoids the keyword.
    type_: Option<String>,
    /// Dependency scope as written.
    scope: Option<String>,
    /// Raw text of the optional flag; interpreted by `parse_maven_bool`.
    optional: Option<String>,
    /// Path recorded under the `system_path` extra-data key.
    // NOTE(review): presumably the `<systemPath>` element of system-scoped dependencies — confirm.
    system_path: Option<String>,
    /// Free-form message attached to the entry.
    // NOTE(review): presumably only populated for relocations — confirm against the parser loop.
    message: Option<String>,
}
51
/// One license declaration: the three text fields rendered by
/// `build_license_statement`.
#[derive(Clone, Debug, Default)]
struct MavenLicenseEntry {
    /// License name as written.
    name: Option<String>,
    /// License URL as written.
    url: Option<String>,
    /// Free-form comments attached to the license.
    comments: Option<String>,
}
58
59/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
60///
61/// Maven properties can reference other properties, creating dependency graphs. This resolver:
62/// - Resolves nested placeholders: `${outer.${inner}}`
63/// - Detects circular references: `${a}` → `${b}` → `${a}`
64/// - Enforces depth limits to prevent stack overflow
65/// - Enforces substitution limits to prevent DoS on pathological inputs
66///
67/// # Algorithm
68///
69/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
70/// - `resolving_set`: For cycle detection (hash set lookup)
71/// - `resolving_stack`: For error reporting (preserves path)
72/// - `cache`: Memoizes resolved values to avoid redundant work
73struct PropertyResolver {
74    raw: HashMap<String, String>,
75    builtins: HashMap<String, String>,
76    cache: HashMap<String, String>,
77    resolving_set: HashSet<String>,
78    resolving_stack: Vec<String>,
79    max_depth: usize,
80    max_output_len: usize,
81    max_substitutions: usize,
82    warned_keys: HashSet<String>,
83}
84
85impl PropertyResolver {
86    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
87        Self {
88            raw,
89            builtins,
90            cache: HashMap::new(),
91            resolving_set: HashSet::new(),
92            resolving_stack: Vec::new(),
93            max_depth: 10,
94            max_output_len: 100_000,
95            max_substitutions: 1000,
96            warned_keys: HashSet::new(),
97        }
98    }
99
100    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
101        if let Some(value) = self.cache.get(key) {
102            return Some(value.clone());
103        }
104
105        if depth >= self.max_depth {
106            self.warn_once(
107                "depth",
108                key,
109                format!("Maven property depth limit hit resolving {key}"),
110            );
111            return None;
112        }
113
114        if self.resolving_set.contains(key) {
115            if self
116                .resolving_stack
117                .last()
118                .is_some_and(|current| current == key)
119            {
120                return None;
121            }
122
123            self.warn_once(
124                "cycle",
125                key,
126                format!(
127                    "Maven property cycle detected at {key}: {:?}",
128                    self.resolving_stack
129                ),
130            );
131            return None;
132        }
133
134        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
135            value.clone()
136        } else {
137            return None;
138        };
139
140        self.resolving_set.insert(key.to_string());
141        self.resolving_stack.push(key.to_string());
142
143        let resolved = self.resolve_text(&raw_val, depth + 1);
144
145        self.resolving_stack.pop();
146        self.resolving_set.remove(key);
147
148        self.cache.insert(key.to_string(), resolved.clone());
149        Some(resolved)
150    }
151
152    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
153        if !text.contains("${") {
154            return text.to_string();
155        }
156
157        if depth >= self.max_depth {
158            warn!("Maven property depth limit hit resolving text");
159            return text.to_string();
160        }
161
162        let bytes = text.as_bytes();
163        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
164        let mut index = 0;
165        let mut substitutions = 0;
166
167        while index < bytes.len() {
168            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
169                if substitutions >= self.max_substitutions {
170                    warn!("Maven property substitution limit hit resolving {text}");
171                    return text.to_string();
172                }
173
174                let placeholder_start = index;
175                let Some((content, closing_index)) =
176                    self.parse_placeholder_content(text, index + 2)
177                else {
178                    warn!("Maven property malformed placeholder in {text}");
179                    return text.to_string();
180                };
181
182                substitutions += 1;
183                let resolved_key = if content.contains("${") {
184                    self.resolve_text(content, depth + 1)
185                } else {
186                    content.to_string()
187                };
188
189                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
190                    if output.len() + resolved.len() > self.max_output_len {
191                        warn!("Maven property output length limit hit resolving {text}");
192                        return text.to_string();
193                    }
194                    output.extend_from_slice(resolved.as_bytes());
195                } else {
196                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
197                    if output.len() + placeholder_bytes.len() > self.max_output_len {
198                        warn!("Maven property output length limit hit resolving {text}");
199                        return text.to_string();
200                    }
201                    output.extend_from_slice(placeholder_bytes);
202                }
203
204                index = closing_index + 1;
205                continue;
206            }
207
208            if output.len() + 1 > self.max_output_len {
209                warn!("Maven property output length limit hit resolving {text}");
210                return text.to_string();
211            }
212
213            output.push(bytes[index]);
214            index += 1;
215        }
216
217        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
218    }
219
220    fn parse_placeholder_content<'a>(
221        &self,
222        text: &'a str,
223        start_index: usize,
224    ) -> Option<(&'a str, usize)> {
225        let bytes = text.as_bytes();
226        let mut index = start_index;
227        let mut depth = 0;
228
229        while index < bytes.len() {
230            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
231                depth += 1;
232                index += 2;
233                continue;
234            }
235
236            if bytes[index] == b'}' {
237                if depth == 0 {
238                    return Some((&text[start_index..index], index));
239                }
240                depth -= 1;
241            }
242
243            index += 1;
244        }
245
246        None
247    }
248
249    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
250        let token = format!("{kind}:{key}");
251        if self.warned_keys.insert(token) {
252            warn!("{message}");
253        }
254    }
255}
256
/// Blanks out `<% … %>` template directives so the remaining text can be parsed as XML.
///
/// Every character of a directive is replaced by a single space, except `\n` and
/// `\r`, which are kept so line numbers stay stable. Input without any `<%` is
/// returned borrowed. If any directive is left unterminated the whole input is
/// returned borrowed and untouched.
fn sanitize_template_directives(content: &str) -> Cow<'_, str> {
    const OPEN: &str = "<%";
    const CLOSE: &str = "%>";

    let Some(mut open_at) = content.find(OPEN) else {
        return Cow::Borrowed(content);
    };

    let mut blanked = String::with_capacity(content.len());
    let mut tail = content;

    loop {
        blanked.push_str(&tail[..open_at]);
        let from_open = &tail[open_at..];

        // The terminator is searched from the opener itself, so `<%>` counts as closed.
        let Some(close_at) = from_open.find(CLOSE) else {
            return Cow::Borrowed(content);
        };
        let directive_len = close_at + CLOSE.len();

        blanked.extend(
            from_open[..directive_len]
                .chars()
                .map(|ch| if ch == '\n' || ch == '\r' { ch } else { ' ' }),
        );

        tail = &from_open[directive_len..];
        match tail.find(OPEN) {
            Some(next) => open_at = next,
            None => break,
        }
    }

    blanked.push_str(tail);
    Cow::Owned(blanked)
}
288
289fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
290    if let Some(current) = value.clone() {
291        *value = Some(resolver.resolve_text(&current, 0));
292    }
293}
294
295fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
296    for value in values.iter_mut() {
297        *value = resolver.resolve_text(value, 0);
298    }
299}
300
301fn resolve_map_strings(
302    resolver: &mut PropertyResolver,
303    values: &mut serde_json::Map<String, serde_json::Value>,
304) {
305    for value in values.values_mut() {
306        if let serde_json::Value::String(current) = value {
307            let resolved = resolver.resolve_text(current, 0);
308            *current = resolved;
309        }
310    }
311}
312
313fn resolve_maps(
314    resolver: &mut PropertyResolver,
315    values: &mut [serde_json::Map<String, serde_json::Value>],
316) {
317    for value in values.iter_mut() {
318        resolve_map_strings(resolver, value);
319    }
320}
321
322fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
323    resolve_option(resolver, &mut dependency.group_id);
324    resolve_option(resolver, &mut dependency.artifact_id);
325    resolve_option(resolver, &mut dependency.version);
326    resolve_option(resolver, &mut dependency.classifier);
327    resolve_option(resolver, &mut dependency.type_);
328    resolve_option(resolver, &mut dependency.scope);
329    resolve_option(resolver, &mut dependency.optional);
330    resolve_option(resolver, &mut dependency.system_path);
331    resolve_option(resolver, &mut dependency.message);
332}
333
/// Interprets an optional POM text value as a boolean.
///
/// Only the word `true` (any ASCII case, surrounding whitespace ignored) yields
/// `true`; every other value, including a missing one, yields `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    match value {
        Some(raw) => raw.trim().eq_ignore_ascii_case("true"),
        None => false,
    }
}
337
/// Maps a POM packaging value onto the set of recognised packaging names.
///
/// - missing or blank input → `None`
/// - a recognised name (compared case-sensitively after trimming) → that name
/// - anything else → `"jar"`
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const KNOWN_PACKAGINGS: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if KNOWN_PACKAGINGS.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
348
349fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
350    resolve_option(resolver, &mut license.name);
351    resolve_option(resolver, &mut license.url);
352    resolve_option(resolver, &mut license.comments);
353}
354
355fn build_maven_qualifiers(
356    classifier: Option<&str>,
357    packaging: Option<&str>,
358) -> Option<HashMap<String, String>> {
359    let mut qualifiers = HashMap::new();
360
361    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
362        qualifiers.insert("classifier".to_string(), classifier.to_string());
363    }
364
365    if let Some(packaging) = normalize_maven_packaging(packaging)
366        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
367    {
368        qualifiers.insert("type".to_string(), packaging.to_string());
369    }
370
371    (!qualifiers.is_empty()).then_some(qualifiers)
372}
373
374fn build_maven_purl(
375    group_id: &str,
376    artifact_id: &str,
377    version: Option<&str>,
378    classifier: Option<&str>,
379    packaging: Option<&str>,
380) -> String {
381    let mut purl = format!(
382        "pkg:maven/{}/{}",
383        percent_encode_purl_component(group_id),
384        percent_encode_purl_component(artifact_id)
385    );
386
387    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
388        purl.push('@');
389        purl.push_str(&percent_encode_purl_component(version));
390    }
391
392    let qualifiers = build_maven_qualifiers(classifier, packaging);
393    if let Some(qualifiers) = qualifiers {
394        let mut query_parts = Vec::new();
395        if let Some(classifier) = qualifiers.get("classifier") {
396            query_parts.push(format!(
397                "classifier={}",
398                percent_encode_purl_component(classifier)
399            ));
400        }
401        if let Some(type_) = qualifiers.get("type") {
402            query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
403        }
404
405        if !query_parts.is_empty() {
406            purl.push('?');
407            purl.push_str(&query_parts.join("&"));
408        }
409    }
410
411    purl
412}
413
/// Percent-encodes one purl component.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` pass through unchanged; every
/// other byte of the UTF-8 encoding becomes `%XX` with upper-case hex digits.
fn percent_encode_purl_component(value: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    value
        .bytes()
        .fold(String::with_capacity(value.len()), |mut out, byte| {
            if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') {
                out.push(char::from(byte));
            } else {
                out.push('%');
                out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
            }
            out
        })
}
428
429fn build_maven_download_url(
430    group_id: &str,
431    artifact_id: &str,
432    version: &str,
433    classifier: Option<&str>,
434    packaging: Option<&str>,
435) -> String {
436    const BASE_URL: &str = "https://repo1.maven.org/maven2";
437    let group_path = group_id.replace('.', "/");
438    let extension = normalize_maven_packaging(packaging)
439        .filter(|value| *value != "pom")
440        .unwrap_or("jar");
441    let classifier_suffix = classifier
442        .map(str::trim)
443        .filter(|value| !value.is_empty())
444        .map(|value| format!("-{value}"))
445        .unwrap_or_default();
446
447    format!(
448        "{}/{}/{}/{}/{}-{}{}.{}",
449        BASE_URL,
450        group_path,
451        artifact_id,
452        version,
453        artifact_id,
454        version,
455        classifier_suffix,
456        extension
457    )
458}
459
460fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
461    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
462}
463
464fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
465    let rendered_entries: Vec<String> = licenses
466        .iter()
467        .filter_map(|license| {
468            let mut lines = Vec::new();
469
470            if let Some(name) = license
471                .name
472                .as_ref()
473                .filter(|value| !value.trim().is_empty())
474            {
475                lines.push(format!("    name: {name}"));
476            }
477            if let Some(url) = license
478                .url
479                .as_ref()
480                .filter(|value| !value.trim().is_empty())
481            {
482                lines.push(format!("    url: {url}"));
483            }
484            if let Some(comments) = license
485                .comments
486                .as_ref()
487                .filter(|value| !value.trim().is_empty())
488            {
489                lines.push(format!("    comments: {comments}"));
490            }
491
492            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
493        })
494        .collect();
495
496    if rendered_entries.is_empty() {
497        None
498    } else {
499        Some(format!("{}\n", rendered_entries.join("\n")))
500    }
501}
502
/// Heuristically decides whether a comment talks about licensing.
///
/// The comment is ASCII-lowercased and searched for a fixed list of marker
/// substrings. Matching is purely by substring, so short markers such as `mit`
/// also match inside longer words.
fn is_license_like_comment(comment: &str) -> bool {
    const MARKERS: [&str; 11] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "mit",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];

    let haystack = comment.to_ascii_lowercase();
    for marker in MARKERS {
        if haystack.contains(marker) {
            return true;
        }
    }
    false
}
521
522fn dependency_extra_data(
523    dependency: &MavenDependencyData,
524) -> Option<HashMap<String, serde_json::Value>> {
525    let mut extra_data = HashMap::new();
526
527    if let Some(classifier) = dependency
528        .classifier
529        .as_ref()
530        .filter(|value| !value.trim().is_empty())
531    {
532        extra_data.insert(
533            "classifier".to_string(),
534            serde_json::Value::String(classifier.clone()),
535        );
536    }
537    if let Some(type_) = dependency
538        .type_
539        .as_ref()
540        .filter(|value| !value.trim().is_empty())
541    {
542        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
543    }
544    if let Some(system_path) = dependency
545        .system_path
546        .as_ref()
547        .filter(|value| !value.trim().is_empty())
548    {
549        extra_data.insert(
550            "system_path".to_string(),
551            serde_json::Value::String(system_path.clone()),
552        );
553    }
554    if let Some(message) = dependency
555        .message
556        .as_ref()
557        .filter(|value| !value.trim().is_empty())
558    {
559        extra_data.insert(
560            "message".to_string(),
561            serde_json::Value::String(message.clone()),
562        );
563    }
564
565    (!extra_data.is_empty()).then_some(extra_data)
566}
567
568fn dependency_management_entry_to_value(
569    dependency: &MavenDependencyData,
570) -> serde_json::Map<String, serde_json::Value> {
571    let mut dep_obj = serde_json::Map::new();
572
573    if let Some(group_id) = dependency.group_id.as_ref() {
574        dep_obj.insert(
575            "groupId".to_string(),
576            serde_json::Value::String(group_id.clone()),
577        );
578    }
579    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
580        dep_obj.insert(
581            "artifactId".to_string(),
582            serde_json::Value::String(artifact_id.clone()),
583        );
584    }
585    if let Some(version) = dependency.version.as_ref() {
586        dep_obj.insert(
587            "version".to_string(),
588            serde_json::Value::String(version.clone()),
589        );
590    }
591    if let Some(scope) = dependency.scope.as_ref() {
592        dep_obj.insert(
593            "scope".to_string(),
594            serde_json::Value::String(scope.clone()),
595        );
596    }
597    if let Some(type_) = dependency.type_.as_ref() {
598        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
599    }
600    if let Some(classifier) = dependency.classifier.as_ref() {
601        dep_obj.insert(
602            "classifier".to_string(),
603            serde_json::Value::String(classifier.clone()),
604        );
605    }
606    if let Some(optional) = dependency.optional.as_deref() {
607        dep_obj.insert(
608            "optional".to_string(),
609            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
610        );
611    }
612    if let Some(message) = dependency.message.as_ref() {
613        dep_obj.insert(
614            "message".to_string(),
615            serde_json::Value::String(message.clone()),
616        );
617    }
618
619    dep_obj
620}
621
622fn maven_dependency_to_dependency(
623    dependency_data: &MavenDependencyData,
624    fallback_scope: Option<&str>,
625    force_non_runtime: bool,
626) -> Option<Dependency> {
627    let group_id = dependency_data.group_id.as_ref()?;
628    let artifact_id = dependency_data.artifact_id.as_ref()?;
629    let version = dependency_data.version.clone();
630    let scope = dependency_data
631        .scope
632        .clone()
633        .or_else(|| fallback_scope.map(str::to_string));
634    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
635
636    let (is_runtime, is_optional) = if force_non_runtime {
637        (Some(false), Some(explicit_optional))
638    } else {
639        match scope.as_deref() {
640            Some("test") | Some("provided") => (Some(false), Some(true)),
641            Some(_) => (Some(true), Some(explicit_optional)),
642            None => (None, Some(explicit_optional)),
643        }
644    };
645
646    Some(Dependency {
647        purl: Some(build_maven_purl(
648            group_id,
649            artifact_id,
650            version.as_deref(),
651            dependency_data.classifier.as_deref(),
652            dependency_data.type_.as_deref(),
653        )),
654        extracted_requirement: version.clone(),
655        scope,
656        is_runtime,
657        is_optional,
658        is_pinned: version.as_deref().map(is_maven_version_pinned),
659        is_direct: Some(true),
660        resolved_package: None,
661        extra_data: dependency_extra_data(dependency_data),
662    })
663}
664
/// Reports whether a Maven version specifier names one exact version.
///
/// After trimming, a specifier is *not* pinned when it is empty, contains any
/// range delimiter (`[`, `]`, `(`, `)`), or equals the dynamic keywords `LATEST`
/// or `RELEASE` (ASCII case-insensitive). Everything else counts as pinned.
///
/// - Pinned: `"1.0.0"`, `"1.2.3"`
/// - Not pinned: `"[1.0.0,2.0.0)"`, `"[1.0.0,)"`, `"LATEST"`, `"RELEASE"`
fn is_maven_version_pinned(version_str: &str) -> bool {
    const DYNAMIC_KEYWORDS: [&str; 2] = ["LATEST", "RELEASE"];

    let candidate = version_str.trim();

    let has_range_delimiter = candidate
        .chars()
        .any(|ch| matches!(ch, '[' | ']' | '(' | ')'));
    let is_dynamic_keyword = DYNAMIC_KEYWORDS
        .iter()
        .any(|keyword| candidate.eq_ignore_ascii_case(keyword));

    !(candidate.is_empty() || has_range_delimiter || is_dynamic_keyword)
}
696
/// Borrowed project and parent coordinates from which the built-in property
/// table is derived.
struct MavenBuiltinPropertyInputs<'a> {
    /// Project group id; falls back to `parent_group_id` when absent.
    namespace: &'a Option<String>,
    /// Project artifact id.
    name: &'a Option<String>,
    /// Project version; falls back to `parent_version` when absent.
    version: &'a Option<String>,
    /// Group id of the parent POM.
    parent_group_id: &'a Option<String>,
    /// Artifact id of the parent POM.
    parent_artifact_id: &'a Option<String>,
    /// Version of the parent POM.
    parent_version: &'a Option<String>,
    /// Project display name (exposed as `project.name`).
    project_name: &'a Option<String>,
    /// Project packaging (exposed as `project.packaging`).
    project_packaging: &'a Option<String>,
}

/// Builds the table of implicit Maven properties (`project.*`, `pom.*`, `parent.*`).
///
/// - group id and version inherit from the parent when the project omits them
/// - project coordinates are exposed under the `project.` and `pom.` prefixes
/// - all three parent coordinates are exposed under `project.parent.`,
///   `pom.parent.` and `parent.`; previously the parent group id was registered
///   only as `project.parent.groupId`, unlike artifact id and version
/// - inputs that are `None` contribute no entries
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    /// Registers `value`, when present, under every name in `aliases`.
    fn insert_aliases(
        builtins: &mut HashMap<String, String>,
        aliases: &[&str],
        value: Option<&str>,
    ) {
        if let Some(value) = value {
            for alias in aliases {
                builtins.insert((*alias).to_string(), value.to_string());
            }
        }
    }

    let parent_group_id = inputs.parent_group_id.as_deref();
    let parent_artifact_id = inputs.parent_artifact_id.as_deref();
    let parent_version = inputs.parent_version.as_deref();

    // Maven inherits groupId and version from the parent when not declared locally.
    let effective_group_id = inputs.namespace.as_deref().or(parent_group_id);
    let effective_version = inputs.version.as_deref().or(parent_version);

    let mut builtins = HashMap::new();

    insert_aliases(
        &mut builtins,
        &["project.groupId", "pom.groupId"],
        effective_group_id,
    );
    insert_aliases(
        &mut builtins,
        &["project.artifactId", "pom.artifactId"],
        inputs.name.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.version", "pom.version"],
        effective_version,
    );

    insert_aliases(
        &mut builtins,
        &[
            "project.parent.groupId",
            "pom.parent.groupId",
            "parent.groupId",
        ],
        parent_group_id,
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.artifactId",
            "pom.parent.artifactId",
            "parent.artifactId",
        ],
        parent_artifact_id,
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.version",
            "pom.parent.version",
            "parent.version",
        ],
        parent_version,
    );

    insert_aliases(
        &mut builtins,
        &["project.packaging"],
        inputs.project_packaging.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.name"],
        inputs.project_name.as_deref(),
    );

    builtins
}
760
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// Files named `pom.properties` and `MANIFEST.MF` are dispatched to their
/// dedicated parsers; any other path is parsed as a POM XML document.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
#[derive(Debug)]
pub struct MavenParser;
766
767impl PackageParser for MavenParser {
768    const PACKAGE_TYPE: PackageType = PackageType::Maven;
769
770    fn extract_packages(path: &Path) -> Vec<PackageData> {
771        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
772            if filename == "pom.properties" {
773                return vec![parse_pom_properties(path)];
774            } else if filename == "MANIFEST.MF" {
775                return vec![parse_manifest_mf(path)];
776            }
777        }
778
779        let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
780            Ok(content) => content,
781            Err(e) => {
782                warn!("Failed to open pom.xml at {:?}: {}", path, e);
783                return vec![default_package_data(DatasourceId::MavenPom)];
784            }
785        };
786
787        let sanitized_content = sanitize_template_directives(&content);
788        let mut reader = Reader::from_str(sanitized_content.as_ref());
789        reader.config_mut().trim_text(true);
790
791        let mut buf = Vec::new();
792        let mut package_data = default_package_data(DatasourceId::MavenPom);
793        package_data.package_type = Some(Self::PACKAGE_TYPE);
794        package_data.primary_language = Some("Java".to_string());
795        package_data.datasource_id = Some(DatasourceId::MavenPom);
796
797        let mut current_element = Vec::new();
798        let mut in_dependencies = false;
799        let mut current_dependency: Option<Dependency> = None;
800        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
801        let mut current_dependency_data: Option<MavenDependencyData> = None;
802
803        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
804        let mut xml_license_comments: Vec<String> = Vec::new();
805        let mut current_license: Option<MavenLicenseEntry> = None;
806        let mut inception_year = None;
807        let mut scm_connection = None;
808        let mut scm_developer_connection = None;
809        let mut scm_url = None;
810        let mut scm_tag = None;
811        let mut organization_name = None;
812        let mut organization_url = None;
813        let mut in_developers = false;
814        let mut in_contributors = false;
815        let mut current_party: Option<Party> = None;
816        let mut issue_management_system = None;
817        let mut issue_management_url = None;
818        let mut ci_management_system = None;
819        let mut ci_management_url = None;
820        let mut in_distribution_management = false;
821        let mut in_dist_repository = false;
822        let mut in_dist_snapshot_repository = false;
823        let mut in_dist_site = false;
824        let mut dist_download_url = None;
825        let mut dist_repository_id = None;
826        let mut dist_repository_name = None;
827        let mut dist_repository_url = None;
828        let mut dist_repository_layout = None;
829        let mut dist_snapshot_repository_id = None;
830        let mut dist_snapshot_repository_name = None;
831        let mut dist_snapshot_repository_url = None;
832        let mut dist_snapshot_repository_layout = None;
833        let mut dist_site_id = None;
834        let mut dist_site_name = None;
835        let mut dist_site_url = None;
836        let mut in_repositories = false;
837        let mut in_plugin_repositories = false;
838        let mut in_repository = false;
839        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
840        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
841        let mut current_repository_id = None;
842        let mut current_repository_name = None;
843        let mut current_repository_url = None;
844        let mut in_modules = false;
845        let mut modules: Vec<String> = Vec::new();
846        let mut in_mailing_lists = false;
847        let mut in_mailing_list = false;
848        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
849        let mut current_mailing_list_name = None;
850        let mut current_mailing_list_subscribe = None;
851        let mut current_mailing_list_unsubscribe = None;
852        let mut current_mailing_list_post = None;
853        let mut current_mailing_list_archive = None;
854        let mut in_dependency_management = false;
855        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
856        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
857        let mut in_dep_mgmt_dependency = false;
858        let mut in_parent = false;
859        let mut parent_group_id = None;
860        let mut parent_artifact_id = None;
861        let mut parent_version = None;
862        let mut parent_relative_path = None;
863        let mut in_properties = false;
864        let mut properties: HashMap<String, String> = HashMap::new();
865        let mut project_name = None;
866        let mut project_description = None;
867        let mut project_packaging = None;
868        let mut project_classifier = None;
869        let mut in_relocation = false;
870        let mut relocation = MavenDependencyData::default();
871
872        let mut iteration_count: usize = 0;
873        loop {
874            iteration_count += 1;
875            if iteration_count > MAX_ITERATION_COUNT {
876                warn!(
877                    "Exceeded MAX_ITERATION_COUNT ({}) parsing pom.xml at {:?}; stopping early",
878                    MAX_ITERATION_COUNT, path
879                );
880                break;
881            }
882            match reader.read_event_into(&mut buf) {
883                Ok(Event::Start(e)) => {
884                    let element_name = e.name().as_ref().to_vec();
885                    current_element.push(element_name.clone());
886
887                    match element_name.as_slice() {
888                        b"parent" => in_parent = true,
889                        b"dependencyManagement" => in_dependency_management = true,
890                        b"dependencies" if in_dependency_management => {}
891                        b"dependencies" => in_dependencies = true,
892                        b"dependency" if in_dependency_management => {
893                            in_dep_mgmt_dependency = true;
894                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
895                        }
896                        b"dependency" if in_dependencies => {
897                            current_dependency = Some(Dependency {
898                                purl: None,
899                                extracted_requirement: None,
900                                scope: None,
901                                is_runtime: None,
902                                is_optional: Some(false),
903                                is_pinned: None,
904                                is_direct: Some(true),
905                                resolved_package: None,
906                                extra_data: None,
907                            });
908                            current_dependency_data = Some(MavenDependencyData::default());
909                        }
910                        b"properties" => in_properties = true,
911                        b"developers" => in_developers = true,
912                        b"developer" if in_developers => {
913                            current_party = Some(Party {
914                                r#type: Some("person".to_string()),
915                                role: Some("developer".to_string()),
916                                name: None,
917                                email: None,
918                                url: None,
919                                organization: None,
920                                organization_url: None,
921                                timezone: None,
922                            });
923                        }
924                        b"contributors" => in_contributors = true,
925                        b"contributor" if in_contributors => {
926                            current_party = Some(Party {
927                                r#type: Some("person".to_string()),
928                                role: Some("contributor".to_string()),
929                                name: None,
930                                email: None,
931                                url: None,
932                                organization: None,
933                                organization_url: None,
934                                timezone: None,
935                            });
936                        }
937                        b"license" => current_license = Some(MavenLicenseEntry::default()),
938                        b"distributionManagement" => in_distribution_management = true,
939                        b"relocation" if in_distribution_management => {
940                            in_relocation = true;
941                            relocation = MavenDependencyData::default();
942                        }
943                        b"repository" if in_distribution_management => in_dist_repository = true,
944                        b"snapshotRepository" if in_distribution_management => {
945                            in_dist_snapshot_repository = true
946                        }
947                        b"site" if in_distribution_management => in_dist_site = true,
948                        b"repositories" => in_repositories = true,
949                        b"pluginRepositories" => in_plugin_repositories = true,
950                        b"repository" if in_repositories && !in_distribution_management => {
951                            in_repository = true;
952                            current_repository_id = None;
953                            current_repository_name = None;
954                            current_repository_url = None;
955                        }
956                        b"pluginRepository" if in_plugin_repositories => {
957                            in_repository = true;
958                            current_repository_id = None;
959                            current_repository_name = None;
960                            current_repository_url = None;
961                        }
962                        b"modules" => in_modules = true,
963                        b"mailingLists" => in_mailing_lists = true,
964                        b"mailingList" if in_mailing_lists => {
965                            in_mailing_list = true;
966                            current_mailing_list_name = None;
967                            current_mailing_list_subscribe = None;
968                            current_mailing_list_unsubscribe = None;
969                            current_mailing_list_post = None;
970                            current_mailing_list_archive = None;
971                        }
972                        _ => {}
973                    }
974                }
975                Ok(Event::Text(e)) => {
976                    let text = match e.decode() {
977                        Ok(Cow::Borrowed(s)) => s.to_string(),
978                        Ok(Cow::Owned(s)) => s,
979                        Err(_) => {
980                            warn!(
981                                "Invalid UTF-8 in XML text content in {:?}; using lossy conversion",
982                                path
983                            );
984                            String::from_utf8_lossy(e.as_ref()).into_owned()
985                        }
986                    };
987                    let current_path = current_element.last().map(|v| v.as_slice());
988                    let current_parent = current_element
989                        .len()
990                        .checked_sub(2)
991                        .map(|index| current_element[index].as_slice());
992
993                    if in_properties
994                        && current_element.len() >= 2
995                        && current_element[current_element.len() - 2] == b"properties"
996                    {
997                        if let Some(property_name) = current_element
998                            .last()
999                            .and_then(|name| std::str::from_utf8(name).ok())
1000                        {
1001                            properties.insert(property_name.to_string(), truncate_field(text));
1002                        } else {
1003                            warn!("Failed to decode Maven property name in {:?}", path);
1004                        }
1005                    } else if in_dep_mgmt_dependency {
1006                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
1007                            match current_path {
1008                                Some(b"groupId") if current_parent == Some(b"dependency") => {
1009                                    dep_mgmt.group_id = Some(text)
1010                                }
1011                                Some(b"artifactId") if current_parent == Some(b"dependency") => {
1012                                    dep_mgmt.artifact_id = Some(text)
1013                                }
1014                                Some(b"version") if current_parent == Some(b"dependency") => {
1015                                    dep_mgmt.version = Some(text)
1016                                }
1017                                Some(b"scope") if current_parent == Some(b"dependency") => {
1018                                    dep_mgmt.scope = Some(text)
1019                                }
1020                                Some(b"type") if current_parent == Some(b"dependency") => {
1021                                    dep_mgmt.type_ = Some(text)
1022                                }
1023                                Some(b"classifier") if current_parent == Some(b"dependency") => {
1024                                    dep_mgmt.classifier = Some(text)
1025                                }
1026                                Some(b"optional") if current_parent == Some(b"dependency") => {
1027                                    dep_mgmt.optional = Some(text)
1028                                }
1029                                _ => {}
1030                            }
1031                        }
1032                    } else if let Some(license) = &mut current_license {
1033                        match current_path {
1034                            Some(b"name") => license.name = Some(text),
1035                            Some(b"url") => license.url = Some(text),
1036                            Some(b"comments") => license.comments = Some(text),
1037                            _ => {}
1038                        }
1039                    } else if let Some(party) = &mut current_party {
1040                        match current_path {
1041                            Some(b"name") => party.name = Some(text),
1042                            Some(b"email") => party.email = Some(text),
1043                            Some(b"url") => party.url = Some(text),
1044                            Some(b"organization") => party.organization = Some(text),
1045                            Some(b"organizationUrl") => party.organization_url = Some(text),
1046                            Some(b"timezone") => party.timezone = Some(text),
1047                            _ => {}
1048                        }
1049                    } else if let Some(dep) = &mut current_dependency {
1050                        match current_path {
1051                            Some(b"groupId") => {
1052                                if current_parent == Some(b"dependency")
1053                                    && let Some(coords) = current_dependency_data.as_mut()
1054                                {
1055                                    coords.group_id = Some(text);
1056                                }
1057                            }
1058                            Some(b"artifactId") => {
1059                                if current_parent == Some(b"dependency")
1060                                    && let Some(coords) = current_dependency_data.as_mut()
1061                                {
1062                                    coords.artifact_id = Some(text);
1063                                }
1064                            }
1065                            Some(b"version") => {
1066                                if current_parent == Some(b"dependency")
1067                                    && let Some(coords) = current_dependency_data.as_mut()
1068                                {
1069                                    coords.version = Some(text);
1070                                }
1071                            }
1072                            Some(b"scope") => {
1073                                if current_parent == Some(b"dependency") {
1074                                    dep.scope = Some(text.clone());
1075                                    dep.is_optional = Some(text == "test" || text == "provided");
1076                                    dep.is_runtime = Some(text != "test" && text != "provided");
1077                                }
1078                                if current_parent == Some(b"dependency")
1079                                    && let Some(coords) = current_dependency_data.as_mut()
1080                                {
1081                                    coords.scope = Some(text);
1082                                }
1083                            }
1084                            Some(b"optional") => {
1085                                if current_parent == Some(b"dependency")
1086                                    && let Some(coords) = current_dependency_data.as_mut()
1087                                {
1088                                    coords.optional = Some(text);
1089                                }
1090                            }
1091                            Some(b"type") => {
1092                                if current_parent == Some(b"dependency")
1093                                    && let Some(coords) = current_dependency_data.as_mut()
1094                                {
1095                                    coords.type_ = Some(text);
1096                                }
1097                            }
1098                            Some(b"classifier") => {
1099                                if current_parent == Some(b"dependency")
1100                                    && let Some(coords) = current_dependency_data.as_mut()
1101                                {
1102                                    coords.classifier = Some(text);
1103                                }
1104                            }
1105                            Some(b"systemPath") => {
1106                                if current_parent == Some(b"dependency")
1107                                    && let Some(coords) = current_dependency_data.as_mut()
1108                                {
1109                                    coords.system_path = Some(text);
1110                                }
1111                            }
1112                            _ => {}
1113                        }
1114                    } else if in_relocation {
1115                        match current_path {
1116                            Some(b"groupId") => relocation.group_id = Some(text),
1117                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1118                            Some(b"version") => relocation.version = Some(text),
1119                            Some(b"classifier") => relocation.classifier = Some(text),
1120                            Some(b"type") => relocation.type_ = Some(text),
1121                            Some(b"message") => relocation.message = Some(text),
1122                            _ => {}
1123                        }
1124                    } else if in_parent {
1125                        match current_path {
1126                            Some(b"groupId") => {
1127                                parent_group_id = Some(text);
1128                            }
1129                            Some(b"artifactId") => {
1130                                parent_artifact_id = Some(text);
1131                            }
1132                            Some(b"version") => {
1133                                parent_version = Some(text);
1134                            }
1135                            Some(b"relativePath") => {
1136                                parent_relative_path = Some(text);
1137                            }
1138                            _ => {}
1139                        }
1140                    } else {
1141                        match current_path {
1142                            Some(b"groupId") if current_element.len() == 2 => {
1143                                package_data.namespace = Some(text)
1144                            }
1145                            Some(b"artifactId") if current_element.len() == 2 => {
1146                                package_data.name = Some(text)
1147                            }
1148                            Some(b"version") if current_element.len() == 2 => {
1149                                package_data.version = Some(text)
1150                            }
1151                            Some(b"name") if current_element.len() == 2 => {
1152                                project_name = Some(text)
1153                            }
1154                            Some(b"description") if current_element.len() == 2 => {
1155                                project_description = Some(text)
1156                            }
1157                            Some(b"packaging") if current_element.len() == 2 => {
1158                                project_packaging = Some(text)
1159                            }
1160                            Some(b"classifier") if current_element.len() == 2 => {
1161                                project_classifier = Some(text)
1162                            }
1163                            Some(b"url") if current_element.len() == 2 => {
1164                                package_data.homepage_url = Some(text)
1165                            }
1166                            Some(b"inceptionYear") if current_element.len() == 2 => {
1167                                inception_year = Some(text)
1168                            }
1169                            Some(b"connection")
1170                                if current_element.len() >= 3
1171                                    && current_element[current_element.len() - 2] == b"scm" =>
1172                            {
1173                                scm_connection = if text.starts_with("scm:git:") {
1174                                    Some(text.replacen("scm:git:", "git+", 1))
1175                                } else if text.starts_with("scm:") {
1176                                    Some(text.replacen("scm:", "", 1))
1177                                } else {
1178                                    Some(text)
1179                                };
1180                            }
1181                            Some(b"developerConnection")
1182                                if current_element.len() >= 3
1183                                    && current_element[current_element.len() - 2] == b"scm" =>
1184                            {
1185                                scm_developer_connection = if text.starts_with("scm:git:") {
1186                                    Some(text.replacen("scm:git:", "git+", 1))
1187                                } else if text.starts_with("scm:") {
1188                                    Some(text.replacen("scm:", "", 1))
1189                                } else {
1190                                    Some(text)
1191                                };
1192                            }
1193                            Some(b"url")
1194                                if current_element.len() >= 3
1195                                    && current_element[current_element.len() - 2] == b"scm" =>
1196                            {
1197                                scm_url = Some(text);
1198                            }
1199                            Some(b"tag")
1200                                if current_element.len() >= 3
1201                                    && current_element[current_element.len() - 2] == b"scm" =>
1202                            {
1203                                scm_tag = Some(text);
1204                            }
1205                            Some(b"name")
1206                                if current_element.len() >= 2
1207                                    && current_element[current_element.len() - 2]
1208                                        == b"organization" =>
1209                            {
1210                                organization_name = Some(text);
1211                            }
1212                            Some(b"url")
1213                                if current_element.len() >= 2
1214                                    && current_element[current_element.len() - 2]
1215                                        == b"organization" =>
1216                            {
1217                                organization_url = Some(text);
1218                            }
1219                            Some(b"system")
1220                                if current_element.len() >= 2
1221                                    && current_element[current_element.len() - 2]
1222                                        == b"issueManagement" =>
1223                            {
1224                                issue_management_system = Some(text);
1225                            }
1226                            Some(b"url")
1227                                if current_element.len() >= 2
1228                                    && current_element[current_element.len() - 2]
1229                                        == b"issueManagement" =>
1230                            {
1231                                issue_management_url = Some(text);
1232                            }
1233                            Some(b"system")
1234                                if current_element.len() >= 2
1235                                    && current_element[current_element.len() - 2]
1236                                        == b"ciManagement" =>
1237                            {
1238                                ci_management_system = Some(text);
1239                            }
1240                            Some(b"url")
1241                                if current_element.len() >= 2
1242                                    && current_element[current_element.len() - 2]
1243                                        == b"ciManagement" =>
1244                            {
1245                                ci_management_url = Some(text);
1246                            }
1247                            Some(b"downloadUrl")
1248                                if current_element.len() >= 2
1249                                    && current_element[current_element.len() - 2]
1250                                        == b"distributionManagement" =>
1251                            {
1252                                dist_download_url = Some(text);
1253                            }
1254                            Some(b"id") if in_dist_repository => {
1255                                dist_repository_id = Some(text);
1256                            }
1257                            Some(b"name") if in_dist_repository => {
1258                                dist_repository_name = Some(text);
1259                            }
1260                            Some(b"url") if in_dist_repository => {
1261                                dist_repository_url = Some(text);
1262                            }
1263                            Some(b"layout") if in_dist_repository => {
1264                                dist_repository_layout = Some(text);
1265                            }
1266                            Some(b"id") if in_dist_snapshot_repository => {
1267                                dist_snapshot_repository_id = Some(text);
1268                            }
1269                            Some(b"name") if in_dist_snapshot_repository => {
1270                                dist_snapshot_repository_name = Some(text);
1271                            }
1272                            Some(b"url") if in_dist_snapshot_repository => {
1273                                dist_snapshot_repository_url = Some(text);
1274                            }
1275                            Some(b"layout") if in_dist_snapshot_repository => {
1276                                dist_snapshot_repository_layout = Some(text);
1277                            }
1278                            Some(b"id") if in_dist_site => {
1279                                dist_site_id = Some(text);
1280                            }
1281                            Some(b"name") if in_dist_site => {
1282                                dist_site_name = Some(text);
1283                            }
1284                            Some(b"url") if in_dist_site => {
1285                                dist_site_url = Some(text);
1286                            }
1287                            Some(b"id") if in_repository => {
1288                                current_repository_id = Some(text);
1289                            }
1290                            Some(b"name") if in_repository => {
1291                                current_repository_name = Some(text);
1292                            }
1293                            Some(b"url") if in_repository => {
1294                                current_repository_url = Some(text);
1295                            }
1296                            Some(b"module") if in_modules => {
1297                                modules.push(text);
1298                            }
1299                            Some(b"name") if in_mailing_list => {
1300                                current_mailing_list_name = Some(text);
1301                            }
1302                            Some(b"subscribe") if in_mailing_list => {
1303                                current_mailing_list_subscribe = Some(text);
1304                            }
1305                            Some(b"unsubscribe") if in_mailing_list => {
1306                                current_mailing_list_unsubscribe = Some(text);
1307                            }
1308                            Some(b"post") if in_mailing_list => {
1309                                current_mailing_list_post = Some(text);
1310                            }
1311                            Some(b"archive") if in_mailing_list => {
1312                                current_mailing_list_archive = Some(text);
1313                            }
1314                            _ => {}
1315                        }
1316                    }
1317                }
1318                Ok(Event::Comment(e)) => {
1319                    let comment = match e.decode() {
1320                        Ok(Cow::Borrowed(s)) => s.trim().to_string(),
1321                        Ok(Cow::Owned(s)) => s.trim().to_string(),
1322                        Err(_) => {
1323                            warn!(
1324                                "Invalid UTF-8 in XML comment in {:?}; using lossy conversion",
1325                                path
1326                            );
1327                            String::from_utf8_lossy(e.as_ref())
1328                                .into_owned()
1329                                .trim()
1330                                .to_string()
1331                        }
1332                    };
1333                    if current_element.is_empty()
1334                        && !comment.is_empty()
1335                        && is_license_like_comment(&comment)
1336                    {
1337                        xml_license_comments.push(comment);
1338                    }
1339                }
1340                Ok(Event::End(e)) => {
1341                    if !current_element.is_empty() {
1342                        current_element.pop();
1343                    }
1344
1345                    match e.name().as_ref() {
1346                        b"parent" => in_parent = false,
1347                        b"dependencyManagement" => in_dependency_management = false,
1348                        b"dependencies" => in_dependencies = false,
1349                        b"dependency" if in_dep_mgmt_dependency => {
1350                            in_dep_mgmt_dependency = false;
1351                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1352                                && (dep_mgmt.group_id.is_some()
1353                                    || dep_mgmt.artifact_id.is_some()
1354                                    || dep_mgmt.version.is_some())
1355                            {
1356                                dependency_management_entries.push(dep_mgmt);
1357                            }
1358                        }
1359                        b"dependency" => {
1360                            if let (Some(dep), Some(coords)) =
1361                                (current_dependency.take(), current_dependency_data.take())
1362                            {
1363                                package_data.dependencies.push(dep);
1364                                dependency_data.push(coords);
1365                            } else if let Some(dep) = current_dependency.take() {
1366                                package_data.dependencies.push(dep);
1367                            }
1368                        }
1369                        b"license" => {
1370                            if let Some(license) = current_license.take()
1371                                && (license.name.is_some()
1372                                    || license.url.is_some()
1373                                    || license.comments.is_some())
1374                            {
1375                                licenses.push(license);
1376                            }
1377                        }
1378                        b"developers" => in_developers = false,
1379                        b"developer" => {
1380                            if let Some(party) = current_party.take() {
1381                                package_data.parties.push(party);
1382                            }
1383                        }
1384                        b"contributors" => in_contributors = false,
1385                        b"contributor" => {
1386                            if let Some(party) = current_party.take() {
1387                                package_data.parties.push(party);
1388                            }
1389                        }
1390                        b"distributionManagement" => in_distribution_management = false,
1391                        b"relocation" => in_relocation = false,
1392                        b"repository" if !in_dependencies && in_distribution_management => {
1393                            in_dist_repository = false
1394                        }
1395                        b"repository" if !in_dependencies && in_repositories => {
1396                            in_repository = false;
1397                            if current_repository_id.is_some()
1398                                || current_repository_name.is_some()
1399                                || current_repository_url.is_some()
1400                            {
1401                                let mut repo = serde_json::Map::new();
1402                                if let Some(id) = current_repository_id.take() {
1403                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1404                                }
1405                                if let Some(name) = current_repository_name.take() {
1406                                    repo.insert(
1407                                        "name".to_string(),
1408                                        serde_json::Value::String(name),
1409                                    );
1410                                }
1411                                if let Some(url) = current_repository_url.take() {
1412                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1413                                }
1414                                repositories.push(repo);
1415                            }
1416                        }
1417                        b"pluginRepository" if in_plugin_repositories => {
1418                            in_repository = false;
1419                            if current_repository_id.is_some()
1420                                || current_repository_name.is_some()
1421                                || current_repository_url.is_some()
1422                            {
1423                                let mut repo = serde_json::Map::new();
1424                                if let Some(id) = current_repository_id.take() {
1425                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1426                                }
1427                                if let Some(name) = current_repository_name.take() {
1428                                    repo.insert(
1429                                        "name".to_string(),
1430                                        serde_json::Value::String(name),
1431                                    );
1432                                }
1433                                if let Some(url) = current_repository_url.take() {
1434                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1435                                }
1436                                plugin_repositories.push(repo);
1437                            }
1438                        }
1439                        b"repositories" => in_repositories = false,
1440                        b"properties" => in_properties = false,
1441                        b"pluginRepositories" => in_plugin_repositories = false,
1442                        b"modules" => in_modules = false,
1443                        b"mailingLists" => in_mailing_lists = false,
1444                        b"mailingList" => {
1445                            in_mailing_list = false;
1446                            if current_mailing_list_name.is_some()
1447                                || current_mailing_list_subscribe.is_some()
1448                                || current_mailing_list_unsubscribe.is_some()
1449                                || current_mailing_list_post.is_some()
1450                                || current_mailing_list_archive.is_some()
1451                            {
1452                                let mut ml = serde_json::Map::new();
1453                                if let Some(name) = current_mailing_list_name.take() {
1454                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1455                                }
1456                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1457                                    ml.insert(
1458                                        "subscribe".to_string(),
1459                                        serde_json::Value::String(subscribe),
1460                                    );
1461                                }
1462                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1463                                    ml.insert(
1464                                        "unsubscribe".to_string(),
1465                                        serde_json::Value::String(unsubscribe),
1466                                    );
1467                                }
1468                                if let Some(post) = current_mailing_list_post.take() {
1469                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1470                                }
1471                                if let Some(archive) = current_mailing_list_archive.take() {
1472                                    ml.insert(
1473                                        "archive".to_string(),
1474                                        serde_json::Value::String(archive),
1475                                    );
1476                                }
1477                                mailing_lists.push(ml);
1478                            }
1479                        }
1480                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1481                        b"site" => in_dist_site = false,
1482                        _ => {}
1483                    }
1484                }
1485                Ok(Event::Eof) => break,
1486                Err(e) => {
1487                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1488                    return vec![package_data];
1489                }
1490                _ => {}
1491            }
1492            buf.clear();
1493        }
1494
1495        let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1496            namespace: &package_data.namespace,
1497            name: &package_data.name,
1498            version: &package_data.version,
1499            parent_group_id: &parent_group_id,
1500            parent_artifact_id: &parent_artifact_id,
1501            parent_version: &parent_version,
1502            project_name: &project_name,
1503            project_packaging: &project_packaging,
1504        });
1505        let mut resolver = PropertyResolver::new(properties, builtins);
1506
1507        resolve_option(&mut resolver, &mut package_data.namespace);
1508        resolve_option(&mut resolver, &mut package_data.name);
1509        resolve_option(&mut resolver, &mut package_data.version);
1510        resolve_option(&mut resolver, &mut package_data.homepage_url);
1511        resolve_option(&mut resolver, &mut inception_year);
1512        resolve_option(&mut resolver, &mut scm_connection);
1513        resolve_option(&mut resolver, &mut scm_developer_connection);
1514        resolve_option(&mut resolver, &mut scm_url);
1515        resolve_option(&mut resolver, &mut scm_tag);
1516        resolve_option(&mut resolver, &mut organization_name);
1517        resolve_option(&mut resolver, &mut organization_url);
1518        resolve_option(&mut resolver, &mut issue_management_system);
1519        resolve_option(&mut resolver, &mut issue_management_url);
1520        resolve_option(&mut resolver, &mut ci_management_system);
1521        resolve_option(&mut resolver, &mut ci_management_url);
1522        resolve_option(&mut resolver, &mut dist_download_url);
1523        resolve_option(&mut resolver, &mut dist_repository_id);
1524        resolve_option(&mut resolver, &mut dist_repository_name);
1525        resolve_option(&mut resolver, &mut dist_repository_url);
1526        resolve_option(&mut resolver, &mut dist_repository_layout);
1527        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1528        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1529        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1530        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1531        resolve_option(&mut resolver, &mut dist_site_id);
1532        resolve_option(&mut resolver, &mut dist_site_name);
1533        resolve_option(&mut resolver, &mut dist_site_url);
1534        resolve_option(&mut resolver, &mut parent_group_id);
1535        resolve_option(&mut resolver, &mut parent_artifact_id);
1536        resolve_option(&mut resolver, &mut parent_version);
1537        resolve_option(&mut resolver, &mut parent_relative_path);
1538        resolve_option(&mut resolver, &mut project_name);
1539        resolve_option(&mut resolver, &mut project_description);
1540        resolve_option(&mut resolver, &mut project_packaging);
1541        resolve_option(&mut resolver, &mut project_classifier);
1542        resolve_vec(&mut resolver, &mut modules);
1543        resolve_maps(&mut resolver, &mut repositories);
1544        resolve_maps(&mut resolver, &mut plugin_repositories);
1545        resolve_maps(&mut resolver, &mut mailing_lists);
1546        for comment in &mut xml_license_comments {
1547            *comment = resolver.resolve_text(comment, 0);
1548        }
1549        for dependency in &mut dependency_management_entries {
1550            resolve_dependency_data(&mut resolver, dependency);
1551        }
1552        resolve_dependency_data(&mut resolver, &mut relocation);
1553        for license in &mut licenses {
1554            resolve_license_entry(&mut resolver, license);
1555        }
1556        for comment in xml_license_comments {
1557            if !comment.trim().is_empty() {
1558                licenses.push(MavenLicenseEntry {
1559                    comments: Some(comment),
1560                    ..Default::default()
1561                });
1562            }
1563        }
1564
1565        for (dependency, coords) in package_data
1566            .dependencies
1567            .iter_mut()
1568            .zip(dependency_data.iter_mut())
1569        {
1570            resolve_dependency_data(&mut resolver, coords);
1571            dependency.scope = coords.scope.clone();
1572            dependency.extracted_requirement = coords.version.clone();
1573            dependency.extra_data = dependency_extra_data(coords);
1574            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1575
1576            match dependency.scope.as_deref() {
1577                Some("test") | Some("provided") => {
1578                    dependency.is_runtime = Some(false);
1579                    dependency.is_optional = Some(true);
1580                }
1581                Some(_) => {
1582                    dependency.is_runtime = Some(true);
1583                }
1584                None => {
1585                    dependency.is_runtime = None;
1586                }
1587            }
1588
1589            if let Some(version) = &coords.version {
1590                dependency.is_pinned = Some(is_maven_version_pinned(version));
1591            }
1592
1593            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1594                dependency.purl = Some(build_maven_purl(
1595                    group_id,
1596                    artifact_id,
1597                    coords.version.as_deref(),
1598                    coords.classifier.as_deref(),
1599                    coords.type_.as_deref(),
1600                ));
1601            }
1602        }
1603
1604        if package_data.namespace.is_none() {
1605            package_data.namespace = parent_group_id.clone();
1606        }
1607        if package_data.version.is_none() {
1608            package_data.version = parent_version.clone();
1609        }
1610
1611        package_data.qualifiers =
1612            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1613
1614        package_data.description = match (
1615            project_name.as_deref().filter(|value| !value.is_empty()),
1616            project_description
1617                .as_deref()
1618                .filter(|value| !value.is_empty()),
1619        ) {
1620            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1621            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1622            (Some(name), None) => Some(name.to_string()),
1623            (None, Some(description)) => Some(description.to_string()),
1624            (None, None) => None,
1625        };
1626
1627        if path.to_string_lossy().contains("META-INF/maven/") {
1628            let path_str = path.to_string_lossy();
1629            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1630                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1631                let parts: Vec<&str> = after_maven.split('/').collect();
1632                if parts.len() >= 2 {
1633                    if package_data.namespace.is_none() {
1634                        package_data.namespace = Some(parts[0].to_string());
1635                    }
1636                    if package_data.name.is_none() {
1637                        package_data.name = Some(parts[1].to_string());
1638                    }
1639                }
1640            }
1641        }
1642
1643        // Construct PURL from parsed data
1644        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1645            &package_data.namespace,
1646            &package_data.name,
1647            &package_data.version,
1648        ) {
1649            package_data.purl = Some(build_maven_purl(
1650                group_id,
1651                artifact_id,
1652                Some(version),
1653                project_classifier.as_deref(),
1654                project_packaging.as_deref(),
1655            ));
1656            if project_classifier.is_none() {
1657                package_data
1658                    .source_packages
1659                    .push(build_maven_source_package(group_id, artifact_id, version));
1660            }
1661        }
1662
1663        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1664            package_data.repository_homepage_url = build_maven_url(
1665                &package_data.namespace,
1666                &package_data.name,
1667                &package_data.version,
1668                None,
1669            );
1670
1671            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1672                build_maven_download_url(
1673                    group_id,
1674                    artifact_id,
1675                    ver,
1676                    project_classifier.as_deref(),
1677                    project_packaging.as_deref(),
1678                )
1679            });
1680
1681            if let Some(ver) = &package_data.version {
1682                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1683                package_data.api_data_url = build_maven_url(
1684                    &package_data.namespace,
1685                    &package_data.name,
1686                    &package_data.version,
1687                    Some(&pom_filename),
1688                );
1689            }
1690        }
1691
1692        package_data.vcs_url = scm_connection
1693            .or_else(|| scm_developer_connection.clone())
1694            .or_else(|| scm_url.clone());
1695
1696        // Set code_view_url from scm/url (human-browseable URL)
1697        if let Some(url) = &scm_url {
1698            package_data.code_view_url = Some(url.clone());
1699        }
1700
1701        // Set bug_tracking_url from issueManagement/url
1702        if let Some(url) = &issue_management_url {
1703            package_data.bug_tracking_url = Some(url.clone());
1704        }
1705
1706        // Map downloadUrl to download_url field
1707        if let Some(url) = &dist_download_url {
1708            package_data.download_url = Some(url.clone());
1709        }
1710
1711        if organization_name.is_some() || organization_url.is_some() {
1712            package_data.parties.push(Party {
1713                r#type: Some("organization".to_string()),
1714                role: Some("owner".to_string()),
1715                name: organization_name.clone(),
1716                email: None,
1717                url: organization_url.clone(),
1718                organization: None,
1719                organization_url: None,
1720                timezone: None,
1721            });
1722        }
1723
1724        for dependency in &dependency_management_entries {
1725            let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1726                Some("import")
1727            } else {
1728                Some("dependencymanagement")
1729            };
1730
1731            if let Some(converted) =
1732                maven_dependency_to_dependency(dependency, fallback_scope, true)
1733            {
1734                package_data.dependencies.push(converted);
1735            }
1736        }
1737
1738        if (relocation.group_id.is_some()
1739            || relocation.artifact_id.is_some()
1740            || relocation.version.is_some())
1741            && let Some(converted) =
1742                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1743        {
1744            package_data.dependencies.push(converted);
1745        }
1746
1747        if inception_year.is_some()
1748            || organization_name.is_some()
1749            || organization_url.is_some()
1750            || scm_tag.is_some()
1751            || scm_developer_connection.is_some()
1752            || issue_management_system.is_some()
1753            || ci_management_system.is_some()
1754            || ci_management_url.is_some()
1755            || dist_download_url.is_some()
1756            || dist_repository_id.is_some()
1757            || dist_snapshot_repository_id.is_some()
1758            || dist_site_id.is_some()
1759            || !repositories.is_empty()
1760            || !plugin_repositories.is_empty()
1761            || !modules.is_empty()
1762            || !mailing_lists.is_empty()
1763            || !dependency_management_entries.is_empty()
1764            || parent_group_id.is_some()
1765            || relocation.group_id.is_some()
1766            || relocation.artifact_id.is_some()
1767            || relocation.version.is_some()
1768            || relocation.message.is_some()
1769        {
1770            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1771            if let Some(year) = inception_year {
1772                extra_data.insert(
1773                    "inception_year".to_string(),
1774                    serde_json::Value::String(year),
1775                );
1776            }
1777            if let Some(name) = organization_name {
1778                extra_data.insert(
1779                    "organization_name".to_string(),
1780                    serde_json::Value::String(name),
1781                );
1782            }
1783            if let Some(url) = organization_url {
1784                extra_data.insert(
1785                    "organization_url".to_string(),
1786                    serde_json::Value::String(url),
1787                );
1788            }
1789            if let Some(tag) = scm_tag {
1790                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1791            }
1792            if let Some(dev_conn) = scm_developer_connection {
1793                extra_data.insert(
1794                    "scm_developer_connection".to_string(),
1795                    serde_json::Value::String(dev_conn),
1796                );
1797            }
1798            if let Some(system) = issue_management_system {
1799                extra_data.insert(
1800                    "issue_tracking_system".to_string(),
1801                    serde_json::Value::String(system),
1802                );
1803            }
1804            if let Some(system) = ci_management_system {
1805                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1806            }
1807            if let Some(url) = ci_management_url {
1808                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1809            }
1810
1811            // Add distribution management data
1812            if let Some(url) = dist_download_url {
1813                extra_data.insert(
1814                    "distribution_download_url".to_string(),
1815                    serde_json::Value::String(url),
1816                );
1817            }
1818
1819            // Build repository object
1820            if dist_repository_id.is_some()
1821                || dist_repository_name.is_some()
1822                || dist_repository_url.is_some()
1823                || dist_repository_layout.is_some()
1824            {
1825                let mut repo = serde_json::Map::new();
1826                if let Some(id) = dist_repository_id {
1827                    repo.insert("id".to_string(), serde_json::Value::String(id));
1828                }
1829                if let Some(name) = dist_repository_name {
1830                    repo.insert("name".to_string(), serde_json::Value::String(name));
1831                }
1832                if let Some(url) = dist_repository_url {
1833                    repo.insert("url".to_string(), serde_json::Value::String(url));
1834                }
1835                if let Some(layout) = dist_repository_layout {
1836                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1837                }
1838                extra_data.insert(
1839                    "distribution_repository".to_string(),
1840                    serde_json::Value::Object(repo),
1841                );
1842            }
1843
1844            // Build snapshotRepository object
1845            if dist_snapshot_repository_id.is_some()
1846                || dist_snapshot_repository_name.is_some()
1847                || dist_snapshot_repository_url.is_some()
1848                || dist_snapshot_repository_layout.is_some()
1849            {
1850                let mut repo = serde_json::Map::new();
1851                if let Some(id) = dist_snapshot_repository_id {
1852                    repo.insert("id".to_string(), serde_json::Value::String(id));
1853                }
1854                if let Some(name) = dist_snapshot_repository_name {
1855                    repo.insert("name".to_string(), serde_json::Value::String(name));
1856                }
1857                if let Some(url) = dist_snapshot_repository_url {
1858                    repo.insert("url".to_string(), serde_json::Value::String(url));
1859                }
1860                if let Some(layout) = dist_snapshot_repository_layout {
1861                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1862                }
1863                extra_data.insert(
1864                    "distribution_snapshot_repository".to_string(),
1865                    serde_json::Value::Object(repo),
1866                );
1867            }
1868
1869            // Build site object
1870            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1871                let mut site = serde_json::Map::new();
1872                if let Some(id) = dist_site_id {
1873                    site.insert("id".to_string(), serde_json::Value::String(id));
1874                }
1875                if let Some(name) = dist_site_name {
1876                    site.insert("name".to_string(), serde_json::Value::String(name));
1877                }
1878                if let Some(url) = dist_site_url {
1879                    site.insert("url".to_string(), serde_json::Value::String(url));
1880                }
1881                extra_data.insert(
1882                    "distribution_site".to_string(),
1883                    serde_json::Value::Object(site),
1884                );
1885            }
1886
1887            if !repositories.is_empty() {
1888                extra_data.insert(
1889                    "repositories".to_string(),
1890                    serde_json::Value::Array(
1891                        repositories
1892                            .into_iter()
1893                            .map(serde_json::Value::Object)
1894                            .collect(),
1895                    ),
1896                );
1897            }
1898
1899            if !plugin_repositories.is_empty() {
1900                extra_data.insert(
1901                    "plugin_repositories".to_string(),
1902                    serde_json::Value::Array(
1903                        plugin_repositories
1904                            .into_iter()
1905                            .map(serde_json::Value::Object)
1906                            .collect(),
1907                    ),
1908                );
1909            }
1910
1911            if !modules.is_empty() {
1912                extra_data.insert(
1913                    "modules".to_string(),
1914                    serde_json::Value::Array(
1915                        modules.into_iter().map(serde_json::Value::String).collect(),
1916                    ),
1917                );
1918            }
1919
1920            if !mailing_lists.is_empty() {
1921                extra_data.insert(
1922                    "mailing_lists".to_string(),
1923                    serde_json::Value::Array(
1924                        mailing_lists
1925                            .into_iter()
1926                            .map(serde_json::Value::Object)
1927                            .collect(),
1928                    ),
1929                );
1930            }
1931
1932            if !dependency_management_entries.is_empty() {
1933                extra_data.insert(
1934                    "dependency_management".to_string(),
1935                    serde_json::Value::Array(
1936                        dependency_management_entries
1937                            .into_iter()
1938                            .map(|dependency| {
1939                                serde_json::Value::Object(dependency_management_entry_to_value(
1940                                    &dependency,
1941                                ))
1942                            })
1943                            .collect(),
1944                    ),
1945                );
1946            }
1947
1948            if relocation.group_id.is_some()
1949                || relocation.artifact_id.is_some()
1950                || relocation.version.is_some()
1951                || relocation.message.is_some()
1952            {
1953                extra_data.insert(
1954                    "relocation".to_string(),
1955                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1956                );
1957            }
1958
1959            if parent_group_id.is_some()
1960                || parent_artifact_id.is_some()
1961                || parent_version.is_some()
1962                || parent_relative_path.is_some()
1963            {
1964                let mut parent_obj = serde_json::Map::new();
1965                if let Some(group_id) = parent_group_id {
1966                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1967                }
1968                if let Some(artifact_id) = parent_artifact_id {
1969                    parent_obj.insert(
1970                        "artifactId".to_string(),
1971                        serde_json::Value::String(artifact_id),
1972                    );
1973                }
1974                if let Some(version) = parent_version {
1975                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1976                }
1977                if let Some(relative_path) = parent_relative_path {
1978                    parent_obj.insert(
1979                        "relativePath".to_string(),
1980                        serde_json::Value::String(relative_path),
1981                    );
1982                }
1983                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1984            }
1985
1986            package_data.extra_data = Some(extra_data);
1987        }
1988
1989        package_data.extracted_license_statement =
1990            build_license_statement(&licenses).map(truncate_field);
1991        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1992            build_maven_declared_license_data(
1993                &licenses,
1994                package_data.extracted_license_statement.as_deref(),
1995            );
1996        package_data.declared_license_expression = declared_license_expression;
1997        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
1998        package_data.license_detections = license_detections;
1999
2000        package_data.namespace = package_data.namespace.map(truncate_field);
2001        package_data.name = package_data.name.map(truncate_field);
2002        package_data.version = package_data.version.map(truncate_field);
2003        package_data.description = package_data.description.map(truncate_field);
2004        package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2005        package_data.vcs_url = package_data.vcs_url.map(truncate_field);
2006        package_data.purl = package_data.purl.map(truncate_field);
2007        package_data.code_view_url = package_data.code_view_url.map(truncate_field);
2008        package_data.bug_tracking_url = package_data.bug_tracking_url.map(truncate_field);
2009        package_data.download_url = package_data.download_url.map(truncate_field);
2010        package_data.repository_homepage_url =
2011            package_data.repository_homepage_url.map(truncate_field);
2012        package_data.repository_download_url =
2013            package_data.repository_download_url.map(truncate_field);
2014        package_data.api_data_url = package_data.api_data_url.map(truncate_field);
2015        for dep in &mut package_data.dependencies {
2016            dep.purl = dep.purl.take().map(truncate_field);
2017            dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2018        }
2019
2020        vec![package_data]
2021    }
2022
2023    fn is_match(path: &Path) -> bool {
2024        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
2025            filename == "pom.xml"
2026                || filename.ends_with(".pom.xml")
2027                || filename.ends_with("-pom.xml")
2028                || filename == "pom.properties"
2029                || filename == "MANIFEST.MF"
2030                || filename.ends_with(".pom")
2031        } else {
2032            false
2033        }
2034    }
2035}
2036
/// Assembles a URL under the Maven Central base for the given coordinates.
///
/// Returns `None` when either `group_id` or `artifact_id` is absent. Dots in
/// the group id are turned into `/` path separators. When `version` is
/// present it becomes its own path segment. The URL always ends with
/// `filename`, or with an empty segment (i.e. a trailing `/`) when no
/// filename is supplied.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (Some(group), Some(artifact)) = (group_id.as_deref(), artifact_id.as_deref()) else {
        return None;
    };

    let group_path = group.replace('.', "/");

    // Collect the path segments in order, then glue them with single slashes.
    let mut segments: Vec<&str> = vec![BASE_URL, group_path.as_str(), artifact];
    if let Some(ver) = version.as_deref() {
        segments.push(ver);
    }
    // An absent filename contributes an empty final segment, keeping the
    // trailing slash on directory-style URLs.
    segments.push(filename.unwrap_or_default());

    Some(segments.join("/"))
}
2065
2066fn build_maven_declared_license_data(
2067    licenses: &[MavenLicenseEntry],
2068    matched_text: Option<&str>,
2069) -> (
2070    Option<String>,
2071    Option<String>,
2072    Vec<crate::models::LicenseDetection>,
2073) {
2074    let normalized: Vec<_> = licenses
2075        .iter()
2076        .filter_map(|license| license.name.as_deref())
2077        .filter_map(normalize_maven_license_name)
2078        .collect();
2079
2080    if normalized.is_empty() {
2081        return empty_declared_license_data();
2082    }
2083
2084    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2085        return empty_declared_license_data();
2086    };
2087
2088    build_declared_license_data(
2089        combined,
2090        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2091    )
2092}
2093
2094fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2095    match name.trim() {
2096        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2097            "public-domain",
2098            "LicenseRef-provenant-public-domain",
2099        )),
2100        other => normalize_declared_license_key(other),
2101    }
2102}
2103
2104/// Parse pom.properties file (Java properties format)
2105fn parse_pom_properties(path: &Path) -> PackageData {
2106    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2107        Ok(content) => content,
2108        Err(e) => {
2109            warn!("Failed to read pom.properties at {:?}: {}", path, e);
2110            return PackageData {
2111                package_type: Some(PackageType::Maven),
2112                primary_language: Some("Java".to_string()),
2113                datasource_id: Some(DatasourceId::MavenPomProperties),
2114                ..Default::default()
2115            };
2116        }
2117    };
2118
2119    let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2120    package_data.package_type = Some(PackageType::Maven);
2121    package_data.primary_language = Some("Java".to_string());
2122    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2123
2124    let mut group_id: Option<String> = None;
2125    let mut artifact_id: Option<String> = None;
2126    let mut version: Option<String> = None;
2127
2128    // Parse Java properties format
2129    let mut continuation = String::new();
2130
2131    for line in content.lines() {
2132        let current_line = if continuation.is_empty() {
2133            line.to_string()
2134        } else {
2135            format!("{}{}", continuation, line)
2136        };
2137        continuation.clear();
2138
2139        // Check for line continuation (backslash at end)
2140        if current_line.ends_with('\\') {
2141            continuation = current_line[..current_line.len() - 1].to_string();
2142            continue;
2143        }
2144
2145        // Skip comments and empty lines
2146        let trimmed = current_line.trim();
2147        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2148            continue;
2149        }
2150
2151        // Parse key=value
2152        if let Some(eq_pos) = current_line.find('=') {
2153            let key = current_line[..eq_pos].trim();
2154            let value = current_line[eq_pos + 1..].trim();
2155
2156            match key {
2157                "groupId" => group_id = Some(value.to_string()),
2158                "artifactId" => artifact_id = Some(value.to_string()),
2159                "version" => version = Some(value.to_string()),
2160                _ => {}
2161            }
2162        }
2163    }
2164
2165    package_data.namespace = group_id.map(truncate_field);
2166    package_data.name = artifact_id.map(truncate_field);
2167    package_data.version = version.map(truncate_field);
2168
2169    // Generate PURL
2170    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2171        &package_data.namespace,
2172        &package_data.name,
2173        &package_data.version,
2174    ) {
2175        package_data.purl = Some(truncate_field(format!(
2176            "pkg:maven/{}/{}@{}",
2177            group_id, artifact_id, version
2178        )));
2179    }
2180
2181    package_data
2182}
2183
2184/// Parse MANIFEST.MF file (JAR manifest format)
2185///
2186/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2187/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2188/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2189/// dependencies.
2190fn parse_manifest_mf(path: &Path) -> PackageData {
2191    let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2192        Ok(content) => content,
2193        Err(e) => {
2194            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2195            return default_package_data(DatasourceId::JavaJarManifest);
2196        }
2197    };
2198
2199    let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2200
2201    // Parse manifest headers (RFC822-style with space continuations)
2202    let mut headers: Vec<(String, String)> = Vec::new();
2203    let mut current_key: Option<String> = None;
2204    let mut current_value = String::new();
2205
2206    for line in content.lines() {
2207        if line.starts_with(' ') || line.starts_with('\t') {
2208            // Continuation line
2209            current_value.push_str(line.trim());
2210        } else if let Some(colon_pos) = line.find(':') {
2211            // Save previous header
2212            if let Some(key) = current_key.take() {
2213                headers.push((key, current_value.trim().to_string()));
2214                current_value.clear();
2215            }
2216
2217            // Start new header
2218            let key = line[..colon_pos].trim().to_string();
2219            let value = line[colon_pos + 1..].trim().to_string();
2220            current_key = Some(key);
2221            current_value = value;
2222        }
2223    }
2224
2225    // Save last header
2226    if let Some(key) = current_key {
2227        headers.push((key, current_value.trim().to_string()));
2228    }
2229
2230    // Convert headers to HashMap for easier lookup
2231    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2232
2233    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2234    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2235    let is_osgi = bundle_symbolic_name.is_some();
2236
2237    if is_osgi {
2238        // OSGi bundle - extract OSGi-specific metadata
2239        package_data.package_type = Some(PackageType::Osgi);
2240        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2241
2242        // Bundle-SymbolicName is the canonical name for OSGi bundles
2243        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2244        if let Some(bsn) = bundle_symbolic_name {
2245            let name = if let Some(semicolon_pos) = bsn.find(';') {
2246                bsn[..semicolon_pos].trim().to_string()
2247            } else {
2248                bsn.clone()
2249            };
2250            package_data.name = Some(name);
2251        }
2252
2253        // Bundle-Version
2254        package_data.version = headers_map.get("Bundle-Version").cloned();
2255
2256        // Bundle-Description takes priority over Bundle-Name for description
2257        if let Some(desc) = headers_map.get("Bundle-Description") {
2258            package_data.description = Some(desc.clone());
2259        } else if let Some(name) = headers_map.get("Bundle-Name") {
2260            package_data.description = Some(name.clone());
2261        }
2262
2263        // Bundle-Vendor
2264        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2265            package_data.parties.push(Party {
2266                r#type: Some("organization".to_string()),
2267                role: Some("vendor".to_string()),
2268                name: Some(vendor.clone()),
2269                email: None,
2270                url: None,
2271                organization: None,
2272                organization_url: None,
2273                timezone: None,
2274            });
2275        }
2276
2277        // Bundle-DocURL
2278        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2279
2280        // Bundle-License
2281        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2282
2283        // Import-Package -> dependencies with scope "import"
2284        if let Some(import_pkg) = headers_map.get("Import-Package") {
2285            let deps = parse_osgi_package_list(import_pkg, "import");
2286            package_data.dependencies.extend(deps);
2287        }
2288
2289        // Require-Bundle -> dependencies with scope "require-bundle"
2290        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2291            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2292            package_data.dependencies.extend(deps);
2293        }
2294
2295        // Export-Package -> store in extra_data
2296        if let Some(export_pkg) = headers_map.get("Export-Package") {
2297            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2298            extra_data.insert(
2299                "export_packages".to_string(),
2300                serde_json::Value::String(export_pkg.clone()),
2301            );
2302            package_data.extra_data = Some(extra_data);
2303        }
2304
2305        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2306        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2307            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2308        }
2309    } else {
2310        // Regular JAR manifest
2311        package_data.package_type = Some(PackageType::Maven);
2312        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2313
2314        // Extract fields with priority order for non-OSGi JARs
2315        let mut name: Option<String> = None;
2316        let mut version: Option<String> = None;
2317        let mut vendor: Option<String> = None;
2318
2319        for (key, value) in &headers {
2320            match key.as_str() {
2321                "Bundle-Name" if name.is_none() => {
2322                    name = Some(value.clone());
2323                }
2324                "Implementation-Title" if name.is_none() => {
2325                    name = Some(value.clone());
2326                }
2327                "Bundle-Version" if version.is_none() => {
2328                    version = Some(value.clone());
2329                }
2330                "Implementation-Version" if version.is_none() => {
2331                    version = Some(value.clone());
2332                }
2333                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2334                    vendor = Some(value.clone());
2335                }
2336                _ => {}
2337            }
2338        }
2339
2340        package_data.name = name;
2341        package_data.version = version;
2342
2343        // Add vendor to parties if present
2344        if let Some(vendor_name) = vendor {
2345            package_data.parties.push(Party {
2346                r#type: Some("organization".to_string()),
2347                role: Some("vendor".to_string()),
2348                name: Some(vendor_name),
2349                email: None,
2350                url: None,
2351                organization: None,
2352                organization_url: None,
2353                timezone: None,
2354            });
2355        }
2356
2357        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2358        if let Some(path_str) = path.to_str()
2359            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2360        {
2361            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2362            let parts: Vec<&str> = after_maven.split('/').collect();
2363            if parts.len() >= 2 {
2364                package_data.namespace = Some(parts[0].to_string());
2365            }
2366        }
2367
2368        // Generate Maven PURL if we have enough information
2369        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2370            &package_data.namespace,
2371            &package_data.name,
2372            &package_data.version,
2373        ) {
2374            package_data.purl = Some(format!(
2375                "pkg:maven/{}/{}@{}",
2376                group_id, artifact_id, version
2377            ));
2378        } else if package_data.name.is_none() && package_data.version.is_none() {
2379            // A bare MANIFEST.MF without Maven coordinates or implementation
2380            // identity is only evidence of a generic JAR manifest, not a Maven
2381            // package. Keep the Java manifest datasource so assembly can still
2382            // merge richer sibling metadata when present.
2383            package_data.package_type = Some(PackageType::Jar);
2384        }
2385    }
2386
2387    package_data.name = package_data.name.map(truncate_field);
2388    package_data.version = package_data.version.map(truncate_field);
2389    package_data.namespace = package_data.namespace.map(truncate_field);
2390    package_data.description = package_data.description.map(truncate_field);
2391    package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2392    package_data.extracted_license_statement =
2393        package_data.extracted_license_statement.map(truncate_field);
2394    package_data.purl = package_data.purl.map(truncate_field);
2395    for dep in &mut package_data.dependencies {
2396        dep.purl = dep.purl.take().map(truncate_field);
2397        dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2398    }
2399
2400    package_data
2401}
2402
2403/// Parse OSGi Import-Package header into dependencies.
2404///
2405/// Format: comma-separated list of packages with optional directives:
2406/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2407pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2408    let mut dependencies = Vec::new();
2409
2410    // Split by comma, but be careful not to split within quoted strings
2411    for package_entry in split_osgi_list(package_list)
2412        .into_iter()
2413        .take(MAX_ITERATION_COUNT)
2414    {
2415        let package_entry = package_entry.trim();
2416        if package_entry.is_empty() {
2417            continue;
2418        }
2419
2420        // Extract package name (before first semicolon)
2421        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2422            package_entry[..semicolon_pos].trim()
2423        } else {
2424            package_entry
2425        };
2426
2427        if package_name.is_empty() {
2428            continue;
2429        }
2430
2431        // Extract version directive if present
2432        let version_requirement = extract_osgi_version(package_entry);
2433        let is_optional = package_entry.contains("resolution:=optional");
2434
2435        dependencies.push(Dependency {
2436            purl: Some(format!("pkg:osgi/{}", package_name)),
2437            extracted_requirement: version_requirement,
2438            scope: Some(scope.to_string()),
2439            is_runtime: Some(true),
2440            is_optional: Some(is_optional),
2441            is_pinned: None,
2442            is_direct: Some(true),
2443            resolved_package: None,
2444            extra_data: None,
2445        });
2446    }
2447
2448    dependencies
2449}
2450
2451/// Parse OSGi Require-Bundle header into dependencies.
2452///
2453/// Format: comma-separated list of bundle symbolic names with optional directives:
2454/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2455pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2456    let mut dependencies = Vec::new();
2457
2458    for bundle_entry in split_osgi_list(bundle_list)
2459        .into_iter()
2460        .take(MAX_ITERATION_COUNT)
2461    {
2462        let bundle_entry = bundle_entry.trim();
2463        if bundle_entry.is_empty() {
2464            continue;
2465        }
2466
2467        // Extract bundle symbolic name (before first semicolon)
2468        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2469            bundle_entry[..semicolon_pos].trim()
2470        } else {
2471            bundle_entry
2472        };
2473
2474        if bundle_name.is_empty() {
2475            continue;
2476        }
2477
2478        // Extract bundle-version directive if present
2479        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2480
2481        // Check if optional
2482        let is_optional = bundle_entry.contains("resolution:=optional");
2483
2484        dependencies.push(Dependency {
2485            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2486            extracted_requirement: version_requirement,
2487            scope: Some(scope.to_string()),
2488            is_runtime: Some(!is_optional),
2489            is_optional: Some(is_optional),
2490            is_pinned: None,
2491            is_direct: Some(true),
2492            resolved_package: None,
2493            extra_data: None,
2494        });
2495    }
2496
2497    dependencies
2498}
2499
/// Split a comma-separated OSGi header value into its entries.
///
/// Commas inside double-quoted strings do not separate entries, so
/// "foo;version=\"[1.0,2.0)\",bar;version=\"3.0\"" yields two entries.
/// Each entry is trimmed, and entries that are empty after trimming are
/// omitted from the result.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut entries = Vec::new();
    let mut inside_quotes = false;
    let mut segment_start = 0;

    for (offset, ch) in list.char_indices() {
        if ch == '"' {
            inside_quotes = !inside_quotes;
        } else if ch == ',' && !inside_quotes {
            let segment = list[segment_start..offset].trim();
            if !segment.is_empty() {
                entries.push(segment.to_string());
            }
            // ',' is a single byte, so the next segment starts right after it.
            segment_start = offset + 1;
        }
    }

    // Whatever follows the last separator forms the final entry.
    let tail = list[segment_start..].trim();
    if !tail.is_empty() {
        entries.push(tail.to_string());
    }

    entries
}
2533
/// Extract the value of the parameter named `directive` from one OSGi header
/// entry such as `org.example;version="[1.0,2.0)";resolution:=optional`.
///
/// The entry is split on `;` outside double quotes, and a parameter matches
/// only when the text before its first `=` (trimmed) equals `directive`
/// exactly. Looking up `version` therefore no longer picks up the value of
/// `bundle-version=` or `specification-version=`.
///
/// Returns the text between the quotes for a quoted value, or the trimmed raw
/// text for an unquoted one. Returns `None` when no parameter matches, or when
/// the first matching value opens a quote that is never closed.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split into parameters on ';' that are not inside a quoted string.
    let mut parameters: Vec<&str> = Vec::new();
    let mut inside_quotes = false;
    let mut segment_start = 0;
    for (offset, ch) in entry.char_indices() {
        match ch {
            '"' => inside_quotes = !inside_quotes,
            ';' if !inside_quotes => {
                parameters.push(&entry[segment_start..offset]);
                segment_start = offset + 1;
            }
            _ => {}
        }
    }
    parameters.push(&entry[segment_start..]);

    // First parameter whose key is exactly the requested name.
    let raw_value = parameters.into_iter().find_map(|parameter| {
        let (key, value) = parameter.split_once('=')?;
        (key.trim() == directive).then(|| value.trim())
    })?;

    match raw_value.strip_prefix('"') {
        Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
        None => Some(raw_value.to_string()),
    }
}
2546
2547pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2548    extract_osgi_directive(entry, "version")
2549}
2550
2551pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2552    extract_osgi_directive(entry, "bundle-version")
2553}
2554
2555fn default_package_data(datasource_id: DatasourceId) -> PackageData {
2556    PackageData {
2557        package_type: Some(PackageType::Maven),
2558        datasource_id: Some(datasource_id),
2559        ..Default::default()
2560    }
2561}
2562
2563#[cfg(test)]
2564mod tests {
2565    use super::*;
2566    use std::fs;
2567    use tempfile::TempDir;
2568
2569    #[test]
2570    fn test_organization_extraction() {
2571        let temp_dir = TempDir::new().unwrap();
2572        let pom_path = temp_dir.path().join("pom.xml");
2573
2574        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2575<project>
2576    <modelVersion>4.0.0</modelVersion>
2577    <groupId>com.example</groupId>
2578    <artifactId>my-app</artifactId>
2579    <version>1.0.0</version>
2580    <organization>
2581        <name>Example Corporation</name>
2582        <url>https://example.com</url>
2583    </organization>
2584</project>"#;
2585
2586        fs::write(&pom_path, pom_content).unwrap();
2587
2588        let package_data = MavenParser::extract_first_package(&pom_path);
2589
2590        assert_eq!(package_data.name, Some("my-app".to_string()));
2591        assert_eq!(package_data.namespace, Some("com.example".to_string()));
2592        assert_eq!(package_data.version, Some("1.0.0".to_string()));
2593
2594        let extra_data = package_data.extra_data.unwrap();
2595        assert_eq!(
2596            extra_data.get("organization_name"),
2597            Some(&serde_json::Value::String(
2598                "Example Corporation".to_string()
2599            ))
2600        );
2601        assert_eq!(
2602            extra_data.get("organization_url"),
2603            Some(&serde_json::Value::String(
2604                "https://example.com".to_string()
2605            ))
2606        );
2607    }
2608
2609    #[test]
2610    fn test_scm_metadata_extraction() {
2611        let temp_dir = TempDir::new().unwrap();
2612        let pom_path = temp_dir.path().join("pom.xml");
2613
2614        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2615<project xmlns="http://maven.apache.org/POM/4.0.0"
2616         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2617         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2618    <modelVersion>4.0.0</modelVersion>
2619    <groupId>org.springframework.boot</groupId>
2620    <artifactId>spring-boot-starter-web</artifactId>
2621    <version>3.0.0</version>
2622    <scm>
2623        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
2624        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
2625        <url>https://github.com/spring-projects/spring-boot</url>
2626        <tag>v3.0.0</tag>
2627    </scm>
2628</project>"#;
2629
2630        fs::write(&pom_path, pom_content).unwrap();
2631
2632        let package_data = MavenParser::extract_first_package(&pom_path);
2633
2634        assert_eq!(
2635            package_data.name,
2636            Some("spring-boot-starter-web".to_string())
2637        );
2638        assert_eq!(
2639            package_data.namespace,
2640            Some("org.springframework.boot".to_string())
2641        );
2642        assert_eq!(package_data.version, Some("3.0.0".to_string()));
2643
2644        assert_eq!(
2645            package_data.code_view_url,
2646            Some("https://github.com/spring-projects/spring-boot".to_string())
2647        );
2648
2649        // vcs_url prefers connection over developerConnection
2650        assert_eq!(
2651            package_data.vcs_url,
2652            Some("git+https://github.com/spring-projects/spring-boot.git".to_string())
2653        );
2654
2655        let extra_data = package_data.extra_data.unwrap();
2656        assert_eq!(
2657            extra_data.get("scm_tag"),
2658            Some(&serde_json::Value::String("v3.0.0".to_string()))
2659        );
2660        // developerConnection stored separately in extra_data
2661        assert_eq!(
2662            extra_data.get("scm_developer_connection"),
2663            Some(&serde_json::Value::String(
2664                "git+git@github.com:spring-projects/spring-boot.git".to_string()
2665            ))
2666        );
2667    }
2668
2669    #[test]
2670    fn test_developers_and_contributors_extraction() {
2671        let temp_dir = TempDir::new().unwrap();
2672        let pom_path = temp_dir.path().join("pom.xml");
2673
2674        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2675<project xmlns="http://maven.apache.org/POM/4.0.0"
2676         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2677         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2678    <modelVersion>4.0.0</modelVersion>
2679    <groupId>com.example</groupId>
2680    <artifactId>test-app</artifactId>
2681    <version>1.0.0</version>
2682    <developers>
2683        <developer>
2684            <id>jdoe</id>
2685            <name>John Doe</name>
2686            <email>john@example.com</email>
2687            <url>https://example.com/jdoe</url>
2688            <organization>Example Corp</organization>
2689            <organizationUrl>https://example.com</organizationUrl>
2690            <timezone>America/New_York</timezone>
2691        </developer>
2692        <developer>
2693            <name>Jane Smith</name>
2694            <email>jane@example.com</email>
2695        </developer>
2696    </developers>
2697    <contributors>
2698        <contributor>
2699            <name>Bob Wilson</name>
2700            <email>bob@example.com</email>
2701            <url>https://example.com/bob</url>
2702        </contributor>
2703    </contributors>
2704</project>"#;
2705
2706        fs::write(&pom_path, pom_content).unwrap();
2707
2708        let package_data = MavenParser::extract_first_package(&pom_path);
2709
2710        assert_eq!(package_data.name, Some("test-app".to_string()));
2711        assert_eq!(package_data.parties.len(), 3);
2712
2713        let dev1 = &package_data.parties[0];
2714        assert_eq!(dev1.r#type, Some("person".to_string()));
2715        assert_eq!(dev1.role, Some("developer".to_string()));
2716        assert_eq!(dev1.name, Some("John Doe".to_string()));
2717        assert_eq!(dev1.email, Some("john@example.com".to_string()));
2718        assert_eq!(dev1.url, Some("https://example.com/jdoe".to_string()));
2719        assert_eq!(dev1.organization, Some("Example Corp".to_string()));
2720        assert_eq!(
2721            dev1.organization_url,
2722            Some("https://example.com".to_string())
2723        );
2724        assert_eq!(dev1.timezone, Some("America/New_York".to_string()));
2725
2726        let dev2 = &package_data.parties[1];
2727        assert_eq!(dev2.r#type, Some("person".to_string()));
2728        assert_eq!(dev2.role, Some("developer".to_string()));
2729        assert_eq!(dev2.name, Some("Jane Smith".to_string()));
2730        assert_eq!(dev2.email, Some("jane@example.com".to_string()));
2731
2732        let contrib = &package_data.parties[2];
2733        assert_eq!(contrib.r#type, Some("person".to_string()));
2734        assert_eq!(contrib.role, Some("contributor".to_string()));
2735        assert_eq!(contrib.name, Some("Bob Wilson".to_string()));
2736        assert_eq!(contrib.email, Some("bob@example.com".to_string()));
2737        assert_eq!(contrib.url, Some("https://example.com/bob".to_string()));
2738    }
2739
2740    #[test]
2741    fn test_issue_management_extraction() {
2742        let temp_dir = TempDir::new().unwrap();
2743        let pom_path = temp_dir.path().join("pom.xml");
2744
2745        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2746<project xmlns="http://maven.apache.org/POM/4.0.0"
2747         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2748         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2749    <modelVersion>4.0.0</modelVersion>
2750    <groupId>com.example</groupId>
2751    <artifactId>test-app</artifactId>
2752    <version>1.0.0</version>
2753    <issueManagement>
2754        <system>GitHub</system>
2755        <url>https://github.com/example/test-app/issues</url>
2756    </issueManagement>
2757</project>"#;
2758
2759        fs::write(&pom_path, pom_content).unwrap();
2760
2761        let package_data = MavenParser::extract_first_package(&pom_path);
2762
2763        assert_eq!(package_data.name, Some("test-app".to_string()));
2764        assert_eq!(
2765            package_data.bug_tracking_url,
2766            Some("https://github.com/example/test-app/issues".to_string())
2767        );
2768
2769        let extra_data = package_data.extra_data.unwrap();
2770        assert_eq!(
2771            extra_data.get("issue_tracking_system"),
2772            Some(&serde_json::Value::String("GitHub".to_string()))
2773        );
2774    }
2775
2776    #[test]
2777    fn test_ci_management_extraction() {
2778        let temp_dir = TempDir::new().unwrap();
2779        let pom_path = temp_dir.path().join("pom.xml");
2780
2781        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2782<project xmlns="http://maven.apache.org/POM/4.0.0"
2783         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2784         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2785    <modelVersion>4.0.0</modelVersion>
2786    <groupId>com.example</groupId>
2787    <artifactId>test-app</artifactId>
2788    <version>1.0.0</version>
2789    <ciManagement>
2790        <system>Jenkins</system>
2791        <url>https://ci.example.com/job/test-app</url>
2792    </ciManagement>
2793</project>"#;
2794
2795        fs::write(&pom_path, pom_content).unwrap();
2796
2797        let package_data = MavenParser::extract_first_package(&pom_path);
2798
2799        assert_eq!(package_data.name, Some("test-app".to_string()));
2800
2801        let extra_data = package_data.extra_data.unwrap();
2802        assert_eq!(
2803            extra_data.get("ci_system"),
2804            Some(&serde_json::Value::String("Jenkins".to_string()))
2805        );
2806        assert_eq!(
2807            extra_data.get("ci_url"),
2808            Some(&serde_json::Value::String(
2809                "https://ci.example.com/job/test-app".to_string()
2810            ))
2811        );
2812    }
2813
2814    #[test]
2815    fn test_distribution_management_extraction() {
2816        let temp_dir = TempDir::new().unwrap();
2817        let pom_path = temp_dir.path().join("pom.xml");
2818
2819        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
2820<project xmlns="http://maven.apache.org/POM/4.0.0"
2821         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2822         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2823    <modelVersion>4.0.0</modelVersion>
2824    <groupId>com.example</groupId>
2825    <artifactId>test-app</artifactId>
2826    <version>1.0.0</version>
2827    <distributionManagement>
2828        <downloadUrl>https://example.com/downloads</downloadUrl>
2829        <repository>
2830            <id>releases</id>
2831            <name>Release Repository</name>
2832            <url>https://repo.example.com/releases</url>
2833            <layout>default</layout>
2834        </repository>
2835        <snapshotRepository>
2836            <id>snapshots</id>
2837            <name>Snapshot Repository</name>
2838            <url>https://repo.example.com/snapshots</url>
2839            <layout>default</layout>
2840        </snapshotRepository>
2841        <site>
2842            <id>site-deploy</id>
2843            <name>Project Site</name>
2844            <url>https://example.com/site</url>
2845        </site>
2846    </distributionManagement>
2847</project>"#;
2848
2849        fs::write(&pom_path, pom_content).unwrap();
2850
2851        let package_data = MavenParser::extract_first_package(&pom_path);
2852
2853        assert_eq!(package_data.name, Some("test-app".to_string()));
2854        assert_eq!(
2855            package_data.download_url,
2856            Some("https://example.com/downloads".to_string())
2857        );
2858
2859        let extra_data = package_data.extra_data.unwrap();
2860
2861        assert_eq!(
2862            extra_data.get("distribution_download_url"),
2863            Some(&serde_json::Value::String(
2864                "https://example.com/downloads".to_string()
2865            ))
2866        );
2867
2868        let repo = extra_data
2869            .get("distribution_repository")
2870            .unwrap()
2871            .as_object()
2872            .unwrap();
2873        assert_eq!(
2874            repo.get("id"),
2875            Some(&serde_json::Value::String("releases".to_string()))
2876        );
2877        assert_eq!(
2878            repo.get("name"),
2879            Some(&serde_json::Value::String("Release Repository".to_string()))
2880        );
2881        assert_eq!(
2882            repo.get("url"),
2883            Some(&serde_json::Value::String(
2884                "https://repo.example.com/releases".to_string()
2885            ))
2886        );
2887        assert_eq!(
2888            repo.get("layout"),
2889            Some(&serde_json::Value::String("default".to_string()))
2890        );
2891
2892        let snapshot_repo = extra_data
2893            .get("distribution_snapshot_repository")
2894            .unwrap()
2895            .as_object()
2896            .unwrap();
2897        assert_eq!(
2898            snapshot_repo.get("id"),
2899            Some(&serde_json::Value::String("snapshots".to_string()))
2900        );
2901        assert_eq!(
2902            snapshot_repo.get("name"),
2903            Some(&serde_json::Value::String(
2904                "Snapshot Repository".to_string()
2905            ))
2906        );
2907        assert_eq!(
2908            snapshot_repo.get("url"),
2909            Some(&serde_json::Value::String(
2910                "https://repo.example.com/snapshots".to_string()
2911            ))
2912        );
2913        assert_eq!(
2914            snapshot_repo.get("layout"),
2915            Some(&serde_json::Value::String("default".to_string()))
2916        );
2917
2918        let site = extra_data
2919            .get("distribution_site")
2920            .unwrap()
2921            .as_object()
2922            .unwrap();
2923        assert_eq!(
2924            site.get("id"),
2925            Some(&serde_json::Value::String("site-deploy".to_string()))
2926        );
2927        assert_eq!(
2928            site.get("name"),
2929            Some(&serde_json::Value::String("Project Site".to_string()))
2930        );
2931        assert_eq!(
2932            site.get("url"),
2933            Some(&serde_json::Value::String(
2934                "https://example.com/site".to_string()
2935            ))
2936        );
2937    }
2938}
2939
// Register this parser's metadata through the crate-level `register_parser!` macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);