Skip to main content

provenant/parsers/
maven.rs

1//! Parser for Apache Maven pom.xml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Maven Project Object Model (POM) files.
5//!
6//! # Supported Formats
7//! - pom.xml (Project Object Model)
8//! - pom.properties
9//! - MANIFEST.MF (JAR manifest)
10//!
11//! # Key Features
12//! - Property value substitution (`${project.version}`)
13//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
14//! - Dependency scope handling (compile, test, provided, runtime, system)
15//! - Package URL (purl) generation
16//! - Multiple license support (combined with " OR ")
17//!
18//! # Implementation Notes
19//! - Uses quick-xml for XML parsing
20//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
21//! - Property substitution limited to prevent infinite loops
22//! - Direct dependencies: all in pom.xml are direct
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::read_file_to_string;
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::collections::{HashMap, HashSet};
30use std::fs::File;
31use std::io::BufReader;
32use std::path::Path;
33
34use super::PackageParser;
35use super::license_normalization::{
36    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
37    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
38};
39
/// Raw string fields collected for one Maven dependency-like entry.
///
/// Every field stays an `Option<String>` so that `${...}` placeholders can be
/// substituted in place (see `resolve_dependency_data`) before the values are
/// interpreted.
#[derive(Clone, Default)]
struct MavenDependencyData {
    /// Group id; emitted under the `groupId` key for dependency-management entries.
    group_id: Option<String>,
    /// Artifact id; emitted under the `artifactId` key for dependency-management entries.
    artifact_id: Option<String>,
    /// Version requirement exactly as written (may be a range or a placeholder).
    version: Option<String>,
    /// Optional classifier; becomes the `classifier` purl qualifier.
    classifier: Option<String>,
    /// Artifact type; trailing underscore because `type` is a Rust keyword.
    type_: Option<String>,
    /// Declared scope, compared verbatim against `"test"` / `"provided"` downstream.
    scope: Option<String>,
    /// Textual optional flag; interpreted with `parse_maven_bool`.
    optional: Option<String>,
    /// System path value; exported as `system_path` in dependency extra data.
    system_path: Option<String>,
    /// Free-form message; exported as `message` in extra data.
    /// NOTE(review): presumably filled from `<relocation>` entries — confirm against the parser loop.
    message: Option<String>,
}
52
/// Raw text of one license entry, kept as optional strings so placeholders
/// can be resolved (see `resolve_license_entry`) before rendering.
#[derive(Clone, Default)]
struct MavenLicenseEntry {
    /// License name; rendered as the `name:` line by `build_license_statement`.
    name: Option<String>,
    /// License URL; rendered as the `url:` line.
    url: Option<String>,
    /// License comments; rendered as the `comments:` line.
    comments: Option<String>,
}
59
/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
///
/// Maven properties can reference other properties, creating dependency graphs. This resolver:
/// - Resolves nested placeholders: `${outer.${inner}}`
/// - Detects circular references: `${a}` → `${b}` → `${a}`
/// - Enforces depth limits to prevent stack overflow
/// - Enforces substitution limits to prevent DoS on pathological inputs
///
/// # Algorithm
///
/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
/// - `resolving_set`: For cycle detection (hash set lookup)
/// - `resolving_stack`: For error reporting (preserves path)
/// - `cache`: Memoizes resolved values to avoid redundant work
struct PropertyResolver {
    /// User-defined properties; consulted before `builtins` on lookup.
    raw: HashMap<String, String>,
    /// Project-derived properties (e.g. `project.version`); fallback when `raw` has no entry.
    builtins: HashMap<String, String>,
    /// Already-resolved values keyed by property name.
    cache: HashMap<String, String>,
    /// Keys whose resolution is currently in progress.
    resolving_set: HashSet<String>,
    /// Same keys as `resolving_set`, in resolution order; printed in cycle warnings.
    resolving_stack: Vec<String>,
    /// Maximum recursion depth (set to 10 by `new`).
    max_depth: usize,
    /// Maximum byte length of a single resolved text (set to 100_000 by `new`).
    max_output_len: usize,
    /// Maximum number of placeholders handled per `resolve_text` call (set to 1000 by `new`).
    max_substitutions: usize,
    /// `"{kind}:{key}"` tokens of warnings already emitted, so each is logged once.
    warned_keys: HashSet<String>,
}
85
impl PropertyResolver {
    /// Creates a resolver over the user-defined `raw` properties and the
    /// project-derived `builtins`, with empty caches and fixed limits
    /// (depth 10, 100_000 output bytes, 1000 substitutions).
    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
        Self {
            raw,
            builtins,
            cache: HashMap::new(),
            resolving_set: HashSet::new(),
            resolving_stack: Vec::new(),
            max_depth: 10,
            max_output_len: 100_000,
            max_substitutions: 1000,
            warned_keys: HashSet::new(),
        }
    }

    /// Looks up the property `key` and returns its value with placeholders
    /// substituted via `resolve_text`.
    ///
    /// Returns `None` when the depth limit is reached, when `key` is already
    /// being resolved (a cycle), or when `key` is defined in neither `raw`
    /// nor `builtins`. Every value that is returned is also stored in `cache`.
    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
        // Cache hits are served before any limit check.
        if let Some(value) = self.cache.get(key) {
            return Some(value.clone());
        }

        if depth >= self.max_depth {
            self.warn_once(
                "depth",
                key,
                format!("Maven property depth limit hit resolving {key}"),
            );
            return None;
        }

        if self.resolving_set.contains(key) {
            // The key on top of the stack references itself directly
            // (e.g. `foo = ${foo}`): give up silently, without a cycle warning.
            if self
                .resolving_stack
                .last()
                .is_some_and(|current| current == key)
            {
                return None;
            }

            // Indirect cycle: report it once, including the resolution path.
            self.warn_once(
                "cycle",
                key,
                format!(
                    "Maven property cycle detected at {key}: {:?}",
                    self.resolving_stack
                ),
            );
            return None;
        }

        // User-defined properties take precedence over builtins.
        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
            value.clone()
        } else {
            return None;
        };

        // Mark the key as in progress for the duration of the recursive call.
        self.resolving_set.insert(key.to_string());
        self.resolving_stack.push(key.to_string());

        let resolved = self.resolve_text(&raw_val, depth + 1);

        self.resolving_stack.pop();
        self.resolving_set.remove(key);

        // The value is cached as returned by `resolve_text`, which may still
        // contain placeholders that could not be resolved.
        self.cache.insert(key.to_string(), resolved.clone());
        Some(resolved)
    }

    /// Returns `text` with every resolvable `${...}` placeholder replaced.
    ///
    /// Placeholders whose key cannot be resolved are copied through verbatim.
    /// If the depth, substitution or output-length limit is hit, or a
    /// placeholder has no closing brace, the original `text` is returned
    /// unchanged and any partial output is discarded.
    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
        // Fast path: nothing to substitute.
        if !text.contains("${") {
            return text.to_string();
        }

        if depth >= self.max_depth {
            warn!("Maven property depth limit hit resolving text");
            return text.to_string();
        }

        let bytes = text.as_bytes();
        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
        let mut index = 0;
        // Counts placeholders handled by this call only (not by nested calls).
        let mut substitutions = 0;

        while index < bytes.len() {
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                if substitutions >= self.max_substitutions {
                    warn!("Maven property substitution limit hit resolving {text}");
                    return text.to_string();
                }

                let placeholder_start = index;
                // `content` is the text between `${` and its matching `}`.
                let Some((content, closing_index)) =
                    self.parse_placeholder_content(text, index + 2)
                else {
                    warn!("Maven property malformed placeholder in {text}");
                    return text.to_string();
                };

                substitutions += 1;
                // A nested placeholder inside the key is resolved first to
                // obtain the final property name.
                let resolved_key = if content.contains("${") {
                    self.resolve_text(content, depth + 1)
                } else {
                    content.to_string()
                };

                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
                    if output.len() + resolved.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(resolved.as_bytes());
                } else {
                    // Unresolvable: keep the original `${...}` bytes as-is.
                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
                    if output.len() + placeholder_bytes.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(placeholder_bytes);
                }

                // Continue right after the closing brace.
                index = closing_index + 1;
                continue;
            }

            if output.len() + 1 > self.max_output_len {
                warn!("Maven property output length limit hit resolving {text}");
                return text.to_string();
            }

            // Literal byte outside any placeholder.
            output.push(bytes[index]);
            index += 1;
        }

        // Fall back to the untouched input if the assembled bytes are not valid UTF-8.
        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
    }

    /// Finds the end of the placeholder whose content starts at byte
    /// `start_index` (the position just after its opening `${`).
    ///
    /// Returns the content slice and the byte index of the matching `}`, or
    /// `None` when the text ends before the placeholder is closed. Nested
    /// `${` openers are counted so their closing braces are skipped.
    fn parse_placeholder_content<'a>(
        &self,
        text: &'a str,
        start_index: usize,
    ) -> Option<(&'a str, usize)> {
        let bytes = text.as_bytes();
        let mut index = start_index;
        // Number of nested `${` openers that are still unclosed.
        let mut depth = 0;

        while index < bytes.len() {
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                depth += 1;
                index += 2;
                continue;
            }

            if bytes[index] == b'}' {
                if depth == 0 {
                    return Some((&text[start_index..index], index));
                }
                depth -= 1;
            }

            index += 1;
        }

        None
    }

    /// Emits `message` as a warning the first time the `(kind, key)` pair is
    /// seen; later calls with the same pair are silent.
    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
        let token = format!("{kind}:{key}");
        // `insert` returns true only when the token was not present yet.
        if self.warned_keys.insert(token) {
            warn!("{message}");
        }
    }
}
257
258fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
259    if let Some(current) = value.clone() {
260        *value = Some(resolver.resolve_text(&current, 0));
261    }
262}
263
264fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
265    for value in values.iter_mut() {
266        *value = resolver.resolve_text(value, 0);
267    }
268}
269
270fn resolve_map_strings(
271    resolver: &mut PropertyResolver,
272    values: &mut serde_json::Map<String, serde_json::Value>,
273) {
274    for value in values.values_mut() {
275        if let serde_json::Value::String(current) = value {
276            let resolved = resolver.resolve_text(current, 0);
277            *current = resolved;
278        }
279    }
280}
281
282fn resolve_maps(
283    resolver: &mut PropertyResolver,
284    values: &mut [serde_json::Map<String, serde_json::Value>],
285) {
286    for value in values.iter_mut() {
287        resolve_map_strings(resolver, value);
288    }
289}
290
291fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
292    resolve_option(resolver, &mut dependency.group_id);
293    resolve_option(resolver, &mut dependency.artifact_id);
294    resolve_option(resolver, &mut dependency.version);
295    resolve_option(resolver, &mut dependency.classifier);
296    resolve_option(resolver, &mut dependency.type_);
297    resolve_option(resolver, &mut dependency.scope);
298    resolve_option(resolver, &mut dependency.optional);
299    resolve_option(resolver, &mut dependency.system_path);
300    resolve_option(resolver, &mut dependency.message);
301}
302
/// Interprets an optional textual flag as a boolean.
///
/// Only the word `true` (ignoring ASCII case and surrounding whitespace)
/// yields `true`; `None` and any other text yield `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    matches!(value, Some(text) if text.trim().eq_ignore_ascii_case("true"))
}
306
/// Maps a packaging/type string onto the value used for file extensions and
/// purl qualifiers.
///
/// Returns `None` for a missing or blank input, the trimmed input when it is
/// one of the recognised packagings, and `"jar"` for anything else. The
/// comparison is case-sensitive.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const KNOWN_PACKAGINGS: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if KNOWN_PACKAGINGS.iter().any(|known| *known == trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
317
318fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
319    resolve_option(resolver, &mut license.name);
320    resolve_option(resolver, &mut license.url);
321    resolve_option(resolver, &mut license.comments);
322}
323
324fn build_maven_qualifiers(
325    classifier: Option<&str>,
326    packaging: Option<&str>,
327) -> Option<HashMap<String, String>> {
328    let mut qualifiers = HashMap::new();
329
330    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
331        qualifiers.insert("classifier".to_string(), classifier.to_string());
332    }
333
334    if let Some(packaging) = normalize_maven_packaging(packaging)
335        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
336    {
337        qualifiers.insert("type".to_string(), packaging.to_string());
338    }
339
340    (!qualifiers.is_empty()).then_some(qualifiers)
341}
342
343fn build_maven_purl(
344    group_id: &str,
345    artifact_id: &str,
346    version: Option<&str>,
347    classifier: Option<&str>,
348    packaging: Option<&str>,
349) -> String {
350    let mut purl = format!(
351        "pkg:maven/{}/{}",
352        percent_encode_purl_component(group_id),
353        percent_encode_purl_component(artifact_id)
354    );
355
356    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
357        purl.push('@');
358        purl.push_str(&percent_encode_purl_component(version));
359    }
360
361    let qualifiers = build_maven_qualifiers(classifier, packaging);
362    if let Some(qualifiers) = qualifiers {
363        let mut query_parts = Vec::new();
364        if let Some(classifier) = qualifiers.get("classifier") {
365            query_parts.push(format!(
366                "classifier={}",
367                percent_encode_purl_component(classifier)
368            ));
369        }
370        if let Some(type_) = qualifiers.get("type") {
371            query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
372        }
373
374        if !query_parts.is_empty() {
375            purl.push('?');
376            purl.push_str(&query_parts.join("&"));
377        }
378    }
379
380    purl
381}
382
/// Percent-encodes `value` for use as a purl component.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` are copied unchanged; every
/// other byte of the UTF-8 encoding is written as `%XX` with uppercase hex
/// digits.
fn percent_encode_purl_component(value: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(value.len());

    for byte in value.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') {
            encoded.push(char::from(byte));
        } else {
            // Push the escape directly instead of allocating a temporary
            // `String` with `format!` for every escaped byte.
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }

    encoded
}
397
398fn build_maven_download_url(
399    group_id: &str,
400    artifact_id: &str,
401    version: &str,
402    classifier: Option<&str>,
403    packaging: Option<&str>,
404) -> String {
405    const BASE_URL: &str = "https://repo1.maven.org/maven2";
406    let group_path = group_id.replace('.', "/");
407    let extension = normalize_maven_packaging(packaging)
408        .filter(|value| *value != "pom")
409        .unwrap_or("jar");
410    let classifier_suffix = classifier
411        .map(str::trim)
412        .filter(|value| !value.is_empty())
413        .map(|value| format!("-{value}"))
414        .unwrap_or_default();
415
416    format!(
417        "{}/{}/{}/{}/{}-{}{}.{}",
418        BASE_URL,
419        group_path,
420        artifact_id,
421        version,
422        artifact_id,
423        version,
424        classifier_suffix,
425        extension
426    )
427}
428
429fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
430    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
431}
432
433fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
434    let rendered_entries: Vec<String> = licenses
435        .iter()
436        .filter_map(|license| {
437            let mut lines = Vec::new();
438
439            if let Some(name) = license
440                .name
441                .as_ref()
442                .filter(|value| !value.trim().is_empty())
443            {
444                lines.push(format!("    name: {name}"));
445            }
446            if let Some(url) = license
447                .url
448                .as_ref()
449                .filter(|value| !value.trim().is_empty())
450            {
451                lines.push(format!("    url: {url}"));
452            }
453            if let Some(comments) = license
454                .comments
455                .as_ref()
456                .filter(|value| !value.trim().is_empty())
457            {
458                lines.push(format!("    comments: {comments}"));
459            }
460
461            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
462        })
463        .collect();
464
465    if rendered_entries.is_empty() {
466        None
467    } else {
468        Some(format!("{}\n", rendered_entries.join("\n")))
469    }
470}
471
/// Heuristically decides whether a comment talks about licensing.
///
/// The comment is ASCII-lowercased and searched for a fixed list of marker
/// substrings. Matching is by plain substring, so short markers also match
/// inside longer words (for example `mit` inside `commit`).
fn is_license_like_comment(comment: &str) -> bool {
    const MARKERS: [&str; 11] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "mit",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];

    let lowered = comment.to_ascii_lowercase();
    for marker in MARKERS {
        if lowered.contains(marker) {
            return true;
        }
    }
    false
}
490
491fn dependency_extra_data(
492    dependency: &MavenDependencyData,
493) -> Option<HashMap<String, serde_json::Value>> {
494    let mut extra_data = HashMap::new();
495
496    if let Some(classifier) = dependency
497        .classifier
498        .as_ref()
499        .filter(|value| !value.trim().is_empty())
500    {
501        extra_data.insert(
502            "classifier".to_string(),
503            serde_json::Value::String(classifier.clone()),
504        );
505    }
506    if let Some(type_) = dependency
507        .type_
508        .as_ref()
509        .filter(|value| !value.trim().is_empty())
510    {
511        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
512    }
513    if let Some(system_path) = dependency
514        .system_path
515        .as_ref()
516        .filter(|value| !value.trim().is_empty())
517    {
518        extra_data.insert(
519            "system_path".to_string(),
520            serde_json::Value::String(system_path.clone()),
521        );
522    }
523    if let Some(message) = dependency
524        .message
525        .as_ref()
526        .filter(|value| !value.trim().is_empty())
527    {
528        extra_data.insert(
529            "message".to_string(),
530            serde_json::Value::String(message.clone()),
531        );
532    }
533
534    (!extra_data.is_empty()).then_some(extra_data)
535}
536
537fn dependency_management_entry_to_value(
538    dependency: &MavenDependencyData,
539) -> serde_json::Map<String, serde_json::Value> {
540    let mut dep_obj = serde_json::Map::new();
541
542    if let Some(group_id) = dependency.group_id.as_ref() {
543        dep_obj.insert(
544            "groupId".to_string(),
545            serde_json::Value::String(group_id.clone()),
546        );
547    }
548    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
549        dep_obj.insert(
550            "artifactId".to_string(),
551            serde_json::Value::String(artifact_id.clone()),
552        );
553    }
554    if let Some(version) = dependency.version.as_ref() {
555        dep_obj.insert(
556            "version".to_string(),
557            serde_json::Value::String(version.clone()),
558        );
559    }
560    if let Some(scope) = dependency.scope.as_ref() {
561        dep_obj.insert(
562            "scope".to_string(),
563            serde_json::Value::String(scope.clone()),
564        );
565    }
566    if let Some(type_) = dependency.type_.as_ref() {
567        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
568    }
569    if let Some(classifier) = dependency.classifier.as_ref() {
570        dep_obj.insert(
571            "classifier".to_string(),
572            serde_json::Value::String(classifier.clone()),
573        );
574    }
575    if let Some(optional) = dependency.optional.as_deref() {
576        dep_obj.insert(
577            "optional".to_string(),
578            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
579        );
580    }
581    if let Some(message) = dependency.message.as_ref() {
582        dep_obj.insert(
583            "message".to_string(),
584            serde_json::Value::String(message.clone()),
585        );
586    }
587
588    dep_obj
589}
590
591fn maven_dependency_to_dependency(
592    dependency_data: &MavenDependencyData,
593    fallback_scope: Option<&str>,
594    force_non_runtime: bool,
595) -> Option<Dependency> {
596    let group_id = dependency_data.group_id.as_ref()?;
597    let artifact_id = dependency_data.artifact_id.as_ref()?;
598    let version = dependency_data.version.clone();
599    let scope = dependency_data
600        .scope
601        .clone()
602        .or_else(|| fallback_scope.map(str::to_string));
603    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
604
605    let (is_runtime, is_optional) = if force_non_runtime {
606        (Some(false), Some(explicit_optional))
607    } else {
608        match scope.as_deref() {
609            Some("test") | Some("provided") => (Some(false), Some(true)),
610            Some(_) => (Some(true), Some(explicit_optional)),
611            None => (None, Some(explicit_optional)),
612        }
613    };
614
615    Some(Dependency {
616        purl: Some(build_maven_purl(
617            group_id,
618            artifact_id,
619            version.as_deref(),
620            dependency_data.classifier.as_deref(),
621            dependency_data.type_.as_deref(),
622        )),
623        extracted_requirement: version.clone(),
624        scope,
625        is_runtime,
626        is_optional,
627        is_pinned: version.as_deref().map(is_maven_version_pinned),
628        is_direct: Some(true),
629        resolved_package: None,
630        extra_data: dependency_extra_data(dependency_data),
631    })
632}
633
/// Determines if a Maven version specifier is pinned to an exact version.
///
/// A version is considered pinned if it names exactly one version:
/// - Pinned: `"1.0.0"`, `"1.2.3"`, and the hard requirement `"[1.0]"`
///   (Maven: "use 1.0 and only 1.0")
/// - NOT pinned: `"[1.0.0,2.0.0)"` (range), `"[1.0.0,)"` (open-ended),
///   `"[1.0],[2.0]"` (multiple sets), `"LATEST"`, `"RELEASE"`, empty input
///
/// Surrounding whitespace is ignored.
fn is_maven_version_pinned(version_str: &str) -> bool {
    let trimmed = version_str.trim();

    // A single `[version]` pair is an exact hard requirement, not a range:
    // strip the brackets and judge the inner text instead.
    let (candidate, bracketed) = match trimmed
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
    {
        Some(inner) => (inner.trim(), true),
        None => (trimmed, false),
    };

    // Empty version (or empty brackets) is not pinned.
    if candidate.is_empty() {
        return false;
    }

    // Any remaining bracket or parenthesis means range syntax.
    if candidate.contains(|c: char| matches!(c, '[' | ']' | '(' | ')')) {
        return false;
    }

    // `[1.0,2.0]` is an inclusive range, not a single hard requirement.
    if bracketed && candidate.contains(',') {
        return false;
    }

    // Dynamic version keywords resolve to whatever is newest.
    !(candidate.eq_ignore_ascii_case("LATEST") || candidate.eq_ignore_ascii_case("RELEASE"))
}
665
/// Borrowed project and parent coordinates from which the built-in
/// (`project.*` / `pom.*` / `parent.*`) properties are derived.
struct MavenBuiltinPropertyInputs<'a> {
    /// The project's own group id, if declared.
    namespace: &'a Option<String>,
    /// The project's artifact id.
    name: &'a Option<String>,
    /// The project's own version, if declared.
    version: &'a Option<String>,
    /// Group id of the parent; also the fallback for the project's group id.
    parent_group_id: &'a Option<String>,
    /// Artifact id of the parent.
    parent_artifact_id: &'a Option<String>,
    /// Version of the parent; also the fallback for the project's version.
    parent_version: &'a Option<String>,
    /// Value exposed as `project.name`.
    project_name: &'a Option<String>,
    /// Value exposed as `project.packaging`.
    project_packaging: &'a Option<String>,
}

/// Builds the map of built-in properties available to `${...}` placeholders.
///
/// - `project.groupId` / `pom.groupId` use the project's group id, falling
///   back to the parent's.
/// - `project.version` / `pom.version` use the project's version, falling
///   back to the parent's.
/// - Parent group id, artifact id and version are each exposed under the
///   `project.parent.*`, `pom.parent.*` and bare `parent.*` spellings.
/// - `project.packaging` and `project.name` are exposed when present.
///
/// Inputs that are `None` contribute no entries.
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    /// Inserts `value` under every key in `keys`.
    fn insert_aliases(builtins: &mut HashMap<String, String>, keys: &[&str], value: &str) {
        for key in keys {
            builtins.insert((*key).to_string(), value.to_string());
        }
    }

    let mut builtins = HashMap::new();

    // Group id and version are inherited from the parent when not declared.
    let effective_group_id = inputs
        .namespace
        .as_deref()
        .or(inputs.parent_group_id.as_deref());
    let effective_version = inputs
        .version
        .as_deref()
        .or(inputs.parent_version.as_deref());

    if let Some(group_id) = effective_group_id {
        insert_aliases(
            &mut builtins,
            &["project.groupId", "pom.groupId"],
            group_id,
        );
    }

    if let Some(artifact_id) = inputs.name.as_deref() {
        insert_aliases(
            &mut builtins,
            &["project.artifactId", "pom.artifactId"],
            artifact_id,
        );
    }

    if let Some(version) = effective_version {
        insert_aliases(
            &mut builtins,
            &["project.version", "pom.version"],
            version,
        );
    }

    if let Some(group_id) = inputs.parent_group_id.as_deref() {
        // Same three spellings as the parent artifactId/version below, so
        // `${pom.parent.groupId}` and `${parent.groupId}` resolve as well.
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.groupId",
                "pom.parent.groupId",
                "parent.groupId",
            ],
            group_id,
        );
    }

    if let Some(artifact_id) = inputs.parent_artifact_id.as_deref() {
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.artifactId",
                "pom.parent.artifactId",
                "parent.artifactId",
            ],
            artifact_id,
        );
    }

    if let Some(version) = inputs.parent_version.as_deref() {
        insert_aliases(
            &mut builtins,
            &[
                "project.parent.version",
                "pom.parent.version",
                "parent.version",
            ],
            version,
        );
    }

    if let Some(packaging) = inputs.project_packaging.as_deref() {
        insert_aliases(&mut builtins, &["project.packaging"], packaging);
    }

    if let Some(name) = inputs.project_name.as_deref() {
        insert_aliases(&mut builtins, &["project.name"], name);
    }

    builtins
}
729
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// Files named exactly `pom.properties` or `MANIFEST.MF` are routed to their
/// dedicated parsers; any other path is read as a POM XML document.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
pub struct MavenParser;
735
736impl PackageParser for MavenParser {
737    const PACKAGE_TYPE: PackageType = PackageType::Maven;
738
739    fn extract_packages(path: &Path) -> Vec<PackageData> {
740        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
741            if filename == "pom.properties" {
742                return vec![parse_pom_properties(path)];
743            } else if filename == "MANIFEST.MF" {
744                return vec![parse_manifest_mf(path)];
745            }
746        }
747
748        let file = match File::open(path) {
749            Ok(f) => f,
750            Err(e) => {
751                warn!("Failed to open pom.xml at {:?}: {}", path, e);
752                return vec![default_package_data(DatasourceId::MavenPom)];
753            }
754        };
755
756        let mut reader = Reader::from_reader(BufReader::new(file));
757        reader.config_mut().trim_text(true);
758
759        let mut buf = Vec::new();
760        let mut package_data = default_package_data(DatasourceId::MavenPom);
761        package_data.package_type = Some(Self::PACKAGE_TYPE);
762        package_data.primary_language = Some("Java".to_string());
763        package_data.datasource_id = Some(DatasourceId::MavenPom);
764
765        let mut current_element = Vec::new();
766        let mut in_dependencies = false;
767        let mut current_dependency: Option<Dependency> = None;
768        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
769        let mut current_dependency_data: Option<MavenDependencyData> = None;
770
771        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
772        let mut xml_license_comments: Vec<String> = Vec::new();
773        let mut current_license: Option<MavenLicenseEntry> = None;
774        let mut inception_year = None;
775        let mut scm_connection = None;
776        let mut scm_developer_connection = None;
777        let mut scm_url = None;
778        let mut scm_tag = None;
779        let mut organization_name = None;
780        let mut organization_url = None;
781        let mut in_developers = false;
782        let mut in_contributors = false;
783        let mut current_party: Option<Party> = None;
784        let mut issue_management_system = None;
785        let mut issue_management_url = None;
786        let mut ci_management_system = None;
787        let mut ci_management_url = None;
788        let mut in_distribution_management = false;
789        let mut in_dist_repository = false;
790        let mut in_dist_snapshot_repository = false;
791        let mut in_dist_site = false;
792        let mut dist_download_url = None;
793        let mut dist_repository_id = None;
794        let mut dist_repository_name = None;
795        let mut dist_repository_url = None;
796        let mut dist_repository_layout = None;
797        let mut dist_snapshot_repository_id = None;
798        let mut dist_snapshot_repository_name = None;
799        let mut dist_snapshot_repository_url = None;
800        let mut dist_snapshot_repository_layout = None;
801        let mut dist_site_id = None;
802        let mut dist_site_name = None;
803        let mut dist_site_url = None;
804        let mut in_repositories = false;
805        let mut in_plugin_repositories = false;
806        let mut in_repository = false;
807        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
808        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
809        let mut current_repository_id = None;
810        let mut current_repository_name = None;
811        let mut current_repository_url = None;
812        let mut in_modules = false;
813        let mut modules: Vec<String> = Vec::new();
814        let mut in_mailing_lists = false;
815        let mut in_mailing_list = false;
816        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
817        let mut current_mailing_list_name = None;
818        let mut current_mailing_list_subscribe = None;
819        let mut current_mailing_list_unsubscribe = None;
820        let mut current_mailing_list_post = None;
821        let mut current_mailing_list_archive = None;
822        let mut in_dependency_management = false;
823        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
824        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
825        let mut in_dep_mgmt_dependency = false;
826        let mut in_parent = false;
827        let mut parent_group_id = None;
828        let mut parent_artifact_id = None;
829        let mut parent_version = None;
830        let mut parent_relative_path = None;
831        let mut in_properties = false;
832        let mut properties: HashMap<String, String> = HashMap::new();
833        let mut project_name = None;
834        let mut project_description = None;
835        let mut project_packaging = None;
836        let mut project_classifier = None;
837        let mut in_relocation = false;
838        let mut relocation = MavenDependencyData::default();
839
840        loop {
841            match reader.read_event_into(&mut buf) {
842                Ok(Event::Start(e)) => {
843                    let element_name = e.name().as_ref().to_vec();
844                    current_element.push(element_name.clone());
845
846                    match element_name.as_slice() {
847                        b"parent" => in_parent = true,
848                        b"dependencyManagement" => in_dependency_management = true,
849                        b"dependencies" if in_dependency_management => {}
850                        b"dependencies" => in_dependencies = true,
851                        b"dependency" if in_dependency_management => {
852                            in_dep_mgmt_dependency = true;
853                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
854                        }
855                        b"dependency" if in_dependencies => {
856                            current_dependency = Some(Dependency {
857                                purl: None,
858                                extracted_requirement: None,
859                                scope: None,
860                                is_runtime: None,
861                                is_optional: Some(false),
862                                is_pinned: None,
863                                is_direct: Some(true),
864                                resolved_package: None,
865                                extra_data: None,
866                            });
867                            current_dependency_data = Some(MavenDependencyData::default());
868                        }
869                        b"properties" => in_properties = true,
870                        b"developers" => in_developers = true,
871                        b"developer" if in_developers => {
872                            current_party = Some(Party {
873                                r#type: Some("person".to_string()),
874                                role: Some("developer".to_string()),
875                                name: None,
876                                email: None,
877                                url: None,
878                                organization: None,
879                                organization_url: None,
880                                timezone: None,
881                            });
882                        }
883                        b"contributors" => in_contributors = true,
884                        b"contributor" if in_contributors => {
885                            current_party = Some(Party {
886                                r#type: Some("person".to_string()),
887                                role: Some("contributor".to_string()),
888                                name: None,
889                                email: None,
890                                url: None,
891                                organization: None,
892                                organization_url: None,
893                                timezone: None,
894                            });
895                        }
896                        b"license" => current_license = Some(MavenLicenseEntry::default()),
897                        b"distributionManagement" => in_distribution_management = true,
898                        b"relocation" if in_distribution_management => {
899                            in_relocation = true;
900                            relocation = MavenDependencyData::default();
901                        }
902                        b"repository" if in_distribution_management => in_dist_repository = true,
903                        b"snapshotRepository" if in_distribution_management => {
904                            in_dist_snapshot_repository = true
905                        }
906                        b"site" if in_distribution_management => in_dist_site = true,
907                        b"repositories" => in_repositories = true,
908                        b"pluginRepositories" => in_plugin_repositories = true,
909                        b"repository" if in_repositories && !in_distribution_management => {
910                            in_repository = true;
911                            current_repository_id = None;
912                            current_repository_name = None;
913                            current_repository_url = None;
914                        }
915                        b"pluginRepository" if in_plugin_repositories => {
916                            in_repository = true;
917                            current_repository_id = None;
918                            current_repository_name = None;
919                            current_repository_url = None;
920                        }
921                        b"modules" => in_modules = true,
922                        b"mailingLists" => in_mailing_lists = true,
923                        b"mailingList" if in_mailing_lists => {
924                            in_mailing_list = true;
925                            current_mailing_list_name = None;
926                            current_mailing_list_subscribe = None;
927                            current_mailing_list_unsubscribe = None;
928                            current_mailing_list_post = None;
929                            current_mailing_list_archive = None;
930                        }
931                        _ => {}
932                    }
933                }
934                Ok(Event::Text(e)) => {
935                    let text = e.decode().unwrap_or_default().to_string();
936                    let current_path = current_element.last().map(|v| v.as_slice());
937                    let current_parent = current_element
938                        .len()
939                        .checked_sub(2)
940                        .map(|index| current_element[index].as_slice());
941
942                    if in_properties
943                        && current_element.len() >= 2
944                        && current_element[current_element.len() - 2] == b"properties"
945                    {
946                        if let Some(property_name) = current_element
947                            .last()
948                            .and_then(|name| std::str::from_utf8(name).ok())
949                        {
950                            properties.insert(property_name.to_string(), text);
951                        } else {
952                            warn!("Failed to decode Maven property name in {:?}", path);
953                        }
954                    } else if in_dep_mgmt_dependency {
955                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
956                            match current_path {
957                                Some(b"groupId") if current_parent == Some(b"dependency") => {
958                                    dep_mgmt.group_id = Some(text)
959                                }
960                                Some(b"artifactId") if current_parent == Some(b"dependency") => {
961                                    dep_mgmt.artifact_id = Some(text)
962                                }
963                                Some(b"version") if current_parent == Some(b"dependency") => {
964                                    dep_mgmt.version = Some(text)
965                                }
966                                Some(b"scope") if current_parent == Some(b"dependency") => {
967                                    dep_mgmt.scope = Some(text)
968                                }
969                                Some(b"type") if current_parent == Some(b"dependency") => {
970                                    dep_mgmt.type_ = Some(text)
971                                }
972                                Some(b"classifier") if current_parent == Some(b"dependency") => {
973                                    dep_mgmt.classifier = Some(text)
974                                }
975                                Some(b"optional") if current_parent == Some(b"dependency") => {
976                                    dep_mgmt.optional = Some(text)
977                                }
978                                _ => {}
979                            }
980                        }
981                    } else if let Some(license) = &mut current_license {
982                        match current_path {
983                            Some(b"name") => license.name = Some(text),
984                            Some(b"url") => license.url = Some(text),
985                            Some(b"comments") => license.comments = Some(text),
986                            _ => {}
987                        }
988                    } else if let Some(party) = &mut current_party {
989                        match current_path {
990                            Some(b"name") => party.name = Some(text),
991                            Some(b"email") => party.email = Some(text),
992                            Some(b"url") => party.url = Some(text),
993                            Some(b"organization") => party.organization = Some(text),
994                            Some(b"organizationUrl") => party.organization_url = Some(text),
995                            Some(b"timezone") => party.timezone = Some(text),
996                            _ => {}
997                        }
998                    } else if let Some(dep) = &mut current_dependency {
999                        match current_path {
1000                            Some(b"groupId") => {
1001                                if current_parent == Some(b"dependency")
1002                                    && let Some(coords) = current_dependency_data.as_mut()
1003                                {
1004                                    coords.group_id = Some(text);
1005                                }
1006                            }
1007                            Some(b"artifactId") => {
1008                                if current_parent == Some(b"dependency")
1009                                    && let Some(coords) = current_dependency_data.as_mut()
1010                                {
1011                                    coords.artifact_id = Some(text);
1012                                }
1013                            }
1014                            Some(b"version") => {
1015                                if current_parent == Some(b"dependency")
1016                                    && let Some(coords) = current_dependency_data.as_mut()
1017                                {
1018                                    coords.version = Some(text);
1019                                }
1020                            }
1021                            Some(b"scope") => {
1022                                if current_parent == Some(b"dependency") {
1023                                    dep.scope = Some(text.clone());
1024                                    dep.is_optional = Some(text == "test" || text == "provided");
1025                                    dep.is_runtime = Some(text != "test" && text != "provided");
1026                                }
1027                                if current_parent == Some(b"dependency")
1028                                    && let Some(coords) = current_dependency_data.as_mut()
1029                                {
1030                                    coords.scope = Some(text);
1031                                }
1032                            }
1033                            Some(b"optional") => {
1034                                if current_parent == Some(b"dependency")
1035                                    && let Some(coords) = current_dependency_data.as_mut()
1036                                {
1037                                    coords.optional = Some(text);
1038                                }
1039                            }
1040                            Some(b"type") => {
1041                                if current_parent == Some(b"dependency")
1042                                    && let Some(coords) = current_dependency_data.as_mut()
1043                                {
1044                                    coords.type_ = Some(text);
1045                                }
1046                            }
1047                            Some(b"classifier") => {
1048                                if current_parent == Some(b"dependency")
1049                                    && let Some(coords) = current_dependency_data.as_mut()
1050                                {
1051                                    coords.classifier = Some(text);
1052                                }
1053                            }
1054                            Some(b"systemPath") => {
1055                                if current_parent == Some(b"dependency")
1056                                    && let Some(coords) = current_dependency_data.as_mut()
1057                                {
1058                                    coords.system_path = Some(text);
1059                                }
1060                            }
1061                            _ => {}
1062                        }
1063                    } else if in_relocation {
1064                        match current_path {
1065                            Some(b"groupId") => relocation.group_id = Some(text),
1066                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1067                            Some(b"version") => relocation.version = Some(text),
1068                            Some(b"classifier") => relocation.classifier = Some(text),
1069                            Some(b"type") => relocation.type_ = Some(text),
1070                            Some(b"message") => relocation.message = Some(text),
1071                            _ => {}
1072                        }
1073                    } else if in_parent {
1074                        match current_path {
1075                            Some(b"groupId") => {
1076                                parent_group_id = Some(text);
1077                            }
1078                            Some(b"artifactId") => {
1079                                parent_artifact_id = Some(text);
1080                            }
1081                            Some(b"version") => {
1082                                parent_version = Some(text);
1083                            }
1084                            Some(b"relativePath") => {
1085                                parent_relative_path = Some(text);
1086                            }
1087                            _ => {}
1088                        }
1089                    } else {
1090                        match current_path {
1091                            Some(b"groupId") if current_element.len() == 2 => {
1092                                package_data.namespace = Some(text)
1093                            }
1094                            Some(b"artifactId") if current_element.len() == 2 => {
1095                                package_data.name = Some(text)
1096                            }
1097                            Some(b"version") if current_element.len() == 2 => {
1098                                package_data.version = Some(text)
1099                            }
1100                            Some(b"name") if current_element.len() == 2 => {
1101                                project_name = Some(text)
1102                            }
1103                            Some(b"description") if current_element.len() == 2 => {
1104                                project_description = Some(text)
1105                            }
1106                            Some(b"packaging") if current_element.len() == 2 => {
1107                                project_packaging = Some(text)
1108                            }
1109                            Some(b"classifier") if current_element.len() == 2 => {
1110                                project_classifier = Some(text)
1111                            }
1112                            Some(b"url") if current_element.len() == 2 => {
1113                                package_data.homepage_url = Some(text)
1114                            }
1115                            Some(b"inceptionYear") if current_element.len() == 2 => {
1116                                inception_year = Some(text)
1117                            }
1118                            Some(b"connection")
1119                                if current_element.len() >= 3
1120                                    && current_element[current_element.len() - 2] == b"scm" =>
1121                            {
1122                                scm_connection = if text.starts_with("scm:git:") {
1123                                    Some(text.replacen("scm:git:", "git+", 1))
1124                                } else if text.starts_with("scm:") {
1125                                    Some(text.replacen("scm:", "", 1))
1126                                } else {
1127                                    Some(text)
1128                                };
1129                            }
1130                            Some(b"developerConnection")
1131                                if current_element.len() >= 3
1132                                    && current_element[current_element.len() - 2] == b"scm" =>
1133                            {
1134                                scm_developer_connection = if text.starts_with("scm:git:") {
1135                                    Some(text.replacen("scm:git:", "git+", 1))
1136                                } else if text.starts_with("scm:") {
1137                                    Some(text.replacen("scm:", "", 1))
1138                                } else {
1139                                    Some(text)
1140                                };
1141                            }
1142                            Some(b"url")
1143                                if current_element.len() >= 3
1144                                    && current_element[current_element.len() - 2] == b"scm" =>
1145                            {
1146                                scm_url = Some(text);
1147                            }
1148                            Some(b"tag")
1149                                if current_element.len() >= 3
1150                                    && current_element[current_element.len() - 2] == b"scm" =>
1151                            {
1152                                scm_tag = Some(text);
1153                            }
1154                            Some(b"name")
1155                                if current_element.len() >= 2
1156                                    && current_element[current_element.len() - 2]
1157                                        == b"organization" =>
1158                            {
1159                                organization_name = Some(text);
1160                            }
1161                            Some(b"url")
1162                                if current_element.len() >= 2
1163                                    && current_element[current_element.len() - 2]
1164                                        == b"organization" =>
1165                            {
1166                                organization_url = Some(text);
1167                            }
1168                            Some(b"system")
1169                                if current_element.len() >= 2
1170                                    && current_element[current_element.len() - 2]
1171                                        == b"issueManagement" =>
1172                            {
1173                                issue_management_system = Some(text);
1174                            }
1175                            Some(b"url")
1176                                if current_element.len() >= 2
1177                                    && current_element[current_element.len() - 2]
1178                                        == b"issueManagement" =>
1179                            {
1180                                issue_management_url = Some(text);
1181                            }
1182                            Some(b"system")
1183                                if current_element.len() >= 2
1184                                    && current_element[current_element.len() - 2]
1185                                        == b"ciManagement" =>
1186                            {
1187                                ci_management_system = Some(text);
1188                            }
1189                            Some(b"url")
1190                                if current_element.len() >= 2
1191                                    && current_element[current_element.len() - 2]
1192                                        == b"ciManagement" =>
1193                            {
1194                                ci_management_url = Some(text);
1195                            }
1196                            Some(b"downloadUrl")
1197                                if current_element.len() >= 2
1198                                    && current_element[current_element.len() - 2]
1199                                        == b"distributionManagement" =>
1200                            {
1201                                dist_download_url = Some(text);
1202                            }
1203                            Some(b"id") if in_dist_repository => {
1204                                dist_repository_id = Some(text);
1205                            }
1206                            Some(b"name") if in_dist_repository => {
1207                                dist_repository_name = Some(text);
1208                            }
1209                            Some(b"url") if in_dist_repository => {
1210                                dist_repository_url = Some(text);
1211                            }
1212                            Some(b"layout") if in_dist_repository => {
1213                                dist_repository_layout = Some(text);
1214                            }
1215                            Some(b"id") if in_dist_snapshot_repository => {
1216                                dist_snapshot_repository_id = Some(text);
1217                            }
1218                            Some(b"name") if in_dist_snapshot_repository => {
1219                                dist_snapshot_repository_name = Some(text);
1220                            }
1221                            Some(b"url") if in_dist_snapshot_repository => {
1222                                dist_snapshot_repository_url = Some(text);
1223                            }
1224                            Some(b"layout") if in_dist_snapshot_repository => {
1225                                dist_snapshot_repository_layout = Some(text);
1226                            }
1227                            Some(b"id") if in_dist_site => {
1228                                dist_site_id = Some(text);
1229                            }
1230                            Some(b"name") if in_dist_site => {
1231                                dist_site_name = Some(text);
1232                            }
1233                            Some(b"url") if in_dist_site => {
1234                                dist_site_url = Some(text);
1235                            }
1236                            Some(b"id") if in_repository => {
1237                                current_repository_id = Some(text);
1238                            }
1239                            Some(b"name") if in_repository => {
1240                                current_repository_name = Some(text);
1241                            }
1242                            Some(b"url") if in_repository => {
1243                                current_repository_url = Some(text);
1244                            }
1245                            Some(b"module") if in_modules => {
1246                                modules.push(text);
1247                            }
1248                            Some(b"name") if in_mailing_list => {
1249                                current_mailing_list_name = Some(text);
1250                            }
1251                            Some(b"subscribe") if in_mailing_list => {
1252                                current_mailing_list_subscribe = Some(text);
1253                            }
1254                            Some(b"unsubscribe") if in_mailing_list => {
1255                                current_mailing_list_unsubscribe = Some(text);
1256                            }
1257                            Some(b"post") if in_mailing_list => {
1258                                current_mailing_list_post = Some(text);
1259                            }
1260                            Some(b"archive") if in_mailing_list => {
1261                                current_mailing_list_archive = Some(text);
1262                            }
1263                            _ => {}
1264                        }
1265                    }
1266                }
1267                Ok(Event::Comment(e)) => {
1268                    let comment = e.decode().unwrap_or_default().trim().to_string();
1269                    if current_element.is_empty()
1270                        && !comment.is_empty()
1271                        && is_license_like_comment(&comment)
1272                    {
1273                        xml_license_comments.push(comment);
1274                    }
1275                }
1276                Ok(Event::End(e)) => {
1277                    if !current_element.is_empty() {
1278                        current_element.pop();
1279                    }
1280
1281                    match e.name().as_ref() {
1282                        b"parent" => in_parent = false,
1283                        b"dependencyManagement" => in_dependency_management = false,
1284                        b"dependencies" => in_dependencies = false,
1285                        b"dependency" if in_dep_mgmt_dependency => {
1286                            in_dep_mgmt_dependency = false;
1287                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1288                                && (dep_mgmt.group_id.is_some()
1289                                    || dep_mgmt.artifact_id.is_some()
1290                                    || dep_mgmt.version.is_some())
1291                            {
1292                                dependency_management_entries.push(dep_mgmt);
1293                            }
1294                        }
1295                        b"dependency" => {
1296                            if let (Some(dep), Some(coords)) =
1297                                (current_dependency.take(), current_dependency_data.take())
1298                            {
1299                                package_data.dependencies.push(dep);
1300                                dependency_data.push(coords);
1301                            } else if let Some(dep) = current_dependency.take() {
1302                                package_data.dependencies.push(dep);
1303                            }
1304                        }
1305                        b"license" => {
1306                            if let Some(license) = current_license.take()
1307                                && (license.name.is_some()
1308                                    || license.url.is_some()
1309                                    || license.comments.is_some())
1310                            {
1311                                licenses.push(license);
1312                            }
1313                        }
1314                        b"developers" => in_developers = false,
1315                        b"developer" => {
1316                            if let Some(party) = current_party.take() {
1317                                package_data.parties.push(party);
1318                            }
1319                        }
1320                        b"contributors" => in_contributors = false,
1321                        b"contributor" => {
1322                            if let Some(party) = current_party.take() {
1323                                package_data.parties.push(party);
1324                            }
1325                        }
1326                        b"distributionManagement" => in_distribution_management = false,
1327                        b"relocation" => in_relocation = false,
1328                        b"repository" if !in_dependencies && in_distribution_management => {
1329                            in_dist_repository = false
1330                        }
1331                        b"repository" if !in_dependencies && in_repositories => {
1332                            in_repository = false;
1333                            if current_repository_id.is_some()
1334                                || current_repository_name.is_some()
1335                                || current_repository_url.is_some()
1336                            {
1337                                let mut repo = serde_json::Map::new();
1338                                if let Some(id) = current_repository_id.take() {
1339                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1340                                }
1341                                if let Some(name) = current_repository_name.take() {
1342                                    repo.insert(
1343                                        "name".to_string(),
1344                                        serde_json::Value::String(name),
1345                                    );
1346                                }
1347                                if let Some(url) = current_repository_url.take() {
1348                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1349                                }
1350                                repositories.push(repo);
1351                            }
1352                        }
1353                        b"pluginRepository" if in_plugin_repositories => {
1354                            in_repository = false;
1355                            if current_repository_id.is_some()
1356                                || current_repository_name.is_some()
1357                                || current_repository_url.is_some()
1358                            {
1359                                let mut repo = serde_json::Map::new();
1360                                if let Some(id) = current_repository_id.take() {
1361                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1362                                }
1363                                if let Some(name) = current_repository_name.take() {
1364                                    repo.insert(
1365                                        "name".to_string(),
1366                                        serde_json::Value::String(name),
1367                                    );
1368                                }
1369                                if let Some(url) = current_repository_url.take() {
1370                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1371                                }
1372                                plugin_repositories.push(repo);
1373                            }
1374                        }
1375                        b"repositories" => in_repositories = false,
1376                        b"properties" => in_properties = false,
1377                        b"pluginRepositories" => in_plugin_repositories = false,
1378                        b"modules" => in_modules = false,
1379                        b"mailingLists" => in_mailing_lists = false,
1380                        b"mailingList" => {
1381                            in_mailing_list = false;
1382                            if current_mailing_list_name.is_some()
1383                                || current_mailing_list_subscribe.is_some()
1384                                || current_mailing_list_unsubscribe.is_some()
1385                                || current_mailing_list_post.is_some()
1386                                || current_mailing_list_archive.is_some()
1387                            {
1388                                let mut ml = serde_json::Map::new();
1389                                if let Some(name) = current_mailing_list_name.take() {
1390                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1391                                }
1392                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1393                                    ml.insert(
1394                                        "subscribe".to_string(),
1395                                        serde_json::Value::String(subscribe),
1396                                    );
1397                                }
1398                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1399                                    ml.insert(
1400                                        "unsubscribe".to_string(),
1401                                        serde_json::Value::String(unsubscribe),
1402                                    );
1403                                }
1404                                if let Some(post) = current_mailing_list_post.take() {
1405                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1406                                }
1407                                if let Some(archive) = current_mailing_list_archive.take() {
1408                                    ml.insert(
1409                                        "archive".to_string(),
1410                                        serde_json::Value::String(archive),
1411                                    );
1412                                }
1413                                mailing_lists.push(ml);
1414                            }
1415                        }
1416                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1417                        b"site" => in_dist_site = false,
1418                        _ => {}
1419                    }
1420                }
1421                Ok(Event::Eof) => break,
1422                Err(e) => {
1423                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1424                    return vec![package_data];
1425                }
1426                _ => {}
1427            }
1428            buf.clear();
1429        }
1430
1431        let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1432            namespace: &package_data.namespace,
1433            name: &package_data.name,
1434            version: &package_data.version,
1435            parent_group_id: &parent_group_id,
1436            parent_artifact_id: &parent_artifact_id,
1437            parent_version: &parent_version,
1438            project_name: &project_name,
1439            project_packaging: &project_packaging,
1440        });
1441        let mut resolver = PropertyResolver::new(properties, builtins);
1442
1443        resolve_option(&mut resolver, &mut package_data.namespace);
1444        resolve_option(&mut resolver, &mut package_data.name);
1445        resolve_option(&mut resolver, &mut package_data.version);
1446        resolve_option(&mut resolver, &mut package_data.homepage_url);
1447        resolve_option(&mut resolver, &mut inception_year);
1448        resolve_option(&mut resolver, &mut scm_connection);
1449        resolve_option(&mut resolver, &mut scm_developer_connection);
1450        resolve_option(&mut resolver, &mut scm_url);
1451        resolve_option(&mut resolver, &mut scm_tag);
1452        resolve_option(&mut resolver, &mut organization_name);
1453        resolve_option(&mut resolver, &mut organization_url);
1454        resolve_option(&mut resolver, &mut issue_management_system);
1455        resolve_option(&mut resolver, &mut issue_management_url);
1456        resolve_option(&mut resolver, &mut ci_management_system);
1457        resolve_option(&mut resolver, &mut ci_management_url);
1458        resolve_option(&mut resolver, &mut dist_download_url);
1459        resolve_option(&mut resolver, &mut dist_repository_id);
1460        resolve_option(&mut resolver, &mut dist_repository_name);
1461        resolve_option(&mut resolver, &mut dist_repository_url);
1462        resolve_option(&mut resolver, &mut dist_repository_layout);
1463        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1464        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1465        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1466        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1467        resolve_option(&mut resolver, &mut dist_site_id);
1468        resolve_option(&mut resolver, &mut dist_site_name);
1469        resolve_option(&mut resolver, &mut dist_site_url);
1470        resolve_option(&mut resolver, &mut parent_group_id);
1471        resolve_option(&mut resolver, &mut parent_artifact_id);
1472        resolve_option(&mut resolver, &mut parent_version);
1473        resolve_option(&mut resolver, &mut parent_relative_path);
1474        resolve_option(&mut resolver, &mut project_name);
1475        resolve_option(&mut resolver, &mut project_description);
1476        resolve_option(&mut resolver, &mut project_packaging);
1477        resolve_option(&mut resolver, &mut project_classifier);
1478        resolve_vec(&mut resolver, &mut modules);
1479        resolve_maps(&mut resolver, &mut repositories);
1480        resolve_maps(&mut resolver, &mut plugin_repositories);
1481        resolve_maps(&mut resolver, &mut mailing_lists);
1482        for comment in &mut xml_license_comments {
1483            *comment = resolver.resolve_text(comment, 0);
1484        }
1485        for dependency in &mut dependency_management_entries {
1486            resolve_dependency_data(&mut resolver, dependency);
1487        }
1488        resolve_dependency_data(&mut resolver, &mut relocation);
1489        for license in &mut licenses {
1490            resolve_license_entry(&mut resolver, license);
1491        }
1492        for comment in xml_license_comments {
1493            if !comment.trim().is_empty() {
1494                licenses.push(MavenLicenseEntry {
1495                    comments: Some(comment),
1496                    ..Default::default()
1497                });
1498            }
1499        }
1500
1501        for (dependency, coords) in package_data
1502            .dependencies
1503            .iter_mut()
1504            .zip(dependency_data.iter_mut())
1505        {
1506            resolve_dependency_data(&mut resolver, coords);
1507            dependency.scope = coords.scope.clone();
1508            dependency.extracted_requirement = coords.version.clone();
1509            dependency.extra_data = dependency_extra_data(coords);
1510            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1511
1512            match dependency.scope.as_deref() {
1513                Some("test") | Some("provided") => {
1514                    dependency.is_runtime = Some(false);
1515                    dependency.is_optional = Some(true);
1516                }
1517                Some(_) => {
1518                    dependency.is_runtime = Some(true);
1519                }
1520                None => {
1521                    dependency.is_runtime = None;
1522                }
1523            }
1524
1525            if let Some(version) = &coords.version {
1526                dependency.is_pinned = Some(is_maven_version_pinned(version));
1527            }
1528
1529            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1530                dependency.purl = Some(build_maven_purl(
1531                    group_id,
1532                    artifact_id,
1533                    coords.version.as_deref(),
1534                    coords.classifier.as_deref(),
1535                    coords.type_.as_deref(),
1536                ));
1537            }
1538        }
1539
1540        if package_data.namespace.is_none() {
1541            package_data.namespace = parent_group_id.clone();
1542        }
1543        if package_data.version.is_none() {
1544            package_data.version = parent_version.clone();
1545        }
1546
1547        package_data.qualifiers =
1548            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1549
1550        package_data.description = match (
1551            project_name.as_deref().filter(|value| !value.is_empty()),
1552            project_description
1553                .as_deref()
1554                .filter(|value| !value.is_empty()),
1555        ) {
1556            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1557            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1558            (Some(name), None) => Some(name.to_string()),
1559            (None, Some(description)) => Some(description.to_string()),
1560            (None, None) => None,
1561        };
1562
1563        if path.to_string_lossy().contains("META-INF/maven/") {
1564            let path_str = path.to_string_lossy();
1565            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1566                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1567                let parts: Vec<&str> = after_maven.split('/').collect();
1568                if parts.len() >= 2 {
1569                    if package_data.namespace.is_none() {
1570                        package_data.namespace = Some(parts[0].to_string());
1571                    }
1572                    if package_data.name.is_none() {
1573                        package_data.name = Some(parts[1].to_string());
1574                    }
1575                }
1576            }
1577        }
1578
1579        // Construct PURL from parsed data
1580        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1581            &package_data.namespace,
1582            &package_data.name,
1583            &package_data.version,
1584        ) {
1585            package_data.purl = Some(build_maven_purl(
1586                group_id,
1587                artifact_id,
1588                Some(version),
1589                project_classifier.as_deref(),
1590                project_packaging.as_deref(),
1591            ));
1592            if project_classifier.is_none() {
1593                package_data
1594                    .source_packages
1595                    .push(build_maven_source_package(group_id, artifact_id, version));
1596            }
1597        }
1598
1599        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1600            package_data.repository_homepage_url = build_maven_url(
1601                &package_data.namespace,
1602                &package_data.name,
1603                &package_data.version,
1604                None,
1605            );
1606
1607            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1608                build_maven_download_url(
1609                    group_id,
1610                    artifact_id,
1611                    ver,
1612                    project_classifier.as_deref(),
1613                    project_packaging.as_deref(),
1614                )
1615            });
1616
1617            if let Some(ver) = &package_data.version {
1618                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1619                package_data.api_data_url = build_maven_url(
1620                    &package_data.namespace,
1621                    &package_data.name,
1622                    &package_data.version,
1623                    Some(&pom_filename),
1624                );
1625            }
1626        }
1627
1628        package_data.vcs_url = scm_connection
1629            .or_else(|| scm_developer_connection.clone())
1630            .or_else(|| scm_url.clone());
1631
1632        // Set code_view_url from scm/url (human-browseable URL)
1633        if let Some(url) = &scm_url {
1634            package_data.code_view_url = Some(url.clone());
1635        }
1636
1637        // Set bug_tracking_url from issueManagement/url
1638        if let Some(url) = &issue_management_url {
1639            package_data.bug_tracking_url = Some(url.clone());
1640        }
1641
1642        // Map downloadUrl to download_url field
1643        if let Some(url) = &dist_download_url {
1644            package_data.download_url = Some(url.clone());
1645        }
1646
1647        if organization_name.is_some() || organization_url.is_some() {
1648            package_data.parties.push(Party {
1649                r#type: Some("organization".to_string()),
1650                role: Some("owner".to_string()),
1651                name: organization_name.clone(),
1652                email: None,
1653                url: organization_url.clone(),
1654                organization: None,
1655                organization_url: None,
1656                timezone: None,
1657            });
1658        }
1659
1660        for dependency in &dependency_management_entries {
1661            let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1662                Some("import")
1663            } else {
1664                Some("dependencymanagement")
1665            };
1666
1667            if let Some(converted) =
1668                maven_dependency_to_dependency(dependency, fallback_scope, true)
1669            {
1670                package_data.dependencies.push(converted);
1671            }
1672        }
1673
1674        if (relocation.group_id.is_some()
1675            || relocation.artifact_id.is_some()
1676            || relocation.version.is_some())
1677            && let Some(converted) =
1678                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1679        {
1680            package_data.dependencies.push(converted);
1681        }
1682
1683        if inception_year.is_some()
1684            || organization_name.is_some()
1685            || organization_url.is_some()
1686            || scm_tag.is_some()
1687            || scm_developer_connection.is_some()
1688            || issue_management_system.is_some()
1689            || ci_management_system.is_some()
1690            || ci_management_url.is_some()
1691            || dist_download_url.is_some()
1692            || dist_repository_id.is_some()
1693            || dist_snapshot_repository_id.is_some()
1694            || dist_site_id.is_some()
1695            || !repositories.is_empty()
1696            || !plugin_repositories.is_empty()
1697            || !modules.is_empty()
1698            || !mailing_lists.is_empty()
1699            || !dependency_management_entries.is_empty()
1700            || parent_group_id.is_some()
1701            || relocation.group_id.is_some()
1702            || relocation.artifact_id.is_some()
1703            || relocation.version.is_some()
1704            || relocation.message.is_some()
1705        {
1706            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1707            if let Some(year) = inception_year {
1708                extra_data.insert(
1709                    "inception_year".to_string(),
1710                    serde_json::Value::String(year),
1711                );
1712            }
1713            if let Some(name) = organization_name {
1714                extra_data.insert(
1715                    "organization_name".to_string(),
1716                    serde_json::Value::String(name),
1717                );
1718            }
1719            if let Some(url) = organization_url {
1720                extra_data.insert(
1721                    "organization_url".to_string(),
1722                    serde_json::Value::String(url),
1723                );
1724            }
1725            if let Some(tag) = scm_tag {
1726                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1727            }
1728            if let Some(dev_conn) = scm_developer_connection {
1729                extra_data.insert(
1730                    "scm_developer_connection".to_string(),
1731                    serde_json::Value::String(dev_conn),
1732                );
1733            }
1734            if let Some(system) = issue_management_system {
1735                extra_data.insert(
1736                    "issue_tracking_system".to_string(),
1737                    serde_json::Value::String(system),
1738                );
1739            }
1740            if let Some(system) = ci_management_system {
1741                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1742            }
1743            if let Some(url) = ci_management_url {
1744                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1745            }
1746
1747            // Add distribution management data
1748            if let Some(url) = dist_download_url {
1749                extra_data.insert(
1750                    "distribution_download_url".to_string(),
1751                    serde_json::Value::String(url),
1752                );
1753            }
1754
1755            // Build repository object
1756            if dist_repository_id.is_some()
1757                || dist_repository_name.is_some()
1758                || dist_repository_url.is_some()
1759                || dist_repository_layout.is_some()
1760            {
1761                let mut repo = serde_json::Map::new();
1762                if let Some(id) = dist_repository_id {
1763                    repo.insert("id".to_string(), serde_json::Value::String(id));
1764                }
1765                if let Some(name) = dist_repository_name {
1766                    repo.insert("name".to_string(), serde_json::Value::String(name));
1767                }
1768                if let Some(url) = dist_repository_url {
1769                    repo.insert("url".to_string(), serde_json::Value::String(url));
1770                }
1771                if let Some(layout) = dist_repository_layout {
1772                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1773                }
1774                extra_data.insert(
1775                    "distribution_repository".to_string(),
1776                    serde_json::Value::Object(repo),
1777                );
1778            }
1779
1780            // Build snapshotRepository object
1781            if dist_snapshot_repository_id.is_some()
1782                || dist_snapshot_repository_name.is_some()
1783                || dist_snapshot_repository_url.is_some()
1784                || dist_snapshot_repository_layout.is_some()
1785            {
1786                let mut repo = serde_json::Map::new();
1787                if let Some(id) = dist_snapshot_repository_id {
1788                    repo.insert("id".to_string(), serde_json::Value::String(id));
1789                }
1790                if let Some(name) = dist_snapshot_repository_name {
1791                    repo.insert("name".to_string(), serde_json::Value::String(name));
1792                }
1793                if let Some(url) = dist_snapshot_repository_url {
1794                    repo.insert("url".to_string(), serde_json::Value::String(url));
1795                }
1796                if let Some(layout) = dist_snapshot_repository_layout {
1797                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1798                }
1799                extra_data.insert(
1800                    "distribution_snapshot_repository".to_string(),
1801                    serde_json::Value::Object(repo),
1802                );
1803            }
1804
1805            // Build site object
1806            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1807                let mut site = serde_json::Map::new();
1808                if let Some(id) = dist_site_id {
1809                    site.insert("id".to_string(), serde_json::Value::String(id));
1810                }
1811                if let Some(name) = dist_site_name {
1812                    site.insert("name".to_string(), serde_json::Value::String(name));
1813                }
1814                if let Some(url) = dist_site_url {
1815                    site.insert("url".to_string(), serde_json::Value::String(url));
1816                }
1817                extra_data.insert(
1818                    "distribution_site".to_string(),
1819                    serde_json::Value::Object(site),
1820                );
1821            }
1822
1823            if !repositories.is_empty() {
1824                extra_data.insert(
1825                    "repositories".to_string(),
1826                    serde_json::Value::Array(
1827                        repositories
1828                            .into_iter()
1829                            .map(serde_json::Value::Object)
1830                            .collect(),
1831                    ),
1832                );
1833            }
1834
1835            if !plugin_repositories.is_empty() {
1836                extra_data.insert(
1837                    "plugin_repositories".to_string(),
1838                    serde_json::Value::Array(
1839                        plugin_repositories
1840                            .into_iter()
1841                            .map(serde_json::Value::Object)
1842                            .collect(),
1843                    ),
1844                );
1845            }
1846
1847            if !modules.is_empty() {
1848                extra_data.insert(
1849                    "modules".to_string(),
1850                    serde_json::Value::Array(
1851                        modules.into_iter().map(serde_json::Value::String).collect(),
1852                    ),
1853                );
1854            }
1855
1856            if !mailing_lists.is_empty() {
1857                extra_data.insert(
1858                    "mailing_lists".to_string(),
1859                    serde_json::Value::Array(
1860                        mailing_lists
1861                            .into_iter()
1862                            .map(serde_json::Value::Object)
1863                            .collect(),
1864                    ),
1865                );
1866            }
1867
1868            if !dependency_management_entries.is_empty() {
1869                extra_data.insert(
1870                    "dependency_management".to_string(),
1871                    serde_json::Value::Array(
1872                        dependency_management_entries
1873                            .into_iter()
1874                            .map(|dependency| {
1875                                serde_json::Value::Object(dependency_management_entry_to_value(
1876                                    &dependency,
1877                                ))
1878                            })
1879                            .collect(),
1880                    ),
1881                );
1882            }
1883
1884            if relocation.group_id.is_some()
1885                || relocation.artifact_id.is_some()
1886                || relocation.version.is_some()
1887                || relocation.message.is_some()
1888            {
1889                extra_data.insert(
1890                    "relocation".to_string(),
1891                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1892                );
1893            }
1894
1895            if parent_group_id.is_some()
1896                || parent_artifact_id.is_some()
1897                || parent_version.is_some()
1898                || parent_relative_path.is_some()
1899            {
1900                let mut parent_obj = serde_json::Map::new();
1901                if let Some(group_id) = parent_group_id {
1902                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1903                }
1904                if let Some(artifact_id) = parent_artifact_id {
1905                    parent_obj.insert(
1906                        "artifactId".to_string(),
1907                        serde_json::Value::String(artifact_id),
1908                    );
1909                }
1910                if let Some(version) = parent_version {
1911                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1912                }
1913                if let Some(relative_path) = parent_relative_path {
1914                    parent_obj.insert(
1915                        "relativePath".to_string(),
1916                        serde_json::Value::String(relative_path),
1917                    );
1918                }
1919                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1920            }
1921
1922            package_data.extra_data = Some(extra_data);
1923        }
1924
1925        package_data.extracted_license_statement = build_license_statement(&licenses);
1926        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1927            build_maven_declared_license_data(
1928                &licenses,
1929                package_data.extracted_license_statement.as_deref(),
1930            );
1931        package_data.declared_license_expression = declared_license_expression;
1932        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
1933        package_data.license_detections = license_detections;
1934
1935        vec![package_data]
1936    }
1937
1938    fn is_match(path: &Path) -> bool {
1939        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
1940            filename == "pom.xml"
1941                || filename.ends_with(".pom.xml")
1942                || filename.ends_with("-pom.xml")
1943                || filename == "pom.properties"
1944                || filename == "MANIFEST.MF"
1945                || filename.ends_with(".pom")
1946        } else {
1947            false
1948        }
1949    }
1950}
1951
/// Builds a URL under `https://repo1.maven.org/maven2` for the given coordinates.
///
/// The dots in `group_id` become path separators, followed by `artifact_id`, then
/// `version` when present, then `filename`. When `filename` is `None` the URL ends
/// with a trailing `/`.
///
/// Returns `None` when either `group_id` or `artifact_id` is missing.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (Some(group), Some(artifact)) = (group_id.as_deref(), artifact_id.as_deref()) else {
        return None;
    };

    // Every segment written so far is followed by a separator, so the final
    // (possibly empty) filename can simply be appended.
    let mut url = format!("{}/{}/{}/", BASE_URL, group.replace('.', "/"), artifact);
    if let Some(ver) = version {
        url.push_str(ver);
        url.push('/');
    }
    url.push_str(filename.unwrap_or_default());

    Some(url)
}
1980
1981fn build_maven_declared_license_data(
1982    licenses: &[MavenLicenseEntry],
1983    matched_text: Option<&str>,
1984) -> (
1985    Option<String>,
1986    Option<String>,
1987    Vec<crate::models::LicenseDetection>,
1988) {
1989    let normalized: Vec<_> = licenses
1990        .iter()
1991        .filter_map(|license| license.name.as_deref())
1992        .filter_map(normalize_maven_license_name)
1993        .collect();
1994
1995    if normalized.is_empty() {
1996        return empty_declared_license_data();
1997    }
1998
1999    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2000        return empty_declared_license_data();
2001    };
2002
2003    build_declared_license_data(
2004        combined,
2005        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2006    )
2007}
2008
2009fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2010    match name.trim() {
2011        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2012            "public-domain",
2013            "LicenseRef-provenant-public-domain",
2014        )),
2015        other => normalize_declared_license_key(other),
2016    }
2017}
2018
2019/// Parse pom.properties file (Java properties format)
2020fn parse_pom_properties(path: &Path) -> PackageData {
2021    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
2022        Ok(content) => content,
2023        Err(e) => {
2024            warn!("Failed to read pom.properties at {:?}: {}", path, e);
2025            return PackageData {
2026                package_type: Some(PackageType::Maven),
2027                primary_language: Some("Java".to_string()),
2028                datasource_id: Some(DatasourceId::MavenPomProperties),
2029                ..Default::default()
2030            };
2031        }
2032    };
2033
2034    let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2035    package_data.package_type = Some(PackageType::Maven);
2036    package_data.primary_language = Some("Java".to_string());
2037    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2038
2039    let mut group_id: Option<String> = None;
2040    let mut artifact_id: Option<String> = None;
2041    let mut version: Option<String> = None;
2042
2043    // Parse Java properties format
2044    let mut continuation = String::new();
2045
2046    for line in content.lines() {
2047        let current_line = if continuation.is_empty() {
2048            line.to_string()
2049        } else {
2050            format!("{}{}", continuation, line)
2051        };
2052        continuation.clear();
2053
2054        // Check for line continuation (backslash at end)
2055        if current_line.ends_with('\\') {
2056            continuation = current_line[..current_line.len() - 1].to_string();
2057            continue;
2058        }
2059
2060        // Skip comments and empty lines
2061        let trimmed = current_line.trim();
2062        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2063            continue;
2064        }
2065
2066        // Parse key=value
2067        if let Some(eq_pos) = current_line.find('=') {
2068            let key = current_line[..eq_pos].trim();
2069            let value = current_line[eq_pos + 1..].trim();
2070
2071            match key {
2072                "groupId" => group_id = Some(value.to_string()),
2073                "artifactId" => artifact_id = Some(value.to_string()),
2074                "version" => version = Some(value.to_string()),
2075                _ => {}
2076            }
2077        }
2078    }
2079
2080    package_data.namespace = group_id.clone();
2081    package_data.name = artifact_id.clone();
2082    package_data.version = version.clone();
2083
2084    // Generate PURL
2085    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2086        &package_data.namespace,
2087        &package_data.name,
2088        &package_data.version,
2089    ) {
2090        package_data.purl = Some(format!(
2091            "pkg:maven/{}/{}@{}",
2092            group_id, artifact_id, version
2093        ));
2094    }
2095
2096    package_data
2097}
2098
2099/// Parse MANIFEST.MF file (JAR manifest format)
2100///
2101/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2102/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2103/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2104/// dependencies.
2105fn parse_manifest_mf(path: &Path) -> PackageData {
2106    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
2107        Ok(content) => content,
2108        Err(e) => {
2109            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2110            return default_package_data(DatasourceId::JavaJarManifest);
2111        }
2112    };
2113
2114    let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2115
2116    // Parse manifest headers (RFC822-style with space continuations)
2117    let mut headers: Vec<(String, String)> = Vec::new();
2118    let mut current_key: Option<String> = None;
2119    let mut current_value = String::new();
2120
2121    for line in content.lines() {
2122        if line.starts_with(' ') || line.starts_with('\t') {
2123            // Continuation line
2124            current_value.push_str(line.trim());
2125        } else if let Some(colon_pos) = line.find(':') {
2126            // Save previous header
2127            if let Some(key) = current_key.take() {
2128                headers.push((key, current_value.trim().to_string()));
2129                current_value.clear();
2130            }
2131
2132            // Start new header
2133            let key = line[..colon_pos].trim().to_string();
2134            let value = line[colon_pos + 1..].trim().to_string();
2135            current_key = Some(key);
2136            current_value = value;
2137        }
2138    }
2139
2140    // Save last header
2141    if let Some(key) = current_key {
2142        headers.push((key, current_value.trim().to_string()));
2143    }
2144
2145    // Convert headers to HashMap for easier lookup
2146    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2147
2148    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2149    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2150    let is_osgi = bundle_symbolic_name.is_some();
2151
2152    if is_osgi {
2153        // OSGi bundle - extract OSGi-specific metadata
2154        package_data.package_type = Some(PackageType::Osgi);
2155        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2156
2157        // Bundle-SymbolicName is the canonical name for OSGi bundles
2158        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2159        if let Some(bsn) = bundle_symbolic_name {
2160            let name = if let Some(semicolon_pos) = bsn.find(';') {
2161                bsn[..semicolon_pos].trim().to_string()
2162            } else {
2163                bsn.clone()
2164            };
2165            package_data.name = Some(name);
2166        }
2167
2168        // Bundle-Version
2169        package_data.version = headers_map.get("Bundle-Version").cloned();
2170
2171        // Bundle-Description takes priority over Bundle-Name for description
2172        if let Some(desc) = headers_map.get("Bundle-Description") {
2173            package_data.description = Some(desc.clone());
2174        } else if let Some(name) = headers_map.get("Bundle-Name") {
2175            package_data.description = Some(name.clone());
2176        }
2177
2178        // Bundle-Vendor
2179        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2180            package_data.parties.push(Party {
2181                r#type: Some("organization".to_string()),
2182                role: Some("vendor".to_string()),
2183                name: Some(vendor.clone()),
2184                email: None,
2185                url: None,
2186                organization: None,
2187                organization_url: None,
2188                timezone: None,
2189            });
2190        }
2191
2192        // Bundle-DocURL
2193        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2194
2195        // Bundle-License
2196        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2197
2198        // Import-Package -> dependencies with scope "import"
2199        if let Some(import_pkg) = headers_map.get("Import-Package") {
2200            let deps = parse_osgi_package_list(import_pkg, "import");
2201            package_data.dependencies.extend(deps);
2202        }
2203
2204        // Require-Bundle -> dependencies with scope "require-bundle"
2205        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2206            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2207            package_data.dependencies.extend(deps);
2208        }
2209
2210        // Export-Package -> store in extra_data
2211        if let Some(export_pkg) = headers_map.get("Export-Package") {
2212            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2213            extra_data.insert(
2214                "export_packages".to_string(),
2215                serde_json::Value::String(export_pkg.clone()),
2216            );
2217            package_data.extra_data = Some(extra_data);
2218        }
2219
2220        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2221        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2222            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2223        }
2224    } else {
2225        // Regular JAR manifest
2226        package_data.package_type = Some(PackageType::Maven);
2227        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2228
2229        // Extract fields with priority order for non-OSGi JARs
2230        let mut name: Option<String> = None;
2231        let mut version: Option<String> = None;
2232        let mut vendor: Option<String> = None;
2233
2234        for (key, value) in &headers {
2235            match key.as_str() {
2236                "Bundle-Name" if name.is_none() => {
2237                    name = Some(value.clone());
2238                }
2239                "Implementation-Title" if name.is_none() => {
2240                    name = Some(value.clone());
2241                }
2242                "Bundle-Version" if version.is_none() => {
2243                    version = Some(value.clone());
2244                }
2245                "Implementation-Version" if version.is_none() => {
2246                    version = Some(value.clone());
2247                }
2248                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2249                    vendor = Some(value.clone());
2250                }
2251                _ => {}
2252            }
2253        }
2254
2255        package_data.name = name;
2256        package_data.version = version;
2257
2258        // Add vendor to parties if present
2259        if let Some(vendor_name) = vendor {
2260            package_data.parties.push(Party {
2261                r#type: Some("organization".to_string()),
2262                role: Some("vendor".to_string()),
2263                name: Some(vendor_name),
2264                email: None,
2265                url: None,
2266                organization: None,
2267                organization_url: None,
2268                timezone: None,
2269            });
2270        }
2271
2272        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2273        if let Some(path_str) = path.to_str()
2274            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2275        {
2276            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2277            let parts: Vec<&str> = after_maven.split('/').collect();
2278            if parts.len() >= 2 {
2279                package_data.namespace = Some(parts[0].to_string());
2280            }
2281        }
2282
2283        // Generate Maven PURL if we have enough information
2284        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2285            &package_data.namespace,
2286            &package_data.name,
2287            &package_data.version,
2288        ) {
2289            package_data.purl = Some(format!(
2290                "pkg:maven/{}/{}@{}",
2291                group_id, artifact_id, version
2292            ));
2293        } else if package_data.name.is_none() && package_data.version.is_none() {
2294            // A bare MANIFEST.MF without Maven coordinates or implementation
2295            // identity is only evidence of a generic JAR manifest, not a Maven
2296            // package. Keep the Java manifest datasource so assembly can still
2297            // merge richer sibling metadata when present.
2298            package_data.package_type = Some(PackageType::Jar);
2299        }
2300    }
2301
2302    package_data
2303}
2304
2305/// Parse OSGi Import-Package header into dependencies.
2306///
2307/// Format: comma-separated list of packages with optional directives:
2308/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2309pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2310    let mut dependencies = Vec::new();
2311
2312    // Split by comma, but be careful not to split within quoted strings
2313    for package_entry in split_osgi_list(package_list) {
2314        let package_entry = package_entry.trim();
2315        if package_entry.is_empty() {
2316            continue;
2317        }
2318
2319        // Extract package name (before first semicolon)
2320        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2321            package_entry[..semicolon_pos].trim()
2322        } else {
2323            package_entry
2324        };
2325
2326        if package_name.is_empty() {
2327            continue;
2328        }
2329
2330        // Extract version directive if present
2331        let version_requirement = extract_osgi_version(package_entry);
2332        let is_optional = package_entry.contains("resolution:=optional");
2333
2334        dependencies.push(Dependency {
2335            purl: Some(format!("pkg:osgi/{}", package_name)),
2336            extracted_requirement: version_requirement,
2337            scope: Some(scope.to_string()),
2338            is_runtime: Some(true),
2339            is_optional: Some(is_optional),
2340            is_pinned: None,
2341            is_direct: Some(true),
2342            resolved_package: None,
2343            extra_data: None,
2344        });
2345    }
2346
2347    dependencies
2348}
2349
2350/// Parse OSGi Require-Bundle header into dependencies.
2351///
2352/// Format: comma-separated list of bundle symbolic names with optional directives:
2353/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2354pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2355    let mut dependencies = Vec::new();
2356
2357    for bundle_entry in split_osgi_list(bundle_list) {
2358        let bundle_entry = bundle_entry.trim();
2359        if bundle_entry.is_empty() {
2360            continue;
2361        }
2362
2363        // Extract bundle symbolic name (before first semicolon)
2364        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2365            bundle_entry[..semicolon_pos].trim()
2366        } else {
2367            bundle_entry
2368        };
2369
2370        if bundle_name.is_empty() {
2371            continue;
2372        }
2373
2374        // Extract bundle-version directive if present
2375        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2376
2377        // Check if optional
2378        let is_optional = bundle_entry.contains("resolution:=optional");
2379
2380        dependencies.push(Dependency {
2381            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2382            extracted_requirement: version_requirement,
2383            scope: Some(scope.to_string()),
2384            is_runtime: Some(!is_optional),
2385            is_optional: Some(is_optional),
2386            is_pinned: None,
2387            is_direct: Some(true),
2388            resolved_package: None,
2389            extra_data: None,
2390        });
2391    }
2392
2393    dependencies
2394}
2395
/// Split an OSGi comma-separated header value into its clauses.
///
/// Commas inside double-quoted strings do not separate clauses, so version
/// ranges survive intact:
/// "foo;version=\"[1.0,2.0)\",bar;version=\"3.0\""
///
/// Each returned clause is trimmed; clauses that are empty after trimming are
/// dropped. Quote characters are kept in the output.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut clauses = Vec::new();
    let mut clause_start = 0;
    let mut quoted = false;

    let mut keep_if_nonblank = |segment: &str| {
        let trimmed = segment.trim();
        if !trimmed.is_empty() {
            clauses.push(trimmed.to_string());
        }
    };

    for (idx, ch) in list.char_indices() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ',' && !quoted {
            keep_if_nonblank(&list[clause_start..idx]);
            // ',' is a single byte, so the next clause starts right after it.
            clause_start = idx + 1;
        }
    }
    keep_if_nonblank(&list[clause_start..]);

    clauses
}
2429
/// Look up the value of the attribute named `directive` in one OSGi clause.
///
/// `entry` is a single clause such as `org.foo;version="[1.0,2.0)";resolution:=optional`.
/// The clause is split on `;` (ignoring semicolons inside double quotes) and
/// the first parameter whose name equals `directive` exactly is used, so
/// asking for `version` never picks up `bundle-version` or
/// `specification-version`.
///
/// Returns the value with surrounding double quotes removed, or the trimmed
/// raw value when it is unquoted. Returns `None` when no parameter has that
/// name or when a quoted value has no closing quote.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split the clause into `;`-separated parameters, respecting quotes.
    let mut parameters: Vec<&str> = Vec::new();
    let mut parameter_start = 0;
    let mut in_quotes = false;
    for (idx, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                parameters.push(&entry[parameter_start..idx]);
                parameter_start = idx + 1;
            }
            _ => {}
        }
    }
    parameters.push(&entry[parameter_start..]);

    parameters.into_iter().find_map(|parameter| {
        // Parameters without '=' are package/bundle names, not attributes.
        let (name, raw_value) = parameter.split_once('=')?;
        if name.trim() != directive {
            return None;
        }

        let raw_value = raw_value.trim();
        match raw_value.strip_prefix('"') {
            Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
            None => Some(raw_value.to_string()),
        }
    })
}
2442
2443pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2444    extract_osgi_directive(entry, "version")
2445}
2446
2447pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2448    extract_osgi_directive(entry, "bundle-version")
2449}
2450
2451fn default_package_data(datasource_id: DatasourceId) -> PackageData {
2452    PackageData {
2453        package_type: Some(PackageType::Maven),
2454        datasource_id: Some(datasource_id),
2455        ..Default::default()
2456    }
2457}
2458
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `pom_content` to a `pom.xml` inside a fresh temporary directory
    /// and returns what the Maven parser extracts from it.
    fn parse_pom(pom_content: &str) -> PackageData {
        let workspace = TempDir::new().unwrap();
        let pom_file = workspace.path().join("pom.xml");
        fs::write(&pom_file, pom_content).unwrap();
        MavenParser::extract_first_package(&pom_file)
    }

    /// Views an optional JSON value as a string slice; `None` unless the value
    /// is present and is a JSON string.
    fn json_str(value: Option<&serde_json::Value>) -> Option<&str> {
        value.and_then(serde_json::Value::as_str)
    }

    #[test]
    fn test_organization_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("my-app"));
        assert_eq!(parsed.namespace.as_deref(), Some("com.example"));
        assert_eq!(parsed.version.as_deref(), Some("1.0.0"));

        let extras = parsed.extra_data.unwrap();
        assert_eq!(
            json_str(extras.get("organization_name")),
            Some("Example Corporation")
        );
        assert_eq!(
            json_str(extras.get("organization_url")),
            Some("https://example.com")
        );
    }

    #[test]
    fn test_scm_metadata_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("spring-boot-starter-web"));
        assert_eq!(
            parsed.namespace.as_deref(),
            Some("org.springframework.boot")
        );
        assert_eq!(parsed.version.as_deref(), Some("3.0.0"));

        assert_eq!(
            parsed.code_view_url.as_deref(),
            Some("https://github.com/spring-projects/spring-boot")
        );

        // vcs_url prefers connection over developerConnection
        assert_eq!(
            parsed.vcs_url.as_deref(),
            Some("git+https://github.com/spring-projects/spring-boot.git")
        );

        let extras = parsed.extra_data.unwrap();
        assert_eq!(json_str(extras.get("scm_tag")), Some("v3.0.0"));
        // developerConnection stored separately in extra_data
        assert_eq!(
            json_str(extras.get("scm_developer_connection")),
            Some("git+git@github.com:spring-projects/spring-boot.git")
        );
    }

    #[test]
    fn test_developers_and_contributors_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("test-app"));
        assert_eq!(parsed.parties.len(), 3);

        let first_developer = &parsed.parties[0];
        assert_eq!(first_developer.r#type.as_deref(), Some("person"));
        assert_eq!(first_developer.role.as_deref(), Some("developer"));
        assert_eq!(first_developer.name.as_deref(), Some("John Doe"));
        assert_eq!(first_developer.email.as_deref(), Some("john@example.com"));
        assert_eq!(
            first_developer.url.as_deref(),
            Some("https://example.com/jdoe")
        );
        assert_eq!(
            first_developer.organization.as_deref(),
            Some("Example Corp")
        );
        assert_eq!(
            first_developer.organization_url.as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            first_developer.timezone.as_deref(),
            Some("America/New_York")
        );

        let second_developer = &parsed.parties[1];
        assert_eq!(second_developer.r#type.as_deref(), Some("person"));
        assert_eq!(second_developer.role.as_deref(), Some("developer"));
        assert_eq!(second_developer.name.as_deref(), Some("Jane Smith"));
        assert_eq!(second_developer.email.as_deref(), Some("jane@example.com"));

        let contributor = &parsed.parties[2];
        assert_eq!(contributor.r#type.as_deref(), Some("person"));
        assert_eq!(contributor.role.as_deref(), Some("contributor"));
        assert_eq!(contributor.name.as_deref(), Some("Bob Wilson"));
        assert_eq!(contributor.email.as_deref(), Some("bob@example.com"));
        assert_eq!(contributor.url.as_deref(), Some("https://example.com/bob"));
    }

    #[test]
    fn test_issue_management_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("test-app"));
        assert_eq!(
            parsed.bug_tracking_url.as_deref(),
            Some("https://github.com/example/test-app/issues")
        );

        let extras = parsed.extra_data.unwrap();
        assert_eq!(json_str(extras.get("issue_tracking_system")), Some("GitHub"));
    }

    #[test]
    fn test_ci_management_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("test-app"));

        let extras = parsed.extra_data.unwrap();
        assert_eq!(json_str(extras.get("ci_system")), Some("Jenkins"));
        assert_eq!(
            json_str(extras.get("ci_url")),
            Some("https://ci.example.com/job/test-app")
        );
    }

    #[test]
    fn test_distribution_management_extraction() {
        let parsed = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#,
        );

        assert_eq!(parsed.name.as_deref(), Some("test-app"));
        assert_eq!(
            parsed.download_url.as_deref(),
            Some("https://example.com/downloads")
        );

        let extras = parsed.extra_data.unwrap();

        assert_eq!(
            json_str(extras.get("distribution_download_url")),
            Some("https://example.com/downloads")
        );

        let release_repo = extras
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(json_str(release_repo.get("id")), Some("releases"));
        assert_eq!(
            json_str(release_repo.get("name")),
            Some("Release Repository")
        );
        assert_eq!(
            json_str(release_repo.get("url")),
            Some("https://repo.example.com/releases")
        );
        assert_eq!(json_str(release_repo.get("layout")), Some("default"));

        let snapshot_repo = extras
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(json_str(snapshot_repo.get("id")), Some("snapshots"));
        assert_eq!(
            json_str(snapshot_repo.get("name")),
            Some("Snapshot Repository")
        );
        assert_eq!(
            json_str(snapshot_repo.get("url")),
            Some("https://repo.example.com/snapshots")
        );
        assert_eq!(json_str(snapshot_repo.get("layout")), Some("default"));

        let site = extras
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(json_str(site.get("id")), Some("site-deploy"));
        assert_eq!(json_str(site.get("name")), Some("Project Site"));
        assert_eq!(json_str(site.get("url")), Some("https://example.com/site"));
    }
}
2835
// Registers this parser's metadata through the crate's `register_parser!` macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);