Skip to main content

provenant/parsers/
maven.rs

1//! Parser for Apache Maven pom.xml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Maven Project Object Model (POM) files.
5//!
6//! # Supported Formats
7//! - pom.xml (Project Object Model)
8//! - pom.properties
9//! - MANIFEST.MF (JAR manifest)
10//!
11//! # Key Features
12//! - Property value substitution (`${project.version}`)
13//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
14//! - Dependency scope handling (compile, test, provided, runtime, system)
15//! - Package URL (purl) generation
16//! - Multiple license support (combined with " OR ")
17//!
18//! # Implementation Notes
19//! - Uses quick-xml for XML parsing
20//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
21//! - Property substitution limited to prevent infinite loops
22//! - Direct dependencies: all in pom.xml are direct
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parsers::utils::read_file_to_string;
26use log::warn;
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::collections::{HashMap, HashSet};
30use std::fs::File;
31use std::io::BufReader;
32use std::path::Path;
33
34use super::PackageParser;
35
/// Raw, string-typed fields collected for one dependency-like entry.
///
/// The same shape is used for regular dependencies, `dependencyManagement`
/// entries and the relocation target. Every field is optional; `None` means
/// the value was never set. Values may still contain `${...}` placeholders
/// until `resolve_dependency_data` has been applied.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct MavenDependencyData {
    /// Group coordinate (emitted as `groupId`).
    group_id: Option<String>,
    /// Artifact coordinate (emitted as `artifactId`).
    artifact_id: Option<String>,
    /// Version requirement exactly as written (may be a range).
    version: Option<String>,
    /// Artifact classifier, e.g. `sources`.
    classifier: Option<String>,
    /// Artifact type (emitted as `type`); named `type_` because `type` is a keyword.
    type_: Option<String>,
    /// Dependency scope such as `test` or `provided`.
    scope: Option<String>,
    /// Textual boolean; interpreted with `parse_maven_bool`.
    optional: Option<String>,
    /// Path recorded under `system_path` in the dependency's extra data.
    system_path: Option<String>,
    /// Free-form message recorded under `message` in the emitted data.
    message: Option<String>,
}
48
/// One license entry as collected from the POM, before any rendering.
///
/// All fields are optional raw strings; `build_license_statement` skips
/// fields that are missing or blank.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct MavenLicenseEntry {
    /// Human-readable license name.
    name: Option<String>,
    /// URL pointing at the license text.
    url: Option<String>,
    /// Free-form comments attached to the license entry.
    comments: Option<String>,
}
55
/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
///
/// Maven properties can reference other properties, creating dependency graphs. This resolver:
/// - Resolves nested placeholders: `${outer.${inner}}`
/// - Detects circular references: `${a}` → `${b}` → `${a}`
/// - Enforces depth limits to prevent stack overflow
/// - Enforces substitution and output-length limits to prevent DoS on pathological inputs
///
/// # Algorithm
///
/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
/// - `resolving_set`: For cycle detection (hash set lookup)
/// - `resolving_stack`: For error reporting (preserves path)
/// - `cache`: Memoizes resolved values to avoid redundant work
///
/// Placeholders that cannot be resolved are left in the output verbatim.
#[derive(Debug)]
struct PropertyResolver {
    /// Explicitly supplied properties; consulted before `builtins`.
    raw: HashMap<String, String>,
    /// Fallback properties used when a key is absent from `raw`.
    builtins: HashMap<String, String>,
    /// Fully resolved values keyed by property name.
    cache: HashMap<String, String>,
    /// Keys currently being resolved; a repeated key signals a cycle.
    resolving_set: HashSet<String>,
    /// Same keys as `resolving_set` in resolution order, used in cycle warnings.
    resolving_stack: Vec<String>,
    /// Maximum recursion depth for nested/chained resolution.
    max_depth: usize,
    /// Maximum number of bytes a single resolved text may grow to.
    max_output_len: usize,
    /// Maximum number of placeholders substituted within one text.
    max_substitutions: usize,
    /// `kind:key` tokens for warnings already emitted, so each is logged once.
    warned_keys: HashSet<String>,
}
81
82impl PropertyResolver {
83    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
84        Self {
85            raw,
86            builtins,
87            cache: HashMap::new(),
88            resolving_set: HashSet::new(),
89            resolving_stack: Vec::new(),
90            max_depth: 10,
91            max_output_len: 100_000,
92            max_substitutions: 1000,
93            warned_keys: HashSet::new(),
94        }
95    }
96
97    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
98        if let Some(value) = self.cache.get(key) {
99            return Some(value.clone());
100        }
101
102        if depth >= self.max_depth {
103            self.warn_once(
104                "depth",
105                key,
106                format!("Maven property depth limit hit resolving {key}"),
107            );
108            return None;
109        }
110
111        if self.resolving_set.contains(key) {
112            self.warn_once(
113                "cycle",
114                key,
115                format!(
116                    "Maven property cycle detected at {key}: {:?}",
117                    self.resolving_stack
118                ),
119            );
120            return None;
121        }
122
123        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
124            value.clone()
125        } else {
126            self.warn_once("missing", key, format!("Maven property missing key {key}"));
127            return None;
128        };
129
130        self.resolving_set.insert(key.to_string());
131        self.resolving_stack.push(key.to_string());
132
133        let resolved = self.resolve_text(&raw_val, depth + 1);
134
135        self.resolving_stack.pop();
136        self.resolving_set.remove(key);
137
138        self.cache.insert(key.to_string(), resolved.clone());
139        Some(resolved)
140    }
141
142    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
143        if !text.contains("${") {
144            return text.to_string();
145        }
146
147        if depth >= self.max_depth {
148            warn!("Maven property depth limit hit resolving text");
149            return text.to_string();
150        }
151
152        let bytes = text.as_bytes();
153        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
154        let mut index = 0;
155        let mut substitutions = 0;
156
157        while index < bytes.len() {
158            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
159                if substitutions >= self.max_substitutions {
160                    warn!("Maven property substitution limit hit resolving {text}");
161                    return text.to_string();
162                }
163
164                let placeholder_start = index;
165                let Some((content, closing_index)) =
166                    self.parse_placeholder_content(text, index + 2)
167                else {
168                    warn!("Maven property malformed placeholder in {text}");
169                    return text.to_string();
170                };
171
172                substitutions += 1;
173                let resolved_key = if content.contains("${") {
174                    self.resolve_text(content, depth + 1)
175                } else {
176                    content.to_string()
177                };
178
179                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
180                    if output.len() + resolved.len() > self.max_output_len {
181                        warn!("Maven property output length limit hit resolving {text}");
182                        return text.to_string();
183                    }
184                    output.extend_from_slice(resolved.as_bytes());
185                } else {
186                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
187                    if output.len() + placeholder_bytes.len() > self.max_output_len {
188                        warn!("Maven property output length limit hit resolving {text}");
189                        return text.to_string();
190                    }
191                    output.extend_from_slice(placeholder_bytes);
192                }
193
194                index = closing_index + 1;
195                continue;
196            }
197
198            if output.len() + 1 > self.max_output_len {
199                warn!("Maven property output length limit hit resolving {text}");
200                return text.to_string();
201            }
202
203            output.push(bytes[index]);
204            index += 1;
205        }
206
207        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
208    }
209
210    fn parse_placeholder_content<'a>(
211        &self,
212        text: &'a str,
213        start_index: usize,
214    ) -> Option<(&'a str, usize)> {
215        let bytes = text.as_bytes();
216        let mut index = start_index;
217        let mut depth = 0;
218
219        while index < bytes.len() {
220            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
221                depth += 1;
222                index += 2;
223                continue;
224            }
225
226            if bytes[index] == b'}' {
227                if depth == 0 {
228                    return Some((&text[start_index..index], index));
229                }
230                depth -= 1;
231            }
232
233            index += 1;
234        }
235
236        None
237    }
238
239    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
240        let token = format!("{kind}:{key}");
241        if self.warned_keys.insert(token) {
242            warn!("{message}");
243        }
244    }
245}
246
247fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
248    if let Some(current) = value.clone() {
249        *value = Some(resolver.resolve_text(&current, 0));
250    }
251}
252
253fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
254    for value in values.iter_mut() {
255        *value = resolver.resolve_text(value, 0);
256    }
257}
258
259fn resolve_map_strings(
260    resolver: &mut PropertyResolver,
261    values: &mut serde_json::Map<String, serde_json::Value>,
262) {
263    for value in values.values_mut() {
264        if let serde_json::Value::String(current) = value {
265            let resolved = resolver.resolve_text(current, 0);
266            *current = resolved;
267        }
268    }
269}
270
271fn resolve_maps(
272    resolver: &mut PropertyResolver,
273    values: &mut [serde_json::Map<String, serde_json::Value>],
274) {
275    for value in values.iter_mut() {
276        resolve_map_strings(resolver, value);
277    }
278}
279
280fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
281    resolve_option(resolver, &mut dependency.group_id);
282    resolve_option(resolver, &mut dependency.artifact_id);
283    resolve_option(resolver, &mut dependency.version);
284    resolve_option(resolver, &mut dependency.classifier);
285    resolve_option(resolver, &mut dependency.type_);
286    resolve_option(resolver, &mut dependency.scope);
287    resolve_option(resolver, &mut dependency.optional);
288    resolve_option(resolver, &mut dependency.system_path);
289    resolve_option(resolver, &mut dependency.message);
290}
291
/// Interprets an optional textual boolean: only `true` (any ASCII case, surrounding
/// whitespace ignored) yields `true`; anything else, including `None`, is `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    match value {
        Some(text) => text.trim().eq_ignore_ascii_case("true"),
        None => false,
    }
}
295
/// Normalizes a packaging value.
///
/// Returns `None` for a missing or blank value, the trimmed value when it is one
/// of the recognized packagings, and `"jar"` for any other non-blank value.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const RECOGNIZED: &[&str] = &[
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if RECOGNIZED.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
306
307fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
308    resolve_option(resolver, &mut license.name);
309    resolve_option(resolver, &mut license.url);
310    resolve_option(resolver, &mut license.comments);
311}
312
313fn build_maven_qualifiers(
314    classifier: Option<&str>,
315    packaging: Option<&str>,
316) -> Option<HashMap<String, String>> {
317    let mut qualifiers = HashMap::new();
318
319    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
320        qualifiers.insert("classifier".to_string(), classifier.to_string());
321    }
322
323    if let Some(packaging) = normalize_maven_packaging(packaging)
324        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
325    {
326        qualifiers.insert("type".to_string(), packaging.to_string());
327    }
328
329    (!qualifiers.is_empty()).then_some(qualifiers)
330}
331
332fn build_maven_purl(
333    group_id: &str,
334    artifact_id: &str,
335    version: Option<&str>,
336    classifier: Option<&str>,
337    packaging: Option<&str>,
338) -> String {
339    let mut purl = format!("pkg:maven/{group_id}/{artifact_id}");
340
341    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
342        purl.push('@');
343        purl.push_str(version);
344    }
345
346    let qualifiers = build_maven_qualifiers(classifier, packaging);
347    if let Some(qualifiers) = qualifiers {
348        let mut query_parts = Vec::new();
349        if let Some(classifier) = qualifiers.get("classifier") {
350            query_parts.push(format!("classifier={classifier}"));
351        }
352        if let Some(type_) = qualifiers.get("type") {
353            query_parts.push(format!("type={type_}"));
354        }
355
356        if !query_parts.is_empty() {
357            purl.push('?');
358            purl.push_str(&query_parts.join("&"));
359        }
360    }
361
362    purl
363}
364
365fn build_maven_download_url(
366    group_id: &str,
367    artifact_id: &str,
368    version: &str,
369    classifier: Option<&str>,
370    packaging: Option<&str>,
371) -> String {
372    const BASE_URL: &str = "https://repo1.maven.org/maven2";
373    let group_path = group_id.replace('.', "/");
374    let extension = normalize_maven_packaging(packaging)
375        .filter(|value| *value != "pom")
376        .unwrap_or("jar");
377    let classifier_suffix = classifier
378        .map(str::trim)
379        .filter(|value| !value.is_empty())
380        .map(|value| format!("-{value}"))
381        .unwrap_or_default();
382
383    format!(
384        "{}/{}/{}/{}/{}-{}{}.{}",
385        BASE_URL,
386        group_path,
387        artifact_id,
388        version,
389        artifact_id,
390        version,
391        classifier_suffix,
392        extension
393    )
394}
395
396fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
397    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
398}
399
/// Reports whether any coordinate is, after trimming, exactly one of the bare
/// template placeholders `${groupId}`, `${artifactId}`, `${version}`,
/// `${package}` or `${packageName}`.
///
/// Missing coordinates are ignored; a placeholder embedded in a longer string
/// does not count.
fn has_unresolved_template_coordinates(
    namespace: Option<&str>,
    name: Option<&str>,
    version: Option<&str>,
) -> bool {
    fn is_template_placeholder(coordinate: &str) -> bool {
        matches!(
            coordinate.trim(),
            "${groupId}" | "${artifactId}" | "${version}" | "${package}" | "${packageName}"
        )
    }

    namespace.is_some_and(is_template_placeholder)
        || name.is_some_and(is_template_placeholder)
        || version.is_some_and(is_template_placeholder)
}
419
420fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
421    let rendered_entries: Vec<String> = licenses
422        .iter()
423        .filter_map(|license| {
424            let mut lines = Vec::new();
425
426            if let Some(name) = license
427                .name
428                .as_ref()
429                .filter(|value| !value.trim().is_empty())
430            {
431                lines.push(format!("    name: {name}"));
432            }
433            if let Some(url) = license
434                .url
435                .as_ref()
436                .filter(|value| !value.trim().is_empty())
437            {
438                lines.push(format!("    url: {url}"));
439            }
440            if let Some(comments) = license
441                .comments
442                .as_ref()
443                .filter(|value| !value.trim().is_empty())
444            {
445                lines.push(format!("    comments: {comments}"));
446            }
447
448            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
449        })
450        .collect();
451
452    if rendered_entries.is_empty() {
453        None
454    } else {
455        Some(format!("{}\n", rendered_entries.join("\n")))
456    }
457}
458
/// Heuristically decides whether a comment looks license-related.
///
/// The comparison is ASCII-case-insensitive. Most markers are matched as plain
/// substrings. The short marker `mit` is matched only as a standalone word,
/// because as a substring it also occurs in ordinary words such as "submit",
/// "commit", "limit" and "permit" and would flag unrelated comments.
fn is_license_like_comment(comment: &str) -> bool {
    const SUBSTRING_MARKERS: &[&str] = &[
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];

    let lowered = comment.to_ascii_lowercase();

    SUBSTRING_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
        || contains_standalone_word(&lowered, "mit")
}

/// Returns `true` when `word` occurs in `haystack` without an alphanumeric
/// character directly before or after it.
fn contains_standalone_word(haystack: &str, word: &str) -> bool {
    haystack.match_indices(word).any(|(start, matched)| {
        let before = haystack[..start].chars().next_back();
        let after = haystack[start + matched.len()..].chars().next();
        !before.is_some_and(char::is_alphanumeric) && !after.is_some_and(char::is_alphanumeric)
    })
}
477
478fn dependency_extra_data(
479    dependency: &MavenDependencyData,
480) -> Option<HashMap<String, serde_json::Value>> {
481    let mut extra_data = HashMap::new();
482
483    if let Some(classifier) = dependency
484        .classifier
485        .as_ref()
486        .filter(|value| !value.trim().is_empty())
487    {
488        extra_data.insert(
489            "classifier".to_string(),
490            serde_json::Value::String(classifier.clone()),
491        );
492    }
493    if let Some(type_) = dependency
494        .type_
495        .as_ref()
496        .filter(|value| !value.trim().is_empty())
497    {
498        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
499    }
500    if let Some(system_path) = dependency
501        .system_path
502        .as_ref()
503        .filter(|value| !value.trim().is_empty())
504    {
505        extra_data.insert(
506            "system_path".to_string(),
507            serde_json::Value::String(system_path.clone()),
508        );
509    }
510    if let Some(message) = dependency
511        .message
512        .as_ref()
513        .filter(|value| !value.trim().is_empty())
514    {
515        extra_data.insert(
516            "message".to_string(),
517            serde_json::Value::String(message.clone()),
518        );
519    }
520
521    (!extra_data.is_empty()).then_some(extra_data)
522}
523
524fn dependency_management_entry_to_value(
525    dependency: &MavenDependencyData,
526) -> serde_json::Map<String, serde_json::Value> {
527    let mut dep_obj = serde_json::Map::new();
528
529    if let Some(group_id) = dependency.group_id.as_ref() {
530        dep_obj.insert(
531            "groupId".to_string(),
532            serde_json::Value::String(group_id.clone()),
533        );
534    }
535    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
536        dep_obj.insert(
537            "artifactId".to_string(),
538            serde_json::Value::String(artifact_id.clone()),
539        );
540    }
541    if let Some(version) = dependency.version.as_ref() {
542        dep_obj.insert(
543            "version".to_string(),
544            serde_json::Value::String(version.clone()),
545        );
546    }
547    if let Some(scope) = dependency.scope.as_ref() {
548        dep_obj.insert(
549            "scope".to_string(),
550            serde_json::Value::String(scope.clone()),
551        );
552    }
553    if let Some(type_) = dependency.type_.as_ref() {
554        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
555    }
556    if let Some(classifier) = dependency.classifier.as_ref() {
557        dep_obj.insert(
558            "classifier".to_string(),
559            serde_json::Value::String(classifier.clone()),
560        );
561    }
562    if let Some(optional) = dependency.optional.as_deref() {
563        dep_obj.insert(
564            "optional".to_string(),
565            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
566        );
567    }
568    if let Some(message) = dependency.message.as_ref() {
569        dep_obj.insert(
570            "message".to_string(),
571            serde_json::Value::String(message.clone()),
572        );
573    }
574
575    dep_obj
576}
577
578fn maven_dependency_to_dependency(
579    dependency_data: &MavenDependencyData,
580    fallback_scope: Option<&str>,
581    force_non_runtime: bool,
582) -> Option<Dependency> {
583    let group_id = dependency_data.group_id.as_ref()?;
584    let artifact_id = dependency_data.artifact_id.as_ref()?;
585    let version = dependency_data.version.clone();
586    let scope = dependency_data
587        .scope
588        .clone()
589        .or_else(|| fallback_scope.map(str::to_string));
590    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
591
592    let (is_runtime, is_optional) = if force_non_runtime {
593        (Some(false), Some(explicit_optional))
594    } else {
595        match scope.as_deref() {
596            Some("test") | Some("provided") => (Some(false), Some(true)),
597            Some(_) => (Some(true), Some(explicit_optional)),
598            None => (None, Some(explicit_optional)),
599        }
600    };
601
602    Some(Dependency {
603        purl: Some(build_maven_purl(
604            group_id,
605            artifact_id,
606            version.as_deref(),
607            dependency_data.classifier.as_deref(),
608            dependency_data.type_.as_deref(),
609        )),
610        extracted_requirement: version.clone(),
611        scope,
612        is_runtime,
613        is_optional,
614        is_pinned: version.as_deref().map(is_maven_version_pinned),
615        is_direct: Some(true),
616        resolved_package: None,
617        extra_data: dependency_extra_data(dependency_data),
618    })
619}
620
/// Determines if a Maven version specifier is pinned to an exact version.
///
/// A version is considered pinned if it specifies an exact version without
/// range syntax, dynamic keywords, or unresolved property placeholders. Examples:
/// - Pinned: "1.0.0", "1.2.3"
/// - NOT pinned: "[1.0.0,2.0.0)" (range), "[1.0.0,)" (open-ended), "LATEST", "RELEASE",
///   "${some.version}" (placeholder that was never substituted), "" (empty)
///
/// Surrounding whitespace is ignored; keyword comparison is ASCII-case-insensitive.
fn is_maven_version_pinned(version_str: &str) -> bool {
    let trimmed = version_str.trim();

    // Empty version is not pinned
    if trimmed.is_empty() {
        return false;
    }

    // Any bracket or parenthesis means range syntax
    if trimmed
        .chars()
        .any(|c| matches!(c, '[' | ']' | '(' | ')'))
    {
        return false;
    }

    // A leftover `${...}` placeholder names no concrete version at all
    if trimmed.contains("${") {
        return false;
    }

    // Dynamic version keywords float to whatever is newest
    let is_dynamic_keyword =
        trimmed.eq_ignore_ascii_case("LATEST") || trimmed.eq_ignore_ascii_case("RELEASE");

    !is_dynamic_keyword
}
652
/// Builds the table of implicit project properties used as resolver fallbacks.
///
/// The effective group id and version are the project's own values, falling back
/// to the parent's when the project does not declare them. Keys produced (each
/// only when its value is available):
/// `project.groupId`, `pom.groupId`, `project.artifactId`, `pom.artifactId`,
/// `project.version`, `pom.version`, `project.parent.groupId`,
/// `project.parent.version`, `project.packaging`, `project.name`.
fn build_builtin_properties(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
    parent_group_id: &Option<String>,
    parent_version: &Option<String>,
    project_name: &Option<String>,
    project_packaging: &Option<String>,
) -> HashMap<String, String> {
    let effective_group_id = namespace.as_deref().or(parent_group_id.as_deref());
    let effective_version = version.as_deref().or(parent_version.as_deref());
    let artifact_id = name.as_deref();

    let entries: [(&str, Option<&str>); 10] = [
        ("project.groupId", effective_group_id),
        ("pom.groupId", effective_group_id),
        ("project.artifactId", artifact_id),
        ("pom.artifactId", artifact_id),
        ("project.version", effective_version),
        ("pom.version", effective_version),
        ("project.parent.groupId", parent_group_id.as_deref()),
        ("project.parent.version", parent_version.as_deref()),
        ("project.packaging", project_packaging.as_deref()),
        ("project.name", project_name.as_deref()),
    ];

    entries
        .into_iter()
        .filter_map(|(key, value)| value.map(|text| (key.to_string(), text.to_string())))
        .collect()
}
699
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
///
/// This is a stateless unit type, so the common value traits are derived for
/// convenient logging and copying.
#[derive(Debug, Clone, Copy, Default)]
pub struct MavenParser;
705
706impl PackageParser for MavenParser {
707    const PACKAGE_TYPE: PackageType = PackageType::Maven;
708
709    fn extract_packages(path: &Path) -> Vec<PackageData> {
710        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
711            if filename == "pom.properties" {
712                return vec![parse_pom_properties(path)];
713            } else if filename == "MANIFEST.MF" {
714                return vec![parse_manifest_mf(path)];
715            }
716        }
717
718        let file = match File::open(path) {
719            Ok(f) => f,
720            Err(e) => {
721                warn!("Failed to open pom.xml at {:?}: {}", path, e);
722                return vec![default_package_data()];
723            }
724        };
725
726        let mut reader = Reader::from_reader(BufReader::new(file));
727        reader.config_mut().trim_text(true);
728
729        let mut buf = Vec::new();
730        let mut package_data = default_package_data();
731        package_data.package_type = Some(Self::PACKAGE_TYPE);
732        package_data.primary_language = Some("Java".to_string());
733        package_data.datasource_id = Some(DatasourceId::MavenPom);
734
735        let mut current_element = Vec::new();
736        let mut in_dependencies = false;
737        let mut current_dependency: Option<Dependency> = None;
738        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
739        let mut current_dependency_data: Option<MavenDependencyData> = None;
740
741        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
742        let mut xml_license_comments: Vec<String> = Vec::new();
743        let mut current_license: Option<MavenLicenseEntry> = None;
744        let mut inception_year = None;
745        let mut scm_connection = None;
746        let mut scm_developer_connection = None;
747        let mut scm_url = None;
748        let mut scm_tag = None;
749        let mut organization_name = None;
750        let mut organization_url = None;
751        let mut in_developers = false;
752        let mut in_contributors = false;
753        let mut current_party: Option<Party> = None;
754        let mut issue_management_system = None;
755        let mut issue_management_url = None;
756        let mut ci_management_system = None;
757        let mut ci_management_url = None;
758        let mut in_distribution_management = false;
759        let mut in_dist_repository = false;
760        let mut in_dist_snapshot_repository = false;
761        let mut in_dist_site = false;
762        let mut dist_download_url = None;
763        let mut dist_repository_id = None;
764        let mut dist_repository_name = None;
765        let mut dist_repository_url = None;
766        let mut dist_repository_layout = None;
767        let mut dist_snapshot_repository_id = None;
768        let mut dist_snapshot_repository_name = None;
769        let mut dist_snapshot_repository_url = None;
770        let mut dist_snapshot_repository_layout = None;
771        let mut dist_site_id = None;
772        let mut dist_site_name = None;
773        let mut dist_site_url = None;
774        let mut in_repositories = false;
775        let mut in_plugin_repositories = false;
776        let mut in_repository = false;
777        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
778        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
779        let mut current_repository_id = None;
780        let mut current_repository_name = None;
781        let mut current_repository_url = None;
782        let mut in_modules = false;
783        let mut modules: Vec<String> = Vec::new();
784        let mut in_mailing_lists = false;
785        let mut in_mailing_list = false;
786        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
787        let mut current_mailing_list_name = None;
788        let mut current_mailing_list_subscribe = None;
789        let mut current_mailing_list_unsubscribe = None;
790        let mut current_mailing_list_post = None;
791        let mut current_mailing_list_archive = None;
792        let mut in_dependency_management = false;
793        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
794        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
795        let mut in_dep_mgmt_dependency = false;
796        let mut in_parent = false;
797        let mut parent_group_id = None;
798        let mut parent_artifact_id = None;
799        let mut parent_version = None;
800        let mut parent_relative_path = None;
801        let mut in_properties = false;
802        let mut properties: HashMap<String, String> = HashMap::new();
803        let mut project_name = None;
804        let mut project_description = None;
805        let mut project_packaging = None;
806        let mut project_classifier = None;
807        let mut in_relocation = false;
808        let mut relocation = MavenDependencyData::default();
809
810        loop {
811            match reader.read_event_into(&mut buf) {
812                Ok(Event::Start(e)) => {
813                    let element_name = e.name().as_ref().to_vec();
814                    current_element.push(element_name.clone());
815
816                    match element_name.as_slice() {
817                        b"parent" => in_parent = true,
818                        b"dependencyManagement" => in_dependency_management = true,
819                        b"dependencies" if in_dependency_management => {}
820                        b"dependencies" => in_dependencies = true,
821                        b"dependency" if in_dependency_management => {
822                            in_dep_mgmt_dependency = true;
823                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
824                        }
825                        b"dependency" if in_dependencies => {
826                            current_dependency = Some(Dependency {
827                                purl: None,
828                                extracted_requirement: None,
829                                scope: None,
830                                is_runtime: None,
831                                is_optional: Some(false),
832                                is_pinned: None,
833                                is_direct: Some(true),
834                                resolved_package: None,
835                                extra_data: None,
836                            });
837                            current_dependency_data = Some(MavenDependencyData::default());
838                        }
839                        b"properties" => in_properties = true,
840                        b"developers" => in_developers = true,
841                        b"developer" if in_developers => {
842                            current_party = Some(Party {
843                                r#type: Some("person".to_string()),
844                                role: Some("developer".to_string()),
845                                name: None,
846                                email: None,
847                                url: None,
848                                organization: None,
849                                organization_url: None,
850                                timezone: None,
851                            });
852                        }
853                        b"contributors" => in_contributors = true,
854                        b"contributor" if in_contributors => {
855                            current_party = Some(Party {
856                                r#type: Some("person".to_string()),
857                                role: Some("contributor".to_string()),
858                                name: None,
859                                email: None,
860                                url: None,
861                                organization: None,
862                                organization_url: None,
863                                timezone: None,
864                            });
865                        }
866                        b"license" => current_license = Some(MavenLicenseEntry::default()),
867                        b"distributionManagement" => in_distribution_management = true,
868                        b"relocation" if in_distribution_management => {
869                            in_relocation = true;
870                            relocation = MavenDependencyData::default();
871                        }
872                        b"repository" if in_distribution_management => in_dist_repository = true,
873                        b"snapshotRepository" if in_distribution_management => {
874                            in_dist_snapshot_repository = true
875                        }
876                        b"site" if in_distribution_management => in_dist_site = true,
877                        b"repositories" => in_repositories = true,
878                        b"pluginRepositories" => in_plugin_repositories = true,
879                        b"repository" if in_repositories && !in_distribution_management => {
880                            in_repository = true;
881                            current_repository_id = None;
882                            current_repository_name = None;
883                            current_repository_url = None;
884                        }
885                        b"pluginRepository" if in_plugin_repositories => {
886                            in_repository = true;
887                            current_repository_id = None;
888                            current_repository_name = None;
889                            current_repository_url = None;
890                        }
891                        b"modules" => in_modules = true,
892                        b"mailingLists" => in_mailing_lists = true,
893                        b"mailingList" if in_mailing_lists => {
894                            in_mailing_list = true;
895                            current_mailing_list_name = None;
896                            current_mailing_list_subscribe = None;
897                            current_mailing_list_unsubscribe = None;
898                            current_mailing_list_post = None;
899                            current_mailing_list_archive = None;
900                        }
901                        _ => {}
902                    }
903                }
904                Ok(Event::Text(e)) => {
905                    let text = e.decode().unwrap_or_default().to_string();
906                    let current_path = current_element.last().map(|v| v.as_slice());
907
908                    if in_properties
909                        && current_element.len() >= 2
910                        && current_element[current_element.len() - 2] == b"properties"
911                    {
912                        if let Some(property_name) = current_element
913                            .last()
914                            .and_then(|name| std::str::from_utf8(name).ok())
915                        {
916                            properties.insert(property_name.to_string(), text);
917                        } else {
918                            warn!("Failed to decode Maven property name in {:?}", path);
919                        }
920                    } else if in_dep_mgmt_dependency {
921                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
922                            match current_path {
923                                Some(b"groupId") => dep_mgmt.group_id = Some(text),
924                                Some(b"artifactId") => dep_mgmt.artifact_id = Some(text),
925                                Some(b"version") => dep_mgmt.version = Some(text),
926                                Some(b"scope") => dep_mgmt.scope = Some(text),
927                                Some(b"type") => dep_mgmt.type_ = Some(text),
928                                Some(b"classifier") => dep_mgmt.classifier = Some(text),
929                                Some(b"optional") => dep_mgmt.optional = Some(text),
930                                _ => {}
931                            }
932                        }
933                    } else if let Some(license) = &mut current_license {
934                        match current_path {
935                            Some(b"name") => license.name = Some(text),
936                            Some(b"url") => license.url = Some(text),
937                            Some(b"comments") => license.comments = Some(text),
938                            _ => {}
939                        }
940                    } else if let Some(party) = &mut current_party {
941                        match current_path {
942                            Some(b"name") => party.name = Some(text),
943                            Some(b"email") => party.email = Some(text),
944                            Some(b"url") => party.url = Some(text),
945                            Some(b"organization") => party.organization = Some(text),
946                            Some(b"organizationUrl") => party.organization_url = Some(text),
947                            Some(b"timezone") => party.timezone = Some(text),
948                            _ => {}
949                        }
950                    } else if let Some(dep) = &mut current_dependency {
951                        match current_path {
952                            Some(b"groupId") => {
953                                if let Some(coords) = current_dependency_data.as_mut() {
954                                    coords.group_id = Some(text);
955                                }
956                            }
957                            Some(b"artifactId") => {
958                                if let Some(coords) = current_dependency_data.as_mut() {
959                                    coords.artifact_id = Some(text);
960                                }
961                            }
962                            Some(b"version") => {
963                                if let Some(coords) = current_dependency_data.as_mut() {
964                                    coords.version = Some(text);
965                                }
966                            }
967                            Some(b"scope") => {
968                                dep.scope = Some(text.clone());
969                                dep.is_optional = Some(text == "test" || text == "provided");
970                                dep.is_runtime = Some(text != "test" && text != "provided");
971                                if let Some(coords) = current_dependency_data.as_mut() {
972                                    coords.scope = Some(text);
973                                }
974                            }
975                            Some(b"optional") => {
976                                if let Some(coords) = current_dependency_data.as_mut() {
977                                    coords.optional = Some(text);
978                                }
979                            }
980                            Some(b"type") => {
981                                if let Some(coords) = current_dependency_data.as_mut() {
982                                    coords.type_ = Some(text);
983                                }
984                            }
985                            Some(b"classifier") => {
986                                if let Some(coords) = current_dependency_data.as_mut() {
987                                    coords.classifier = Some(text);
988                                }
989                            }
990                            Some(b"systemPath") => {
991                                if let Some(coords) = current_dependency_data.as_mut() {
992                                    coords.system_path = Some(text);
993                                }
994                            }
995                            _ => {}
996                        }
997                    } else if in_relocation {
998                        match current_path {
999                            Some(b"groupId") => relocation.group_id = Some(text),
1000                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1001                            Some(b"version") => relocation.version = Some(text),
1002                            Some(b"classifier") => relocation.classifier = Some(text),
1003                            Some(b"type") => relocation.type_ = Some(text),
1004                            Some(b"message") => relocation.message = Some(text),
1005                            _ => {}
1006                        }
1007                    } else if in_parent {
1008                        match current_path {
1009                            Some(b"groupId") => {
1010                                parent_group_id = Some(text);
1011                            }
1012                            Some(b"artifactId") => {
1013                                parent_artifact_id = Some(text);
1014                            }
1015                            Some(b"version") => {
1016                                parent_version = Some(text);
1017                            }
1018                            Some(b"relativePath") => {
1019                                parent_relative_path = Some(text);
1020                            }
1021                            _ => {}
1022                        }
1023                    } else {
1024                        match current_path {
1025                            Some(b"groupId") if current_element.len() == 2 => {
1026                                package_data.namespace = Some(text)
1027                            }
1028                            Some(b"artifactId") if current_element.len() == 2 => {
1029                                package_data.name = Some(text)
1030                            }
1031                            Some(b"version") if current_element.len() == 2 => {
1032                                package_data.version = Some(text)
1033                            }
1034                            Some(b"name") if current_element.len() == 2 => {
1035                                project_name = Some(text)
1036                            }
1037                            Some(b"description") if current_element.len() == 2 => {
1038                                project_description = Some(text)
1039                            }
1040                            Some(b"packaging") if current_element.len() == 2 => {
1041                                project_packaging = Some(text)
1042                            }
1043                            Some(b"classifier") if current_element.len() == 2 => {
1044                                project_classifier = Some(text)
1045                            }
1046                            Some(b"url") if current_element.len() == 2 => {
1047                                package_data.homepage_url = Some(text)
1048                            }
1049                            Some(b"inceptionYear") if current_element.len() == 2 => {
1050                                inception_year = Some(text)
1051                            }
1052                            Some(b"connection")
1053                                if current_element.len() >= 3
1054                                    && current_element[current_element.len() - 2] == b"scm" =>
1055                            {
1056                                scm_connection = if text.starts_with("scm:git:") {
1057                                    Some(text.replacen("scm:git:", "git+", 1))
1058                                } else if text.starts_with("scm:") {
1059                                    Some(text.replacen("scm:", "", 1))
1060                                } else {
1061                                    Some(text)
1062                                };
1063                            }
1064                            Some(b"developerConnection")
1065                                if current_element.len() >= 3
1066                                    && current_element[current_element.len() - 2] == b"scm" =>
1067                            {
1068                                scm_developer_connection = if text.starts_with("scm:git:") {
1069                                    Some(text.replacen("scm:git:", "git+", 1))
1070                                } else if text.starts_with("scm:") {
1071                                    Some(text.replacen("scm:", "", 1))
1072                                } else {
1073                                    Some(text)
1074                                };
1075                            }
1076                            Some(b"url")
1077                                if current_element.len() >= 3
1078                                    && current_element[current_element.len() - 2] == b"scm" =>
1079                            {
1080                                scm_url = Some(text);
1081                            }
1082                            Some(b"tag")
1083                                if current_element.len() >= 3
1084                                    && current_element[current_element.len() - 2] == b"scm" =>
1085                            {
1086                                scm_tag = Some(text);
1087                            }
1088                            Some(b"name")
1089                                if current_element.len() >= 2
1090                                    && current_element[current_element.len() - 2]
1091                                        == b"organization" =>
1092                            {
1093                                organization_name = Some(text);
1094                            }
1095                            Some(b"url")
1096                                if current_element.len() >= 2
1097                                    && current_element[current_element.len() - 2]
1098                                        == b"organization" =>
1099                            {
1100                                organization_url = Some(text);
1101                            }
1102                            Some(b"system")
1103                                if current_element.len() >= 2
1104                                    && current_element[current_element.len() - 2]
1105                                        == b"issueManagement" =>
1106                            {
1107                                issue_management_system = Some(text);
1108                            }
1109                            Some(b"url")
1110                                if current_element.len() >= 2
1111                                    && current_element[current_element.len() - 2]
1112                                        == b"issueManagement" =>
1113                            {
1114                                issue_management_url = Some(text);
1115                            }
1116                            Some(b"system")
1117                                if current_element.len() >= 2
1118                                    && current_element[current_element.len() - 2]
1119                                        == b"ciManagement" =>
1120                            {
1121                                ci_management_system = Some(text);
1122                            }
1123                            Some(b"url")
1124                                if current_element.len() >= 2
1125                                    && current_element[current_element.len() - 2]
1126                                        == b"ciManagement" =>
1127                            {
1128                                ci_management_url = Some(text);
1129                            }
1130                            Some(b"downloadUrl")
1131                                if current_element.len() >= 2
1132                                    && current_element[current_element.len() - 2]
1133                                        == b"distributionManagement" =>
1134                            {
1135                                dist_download_url = Some(text);
1136                            }
1137                            Some(b"id") if in_dist_repository => {
1138                                dist_repository_id = Some(text);
1139                            }
1140                            Some(b"name") if in_dist_repository => {
1141                                dist_repository_name = Some(text);
1142                            }
1143                            Some(b"url") if in_dist_repository => {
1144                                dist_repository_url = Some(text);
1145                            }
1146                            Some(b"layout") if in_dist_repository => {
1147                                dist_repository_layout = Some(text);
1148                            }
1149                            Some(b"id") if in_dist_snapshot_repository => {
1150                                dist_snapshot_repository_id = Some(text);
1151                            }
1152                            Some(b"name") if in_dist_snapshot_repository => {
1153                                dist_snapshot_repository_name = Some(text);
1154                            }
1155                            Some(b"url") if in_dist_snapshot_repository => {
1156                                dist_snapshot_repository_url = Some(text);
1157                            }
1158                            Some(b"layout") if in_dist_snapshot_repository => {
1159                                dist_snapshot_repository_layout = Some(text);
1160                            }
1161                            Some(b"id") if in_dist_site => {
1162                                dist_site_id = Some(text);
1163                            }
1164                            Some(b"name") if in_dist_site => {
1165                                dist_site_name = Some(text);
1166                            }
1167                            Some(b"url") if in_dist_site => {
1168                                dist_site_url = Some(text);
1169                            }
1170                            Some(b"id") if in_repository => {
1171                                current_repository_id = Some(text);
1172                            }
1173                            Some(b"name") if in_repository => {
1174                                current_repository_name = Some(text);
1175                            }
1176                            Some(b"url") if in_repository => {
1177                                current_repository_url = Some(text);
1178                            }
1179                            Some(b"module") if in_modules => {
1180                                modules.push(text);
1181                            }
1182                            Some(b"name") if in_mailing_list => {
1183                                current_mailing_list_name = Some(text);
1184                            }
1185                            Some(b"subscribe") if in_mailing_list => {
1186                                current_mailing_list_subscribe = Some(text);
1187                            }
1188                            Some(b"unsubscribe") if in_mailing_list => {
1189                                current_mailing_list_unsubscribe = Some(text);
1190                            }
1191                            Some(b"post") if in_mailing_list => {
1192                                current_mailing_list_post = Some(text);
1193                            }
1194                            Some(b"archive") if in_mailing_list => {
1195                                current_mailing_list_archive = Some(text);
1196                            }
1197                            _ => {}
1198                        }
1199                    }
1200                }
1201                Ok(Event::Comment(e)) => {
1202                    let comment = e.decode().unwrap_or_default().trim().to_string();
1203                    if current_element.is_empty()
1204                        && !comment.is_empty()
1205                        && is_license_like_comment(&comment)
1206                    {
1207                        xml_license_comments.push(comment);
1208                    }
1209                }
1210                Ok(Event::End(e)) => {
1211                    if !current_element.is_empty() {
1212                        current_element.pop();
1213                    }
1214
1215                    match e.name().as_ref() {
1216                        b"parent" => in_parent = false,
1217                        b"dependencyManagement" => in_dependency_management = false,
1218                        b"dependencies" => in_dependencies = false,
1219                        b"dependency" if in_dep_mgmt_dependency => {
1220                            in_dep_mgmt_dependency = false;
1221                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1222                                && (dep_mgmt.group_id.is_some()
1223                                    || dep_mgmt.artifact_id.is_some()
1224                                    || dep_mgmt.version.is_some())
1225                            {
1226                                dependency_management_entries.push(dep_mgmt);
1227                            }
1228                        }
1229                        b"dependency" => {
1230                            if let (Some(dep), Some(coords)) =
1231                                (current_dependency.take(), current_dependency_data.take())
1232                            {
1233                                package_data.dependencies.push(dep);
1234                                dependency_data.push(coords);
1235                            } else if let Some(dep) = current_dependency.take() {
1236                                package_data.dependencies.push(dep);
1237                            }
1238                        }
1239                        b"license" => {
1240                            if let Some(license) = current_license.take()
1241                                && (license.name.is_some()
1242                                    || license.url.is_some()
1243                                    || license.comments.is_some())
1244                            {
1245                                licenses.push(license);
1246                            }
1247                        }
1248                        b"developers" => in_developers = false,
1249                        b"developer" => {
1250                            if let Some(party) = current_party.take() {
1251                                package_data.parties.push(party);
1252                            }
1253                        }
1254                        b"contributors" => in_contributors = false,
1255                        b"contributor" => {
1256                            if let Some(party) = current_party.take() {
1257                                package_data.parties.push(party);
1258                            }
1259                        }
1260                        b"distributionManagement" => in_distribution_management = false,
1261                        b"relocation" => in_relocation = false,
1262                        b"repository" if !in_dependencies && in_distribution_management => {
1263                            in_dist_repository = false
1264                        }
1265                        b"repository" if !in_dependencies && in_repositories => {
1266                            in_repository = false;
1267                            if current_repository_id.is_some()
1268                                || current_repository_name.is_some()
1269                                || current_repository_url.is_some()
1270                            {
1271                                let mut repo = serde_json::Map::new();
1272                                if let Some(id) = current_repository_id.take() {
1273                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1274                                }
1275                                if let Some(name) = current_repository_name.take() {
1276                                    repo.insert(
1277                                        "name".to_string(),
1278                                        serde_json::Value::String(name),
1279                                    );
1280                                }
1281                                if let Some(url) = current_repository_url.take() {
1282                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1283                                }
1284                                repositories.push(repo);
1285                            }
1286                        }
1287                        b"pluginRepository" if in_plugin_repositories => {
1288                            in_repository = false;
1289                            if current_repository_id.is_some()
1290                                || current_repository_name.is_some()
1291                                || current_repository_url.is_some()
1292                            {
1293                                let mut repo = serde_json::Map::new();
1294                                if let Some(id) = current_repository_id.take() {
1295                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1296                                }
1297                                if let Some(name) = current_repository_name.take() {
1298                                    repo.insert(
1299                                        "name".to_string(),
1300                                        serde_json::Value::String(name),
1301                                    );
1302                                }
1303                                if let Some(url) = current_repository_url.take() {
1304                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1305                                }
1306                                plugin_repositories.push(repo);
1307                            }
1308                        }
1309                        b"repositories" => in_repositories = false,
1310                        b"properties" => in_properties = false,
1311                        b"pluginRepositories" => in_plugin_repositories = false,
1312                        b"modules" => in_modules = false,
1313                        b"mailingLists" => in_mailing_lists = false,
1314                        b"mailingList" => {
1315                            in_mailing_list = false;
1316                            if current_mailing_list_name.is_some()
1317                                || current_mailing_list_subscribe.is_some()
1318                                || current_mailing_list_unsubscribe.is_some()
1319                                || current_mailing_list_post.is_some()
1320                                || current_mailing_list_archive.is_some()
1321                            {
1322                                let mut ml = serde_json::Map::new();
1323                                if let Some(name) = current_mailing_list_name.take() {
1324                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1325                                }
1326                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1327                                    ml.insert(
1328                                        "subscribe".to_string(),
1329                                        serde_json::Value::String(subscribe),
1330                                    );
1331                                }
1332                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1333                                    ml.insert(
1334                                        "unsubscribe".to_string(),
1335                                        serde_json::Value::String(unsubscribe),
1336                                    );
1337                                }
1338                                if let Some(post) = current_mailing_list_post.take() {
1339                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1340                                }
1341                                if let Some(archive) = current_mailing_list_archive.take() {
1342                                    ml.insert(
1343                                        "archive".to_string(),
1344                                        serde_json::Value::String(archive),
1345                                    );
1346                                }
1347                                mailing_lists.push(ml);
1348                            }
1349                        }
1350                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1351                        b"site" => in_dist_site = false,
1352                        _ => {}
1353                    }
1354                }
1355                Ok(Event::Eof) => break,
1356                Err(e) => {
1357                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1358                    return vec![package_data];
1359                }
1360                _ => {}
1361            }
1362            buf.clear();
1363        }
1364
1365        let builtins = build_builtin_properties(
1366            &package_data.namespace,
1367            &package_data.name,
1368            &package_data.version,
1369            &parent_group_id,
1370            &parent_version,
1371            &project_name,
1372            &project_packaging,
1373        );
1374        let mut resolver = PropertyResolver::new(properties, builtins);
1375
1376        resolve_option(&mut resolver, &mut package_data.namespace);
1377        resolve_option(&mut resolver, &mut package_data.name);
1378        resolve_option(&mut resolver, &mut package_data.version);
1379        resolve_option(&mut resolver, &mut package_data.homepage_url);
1380        resolve_option(&mut resolver, &mut inception_year);
1381        resolve_option(&mut resolver, &mut scm_connection);
1382        resolve_option(&mut resolver, &mut scm_developer_connection);
1383        resolve_option(&mut resolver, &mut scm_url);
1384        resolve_option(&mut resolver, &mut scm_tag);
1385        resolve_option(&mut resolver, &mut organization_name);
1386        resolve_option(&mut resolver, &mut organization_url);
1387        resolve_option(&mut resolver, &mut issue_management_system);
1388        resolve_option(&mut resolver, &mut issue_management_url);
1389        resolve_option(&mut resolver, &mut ci_management_system);
1390        resolve_option(&mut resolver, &mut ci_management_url);
1391        resolve_option(&mut resolver, &mut dist_download_url);
1392        resolve_option(&mut resolver, &mut dist_repository_id);
1393        resolve_option(&mut resolver, &mut dist_repository_name);
1394        resolve_option(&mut resolver, &mut dist_repository_url);
1395        resolve_option(&mut resolver, &mut dist_repository_layout);
1396        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1397        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1398        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1399        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1400        resolve_option(&mut resolver, &mut dist_site_id);
1401        resolve_option(&mut resolver, &mut dist_site_name);
1402        resolve_option(&mut resolver, &mut dist_site_url);
1403        resolve_option(&mut resolver, &mut parent_group_id);
1404        resolve_option(&mut resolver, &mut parent_artifact_id);
1405        resolve_option(&mut resolver, &mut parent_version);
1406        resolve_option(&mut resolver, &mut parent_relative_path);
1407        resolve_option(&mut resolver, &mut project_name);
1408        resolve_option(&mut resolver, &mut project_description);
1409        resolve_option(&mut resolver, &mut project_packaging);
1410        resolve_option(&mut resolver, &mut project_classifier);
1411        resolve_vec(&mut resolver, &mut modules);
1412        resolve_maps(&mut resolver, &mut repositories);
1413        resolve_maps(&mut resolver, &mut plugin_repositories);
1414        resolve_maps(&mut resolver, &mut mailing_lists);
1415        for comment in &mut xml_license_comments {
1416            *comment = resolver.resolve_text(comment, 0);
1417        }
1418        for dependency in &mut dependency_management_entries {
1419            resolve_dependency_data(&mut resolver, dependency);
1420        }
1421        resolve_dependency_data(&mut resolver, &mut relocation);
1422        for license in &mut licenses {
1423            resolve_license_entry(&mut resolver, license);
1424        }
1425        for comment in xml_license_comments {
1426            if !comment.trim().is_empty() {
1427                licenses.push(MavenLicenseEntry {
1428                    comments: Some(comment),
1429                    ..Default::default()
1430                });
1431            }
1432        }
1433
1434        for (dependency, coords) in package_data
1435            .dependencies
1436            .iter_mut()
1437            .zip(dependency_data.iter_mut())
1438        {
1439            resolve_dependency_data(&mut resolver, coords);
1440            dependency.scope = coords.scope.clone();
1441            dependency.extracted_requirement = coords.version.clone();
1442            dependency.extra_data = dependency_extra_data(coords);
1443            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1444
1445            match dependency.scope.as_deref() {
1446                Some("test") | Some("provided") => {
1447                    dependency.is_runtime = Some(false);
1448                    dependency.is_optional = Some(true);
1449                }
1450                Some(_) => {
1451                    dependency.is_runtime = Some(true);
1452                }
1453                None => {
1454                    dependency.is_runtime = None;
1455                }
1456            }
1457
1458            if let Some(version) = &coords.version {
1459                dependency.is_pinned = Some(is_maven_version_pinned(version));
1460            }
1461
1462            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1463                dependency.purl = Some(build_maven_purl(
1464                    group_id,
1465                    artifact_id,
1466                    coords.version.as_deref(),
1467                    coords.classifier.as_deref(),
1468                    coords.type_.as_deref(),
1469                ));
1470            }
1471        }
1472
1473        if package_data.namespace.is_none() {
1474            package_data.namespace = parent_group_id.clone();
1475        }
1476        if package_data.version.is_none() {
1477            package_data.version = parent_version.clone();
1478        }
1479
1480        package_data.qualifiers =
1481            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1482
1483        package_data.description = match (
1484            project_name.as_deref().filter(|value| !value.is_empty()),
1485            project_description
1486                .as_deref()
1487                .filter(|value| !value.is_empty()),
1488        ) {
1489            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1490            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1491            (Some(name), None) => Some(name.to_string()),
1492            (None, Some(description)) => Some(description.to_string()),
1493            (None, None) => None,
1494        };
1495
1496        if path.to_string_lossy().contains("META-INF/maven/") {
1497            let path_str = path.to_string_lossy();
1498            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1499                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1500                let parts: Vec<&str> = after_maven.split('/').collect();
1501                if parts.len() >= 2 {
1502                    if package_data.namespace.is_none() {
1503                        package_data.namespace = Some(parts[0].to_string());
1504                    }
1505                    if package_data.name.is_none() {
1506                        package_data.name = Some(parts[1].to_string());
1507                    }
1508                }
1509            }
1510        }
1511
1512        if has_unresolved_template_coordinates(
1513            package_data.namespace.as_deref(),
1514            package_data.name.as_deref(),
1515            package_data.version.as_deref(),
1516        ) {
1517            warn!("Skipping Maven template coordinates in {:?}", path);
1518            return vec![default_package_data()];
1519        }
1520
1521        // Construct PURL from parsed data
1522        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1523            &package_data.namespace,
1524            &package_data.name,
1525            &package_data.version,
1526        ) {
1527            package_data.purl = Some(build_maven_purl(
1528                group_id,
1529                artifact_id,
1530                Some(version),
1531                project_classifier.as_deref(),
1532                project_packaging.as_deref(),
1533            ));
1534            if project_classifier.is_none() {
1535                package_data
1536                    .source_packages
1537                    .push(build_maven_source_package(group_id, artifact_id, version));
1538            }
1539        }
1540
1541        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1542            package_data.repository_homepage_url = build_maven_url(
1543                &package_data.namespace,
1544                &package_data.name,
1545                &package_data.version,
1546                None,
1547            );
1548
1549            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1550                build_maven_download_url(
1551                    group_id,
1552                    artifact_id,
1553                    ver,
1554                    project_classifier.as_deref(),
1555                    project_packaging.as_deref(),
1556                )
1557            });
1558
1559            if let Some(ver) = &package_data.version {
1560                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1561                package_data.api_data_url = build_maven_url(
1562                    &package_data.namespace,
1563                    &package_data.name,
1564                    &package_data.version,
1565                    Some(&pom_filename),
1566                );
1567            }
1568        }
1569
1570        package_data.vcs_url = scm_connection
1571            .or_else(|| scm_developer_connection.clone())
1572            .or_else(|| scm_url.clone());
1573
1574        // Set code_view_url from scm/url (human-browseable URL)
1575        if let Some(url) = &scm_url {
1576            package_data.code_view_url = Some(url.clone());
1577        }
1578
1579        // Set bug_tracking_url from issueManagement/url
1580        if let Some(url) = &issue_management_url {
1581            package_data.bug_tracking_url = Some(url.clone());
1582        }
1583
1584        // Map downloadUrl to download_url field
1585        if let Some(url) = &dist_download_url {
1586            package_data.download_url = Some(url.clone());
1587        }
1588
1589        if organization_name.is_some() || organization_url.is_some() {
1590            package_data.parties.push(Party {
1591                r#type: Some("organization".to_string()),
1592                role: Some("owner".to_string()),
1593                name: organization_name.clone(),
1594                email: None,
1595                url: organization_url.clone(),
1596                organization: None,
1597                organization_url: None,
1598                timezone: None,
1599            });
1600        }
1601
1602        for dependency in &dependency_management_entries {
1603            let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1604                Some("import")
1605            } else {
1606                Some("dependencymanagement")
1607            };
1608
1609            if let Some(converted) =
1610                maven_dependency_to_dependency(dependency, fallback_scope, true)
1611            {
1612                package_data.dependencies.push(converted);
1613            }
1614        }
1615
1616        if (relocation.group_id.is_some()
1617            || relocation.artifact_id.is_some()
1618            || relocation.version.is_some())
1619            && let Some(converted) =
1620                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1621        {
1622            package_data.dependencies.push(converted);
1623        }
1624
1625        if inception_year.is_some()
1626            || organization_name.is_some()
1627            || organization_url.is_some()
1628            || scm_tag.is_some()
1629            || scm_developer_connection.is_some()
1630            || issue_management_system.is_some()
1631            || ci_management_system.is_some()
1632            || ci_management_url.is_some()
1633            || dist_download_url.is_some()
1634            || dist_repository_id.is_some()
1635            || dist_snapshot_repository_id.is_some()
1636            || dist_site_id.is_some()
1637            || !repositories.is_empty()
1638            || !plugin_repositories.is_empty()
1639            || !modules.is_empty()
1640            || !mailing_lists.is_empty()
1641            || !dependency_management_entries.is_empty()
1642            || parent_group_id.is_some()
1643            || relocation.group_id.is_some()
1644            || relocation.artifact_id.is_some()
1645            || relocation.version.is_some()
1646            || relocation.message.is_some()
1647        {
1648            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1649            if let Some(year) = inception_year {
1650                extra_data.insert(
1651                    "inception_year".to_string(),
1652                    serde_json::Value::String(year),
1653                );
1654            }
1655            if let Some(name) = organization_name {
1656                extra_data.insert(
1657                    "organization_name".to_string(),
1658                    serde_json::Value::String(name),
1659                );
1660            }
1661            if let Some(url) = organization_url {
1662                extra_data.insert(
1663                    "organization_url".to_string(),
1664                    serde_json::Value::String(url),
1665                );
1666            }
1667            if let Some(tag) = scm_tag {
1668                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1669            }
1670            if let Some(dev_conn) = scm_developer_connection {
1671                extra_data.insert(
1672                    "scm_developer_connection".to_string(),
1673                    serde_json::Value::String(dev_conn),
1674                );
1675            }
1676            if let Some(system) = issue_management_system {
1677                extra_data.insert(
1678                    "issue_tracking_system".to_string(),
1679                    serde_json::Value::String(system),
1680                );
1681            }
1682            if let Some(system) = ci_management_system {
1683                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1684            }
1685            if let Some(url) = ci_management_url {
1686                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1687            }
1688
1689            // Add distribution management data
1690            if let Some(url) = dist_download_url {
1691                extra_data.insert(
1692                    "distribution_download_url".to_string(),
1693                    serde_json::Value::String(url),
1694                );
1695            }
1696
1697            // Build repository object
1698            if dist_repository_id.is_some()
1699                || dist_repository_name.is_some()
1700                || dist_repository_url.is_some()
1701                || dist_repository_layout.is_some()
1702            {
1703                let mut repo = serde_json::Map::new();
1704                if let Some(id) = dist_repository_id {
1705                    repo.insert("id".to_string(), serde_json::Value::String(id));
1706                }
1707                if let Some(name) = dist_repository_name {
1708                    repo.insert("name".to_string(), serde_json::Value::String(name));
1709                }
1710                if let Some(url) = dist_repository_url {
1711                    repo.insert("url".to_string(), serde_json::Value::String(url));
1712                }
1713                if let Some(layout) = dist_repository_layout {
1714                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1715                }
1716                extra_data.insert(
1717                    "distribution_repository".to_string(),
1718                    serde_json::Value::Object(repo),
1719                );
1720            }
1721
1722            // Build snapshotRepository object
1723            if dist_snapshot_repository_id.is_some()
1724                || dist_snapshot_repository_name.is_some()
1725                || dist_snapshot_repository_url.is_some()
1726                || dist_snapshot_repository_layout.is_some()
1727            {
1728                let mut repo = serde_json::Map::new();
1729                if let Some(id) = dist_snapshot_repository_id {
1730                    repo.insert("id".to_string(), serde_json::Value::String(id));
1731                }
1732                if let Some(name) = dist_snapshot_repository_name {
1733                    repo.insert("name".to_string(), serde_json::Value::String(name));
1734                }
1735                if let Some(url) = dist_snapshot_repository_url {
1736                    repo.insert("url".to_string(), serde_json::Value::String(url));
1737                }
1738                if let Some(layout) = dist_snapshot_repository_layout {
1739                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1740                }
1741                extra_data.insert(
1742                    "distribution_snapshot_repository".to_string(),
1743                    serde_json::Value::Object(repo),
1744                );
1745            }
1746
1747            // Build site object
1748            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1749                let mut site = serde_json::Map::new();
1750                if let Some(id) = dist_site_id {
1751                    site.insert("id".to_string(), serde_json::Value::String(id));
1752                }
1753                if let Some(name) = dist_site_name {
1754                    site.insert("name".to_string(), serde_json::Value::String(name));
1755                }
1756                if let Some(url) = dist_site_url {
1757                    site.insert("url".to_string(), serde_json::Value::String(url));
1758                }
1759                extra_data.insert(
1760                    "distribution_site".to_string(),
1761                    serde_json::Value::Object(site),
1762                );
1763            }
1764
1765            if !repositories.is_empty() {
1766                extra_data.insert(
1767                    "repositories".to_string(),
1768                    serde_json::Value::Array(
1769                        repositories
1770                            .into_iter()
1771                            .map(serde_json::Value::Object)
1772                            .collect(),
1773                    ),
1774                );
1775            }
1776
1777            if !plugin_repositories.is_empty() {
1778                extra_data.insert(
1779                    "plugin_repositories".to_string(),
1780                    serde_json::Value::Array(
1781                        plugin_repositories
1782                            .into_iter()
1783                            .map(serde_json::Value::Object)
1784                            .collect(),
1785                    ),
1786                );
1787            }
1788
1789            if !modules.is_empty() {
1790                extra_data.insert(
1791                    "modules".to_string(),
1792                    serde_json::Value::Array(
1793                        modules.into_iter().map(serde_json::Value::String).collect(),
1794                    ),
1795                );
1796            }
1797
1798            if !mailing_lists.is_empty() {
1799                extra_data.insert(
1800                    "mailing_lists".to_string(),
1801                    serde_json::Value::Array(
1802                        mailing_lists
1803                            .into_iter()
1804                            .map(serde_json::Value::Object)
1805                            .collect(),
1806                    ),
1807                );
1808            }
1809
1810            if !dependency_management_entries.is_empty() {
1811                extra_data.insert(
1812                    "dependency_management".to_string(),
1813                    serde_json::Value::Array(
1814                        dependency_management_entries
1815                            .into_iter()
1816                            .map(|dependency| {
1817                                serde_json::Value::Object(dependency_management_entry_to_value(
1818                                    &dependency,
1819                                ))
1820                            })
1821                            .collect(),
1822                    ),
1823                );
1824            }
1825
1826            if relocation.group_id.is_some()
1827                || relocation.artifact_id.is_some()
1828                || relocation.version.is_some()
1829                || relocation.message.is_some()
1830            {
1831                extra_data.insert(
1832                    "relocation".to_string(),
1833                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1834                );
1835            }
1836
1837            if parent_group_id.is_some()
1838                || parent_artifact_id.is_some()
1839                || parent_version.is_some()
1840                || parent_relative_path.is_some()
1841            {
1842                let mut parent_obj = serde_json::Map::new();
1843                if let Some(group_id) = parent_group_id {
1844                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1845                }
1846                if let Some(artifact_id) = parent_artifact_id {
1847                    parent_obj.insert(
1848                        "artifactId".to_string(),
1849                        serde_json::Value::String(artifact_id),
1850                    );
1851                }
1852                if let Some(version) = parent_version {
1853                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1854                }
1855                if let Some(relative_path) = parent_relative_path {
1856                    parent_obj.insert(
1857                        "relativePath".to_string(),
1858                        serde_json::Value::String(relative_path),
1859                    );
1860                }
1861                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1862            }
1863
1864            package_data.extra_data = Some(extra_data);
1865        }
1866
1867        package_data.extracted_license_statement = build_license_statement(&licenses);
1868
1869        vec![package_data]
1870    }
1871
1872    fn is_match(path: &Path) -> bool {
1873        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
1874            filename == "pom.xml" || filename == "pom.properties" || filename == "MANIFEST.MF"
1875        } else {
1876            false
1877        }
1878    }
1879}
1880
/// Builds a Maven Central (`repo1.maven.org`) URL for the given coordinates.
///
/// The group id is turned into a directory path by replacing every `.` with
/// `/`. The version segment is included only when `version` is `Some`. The
/// final segment is `filename`, or empty when `filename` is `None`, so the
/// directory form of the URL ends with a trailing `/`.
///
/// Returns `None` when either `group_id` or `artifact_id` is missing.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let group = group_id.as_deref()?;
    let artifact = artifact_id.as_deref()?;

    let mut url = String::from(BASE_URL);

    // One path segment per dot-separated component of the group id.
    for component in group.split('.') {
        url.push('/');
        url.push_str(component);
    }

    url.push('/');
    url.push_str(artifact);

    if let Some(ver) = version.as_deref() {
        url.push('/');
        url.push_str(ver);
    }

    // The separator is always emitted, even when there is no filename.
    url.push('/');
    url.push_str(filename.unwrap_or(""));

    Some(url)
}
1909
1910/// Parse pom.properties file (Java properties format)
1911fn parse_pom_properties(path: &Path) -> PackageData {
1912    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
1913        Ok(content) => content,
1914        Err(e) => {
1915            warn!("Failed to read pom.properties at {:?}: {}", path, e);
1916            return PackageData {
1917                package_type: Some(PackageType::Maven),
1918                primary_language: Some("Java".to_string()),
1919                datasource_id: Some(DatasourceId::MavenPomProperties),
1920                ..Default::default()
1921            };
1922        }
1923    };
1924
1925    let mut package_data = default_package_data();
1926    package_data.package_type = Some(PackageType::Maven);
1927    package_data.primary_language = Some("Java".to_string());
1928    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
1929
1930    let mut group_id: Option<String> = None;
1931    let mut artifact_id: Option<String> = None;
1932    let mut version: Option<String> = None;
1933
1934    // Parse Java properties format
1935    let mut continuation = String::new();
1936
1937    for line in content.lines() {
1938        let current_line = if continuation.is_empty() {
1939            line.to_string()
1940        } else {
1941            format!("{}{}", continuation, line)
1942        };
1943        continuation.clear();
1944
1945        // Check for line continuation (backslash at end)
1946        if current_line.ends_with('\\') {
1947            continuation = current_line[..current_line.len() - 1].to_string();
1948            continue;
1949        }
1950
1951        // Skip comments and empty lines
1952        let trimmed = current_line.trim();
1953        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
1954            continue;
1955        }
1956
1957        // Parse key=value
1958        if let Some(eq_pos) = current_line.find('=') {
1959            let key = current_line[..eq_pos].trim();
1960            let value = current_line[eq_pos + 1..].trim();
1961
1962            match key {
1963                "groupId" => group_id = Some(value.to_string()),
1964                "artifactId" => artifact_id = Some(value.to_string()),
1965                "version" => version = Some(value.to_string()),
1966                _ => {}
1967            }
1968        }
1969    }
1970
1971    package_data.namespace = group_id.clone();
1972    package_data.name = artifact_id.clone();
1973    package_data.version = version.clone();
1974
1975    // Generate PURL
1976    if let (Some(group_id), Some(artifact_id), Some(version)) = (
1977        &package_data.namespace,
1978        &package_data.name,
1979        &package_data.version,
1980    ) {
1981        package_data.purl = Some(format!(
1982            "pkg:maven/{}/{}@{}",
1983            group_id, artifact_id, version
1984        ));
1985    }
1986
1987    package_data
1988}
1989
1990/// Parse MANIFEST.MF file (JAR manifest format)
1991///
1992/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
1993/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
1994/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
1995/// dependencies.
1996fn parse_manifest_mf(path: &Path) -> PackageData {
1997    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
1998        Ok(content) => content,
1999        Err(e) => {
2000            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2001            return default_package_data();
2002        }
2003    };
2004
2005    let mut package_data = default_package_data();
2006
2007    // Parse manifest headers (RFC822-style with space continuations)
2008    let mut headers: Vec<(String, String)> = Vec::new();
2009    let mut current_key: Option<String> = None;
2010    let mut current_value = String::new();
2011
2012    for line in content.lines() {
2013        if line.starts_with(' ') || line.starts_with('\t') {
2014            // Continuation line
2015            current_value.push_str(line.trim());
2016        } else if let Some(colon_pos) = line.find(':') {
2017            // Save previous header
2018            if let Some(key) = current_key.take() {
2019                headers.push((key, current_value.trim().to_string()));
2020                current_value.clear();
2021            }
2022
2023            // Start new header
2024            let key = line[..colon_pos].trim().to_string();
2025            let value = line[colon_pos + 1..].trim().to_string();
2026            current_key = Some(key);
2027            current_value = value;
2028        }
2029    }
2030
2031    // Save last header
2032    if let Some(key) = current_key {
2033        headers.push((key, current_value.trim().to_string()));
2034    }
2035
2036    // Convert headers to HashMap for easier lookup
2037    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2038
2039    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2040    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2041    let is_osgi = bundle_symbolic_name.is_some();
2042
2043    if is_osgi {
2044        // OSGi bundle - extract OSGi-specific metadata
2045        package_data.package_type = Some(PackageType::Osgi);
2046        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2047
2048        // Bundle-SymbolicName is the canonical name for OSGi bundles
2049        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2050        if let Some(bsn) = bundle_symbolic_name {
2051            let name = if let Some(semicolon_pos) = bsn.find(';') {
2052                bsn[..semicolon_pos].trim().to_string()
2053            } else {
2054                bsn.clone()
2055            };
2056            package_data.name = Some(name);
2057        }
2058
2059        // Bundle-Version
2060        package_data.version = headers_map.get("Bundle-Version").cloned();
2061
2062        // Bundle-Description takes priority over Bundle-Name for description
2063        if let Some(desc) = headers_map.get("Bundle-Description") {
2064            package_data.description = Some(desc.clone());
2065        } else if let Some(name) = headers_map.get("Bundle-Name") {
2066            package_data.description = Some(name.clone());
2067        }
2068
2069        // Bundle-Vendor
2070        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2071            package_data.parties.push(Party {
2072                r#type: Some("organization".to_string()),
2073                role: Some("vendor".to_string()),
2074                name: Some(vendor.clone()),
2075                email: None,
2076                url: None,
2077                organization: None,
2078                organization_url: None,
2079                timezone: None,
2080            });
2081        }
2082
2083        // Bundle-DocURL
2084        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2085
2086        // Bundle-License
2087        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2088
2089        // Import-Package -> dependencies with scope "import"
2090        if let Some(import_pkg) = headers_map.get("Import-Package") {
2091            let deps = parse_osgi_package_list(import_pkg, "import");
2092            package_data.dependencies.extend(deps);
2093        }
2094
2095        // Require-Bundle -> dependencies with scope "require-bundle"
2096        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2097            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2098            package_data.dependencies.extend(deps);
2099        }
2100
2101        // Export-Package -> store in extra_data
2102        if let Some(export_pkg) = headers_map.get("Export-Package") {
2103            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2104            extra_data.insert(
2105                "export_packages".to_string(),
2106                serde_json::Value::String(export_pkg.clone()),
2107            );
2108            package_data.extra_data = Some(extra_data);
2109        }
2110
2111        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2112        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2113            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2114        }
2115    } else {
2116        // Regular JAR manifest
2117        package_data.package_type = Some(PackageType::Maven);
2118        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2119
2120        // Extract fields with priority order for non-OSGi JARs
2121        let mut name: Option<String> = None;
2122        let mut version: Option<String> = None;
2123        let mut vendor: Option<String> = None;
2124
2125        for (key, value) in &headers {
2126            match key.as_str() {
2127                "Bundle-Name" if name.is_none() => {
2128                    name = Some(value.clone());
2129                }
2130                "Implementation-Title" if name.is_none() => {
2131                    name = Some(value.clone());
2132                }
2133                "Bundle-Version" if version.is_none() => {
2134                    version = Some(value.clone());
2135                }
2136                "Implementation-Version" if version.is_none() => {
2137                    version = Some(value.clone());
2138                }
2139                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2140                    vendor = Some(value.clone());
2141                }
2142                _ => {}
2143            }
2144        }
2145
2146        package_data.name = name;
2147        package_data.version = version;
2148
2149        // Add vendor to parties if present
2150        if let Some(vendor_name) = vendor {
2151            package_data.parties.push(Party {
2152                r#type: Some("organization".to_string()),
2153                role: Some("vendor".to_string()),
2154                name: Some(vendor_name),
2155                email: None,
2156                url: None,
2157                organization: None,
2158                organization_url: None,
2159                timezone: None,
2160            });
2161        }
2162
2163        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2164        if let Some(path_str) = path.to_str()
2165            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2166        {
2167            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2168            let parts: Vec<&str> = after_maven.split('/').collect();
2169            if parts.len() >= 2 {
2170                package_data.namespace = Some(parts[0].to_string());
2171            }
2172        }
2173
2174        // Generate Maven PURL if we have enough information
2175        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2176            &package_data.namespace,
2177            &package_data.name,
2178            &package_data.version,
2179        ) {
2180            package_data.purl = Some(format!(
2181                "pkg:maven/{}/{}@{}",
2182                group_id, artifact_id, version
2183            ));
2184        }
2185    }
2186
2187    package_data
2188}
2189
2190/// Parse OSGi Import-Package header into dependencies.
2191///
2192/// Format: comma-separated list of packages with optional directives:
2193/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2194pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2195    let mut dependencies = Vec::new();
2196
2197    // Split by comma, but be careful not to split within quoted strings
2198    for package_entry in split_osgi_list(package_list) {
2199        let package_entry = package_entry.trim();
2200        if package_entry.is_empty() {
2201            continue;
2202        }
2203
2204        // Extract package name (before first semicolon)
2205        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2206            package_entry[..semicolon_pos].trim()
2207        } else {
2208            package_entry
2209        };
2210
2211        if package_name.is_empty() {
2212            continue;
2213        }
2214
2215        // Extract version directive if present
2216        let version_requirement = extract_osgi_version(package_entry);
2217
2218        dependencies.push(Dependency {
2219            purl: Some(format!("pkg:osgi/{}", package_name)),
2220            extracted_requirement: version_requirement,
2221            scope: Some(scope.to_string()),
2222            is_runtime: Some(true),
2223            is_optional: Some(false),
2224            is_pinned: None,
2225            is_direct: Some(true),
2226            resolved_package: None,
2227            extra_data: None,
2228        });
2229    }
2230
2231    dependencies
2232}
2233
2234/// Parse OSGi Require-Bundle header into dependencies.
2235///
2236/// Format: comma-separated list of bundle symbolic names with optional directives:
2237/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2238pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2239    let mut dependencies = Vec::new();
2240
2241    for bundle_entry in split_osgi_list(bundle_list) {
2242        let bundle_entry = bundle_entry.trim();
2243        if bundle_entry.is_empty() {
2244            continue;
2245        }
2246
2247        // Extract bundle symbolic name (before first semicolon)
2248        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2249            bundle_entry[..semicolon_pos].trim()
2250        } else {
2251            bundle_entry
2252        };
2253
2254        if bundle_name.is_empty() {
2255            continue;
2256        }
2257
2258        // Extract bundle-version directive if present
2259        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2260
2261        // Check if optional
2262        let is_optional = bundle_entry.contains("resolution:=optional");
2263
2264        dependencies.push(Dependency {
2265            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2266            extracted_requirement: version_requirement,
2267            scope: Some(scope.to_string()),
2268            is_runtime: Some(!is_optional),
2269            is_optional: Some(is_optional),
2270            is_pinned: None,
2271            is_direct: Some(true),
2272            resolved_package: None,
2273            extra_data: None,
2274        });
2275    }
2276
2277    dependencies
2278}
2279
/// Break an OSGi header value into its comma-separated entries.
///
/// Commas that appear between double quotes do not separate entries, so a quoted
/// version range stays attached to its entry:
/// "foo;version=\"[1.0,2.0)\",bar;version=\"3.0\""
///
/// Returned entries are trimmed; entries that are empty after trimming are dropped.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    // First pass: record the byte offset of every comma that sits outside quotes.
    let mut inside_quotes = false;
    let mut cut_points = Vec::new();
    for (offset, symbol) in list.char_indices() {
        if symbol == '"' {
            inside_quotes = !inside_quotes;
        } else if symbol == ',' && !inside_quotes {
            cut_points.push(offset);
        }
    }

    // Second pass: slice between consecutive cut points (plus the trailing remainder).
    let mut segment_start = 0;
    let mut entries = Vec::with_capacity(cut_points.len() + 1);
    for cut in cut_points.into_iter().chain(std::iter::once(list.len())) {
        let piece = list[segment_start..cut].trim();
        if !piece.is_empty() {
            entries.push(piece.to_string());
        }
        // A comma is a single byte, so the next segment begins right after it.
        segment_start = cut + 1;
    }

    entries
}
2313
/// Extract the value of the parameter named `directive` from a single OSGi header entry.
///
/// `entry` is one clause of an OSGi header such as
/// `org.foo;bundle-version="1.0";version="[2.0,3)"`. The entry is split into
/// `;`-separated parameters (semicolons inside double quotes do not split) and the first
/// parameter whose key equals `directive` exactly is used. Matching the whole key matters:
/// a plain substring search for `version=` would also hit `bundle-version=` and return
/// the wrong value.
///
/// Returns:
/// - the text between the quotes when the value is quoted,
/// - the trimmed raw text when the value is unquoted,
/// - `None` when no parameter has that key or a quoted value has no closing quote.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split into parameters on semicolons that are outside double quotes.
    let mut in_quotes = false;
    let mut clause_start = 0;
    let mut clauses = Vec::new();
    for (idx, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                clauses.push(&entry[clause_start..idx]);
                clause_start = idx + 1;
            }
            _ => {}
        }
    }
    clauses.push(&entry[clause_start..]);

    // The first parameter whose key is exactly `directive` wins.
    let raw_value = clauses.into_iter().find_map(|clause| {
        let (key, value) = clause.split_once('=')?;
        (key.trim() == directive).then(|| value.trim())
    })?;

    match raw_value.strip_prefix('"') {
        // Quoted value: take everything up to the closing quote.
        Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
        None => Some(raw_value.to_string()),
    }
}
2326
2327pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2328    extract_osgi_directive(entry, "version")
2329}
2330
2331pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2332    extract_osgi_directive(entry, "bundle-version")
2333}
2334
2335fn default_package_data() -> PackageData {
2336    PackageData {
2337        package_type: Some(PackageType::Maven),
2338        datasource_id: Some(DatasourceId::MavenPom),
2339        ..Default::default()
2340    }
2341}
2342
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `pom_xml` to `pom.xml` in a fresh temporary directory and parses it with
    /// `MavenParser::extract_first_package`.
    fn parse_pom(pom_xml: &str) -> PackageData {
        let workspace = TempDir::new().unwrap();
        let pom_file = workspace.path().join("pom.xml");
        fs::write(&pom_file, pom_xml).unwrap();
        MavenParser::extract_first_package(&pom_file)
    }

    /// Builds the JSON string value that `extra_data` entries are compared against.
    fn json_str(text: &str) -> serde_json::Value {
        serde_json::Value::String(text.to_string())
    }

    #[test]
    fn test_organization_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#,
        );

        assert_eq!(package_data.name.as_deref(), Some("my-app"));
        assert_eq!(package_data.namespace.as_deref(), Some("com.example"));
        assert_eq!(package_data.version.as_deref(), Some("1.0.0"));

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("organization_name"),
            Some(&json_str("Example Corporation"))
        );
        assert_eq!(
            extra_data.get("organization_url"),
            Some(&json_str("https://example.com"))
        );
    }

    #[test]
    fn test_scm_metadata_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#,
        );

        assert_eq!(
            package_data.name.as_deref(),
            Some("spring-boot-starter-web")
        );
        assert_eq!(
            package_data.namespace.as_deref(),
            Some("org.springframework.boot")
        );
        assert_eq!(package_data.version.as_deref(), Some("3.0.0"));

        assert_eq!(
            package_data.code_view_url.as_deref(),
            Some("https://github.com/spring-projects/spring-boot")
        );

        // vcs_url prefers connection over developerConnection
        assert_eq!(
            package_data.vcs_url.as_deref(),
            Some("git+https://github.com/spring-projects/spring-boot.git")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(extra_data.get("scm_tag"), Some(&json_str("v3.0.0")));
        // developerConnection stored separately in extra_data
        assert_eq!(
            extra_data.get("scm_developer_connection"),
            Some(&json_str(
                "git+git@github.com:spring-projects/spring-boot.git"
            ))
        );
    }

    #[test]
    fn test_developers_and_contributors_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#,
        );

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(package_data.parties.len(), 3);

        // Fully populated developer.
        let first_developer = &package_data.parties[0];
        assert_eq!(first_developer.r#type.as_deref(), Some("person"));
        assert_eq!(first_developer.role.as_deref(), Some("developer"));
        assert_eq!(first_developer.name.as_deref(), Some("John Doe"));
        assert_eq!(first_developer.email.as_deref(), Some("john@example.com"));
        assert_eq!(
            first_developer.url.as_deref(),
            Some("https://example.com/jdoe")
        );
        assert_eq!(
            first_developer.organization.as_deref(),
            Some("Example Corp")
        );
        assert_eq!(
            first_developer.organization_url.as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            first_developer.timezone.as_deref(),
            Some("America/New_York")
        );

        // Developer with only name and email.
        let second_developer = &package_data.parties[1];
        assert_eq!(second_developer.r#type.as_deref(), Some("person"));
        assert_eq!(second_developer.role.as_deref(), Some("developer"));
        assert_eq!(second_developer.name.as_deref(), Some("Jane Smith"));
        assert_eq!(second_developer.email.as_deref(), Some("jane@example.com"));

        // Contributors follow the developers in `parties`.
        let contributor = &package_data.parties[2];
        assert_eq!(contributor.r#type.as_deref(), Some("person"));
        assert_eq!(contributor.role.as_deref(), Some("contributor"));
        assert_eq!(contributor.name.as_deref(), Some("Bob Wilson"));
        assert_eq!(contributor.email.as_deref(), Some("bob@example.com"));
        assert_eq!(
            contributor.url.as_deref(),
            Some("https://example.com/bob")
        );
    }

    #[test]
    fn test_issue_management_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#,
        );

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.bug_tracking_url.as_deref(),
            Some("https://github.com/example/test-app/issues")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("issue_tracking_system"),
            Some(&json_str("GitHub"))
        );
    }

    #[test]
    fn test_ci_management_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#,
        );

        assert_eq!(package_data.name.as_deref(), Some("test-app"));

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(extra_data.get("ci_system"), Some(&json_str("Jenkins")));
        assert_eq!(
            extra_data.get("ci_url"),
            Some(&json_str("https://ci.example.com/job/test-app"))
        );
    }

    #[test]
    fn test_distribution_management_extraction() {
        let package_data = parse_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#,
        );

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.download_url.as_deref(),
            Some("https://example.com/downloads")
        );

        let extra_data = package_data.extra_data.unwrap();

        assert_eq!(
            extra_data.get("distribution_download_url"),
            Some(&json_str("https://example.com/downloads"))
        );

        let repo = extra_data
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "releases"),
            ("name", "Release Repository"),
            ("url", "https://repo.example.com/releases"),
            ("layout", "default"),
        ] {
            assert_eq!(repo.get(field), Some(&json_str(expected)));
        }

        let snapshot_repo = extra_data
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "snapshots"),
            ("name", "Snapshot Repository"),
            ("url", "https://repo.example.com/snapshots"),
            ("layout", "default"),
        ] {
            assert_eq!(snapshot_repo.get(field), Some(&json_str(expected)));
        }

        let site = extra_data
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "site-deploy"),
            ("name", "Project Site"),
            ("url", "https://example.com/site"),
        ] {
            assert_eq!(site.get(field), Some(&json_str(expected)));
        }
    }
}
2719
// Invokes the crate-level `register_parser!` macro for this parser.
// NOTE(review): the macro definition is not visible here; the arguments look like
// (description, path glob patterns, package type, primary language, documentation URL)
// — confirm the order and meaning against the macro before changing them.
// The glob list covers `.pom` files, `pom.xml`, `pom.properties`, and JAR manifests
// under `META-INF/`.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);