Skip to main content

provenant/parsers/
maven.rs

1//! Parser for Apache Maven pom.xml files.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! Maven Project Object Model (POM) files.
5//!
6//! # Supported Formats
7//! - pom.xml (Project Object Model)
8//! - pom.properties
9//! - MANIFEST.MF (JAR manifest)
10//!
11//! # Key Features
12//! - Property value substitution (`${project.version}`)
13//! - `is_pinned` analysis (exact version vs ranges like `[1.0,2.0)`)
14//! - Dependency scope handling (compile, test, provided, runtime, system)
15//! - Package URL (purl) generation
16//! - Multiple license support (combined with " OR ")
17//!
18//! # Implementation Notes
19//! - Uses quick-xml for XML parsing
20//! - Version pinning: `"1.0.0"` is pinned, `"[1.0,2.0)"` is not
21//! - Property substitution limited to prevent infinite loops
22//! - Direct dependencies: all in pom.xml are direct
23
24use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parsers::utils::read_file_to_string;
26use log::warn;
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::collections::{HashMap, HashSet};
30use std::fs::File;
31use std::io::BufReader;
32use std::path::Path;
33
34use super::PackageParser;
35use super::license_normalization::{
36    DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
37    combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
38};
39
/// Raw string fields collected for one Maven dependency-shaped element.
///
/// The same shape is used in this file for regular dependencies,
/// `dependencyManagement` entries, and the `relocation` block. Every field is
/// optional, and values may still contain `${...}` placeholders until
/// `resolve_dependency_data` rewrites them in place.
#[derive(Clone, Default)]
struct MavenDependencyData {
    /// Group coordinate; `maven_dependency_to_dependency` yields `None` without it.
    group_id: Option<String>,
    /// Artifact coordinate; `maven_dependency_to_dependency` yields `None` without it.
    artifact_id: Option<String>,
    /// Version requirement as written; feeds `extracted_requirement` and `is_pinned`.
    version: Option<String>,
    /// Artifact classifier; becomes the `classifier` purl qualifier and extra-data entry.
    classifier: Option<String>,
    /// Dependency type; passed as the packaging when building the dependency purl.
    type_: Option<String>,
    /// Declared scope; drives the runtime/optional flags of the resulting dependency.
    scope: Option<String>,
    /// Textual optional flag, interpreted by `parse_maven_bool`.
    optional: Option<String>,
    /// System path; surfaced as the `system_path` extra-data entry.
    system_path: Option<String>,
    /// Free-form message; surfaced as the `message` extra-data entry.
    /// NOTE(review): presumably filled from `<relocation><message>` — the code
    /// that populates it is outside this view.
    message: Option<String>,
}
52
/// Raw string fields collected for one `<license>` element.
///
/// Values may contain `${...}` placeholders until `resolve_license_entry`
/// rewrites them; `build_license_statement` renders the non-blank fields.
#[derive(Clone, Default)]
struct MavenLicenseEntry {
    /// License name, rendered as the `name:` line of the statement.
    name: Option<String>,
    /// License URL, rendered as the `url:` line of the statement.
    url: Option<String>,
    /// License comments, rendered as the `comments:` line of the statement.
    comments: Option<String>,
}
59
/// Resolves Maven property placeholders (`${property.name}`) with cycle and DoS protection.
///
/// Maven properties can reference other properties, creating dependency graphs. This resolver:
/// - Resolves nested placeholders: `${outer.${inner}}`
/// - Detects circular references: `${a}` → `${b}` → `${a}`
/// - Enforces depth limits to prevent stack overflow
/// - Enforces substitution limits to prevent DoS on pathological inputs
///
/// # Algorithm
///
/// Uses byte-level parsing for efficient placeholder extraction. Tracks:
/// - `resolving_set`: For cycle detection (hash set lookup)
/// - `resolving_stack`: For error reporting (preserves path)
/// - `cache`: Memoizes resolved values to avoid redundant work
struct PropertyResolver {
    /// Properties supplied by the caller; consulted before `builtins` on lookup.
    raw: HashMap<String, String>,
    /// Fallback properties used only when a key is absent from `raw`.
    builtins: HashMap<String, String>,
    /// Resolved value per key; filled by `resolve_key` after each successful lookup.
    cache: HashMap<String, String>,
    /// Keys whose resolution is currently in progress (cycle detection).
    resolving_set: HashSet<String>,
    /// Same keys as `resolving_set`, in resolution order, printed in cycle warnings.
    resolving_stack: Vec<String>,
    /// Maximum recursion depth; `new` sets it to 10.
    max_depth: usize,
    /// Maximum size in bytes of one resolved text; `new` sets it to 100_000.
    max_output_len: usize,
    /// Maximum number of placeholders substituted per text; `new` sets it to 1000.
    max_substitutions: usize,
    /// `kind:key` tokens already warned about, so each warning is logged once.
    warned_keys: HashSet<String>,
}
85
impl PropertyResolver {
    /// Creates a resolver over user-defined (`raw`) and built-in properties.
    ///
    /// Starts with an empty cache and empty cycle-tracking state, and fixes the
    /// limits at depth 10, 100_000 output bytes and 1000 substitutions per text.
    fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
        Self {
            raw,
            builtins,
            cache: HashMap::new(),
            resolving_set: HashSet::new(),
            resolving_stack: Vec::new(),
            max_depth: 10,
            max_output_len: 100_000,
            max_substitutions: 1000,
            warned_keys: HashSet::new(),
        }
    }

    /// Resolves a single property key to its fully substituted value.
    ///
    /// Returns the cached value when present. Returns `None` (after a one-time
    /// warning) when the depth limit is reached, when `key` is already being
    /// resolved further up the stack (a cycle), or when the key is defined in
    /// neither `raw` nor `builtins`.
    ///
    /// The value stored in the cache is whatever `resolve_text` produced for
    /// the raw value, so it may still contain placeholders that could not be
    /// resolved (for example because a limit was hit during that resolution).
    fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
        // Fast path: this key has already been resolved once.
        if let Some(value) = self.cache.get(key) {
            return Some(value.clone());
        }

        if depth >= self.max_depth {
            self.warn_once(
                "depth",
                key,
                format!("Maven property depth limit hit resolving {key}"),
            );
            return None;
        }

        // A key that is still on the resolution stack references itself,
        // directly or through other properties.
        if self.resolving_set.contains(key) {
            self.warn_once(
                "cycle",
                key,
                format!(
                    "Maven property cycle detected at {key}: {:?}",
                    self.resolving_stack
                ),
            );
            return None;
        }

        // User-defined properties take precedence over built-in ones.
        let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
            value.clone()
        } else {
            self.warn_once("missing", key, format!("Maven property missing key {key}"));
            return None;
        };

        // Mark the key as in progress for the duration of the nested resolution.
        self.resolving_set.insert(key.to_string());
        self.resolving_stack.push(key.to_string());

        let resolved = self.resolve_text(&raw_val, depth + 1);

        self.resolving_stack.pop();
        self.resolving_set.remove(key);

        self.cache.insert(key.to_string(), resolved.clone());
        Some(resolved)
    }

    /// Substitutes every `${...}` placeholder found in `text`.
    ///
    /// Placeholders whose key cannot be resolved are copied to the output
    /// verbatim. When the depth limit, the substitution limit, or the output
    /// length limit is hit, or a placeholder has no closing `}`, the ORIGINAL
    /// `text` is returned unchanged rather than a partially substituted result.
    fn resolve_text(&mut self, text: &str, depth: usize) -> String {
        // Nothing to substitute.
        if !text.contains("${") {
            return text.to_string();
        }

        if depth >= self.max_depth {
            warn!("Maven property depth limit hit resolving text");
            return text.to_string();
        }

        let bytes = text.as_bytes();
        let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
        let mut index = 0;
        // Counts placeholders handled in this call only (not across recursion).
        let mut substitutions = 0;

        while index < bytes.len() {
            // Start of a `${` placeholder.
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                if substitutions >= self.max_substitutions {
                    warn!("Maven property substitution limit hit resolving {text}");
                    return text.to_string();
                }

                let placeholder_start = index;
                // `content` is the text between `${` and its matching `}`.
                let Some((content, closing_index)) =
                    self.parse_placeholder_content(text, index + 2)
                else {
                    warn!("Maven property malformed placeholder in {text}");
                    return text.to_string();
                };

                substitutions += 1;
                // Nested placeholders inside the key are resolved first, so
                // `${outer.${inner}}` looks up the key built from `inner`'s value.
                let resolved_key = if content.contains("${") {
                    self.resolve_text(content, depth + 1)
                } else {
                    content.to_string()
                };

                if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
                    if output.len() + resolved.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(resolved.as_bytes());
                } else {
                    // Unresolvable: keep the original placeholder text, braces included.
                    let placeholder_bytes = &bytes[placeholder_start..=closing_index];
                    if output.len() + placeholder_bytes.len() > self.max_output_len {
                        warn!("Maven property output length limit hit resolving {text}");
                        return text.to_string();
                    }
                    output.extend_from_slice(placeholder_bytes);
                }

                // Continue scanning right after the closing brace.
                index = closing_index + 1;
                continue;
            }

            if output.len() + 1 > self.max_output_len {
                warn!("Maven property output length limit hit resolving {text}");
                return text.to_string();
            }

            // Ordinary byte: copy through unchanged.
            output.push(bytes[index]);
            index += 1;
        }

        // Falls back to the original text if the assembled bytes are not valid UTF-8.
        String::from_utf8(output).unwrap_or_else(|_| text.to_string())
    }

    /// Finds the end of a placeholder whose content starts at `start_index`.
    ///
    /// `start_index` is the byte offset just past the opening `${`. Nested
    /// `${` openers are counted so that the `}` returned is the one matching
    /// the outer placeholder. Returns the content slice (without braces) and
    /// the byte index of the closing `}`, or `None` when no matching brace
    /// exists.
    fn parse_placeholder_content<'a>(
        &self,
        text: &'a str,
        start_index: usize,
    ) -> Option<(&'a str, usize)> {
        let bytes = text.as_bytes();
        let mut index = start_index;
        // Number of nested `${` openers that are still unclosed.
        let mut depth = 0;

        while index < bytes.len() {
            if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
                depth += 1;
                index += 2;
                continue;
            }

            if bytes[index] == b'}' {
                if depth == 0 {
                    return Some((&text[start_index..index], index));
                }
                depth -= 1;
            }

            index += 1;
        }

        None
    }

    /// Logs `message` as a warning the first time a given `kind`/`key` pair is seen.
    fn warn_once(&mut self, kind: &str, key: &str, message: String) {
        let token = format!("{kind}:{key}");
        // `insert` returns true only when the token was not present yet.
        if self.warned_keys.insert(token) {
            warn!("{message}");
        }
    }
}
250
251fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
252    if let Some(current) = value.clone() {
253        *value = Some(resolver.resolve_text(&current, 0));
254    }
255}
256
257fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
258    for value in values.iter_mut() {
259        *value = resolver.resolve_text(value, 0);
260    }
261}
262
263fn resolve_map_strings(
264    resolver: &mut PropertyResolver,
265    values: &mut serde_json::Map<String, serde_json::Value>,
266) {
267    for value in values.values_mut() {
268        if let serde_json::Value::String(current) = value {
269            let resolved = resolver.resolve_text(current, 0);
270            *current = resolved;
271        }
272    }
273}
274
275fn resolve_maps(
276    resolver: &mut PropertyResolver,
277    values: &mut [serde_json::Map<String, serde_json::Value>],
278) {
279    for value in values.iter_mut() {
280        resolve_map_strings(resolver, value);
281    }
282}
283
284fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
285    resolve_option(resolver, &mut dependency.group_id);
286    resolve_option(resolver, &mut dependency.artifact_id);
287    resolve_option(resolver, &mut dependency.version);
288    resolve_option(resolver, &mut dependency.classifier);
289    resolve_option(resolver, &mut dependency.type_);
290    resolve_option(resolver, &mut dependency.scope);
291    resolve_option(resolver, &mut dependency.optional);
292    resolve_option(resolver, &mut dependency.system_path);
293    resolve_option(resolver, &mut dependency.message);
294}
295
/// Interprets an optional Maven boolean string.
///
/// Returns `true` only when the value, after trimming surrounding whitespace,
/// equals `true` ignoring ASCII case. Anything else, including `None`, is `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    matches!(value, Some(text) if text.trim().eq_ignore_ascii_case("true"))
}
299
/// Normalizes a Maven packaging value.
///
/// - `None`, empty, or whitespace-only input yields `None`.
/// - A recognized packaging (case-sensitive match) is returned trimmed.
/// - Any other non-blank value is mapped to `"jar"`.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const RECOGNIZED: &[&str] = &[
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        None
    } else if RECOGNIZED.contains(&trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
310
311fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
312    resolve_option(resolver, &mut license.name);
313    resolve_option(resolver, &mut license.url);
314    resolve_option(resolver, &mut license.comments);
315}
316
317fn build_maven_qualifiers(
318    classifier: Option<&str>,
319    packaging: Option<&str>,
320) -> Option<HashMap<String, String>> {
321    let mut qualifiers = HashMap::new();
322
323    if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
324        qualifiers.insert("classifier".to_string(), classifier.to_string());
325    }
326
327    if let Some(packaging) = normalize_maven_packaging(packaging)
328        .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
329    {
330        qualifiers.insert("type".to_string(), packaging.to_string());
331    }
332
333    (!qualifiers.is_empty()).then_some(qualifiers)
334}
335
336fn build_maven_purl(
337    group_id: &str,
338    artifact_id: &str,
339    version: Option<&str>,
340    classifier: Option<&str>,
341    packaging: Option<&str>,
342) -> String {
343    let mut purl = format!("pkg:maven/{group_id}/{artifact_id}");
344
345    if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
346        purl.push('@');
347        purl.push_str(version);
348    }
349
350    let qualifiers = build_maven_qualifiers(classifier, packaging);
351    if let Some(qualifiers) = qualifiers {
352        let mut query_parts = Vec::new();
353        if let Some(classifier) = qualifiers.get("classifier") {
354            query_parts.push(format!("classifier={classifier}"));
355        }
356        if let Some(type_) = qualifiers.get("type") {
357            query_parts.push(format!("type={type_}"));
358        }
359
360        if !query_parts.is_empty() {
361            purl.push('?');
362            purl.push_str(&query_parts.join("&"));
363        }
364    }
365
366    purl
367}
368
369fn build_maven_download_url(
370    group_id: &str,
371    artifact_id: &str,
372    version: &str,
373    classifier: Option<&str>,
374    packaging: Option<&str>,
375) -> String {
376    const BASE_URL: &str = "https://repo1.maven.org/maven2";
377    let group_path = group_id.replace('.', "/");
378    let extension = normalize_maven_packaging(packaging)
379        .filter(|value| *value != "pom")
380        .unwrap_or("jar");
381    let classifier_suffix = classifier
382        .map(str::trim)
383        .filter(|value| !value.is_empty())
384        .map(|value| format!("-{value}"))
385        .unwrap_or_default();
386
387    format!(
388        "{}/{}/{}/{}/{}-{}{}.{}",
389        BASE_URL,
390        group_path,
391        artifact_id,
392        version,
393        artifact_id,
394        version,
395        classifier_suffix,
396        extension
397    )
398}
399
400fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
401    build_maven_purl(namespace, name, Some(version), Some("sources"), None)
402}
403
/// Reports whether any coordinate is exactly one of the bare template
/// placeholders `${groupId}`, `${artifactId}`, `${version}`, `${package}` or
/// `${packageName}`.
///
/// Each value is trimmed before comparison; a placeholder embedded in a longer
/// string does not count, and `None` coordinates are ignored.
fn has_unresolved_template_coordinates(
    namespace: Option<&str>,
    name: Option<&str>,
    version: Option<&str>,
) -> bool {
    let is_template_placeholder = |coordinate: Option<&str>| {
        matches!(
            coordinate.map(str::trim),
            Some(
                "${groupId}" | "${artifactId}" | "${version}" | "${package}" | "${packageName}"
            )
        )
    };

    is_template_placeholder(namespace)
        || is_template_placeholder(name)
        || is_template_placeholder(version)
}
423
424fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
425    let rendered_entries: Vec<String> = licenses
426        .iter()
427        .filter_map(|license| {
428            let mut lines = Vec::new();
429
430            if let Some(name) = license
431                .name
432                .as_ref()
433                .filter(|value| !value.trim().is_empty())
434            {
435                lines.push(format!("    name: {name}"));
436            }
437            if let Some(url) = license
438                .url
439                .as_ref()
440                .filter(|value| !value.trim().is_empty())
441            {
442                lines.push(format!("    url: {url}"));
443            }
444            if let Some(comments) = license
445                .comments
446                .as_ref()
447                .filter(|value| !value.trim().is_empty())
448            {
449                lines.push(format!("    comments: {comments}"));
450            }
451
452            (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
453        })
454        .collect();
455
456    if rendered_entries.is_empty() {
457        None
458    } else {
459        Some(format!("{}\n", rendered_entries.join("\n")))
460    }
461}
462
/// Heuristically decides whether a comment looks license-related.
///
/// The comment is lowercased (ASCII) and checked for any of a fixed list of
/// markers. Matching is by plain substring, so short markers such as `mit`
/// also match inside longer words (e.g. "submit").
fn is_license_like_comment(comment: &str) -> bool {
    const MARKERS: &[&str] = &[
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "mit",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];

    let haystack = comment.to_ascii_lowercase();
    for marker in MARKERS {
        if haystack.contains(*marker) {
            return true;
        }
    }
    false
}
481
482fn dependency_extra_data(
483    dependency: &MavenDependencyData,
484) -> Option<HashMap<String, serde_json::Value>> {
485    let mut extra_data = HashMap::new();
486
487    if let Some(classifier) = dependency
488        .classifier
489        .as_ref()
490        .filter(|value| !value.trim().is_empty())
491    {
492        extra_data.insert(
493            "classifier".to_string(),
494            serde_json::Value::String(classifier.clone()),
495        );
496    }
497    if let Some(type_) = dependency
498        .type_
499        .as_ref()
500        .filter(|value| !value.trim().is_empty())
501    {
502        extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
503    }
504    if let Some(system_path) = dependency
505        .system_path
506        .as_ref()
507        .filter(|value| !value.trim().is_empty())
508    {
509        extra_data.insert(
510            "system_path".to_string(),
511            serde_json::Value::String(system_path.clone()),
512        );
513    }
514    if let Some(message) = dependency
515        .message
516        .as_ref()
517        .filter(|value| !value.trim().is_empty())
518    {
519        extra_data.insert(
520            "message".to_string(),
521            serde_json::Value::String(message.clone()),
522        );
523    }
524
525    (!extra_data.is_empty()).then_some(extra_data)
526}
527
528fn dependency_management_entry_to_value(
529    dependency: &MavenDependencyData,
530) -> serde_json::Map<String, serde_json::Value> {
531    let mut dep_obj = serde_json::Map::new();
532
533    if let Some(group_id) = dependency.group_id.as_ref() {
534        dep_obj.insert(
535            "groupId".to_string(),
536            serde_json::Value::String(group_id.clone()),
537        );
538    }
539    if let Some(artifact_id) = dependency.artifact_id.as_ref() {
540        dep_obj.insert(
541            "artifactId".to_string(),
542            serde_json::Value::String(artifact_id.clone()),
543        );
544    }
545    if let Some(version) = dependency.version.as_ref() {
546        dep_obj.insert(
547            "version".to_string(),
548            serde_json::Value::String(version.clone()),
549        );
550    }
551    if let Some(scope) = dependency.scope.as_ref() {
552        dep_obj.insert(
553            "scope".to_string(),
554            serde_json::Value::String(scope.clone()),
555        );
556    }
557    if let Some(type_) = dependency.type_.as_ref() {
558        dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
559    }
560    if let Some(classifier) = dependency.classifier.as_ref() {
561        dep_obj.insert(
562            "classifier".to_string(),
563            serde_json::Value::String(classifier.clone()),
564        );
565    }
566    if let Some(optional) = dependency.optional.as_deref() {
567        dep_obj.insert(
568            "optional".to_string(),
569            serde_json::Value::Bool(parse_maven_bool(Some(optional))),
570        );
571    }
572    if let Some(message) = dependency.message.as_ref() {
573        dep_obj.insert(
574            "message".to_string(),
575            serde_json::Value::String(message.clone()),
576        );
577    }
578
579    dep_obj
580}
581
582fn maven_dependency_to_dependency(
583    dependency_data: &MavenDependencyData,
584    fallback_scope: Option<&str>,
585    force_non_runtime: bool,
586) -> Option<Dependency> {
587    let group_id = dependency_data.group_id.as_ref()?;
588    let artifact_id = dependency_data.artifact_id.as_ref()?;
589    let version = dependency_data.version.clone();
590    let scope = dependency_data
591        .scope
592        .clone()
593        .or_else(|| fallback_scope.map(str::to_string));
594    let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
595
596    let (is_runtime, is_optional) = if force_non_runtime {
597        (Some(false), Some(explicit_optional))
598    } else {
599        match scope.as_deref() {
600            Some("test") | Some("provided") => (Some(false), Some(true)),
601            Some(_) => (Some(true), Some(explicit_optional)),
602            None => (None, Some(explicit_optional)),
603        }
604    };
605
606    Some(Dependency {
607        purl: Some(build_maven_purl(
608            group_id,
609            artifact_id,
610            version.as_deref(),
611            dependency_data.classifier.as_deref(),
612            dependency_data.type_.as_deref(),
613        )),
614        extracted_requirement: version.clone(),
615        scope,
616        is_runtime,
617        is_optional,
618        is_pinned: version.as_deref().map(is_maven_version_pinned),
619        is_direct: Some(true),
620        resolved_package: None,
621        extra_data: dependency_extra_data(dependency_data),
622    })
623}
624
/// Determines if a Maven version specifier is pinned to an exact version.
///
/// A version is considered pinned if it specifies an exact version without
/// range syntax, dynamic keywords, or unresolved property placeholders.
/// Surrounding whitespace is ignored. Examples:
/// - Pinned: "1.0.0", "1.2.3"
/// - NOT pinned: "" (empty), "[1.0.0,2.0.0)" (range), "[1.0.0,)" (open-ended),
///   "LATEST", "RELEASE" (any ASCII case), "${some.version}" (unresolved property)
fn is_maven_version_pinned(version_str: &str) -> bool {
    let trimmed = version_str.trim();

    // Empty version is not pinned
    if trimmed.is_empty() {
        return false;
    }

    // A placeholder that survived property resolution names no concrete
    // version, so it cannot count as an exact pin.
    if trimmed.contains("${") {
        return false;
    }

    // Range syntax uses brackets and parentheses
    if trimmed.contains(|c: char| matches!(c, '[' | ']' | '(' | ')')) {
        return false;
    }

    // Dynamic version keywords float to whatever is newest
    let is_dynamic_keyword = ["LATEST", "RELEASE"]
        .iter()
        .any(|keyword| trimmed.eq_ignore_ascii_case(keyword));

    !is_dynamic_keyword
}
656
/// Builds the built-in `project.*` / `pom.*` properties for placeholder resolution.
///
/// The group id and version fall back to the parent's values when the project
/// does not declare its own. Only properties with a known value are inserted:
/// - `project.groupId`, `pom.groupId`
/// - `project.artifactId`, `pom.artifactId`
/// - `project.version`, `pom.version`
/// - `project.parent.groupId`, `project.parent.version`
/// - `project.packaging`, `project.name`
fn build_builtin_properties(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
    parent_group_id: &Option<String>,
    parent_version: &Option<String>,
    project_name: &Option<String>,
    project_packaging: &Option<String>,
) -> HashMap<String, String> {
    let effective_group_id = namespace.as_deref().or(parent_group_id.as_deref());
    let effective_version = version.as_deref().or(parent_version.as_deref());
    let artifact_id = name.as_deref();

    let candidates = [
        ("project.groupId", effective_group_id),
        ("pom.groupId", effective_group_id),
        ("project.artifactId", artifact_id),
        ("pom.artifactId", artifact_id),
        ("project.version", effective_version),
        ("pom.version", effective_version),
        ("project.parent.groupId", parent_group_id.as_deref()),
        ("project.parent.version", parent_version.as_deref()),
        ("project.packaging", project_packaging.as_deref()),
        ("project.name", project_name.as_deref()),
    ];

    candidates
        .into_iter()
        .filter_map(|(key, value)| Some((key.to_string(), value?.to_string())))
        .collect()
}
703
/// Maven package parser supporting pom.xml, pom.properties, and MANIFEST.MF files.
///
/// `extract_packages` dispatches on the file name: `pom.properties` and
/// `MANIFEST.MF` go to their dedicated parsers, and any other path is read as
/// POM XML.
///
/// Handles Maven property resolution (`${property.name}` syntax) with cycle detection
/// and depth limits. See `PropertyResolver` for property substitution algorithm details.
pub struct MavenParser;
709
710impl PackageParser for MavenParser {
711    const PACKAGE_TYPE: PackageType = PackageType::Maven;
712
713    fn extract_packages(path: &Path) -> Vec<PackageData> {
714        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
715            if filename == "pom.properties" {
716                return vec![parse_pom_properties(path)];
717            } else if filename == "MANIFEST.MF" {
718                return vec![parse_manifest_mf(path)];
719            }
720        }
721
722        let file = match File::open(path) {
723            Ok(f) => f,
724            Err(e) => {
725                warn!("Failed to open pom.xml at {:?}: {}", path, e);
726                return vec![default_package_data()];
727            }
728        };
729
730        let mut reader = Reader::from_reader(BufReader::new(file));
731        reader.config_mut().trim_text(true);
732
733        let mut buf = Vec::new();
734        let mut package_data = default_package_data();
735        package_data.package_type = Some(Self::PACKAGE_TYPE);
736        package_data.primary_language = Some("Java".to_string());
737        package_data.datasource_id = Some(DatasourceId::MavenPom);
738
739        let mut current_element = Vec::new();
740        let mut in_dependencies = false;
741        let mut current_dependency: Option<Dependency> = None;
742        let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
743        let mut current_dependency_data: Option<MavenDependencyData> = None;
744
745        let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
746        let mut xml_license_comments: Vec<String> = Vec::new();
747        let mut current_license: Option<MavenLicenseEntry> = None;
748        let mut inception_year = None;
749        let mut scm_connection = None;
750        let mut scm_developer_connection = None;
751        let mut scm_url = None;
752        let mut scm_tag = None;
753        let mut organization_name = None;
754        let mut organization_url = None;
755        let mut in_developers = false;
756        let mut in_contributors = false;
757        let mut current_party: Option<Party> = None;
758        let mut issue_management_system = None;
759        let mut issue_management_url = None;
760        let mut ci_management_system = None;
761        let mut ci_management_url = None;
762        let mut in_distribution_management = false;
763        let mut in_dist_repository = false;
764        let mut in_dist_snapshot_repository = false;
765        let mut in_dist_site = false;
766        let mut dist_download_url = None;
767        let mut dist_repository_id = None;
768        let mut dist_repository_name = None;
769        let mut dist_repository_url = None;
770        let mut dist_repository_layout = None;
771        let mut dist_snapshot_repository_id = None;
772        let mut dist_snapshot_repository_name = None;
773        let mut dist_snapshot_repository_url = None;
774        let mut dist_snapshot_repository_layout = None;
775        let mut dist_site_id = None;
776        let mut dist_site_name = None;
777        let mut dist_site_url = None;
778        let mut in_repositories = false;
779        let mut in_plugin_repositories = false;
780        let mut in_repository = false;
781        let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
782        let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
783        let mut current_repository_id = None;
784        let mut current_repository_name = None;
785        let mut current_repository_url = None;
786        let mut in_modules = false;
787        let mut modules: Vec<String> = Vec::new();
788        let mut in_mailing_lists = false;
789        let mut in_mailing_list = false;
790        let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
791        let mut current_mailing_list_name = None;
792        let mut current_mailing_list_subscribe = None;
793        let mut current_mailing_list_unsubscribe = None;
794        let mut current_mailing_list_post = None;
795        let mut current_mailing_list_archive = None;
796        let mut in_dependency_management = false;
797        let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
798        let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
799        let mut in_dep_mgmt_dependency = false;
800        let mut in_parent = false;
801        let mut parent_group_id = None;
802        let mut parent_artifact_id = None;
803        let mut parent_version = None;
804        let mut parent_relative_path = None;
805        let mut in_properties = false;
806        let mut properties: HashMap<String, String> = HashMap::new();
807        let mut project_name = None;
808        let mut project_description = None;
809        let mut project_packaging = None;
810        let mut project_classifier = None;
811        let mut in_relocation = false;
812        let mut relocation = MavenDependencyData::default();
813
814        loop {
815            match reader.read_event_into(&mut buf) {
816                Ok(Event::Start(e)) => {
817                    let element_name = e.name().as_ref().to_vec();
818                    current_element.push(element_name.clone());
819
820                    match element_name.as_slice() {
821                        b"parent" => in_parent = true,
822                        b"dependencyManagement" => in_dependency_management = true,
823                        b"dependencies" if in_dependency_management => {}
824                        b"dependencies" => in_dependencies = true,
825                        b"dependency" if in_dependency_management => {
826                            in_dep_mgmt_dependency = true;
827                            current_dep_mgmt_dependency = Some(MavenDependencyData::default());
828                        }
829                        b"dependency" if in_dependencies => {
830                            current_dependency = Some(Dependency {
831                                purl: None,
832                                extracted_requirement: None,
833                                scope: None,
834                                is_runtime: None,
835                                is_optional: Some(false),
836                                is_pinned: None,
837                                is_direct: Some(true),
838                                resolved_package: None,
839                                extra_data: None,
840                            });
841                            current_dependency_data = Some(MavenDependencyData::default());
842                        }
843                        b"properties" => in_properties = true,
844                        b"developers" => in_developers = true,
845                        b"developer" if in_developers => {
846                            current_party = Some(Party {
847                                r#type: Some("person".to_string()),
848                                role: Some("developer".to_string()),
849                                name: None,
850                                email: None,
851                                url: None,
852                                organization: None,
853                                organization_url: None,
854                                timezone: None,
855                            });
856                        }
857                        b"contributors" => in_contributors = true,
858                        b"contributor" if in_contributors => {
859                            current_party = Some(Party {
860                                r#type: Some("person".to_string()),
861                                role: Some("contributor".to_string()),
862                                name: None,
863                                email: None,
864                                url: None,
865                                organization: None,
866                                organization_url: None,
867                                timezone: None,
868                            });
869                        }
870                        b"license" => current_license = Some(MavenLicenseEntry::default()),
871                        b"distributionManagement" => in_distribution_management = true,
872                        b"relocation" if in_distribution_management => {
873                            in_relocation = true;
874                            relocation = MavenDependencyData::default();
875                        }
876                        b"repository" if in_distribution_management => in_dist_repository = true,
877                        b"snapshotRepository" if in_distribution_management => {
878                            in_dist_snapshot_repository = true
879                        }
880                        b"site" if in_distribution_management => in_dist_site = true,
881                        b"repositories" => in_repositories = true,
882                        b"pluginRepositories" => in_plugin_repositories = true,
883                        b"repository" if in_repositories && !in_distribution_management => {
884                            in_repository = true;
885                            current_repository_id = None;
886                            current_repository_name = None;
887                            current_repository_url = None;
888                        }
889                        b"pluginRepository" if in_plugin_repositories => {
890                            in_repository = true;
891                            current_repository_id = None;
892                            current_repository_name = None;
893                            current_repository_url = None;
894                        }
895                        b"modules" => in_modules = true,
896                        b"mailingLists" => in_mailing_lists = true,
897                        b"mailingList" if in_mailing_lists => {
898                            in_mailing_list = true;
899                            current_mailing_list_name = None;
900                            current_mailing_list_subscribe = None;
901                            current_mailing_list_unsubscribe = None;
902                            current_mailing_list_post = None;
903                            current_mailing_list_archive = None;
904                        }
905                        _ => {}
906                    }
907                }
908                Ok(Event::Text(e)) => {
909                    let text = e.decode().unwrap_or_default().to_string();
910                    let current_path = current_element.last().map(|v| v.as_slice());
911
912                    if in_properties
913                        && current_element.len() >= 2
914                        && current_element[current_element.len() - 2] == b"properties"
915                    {
916                        if let Some(property_name) = current_element
917                            .last()
918                            .and_then(|name| std::str::from_utf8(name).ok())
919                        {
920                            properties.insert(property_name.to_string(), text);
921                        } else {
922                            warn!("Failed to decode Maven property name in {:?}", path);
923                        }
924                    } else if in_dep_mgmt_dependency {
925                        if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
926                            match current_path {
927                                Some(b"groupId") => dep_mgmt.group_id = Some(text),
928                                Some(b"artifactId") => dep_mgmt.artifact_id = Some(text),
929                                Some(b"version") => dep_mgmt.version = Some(text),
930                                Some(b"scope") => dep_mgmt.scope = Some(text),
931                                Some(b"type") => dep_mgmt.type_ = Some(text),
932                                Some(b"classifier") => dep_mgmt.classifier = Some(text),
933                                Some(b"optional") => dep_mgmt.optional = Some(text),
934                                _ => {}
935                            }
936                        }
937                    } else if let Some(license) = &mut current_license {
938                        match current_path {
939                            Some(b"name") => license.name = Some(text),
940                            Some(b"url") => license.url = Some(text),
941                            Some(b"comments") => license.comments = Some(text),
942                            _ => {}
943                        }
944                    } else if let Some(party) = &mut current_party {
945                        match current_path {
946                            Some(b"name") => party.name = Some(text),
947                            Some(b"email") => party.email = Some(text),
948                            Some(b"url") => party.url = Some(text),
949                            Some(b"organization") => party.organization = Some(text),
950                            Some(b"organizationUrl") => party.organization_url = Some(text),
951                            Some(b"timezone") => party.timezone = Some(text),
952                            _ => {}
953                        }
954                    } else if let Some(dep) = &mut current_dependency {
955                        match current_path {
956                            Some(b"groupId") => {
957                                if let Some(coords) = current_dependency_data.as_mut() {
958                                    coords.group_id = Some(text);
959                                }
960                            }
961                            Some(b"artifactId") => {
962                                if let Some(coords) = current_dependency_data.as_mut() {
963                                    coords.artifact_id = Some(text);
964                                }
965                            }
966                            Some(b"version") => {
967                                if let Some(coords) = current_dependency_data.as_mut() {
968                                    coords.version = Some(text);
969                                }
970                            }
971                            Some(b"scope") => {
972                                dep.scope = Some(text.clone());
973                                dep.is_optional = Some(text == "test" || text == "provided");
974                                dep.is_runtime = Some(text != "test" && text != "provided");
975                                if let Some(coords) = current_dependency_data.as_mut() {
976                                    coords.scope = Some(text);
977                                }
978                            }
979                            Some(b"optional") => {
980                                if let Some(coords) = current_dependency_data.as_mut() {
981                                    coords.optional = Some(text);
982                                }
983                            }
984                            Some(b"type") => {
985                                if let Some(coords) = current_dependency_data.as_mut() {
986                                    coords.type_ = Some(text);
987                                }
988                            }
989                            Some(b"classifier") => {
990                                if let Some(coords) = current_dependency_data.as_mut() {
991                                    coords.classifier = Some(text);
992                                }
993                            }
994                            Some(b"systemPath") => {
995                                if let Some(coords) = current_dependency_data.as_mut() {
996                                    coords.system_path = Some(text);
997                                }
998                            }
999                            _ => {}
1000                        }
1001                    } else if in_relocation {
1002                        match current_path {
1003                            Some(b"groupId") => relocation.group_id = Some(text),
1004                            Some(b"artifactId") => relocation.artifact_id = Some(text),
1005                            Some(b"version") => relocation.version = Some(text),
1006                            Some(b"classifier") => relocation.classifier = Some(text),
1007                            Some(b"type") => relocation.type_ = Some(text),
1008                            Some(b"message") => relocation.message = Some(text),
1009                            _ => {}
1010                        }
1011                    } else if in_parent {
1012                        match current_path {
1013                            Some(b"groupId") => {
1014                                parent_group_id = Some(text);
1015                            }
1016                            Some(b"artifactId") => {
1017                                parent_artifact_id = Some(text);
1018                            }
1019                            Some(b"version") => {
1020                                parent_version = Some(text);
1021                            }
1022                            Some(b"relativePath") => {
1023                                parent_relative_path = Some(text);
1024                            }
1025                            _ => {}
1026                        }
1027                    } else {
1028                        match current_path {
1029                            Some(b"groupId") if current_element.len() == 2 => {
1030                                package_data.namespace = Some(text)
1031                            }
1032                            Some(b"artifactId") if current_element.len() == 2 => {
1033                                package_data.name = Some(text)
1034                            }
1035                            Some(b"version") if current_element.len() == 2 => {
1036                                package_data.version = Some(text)
1037                            }
1038                            Some(b"name") if current_element.len() == 2 => {
1039                                project_name = Some(text)
1040                            }
1041                            Some(b"description") if current_element.len() == 2 => {
1042                                project_description = Some(text)
1043                            }
1044                            Some(b"packaging") if current_element.len() == 2 => {
1045                                project_packaging = Some(text)
1046                            }
1047                            Some(b"classifier") if current_element.len() == 2 => {
1048                                project_classifier = Some(text)
1049                            }
1050                            Some(b"url") if current_element.len() == 2 => {
1051                                package_data.homepage_url = Some(text)
1052                            }
1053                            Some(b"inceptionYear") if current_element.len() == 2 => {
1054                                inception_year = Some(text)
1055                            }
1056                            Some(b"connection")
1057                                if current_element.len() >= 3
1058                                    && current_element[current_element.len() - 2] == b"scm" =>
1059                            {
1060                                scm_connection = if text.starts_with("scm:git:") {
1061                                    Some(text.replacen("scm:git:", "git+", 1))
1062                                } else if text.starts_with("scm:") {
1063                                    Some(text.replacen("scm:", "", 1))
1064                                } else {
1065                                    Some(text)
1066                                };
1067                            }
1068                            Some(b"developerConnection")
1069                                if current_element.len() >= 3
1070                                    && current_element[current_element.len() - 2] == b"scm" =>
1071                            {
1072                                scm_developer_connection = if text.starts_with("scm:git:") {
1073                                    Some(text.replacen("scm:git:", "git+", 1))
1074                                } else if text.starts_with("scm:") {
1075                                    Some(text.replacen("scm:", "", 1))
1076                                } else {
1077                                    Some(text)
1078                                };
1079                            }
1080                            Some(b"url")
1081                                if current_element.len() >= 3
1082                                    && current_element[current_element.len() - 2] == b"scm" =>
1083                            {
1084                                scm_url = Some(text);
1085                            }
1086                            Some(b"tag")
1087                                if current_element.len() >= 3
1088                                    && current_element[current_element.len() - 2] == b"scm" =>
1089                            {
1090                                scm_tag = Some(text);
1091                            }
1092                            Some(b"name")
1093                                if current_element.len() >= 2
1094                                    && current_element[current_element.len() - 2]
1095                                        == b"organization" =>
1096                            {
1097                                organization_name = Some(text);
1098                            }
1099                            Some(b"url")
1100                                if current_element.len() >= 2
1101                                    && current_element[current_element.len() - 2]
1102                                        == b"organization" =>
1103                            {
1104                                organization_url = Some(text);
1105                            }
1106                            Some(b"system")
1107                                if current_element.len() >= 2
1108                                    && current_element[current_element.len() - 2]
1109                                        == b"issueManagement" =>
1110                            {
1111                                issue_management_system = Some(text);
1112                            }
1113                            Some(b"url")
1114                                if current_element.len() >= 2
1115                                    && current_element[current_element.len() - 2]
1116                                        == b"issueManagement" =>
1117                            {
1118                                issue_management_url = Some(text);
1119                            }
1120                            Some(b"system")
1121                                if current_element.len() >= 2
1122                                    && current_element[current_element.len() - 2]
1123                                        == b"ciManagement" =>
1124                            {
1125                                ci_management_system = Some(text);
1126                            }
1127                            Some(b"url")
1128                                if current_element.len() >= 2
1129                                    && current_element[current_element.len() - 2]
1130                                        == b"ciManagement" =>
1131                            {
1132                                ci_management_url = Some(text);
1133                            }
1134                            Some(b"downloadUrl")
1135                                if current_element.len() >= 2
1136                                    && current_element[current_element.len() - 2]
1137                                        == b"distributionManagement" =>
1138                            {
1139                                dist_download_url = Some(text);
1140                            }
1141                            Some(b"id") if in_dist_repository => {
1142                                dist_repository_id = Some(text);
1143                            }
1144                            Some(b"name") if in_dist_repository => {
1145                                dist_repository_name = Some(text);
1146                            }
1147                            Some(b"url") if in_dist_repository => {
1148                                dist_repository_url = Some(text);
1149                            }
1150                            Some(b"layout") if in_dist_repository => {
1151                                dist_repository_layout = Some(text);
1152                            }
1153                            Some(b"id") if in_dist_snapshot_repository => {
1154                                dist_snapshot_repository_id = Some(text);
1155                            }
1156                            Some(b"name") if in_dist_snapshot_repository => {
1157                                dist_snapshot_repository_name = Some(text);
1158                            }
1159                            Some(b"url") if in_dist_snapshot_repository => {
1160                                dist_snapshot_repository_url = Some(text);
1161                            }
1162                            Some(b"layout") if in_dist_snapshot_repository => {
1163                                dist_snapshot_repository_layout = Some(text);
1164                            }
1165                            Some(b"id") if in_dist_site => {
1166                                dist_site_id = Some(text);
1167                            }
1168                            Some(b"name") if in_dist_site => {
1169                                dist_site_name = Some(text);
1170                            }
1171                            Some(b"url") if in_dist_site => {
1172                                dist_site_url = Some(text);
1173                            }
1174                            Some(b"id") if in_repository => {
1175                                current_repository_id = Some(text);
1176                            }
1177                            Some(b"name") if in_repository => {
1178                                current_repository_name = Some(text);
1179                            }
1180                            Some(b"url") if in_repository => {
1181                                current_repository_url = Some(text);
1182                            }
1183                            Some(b"module") if in_modules => {
1184                                modules.push(text);
1185                            }
1186                            Some(b"name") if in_mailing_list => {
1187                                current_mailing_list_name = Some(text);
1188                            }
1189                            Some(b"subscribe") if in_mailing_list => {
1190                                current_mailing_list_subscribe = Some(text);
1191                            }
1192                            Some(b"unsubscribe") if in_mailing_list => {
1193                                current_mailing_list_unsubscribe = Some(text);
1194                            }
1195                            Some(b"post") if in_mailing_list => {
1196                                current_mailing_list_post = Some(text);
1197                            }
1198                            Some(b"archive") if in_mailing_list => {
1199                                current_mailing_list_archive = Some(text);
1200                            }
1201                            _ => {}
1202                        }
1203                    }
1204                }
1205                Ok(Event::Comment(e)) => {
1206                    let comment = e.decode().unwrap_or_default().trim().to_string();
1207                    if current_element.is_empty()
1208                        && !comment.is_empty()
1209                        && is_license_like_comment(&comment)
1210                    {
1211                        xml_license_comments.push(comment);
1212                    }
1213                }
1214                Ok(Event::End(e)) => {
1215                    if !current_element.is_empty() {
1216                        current_element.pop();
1217                    }
1218
1219                    match e.name().as_ref() {
1220                        b"parent" => in_parent = false,
1221                        b"dependencyManagement" => in_dependency_management = false,
1222                        b"dependencies" => in_dependencies = false,
1223                        b"dependency" if in_dep_mgmt_dependency => {
1224                            in_dep_mgmt_dependency = false;
1225                            if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1226                                && (dep_mgmt.group_id.is_some()
1227                                    || dep_mgmt.artifact_id.is_some()
1228                                    || dep_mgmt.version.is_some())
1229                            {
1230                                dependency_management_entries.push(dep_mgmt);
1231                            }
1232                        }
1233                        b"dependency" => {
1234                            if let (Some(dep), Some(coords)) =
1235                                (current_dependency.take(), current_dependency_data.take())
1236                            {
1237                                package_data.dependencies.push(dep);
1238                                dependency_data.push(coords);
1239                            } else if let Some(dep) = current_dependency.take() {
1240                                package_data.dependencies.push(dep);
1241                            }
1242                        }
1243                        b"license" => {
1244                            if let Some(license) = current_license.take()
1245                                && (license.name.is_some()
1246                                    || license.url.is_some()
1247                                    || license.comments.is_some())
1248                            {
1249                                licenses.push(license);
1250                            }
1251                        }
1252                        b"developers" => in_developers = false,
1253                        b"developer" => {
1254                            if let Some(party) = current_party.take() {
1255                                package_data.parties.push(party);
1256                            }
1257                        }
1258                        b"contributors" => in_contributors = false,
1259                        b"contributor" => {
1260                            if let Some(party) = current_party.take() {
1261                                package_data.parties.push(party);
1262                            }
1263                        }
1264                        b"distributionManagement" => in_distribution_management = false,
1265                        b"relocation" => in_relocation = false,
1266                        b"repository" if !in_dependencies && in_distribution_management => {
1267                            in_dist_repository = false
1268                        }
1269                        b"repository" if !in_dependencies && in_repositories => {
1270                            in_repository = false;
1271                            if current_repository_id.is_some()
1272                                || current_repository_name.is_some()
1273                                || current_repository_url.is_some()
1274                            {
1275                                let mut repo = serde_json::Map::new();
1276                                if let Some(id) = current_repository_id.take() {
1277                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1278                                }
1279                                if let Some(name) = current_repository_name.take() {
1280                                    repo.insert(
1281                                        "name".to_string(),
1282                                        serde_json::Value::String(name),
1283                                    );
1284                                }
1285                                if let Some(url) = current_repository_url.take() {
1286                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1287                                }
1288                                repositories.push(repo);
1289                            }
1290                        }
1291                        b"pluginRepository" if in_plugin_repositories => {
1292                            in_repository = false;
1293                            if current_repository_id.is_some()
1294                                || current_repository_name.is_some()
1295                                || current_repository_url.is_some()
1296                            {
1297                                let mut repo = serde_json::Map::new();
1298                                if let Some(id) = current_repository_id.take() {
1299                                    repo.insert("id".to_string(), serde_json::Value::String(id));
1300                                }
1301                                if let Some(name) = current_repository_name.take() {
1302                                    repo.insert(
1303                                        "name".to_string(),
1304                                        serde_json::Value::String(name),
1305                                    );
1306                                }
1307                                if let Some(url) = current_repository_url.take() {
1308                                    repo.insert("url".to_string(), serde_json::Value::String(url));
1309                                }
1310                                plugin_repositories.push(repo);
1311                            }
1312                        }
1313                        b"repositories" => in_repositories = false,
1314                        b"properties" => in_properties = false,
1315                        b"pluginRepositories" => in_plugin_repositories = false,
1316                        b"modules" => in_modules = false,
1317                        b"mailingLists" => in_mailing_lists = false,
1318                        b"mailingList" => {
1319                            in_mailing_list = false;
1320                            if current_mailing_list_name.is_some()
1321                                || current_mailing_list_subscribe.is_some()
1322                                || current_mailing_list_unsubscribe.is_some()
1323                                || current_mailing_list_post.is_some()
1324                                || current_mailing_list_archive.is_some()
1325                            {
1326                                let mut ml = serde_json::Map::new();
1327                                if let Some(name) = current_mailing_list_name.take() {
1328                                    ml.insert("name".to_string(), serde_json::Value::String(name));
1329                                }
1330                                if let Some(subscribe) = current_mailing_list_subscribe.take() {
1331                                    ml.insert(
1332                                        "subscribe".to_string(),
1333                                        serde_json::Value::String(subscribe),
1334                                    );
1335                                }
1336                                if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1337                                    ml.insert(
1338                                        "unsubscribe".to_string(),
1339                                        serde_json::Value::String(unsubscribe),
1340                                    );
1341                                }
1342                                if let Some(post) = current_mailing_list_post.take() {
1343                                    ml.insert("post".to_string(), serde_json::Value::String(post));
1344                                }
1345                                if let Some(archive) = current_mailing_list_archive.take() {
1346                                    ml.insert(
1347                                        "archive".to_string(),
1348                                        serde_json::Value::String(archive),
1349                                    );
1350                                }
1351                                mailing_lists.push(ml);
1352                            }
1353                        }
1354                        b"snapshotRepository" => in_dist_snapshot_repository = false,
1355                        b"site" => in_dist_site = false,
1356                        _ => {}
1357                    }
1358                }
1359                Ok(Event::Eof) => break,
1360                Err(e) => {
1361                    warn!("Error parsing pom.xml at {:?}: {}", path, e);
1362                    return vec![package_data];
1363                }
1364                _ => {}
1365            }
1366            buf.clear();
1367        }
1368
1369        let builtins = build_builtin_properties(
1370            &package_data.namespace,
1371            &package_data.name,
1372            &package_data.version,
1373            &parent_group_id,
1374            &parent_version,
1375            &project_name,
1376            &project_packaging,
1377        );
1378        let mut resolver = PropertyResolver::new(properties, builtins);
1379
1380        resolve_option(&mut resolver, &mut package_data.namespace);
1381        resolve_option(&mut resolver, &mut package_data.name);
1382        resolve_option(&mut resolver, &mut package_data.version);
1383        resolve_option(&mut resolver, &mut package_data.homepage_url);
1384        resolve_option(&mut resolver, &mut inception_year);
1385        resolve_option(&mut resolver, &mut scm_connection);
1386        resolve_option(&mut resolver, &mut scm_developer_connection);
1387        resolve_option(&mut resolver, &mut scm_url);
1388        resolve_option(&mut resolver, &mut scm_tag);
1389        resolve_option(&mut resolver, &mut organization_name);
1390        resolve_option(&mut resolver, &mut organization_url);
1391        resolve_option(&mut resolver, &mut issue_management_system);
1392        resolve_option(&mut resolver, &mut issue_management_url);
1393        resolve_option(&mut resolver, &mut ci_management_system);
1394        resolve_option(&mut resolver, &mut ci_management_url);
1395        resolve_option(&mut resolver, &mut dist_download_url);
1396        resolve_option(&mut resolver, &mut dist_repository_id);
1397        resolve_option(&mut resolver, &mut dist_repository_name);
1398        resolve_option(&mut resolver, &mut dist_repository_url);
1399        resolve_option(&mut resolver, &mut dist_repository_layout);
1400        resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1401        resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1402        resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1403        resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1404        resolve_option(&mut resolver, &mut dist_site_id);
1405        resolve_option(&mut resolver, &mut dist_site_name);
1406        resolve_option(&mut resolver, &mut dist_site_url);
1407        resolve_option(&mut resolver, &mut parent_group_id);
1408        resolve_option(&mut resolver, &mut parent_artifact_id);
1409        resolve_option(&mut resolver, &mut parent_version);
1410        resolve_option(&mut resolver, &mut parent_relative_path);
1411        resolve_option(&mut resolver, &mut project_name);
1412        resolve_option(&mut resolver, &mut project_description);
1413        resolve_option(&mut resolver, &mut project_packaging);
1414        resolve_option(&mut resolver, &mut project_classifier);
1415        resolve_vec(&mut resolver, &mut modules);
1416        resolve_maps(&mut resolver, &mut repositories);
1417        resolve_maps(&mut resolver, &mut plugin_repositories);
1418        resolve_maps(&mut resolver, &mut mailing_lists);
1419        for comment in &mut xml_license_comments {
1420            *comment = resolver.resolve_text(comment, 0);
1421        }
1422        for dependency in &mut dependency_management_entries {
1423            resolve_dependency_data(&mut resolver, dependency);
1424        }
1425        resolve_dependency_data(&mut resolver, &mut relocation);
1426        for license in &mut licenses {
1427            resolve_license_entry(&mut resolver, license);
1428        }
1429        for comment in xml_license_comments {
1430            if !comment.trim().is_empty() {
1431                licenses.push(MavenLicenseEntry {
1432                    comments: Some(comment),
1433                    ..Default::default()
1434                });
1435            }
1436        }
1437
1438        for (dependency, coords) in package_data
1439            .dependencies
1440            .iter_mut()
1441            .zip(dependency_data.iter_mut())
1442        {
1443            resolve_dependency_data(&mut resolver, coords);
1444            dependency.scope = coords.scope.clone();
1445            dependency.extracted_requirement = coords.version.clone();
1446            dependency.extra_data = dependency_extra_data(coords);
1447            dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1448
1449            match dependency.scope.as_deref() {
1450                Some("test") | Some("provided") => {
1451                    dependency.is_runtime = Some(false);
1452                    dependency.is_optional = Some(true);
1453                }
1454                Some(_) => {
1455                    dependency.is_runtime = Some(true);
1456                }
1457                None => {
1458                    dependency.is_runtime = None;
1459                }
1460            }
1461
1462            if let Some(version) = &coords.version {
1463                dependency.is_pinned = Some(is_maven_version_pinned(version));
1464            }
1465
1466            if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1467                dependency.purl = Some(build_maven_purl(
1468                    group_id,
1469                    artifact_id,
1470                    coords.version.as_deref(),
1471                    coords.classifier.as_deref(),
1472                    coords.type_.as_deref(),
1473                ));
1474            }
1475        }
1476
1477        if package_data.namespace.is_none() {
1478            package_data.namespace = parent_group_id.clone();
1479        }
1480        if package_data.version.is_none() {
1481            package_data.version = parent_version.clone();
1482        }
1483
1484        package_data.qualifiers =
1485            build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1486
1487        package_data.description = match (
1488            project_name.as_deref().filter(|value| !value.is_empty()),
1489            project_description
1490                .as_deref()
1491                .filter(|value| !value.is_empty()),
1492        ) {
1493            (Some(name), Some(description)) if name == description => Some(name.to_string()),
1494            (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1495            (Some(name), None) => Some(name.to_string()),
1496            (None, Some(description)) => Some(description.to_string()),
1497            (None, None) => None,
1498        };
1499
1500        if path.to_string_lossy().contains("META-INF/maven/") {
1501            let path_str = path.to_string_lossy();
1502            if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1503                let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1504                let parts: Vec<&str> = after_maven.split('/').collect();
1505                if parts.len() >= 2 {
1506                    if package_data.namespace.is_none() {
1507                        package_data.namespace = Some(parts[0].to_string());
1508                    }
1509                    if package_data.name.is_none() {
1510                        package_data.name = Some(parts[1].to_string());
1511                    }
1512                }
1513            }
1514        }
1515
1516        if has_unresolved_template_coordinates(
1517            package_data.namespace.as_deref(),
1518            package_data.name.as_deref(),
1519            package_data.version.as_deref(),
1520        ) {
1521            warn!("Skipping Maven template coordinates in {:?}", path);
1522            return vec![default_package_data()];
1523        }
1524
1525        // Construct PURL from parsed data
1526        if let (Some(group_id), Some(artifact_id), Some(version)) = (
1527            &package_data.namespace,
1528            &package_data.name,
1529            &package_data.version,
1530        ) {
1531            package_data.purl = Some(build_maven_purl(
1532                group_id,
1533                artifact_id,
1534                Some(version),
1535                project_classifier.as_deref(),
1536                project_packaging.as_deref(),
1537            ));
1538            if project_classifier.is_none() {
1539                package_data
1540                    .source_packages
1541                    .push(build_maven_source_package(group_id, artifact_id, version));
1542            }
1543        }
1544
1545        if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1546            package_data.repository_homepage_url = build_maven_url(
1547                &package_data.namespace,
1548                &package_data.name,
1549                &package_data.version,
1550                None,
1551            );
1552
1553            package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1554                build_maven_download_url(
1555                    group_id,
1556                    artifact_id,
1557                    ver,
1558                    project_classifier.as_deref(),
1559                    project_packaging.as_deref(),
1560                )
1561            });
1562
1563            if let Some(ver) = &package_data.version {
1564                let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1565                package_data.api_data_url = build_maven_url(
1566                    &package_data.namespace,
1567                    &package_data.name,
1568                    &package_data.version,
1569                    Some(&pom_filename),
1570                );
1571            }
1572        }
1573
1574        package_data.vcs_url = scm_connection
1575            .or_else(|| scm_developer_connection.clone())
1576            .or_else(|| scm_url.clone());
1577
1578        // Set code_view_url from scm/url (human-browseable URL)
1579        if let Some(url) = &scm_url {
1580            package_data.code_view_url = Some(url.clone());
1581        }
1582
1583        // Set bug_tracking_url from issueManagement/url
1584        if let Some(url) = &issue_management_url {
1585            package_data.bug_tracking_url = Some(url.clone());
1586        }
1587
1588        // Map downloadUrl to download_url field
1589        if let Some(url) = &dist_download_url {
1590            package_data.download_url = Some(url.clone());
1591        }
1592
1593        if organization_name.is_some() || organization_url.is_some() {
1594            package_data.parties.push(Party {
1595                r#type: Some("organization".to_string()),
1596                role: Some("owner".to_string()),
1597                name: organization_name.clone(),
1598                email: None,
1599                url: organization_url.clone(),
1600                organization: None,
1601                organization_url: None,
1602                timezone: None,
1603            });
1604        }
1605
1606        for dependency in &dependency_management_entries {
1607            let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1608                Some("import")
1609            } else {
1610                Some("dependencymanagement")
1611            };
1612
1613            if let Some(converted) =
1614                maven_dependency_to_dependency(dependency, fallback_scope, true)
1615            {
1616                package_data.dependencies.push(converted);
1617            }
1618        }
1619
1620        if (relocation.group_id.is_some()
1621            || relocation.artifact_id.is_some()
1622            || relocation.version.is_some())
1623            && let Some(converted) =
1624                maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1625        {
1626            package_data.dependencies.push(converted);
1627        }
1628
1629        if inception_year.is_some()
1630            || organization_name.is_some()
1631            || organization_url.is_some()
1632            || scm_tag.is_some()
1633            || scm_developer_connection.is_some()
1634            || issue_management_system.is_some()
1635            || ci_management_system.is_some()
1636            || ci_management_url.is_some()
1637            || dist_download_url.is_some()
1638            || dist_repository_id.is_some()
1639            || dist_snapshot_repository_id.is_some()
1640            || dist_site_id.is_some()
1641            || !repositories.is_empty()
1642            || !plugin_repositories.is_empty()
1643            || !modules.is_empty()
1644            || !mailing_lists.is_empty()
1645            || !dependency_management_entries.is_empty()
1646            || parent_group_id.is_some()
1647            || relocation.group_id.is_some()
1648            || relocation.artifact_id.is_some()
1649            || relocation.version.is_some()
1650            || relocation.message.is_some()
1651        {
1652            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1653            if let Some(year) = inception_year {
1654                extra_data.insert(
1655                    "inception_year".to_string(),
1656                    serde_json::Value::String(year),
1657                );
1658            }
1659            if let Some(name) = organization_name {
1660                extra_data.insert(
1661                    "organization_name".to_string(),
1662                    serde_json::Value::String(name),
1663                );
1664            }
1665            if let Some(url) = organization_url {
1666                extra_data.insert(
1667                    "organization_url".to_string(),
1668                    serde_json::Value::String(url),
1669                );
1670            }
1671            if let Some(tag) = scm_tag {
1672                extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1673            }
1674            if let Some(dev_conn) = scm_developer_connection {
1675                extra_data.insert(
1676                    "scm_developer_connection".to_string(),
1677                    serde_json::Value::String(dev_conn),
1678                );
1679            }
1680            if let Some(system) = issue_management_system {
1681                extra_data.insert(
1682                    "issue_tracking_system".to_string(),
1683                    serde_json::Value::String(system),
1684                );
1685            }
1686            if let Some(system) = ci_management_system {
1687                extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1688            }
1689            if let Some(url) = ci_management_url {
1690                extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1691            }
1692
1693            // Add distribution management data
1694            if let Some(url) = dist_download_url {
1695                extra_data.insert(
1696                    "distribution_download_url".to_string(),
1697                    serde_json::Value::String(url),
1698                );
1699            }
1700
1701            // Build repository object
1702            if dist_repository_id.is_some()
1703                || dist_repository_name.is_some()
1704                || dist_repository_url.is_some()
1705                || dist_repository_layout.is_some()
1706            {
1707                let mut repo = serde_json::Map::new();
1708                if let Some(id) = dist_repository_id {
1709                    repo.insert("id".to_string(), serde_json::Value::String(id));
1710                }
1711                if let Some(name) = dist_repository_name {
1712                    repo.insert("name".to_string(), serde_json::Value::String(name));
1713                }
1714                if let Some(url) = dist_repository_url {
1715                    repo.insert("url".to_string(), serde_json::Value::String(url));
1716                }
1717                if let Some(layout) = dist_repository_layout {
1718                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1719                }
1720                extra_data.insert(
1721                    "distribution_repository".to_string(),
1722                    serde_json::Value::Object(repo),
1723                );
1724            }
1725
1726            // Build snapshotRepository object
1727            if dist_snapshot_repository_id.is_some()
1728                || dist_snapshot_repository_name.is_some()
1729                || dist_snapshot_repository_url.is_some()
1730                || dist_snapshot_repository_layout.is_some()
1731            {
1732                let mut repo = serde_json::Map::new();
1733                if let Some(id) = dist_snapshot_repository_id {
1734                    repo.insert("id".to_string(), serde_json::Value::String(id));
1735                }
1736                if let Some(name) = dist_snapshot_repository_name {
1737                    repo.insert("name".to_string(), serde_json::Value::String(name));
1738                }
1739                if let Some(url) = dist_snapshot_repository_url {
1740                    repo.insert("url".to_string(), serde_json::Value::String(url));
1741                }
1742                if let Some(layout) = dist_snapshot_repository_layout {
1743                    repo.insert("layout".to_string(), serde_json::Value::String(layout));
1744                }
1745                extra_data.insert(
1746                    "distribution_snapshot_repository".to_string(),
1747                    serde_json::Value::Object(repo),
1748                );
1749            }
1750
1751            // Build site object
1752            if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1753                let mut site = serde_json::Map::new();
1754                if let Some(id) = dist_site_id {
1755                    site.insert("id".to_string(), serde_json::Value::String(id));
1756                }
1757                if let Some(name) = dist_site_name {
1758                    site.insert("name".to_string(), serde_json::Value::String(name));
1759                }
1760                if let Some(url) = dist_site_url {
1761                    site.insert("url".to_string(), serde_json::Value::String(url));
1762                }
1763                extra_data.insert(
1764                    "distribution_site".to_string(),
1765                    serde_json::Value::Object(site),
1766                );
1767            }
1768
1769            if !repositories.is_empty() {
1770                extra_data.insert(
1771                    "repositories".to_string(),
1772                    serde_json::Value::Array(
1773                        repositories
1774                            .into_iter()
1775                            .map(serde_json::Value::Object)
1776                            .collect(),
1777                    ),
1778                );
1779            }
1780
1781            if !plugin_repositories.is_empty() {
1782                extra_data.insert(
1783                    "plugin_repositories".to_string(),
1784                    serde_json::Value::Array(
1785                        plugin_repositories
1786                            .into_iter()
1787                            .map(serde_json::Value::Object)
1788                            .collect(),
1789                    ),
1790                );
1791            }
1792
1793            if !modules.is_empty() {
1794                extra_data.insert(
1795                    "modules".to_string(),
1796                    serde_json::Value::Array(
1797                        modules.into_iter().map(serde_json::Value::String).collect(),
1798                    ),
1799                );
1800            }
1801
1802            if !mailing_lists.is_empty() {
1803                extra_data.insert(
1804                    "mailing_lists".to_string(),
1805                    serde_json::Value::Array(
1806                        mailing_lists
1807                            .into_iter()
1808                            .map(serde_json::Value::Object)
1809                            .collect(),
1810                    ),
1811                );
1812            }
1813
1814            if !dependency_management_entries.is_empty() {
1815                extra_data.insert(
1816                    "dependency_management".to_string(),
1817                    serde_json::Value::Array(
1818                        dependency_management_entries
1819                            .into_iter()
1820                            .map(|dependency| {
1821                                serde_json::Value::Object(dependency_management_entry_to_value(
1822                                    &dependency,
1823                                ))
1824                            })
1825                            .collect(),
1826                    ),
1827                );
1828            }
1829
1830            if relocation.group_id.is_some()
1831                || relocation.artifact_id.is_some()
1832                || relocation.version.is_some()
1833                || relocation.message.is_some()
1834            {
1835                extra_data.insert(
1836                    "relocation".to_string(),
1837                    serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1838                );
1839            }
1840
1841            if parent_group_id.is_some()
1842                || parent_artifact_id.is_some()
1843                || parent_version.is_some()
1844                || parent_relative_path.is_some()
1845            {
1846                let mut parent_obj = serde_json::Map::new();
1847                if let Some(group_id) = parent_group_id {
1848                    parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1849                }
1850                if let Some(artifact_id) = parent_artifact_id {
1851                    parent_obj.insert(
1852                        "artifactId".to_string(),
1853                        serde_json::Value::String(artifact_id),
1854                    );
1855                }
1856                if let Some(version) = parent_version {
1857                    parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1858                }
1859                if let Some(relative_path) = parent_relative_path {
1860                    parent_obj.insert(
1861                        "relativePath".to_string(),
1862                        serde_json::Value::String(relative_path),
1863                    );
1864                }
1865                extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1866            }
1867
1868            package_data.extra_data = Some(extra_data);
1869        }
1870
1871        package_data.extracted_license_statement = build_license_statement(&licenses);
1872        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1873            build_maven_declared_license_data(
1874                &licenses,
1875                package_data.extracted_license_statement.as_deref(),
1876            );
1877        package_data.declared_license_expression = declared_license_expression;
1878        package_data.declared_license_expression_spdx = declared_license_expression_spdx;
1879        package_data.license_detections = license_detections;
1880
1881        vec![package_data]
1882    }
1883
1884    fn is_match(path: &Path) -> bool {
1885        if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
1886            filename == "pom.xml"
1887                || filename == "pom.properties"
1888                || filename == "MANIFEST.MF"
1889                || filename.ends_with(".pom")
1890        } else {
1891            false
1892        }
1893    }
1894}
1895
/// Build a Maven Central URL for the given coordinates.
///
/// The result has the shape `BASE/<group path>/<artifact>/[<version>/]<filename>`,
/// where the group path is `group_id` with every `.` replaced by `/`. When
/// `filename` is `None` the URL simply ends with the trailing `/` of the last
/// directory segment.
///
/// Returns `None` when either `group_id` or `artifact_id` is missing.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (group, artifact) = (group_id.as_deref()?, artifact_id.as_deref()?);

    // Start with the artifact directory, then append the optional version
    // directory and finally the (possibly empty) file name.
    let mut url = format!("{BASE_URL}/{}/{artifact}/", group.replace('.', "/"));
    if let Some(ver) = version.as_deref() {
        url.push_str(ver);
        url.push('/');
    }
    url.push_str(filename.unwrap_or_default());

    Some(url)
}
1924
1925fn build_maven_declared_license_data(
1926    licenses: &[MavenLicenseEntry],
1927    matched_text: Option<&str>,
1928) -> (
1929    Option<String>,
1930    Option<String>,
1931    Vec<crate::models::LicenseDetection>,
1932) {
1933    let normalized: Vec<_> = licenses
1934        .iter()
1935        .filter_map(|license| license.name.as_deref())
1936        .filter_map(normalize_maven_license_name)
1937        .collect();
1938
1939    if normalized.is_empty() {
1940        return empty_declared_license_data();
1941    }
1942
1943    let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
1944        return empty_declared_license_data();
1945    };
1946
1947    build_declared_license_data(
1948        combined,
1949        DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
1950    )
1951}
1952
1953fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
1954    match name.trim() {
1955        "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
1956            "public-domain",
1957            "LicenseRef-provenant-public-domain",
1958        )),
1959        other => normalize_declared_license_key(other),
1960    }
1961}
1962
1963/// Parse pom.properties file (Java properties format)
1964fn parse_pom_properties(path: &Path) -> PackageData {
1965    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
1966        Ok(content) => content,
1967        Err(e) => {
1968            warn!("Failed to read pom.properties at {:?}: {}", path, e);
1969            return PackageData {
1970                package_type: Some(PackageType::Maven),
1971                primary_language: Some("Java".to_string()),
1972                datasource_id: Some(DatasourceId::MavenPomProperties),
1973                ..Default::default()
1974            };
1975        }
1976    };
1977
1978    let mut package_data = default_package_data();
1979    package_data.package_type = Some(PackageType::Maven);
1980    package_data.primary_language = Some("Java".to_string());
1981    package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
1982
1983    let mut group_id: Option<String> = None;
1984    let mut artifact_id: Option<String> = None;
1985    let mut version: Option<String> = None;
1986
1987    // Parse Java properties format
1988    let mut continuation = String::new();
1989
1990    for line in content.lines() {
1991        let current_line = if continuation.is_empty() {
1992            line.to_string()
1993        } else {
1994            format!("{}{}", continuation, line)
1995        };
1996        continuation.clear();
1997
1998        // Check for line continuation (backslash at end)
1999        if current_line.ends_with('\\') {
2000            continuation = current_line[..current_line.len() - 1].to_string();
2001            continue;
2002        }
2003
2004        // Skip comments and empty lines
2005        let trimmed = current_line.trim();
2006        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2007            continue;
2008        }
2009
2010        // Parse key=value
2011        if let Some(eq_pos) = current_line.find('=') {
2012            let key = current_line[..eq_pos].trim();
2013            let value = current_line[eq_pos + 1..].trim();
2014
2015            match key {
2016                "groupId" => group_id = Some(value.to_string()),
2017                "artifactId" => artifact_id = Some(value.to_string()),
2018                "version" => version = Some(value.to_string()),
2019                _ => {}
2020            }
2021        }
2022    }
2023
2024    package_data.namespace = group_id.clone();
2025    package_data.name = artifact_id.clone();
2026    package_data.version = version.clone();
2027
2028    // Generate PURL
2029    if let (Some(group_id), Some(artifact_id), Some(version)) = (
2030        &package_data.namespace,
2031        &package_data.name,
2032        &package_data.version,
2033    ) {
2034        package_data.purl = Some(format!(
2035            "pkg:maven/{}/{}@{}",
2036            group_id, artifact_id, version
2037        ));
2038    }
2039
2040    package_data
2041}
2042
2043/// Parse MANIFEST.MF file (JAR manifest format)
2044///
2045/// Detects and handles both regular JAR manifests and OSGi bundle manifests.
2046/// If Bundle-SymbolicName is present, treats the manifest as an OSGi bundle
2047/// and extracts OSGi-specific metadata including Import-Package and Require-Bundle
2048/// dependencies.
2049fn parse_manifest_mf(path: &Path) -> PackageData {
2050    let content = match read_file_to_string(path).map_err(|e| e.to_string()) {
2051        Ok(content) => content,
2052        Err(e) => {
2053            warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2054            return default_package_data();
2055        }
2056    };
2057
2058    let mut package_data = default_package_data();
2059
2060    // Parse manifest headers (RFC822-style with space continuations)
2061    let mut headers: Vec<(String, String)> = Vec::new();
2062    let mut current_key: Option<String> = None;
2063    let mut current_value = String::new();
2064
2065    for line in content.lines() {
2066        if line.starts_with(' ') || line.starts_with('\t') {
2067            // Continuation line
2068            current_value.push_str(line.trim());
2069        } else if let Some(colon_pos) = line.find(':') {
2070            // Save previous header
2071            if let Some(key) = current_key.take() {
2072                headers.push((key, current_value.trim().to_string()));
2073                current_value.clear();
2074            }
2075
2076            // Start new header
2077            let key = line[..colon_pos].trim().to_string();
2078            let value = line[colon_pos + 1..].trim().to_string();
2079            current_key = Some(key);
2080            current_value = value;
2081        }
2082    }
2083
2084    // Save last header
2085    if let Some(key) = current_key {
2086        headers.push((key, current_value.trim().to_string()));
2087    }
2088
2089    // Convert headers to HashMap for easier lookup
2090    let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2091
2092    // Check if this is an OSGi bundle by looking for Bundle-SymbolicName
2093    let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2094    let is_osgi = bundle_symbolic_name.is_some();
2095
2096    if is_osgi {
2097        // OSGi bundle - extract OSGi-specific metadata
2098        package_data.package_type = Some(PackageType::Osgi);
2099        package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2100
2101        // Bundle-SymbolicName is the canonical name for OSGi bundles
2102        // Strip directives after semicolon: "org.example.bundle;singleton:=true" -> "org.example.bundle"
2103        if let Some(bsn) = bundle_symbolic_name {
2104            let name = if let Some(semicolon_pos) = bsn.find(';') {
2105                bsn[..semicolon_pos].trim().to_string()
2106            } else {
2107                bsn.clone()
2108            };
2109            package_data.name = Some(name);
2110        }
2111
2112        // Bundle-Version
2113        package_data.version = headers_map.get("Bundle-Version").cloned();
2114
2115        // Bundle-Description takes priority over Bundle-Name for description
2116        if let Some(desc) = headers_map.get("Bundle-Description") {
2117            package_data.description = Some(desc.clone());
2118        } else if let Some(name) = headers_map.get("Bundle-Name") {
2119            package_data.description = Some(name.clone());
2120        }
2121
2122        // Bundle-Vendor
2123        if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2124            package_data.parties.push(Party {
2125                r#type: Some("organization".to_string()),
2126                role: Some("vendor".to_string()),
2127                name: Some(vendor.clone()),
2128                email: None,
2129                url: None,
2130                organization: None,
2131                organization_url: None,
2132                timezone: None,
2133            });
2134        }
2135
2136        // Bundle-DocURL
2137        package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2138
2139        // Bundle-License
2140        package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2141
2142        // Import-Package -> dependencies with scope "import"
2143        if let Some(import_pkg) = headers_map.get("Import-Package") {
2144            let deps = parse_osgi_package_list(import_pkg, "import");
2145            package_data.dependencies.extend(deps);
2146        }
2147
2148        // Require-Bundle -> dependencies with scope "require-bundle"
2149        if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2150            let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2151            package_data.dependencies.extend(deps);
2152        }
2153
2154        // Export-Package -> store in extra_data
2155        if let Some(export_pkg) = headers_map.get("Export-Package") {
2156            let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2157            extra_data.insert(
2158                "export_packages".to_string(),
2159                serde_json::Value::String(export_pkg.clone()),
2160            );
2161            package_data.extra_data = Some(extra_data);
2162        }
2163
2164        // Build OSGi PURL: pkg:osgi/{bundle_symbolic_name}@{bundle_version}
2165        if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2166            package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2167        }
2168    } else {
2169        // Regular JAR manifest
2170        package_data.package_type = Some(PackageType::Maven);
2171        package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2172
2173        // Extract fields with priority order for non-OSGi JARs
2174        let mut name: Option<String> = None;
2175        let mut version: Option<String> = None;
2176        let mut vendor: Option<String> = None;
2177
2178        for (key, value) in &headers {
2179            match key.as_str() {
2180                "Bundle-Name" if name.is_none() => {
2181                    name = Some(value.clone());
2182                }
2183                "Implementation-Title" if name.is_none() => {
2184                    name = Some(value.clone());
2185                }
2186                "Bundle-Version" if version.is_none() => {
2187                    version = Some(value.clone());
2188                }
2189                "Implementation-Version" if version.is_none() => {
2190                    version = Some(value.clone());
2191                }
2192                "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2193                    vendor = Some(value.clone());
2194                }
2195                _ => {}
2196            }
2197        }
2198
2199        package_data.name = name;
2200        package_data.version = version;
2201
2202        // Add vendor to parties if present
2203        if let Some(vendor_name) = vendor {
2204            package_data.parties.push(Party {
2205                r#type: Some("organization".to_string()),
2206                role: Some("vendor".to_string()),
2207                name: Some(vendor_name),
2208                email: None,
2209                url: None,
2210                organization: None,
2211                organization_url: None,
2212                timezone: None,
2213            });
2214        }
2215
2216        // Try to extract groupId from path (META-INF/maven/{groupId}/{artifactId}/)
2217        if let Some(path_str) = path.to_str()
2218            && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2219        {
2220            let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2221            let parts: Vec<&str> = after_maven.split('/').collect();
2222            if parts.len() >= 2 {
2223                package_data.namespace = Some(parts[0].to_string());
2224            }
2225        }
2226
2227        // Generate Maven PURL if we have enough information
2228        if let (Some(group_id), Some(artifact_id), Some(version)) = (
2229            &package_data.namespace,
2230            &package_data.name,
2231            &package_data.version,
2232        ) {
2233            package_data.purl = Some(format!(
2234                "pkg:maven/{}/{}@{}",
2235                group_id, artifact_id, version
2236            ));
2237        }
2238    }
2239
2240    package_data
2241}
2242
2243/// Parse OSGi Import-Package header into dependencies.
2244///
2245/// Format: comma-separated list of packages with optional directives:
2246/// "org.osgi.framework;version=\"[1.6,2)\",javax.servlet;version=\"[3.0,4)\""
2247pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2248    let mut dependencies = Vec::new();
2249
2250    // Split by comma, but be careful not to split within quoted strings
2251    for package_entry in split_osgi_list(package_list) {
2252        let package_entry = package_entry.trim();
2253        if package_entry.is_empty() {
2254            continue;
2255        }
2256
2257        // Extract package name (before first semicolon)
2258        let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2259            package_entry[..semicolon_pos].trim()
2260        } else {
2261            package_entry
2262        };
2263
2264        if package_name.is_empty() {
2265            continue;
2266        }
2267
2268        // Extract version directive if present
2269        let version_requirement = extract_osgi_version(package_entry);
2270        let is_optional = package_entry.contains("resolution:=optional");
2271
2272        dependencies.push(Dependency {
2273            purl: Some(format!("pkg:osgi/{}", package_name)),
2274            extracted_requirement: version_requirement,
2275            scope: Some(scope.to_string()),
2276            is_runtime: Some(true),
2277            is_optional: Some(is_optional),
2278            is_pinned: None,
2279            is_direct: Some(true),
2280            resolved_package: None,
2281            extra_data: None,
2282        });
2283    }
2284
2285    dependencies
2286}
2287
2288/// Parse OSGi Require-Bundle header into dependencies.
2289///
2290/// Format: comma-separated list of bundle symbolic names with optional directives:
2291/// "org.eclipse.core.runtime;bundle-version=\"3.7.0\",org.eclipse.ui;resolution:=optional"
2292pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2293    let mut dependencies = Vec::new();
2294
2295    for bundle_entry in split_osgi_list(bundle_list) {
2296        let bundle_entry = bundle_entry.trim();
2297        if bundle_entry.is_empty() {
2298            continue;
2299        }
2300
2301        // Extract bundle symbolic name (before first semicolon)
2302        let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2303            bundle_entry[..semicolon_pos].trim()
2304        } else {
2305            bundle_entry
2306        };
2307
2308        if bundle_name.is_empty() {
2309            continue;
2310        }
2311
2312        // Extract bundle-version directive if present
2313        let version_requirement = extract_osgi_bundle_version(bundle_entry);
2314
2315        // Check if optional
2316        let is_optional = bundle_entry.contains("resolution:=optional");
2317
2318        dependencies.push(Dependency {
2319            purl: Some(format!("pkg:osgi/{}", bundle_name)),
2320            extracted_requirement: version_requirement,
2321            scope: Some(scope.to_string()),
2322            is_runtime: Some(!is_optional),
2323            is_optional: Some(is_optional),
2324            is_pinned: None,
2325            is_direct: Some(true),
2326            resolved_package: None,
2327            extra_data: None,
2328        });
2329    }
2330
2331    dependencies
2332}
2333
/// Split a comma-separated OSGi header value into its clauses.
///
/// Commas inside double-quoted sections do not separate clauses, so a version
/// range such as `foo;version="[1.0,2.0)",bar;version="3.0"` yields two
/// entries. Each returned clause is trimmed, and clauses that are empty after
/// trimming are dropped.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut clauses = Vec::new();
    let mut clause_start = 0;
    let mut quoted = false;

    for (idx, ch) in list.char_indices() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ',' && !quoted {
            let clause = list[clause_start..idx].trim();
            if !clause.is_empty() {
                clauses.push(clause.to_string());
            }
            // ',' is a single byte, so the next clause starts right after it.
            clause_start = idx + 1;
        }
    }

    // Whatever follows the last separator is the final clause.
    let tail = list[clause_start..].trim();
    if !tail.is_empty() {
        clauses.push(tail.to_string());
    }

    clauses
}
2367
/// Look up the value of one parameter in a single OSGi header clause.
///
/// A clause looks like `name;key=value;other:=value`. The clause is split into
/// parameters on `;` characters that are outside double quotes, and the first
/// parameter whose key (the text before its first `=`, trimmed) equals
/// `directive` exactly is selected. Exact matching matters: asking for
/// `version` must not pick up `bundle-version` or `specification-version`.
///
/// To look up a directive (written `name:=value`), pass the name with its
/// trailing colon, e.g. `"resolution:"`.
///
/// Returns:
/// - the text between the quotes when the value is double-quoted;
/// - the trimmed value when it is unquoted;
/// - `None` when no parameter has that key, or when a quoted value has no
///   closing quote.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split into parameters, ignoring separators inside quoted values.
    let mut params: Vec<&str> = Vec::new();
    let mut param_start = 0;
    let mut in_quotes = false;
    for (idx, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                params.push(&entry[param_start..idx]);
                param_start = idx + 1;
            }
            _ => {}
        }
    }
    params.push(&entry[param_start..]);

    // Pick the first parameter whose key is exactly the requested one.
    let raw_value = params.into_iter().find_map(|param| {
        let (key, value) = param.split_once('=')?;
        (key.trim() == directive).then(|| value.trim())
    })?;

    match raw_value.strip_prefix('"') {
        Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
        None => Some(raw_value.to_string()),
    }
}
2380
2381pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
2382    extract_osgi_directive(entry, "version")
2383}
2384
2385pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
2386    extract_osgi_directive(entry, "bundle-version")
2387}
2388
2389fn default_package_data() -> PackageData {
2390    PackageData {
2391        package_type: Some(PackageType::Maven),
2392        datasource_id: Some(DatasourceId::MavenPom),
2393        ..Default::default()
2394    }
2395}
2396
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `pom_content` to a `pom.xml` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the file path so the
    /// directory stays alive for as long as the calling test holds on to it.
    fn write_pom(pom_content: &str) -> (TempDir, std::path::PathBuf) {
        let temp_dir = TempDir::new().unwrap();
        let pom_path = temp_dir.path().join("pom.xml");
        fs::write(&pom_path, pom_content).unwrap();
        (temp_dir, pom_path)
    }

    /// Shorthand for the JSON string values the parser stores in `extra_data`.
    fn json_str(text: &str) -> serde_json::Value {
        serde_json::Value::String(text.to_string())
    }

    /// `<organization>` name and URL end up in `extra_data`.
    #[test]
    fn test_organization_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("my-app"));
        assert_eq!(package_data.namespace.as_deref(), Some("com.example"));
        assert_eq!(package_data.version.as_deref(), Some("1.0.0"));

        let extra_data = package_data.extra_data.unwrap();
        for (key, expected) in [
            ("organization_name", "Example Corporation"),
            ("organization_url", "https://example.com"),
        ] {
            assert_eq!(extra_data.get(key), Some(&json_str(expected)), "key {key}");
        }
    }

    /// `<scm>` fields map to `code_view_url`, `vcs_url` and `extra_data`.
    #[test]
    fn test_scm_metadata_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(
            package_data.name.as_deref(),
            Some("spring-boot-starter-web")
        );
        assert_eq!(
            package_data.namespace.as_deref(),
            Some("org.springframework.boot")
        );
        assert_eq!(package_data.version.as_deref(), Some("3.0.0"));

        assert_eq!(
            package_data.code_view_url.as_deref(),
            Some("https://github.com/spring-projects/spring-boot")
        );

        // vcs_url prefers connection over developerConnection
        assert_eq!(
            package_data.vcs_url.as_deref(),
            Some("git+https://github.com/spring-projects/spring-boot.git")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(extra_data.get("scm_tag"), Some(&json_str("v3.0.0")));
        // developerConnection stored separately in extra_data
        assert_eq!(
            extra_data.get("scm_developer_connection"),
            Some(&json_str(
                "git+git@github.com:spring-projects/spring-boot.git"
            ))
        );
    }

    /// `<developers>` and `<contributors>` become parties, in document order.
    #[test]
    fn test_developers_and_contributors_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(package_data.parties.len(), 3);

        let first_dev = &package_data.parties[0];
        assert_eq!(first_dev.r#type.as_deref(), Some("person"));
        assert_eq!(first_dev.role.as_deref(), Some("developer"));
        assert_eq!(first_dev.name.as_deref(), Some("John Doe"));
        assert_eq!(first_dev.email.as_deref(), Some("john@example.com"));
        assert_eq!(first_dev.url.as_deref(), Some("https://example.com/jdoe"));
        assert_eq!(first_dev.organization.as_deref(), Some("Example Corp"));
        assert_eq!(
            first_dev.organization_url.as_deref(),
            Some("https://example.com")
        );
        assert_eq!(first_dev.timezone.as_deref(), Some("America/New_York"));

        let second_dev = &package_data.parties[1];
        assert_eq!(second_dev.r#type.as_deref(), Some("person"));
        assert_eq!(second_dev.role.as_deref(), Some("developer"));
        assert_eq!(second_dev.name.as_deref(), Some("Jane Smith"));
        assert_eq!(second_dev.email.as_deref(), Some("jane@example.com"));

        let contributor = &package_data.parties[2];
        assert_eq!(contributor.r#type.as_deref(), Some("person"));
        assert_eq!(contributor.role.as_deref(), Some("contributor"));
        assert_eq!(contributor.name.as_deref(), Some("Bob Wilson"));
        assert_eq!(contributor.email.as_deref(), Some("bob@example.com"));
        assert_eq!(contributor.url.as_deref(), Some("https://example.com/bob"));
    }

    /// `<issueManagement>` fills `bug_tracking_url` and records the system name.
    #[test]
    fn test_issue_management_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.bug_tracking_url.as_deref(),
            Some("https://github.com/example/test-app/issues")
        );

        let extra_data = package_data.extra_data.unwrap();
        assert_eq!(
            extra_data.get("issue_tracking_system"),
            Some(&json_str("GitHub"))
        );
    }

    /// `<ciManagement>` system and URL are stored in `extra_data`.
    #[test]
    fn test_ci_management_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));

        let extra_data = package_data.extra_data.unwrap();
        for (key, expected) in [
            ("ci_system", "Jenkins"),
            ("ci_url", "https://ci.example.com/job/test-app"),
        ] {
            assert_eq!(extra_data.get(key), Some(&json_str(expected)), "key {key}");
        }
    }

    /// `<distributionManagement>` fills `download_url` and nested `extra_data`
    /// objects for the repository, snapshot repository and site.
    #[test]
    fn test_distribution_management_extraction() {
        let pom_content = r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#;
        let (_temp_dir, pom_path) = write_pom(pom_content);

        let package_data = MavenParser::extract_first_package(&pom_path);

        assert_eq!(package_data.name.as_deref(), Some("test-app"));
        assert_eq!(
            package_data.download_url.as_deref(),
            Some("https://example.com/downloads")
        );

        let extra_data = package_data.extra_data.unwrap();

        assert_eq!(
            extra_data.get("distribution_download_url"),
            Some(&json_str("https://example.com/downloads"))
        );

        let repo = extra_data
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "releases"),
            ("name", "Release Repository"),
            ("url", "https://repo.example.com/releases"),
            ("layout", "default"),
        ] {
            assert_eq!(repo.get(field), Some(&json_str(expected)), "field {field}");
        }

        let snapshot_repo = extra_data
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "snapshots"),
            ("name", "Snapshot Repository"),
            ("url", "https://repo.example.com/snapshots"),
            ("layout", "default"),
        ] {
            assert_eq!(
                snapshot_repo.get(field),
                Some(&json_str(expected)),
                "field {field}"
            );
        }

        let site = extra_data
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        for (field, expected) in [
            ("id", "site-deploy"),
            ("name", "Project Site"),
            ("url", "https://example.com/site"),
        ] {
            assert_eq!(site.get(field), Some(&json_str(expected)), "field {field}");
        }
    }
}
2773
// Invokes the crate-level `register_parser!` macro for this module.
// NOTE(review): the macro definition is not visible here; the arguments look
// like (description, file glob patterns, package type, primary language,
// documentation URL) — confirm against the macro before relying on this.
// The glob list covers the file kinds named in the module docs: POM files,
// `pom.properties`, and JAR `MANIFEST.MF` files under `META-INF`.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);