1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27use quick_xml::Reader;
28use quick_xml::events::Event;
29use std::borrow::Cow;
30use std::collections::{HashMap, HashSet};
31use std::path::Path;
32
33use super::PackageParser;
34use super::license_normalization::{
35 DeclaredLicenseMatchMetadata, NormalizedDeclaredLicense, build_declared_license_data,
36 combine_normalized_licenses, empty_declared_license_data, normalize_declared_license_key,
37};
38
/// Raw, optional text fields collected for one Maven dependency-like entry.
///
/// The same shape is used in this file for regular dependencies,
/// `dependencyManagement` entries and the `relocation` block. Values are
/// stored as read from the XML and may still contain `${...}` placeholders
/// until `resolve_dependency_data` is applied.
#[derive(Clone, Default)]
struct MavenDependencyData {
    /// Text of the `groupId` element.
    group_id: Option<String>,
    /// Text of the `artifactId` element.
    artifact_id: Option<String>,
    /// Text of the `version` element.
    version: Option<String>,
    /// Text of the `classifier` element.
    classifier: Option<String>,
    /// Text of the `type` element (trailing underscore avoids the keyword).
    type_: Option<String>,
    /// Text of the `scope` element.
    scope: Option<String>,
    /// Text of the `optional` element; interpreted later by `parse_maven_bool`.
    optional: Option<String>,
    /// Text of the `systemPath` element.
    system_path: Option<String>,
    /// Text of the `message` element (populated for relocations).
    message: Option<String>,
}
51
/// Text fields captured from a single `<license>` element of a POM.
#[derive(Clone, Default)]
struct MavenLicenseEntry {
    /// Text of the license `name` child element.
    name: Option<String>,
    /// Text of the license `url` child element.
    url: Option<String>,
    /// Text of the license `comments` child element.
    comments: Option<String>,
}
58
/// Expands `${key}` placeholders using POM properties, with caching, cycle
/// detection and hard limits on recursion depth, output size and number of
/// substitutions per text.
struct PropertyResolver {
    /// Properties looked up first when resolving a key.
    raw: HashMap<String, String>,
    /// Fallback properties consulted only when `raw` has no entry for a key.
    builtins: HashMap<String, String>,
    /// Already resolved values, keyed by property name.
    cache: HashMap<String, String>,
    /// Keys whose resolution is currently in progress (cycle detection).
    resolving_set: HashSet<String>,
    /// The in-progress keys in the order they were entered (used in cycle warnings).
    resolving_stack: Vec<String>,
    /// Maximum recursion depth before resolution gives up.
    max_depth: usize,
    /// Maximum number of bytes a single resolved text may reach.
    max_output_len: usize,
    /// Maximum number of placeholders expanded within a single text.
    max_substitutions: usize,
    /// `kind:key` tokens for warnings that were already emitted once.
    warned_keys: HashSet<String>,
}
84
85impl PropertyResolver {
86 fn new(raw: HashMap<String, String>, builtins: HashMap<String, String>) -> Self {
87 Self {
88 raw,
89 builtins,
90 cache: HashMap::new(),
91 resolving_set: HashSet::new(),
92 resolving_stack: Vec::new(),
93 max_depth: 10,
94 max_output_len: 100_000,
95 max_substitutions: 1000,
96 warned_keys: HashSet::new(),
97 }
98 }
99
100 fn resolve_key(&mut self, key: &str, depth: usize) -> Option<String> {
101 if let Some(value) = self.cache.get(key) {
102 return Some(value.clone());
103 }
104
105 if depth >= self.max_depth {
106 self.warn_once(
107 "depth",
108 key,
109 format!("Maven property depth limit hit resolving {key}"),
110 );
111 return None;
112 }
113
114 if self.resolving_set.contains(key) {
115 if self
116 .resolving_stack
117 .last()
118 .is_some_and(|current| current == key)
119 {
120 return None;
121 }
122
123 self.warn_once(
124 "cycle",
125 key,
126 format!(
127 "Maven property cycle detected at {key}: {:?}",
128 self.resolving_stack
129 ),
130 );
131 return None;
132 }
133
134 let raw_val = if let Some(value) = self.raw.get(key).or_else(|| self.builtins.get(key)) {
135 value.clone()
136 } else {
137 return None;
138 };
139
140 self.resolving_set.insert(key.to_string());
141 self.resolving_stack.push(key.to_string());
142
143 let resolved = self.resolve_text(&raw_val, depth + 1);
144
145 self.resolving_stack.pop();
146 self.resolving_set.remove(key);
147
148 self.cache.insert(key.to_string(), resolved.clone());
149 Some(resolved)
150 }
151
152 fn resolve_text(&mut self, text: &str, depth: usize) -> String {
153 if !text.contains("${") {
154 return text.to_string();
155 }
156
157 if depth >= self.max_depth {
158 warn!("Maven property depth limit hit resolving text");
159 return text.to_string();
160 }
161
162 let bytes = text.as_bytes();
163 let mut output: Vec<u8> = Vec::with_capacity(bytes.len());
164 let mut index = 0;
165 let mut substitutions = 0;
166
167 while index < bytes.len() {
168 if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
169 if substitutions >= self.max_substitutions {
170 warn!("Maven property substitution limit hit resolving {text}");
171 return text.to_string();
172 }
173
174 let placeholder_start = index;
175 let Some((content, closing_index)) =
176 self.parse_placeholder_content(text, index + 2)
177 else {
178 warn!("Maven property malformed placeholder in {text}");
179 return text.to_string();
180 };
181
182 substitutions += 1;
183 let resolved_key = if content.contains("${") {
184 self.resolve_text(content, depth + 1)
185 } else {
186 content.to_string()
187 };
188
189 if let Some(resolved) = self.resolve_key(&resolved_key, depth) {
190 if output.len() + resolved.len() > self.max_output_len {
191 warn!("Maven property output length limit hit resolving {text}");
192 return text.to_string();
193 }
194 output.extend_from_slice(resolved.as_bytes());
195 } else {
196 let placeholder_bytes = &bytes[placeholder_start..=closing_index];
197 if output.len() + placeholder_bytes.len() > self.max_output_len {
198 warn!("Maven property output length limit hit resolving {text}");
199 return text.to_string();
200 }
201 output.extend_from_slice(placeholder_bytes);
202 }
203
204 index = closing_index + 1;
205 continue;
206 }
207
208 if output.len() + 1 > self.max_output_len {
209 warn!("Maven property output length limit hit resolving {text}");
210 return text.to_string();
211 }
212
213 output.push(bytes[index]);
214 index += 1;
215 }
216
217 String::from_utf8(output).unwrap_or_else(|_| text.to_string())
218 }
219
220 fn parse_placeholder_content<'a>(
221 &self,
222 text: &'a str,
223 start_index: usize,
224 ) -> Option<(&'a str, usize)> {
225 let bytes = text.as_bytes();
226 let mut index = start_index;
227 let mut depth = 0;
228
229 while index < bytes.len() {
230 if bytes[index] == b'$' && index + 1 < bytes.len() && bytes[index + 1] == b'{' {
231 depth += 1;
232 index += 2;
233 continue;
234 }
235
236 if bytes[index] == b'}' {
237 if depth == 0 {
238 return Some((&text[start_index..index], index));
239 }
240 depth -= 1;
241 }
242
243 index += 1;
244 }
245
246 None
247 }
248
249 fn warn_once(&mut self, kind: &str, key: &str, message: String) {
250 let token = format!("{kind}:{key}");
251 if self.warned_keys.insert(token) {
252 warn!("{message}");
253 }
254 }
255}
256
/// Blanks out `<% ... %>` template directives so the remaining text can be
/// parsed as XML.
///
/// Each character of a directive is replaced by a space, except `\n` and
/// `\r`, which are kept. Input without `<%` is returned borrowed. If a `<%`
/// has no closing `%>`, the original content is returned untouched.
fn sanitize_template_directives(content: &str) -> Cow<'_, str> {
    const OPEN: &str = "<%";
    const CLOSE: &str = "%>";

    let Some(mut open_at) = content.find(OPEN) else {
        return Cow::Borrowed(content);
    };

    let mut blanked = String::with_capacity(content.len());
    let mut tail = content;

    loop {
        blanked.push_str(&tail[..open_at]);
        let from_open = &tail[open_at..];

        let Some(close_at) = from_open.find(CLOSE) else {
            // Unterminated directive: give up and hand back the input as-is.
            return Cow::Borrowed(content);
        };
        let directive_end = close_at + CLOSE.len();

        blanked.extend(from_open[..directive_end].chars().map(|ch| {
            if ch == '\n' || ch == '\r' {
                ch
            } else {
                ' '
            }
        }));

        tail = &from_open[directive_end..];
        match tail.find(OPEN) {
            Some(next_open) => open_at = next_open,
            None => break,
        }
    }

    blanked.push_str(tail);
    Cow::Owned(blanked)
}
288
289fn resolve_option(resolver: &mut PropertyResolver, value: &mut Option<String>) {
290 if let Some(current) = value.clone() {
291 *value = Some(resolver.resolve_text(¤t, 0));
292 }
293}
294
295fn resolve_vec(resolver: &mut PropertyResolver, values: &mut [String]) {
296 for value in values.iter_mut() {
297 *value = resolver.resolve_text(value, 0);
298 }
299}
300
301fn resolve_map_strings(
302 resolver: &mut PropertyResolver,
303 values: &mut serde_json::Map<String, serde_json::Value>,
304) {
305 for value in values.values_mut() {
306 if let serde_json::Value::String(current) = value {
307 let resolved = resolver.resolve_text(current, 0);
308 *current = resolved;
309 }
310 }
311}
312
313fn resolve_maps(
314 resolver: &mut PropertyResolver,
315 values: &mut [serde_json::Map<String, serde_json::Value>],
316) {
317 for value in values.iter_mut() {
318 resolve_map_strings(resolver, value);
319 }
320}
321
322fn resolve_dependency_data(resolver: &mut PropertyResolver, dependency: &mut MavenDependencyData) {
323 resolve_option(resolver, &mut dependency.group_id);
324 resolve_option(resolver, &mut dependency.artifact_id);
325 resolve_option(resolver, &mut dependency.version);
326 resolve_option(resolver, &mut dependency.classifier);
327 resolve_option(resolver, &mut dependency.type_);
328 resolve_option(resolver, &mut dependency.scope);
329 resolve_option(resolver, &mut dependency.optional);
330 resolve_option(resolver, &mut dependency.system_path);
331 resolve_option(resolver, &mut dependency.message);
332}
333
/// Interprets optional POM text as a boolean.
///
/// Only the word `true` (any ASCII case, surrounding whitespace ignored)
/// yields `true`; everything else, including `None`, yields `false`.
fn parse_maven_bool(value: Option<&str>) -> bool {
    match value {
        Some(raw) => raw.trim().eq_ignore_ascii_case("true"),
        None => false,
    }
}
337
/// Maps a POM packaging value onto the set of packagings this parser keeps.
///
/// The value is trimmed first. A recognised packaging is returned as-is, any
/// other non-empty value collapses to `"jar"`, and a missing or blank value
/// yields `None`. Matching is case-sensitive.
fn normalize_maven_packaging(packaging: Option<&str>) -> Option<&str> {
    const KNOWN_PACKAGINGS: [&str; 15] = [
        "ejb3", "ear", "aar", "apk", "gem", "jar", "nar", "pom", "so", "swc", "tar", "tar.gz",
        "war", "xar", "zip",
    ];

    let trimmed = packaging?.trim();
    if trimmed.is_empty() {
        return None;
    }

    if KNOWN_PACKAGINGS.iter().any(|known| *known == trimmed) {
        Some(trimmed)
    } else {
        Some("jar")
    }
}
348
349fn resolve_license_entry(resolver: &mut PropertyResolver, license: &mut MavenLicenseEntry) {
350 resolve_option(resolver, &mut license.name);
351 resolve_option(resolver, &mut license.url);
352 resolve_option(resolver, &mut license.comments);
353}
354
355fn build_maven_qualifiers(
356 classifier: Option<&str>,
357 packaging: Option<&str>,
358) -> Option<HashMap<String, String>> {
359 let mut qualifiers = HashMap::new();
360
361 if let Some(classifier) = classifier.filter(|value| !value.trim().is_empty()) {
362 qualifiers.insert("classifier".to_string(), classifier.to_string());
363 }
364
365 if let Some(packaging) = normalize_maven_packaging(packaging)
366 .filter(|value| !value.is_empty() && *value != "jar" && *value != "pom")
367 {
368 qualifiers.insert("type".to_string(), packaging.to_string());
369 }
370
371 (!qualifiers.is_empty()).then_some(qualifiers)
372}
373
374fn build_maven_purl(
375 group_id: &str,
376 artifact_id: &str,
377 version: Option<&str>,
378 classifier: Option<&str>,
379 packaging: Option<&str>,
380) -> String {
381 let mut purl = format!(
382 "pkg:maven/{}/{}",
383 percent_encode_purl_component(group_id),
384 percent_encode_purl_component(artifact_id)
385 );
386
387 if let Some(version) = version.filter(|value| !value.trim().is_empty()) {
388 purl.push('@');
389 purl.push_str(&percent_encode_purl_component(version));
390 }
391
392 let qualifiers = build_maven_qualifiers(classifier, packaging);
393 if let Some(qualifiers) = qualifiers {
394 let mut query_parts = Vec::new();
395 if let Some(classifier) = qualifiers.get("classifier") {
396 query_parts.push(format!(
397 "classifier={}",
398 percent_encode_purl_component(classifier)
399 ));
400 }
401 if let Some(type_) = qualifiers.get("type") {
402 query_parts.push(format!("type={}", percent_encode_purl_component(type_)));
403 }
404
405 if !query_parts.is_empty() {
406 purl.push('?');
407 purl.push_str(&query_parts.join("&"));
408 }
409 }
410
411 purl
412}
413
/// Percent-encodes a purl component byte by byte.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` are kept; every other byte
/// is written as `%XX` with upper-case hexadecimal digits.
fn percent_encode_purl_component(value: &str) -> String {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(value.len());

    for &byte in value.as_bytes() {
        let keep = byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~');
        if keep {
            out.push(char::from(byte));
        } else {
            out.push('%');
            out.push(char::from(UPPER_HEX[usize::from(byte >> 4)]));
            out.push(char::from(UPPER_HEX[usize::from(byte & 0x0F)]));
        }
    }

    out
}
428
429fn build_maven_download_url(
430 group_id: &str,
431 artifact_id: &str,
432 version: &str,
433 classifier: Option<&str>,
434 packaging: Option<&str>,
435) -> String {
436 const BASE_URL: &str = "https://repo1.maven.org/maven2";
437 let group_path = group_id.replace('.', "/");
438 let extension = normalize_maven_packaging(packaging)
439 .filter(|value| *value != "pom")
440 .unwrap_or("jar");
441 let classifier_suffix = classifier
442 .map(str::trim)
443 .filter(|value| !value.is_empty())
444 .map(|value| format!("-{value}"))
445 .unwrap_or_default();
446
447 format!(
448 "{}/{}/{}/{}/{}-{}{}.{}",
449 BASE_URL,
450 group_path,
451 artifact_id,
452 version,
453 artifact_id,
454 version,
455 classifier_suffix,
456 extension
457 )
458}
459
460fn build_maven_source_package(namespace: &str, name: &str, version: &str) -> String {
461 build_maven_purl(namespace, name, Some(version), Some("sources"), None)
462}
463
464fn build_license_statement(licenses: &[MavenLicenseEntry]) -> Option<String> {
465 let rendered_entries: Vec<String> = licenses
466 .iter()
467 .filter_map(|license| {
468 let mut lines = Vec::new();
469
470 if let Some(name) = license
471 .name
472 .as_ref()
473 .filter(|value| !value.trim().is_empty())
474 {
475 lines.push(format!(" name: {name}"));
476 }
477 if let Some(url) = license
478 .url
479 .as_ref()
480 .filter(|value| !value.trim().is_empty())
481 {
482 lines.push(format!(" url: {url}"));
483 }
484 if let Some(comments) = license
485 .comments
486 .as_ref()
487 .filter(|value| !value.trim().is_empty())
488 {
489 lines.push(format!(" comments: {comments}"));
490 }
491
492 (!lines.is_empty()).then(|| format!("- license:\n{}", lines.join("\n")))
493 })
494 .collect();
495
496 if rendered_entries.is_empty() {
497 None
498 } else {
499 Some(format!("{}\n", rendered_entries.join("\n")))
500 }
501}
502
/// Heuristically decides whether a comment mentions licensing.
///
/// The comment is lower-cased (ASCII only) and searched for a fixed list of
/// marker substrings. Matching is by plain substring, so short markers such
/// as `mit` also match inside longer words.
fn is_license_like_comment(comment: &str) -> bool {
    const MARKERS: [&str; 11] = [
        "license",
        "licensed",
        "copyright",
        "spdx",
        "apache",
        "mit",
        "bsd",
        "gpl",
        "lgpl",
        "mozilla public",
        "eclipse public",
    ];

    let haystack = comment.to_ascii_lowercase();
    for marker in MARKERS {
        if haystack.contains(marker) {
            return true;
        }
    }
    false
}
521
522fn dependency_extra_data(
523 dependency: &MavenDependencyData,
524) -> Option<HashMap<String, serde_json::Value>> {
525 let mut extra_data = HashMap::new();
526
527 if let Some(classifier) = dependency
528 .classifier
529 .as_ref()
530 .filter(|value| !value.trim().is_empty())
531 {
532 extra_data.insert(
533 "classifier".to_string(),
534 serde_json::Value::String(classifier.clone()),
535 );
536 }
537 if let Some(type_) = dependency
538 .type_
539 .as_ref()
540 .filter(|value| !value.trim().is_empty())
541 {
542 extra_data.insert("type".to_string(), serde_json::Value::String(type_.clone()));
543 }
544 if let Some(system_path) = dependency
545 .system_path
546 .as_ref()
547 .filter(|value| !value.trim().is_empty())
548 {
549 extra_data.insert(
550 "system_path".to_string(),
551 serde_json::Value::String(system_path.clone()),
552 );
553 }
554 if let Some(message) = dependency
555 .message
556 .as_ref()
557 .filter(|value| !value.trim().is_empty())
558 {
559 extra_data.insert(
560 "message".to_string(),
561 serde_json::Value::String(message.clone()),
562 );
563 }
564
565 (!extra_data.is_empty()).then_some(extra_data)
566}
567
568fn dependency_management_entry_to_value(
569 dependency: &MavenDependencyData,
570) -> serde_json::Map<String, serde_json::Value> {
571 let mut dep_obj = serde_json::Map::new();
572
573 if let Some(group_id) = dependency.group_id.as_ref() {
574 dep_obj.insert(
575 "groupId".to_string(),
576 serde_json::Value::String(group_id.clone()),
577 );
578 }
579 if let Some(artifact_id) = dependency.artifact_id.as_ref() {
580 dep_obj.insert(
581 "artifactId".to_string(),
582 serde_json::Value::String(artifact_id.clone()),
583 );
584 }
585 if let Some(version) = dependency.version.as_ref() {
586 dep_obj.insert(
587 "version".to_string(),
588 serde_json::Value::String(version.clone()),
589 );
590 }
591 if let Some(scope) = dependency.scope.as_ref() {
592 dep_obj.insert(
593 "scope".to_string(),
594 serde_json::Value::String(scope.clone()),
595 );
596 }
597 if let Some(type_) = dependency.type_.as_ref() {
598 dep_obj.insert("type".to_string(), serde_json::Value::String(type_.clone()));
599 }
600 if let Some(classifier) = dependency.classifier.as_ref() {
601 dep_obj.insert(
602 "classifier".to_string(),
603 serde_json::Value::String(classifier.clone()),
604 );
605 }
606 if let Some(optional) = dependency.optional.as_deref() {
607 dep_obj.insert(
608 "optional".to_string(),
609 serde_json::Value::Bool(parse_maven_bool(Some(optional))),
610 );
611 }
612 if let Some(message) = dependency.message.as_ref() {
613 dep_obj.insert(
614 "message".to_string(),
615 serde_json::Value::String(message.clone()),
616 );
617 }
618
619 dep_obj
620}
621
622fn maven_dependency_to_dependency(
623 dependency_data: &MavenDependencyData,
624 fallback_scope: Option<&str>,
625 force_non_runtime: bool,
626) -> Option<Dependency> {
627 let group_id = dependency_data.group_id.as_ref()?;
628 let artifact_id = dependency_data.artifact_id.as_ref()?;
629 let version = dependency_data.version.clone();
630 let scope = dependency_data
631 .scope
632 .clone()
633 .or_else(|| fallback_scope.map(str::to_string));
634 let explicit_optional = parse_maven_bool(dependency_data.optional.as_deref());
635
636 let (is_runtime, is_optional) = if force_non_runtime {
637 (Some(false), Some(explicit_optional))
638 } else {
639 match scope.as_deref() {
640 Some("test") | Some("provided") => (Some(false), Some(true)),
641 Some(_) => (Some(true), Some(explicit_optional)),
642 None => (None, Some(explicit_optional)),
643 }
644 };
645
646 Some(Dependency {
647 purl: Some(build_maven_purl(
648 group_id,
649 artifact_id,
650 version.as_deref(),
651 dependency_data.classifier.as_deref(),
652 dependency_data.type_.as_deref(),
653 )),
654 extracted_requirement: version.clone(),
655 scope,
656 is_runtime,
657 is_optional,
658 is_pinned: version.as_deref().map(is_maven_version_pinned),
659 is_direct: Some(true),
660 resolved_package: None,
661 extra_data: dependency_extra_data(dependency_data),
662 })
663}
664
/// Tells whether a Maven version string names one concrete version.
///
/// After trimming, a version is not pinned when it is empty, contains any
/// range bracket (`[`, `]`, `(`, `)`), or is the meta-version `LATEST` or
/// `RELEASE` (compared without regard to ASCII case).
fn is_maven_version_pinned(version_str: &str) -> bool {
    let candidate = version_str.trim();

    let is_range = candidate.contains(|ch: char| matches!(ch, '[' | ']' | '(' | ')'));
    let is_meta_version = ["LATEST", "RELEASE"]
        .iter()
        .any(|keyword| candidate.eq_ignore_ascii_case(keyword));

    !(candidate.is_empty() || is_range || is_meta_version)
}
696
/// Borrowed project and parent coordinates from which the built-in
/// (`project.*` / `pom.*` / `parent.*`) properties are derived.
struct MavenBuiltinPropertyInputs<'a> {
    /// The project's own group id, if declared.
    namespace: &'a Option<String>,
    /// The project's artifact id.
    name: &'a Option<String>,
    /// The project's own version, if declared.
    version: &'a Option<String>,
    /// Group id of the parent POM.
    parent_group_id: &'a Option<String>,
    /// Artifact id of the parent POM.
    parent_artifact_id: &'a Option<String>,
    /// Version of the parent POM.
    parent_version: &'a Option<String>,
    /// Human-readable project name.
    project_name: &'a Option<String>,
    /// Project packaging.
    project_packaging: &'a Option<String>,
}

/// Builds the map of built-in properties available for `${...}` resolution.
///
/// The project's group id and version fall back to the parent's values when
/// the project does not declare its own. Group id, artifact id and version
/// are published under both the `project.` and `pom.` prefixes. Parent
/// coordinates are published under `project.parent.`, `pom.parent.` and
/// `parent.` — including the parent group id, so that all three parent
/// coordinates resolve under the same set of aliases. Inputs that are `None`
/// contribute no keys.
fn build_builtin_properties(inputs: MavenBuiltinPropertyInputs<'_>) -> HashMap<String, String> {
    /// Registers `value` under every key in `keys`; does nothing for `None`.
    fn insert_aliases(builtins: &mut HashMap<String, String>, keys: &[&str], value: Option<&str>) {
        if let Some(value) = value {
            for key in keys {
                builtins.insert((*key).to_string(), value.to_string());
            }
        }
    }

    let mut builtins = HashMap::new();

    // A child POM inherits group id and version from its parent when it
    // does not declare them itself.
    let effective_group_id = inputs
        .namespace
        .as_deref()
        .or(inputs.parent_group_id.as_deref());
    let effective_version = inputs
        .version
        .as_deref()
        .or(inputs.parent_version.as_deref());

    insert_aliases(
        &mut builtins,
        &["project.groupId", "pom.groupId"],
        effective_group_id,
    );
    insert_aliases(
        &mut builtins,
        &["project.artifactId", "pom.artifactId"],
        inputs.name.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.version", "pom.version"],
        effective_version,
    );

    insert_aliases(
        &mut builtins,
        &[
            "project.parent.groupId",
            "pom.parent.groupId",
            "parent.groupId",
        ],
        inputs.parent_group_id.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.artifactId",
            "pom.parent.artifactId",
            "parent.artifactId",
        ],
        inputs.parent_artifact_id.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &[
            "project.parent.version",
            "pom.parent.version",
            "parent.version",
        ],
        inputs.parent_version.as_deref(),
    );

    insert_aliases(
        &mut builtins,
        &["project.packaging"],
        inputs.project_packaging.as_deref(),
    );
    insert_aliases(
        &mut builtins,
        &["project.name"],
        inputs.project_name.as_deref(),
    );

    builtins
}
760
/// Zero-sized parser type that implements `PackageParser` for Maven inputs.
///
/// Its `extract_packages` implementation dispatches on the file name:
/// `pom.properties` and `MANIFEST.MF` have dedicated handlers, and any other
/// path is read and parsed as a POM XML document.
pub struct MavenParser;
766
767impl PackageParser for MavenParser {
768 const PACKAGE_TYPE: PackageType = PackageType::Maven;
769
770 fn extract_packages(path: &Path) -> Vec<PackageData> {
771 if let Some(filename) = path.file_name().and_then(|name| name.to_str()) {
772 if filename == "pom.properties" {
773 return vec![parse_pom_properties(path)];
774 } else if filename == "MANIFEST.MF" {
775 return vec![parse_manifest_mf(path)];
776 }
777 }
778
779 let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
780 Ok(content) => content,
781 Err(e) => {
782 warn!("Failed to open pom.xml at {:?}: {}", path, e);
783 return vec![default_package_data(DatasourceId::MavenPom)];
784 }
785 };
786
787 let sanitized_content = sanitize_template_directives(&content);
788 let mut reader = Reader::from_str(sanitized_content.as_ref());
789 reader.config_mut().trim_text(true);
790
791 let mut buf = Vec::new();
792 let mut package_data = default_package_data(DatasourceId::MavenPom);
793 package_data.package_type = Some(Self::PACKAGE_TYPE);
794 package_data.primary_language = Some("Java".to_string());
795 package_data.datasource_id = Some(DatasourceId::MavenPom);
796
797 let mut current_element = Vec::new();
798 let mut in_dependencies = false;
799 let mut current_dependency: Option<Dependency> = None;
800 let mut dependency_data: Vec<MavenDependencyData> = Vec::new();
801 let mut current_dependency_data: Option<MavenDependencyData> = None;
802
803 let mut licenses: Vec<MavenLicenseEntry> = Vec::new();
804 let mut xml_license_comments: Vec<String> = Vec::new();
805 let mut current_license: Option<MavenLicenseEntry> = None;
806 let mut inception_year = None;
807 let mut scm_connection = None;
808 let mut scm_developer_connection = None;
809 let mut scm_url = None;
810 let mut scm_tag = None;
811 let mut organization_name = None;
812 let mut organization_url = None;
813 let mut in_developers = false;
814 let mut in_contributors = false;
815 let mut current_party: Option<Party> = None;
816 let mut issue_management_system = None;
817 let mut issue_management_url = None;
818 let mut ci_management_system = None;
819 let mut ci_management_url = None;
820 let mut in_distribution_management = false;
821 let mut in_dist_repository = false;
822 let mut in_dist_snapshot_repository = false;
823 let mut in_dist_site = false;
824 let mut dist_download_url = None;
825 let mut dist_repository_id = None;
826 let mut dist_repository_name = None;
827 let mut dist_repository_url = None;
828 let mut dist_repository_layout = None;
829 let mut dist_snapshot_repository_id = None;
830 let mut dist_snapshot_repository_name = None;
831 let mut dist_snapshot_repository_url = None;
832 let mut dist_snapshot_repository_layout = None;
833 let mut dist_site_id = None;
834 let mut dist_site_name = None;
835 let mut dist_site_url = None;
836 let mut in_repositories = false;
837 let mut in_plugin_repositories = false;
838 let mut in_repository = false;
839 let mut repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
840 let mut plugin_repositories: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
841 let mut current_repository_id = None;
842 let mut current_repository_name = None;
843 let mut current_repository_url = None;
844 let mut in_modules = false;
845 let mut modules: Vec<String> = Vec::new();
846 let mut in_mailing_lists = false;
847 let mut in_mailing_list = false;
848 let mut mailing_lists: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
849 let mut current_mailing_list_name = None;
850 let mut current_mailing_list_subscribe = None;
851 let mut current_mailing_list_unsubscribe = None;
852 let mut current_mailing_list_post = None;
853 let mut current_mailing_list_archive = None;
854 let mut in_dependency_management = false;
855 let mut dependency_management_entries: Vec<MavenDependencyData> = Vec::new();
856 let mut current_dep_mgmt_dependency: Option<MavenDependencyData> = None;
857 let mut in_dep_mgmt_dependency = false;
858 let mut in_parent = false;
859 let mut parent_group_id = None;
860 let mut parent_artifact_id = None;
861 let mut parent_version = None;
862 let mut parent_relative_path = None;
863 let mut in_properties = false;
864 let mut properties: HashMap<String, String> = HashMap::new();
865 let mut project_name = None;
866 let mut project_description = None;
867 let mut project_packaging = None;
868 let mut project_classifier = None;
869 let mut in_relocation = false;
870 let mut relocation = MavenDependencyData::default();
871
872 let mut iteration_count: usize = 0;
873 loop {
874 iteration_count += 1;
875 if iteration_count > MAX_ITERATION_COUNT {
876 warn!(
877 "Exceeded MAX_ITERATION_COUNT ({}) parsing pom.xml at {:?}; stopping early",
878 MAX_ITERATION_COUNT, path
879 );
880 break;
881 }
882 match reader.read_event_into(&mut buf) {
883 Ok(Event::Start(e)) => {
884 let element_name = e.name().as_ref().to_vec();
885 current_element.push(element_name.clone());
886
887 match element_name.as_slice() {
888 b"parent" => in_parent = true,
889 b"dependencyManagement" => in_dependency_management = true,
890 b"dependencies" if in_dependency_management => {}
891 b"dependencies" => in_dependencies = true,
892 b"dependency" if in_dependency_management => {
893 in_dep_mgmt_dependency = true;
894 current_dep_mgmt_dependency = Some(MavenDependencyData::default());
895 }
896 b"dependency" if in_dependencies => {
897 current_dependency = Some(Dependency {
898 purl: None,
899 extracted_requirement: None,
900 scope: None,
901 is_runtime: None,
902 is_optional: Some(false),
903 is_pinned: None,
904 is_direct: Some(true),
905 resolved_package: None,
906 extra_data: None,
907 });
908 current_dependency_data = Some(MavenDependencyData::default());
909 }
910 b"properties" => in_properties = true,
911 b"developers" => in_developers = true,
912 b"developer" if in_developers => {
913 current_party = Some(Party {
914 r#type: Some("person".to_string()),
915 role: Some("developer".to_string()),
916 name: None,
917 email: None,
918 url: None,
919 organization: None,
920 organization_url: None,
921 timezone: None,
922 });
923 }
924 b"contributors" => in_contributors = true,
925 b"contributor" if in_contributors => {
926 current_party = Some(Party {
927 r#type: Some("person".to_string()),
928 role: Some("contributor".to_string()),
929 name: None,
930 email: None,
931 url: None,
932 organization: None,
933 organization_url: None,
934 timezone: None,
935 });
936 }
937 b"license" => current_license = Some(MavenLicenseEntry::default()),
938 b"distributionManagement" => in_distribution_management = true,
939 b"relocation" if in_distribution_management => {
940 in_relocation = true;
941 relocation = MavenDependencyData::default();
942 }
943 b"repository" if in_distribution_management => in_dist_repository = true,
944 b"snapshotRepository" if in_distribution_management => {
945 in_dist_snapshot_repository = true
946 }
947 b"site" if in_distribution_management => in_dist_site = true,
948 b"repositories" => in_repositories = true,
949 b"pluginRepositories" => in_plugin_repositories = true,
950 b"repository" if in_repositories && !in_distribution_management => {
951 in_repository = true;
952 current_repository_id = None;
953 current_repository_name = None;
954 current_repository_url = None;
955 }
956 b"pluginRepository" if in_plugin_repositories => {
957 in_repository = true;
958 current_repository_id = None;
959 current_repository_name = None;
960 current_repository_url = None;
961 }
962 b"modules" => in_modules = true,
963 b"mailingLists" => in_mailing_lists = true,
964 b"mailingList" if in_mailing_lists => {
965 in_mailing_list = true;
966 current_mailing_list_name = None;
967 current_mailing_list_subscribe = None;
968 current_mailing_list_unsubscribe = None;
969 current_mailing_list_post = None;
970 current_mailing_list_archive = None;
971 }
972 _ => {}
973 }
974 }
975 Ok(Event::Text(e)) => {
976 let text = match e.decode() {
977 Ok(Cow::Borrowed(s)) => s.to_string(),
978 Ok(Cow::Owned(s)) => s,
979 Err(_) => {
980 warn!(
981 "Invalid UTF-8 in XML text content in {:?}; using lossy conversion",
982 path
983 );
984 String::from_utf8_lossy(e.as_ref()).into_owned()
985 }
986 };
987 let current_path = current_element.last().map(|v| v.as_slice());
988 let current_parent = current_element
989 .len()
990 .checked_sub(2)
991 .map(|index| current_element[index].as_slice());
992
993 if in_properties
994 && current_element.len() >= 2
995 && current_element[current_element.len() - 2] == b"properties"
996 {
997 if let Some(property_name) = current_element
998 .last()
999 .and_then(|name| std::str::from_utf8(name).ok())
1000 {
1001 properties.insert(property_name.to_string(), truncate_field(text));
1002 } else {
1003 warn!("Failed to decode Maven property name in {:?}", path);
1004 }
1005 } else if in_dep_mgmt_dependency {
1006 if let Some(dep_mgmt) = current_dep_mgmt_dependency.as_mut() {
1007 match current_path {
1008 Some(b"groupId") if current_parent == Some(b"dependency") => {
1009 dep_mgmt.group_id = Some(text)
1010 }
1011 Some(b"artifactId") if current_parent == Some(b"dependency") => {
1012 dep_mgmt.artifact_id = Some(text)
1013 }
1014 Some(b"version") if current_parent == Some(b"dependency") => {
1015 dep_mgmt.version = Some(text)
1016 }
1017 Some(b"scope") if current_parent == Some(b"dependency") => {
1018 dep_mgmt.scope = Some(text)
1019 }
1020 Some(b"type") if current_parent == Some(b"dependency") => {
1021 dep_mgmt.type_ = Some(text)
1022 }
1023 Some(b"classifier") if current_parent == Some(b"dependency") => {
1024 dep_mgmt.classifier = Some(text)
1025 }
1026 Some(b"optional") if current_parent == Some(b"dependency") => {
1027 dep_mgmt.optional = Some(text)
1028 }
1029 _ => {}
1030 }
1031 }
1032 } else if let Some(license) = &mut current_license {
1033 match current_path {
1034 Some(b"name") => license.name = Some(text),
1035 Some(b"url") => license.url = Some(text),
1036 Some(b"comments") => license.comments = Some(text),
1037 _ => {}
1038 }
1039 } else if let Some(party) = &mut current_party {
1040 match current_path {
1041 Some(b"name") => party.name = Some(text),
1042 Some(b"email") => party.email = Some(text),
1043 Some(b"url") => party.url = Some(text),
1044 Some(b"organization") => party.organization = Some(text),
1045 Some(b"organizationUrl") => party.organization_url = Some(text),
1046 Some(b"timezone") => party.timezone = Some(text),
1047 _ => {}
1048 }
1049 } else if let Some(dep) = &mut current_dependency {
1050 match current_path {
1051 Some(b"groupId") => {
1052 if current_parent == Some(b"dependency")
1053 && let Some(coords) = current_dependency_data.as_mut()
1054 {
1055 coords.group_id = Some(text);
1056 }
1057 }
1058 Some(b"artifactId") => {
1059 if current_parent == Some(b"dependency")
1060 && let Some(coords) = current_dependency_data.as_mut()
1061 {
1062 coords.artifact_id = Some(text);
1063 }
1064 }
1065 Some(b"version") => {
1066 if current_parent == Some(b"dependency")
1067 && let Some(coords) = current_dependency_data.as_mut()
1068 {
1069 coords.version = Some(text);
1070 }
1071 }
1072 Some(b"scope") => {
1073 if current_parent == Some(b"dependency") {
1074 dep.scope = Some(text.clone());
1075 dep.is_optional = Some(text == "test" || text == "provided");
1076 dep.is_runtime = Some(text != "test" && text != "provided");
1077 }
1078 if current_parent == Some(b"dependency")
1079 && let Some(coords) = current_dependency_data.as_mut()
1080 {
1081 coords.scope = Some(text);
1082 }
1083 }
1084 Some(b"optional") => {
1085 if current_parent == Some(b"dependency")
1086 && let Some(coords) = current_dependency_data.as_mut()
1087 {
1088 coords.optional = Some(text);
1089 }
1090 }
1091 Some(b"type") => {
1092 if current_parent == Some(b"dependency")
1093 && let Some(coords) = current_dependency_data.as_mut()
1094 {
1095 coords.type_ = Some(text);
1096 }
1097 }
1098 Some(b"classifier") => {
1099 if current_parent == Some(b"dependency")
1100 && let Some(coords) = current_dependency_data.as_mut()
1101 {
1102 coords.classifier = Some(text);
1103 }
1104 }
1105 Some(b"systemPath") => {
1106 if current_parent == Some(b"dependency")
1107 && let Some(coords) = current_dependency_data.as_mut()
1108 {
1109 coords.system_path = Some(text);
1110 }
1111 }
1112 _ => {}
1113 }
1114 } else if in_relocation {
1115 match current_path {
1116 Some(b"groupId") => relocation.group_id = Some(text),
1117 Some(b"artifactId") => relocation.artifact_id = Some(text),
1118 Some(b"version") => relocation.version = Some(text),
1119 Some(b"classifier") => relocation.classifier = Some(text),
1120 Some(b"type") => relocation.type_ = Some(text),
1121 Some(b"message") => relocation.message = Some(text),
1122 _ => {}
1123 }
1124 } else if in_parent {
1125 match current_path {
1126 Some(b"groupId") => {
1127 parent_group_id = Some(text);
1128 }
1129 Some(b"artifactId") => {
1130 parent_artifact_id = Some(text);
1131 }
1132 Some(b"version") => {
1133 parent_version = Some(text);
1134 }
1135 Some(b"relativePath") => {
1136 parent_relative_path = Some(text);
1137 }
1138 _ => {}
1139 }
1140 } else {
1141 match current_path {
1142 Some(b"groupId") if current_element.len() == 2 => {
1143 package_data.namespace = Some(text)
1144 }
1145 Some(b"artifactId") if current_element.len() == 2 => {
1146 package_data.name = Some(text)
1147 }
1148 Some(b"version") if current_element.len() == 2 => {
1149 package_data.version = Some(text)
1150 }
1151 Some(b"name") if current_element.len() == 2 => {
1152 project_name = Some(text)
1153 }
1154 Some(b"description") if current_element.len() == 2 => {
1155 project_description = Some(text)
1156 }
1157 Some(b"packaging") if current_element.len() == 2 => {
1158 project_packaging = Some(text)
1159 }
1160 Some(b"classifier") if current_element.len() == 2 => {
1161 project_classifier = Some(text)
1162 }
1163 Some(b"url") if current_element.len() == 2 => {
1164 package_data.homepage_url = Some(text)
1165 }
1166 Some(b"inceptionYear") if current_element.len() == 2 => {
1167 inception_year = Some(text)
1168 }
1169 Some(b"connection")
1170 if current_element.len() >= 3
1171 && current_element[current_element.len() - 2] == b"scm" =>
1172 {
1173 scm_connection = if text.starts_with("scm:git:") {
1174 Some(text.replacen("scm:git:", "git+", 1))
1175 } else if text.starts_with("scm:") {
1176 Some(text.replacen("scm:", "", 1))
1177 } else {
1178 Some(text)
1179 };
1180 }
1181 Some(b"developerConnection")
1182 if current_element.len() >= 3
1183 && current_element[current_element.len() - 2] == b"scm" =>
1184 {
1185 scm_developer_connection = if text.starts_with("scm:git:") {
1186 Some(text.replacen("scm:git:", "git+", 1))
1187 } else if text.starts_with("scm:") {
1188 Some(text.replacen("scm:", "", 1))
1189 } else {
1190 Some(text)
1191 };
1192 }
1193 Some(b"url")
1194 if current_element.len() >= 3
1195 && current_element[current_element.len() - 2] == b"scm" =>
1196 {
1197 scm_url = Some(text);
1198 }
1199 Some(b"tag")
1200 if current_element.len() >= 3
1201 && current_element[current_element.len() - 2] == b"scm" =>
1202 {
1203 scm_tag = Some(text);
1204 }
1205 Some(b"name")
1206 if current_element.len() >= 2
1207 && current_element[current_element.len() - 2]
1208 == b"organization" =>
1209 {
1210 organization_name = Some(text);
1211 }
1212 Some(b"url")
1213 if current_element.len() >= 2
1214 && current_element[current_element.len() - 2]
1215 == b"organization" =>
1216 {
1217 organization_url = Some(text);
1218 }
1219 Some(b"system")
1220 if current_element.len() >= 2
1221 && current_element[current_element.len() - 2]
1222 == b"issueManagement" =>
1223 {
1224 issue_management_system = Some(text);
1225 }
1226 Some(b"url")
1227 if current_element.len() >= 2
1228 && current_element[current_element.len() - 2]
1229 == b"issueManagement" =>
1230 {
1231 issue_management_url = Some(text);
1232 }
1233 Some(b"system")
1234 if current_element.len() >= 2
1235 && current_element[current_element.len() - 2]
1236 == b"ciManagement" =>
1237 {
1238 ci_management_system = Some(text);
1239 }
1240 Some(b"url")
1241 if current_element.len() >= 2
1242 && current_element[current_element.len() - 2]
1243 == b"ciManagement" =>
1244 {
1245 ci_management_url = Some(text);
1246 }
1247 Some(b"downloadUrl")
1248 if current_element.len() >= 2
1249 && current_element[current_element.len() - 2]
1250 == b"distributionManagement" =>
1251 {
1252 dist_download_url = Some(text);
1253 }
1254 Some(b"id") if in_dist_repository => {
1255 dist_repository_id = Some(text);
1256 }
1257 Some(b"name") if in_dist_repository => {
1258 dist_repository_name = Some(text);
1259 }
1260 Some(b"url") if in_dist_repository => {
1261 dist_repository_url = Some(text);
1262 }
1263 Some(b"layout") if in_dist_repository => {
1264 dist_repository_layout = Some(text);
1265 }
1266 Some(b"id") if in_dist_snapshot_repository => {
1267 dist_snapshot_repository_id = Some(text);
1268 }
1269 Some(b"name") if in_dist_snapshot_repository => {
1270 dist_snapshot_repository_name = Some(text);
1271 }
1272 Some(b"url") if in_dist_snapshot_repository => {
1273 dist_snapshot_repository_url = Some(text);
1274 }
1275 Some(b"layout") if in_dist_snapshot_repository => {
1276 dist_snapshot_repository_layout = Some(text);
1277 }
1278 Some(b"id") if in_dist_site => {
1279 dist_site_id = Some(text);
1280 }
1281 Some(b"name") if in_dist_site => {
1282 dist_site_name = Some(text);
1283 }
1284 Some(b"url") if in_dist_site => {
1285 dist_site_url = Some(text);
1286 }
1287 Some(b"id") if in_repository => {
1288 current_repository_id = Some(text);
1289 }
1290 Some(b"name") if in_repository => {
1291 current_repository_name = Some(text);
1292 }
1293 Some(b"url") if in_repository => {
1294 current_repository_url = Some(text);
1295 }
1296 Some(b"module") if in_modules => {
1297 modules.push(text);
1298 }
1299 Some(b"name") if in_mailing_list => {
1300 current_mailing_list_name = Some(text);
1301 }
1302 Some(b"subscribe") if in_mailing_list => {
1303 current_mailing_list_subscribe = Some(text);
1304 }
1305 Some(b"unsubscribe") if in_mailing_list => {
1306 current_mailing_list_unsubscribe = Some(text);
1307 }
1308 Some(b"post") if in_mailing_list => {
1309 current_mailing_list_post = Some(text);
1310 }
1311 Some(b"archive") if in_mailing_list => {
1312 current_mailing_list_archive = Some(text);
1313 }
1314 _ => {}
1315 }
1316 }
1317 }
1318 Ok(Event::Comment(e)) => {
1319 let comment = match e.decode() {
1320 Ok(Cow::Borrowed(s)) => s.trim().to_string(),
1321 Ok(Cow::Owned(s)) => s.trim().to_string(),
1322 Err(_) => {
1323 warn!(
1324 "Invalid UTF-8 in XML comment in {:?}; using lossy conversion",
1325 path
1326 );
1327 String::from_utf8_lossy(e.as_ref())
1328 .into_owned()
1329 .trim()
1330 .to_string()
1331 }
1332 };
1333 if current_element.is_empty()
1334 && !comment.is_empty()
1335 && is_license_like_comment(&comment)
1336 {
1337 xml_license_comments.push(comment);
1338 }
1339 }
1340 Ok(Event::End(e)) => {
1341 if !current_element.is_empty() {
1342 current_element.pop();
1343 }
1344
1345 match e.name().as_ref() {
1346 b"parent" => in_parent = false,
1347 b"dependencyManagement" => in_dependency_management = false,
1348 b"dependencies" => in_dependencies = false,
1349 b"dependency" if in_dep_mgmt_dependency => {
1350 in_dep_mgmt_dependency = false;
1351 if let Some(dep_mgmt) = current_dep_mgmt_dependency.take()
1352 && (dep_mgmt.group_id.is_some()
1353 || dep_mgmt.artifact_id.is_some()
1354 || dep_mgmt.version.is_some())
1355 {
1356 dependency_management_entries.push(dep_mgmt);
1357 }
1358 }
1359 b"dependency" => {
1360 if let (Some(dep), Some(coords)) =
1361 (current_dependency.take(), current_dependency_data.take())
1362 {
1363 package_data.dependencies.push(dep);
1364 dependency_data.push(coords);
1365 } else if let Some(dep) = current_dependency.take() {
1366 package_data.dependencies.push(dep);
1367 }
1368 }
1369 b"license" => {
1370 if let Some(license) = current_license.take()
1371 && (license.name.is_some()
1372 || license.url.is_some()
1373 || license.comments.is_some())
1374 {
1375 licenses.push(license);
1376 }
1377 }
1378 b"developers" => in_developers = false,
1379 b"developer" => {
1380 if let Some(party) = current_party.take() {
1381 package_data.parties.push(party);
1382 }
1383 }
1384 b"contributors" => in_contributors = false,
1385 b"contributor" => {
1386 if let Some(party) = current_party.take() {
1387 package_data.parties.push(party);
1388 }
1389 }
1390 b"distributionManagement" => in_distribution_management = false,
1391 b"relocation" => in_relocation = false,
1392 b"repository" if !in_dependencies && in_distribution_management => {
1393 in_dist_repository = false
1394 }
1395 b"repository" if !in_dependencies && in_repositories => {
1396 in_repository = false;
1397 if current_repository_id.is_some()
1398 || current_repository_name.is_some()
1399 || current_repository_url.is_some()
1400 {
1401 let mut repo = serde_json::Map::new();
1402 if let Some(id) = current_repository_id.take() {
1403 repo.insert("id".to_string(), serde_json::Value::String(id));
1404 }
1405 if let Some(name) = current_repository_name.take() {
1406 repo.insert(
1407 "name".to_string(),
1408 serde_json::Value::String(name),
1409 );
1410 }
1411 if let Some(url) = current_repository_url.take() {
1412 repo.insert("url".to_string(), serde_json::Value::String(url));
1413 }
1414 repositories.push(repo);
1415 }
1416 }
1417 b"pluginRepository" if in_plugin_repositories => {
1418 in_repository = false;
1419 if current_repository_id.is_some()
1420 || current_repository_name.is_some()
1421 || current_repository_url.is_some()
1422 {
1423 let mut repo = serde_json::Map::new();
1424 if let Some(id) = current_repository_id.take() {
1425 repo.insert("id".to_string(), serde_json::Value::String(id));
1426 }
1427 if let Some(name) = current_repository_name.take() {
1428 repo.insert(
1429 "name".to_string(),
1430 serde_json::Value::String(name),
1431 );
1432 }
1433 if let Some(url) = current_repository_url.take() {
1434 repo.insert("url".to_string(), serde_json::Value::String(url));
1435 }
1436 plugin_repositories.push(repo);
1437 }
1438 }
1439 b"repositories" => in_repositories = false,
1440 b"properties" => in_properties = false,
1441 b"pluginRepositories" => in_plugin_repositories = false,
1442 b"modules" => in_modules = false,
1443 b"mailingLists" => in_mailing_lists = false,
1444 b"mailingList" => {
1445 in_mailing_list = false;
1446 if current_mailing_list_name.is_some()
1447 || current_mailing_list_subscribe.is_some()
1448 || current_mailing_list_unsubscribe.is_some()
1449 || current_mailing_list_post.is_some()
1450 || current_mailing_list_archive.is_some()
1451 {
1452 let mut ml = serde_json::Map::new();
1453 if let Some(name) = current_mailing_list_name.take() {
1454 ml.insert("name".to_string(), serde_json::Value::String(name));
1455 }
1456 if let Some(subscribe) = current_mailing_list_subscribe.take() {
1457 ml.insert(
1458 "subscribe".to_string(),
1459 serde_json::Value::String(subscribe),
1460 );
1461 }
1462 if let Some(unsubscribe) = current_mailing_list_unsubscribe.take() {
1463 ml.insert(
1464 "unsubscribe".to_string(),
1465 serde_json::Value::String(unsubscribe),
1466 );
1467 }
1468 if let Some(post) = current_mailing_list_post.take() {
1469 ml.insert("post".to_string(), serde_json::Value::String(post));
1470 }
1471 if let Some(archive) = current_mailing_list_archive.take() {
1472 ml.insert(
1473 "archive".to_string(),
1474 serde_json::Value::String(archive),
1475 );
1476 }
1477 mailing_lists.push(ml);
1478 }
1479 }
1480 b"snapshotRepository" => in_dist_snapshot_repository = false,
1481 b"site" => in_dist_site = false,
1482 _ => {}
1483 }
1484 }
1485 Ok(Event::Eof) => break,
1486 Err(e) => {
1487 warn!("Error parsing pom.xml at {:?}: {}", path, e);
1488 return vec![package_data];
1489 }
1490 _ => {}
1491 }
1492 buf.clear();
1493 }
1494
1495 let builtins = build_builtin_properties(MavenBuiltinPropertyInputs {
1496 namespace: &package_data.namespace,
1497 name: &package_data.name,
1498 version: &package_data.version,
1499 parent_group_id: &parent_group_id,
1500 parent_artifact_id: &parent_artifact_id,
1501 parent_version: &parent_version,
1502 project_name: &project_name,
1503 project_packaging: &project_packaging,
1504 });
1505 let mut resolver = PropertyResolver::new(properties, builtins);
1506
1507 resolve_option(&mut resolver, &mut package_data.namespace);
1508 resolve_option(&mut resolver, &mut package_data.name);
1509 resolve_option(&mut resolver, &mut package_data.version);
1510 resolve_option(&mut resolver, &mut package_data.homepage_url);
1511 resolve_option(&mut resolver, &mut inception_year);
1512 resolve_option(&mut resolver, &mut scm_connection);
1513 resolve_option(&mut resolver, &mut scm_developer_connection);
1514 resolve_option(&mut resolver, &mut scm_url);
1515 resolve_option(&mut resolver, &mut scm_tag);
1516 resolve_option(&mut resolver, &mut organization_name);
1517 resolve_option(&mut resolver, &mut organization_url);
1518 resolve_option(&mut resolver, &mut issue_management_system);
1519 resolve_option(&mut resolver, &mut issue_management_url);
1520 resolve_option(&mut resolver, &mut ci_management_system);
1521 resolve_option(&mut resolver, &mut ci_management_url);
1522 resolve_option(&mut resolver, &mut dist_download_url);
1523 resolve_option(&mut resolver, &mut dist_repository_id);
1524 resolve_option(&mut resolver, &mut dist_repository_name);
1525 resolve_option(&mut resolver, &mut dist_repository_url);
1526 resolve_option(&mut resolver, &mut dist_repository_layout);
1527 resolve_option(&mut resolver, &mut dist_snapshot_repository_id);
1528 resolve_option(&mut resolver, &mut dist_snapshot_repository_name);
1529 resolve_option(&mut resolver, &mut dist_snapshot_repository_url);
1530 resolve_option(&mut resolver, &mut dist_snapshot_repository_layout);
1531 resolve_option(&mut resolver, &mut dist_site_id);
1532 resolve_option(&mut resolver, &mut dist_site_name);
1533 resolve_option(&mut resolver, &mut dist_site_url);
1534 resolve_option(&mut resolver, &mut parent_group_id);
1535 resolve_option(&mut resolver, &mut parent_artifact_id);
1536 resolve_option(&mut resolver, &mut parent_version);
1537 resolve_option(&mut resolver, &mut parent_relative_path);
1538 resolve_option(&mut resolver, &mut project_name);
1539 resolve_option(&mut resolver, &mut project_description);
1540 resolve_option(&mut resolver, &mut project_packaging);
1541 resolve_option(&mut resolver, &mut project_classifier);
1542 resolve_vec(&mut resolver, &mut modules);
1543 resolve_maps(&mut resolver, &mut repositories);
1544 resolve_maps(&mut resolver, &mut plugin_repositories);
1545 resolve_maps(&mut resolver, &mut mailing_lists);
1546 for comment in &mut xml_license_comments {
1547 *comment = resolver.resolve_text(comment, 0);
1548 }
1549 for dependency in &mut dependency_management_entries {
1550 resolve_dependency_data(&mut resolver, dependency);
1551 }
1552 resolve_dependency_data(&mut resolver, &mut relocation);
1553 for license in &mut licenses {
1554 resolve_license_entry(&mut resolver, license);
1555 }
1556 for comment in xml_license_comments {
1557 if !comment.trim().is_empty() {
1558 licenses.push(MavenLicenseEntry {
1559 comments: Some(comment),
1560 ..Default::default()
1561 });
1562 }
1563 }
1564
1565 for (dependency, coords) in package_data
1566 .dependencies
1567 .iter_mut()
1568 .zip(dependency_data.iter_mut())
1569 {
1570 resolve_dependency_data(&mut resolver, coords);
1571 dependency.scope = coords.scope.clone();
1572 dependency.extracted_requirement = coords.version.clone();
1573 dependency.extra_data = dependency_extra_data(coords);
1574 dependency.is_optional = Some(parse_maven_bool(coords.optional.as_deref()));
1575
1576 match dependency.scope.as_deref() {
1577 Some("test") | Some("provided") => {
1578 dependency.is_runtime = Some(false);
1579 dependency.is_optional = Some(true);
1580 }
1581 Some(_) => {
1582 dependency.is_runtime = Some(true);
1583 }
1584 None => {
1585 dependency.is_runtime = None;
1586 }
1587 }
1588
1589 if let Some(version) = &coords.version {
1590 dependency.is_pinned = Some(is_maven_version_pinned(version));
1591 }
1592
1593 if let (Some(group_id), Some(artifact_id)) = (&coords.group_id, &coords.artifact_id) {
1594 dependency.purl = Some(build_maven_purl(
1595 group_id,
1596 artifact_id,
1597 coords.version.as_deref(),
1598 coords.classifier.as_deref(),
1599 coords.type_.as_deref(),
1600 ));
1601 }
1602 }
1603
1604 if package_data.namespace.is_none() {
1605 package_data.namespace = parent_group_id.clone();
1606 }
1607 if package_data.version.is_none() {
1608 package_data.version = parent_version.clone();
1609 }
1610
1611 package_data.qualifiers =
1612 build_maven_qualifiers(project_classifier.as_deref(), project_packaging.as_deref());
1613
1614 package_data.description = match (
1615 project_name.as_deref().filter(|value| !value.is_empty()),
1616 project_description
1617 .as_deref()
1618 .filter(|value| !value.is_empty()),
1619 ) {
1620 (Some(name), Some(description)) if name == description => Some(name.to_string()),
1621 (Some(name), Some(description)) => Some(format!("{name}\n{description}")),
1622 (Some(name), None) => Some(name.to_string()),
1623 (None, Some(description)) => Some(description.to_string()),
1624 (None, None) => None,
1625 };
1626
1627 if path.to_string_lossy().contains("META-INF/maven/") {
1628 let path_str = path.to_string_lossy();
1629 if let Some(meta_inf_pos) = path_str.find("META-INF/maven/") {
1630 let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
1631 let parts: Vec<&str> = after_maven.split('/').collect();
1632 if parts.len() >= 2 {
1633 if package_data.namespace.is_none() {
1634 package_data.namespace = Some(parts[0].to_string());
1635 }
1636 if package_data.name.is_none() {
1637 package_data.name = Some(parts[1].to_string());
1638 }
1639 }
1640 }
1641 }
1642
1643 if let (Some(group_id), Some(artifact_id), Some(version)) = (
1645 &package_data.namespace,
1646 &package_data.name,
1647 &package_data.version,
1648 ) {
1649 package_data.purl = Some(build_maven_purl(
1650 group_id,
1651 artifact_id,
1652 Some(version),
1653 project_classifier.as_deref(),
1654 project_packaging.as_deref(),
1655 ));
1656 if project_classifier.is_none() {
1657 package_data
1658 .source_packages
1659 .push(build_maven_source_package(group_id, artifact_id, version));
1660 }
1661 }
1662
1663 if let (Some(group_id), Some(artifact_id)) = (&package_data.namespace, &package_data.name) {
1664 package_data.repository_homepage_url = build_maven_url(
1665 &package_data.namespace,
1666 &package_data.name,
1667 &package_data.version,
1668 None,
1669 );
1670
1671 package_data.repository_download_url = package_data.version.as_ref().map(|ver| {
1672 build_maven_download_url(
1673 group_id,
1674 artifact_id,
1675 ver,
1676 project_classifier.as_deref(),
1677 project_packaging.as_deref(),
1678 )
1679 });
1680
1681 if let Some(ver) = &package_data.version {
1682 let pom_filename = format!("{}-{}.pom", artifact_id, ver);
1683 package_data.api_data_url = build_maven_url(
1684 &package_data.namespace,
1685 &package_data.name,
1686 &package_data.version,
1687 Some(&pom_filename),
1688 );
1689 }
1690 }
1691
1692 package_data.vcs_url = scm_connection
1693 .or_else(|| scm_developer_connection.clone())
1694 .or_else(|| scm_url.clone());
1695
1696 if let Some(url) = &scm_url {
1698 package_data.code_view_url = Some(url.clone());
1699 }
1700
1701 if let Some(url) = &issue_management_url {
1703 package_data.bug_tracking_url = Some(url.clone());
1704 }
1705
1706 if let Some(url) = &dist_download_url {
1708 package_data.download_url = Some(url.clone());
1709 }
1710
1711 if organization_name.is_some() || organization_url.is_some() {
1712 package_data.parties.push(Party {
1713 r#type: Some("organization".to_string()),
1714 role: Some("owner".to_string()),
1715 name: organization_name.clone(),
1716 email: None,
1717 url: organization_url.clone(),
1718 organization: None,
1719 organization_url: None,
1720 timezone: None,
1721 });
1722 }
1723
1724 for dependency in &dependency_management_entries {
1725 let fallback_scope = if dependency.scope.as_deref() == Some("import") {
1726 Some("import")
1727 } else {
1728 Some("dependencymanagement")
1729 };
1730
1731 if let Some(converted) =
1732 maven_dependency_to_dependency(dependency, fallback_scope, true)
1733 {
1734 package_data.dependencies.push(converted);
1735 }
1736 }
1737
1738 if (relocation.group_id.is_some()
1739 || relocation.artifact_id.is_some()
1740 || relocation.version.is_some())
1741 && let Some(converted) =
1742 maven_dependency_to_dependency(&relocation, Some("relocation"), true)
1743 {
1744 package_data.dependencies.push(converted);
1745 }
1746
1747 if inception_year.is_some()
1748 || organization_name.is_some()
1749 || organization_url.is_some()
1750 || scm_tag.is_some()
1751 || scm_developer_connection.is_some()
1752 || issue_management_system.is_some()
1753 || ci_management_system.is_some()
1754 || ci_management_url.is_some()
1755 || dist_download_url.is_some()
1756 || dist_repository_id.is_some()
1757 || dist_snapshot_repository_id.is_some()
1758 || dist_site_id.is_some()
1759 || !repositories.is_empty()
1760 || !plugin_repositories.is_empty()
1761 || !modules.is_empty()
1762 || !mailing_lists.is_empty()
1763 || !dependency_management_entries.is_empty()
1764 || parent_group_id.is_some()
1765 || relocation.group_id.is_some()
1766 || relocation.artifact_id.is_some()
1767 || relocation.version.is_some()
1768 || relocation.message.is_some()
1769 {
1770 let mut extra_data = package_data.extra_data.take().unwrap_or_default();
1771 if let Some(year) = inception_year {
1772 extra_data.insert(
1773 "inception_year".to_string(),
1774 serde_json::Value::String(year),
1775 );
1776 }
1777 if let Some(name) = organization_name {
1778 extra_data.insert(
1779 "organization_name".to_string(),
1780 serde_json::Value::String(name),
1781 );
1782 }
1783 if let Some(url) = organization_url {
1784 extra_data.insert(
1785 "organization_url".to_string(),
1786 serde_json::Value::String(url),
1787 );
1788 }
1789 if let Some(tag) = scm_tag {
1790 extra_data.insert("scm_tag".to_string(), serde_json::Value::String(tag));
1791 }
1792 if let Some(dev_conn) = scm_developer_connection {
1793 extra_data.insert(
1794 "scm_developer_connection".to_string(),
1795 serde_json::Value::String(dev_conn),
1796 );
1797 }
1798 if let Some(system) = issue_management_system {
1799 extra_data.insert(
1800 "issue_tracking_system".to_string(),
1801 serde_json::Value::String(system),
1802 );
1803 }
1804 if let Some(system) = ci_management_system {
1805 extra_data.insert("ci_system".to_string(), serde_json::Value::String(system));
1806 }
1807 if let Some(url) = ci_management_url {
1808 extra_data.insert("ci_url".to_string(), serde_json::Value::String(url));
1809 }
1810
1811 if let Some(url) = dist_download_url {
1813 extra_data.insert(
1814 "distribution_download_url".to_string(),
1815 serde_json::Value::String(url),
1816 );
1817 }
1818
1819 if dist_repository_id.is_some()
1821 || dist_repository_name.is_some()
1822 || dist_repository_url.is_some()
1823 || dist_repository_layout.is_some()
1824 {
1825 let mut repo = serde_json::Map::new();
1826 if let Some(id) = dist_repository_id {
1827 repo.insert("id".to_string(), serde_json::Value::String(id));
1828 }
1829 if let Some(name) = dist_repository_name {
1830 repo.insert("name".to_string(), serde_json::Value::String(name));
1831 }
1832 if let Some(url) = dist_repository_url {
1833 repo.insert("url".to_string(), serde_json::Value::String(url));
1834 }
1835 if let Some(layout) = dist_repository_layout {
1836 repo.insert("layout".to_string(), serde_json::Value::String(layout));
1837 }
1838 extra_data.insert(
1839 "distribution_repository".to_string(),
1840 serde_json::Value::Object(repo),
1841 );
1842 }
1843
1844 if dist_snapshot_repository_id.is_some()
1846 || dist_snapshot_repository_name.is_some()
1847 || dist_snapshot_repository_url.is_some()
1848 || dist_snapshot_repository_layout.is_some()
1849 {
1850 let mut repo = serde_json::Map::new();
1851 if let Some(id) = dist_snapshot_repository_id {
1852 repo.insert("id".to_string(), serde_json::Value::String(id));
1853 }
1854 if let Some(name) = dist_snapshot_repository_name {
1855 repo.insert("name".to_string(), serde_json::Value::String(name));
1856 }
1857 if let Some(url) = dist_snapshot_repository_url {
1858 repo.insert("url".to_string(), serde_json::Value::String(url));
1859 }
1860 if let Some(layout) = dist_snapshot_repository_layout {
1861 repo.insert("layout".to_string(), serde_json::Value::String(layout));
1862 }
1863 extra_data.insert(
1864 "distribution_snapshot_repository".to_string(),
1865 serde_json::Value::Object(repo),
1866 );
1867 }
1868
1869 if dist_site_id.is_some() || dist_site_name.is_some() || dist_site_url.is_some() {
1871 let mut site = serde_json::Map::new();
1872 if let Some(id) = dist_site_id {
1873 site.insert("id".to_string(), serde_json::Value::String(id));
1874 }
1875 if let Some(name) = dist_site_name {
1876 site.insert("name".to_string(), serde_json::Value::String(name));
1877 }
1878 if let Some(url) = dist_site_url {
1879 site.insert("url".to_string(), serde_json::Value::String(url));
1880 }
1881 extra_data.insert(
1882 "distribution_site".to_string(),
1883 serde_json::Value::Object(site),
1884 );
1885 }
1886
1887 if !repositories.is_empty() {
1888 extra_data.insert(
1889 "repositories".to_string(),
1890 serde_json::Value::Array(
1891 repositories
1892 .into_iter()
1893 .map(serde_json::Value::Object)
1894 .collect(),
1895 ),
1896 );
1897 }
1898
1899 if !plugin_repositories.is_empty() {
1900 extra_data.insert(
1901 "plugin_repositories".to_string(),
1902 serde_json::Value::Array(
1903 plugin_repositories
1904 .into_iter()
1905 .map(serde_json::Value::Object)
1906 .collect(),
1907 ),
1908 );
1909 }
1910
1911 if !modules.is_empty() {
1912 extra_data.insert(
1913 "modules".to_string(),
1914 serde_json::Value::Array(
1915 modules.into_iter().map(serde_json::Value::String).collect(),
1916 ),
1917 );
1918 }
1919
1920 if !mailing_lists.is_empty() {
1921 extra_data.insert(
1922 "mailing_lists".to_string(),
1923 serde_json::Value::Array(
1924 mailing_lists
1925 .into_iter()
1926 .map(serde_json::Value::Object)
1927 .collect(),
1928 ),
1929 );
1930 }
1931
1932 if !dependency_management_entries.is_empty() {
1933 extra_data.insert(
1934 "dependency_management".to_string(),
1935 serde_json::Value::Array(
1936 dependency_management_entries
1937 .into_iter()
1938 .map(|dependency| {
1939 serde_json::Value::Object(dependency_management_entry_to_value(
1940 &dependency,
1941 ))
1942 })
1943 .collect(),
1944 ),
1945 );
1946 }
1947
1948 if relocation.group_id.is_some()
1949 || relocation.artifact_id.is_some()
1950 || relocation.version.is_some()
1951 || relocation.message.is_some()
1952 {
1953 extra_data.insert(
1954 "relocation".to_string(),
1955 serde_json::Value::Object(dependency_management_entry_to_value(&relocation)),
1956 );
1957 }
1958
1959 if parent_group_id.is_some()
1960 || parent_artifact_id.is_some()
1961 || parent_version.is_some()
1962 || parent_relative_path.is_some()
1963 {
1964 let mut parent_obj = serde_json::Map::new();
1965 if let Some(group_id) = parent_group_id {
1966 parent_obj.insert("groupId".to_string(), serde_json::Value::String(group_id));
1967 }
1968 if let Some(artifact_id) = parent_artifact_id {
1969 parent_obj.insert(
1970 "artifactId".to_string(),
1971 serde_json::Value::String(artifact_id),
1972 );
1973 }
1974 if let Some(version) = parent_version {
1975 parent_obj.insert("version".to_string(), serde_json::Value::String(version));
1976 }
1977 if let Some(relative_path) = parent_relative_path {
1978 parent_obj.insert(
1979 "relativePath".to_string(),
1980 serde_json::Value::String(relative_path),
1981 );
1982 }
1983 extra_data.insert("parent".to_string(), serde_json::Value::Object(parent_obj));
1984 }
1985
1986 package_data.extra_data = Some(extra_data);
1987 }
1988
1989 package_data.extracted_license_statement =
1990 build_license_statement(&licenses).map(truncate_field);
1991 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1992 build_maven_declared_license_data(
1993 &licenses,
1994 package_data.extracted_license_statement.as_deref(),
1995 );
1996 package_data.declared_license_expression = declared_license_expression;
1997 package_data.declared_license_expression_spdx = declared_license_expression_spdx;
1998 package_data.license_detections = license_detections;
1999
2000 package_data.namespace = package_data.namespace.map(truncate_field);
2001 package_data.name = package_data.name.map(truncate_field);
2002 package_data.version = package_data.version.map(truncate_field);
2003 package_data.description = package_data.description.map(truncate_field);
2004 package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2005 package_data.vcs_url = package_data.vcs_url.map(truncate_field);
2006 package_data.purl = package_data.purl.map(truncate_field);
2007 package_data.code_view_url = package_data.code_view_url.map(truncate_field);
2008 package_data.bug_tracking_url = package_data.bug_tracking_url.map(truncate_field);
2009 package_data.download_url = package_data.download_url.map(truncate_field);
2010 package_data.repository_homepage_url =
2011 package_data.repository_homepage_url.map(truncate_field);
2012 package_data.repository_download_url =
2013 package_data.repository_download_url.map(truncate_field);
2014 package_data.api_data_url = package_data.api_data_url.map(truncate_field);
2015 for dep in &mut package_data.dependencies {
2016 dep.purl = dep.purl.take().map(truncate_field);
2017 dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2018 }
2019
2020 vec![package_data]
2021 }
2022
/// Reports whether `path` names a file this parser handles.
///
/// The file name must be exactly `pom.xml`, `pom.properties` or
/// `MANIFEST.MF`, or end with `.pom.xml`, `-pom.xml` or `.pom`. The
/// comparison is case-sensitive. A path with no final file name component,
/// or whose file name is not valid UTF-8, never matches.
fn is_match(path: &Path) -> bool {
    const EXACT_NAMES: [&str; 3] = ["pom.xml", "pom.properties", "MANIFEST.MF"];
    const NAME_SUFFIXES: [&str; 3] = [".pom.xml", "-pom.xml", ".pom"];

    path.file_name()
        .and_then(|os_name| os_name.to_str())
        .is_some_and(|file_name| {
            EXACT_NAMES.contains(&file_name)
                || NAME_SUFFIXES
                    .iter()
                    .any(|suffix| file_name.ends_with(*suffix))
        })
}
2035}
2036
/// Builds a URL below the `https://repo1.maven.org/maven2` repository root.
///
/// The result is the `/`-joined sequence of: the base URL, `group_id` with
/// every `.` replaced by `/`, `artifact_id`, `version` (only when present),
/// and finally `filename`. When `filename` is `None` an empty last segment is
/// used, so the URL ends with a trailing `/`.
///
/// Returns `None` when either `group_id` or `artifact_id` is `None`.
fn build_maven_url(
    group_id: &Option<String>,
    artifact_id: &Option<String>,
    version: &Option<String>,
    filename: Option<&str>,
) -> Option<String> {
    const BASE_URL: &str = "https://repo1.maven.org/maven2";

    let (group, artifact) = match (group_id, artifact_id) {
        (Some(group), Some(artifact)) => (group, artifact),
        _ => return None,
    };

    let group_path = group.replace('.', "/");

    let mut segments = vec![BASE_URL, group_path.as_str(), artifact.as_str()];
    if let Some(ver) = version {
        segments.push(ver.as_str());
    }
    // An absent filename contributes an empty segment, which keeps the
    // trailing slash on directory-style URLs.
    segments.push(filename.unwrap_or(""));

    Some(segments.join("/"))
}
2065
2066fn build_maven_declared_license_data(
2067 licenses: &[MavenLicenseEntry],
2068 matched_text: Option<&str>,
2069) -> (
2070 Option<String>,
2071 Option<String>,
2072 Vec<crate::models::LicenseDetection>,
2073) {
2074 let normalized: Vec<_> = licenses
2075 .iter()
2076 .filter_map(|license| license.name.as_deref())
2077 .filter_map(normalize_maven_license_name)
2078 .collect();
2079
2080 if normalized.is_empty() {
2081 return empty_declared_license_data();
2082 }
2083
2084 let Some(combined) = combine_normalized_licenses(normalized, " OR ") else {
2085 return empty_declared_license_data();
2086 };
2087
2088 build_declared_license_data(
2089 combined,
2090 DeclaredLicenseMatchMetadata::single_line(matched_text.unwrap_or_default()),
2091 )
2092}
2093
2094fn normalize_maven_license_name(name: &str) -> Option<NormalizedDeclaredLicense> {
2095 match name.trim() {
2096 "Public Domain" | "public domain" => Some(NormalizedDeclaredLicense::new(
2097 "public-domain",
2098 "LicenseRef-provenant-public-domain",
2099 )),
2100 other => normalize_declared_license_key(other),
2101 }
2102}
2103
2104fn parse_pom_properties(path: &Path) -> PackageData {
2106 let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2107 Ok(content) => content,
2108 Err(e) => {
2109 warn!("Failed to read pom.properties at {:?}: {}", path, e);
2110 return PackageData {
2111 package_type: Some(PackageType::Maven),
2112 primary_language: Some("Java".to_string()),
2113 datasource_id: Some(DatasourceId::MavenPomProperties),
2114 ..Default::default()
2115 };
2116 }
2117 };
2118
2119 let mut package_data = default_package_data(DatasourceId::MavenPomProperties);
2120 package_data.package_type = Some(PackageType::Maven);
2121 package_data.primary_language = Some("Java".to_string());
2122 package_data.datasource_id = Some(DatasourceId::MavenPomProperties);
2123
2124 let mut group_id: Option<String> = None;
2125 let mut artifact_id: Option<String> = None;
2126 let mut version: Option<String> = None;
2127
2128 let mut continuation = String::new();
2130
2131 for line in content.lines() {
2132 let current_line = if continuation.is_empty() {
2133 line.to_string()
2134 } else {
2135 format!("{}{}", continuation, line)
2136 };
2137 continuation.clear();
2138
2139 if current_line.ends_with('\\') {
2141 continuation = current_line[..current_line.len() - 1].to_string();
2142 continue;
2143 }
2144
2145 let trimmed = current_line.trim();
2147 if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
2148 continue;
2149 }
2150
2151 if let Some(eq_pos) = current_line.find('=') {
2153 let key = current_line[..eq_pos].trim();
2154 let value = current_line[eq_pos + 1..].trim();
2155
2156 match key {
2157 "groupId" => group_id = Some(value.to_string()),
2158 "artifactId" => artifact_id = Some(value.to_string()),
2159 "version" => version = Some(value.to_string()),
2160 _ => {}
2161 }
2162 }
2163 }
2164
2165 package_data.namespace = group_id.map(truncate_field);
2166 package_data.name = artifact_id.map(truncate_field);
2167 package_data.version = version.map(truncate_field);
2168
2169 if let (Some(group_id), Some(artifact_id), Some(version)) = (
2171 &package_data.namespace,
2172 &package_data.name,
2173 &package_data.version,
2174 ) {
2175 package_data.purl = Some(truncate_field(format!(
2176 "pkg:maven/{}/{}@{}",
2177 group_id, artifact_id, version
2178 )));
2179 }
2180
2181 package_data
2182}
2183
2184fn parse_manifest_mf(path: &Path) -> PackageData {
2191 let content = match read_file_to_string(path, None).map_err(|e| e.to_string()) {
2192 Ok(content) => content,
2193 Err(e) => {
2194 warn!("Failed to read MANIFEST.MF at {:?}: {}", path, e);
2195 return default_package_data(DatasourceId::JavaJarManifest);
2196 }
2197 };
2198
2199 let mut package_data = default_package_data(DatasourceId::JavaJarManifest);
2200
2201 let mut headers: Vec<(String, String)> = Vec::new();
2203 let mut current_key: Option<String> = None;
2204 let mut current_value = String::new();
2205
2206 for line in content.lines() {
2207 if line.starts_with(' ') || line.starts_with('\t') {
2208 current_value.push_str(line.trim());
2210 } else if let Some(colon_pos) = line.find(':') {
2211 if let Some(key) = current_key.take() {
2213 headers.push((key, current_value.trim().to_string()));
2214 current_value.clear();
2215 }
2216
2217 let key = line[..colon_pos].trim().to_string();
2219 let value = line[colon_pos + 1..].trim().to_string();
2220 current_key = Some(key);
2221 current_value = value;
2222 }
2223 }
2224
2225 if let Some(key) = current_key {
2227 headers.push((key, current_value.trim().to_string()));
2228 }
2229
2230 let headers_map: HashMap<String, String> = headers.iter().cloned().collect();
2232
2233 let bundle_symbolic_name = headers_map.get("Bundle-SymbolicName");
2235 let is_osgi = bundle_symbolic_name.is_some();
2236
2237 if is_osgi {
2238 package_data.package_type = Some(PackageType::Osgi);
2240 package_data.datasource_id = Some(DatasourceId::JavaOsgiManifest);
2241
2242 if let Some(bsn) = bundle_symbolic_name {
2245 let name = if let Some(semicolon_pos) = bsn.find(';') {
2246 bsn[..semicolon_pos].trim().to_string()
2247 } else {
2248 bsn.clone()
2249 };
2250 package_data.name = Some(name);
2251 }
2252
2253 package_data.version = headers_map.get("Bundle-Version").cloned();
2255
2256 if let Some(desc) = headers_map.get("Bundle-Description") {
2258 package_data.description = Some(desc.clone());
2259 } else if let Some(name) = headers_map.get("Bundle-Name") {
2260 package_data.description = Some(name.clone());
2261 }
2262
2263 if let Some(vendor) = headers_map.get("Bundle-Vendor") {
2265 package_data.parties.push(Party {
2266 r#type: Some("organization".to_string()),
2267 role: Some("vendor".to_string()),
2268 name: Some(vendor.clone()),
2269 email: None,
2270 url: None,
2271 organization: None,
2272 organization_url: None,
2273 timezone: None,
2274 });
2275 }
2276
2277 package_data.homepage_url = headers_map.get("Bundle-DocURL").cloned();
2279
2280 package_data.extracted_license_statement = headers_map.get("Bundle-License").cloned();
2282
2283 if let Some(import_pkg) = headers_map.get("Import-Package") {
2285 let deps = parse_osgi_package_list(import_pkg, "import");
2286 package_data.dependencies.extend(deps);
2287 }
2288
2289 if let Some(require_bundle) = headers_map.get("Require-Bundle") {
2291 let deps = parse_osgi_bundle_list(require_bundle, "require-bundle");
2292 package_data.dependencies.extend(deps);
2293 }
2294
2295 if let Some(export_pkg) = headers_map.get("Export-Package") {
2297 let mut extra_data = package_data.extra_data.take().unwrap_or_default();
2298 extra_data.insert(
2299 "export_packages".to_string(),
2300 serde_json::Value::String(export_pkg.clone()),
2301 );
2302 package_data.extra_data = Some(extra_data);
2303 }
2304
2305 if let (Some(name), Some(version)) = (&package_data.name, &package_data.version) {
2307 package_data.purl = Some(format!("pkg:osgi/{}@{}", name, version));
2308 }
2309 } else {
2310 package_data.package_type = Some(PackageType::Maven);
2312 package_data.datasource_id = Some(DatasourceId::JavaJarManifest);
2313
2314 let mut name: Option<String> = None;
2316 let mut version: Option<String> = None;
2317 let mut vendor: Option<String> = None;
2318
2319 for (key, value) in &headers {
2320 match key.as_str() {
2321 "Bundle-Name" if name.is_none() => {
2322 name = Some(value.clone());
2323 }
2324 "Implementation-Title" if name.is_none() => {
2325 name = Some(value.clone());
2326 }
2327 "Bundle-Version" if version.is_none() => {
2328 version = Some(value.clone());
2329 }
2330 "Implementation-Version" if version.is_none() => {
2331 version = Some(value.clone());
2332 }
2333 "Implementation-Vendor" | "Bundle-Vendor" if vendor.is_none() => {
2334 vendor = Some(value.clone());
2335 }
2336 _ => {}
2337 }
2338 }
2339
2340 package_data.name = name;
2341 package_data.version = version;
2342
2343 if let Some(vendor_name) = vendor {
2345 package_data.parties.push(Party {
2346 r#type: Some("organization".to_string()),
2347 role: Some("vendor".to_string()),
2348 name: Some(vendor_name),
2349 email: None,
2350 url: None,
2351 organization: None,
2352 organization_url: None,
2353 timezone: None,
2354 });
2355 }
2356
2357 if let Some(path_str) = path.to_str()
2359 && let Some(meta_inf_pos) = path_str.find("META-INF/maven/")
2360 {
2361 let after_maven = &path_str[meta_inf_pos + "META-INF/maven/".len()..];
2362 let parts: Vec<&str> = after_maven.split('/').collect();
2363 if parts.len() >= 2 {
2364 package_data.namespace = Some(parts[0].to_string());
2365 }
2366 }
2367
2368 if let (Some(group_id), Some(artifact_id), Some(version)) = (
2370 &package_data.namespace,
2371 &package_data.name,
2372 &package_data.version,
2373 ) {
2374 package_data.purl = Some(format!(
2375 "pkg:maven/{}/{}@{}",
2376 group_id, artifact_id, version
2377 ));
2378 } else if package_data.name.is_none() && package_data.version.is_none() {
2379 package_data.package_type = Some(PackageType::Jar);
2384 }
2385 }
2386
2387 package_data.name = package_data.name.map(truncate_field);
2388 package_data.version = package_data.version.map(truncate_field);
2389 package_data.namespace = package_data.namespace.map(truncate_field);
2390 package_data.description = package_data.description.map(truncate_field);
2391 package_data.homepage_url = package_data.homepage_url.map(truncate_field);
2392 package_data.extracted_license_statement =
2393 package_data.extracted_license_statement.map(truncate_field);
2394 package_data.purl = package_data.purl.map(truncate_field);
2395 for dep in &mut package_data.dependencies {
2396 dep.purl = dep.purl.take().map(truncate_field);
2397 dep.extracted_requirement = dep.extracted_requirement.take().map(truncate_field);
2398 }
2399
2400 package_data
2401}
2402
2403pub(crate) fn parse_osgi_package_list(package_list: &str, scope: &str) -> Vec<Dependency> {
2408 let mut dependencies = Vec::new();
2409
2410 for package_entry in split_osgi_list(package_list)
2412 .into_iter()
2413 .take(MAX_ITERATION_COUNT)
2414 {
2415 let package_entry = package_entry.trim();
2416 if package_entry.is_empty() {
2417 continue;
2418 }
2419
2420 let package_name = if let Some(semicolon_pos) = package_entry.find(';') {
2422 package_entry[..semicolon_pos].trim()
2423 } else {
2424 package_entry
2425 };
2426
2427 if package_name.is_empty() {
2428 continue;
2429 }
2430
2431 let version_requirement = extract_osgi_version(package_entry);
2433 let is_optional = package_entry.contains("resolution:=optional");
2434
2435 dependencies.push(Dependency {
2436 purl: Some(format!("pkg:osgi/{}", package_name)),
2437 extracted_requirement: version_requirement,
2438 scope: Some(scope.to_string()),
2439 is_runtime: Some(true),
2440 is_optional: Some(is_optional),
2441 is_pinned: None,
2442 is_direct: Some(true),
2443 resolved_package: None,
2444 extra_data: None,
2445 });
2446 }
2447
2448 dependencies
2449}
2450
2451pub(crate) fn parse_osgi_bundle_list(bundle_list: &str, scope: &str) -> Vec<Dependency> {
2456 let mut dependencies = Vec::new();
2457
2458 for bundle_entry in split_osgi_list(bundle_list)
2459 .into_iter()
2460 .take(MAX_ITERATION_COUNT)
2461 {
2462 let bundle_entry = bundle_entry.trim();
2463 if bundle_entry.is_empty() {
2464 continue;
2465 }
2466
2467 let bundle_name = if let Some(semicolon_pos) = bundle_entry.find(';') {
2469 bundle_entry[..semicolon_pos].trim()
2470 } else {
2471 bundle_entry
2472 };
2473
2474 if bundle_name.is_empty() {
2475 continue;
2476 }
2477
2478 let version_requirement = extract_osgi_bundle_version(bundle_entry);
2480
2481 let is_optional = bundle_entry.contains("resolution:=optional");
2483
2484 dependencies.push(Dependency {
2485 purl: Some(format!("pkg:osgi/{}", bundle_name)),
2486 extracted_requirement: version_requirement,
2487 scope: Some(scope.to_string()),
2488 is_runtime: Some(!is_optional),
2489 is_optional: Some(is_optional),
2490 is_pinned: None,
2491 is_direct: Some(true),
2492 resolved_package: None,
2493 extra_data: None,
2494 });
2495 }
2496
2497 dependencies
2498}
2499
/// Splits an OSGi header value on commas that are not inside double quotes.
///
/// Each piece is trimmed and blank pieces are dropped. Quote characters stay
/// in the output, so `a;version="[1,2)",b` yields `a;version="[1,2)"` and `b`.
pub(crate) fn split_osgi_list(list: &str) -> Vec<String> {
    let mut items = Vec::new();
    let mut quoted = false;
    let mut segment_start = 0;

    let mut keep = |segment: &str| {
        let segment = segment.trim();
        if !segment.is_empty() {
            items.push(segment.to_string());
        }
    };

    for (idx, ch) in list.char_indices() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ',' && !quoted {
            keep(&list[segment_start..idx]);
            // ',' is a single byte, so the next segment starts right after it.
            segment_start = idx + 1;
        }
    }
    keep(&list[segment_start..]);

    items
}
2533
/// Returns the value of the parameter named `directive` in one OSGi header
/// entry such as `org.foo;version="[1.0,2.0)";resolution:=optional`.
///
/// The entry is split into parameters on `;` characters outside double quotes
/// and the text before each parameter's first `=` is compared with `directive`
/// exactly (after trimming). Looking up `version` therefore no longer picks up
/// the value of `bundle-version=` or `specification-version=`, which a plain
/// substring search for `version=` did. The first matching parameter wins.
///
/// A value starting with `"` is returned without its quotes, or `None` if the
/// closing quote is missing; any other value is returned trimmed. `None` is
/// also returned when no parameter has that name.
fn extract_osgi_directive(entry: &str, directive: &str) -> Option<String> {
    // Split on `;` but keep quoted values (which may contain `;`) intact.
    let mut params: Vec<&str> = Vec::new();
    let mut in_quotes = false;
    let mut start = 0;
    for (idx, ch) in entry.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                params.push(&entry[start..idx]);
                start = idx + 1;
            }
            _ => {}
        }
    }
    params.push(&entry[start..]);

    let raw_value = params.into_iter().find_map(|param| {
        let (key, value) = param.split_once('=')?;
        (key.trim() == directive).then(|| value.trim())
    })?;

    match raw_value.strip_prefix('"') {
        Some(quoted) => quoted.find('"').map(|end| quoted[..end].to_string()),
        None => Some(raw_value.to_string()),
    }
}
2546
/// Looks up the `version` parameter of a single OSGi header entry.
///
/// Delegates to `extract_osgi_directive`; returns `None` when that lookup
/// finds no value.
pub(crate) fn extract_osgi_version(entry: &str) -> Option<String> {
    extract_osgi_directive(entry, "version")
}
2550
/// Looks up the `bundle-version` parameter of a single OSGi header entry.
///
/// Delegates to `extract_osgi_directive`; returns `None` when that lookup
/// finds no value.
pub(crate) fn extract_osgi_bundle_version(entry: &str) -> Option<String> {
    extract_osgi_directive(entry, "bundle-version")
}
2554
/// Creates a `PackageData` whose package type is Maven and whose datasource id
/// is `datasource_id`; every other field keeps its `Default` value.
fn default_package_data(datasource_id: DatasourceId) -> PackageData {
    PackageData {
        package_type: Some(PackageType::Maven),
        datasource_id: Some(datasource_id),
        ..Default::default()
    }
}
2562
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `pom_xml` as `pom.xml` inside a fresh temporary directory.
    ///
    /// The returned `TempDir` must be kept alive for as long as the file is
    /// read; dropping it removes the directory.
    fn write_pom(pom_xml: &str) -> (TempDir, std::path::PathBuf) {
        let workspace = TempDir::new().unwrap();
        let pom_file = workspace.path().join("pom.xml");
        fs::write(&pom_file, pom_xml).unwrap();
        (workspace, pom_file)
    }

    /// Views an optional JSON value as text; `None` unless it is a JSON string.
    fn text(value: Option<&serde_json::Value>) -> Option<&str> {
        value.and_then(serde_json::Value::as_str)
    }

    /// `<organization>` name and URL are exposed through `extra_data`.
    #[test]
    fn test_organization_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>my-app</artifactId>
    <version>1.0.0</version>
    <organization>
        <name>Example Corporation</name>
        <url>https://example.com</url>
    </organization>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("my-app"));
        assert_eq!(package.namespace.as_deref(), Some("com.example"));
        assert_eq!(package.version.as_deref(), Some("1.0.0"));

        let extra = package.extra_data.unwrap();
        assert_eq!(
            text(extra.get("organization_name")),
            Some("Example Corporation")
        );
        assert_eq!(
            text(extra.get("organization_url")),
            Some("https://example.com")
        );
    }

    /// `<scm>` feeds the code-view URL, the VCS URL and two `extra_data` keys.
    #[test]
    fn test_scm_metadata_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>3.0.0</version>
    <scm>
        <connection>scm:git:https://github.com/spring-projects/spring-boot.git</connection>
        <developerConnection>scm:git:git@github.com:spring-projects/spring-boot.git</developerConnection>
        <url>https://github.com/spring-projects/spring-boot</url>
        <tag>v3.0.0</tag>
    </scm>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("spring-boot-starter-web"));
        assert_eq!(
            package.namespace.as_deref(),
            Some("org.springframework.boot")
        );
        assert_eq!(package.version.as_deref(), Some("3.0.0"));

        assert_eq!(
            package.code_view_url.as_deref(),
            Some("https://github.com/spring-projects/spring-boot")
        );
        assert_eq!(
            package.vcs_url.as_deref(),
            Some("git+https://github.com/spring-projects/spring-boot.git")
        );

        let extra = package.extra_data.unwrap();
        assert_eq!(text(extra.get("scm_tag")), Some("v3.0.0"));
        assert_eq!(
            text(extra.get("scm_developer_connection")),
            Some("git+git@github.com:spring-projects/spring-boot.git")
        );
    }

    /// Developers come first, then contributors, each as a `person` party.
    #[test]
    fn test_developers_and_contributors_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <developers>
        <developer>
            <id>jdoe</id>
            <name>John Doe</name>
            <email>john@example.com</email>
            <url>https://example.com/jdoe</url>
            <organization>Example Corp</organization>
            <organizationUrl>https://example.com</organizationUrl>
            <timezone>America/New_York</timezone>
        </developer>
        <developer>
            <name>Jane Smith</name>
            <email>jane@example.com</email>
        </developer>
    </developers>
    <contributors>
        <contributor>
            <name>Bob Wilson</name>
            <email>bob@example.com</email>
            <url>https://example.com/bob</url>
        </contributor>
    </contributors>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("test-app"));
        assert_eq!(package.parties.len(), 3);

        let first_developer = &package.parties[0];
        assert_eq!(first_developer.r#type.as_deref(), Some("person"));
        assert_eq!(first_developer.role.as_deref(), Some("developer"));
        assert_eq!(first_developer.name.as_deref(), Some("John Doe"));
        assert_eq!(first_developer.email.as_deref(), Some("john@example.com"));
        assert_eq!(
            first_developer.url.as_deref(),
            Some("https://example.com/jdoe")
        );
        assert_eq!(
            first_developer.organization.as_deref(),
            Some("Example Corp")
        );
        assert_eq!(
            first_developer.organization_url.as_deref(),
            Some("https://example.com")
        );
        assert_eq!(
            first_developer.timezone.as_deref(),
            Some("America/New_York")
        );

        let second_developer = &package.parties[1];
        assert_eq!(second_developer.r#type.as_deref(), Some("person"));
        assert_eq!(second_developer.role.as_deref(), Some("developer"));
        assert_eq!(second_developer.name.as_deref(), Some("Jane Smith"));
        assert_eq!(second_developer.email.as_deref(), Some("jane@example.com"));

        let contributor = &package.parties[2];
        assert_eq!(contributor.r#type.as_deref(), Some("person"));
        assert_eq!(contributor.role.as_deref(), Some("contributor"));
        assert_eq!(contributor.name.as_deref(), Some("Bob Wilson"));
        assert_eq!(contributor.email.as_deref(), Some("bob@example.com"));
        assert_eq!(contributor.url.as_deref(), Some("https://example.com/bob"));
    }

    /// `<issueManagement>` sets the bug tracker URL and records the system.
    #[test]
    fn test_issue_management_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <issueManagement>
        <system>GitHub</system>
        <url>https://github.com/example/test-app/issues</url>
    </issueManagement>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("test-app"));
        assert_eq!(
            package.bug_tracking_url.as_deref(),
            Some("https://github.com/example/test-app/issues")
        );

        let extra = package.extra_data.unwrap();
        assert_eq!(text(extra.get("issue_tracking_system")), Some("GitHub"));
    }

    /// `<ciManagement>` system and URL are exposed through `extra_data`.
    #[test]
    fn test_ci_management_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <ciManagement>
        <system>Jenkins</system>
        <url>https://ci.example.com/job/test-app</url>
    </ciManagement>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("test-app"));

        let extra = package.extra_data.unwrap();
        assert_eq!(text(extra.get("ci_system")), Some("Jenkins"));
        assert_eq!(
            text(extra.get("ci_url")),
            Some("https://ci.example.com/job/test-app")
        );
    }

    /// `<distributionManagement>` yields the download URL plus one JSON object
    /// per repository, snapshot repository and site.
    #[test]
    fn test_distribution_management_extraction() {
        let (_workspace, pom_file) = write_pom(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>test-app</artifactId>
    <version>1.0.0</version>
    <distributionManagement>
        <downloadUrl>https://example.com/downloads</downloadUrl>
        <repository>
            <id>releases</id>
            <name>Release Repository</name>
            <url>https://repo.example.com/releases</url>
            <layout>default</layout>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>Snapshot Repository</name>
            <url>https://repo.example.com/snapshots</url>
            <layout>default</layout>
        </snapshotRepository>
        <site>
            <id>site-deploy</id>
            <name>Project Site</name>
            <url>https://example.com/site</url>
        </site>
    </distributionManagement>
</project>"#,
        );

        let package = MavenParser::extract_first_package(&pom_file);

        assert_eq!(package.name.as_deref(), Some("test-app"));
        assert_eq!(
            package.download_url.as_deref(),
            Some("https://example.com/downloads")
        );

        let extra = package.extra_data.unwrap();

        assert_eq!(
            text(extra.get("distribution_download_url")),
            Some("https://example.com/downloads")
        );

        let releases = extra
            .get("distribution_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(text(releases.get("id")), Some("releases"));
        assert_eq!(text(releases.get("name")), Some("Release Repository"));
        assert_eq!(
            text(releases.get("url")),
            Some("https://repo.example.com/releases")
        );
        assert_eq!(text(releases.get("layout")), Some("default"));

        let snapshots = extra
            .get("distribution_snapshot_repository")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(text(snapshots.get("id")), Some("snapshots"));
        assert_eq!(text(snapshots.get("name")), Some("Snapshot Repository"));
        assert_eq!(
            text(snapshots.get("url")),
            Some("https://repo.example.com/snapshots")
        );
        assert_eq!(text(snapshots.get("layout")), Some("default"));

        let site = extra
            .get("distribution_site")
            .unwrap()
            .as_object()
            .unwrap();
        assert_eq!(text(site.get("id")), Some("site-deploy"));
        assert_eq!(text(site.get("name")), Some("Project Site"));
        assert_eq!(text(site.get("url")), Some("https://example.com/site"));
    }
}
2939
// Registers this parser's metadata with the crate-level `register_parser!`
// macro. NOTE(review): the arguments appear to be, in order, a display name,
// the path globs handled by the parser, the package type, the primary language
// and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Apache Maven POM",
    &[
        "**/*.pom",
        "**/pom.xml",
        "**/pom.properties",
        "**/META-INF/MANIFEST.MF"
    ],
    "maven",
    "Java",
    Some("https://maven.apache.org/pom.html"),
);