1use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use regex::Regex;
9use serde_json::Value as JsonValue;
10use yaml_serde::{Mapping, Value as YamlValue};
11
12use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
13use crate::parsers::utils::{
14 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
15};
16
17use super::PackageParser;
18use super::metadata::ParserMetadata;
19
/// Package type shared by the three Hackage parsers in this file and stamped
/// onto the `PackageData` values they build.
const PACKAGE_TYPE: PackageType = PackageType::Hackage;
/// Primary language recorded on the `PackageData` values built in this file.
const PRIMARY_LANGUAGE: &str = "Haskell";
22
/// Parser for Hackage `*.cabal` package manifests.
pub struct HackageCabalParser;
24
/// Parser for `cabal.project` workspace files.
pub struct HackageCabalProjectParser;
26
/// Parser for Stack `stack.yaml` project files.
pub struct HackageStackYamlParser;
28
29impl PackageParser for HackageCabalParser {
30 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
31
32 fn metadata() -> Vec<ParserMetadata> {
33 vec![ParserMetadata {
34 description: "Hackage Cabal package manifest",
35 file_patterns: &["**/*.cabal"],
36 package_type: "hackage",
37 primary_language: "Haskell",
38 documentation_url: Some(
39 "https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html",
40 ),
41 }]
42 }
43
44 fn is_match(path: &Path) -> bool {
45 path.extension().is_some_and(|ext| ext == "cabal")
46 }
47
48 fn extract_packages(path: &Path) -> Vec<PackageData> {
49 let content = match read_file_to_string(path, None) {
50 Ok(content) => content,
51 Err(error) => {
52 warn!("Failed to read cabal file {:?}: {}", path, error);
53 return vec![default_package_data(DatasourceId::HackageCabal)];
54 }
55 };
56
57 vec![parse_cabal_manifest(&content)]
58 }
59}
60
61impl PackageParser for HackageCabalProjectParser {
62 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
63
64 fn metadata() -> Vec<ParserMetadata> {
65 vec![ParserMetadata {
66 description: "Hackage cabal.project workspace file",
67 file_patterns: &["**/cabal.project"],
68 package_type: "hackage",
69 primary_language: "Haskell",
70 documentation_url: Some(
71 "https://cabal.readthedocs.io/en/stable/cabal-project-description-file.html",
72 ),
73 }]
74 }
75
76 fn is_match(path: &Path) -> bool {
77 path.file_name().is_some_and(|name| name == "cabal.project")
78 }
79
80 fn extract_packages(path: &Path) -> Vec<PackageData> {
81 let content = match read_file_to_string(path, None) {
82 Ok(content) => content,
83 Err(error) => {
84 warn!("Failed to read cabal.project {:?}: {}", path, error);
85 return vec![default_package_data(DatasourceId::HackageCabalProject)];
86 }
87 };
88
89 vec![parse_cabal_project(&content)]
90 }
91}
92
93impl PackageParser for HackageStackYamlParser {
94 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
95
96 fn metadata() -> Vec<ParserMetadata> {
97 vec![ParserMetadata {
98 description: "Hackage Stack project manifest",
99 file_patterns: &["**/stack.yaml"],
100 package_type: "hackage",
101 primary_language: "Haskell",
102 documentation_url: Some("https://docs.haskellstack.org/en/stable/configure/yaml/"),
103 }]
104 }
105
106 fn is_match(path: &Path) -> bool {
107 path.file_name().is_some_and(|name| name == "stack.yaml")
108 }
109
110 fn extract_packages(path: &Path) -> Vec<PackageData> {
111 let content = match read_file_to_string(path, None) {
112 Ok(content) => content,
113 Err(error) => {
114 warn!("Failed to read stack.yaml {:?}: {}", path, error);
115 return vec![default_package_data(DatasourceId::HackageStackYaml)];
116 }
117 };
118
119 let yaml: YamlValue = match yaml_serde::from_str(&content) {
120 Ok(yaml) => yaml,
121 Err(error) => {
122 warn!("Failed to parse stack.yaml {:?}: {}", path, error);
123 return vec![default_package_data(DatasourceId::HackageStackYaml)];
124 }
125 };
126
127 vec![parse_stack_yaml(&yaml)]
128 }
129}
130
/// Identifies the Cabal component stanza that a field was found in.
///
/// Built by `parse_component_header` from a top-level stanza header line and
/// attached to dependencies parsed from that stanza's `build-depends`.
#[derive(Clone, Debug, Default)]
struct ComponentContext {
    /// Stanza keyword that matched the header, e.g. `library` or `test-suite`.
    component_type: String,
    /// Trimmed text following the keyword on the header line, if non-empty.
    component_name: Option<String>,
}
136
/// Intermediate result of scanning a `.cabal` file, before it is converted
/// into a `PackageData` by `parse_cabal_manifest`.
#[derive(Debug, Default)]
struct CabalData {
    /// Value of the top-level `name` field.
    name: Option<String>,
    /// Value of the top-level `version` field.
    version: Option<String>,
    /// Value of the top-level `synopsis` field.
    synopsis: Option<String>,
    /// Normalized value of the top-level `description` field.
    description: Option<String>,
    /// Raw value of the top-level `license` field.
    license: Option<String>,
    /// Value of the top-level `homepage` field.
    homepage_url: Option<String>,
    /// Value of the top-level `bug-reports` field.
    bug_tracking_url: Option<String>,
    /// First `location` value seen inside a `source-repository` stanza.
    vcs_url: Option<String>,
    /// Comma-separated entries of the top-level `author` field.
    authors: Vec<String>,
    /// Comma-separated entries of the top-level `maintainer` field.
    maintainers: Vec<String>,
    /// Comma-separated entries of the top-level `category` field.
    category_keywords: Vec<String>,
    /// Comma-separated entries of the top-level `keywords` field.
    explicit_keywords: Vec<String>,
    /// Dependencies collected from `build-depends` fields.
    dependencies: Vec<Dependency>,
}
153
154fn default_package_data(datasource_id: DatasourceId) -> PackageData {
155 PackageData {
156 package_type: Some(PACKAGE_TYPE),
157 primary_language: Some(PRIMARY_LANGUAGE.to_string()),
158 datasource_id: Some(datasource_id),
159 ..Default::default()
160 }
161}
162
163fn parse_cabal_manifest(content: &str) -> PackageData {
164 let parsed = parse_cabal_data(content);
165 let keywords = merge_keywords(&parsed.category_keywords, &parsed.explicit_keywords);
166 let description =
167 combine_summary_and_description(&parsed.synopsis, &parsed.description).map(truncate_field);
168 let parties = build_parties(&parsed.authors, &parsed.maintainers);
169 let purl =
170 build_hackage_purl(parsed.name.as_deref(), parsed.version.as_deref()).map(truncate_field);
171 let repository_homepage_url = parsed
172 .name
173 .as_ref()
174 .map(|name| match parsed.version.as_ref() {
175 Some(version) => truncate_field(format!(
176 "https://hackage.haskell.org/package/{}-{}",
177 name, version
178 )),
179 None => truncate_field(format!("https://hackage.haskell.org/package/{}", name)),
180 });
181
182 PackageData {
183 package_type: Some(PACKAGE_TYPE),
184 namespace: None,
185 name: parsed.name,
186 version: parsed.version,
187 qualifiers: None,
188 subpath: None,
189 primary_language: Some(PRIMARY_LANGUAGE.to_string()),
190 description,
191 release_date: None,
192 parties,
193 keywords,
194 homepage_url: parsed.homepage_url,
195 download_url: None,
196 size: None,
197 sha1: None,
198 md5: None,
199 sha256: None,
200 sha512: None,
201 bug_tracking_url: parsed.bug_tracking_url,
202 code_view_url: None,
203 vcs_url: parsed.vcs_url,
204 copyright: None,
205 holder: None,
206 declared_license_expression: None,
207 declared_license_expression_spdx: None,
208 license_detections: Vec::new(),
209 other_license_expression: None,
210 other_license_expression_spdx: None,
211 other_license_detections: Vec::new(),
212 extracted_license_statement: parsed.license,
213 notice_text: None,
214 source_packages: Vec::new(),
215 file_references: Vec::new(),
216 is_private: false,
217 is_virtual: false,
218 extra_data: None,
219 dependencies: parsed.dependencies,
220 repository_homepage_url,
221 repository_download_url: None,
222 api_data_url: None,
223 datasource_id: Some(DatasourceId::HackageCabal),
224 purl,
225 }
226}
227
228fn parse_cabal_project(content: &str) -> PackageData {
229 let mut package_data = default_package_data(DatasourceId::HackageCabalProject);
230 let lines: Vec<&str> = content.lines().collect();
231 let mut dependencies = Vec::new();
232 let mut extra_data = HashMap::new();
233 let mut source_repo_entries: Vec<HashMap<String, JsonValue>> = Vec::new();
234 let mut current_source_repo: Option<HashMap<String, JsonValue>> = None;
235 let mut index = 0;
236 let mut iteration_count = 0usize;
237
238 while index < lines.len() {
239 iteration_count += 1;
240 if iteration_count > MAX_ITERATION_COUNT {
241 warn!(
242 "parse_cabal_project: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
243 MAX_ITERATION_COUNT, index
244 );
245 break;
246 }
247
248 let cleaned = strip_cabal_comment(lines[index]);
249 let trimmed = cleaned.trim();
250 let indent = indentation(cleaned);
251
252 if trimmed.is_empty() {
253 index += 1;
254 continue;
255 }
256
257 if indent == 0 && trimmed == "source-repository-package" {
258 if let Some(entry) = current_source_repo.take() {
259 source_repo_entries.push(entry);
260 }
261 current_source_repo = Some(HashMap::new());
262 index += 1;
263 continue;
264 }
265
266 let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
267 if indent == 0
268 && let Some(entry) = current_source_repo.take()
269 {
270 source_repo_entries.push(entry);
271 }
272 index += 1;
273 continue;
274 };
275
276 if current_source_repo.is_some() && indent > 0 {
277 if let Some(source_repo) = current_source_repo.as_mut() {
278 source_repo.insert(
279 project_extra_key(&key),
280 parse_multiline_scalar_or_list(&value),
281 );
282 }
283 index = next_index + 1;
284 continue;
285 }
286
287 if current_source_repo.is_some()
288 && indent == 0
289 && key != "source-repository-package"
290 && let Some(entry) = current_source_repo.take()
291 {
292 source_repo_entries.push(entry);
293 }
294
295 match key.as_str() {
296 "packages" => {
297 dependencies.extend(parse_path_like_entries(&value, "packages", false));
298 }
299 "optional-packages" => {
300 dependencies.extend(parse_path_like_entries(&value, "optional-packages", true));
301 }
302 "extra-packages" => {
303 dependencies.extend(parse_hackage_spec_entries(&value, "extra-packages", None));
304 }
305 "import" => {
306 dependencies.extend(parse_import_entries(&value));
307 }
308 _ => {
309 extra_data.insert(
310 project_extra_key(&key),
311 parse_multiline_scalar_or_list(&value),
312 );
313 }
314 }
315
316 index = next_index + 1;
317 }
318
319 if let Some(entry) = current_source_repo.take() {
320 source_repo_entries.push(entry);
321 }
322
323 for entry in source_repo_entries.into_iter().take(MAX_ITERATION_COUNT) {
324 dependencies.push(build_source_repository_dependency(entry));
325 }
326
327 package_data.dependencies = dependencies;
328 package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
329 package_data
330}
331
332fn parse_stack_yaml(yaml: &YamlValue) -> PackageData {
333 let mut package_data = default_package_data(DatasourceId::HackageStackYaml);
334 let Some(mapping) = yaml.as_mapping() else {
335 return package_data;
336 };
337
338 let mut dependencies = Vec::new();
339 let mut extra_data = HashMap::new();
340
341 if let Some(resolver) = mapping_get(mapping, "resolver")
342 && let Ok(value) = serde_json::to_value(resolver)
343 {
344 extra_data.insert("resolver".to_string(), value);
345 }
346
347 if let Some(snapshot) = mapping_get(mapping, "snapshot")
348 && let Ok(value) = serde_json::to_value(snapshot)
349 {
350 extra_data.insert("snapshot".to_string(), value);
351 }
352
353 if let Some(packages) = mapping_get(mapping, "packages") {
354 dependencies.extend(parse_stack_package_entries(packages));
355 }
356
357 if let Some(extra_deps) = mapping_get(mapping, "extra-deps") {
358 dependencies.extend(parse_stack_extra_dep_entries(extra_deps));
359 }
360
361 for (key, value) in mapping.iter().take(MAX_ITERATION_COUNT) {
362 let Some(key) = key.as_str() else {
363 continue;
364 };
365
366 if matches!(key, "resolver" | "snapshot" | "packages" | "extra-deps") {
367 continue;
368 }
369
370 if let Ok(json_value) = serde_json::to_value(value) {
371 extra_data.insert(key.to_string(), json_value);
372 }
373 }
374
375 package_data.dependencies = dependencies;
376 package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
377 package_data
378}
379
380fn parse_cabal_data(content: &str) -> CabalData {
381 let mut data = CabalData::default();
382 let lines: Vec<&str> = content.lines().collect();
383 let mut current_component: Option<ComponentContext> = None;
384 let mut in_source_repository = false;
385 let mut index = 0;
386 let mut iteration_count = 0usize;
387
388 while index < lines.len() {
389 iteration_count += 1;
390 if iteration_count > MAX_ITERATION_COUNT {
391 warn!(
392 "parse_cabal_data: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
393 MAX_ITERATION_COUNT, index
394 );
395 break;
396 }
397 let cleaned = strip_cabal_comment(lines[index]);
398 let trimmed = cleaned.trim();
399 let indent = indentation(cleaned);
400
401 if trimmed.is_empty() {
402 index += 1;
403 continue;
404 }
405
406 if indent == 0 && !trimmed.contains(':') {
407 current_component = parse_component_header(trimmed);
408 in_source_repository = trimmed.starts_with("source-repository");
409 index += 1;
410 continue;
411 }
412
413 let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
414 index += 1;
415 continue;
416 };
417
418 match key.as_str() {
419 "name" if indent == 0 => data.name = clean_single_line(&value).map(truncate_field),
420 "version" if indent == 0 => {
421 data.version = clean_single_line(&value).map(truncate_field)
422 }
423 "synopsis" if indent == 0 => {
424 data.synopsis = clean_single_line(&value).map(truncate_field)
425 }
426 "description" if indent == 0 => {
427 data.description = normalize_cabal_multiline(&value).map(truncate_field);
428 }
429 "license" if indent == 0 => {
430 data.license = clean_single_line(&value).map(truncate_field)
431 }
432 "homepage" if indent == 0 => {
433 data.homepage_url = clean_single_line(&value).map(truncate_field)
434 }
435 "bug-reports" if indent == 0 => {
436 data.bug_tracking_url = clean_single_line(&value).map(truncate_field)
437 }
438 "author" if indent == 0 => data.authors.extend(split_comma_separated(&value)),
439 "maintainer" if indent == 0 => {
440 data.maintainers.extend(split_comma_separated(&value));
441 }
442 "category" if indent == 0 => {
443 data.category_keywords.extend(split_keywords(&value));
444 }
445 "keywords" if indent == 0 => {
446 data.explicit_keywords.extend(split_keywords(&value));
447 }
448 "location" if in_source_repository && data.vcs_url.is_none() => {
449 data.vcs_url = clean_single_line(&value).map(truncate_field);
450 }
451 "build-depends" => {
452 data.dependencies
453 .extend(parse_build_depends(&value, current_component.as_ref()));
454 }
455 _ => {}
456 }
457
458 index = next_index + 1;
459 }
460
461 data
462}
463
464fn parse_build_depends(value: &str, component: Option<&ComponentContext>) -> Vec<Dependency> {
465 if component.is_some_and(|component| component.component_type == "common") {
466 return Vec::new();
467 }
468
469 split_dependency_entries(value)
470 .into_iter()
471 .filter_map(|entry| {
472 parse_hackage_spec_dependency(&entry, Some("build-depends"), component, None)
473 })
474 .collect()
475}
476
477fn parse_path_like_entries(value: &str, scope: &str, optional: bool) -> Vec<Dependency> {
478 split_multiline_entries(value)
479 .into_iter()
480 .filter(|entry| !entry.is_empty())
481 .map(|entry| {
482 let mut extra_data = HashMap::new();
483 extra_data.insert("path".to_string(), JsonValue::String(entry.clone()));
484
485 Dependency {
486 purl: None,
487 extracted_requirement: Some(truncate_field(entry)),
488 scope: Some(scope.to_string()),
489 is_runtime: None,
490 is_optional: Some(optional),
491 is_pinned: Some(false),
492 is_direct: Some(true),
493 resolved_package: None,
494 extra_data: Some(extra_data),
495 }
496 })
497 .collect()
498}
499
500fn parse_import_entries(value: &str) -> Vec<Dependency> {
501 split_multiline_entries(value)
502 .into_iter()
503 .filter(|entry| !entry.is_empty())
504 .map(|entry| Dependency {
505 purl: None,
506 extracted_requirement: Some(truncate_field(entry)),
507 scope: Some("import".to_string()),
508 is_runtime: None,
509 is_optional: Some(false),
510 is_pinned: Some(false),
511 is_direct: Some(true),
512 resolved_package: None,
513 extra_data: None,
514 })
515 .collect()
516}
517
518fn parse_hackage_spec_entries(
519 value: &str,
520 scope: &str,
521 is_runtime: Option<bool>,
522) -> Vec<Dependency> {
523 split_multiline_entries(value)
524 .into_iter()
525 .filter_map(|entry| parse_hackage_spec_dependency(&entry, Some(scope), None, is_runtime))
526 .collect()
527}
528
529fn parse_stack_package_entries(value: &YamlValue) -> Vec<Dependency> {
530 let Some(sequence) = value.as_sequence() else {
531 return Vec::new();
532 };
533
534 sequence
535 .iter()
536 .take(MAX_ITERATION_COUNT)
537 .filter_map(|entry| match entry {
538 YamlValue::String(path) => {
539 let mut extra_data = HashMap::new();
540 extra_data.insert("path".to_string(), JsonValue::String(path.clone()));
541
542 Some(Dependency {
543 purl: None,
544 extracted_requirement: Some(truncate_field(path.clone())),
545 scope: Some("packages".to_string()),
546 is_runtime: None,
547 is_optional: Some(false),
548 is_pinned: Some(false),
549 is_direct: Some(true),
550 resolved_package: None,
551 extra_data: Some(extra_data),
552 })
553 }
554 YamlValue::Mapping(map) => {
555 let extracted_requirement = mapping_string(map, "location")
556 .or_else(|| mapping_string(map, "git"))
557 .or_else(|| mapping_string(map, "url"))
558 .map(truncate_field);
559 let extra_data = serde_json::to_value(entry)
560 .ok()
561 .and_then(|value| value.as_object().cloned())
562 .map(|map| map.into_iter().collect::<HashMap<_, _>>());
563
564 Some(Dependency {
565 purl: None,
566 extracted_requirement,
567 scope: Some("packages".to_string()),
568 is_runtime: None,
569 is_optional: Some(false),
570 is_pinned: Some(mapping_string(map, "commit").is_some()),
571 is_direct: Some(true),
572 resolved_package: None,
573 extra_data,
574 })
575 }
576 _ => None,
577 })
578 .collect()
579}
580
581fn parse_stack_extra_dep_entries(value: &YamlValue) -> Vec<Dependency> {
582 let Some(sequence) = value.as_sequence() else {
583 return Vec::new();
584 };
585
586 sequence
587 .iter()
588 .take(MAX_ITERATION_COUNT)
589 .filter_map(|entry| match entry {
590 YamlValue::String(spec) => parse_stack_extra_dep_string(spec),
591 YamlValue::Mapping(map) => Some(parse_stack_extra_dep_mapping(map, entry)),
592 _ => None,
593 })
594 .collect()
595}
596
597fn parse_stack_extra_dep_string(spec: &str) -> Option<Dependency> {
598 let trimmed = spec.trim();
599 if trimmed.is_empty() {
600 return None;
601 }
602
603 let (package_spec, pantry_suffix) = trimmed
604 .split_once('@')
605 .map_or((trimmed, None), |(package_spec, suffix)| {
606 (package_spec, Some(suffix))
607 });
608
609 let mut dependency =
610 parse_hackage_spec_dependency(package_spec, Some("extra-deps"), None, None).unwrap_or(
611 Dependency {
612 purl: None,
613 extracted_requirement: Some(truncate_field(package_spec.to_string())),
614 scope: Some("extra-deps".to_string()),
615 is_runtime: None,
616 is_optional: Some(false),
617 is_pinned: Some(false),
618 is_direct: Some(true),
619 resolved_package: None,
620 extra_data: None,
621 },
622 );
623
624 if let Some(suffix) = pantry_suffix {
625 let mut extra_data = dependency.extra_data.take().unwrap_or_default();
626 extra_data.insert("pantry".to_string(), JsonValue::String(suffix.to_string()));
627 dependency.extra_data = Some(extra_data);
628 dependency.is_pinned = Some(true);
629 if dependency.extracted_requirement.is_none() {
630 dependency.extracted_requirement = Some(truncate_field(package_spec.to_string()));
631 }
632 }
633
634 dependency.scope = Some("extra-deps".to_string());
635 Some(dependency)
636}
637
638fn parse_stack_extra_dep_mapping(map: &Mapping, raw_value: &YamlValue) -> Dependency {
639 let name = mapping_string(map, "name");
640 let version = mapping_string(map, "version");
641 let purl = build_hackage_purl(name.as_deref(), version.as_deref()).map(truncate_field);
642 let extracted_requirement = version
643 .clone()
644 .or_else(|| mapping_string(map, "git"))
645 .or_else(|| mapping_string(map, "url"))
646 .map(truncate_field);
647 let extra_data = serde_json::to_value(raw_value)
648 .ok()
649 .and_then(|value| value.as_object().cloned())
650 .map(|map| map.into_iter().collect::<HashMap<_, _>>());
651
652 Dependency {
653 purl,
654 extracted_requirement,
655 scope: Some("extra-deps".to_string()),
656 is_runtime: None,
657 is_optional: Some(false),
658 is_pinned: Some(version.is_some() || mapping_string(map, "commit").is_some()),
659 is_direct: Some(true),
660 resolved_package: None,
661 extra_data,
662 }
663}
664
665fn build_source_repository_dependency(extra_data: HashMap<String, JsonValue>) -> Dependency {
666 let extracted_requirement = extra_data
667 .get("location")
668 .and_then(JsonValue::as_str)
669 .map(str::to_string)
670 .or_else(|| {
671 extra_data
672 .get("tag")
673 .and_then(JsonValue::as_str)
674 .map(str::to_string)
675 })
676 .map(truncate_field);
677
678 Dependency {
679 purl: None,
680 extracted_requirement,
681 scope: Some("source-repository-package".to_string()),
682 is_runtime: None,
683 is_optional: Some(false),
684 is_pinned: Some(
685 extra_data.contains_key("tag")
686 || extra_data.contains_key("commit")
687 || extra_data.contains_key("sha256"),
688 ),
689 is_direct: Some(true),
690 resolved_package: None,
691 extra_data: Some(extra_data),
692 }
693}
694
695fn parse_hackage_spec_dependency(
696 spec: &str,
697 scope: Option<&str>,
698 component: Option<&ComponentContext>,
699 is_runtime: Option<bool>,
700) -> Option<Dependency> {
701 let trimmed = spec.trim();
702 if trimmed.is_empty() {
703 return None;
704 }
705
706 let can_split_name_version = matches!(scope, Some("extra-packages" | "extra-deps"));
707
708 if can_split_name_version && let Some((name, version)) = split_hackage_name_version(trimmed) {
709 let mut extra_data = HashMap::new();
710 if let Some(component) = component {
711 extra_data.insert(
712 "component_type".to_string(),
713 JsonValue::String(component.component_type.clone()),
714 );
715 if let Some(component_name) = &component.component_name {
716 extra_data.insert(
717 "component_name".to_string(),
718 JsonValue::String(component_name.clone()),
719 );
720 }
721 }
722
723 return Some(Dependency {
724 purl: Some(truncate_field(format!("pkg:hackage/{}@{}", name, version))),
725 extracted_requirement: Some(truncate_field(version)),
726 scope: scope.map(str::to_string),
727 is_runtime: component.map(component_is_runtime).or(is_runtime),
728 is_optional: Some(false),
729 is_pinned: Some(true),
730 is_direct: Some(true),
731 resolved_package: None,
732 extra_data: (!extra_data.is_empty()).then_some(extra_data),
733 });
734 }
735
736 let name_re = Regex::new(r"^(?P<name>[A-Za-z0-9][A-Za-z0-9_\.-]*)").ok()?;
737 let captures = name_re.captures(trimmed)?;
738 let name = captures.name("name")?.as_str().to_string();
739 let requirement = trimmed[name.len()..].trim();
740 let implicit_name_version = if can_split_name_version && requirement.is_empty() {
741 split_hackage_name_version(trimmed)
742 } else {
743 None
744 };
745 let resolved_name = implicit_name_version
746 .as_ref()
747 .map(|(resolved_name, _)| resolved_name.as_str())
748 .unwrap_or(name.as_str());
749 let exact_version = exact_version_requirement(requirement).or_else(|| {
750 implicit_name_version
751 .as_ref()
752 .map(|(_, version)| version.clone())
753 });
754 let purl = if let Some(version) = exact_version.as_deref() {
755 Some(truncate_field(format!(
756 "pkg:hackage/{}@{}",
757 resolved_name, version
758 )))
759 } else {
760 Some(truncate_field(format!("pkg:hackage/{}", resolved_name)))
761 };
762
763 let mut extra_data = HashMap::new();
764 if let Some(component) = component {
765 extra_data.insert(
766 "component_type".to_string(),
767 JsonValue::String(component.component_type.clone()),
768 );
769 if let Some(component_name) = &component.component_name {
770 extra_data.insert(
771 "component_name".to_string(),
772 JsonValue::String(component_name.clone()),
773 );
774 }
775 }
776
777 let extracted_requirement = if let Some((_, version)) = implicit_name_version {
778 Some(truncate_field(version))
779 } else {
780 (!requirement.is_empty())
781 .then_some(requirement.to_string())
782 .map(truncate_field)
783 };
784
785 Some(Dependency {
786 purl,
787 extracted_requirement,
788 scope: scope.map(str::to_string),
789 is_runtime: component.map(component_is_runtime).or(is_runtime),
790 is_optional: Some(false),
791 is_pinned: Some(exact_version.is_some()),
792 is_direct: Some(true),
793 resolved_package: None,
794 extra_data: (!extra_data.is_empty()).then_some(extra_data),
795 })
796}
797
798fn component_is_runtime(component: &ComponentContext) -> bool {
799 !matches!(
800 component.component_type.as_str(),
801 "test-suite" | "benchmark"
802 )
803}
804
805fn parse_component_header(trimmed: &str) -> Option<ComponentContext> {
806 const COMPONENT_PREFIXES: &[&str] = &[
807 "library",
808 "foreign-library",
809 "executable",
810 "test-suite",
811 "benchmark",
812 "common",
813 ];
814
815 COMPONENT_PREFIXES.iter().find_map(|prefix| {
816 trimmed
817 .strip_prefix(prefix)
818 .map(|remainder| ComponentContext {
819 component_type: (*prefix).to_string(),
820 component_name: clean_single_line(remainder),
821 })
822 })
823}
824
825fn collect_indented_field(lines: &[&str], start_index: usize) -> Option<(String, String, usize)> {
826 let current = strip_cabal_comment(lines[start_index]);
827 let trimmed = current.trim();
828 let indent = indentation(current);
829 let colon_index = trimmed.find(':')?;
830 let key = trimmed[..colon_index].trim().to_ascii_lowercase();
831 let mut values = vec![trimmed[colon_index + 1..].trim().to_string()];
832 let mut last_index = start_index;
833
834 for (next_index, line) in lines.iter().enumerate().skip(start_index + 1) {
835 let next = strip_cabal_comment(line);
836 let next_trimmed = next.trim();
837 if next_trimmed.is_empty() {
838 break;
839 }
840
841 if indentation(next) <= indent {
842 break;
843 }
844
845 values.push(next_trimmed.to_string());
846 last_index = next_index;
847 }
848
849 Some((key, values.join("\n"), last_index))
850}
851
852fn split_dependency_entries(value: &str) -> Vec<String> {
853 let mut entries = Vec::new();
854 let mut current = String::new();
855 let mut paren_depth = 0usize;
856 let mut brace_depth = 0usize;
857 let mut bracket_depth = 0usize;
858
859 for character in value.chars().take(MAX_ITERATION_COUNT) {
860 match character {
861 '(' => paren_depth += 1,
862 ')' => paren_depth = paren_depth.saturating_sub(1),
863 '{' => brace_depth += 1,
864 '}' => brace_depth = brace_depth.saturating_sub(1),
865 '[' => bracket_depth += 1,
866 ']' => bracket_depth = bracket_depth.saturating_sub(1),
867 ',' if paren_depth == 0 && brace_depth == 0 && bracket_depth == 0 => {
868 let trimmed = current.trim();
869 if !trimmed.is_empty() {
870 entries.push(trimmed.to_string());
871 }
872 current.clear();
873 continue;
874 }
875 _ => {}
876 }
877
878 current.push(character);
879 }
880
881 let trimmed = current.trim();
882 if !trimmed.is_empty() {
883 entries.push(trimmed.to_string());
884 }
885
886 entries
887}
888
889fn split_multiline_entries(value: &str) -> Vec<String> {
890 value
891 .lines()
892 .take(MAX_ITERATION_COUNT)
893 .map(str::trim)
894 .filter(|line| !line.is_empty())
895 .map(|line| line.strip_prefix("-").unwrap_or(line).trim().to_string())
896 .collect()
897}
898
899fn parse_multiline_scalar_or_list(value: &str) -> JsonValue {
900 let entries = split_multiline_entries(value);
901 if entries.len() <= 1 {
902 clean_single_line(value)
903 .map(JsonValue::String)
904 .unwrap_or(JsonValue::Null)
905 } else {
906 JsonValue::Array(entries.into_iter().map(JsonValue::String).collect())
907 }
908}
909
/// Normalizes a multi-line Cabal value: each line is trimmed, a line holding
/// only `.` becomes an empty line, and the joined text is trimmed as a whole.
///
/// Returns `None` when nothing but whitespace remains.
fn normalize_cabal_multiline(value: &str) -> Option<String> {
    let mut joined = String::new();
    for (position, raw_line) in value.lines().enumerate() {
        if position > 0 {
            joined.push('\n');
        }
        let line = raw_line.trim();
        if line != "." {
            joined.push_str(line);
        }
    }

    let text = joined.trim();
    if text.is_empty() {
        None
    } else {
        Some(text.to_string())
    }
}
926
/// Trims `value` and returns it as an owned string, or `None` when nothing
/// but whitespace remains.
fn clean_single_line(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        text => Some(text.to_owned()),
    }
}
931
/// Splits `value` on commas and returns the trimmed, non-empty pieces.
fn split_comma_separated(value: &str) -> Vec<String> {
    let mut parts = Vec::new();
    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            parts.push(piece.to_owned());
        }
    }
    parts
}
940
/// Splits a keyword-list value into individual keywords by delegating to
/// `split_comma_separated`.
fn split_keywords(value: &str) -> Vec<String> {
    split_comma_separated(value)
}
944
/// Concatenates `categories` and `keywords`, trimming each entry and dropping
/// blanks and ASCII-case-insensitive duplicates.
///
/// The first spelling of each keyword wins and input order is preserved.
fn merge_keywords(categories: &[String], keywords: &[String]) -> Vec<String> {
    let mut seen_lowercase = HashSet::new();
    let mut merged = Vec::new();

    for candidate in categories.iter().chain(keywords) {
        let normalized = candidate.trim();
        if !normalized.is_empty() && seen_lowercase.insert(normalized.to_ascii_lowercase()) {
            merged.push(normalized.to_string());
        }
    }
    merged
}
960
/// Merges the synopsis and description into one text.
///
/// When both are present and differ they are joined with a blank line
/// (synopsis first); when they are identical or only one is present that text
/// is returned; `None` when both are absent.
fn combine_summary_and_description(
    synopsis: &Option<String>,
    description: &Option<String>,
) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (Some(summary), Some(details)) if summary != details => {
            Some(format!("{}\n\n{}", summary, details))
        }
        (Some(summary), _) => Some(summary.to_string()),
        (None, details) => details.map(str::to_string),
    }
}
973
974fn build_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
975 let author_parties = authors
976 .iter()
977 .filter_map(|author| build_party(author, "author"));
978 let maintainer_parties = maintainers
979 .iter()
980 .filter_map(|maintainer| build_party(maintainer, "maintainer"));
981
982 author_parties.chain(maintainer_parties).collect()
983}
984
985fn build_party(value: &str, role: &str) -> Option<Party> {
986 let (name, email) = split_name_email(value.trim());
987 if name.is_none() && email.is_none() {
988 return None;
989 }
990
991 Some(Party {
992 r#type: Some("person".to_string()),
993 role: Some(role.to_string()),
994 name,
995 email,
996 url: None,
997 organization: None,
998 organization_url: None,
999 timezone: None,
1000 })
1001}
1002
/// Builds a `pkg:hackage/` package URL from `name` and an optional `version`.
///
/// Returns `None` when `name` is absent, regardless of `version`.
fn build_hackage_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;
    let purl = match version {
        Some(version) => format!("pkg:hackage/{}@{}", name, version),
        None => format!("pkg:hackage/{}", name),
    };
    Some(purl)
}
1010
/// Splits a constraint-free `name-version` spec into its name and version.
///
/// The split happens at the right-most `-` that has a non-empty name before it
/// and a version starting with an ASCII digit after it. Specs containing
/// whitespace or any of `< > = & | ( )` are rejected, as are specs without a
/// suitable `-`.
fn split_hackage_name_version(spec: &str) -> Option<(String, String)> {
    let is_constraint_char =
        |c: char| c.is_whitespace() || matches!(c, '<' | '>' | '=' | '&' | '|' | '(' | ')');
    if spec.chars().any(is_constraint_char) {
        return None;
    }

    spec.rmatch_indices('-').find_map(|(dash, _)| {
        let name = &spec[..dash];
        let version = &spec[dash + 1..];
        let version_starts_with_digit = version.starts_with(|c: char| c.is_ascii_digit());

        (!name.is_empty() && version_starts_with_digit)
            .then(|| (name.to_string(), version.to_string()))
    })
}
1041
/// Extracts the version from an exact `== version` requirement.
///
/// After trimming, the requirement must be `==`, optional whitespace, and a
/// version made of an ASCII letter or digit followed by ASCII letters, digits
/// or any of `.`, `-`, `_`, `+`. Anything else — ranges, wildcards such as
/// `1.2.*`, compound constraints, or an empty string — yields `None`.
///
/// This is a plain character scan, so no pattern has to be compiled per call.
fn exact_version_requirement(requirement: &str) -> Option<String> {
    let version = requirement.trim().strip_prefix("==")?.trim_start();

    let mut characters = version.chars();
    let starts_alphanumeric = characters
        .next()
        .is_some_and(|c| c.is_ascii_alphanumeric());
    let rest_is_version_text =
        characters.all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_' | '+'));

    (starts_alphanumeric && rest_is_version_text).then(|| version.to_string())
}
1054
/// Converts a field name into its extra-data key by replacing every `-` with
/// `_`.
fn project_extra_key(key: &str) -> String {
    key.chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect()
}
1058
/// Removes a `--` comment from `line`.
///
/// A line whose first non-blank text is `--` becomes the empty string.
/// Otherwise the line is cut at the first `--` that directly follows an ASCII
/// whitespace byte, and trailing whitespace before the cut is dropped. A `--`
/// glued to preceding text is left alone.
fn strip_cabal_comment(line: &str) -> &str {
    if line.trim_start().starts_with("--") {
        return "";
    }

    let bytes = line.as_bytes();
    let comment_start = bytes.windows(2).enumerate().find_map(|(at, pair)| {
        let follows_whitespace = at == 0 || bytes[at - 1].is_ascii_whitespace();
        (matches!(pair, [b'-', b'-']) && follows_whitespace).then_some(at)
    });

    match comment_start {
        // `at` points at an ASCII `-`, so the slice ends on a char boundary.
        Some(at) => line[..at].trim_end(),
        None => line,
    }
}
1077
/// Counts the whitespace characters at the start of `line`.
fn indentation(line: &str) -> usize {
    let mut width = 0;
    for character in line.chars() {
        if !character.is_whitespace() {
            break;
        }
        width += 1;
    }
    width
}
1083
1084fn mapping_get<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
1085 mapping.get(YamlValue::String(key.to_string()))
1086}
1087
1088fn mapping_string(mapping: &Mapping, key: &str) -> Option<String> {
1089 mapping_get(mapping, key)
1090 .and_then(YamlValue::as_str)
1091 .map(str::to_string)
1092}