Skip to main content

provenant/parsers/
hackage.rs

1use std::collections::{HashMap, HashSet};
2use std::fs;
3use std::path::Path;
4
5use log::warn;
6use regex::Regex;
7use serde_json::Value as JsonValue;
8use serde_yaml::{Mapping, Value as YamlValue};
9
10use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
11use crate::parsers::utils::split_name_email;
12
13use super::PackageParser;
14
// Package type shared by every parser and every `PackageData` built in this file.
15const PACKAGE_TYPE: PackageType = PackageType::Hackage;
// Value written to `primary_language` for every `PackageData` built in this file.
16const PRIMARY_LANGUAGE: &str = "Haskell";
17
/// Parser for files with the `cabal` extension (see its `PackageParser` impl).
18pub struct HackageCabalParser;
19
/// Parser for files named `cabal.project` (see its `PackageParser` impl).
20pub struct HackageCabalProjectParser;
21
/// Parser for files named `stack.yaml` (see its `PackageParser` impl).
22pub struct HackageStackYamlParser;
23
24impl PackageParser for HackageCabalParser {
25    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
26
27    fn is_match(path: &Path) -> bool {
28        path.extension().is_some_and(|ext| ext == "cabal")
29    }
30
31    fn extract_packages(path: &Path) -> Vec<PackageData> {
32        let content = match fs::read_to_string(path) {
33            Ok(content) => content,
34            Err(error) => {
35                warn!("Failed to read cabal file {:?}: {}", path, error);
36                return vec![default_package_data(DatasourceId::HackageCabal)];
37            }
38        };
39
40        vec![parse_cabal_manifest(&content)]
41    }
42}
43
44impl PackageParser for HackageCabalProjectParser {
45    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
46
47    fn is_match(path: &Path) -> bool {
48        path.file_name().is_some_and(|name| name == "cabal.project")
49    }
50
51    fn extract_packages(path: &Path) -> Vec<PackageData> {
52        let content = match fs::read_to_string(path) {
53            Ok(content) => content,
54            Err(error) => {
55                warn!("Failed to read cabal.project {:?}: {}", path, error);
56                return vec![default_package_data(DatasourceId::HackageCabalProject)];
57            }
58        };
59
60        vec![parse_cabal_project(&content)]
61    }
62}
63
64impl PackageParser for HackageStackYamlParser {
65    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
66
67    fn is_match(path: &Path) -> bool {
68        path.file_name().is_some_and(|name| name == "stack.yaml")
69    }
70
71    fn extract_packages(path: &Path) -> Vec<PackageData> {
72        let content = match fs::read_to_string(path) {
73            Ok(content) => content,
74            Err(error) => {
75                warn!("Failed to read stack.yaml {:?}: {}", path, error);
76                return vec![default_package_data(DatasourceId::HackageStackYaml)];
77            }
78        };
79
80        let yaml: YamlValue = match serde_yaml::from_str(&content) {
81            Ok(yaml) => yaml,
82            Err(error) => {
83                warn!("Failed to parse stack.yaml {:?}: {}", path, error);
84                return vec![default_package_data(DatasourceId::HackageStackYaml)];
85            }
86        };
87
88        vec![parse_stack_yaml(&yaml)]
89    }
90}
91
/// The component stanza a `build-depends` field was found under, as produced
/// by `parse_component_header`.
92#[derive(Clone, Debug, Default)]
93struct ComponentContext {
    // One of the prefixes listed in `parse_component_header`
    // (e.g. "library", "test-suite", "common").
94    component_type: String,
    // Text following the prefix on the header line; `None` when there is none.
95    component_name: Option<String>,
96}
97
/// Intermediate result of `parse_cabal_data`, later turned into a
/// `PackageData` by `parse_cabal_manifest`.
98#[derive(Debug, Default)]
99struct CabalData {
    // Top-level `name` field.
100    name: Option<String>,
    // Top-level `version` field.
101    version: Option<String>,
    // Top-level `synopsis` field.
102    synopsis: Option<String>,
    // Top-level `description` field after `normalize_cabal_multiline`.
103    description: Option<String>,
    // Top-level `license` field, kept verbatim.
104    license: Option<String>,
    // Top-level `homepage` field.
105    homepage_url: Option<String>,
    // Top-level `bug-reports` field.
106    bug_tracking_url: Option<String>,
    // First `location` field seen inside a `source-repository` stanza.
107    vcs_url: Option<String>,
    // Comma-separated entries of the top-level `author` field.
108    authors: Vec<String>,
    // Comma-separated entries of the top-level `maintainer` field.
109    maintainers: Vec<String>,
    // Comma-separated entries of the top-level `category` field.
110    category_keywords: Vec<String>,
    // Comma-separated entries of the top-level `keywords` field.
111    explicit_keywords: Vec<String>,
    // Dependencies collected from every `build-depends` field.
112    dependencies: Vec<Dependency>,
113}
114
115fn default_package_data(datasource_id: DatasourceId) -> PackageData {
116    PackageData {
117        package_type: Some(PACKAGE_TYPE),
118        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
119        datasource_id: Some(datasource_id),
120        ..Default::default()
121    }
122}
123
124fn parse_cabal_manifest(content: &str) -> PackageData {
125    let parsed = parse_cabal_data(content);
126    let keywords = merge_keywords(&parsed.category_keywords, &parsed.explicit_keywords);
127    let description = combine_summary_and_description(&parsed.synopsis, &parsed.description);
128    let parties = build_parties(&parsed.authors, &parsed.maintainers);
129    let purl = build_hackage_purl(parsed.name.as_deref(), parsed.version.as_deref());
130    let repository_homepage_url = parsed
131        .name
132        .as_ref()
133        .map(|name| match parsed.version.as_ref() {
134            Some(version) => format!("https://hackage.haskell.org/package/{}-{}", name, version),
135            None => format!("https://hackage.haskell.org/package/{}", name),
136        });
137
138    PackageData {
139        package_type: Some(PACKAGE_TYPE),
140        namespace: None,
141        name: parsed.name,
142        version: parsed.version,
143        qualifiers: None,
144        subpath: None,
145        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
146        description,
147        release_date: None,
148        parties,
149        keywords,
150        homepage_url: parsed.homepage_url,
151        download_url: None,
152        size: None,
153        sha1: None,
154        md5: None,
155        sha256: None,
156        sha512: None,
157        bug_tracking_url: parsed.bug_tracking_url,
158        code_view_url: None,
159        vcs_url: parsed.vcs_url,
160        copyright: None,
161        holder: None,
162        declared_license_expression: None,
163        declared_license_expression_spdx: None,
164        license_detections: Vec::new(),
165        other_license_expression: None,
166        other_license_expression_spdx: None,
167        other_license_detections: Vec::new(),
168        extracted_license_statement: parsed.license,
169        notice_text: None,
170        source_packages: Vec::new(),
171        file_references: Vec::new(),
172        is_private: false,
173        is_virtual: false,
174        extra_data: None,
175        dependencies: parsed.dependencies,
176        repository_homepage_url,
177        repository_download_url: None,
178        api_data_url: None,
179        datasource_id: Some(DatasourceId::HackageCabal),
180        purl,
181    }
182}
183
184fn parse_cabal_project(content: &str) -> PackageData {
185    let mut package_data = default_package_data(DatasourceId::HackageCabalProject);
186    let lines: Vec<&str> = content.lines().collect();
187    let mut dependencies = Vec::new();
188    let mut extra_data = HashMap::new();
189    let mut source_repo_entries: Vec<HashMap<String, JsonValue>> = Vec::new();
190    let mut current_source_repo: Option<HashMap<String, JsonValue>> = None;
191    let mut index = 0;
192
193    while index < lines.len() {
194        let cleaned = strip_cabal_comment(lines[index]);
195        let trimmed = cleaned.trim();
196        let indent = indentation(cleaned);
197
198        if trimmed.is_empty() {
199            index += 1;
200            continue;
201        }
202
203        if indent == 0 && trimmed == "source-repository-package" {
204            if let Some(entry) = current_source_repo.take() {
205                source_repo_entries.push(entry);
206            }
207            current_source_repo = Some(HashMap::new());
208            index += 1;
209            continue;
210        }
211
212        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
213            if indent == 0
214                && let Some(entry) = current_source_repo.take()
215            {
216                source_repo_entries.push(entry);
217            }
218            index += 1;
219            continue;
220        };
221
222        if current_source_repo.is_some() && indent > 0 {
223            if let Some(source_repo) = current_source_repo.as_mut() {
224                source_repo.insert(
225                    project_extra_key(&key),
226                    parse_multiline_scalar_or_list(&value),
227                );
228            }
229            index = next_index + 1;
230            continue;
231        }
232
233        if current_source_repo.is_some()
234            && indent == 0
235            && key != "source-repository-package"
236            && let Some(entry) = current_source_repo.take()
237        {
238            source_repo_entries.push(entry);
239        }
240
241        match key.as_str() {
242            "packages" => {
243                dependencies.extend(parse_path_like_entries(&value, "packages", false));
244            }
245            "optional-packages" => {
246                dependencies.extend(parse_path_like_entries(&value, "optional-packages", true));
247            }
248            "extra-packages" => {
249                dependencies.extend(parse_hackage_spec_entries(&value, "extra-packages", None));
250            }
251            "import" => {
252                dependencies.extend(parse_import_entries(&value));
253            }
254            _ => {
255                extra_data.insert(
256                    project_extra_key(&key),
257                    parse_multiline_scalar_or_list(&value),
258                );
259            }
260        }
261
262        index = next_index + 1;
263    }
264
265    if let Some(entry) = current_source_repo.take() {
266        source_repo_entries.push(entry);
267    }
268
269    for entry in source_repo_entries {
270        dependencies.push(build_source_repository_dependency(entry));
271    }
272
273    package_data.dependencies = dependencies;
274    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
275    package_data
276}
277
278fn parse_stack_yaml(yaml: &YamlValue) -> PackageData {
279    let mut package_data = default_package_data(DatasourceId::HackageStackYaml);
280    let Some(mapping) = yaml.as_mapping() else {
281        return package_data;
282    };
283
284    let mut dependencies = Vec::new();
285    let mut extra_data = HashMap::new();
286
287    if let Some(resolver) = mapping_get(mapping, "resolver")
288        && let Ok(value) = serde_json::to_value(resolver)
289    {
290        extra_data.insert("resolver".to_string(), value);
291    }
292
293    if let Some(snapshot) = mapping_get(mapping, "snapshot")
294        && let Ok(value) = serde_json::to_value(snapshot)
295    {
296        extra_data.insert("snapshot".to_string(), value);
297    }
298
299    if let Some(packages) = mapping_get(mapping, "packages") {
300        dependencies.extend(parse_stack_package_entries(packages));
301    }
302
303    if let Some(extra_deps) = mapping_get(mapping, "extra-deps") {
304        dependencies.extend(parse_stack_extra_dep_entries(extra_deps));
305    }
306
307    for (key, value) in mapping {
308        let Some(key) = key.as_str() else {
309            continue;
310        };
311
312        if matches!(key, "resolver" | "snapshot" | "packages" | "extra-deps") {
313            continue;
314        }
315
316        if let Ok(json_value) = serde_json::to_value(value) {
317            extra_data.insert(key.to_string(), json_value);
318        }
319    }
320
321    package_data.dependencies = dependencies;
322    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
323    package_data
324}
325
326fn parse_cabal_data(content: &str) -> CabalData {
327    let mut data = CabalData::default();
328    let lines: Vec<&str> = content.lines().collect();
329    let mut current_component: Option<ComponentContext> = None;
330    let mut in_source_repository = false;
331    let mut index = 0;
332
333    while index < lines.len() {
334        let cleaned = strip_cabal_comment(lines[index]);
335        let trimmed = cleaned.trim();
336        let indent = indentation(cleaned);
337
338        if trimmed.is_empty() {
339            index += 1;
340            continue;
341        }
342
343        if indent == 0 && !trimmed.contains(':') {
344            current_component = parse_component_header(trimmed);
345            in_source_repository = trimmed.starts_with("source-repository");
346            index += 1;
347            continue;
348        }
349
350        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
351            index += 1;
352            continue;
353        };
354
355        match key.as_str() {
356            "name" if indent == 0 => data.name = clean_single_line(&value),
357            "version" if indent == 0 => data.version = clean_single_line(&value),
358            "synopsis" if indent == 0 => data.synopsis = clean_single_line(&value),
359            "description" if indent == 0 => {
360                data.description = normalize_cabal_multiline(&value);
361            }
362            "license" if indent == 0 => data.license = clean_single_line(&value),
363            "homepage" if indent == 0 => data.homepage_url = clean_single_line(&value),
364            "bug-reports" if indent == 0 => data.bug_tracking_url = clean_single_line(&value),
365            "author" if indent == 0 => data.authors.extend(split_comma_separated(&value)),
366            "maintainer" if indent == 0 => {
367                data.maintainers.extend(split_comma_separated(&value));
368            }
369            "category" if indent == 0 => {
370                data.category_keywords.extend(split_keywords(&value));
371            }
372            "keywords" if indent == 0 => {
373                data.explicit_keywords.extend(split_keywords(&value));
374            }
375            "location" if in_source_repository && data.vcs_url.is_none() => {
376                data.vcs_url = clean_single_line(&value);
377            }
378            "build-depends" => {
379                data.dependencies
380                    .extend(parse_build_depends(&value, current_component.as_ref()));
381            }
382            _ => {}
383        }
384
385        index = next_index + 1;
386    }
387
388    data
389}
390
391fn parse_build_depends(value: &str, component: Option<&ComponentContext>) -> Vec<Dependency> {
392    if component.is_some_and(|component| component.component_type == "common") {
393        return Vec::new();
394    }
395
396    split_dependency_entries(value)
397        .into_iter()
398        .filter_map(|entry| {
399            parse_hackage_spec_dependency(&entry, Some("build-depends"), component, None)
400        })
401        .collect()
402}
403
404fn parse_path_like_entries(value: &str, scope: &str, optional: bool) -> Vec<Dependency> {
405    split_multiline_entries(value)
406        .into_iter()
407        .filter(|entry| !entry.is_empty())
408        .map(|entry| {
409            let mut extra_data = HashMap::new();
410            extra_data.insert("path".to_string(), JsonValue::String(entry.clone()));
411
412            Dependency {
413                purl: None,
414                extracted_requirement: Some(entry),
415                scope: Some(scope.to_string()),
416                is_runtime: None,
417                is_optional: Some(optional),
418                is_pinned: Some(false),
419                is_direct: Some(true),
420                resolved_package: None,
421                extra_data: Some(extra_data),
422            }
423        })
424        .collect()
425}
426
427fn parse_import_entries(value: &str) -> Vec<Dependency> {
428    split_multiline_entries(value)
429        .into_iter()
430        .filter(|entry| !entry.is_empty())
431        .map(|entry| Dependency {
432            purl: None,
433            extracted_requirement: Some(entry),
434            scope: Some("import".to_string()),
435            is_runtime: None,
436            is_optional: Some(false),
437            is_pinned: Some(false),
438            is_direct: Some(true),
439            resolved_package: None,
440            extra_data: None,
441        })
442        .collect()
443}
444
445fn parse_hackage_spec_entries(
446    value: &str,
447    scope: &str,
448    is_runtime: Option<bool>,
449) -> Vec<Dependency> {
450    split_multiline_entries(value)
451        .into_iter()
452        .filter_map(|entry| parse_hackage_spec_dependency(&entry, Some(scope), None, is_runtime))
453        .collect()
454}
455
456fn parse_stack_package_entries(value: &YamlValue) -> Vec<Dependency> {
457    let Some(sequence) = value.as_sequence() else {
458        return Vec::new();
459    };
460
461    sequence
462        .iter()
463        .filter_map(|entry| match entry {
464            YamlValue::String(path) => {
465                let mut extra_data = HashMap::new();
466                extra_data.insert("path".to_string(), JsonValue::String(path.clone()));
467
468                Some(Dependency {
469                    purl: None,
470                    extracted_requirement: Some(path.clone()),
471                    scope: Some("packages".to_string()),
472                    is_runtime: None,
473                    is_optional: Some(false),
474                    is_pinned: Some(false),
475                    is_direct: Some(true),
476                    resolved_package: None,
477                    extra_data: Some(extra_data),
478                })
479            }
480            YamlValue::Mapping(map) => {
481                let extracted_requirement = mapping_string(map, "location")
482                    .or_else(|| mapping_string(map, "git"))
483                    .or_else(|| mapping_string(map, "url"));
484                let extra_data = serde_json::to_value(entry)
485                    .ok()
486                    .and_then(|value| value.as_object().cloned())
487                    .map(|map| map.into_iter().collect::<HashMap<_, _>>());
488
489                Some(Dependency {
490                    purl: None,
491                    extracted_requirement,
492                    scope: Some("packages".to_string()),
493                    is_runtime: None,
494                    is_optional: Some(false),
495                    is_pinned: Some(mapping_string(map, "commit").is_some()),
496                    is_direct: Some(true),
497                    resolved_package: None,
498                    extra_data,
499                })
500            }
501            _ => None,
502        })
503        .collect()
504}
505
506fn parse_stack_extra_dep_entries(value: &YamlValue) -> Vec<Dependency> {
507    let Some(sequence) = value.as_sequence() else {
508        return Vec::new();
509    };
510
511    sequence
512        .iter()
513        .filter_map(|entry| match entry {
514            YamlValue::String(spec) => parse_stack_extra_dep_string(spec),
515            YamlValue::Mapping(map) => Some(parse_stack_extra_dep_mapping(map, entry)),
516            _ => None,
517        })
518        .collect()
519}
520
521fn parse_stack_extra_dep_string(spec: &str) -> Option<Dependency> {
522    let trimmed = spec.trim();
523    if trimmed.is_empty() {
524        return None;
525    }
526
527    let (package_spec, pantry_suffix) = trimmed
528        .split_once('@')
529        .map_or((trimmed, None), |(package_spec, suffix)| {
530            (package_spec, Some(suffix))
531        });
532
533    let mut dependency =
534        parse_hackage_spec_dependency(package_spec, Some("extra-deps"), None, None).unwrap_or(
535            Dependency {
536                purl: None,
537                extracted_requirement: Some(package_spec.to_string()),
538                scope: Some("extra-deps".to_string()),
539                is_runtime: None,
540                is_optional: Some(false),
541                is_pinned: Some(false),
542                is_direct: Some(true),
543                resolved_package: None,
544                extra_data: None,
545            },
546        );
547
548    if let Some(suffix) = pantry_suffix {
549        let mut extra_data = dependency.extra_data.take().unwrap_or_default();
550        extra_data.insert("pantry".to_string(), JsonValue::String(suffix.to_string()));
551        dependency.extra_data = Some(extra_data);
552        dependency.is_pinned = Some(true);
553        if dependency.extracted_requirement.is_none() {
554            dependency.extracted_requirement = Some(package_spec.to_string());
555        }
556    }
557
558    dependency.scope = Some("extra-deps".to_string());
559    Some(dependency)
560}
561
562fn parse_stack_extra_dep_mapping(map: &Mapping, raw_value: &YamlValue) -> Dependency {
563    let name = mapping_string(map, "name");
564    let version = mapping_string(map, "version");
565    let purl = build_hackage_purl(name.as_deref(), version.as_deref());
566    let extracted_requirement = version
567        .clone()
568        .or_else(|| mapping_string(map, "git"))
569        .or_else(|| mapping_string(map, "url"));
570    let extra_data = serde_json::to_value(raw_value)
571        .ok()
572        .and_then(|value| value.as_object().cloned())
573        .map(|map| map.into_iter().collect::<HashMap<_, _>>());
574
575    Dependency {
576        purl,
577        extracted_requirement,
578        scope: Some("extra-deps".to_string()),
579        is_runtime: None,
580        is_optional: Some(false),
581        is_pinned: Some(version.is_some() || mapping_string(map, "commit").is_some()),
582        is_direct: Some(true),
583        resolved_package: None,
584        extra_data,
585    }
586}
587
588fn build_source_repository_dependency(extra_data: HashMap<String, JsonValue>) -> Dependency {
589    let extracted_requirement = extra_data
590        .get("location")
591        .and_then(JsonValue::as_str)
592        .map(str::to_string)
593        .or_else(|| {
594            extra_data
595                .get("tag")
596                .and_then(JsonValue::as_str)
597                .map(str::to_string)
598        });
599
600    Dependency {
601        purl: None,
602        extracted_requirement,
603        scope: Some("source-repository-package".to_string()),
604        is_runtime: None,
605        is_optional: Some(false),
606        is_pinned: Some(
607            extra_data.contains_key("tag")
608                || extra_data.contains_key("commit")
609                || extra_data.contains_key("sha256"),
610        ),
611        is_direct: Some(true),
612        resolved_package: None,
613        extra_data: Some(extra_data),
614    }
615}
616
617fn parse_hackage_spec_dependency(
618    spec: &str,
619    scope: Option<&str>,
620    component: Option<&ComponentContext>,
621    is_runtime: Option<bool>,
622) -> Option<Dependency> {
623    let trimmed = spec.trim();
624    if trimmed.is_empty() {
625        return None;
626    }
627
628    let can_split_name_version = matches!(scope, Some("extra-packages" | "extra-deps"));
629
630    if can_split_name_version && let Some((name, version)) = split_hackage_name_version(trimmed) {
631        let mut extra_data = HashMap::new();
632        if let Some(component) = component {
633            extra_data.insert(
634                "component_type".to_string(),
635                JsonValue::String(component.component_type.clone()),
636            );
637            if let Some(component_name) = &component.component_name {
638                extra_data.insert(
639                    "component_name".to_string(),
640                    JsonValue::String(component_name.clone()),
641                );
642            }
643        }
644
645        return Some(Dependency {
646            purl: Some(format!("pkg:hackage/{}@{}", name, version)),
647            extracted_requirement: Some(version),
648            scope: scope.map(str::to_string),
649            is_runtime: component.map(component_is_runtime).or(is_runtime),
650            is_optional: Some(false),
651            is_pinned: Some(true),
652            is_direct: Some(true),
653            resolved_package: None,
654            extra_data: (!extra_data.is_empty()).then_some(extra_data),
655        });
656    }
657
658    let name_re = Regex::new(r"^(?P<name>[A-Za-z0-9][A-Za-z0-9_\.-]*)").ok()?;
659    let captures = name_re.captures(trimmed)?;
660    let name = captures.name("name")?.as_str().to_string();
661    let requirement = trimmed[name.len()..].trim();
662    let implicit_name_version = if can_split_name_version && requirement.is_empty() {
663        split_hackage_name_version(trimmed)
664    } else {
665        None
666    };
667    let resolved_name = implicit_name_version
668        .as_ref()
669        .map(|(resolved_name, _)| resolved_name.as_str())
670        .unwrap_or(name.as_str());
671    let exact_version = exact_version_requirement(requirement).or_else(|| {
672        implicit_name_version
673            .as_ref()
674            .map(|(_, version)| version.clone())
675    });
676    let purl = if let Some(version) = exact_version.as_deref() {
677        Some(format!("pkg:hackage/{}@{}", resolved_name, version))
678    } else {
679        Some(format!("pkg:hackage/{}", resolved_name))
680    };
681
682    let mut extra_data = HashMap::new();
683    if let Some(component) = component {
684        extra_data.insert(
685            "component_type".to_string(),
686            JsonValue::String(component.component_type.clone()),
687        );
688        if let Some(component_name) = &component.component_name {
689            extra_data.insert(
690                "component_name".to_string(),
691                JsonValue::String(component_name.clone()),
692            );
693        }
694    }
695
696    let extracted_requirement = if let Some((_, version)) = implicit_name_version {
697        Some(version)
698    } else {
699        (!requirement.is_empty()).then_some(requirement.to_string())
700    };
701
702    Some(Dependency {
703        purl,
704        extracted_requirement,
705        scope: scope.map(str::to_string),
706        is_runtime: component.map(component_is_runtime).or(is_runtime),
707        is_optional: Some(false),
708        is_pinned: Some(exact_version.is_some()),
709        is_direct: Some(true),
710        resolved_package: None,
711        extra_data: (!extra_data.is_empty()).then_some(extra_data),
712    })
713}
714
715fn component_is_runtime(component: &ComponentContext) -> bool {
716    !matches!(
717        component.component_type.as_str(),
718        "test-suite" | "benchmark"
719    )
720}
721
722fn parse_component_header(trimmed: &str) -> Option<ComponentContext> {
723    const COMPONENT_PREFIXES: &[&str] = &[
724        "library",
725        "foreign-library",
726        "executable",
727        "test-suite",
728        "benchmark",
729        "common",
730    ];
731
732    COMPONENT_PREFIXES.iter().find_map(|prefix| {
733        trimmed
734            .strip_prefix(prefix)
735            .map(|remainder| ComponentContext {
736                component_type: (*prefix).to_string(),
737                component_name: clean_single_line(remainder),
738            })
739    })
740}
741
742fn collect_indented_field(lines: &[&str], start_index: usize) -> Option<(String, String, usize)> {
743    let current = strip_cabal_comment(lines[start_index]);
744    let trimmed = current.trim();
745    let indent = indentation(current);
746    let colon_index = trimmed.find(':')?;
747    let key = trimmed[..colon_index].trim().to_ascii_lowercase();
748    let mut values = vec![trimmed[colon_index + 1..].trim().to_string()];
749    let mut last_index = start_index;
750
751    for (next_index, line) in lines.iter().enumerate().skip(start_index + 1) {
752        let next = strip_cabal_comment(line);
753        let next_trimmed = next.trim();
754        if next_trimmed.is_empty() {
755            break;
756        }
757
758        if indentation(next) <= indent {
759            break;
760        }
761
762        values.push(next_trimmed.to_string());
763        last_index = next_index;
764    }
765
766    Some((key, values.join("\n"), last_index))
767}
768
/// Splits a dependency list on commas that are not nested inside `()`, `{}`
/// or `[]`, returning the trimmed, non-empty entries in order.
///
/// Each bracket kind is tracked separately, and an unmatched closing bracket
/// is ignored rather than driving the depth negative.
fn split_dependency_entries(value: &str) -> Vec<String> {
    /// Moves the trimmed buffer into `entries` (if non-empty) and resets it.
    fn flush(buffer: &mut String, entries: &mut Vec<String>) {
        let entry = buffer.trim();
        if !entry.is_empty() {
            entries.push(entry.to_string());
        }
        buffer.clear();
    }

    let mut entries = Vec::new();
    let mut buffer = String::new();
    let (mut parens, mut braces, mut brackets) = (0usize, 0usize, 0usize);

    for symbol in value.chars() {
        let at_top_level = parens == 0 && braces == 0 && brackets == 0;
        if symbol == ',' && at_top_level {
            flush(&mut buffer, &mut entries);
            continue;
        }

        match symbol {
            '(' => parens += 1,
            ')' => parens = parens.saturating_sub(1),
            '{' => braces += 1,
            '}' => braces = braces.saturating_sub(1),
            '[' => brackets += 1,
            ']' => brackets = brackets.saturating_sub(1),
            _ => {}
        }
        buffer.push(symbol);
    }

    flush(&mut buffer, &mut entries);
    entries
}
805
/// Splits a multi-line field value into one trimmed entry per non-blank line.
///
/// A leading list bullet (`-` followed by whitespace, or a lone `-`) is
/// removed. A dash that is part of the value itself, as in `-Wall`, is kept:
/// stripping it would corrupt flag-style values.
fn split_multiline_entries(value: &str) -> Vec<String> {
    value
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(|line| {
            let entry = match line.strip_prefix('-') {
                Some(rest) if rest.is_empty() || rest.starts_with(char::is_whitespace) => {
                    rest.trim()
                }
                _ => line,
            };
            entry.to_string()
        })
        .collect()
}
814
815fn parse_multiline_scalar_or_list(value: &str) -> JsonValue {
816    let entries = split_multiline_entries(value);
817    if entries.len() <= 1 {
818        clean_single_line(value)
819            .map(JsonValue::String)
820            .unwrap_or(JsonValue::Null)
821    } else {
822        JsonValue::Array(entries.into_iter().map(JsonValue::String).collect())
823    }
824}
825
/// Normalises a multi-line cabal text value.
///
/// Every line is trimmed, a line consisting of a single `.` becomes an empty
/// line, and the result is trimmed as a whole. Returns `None` when nothing
/// is left.
fn normalize_cabal_multiline(value: &str) -> Option<String> {
    let mut joined = String::with_capacity(value.len());
    for (position, raw_line) in value.lines().enumerate() {
        if position > 0 {
            joined.push('\n');
        }
        let line = raw_line.trim();
        if line != "." {
            joined.push_str(line);
        }
    }

    let normalized = joined.trim();
    if normalized.is_empty() {
        None
    } else {
        Some(normalized.to_string())
    }
}
842
/// Trims `value` and returns it as an owned string, or `None` when only
/// whitespace remains.
fn clean_single_line(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        text => Some(text.to_owned()),
    }
}
847
/// Splits `value` on commas and returns the trimmed, non-empty pieces.
fn split_comma_separated(value: &str) -> Vec<String> {
    let mut parts = Vec::new();
    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            parts.push(piece.to_string());
        }
    }
    parts
}
856
/// Splits a keyword or category list on commas, returning the trimmed,
/// non-empty items.
fn split_keywords(value: &str) -> Vec<String> {
    value
        .split(',')
        .filter_map(|item| {
            let item = item.trim();
            (!item.is_empty()).then(|| item.to_string())
        })
        .collect()
}
860
/// Concatenates `categories` and `keywords`, trimming each item and dropping
/// blanks and ASCII-case-insensitive duplicates. The first spelling of each
/// keyword wins and the original order is kept.
fn merge_keywords(categories: &[String], keywords: &[String]) -> Vec<String> {
    let mut seen_lowercase = HashSet::new();
    let mut merged = Vec::new();

    for raw in categories.iter().chain(keywords) {
        let keyword = raw.trim();
        if keyword.is_empty() {
            continue;
        }
        if seen_lowercase.insert(keyword.to_ascii_lowercase()) {
            merged.push(keyword.to_string());
        }
    }

    merged
}
876
/// Merges the synopsis and the description into a single text.
///
/// When both are present and differ they are joined with a blank line,
/// synopsis first; when they are identical only one copy is returned.
fn combine_summary_and_description(
    synopsis: &Option<String>,
    description: &Option<String>,
) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (Some(summary), Some(details)) if summary == details => Some(summary.to_string()),
        (Some(summary), Some(details)) => Some(format!("{}\n\n{}", summary, details)),
    }
}
889
890fn build_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
891    let author_parties = authors
892        .iter()
893        .filter_map(|author| build_party(author, "author"));
894    let maintainer_parties = maintainers
895        .iter()
896        .filter_map(|maintainer| build_party(maintainer, "maintainer"));
897
898    author_parties.chain(maintainer_parties).collect()
899}
900
901fn build_party(value: &str, role: &str) -> Option<Party> {
902    let (name, email) = split_name_email(value.trim());
903    if name.is_none() && email.is_none() {
904        return None;
905    }
906
907    Some(Party {
908        r#type: Some("person".to_string()),
909        role: Some(role.to_string()),
910        name,
911        email,
912        url: None,
913        organization: None,
914        organization_url: None,
915        timezone: None,
916    })
917}
918
/// Formats a `pkg:hackage/...` package URL.
///
/// Returns `None` without a name; the `@version` suffix is appended only when
/// a version is supplied.
fn build_hackage_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;
    let purl = match version {
        Some(version) => format!("pkg:hackage/{}@{}", name, version),
        None => format!("pkg:hackage/{}", name),
    };
    Some(purl)
}
926
/// Splits a `name-version` spec into its name and version parts.
///
/// Specs containing whitespace or any of `< > = & | ( )` are rejected, since
/// those indicate a constraint expression rather than a pinned package id.
/// Hyphens are then tried from right to left; the first one that leaves a
/// non-empty name on its left and a digit-led suffix on its right is the
/// split point. Returns `None` when no hyphen qualifies.
fn split_hackage_name_version(spec: &str) -> Option<(String, String)> {
    let is_constraint_char =
        |c: char| c.is_whitespace() || matches!(c, '<' | '>' | '=' | '&' | '|' | '(' | ')');
    if spec.contains(is_constraint_char) {
        return None;
    }

    spec.rmatch_indices('-').find_map(|(hyphen_at, _)| {
        let (name, rest) = spec.split_at(hyphen_at);
        // `rest` starts with the one-byte hyphen itself; skip it.
        let version = &rest[1..];
        let starts_with_digit = version.starts_with(|c: char| c.is_ascii_digit());

        (!name.is_empty() && starts_with_digit).then(|| (name.to_owned(), version.to_owned()))
    })
}
957
958fn exact_version_requirement(requirement: &str) -> Option<String> {
959    let trimmed = requirement.trim();
960    if trimmed.is_empty() {
961        return None;
962    }
963
964    let exact_re = Regex::new(r"^==\s*([A-Za-z0-9][A-Za-z0-9\.\-_+]*)$").ok()?;
965    exact_re.captures(trimmed).and_then(|captures| {
966        let version = captures.get(1)?.as_str();
967        (!version.contains('*')).then_some(version.to_string())
968    })
969}
970
/// Converts a hyphenated key into its underscore form by replacing every `-`
/// with `_`; all other characters are left untouched.
fn project_extra_key(key: &str) -> String {
    key.chars()
        .map(|character| if character == '-' { '_' } else { character })
        .collect()
}
974
/// Removes a `--` comment from a single line.
///
/// * A line whose first non-whitespace characters are `--` is entirely a
///   comment and becomes `""`.
/// * Otherwise the first `--` that sits at the start of the line or directly
///   after ASCII whitespace starts a trailing comment; the text before it is
///   returned with trailing whitespace removed.
/// * A `--` glued to preceding text (e.g. inside a URL or name) is not treated
///   as a comment, and a line without any comment is returned unchanged.
fn strip_cabal_comment(line: &str) -> &str {
    if line.trim_start().starts_with("--") {
        return "";
    }

    let raw = line.as_bytes();
    let comment_start = raw.windows(2).enumerate().find_map(|(at, pair)| {
        let is_marker = matches!(pair, [b'-', b'-']);
        let at_word_start = at == 0 || raw[at - 1].is_ascii_whitespace();
        (is_marker && at_word_start).then_some(at)
    });

    match comment_start {
        // `at` points at an ASCII `-`, so it is always a valid char boundary.
        Some(at) => line[..at].trim_end(),
        None => line,
    }
}
993
/// Counts the leading whitespace characters of `line`.
///
/// Every whitespace character counts as one, so a tab contributes 1 just like
/// a space. A line made only of whitespace returns its full character count.
fn indentation(line: &str) -> usize {
    let total = line.chars().count();
    let after_indent = line.trim_start().chars().count();
    total - after_indent
}
999
1000fn mapping_get<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
1001    mapping.get(YamlValue::String(key.to_string()))
1002}
1003
1004fn mapping_string(mapping: &Mapping, key: &str) -> Option<String> {
1005    mapping_get(mapping, key)
1006        .and_then(YamlValue::as_str)
1007        .map(str::to_string)
1008}
1009
// Parser metadata for `*.cabal` package manifests, passed to
// `crate::register_parser!`.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the macro
// definition.
1010crate::register_parser!(
1011    "Hackage Cabal package manifest",
1012    &["**/*.cabal"],
1013    "hackage",
1014    "Haskell",
1015    Some("https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html"),
1016);
1017
// Parser metadata for `cabal.project` workspace files, passed to
// `crate::register_parser!`.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the macro
// definition.
1018crate::register_parser!(
1019    "Hackage cabal.project workspace file",
1020    &["**/cabal.project"],
1021    "hackage",
1022    "Haskell",
1023    Some("https://cabal.readthedocs.io/en/stable/cabal-project-description-file.html"),
1024);
1025
// Parser metadata for Stack `stack.yaml` project manifests, passed to
// `crate::register_parser!`.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the macro
// definition.
1026crate::register_parser!(
1027    "Hackage Stack project manifest",
1028    &["**/stack.yaml"],
1029    "hackage",
1030    "Haskell",
1031    Some("https://docs.haskellstack.org/en/stable/configure/yaml/"),
1032);