Skip to main content

provenant/parsers/
hackage.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use regex::Regex;
9use serde_json::Value as JsonValue;
10use yaml_serde::{Mapping, Value as YamlValue};
11
12use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
13use crate::parsers::utils::{
14    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
15};
16
17use super::PackageParser;
18
/// Package type stamped on every `PackageData` built by the parsers in this file.
const PACKAGE_TYPE: PackageType = PackageType::Hackage;
/// Primary language recorded on every `PackageData` built in this file.
const PRIMARY_LANGUAGE: &str = "Haskell";
21
/// Parser for package description files whose extension is `cabal`.
pub struct HackageCabalParser;
23
/// Parser for project files named exactly `cabal.project`.
pub struct HackageCabalProjectParser;
25
/// Parser for Stack configuration files named exactly `stack.yaml`.
pub struct HackageStackYamlParser;
27
28impl PackageParser for HackageCabalParser {
29    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
30
31    fn is_match(path: &Path) -> bool {
32        path.extension().is_some_and(|ext| ext == "cabal")
33    }
34
35    fn extract_packages(path: &Path) -> Vec<PackageData> {
36        let content = match read_file_to_string(path, None) {
37            Ok(content) => content,
38            Err(error) => {
39                warn!("Failed to read cabal file {:?}: {}", path, error);
40                return vec![default_package_data(DatasourceId::HackageCabal)];
41            }
42        };
43
44        vec![parse_cabal_manifest(&content)]
45    }
46}
47
48impl PackageParser for HackageCabalProjectParser {
49    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
50
51    fn is_match(path: &Path) -> bool {
52        path.file_name().is_some_and(|name| name == "cabal.project")
53    }
54
55    fn extract_packages(path: &Path) -> Vec<PackageData> {
56        let content = match read_file_to_string(path, None) {
57            Ok(content) => content,
58            Err(error) => {
59                warn!("Failed to read cabal.project {:?}: {}", path, error);
60                return vec![default_package_data(DatasourceId::HackageCabalProject)];
61            }
62        };
63
64        vec![parse_cabal_project(&content)]
65    }
66}
67
68impl PackageParser for HackageStackYamlParser {
69    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
70
71    fn is_match(path: &Path) -> bool {
72        path.file_name().is_some_and(|name| name == "stack.yaml")
73    }
74
75    fn extract_packages(path: &Path) -> Vec<PackageData> {
76        let content = match read_file_to_string(path, None) {
77            Ok(content) => content,
78            Err(error) => {
79                warn!("Failed to read stack.yaml {:?}: {}", path, error);
80                return vec![default_package_data(DatasourceId::HackageStackYaml)];
81            }
82        };
83
84        let yaml: YamlValue = match yaml_serde::from_str(&content) {
85            Ok(yaml) => yaml,
86            Err(error) => {
87                warn!("Failed to parse stack.yaml {:?}: {}", path, error);
88                return vec![default_package_data(DatasourceId::HackageStackYaml)];
89            }
90        };
91
92        vec![parse_stack_yaml(&yaml)]
93    }
94}
95
/// The component stanza of a `.cabal` file that a `build-depends` field was found under.
#[derive(Clone, Debug, Default)]
struct ComponentContext {
    /// Stanza keyword, e.g. `library`, `executable`, `test-suite`, `benchmark` or `common`.
    component_type: String,
    /// Text following the keyword on the header line, if there was any.
    component_name: Option<String>,
}
101
/// Intermediate result of scanning a `.cabal` file, before it is turned into a `PackageData`.
#[derive(Debug, Default)]
struct CabalData {
    /// Value of the top-level `name` field.
    name: Option<String>,
    /// Value of the top-level `version` field.
    version: Option<String>,
    /// Value of the top-level `synopsis` field.
    synopsis: Option<String>,
    /// Normalized value of the top-level `description` field.
    description: Option<String>,
    /// Value of the top-level `license` field.
    license: Option<String>,
    /// Value of the top-level `homepage` field.
    homepage_url: Option<String>,
    /// Value of the top-level `bug-reports` field.
    bug_tracking_url: Option<String>,
    /// First `location` value seen while inside a `source-repository` stanza.
    vcs_url: Option<String>,
    /// Comma-separated entries of the top-level `author` field.
    authors: Vec<String>,
    /// Comma-separated entries of the top-level `maintainer` field.
    maintainers: Vec<String>,
    /// Comma-separated entries of the top-level `category` field.
    category_keywords: Vec<String>,
    /// Comma-separated entries of the top-level `keywords` field.
    explicit_keywords: Vec<String>,
    /// Dependencies collected from every `build-depends` field.
    dependencies: Vec<Dependency>,
}
118
119fn default_package_data(datasource_id: DatasourceId) -> PackageData {
120    PackageData {
121        package_type: Some(PACKAGE_TYPE),
122        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
123        datasource_id: Some(datasource_id),
124        ..Default::default()
125    }
126}
127
128fn parse_cabal_manifest(content: &str) -> PackageData {
129    let parsed = parse_cabal_data(content);
130    let keywords = merge_keywords(&parsed.category_keywords, &parsed.explicit_keywords);
131    let description =
132        combine_summary_and_description(&parsed.synopsis, &parsed.description).map(truncate_field);
133    let parties = build_parties(&parsed.authors, &parsed.maintainers);
134    let purl =
135        build_hackage_purl(parsed.name.as_deref(), parsed.version.as_deref()).map(truncate_field);
136    let repository_homepage_url = parsed
137        .name
138        .as_ref()
139        .map(|name| match parsed.version.as_ref() {
140            Some(version) => truncate_field(format!(
141                "https://hackage.haskell.org/package/{}-{}",
142                name, version
143            )),
144            None => truncate_field(format!("https://hackage.haskell.org/package/{}", name)),
145        });
146
147    PackageData {
148        package_type: Some(PACKAGE_TYPE),
149        namespace: None,
150        name: parsed.name,
151        version: parsed.version,
152        qualifiers: None,
153        subpath: None,
154        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
155        description,
156        release_date: None,
157        parties,
158        keywords,
159        homepage_url: parsed.homepage_url,
160        download_url: None,
161        size: None,
162        sha1: None,
163        md5: None,
164        sha256: None,
165        sha512: None,
166        bug_tracking_url: parsed.bug_tracking_url,
167        code_view_url: None,
168        vcs_url: parsed.vcs_url,
169        copyright: None,
170        holder: None,
171        declared_license_expression: None,
172        declared_license_expression_spdx: None,
173        license_detections: Vec::new(),
174        other_license_expression: None,
175        other_license_expression_spdx: None,
176        other_license_detections: Vec::new(),
177        extracted_license_statement: parsed.license,
178        notice_text: None,
179        source_packages: Vec::new(),
180        file_references: Vec::new(),
181        is_private: false,
182        is_virtual: false,
183        extra_data: None,
184        dependencies: parsed.dependencies,
185        repository_homepage_url,
186        repository_download_url: None,
187        api_data_url: None,
188        datasource_id: Some(DatasourceId::HackageCabal),
189        purl,
190    }
191}
192
193fn parse_cabal_project(content: &str) -> PackageData {
194    let mut package_data = default_package_data(DatasourceId::HackageCabalProject);
195    let lines: Vec<&str> = content.lines().collect();
196    let mut dependencies = Vec::new();
197    let mut extra_data = HashMap::new();
198    let mut source_repo_entries: Vec<HashMap<String, JsonValue>> = Vec::new();
199    let mut current_source_repo: Option<HashMap<String, JsonValue>> = None;
200    let mut index = 0;
201    let mut iteration_count = 0usize;
202
203    while index < lines.len() {
204        iteration_count += 1;
205        if iteration_count > MAX_ITERATION_COUNT {
206            warn!(
207                "parse_cabal_project: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
208                MAX_ITERATION_COUNT, index
209            );
210            break;
211        }
212
213        let cleaned = strip_cabal_comment(lines[index]);
214        let trimmed = cleaned.trim();
215        let indent = indentation(cleaned);
216
217        if trimmed.is_empty() {
218            index += 1;
219            continue;
220        }
221
222        if indent == 0 && trimmed == "source-repository-package" {
223            if let Some(entry) = current_source_repo.take() {
224                source_repo_entries.push(entry);
225            }
226            current_source_repo = Some(HashMap::new());
227            index += 1;
228            continue;
229        }
230
231        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
232            if indent == 0
233                && let Some(entry) = current_source_repo.take()
234            {
235                source_repo_entries.push(entry);
236            }
237            index += 1;
238            continue;
239        };
240
241        if current_source_repo.is_some() && indent > 0 {
242            if let Some(source_repo) = current_source_repo.as_mut() {
243                source_repo.insert(
244                    project_extra_key(&key),
245                    parse_multiline_scalar_or_list(&value),
246                );
247            }
248            index = next_index + 1;
249            continue;
250        }
251
252        if current_source_repo.is_some()
253            && indent == 0
254            && key != "source-repository-package"
255            && let Some(entry) = current_source_repo.take()
256        {
257            source_repo_entries.push(entry);
258        }
259
260        match key.as_str() {
261            "packages" => {
262                dependencies.extend(parse_path_like_entries(&value, "packages", false));
263            }
264            "optional-packages" => {
265                dependencies.extend(parse_path_like_entries(&value, "optional-packages", true));
266            }
267            "extra-packages" => {
268                dependencies.extend(parse_hackage_spec_entries(&value, "extra-packages", None));
269            }
270            "import" => {
271                dependencies.extend(parse_import_entries(&value));
272            }
273            _ => {
274                extra_data.insert(
275                    project_extra_key(&key),
276                    parse_multiline_scalar_or_list(&value),
277                );
278            }
279        }
280
281        index = next_index + 1;
282    }
283
284    if let Some(entry) = current_source_repo.take() {
285        source_repo_entries.push(entry);
286    }
287
288    for entry in source_repo_entries.into_iter().take(MAX_ITERATION_COUNT) {
289        dependencies.push(build_source_repository_dependency(entry));
290    }
291
292    package_data.dependencies = dependencies;
293    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
294    package_data
295}
296
297fn parse_stack_yaml(yaml: &YamlValue) -> PackageData {
298    let mut package_data = default_package_data(DatasourceId::HackageStackYaml);
299    let Some(mapping) = yaml.as_mapping() else {
300        return package_data;
301    };
302
303    let mut dependencies = Vec::new();
304    let mut extra_data = HashMap::new();
305
306    if let Some(resolver) = mapping_get(mapping, "resolver")
307        && let Ok(value) = serde_json::to_value(resolver)
308    {
309        extra_data.insert("resolver".to_string(), value);
310    }
311
312    if let Some(snapshot) = mapping_get(mapping, "snapshot")
313        && let Ok(value) = serde_json::to_value(snapshot)
314    {
315        extra_data.insert("snapshot".to_string(), value);
316    }
317
318    if let Some(packages) = mapping_get(mapping, "packages") {
319        dependencies.extend(parse_stack_package_entries(packages));
320    }
321
322    if let Some(extra_deps) = mapping_get(mapping, "extra-deps") {
323        dependencies.extend(parse_stack_extra_dep_entries(extra_deps));
324    }
325
326    for (key, value) in mapping.iter().take(MAX_ITERATION_COUNT) {
327        let Some(key) = key.as_str() else {
328            continue;
329        };
330
331        if matches!(key, "resolver" | "snapshot" | "packages" | "extra-deps") {
332            continue;
333        }
334
335        if let Ok(json_value) = serde_json::to_value(value) {
336            extra_data.insert(key.to_string(), json_value);
337        }
338    }
339
340    package_data.dependencies = dependencies;
341    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
342    package_data
343}
344
345fn parse_cabal_data(content: &str) -> CabalData {
346    let mut data = CabalData::default();
347    let lines: Vec<&str> = content.lines().collect();
348    let mut current_component: Option<ComponentContext> = None;
349    let mut in_source_repository = false;
350    let mut index = 0;
351    let mut iteration_count = 0usize;
352
353    while index < lines.len() {
354        iteration_count += 1;
355        if iteration_count > MAX_ITERATION_COUNT {
356            warn!(
357                "parse_cabal_data: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
358                MAX_ITERATION_COUNT, index
359            );
360            break;
361        }
362        let cleaned = strip_cabal_comment(lines[index]);
363        let trimmed = cleaned.trim();
364        let indent = indentation(cleaned);
365
366        if trimmed.is_empty() {
367            index += 1;
368            continue;
369        }
370
371        if indent == 0 && !trimmed.contains(':') {
372            current_component = parse_component_header(trimmed);
373            in_source_repository = trimmed.starts_with("source-repository");
374            index += 1;
375            continue;
376        }
377
378        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
379            index += 1;
380            continue;
381        };
382
383        match key.as_str() {
384            "name" if indent == 0 => data.name = clean_single_line(&value).map(truncate_field),
385            "version" if indent == 0 => {
386                data.version = clean_single_line(&value).map(truncate_field)
387            }
388            "synopsis" if indent == 0 => {
389                data.synopsis = clean_single_line(&value).map(truncate_field)
390            }
391            "description" if indent == 0 => {
392                data.description = normalize_cabal_multiline(&value).map(truncate_field);
393            }
394            "license" if indent == 0 => {
395                data.license = clean_single_line(&value).map(truncate_field)
396            }
397            "homepage" if indent == 0 => {
398                data.homepage_url = clean_single_line(&value).map(truncate_field)
399            }
400            "bug-reports" if indent == 0 => {
401                data.bug_tracking_url = clean_single_line(&value).map(truncate_field)
402            }
403            "author" if indent == 0 => data.authors.extend(split_comma_separated(&value)),
404            "maintainer" if indent == 0 => {
405                data.maintainers.extend(split_comma_separated(&value));
406            }
407            "category" if indent == 0 => {
408                data.category_keywords.extend(split_keywords(&value));
409            }
410            "keywords" if indent == 0 => {
411                data.explicit_keywords.extend(split_keywords(&value));
412            }
413            "location" if in_source_repository && data.vcs_url.is_none() => {
414                data.vcs_url = clean_single_line(&value).map(truncate_field);
415            }
416            "build-depends" => {
417                data.dependencies
418                    .extend(parse_build_depends(&value, current_component.as_ref()));
419            }
420            _ => {}
421        }
422
423        index = next_index + 1;
424    }
425
426    data
427}
428
429fn parse_build_depends(value: &str, component: Option<&ComponentContext>) -> Vec<Dependency> {
430    if component.is_some_and(|component| component.component_type == "common") {
431        return Vec::new();
432    }
433
434    split_dependency_entries(value)
435        .into_iter()
436        .filter_map(|entry| {
437            parse_hackage_spec_dependency(&entry, Some("build-depends"), component, None)
438        })
439        .collect()
440}
441
442fn parse_path_like_entries(value: &str, scope: &str, optional: bool) -> Vec<Dependency> {
443    split_multiline_entries(value)
444        .into_iter()
445        .filter(|entry| !entry.is_empty())
446        .map(|entry| {
447            let mut extra_data = HashMap::new();
448            extra_data.insert("path".to_string(), JsonValue::String(entry.clone()));
449
450            Dependency {
451                purl: None,
452                extracted_requirement: Some(truncate_field(entry)),
453                scope: Some(scope.to_string()),
454                is_runtime: None,
455                is_optional: Some(optional),
456                is_pinned: Some(false),
457                is_direct: Some(true),
458                resolved_package: None,
459                extra_data: Some(extra_data),
460            }
461        })
462        .collect()
463}
464
465fn parse_import_entries(value: &str) -> Vec<Dependency> {
466    split_multiline_entries(value)
467        .into_iter()
468        .filter(|entry| !entry.is_empty())
469        .map(|entry| Dependency {
470            purl: None,
471            extracted_requirement: Some(truncate_field(entry)),
472            scope: Some("import".to_string()),
473            is_runtime: None,
474            is_optional: Some(false),
475            is_pinned: Some(false),
476            is_direct: Some(true),
477            resolved_package: None,
478            extra_data: None,
479        })
480        .collect()
481}
482
483fn parse_hackage_spec_entries(
484    value: &str,
485    scope: &str,
486    is_runtime: Option<bool>,
487) -> Vec<Dependency> {
488    split_multiline_entries(value)
489        .into_iter()
490        .filter_map(|entry| parse_hackage_spec_dependency(&entry, Some(scope), None, is_runtime))
491        .collect()
492}
493
494fn parse_stack_package_entries(value: &YamlValue) -> Vec<Dependency> {
495    let Some(sequence) = value.as_sequence() else {
496        return Vec::new();
497    };
498
499    sequence
500        .iter()
501        .take(MAX_ITERATION_COUNT)
502        .filter_map(|entry| match entry {
503            YamlValue::String(path) => {
504                let mut extra_data = HashMap::new();
505                extra_data.insert("path".to_string(), JsonValue::String(path.clone()));
506
507                Some(Dependency {
508                    purl: None,
509                    extracted_requirement: Some(truncate_field(path.clone())),
510                    scope: Some("packages".to_string()),
511                    is_runtime: None,
512                    is_optional: Some(false),
513                    is_pinned: Some(false),
514                    is_direct: Some(true),
515                    resolved_package: None,
516                    extra_data: Some(extra_data),
517                })
518            }
519            YamlValue::Mapping(map) => {
520                let extracted_requirement = mapping_string(map, "location")
521                    .or_else(|| mapping_string(map, "git"))
522                    .or_else(|| mapping_string(map, "url"))
523                    .map(truncate_field);
524                let extra_data = serde_json::to_value(entry)
525                    .ok()
526                    .and_then(|value| value.as_object().cloned())
527                    .map(|map| map.into_iter().collect::<HashMap<_, _>>());
528
529                Some(Dependency {
530                    purl: None,
531                    extracted_requirement,
532                    scope: Some("packages".to_string()),
533                    is_runtime: None,
534                    is_optional: Some(false),
535                    is_pinned: Some(mapping_string(map, "commit").is_some()),
536                    is_direct: Some(true),
537                    resolved_package: None,
538                    extra_data,
539                })
540            }
541            _ => None,
542        })
543        .collect()
544}
545
546fn parse_stack_extra_dep_entries(value: &YamlValue) -> Vec<Dependency> {
547    let Some(sequence) = value.as_sequence() else {
548        return Vec::new();
549    };
550
551    sequence
552        .iter()
553        .take(MAX_ITERATION_COUNT)
554        .filter_map(|entry| match entry {
555            YamlValue::String(spec) => parse_stack_extra_dep_string(spec),
556            YamlValue::Mapping(map) => Some(parse_stack_extra_dep_mapping(map, entry)),
557            _ => None,
558        })
559        .collect()
560}
561
562fn parse_stack_extra_dep_string(spec: &str) -> Option<Dependency> {
563    let trimmed = spec.trim();
564    if trimmed.is_empty() {
565        return None;
566    }
567
568    let (package_spec, pantry_suffix) = trimmed
569        .split_once('@')
570        .map_or((trimmed, None), |(package_spec, suffix)| {
571            (package_spec, Some(suffix))
572        });
573
574    let mut dependency =
575        parse_hackage_spec_dependency(package_spec, Some("extra-deps"), None, None).unwrap_or(
576            Dependency {
577                purl: None,
578                extracted_requirement: Some(truncate_field(package_spec.to_string())),
579                scope: Some("extra-deps".to_string()),
580                is_runtime: None,
581                is_optional: Some(false),
582                is_pinned: Some(false),
583                is_direct: Some(true),
584                resolved_package: None,
585                extra_data: None,
586            },
587        );
588
589    if let Some(suffix) = pantry_suffix {
590        let mut extra_data = dependency.extra_data.take().unwrap_or_default();
591        extra_data.insert("pantry".to_string(), JsonValue::String(suffix.to_string()));
592        dependency.extra_data = Some(extra_data);
593        dependency.is_pinned = Some(true);
594        if dependency.extracted_requirement.is_none() {
595            dependency.extracted_requirement = Some(truncate_field(package_spec.to_string()));
596        }
597    }
598
599    dependency.scope = Some("extra-deps".to_string());
600    Some(dependency)
601}
602
603fn parse_stack_extra_dep_mapping(map: &Mapping, raw_value: &YamlValue) -> Dependency {
604    let name = mapping_string(map, "name");
605    let version = mapping_string(map, "version");
606    let purl = build_hackage_purl(name.as_deref(), version.as_deref()).map(truncate_field);
607    let extracted_requirement = version
608        .clone()
609        .or_else(|| mapping_string(map, "git"))
610        .or_else(|| mapping_string(map, "url"))
611        .map(truncate_field);
612    let extra_data = serde_json::to_value(raw_value)
613        .ok()
614        .and_then(|value| value.as_object().cloned())
615        .map(|map| map.into_iter().collect::<HashMap<_, _>>());
616
617    Dependency {
618        purl,
619        extracted_requirement,
620        scope: Some("extra-deps".to_string()),
621        is_runtime: None,
622        is_optional: Some(false),
623        is_pinned: Some(version.is_some() || mapping_string(map, "commit").is_some()),
624        is_direct: Some(true),
625        resolved_package: None,
626        extra_data,
627    }
628}
629
630fn build_source_repository_dependency(extra_data: HashMap<String, JsonValue>) -> Dependency {
631    let extracted_requirement = extra_data
632        .get("location")
633        .and_then(JsonValue::as_str)
634        .map(str::to_string)
635        .or_else(|| {
636            extra_data
637                .get("tag")
638                .and_then(JsonValue::as_str)
639                .map(str::to_string)
640        })
641        .map(truncate_field);
642
643    Dependency {
644        purl: None,
645        extracted_requirement,
646        scope: Some("source-repository-package".to_string()),
647        is_runtime: None,
648        is_optional: Some(false),
649        is_pinned: Some(
650            extra_data.contains_key("tag")
651                || extra_data.contains_key("commit")
652                || extra_data.contains_key("sha256"),
653        ),
654        is_direct: Some(true),
655        resolved_package: None,
656        extra_data: Some(extra_data),
657    }
658}
659
660fn parse_hackage_spec_dependency(
661    spec: &str,
662    scope: Option<&str>,
663    component: Option<&ComponentContext>,
664    is_runtime: Option<bool>,
665) -> Option<Dependency> {
666    let trimmed = spec.trim();
667    if trimmed.is_empty() {
668        return None;
669    }
670
671    let can_split_name_version = matches!(scope, Some("extra-packages" | "extra-deps"));
672
673    if can_split_name_version && let Some((name, version)) = split_hackage_name_version(trimmed) {
674        let mut extra_data = HashMap::new();
675        if let Some(component) = component {
676            extra_data.insert(
677                "component_type".to_string(),
678                JsonValue::String(component.component_type.clone()),
679            );
680            if let Some(component_name) = &component.component_name {
681                extra_data.insert(
682                    "component_name".to_string(),
683                    JsonValue::String(component_name.clone()),
684                );
685            }
686        }
687
688        return Some(Dependency {
689            purl: Some(truncate_field(format!("pkg:hackage/{}@{}", name, version))),
690            extracted_requirement: Some(truncate_field(version)),
691            scope: scope.map(str::to_string),
692            is_runtime: component.map(component_is_runtime).or(is_runtime),
693            is_optional: Some(false),
694            is_pinned: Some(true),
695            is_direct: Some(true),
696            resolved_package: None,
697            extra_data: (!extra_data.is_empty()).then_some(extra_data),
698        });
699    }
700
701    let name_re = Regex::new(r"^(?P<name>[A-Za-z0-9][A-Za-z0-9_\.-]*)").ok()?;
702    let captures = name_re.captures(trimmed)?;
703    let name = captures.name("name")?.as_str().to_string();
704    let requirement = trimmed[name.len()..].trim();
705    let implicit_name_version = if can_split_name_version && requirement.is_empty() {
706        split_hackage_name_version(trimmed)
707    } else {
708        None
709    };
710    let resolved_name = implicit_name_version
711        .as_ref()
712        .map(|(resolved_name, _)| resolved_name.as_str())
713        .unwrap_or(name.as_str());
714    let exact_version = exact_version_requirement(requirement).or_else(|| {
715        implicit_name_version
716            .as_ref()
717            .map(|(_, version)| version.clone())
718    });
719    let purl = if let Some(version) = exact_version.as_deref() {
720        Some(truncate_field(format!(
721            "pkg:hackage/{}@{}",
722            resolved_name, version
723        )))
724    } else {
725        Some(truncate_field(format!("pkg:hackage/{}", resolved_name)))
726    };
727
728    let mut extra_data = HashMap::new();
729    if let Some(component) = component {
730        extra_data.insert(
731            "component_type".to_string(),
732            JsonValue::String(component.component_type.clone()),
733        );
734        if let Some(component_name) = &component.component_name {
735            extra_data.insert(
736                "component_name".to_string(),
737                JsonValue::String(component_name.clone()),
738            );
739        }
740    }
741
742    let extracted_requirement = if let Some((_, version)) = implicit_name_version {
743        Some(truncate_field(version))
744    } else {
745        (!requirement.is_empty())
746            .then_some(requirement.to_string())
747            .map(truncate_field)
748    };
749
750    Some(Dependency {
751        purl,
752        extracted_requirement,
753        scope: scope.map(str::to_string),
754        is_runtime: component.map(component_is_runtime).or(is_runtime),
755        is_optional: Some(false),
756        is_pinned: Some(exact_version.is_some()),
757        is_direct: Some(true),
758        resolved_package: None,
759        extra_data: (!extra_data.is_empty()).then_some(extra_data),
760    })
761}
762
763fn component_is_runtime(component: &ComponentContext) -> bool {
764    !matches!(
765        component.component_type.as_str(),
766        "test-suite" | "benchmark"
767    )
768}
769
770fn parse_component_header(trimmed: &str) -> Option<ComponentContext> {
771    const COMPONENT_PREFIXES: &[&str] = &[
772        "library",
773        "foreign-library",
774        "executable",
775        "test-suite",
776        "benchmark",
777        "common",
778    ];
779
780    COMPONENT_PREFIXES.iter().find_map(|prefix| {
781        trimmed
782            .strip_prefix(prefix)
783            .map(|remainder| ComponentContext {
784                component_type: (*prefix).to_string(),
785                component_name: clean_single_line(remainder),
786            })
787    })
788}
789
790fn collect_indented_field(lines: &[&str], start_index: usize) -> Option<(String, String, usize)> {
791    let current = strip_cabal_comment(lines[start_index]);
792    let trimmed = current.trim();
793    let indent = indentation(current);
794    let colon_index = trimmed.find(':')?;
795    let key = trimmed[..colon_index].trim().to_ascii_lowercase();
796    let mut values = vec![trimmed[colon_index + 1..].trim().to_string()];
797    let mut last_index = start_index;
798
799    for (next_index, line) in lines.iter().enumerate().skip(start_index + 1) {
800        let next = strip_cabal_comment(line);
801        let next_trimmed = next.trim();
802        if next_trimmed.is_empty() {
803            break;
804        }
805
806        if indentation(next) <= indent {
807            break;
808        }
809
810        values.push(next_trimmed.to_string());
811        last_index = next_index;
812    }
813
814    Some((key, values.join("\n"), last_index))
815}
816
817fn split_dependency_entries(value: &str) -> Vec<String> {
818    let mut entries = Vec::new();
819    let mut current = String::new();
820    let mut paren_depth = 0usize;
821    let mut brace_depth = 0usize;
822    let mut bracket_depth = 0usize;
823
824    for character in value.chars().take(MAX_ITERATION_COUNT) {
825        match character {
826            '(' => paren_depth += 1,
827            ')' => paren_depth = paren_depth.saturating_sub(1),
828            '{' => brace_depth += 1,
829            '}' => brace_depth = brace_depth.saturating_sub(1),
830            '[' => bracket_depth += 1,
831            ']' => bracket_depth = bracket_depth.saturating_sub(1),
832            ',' if paren_depth == 0 && brace_depth == 0 && bracket_depth == 0 => {
833                let trimmed = current.trim();
834                if !trimmed.is_empty() {
835                    entries.push(trimmed.to_string());
836                }
837                current.clear();
838                continue;
839            }
840            _ => {}
841        }
842
843        current.push(character);
844    }
845
846    let trimmed = current.trim();
847    if !trimmed.is_empty() {
848        entries.push(trimmed.to_string());
849    }
850
851    entries
852}
853
854fn split_multiline_entries(value: &str) -> Vec<String> {
855    value
856        .lines()
857        .take(MAX_ITERATION_COUNT)
858        .map(str::trim)
859        .filter(|line| !line.is_empty())
860        .map(|line| line.strip_prefix("-").unwrap_or(line).trim().to_string())
861        .collect()
862}
863
864fn parse_multiline_scalar_or_list(value: &str) -> JsonValue {
865    let entries = split_multiline_entries(value);
866    if entries.len() <= 1 {
867        clean_single_line(value)
868            .map(JsonValue::String)
869            .unwrap_or(JsonValue::Null)
870    } else {
871        JsonValue::Array(entries.into_iter().map(JsonValue::String).collect())
872    }
873}
874
/// Normalizes a multi-line field value.
///
/// Every line is trimmed, a line consisting only of `.` becomes an empty
/// line, and the lines are rejoined with `\n`. Leading and trailing whitespace
/// of the result is removed; `None` is returned when nothing remains.
fn normalize_cabal_multiline(value: &str) -> Option<String> {
    let mut joined = String::new();

    for (position, raw_line) in value.lines().enumerate() {
        if position > 0 {
            joined.push('\n');
        }
        let line = raw_line.trim();
        if line != "." {
            joined.push_str(line);
        }
    }

    let normalized = joined.trim();
    if normalized.is_empty() {
        None
    } else {
        Some(normalized.to_string())
    }
}
891
/// Trims `value` and returns it as an owned string, or `None` when the
/// trimmed text is empty.
fn clean_single_line(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        text => Some(text.to_owned()),
    }
}
896
/// Splits `value` on every comma, trims each piece, and returns the
/// non-empty pieces in their original order.
fn split_comma_separated(value: &str) -> Vec<String> {
    let mut parts = Vec::new();

    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            parts.push(piece.to_owned());
        }
    }

    parts
}
905
/// Splits a keyword field value into individual keywords.
///
/// This is a named alias for `split_comma_separated`: the value is split on
/// commas, each piece is trimmed, and empty pieces are dropped.
fn split_keywords(value: &str) -> Vec<String> {
    split_comma_separated(value)
}
909
/// Merges categories and keywords into one de-duplicated list.
///
/// Categories come first, followed by keywords. Each entry is trimmed and
/// empty entries are skipped. Duplicates are detected ignoring ASCII case;
/// the first spelling encountered is the one that is kept.
fn merge_keywords(categories: &[String], keywords: &[String]) -> Vec<String> {
    let mut seen_lowercase = HashSet::new();
    let mut merged = Vec::new();

    for raw in categories.iter().chain(keywords) {
        let label = raw.trim();
        if label.is_empty() {
            continue;
        }

        // `insert` returns false when the lowercase form was already present.
        if seen_lowercase.insert(label.to_ascii_lowercase()) {
            merged.push(label.to_owned());
        }
    }

    merged
}
925
/// Combines a synopsis and a description into a single description string.
///
/// * Both present and equal: the synopsis alone.
/// * Both present and different: synopsis, a blank line, then description.
/// * Only one present: that one.
/// * Neither present: `None`.
fn combine_summary_and_description(
    synopsis: &Option<String>,
    description: &Option<String>,
) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_owned()),
        (Some(summary), Some(details)) => {
            if summary == details {
                // Avoid repeating identical text twice.
                Some(summary.to_owned())
            } else {
                Some(format!("{}\n\n{}", summary, details))
            }
        }
    }
}
938
939fn build_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
940    let author_parties = authors
941        .iter()
942        .filter_map(|author| build_party(author, "author"));
943    let maintainer_parties = maintainers
944        .iter()
945        .filter_map(|maintainer| build_party(maintainer, "maintainer"));
946
947    author_parties.chain(maintainer_parties).collect()
948}
949
950fn build_party(value: &str, role: &str) -> Option<Party> {
951    let (name, email) = split_name_email(value.trim());
952    if name.is_none() && email.is_none() {
953        return None;
954    }
955
956    Some(Party {
957        r#type: Some("person".to_string()),
958        role: Some(role.to_string()),
959        name,
960        email,
961        url: None,
962        organization: None,
963        organization_url: None,
964        timezone: None,
965    })
966}
967
/// Formats a `pkg:hackage/...` package URL.
///
/// Returns `pkg:hackage/<name>@<version>` when both parts are given,
/// `pkg:hackage/<name>` when only the name is given, and `None` when the name
/// is missing. The inputs are inserted verbatim.
fn build_hackage_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;

    let purl = match version {
        Some(version) => format!("pkg:hackage/{}@{}", name, version),
        None => format!("pkg:hackage/{}", name),
    };

    Some(purl)
}
975
/// Splits a `name-version` spec into its name and version parts.
///
/// The split point is the right-most `-` that has a non-empty prefix and a
/// suffix beginning with an ASCII digit. Returns `None` when the spec
/// contains whitespace or any of `<`, `>`, `=`, `&`, `|`, `(`, `)`, or when no
/// hyphen satisfies the rule.
fn split_hackage_name_version(spec: &str) -> Option<(String, String)> {
    let has_disallowed_character = spec
        .chars()
        .any(|ch| ch.is_whitespace() || matches!(ch, '<' | '>' | '=' | '&' | '|' | '(' | ')'));
    if has_disallowed_character {
        return None;
    }

    // Walk the hyphens from right to left and take the first acceptable split.
    spec.rmatch_indices('-').find_map(|(dash, _)| {
        let name = &spec[..dash];
        let version = &spec[dash + 1..];
        let version_starts_with_digit = version
            .chars()
            .next()
            .is_some_and(|ch| ch.is_ascii_digit());

        (!name.is_empty() && version_starts_with_digit)
            .then(|| (name.to_owned(), version.to_owned()))
    })
}
1006
1007fn exact_version_requirement(requirement: &str) -> Option<String> {
1008    let trimmed = requirement.trim();
1009    if trimmed.is_empty() {
1010        return None;
1011    }
1012
1013    let exact_re = Regex::new(r"^==\s*([A-Za-z0-9][A-Za-z0-9\.\-_+]*)$").ok()?;
1014    exact_re.captures(trimmed).and_then(|captures| {
1015        let version = captures.get(1)?.as_str();
1016        (!version.contains('*')).then_some(version.to_string())
1017    })
1018}
1019
/// Returns `key` with every `-` replaced by `_`.
fn project_extra_key(key: &str) -> String {
    key.chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}
1023
/// Removes a `--` comment from a line.
///
/// A line whose first non-whitespace characters are `--` is a full-line
/// comment and becomes the empty string. Otherwise the line is cut at the
/// first `--` that is preceded by ASCII whitespace, and trailing whitespace
/// before the cut is removed. A `--` embedded in a word (for example
/// `foo--bar`) is left alone, and a line without a comment is returned
/// unchanged.
fn strip_cabal_comment(line: &str) -> &str {
    if line.trim_start().starts_with("--") {
        return "";
    }

    let bytes = line.as_bytes();
    let comment_start = bytes.windows(2).enumerate().find_map(|(offset, pair)| {
        let is_marker = matches!(pair, [b'-', b'-']);
        let at_boundary = offset == 0 || bytes[offset - 1].is_ascii_whitespace();
        (is_marker && at_boundary).then_some(offset)
    });

    match comment_start {
        // `offset` points at an ASCII `-`, so it is always a valid char boundary.
        Some(offset) => line[..offset].trim_end(),
        None => line,
    }
}
1042
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes the same
/// as a space.
fn indentation(line: &str) -> usize {
    let mut width = 0;

    for ch in line.chars() {
        if !ch.is_whitespace() {
            break;
        }
        width += 1;
    }

    width
}
1048
1049fn mapping_get<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
1050    mapping.get(YamlValue::String(key.to_string()))
1051}
1052
1053fn mapping_string(mapping: &Mapping, key: &str) -> Option<String> {
1054    mapping_get(mapping, key)
1055        .and_then(YamlValue::as_str)
1056        .map(str::to_string)
1057}
1058
// Registration entry for `*.cabal` package manifests.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Hackage Cabal package manifest",
    &["**/*.cabal"],
    "hackage",
    "Haskell",
    Some("https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html"),
);
1066
// Registration entry for `cabal.project` workspace files.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Hackage cabal.project workspace file",
    &["**/cabal.project"],
    "hackage",
    "Haskell",
    Some("https://cabal.readthedocs.io/en/stable/cabal-project-description-file.html"),
);
1074
// Registration entry for Stack `stack.yaml` project manifests.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Hackage Stack project manifest",
    &["**/stack.yaml"],
    "hackage",
    "Haskell",
    Some("https://docs.haskellstack.org/en/stable/configure/yaml/"),
);