Skip to main content

provenant/parsers/
hackage.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use regex::Regex;
9use serde_json::Value as JsonValue;
10use yaml_serde::{Mapping, Value as YamlValue};
11
12use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
13use crate::parsers::utils::{
14    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
15};
16
17use super::PackageParser;
18use super::metadata::ParserMetadata;
19
20const PACKAGE_TYPE: PackageType = PackageType::Hackage;
21const PRIMARY_LANGUAGE: &str = "Haskell";
22
/// Parser for `*.cabal` package manifests.
pub struct HackageCabalParser;

/// Parser for `cabal.project` workspace files.
pub struct HackageCabalProjectParser;

/// Parser for Stack `stack.yaml` project manifests.
pub struct HackageStackYamlParser;
28
29impl PackageParser for HackageCabalParser {
30    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
31
32    fn metadata() -> Vec<ParserMetadata> {
33        vec![ParserMetadata {
34            description: "Hackage Cabal package manifest",
35            file_patterns: &["**/*.cabal"],
36            package_type: "hackage",
37            primary_language: "Haskell",
38            documentation_url: Some(
39                "https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html",
40            ),
41        }]
42    }
43
44    fn is_match(path: &Path) -> bool {
45        path.extension().is_some_and(|ext| ext == "cabal")
46    }
47
48    fn extract_packages(path: &Path) -> Vec<PackageData> {
49        let content = match read_file_to_string(path, None) {
50            Ok(content) => content,
51            Err(error) => {
52                warn!("Failed to read cabal file {:?}: {}", path, error);
53                return vec![default_package_data(DatasourceId::HackageCabal)];
54            }
55        };
56
57        vec![parse_cabal_manifest(&content)]
58    }
59}
60
61impl PackageParser for HackageCabalProjectParser {
62    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
63
64    fn metadata() -> Vec<ParserMetadata> {
65        vec![ParserMetadata {
66            description: "Hackage cabal.project workspace file",
67            file_patterns: &["**/cabal.project"],
68            package_type: "hackage",
69            primary_language: "Haskell",
70            documentation_url: Some(
71                "https://cabal.readthedocs.io/en/stable/cabal-project-description-file.html",
72            ),
73        }]
74    }
75
76    fn is_match(path: &Path) -> bool {
77        path.file_name().is_some_and(|name| name == "cabal.project")
78    }
79
80    fn extract_packages(path: &Path) -> Vec<PackageData> {
81        let content = match read_file_to_string(path, None) {
82            Ok(content) => content,
83            Err(error) => {
84                warn!("Failed to read cabal.project {:?}: {}", path, error);
85                return vec![default_package_data(DatasourceId::HackageCabalProject)];
86            }
87        };
88
89        vec![parse_cabal_project(&content)]
90    }
91}
92
93impl PackageParser for HackageStackYamlParser {
94    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
95
96    fn metadata() -> Vec<ParserMetadata> {
97        vec![ParserMetadata {
98            description: "Hackage Stack project manifest",
99            file_patterns: &["**/stack.yaml"],
100            package_type: "hackage",
101            primary_language: "Haskell",
102            documentation_url: Some("https://docs.haskellstack.org/en/stable/configure/yaml/"),
103        }]
104    }
105
106    fn is_match(path: &Path) -> bool {
107        path.file_name().is_some_and(|name| name == "stack.yaml")
108    }
109
110    fn extract_packages(path: &Path) -> Vec<PackageData> {
111        let content = match read_file_to_string(path, None) {
112            Ok(content) => content,
113            Err(error) => {
114                warn!("Failed to read stack.yaml {:?}: {}", path, error);
115                return vec![default_package_data(DatasourceId::HackageStackYaml)];
116            }
117        };
118
119        let yaml: YamlValue = match yaml_serde::from_str(&content) {
120            Ok(yaml) => yaml,
121            Err(error) => {
122                warn!("Failed to parse stack.yaml {:?}: {}", path, error);
123                return vec![default_package_data(DatasourceId::HackageStackYaml)];
124            }
125        };
126
127        vec![parse_stack_yaml(&yaml)]
128    }
129}
130
/// The Cabal component stanza that the fields currently being parsed belong to.
#[derive(Clone, Debug, Default)]
struct ComponentContext {
    /// Stanza keyword, e.g. `library`, `executable`, `test-suite`, `benchmark` or `common`.
    component_type: String,
    /// Text following the keyword on the header line, if any.
    component_name: Option<String>,
}
136
/// Intermediate result of scanning a `.cabal` file, before conversion to `PackageData`.
#[derive(Debug, Default)]
struct CabalData {
    /// Top-level `name` field.
    name: Option<String>,
    /// Top-level `version` field.
    version: Option<String>,
    /// Top-level `synopsis` field.
    synopsis: Option<String>,
    /// Top-level `description` field, normalized across its continuation lines.
    description: Option<String>,
    /// Top-level `license` field, kept as the raw statement.
    license: Option<String>,
    /// Top-level `homepage` field.
    homepage_url: Option<String>,
    /// Top-level `bug-reports` field.
    bug_tracking_url: Option<String>,
    /// First `location` field found inside a `source-repository` stanza.
    vcs_url: Option<String>,
    /// Comma-separated entries of the top-level `author` field.
    authors: Vec<String>,
    /// Comma-separated entries of the top-level `maintainer` field.
    maintainers: Vec<String>,
    /// Comma-separated entries of the top-level `category` field.
    category_keywords: Vec<String>,
    /// Comma-separated entries of the top-level `keywords` field.
    explicit_keywords: Vec<String>,
    /// Dependencies gathered from every `build-depends` field.
    dependencies: Vec<Dependency>,
}
153
154fn default_package_data(datasource_id: DatasourceId) -> PackageData {
155    PackageData {
156        package_type: Some(PACKAGE_TYPE),
157        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
158        datasource_id: Some(datasource_id),
159        ..Default::default()
160    }
161}
162
163fn parse_cabal_manifest(content: &str) -> PackageData {
164    let parsed = parse_cabal_data(content);
165    let keywords = merge_keywords(&parsed.category_keywords, &parsed.explicit_keywords);
166    let description =
167        combine_summary_and_description(&parsed.synopsis, &parsed.description).map(truncate_field);
168    let parties = build_parties(&parsed.authors, &parsed.maintainers);
169    let purl =
170        build_hackage_purl(parsed.name.as_deref(), parsed.version.as_deref()).map(truncate_field);
171    let repository_homepage_url = parsed
172        .name
173        .as_ref()
174        .map(|name| match parsed.version.as_ref() {
175            Some(version) => truncate_field(format!(
176                "https://hackage.haskell.org/package/{}-{}",
177                name, version
178            )),
179            None => truncate_field(format!("https://hackage.haskell.org/package/{}", name)),
180        });
181
182    PackageData {
183        package_type: Some(PACKAGE_TYPE),
184        namespace: None,
185        name: parsed.name,
186        version: parsed.version,
187        qualifiers: None,
188        subpath: None,
189        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
190        description,
191        release_date: None,
192        parties,
193        keywords,
194        homepage_url: parsed.homepage_url,
195        download_url: None,
196        size: None,
197        sha1: None,
198        md5: None,
199        sha256: None,
200        sha512: None,
201        bug_tracking_url: parsed.bug_tracking_url,
202        code_view_url: None,
203        vcs_url: parsed.vcs_url,
204        copyright: None,
205        holder: None,
206        declared_license_expression: None,
207        declared_license_expression_spdx: None,
208        license_detections: Vec::new(),
209        other_license_expression: None,
210        other_license_expression_spdx: None,
211        other_license_detections: Vec::new(),
212        extracted_license_statement: parsed.license,
213        notice_text: None,
214        source_packages: Vec::new(),
215        file_references: Vec::new(),
216        is_private: false,
217        is_virtual: false,
218        extra_data: None,
219        dependencies: parsed.dependencies,
220        repository_homepage_url,
221        repository_download_url: None,
222        api_data_url: None,
223        datasource_id: Some(DatasourceId::HackageCabal),
224        purl,
225    }
226}
227
/// Parses the text of a `cabal.project` file into a `PackageData` record.
///
/// The file is walked line by line. `packages`, `optional-packages`,
/// `extra-packages` and `import` fields become dependencies; each
/// `source-repository-package` stanza becomes one dependency built from its
/// indented fields; every other field is stored in `extra_data`.
fn parse_cabal_project(content: &str) -> PackageData {
    let mut package_data = default_package_data(DatasourceId::HackageCabalProject);
    let lines: Vec<&str> = content.lines().collect();
    let mut dependencies = Vec::new();
    let mut extra_data = HashMap::new();
    // Completed `source-repository-package` stanzas, in file order.
    let mut source_repo_entries: Vec<HashMap<String, JsonValue>> = Vec::new();
    // Fields of the stanza currently being read, if any.
    let mut current_source_repo: Option<HashMap<String, JsonValue>> = None;
    let mut index = 0;
    let mut iteration_count = 0usize;

    while index < lines.len() {
        // Hard cap on loop iterations; parsing stops with a warning once exceeded.
        iteration_count += 1;
        if iteration_count > MAX_ITERATION_COUNT {
            warn!(
                "parse_cabal_project: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
                MAX_ITERATION_COUNT, index
            );
            break;
        }

        let cleaned = strip_cabal_comment(lines[index]);
        let trimmed = cleaned.trim();
        let indent = indentation(cleaned);

        if trimmed.is_empty() {
            index += 1;
            continue;
        }

        // A new stanza header closes the previous stanza (if any) and opens a fresh one.
        if indent == 0 && trimmed == "source-repository-package" {
            if let Some(entry) = current_source_repo.take() {
                source_repo_entries.push(entry);
            }
            current_source_repo = Some(HashMap::new());
            index += 1;
            continue;
        }

        // Lines without a `key: value` shape are skipped; an unindented one also
        // ends the stanza in progress.
        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
            if indent == 0
                && let Some(entry) = current_source_repo.take()
            {
                source_repo_entries.push(entry);
            }
            index += 1;
            continue;
        };

        // Indented fields while a stanza is open belong to that stanza.
        if current_source_repo.is_some() && indent > 0 {
            if let Some(source_repo) = current_source_repo.as_mut() {
                source_repo.insert(
                    project_extra_key(&key),
                    parse_multiline_scalar_or_list(&value),
                );
            }
            index = next_index + 1;
            continue;
        }

        // An unindented field ends the stanza in progress before being handled below.
        if current_source_repo.is_some()
            && indent == 0
            && key != "source-repository-package"
            && let Some(entry) = current_source_repo.take()
        {
            source_repo_entries.push(entry);
        }

        match key.as_str() {
            "packages" => {
                dependencies.extend(parse_path_like_entries(&value, "packages", false));
            }
            "optional-packages" => {
                dependencies.extend(parse_path_like_entries(&value, "optional-packages", true));
            }
            "extra-packages" => {
                dependencies.extend(parse_hackage_spec_entries(&value, "extra-packages", None));
            }
            "import" => {
                dependencies.extend(parse_import_entries(&value));
            }
            // Anything else is preserved as project-level extra data.
            _ => {
                extra_data.insert(
                    project_extra_key(&key),
                    parse_multiline_scalar_or_list(&value),
                );
            }
        }

        // Resume after the last continuation line consumed by the field.
        index = next_index + 1;
    }

    // Flush a stanza that was still open at end of file.
    if let Some(entry) = current_source_repo.take() {
        source_repo_entries.push(entry);
    }

    for entry in source_repo_entries.into_iter().take(MAX_ITERATION_COUNT) {
        dependencies.push(build_source_repository_dependency(entry));
    }

    package_data.dependencies = dependencies;
    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
    package_data
}
331
332fn parse_stack_yaml(yaml: &YamlValue) -> PackageData {
333    let mut package_data = default_package_data(DatasourceId::HackageStackYaml);
334    let Some(mapping) = yaml.as_mapping() else {
335        return package_data;
336    };
337
338    let mut dependencies = Vec::new();
339    let mut extra_data = HashMap::new();
340
341    if let Some(resolver) = mapping_get(mapping, "resolver")
342        && let Ok(value) = serde_json::to_value(resolver)
343    {
344        extra_data.insert("resolver".to_string(), value);
345    }
346
347    if let Some(snapshot) = mapping_get(mapping, "snapshot")
348        && let Ok(value) = serde_json::to_value(snapshot)
349    {
350        extra_data.insert("snapshot".to_string(), value);
351    }
352
353    if let Some(packages) = mapping_get(mapping, "packages") {
354        dependencies.extend(parse_stack_package_entries(packages));
355    }
356
357    if let Some(extra_deps) = mapping_get(mapping, "extra-deps") {
358        dependencies.extend(parse_stack_extra_dep_entries(extra_deps));
359    }
360
361    for (key, value) in mapping.iter().take(MAX_ITERATION_COUNT) {
362        let Some(key) = key.as_str() else {
363            continue;
364        };
365
366        if matches!(key, "resolver" | "snapshot" | "packages" | "extra-deps") {
367            continue;
368        }
369
370        if let Ok(json_value) = serde_json::to_value(value) {
371            extra_data.insert(key.to_string(), json_value);
372        }
373    }
374
375    package_data.dependencies = dependencies;
376    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
377    package_data
378}
379
380fn parse_cabal_data(content: &str) -> CabalData {
381    let mut data = CabalData::default();
382    let lines: Vec<&str> = content.lines().collect();
383    let mut current_component: Option<ComponentContext> = None;
384    let mut in_source_repository = false;
385    let mut index = 0;
386    let mut iteration_count = 0usize;
387
388    while index < lines.len() {
389        iteration_count += 1;
390        if iteration_count > MAX_ITERATION_COUNT {
391            warn!(
392                "parse_cabal_data: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
393                MAX_ITERATION_COUNT, index
394            );
395            break;
396        }
397        let cleaned = strip_cabal_comment(lines[index]);
398        let trimmed = cleaned.trim();
399        let indent = indentation(cleaned);
400
401        if trimmed.is_empty() {
402            index += 1;
403            continue;
404        }
405
406        if indent == 0 && !trimmed.contains(':') {
407            current_component = parse_component_header(trimmed);
408            in_source_repository = trimmed.starts_with("source-repository");
409            index += 1;
410            continue;
411        }
412
413        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
414            index += 1;
415            continue;
416        };
417
418        match key.as_str() {
419            "name" if indent == 0 => data.name = clean_single_line(&value).map(truncate_field),
420            "version" if indent == 0 => {
421                data.version = clean_single_line(&value).map(truncate_field)
422            }
423            "synopsis" if indent == 0 => {
424                data.synopsis = clean_single_line(&value).map(truncate_field)
425            }
426            "description" if indent == 0 => {
427                data.description = normalize_cabal_multiline(&value).map(truncate_field);
428            }
429            "license" if indent == 0 => {
430                data.license = clean_single_line(&value).map(truncate_field)
431            }
432            "homepage" if indent == 0 => {
433                data.homepage_url = clean_single_line(&value).map(truncate_field)
434            }
435            "bug-reports" if indent == 0 => {
436                data.bug_tracking_url = clean_single_line(&value).map(truncate_field)
437            }
438            "author" if indent == 0 => data.authors.extend(split_comma_separated(&value)),
439            "maintainer" if indent == 0 => {
440                data.maintainers.extend(split_comma_separated(&value));
441            }
442            "category" if indent == 0 => {
443                data.category_keywords.extend(split_keywords(&value));
444            }
445            "keywords" if indent == 0 => {
446                data.explicit_keywords.extend(split_keywords(&value));
447            }
448            "location" if in_source_repository && data.vcs_url.is_none() => {
449                data.vcs_url = clean_single_line(&value).map(truncate_field);
450            }
451            "build-depends" => {
452                data.dependencies
453                    .extend(parse_build_depends(&value, current_component.as_ref()));
454            }
455            _ => {}
456        }
457
458        index = next_index + 1;
459    }
460
461    data
462}
463
464fn parse_build_depends(value: &str, component: Option<&ComponentContext>) -> Vec<Dependency> {
465    if component.is_some_and(|component| component.component_type == "common") {
466        return Vec::new();
467    }
468
469    split_dependency_entries(value)
470        .into_iter()
471        .filter_map(|entry| {
472            parse_hackage_spec_dependency(&entry, Some("build-depends"), component, None)
473        })
474        .collect()
475}
476
477fn parse_path_like_entries(value: &str, scope: &str, optional: bool) -> Vec<Dependency> {
478    split_multiline_entries(value)
479        .into_iter()
480        .filter(|entry| !entry.is_empty())
481        .map(|entry| {
482            let mut extra_data = HashMap::new();
483            extra_data.insert("path".to_string(), JsonValue::String(entry.clone()));
484
485            Dependency {
486                purl: None,
487                extracted_requirement: Some(truncate_field(entry)),
488                scope: Some(scope.to_string()),
489                is_runtime: None,
490                is_optional: Some(optional),
491                is_pinned: Some(false),
492                is_direct: Some(true),
493                resolved_package: None,
494                extra_data: Some(extra_data),
495            }
496        })
497        .collect()
498}
499
500fn parse_import_entries(value: &str) -> Vec<Dependency> {
501    split_multiline_entries(value)
502        .into_iter()
503        .filter(|entry| !entry.is_empty())
504        .map(|entry| Dependency {
505            purl: None,
506            extracted_requirement: Some(truncate_field(entry)),
507            scope: Some("import".to_string()),
508            is_runtime: None,
509            is_optional: Some(false),
510            is_pinned: Some(false),
511            is_direct: Some(true),
512            resolved_package: None,
513            extra_data: None,
514        })
515        .collect()
516}
517
518fn parse_hackage_spec_entries(
519    value: &str,
520    scope: &str,
521    is_runtime: Option<bool>,
522) -> Vec<Dependency> {
523    split_multiline_entries(value)
524        .into_iter()
525        .filter_map(|entry| parse_hackage_spec_dependency(&entry, Some(scope), None, is_runtime))
526        .collect()
527}
528
529fn parse_stack_package_entries(value: &YamlValue) -> Vec<Dependency> {
530    let Some(sequence) = value.as_sequence() else {
531        return Vec::new();
532    };
533
534    sequence
535        .iter()
536        .take(MAX_ITERATION_COUNT)
537        .filter_map(|entry| match entry {
538            YamlValue::String(path) => {
539                let mut extra_data = HashMap::new();
540                extra_data.insert("path".to_string(), JsonValue::String(path.clone()));
541
542                Some(Dependency {
543                    purl: None,
544                    extracted_requirement: Some(truncate_field(path.clone())),
545                    scope: Some("packages".to_string()),
546                    is_runtime: None,
547                    is_optional: Some(false),
548                    is_pinned: Some(false),
549                    is_direct: Some(true),
550                    resolved_package: None,
551                    extra_data: Some(extra_data),
552                })
553            }
554            YamlValue::Mapping(map) => {
555                let extracted_requirement = mapping_string(map, "location")
556                    .or_else(|| mapping_string(map, "git"))
557                    .or_else(|| mapping_string(map, "url"))
558                    .map(truncate_field);
559                let extra_data = serde_json::to_value(entry)
560                    .ok()
561                    .and_then(|value| value.as_object().cloned())
562                    .map(|map| map.into_iter().collect::<HashMap<_, _>>());
563
564                Some(Dependency {
565                    purl: None,
566                    extracted_requirement,
567                    scope: Some("packages".to_string()),
568                    is_runtime: None,
569                    is_optional: Some(false),
570                    is_pinned: Some(mapping_string(map, "commit").is_some()),
571                    is_direct: Some(true),
572                    resolved_package: None,
573                    extra_data,
574                })
575            }
576            _ => None,
577        })
578        .collect()
579}
580
581fn parse_stack_extra_dep_entries(value: &YamlValue) -> Vec<Dependency> {
582    let Some(sequence) = value.as_sequence() else {
583        return Vec::new();
584    };
585
586    sequence
587        .iter()
588        .take(MAX_ITERATION_COUNT)
589        .filter_map(|entry| match entry {
590            YamlValue::String(spec) => parse_stack_extra_dep_string(spec),
591            YamlValue::Mapping(map) => Some(parse_stack_extra_dep_mapping(map, entry)),
592            _ => None,
593        })
594        .collect()
595}
596
597fn parse_stack_extra_dep_string(spec: &str) -> Option<Dependency> {
598    let trimmed = spec.trim();
599    if trimmed.is_empty() {
600        return None;
601    }
602
603    let (package_spec, pantry_suffix) = trimmed
604        .split_once('@')
605        .map_or((trimmed, None), |(package_spec, suffix)| {
606            (package_spec, Some(suffix))
607        });
608
609    let mut dependency =
610        parse_hackage_spec_dependency(package_spec, Some("extra-deps"), None, None).unwrap_or(
611            Dependency {
612                purl: None,
613                extracted_requirement: Some(truncate_field(package_spec.to_string())),
614                scope: Some("extra-deps".to_string()),
615                is_runtime: None,
616                is_optional: Some(false),
617                is_pinned: Some(false),
618                is_direct: Some(true),
619                resolved_package: None,
620                extra_data: None,
621            },
622        );
623
624    if let Some(suffix) = pantry_suffix {
625        let mut extra_data = dependency.extra_data.take().unwrap_or_default();
626        extra_data.insert("pantry".to_string(), JsonValue::String(suffix.to_string()));
627        dependency.extra_data = Some(extra_data);
628        dependency.is_pinned = Some(true);
629        if dependency.extracted_requirement.is_none() {
630            dependency.extracted_requirement = Some(truncate_field(package_spec.to_string()));
631        }
632    }
633
634    dependency.scope = Some("extra-deps".to_string());
635    Some(dependency)
636}
637
638fn parse_stack_extra_dep_mapping(map: &Mapping, raw_value: &YamlValue) -> Dependency {
639    let name = mapping_string(map, "name");
640    let version = mapping_string(map, "version");
641    let purl = build_hackage_purl(name.as_deref(), version.as_deref()).map(truncate_field);
642    let extracted_requirement = version
643        .clone()
644        .or_else(|| mapping_string(map, "git"))
645        .or_else(|| mapping_string(map, "url"))
646        .map(truncate_field);
647    let extra_data = serde_json::to_value(raw_value)
648        .ok()
649        .and_then(|value| value.as_object().cloned())
650        .map(|map| map.into_iter().collect::<HashMap<_, _>>());
651
652    Dependency {
653        purl,
654        extracted_requirement,
655        scope: Some("extra-deps".to_string()),
656        is_runtime: None,
657        is_optional: Some(false),
658        is_pinned: Some(version.is_some() || mapping_string(map, "commit").is_some()),
659        is_direct: Some(true),
660        resolved_package: None,
661        extra_data,
662    }
663}
664
665fn build_source_repository_dependency(extra_data: HashMap<String, JsonValue>) -> Dependency {
666    let extracted_requirement = extra_data
667        .get("location")
668        .and_then(JsonValue::as_str)
669        .map(str::to_string)
670        .or_else(|| {
671            extra_data
672                .get("tag")
673                .and_then(JsonValue::as_str)
674                .map(str::to_string)
675        })
676        .map(truncate_field);
677
678    Dependency {
679        purl: None,
680        extracted_requirement,
681        scope: Some("source-repository-package".to_string()),
682        is_runtime: None,
683        is_optional: Some(false),
684        is_pinned: Some(
685            extra_data.contains_key("tag")
686                || extra_data.contains_key("commit")
687                || extra_data.contains_key("sha256"),
688        ),
689        is_direct: Some(true),
690        resolved_package: None,
691        extra_data: Some(extra_data),
692    }
693}
694
695fn parse_hackage_spec_dependency(
696    spec: &str,
697    scope: Option<&str>,
698    component: Option<&ComponentContext>,
699    is_runtime: Option<bool>,
700) -> Option<Dependency> {
701    let trimmed = spec.trim();
702    if trimmed.is_empty() {
703        return None;
704    }
705
706    let can_split_name_version = matches!(scope, Some("extra-packages" | "extra-deps"));
707
708    if can_split_name_version && let Some((name, version)) = split_hackage_name_version(trimmed) {
709        let mut extra_data = HashMap::new();
710        if let Some(component) = component {
711            extra_data.insert(
712                "component_type".to_string(),
713                JsonValue::String(component.component_type.clone()),
714            );
715            if let Some(component_name) = &component.component_name {
716                extra_data.insert(
717                    "component_name".to_string(),
718                    JsonValue::String(component_name.clone()),
719                );
720            }
721        }
722
723        return Some(Dependency {
724            purl: Some(truncate_field(format!("pkg:hackage/{}@{}", name, version))),
725            extracted_requirement: Some(truncate_field(version)),
726            scope: scope.map(str::to_string),
727            is_runtime: component.map(component_is_runtime).or(is_runtime),
728            is_optional: Some(false),
729            is_pinned: Some(true),
730            is_direct: Some(true),
731            resolved_package: None,
732            extra_data: (!extra_data.is_empty()).then_some(extra_data),
733        });
734    }
735
736    let name_re = Regex::new(r"^(?P<name>[A-Za-z0-9][A-Za-z0-9_\.-]*)").ok()?;
737    let captures = name_re.captures(trimmed)?;
738    let name = captures.name("name")?.as_str().to_string();
739    let requirement = trimmed[name.len()..].trim();
740    let implicit_name_version = if can_split_name_version && requirement.is_empty() {
741        split_hackage_name_version(trimmed)
742    } else {
743        None
744    };
745    let resolved_name = implicit_name_version
746        .as_ref()
747        .map(|(resolved_name, _)| resolved_name.as_str())
748        .unwrap_or(name.as_str());
749    let exact_version = exact_version_requirement(requirement).or_else(|| {
750        implicit_name_version
751            .as_ref()
752            .map(|(_, version)| version.clone())
753    });
754    let purl = if let Some(version) = exact_version.as_deref() {
755        Some(truncate_field(format!(
756            "pkg:hackage/{}@{}",
757            resolved_name, version
758        )))
759    } else {
760        Some(truncate_field(format!("pkg:hackage/{}", resolved_name)))
761    };
762
763    let mut extra_data = HashMap::new();
764    if let Some(component) = component {
765        extra_data.insert(
766            "component_type".to_string(),
767            JsonValue::String(component.component_type.clone()),
768        );
769        if let Some(component_name) = &component.component_name {
770            extra_data.insert(
771                "component_name".to_string(),
772                JsonValue::String(component_name.clone()),
773            );
774        }
775    }
776
777    let extracted_requirement = if let Some((_, version)) = implicit_name_version {
778        Some(truncate_field(version))
779    } else {
780        (!requirement.is_empty())
781            .then_some(requirement.to_string())
782            .map(truncate_field)
783    };
784
785    Some(Dependency {
786        purl,
787        extracted_requirement,
788        scope: scope.map(str::to_string),
789        is_runtime: component.map(component_is_runtime).or(is_runtime),
790        is_optional: Some(false),
791        is_pinned: Some(exact_version.is_some()),
792        is_direct: Some(true),
793        resolved_package: None,
794        extra_data: (!extra_data.is_empty()).then_some(extra_data),
795    })
796}
797
798fn component_is_runtime(component: &ComponentContext) -> bool {
799    !matches!(
800        component.component_type.as_str(),
801        "test-suite" | "benchmark"
802    )
803}
804
805fn parse_component_header(trimmed: &str) -> Option<ComponentContext> {
806    const COMPONENT_PREFIXES: &[&str] = &[
807        "library",
808        "foreign-library",
809        "executable",
810        "test-suite",
811        "benchmark",
812        "common",
813    ];
814
815    COMPONENT_PREFIXES.iter().find_map(|prefix| {
816        trimmed
817            .strip_prefix(prefix)
818            .map(|remainder| ComponentContext {
819                component_type: (*prefix).to_string(),
820                component_name: clean_single_line(remainder),
821            })
822    })
823}
824
825fn collect_indented_field(lines: &[&str], start_index: usize) -> Option<(String, String, usize)> {
826    let current = strip_cabal_comment(lines[start_index]);
827    let trimmed = current.trim();
828    let indent = indentation(current);
829    let colon_index = trimmed.find(':')?;
830    let key = trimmed[..colon_index].trim().to_ascii_lowercase();
831    let mut values = vec![trimmed[colon_index + 1..].trim().to_string()];
832    let mut last_index = start_index;
833
834    for (next_index, line) in lines.iter().enumerate().skip(start_index + 1) {
835        let next = strip_cabal_comment(line);
836        let next_trimmed = next.trim();
837        if next_trimmed.is_empty() {
838            break;
839        }
840
841        if indentation(next) <= indent {
842            break;
843        }
844
845        values.push(next_trimmed.to_string());
846        last_index = next_index;
847    }
848
849    Some((key, values.join("\n"), last_index))
850}
851
852fn split_dependency_entries(value: &str) -> Vec<String> {
853    let mut entries = Vec::new();
854    let mut current = String::new();
855    let mut paren_depth = 0usize;
856    let mut brace_depth = 0usize;
857    let mut bracket_depth = 0usize;
858
859    for character in value.chars().take(MAX_ITERATION_COUNT) {
860        match character {
861            '(' => paren_depth += 1,
862            ')' => paren_depth = paren_depth.saturating_sub(1),
863            '{' => brace_depth += 1,
864            '}' => brace_depth = brace_depth.saturating_sub(1),
865            '[' => bracket_depth += 1,
866            ']' => bracket_depth = bracket_depth.saturating_sub(1),
867            ',' if paren_depth == 0 && brace_depth == 0 && bracket_depth == 0 => {
868                let trimmed = current.trim();
869                if !trimmed.is_empty() {
870                    entries.push(trimmed.to_string());
871                }
872                current.clear();
873                continue;
874            }
875            _ => {}
876        }
877
878        current.push(character);
879    }
880
881    let trimmed = current.trim();
882    if !trimmed.is_empty() {
883        entries.push(trimmed.to_string());
884    }
885
886    entries
887}
888
889fn split_multiline_entries(value: &str) -> Vec<String> {
890    value
891        .lines()
892        .take(MAX_ITERATION_COUNT)
893        .map(str::trim)
894        .filter(|line| !line.is_empty())
895        .map(|line| line.strip_prefix("-").unwrap_or(line).trim().to_string())
896        .collect()
897}
898
899fn parse_multiline_scalar_or_list(value: &str) -> JsonValue {
900    let entries = split_multiline_entries(value);
901    if entries.len() <= 1 {
902        clean_single_line(value)
903            .map(JsonValue::String)
904            .unwrap_or(JsonValue::Null)
905    } else {
906        JsonValue::Array(entries.into_iter().map(JsonValue::String).collect())
907    }
908}
909
/// Normalizes a multi-line free-text value.
///
/// Every line is trimmed, a line consisting solely of `.` becomes an empty line, and
/// the lines are re-joined with `\n`. The joined text is trimmed as a whole; `None`
/// is returned when nothing remains.
fn normalize_cabal_multiline(value: &str) -> Option<String> {
    let normalized: Vec<&str> = value
        .lines()
        .map(|raw| match raw.trim() {
            "." => "",
            other => other,
        })
        .collect();

    let joined = normalized.join("\n");
    let text = joined.trim();
    if text.is_empty() {
        None
    } else {
        Some(text.to_string())
    }
}
926
/// Returns the trimmed `value`, or `None` when it is empty after trimming.
fn clean_single_line(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        text => Some(text.to_owned()),
    }
}
931
/// Splits `value` on every comma, trimming each piece and discarding empty ones.
fn split_comma_separated(value: &str) -> Vec<String> {
    let mut parts = Vec::new();
    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            parts.push(piece.to_owned());
        }
    }
    parts
}
940
941fn split_keywords(value: &str) -> Vec<String> {
942    split_comma_separated(value)
943}
944
/// Merges `categories` followed by `keywords` into one list.
///
/// Entries are trimmed, empty entries are skipped, and duplicates are removed by
/// ASCII-case-insensitive comparison. The first occurrence wins and keeps its
/// original casing; input order is preserved.
fn merge_keywords(categories: &[String], keywords: &[String]) -> Vec<String> {
    let mut lowered_seen: HashSet<String> = HashSet::new();
    let mut merged = Vec::new();

    for raw in categories.iter().chain(keywords) {
        let candidate = raw.trim();
        if candidate.is_empty() {
            continue;
        }
        // `insert` returns false when the lowercased form was already recorded.
        if lowered_seen.insert(candidate.to_ascii_lowercase()) {
            merged.push(candidate.to_owned());
        }
    }

    merged
}
960
/// Combines an optional synopsis and an optional description into one text.
///
/// * both present and different: `synopsis`, a blank line, then `description`;
/// * both present and equal: the text once;
/// * only one present: that one;
/// * neither present: `None`.
fn combine_summary_and_description(
    synopsis: &Option<String>,
    description: &Option<String>,
) -> Option<String> {
    // Without a synopsis the result is simply whatever the description is.
    let Some(summary) = synopsis else {
        return description.clone();
    };

    match description {
        Some(details) if details != summary => Some(format!("{}\n\n{}", summary, details)),
        _ => Some(summary.clone()),
    }
}
973
974fn build_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
975    let author_parties = authors
976        .iter()
977        .filter_map(|author| build_party(author, "author"));
978    let maintainer_parties = maintainers
979        .iter()
980        .filter_map(|maintainer| build_party(maintainer, "maintainer"));
981
982    author_parties.chain(maintainer_parties).collect()
983}
984
985fn build_party(value: &str, role: &str) -> Option<Party> {
986    let (name, email) = split_name_email(value.trim());
987    if name.is_none() && email.is_none() {
988        return None;
989    }
990
991    Some(Party {
992        r#type: Some("person".to_string()),
993        role: Some(role.to_string()),
994        name,
995        email,
996        url: None,
997        organization: None,
998        organization_url: None,
999        timezone: None,
1000    })
1001}
1002
/// Formats a `pkg:hackage/...` package URL.
///
/// Returns `None` without a name. With a name, the result is `pkg:hackage/<name>`,
/// with `@<version>` appended when a version is given. Name and version are inserted
/// verbatim.
fn build_hackage_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;
    let purl = match version {
        Some(version) => format!("pkg:hackage/{}@{}", name, version),
        None => format!("pkg:hackage/{}", name),
    };
    Some(purl)
}
1010
/// Splits a `name-version` spec such as `text-short-0.1.5` into `(name, version)`.
///
/// Returns `None` when `spec` contains whitespace or any of `< > = & | ( )`.
/// Otherwise the dashes are tried from right to left, and the first one with a
/// non-empty name before it and a version beginning with an ASCII digit after it is
/// the split point. Returns `None` when no dash qualifies.
fn split_hackage_name_version(spec: &str) -> Option<(String, String)> {
    let is_constraint_char = |character: char| {
        character.is_whitespace() || matches!(character, '<' | '>' | '=' | '&' | '|' | '(' | ')')
    };
    if spec.contains(is_constraint_char) {
        return None;
    }

    spec.rmatch_indices('-').find_map(|(dash, _)| {
        let (name, version) = (&spec[..dash], &spec[dash + 1..]);
        // An empty version has no first character, so it fails this check too.
        let starts_with_digit = version.starts_with(|first: char| first.is_ascii_digit());

        (!name.is_empty() && starts_with_digit).then(|| (name.to_string(), version.to_string()))
    })
}
1041
1042fn exact_version_requirement(requirement: &str) -> Option<String> {
1043    let trimmed = requirement.trim();
1044    if trimmed.is_empty() {
1045        return None;
1046    }
1047
1048    let exact_re = Regex::new(r"^==\s*([A-Za-z0-9][A-Za-z0-9\.\-_+]*)$").ok()?;
1049    exact_re.captures(trimmed).and_then(|captures| {
1050        let version = captures.get(1)?.as_str();
1051        (!version.contains('*')).then_some(version.to_string())
1052    })
1053}
1054
/// Returns `key` with every `-` replaced by `_`.
fn project_extra_key(key: &str) -> String {
    key.split('-').collect::<Vec<_>>().join("_")
}
1058
/// Removes a `--` comment from a single line.
///
/// * A line whose first non-whitespace characters are `--` yields `""`.
/// * Otherwise the line is cut at the first `--` that is preceded by ASCII
///   whitespace, and trailing whitespace before the cut is removed.
/// * A `--` glued to preceding non-whitespace text (e.g. `a--b`) is not treated as a
///   comment; such a line is returned unchanged.
fn strip_cabal_comment(line: &str) -> &str {
    if line.trim_start().starts_with("--") {
        return "";
    }

    let raw = line.as_bytes();
    let marker = raw.windows(2).enumerate().find(|&(offset, pair)| {
        pair == b"--" && (offset == 0 || raw[offset - 1].is_ascii_whitespace())
    });

    match marker {
        // `offset` points at an ASCII `-`, so it is a valid char boundary.
        Some((offset, _)) => line[..offset].trim_end(),
        None => line,
    }
}
1077
/// Counts the leading whitespace characters of `line` (each character counts as one,
/// tabs included).
fn indentation(line: &str) -> usize {
    let mut width = 0;
    for character in line.chars() {
        if !character.is_whitespace() {
            break;
        }
        width += 1;
    }
    width
}
1083
1084fn mapping_get<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
1085    mapping.get(YamlValue::String(key.to_string()))
1086}
1087
1088fn mapping_string(mapping: &Mapping, key: &str) -> Option<String> {
1089    mapping_get(mapping, key)
1090        .and_then(YamlValue::as_str)
1091        .map(str::to_string)
1092}