Skip to main content

provenant/parsers/
hackage.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use regex::Regex;
6use serde_json::Value as JsonValue;
7use yaml_serde::{Mapping, Value as YamlValue};
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
10use crate::parsers::utils::{
11    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
12};
13
14use super::PackageParser;
15
/// Package type stamped on the package records built in this file and exposed
/// through each parser's `PackageParser::PACKAGE_TYPE`.
const PACKAGE_TYPE: PackageType = PackageType::Hackage;
/// Primary language recorded on the package records built in this file.
const PRIMARY_LANGUAGE: &str = "Haskell";
18
/// Parser for package description files carrying the `cabal` extension.
pub struct HackageCabalParser;
20
/// Parser for project files named `cabal.project`.
pub struct HackageCabalProjectParser;
22
/// Parser for Stack project files named `stack.yaml`.
pub struct HackageStackYamlParser;
24
25impl PackageParser for HackageCabalParser {
26    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
27
28    fn is_match(path: &Path) -> bool {
29        path.extension().is_some_and(|ext| ext == "cabal")
30    }
31
32    fn extract_packages(path: &Path) -> Vec<PackageData> {
33        let content = match read_file_to_string(path, None) {
34            Ok(content) => content,
35            Err(error) => {
36                warn!("Failed to read cabal file {:?}: {}", path, error);
37                return vec![default_package_data(DatasourceId::HackageCabal)];
38            }
39        };
40
41        vec![parse_cabal_manifest(&content)]
42    }
43}
44
45impl PackageParser for HackageCabalProjectParser {
46    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
47
48    fn is_match(path: &Path) -> bool {
49        path.file_name().is_some_and(|name| name == "cabal.project")
50    }
51
52    fn extract_packages(path: &Path) -> Vec<PackageData> {
53        let content = match read_file_to_string(path, None) {
54            Ok(content) => content,
55            Err(error) => {
56                warn!("Failed to read cabal.project {:?}: {}", path, error);
57                return vec![default_package_data(DatasourceId::HackageCabalProject)];
58            }
59        };
60
61        vec![parse_cabal_project(&content)]
62    }
63}
64
65impl PackageParser for HackageStackYamlParser {
66    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
67
68    fn is_match(path: &Path) -> bool {
69        path.file_name().is_some_and(|name| name == "stack.yaml")
70    }
71
72    fn extract_packages(path: &Path) -> Vec<PackageData> {
73        let content = match read_file_to_string(path, None) {
74            Ok(content) => content,
75            Err(error) => {
76                warn!("Failed to read stack.yaml {:?}: {}", path, error);
77                return vec![default_package_data(DatasourceId::HackageStackYaml)];
78            }
79        };
80
81        let yaml: YamlValue = match yaml_serde::from_str(&content) {
82            Ok(yaml) => yaml,
83            Err(error) => {
84                warn!("Failed to parse stack.yaml {:?}: {}", path, error);
85                return vec![default_package_data(DatasourceId::HackageStackYaml)];
86            }
87        };
88
89        vec![parse_stack_yaml(&yaml)]
90    }
91}
92
/// The component stanza a field of a `.cabal` file belongs to.
#[derive(Clone, Debug, Default)]
struct ComponentContext {
    /// Stanza keyword as listed in `parse_component_header`, e.g. `library`
    /// or `test-suite`.
    component_type: String,
    /// Text following the keyword on the header line, when there is any.
    component_name: Option<String>,
}
98
/// Intermediate result of scanning a `.cabal` file, filled in by
/// `parse_cabal_data` before it is converted into a `PackageData`.
#[derive(Debug, Default)]
struct CabalData {
    /// Value of the top-level `name` field.
    name: Option<String>,
    /// Value of the top-level `version` field.
    version: Option<String>,
    /// Value of the top-level `synopsis` field.
    synopsis: Option<String>,
    /// Normalized value of the top-level `description` field.
    description: Option<String>,
    /// Raw value of the top-level `license` field.
    license: Option<String>,
    /// Value of the top-level `homepage` field.
    homepage_url: Option<String>,
    /// Value of the top-level `bug-reports` field.
    bug_tracking_url: Option<String>,
    /// First `location` value found inside a `source-repository` stanza.
    vcs_url: Option<String>,
    /// Comma-separated entries of the top-level `author` field.
    authors: Vec<String>,
    /// Comma-separated entries of the top-level `maintainer` field.
    maintainers: Vec<String>,
    /// Comma-separated entries of the top-level `category` field.
    category_keywords: Vec<String>,
    /// Comma-separated entries of the top-level `keywords` field.
    explicit_keywords: Vec<String>,
    /// Dependencies collected from every `build-depends` field.
    dependencies: Vec<Dependency>,
}
115
116fn default_package_data(datasource_id: DatasourceId) -> PackageData {
117    PackageData {
118        package_type: Some(PACKAGE_TYPE),
119        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
120        datasource_id: Some(datasource_id),
121        ..Default::default()
122    }
123}
124
125fn parse_cabal_manifest(content: &str) -> PackageData {
126    let parsed = parse_cabal_data(content);
127    let keywords = merge_keywords(&parsed.category_keywords, &parsed.explicit_keywords);
128    let description =
129        combine_summary_and_description(&parsed.synopsis, &parsed.description).map(truncate_field);
130    let parties = build_parties(&parsed.authors, &parsed.maintainers);
131    let purl =
132        build_hackage_purl(parsed.name.as_deref(), parsed.version.as_deref()).map(truncate_field);
133    let repository_homepage_url = parsed
134        .name
135        .as_ref()
136        .map(|name| match parsed.version.as_ref() {
137            Some(version) => truncate_field(format!(
138                "https://hackage.haskell.org/package/{}-{}",
139                name, version
140            )),
141            None => truncate_field(format!("https://hackage.haskell.org/package/{}", name)),
142        });
143
144    PackageData {
145        package_type: Some(PACKAGE_TYPE),
146        namespace: None,
147        name: parsed.name,
148        version: parsed.version,
149        qualifiers: None,
150        subpath: None,
151        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
152        description,
153        release_date: None,
154        parties,
155        keywords,
156        homepage_url: parsed.homepage_url,
157        download_url: None,
158        size: None,
159        sha1: None,
160        md5: None,
161        sha256: None,
162        sha512: None,
163        bug_tracking_url: parsed.bug_tracking_url,
164        code_view_url: None,
165        vcs_url: parsed.vcs_url,
166        copyright: None,
167        holder: None,
168        declared_license_expression: None,
169        declared_license_expression_spdx: None,
170        license_detections: Vec::new(),
171        other_license_expression: None,
172        other_license_expression_spdx: None,
173        other_license_detections: Vec::new(),
174        extracted_license_statement: parsed.license,
175        notice_text: None,
176        source_packages: Vec::new(),
177        file_references: Vec::new(),
178        is_private: false,
179        is_virtual: false,
180        extra_data: None,
181        dependencies: parsed.dependencies,
182        repository_homepage_url,
183        repository_download_url: None,
184        api_data_url: None,
185        datasource_id: Some(DatasourceId::HackageCabal),
186        purl,
187    }
188}
189
190fn parse_cabal_project(content: &str) -> PackageData {
191    let mut package_data = default_package_data(DatasourceId::HackageCabalProject);
192    let lines: Vec<&str> = content.lines().collect();
193    let mut dependencies = Vec::new();
194    let mut extra_data = HashMap::new();
195    let mut source_repo_entries: Vec<HashMap<String, JsonValue>> = Vec::new();
196    let mut current_source_repo: Option<HashMap<String, JsonValue>> = None;
197    let mut index = 0;
198    let mut iteration_count = 0usize;
199
200    while index < lines.len() {
201        iteration_count += 1;
202        if iteration_count > MAX_ITERATION_COUNT {
203            warn!(
204                "parse_cabal_project: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
205                MAX_ITERATION_COUNT, index
206            );
207            break;
208        }
209
210        let cleaned = strip_cabal_comment(lines[index]);
211        let trimmed = cleaned.trim();
212        let indent = indentation(cleaned);
213
214        if trimmed.is_empty() {
215            index += 1;
216            continue;
217        }
218
219        if indent == 0 && trimmed == "source-repository-package" {
220            if let Some(entry) = current_source_repo.take() {
221                source_repo_entries.push(entry);
222            }
223            current_source_repo = Some(HashMap::new());
224            index += 1;
225            continue;
226        }
227
228        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
229            if indent == 0
230                && let Some(entry) = current_source_repo.take()
231            {
232                source_repo_entries.push(entry);
233            }
234            index += 1;
235            continue;
236        };
237
238        if current_source_repo.is_some() && indent > 0 {
239            if let Some(source_repo) = current_source_repo.as_mut() {
240                source_repo.insert(
241                    project_extra_key(&key),
242                    parse_multiline_scalar_or_list(&value),
243                );
244            }
245            index = next_index + 1;
246            continue;
247        }
248
249        if current_source_repo.is_some()
250            && indent == 0
251            && key != "source-repository-package"
252            && let Some(entry) = current_source_repo.take()
253        {
254            source_repo_entries.push(entry);
255        }
256
257        match key.as_str() {
258            "packages" => {
259                dependencies.extend(parse_path_like_entries(&value, "packages", false));
260            }
261            "optional-packages" => {
262                dependencies.extend(parse_path_like_entries(&value, "optional-packages", true));
263            }
264            "extra-packages" => {
265                dependencies.extend(parse_hackage_spec_entries(&value, "extra-packages", None));
266            }
267            "import" => {
268                dependencies.extend(parse_import_entries(&value));
269            }
270            _ => {
271                extra_data.insert(
272                    project_extra_key(&key),
273                    parse_multiline_scalar_or_list(&value),
274                );
275            }
276        }
277
278        index = next_index + 1;
279    }
280
281    if let Some(entry) = current_source_repo.take() {
282        source_repo_entries.push(entry);
283    }
284
285    for entry in source_repo_entries.into_iter().take(MAX_ITERATION_COUNT) {
286        dependencies.push(build_source_repository_dependency(entry));
287    }
288
289    package_data.dependencies = dependencies;
290    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
291    package_data
292}
293
294fn parse_stack_yaml(yaml: &YamlValue) -> PackageData {
295    let mut package_data = default_package_data(DatasourceId::HackageStackYaml);
296    let Some(mapping) = yaml.as_mapping() else {
297        return package_data;
298    };
299
300    let mut dependencies = Vec::new();
301    let mut extra_data = HashMap::new();
302
303    if let Some(resolver) = mapping_get(mapping, "resolver")
304        && let Ok(value) = serde_json::to_value(resolver)
305    {
306        extra_data.insert("resolver".to_string(), value);
307    }
308
309    if let Some(snapshot) = mapping_get(mapping, "snapshot")
310        && let Ok(value) = serde_json::to_value(snapshot)
311    {
312        extra_data.insert("snapshot".to_string(), value);
313    }
314
315    if let Some(packages) = mapping_get(mapping, "packages") {
316        dependencies.extend(parse_stack_package_entries(packages));
317    }
318
319    if let Some(extra_deps) = mapping_get(mapping, "extra-deps") {
320        dependencies.extend(parse_stack_extra_dep_entries(extra_deps));
321    }
322
323    for (key, value) in mapping.iter().take(MAX_ITERATION_COUNT) {
324        let Some(key) = key.as_str() else {
325            continue;
326        };
327
328        if matches!(key, "resolver" | "snapshot" | "packages" | "extra-deps") {
329            continue;
330        }
331
332        if let Ok(json_value) = serde_json::to_value(value) {
333            extra_data.insert(key.to_string(), json_value);
334        }
335    }
336
337    package_data.dependencies = dependencies;
338    package_data.extra_data = (!extra_data.is_empty()).then_some(extra_data);
339    package_data
340}
341
342fn parse_cabal_data(content: &str) -> CabalData {
343    let mut data = CabalData::default();
344    let lines: Vec<&str> = content.lines().collect();
345    let mut current_component: Option<ComponentContext> = None;
346    let mut in_source_repository = false;
347    let mut index = 0;
348    let mut iteration_count = 0usize;
349
350    while index < lines.len() {
351        iteration_count += 1;
352        if iteration_count > MAX_ITERATION_COUNT {
353            warn!(
354                "parse_cabal_data: exceeded MAX_ITERATION_COUNT ({}) at line {}, stopping",
355                MAX_ITERATION_COUNT, index
356            );
357            break;
358        }
359        let cleaned = strip_cabal_comment(lines[index]);
360        let trimmed = cleaned.trim();
361        let indent = indentation(cleaned);
362
363        if trimmed.is_empty() {
364            index += 1;
365            continue;
366        }
367
368        if indent == 0 && !trimmed.contains(':') {
369            current_component = parse_component_header(trimmed);
370            in_source_repository = trimmed.starts_with("source-repository");
371            index += 1;
372            continue;
373        }
374
375        let Some((key, value, next_index)) = collect_indented_field(&lines, index) else {
376            index += 1;
377            continue;
378        };
379
380        match key.as_str() {
381            "name" if indent == 0 => data.name = clean_single_line(&value).map(truncate_field),
382            "version" if indent == 0 => {
383                data.version = clean_single_line(&value).map(truncate_field)
384            }
385            "synopsis" if indent == 0 => {
386                data.synopsis = clean_single_line(&value).map(truncate_field)
387            }
388            "description" if indent == 0 => {
389                data.description = normalize_cabal_multiline(&value).map(truncate_field);
390            }
391            "license" if indent == 0 => {
392                data.license = clean_single_line(&value).map(truncate_field)
393            }
394            "homepage" if indent == 0 => {
395                data.homepage_url = clean_single_line(&value).map(truncate_field)
396            }
397            "bug-reports" if indent == 0 => {
398                data.bug_tracking_url = clean_single_line(&value).map(truncate_field)
399            }
400            "author" if indent == 0 => data.authors.extend(split_comma_separated(&value)),
401            "maintainer" if indent == 0 => {
402                data.maintainers.extend(split_comma_separated(&value));
403            }
404            "category" if indent == 0 => {
405                data.category_keywords.extend(split_keywords(&value));
406            }
407            "keywords" if indent == 0 => {
408                data.explicit_keywords.extend(split_keywords(&value));
409            }
410            "location" if in_source_repository && data.vcs_url.is_none() => {
411                data.vcs_url = clean_single_line(&value).map(truncate_field);
412            }
413            "build-depends" => {
414                data.dependencies
415                    .extend(parse_build_depends(&value, current_component.as_ref()));
416            }
417            _ => {}
418        }
419
420        index = next_index + 1;
421    }
422
423    data
424}
425
426fn parse_build_depends(value: &str, component: Option<&ComponentContext>) -> Vec<Dependency> {
427    if component.is_some_and(|component| component.component_type == "common") {
428        return Vec::new();
429    }
430
431    split_dependency_entries(value)
432        .into_iter()
433        .filter_map(|entry| {
434            parse_hackage_spec_dependency(&entry, Some("build-depends"), component, None)
435        })
436        .collect()
437}
438
439fn parse_path_like_entries(value: &str, scope: &str, optional: bool) -> Vec<Dependency> {
440    split_multiline_entries(value)
441        .into_iter()
442        .filter(|entry| !entry.is_empty())
443        .map(|entry| {
444            let mut extra_data = HashMap::new();
445            extra_data.insert("path".to_string(), JsonValue::String(entry.clone()));
446
447            Dependency {
448                purl: None,
449                extracted_requirement: Some(truncate_field(entry)),
450                scope: Some(scope.to_string()),
451                is_runtime: None,
452                is_optional: Some(optional),
453                is_pinned: Some(false),
454                is_direct: Some(true),
455                resolved_package: None,
456                extra_data: Some(extra_data),
457            }
458        })
459        .collect()
460}
461
462fn parse_import_entries(value: &str) -> Vec<Dependency> {
463    split_multiline_entries(value)
464        .into_iter()
465        .filter(|entry| !entry.is_empty())
466        .map(|entry| Dependency {
467            purl: None,
468            extracted_requirement: Some(truncate_field(entry)),
469            scope: Some("import".to_string()),
470            is_runtime: None,
471            is_optional: Some(false),
472            is_pinned: Some(false),
473            is_direct: Some(true),
474            resolved_package: None,
475            extra_data: None,
476        })
477        .collect()
478}
479
480fn parse_hackage_spec_entries(
481    value: &str,
482    scope: &str,
483    is_runtime: Option<bool>,
484) -> Vec<Dependency> {
485    split_multiline_entries(value)
486        .into_iter()
487        .filter_map(|entry| parse_hackage_spec_dependency(&entry, Some(scope), None, is_runtime))
488        .collect()
489}
490
491fn parse_stack_package_entries(value: &YamlValue) -> Vec<Dependency> {
492    let Some(sequence) = value.as_sequence() else {
493        return Vec::new();
494    };
495
496    sequence
497        .iter()
498        .take(MAX_ITERATION_COUNT)
499        .filter_map(|entry| match entry {
500            YamlValue::String(path) => {
501                let mut extra_data = HashMap::new();
502                extra_data.insert("path".to_string(), JsonValue::String(path.clone()));
503
504                Some(Dependency {
505                    purl: None,
506                    extracted_requirement: Some(truncate_field(path.clone())),
507                    scope: Some("packages".to_string()),
508                    is_runtime: None,
509                    is_optional: Some(false),
510                    is_pinned: Some(false),
511                    is_direct: Some(true),
512                    resolved_package: None,
513                    extra_data: Some(extra_data),
514                })
515            }
516            YamlValue::Mapping(map) => {
517                let extracted_requirement = mapping_string(map, "location")
518                    .or_else(|| mapping_string(map, "git"))
519                    .or_else(|| mapping_string(map, "url"))
520                    .map(truncate_field);
521                let extra_data = serde_json::to_value(entry)
522                    .ok()
523                    .and_then(|value| value.as_object().cloned())
524                    .map(|map| map.into_iter().collect::<HashMap<_, _>>());
525
526                Some(Dependency {
527                    purl: None,
528                    extracted_requirement,
529                    scope: Some("packages".to_string()),
530                    is_runtime: None,
531                    is_optional: Some(false),
532                    is_pinned: Some(mapping_string(map, "commit").is_some()),
533                    is_direct: Some(true),
534                    resolved_package: None,
535                    extra_data,
536                })
537            }
538            _ => None,
539        })
540        .collect()
541}
542
543fn parse_stack_extra_dep_entries(value: &YamlValue) -> Vec<Dependency> {
544    let Some(sequence) = value.as_sequence() else {
545        return Vec::new();
546    };
547
548    sequence
549        .iter()
550        .take(MAX_ITERATION_COUNT)
551        .filter_map(|entry| match entry {
552            YamlValue::String(spec) => parse_stack_extra_dep_string(spec),
553            YamlValue::Mapping(map) => Some(parse_stack_extra_dep_mapping(map, entry)),
554            _ => None,
555        })
556        .collect()
557}
558
559fn parse_stack_extra_dep_string(spec: &str) -> Option<Dependency> {
560    let trimmed = spec.trim();
561    if trimmed.is_empty() {
562        return None;
563    }
564
565    let (package_spec, pantry_suffix) = trimmed
566        .split_once('@')
567        .map_or((trimmed, None), |(package_spec, suffix)| {
568            (package_spec, Some(suffix))
569        });
570
571    let mut dependency =
572        parse_hackage_spec_dependency(package_spec, Some("extra-deps"), None, None).unwrap_or(
573            Dependency {
574                purl: None,
575                extracted_requirement: Some(truncate_field(package_spec.to_string())),
576                scope: Some("extra-deps".to_string()),
577                is_runtime: None,
578                is_optional: Some(false),
579                is_pinned: Some(false),
580                is_direct: Some(true),
581                resolved_package: None,
582                extra_data: None,
583            },
584        );
585
586    if let Some(suffix) = pantry_suffix {
587        let mut extra_data = dependency.extra_data.take().unwrap_or_default();
588        extra_data.insert("pantry".to_string(), JsonValue::String(suffix.to_string()));
589        dependency.extra_data = Some(extra_data);
590        dependency.is_pinned = Some(true);
591        if dependency.extracted_requirement.is_none() {
592            dependency.extracted_requirement = Some(truncate_field(package_spec.to_string()));
593        }
594    }
595
596    dependency.scope = Some("extra-deps".to_string());
597    Some(dependency)
598}
599
600fn parse_stack_extra_dep_mapping(map: &Mapping, raw_value: &YamlValue) -> Dependency {
601    let name = mapping_string(map, "name");
602    let version = mapping_string(map, "version");
603    let purl = build_hackage_purl(name.as_deref(), version.as_deref()).map(truncate_field);
604    let extracted_requirement = version
605        .clone()
606        .or_else(|| mapping_string(map, "git"))
607        .or_else(|| mapping_string(map, "url"))
608        .map(truncate_field);
609    let extra_data = serde_json::to_value(raw_value)
610        .ok()
611        .and_then(|value| value.as_object().cloned())
612        .map(|map| map.into_iter().collect::<HashMap<_, _>>());
613
614    Dependency {
615        purl,
616        extracted_requirement,
617        scope: Some("extra-deps".to_string()),
618        is_runtime: None,
619        is_optional: Some(false),
620        is_pinned: Some(version.is_some() || mapping_string(map, "commit").is_some()),
621        is_direct: Some(true),
622        resolved_package: None,
623        extra_data,
624    }
625}
626
627fn build_source_repository_dependency(extra_data: HashMap<String, JsonValue>) -> Dependency {
628    let extracted_requirement = extra_data
629        .get("location")
630        .and_then(JsonValue::as_str)
631        .map(str::to_string)
632        .or_else(|| {
633            extra_data
634                .get("tag")
635                .and_then(JsonValue::as_str)
636                .map(str::to_string)
637        })
638        .map(truncate_field);
639
640    Dependency {
641        purl: None,
642        extracted_requirement,
643        scope: Some("source-repository-package".to_string()),
644        is_runtime: None,
645        is_optional: Some(false),
646        is_pinned: Some(
647            extra_data.contains_key("tag")
648                || extra_data.contains_key("commit")
649                || extra_data.contains_key("sha256"),
650        ),
651        is_direct: Some(true),
652        resolved_package: None,
653        extra_data: Some(extra_data),
654    }
655}
656
657fn parse_hackage_spec_dependency(
658    spec: &str,
659    scope: Option<&str>,
660    component: Option<&ComponentContext>,
661    is_runtime: Option<bool>,
662) -> Option<Dependency> {
663    let trimmed = spec.trim();
664    if trimmed.is_empty() {
665        return None;
666    }
667
668    let can_split_name_version = matches!(scope, Some("extra-packages" | "extra-deps"));
669
670    if can_split_name_version && let Some((name, version)) = split_hackage_name_version(trimmed) {
671        let mut extra_data = HashMap::new();
672        if let Some(component) = component {
673            extra_data.insert(
674                "component_type".to_string(),
675                JsonValue::String(component.component_type.clone()),
676            );
677            if let Some(component_name) = &component.component_name {
678                extra_data.insert(
679                    "component_name".to_string(),
680                    JsonValue::String(component_name.clone()),
681                );
682            }
683        }
684
685        return Some(Dependency {
686            purl: Some(truncate_field(format!("pkg:hackage/{}@{}", name, version))),
687            extracted_requirement: Some(truncate_field(version)),
688            scope: scope.map(str::to_string),
689            is_runtime: component.map(component_is_runtime).or(is_runtime),
690            is_optional: Some(false),
691            is_pinned: Some(true),
692            is_direct: Some(true),
693            resolved_package: None,
694            extra_data: (!extra_data.is_empty()).then_some(extra_data),
695        });
696    }
697
698    let name_re = Regex::new(r"^(?P<name>[A-Za-z0-9][A-Za-z0-9_\.-]*)").ok()?;
699    let captures = name_re.captures(trimmed)?;
700    let name = captures.name("name")?.as_str().to_string();
701    let requirement = trimmed[name.len()..].trim();
702    let implicit_name_version = if can_split_name_version && requirement.is_empty() {
703        split_hackage_name_version(trimmed)
704    } else {
705        None
706    };
707    let resolved_name = implicit_name_version
708        .as_ref()
709        .map(|(resolved_name, _)| resolved_name.as_str())
710        .unwrap_or(name.as_str());
711    let exact_version = exact_version_requirement(requirement).or_else(|| {
712        implicit_name_version
713            .as_ref()
714            .map(|(_, version)| version.clone())
715    });
716    let purl = if let Some(version) = exact_version.as_deref() {
717        Some(truncate_field(format!(
718            "pkg:hackage/{}@{}",
719            resolved_name, version
720        )))
721    } else {
722        Some(truncate_field(format!("pkg:hackage/{}", resolved_name)))
723    };
724
725    let mut extra_data = HashMap::new();
726    if let Some(component) = component {
727        extra_data.insert(
728            "component_type".to_string(),
729            JsonValue::String(component.component_type.clone()),
730        );
731        if let Some(component_name) = &component.component_name {
732            extra_data.insert(
733                "component_name".to_string(),
734                JsonValue::String(component_name.clone()),
735            );
736        }
737    }
738
739    let extracted_requirement = if let Some((_, version)) = implicit_name_version {
740        Some(truncate_field(version))
741    } else {
742        (!requirement.is_empty())
743            .then_some(requirement.to_string())
744            .map(truncate_field)
745    };
746
747    Some(Dependency {
748        purl,
749        extracted_requirement,
750        scope: scope.map(str::to_string),
751        is_runtime: component.map(component_is_runtime).or(is_runtime),
752        is_optional: Some(false),
753        is_pinned: Some(exact_version.is_some()),
754        is_direct: Some(true),
755        resolved_package: None,
756        extra_data: (!extra_data.is_empty()).then_some(extra_data),
757    })
758}
759
760fn component_is_runtime(component: &ComponentContext) -> bool {
761    !matches!(
762        component.component_type.as_str(),
763        "test-suite" | "benchmark"
764    )
765}
766
767fn parse_component_header(trimmed: &str) -> Option<ComponentContext> {
768    const COMPONENT_PREFIXES: &[&str] = &[
769        "library",
770        "foreign-library",
771        "executable",
772        "test-suite",
773        "benchmark",
774        "common",
775    ];
776
777    COMPONENT_PREFIXES.iter().find_map(|prefix| {
778        trimmed
779            .strip_prefix(prefix)
780            .map(|remainder| ComponentContext {
781                component_type: (*prefix).to_string(),
782                component_name: clean_single_line(remainder),
783            })
784    })
785}
786
787fn collect_indented_field(lines: &[&str], start_index: usize) -> Option<(String, String, usize)> {
788    let current = strip_cabal_comment(lines[start_index]);
789    let trimmed = current.trim();
790    let indent = indentation(current);
791    let colon_index = trimmed.find(':')?;
792    let key = trimmed[..colon_index].trim().to_ascii_lowercase();
793    let mut values = vec![trimmed[colon_index + 1..].trim().to_string()];
794    let mut last_index = start_index;
795
796    for (next_index, line) in lines.iter().enumerate().skip(start_index + 1) {
797        let next = strip_cabal_comment(line);
798        let next_trimmed = next.trim();
799        if next_trimmed.is_empty() {
800            break;
801        }
802
803        if indentation(next) <= indent {
804            break;
805        }
806
807        values.push(next_trimmed.to_string());
808        last_index = next_index;
809    }
810
811    Some((key, values.join("\n"), last_index))
812}
813
814fn split_dependency_entries(value: &str) -> Vec<String> {
815    let mut entries = Vec::new();
816    let mut current = String::new();
817    let mut paren_depth = 0usize;
818    let mut brace_depth = 0usize;
819    let mut bracket_depth = 0usize;
820
821    for character in value.chars().take(MAX_ITERATION_COUNT) {
822        match character {
823            '(' => paren_depth += 1,
824            ')' => paren_depth = paren_depth.saturating_sub(1),
825            '{' => brace_depth += 1,
826            '}' => brace_depth = brace_depth.saturating_sub(1),
827            '[' => bracket_depth += 1,
828            ']' => bracket_depth = bracket_depth.saturating_sub(1),
829            ',' if paren_depth == 0 && brace_depth == 0 && bracket_depth == 0 => {
830                let trimmed = current.trim();
831                if !trimmed.is_empty() {
832                    entries.push(trimmed.to_string());
833                }
834                current.clear();
835                continue;
836            }
837            _ => {}
838        }
839
840        current.push(character);
841    }
842
843    let trimmed = current.trim();
844    if !trimmed.is_empty() {
845        entries.push(trimmed.to_string());
846    }
847
848    entries
849}
850
851fn split_multiline_entries(value: &str) -> Vec<String> {
852    value
853        .lines()
854        .take(MAX_ITERATION_COUNT)
855        .map(str::trim)
856        .filter(|line| !line.is_empty())
857        .map(|line| line.strip_prefix("-").unwrap_or(line).trim().to_string())
858        .collect()
859}
860
861fn parse_multiline_scalar_or_list(value: &str) -> JsonValue {
862    let entries = split_multiline_entries(value);
863    if entries.len() <= 1 {
864        clean_single_line(value)
865            .map(JsonValue::String)
866            .unwrap_or(JsonValue::Null)
867    } else {
868        JsonValue::Array(entries.into_iter().map(JsonValue::String).collect())
869    }
870}
871
/// Normalizes a multi-line field value: every line is trimmed, a line that is
/// just `.` becomes an empty line, and the joined text is trimmed as a whole.
///
/// Returns `None` when nothing but whitespace remains.
fn normalize_cabal_multiline(value: &str) -> Option<String> {
    let mut combined = String::with_capacity(value.len());

    for (position, line) in value.lines().enumerate() {
        if position > 0 {
            combined.push('\n');
        }
        let line = line.trim();
        // A lone dot stands for a blank line and contributes no text.
        if line != "." {
            combined.push_str(line);
        }
    }

    let combined = combined.trim();
    if combined.is_empty() {
        None
    } else {
        Some(combined.to_string())
    }
}
888
/// Trims surrounding whitespace from `value`.
///
/// Returns `None` when the trimmed text is empty, otherwise the trimmed text
/// as an owned `String`. Interior content (including newlines) is untouched.
fn clean_single_line(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        text => Some(text.to_string()),
    }
}
893
/// Splits `value` on every comma and returns the trimmed, non-empty pieces
/// in their original order.
fn split_comma_separated(value: &str) -> Vec<String> {
    let mut parts = Vec::new();

    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            parts.push(piece.to_string());
        }
    }

    parts
}
902
/// Splits a keyword field value into individual keywords.
///
/// This is a thin wrapper: it forwards `value` unchanged to
/// `split_comma_separated` and returns its result as-is.
903fn split_keywords(value: &str) -> Vec<String> {
904    split_comma_separated(value)
905}
906
/// Merges `categories` and `keywords` into a single de-duplicated list.
///
/// Entries are visited in order (all categories first, then keywords) and
/// trimmed. Blank entries are skipped. Duplicates are detected by comparing
/// ASCII-lowercased text, and the first spelling encountered is the one kept.
fn merge_keywords(categories: &[String], keywords: &[String]) -> Vec<String> {
    let mut merged = Vec::new();
    let mut seen_lowercase = HashSet::new();

    for raw in categories.iter().chain(keywords) {
        let term = raw.trim();
        if term.is_empty() {
            continue;
        }

        // `insert` returns false when the lowercased form was already seen.
        if seen_lowercase.insert(term.to_ascii_lowercase()) {
            merged.push(term.to_string());
        }
    }

    merged
}
922
/// Combines an optional synopsis and an optional description into one text.
///
/// * Both present and equal: the text once.
/// * Both present and different: synopsis, a blank line, then description.
/// * Only one present: that one.
/// * Neither present: `None`.
fn combine_summary_and_description(
    synopsis: &Option<String>,
    description: &Option<String>,
) -> Option<String> {
    // Without a synopsis the description (possibly absent) is the answer.
    let Some(summary) = synopsis.as_deref() else {
        return description.clone();
    };

    match description.as_deref() {
        Some(body) if body != summary => Some(format!("{}\n\n{}", summary, body)),
        _ => Some(summary.to_string()),
    }
}
935
936fn build_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
937    let author_parties = authors
938        .iter()
939        .filter_map(|author| build_party(author, "author"));
940    let maintainer_parties = maintainers
941        .iter()
942        .filter_map(|maintainer| build_party(maintainer, "maintainer"));
943
944    author_parties.chain(maintainer_parties).collect()
945}
946
947fn build_party(value: &str, role: &str) -> Option<Party> {
948    let (name, email) = split_name_email(value.trim());
949    if name.is_none() && email.is_none() {
950        return None;
951    }
952
953    Some(Party {
954        r#type: Some("person".to_string()),
955        role: Some(role.to_string()),
956        name,
957        email,
958        url: None,
959        organization: None,
960        organization_url: None,
961        timezone: None,
962    })
963}
964
/// Formats a `pkg:hackage/...` package URL string.
///
/// Returns `pkg:hackage/<name>@<version>` when both parts are known,
/// `pkg:hackage/<name>` when only the name is known, and `None` when the
/// name is missing (regardless of `version`). The inputs are inserted
/// verbatim.
fn build_hackage_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
    let name = name?;

    let purl = match version {
        Some(version) => format!("pkg:hackage/{}@{}", name, version),
        None => format!("pkg:hackage/{}", name),
    };

    Some(purl)
}
972
/// Splits a `name-version` spec (for example `aeson-2.1.0.0`) into its parts.
///
/// Returns `None` when `spec` contains whitespace or any of the characters
/// `< > = & | ( )`, i.e. when it looks like a constraint expression rather
/// than a plain identifier. Otherwise hyphens are examined from right to
/// left, and the first one that has a non-empty name before it and a version
/// starting with an ASCII digit after it becomes the split point.
fn split_hackage_name_version(spec: &str) -> Option<(String, String)> {
    let looks_like_constraint = spec.contains(|character: char| {
        character.is_whitespace() || matches!(character, '<' | '>' | '=' | '&' | '|' | '(' | ')')
    });
    if looks_like_constraint {
        return None;
    }

    spec.rmatch_indices('-').find_map(|(dash, _)| {
        let (name, tail) = spec.split_at(dash);
        // `tail` begins with the one-byte hyphen itself; skip it.
        let version = &tail[1..];

        // An empty version has no leading digit, so it is rejected here too.
        let starts_with_digit = version.starts_with(|character: char| character.is_ascii_digit());

        (!name.is_empty() && starts_with_digit).then(|| (name.to_string(), version.to_string()))
    })
}
1003
1004fn exact_version_requirement(requirement: &str) -> Option<String> {
1005    let trimmed = requirement.trim();
1006    if trimmed.is_empty() {
1007        return None;
1008    }
1009
1010    let exact_re = Regex::new(r"^==\s*([A-Za-z0-9][A-Za-z0-9\.\-_+]*)$").ok()?;
1011    exact_re.captures(trimmed).and_then(|captures| {
1012        let version = captures.get(1)?.as_str();
1013        (!version.contains('*')).then_some(version.to_string())
1014    })
1015}
1016
/// Returns `key` with every `-` replaced by `_`.
fn project_extra_key(key: &str) -> String {
    key.chars()
        .map(|character| if character == '-' { '_' } else { character })
        .collect()
}
1020
/// Removes a `--` comment from a single line.
///
/// * A line whose first non-whitespace characters are `--` is a pure comment
///   and yields `""`.
/// * Otherwise the first `--` that sits at the start of the line or directly
///   after an ASCII whitespace byte starts a trailing comment; the text
///   before it is returned with trailing whitespace removed.
/// * A `--` glued to preceding non-whitespace text (e.g. `a--b`) is kept, and
///   a line without any comment is returned unchanged.
fn strip_cabal_comment(line: &str) -> &str {
    if line.trim_start().starts_with("--") {
        return "";
    }

    let bytes = line.as_bytes();
    let comment_start = bytes.windows(2).enumerate().find_map(|(at, pair)| {
        let after_whitespace = at == 0 || bytes[at - 1].is_ascii_whitespace();
        (pair == b"--" && after_whitespace).then_some(at)
    });

    match comment_start {
        // `at` points at an ASCII `-`, so it is always a valid char boundary.
        Some(at) => line[..at].trim_end(),
        None => line,
    }
}
1039
/// Counts the whitespace characters at the start of `line`.
///
/// The count is in characters, not bytes or display columns, so a tab counts
/// as one. A line made entirely of whitespace returns its full character
/// count.
fn indentation(line: &str) -> usize {
    let mut width = 0;

    for character in line.chars() {
        if !character.is_whitespace() {
            break;
        }
        width += 1;
    }

    width
}
1045
1046fn mapping_get<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
1047    mapping.get(YamlValue::String(key.to_string()))
1048}
1049
1050fn mapping_string(mapping: &Mapping, key: &str) -> Option<String> {
1051    mapping_get(mapping, key)
1052        .and_then(YamlValue::as_str)
1053        .map(str::to_string)
1054}
1055
// Invokes `register_parser!` for the `.cabal` manifest parser, passing a
// human-readable description, the `**/*.cabal` path glob, the strings
// "hackage" and "Haskell", and a link to the Cabal package-description docs.
// NOTE(review): the meaning of each positional argument is inferred from its
// value; confirm against the macro definition.
1056crate::register_parser!(
1057    "Hackage Cabal package manifest",
1058    &["**/*.cabal"],
1059    "hackage",
1060    "Haskell",
1061    Some("https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html"),
1062);
1063
// Invokes `register_parser!` for the `cabal.project` workspace parser,
// passing a description, the `**/cabal.project` path glob, the strings
// "hackage" and "Haskell", and a link to the cabal.project docs.
// NOTE(review): the meaning of each positional argument is inferred from its
// value; confirm against the macro definition.
1064crate::register_parser!(
1065    "Hackage cabal.project workspace file",
1066    &["**/cabal.project"],
1067    "hackage",
1068    "Haskell",
1069    Some("https://cabal.readthedocs.io/en/stable/cabal-project-description-file.html"),
1070);
1071
// Invokes `register_parser!` for the Stack project parser, passing a
// description, the `**/stack.yaml` path glob, the strings "hackage" and
// "Haskell", and a link to the Stack YAML configuration docs.
// NOTE(review): the meaning of each positional argument is inferred from its
// value; confirm against the macro definition.
1072crate::register_parser!(
1073    "Hackage Stack project manifest",
1074    &["**/stack.yaml"],
1075    "hackage",
1076    "Haskell",
1077    Some("https://docs.haskellstack.org/en/stable/configure/yaml/"),
1078);