Skip to main content

provenant/parsers/
opam.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for OCaml OPAM package manager manifests.
5//!
6//! Extracts package metadata and dependencies from OPAM files used by the
7//! OCaml ecosystem.
8//!
9//! # Supported Formats
10//! - *.opam files (OPAM package manifests)
11//! - opam files without extension
12//!
13//! # Key Features
14//! - Field-based parsing of OPAM's custom format (key: value)
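//! - Author and maintainer extraction (author strings become party names, maintainer
//!   strings become party emails; the strings are stored verbatim, without name/email splitting)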
16//! - URL extraction for source archives, homepage, repository
17//! - License statement extraction
18//! - Checksum extraction (sha1, md5, sha256, sha512)
19//!
20//! # Implementation Notes
21//! - OPAM format uses custom syntax, not JSON/YAML/TOML
22//! - Strings can be quoted or unquoted
23//! - Lists use bracket notation: [item1 item2]
24//! - Multi-line strings use three-quote notation: """..."""
25
26use std::path::Path;
27
28use crate::parser_warn as warn;
29use regex::Regex;
30
31use crate::models::{
32    DatasourceId, Dependency, Md5Digest, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
33    Sha512Digest,
34};
35use crate::parsers::PackageParser;
36use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
37
38use super::license_normalization::{
39    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
40    normalize_spdx_declared_license,
41};
42
43/// Parser for OCaml OPAM package manifest files.
44///
45/// Handles the OPAM file format used by the OCaml package manager.
46/// Reference: <https://opam.ocaml.org/doc/Manual.html#Common-file-format>
47pub struct OpamParser;
48
49impl PackageParser for OpamParser {
50    const PACKAGE_TYPE: PackageType = PackageType::Opam;
51
52    fn is_match(path: &Path) -> bool {
53        path.file_name().is_some_and(|name| {
54            name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
55        })
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        vec![match read_file_to_string(path, None) {
60            Ok(text) => parse_opam(&text),
61            Err(e) => {
62                warn!("Failed to read OPAM file {:?}: {}", path, e);
63                default_package_data()
64            }
65        }]
66    }
67}
68
69/// Parsed OPAM file data
70#[derive(Debug, Default)]
71struct OpamData {
72    name: Option<String>,
73    version: Option<String>,
74    synopsis: Option<String>,
75    description: Option<String>,
76    homepage: Option<String>,
77    dev_repo: Option<String>,
78    bug_reports: Option<String>,
79    src: Option<String>,
80    authors: Vec<String>,
81    maintainers: Vec<String>,
82    license: Option<String>,
83    sha1: Option<Sha1Digest>,
84    md5: Option<Md5Digest>,
85    sha256: Option<Sha256Digest>,
86    sha512: Option<Sha512Digest>,
87    dependencies: Vec<(String, String)>, // (name, version_constraint)
88}
89
90fn default_package_data() -> PackageData {
91    PackageData {
92        package_type: Some(OpamParser::PACKAGE_TYPE),
93        primary_language: Some("Ocaml".to_string()),
94        datasource_id: Some(DatasourceId::OpamFile),
95        ..Default::default()
96    }
97}
98
99/// Parse an OPAM file from text content
100fn parse_opam(text: &str) -> PackageData {
101    let opam_data = parse_opam_data(text);
102
103    let description = build_description(&opam_data.synopsis, &opam_data.description);
104    let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
105    let dependencies = extract_dependencies(&opam_data.dependencies);
106
107    let (repository_homepage_url, api_data_url, purl) =
108        build_opam_urls(&opam_data.name, &opam_data.version);
109    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
110        normalize_opam_declared_license(opam_data.license.as_deref());
111
112    PackageData {
113        package_type: Some(OpamParser::PACKAGE_TYPE),
114        namespace: None,
115        name: opam_data.name,
116        version: opam_data.version,
117        qualifiers: None,
118        subpath: None,
119        primary_language: Some("Ocaml".to_string()),
120        description,
121        release_date: None,
122        parties,
123        keywords: Vec::new(),
124        homepage_url: opam_data.homepage,
125        download_url: opam_data.src,
126        size: None,
127        sha1: opam_data.sha1,
128        md5: opam_data.md5,
129        sha256: opam_data.sha256,
130        sha512: opam_data.sha512,
131        bug_tracking_url: opam_data.bug_reports,
132        code_view_url: None,
133        vcs_url: opam_data.dev_repo,
134        copyright: None,
135        holder: None,
136        declared_license_expression,
137        declared_license_expression_spdx,
138        license_detections,
139        other_license_expression: None,
140        other_license_expression_spdx: None,
141        other_license_detections: Vec::new(),
142        extracted_license_statement: opam_data.license,
143        notice_text: None,
144        source_packages: Vec::new(),
145        file_references: Vec::new(),
146        is_private: false,
147        is_virtual: false,
148        extra_data: None,
149        dependencies,
150        repository_homepage_url,
151        repository_download_url: None,
152        api_data_url,
153        datasource_id: Some(DatasourceId::OpamFile),
154        purl,
155    }
156}
157
158fn normalize_opam_declared_license(
159    statement: Option<&str>,
160) -> (
161    Option<String>,
162    Option<String>,
163    Vec<crate::models::LicenseDetection>,
164) {
165    let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
166        return super::license_normalization::empty_declared_license_data();
167    };
168
169    match statement {
170        "GPL-2.0-only" => build_declared_license_data_from_pair(
171            "gpl-2.0",
172            "GPL-2.0-only",
173            DeclaredLicenseMatchMetadata::single_line(statement),
174        ),
175        "GPL-3.0-only" => build_declared_license_data_from_pair(
176            "gpl-3.0",
177            "GPL-3.0-only",
178            DeclaredLicenseMatchMetadata::single_line(statement),
179        ),
180        "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
181            "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
182            "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
183            DeclaredLicenseMatchMetadata::single_line(statement),
184        ),
185        _ => normalize_spdx_declared_license(Some(statement)),
186    }
187}
188
/// Derive the opam.ocaml.org page URL, the opam-repository file URL and the
/// package URL (purl) for a package.
///
/// Returns `(repository_homepage_url, api_data_url, purl)`. Without a name
/// all three are `None`; without a version the repository file URL is `None`
/// and the purl carries no version.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    // Every output needs the package name.
    let Some(pkg) = name.as_deref() else {
        return (None, None, None);
    };

    let homepage = format!("https://opam.ocaml.org/packages/{}", pkg);

    let (api_data_url, purl) = match version.as_deref() {
        Some(ver) => (
            Some(format!(
                "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
                pkg, pkg, ver
            )),
            format!("pkg:opam/{}@{}", pkg, ver),
        ),
        None => (None, format!("pkg:opam/{}", pkg)),
    };

    (Some(homepage), api_data_url, Some(purl))
}
213
214/// Parse OPAM file text into structured data
215fn parse_opam_data(text: &str) -> OpamData {
216    let mut data = OpamData::default();
217    let lines: Vec<&str> = text.lines().collect();
218    let mut i = 0;
219    let mut iteration_count: usize = 0;
220
221    while i < lines.len() {
222        iteration_count += 1;
223        if iteration_count > MAX_ITERATION_COUNT {
224            warn!("parse_opam_data: exceeded MAX_ITERATION_COUNT, breaking");
225            break;
226        }
227        let line = lines[i];
228
229        // Parse key: value format
230        if let Some((key, value)) = parse_key_value(line) {
231            match key.as_str() {
232                "name" => data.name = clean_value(&value),
233                "version" => data.version = clean_value(&value),
234                "synopsis" => data.synopsis = clean_value(&value),
235                "description" => {
236                    data.description = parse_description_field(&lines, &mut i, &value);
237                }
238                "homepage" => data.homepage = clean_value(&value),
239                "dev-repo" => data.dev_repo = clean_value(&value),
240                "bug-reports" => data.bug_reports = clean_value(&value),
241                "src" => {
242                    if value.trim().is_empty() && i + 1 < lines.len() {
243                        i += 1;
244                        data.src = clean_value(lines[i]);
245                    } else {
246                        data.src = clean_value(&value);
247                    }
248                }
249                "license" => data.license = clean_value(&value),
250                "authors" => {
251                    data.authors = parse_string_array(&lines, &mut i, &value);
252                }
253                "maintainer" => {
254                    data.maintainers = parse_string_array(&lines, &mut i, &value);
255                }
256                "depends" => {
257                    data.dependencies = parse_dependency_array(&lines, &mut i);
258                }
259                "checksum" => {
260                    parse_checksums(&lines, &mut i, &mut data);
261                }
262                _ => {}
263            }
264        }
265
266        i += 1;
267    }
268
269    data
270}
271
/// Split a `key: value` line at its first colon.
///
/// Returns the trimmed key and the trimmed remainder of the line. Blank
/// lines, lines starting with `#` and lines without a colon yield `None`.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    trimmed
        .split_once(':')
        .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
}
287
288/// Clean a value by removing quotes and brackets
289fn clean_value(value: &str) -> Option<String> {
290    let cleaned = value
291        .trim()
292        .trim_matches('"')
293        .trim_matches('[')
294        .trim_matches(']')
295        .trim();
296
297    if cleaned.is_empty() {
298        None
299    } else {
300        Some(truncate_field(cleaned.to_string()))
301    }
302}
303
304/// Parse an OPAM description field.
305///
306/// OPAM descriptions can be encoded as an inline quoted string, a quoted string
307/// on the following line, or a triple-quoted multiline string.
308fn parse_description_field(lines: &[&str], i: &mut usize, first_value: &str) -> Option<String> {
309    let trimmed = first_value.trim();
310
311    if trimmed.is_empty() {
312        let next_trimmed = lines.get(*i + 1)?.trim();
313
314        if next_trimmed.starts_with("\"\"\"") {
315            *i += 1;
316            return parse_triple_quoted_string(lines, i, next_trimmed);
317        }
318
319        if next_trimmed.starts_with('"') {
320            *i += 1;
321            return clean_value(next_trimmed);
322        }
323
324        return None;
325    }
326
327    if trimmed.starts_with("\"\"\"") {
328        return parse_triple_quoted_string(lines, i, trimmed);
329    }
330
331    clean_value(trimmed)
332}
333
334/// Parse a multiline string enclosed in triple quotes.
335fn parse_triple_quoted_string(lines: &[&str], i: &mut usize, first_value: &str) -> Option<String> {
336    let mut result = String::new();
337    let mut iteration_count: usize = 0;
338
339    let first_content = first_value.trim().trim_start_matches("\"\"\"");
340    if let Some(end_index) = first_content.find("\"\"\"") {
341        let cleaned = first_content[..end_index].trim();
342        return (!cleaned.is_empty()).then(|| truncate_field(cleaned.to_string()));
343    }
344
345    if !first_content.trim().is_empty() {
346        result.push_str(first_content.trim());
347    }
348
349    *i += 1;
350    while *i < lines.len() {
351        iteration_count += 1;
352        if iteration_count > MAX_ITERATION_COUNT {
353            warn!("parse_multiline_string: exceeded MAX_ITERATION_COUNT, breaking");
354            break;
355        }
356        let line = lines[*i].trim();
357
358        if let Some(end_index) = line.find("\"\"\"") {
359            let before_end = line[..end_index].trim();
360            if !before_end.is_empty() {
361                if !result.is_empty() {
362                    result.push(' ');
363                }
364                result.push_str(before_end);
365            }
366            break;
367        }
368
369        let content = line.trim_matches('"').trim();
370        if !result.is_empty() {
371            result.push(' ');
372        }
373        result.push_str(content);
374        *i += 1;
375    }
376
377    let cleaned = result.trim().to_string();
378    if cleaned.is_empty() {
379        None
380    } else {
381        Some(truncate_field(cleaned))
382    }
383}
384
385/// Parse a string array (single-line or multiline)
386fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
387    let mut result = Vec::new();
388    let mut iteration_count: usize = 0;
389
390    let mut content = first_value.to_string();
391
392    if content.contains('[') && !content.contains(']') {
393        *i += 1;
394        while *i < lines.len() {
395            iteration_count += 1;
396            if iteration_count > MAX_ITERATION_COUNT {
397                warn!("parse_string_array: exceeded MAX_ITERATION_COUNT, breaking");
398                break;
399            }
400            let line = lines[*i];
401            content.push(' ');
402            content.push_str(line);
403
404            if line.contains(']') {
405                break;
406            }
407            *i += 1;
408        }
409    }
410
411    let cleaned = content.trim_matches('[').trim_matches(']').trim();
412
413    for part in split_quoted_strings(cleaned) {
414        let p = part.trim_matches('"').trim();
415        if !p.is_empty() {
416            result.push(truncate_field(p.to_string()));
417        }
418    }
419
420    result
421}
422
423/// Parse dependency array
424fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
425    let mut result = Vec::new();
426    let mut iteration_count: usize = 0;
427
428    *i += 1;
429    while *i < lines.len() {
430        iteration_count += 1;
431        if iteration_count > MAX_ITERATION_COUNT {
432            warn!("parse_dependency_array: exceeded MAX_ITERATION_COUNT, breaking");
433            break;
434        }
435        let line = lines[*i];
436
437        if line.trim().contains(']') {
438            break;
439        }
440
441        if let Some((name, version)) = parse_dependency_line(line) {
442            result.push((name, version));
443        }
444
445        *i += 1;
446    }
447
448    result
449}
450
451/// Parse a single dependency line: "name" {version_constraint}
452fn parse_dependency_line(line: &str) -> Option<(String, String)> {
453    let line = line.trim();
454    if line.is_empty() {
455        return None;
456    }
457
458    // Match: "name" {optional version}
459    let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
460    let caps = regex.captures(line)?;
461
462    let name = truncate_field(caps.get(1)?.as_str().to_string());
463    let version_part = caps.get(2)?.as_str().trim();
464
465    // Extract the operator and version constraint
466    let constraint = if version_part.is_empty() {
467        String::new()
468    } else {
469        truncate_field(extract_version_constraint(version_part))
470    };
471
472    Some((name, constraint))
473}
474
475/// Extract version constraint from {>= "1.0"} format
476fn extract_version_constraint(version_part: &str) -> String {
477    let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
478    if let Ok(re) = regex
479        && let Some(caps) = re.captures(version_part)
480    {
481        let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
482        let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
483        if !op.is_empty() && !ver.is_empty() {
484            return format!("{} {}", op, ver);
485        }
486    }
487
488    // If regex parsing fails, try to extract raw content
489    let content = version_part
490        .trim_matches('{')
491        .trim_matches('}')
492        .trim_matches('"')
493        .trim();
494
495    content.replace('"', "")
496}
497
498/// Parse checksums from checksum array
499fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
500    if let Some((_, first_value)) = parse_key_value(lines[*i]) {
501        let inline = first_value.trim();
502        if !inline.is_empty() && inline != "[" {
503            if let Some((key, value)) = parse_checksum_line(inline) {
504                match key.as_str() {
505                    "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
506                    "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
507                    "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
508                    "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
509                    _ => {}
510                }
511            }
512            return;
513        }
514    }
515
516    let mut iteration_count: usize = 0;
517    *i += 1;
518    while *i < lines.len() {
519        iteration_count += 1;
520        if iteration_count > MAX_ITERATION_COUNT {
521            warn!("parse_checksums: exceeded MAX_ITERATION_COUNT, breaking");
522            break;
523        }
524        let line = lines[*i];
525
526        if line.trim().contains(']') {
527            break;
528        }
529
530        if let Some((key, value)) = parse_checksum_line(line) {
531            match key.as_str() {
532                "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
533                "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
534                "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
535                "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
536                _ => {}
537            }
538        }
539
540        *i += 1;
541    }
542}
543
544/// Parse a single checksum line: algo=hash
545fn parse_checksum_line(line: &str) -> Option<(String, String)> {
546    let line = line.trim().trim_matches('"').trim();
547
548    let regex = Regex::new(r"^(\w+)\s*=\s*(.+)$").ok()?;
549    let caps = regex.captures(line)?;
550
551    let key = caps.get(1)?.as_str().to_string();
552    let value = caps.get(2)?.as_str().to_string();
553
554    Some((key, value))
555}
556
/// Split text such as `"str1" "str2" "str3"` into its individual strings.
///
/// Double quotes toggle quoting and are never copied to the output. A space
/// outside quotes ends the current piece; spaces inside quotes are kept.
/// Empty pieces are dropped.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut pieces = Vec::new();
    let mut buffer = String::new();
    let mut inside_quotes = false;

    for ch in content.chars() {
        if ch == '"' {
            inside_quotes = !inside_quotes;
        } else if ch == ' ' && !inside_quotes {
            // Separator: flush whatever has been gathered so far.
            if !buffer.is_empty() {
                pieces.push(std::mem::take(&mut buffer));
            }
        } else {
            buffer.push(ch);
        }
    }

    if !buffer.is_empty() {
        pieces.push(buffer);
    }

    pieces
}
582
/// Combine synopsis and description into one text.
///
/// The values that are present are joined with a newline, synopsis first.
/// Returns `None` when both are absent.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    let parts: Vec<&str> = synopsis
        .iter()
        .chain(description.iter())
        .map(String::as_str)
        .collect();

    (!parts.is_empty()).then(|| parts.join("\n"))
}
597
598/// Extract parties from authors and maintainers
599fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
600    let mut parties = Vec::new();
601
602    // Add authors
603    for author in authors {
604        parties.push(Party {
605            r#type: Some("person".to_string()),
606            role: Some("author".to_string()),
607            name: Some(truncate_field(author.clone())),
608            email: None,
609            url: None,
610            organization: None,
611            organization_url: None,
612            timezone: None,
613        });
614    }
615
616    // Add maintainers (as email)
617    for maintainer in maintainers {
618        parties.push(Party {
619            r#type: Some("person".to_string()),
620            role: Some("maintainer".to_string()),
621            name: None,
622            email: Some(truncate_field(maintainer.clone())),
623            url: None,
624            organization: None,
625            organization_url: None,
626            timezone: None,
627        });
628    }
629
630    parties
631}
632
633/// Extract dependencies into Dependency objects
634fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
635    deps.iter()
636        .map(|(name, version_constraint)| Dependency {
637            purl: Some(truncate_field(format!("pkg:opam/{}", name))),
638            extracted_requirement: Some(truncate_field(version_constraint.clone())),
639            scope: Some("dependency".to_string()),
640            is_runtime: Some(true),
641            is_optional: Some(false),
642            is_pinned: Some(false),
643            is_direct: Some(true),
644            resolved_package: None,
645            extra_data: None,
646        })
647        .collect()
648}
649
// Unit tests for the OPAM parser: path matching, the line-level helpers, and
// end-to-end parsing of two small manifests.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is accepted.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is accepted.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Any other file name is rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value keeps its quotes; only the key/value split is done here.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped and an empty quoted string yields `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // Operator and quoted version are rendered as `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // A dependency with a version filter yields name and constraint.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter yields an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Two quoted strings separated by a space are split and unquoted.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // Fields that follow an inline, single-line description must still be parsed.
    #[test]
    fn test_parse_opam_keeps_fields_after_single_line_description() {
        let package = parse_opam(
            r#"opam-version: "2.0"
name: "dune-rpc"
version: "3.23.0"
description: "Library to connect and control a running dune instance"
maintainer: ["Jane Street Group, LLC <opensource@janestreet.com>"]
authors: ["Jane Street Group, LLC <opensource@janestreet.com>"]
license: "MIT"
homepage: "https://github.com/ocaml/dune"
bug-reports: "https://github.com/ocaml/dune/issues"
depends: [
  "dune" {>= "3.23"}
  "ocamlc-loc"
  "stdune" {= version}
  "odoc" {with-doc}
]
dev-repo: "git+https://github.com/ocaml/dune.git"
"#,
        );

        assert_eq!(package.name.as_deref(), Some("dune-rpc"));
        assert_eq!(package.version.as_deref(), Some("3.23.0"));
        assert_eq!(
            package.description.as_deref(),
            Some("Library to connect and control a running dune instance")
        );
        assert_eq!(
            package.homepage_url.as_deref(),
            Some("https://github.com/ocaml/dune")
        );
        assert_eq!(
            package.bug_tracking_url.as_deref(),
            Some("https://github.com/ocaml/dune/issues")
        );
        assert_eq!(
            package.vcs_url.as_deref(),
            Some("git+https://github.com/ocaml/dune.git")
        );
        assert_eq!(
            package.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(package.dependencies.len(), 4);
        assert_eq!(
            package.dependencies[0].purl.as_deref(),
            Some("pkg:opam/dune")
        );
        assert_eq!(
            package.dependencies[0].extracted_requirement.as_deref(),
            Some(">= 3.23")
        );
        assert_eq!(
            package.dependencies[2].extracted_requirement.as_deref(),
            Some("= version")
        );
        assert_eq!(
            package.dependencies[3].extracted_requirement.as_deref(),
            Some("with-doc")
        );
    }

    // Fields that follow a description placed on the next line must still be parsed.
    #[test]
    fn test_parse_opam_keeps_fields_after_next_line_description() {
        let package = parse_opam(
            r#"opam-version: "2.0"
name: "chrome-trace"
version: "3.23.0"
description:
  "This library offers no backwards compatibility guarantees. Use at your own risk."
maintainer: ["Jane Street Group, LLC <opensource@janestreet.com>"]
license: "MIT"
depends: [
  "dune" {>= "3.23"}
  "ocaml" {>= "4.14"}
  "odoc" {with-doc}
]
dev-repo: "git+https://github.com/ocaml/dune.git"
"#,
        );

        assert_eq!(package.name.as_deref(), Some("chrome-trace"));
        assert_eq!(
            package.description.as_deref(),
            Some(
                "This library offers no backwards compatibility guarantees. Use at your own risk."
            )
        );
        assert_eq!(
            package.vcs_url.as_deref(),
            Some("git+https://github.com/ocaml/dune.git")
        );
        assert_eq!(package.dependencies.len(), 3);
        assert_eq!(
            package.dependencies[1].purl.as_deref(),
            Some("pkg:opam/ocaml")
        );
        assert_eq!(
            package.dependencies[1].extracted_requirement.as_deref(),
            Some(">= 4.14")
        );
        assert_eq!(
            package.dependencies[2].extracted_requirement.as_deref(),
            Some("with-doc")
        );
    }

    // Authors populate `name`, maintainers populate `email`, authors come first.
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }

    // The lowercase `with` statement maps to the fixed expression pair.
    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let (declared, declared_spdx, detections) = normalize_opam_declared_license(Some(
            "LGPL-3.0-only with OCaml-LGPL-linking-exception",
        ));

        assert_eq!(
            declared.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            declared_spdx.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
868
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere. The arguments look like a
// human-readable description, the path globs the parser applies to, a package
// type label, the primary language and a documentation URL — confirm against
// the macro definition.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);