Skip to main content

provenant/parsers/
opam.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for OCaml OPAM package manager manifests.
5//!
6//! Extracts package metadata and dependencies from OPAM files used by the
7//! OCaml ecosystem.
8//!
9//! # Supported Formats
10//! - *.opam files (OPAM package manifests)
11//! - opam files without extension
12//!
13//! # Key Features
14//! - Field-based parsing of OPAM's custom format (key: value)
//! - Author and maintainer extraction (maintainer entries are stored verbatim in the party's email field)
16//! - URL extraction for source archives, homepage, repository
17//! - License statement extraction
18//! - Checksum extraction (sha1, md5, sha256, sha512)
19//!
20//! # Implementation Notes
21//! - OPAM format uses custom syntax, not JSON/YAML/TOML
22//! - Strings can be quoted or unquoted
23//! - Lists use bracket notation: [item1 item2]
24//! - Multi-line strings use three-quote notation: """..."""
25
26use std::path::Path;
27
28use crate::parser_warn as warn;
29use regex::Regex;
30
31use super::metadata::ParserMetadata;
32use crate::models::{
33    DatasourceId, Dependency, Md5Digest, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
34    Sha512Digest,
35};
36use crate::parsers::PackageParser;
37use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
38
39use super::license_normalization::{
40    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
41    normalize_spdx_declared_license,
42};
43
/// Parser for OCaml OPAM package manifest files.
///
/// Handles the OPAM file format used by the OCaml package manager.
/// This is a stateless unit struct: all behavior lives in its
/// `PackageParser` implementation.
///
/// Reference: <https://opam.ocaml.org/doc/Manual.html#Common-file-format>
pub struct OpamParser;
49
50impl PackageParser for OpamParser {
51    const PACKAGE_TYPE: PackageType = PackageType::Opam;
52
53    fn metadata() -> Vec<ParserMetadata> {
54        vec![ParserMetadata {
55            description: "OCaml OPAM package manifest",
56            file_patterns: &["**/*.opam", "**/opam"],
57            package_type: "opam",
58            primary_language: "OCaml",
59            documentation_url: Some("https://opam.ocaml.org/doc/Manual.html"),
60        }]
61    }
62
63    fn is_match(path: &Path) -> bool {
64        path.file_name().is_some_and(|name| {
65            name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
66        })
67    }
68
69    fn extract_packages(path: &Path) -> Vec<PackageData> {
70        vec![match read_file_to_string(path, None) {
71            Ok(text) => parse_opam(&text),
72            Err(e) => {
73                warn!("Failed to read OPAM file {:?}: {}", path, e);
74                default_package_data()
75            }
76        }]
77    }
78}
79
/// Parsed OPAM file data.
///
/// Intermediate, format-level view of a manifest produced by
/// `parse_opam_data` and converted into a `PackageData` by `parse_opam`.
#[derive(Debug, Default)]
struct OpamData {
    // Value of the `name` field.
    name: Option<String>,
    // Value of the `version` field.
    version: Option<String>,
    // Value of the `synopsis` field (short description).
    synopsis: Option<String>,
    // Value of the `description` field (inline, next-line or triple-quoted).
    description: Option<String>,
    // Value of the `homepage` field.
    homepage: Option<String>,
    // Value of the `dev-repo` field; becomes the package's VCS URL.
    dev_repo: Option<String>,
    // Value of the `bug-reports` field.
    bug_reports: Option<String>,
    // Value of the `src` field; becomes the package's download URL.
    src: Option<String>,
    // Entries of the `authors` list.
    authors: Vec<String>,
    // Entries of the `maintainer` list.
    maintainers: Vec<String>,
    // Raw value of the `license` field.
    license: Option<String>,
    // Digests collected from `checksum` entries, one slot per algorithm.
    sha1: Option<Sha1Digest>,
    md5: Option<Md5Digest>,
    sha256: Option<Sha256Digest>,
    sha512: Option<Sha512Digest>,
    dependencies: Vec<(String, String)>, // (name, version_constraint)
}
100
/// Builds the minimal package record used when a manifest cannot be read.
///
/// Only the package type, primary language and datasource id are set; every
/// other field keeps its `Default` value.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(OpamParser::PACKAGE_TYPE),
        primary_language: Some("Ocaml".to_string()),
        datasource_id: Some(DatasourceId::OpamFile),
        ..Default::default()
    }
}
109
110/// Parse an OPAM file from text content
111fn parse_opam(text: &str) -> PackageData {
112    let opam_data = parse_opam_data(text);
113
114    let description = build_description(&opam_data.synopsis, &opam_data.description);
115    let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
116    let dependencies = extract_dependencies(&opam_data.dependencies);
117
118    let (repository_homepage_url, api_data_url, purl) =
119        build_opam_urls(&opam_data.name, &opam_data.version);
120    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
121        normalize_opam_declared_license(opam_data.license.as_deref());
122
123    PackageData {
124        package_type: Some(OpamParser::PACKAGE_TYPE),
125        namespace: None,
126        name: opam_data.name,
127        version: opam_data.version,
128        qualifiers: None,
129        subpath: None,
130        primary_language: Some("Ocaml".to_string()),
131        description,
132        release_date: None,
133        parties,
134        keywords: Vec::new(),
135        homepage_url: opam_data.homepage,
136        download_url: opam_data.src,
137        size: None,
138        sha1: opam_data.sha1,
139        md5: opam_data.md5,
140        sha256: opam_data.sha256,
141        sha512: opam_data.sha512,
142        bug_tracking_url: opam_data.bug_reports,
143        code_view_url: None,
144        vcs_url: opam_data.dev_repo,
145        copyright: None,
146        holder: None,
147        declared_license_expression,
148        declared_license_expression_spdx,
149        license_detections,
150        other_license_expression: None,
151        other_license_expression_spdx: None,
152        other_license_detections: Vec::new(),
153        extracted_license_statement: opam_data.license,
154        notice_text: None,
155        source_packages: Vec::new(),
156        file_references: Vec::new(),
157        is_private: false,
158        is_virtual: false,
159        extra_data: None,
160        dependencies,
161        repository_homepage_url,
162        repository_download_url: None,
163        api_data_url,
164        datasource_id: Some(DatasourceId::OpamFile),
165        purl,
166    }
167}
168
169fn normalize_opam_declared_license(
170    statement: Option<&str>,
171) -> (
172    Option<String>,
173    Option<String>,
174    Vec<crate::models::LicenseDetection>,
175) {
176    let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
177        return super::license_normalization::empty_declared_license_data();
178    };
179
180    match statement {
181        "GPL-2.0-only" => build_declared_license_data_from_pair(
182            "gpl-2.0",
183            "GPL-2.0-only",
184            DeclaredLicenseMatchMetadata::single_line(statement),
185        ),
186        "GPL-3.0-only" => build_declared_license_data_from_pair(
187            "gpl-3.0",
188            "GPL-3.0-only",
189            DeclaredLicenseMatchMetadata::single_line(statement),
190        ),
191        "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
192            "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
193            "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
194            DeclaredLicenseMatchMetadata::single_line(statement),
195        ),
196        _ => normalize_spdx_declared_license(Some(statement)),
197    }
198}
199
/// Derives `(repository_homepage_url, api_data_url, purl)` for a package.
///
/// Without a name nothing can be built and all three are `None`. With a name
/// but no version the API data URL is omitted and the purl is unversioned.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let Some(pkg_name) = name.as_deref() else {
        return (None, None, None);
    };

    let repository_homepage_url = format!("https://opam.ocaml.org/packages/{}", pkg_name);

    let (api_data_url, purl) = match version.as_deref() {
        Some(pkg_version) => (
            Some(format!(
                "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
                pkg_name, pkg_name, pkg_version
            )),
            format!("pkg:opam/{}@{}", pkg_name, pkg_version),
        ),
        None => (None, format!("pkg:opam/{}", pkg_name)),
    };

    (Some(repository_homepage_url), api_data_url, Some(purl))
}
224
/// Parse OPAM file text into structured data.
///
/// Walks the text line by line. Each line that splits into `key: value` is
/// dispatched on its key; unknown keys are ignored. Handlers for multi-line
/// fields receive the line cursor `i` and advance it past the lines they
/// consume. The walk stops early, with a warning, once more than
/// `MAX_ITERATION_COUNT` lines have been visited.
fn parse_opam_data(text: &str) -> OpamData {
    let mut data = OpamData::default();
    let lines: Vec<&str> = text.lines().collect();
    let mut i = 0;
    let mut iteration_count: usize = 0;

    while i < lines.len() {
        iteration_count += 1;
        if iteration_count > MAX_ITERATION_COUNT {
            warn!("parse_opam_data: exceeded MAX_ITERATION_COUNT, breaking");
            break;
        }
        let line = lines[i];

        // Parse key: value format
        if let Some((key, value)) = parse_key_value(line) {
            match key.as_str() {
                "name" => data.name = clean_value(&value),
                "version" => data.version = clean_value(&value),
                "synopsis" => data.synopsis = clean_value(&value),
                "description" => {
                    // May consume following lines (next-line or triple-quoted form).
                    data.description = parse_description_field(&lines, &mut i, &value);
                }
                "homepage" => data.homepage = clean_value(&value),
                "dev-repo" => data.dev_repo = clean_value(&value),
                "bug-reports" => data.bug_reports = clean_value(&value),
                "src" => {
                    // The URL may sit on the line after a bare `src:`.
                    if value.trim().is_empty() && i + 1 < lines.len() {
                        i += 1;
                        data.src = clean_value(lines[i]);
                    } else {
                        data.src = clean_value(&value);
                    }
                }
                "license" => data.license = clean_value(&value),
                "authors" => {
                    data.authors = parse_string_array(&lines, &mut i, &value);
                }
                "maintainer" => {
                    data.maintainers = parse_string_array(&lines, &mut i, &value);
                }
                "depends" => {
                    data.dependencies = parse_dependency_array(&lines, &mut i);
                }
                "checksum" => {
                    parse_checksums(&lines, &mut i, &mut data);
                }
                _ => {}
            }
        }

        // Step past the line the cursor currently rests on (the field line, or
        // the last line a multi-line handler stopped at).
        i += 1;
    }

    data
}
282
/// Splits a `key: value` line at its first colon.
///
/// Returns `None` for blank lines, lines starting with `#`, and lines without
/// a colon. Both the key and the value are trimmed; the value may be empty.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let entry = line.trim();
    if entry.is_empty() || entry.starts_with('#') {
        return None;
    }

    let (raw_key, raw_value) = entry.split_once(':')?;
    Some((raw_key.trim().to_string(), raw_value.trim().to_string()))
}
298
299/// Clean a value by removing quotes and brackets
300fn clean_value(value: &str) -> Option<String> {
301    let cleaned = value
302        .trim()
303        .trim_matches('"')
304        .trim_matches('[')
305        .trim_matches(']')
306        .trim();
307
308    if cleaned.is_empty() {
309        None
310    } else {
311        Some(truncate_field(cleaned.to_string()))
312    }
313}
314
315/// Parse an OPAM description field.
316///
317/// OPAM descriptions can be encoded as an inline quoted string, a quoted string
318/// on the following line, or a triple-quoted multiline string.
319fn parse_description_field(lines: &[&str], i: &mut usize, first_value: &str) -> Option<String> {
320    let trimmed = first_value.trim();
321
322    if trimmed.is_empty() {
323        let next_trimmed = lines.get(*i + 1)?.trim();
324
325        if next_trimmed.starts_with("\"\"\"") {
326            *i += 1;
327            return parse_triple_quoted_string(lines, i, next_trimmed);
328        }
329
330        if next_trimmed.starts_with('"') {
331            *i += 1;
332            return clean_value(next_trimmed);
333        }
334
335        return None;
336    }
337
338    if trimmed.starts_with("\"\"\"") {
339        return parse_triple_quoted_string(lines, i, trimmed);
340    }
341
342    clean_value(trimmed)
343}
344
/// Parse a multiline string enclosed in triple quotes.
///
/// `first_value` is the text starting at the opening `"""`, and `lines[*i]`
/// is the line it came from. Content lines are trimmed and joined with single
/// spaces. On return `i` rests on the line holding the closing `"""` (or on
/// `lines.len()` if the string is never closed). Returns `None` when the
/// collected text is empty.
fn parse_triple_quoted_string(lines: &[&str], i: &mut usize, first_value: &str) -> Option<String> {
    let mut result = String::new();
    let mut iteration_count: usize = 0;

    // Opening and closing delimiters on the same line: no cursor movement.
    let first_content = first_value.trim().trim_start_matches("\"\"\"");
    if let Some(end_index) = first_content.find("\"\"\"") {
        let cleaned = first_content[..end_index].trim();
        return (!cleaned.is_empty()).then(|| truncate_field(cleaned.to_string()));
    }

    // Text following the opening delimiter is the first piece of content.
    if !first_content.trim().is_empty() {
        result.push_str(first_content.trim());
    }

    *i += 1;
    while *i < lines.len() {
        iteration_count += 1;
        if iteration_count > MAX_ITERATION_COUNT {
            warn!("parse_multiline_string: exceeded MAX_ITERATION_COUNT, breaking");
            break;
        }
        let line = lines[*i];
        let line = line.trim();

        // Closing delimiter: keep whatever precedes it and stop without
        // advancing, so the caller's own increment steps past this line.
        if let Some(end_index) = line.find("\"\"\"") {
            let before_end = line[..end_index].trim();
            if !before_end.is_empty() {
                if !result.is_empty() {
                    result.push(' ');
                }
                result.push_str(before_end);
            }
            break;
        }

        let content = line.trim_matches('"').trim();
        if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(content);
        *i += 1;
    }

    let cleaned = result.trim().to_string();
    if cleaned.is_empty() {
        None
    } else {
        Some(truncate_field(cleaned))
    }
}
395
396/// Parse a string array (single-line or multiline)
397fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
398    let mut result = Vec::new();
399    let mut iteration_count: usize = 0;
400
401    let mut content = first_value.to_string();
402
403    if content.contains('[') && !content.contains(']') {
404        *i += 1;
405        while *i < lines.len() {
406            iteration_count += 1;
407            if iteration_count > MAX_ITERATION_COUNT {
408                warn!("parse_string_array: exceeded MAX_ITERATION_COUNT, breaking");
409                break;
410            }
411            let line = lines[*i];
412            content.push(' ');
413            content.push_str(line);
414
415            if line.contains(']') {
416                break;
417            }
418            *i += 1;
419        }
420    }
421
422    let cleaned = content.trim_matches('[').trim_matches(']').trim();
423
424    for part in split_quoted_strings(cleaned) {
425        let p = part.trim_matches('"').trim();
426        if !p.is_empty() {
427            result.push(truncate_field(p.to_string()));
428        }
429    }
430
431    result
432}
433
434/// Parse dependency array
435fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
436    let mut result = Vec::new();
437    let mut iteration_count: usize = 0;
438
439    *i += 1;
440    while *i < lines.len() {
441        iteration_count += 1;
442        if iteration_count > MAX_ITERATION_COUNT {
443            warn!("parse_dependency_array: exceeded MAX_ITERATION_COUNT, breaking");
444            break;
445        }
446        let line = lines[*i];
447
448        if line.trim().contains(']') {
449            break;
450        }
451
452        if let Some((name, version)) = parse_dependency_line(line) {
453            result.push((name, version));
454        }
455
456        *i += 1;
457    }
458
459    result
460}
461
462/// Parse a single dependency line: "name" {version_constraint}
463fn parse_dependency_line(line: &str) -> Option<(String, String)> {
464    let line = line.trim();
465    if line.is_empty() {
466        return None;
467    }
468
469    // Match: "name" {optional version}
470    let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
471    let caps = regex.captures(line)?;
472
473    let name = truncate_field(caps.get(1)?.as_str().to_string());
474    let version_part = caps.get(2)?.as_str().trim();
475
476    // Extract the operator and version constraint
477    let constraint = if version_part.is_empty() {
478        String::new()
479    } else {
480        truncate_field(extract_version_constraint(version_part))
481    };
482
483    Some((name, constraint))
484}
485
486/// Extract version constraint from {>= "1.0"} format
487fn extract_version_constraint(version_part: &str) -> String {
488    let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
489    if let Ok(re) = regex
490        && let Some(caps) = re.captures(version_part)
491    {
492        let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
493        let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
494        if !op.is_empty() && !ver.is_empty() {
495            return format!("{} {}", op, ver);
496        }
497    }
498
499    // If regex parsing fails, try to extract raw content
500    let content = version_part
501        .trim_matches('{')
502        .trim_matches('}')
503        .trim_matches('"')
504        .trim();
505
506    content.replace('"', "")
507}
508
509/// Parse checksums from checksum array
510fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
511    if let Some((_, first_value)) = parse_key_value(lines[*i]) {
512        let inline = first_value.trim();
513        if !inline.is_empty() && inline != "[" {
514            if let Some((key, value)) = parse_checksum_line(inline) {
515                match key.as_str() {
516                    "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
517                    "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
518                    "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
519                    "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
520                    _ => {}
521                }
522            }
523            return;
524        }
525    }
526
527    let mut iteration_count: usize = 0;
528    *i += 1;
529    while *i < lines.len() {
530        iteration_count += 1;
531        if iteration_count > MAX_ITERATION_COUNT {
532            warn!("parse_checksums: exceeded MAX_ITERATION_COUNT, breaking");
533            break;
534        }
535        let line = lines[*i];
536
537        if line.trim().contains(']') {
538            break;
539        }
540
541        if let Some((key, value)) = parse_checksum_line(line) {
542            match key.as_str() {
543                "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
544                "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
545                "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
546                "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
547                _ => {}
548            }
549        }
550
551        *i += 1;
552    }
553}
554
555/// Parse a single checksum line: algo=hash
556fn parse_checksum_line(line: &str) -> Option<(String, String)> {
557    let line = line.trim().trim_matches('"').trim();
558
559    let regex = Regex::new(r"^(\w+)\s*=\s*(.+)$").ok()?;
560    let caps = regex.captures(line)?;
561
562    let key = caps.get(1)?.as_str().to_string();
563    let value = caps.get(2)?.as_str().to_string();
564
565    Some((key, value))
566}
567
/// Split quoted strings like: "str1" "str2" "str3"
///
/// Items are separated by any whitespace (spaces, tabs, newlines) that lies
/// outside double quotes; whitespace inside quotes is preserved. The quote
/// characters themselves are dropped, and empty items are never produced.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut result = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;

    for ch in content.chars() {
        match ch {
            '"' => in_quotes = !in_quotes,
            // Any whitespace outside quotes ends the current item, so
            // tab-separated or tab-indented entries are split correctly.
            separator if separator.is_whitespace() && !in_quotes => {
                if !current.is_empty() {
                    result.push(std::mem::take(&mut current));
                }
            }
            _ => current.push(ch),
        }
    }

    if !current.is_empty() {
        result.push(current);
    }

    result
}
593
/// Combines the synopsis and the long description into one text.
///
/// When both are present they are joined with a newline, synopsis first;
/// when only one is present it is returned as is; `None` when neither is.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (Some(short), Some(long)) => Some(format!("{}\n{}", short, long)),
    }
}
608
609/// Extract parties from authors and maintainers
610fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
611    let mut parties = Vec::new();
612
613    // Add authors
614    for author in authors {
615        parties.push(Party {
616            r#type: Some("person".to_string()),
617            role: Some("author".to_string()),
618            name: Some(truncate_field(author.clone())),
619            email: None,
620            url: None,
621            organization: None,
622            organization_url: None,
623            timezone: None,
624        });
625    }
626
627    // Add maintainers (as email)
628    for maintainer in maintainers {
629        parties.push(Party {
630            r#type: Some("person".to_string()),
631            role: Some("maintainer".to_string()),
632            name: None,
633            email: Some(truncate_field(maintainer.clone())),
634            url: None,
635            organization: None,
636            organization_url: None,
637            timezone: None,
638        });
639    }
640
641    parties
642}
643
644/// Extract dependencies into Dependency objects
645fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
646    deps.iter()
647        .map(|(name, version_constraint)| Dependency {
648            purl: Some(truncate_field(format!("pkg:opam/{}", name))),
649            extracted_requirement: Some(truncate_field(version_constraint.clone())),
650            scope: Some("dependency".to_string()),
651            is_runtime: Some(true),
652            is_optional: Some(false),
653            is_pinned: Some(false),
654            is_direct: Some(true),
655            resolved_package: None,
656            extra_data: None,
657        })
658        .collect()
659}
660
// Unit tests for the OPAM parser: file matching, the low-level line helpers,
// and end-to-end parsing of small inline manifests.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is accepted.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is accepted.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Unrelated file names are rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value keeps its quotes; only surrounding whitespace is removed.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped, and an empty quoted string becomes `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // A simple `{op "version"}` filter is rendered as `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // Name and constraint are both extracted from a dependency line.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter has an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Space-separated quoted strings are split and unquoted.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // An inline description must not consume the fields that follow it.
    #[test]
    fn test_parse_opam_keeps_fields_after_single_line_description() {
        let package = parse_opam(
            r#"opam-version: "2.0"
name: "dune-rpc"
version: "3.23.0"
description: "Library to connect and control a running dune instance"
maintainer: ["Jane Street Group, LLC <opensource@janestreet.com>"]
authors: ["Jane Street Group, LLC <opensource@janestreet.com>"]
license: "MIT"
homepage: "https://github.com/ocaml/dune"
bug-reports: "https://github.com/ocaml/dune/issues"
depends: [
  "dune" {>= "3.23"}
  "ocamlc-loc"
  "stdune" {= version}
  "odoc" {with-doc}
]
dev-repo: "git+https://github.com/ocaml/dune.git"
"#,
        );

        assert_eq!(package.name.as_deref(), Some("dune-rpc"));
        assert_eq!(package.version.as_deref(), Some("3.23.0"));
        assert_eq!(
            package.description.as_deref(),
            Some("Library to connect and control a running dune instance")
        );
        assert_eq!(
            package.homepage_url.as_deref(),
            Some("https://github.com/ocaml/dune")
        );
        assert_eq!(
            package.bug_tracking_url.as_deref(),
            Some("https://github.com/ocaml/dune/issues")
        );
        assert_eq!(
            package.vcs_url.as_deref(),
            Some("git+https://github.com/ocaml/dune.git")
        );
        assert_eq!(
            package.declared_license_expression_spdx.as_deref(),
            Some("MIT")
        );
        assert_eq!(package.dependencies.len(), 4);
        assert_eq!(
            package.dependencies[0].purl.as_deref(),
            Some("pkg:opam/dune")
        );
        assert_eq!(
            package.dependencies[0].extracted_requirement.as_deref(),
            Some(">= 3.23")
        );
        assert_eq!(
            package.dependencies[2].extracted_requirement.as_deref(),
            Some("= version")
        );
        assert_eq!(
            package.dependencies[3].extracted_requirement.as_deref(),
            Some("with-doc")
        );
    }

    // A description on the line after `description:` must not consume the
    // fields that follow it.
    #[test]
    fn test_parse_opam_keeps_fields_after_next_line_description() {
        let package = parse_opam(
            r#"opam-version: "2.0"
name: "chrome-trace"
version: "3.23.0"
description:
  "This library offers no backwards compatibility guarantees. Use at your own risk."
maintainer: ["Jane Street Group, LLC <opensource@janestreet.com>"]
license: "MIT"
depends: [
  "dune" {>= "3.23"}
  "ocaml" {>= "4.14"}
  "odoc" {with-doc}
]
dev-repo: "git+https://github.com/ocaml/dune.git"
"#,
        );

        assert_eq!(package.name.as_deref(), Some("chrome-trace"));
        assert_eq!(
            package.description.as_deref(),
            Some(
                "This library offers no backwards compatibility guarantees. Use at your own risk."
            )
        );
        assert_eq!(
            package.vcs_url.as_deref(),
            Some("git+https://github.com/ocaml/dune.git")
        );
        assert_eq!(package.dependencies.len(), 3);
        assert_eq!(
            package.dependencies[1].purl.as_deref(),
            Some("pkg:opam/ocaml")
        );
        assert_eq!(
            package.dependencies[1].extracted_requirement.as_deref(),
            Some(">= 4.14")
        );
        assert_eq!(
            package.dependencies[2].extracted_requirement.as_deref(),
            Some("with-doc")
        );
    }

    // Authors populate `name`; maintainers populate `email`; authors come first.
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }

    // The lowercase-`with` OPAM spelling maps to the fixed expression pair.
    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let (declared, declared_spdx, detections) = normalize_opam_declared_license(Some(
            "LGPL-3.0-only with OCaml-LGPL-linking-exception",
        ));

        assert_eq!(
            declared.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            declared_spdx.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
874            detections[0].license_expression,
875            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
876        );
877    }
878}