1use std::path::Path;
24
25use crate::parser_warn as warn;
26use regex::Regex;
27
28use crate::models::{
29 DatasourceId, Dependency, Md5Digest, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
30 Sha512Digest,
31};
32use crate::parsers::PackageParser;
33use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
34
35use super::license_normalization::{
36 DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
37 normalize_spdx_declared_license,
38};
39
/// Parser for OCaml OPAM package manifests.
///
/// Matches files whose name ends with `.opam` or is exactly `opam`, and turns
/// their contents into a single [`PackageData`] record.
pub struct OpamParser;
45
46impl PackageParser for OpamParser {
47 const PACKAGE_TYPE: PackageType = PackageType::Opam;
48
49 fn is_match(path: &Path) -> bool {
50 path.file_name().is_some_and(|name| {
51 name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
52 })
53 }
54
55 fn extract_packages(path: &Path) -> Vec<PackageData> {
56 vec![match read_file_to_string(path, None) {
57 Ok(text) => parse_opam(&text),
58 Err(e) => {
59 warn!("Failed to read OPAM file {:?}: {}", path, e);
60 default_package_data()
61 }
62 }]
63 }
64}
65
/// Raw field values collected from an OPAM file by `parse_opam_data`, before
/// they are mapped onto a `PackageData` record.
#[derive(Debug, Default)]
struct OpamData {
    /// Cleaned value of the `name` field.
    name: Option<String>,
    /// Cleaned value of the `version` field.
    version: Option<String>,
    /// Cleaned value of the `synopsis` field.
    synopsis: Option<String>,
    /// Text of the `description` field, joined into a single line.
    description: Option<String>,
    /// Cleaned value of the `homepage` field.
    homepage: Option<String>,
    /// Cleaned value of the `dev-repo` field.
    dev_repo: Option<String>,
    /// Cleaned value of the `bug-reports` field.
    bug_reports: Option<String>,
    /// Cleaned value of the `src` field (taken from the next line when the
    /// key line carries no value).
    src: Option<String>,
    /// Entries of the `authors` field.
    authors: Vec<String>,
    /// Entries of the `maintainer` field.
    maintainers: Vec<String>,
    /// Cleaned value of the `license` field.
    license: Option<String>,
    /// Digest from a `sha1=` entry of the `checksum` field.
    sha1: Option<Sha1Digest>,
    /// Digest from an `md5=` entry of the `checksum` field.
    md5: Option<Md5Digest>,
    /// Digest from a `sha256=` entry of the `checksum` field.
    sha256: Option<Sha256Digest>,
    /// Digest from a `sha512=` entry of the `checksum` field.
    sha512: Option<Sha512Digest>,
    /// `(name, version constraint)` pairs from the `depends` field; the
    /// constraint is an empty string when none was given.
    dependencies: Vec<(String, String)>,
}
86
87fn default_package_data() -> PackageData {
88 PackageData {
89 package_type: Some(OpamParser::PACKAGE_TYPE),
90 primary_language: Some("Ocaml".to_string()),
91 datasource_id: Some(DatasourceId::OpamFile),
92 ..Default::default()
93 }
94}
95
96fn parse_opam(text: &str) -> PackageData {
98 let opam_data = parse_opam_data(text);
99
100 let description = build_description(&opam_data.synopsis, &opam_data.description);
101 let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
102 let dependencies = extract_dependencies(&opam_data.dependencies);
103
104 let (repository_homepage_url, api_data_url, purl) =
105 build_opam_urls(&opam_data.name, &opam_data.version);
106 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
107 normalize_opam_declared_license(opam_data.license.as_deref());
108
109 PackageData {
110 package_type: Some(OpamParser::PACKAGE_TYPE),
111 namespace: None,
112 name: opam_data.name,
113 version: opam_data.version,
114 qualifiers: None,
115 subpath: None,
116 primary_language: Some("Ocaml".to_string()),
117 description,
118 release_date: None,
119 parties,
120 keywords: Vec::new(),
121 homepage_url: opam_data.homepage,
122 download_url: opam_data.src,
123 size: None,
124 sha1: opam_data.sha1,
125 md5: opam_data.md5,
126 sha256: opam_data.sha256,
127 sha512: opam_data.sha512,
128 bug_tracking_url: opam_data.bug_reports,
129 code_view_url: None,
130 vcs_url: opam_data.dev_repo,
131 copyright: None,
132 holder: None,
133 declared_license_expression,
134 declared_license_expression_spdx,
135 license_detections,
136 other_license_expression: None,
137 other_license_expression_spdx: None,
138 other_license_detections: Vec::new(),
139 extracted_license_statement: opam_data.license,
140 notice_text: None,
141 source_packages: Vec::new(),
142 file_references: Vec::new(),
143 is_private: false,
144 is_virtual: false,
145 extra_data: None,
146 dependencies,
147 repository_homepage_url,
148 repository_download_url: None,
149 api_data_url,
150 datasource_id: Some(DatasourceId::OpamFile),
151 purl,
152 }
153}
154
155fn normalize_opam_declared_license(
156 statement: Option<&str>,
157) -> (
158 Option<String>,
159 Option<String>,
160 Vec<crate::models::LicenseDetection>,
161) {
162 let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
163 return super::license_normalization::empty_declared_license_data();
164 };
165
166 match statement {
167 "GPL-2.0-only" => build_declared_license_data_from_pair(
168 "gpl-2.0",
169 "GPL-2.0-only",
170 DeclaredLicenseMatchMetadata::single_line(statement),
171 ),
172 "GPL-3.0-only" => build_declared_license_data_from_pair(
173 "gpl-3.0",
174 "GPL-3.0-only",
175 DeclaredLicenseMatchMetadata::single_line(statement),
176 ),
177 "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
178 "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
179 "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
180 DeclaredLicenseMatchMetadata::single_line(statement),
181 ),
182 _ => normalize_spdx_declared_license(Some(statement)),
183 }
184}
185
/// Derives `(repository_homepage_url, api_data_url, purl)` from the package
/// name and version.
///
/// Without a name all three are `None`. Without a version the API URL is
/// `None` and the purl carries no `@version` suffix.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let Some(package) = name.as_deref() else {
        return (None, None, None);
    };

    let homepage = format!("https://opam.ocaml.org/packages/{}", package);

    let (api_data_url, purl) = match version.as_deref() {
        Some(release) => (
            Some(format!(
                "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
                package, package, release
            )),
            format!("pkg:opam/{}@{}", package, release),
        ),
        None => (None, format!("pkg:opam/{}", package)),
    };

    (Some(homepage), api_data_url, Some(purl))
}
210
211fn parse_opam_data(text: &str) -> OpamData {
213 let mut data = OpamData::default();
214 let lines: Vec<&str> = text.lines().collect();
215 let mut i = 0;
216 let mut iteration_count: usize = 0;
217
218 while i < lines.len() {
219 iteration_count += 1;
220 if iteration_count > MAX_ITERATION_COUNT {
221 warn!("parse_opam_data: exceeded MAX_ITERATION_COUNT, breaking");
222 break;
223 }
224 let line = lines[i];
225
226 if let Some((key, value)) = parse_key_value(line) {
228 match key.as_str() {
229 "name" => data.name = clean_value(&value),
230 "version" => data.version = clean_value(&value),
231 "synopsis" => data.synopsis = clean_value(&value),
232 "description" => {
233 data.description = parse_multiline_string(&lines, &mut i);
234 }
235 "homepage" => data.homepage = clean_value(&value),
236 "dev-repo" => data.dev_repo = clean_value(&value),
237 "bug-reports" => data.bug_reports = clean_value(&value),
238 "src" => {
239 if value.trim().is_empty() && i + 1 < lines.len() {
240 i += 1;
241 data.src = clean_value(lines[i]);
242 } else {
243 data.src = clean_value(&value);
244 }
245 }
246 "license" => data.license = clean_value(&value),
247 "authors" => {
248 data.authors = parse_string_array(&lines, &mut i, &value);
249 }
250 "maintainer" => {
251 data.maintainers = parse_string_array(&lines, &mut i, &value);
252 }
253 "depends" => {
254 data.dependencies = parse_dependency_array(&lines, &mut i);
255 }
256 "checksum" => {
257 parse_checksums(&lines, &mut i, &mut data);
258 }
259 _ => {}
260 }
261 }
262
263 i += 1;
264 }
265
266 data
267}
268
/// Splits a `key: value` line at its first colon.
///
/// Returns `None` for blank lines, lines starting with `#` and lines without
/// a colon. Both halves are returned with surrounding whitespace removed.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    let (raw_key, raw_value) = trimmed.split_once(':')?;
    Some((raw_key.trim().to_owned(), raw_value.trim().to_owned()))
}
284
285fn clean_value(value: &str) -> Option<String> {
287 let cleaned = value
288 .trim()
289 .trim_matches('"')
290 .trim_matches('[')
291 .trim_matches(']')
292 .trim();
293
294 if cleaned.is_empty() {
295 None
296 } else {
297 Some(truncate_field(cleaned.to_string()))
298 }
299}
300
301fn parse_multiline_string(lines: &[&str], i: &mut usize) -> Option<String> {
303 let mut result = String::new();
304 let mut iteration_count: usize = 0;
305
306 if let Some((_, value)) = parse_key_value(lines[*i]) {
307 result.push_str(value.trim_matches('"').trim());
308 }
309
310 *i += 1;
311 while *i < lines.len() {
312 iteration_count += 1;
313 if iteration_count > MAX_ITERATION_COUNT {
314 warn!("parse_multiline_string: exceeded MAX_ITERATION_COUNT, breaking");
315 break;
316 }
317 let line = lines[*i];
318 result.push(' ');
319 result.push_str(line.trim_matches('"').trim());
320
321 if line.contains("\"\"\"") {
322 break;
323 }
324 *i += 1;
325 }
326
327 let cleaned = result.trim().to_string();
328 if cleaned.is_empty() {
329 None
330 } else {
331 Some(truncate_field(cleaned))
332 }
333}
334
335fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
337 let mut result = Vec::new();
338 let mut iteration_count: usize = 0;
339
340 let mut content = first_value.to_string();
341
342 if content.contains('[') && !content.contains(']') {
343 *i += 1;
344 while *i < lines.len() {
345 iteration_count += 1;
346 if iteration_count > MAX_ITERATION_COUNT {
347 warn!("parse_string_array: exceeded MAX_ITERATION_COUNT, breaking");
348 break;
349 }
350 let line = lines[*i];
351 content.push(' ');
352 content.push_str(line);
353
354 if line.contains(']') {
355 break;
356 }
357 *i += 1;
358 }
359 }
360
361 let cleaned = content.trim_matches('[').trim_matches(']').trim();
362
363 for part in split_quoted_strings(cleaned) {
364 let p = part.trim_matches('"').trim();
365 if !p.is_empty() {
366 result.push(truncate_field(p.to_string()));
367 }
368 }
369
370 result
371}
372
373fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
375 let mut result = Vec::new();
376 let mut iteration_count: usize = 0;
377
378 *i += 1;
379 while *i < lines.len() {
380 iteration_count += 1;
381 if iteration_count > MAX_ITERATION_COUNT {
382 warn!("parse_dependency_array: exceeded MAX_ITERATION_COUNT, breaking");
383 break;
384 }
385 let line = lines[*i];
386
387 if line.trim().contains(']') {
388 break;
389 }
390
391 if let Some((name, version)) = parse_dependency_line(line) {
392 result.push((name, version));
393 }
394
395 *i += 1;
396 }
397
398 result
399}
400
401fn parse_dependency_line(line: &str) -> Option<(String, String)> {
403 let line = line.trim();
404 if line.is_empty() {
405 return None;
406 }
407
408 let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
410 let caps = regex.captures(line)?;
411
412 let name = truncate_field(caps.get(1)?.as_str().to_string());
413 let version_part = caps.get(2)?.as_str().trim();
414
415 let constraint = if version_part.is_empty() {
417 String::new()
418 } else {
419 truncate_field(extract_version_constraint(version_part))
420 };
421
422 Some((name, constraint))
423}
424
425fn extract_version_constraint(version_part: &str) -> String {
427 let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
428 if let Ok(re) = regex
429 && let Some(caps) = re.captures(version_part)
430 {
431 let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
432 let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
433 if !op.is_empty() && !ver.is_empty() {
434 return format!("{} {}", op, ver);
435 }
436 }
437
438 let content = version_part
440 .trim_matches('{')
441 .trim_matches('}')
442 .trim_matches('"')
443 .trim();
444
445 content.replace('"', "")
446}
447
448fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
450 if let Some((_, first_value)) = parse_key_value(lines[*i]) {
451 let inline = first_value.trim();
452 if !inline.is_empty() && inline != "[" {
453 if let Some((key, value)) = parse_checksum_line(inline) {
454 match key.as_str() {
455 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
456 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
457 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
458 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
459 _ => {}
460 }
461 }
462 return;
463 }
464 }
465
466 let mut iteration_count: usize = 0;
467 *i += 1;
468 while *i < lines.len() {
469 iteration_count += 1;
470 if iteration_count > MAX_ITERATION_COUNT {
471 warn!("parse_checksums: exceeded MAX_ITERATION_COUNT, breaking");
472 break;
473 }
474 let line = lines[*i];
475
476 if line.trim().contains(']') {
477 break;
478 }
479
480 if let Some((key, value)) = parse_checksum_line(line) {
481 match key.as_str() {
482 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
483 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
484 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
485 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
486 _ => {}
487 }
488 }
489
490 *i += 1;
491 }
492}
493
494fn parse_checksum_line(line: &str) -> Option<(String, String)> {
496 let line = line.trim().trim_matches('"').trim();
497
498 let regex = Regex::new(r"^(\w+)\s*=\s*(.+)$").ok()?;
499 let caps = regex.captures(line)?;
500
501 let key = caps.get(1)?.as_str().to_string();
502 let value = caps.get(2)?.as_str().to_string();
503
504 Some((key, value))
505}
506
/// Splits `content` into tokens separated by whitespace, keeping whitespace
/// that appears inside double quotes.
///
/// Any whitespace character outside quotes (space, tab, ...) ends the current
/// token, so tab-separated or tab-indented entries are no longer merged into
/// one. The quote characters themselves are never part of a token, and empty
/// tokens are dropped.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut result = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;

    for ch in content.chars() {
        match ch {
            '"' => in_quotes = !in_quotes,
            separator if separator.is_whitespace() && !in_quotes => {
                if !current.is_empty() {
                    // `take` hands the finished token over and leaves an
                    // empty buffer for the next one.
                    result.push(std::mem::take(&mut current));
                }
            }
            _ => current.push(ch),
        }
    }

    if !current.is_empty() {
        result.push(current);
    }

    result
}
532
/// Combines synopsis and long description into one text.
///
/// When both are present they are joined with a newline, synopsis first. When
/// only one is present it is returned unchanged; when neither is, the result
/// is `None`.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (Some(short), Some(long)) => Some(format!("{}\n{}", short, long)),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    }
}
547
548fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
550 let mut parties = Vec::new();
551
552 for author in authors {
554 parties.push(Party {
555 r#type: Some("person".to_string()),
556 role: Some("author".to_string()),
557 name: Some(truncate_field(author.clone())),
558 email: None,
559 url: None,
560 organization: None,
561 organization_url: None,
562 timezone: None,
563 });
564 }
565
566 for maintainer in maintainers {
568 parties.push(Party {
569 r#type: Some("person".to_string()),
570 role: Some("maintainer".to_string()),
571 name: None,
572 email: Some(truncate_field(maintainer.clone())),
573 url: None,
574 organization: None,
575 organization_url: None,
576 timezone: None,
577 });
578 }
579
580 parties
581}
582
583fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
585 deps.iter()
586 .map(|(name, version_constraint)| Dependency {
587 purl: Some(truncate_field(format!("pkg:opam/{}", name))),
588 extracted_requirement: Some(truncate_field(version_constraint.clone())),
589 scope: Some("dependency".to_string()),
590 is_runtime: Some(true),
591 is_optional: Some(false),
592 is_pinned: Some(false),
593 is_direct: Some(true),
594 resolved_package: None,
595 extra_data: None,
596 })
597 .collect()
598}
599
// Unit tests for the OPAM parser's file matching and its private parsing helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is recognised.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is recognised.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Any other file name is rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value half keeps its quotes; only whitespace is trimmed.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped, and an empty quoted string becomes `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // A simple `{op "version"}` filter is flattened to `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // Name and constraint are both extracted from a full dependency line.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter yields an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Two quoted strings separated by a space become two tokens.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline, synopsis first.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // Authors populate `name`, maintainers populate `email`; authors come first.
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }

    // The lowercase-`with` OPAM statement maps to the fixed expression pair.
    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let (declared, declared_spdx, detections) = normalize_opam_declared_license(Some(
            "LGPL-3.0-only with OCaml-LGPL-linking-exception",
        ));

        assert_eq!(
            declared.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            declared_spdx.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
710
// Registers this parser via the crate's `register_parser!` macro.
// NOTE(review): the arguments look like (description, path globs, package
// type, primary language, documentation URL) — confirm against the macro
// definition, which is not visible in this file.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);