1use std::collections::HashMap;
24use std::path::Path;
25use std::sync::LazyLock;
26
27use crate::parser_warn as warn;
28use packageurl::PackageUrl;
29use regex::Regex;
30
31use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
32use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
33
34use super::PackageParser;
35
/// Parser for CRAN (R package) `DESCRIPTION` metadata files.
///
/// Stateless marker type: all behavior lives in its [`PackageParser`] implementation.
pub struct CranParser;
41
42impl PackageParser for CranParser {
43 const PACKAGE_TYPE: PackageType = PackageType::Cran;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().is_some_and(|name| name == "DESCRIPTION")
47 }
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 let content = match read_file_to_string(path, None) {
51 Ok(c) => c,
52 Err(e) => {
53 warn!("Failed to read DESCRIPTION at {:?}: {}", path, e);
54 return vec![default_package_data()];
55 }
56 };
57 let fields = parse_dcf(&content);
58
59 let name = fields
60 .get("Package")
61 .map(|s| truncate_field(s.trim().to_string()));
62 let version = fields
63 .get("Version")
64 .map(|s| truncate_field(s.trim().to_string()));
65
66 let purl = create_package_url(&name, &version);
68
69 let repository_homepage_url = name
71 .as_ref()
72 .map(|n| truncate_field(format!("https://cran.r-project.org/package={}", n)));
73
74 let description = build_description(&fields);
76
77 let extracted_license_statement = fields
79 .get("License")
80 .map(|s| truncate_field(s.trim().to_string()));
81
82 let homepage_url = fields
84 .get("URL")
85 .map(|s| truncate_field(s.split(',').next().unwrap_or("").trim().to_string()))
86 .filter(|s| !s.is_empty());
87
88 let mut parties = Vec::new();
90
91 if let Some(maintainer_str) = fields.get("Maintainer")
93 && let Some(party) = parse_party(maintainer_str, "maintainer")
94 {
95 parties.push(party);
96 }
97
98 if let Some(author_str) = fields.get("Author") {
100 for author_part in split_author_entries(author_str) {
101 if let Some(party) = parse_party(author_part, "author") {
102 parties.push(party);
103 }
104 }
105 }
106
107 let mut dependencies = Vec::new();
109
110 for (field_name, scope) in [
112 ("Depends", None),
113 ("Imports", Some("imports")),
114 ("Suggests", Some("suggests")),
115 ("Enhances", Some("enhances")),
116 ("LinkingTo", Some("linkingto")),
117 ] {
118 if let Some(deps_str) = fields.get(field_name) {
119 dependencies.extend(parse_dependencies(deps_str, scope));
120 }
121 }
122
123 vec![PackageData {
124 package_type: Some(Self::PACKAGE_TYPE),
125 namespace: None,
126 name,
127 version,
128 qualifiers: None,
129 subpath: None,
130 primary_language: Some("R".to_string()),
131 description,
132 release_date: None,
133 parties,
134 keywords: Vec::new(),
135 homepage_url,
136 download_url: None,
137 size: None,
138 sha1: None,
139 md5: None,
140 sha256: None,
141 sha512: None,
142 bug_tracking_url: None,
143 code_view_url: None,
144 vcs_url: None,
145 copyright: None,
146 holder: None,
147 declared_license_expression: None,
148 declared_license_expression_spdx: None,
149 license_detections: Vec::new(),
150 other_license_expression: None,
151 other_license_expression_spdx: None,
152 other_license_detections: Vec::new(),
153 extracted_license_statement,
154 notice_text: None,
155 source_packages: Vec::new(),
156 file_references: Vec::new(),
157 is_private: false,
158 is_virtual: false,
159 extra_data: None,
160 dependencies,
161 repository_homepage_url,
162 repository_download_url: None,
163 api_data_url: None,
164 datasource_id: Some(DatasourceId::CranDescription),
165 purl,
166 }]
167 }
168}
169
170fn parse_dcf(content: &str) -> HashMap<String, String> {
171 let mut fields: HashMap<String, String> = HashMap::new();
172 let mut current_field: Option<String> = None;
173 let mut current_value = String::new();
174
175 for line in content.lines().take(MAX_ITERATION_COUNT) {
176 if line.starts_with(' ') || line.starts_with('\t') {
178 if current_field.is_some() {
179 if !current_value.is_empty() {
181 current_value.push(' ');
182 }
183 current_value.push_str(line.trim_start());
184 }
185 } else if let Some((field_name, field_value)) = line.split_once(':') {
186 if let Some(field) = current_field.take() {
188 fields.insert(field, truncate_field(current_value.clone()));
189 current_value.clear();
190 }
191
192 current_field = Some(field_name.trim().to_string());
194 current_value = field_value.trim_start().to_string();
195 }
196 }
198
199 if let Some(field) = current_field {
201 fields.insert(field, truncate_field(current_value));
202 }
203
204 fields
205}
206
207fn parse_dependencies(deps_str: &str, scope: Option<&str>) -> Vec<Dependency> {
212 let mut dependencies = Vec::new();
213
214 for dep in deps_str.split(',').take(MAX_ITERATION_COUNT) {
215 let dep = dep.trim();
216 if dep.is_empty() {
217 continue;
218 }
219
220 let (name, extracted_requirement, is_pinned) = parse_version_constraint(dep);
221
222 if name == "R" {
224 continue;
225 }
226
227 let purl = if is_pinned {
229 if let Some(ref req) = extracted_requirement {
231 if let Some(version) = extract_version_from_requirement(req) {
232 match PackageUrl::new("cran", &name) {
233 Ok(mut p) => {
234 if p.with_version(&version).is_ok() {
235 Some(p.to_string())
236 } else {
237 PackageUrl::new("cran", &name).ok().map(|p| p.to_string())
239 }
240 }
241 Err(e) => {
242 warn!(
243 "Failed to create PURL for CRAN dependency '{}': {}",
244 name, e
245 );
246 None
247 }
248 }
249 } else {
250 PackageUrl::new("cran", &name).ok().map(|p| p.to_string())
252 }
253 } else {
254 PackageUrl::new("cran", &name).ok().map(|p| p.to_string())
256 }
257 } else {
258 PackageUrl::new("cran", &name).ok().map(|p| p.to_string())
260 };
261
262 dependencies.push(Dependency {
263 purl,
264 extracted_requirement: extracted_requirement.map(truncate_field),
265 scope: scope.map(|s| truncate_field(s.to_string())),
266 is_runtime: Some(scope.is_none() || scope == Some("imports")),
267 is_optional: Some(scope == Some("suggests") || scope == Some("enhances")),
268 is_pinned: Some(is_pinned),
269 is_direct: Some(true),
270 resolved_package: None,
271 extra_data: None,
272 });
273 }
274
275 dependencies
276}
277
278static VERSION_CONSTRAINT_RE: LazyLock<Regex> = LazyLock::new(|| {
279 Regex::new(r"^([a-zA-Z0-9.]+)\s*\(([><=]+)\s*([^)]+)\)\s*$").expect("valid regex")
280});
281
282fn parse_version_constraint(dep: &str) -> (String, Option<String>, bool) {
287 if let Some(captures) = VERSION_CONSTRAINT_RE.captures(dep) {
288 let name = match captures.get(1) {
289 Some(m) => truncate_field(m.as_str().to_string()),
290 None => return (truncate_field(dep.trim().to_string()), None, false),
291 };
292 let operator = match captures.get(2) {
293 Some(m) => m.as_str(),
294 None => return (name, None, false),
295 };
296 let version = match captures.get(3) {
297 Some(m) => m.as_str(),
298 None => return (name, None, false),
299 };
300 let requirement = truncate_field(format!("{} {}", operator, version));
301 let is_pinned = operator == "==";
302
303 (name, Some(requirement), is_pinned)
304 } else {
305 (truncate_field(dep.trim().to_string()), None, false)
306 }
307}
308
309fn extract_version_from_requirement(requirement: &str) -> Option<String> {
311 requirement
312 .split_whitespace()
313 .nth(1)
314 .map(|s| truncate_field(s.to_string()))
315}
316
317fn build_description(fields: &HashMap<String, String>) -> Option<String> {
319 let title = fields.get("Title").map(|s| s.trim());
320 let desc = fields.get("Description").map(|s| s.trim());
321
322 match (title, desc) {
323 (Some(t), Some(d)) if !t.is_empty() && !d.is_empty() => {
324 Some(truncate_field(format!("{}\n{}", t, d)))
325 }
326 (Some(t), _) if !t.is_empty() => Some(truncate_field(t.to_string())),
327 (_, Some(d)) if !d.is_empty() => Some(truncate_field(d.to_string())),
328 _ => None,
329 }
330}
331
332fn split_author_entries(author_str: &str) -> Vec<&str> {
333 let mut entries = Vec::new();
334 let mut start = 0;
335 let mut bracket_depth: usize = 0;
336 let mut paren_depth: usize = 0;
337
338 for (idx, ch) in author_str.char_indices().take(MAX_ITERATION_COUNT) {
339 match ch {
340 '[' => bracket_depth += 1,
341 ']' => bracket_depth = bracket_depth.saturating_sub(1),
342 '(' => paren_depth += 1,
343 ')' => paren_depth = paren_depth.saturating_sub(1),
344 ',' if bracket_depth == 0 && paren_depth == 0 => {
345 let entry = author_str[start..idx].trim();
346 if !entry.is_empty() {
347 entries.push(entry);
348 }
349 start = idx + 1;
350 }
351 _ => {}
352 }
353 }
354
355 let final_entry = author_str[start..].trim();
356 if !final_entry.is_empty() {
357 entries.push(final_entry);
358 }
359
360 entries
361}
362
363fn parse_party(info: &str, role: &str) -> Option<Party> {
370 let info = info.trim();
371 if info.is_empty() {
372 return None;
373 }
374
375 if info.contains('<') && info.contains('>') {
377 let parts: Vec<&str> = info.split('<').collect();
378 if parts.len() == 2 {
379 let name = parts[0].trim().to_string();
380 let email = parts[1].trim_end_matches('>').trim().to_string();
381
382 if !email.contains('@') {
383 return Some(Party {
384 r#type: Some(truncate_field("person".to_string())),
385 role: Some(truncate_field(role.to_string())),
386 name: Some(truncate_field(info.to_string())),
387 email: None,
388 url: None,
389 organization: None,
390 organization_url: None,
391 timezone: None,
392 });
393 }
394
395 return Some(Party {
396 r#type: Some(truncate_field("person".to_string())),
397 role: Some(truncate_field(role.to_string())),
398 name: if name.is_empty() {
399 None
400 } else {
401 Some(truncate_field(name))
402 },
403 email: if email.is_empty() {
404 None
405 } else {
406 Some(truncate_field(email))
407 },
408 url: None,
409 organization: None,
410 organization_url: None,
411 timezone: None,
412 });
413 }
414 }
415
416 Some(Party {
418 r#type: Some(truncate_field("person".to_string())),
419 role: Some(truncate_field(role.to_string())),
420 name: Some(truncate_field(info.to_string())),
421 email: None,
422 url: None,
423 organization: None,
424 organization_url: None,
425 timezone: None,
426 })
427}
428
429fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
431 name.as_ref().and_then(|name| {
432 let mut package_url = match PackageUrl::new("cran", name) {
433 Ok(p) => p,
434 Err(e) => {
435 warn!(
436 "Failed to create PackageUrl for CRAN package '{}': {}",
437 name, e
438 );
439 return None;
440 }
441 };
442
443 if let Some(v) = version
444 && let Err(e) = package_url.with_version(v)
445 {
446 warn!(
447 "Failed to set version '{}' for CRAN package '{}': {}",
448 v, name, e
449 );
450 return None;
451 }
452
453 Some(package_url.to_string())
454 })
455}
456
457fn default_package_data() -> PackageData {
458 PackageData {
459 package_type: Some(CranParser::PACKAGE_TYPE),
460 primary_language: Some("R".to_string()),
461 datasource_id: Some(DatasourceId::CranDescription),
462 ..Default::default()
463 }
464}
465
// Registers metadata about this parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition, which is not visible from this file.
crate::register_parser!(
    "CRAN R package DESCRIPTION file",
    &["**/DESCRIPTION"],
    "cran",
    "R",
    Some("https://cran.r-project.org/doc/manuals/r-release/R-exts.html#The-DESCRIPTION-file"),
);