Skip to main content

provenant/parsers/
podspec.rs

1//! Parser for CocoaPods .podspec manifest files.
2//!
3//! Extracts package metadata and dependencies from .podspec files which define
4//! CocoaPods package specifications using Ruby DSL syntax.
5//!
6//! # Supported Formats
7//! - *.podspec (CocoaPods package specification files)
8//! - .podspec files (same format, different naming convention)
9//!
10//! # Key Features
11//! - Metadata extraction (name, version, summary, description, license)
12//! - Author/contributor information parsing with email handling
13//! - Homepage and source repository URL extraction
14//! - Dependency declaration parsing with version constraints
15//! - Support for development dependencies
16//! - Regex-based Ruby DSL parsing (no full Ruby AST required)
17//!
18//! # Implementation Notes
19//! - Uses regex for pattern matching in Ruby DSL syntax
20//! - Supports multi-line string values and Ruby hash syntax
21//! - Dependency version constraints are parsed from DSL
22//! - Graceful error handling with `warn!()` logs on parse failures
23
24use std::fs;
25use std::path::Path;
26
27use lazy_static::lazy_static;
28use log::warn;
29use md5::{Digest, Md5};
30use packageurl::PackageUrl;
31use regex::Regex;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
34use crate::parsers::PackageParser;
35
/// Parses CocoaPods specification files (.podspec).
///
/// Extracts package metadata from .podspec files using regex-based Ruby DSL parsing.
/// This is a field-less unit struct: it carries no state, and all parsing
/// behavior is provided through its `PackageParser` implementation.
///
/// # Extracted Fields
/// - Name, version, summary, description
/// - Homepage, license, source URLs
/// - Author information (including author hashes)
/// - Dependencies with version constraints
///
/// # Heredoc Support
/// Handles multiline descriptions: `s.description = <<-DESC ... DESC`
pub struct PodspecParser;
49
50impl PackageParser for PodspecParser {
51    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
52
53    fn is_match(path: &Path) -> bool {
54        path.extension().is_some_and(|ext| {
55            ext == "podspec"
56                && path
57                    .file_name()
58                    .is_some_and(|name| !name.to_string_lossy().ends_with(".json.podspec"))
59        })
60    }
61
62    fn extract_packages(path: &Path) -> Vec<PackageData> {
63        let content = match fs::read_to_string(path) {
64            Ok(c) => c,
65            Err(e) => {
66                warn!("Failed to read {:?}: {}", path, e);
67                return vec![default_package_data()];
68            }
69        };
70
71        let name = extract_field(&content, &NAME_PATTERN);
72        let version = extract_field(&content, &VERSION_PATTERN);
73        let summary = extract_field(&content, &SUMMARY_PATTERN);
74        let description =
75            merge_summary_and_description(summary.as_deref(), extract_description(&content));
76        let homepage_url = extract_field(&content, &HOMEPAGE_PATTERN);
77        let license = extract_license_statement(&content);
78        let source = extract_source_url(&content);
79        let authors = extract_authors(&content);
80
81        let parties = authors
82            .into_iter()
83            .map(|(name, email)| Party {
84                r#type: Some("person".to_string()),
85                name: Some(name),
86                email,
87                url: None,
88                role: Some("author".to_string()),
89                organization: None,
90                organization_url: None,
91                timezone: None,
92            })
93            .collect();
94
95        let dependencies = extract_dependencies(&content);
96        let repository_homepage_url = name
97            .as_ref()
98            .map(|n| format!("https://cocoapods.org/pods/{}", n));
99        let repository_download_url = match (source.as_deref(), version.as_deref()) {
100            (Some(vcs_url), Some(version_str)) => get_repo_base_url(vcs_url)
101                .map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str)),
102            _ => None,
103        };
104        let code_view_url = match (source.as_deref(), version.as_deref()) {
105            (Some(vcs_url), Some(version_str)) => {
106                get_repo_base_url(vcs_url).map(|base| format!("{}/tree/{}", base, version_str))
107            }
108            _ => None,
109        };
110        let bug_tracking_url = source
111            .as_deref()
112            .and_then(get_repo_base_url)
113            .map(|base| format!("{}/issues/", base));
114        let api_data_url = match (name.as_deref(), version.as_deref()) {
115            (Some(name_str), Some(version_str)) => get_hashed_path(name_str).map(|hashed| {
116                format!(
117                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
118                    hashed, name_str, version_str, name_str
119                )
120            }),
121            _ => None,
122        };
123        let purl = if let Some(name_str) = &name {
124            let mut purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
125                .unwrap_or_else(|_| PackageUrl::new("generic", name_str).unwrap());
126            if let Some(version_str) = &version {
127                let _ = purl.with_version(version_str);
128            }
129            Some(purl.to_string())
130        } else {
131            None
132        };
133
134        vec![PackageData {
135            package_type: Some(Self::PACKAGE_TYPE),
136            namespace: None,
137            name,
138            version,
139            qualifiers: None,
140            subpath: None,
141            primary_language: Some("Objective-C".to_string()),
142            description,
143            release_date: None,
144            parties,
145            keywords: Vec::new(),
146            homepage_url,
147            download_url: None,
148            size: None,
149            sha1: None,
150            md5: None,
151            sha256: None,
152            sha512: None,
153            bug_tracking_url,
154            code_view_url,
155            vcs_url: source,
156            copyright: None,
157            holder: None,
158            declared_license_expression: None,
159            declared_license_expression_spdx: None,
160            license_detections: Vec::new(),
161            other_license_expression: None,
162            other_license_expression_spdx: None,
163            other_license_detections: Vec::new(),
164            extracted_license_statement: license,
165            notice_text: None,
166            source_packages: Vec::new(),
167            file_references: Vec::new(),
168            extra_data: None,
169            dependencies,
170            repository_homepage_url,
171            repository_download_url,
172            api_data_url,
173            datasource_id: Some(DatasourceId::CocoapodsPodspec),
174            purl,
175            is_private: false,
176            is_virtual: false,
177        }]
178    }
179}
180
181fn default_package_data() -> PackageData {
182    PackageData {
183        package_type: Some(PodspecParser::PACKAGE_TYPE),
184        primary_language: Some("Objective-C".to_string()),
185        datasource_id: Some(DatasourceId::CocoapodsPodspec),
186        ..Default::default()
187    }
188}
189
lazy_static! {
    // Regex patterns matching Python reference implementation
    // Each field pattern below matches `.<field> = <value>` and captures the
    // rest of the line after the `=` as group 1. The patterns are unanchored,
    // so the receiver name before the dot (e.g. `s`) is not constrained.
    static ref NAME_PATTERN: Regex = Regex::new(r"\.name\s*=\s*(.+)").unwrap();
    static ref VERSION_PATTERN: Regex = Regex::new(r"\.version\s*=\s*(.+)").unwrap();
    static ref SUMMARY_PATTERN: Regex = Regex::new(r"\.summary\s*=\s*(.+)").unwrap();
    static ref DESCRIPTION_PATTERN: Regex = Regex::new(r"\.description\s*=\s*(.+)").unwrap();
    static ref HOMEPAGE_PATTERN: Regex = Regex::new(r"\.homepage\s*=\s*(.+)").unwrap();
    static ref LICENSE_PATTERN: Regex = Regex::new(r"\.license\s*=\s*(.+)").unwrap();
    static ref SOURCE_PATTERN: Regex = Regex::new(r"\.source\s*=\s*(.+)").unwrap();
    // Accepts both the singular `.author` and plural `.authors` spelling.
    static ref AUTHOR_PATTERN: Regex = Regex::new(r"\.authors?\s*=\s*(.+)").unwrap();
    // Capture the quoted URL that follows a `:git =>` / `:http =>` hash key.
    static ref SOURCE_GIT_PATTERN: Regex = Regex::new(r#":git\s*=>\s*['\"]([^'\"]+)['\"]"#).unwrap();
    static ref SOURCE_HTTP_PATTERN: Regex = Regex::new(r#":http\s*=>\s*['\"]([^'\"]+)['\"]"#).unwrap();

    // Dependency patterns (using pod/dependency method calls)
    // Group 1 is the quoted dependency name; optional group 2 is everything
    // after the first comma that follows it (the version requirement text).
    static ref DEPENDENCY_PATTERN: Regex = Regex::new(
        r#"(?:s\.)?(?:dependency|add_dependency|add_(?:runtime|development)_dependency)\s+['"]([^'"]+)['"](?:\s*,\s*(.+))?"#
    ).unwrap();
}
208
209fn extract_license_statement(content: &str) -> Option<String> {
210    extract_field(content, &LICENSE_PATTERN).map(|value| normalize_ruby_hash_literal(&value))
211}
212
/// Flattens a Ruby hash literal into a plain `key = value` string.
///
/// Values without any `=` are returned untouched. Otherwise `=>` becomes `=`,
/// single and double quotes are dropped, and every run of whitespace is
/// collapsed to one space (leading/trailing whitespace is removed).
fn normalize_ruby_hash_literal(value: &str) -> String {
    // A `=>` always contains `=`, so a single check covers both spellings.
    if !value.contains('=') {
        return value.to_string();
    }

    let unquoted: String = value
        .replace("=>", "=")
        .chars()
        .filter(|&ch| ch != '\'' && ch != '"')
        .collect();

    let mut normalized = String::with_capacity(unquoted.len());
    for word in unquoted.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
225
226/// Extract a single field using a regex pattern
227fn extract_field(content: &str, pattern: &Regex) -> Option<String> {
228    for line in content.lines() {
229        let cleaned_line = pre_process(line);
230        if let Some(value) = pattern.captures(&cleaned_line).and_then(|caps| caps.get(1)) {
231            return Some(clean_string(value.as_str()));
232        }
233    }
234    None
235}
236
237/// Extract description, handling multiline heredoc format
238fn extract_description(content: &str) -> Option<String> {
239    let lines: Vec<&str> = content.lines().collect();
240
241    for (i, line) in lines.iter().enumerate() {
242        let cleaned = pre_process(line);
243        if let Some(value) = DESCRIPTION_PATTERN
244            .captures(&cleaned)
245            .and_then(|caps| caps.get(1))
246        {
247            let value_str = value.as_str();
248
249            if value_str.contains("<<-") {
250                return extract_multiline_description(&lines, i);
251            } else {
252                return Some(clean_string(value_str));
253            }
254        }
255    }
256    None
257}
258
/// Combines the summary and description into one text.
///
/// A blank (or missing) summary yields the description unchanged. Otherwise
/// the trimmed summary is used alone when there is no description, is skipped
/// when the description already starts with it, and is prepended on its own
/// line in every other case.
fn merge_summary_and_description(
    summary: Option<&str>,
    description: Option<String>,
) -> Option<String> {
    let lead = match summary.map(str::trim) {
        Some(text) if !text.is_empty() => text,
        _ => return description,
    };

    match description {
        None => Some(lead.to_string()),
        Some(body) => {
            if body.starts_with(lead) {
                Some(body)
            } else {
                Some(format!("{}\n{}", lead, body))
            }
        }
    }
}
273
/// Collects the body of a `<<-` heredoc that opens on `lines[start_index]`.
///
/// The terminator is the text after `<<-` on the opening line, trimmed and
/// with surrounding quotes removed. Body lines are taken until a line whose
/// trimmed content equals the terminator (or until the input ends). Returns
/// `None` when the index is out of range, the line has no `<<-`, or the body
/// has no lines; otherwise the joined body with outer whitespace trimmed.
fn extract_multiline_description(lines: &[&str], start_index: usize) -> Option<String> {
    let opener = lines.get(start_index)?;

    // Terminator label, e.g. "DESC" from "<<-DESC".
    let terminator = opener
        .split("<<-")
        .nth(1)?
        .trim()
        .trim_matches(|c: char| c == '"' || c == '\'');

    // The opening line itself is never part of the body.
    let body: Vec<&str> = lines[start_index + 1..]
        .iter()
        .copied()
        .take_while(|candidate| candidate.trim() != terminator)
        .collect();

    if body.is_empty() {
        return None;
    }
    Some(body.join("\n").trim().to_string())
}
309
310/// Extract authors (can be single or multiple)
311fn extract_authors(content: &str) -> Vec<(String, Option<String>)> {
312    let mut authors = Vec::new();
313
314    for line in content.lines() {
315        let cleaned_line = pre_process(line);
316        if let Some(value) = AUTHOR_PATTERN
317            .captures(&cleaned_line)
318            .and_then(|caps| caps.get(1))
319        {
320            let value_str = value.as_str();
321
322            if value_str.contains("=>") {
323                for part in value_str.split(',') {
324                    if let Some((name, email)) = parse_author_hash_entry(part) {
325                        authors.push((name, Some(email)));
326                    }
327                }
328            } else {
329                let cleaned = clean_string(value_str);
330                let (name, email) = parse_author_string(&cleaned);
331                authors.push((name, email));
332            }
333        }
334    }
335
336    authors
337}
338
339fn extract_source_url(content: &str) -> Option<String> {
340    for line in content.lines() {
341        let cleaned_line = pre_process(line);
342        let Some(value) = SOURCE_PATTERN
343            .captures(&cleaned_line)
344            .and_then(|caps| caps.get(1))
345            .map(|m| m.as_str())
346        else {
347            continue;
348        };
349
350        if let Some(caps) = SOURCE_GIT_PATTERN.captures(value)
351            && let Some(url) = caps.get(1)
352        {
353            return Some(clean_string(url.as_str()));
354        }
355
356        if let Some(caps) = SOURCE_HTTP_PATTERN.captures(value)
357            && let Some(url) = caps.get(1)
358        {
359            return Some(clean_string(url.as_str()));
360        }
361
362        return Some(clean_string(value));
363    }
364
365    None
366}
367
368/// Parse author from hash entry format: "Name" => "email"
369fn parse_author_hash_entry(entry: &str) -> Option<(String, String)> {
370    let parts: Vec<&str> = entry.split("=>").collect();
371    if parts.len() == 2 {
372        let name = clean_string(parts[0].trim())
373            .trim()
374            .trim_matches(['\'', '"'])
375            .to_string();
376        let email = clean_string(parts[1].trim())
377            .trim()
378            .trim_matches(['\'', '"'])
379            .to_string();
380        Some((name, email))
381    } else {
382        None
383    }
384}
385
/// Parse author from string, extracting email if present.
///
/// Recognizes the `Name <email>` form: the text before the first `<` becomes
/// the name and the text up to the next `>` becomes the email, both trimmed.
/// When there is no `<`, or no `>` after it, the whole input is returned as
/// the name with no email.
fn parse_author_string(author: &str) -> (String, Option<String>) {
    if let Some((name, rest)) = author.split_once('<') {
        // Look for the closing bracket only after the opening one, so a stray
        // `>` earlier in the string cannot produce an inverted slice range.
        if let Some((email, _)) = rest.split_once('>') {
            return (name.trim().to_string(), Some(email.trim().to_string()));
        }
    }
    (author.to_string(), None)
}
397
398/// Extract dependencies from podspec
399fn extract_dependencies(content: &str) -> Vec<Dependency> {
400    let mut dependencies = Vec::new();
401
402    for line in content.lines() {
403        let cleaned_line = pre_process(line);
404        if let Some(caps) = DEPENDENCY_PATTERN.captures(&cleaned_line) {
405            let method = caps.get(0).map(|m| m.as_str()).unwrap_or("");
406            let name = caps.get(1).map(|m| m.as_str()).unwrap_or("");
407            let version_req = caps.get(2).map(|m| clean_string(m.as_str()));
408
409            if let Some(dep) = create_dependency(name, version_req, method) {
410                dependencies.push(dep);
411            }
412        }
413    }
414
415    dependencies
416}
417
418/// Create a Dependency from name and version requirement
419fn create_dependency(name: &str, version_req: Option<String>, method: &str) -> Option<Dependency> {
420    if name.is_empty() {
421        return None;
422    }
423
424    let purl = PackageUrl::new("cocoapods", name).ok()?;
425
426    // Determine if version is pinned (exact version)
427    let is_pinned = version_req
428        .as_ref()
429        .map(|v| !v.contains(&['~', '>', '<', '='][..]))
430        .unwrap_or(false);
431
432    let is_development = method.contains("add_development_dependency");
433
434    Some(Dependency {
435        purl: Some(purl.to_string()),
436        extracted_requirement: version_req,
437        scope: Some(
438            if is_development {
439                "development"
440            } else {
441                "runtime"
442            }
443            .to_string(),
444        ),
445        is_runtime: Some(!is_development),
446        is_optional: Some(is_development),
447        is_pinned: Some(is_pinned),
448        is_direct: Some(true),
449        resolved_package: None,
450        extra_data: None,
451    })
452}
453
/// Drops everything from the first `#` onward and trims the remainder.
///
/// The `#` is not checked against string quoting, so a `#` inside a quoted
/// value also ends the line.
fn pre_process(line: &str) -> String {
    // `split` always yields at least one piece: the text before the first `#`.
    let code = line.split('#').next().unwrap_or(line);
    code.trim().to_string()
}
463
/// Strips Ruby literal decoration from a raw value.
///
/// After trimming, every `%q` and `.freeze` occurrence is removed; then
/// quotes, braces, square brackets and angle brackets are peeled off both
/// ends, and the result is trimmed once more.
fn clean_string(s: &str) -> String {
    const WRAPPERS: [char; 8] = ['\'', '"', '{', '}', '[', ']', '<', '>'];

    let without_markers = s.trim().replace("%q", "").replace(".freeze", "");
    let unwrapped = without_markers.trim_matches(&WRAPPERS[..]);

    unwrapped.trim().to_string()
}
482
/// Returns `vcs_url` with any trailing `.git` suffix (repeated suffixes
/// included) removed, or `None` when the input is empty.
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    // `trim_end_matches` is a no-op for URLs that do not end in `.git`.
    (!vcs_url.is_empty()).then(|| vcs_url.trim_end_matches(".git").to_string())
}
494
495fn get_hashed_path(name: &str) -> Option<String> {
496    if name.is_empty() {
497        return None;
498    }
499
500    let mut hasher = Md5::new();
501    hasher.update(name.as_bytes());
502    let hash_str = format!("{:x}", hasher.finalize());
503
504    Some(format!(
505        "{}/{}/{}",
506        &hash_str[0..1],
507        &hash_str[1..2],
508        &hash_str[2..3]
509    ))
510}
511
// Passes this parser's descriptive metadata to the crate-level
// `register_parser!` macro: a description, the `**/*.podspec` path glob, the
// package type, the primary language and a documentation URL.
// NOTE(review): the macro is defined elsewhere in the crate; what it does with
// these arguments is not visible from this file — confirm against its definition.
crate::register_parser!(
    "CocoaPods podspec file",
    &["**/*.podspec"],
    "cocoapods",
    "Objective-C",
    Some("https://guides.cocoapods.org/syntax/podspec.html"),
);
519
// Unit tests for the podspec parser's matching logic and private helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// `.podspec` paths match; `.podspec.json`, `Podfile` and `Podfile.lock` do not.
    #[test]
    fn test_is_match() {
        assert!(PodspecParser::is_match(Path::new("AFNetworking.podspec")));
        assert!(PodspecParser::is_match(Path::new("project/MyLib.podspec")));
        assert!(!PodspecParser::is_match(Path::new(
            "AFNetworking.podspec.json"
        )));
        assert!(!PodspecParser::is_match(Path::new("Podfile")));
        assert!(!PodspecParser::is_match(Path::new("Podfile.lock")));
    }

    /// Quotes, `.freeze` and `%q{...}` decoration are stripped from values.
    #[test]
    fn test_clean_string() {
        assert_eq!(clean_string("'AFNetworking'"), "AFNetworking");
        assert_eq!(clean_string("\"AFNetworking\""), "AFNetworking");
        assert_eq!(clean_string("'test'.freeze"), "test");
        assert_eq!(clean_string("%q{test}"), "test");
    }

    /// Single-line `s.name` / `s.version` assignments are extracted and unquoted.
    #[test]
    fn test_extract_simple_field() {
        let content = r#"
Pod::Spec.new do |s|
  s.name = "AFNetworking"
  s.version = "4.0.1"
end
"#;
        assert_eq!(
            extract_field(content, &NAME_PATTERN),
            Some("AFNetworking".to_string())
        );
        assert_eq!(
            extract_field(content, &VERSION_PATTERN),
            Some("4.0.1".to_string())
        );
    }

    /// A `<<-DESC` heredoc description is collected across multiple lines.
    #[test]
    fn test_extract_multiline_description() {
        let content = r#"
Pod::Spec.new do |s|
  s.description = <<-DESC
    A delightful networking library.
    Features include:
    - Modern API
  DESC
end
"#;
        let desc = extract_description(content);
        assert!(desc.is_some());
        let desc_text = desc.unwrap();
        assert!(desc_text.contains("delightful networking"));
        assert!(desc_text.contains("Modern API"));
    }

    /// Dependencies with and without a version requirement are both parsed.
    #[test]
    fn test_extract_dependency() {
        let content = r#"
Pod::Spec.new do |s|
  s.dependency "AFNetworking", "~> 4.0"
  s.dependency "Alamofire"
end
"#;
        let deps = extract_dependencies(content);
        assert_eq!(deps.len(), 2);

        assert_eq!(deps[0].purl, Some("pkg:cocoapods/AFNetworking".to_string()));
        assert_eq!(deps[0].extracted_requirement, Some("~> 4.0".to_string()));
        assert_eq!(deps[0].is_pinned, Some(false)); // Contains ~

        assert_eq!(deps[1].purl, Some("pkg:cocoapods/Alamofire".to_string()));
        assert_eq!(deps[1].extracted_requirement, None);
    }

    /// `add_dependency` and `add_runtime_dependency` yield runtime scope;
    /// `add_development_dependency` yields an optional development scope.
    #[test]
    fn test_extract_runtime_and_development_dependency_scopes() {
        let content = r#"
Pod::Spec.new do |s|
  s.add_dependency 'AFNetworking', '~> 4.0'
  s.add_runtime_dependency 'Alamofire', '~> 5.0'
  s.add_development_dependency 'Quick', '~> 7.0'
end
"#;

        let deps = extract_dependencies(content);
        assert_eq!(deps.len(), 3);

        assert_eq!(deps[0].scope.as_deref(), Some("runtime"));
        assert_eq!(deps[0].is_runtime, Some(true));
        assert_eq!(deps[0].is_optional, Some(false));

        assert_eq!(deps[1].scope.as_deref(), Some("runtime"));
        assert_eq!(deps[1].is_runtime, Some(true));
        assert_eq!(deps[1].is_optional, Some(false));

        assert_eq!(deps[2].scope.as_deref(), Some("development"));
        assert_eq!(deps[2].is_runtime, Some(false));
        assert_eq!(deps[2].is_optional, Some(true));
    }

    /// `Name <email>` is split into name and email; a bare name has no email.
    #[test]
    fn test_parse_author_string() {
        assert_eq!(
            parse_author_string("John Doe <john@example.com>"),
            ("John Doe".to_string(), Some("john@example.com".to_string()))
        );
        assert_eq!(
            parse_author_string("Jane Smith"),
            ("Jane Smith".to_string(), None)
        );
    }
}