Skip to main content

provenant/parsers/
podspec_json.rs

//! Parser for CocoaPods .podspec.json manifests.
//!
//! Extracts package metadata and dependencies from .podspec.json files used by
//! CocoaPods for iOS/macOS package management.
//!
//! # Supported Formats
//! - *.podspec.json (CocoaPods manifest JSON format)
//!
//! # Key Features
//! - Dependency extraction from dependencies dictionary
//! - License handling (both string and dict formats with "type" and "text" keys)
//! - VCS and download URL extraction from source field
//! - Author/party information parsing
//! - Full JSON storage in extra_data
//!
//! # Implementation Notes
//! - Uses serde_json for JSON parsing
//! - Handles license as both string and dict (joins dict values)
//! - Extracts dependencies from dict (key=name, value=version requirement)
//! - All dependencies have scope="runtime" and is_runtime=true
//! - Source dict stored in extra_data["source"]
22
23use std::collections::HashMap;
24use std::fs::File;
25use std::io::Read;
26use std::path::Path;
27
28use crate::parser_warn as warn;
29use packageurl::PackageUrl;
30use serde_json::Value;
31
32use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
33
34use super::PackageParser;
35use super::license_normalization::normalize_spdx_declared_license;
36
// Top-level keys read from the podspec JSON document.

/// Key holding the pod name.
const FIELD_NAME: &str = "name";
/// Key holding the pod version.
const FIELD_VERSION: &str = "version";
/// Key holding the short summary text.
const FIELD_SUMMARY: &str = "summary";
/// Key holding the long description text.
const FIELD_DESCRIPTION: &str = "description";
/// Key holding the project homepage URL.
const FIELD_HOMEPAGE: &str = "homepage";
/// Key holding the license (read as either a string or an object).
const FIELD_LICENSE: &str = "license";
/// Key holding the source location (read as either an object or a string).
const FIELD_SOURCE: &str = "source";
/// Key holding the authors (read as either an object or a string).
const FIELD_AUTHORS: &str = "authors";
/// Key holding the dependency map.
const FIELD_DEPENDENCIES: &str = "dependencies";

/// Primary language reported for every package produced by this parser.
const PRIMARY_LANGUAGE: &str = "Objective-C";
48
49/// CocoaPods .podspec.json parser.
50///
51/// Parses .podspec.json manifest files from CocoaPods ecosystem.
52pub struct PodspecJsonParser;
53
54impl PackageParser for PodspecJsonParser {
55    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
56
57    fn extract_packages(path: &Path) -> Vec<PackageData> {
58        let json_content = match read_json_file(path) {
59            Ok(content) => content,
60            Err(e) => {
61                warn!("Failed to read .podspec.json at {:?}: {}", path, e);
62                return vec![default_package_data()];
63            }
64        };
65
66        let name = json_content
67            .get(FIELD_NAME)
68            .and_then(|v| v.as_str())
69            .map(|s| s.trim().to_string())
70            .filter(|s| !s.is_empty());
71
72        let version = json_content
73            .get(FIELD_VERSION)
74            .and_then(|v| v.as_str())
75            .map(|s| s.trim().to_string())
76            .filter(|s| !s.is_empty());
77
78        let summary = json_content
79            .get(FIELD_SUMMARY)
80            .and_then(|v| v.as_str())
81            .map(|s| s.trim().to_string())
82            .filter(|s| !s.is_empty());
83
84        let mut description = json_content
85            .get(FIELD_DESCRIPTION)
86            .and_then(|v| v.as_str())
87            .map(|s| s.trim().to_string())
88            .filter(|s| !s.is_empty());
89
90        // If summary exists and description doesn't start with summary, prepend it
91        if let (Some(summary_text), Some(desc_text)) = (&summary, &description) {
92            if !desc_text.starts_with(summary_text) {
93                description = Some(format!("{}. {}", summary_text, desc_text));
94            }
95        } else if summary.is_some() && description.is_none() {
96            description = summary.clone();
97        }
98
99        let homepage_url = json_content
100            .get(FIELD_HOMEPAGE)
101            .and_then(|v| v.as_str())
102            .map(|s| s.trim().to_string())
103            .filter(|s| !s.is_empty());
104
105        let extracted_license_statement = extract_license_statement(&json_content);
106        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
107            normalize_podspec_json_declared_license(
108                &json_content,
109                extracted_license_statement.as_deref(),
110            );
111
112        let (vcs_url, download_url) = extract_source_urls(&json_content);
113
114        let parties = extract_parties(&json_content);
115
116        let dependencies = extract_dependencies(&json_content);
117
118        let mut extra_data = HashMap::new();
119
120        // Store source dict in extra_data
121        if let Some(source) = json_content.get(FIELD_SOURCE) {
122            extra_data.insert("source".to_string(), source.clone());
123        }
124
125        // Store dependencies dict in extra_data if present
126        if let Some(deps) = json_content.get(FIELD_DEPENDENCIES)
127            && let Some(obj) = deps.as_object()
128            && !obj.is_empty()
129        {
130            extra_data.insert(FIELD_DEPENDENCIES.to_string(), deps.clone());
131        }
132
133        if let Some(license_file) = json_content
134            .get(FIELD_LICENSE)
135            .and_then(|license| license.as_object())
136            .and_then(|license| license.get("file"))
137            .and_then(|value| value.as_str())
138            .filter(|value| !value.trim().is_empty())
139        {
140            extra_data.insert(
141                "license_file".to_string(),
142                Value::String(license_file.trim().to_string()),
143            );
144        }
145
146        // Store full JSON in extra_data
147        extra_data.insert("podspec.json".to_string(), json_content.clone());
148
149        let extra_data = if extra_data.is_empty() {
150            None
151        } else {
152            Some(extra_data)
153        };
154
155        // Generate URLs using CocoaPods patterns
156        let repository_homepage_url = name
157            .as_ref()
158            .map(|n| format!("https://cocoapods.org/pods/{}", n));
159        let repository_download_url =
160            if let (Some(_name_str), Some(version_str)) = (&name, &version) {
161                if let Some(homepage) = &homepage_url {
162                    Some(format!("{}/archive/{}.zip", homepage, version_str))
163                } else if let Some(vcs) = &vcs_url {
164                    let repo_base = get_repo_base_url(vcs);
165                    repo_base.map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str))
166                } else {
167                    None
168                }
169            } else {
170                None
171            };
172
173        let code_view_url = if let (Some(vcs), Some(version_str)) = (&vcs_url, &version) {
174            let repo_base = get_repo_base_url(vcs);
175            repo_base.map(|base| format!("{}/tree/{}", base, version_str))
176        } else {
177            None
178        };
179
180        let bug_tracking_url = vcs_url.as_ref().and_then(|vcs| {
181            let repo_base = get_repo_base_url(vcs);
182            repo_base.map(|base| format!("{}/issues/", base))
183        });
184
185        let api_data_url = if let (Some(name_str), Some(version_str)) = (&name, &version) {
186            get_hashed_path(name_str).map(|hashed| {
187                format!(
188                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
189                    hashed, name_str, version_str, name_str
190                )
191            })
192        } else {
193            None
194        };
195
196        let purl = if let Some(name_str) = &name {
197            let mut purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
198                .unwrap_or_else(|_| PackageUrl::new("generic", name_str).unwrap());
199            if let Some(version_str) = &version {
200                let _ = purl.with_version(version_str);
201            }
202            Some(purl.to_string())
203        } else {
204            None
205        };
206
207        vec![PackageData {
208            package_type: Some(Self::PACKAGE_TYPE),
209            namespace: None,
210            name: name.clone(),
211            version: version.clone(),
212            qualifiers: None,
213            subpath: None,
214            primary_language: Some(PRIMARY_LANGUAGE.to_string()),
215            description,
216            release_date: None,
217            parties,
218            keywords: Vec::new(),
219            homepage_url,
220            download_url,
221            size: None,
222            sha1: None,
223            md5: None,
224            sha256: None,
225            sha512: None,
226            bug_tracking_url,
227            code_view_url,
228            vcs_url,
229            copyright: None,
230            holder: None,
231            declared_license_expression,
232            declared_license_expression_spdx,
233            license_detections,
234            other_license_expression: None,
235            other_license_expression_spdx: None,
236            other_license_detections: Vec::new(),
237            extracted_license_statement,
238            notice_text: None,
239            source_packages: Vec::new(),
240            file_references: Vec::new(),
241            is_private: false,
242            is_virtual: false,
243            extra_data,
244            dependencies,
245            repository_homepage_url,
246            repository_download_url,
247            api_data_url,
248            datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
249            purl,
250        }]
251    }
252
253    fn is_match(path: &Path) -> bool {
254        path.file_name()
255            .and_then(|name| name.to_str())
256            .is_some_and(|name| name.ends_with(".podspec.json"))
257    }
258}
259
260/// Reads and parses a JSON file.
261fn read_json_file(path: &Path) -> Result<Value, String> {
262    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
263    let mut contents = String::new();
264    file.read_to_string(&mut contents)
265        .map_err(|e| format!("Failed to read file: {}", e))?;
266    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
267}
268
269/// Returns a default empty PackageData.
270fn default_package_data() -> PackageData {
271    PackageData {
272        package_type: Some(PodspecJsonParser::PACKAGE_TYPE),
273        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
274        datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
275        ..Default::default()
276    }
277}
278
279/// Extracts license statement from JSON.
280/// Handles both string and dict formats.
281fn extract_license_statement(json: &Value) -> Option<String> {
282    json.get(FIELD_LICENSE).and_then(|lic| {
283        if let Some(lic_str) = lic.as_str() {
284            Some(lic_str.trim().to_string())
285        } else if let Some(lic_obj) = lic.as_object() {
286            // If license is a dict, join all values with space
287            let values: Vec<String> = lic_obj
288                .values()
289                .filter_map(|v| v.as_str())
290                .map(|s| s.trim().to_string())
291                .filter(|s| !s.is_empty())
292                .collect();
293            if values.is_empty() {
294                None
295            } else {
296                Some(values.join(" "))
297            }
298        } else {
299            None
300        }
301    })
302}
303
304fn normalize_podspec_json_declared_license(
305    json: &Value,
306    extracted_license_statement: Option<&str>,
307) -> (
308    Option<String>,
309    Option<String>,
310    Vec<crate::models::LicenseDetection>,
311) {
312    let normalized_candidate = json
313        .get(FIELD_LICENSE)
314        .and_then(|license| {
315            license
316                .as_str()
317                .map(str::trim)
318                .filter(|value| !value.is_empty())
319                .map(canonicalize_cocoapods_license_type)
320                .or_else(|| {
321                    license
322                        .as_object()
323                        .and_then(|obj| obj.get("type"))
324                        .and_then(|value| value.as_str())
325                        .map(str::trim)
326                        .filter(|value| !value.is_empty())
327                        .map(canonicalize_cocoapods_license_type)
328                })
329        })
330        .or_else(|| extracted_license_statement.map(canonicalize_cocoapods_license_type));
331
332    normalize_spdx_declared_license(normalized_candidate.as_deref())
333}
334
/// Maps a license name to a canonical identifier.
///
/// The input is trimmed; the exact spelling `Apache License, Version 2.0`
/// becomes `Apache-2.0`, and every other value is returned trimmed but
/// otherwise unchanged.
fn canonicalize_cocoapods_license_type(value: &str) -> String {
    let trimmed = value.trim();
    if trimmed == "Apache License, Version 2.0" {
        String::from("Apache-2.0")
    } else {
        String::from(trimmed)
    }
}
341
342/// Extracts VCS URL and download URL from source field.
343fn extract_source_urls(json: &Value) -> (Option<String>, Option<String>) {
344    let mut vcs_url = None;
345    let mut download_url = None;
346
347    if let Some(source) = json.get(FIELD_SOURCE) {
348        if let Some(source_obj) = source.as_object() {
349            // Git URL takes precedence for vcs_url
350            if let Some(git_url) = source_obj.get("git").and_then(|v| v.as_str()) {
351                let git_str = git_url.trim().to_string();
352                if !git_str.is_empty() {
353                    vcs_url = Some(git_str);
354                }
355            }
356
357            // HTTP URL is download_url
358            if let Some(http_url) = source_obj.get("http").and_then(|v| v.as_str()) {
359                let http_str = http_url.trim().to_string();
360                if !http_str.is_empty() {
361                    download_url = Some(http_str);
362                }
363            }
364        } else if let Some(source_str) = source.as_str() {
365            // If source is a string, use as vcs_url
366            let source_trimmed = source_str.trim().to_string();
367            if !source_trimmed.is_empty() {
368                vcs_url = Some(source_trimmed);
369            }
370        }
371    }
372
373    (vcs_url, download_url)
374}
375
376/// Extracts party information from authors field.
377fn extract_parties(json: &Value) -> Vec<Party> {
378    let mut parties = Vec::new();
379
380    if let Some(authors) = json.get(FIELD_AUTHORS) {
381        if let Some(authors_obj) = authors.as_object() {
382            // Authors as dict: key=name, value=url
383            for (name, value) in authors_obj {
384                let name_str = name.trim().to_string();
385                if !name_str.is_empty() {
386                    let url = value.as_str().and_then(|s| {
387                        let trimmed = s.trim();
388                        // Python reference adds ".com" suffix if URL doesn't have it
389                        if trimmed.is_empty() {
390                            None
391                        } else if trimmed.contains("://") || trimmed.contains('.') {
392                            Some(trimmed.to_string())
393                        } else {
394                            Some(format!("{}.com", trimmed))
395                        }
396                    });
397
398                    parties.push(Party {
399                        r#type: Some("organization".to_string()),
400                        role: Some("owner".to_string()),
401                        name: Some(name_str),
402                        email: None,
403                        url,
404                        organization: None,
405                        organization_url: None,
406                        timezone: None,
407                    });
408                }
409            }
410        } else if let Some(authors_str) = authors.as_str() {
411            // Authors as string
412            let authors_trimmed = authors_str.trim().to_string();
413            if !authors_trimmed.is_empty() {
414                parties.push(Party {
415                    r#type: Some("organization".to_string()),
416                    role: Some("owner".to_string()),
417                    name: Some(authors_trimmed),
418                    email: None,
419                    url: None,
420                    organization: None,
421                    organization_url: None,
422                    timezone: None,
423                });
424            }
425        }
426    }
427
428    parties
429}
430
431/// Extracts dependencies from dependencies dict.
432fn extract_dependencies(json: &Value) -> Vec<Dependency> {
433    let mut dependencies = Vec::new();
434
435    if let Some(deps) = json.get(FIELD_DEPENDENCIES)
436        && let Some(deps_obj) = deps.as_object()
437    {
438        for (name, requirement) in deps_obj {
439            let name_str = name.trim();
440            if name_str.is_empty() {
441                continue;
442            }
443
444            let requirement_str = requirement
445                .as_str()
446                .map(|s| s.trim().to_string())
447                .filter(|s| !s.is_empty());
448
449            let purl = Some(format!("pkg:cocoapods/{}", name_str));
450
451            dependencies.push(Dependency {
452                purl,
453                extracted_requirement: requirement_str,
454                scope: Some("runtime".to_string()),
455                is_runtime: Some(true),
456                is_optional: Some(false),
457                is_pinned: None,
458                is_direct: None,
459                resolved_package: None,
460                extra_data: None,
461            });
462        }
463    }
464
465    dependencies
466}
467
/// Gets the repository base URL from a VCS URL by removing one trailing
/// `.git` suffix.
///
/// Returns `None` for an empty input. Exactly one suffix is removed, so
/// `https://host/repo.git.git` becomes `https://host/repo.git`, not
/// `https://host/repo`. URLs without the suffix are returned unchanged.
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    if vcs_url.is_empty() {
        return None;
    }

    // `strip_suffix` removes at most one occurrence; `trim_end_matches` would
    // keep stripping repeated suffixes.
    Some(vcs_url.strip_suffix(".git").unwrap_or(vcs_url).to_string())
}
480
481/// Computes the hashed path prefix for CocoaPods Specs repository.
482///
483/// Uses MD5 hash of package name to generate the path prefix (first 3 chars).
484fn get_hashed_path(name: &str) -> Option<String> {
485    use md5::{Digest, Md5};
486
487    if name.is_empty() {
488        return None;
489    }
490
491    // Compute MD5 hash
492    let mut hasher = Md5::new();
493    hasher.update(name.as_bytes());
494    let result = hasher.finalize();
495    let hash_str = hex::encode(result);
496
497    if hash_str.len() >= 3 {
498        Some(format!(
499            "{}/{}/{}",
500            &hash_str[0..1],
501            &hash_str[1..2],
502            &hash_str[2..3]
503        ))
504    } else {
505        Some(hash_str)
506    }
507}
508
509crate::register_parser!(
510    "CocoaPods .podspec.json manifest",
511    &["**/*.podspec.json"],
512    "cocoapods",
513    "Objective-C",
514    Some("https://guides.cocoapods.org/syntax/podspec.html"),
515);