Skip to main content

provenant/parsers/
podspec_json.rs

1//! Parser for CocoaPods .podspec.json manifests.
2//!
3//! Extracts package metadata and dependencies from .podspec.json files used by
4//! CocoaPods for iOS/macOS package management.
5//!
6//! # Supported Formats
7//! - *.podspec.json (CocoaPods manifest JSON format)
8//!
9//! # Key Features
10//! - Dependency extraction from dependencies dictionary
11//! - License handling (both string and dict formats with "type" and "text" keys)
12//! - VCS and download URL extraction from source field
13//! - Author/party information parsing
14//! - Full JSON storage in extra_data
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Handles license as both string and dict (joins dict values)
19//! - Extracts dependencies from dict (key=name, value=version requirement)
20//! - All dependencies have scope="runtime", is_runtime=true and is_optional=false
21//! - Source dict stored in extra_data["source"]
22
23use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
28use packageurl::PackageUrl;
29use serde_json::Value;
30
31use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
32
33use super::PackageParser;
34use super::license_normalization::normalize_spdx_declared_license;
35
// Top-level keys looked up in the parsed podspec JSON document.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_SUMMARY: &str = "summary";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_HOMEPAGE: &str = "homepage";
// May hold either a plain string or an object (see `extract_license_statement`).
const FIELD_LICENSE: &str = "license";
// May hold either an object with "git"/"http" keys or a plain string.
const FIELD_SOURCE: &str = "source";
const FIELD_AUTHORS: &str = "authors";
const FIELD_DEPENDENCIES: &str = "dependencies";

// Reported as `primary_language` on every package this parser produces.
const PRIMARY_LANGUAGE: &str = "Objective-C";
47
/// CocoaPods .podspec.json parser.
///
/// Parses .podspec.json manifest files from CocoaPods ecosystem.
///
/// This is a field-less marker type: all behavior lives in its
/// `PackageParser` implementation.
pub struct PodspecJsonParser;
52
53impl PackageParser for PodspecJsonParser {
54    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
55
56    fn extract_packages(path: &Path) -> Vec<PackageData> {
57        let json_content = match read_json_file(path) {
58            Ok(content) => content,
59            Err(e) => {
60                warn!("Failed to read .podspec.json at {:?}: {}", path, e);
61                return vec![default_package_data()];
62            }
63        };
64
65        let name = json_content
66            .get(FIELD_NAME)
67            .and_then(|v| v.as_str())
68            .map(|s| truncate_field(s.trim().to_string()))
69            .filter(|s| !s.is_empty());
70
71        let version = json_content
72            .get(FIELD_VERSION)
73            .and_then(|v| v.as_str())
74            .map(|s| truncate_field(s.trim().to_string()))
75            .filter(|s| !s.is_empty());
76
77        let summary = json_content
78            .get(FIELD_SUMMARY)
79            .and_then(|v| v.as_str())
80            .map(|s| truncate_field(s.trim().to_string()))
81            .filter(|s| !s.is_empty());
82
83        let mut description = json_content
84            .get(FIELD_DESCRIPTION)
85            .and_then(|v| v.as_str())
86            .map(|s| truncate_field(s.trim().to_string()))
87            .filter(|s| !s.is_empty());
88
89        // If summary exists and description doesn't start with summary, prepend it
90        if let (Some(summary_text), Some(desc_text)) = (&summary, &description) {
91            if !desc_text.starts_with(summary_text) {
92                description = Some(format!("{}. {}", summary_text, desc_text));
93            }
94        } else if summary.is_some() && description.is_none() {
95            description = summary.clone();
96        }
97
98        let homepage_url = json_content
99            .get(FIELD_HOMEPAGE)
100            .and_then(|v| v.as_str())
101            .map(|s| truncate_field(s.trim().to_string()))
102            .filter(|s| !s.is_empty());
103
104        let extracted_license_statement = extract_license_statement(&json_content);
105        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
106            normalize_podspec_json_declared_license(
107                &json_content,
108                extracted_license_statement.as_deref(),
109            );
110
111        let (vcs_url, download_url) = extract_source_urls(&json_content);
112
113        let parties = extract_parties(&json_content);
114
115        let dependencies = extract_dependencies(&json_content);
116
117        let mut extra_data = HashMap::new();
118
119        // Store source dict in extra_data
120        if let Some(source) = json_content.get(FIELD_SOURCE) {
121            extra_data.insert("source".to_string(), source.clone());
122        }
123
124        // Store dependencies dict in extra_data if present
125        if let Some(deps) = json_content.get(FIELD_DEPENDENCIES)
126            && let Some(obj) = deps.as_object()
127            && !obj.is_empty()
128        {
129            extra_data.insert(FIELD_DEPENDENCIES.to_string(), deps.clone());
130        }
131
132        if let Some(license_file) = json_content
133            .get(FIELD_LICENSE)
134            .and_then(|license| license.as_object())
135            .and_then(|license| license.get("file"))
136            .and_then(|value| value.as_str())
137            .filter(|value| !value.trim().is_empty())
138        {
139            extra_data.insert(
140                "license_file".to_string(),
141                Value::String(license_file.trim().to_string()),
142            );
143        }
144
145        let raw_json = serde_json::to_string(&json_content).unwrap_or_default();
146        if raw_json.len() <= 10 * 1024 * 1024 {
147            extra_data.insert("podspec.json".to_string(), json_content.clone());
148        } else {
149            warn!(
150                "Skipping podspec.json extra_data entry: serialized size {} bytes exceeds 10MB limit",
151                raw_json.len()
152            );
153        }
154
155        let extra_data = if extra_data.is_empty() {
156            None
157        } else {
158            Some(extra_data)
159        };
160
161        // Generate URLs using CocoaPods patterns
162        let repository_homepage_url = name
163            .as_ref()
164            .map(|n| format!("https://cocoapods.org/pods/{}", n));
165        let repository_download_url =
166            if let (Some(_name_str), Some(version_str)) = (&name, &version) {
167                if let Some(homepage) = &homepage_url {
168                    Some(format!("{}/archive/{}.zip", homepage, version_str))
169                } else if let Some(vcs) = &vcs_url {
170                    let repo_base = get_repo_base_url(vcs);
171                    repo_base.map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str))
172                } else {
173                    None
174                }
175            } else {
176                None
177            };
178
179        let code_view_url = if let (Some(vcs), Some(version_str)) = (&vcs_url, &version) {
180            let repo_base = get_repo_base_url(vcs);
181            repo_base.map(|base| format!("{}/tree/{}", base, version_str))
182        } else {
183            None
184        };
185
186        let bug_tracking_url = vcs_url.as_ref().and_then(|vcs| {
187            let repo_base = get_repo_base_url(vcs);
188            repo_base.map(|base| format!("{}/issues/", base))
189        });
190
191        let api_data_url = if let (Some(name_str), Some(version_str)) = (&name, &version) {
192            get_hashed_path(name_str).map(|hashed| {
193                format!(
194                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
195                    hashed, name_str, version_str, name_str
196                )
197            })
198        } else {
199            None
200        };
201
202        let purl = if let Some(name_str) = &name {
203            let purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
204                .or_else(|_| PackageUrl::new("generic", name_str))
205                .ok();
206            purl.map(|mut p| {
207                if let Some(version_str) = &version {
208                    let _ = p.with_version(version_str);
209                }
210                p.to_string()
211            })
212        } else {
213            None
214        };
215
216        vec![PackageData {
217            package_type: Some(Self::PACKAGE_TYPE),
218            namespace: None,
219            name: name.clone(),
220            version: version.clone(),
221            qualifiers: None,
222            subpath: None,
223            primary_language: Some(PRIMARY_LANGUAGE.to_string()),
224            description,
225            release_date: None,
226            parties,
227            keywords: Vec::new(),
228            homepage_url,
229            download_url,
230            size: None,
231            sha1: None,
232            md5: None,
233            sha256: None,
234            sha512: None,
235            bug_tracking_url,
236            code_view_url,
237            vcs_url,
238            copyright: None,
239            holder: None,
240            declared_license_expression,
241            declared_license_expression_spdx,
242            license_detections,
243            other_license_expression: None,
244            other_license_expression_spdx: None,
245            other_license_detections: Vec::new(),
246            extracted_license_statement,
247            notice_text: None,
248            source_packages: Vec::new(),
249            file_references: Vec::new(),
250            is_private: false,
251            is_virtual: false,
252            extra_data,
253            dependencies,
254            repository_homepage_url,
255            repository_download_url,
256            api_data_url,
257            datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
258            purl,
259        }]
260    }
261
262    fn is_match(path: &Path) -> bool {
263        path.file_name()
264            .and_then(|name| name.to_str())
265            .is_some_and(|name| name.ends_with(".podspec.json"))
266    }
267}
268
269fn read_json_file(path: &Path) -> Result<Value, String> {
270    let contents = read_file_to_string(path, None).map_err(|e| e.to_string())?;
271    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
272}
273
274/// Returns a default empty PackageData.
275fn default_package_data() -> PackageData {
276    PackageData {
277        package_type: Some(PodspecJsonParser::PACKAGE_TYPE),
278        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
279        datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
280        ..Default::default()
281    }
282}
283
284/// Extracts license statement from JSON.
285/// Handles both string and dict formats.
286fn extract_license_statement(json: &Value) -> Option<String> {
287    json.get(FIELD_LICENSE).and_then(|lic| {
288        if let Some(lic_str) = lic.as_str() {
289            Some(truncate_field(lic_str.trim().to_string()))
290        } else if let Some(lic_obj) = lic.as_object() {
291            // If license is a dict, join all values with space
292            let values: Vec<String> = lic_obj
293                .values()
294                .filter_map(|v| v.as_str())
295                .map(|s| s.trim().to_string())
296                .filter(|s| !s.is_empty())
297                .collect();
298            if values.is_empty() {
299                None
300            } else {
301                Some(values.join(" "))
302            }
303        } else {
304            None
305        }
306    })
307}
308
309fn normalize_podspec_json_declared_license(
310    json: &Value,
311    extracted_license_statement: Option<&str>,
312) -> (
313    Option<String>,
314    Option<String>,
315    Vec<crate::models::LicenseDetection>,
316) {
317    let normalized_candidate = json
318        .get(FIELD_LICENSE)
319        .and_then(|license| {
320            license
321                .as_str()
322                .map(str::trim)
323                .filter(|value| !value.is_empty())
324                .map(canonicalize_cocoapods_license_type)
325                .or_else(|| {
326                    license
327                        .as_object()
328                        .and_then(|obj| obj.get("type"))
329                        .and_then(|value| value.as_str())
330                        .map(str::trim)
331                        .filter(|value| !value.is_empty())
332                        .map(canonicalize_cocoapods_license_type)
333                })
334        })
335        .or_else(|| extracted_license_statement.map(canonicalize_cocoapods_license_type));
336
337    normalize_spdx_declared_license(normalized_candidate.as_deref())
338}
339
/// Maps a CocoaPods license type to its canonical spelling.
///
/// Surrounding whitespace is removed; the long-form Apache 2.0 name is
/// rewritten to `Apache-2.0` and every other value is returned as-is.
fn canonicalize_cocoapods_license_type(value: &str) -> String {
    let trimmed = value.trim();
    let canonical = if trimmed == "Apache License, Version 2.0" {
        "Apache-2.0"
    } else {
        trimmed
    };
    canonical.to_string()
}
346
347/// Extracts VCS URL and download URL from source field.
348fn extract_source_urls(json: &Value) -> (Option<String>, Option<String>) {
349    let mut vcs_url = None;
350    let mut download_url = None;
351
352    if let Some(source) = json.get(FIELD_SOURCE) {
353        if let Some(source_obj) = source.as_object() {
354            // Git URL takes precedence for vcs_url
355            if let Some(git_url) = source_obj.get("git").and_then(|v| v.as_str()) {
356                let git_str = truncate_field(git_url.trim().to_string());
357                if !git_str.is_empty() {
358                    vcs_url = Some(git_str);
359                }
360            }
361
362            // HTTP URL is download_url
363            if let Some(http_url) = source_obj.get("http").and_then(|v| v.as_str()) {
364                let http_str = truncate_field(http_url.trim().to_string());
365                if !http_str.is_empty() {
366                    download_url = Some(http_str);
367                }
368            }
369        } else if let Some(source_str) = source.as_str() {
370            // If source is a string, use as vcs_url
371            let source_trimmed = truncate_field(source_str.trim().to_string());
372            if !source_trimmed.is_empty() {
373                vcs_url = Some(source_trimmed);
374            }
375        }
376    }
377
378    (vcs_url, download_url)
379}
380
381/// Extracts party information from authors field.
382fn extract_parties(json: &Value) -> Vec<Party> {
383    let mut parties = Vec::new();
384
385    if let Some(authors) = json.get(FIELD_AUTHORS) {
386        if let Some(authors_obj) = authors.as_object() {
387            // Authors as dict: key=name, value=url
388            for (name, value) in authors_obj.iter().take(MAX_ITERATION_COUNT) {
389                let name_str = truncate_field(name.trim().to_string());
390                if !name_str.is_empty() {
391                    let url = value.as_str().and_then(|s| {
392                        let trimmed = s.trim();
393                        if trimmed.is_empty() {
394                            None
395                        } else if trimmed.contains("://") || trimmed.contains('.') {
396                            Some(truncate_field(trimmed.to_string()))
397                        } else {
398                            Some(truncate_field(format!("{}.com", trimmed)))
399                        }
400                    });
401
402                    parties.push(Party {
403                        r#type: Some("organization".to_string()),
404                        role: Some("owner".to_string()),
405                        name: Some(name_str),
406                        email: None,
407                        url,
408                        organization: None,
409                        organization_url: None,
410                        timezone: None,
411                    });
412                }
413            }
414        } else if let Some(authors_str) = authors.as_str() {
415            // Authors as string
416            let authors_trimmed = truncate_field(authors_str.trim().to_string());
417            if !authors_trimmed.is_empty() {
418                parties.push(Party {
419                    r#type: Some("organization".to_string()),
420                    role: Some("owner".to_string()),
421                    name: Some(authors_trimmed),
422                    email: None,
423                    url: None,
424                    organization: None,
425                    organization_url: None,
426                    timezone: None,
427                });
428            }
429        }
430    }
431
432    parties
433}
434
435/// Extracts dependencies from dependencies dict.
436fn extract_dependencies(json: &Value) -> Vec<Dependency> {
437    let mut dependencies = Vec::new();
438
439    if let Some(deps) = json.get(FIELD_DEPENDENCIES)
440        && let Some(deps_obj) = deps.as_object()
441    {
442        for (name, requirement) in deps_obj.iter().take(MAX_ITERATION_COUNT) {
443            let name_str = name.trim();
444            if name_str.is_empty() {
445                continue;
446            }
447
448            let requirement_str = requirement
449                .as_str()
450                .map(|s| truncate_field(s.trim().to_string()))
451                .filter(|s| !s.is_empty());
452
453            let purl = Some(truncate_field(format!("pkg:cocoapods/{}", name_str)));
454
455            dependencies.push(Dependency {
456                purl,
457                extracted_requirement: requirement_str,
458                scope: Some("runtime".to_string()),
459                is_runtime: Some(true),
460                is_optional: Some(false),
461                is_pinned: None,
462                is_direct: None,
463                resolved_package: None,
464                extra_data: None,
465            });
466        }
467    }
468
469    dependencies
470}
471
/// Gets the repository base URL from a VCS URL.
///
/// Trailing `/` characters are dropped and then a single `.git` suffix is
/// removed, so `https://host/a/b.git` and `https://host/a/b.git/` both yield
/// `https://host/a/b`. Only one suffix is stripped: a repository literally
/// named `tools.git` (URL `.../tools.git.git`) keeps its name.
///
/// Returns `None` when nothing is left (empty input, only slashes, or a bare
/// `.git`), so callers never build URLs on an empty base.
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    let without_slash = vcs_url.trim_end_matches('/');
    let base = without_slash
        .strip_suffix(".git")
        .unwrap_or(without_slash);

    if base.is_empty() {
        None
    } else {
        Some(base.to_string())
    }
}
484
485/// Computes the hashed path prefix for CocoaPods Specs repository.
486///
487/// Uses MD5 hash of package name to generate the path prefix (first 3 chars).
488fn get_hashed_path(name: &str) -> Option<String> {
489    use md5::{Digest, Md5};
490
491    if name.is_empty() {
492        return None;
493    }
494
495    // Compute MD5 hash
496    let mut hasher = Md5::new();
497    hasher.update(name.as_bytes());
498    let result = hasher.finalize();
499    let hash_str = hex::encode(result);
500
501    if hash_str.len() >= 3 {
502        Some(format!(
503            "{}/{}/{}",
504            &hash_str[0..1],
505            &hash_str[1..2],
506            &hash_str[2..3]
507        ))
508    } else {
509        Some(hash_str)
510    }
511}
512
// Declares this parser's metadata via the crate's `register_parser!` macro.
// NOTE(review): the arguments appear to be, in order, a human-readable
// description, the path glob patterns, the package type, the primary language
// and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "CocoaPods .podspec.json manifest",
    &["**/*.podspec.json"],
    "cocoapods",
    "Objective-C",
    Some("https://guides.cocoapods.org/syntax/podspec.html"),
);