Skip to main content

provenant/parsers/
podspec_json.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for CocoaPods .podspec.json manifests.
5//!
6//! Extracts package metadata and dependencies from .podspec.json files used by
7//! CocoaPods for iOS/macOS package management.
8//!
9//! # Supported Formats
10//! - *.podspec.json (CocoaPods manifest JSON format)
11//!
12//! # Key Features
13//! - Dependency extraction from dependencies dictionary
14//! - License handling (both string and dict formats with "type" and "text" keys)
15//! - VCS and download URL extraction from source field
16//! - Author/party information parsing
17//! - Full JSON storage in extra_data
18//!
19//! # Implementation Notes
20//! - Uses serde_json for JSON parsing
21//! - Handles license as both string and dict (joins dict values)
22//! - Extracts dependencies from dict (key=name, value=version requirement)
//! - All dependencies have scope="runtime" and is_runtime=true
24//! - Source dict stored in extra_data["source"]
25
26use std::collections::HashMap;
27use std::path::Path;
28
29use crate::parser_warn as warn;
30use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
31use packageurl::PackageUrl;
32use serde_json::Value;
33
34use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
35
36use super::PackageParser;
37use super::license_normalization::normalize_spdx_declared_license;
38
// Top-level keys read from the podspec JSON document.

/// Key holding the pod name.
const FIELD_NAME: &str = "name";
/// Key holding the pod version.
const FIELD_VERSION: &str = "version";
/// Key holding the short summary.
const FIELD_SUMMARY: &str = "summary";
/// Key holding the long description.
const FIELD_DESCRIPTION: &str = "description";
/// Key holding the project homepage URL.
const FIELD_HOMEPAGE: &str = "homepage";
/// Key holding the license (read as either a string or an object).
const FIELD_LICENSE: &str = "license";
/// Key holding the source location (read as either an object or a string).
const FIELD_SOURCE: &str = "source";
/// Key holding the authors (read as either an object or a string).
const FIELD_AUTHORS: &str = "authors";
/// Key holding the dependencies object.
const FIELD_DEPENDENCIES: &str = "dependencies";

/// Primary language reported for every package produced by this parser.
const PRIMARY_LANGUAGE: &str = "Objective-C";
50
/// Parser for CocoaPods `.podspec.json` manifests.
///
/// A stateless marker type: all behavior lives in its `PackageParser`
/// implementation.
pub struct PodspecJsonParser;
55
56impl PackageParser for PodspecJsonParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
58
59    fn extract_packages(path: &Path) -> Vec<PackageData> {
60        let json_content = match read_json_file(path) {
61            Ok(content) => content,
62            Err(e) => {
63                warn!("Failed to read .podspec.json at {:?}: {}", path, e);
64                return vec![default_package_data()];
65            }
66        };
67
68        let name = json_content
69            .get(FIELD_NAME)
70            .and_then(|v| v.as_str())
71            .map(|s| truncate_field(s.trim().to_string()))
72            .filter(|s| !s.is_empty());
73
74        let version = json_content
75            .get(FIELD_VERSION)
76            .and_then(|v| v.as_str())
77            .map(|s| truncate_field(s.trim().to_string()))
78            .filter(|s| !s.is_empty());
79
80        let summary = json_content
81            .get(FIELD_SUMMARY)
82            .and_then(|v| v.as_str())
83            .map(|s| truncate_field(s.trim().to_string()))
84            .filter(|s| !s.is_empty());
85
86        let mut description = json_content
87            .get(FIELD_DESCRIPTION)
88            .and_then(|v| v.as_str())
89            .map(|s| truncate_field(s.trim().to_string()))
90            .filter(|s| !s.is_empty());
91
92        // If summary exists and description doesn't start with summary, prepend it
93        if let (Some(summary_text), Some(desc_text)) = (&summary, &description) {
94            if !desc_text.starts_with(summary_text) {
95                description = Some(format!("{}. {}", summary_text, desc_text));
96            }
97        } else if summary.is_some() && description.is_none() {
98            description = summary.clone();
99        }
100
101        let homepage_url = json_content
102            .get(FIELD_HOMEPAGE)
103            .and_then(|v| v.as_str())
104            .map(|s| truncate_field(s.trim().to_string()))
105            .filter(|s| !s.is_empty());
106
107        let extracted_license_statement = extract_license_statement(&json_content);
108        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
109            normalize_podspec_json_declared_license(
110                &json_content,
111                extracted_license_statement.as_deref(),
112            );
113
114        let (vcs_url, download_url) = extract_source_urls(&json_content);
115
116        let parties = extract_parties(&json_content);
117
118        let dependencies = extract_dependencies(&json_content);
119
120        let mut extra_data = HashMap::new();
121
122        // Store source dict in extra_data
123        if let Some(source) = json_content.get(FIELD_SOURCE) {
124            extra_data.insert("source".to_string(), source.clone());
125        }
126
127        // Store dependencies dict in extra_data if present
128        if let Some(deps) = json_content.get(FIELD_DEPENDENCIES)
129            && let Some(obj) = deps.as_object()
130            && !obj.is_empty()
131        {
132            extra_data.insert(FIELD_DEPENDENCIES.to_string(), deps.clone());
133        }
134
135        if let Some(license_file) = json_content
136            .get(FIELD_LICENSE)
137            .and_then(|license| license.as_object())
138            .and_then(|license| license.get("file"))
139            .and_then(|value| value.as_str())
140            .filter(|value| !value.trim().is_empty())
141        {
142            extra_data.insert(
143                "license_file".to_string(),
144                Value::String(license_file.trim().to_string()),
145            );
146        }
147
148        let raw_json = serde_json::to_string(&json_content).unwrap_or_default();
149        if raw_json.len() <= 10 * 1024 * 1024 {
150            extra_data.insert("podspec.json".to_string(), json_content.clone());
151        } else {
152            warn!(
153                "Skipping podspec.json extra_data entry: serialized size {} bytes exceeds 10MB limit",
154                raw_json.len()
155            );
156        }
157
158        let extra_data = if extra_data.is_empty() {
159            None
160        } else {
161            Some(extra_data)
162        };
163
164        // Generate URLs using CocoaPods patterns
165        let repository_homepage_url = name
166            .as_ref()
167            .map(|n| format!("https://cocoapods.org/pods/{}", n));
168        let repository_download_url =
169            if let (Some(_name_str), Some(version_str)) = (&name, &version) {
170                if let Some(homepage) = &homepage_url {
171                    Some(format!("{}/archive/{}.zip", homepage, version_str))
172                } else if let Some(vcs) = &vcs_url {
173                    let repo_base = get_repo_base_url(vcs);
174                    repo_base.map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str))
175                } else {
176                    None
177                }
178            } else {
179                None
180            };
181
182        let code_view_url = if let (Some(vcs), Some(version_str)) = (&vcs_url, &version) {
183            let repo_base = get_repo_base_url(vcs);
184            repo_base.map(|base| format!("{}/tree/{}", base, version_str))
185        } else {
186            None
187        };
188
189        let bug_tracking_url = vcs_url.as_ref().and_then(|vcs| {
190            let repo_base = get_repo_base_url(vcs);
191            repo_base.map(|base| format!("{}/issues/", base))
192        });
193
194        let api_data_url = if let (Some(name_str), Some(version_str)) = (&name, &version) {
195            get_hashed_path(name_str).map(|hashed| {
196                format!(
197                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
198                    hashed, name_str, version_str, name_str
199                )
200            })
201        } else {
202            None
203        };
204
205        let purl = if let Some(name_str) = &name {
206            let purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
207                .or_else(|_| PackageUrl::new("generic", name_str))
208                .ok();
209            purl.map(|mut p| {
210                if let Some(version_str) = &version {
211                    let _ = p.with_version(version_str);
212                }
213                p.to_string()
214            })
215        } else {
216            None
217        };
218
219        vec![PackageData {
220            package_type: Some(Self::PACKAGE_TYPE),
221            namespace: None,
222            name: name.clone(),
223            version: version.clone(),
224            qualifiers: None,
225            subpath: None,
226            primary_language: Some(PRIMARY_LANGUAGE.to_string()),
227            description,
228            release_date: None,
229            parties,
230            keywords: Vec::new(),
231            homepage_url,
232            download_url,
233            size: None,
234            sha1: None,
235            md5: None,
236            sha256: None,
237            sha512: None,
238            bug_tracking_url,
239            code_view_url,
240            vcs_url,
241            copyright: None,
242            holder: None,
243            declared_license_expression,
244            declared_license_expression_spdx,
245            license_detections,
246            other_license_expression: None,
247            other_license_expression_spdx: None,
248            other_license_detections: Vec::new(),
249            extracted_license_statement,
250            notice_text: None,
251            source_packages: Vec::new(),
252            file_references: Vec::new(),
253            is_private: false,
254            is_virtual: false,
255            extra_data,
256            dependencies,
257            repository_homepage_url,
258            repository_download_url,
259            api_data_url,
260            datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
261            purl,
262        }]
263    }
264
265    fn is_match(path: &Path) -> bool {
266        path.file_name()
267            .and_then(|name| name.to_str())
268            .is_some_and(|name| name.ends_with(".podspec.json"))
269    }
270}
271
272fn read_json_file(path: &Path) -> Result<Value, String> {
273    let contents = read_file_to_string(path, None).map_err(|e| e.to_string())?;
274    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
275}
276
277/// Returns a default empty PackageData.
278fn default_package_data() -> PackageData {
279    PackageData {
280        package_type: Some(PodspecJsonParser::PACKAGE_TYPE),
281        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
282        datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
283        ..Default::default()
284    }
285}
286
287/// Extracts license statement from JSON.
288/// Handles both string and dict formats.
289fn extract_license_statement(json: &Value) -> Option<String> {
290    json.get(FIELD_LICENSE).and_then(|lic| {
291        if let Some(lic_str) = lic.as_str() {
292            Some(truncate_field(lic_str.trim().to_string()))
293        } else if let Some(lic_obj) = lic.as_object() {
294            // If license is a dict, join all values with space
295            let values: Vec<String> = lic_obj
296                .values()
297                .filter_map(|v| v.as_str())
298                .map(|s| s.trim().to_string())
299                .filter(|s| !s.is_empty())
300                .collect();
301            if values.is_empty() {
302                None
303            } else {
304                Some(values.join(" "))
305            }
306        } else {
307            None
308        }
309    })
310}
311
312fn normalize_podspec_json_declared_license(
313    json: &Value,
314    extracted_license_statement: Option<&str>,
315) -> (
316    Option<String>,
317    Option<String>,
318    Vec<crate::models::LicenseDetection>,
319) {
320    let normalized_candidate = json
321        .get(FIELD_LICENSE)
322        .and_then(|license| {
323            license
324                .as_str()
325                .map(str::trim)
326                .filter(|value| !value.is_empty())
327                .map(canonicalize_cocoapods_license_type)
328                .or_else(|| {
329                    license
330                        .as_object()
331                        .and_then(|obj| obj.get("type"))
332                        .and_then(|value| value.as_str())
333                        .map(str::trim)
334                        .filter(|value| !value.is_empty())
335                        .map(canonicalize_cocoapods_license_type)
336                })
337        })
338        .or_else(|| extracted_license_statement.map(canonicalize_cocoapods_license_type));
339
340    normalize_spdx_declared_license(normalized_candidate.as_deref())
341}
342
/// Maps known long-form license names to their SPDX identifier.
///
/// The input is trimmed first; any name without a mapping is returned trimmed
/// but otherwise unchanged.
fn canonicalize_cocoapods_license_type(value: &str) -> String {
    let trimmed = value.trim();
    let canonical = if trimmed == "Apache License, Version 2.0" {
        "Apache-2.0"
    } else {
        trimmed
    };
    canonical.to_owned()
}
349
350/// Extracts VCS URL and download URL from source field.
351fn extract_source_urls(json: &Value) -> (Option<String>, Option<String>) {
352    let mut vcs_url = None;
353    let mut download_url = None;
354
355    if let Some(source) = json.get(FIELD_SOURCE) {
356        if let Some(source_obj) = source.as_object() {
357            // Git URL takes precedence for vcs_url
358            if let Some(git_url) = source_obj.get("git").and_then(|v| v.as_str()) {
359                let git_str = truncate_field(git_url.trim().to_string());
360                if !git_str.is_empty() {
361                    vcs_url = Some(git_str);
362                }
363            }
364
365            // HTTP URL is download_url
366            if let Some(http_url) = source_obj.get("http").and_then(|v| v.as_str()) {
367                let http_str = truncate_field(http_url.trim().to_string());
368                if !http_str.is_empty() {
369                    download_url = Some(http_str);
370                }
371            }
372        } else if let Some(source_str) = source.as_str() {
373            // If source is a string, use as vcs_url
374            let source_trimmed = truncate_field(source_str.trim().to_string());
375            if !source_trimmed.is_empty() {
376                vcs_url = Some(source_trimmed);
377            }
378        }
379    }
380
381    (vcs_url, download_url)
382}
383
384/// Extracts party information from authors field.
385fn extract_parties(json: &Value) -> Vec<Party> {
386    let mut parties = Vec::new();
387
388    if let Some(authors) = json.get(FIELD_AUTHORS) {
389        if let Some(authors_obj) = authors.as_object() {
390            // Authors as dict: key=name, value=url
391            for (name, value) in authors_obj.iter().take(MAX_ITERATION_COUNT) {
392                let name_str = truncate_field(name.trim().to_string());
393                if !name_str.is_empty() {
394                    let url = value.as_str().and_then(|s| {
395                        let trimmed = s.trim();
396                        if trimmed.is_empty() {
397                            None
398                        } else if trimmed.contains("://") || trimmed.contains('.') {
399                            Some(truncate_field(trimmed.to_string()))
400                        } else {
401                            Some(truncate_field(format!("{}.com", trimmed)))
402                        }
403                    });
404
405                    parties.push(Party {
406                        r#type: Some("organization".to_string()),
407                        role: Some("owner".to_string()),
408                        name: Some(name_str),
409                        email: None,
410                        url,
411                        organization: None,
412                        organization_url: None,
413                        timezone: None,
414                    });
415                }
416            }
417        } else if let Some(authors_str) = authors.as_str() {
418            // Authors as string
419            let authors_trimmed = truncate_field(authors_str.trim().to_string());
420            if !authors_trimmed.is_empty() {
421                parties.push(Party {
422                    r#type: Some("organization".to_string()),
423                    role: Some("owner".to_string()),
424                    name: Some(authors_trimmed),
425                    email: None,
426                    url: None,
427                    organization: None,
428                    organization_url: None,
429                    timezone: None,
430                });
431            }
432        }
433    }
434
435    parties
436}
437
438/// Extracts dependencies from dependencies dict.
439fn extract_dependencies(json: &Value) -> Vec<Dependency> {
440    let mut dependencies = Vec::new();
441
442    if let Some(deps) = json.get(FIELD_DEPENDENCIES)
443        && let Some(deps_obj) = deps.as_object()
444    {
445        for (name, requirement) in deps_obj.iter().take(MAX_ITERATION_COUNT) {
446            let name_str = name.trim();
447            if name_str.is_empty() {
448                continue;
449            }
450
451            let requirement_str = requirement
452                .as_str()
453                .map(|s| truncate_field(s.trim().to_string()))
454                .filter(|s| !s.is_empty());
455
456            let purl = Some(truncate_field(format!("pkg:cocoapods/{}", name_str)));
457
458            dependencies.push(Dependency {
459                purl,
460                extracted_requirement: requirement_str,
461                scope: Some("runtime".to_string()),
462                is_runtime: Some(true),
463                is_optional: Some(false),
464                is_pinned: None,
465                is_direct: None,
466                resolved_package: None,
467                extra_data: None,
468            });
469        }
470    }
471
472    dependencies
473}
474
/// Derives the repository base URL from a VCS URL.
///
/// Trailing `/` characters are ignored and a single trailing `.git` suffix is
/// removed (only one, so a repository literally named `x.git` and served as
/// `x.git.git` keeps its name). Returns `None` when nothing is left, which
/// includes the empty input.
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    // Dropping trailing slashes first keeps callers from producing `//` when
    // they append path segments such as `/issues/`.
    let without_slash = vcs_url.trim_end_matches('/');
    let base = without_slash
        .strip_suffix(".git")
        .unwrap_or(without_slash);

    (!base.is_empty()).then(|| base.to_string())
}
487
488/// Computes the hashed path prefix for CocoaPods Specs repository.
489///
490/// Uses MD5 hash of package name to generate the path prefix (first 3 chars).
491fn get_hashed_path(name: &str) -> Option<String> {
492    use md5::{Digest, Md5};
493
494    if name.is_empty() {
495        return None;
496    }
497
498    // Compute MD5 hash
499    let mut hasher = Md5::new();
500    hasher.update(name.as_bytes());
501    let result = hasher.finalize();
502    let hash_str = hex::encode(result);
503
504    if hash_str.len() >= 3 {
505        Some(format!(
506            "{}/{}/{}",
507            &hash_str[0..1],
508            &hash_str[1..2],
509            &hash_str[2..3]
510        ))
511    } else {
512        Some(hash_str)
513    }
514}
515
516crate::register_parser!(
517    "CocoaPods .podspec.json manifest",
518    &["**/*.podspec.json"],
519    "cocoapods",
520    "Objective-C",
521    Some("https://guides.cocoapods.org/syntax/podspec.html"),
522);