Skip to main content

provenant/parsers/
podspec_json.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for CocoaPods .podspec.json manifests.
5//!
6//! Extracts package metadata and dependencies from .podspec.json files used by
7//! CocoaPods for iOS/macOS package management.
8//!
9//! # Supported Formats
10//! - *.podspec.json (CocoaPods manifest JSON format)
11//!
12//! # Key Features
13//! - Dependency extraction from dependencies dictionary
14//! - License handling (both string and dict formats with "type" and "text" keys)
15//! - VCS and download URL extraction from source field
16//! - Author/party information parsing
17//! - Full JSON storage in extra_data
18//!
19//! # Implementation Notes
20//! - Uses serde_json for JSON parsing
21//! - Handles license as both string and dict (joins dict values)
22//! - Extracts dependencies from dict (key=name, value=version requirement)
//! - All dependencies have scope="runtime", is_runtime=true and is_optional=false
24//! - Source dict stored in extra_data["source"]
25
26use std::collections::HashMap;
27use std::path::Path;
28
29use crate::parser_warn as warn;
30use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
31use packageurl::PackageUrl;
32use serde_json::Value;
33
34use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
35
36use super::PackageParser;
37use super::license_normalization::normalize_spdx_declared_license;
38use super::metadata::ParserMetadata;
39
// Top-level keys read from the .podspec.json document.

// Package identity.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";

// Human-readable descriptions and links.
const FIELD_SUMMARY: &str = "summary";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_HOMEPAGE: &str = "homepage";

// Provenance, licensing and relationships.
const FIELD_AUTHORS: &str = "authors";
const FIELD_LICENSE: &str = "license";
const FIELD_SOURCE: &str = "source";
const FIELD_DEPENDENCIES: &str = "dependencies";

/// Primary language reported for every package produced by this parser.
const PRIMARY_LANGUAGE: &str = "Objective-C";
51
52/// CocoaPods .podspec.json parser.
53///
54/// Parses .podspec.json manifest files from CocoaPods ecosystem.
55pub struct PodspecJsonParser;
56
57impl PackageParser for PodspecJsonParser {
58    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
59
60    fn metadata() -> Vec<ParserMetadata> {
61        vec![ParserMetadata {
62            description: "CocoaPods .podspec.json manifest",
63            file_patterns: &["**/*.podspec.json"],
64            package_type: "cocoapods",
65            primary_language: "Objective-C",
66            documentation_url: Some("https://guides.cocoapods.org/syntax/podspec.html"),
67        }]
68    }
69
70    fn extract_packages(path: &Path) -> Vec<PackageData> {
71        let json_content = match read_json_file(path) {
72            Ok(content) => content,
73            Err(e) => {
74                warn!("Failed to read .podspec.json at {:?}: {}", path, e);
75                return vec![default_package_data()];
76            }
77        };
78
79        let name = json_content
80            .get(FIELD_NAME)
81            .and_then(|v| v.as_str())
82            .map(|s| truncate_field(s.trim().to_string()))
83            .filter(|s| !s.is_empty());
84
85        let version = json_content
86            .get(FIELD_VERSION)
87            .and_then(|v| v.as_str())
88            .map(|s| truncate_field(s.trim().to_string()))
89            .filter(|s| !s.is_empty());
90
91        let summary = json_content
92            .get(FIELD_SUMMARY)
93            .and_then(|v| v.as_str())
94            .map(|s| truncate_field(s.trim().to_string()))
95            .filter(|s| !s.is_empty());
96
97        let mut description = json_content
98            .get(FIELD_DESCRIPTION)
99            .and_then(|v| v.as_str())
100            .map(|s| truncate_field(s.trim().to_string()))
101            .filter(|s| !s.is_empty());
102
103        // If summary exists and description doesn't start with summary, prepend it
104        if let (Some(summary_text), Some(desc_text)) = (&summary, &description) {
105            if !desc_text.starts_with(summary_text) {
106                description = Some(format!("{}. {}", summary_text, desc_text));
107            }
108        } else if summary.is_some() && description.is_none() {
109            description = summary.clone();
110        }
111
112        let homepage_url = json_content
113            .get(FIELD_HOMEPAGE)
114            .and_then(|v| v.as_str())
115            .map(|s| truncate_field(s.trim().to_string()))
116            .filter(|s| !s.is_empty());
117
118        let extracted_license_statement = extract_license_statement(&json_content);
119        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
120            normalize_podspec_json_declared_license(
121                &json_content,
122                extracted_license_statement.as_deref(),
123            );
124
125        let (vcs_url, download_url) = extract_source_urls(&json_content);
126
127        let parties = extract_parties(&json_content);
128
129        let dependencies = extract_dependencies(&json_content);
130
131        let mut extra_data = HashMap::new();
132
133        // Store source dict in extra_data
134        if let Some(source) = json_content.get(FIELD_SOURCE) {
135            extra_data.insert("source".to_string(), source.clone());
136        }
137
138        // Store dependencies dict in extra_data if present
139        if let Some(deps) = json_content.get(FIELD_DEPENDENCIES)
140            && let Some(obj) = deps.as_object()
141            && !obj.is_empty()
142        {
143            extra_data.insert(FIELD_DEPENDENCIES.to_string(), deps.clone());
144        }
145
146        if let Some(license_file) = json_content
147            .get(FIELD_LICENSE)
148            .and_then(|license| license.as_object())
149            .and_then(|license| license.get("file"))
150            .and_then(|value| value.as_str())
151            .filter(|value| !value.trim().is_empty())
152        {
153            extra_data.insert(
154                "license_file".to_string(),
155                Value::String(license_file.trim().to_string()),
156            );
157        }
158
159        let raw_json = serde_json::to_string(&json_content).unwrap_or_default();
160        if raw_json.len() <= 10 * 1024 * 1024 {
161            extra_data.insert("podspec.json".to_string(), json_content.clone());
162        } else {
163            warn!(
164                "Skipping podspec.json extra_data entry: serialized size {} bytes exceeds 10MB limit",
165                raw_json.len()
166            );
167        }
168
169        let extra_data = if extra_data.is_empty() {
170            None
171        } else {
172            Some(extra_data)
173        };
174
175        // Generate URLs using CocoaPods patterns
176        let repository_homepage_url = name
177            .as_ref()
178            .map(|n| format!("https://cocoapods.org/pods/{}", n));
179        let repository_download_url =
180            if let (Some(_name_str), Some(version_str)) = (&name, &version) {
181                if let Some(homepage) = &homepage_url {
182                    Some(format!("{}/archive/{}.zip", homepage, version_str))
183                } else if let Some(vcs) = &vcs_url {
184                    let repo_base = get_repo_base_url(vcs);
185                    repo_base.map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str))
186                } else {
187                    None
188                }
189            } else {
190                None
191            };
192
193        let code_view_url = if let (Some(vcs), Some(version_str)) = (&vcs_url, &version) {
194            let repo_base = get_repo_base_url(vcs);
195            repo_base.map(|base| format!("{}/tree/{}", base, version_str))
196        } else {
197            None
198        };
199
200        let bug_tracking_url = vcs_url.as_ref().and_then(|vcs| {
201            let repo_base = get_repo_base_url(vcs);
202            repo_base.map(|base| format!("{}/issues/", base))
203        });
204
205        let api_data_url = if let (Some(name_str), Some(version_str)) = (&name, &version) {
206            get_hashed_path(name_str).map(|hashed| {
207                format!(
208                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
209                    hashed, name_str, version_str, name_str
210                )
211            })
212        } else {
213            None
214        };
215
216        let purl = if let Some(name_str) = &name {
217            let purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
218                .or_else(|_| PackageUrl::new("generic", name_str))
219                .ok();
220            purl.map(|mut p| {
221                if let Some(version_str) = &version {
222                    let _ = p.with_version(version_str);
223                }
224                p.to_string()
225            })
226        } else {
227            None
228        };
229
230        vec![PackageData {
231            package_type: Some(Self::PACKAGE_TYPE),
232            namespace: None,
233            name: name.clone(),
234            version: version.clone(),
235            qualifiers: None,
236            subpath: None,
237            primary_language: Some(PRIMARY_LANGUAGE.to_string()),
238            description,
239            release_date: None,
240            parties,
241            keywords: Vec::new(),
242            homepage_url,
243            download_url,
244            size: None,
245            sha1: None,
246            md5: None,
247            sha256: None,
248            sha512: None,
249            bug_tracking_url,
250            code_view_url,
251            vcs_url,
252            copyright: None,
253            holder: None,
254            declared_license_expression,
255            declared_license_expression_spdx,
256            license_detections,
257            other_license_expression: None,
258            other_license_expression_spdx: None,
259            other_license_detections: Vec::new(),
260            extracted_license_statement,
261            notice_text: None,
262            source_packages: Vec::new(),
263            file_references: Vec::new(),
264            is_private: false,
265            is_virtual: false,
266            extra_data,
267            dependencies,
268            repository_homepage_url,
269            repository_download_url,
270            api_data_url,
271            datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
272            purl,
273        }]
274    }
275
276    fn is_match(path: &Path) -> bool {
277        path.file_name()
278            .and_then(|name| name.to_str())
279            .is_some_and(|name| name.ends_with(".podspec.json"))
280    }
281}
282
283fn read_json_file(path: &Path) -> Result<Value, String> {
284    let contents = read_file_to_string(path, None).map_err(|e| e.to_string())?;
285    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
286}
287
288/// Returns a default empty PackageData.
289fn default_package_data() -> PackageData {
290    PackageData {
291        package_type: Some(PodspecJsonParser::PACKAGE_TYPE),
292        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
293        datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
294        ..Default::default()
295    }
296}
297
298/// Extracts license statement from JSON.
299/// Handles both string and dict formats.
300fn extract_license_statement(json: &Value) -> Option<String> {
301    json.get(FIELD_LICENSE).and_then(|lic| {
302        if let Some(lic_str) = lic.as_str() {
303            Some(truncate_field(lic_str.trim().to_string()))
304        } else if let Some(lic_obj) = lic.as_object() {
305            // If license is a dict, join all values with space
306            let values: Vec<String> = lic_obj
307                .values()
308                .filter_map(|v| v.as_str())
309                .map(|s| s.trim().to_string())
310                .filter(|s| !s.is_empty())
311                .collect();
312            if values.is_empty() {
313                None
314            } else {
315                Some(values.join(" "))
316            }
317        } else {
318            None
319        }
320    })
321}
322
323fn normalize_podspec_json_declared_license(
324    json: &Value,
325    extracted_license_statement: Option<&str>,
326) -> (
327    Option<String>,
328    Option<String>,
329    Vec<crate::models::LicenseDetection>,
330) {
331    let normalized_candidate = json
332        .get(FIELD_LICENSE)
333        .and_then(|license| {
334            license
335                .as_str()
336                .map(str::trim)
337                .filter(|value| !value.is_empty())
338                .map(canonicalize_cocoapods_license_type)
339                .or_else(|| {
340                    license
341                        .as_object()
342                        .and_then(|obj| obj.get("type"))
343                        .and_then(|value| value.as_str())
344                        .map(str::trim)
345                        .filter(|value| !value.is_empty())
346                        .map(canonicalize_cocoapods_license_type)
347                })
348        })
349        .or_else(|| extracted_license_statement.map(canonicalize_cocoapods_license_type));
350
351    normalize_spdx_declared_license(normalized_candidate.as_deref())
352}
353
/// Maps the long-form Apache 2.0 license name to `Apache-2.0`.
///
/// The input is trimmed first; any other value is returned trimmed but otherwise unchanged.
fn canonicalize_cocoapods_license_type(value: &str) -> String {
    let trimmed = value.trim();
    if trimmed == "Apache License, Version 2.0" {
        String::from("Apache-2.0")
    } else {
        trimmed.to_owned()
    }
}
360
361/// Extracts VCS URL and download URL from source field.
362fn extract_source_urls(json: &Value) -> (Option<String>, Option<String>) {
363    let mut vcs_url = None;
364    let mut download_url = None;
365
366    if let Some(source) = json.get(FIELD_SOURCE) {
367        if let Some(source_obj) = source.as_object() {
368            // Git URL takes precedence for vcs_url
369            if let Some(git_url) = source_obj.get("git").and_then(|v| v.as_str()) {
370                let git_str = truncate_field(git_url.trim().to_string());
371                if !git_str.is_empty() {
372                    vcs_url = Some(git_str);
373                }
374            }
375
376            // HTTP URL is download_url
377            if let Some(http_url) = source_obj.get("http").and_then(|v| v.as_str()) {
378                let http_str = truncate_field(http_url.trim().to_string());
379                if !http_str.is_empty() {
380                    download_url = Some(http_str);
381                }
382            }
383        } else if let Some(source_str) = source.as_str() {
384            // If source is a string, use as vcs_url
385            let source_trimmed = truncate_field(source_str.trim().to_string());
386            if !source_trimmed.is_empty() {
387                vcs_url = Some(source_trimmed);
388            }
389        }
390    }
391
392    (vcs_url, download_url)
393}
394
395/// Extracts party information from authors field.
396fn extract_parties(json: &Value) -> Vec<Party> {
397    let mut parties = Vec::new();
398
399    if let Some(authors) = json.get(FIELD_AUTHORS) {
400        if let Some(authors_obj) = authors.as_object() {
401            // Authors as dict: key=name, value=url
402            for (name, value) in authors_obj.iter().take(MAX_ITERATION_COUNT) {
403                let name_str = truncate_field(name.trim().to_string());
404                if !name_str.is_empty() {
405                    let url = value.as_str().and_then(|s| {
406                        let trimmed = s.trim();
407                        if trimmed.is_empty() {
408                            None
409                        } else if trimmed.contains("://") || trimmed.contains('.') {
410                            Some(truncate_field(trimmed.to_string()))
411                        } else {
412                            Some(truncate_field(format!("{}.com", trimmed)))
413                        }
414                    });
415
416                    parties.push(Party {
417                        r#type: Some("organization".to_string()),
418                        role: Some("owner".to_string()),
419                        name: Some(name_str),
420                        email: None,
421                        url,
422                        organization: None,
423                        organization_url: None,
424                        timezone: None,
425                    });
426                }
427            }
428        } else if let Some(authors_str) = authors.as_str() {
429            // Authors as string
430            let authors_trimmed = truncate_field(authors_str.trim().to_string());
431            if !authors_trimmed.is_empty() {
432                parties.push(Party {
433                    r#type: Some("organization".to_string()),
434                    role: Some("owner".to_string()),
435                    name: Some(authors_trimmed),
436                    email: None,
437                    url: None,
438                    organization: None,
439                    organization_url: None,
440                    timezone: None,
441                });
442            }
443        }
444    }
445
446    parties
447}
448
449/// Extracts dependencies from dependencies dict.
450fn extract_dependencies(json: &Value) -> Vec<Dependency> {
451    let mut dependencies = Vec::new();
452
453    if let Some(deps) = json.get(FIELD_DEPENDENCIES)
454        && let Some(deps_obj) = deps.as_object()
455    {
456        for (name, requirement) in deps_obj.iter().take(MAX_ITERATION_COUNT) {
457            let name_str = name.trim();
458            if name_str.is_empty() {
459                continue;
460            }
461
462            let requirement_str = requirement
463                .as_str()
464                .map(|s| truncate_field(s.trim().to_string()))
465                .filter(|s| !s.is_empty());
466
467            let purl = Some(truncate_field(format!("pkg:cocoapods/{}", name_str)));
468
469            dependencies.push(Dependency {
470                purl,
471                extracted_requirement: requirement_str,
472                scope: Some("runtime".to_string()),
473                is_runtime: Some(true),
474                is_optional: Some(false),
475                is_pinned: None,
476                is_direct: None,
477                resolved_package: None,
478                extra_data: None,
479            });
480        }
481    }
482
483    dependencies
484}
485
/// Gets the repository base URL from a VCS URL by removing one trailing `.git`.
///
/// Exactly one suffix is removed, so a URL such as `https://host/repo.git.git`
/// (a repository literally named `repo.git`) keeps its inner `.git`.
///
/// Returns `None` when `vcs_url` is empty or consists of nothing but the `.git` suffix,
/// because no usable base remains in either case.
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    let base = vcs_url.strip_suffix(".git").unwrap_or(vcs_url);
    (!base.is_empty()).then(|| base.to_string())
}
498
499/// Computes the hashed path prefix for CocoaPods Specs repository.
500///
501/// Uses MD5 hash of package name to generate the path prefix (first 3 chars).
502fn get_hashed_path(name: &str) -> Option<String> {
503    use md5::{Digest, Md5};
504
505    if name.is_empty() {
506        return None;
507    }
508
509    // Compute MD5 hash
510    let mut hasher = Md5::new();
511    hasher.update(name.as_bytes());
512    let result = hasher.finalize();
513    let hash_str = hex::encode(result);
514
515    if hash_str.len() >= 3 {
516        Some(format!(
517            "{}/{}/{}",
518            &hash_str[0..1],
519            &hash_str[1..2],
520            &hash_str[2..3]
521        ))
522    } else {
523        Some(hash_str)
524    }
525}