Skip to main content

provenant/parsers/
podspec_json.rs

1//! Parser for CocoaPods .podspec.json manifests.
2//!
3//! Extracts package metadata and dependencies from .podspec.json files used by
4//! CocoaPods for iOS/macOS package management.
5//!
6//! # Supported Formats
7//! - *.podspec.json (CocoaPods manifest JSON format)
8//!
9//! # Key Features
10//! - Dependency extraction from dependencies dictionary
11//! - License handling (both string and dict formats with "type" and "text" keys)
12//! - VCS and download URL extraction from source field
13//! - Author/party information parsing
14//! - Full JSON storage in extra_data
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Handles license as both string and dict (joins dict values)
19//! - Extracts dependencies from dict (key=name, value=version requirement)
//! - All dependencies have scope="runtime" and is_runtime=true
21//! - Source dict stored in extra_data["source"]
22
23use std::collections::HashMap;
24use std::fs::File;
25use std::io::Read;
26use std::path::Path;
27
28use log::warn;
29use packageurl::PackageUrl;
30use serde_json::Value;
31
32use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
33
34use super::PackageParser;
35
// Top-level keys looked up in the parsed podspec JSON document.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_SUMMARY: &str = "summary";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_LICENSE: &str = "license";
const FIELD_SOURCE: &str = "source";
const FIELD_AUTHORS: &str = "authors";
const FIELD_DEPENDENCIES: &str = "dependencies";

// Reported as `primary_language` on every package this parser produces,
// including the fallback record returned when the file cannot be read.
const PRIMARY_LANGUAGE: &str = "Objective-C";
47
/// CocoaPods `.podspec.json` parser.
///
/// Stateless marker type: all behavior lives in its [`PackageParser`]
/// implementation, which matches files whose name ends with `.podspec.json`
/// and produces one `PackageData` record per file.
pub struct PodspecJsonParser;
52
53impl PackageParser for PodspecJsonParser {
54    const PACKAGE_TYPE: PackageType = PackageType::Cocoapods;
55
56    fn extract_packages(path: &Path) -> Vec<PackageData> {
57        let json_content = match read_json_file(path) {
58            Ok(content) => content,
59            Err(e) => {
60                warn!("Failed to read .podspec.json at {:?}: {}", path, e);
61                return vec![default_package_data()];
62            }
63        };
64
65        let name = json_content
66            .get(FIELD_NAME)
67            .and_then(|v| v.as_str())
68            .map(|s| s.trim().to_string())
69            .filter(|s| !s.is_empty());
70
71        let version = json_content
72            .get(FIELD_VERSION)
73            .and_then(|v| v.as_str())
74            .map(|s| s.trim().to_string())
75            .filter(|s| !s.is_empty());
76
77        let summary = json_content
78            .get(FIELD_SUMMARY)
79            .and_then(|v| v.as_str())
80            .map(|s| s.trim().to_string())
81            .filter(|s| !s.is_empty());
82
83        let mut description = json_content
84            .get(FIELD_DESCRIPTION)
85            .and_then(|v| v.as_str())
86            .map(|s| s.trim().to_string())
87            .filter(|s| !s.is_empty());
88
89        // If summary exists and description doesn't start with summary, prepend it
90        if let (Some(summary_text), Some(desc_text)) = (&summary, &description) {
91            if !desc_text.starts_with(summary_text) {
92                description = Some(format!("{}. {}", summary_text, desc_text));
93            }
94        } else if summary.is_some() && description.is_none() {
95            description = summary.clone();
96        }
97
98        let homepage_url = json_content
99            .get(FIELD_HOMEPAGE)
100            .and_then(|v| v.as_str())
101            .map(|s| s.trim().to_string())
102            .filter(|s| !s.is_empty());
103
104        let extracted_license_statement = extract_license_statement(&json_content);
105
106        let (vcs_url, download_url) = extract_source_urls(&json_content);
107
108        let parties = extract_parties(&json_content);
109
110        let dependencies = extract_dependencies(&json_content);
111
112        let mut extra_data = HashMap::new();
113
114        // Store source dict in extra_data
115        if let Some(source) = json_content.get(FIELD_SOURCE) {
116            extra_data.insert("source".to_string(), source.clone());
117        }
118
119        // Store dependencies dict in extra_data if present
120        if let Some(deps) = json_content.get(FIELD_DEPENDENCIES)
121            && let Some(obj) = deps.as_object()
122            && !obj.is_empty()
123        {
124            extra_data.insert(FIELD_DEPENDENCIES.to_string(), deps.clone());
125        }
126
127        // Store full JSON in extra_data
128        extra_data.insert("podspec.json".to_string(), json_content.clone());
129
130        let extra_data = if extra_data.is_empty() {
131            None
132        } else {
133            Some(extra_data)
134        };
135
136        // Generate URLs using CocoaPods patterns
137        let repository_homepage_url = name
138            .as_ref()
139            .map(|n| format!("https://cocoapods.org/pods/{}", n));
140        let repository_download_url =
141            if let (Some(_name_str), Some(version_str)) = (&name, &version) {
142                if let Some(homepage) = &homepage_url {
143                    Some(format!("{}/archive/{}.zip", homepage, version_str))
144                } else if let Some(vcs) = &vcs_url {
145                    let repo_base = get_repo_base_url(vcs);
146                    repo_base.map(|base| format!("{}/archive/refs/tags/{}.zip", base, version_str))
147                } else {
148                    None
149                }
150            } else {
151                None
152            };
153
154        let code_view_url = if let (Some(vcs), Some(version_str)) = (&vcs_url, &version) {
155            let repo_base = get_repo_base_url(vcs);
156            repo_base.map(|base| format!("{}/tree/{}", base, version_str))
157        } else {
158            None
159        };
160
161        let bug_tracking_url = vcs_url.as_ref().and_then(|vcs| {
162            let repo_base = get_repo_base_url(vcs);
163            repo_base.map(|base| format!("{}/issues/", base))
164        });
165
166        let api_data_url = if let (Some(name_str), Some(version_str)) = (&name, &version) {
167            get_hashed_path(name_str).map(|hashed| {
168                format!(
169                    "https://raw.githubusercontent.com/CocoaPods/Specs/blob/master/Specs/{}/{}/{}/{}.podspec.json",
170                    hashed, name_str, version_str, name_str
171                )
172            })
173        } else {
174            None
175        };
176
177        let purl = if let Some(name_str) = &name {
178            let mut purl = PackageUrl::new(Self::PACKAGE_TYPE.as_str(), name_str)
179                .unwrap_or_else(|_| PackageUrl::new("generic", name_str).unwrap());
180            if let Some(version_str) = &version {
181                let _ = purl.with_version(version_str);
182            }
183            Some(purl.to_string())
184        } else {
185            None
186        };
187
188        vec![PackageData {
189            package_type: Some(Self::PACKAGE_TYPE),
190            namespace: None,
191            name: name.clone(),
192            version: version.clone(),
193            qualifiers: None,
194            subpath: None,
195            primary_language: Some(PRIMARY_LANGUAGE.to_string()),
196            description,
197            release_date: None,
198            parties,
199            keywords: Vec::new(),
200            homepage_url,
201            download_url,
202            size: None,
203            sha1: None,
204            md5: None,
205            sha256: None,
206            sha512: None,
207            bug_tracking_url,
208            code_view_url,
209            vcs_url,
210            copyright: None,
211            holder: None,
212            declared_license_expression: None,
213            declared_license_expression_spdx: None,
214            license_detections: Vec::new(),
215            other_license_expression: None,
216            other_license_expression_spdx: None,
217            other_license_detections: Vec::new(),
218            extracted_license_statement,
219            notice_text: None,
220            source_packages: Vec::new(),
221            file_references: Vec::new(),
222            is_private: false,
223            is_virtual: false,
224            extra_data,
225            dependencies,
226            repository_homepage_url,
227            repository_download_url,
228            api_data_url,
229            datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
230            purl,
231        }]
232    }
233
234    fn is_match(path: &Path) -> bool {
235        path.file_name()
236            .and_then(|name| name.to_str())
237            .is_some_and(|name| name.ends_with(".podspec.json"))
238    }
239}
240
241/// Reads and parses a JSON file.
242fn read_json_file(path: &Path) -> Result<Value, String> {
243    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
244    let mut contents = String::new();
245    file.read_to_string(&mut contents)
246        .map_err(|e| format!("Failed to read file: {}", e))?;
247    serde_json::from_str(&contents).map_err(|e| format!("Failed to parse JSON: {}", e))
248}
249
/// Returns the fallback `PackageData` used when a podspec file cannot be
/// read or parsed.
///
/// Only the package type, primary language and datasource id are set; every
/// other field takes its `Default` value.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(PodspecJsonParser::PACKAGE_TYPE),
        primary_language: Some(PRIMARY_LANGUAGE.to_string()),
        datasource_id: Some(DatasourceId::CocoapodsPodspecJson),
        ..Default::default()
    }
}
259
260/// Extracts license statement from JSON.
261/// Handles both string and dict formats.
262fn extract_license_statement(json: &Value) -> Option<String> {
263    json.get(FIELD_LICENSE).and_then(|lic| {
264        if let Some(lic_str) = lic.as_str() {
265            Some(lic_str.trim().to_string())
266        } else if let Some(lic_obj) = lic.as_object() {
267            // If license is a dict, join all values with space
268            let values: Vec<String> = lic_obj
269                .values()
270                .filter_map(|v| v.as_str())
271                .map(|s| s.trim().to_string())
272                .filter(|s| !s.is_empty())
273                .collect();
274            if values.is_empty() {
275                None
276            } else {
277                Some(values.join(" "))
278            }
279        } else {
280            None
281        }
282    })
283}
284
285/// Extracts VCS URL and download URL from source field.
286fn extract_source_urls(json: &Value) -> (Option<String>, Option<String>) {
287    let mut vcs_url = None;
288    let mut download_url = None;
289
290    if let Some(source) = json.get(FIELD_SOURCE) {
291        if let Some(source_obj) = source.as_object() {
292            // Git URL takes precedence for vcs_url
293            if let Some(git_url) = source_obj.get("git").and_then(|v| v.as_str()) {
294                let git_str = git_url.trim().to_string();
295                if !git_str.is_empty() {
296                    vcs_url = Some(git_str);
297                }
298            }
299
300            // HTTP URL is download_url
301            if let Some(http_url) = source_obj.get("http").and_then(|v| v.as_str()) {
302                let http_str = http_url.trim().to_string();
303                if !http_str.is_empty() {
304                    download_url = Some(http_str);
305                }
306            }
307        } else if let Some(source_str) = source.as_str() {
308            // If source is a string, use as vcs_url
309            let source_trimmed = source_str.trim().to_string();
310            if !source_trimmed.is_empty() {
311                vcs_url = Some(source_trimmed);
312            }
313        }
314    }
315
316    (vcs_url, download_url)
317}
318
319/// Extracts party information from authors field.
320fn extract_parties(json: &Value) -> Vec<Party> {
321    let mut parties = Vec::new();
322
323    if let Some(authors) = json.get(FIELD_AUTHORS) {
324        if let Some(authors_obj) = authors.as_object() {
325            // Authors as dict: key=name, value=url
326            for (name, value) in authors_obj {
327                let name_str = name.trim().to_string();
328                if !name_str.is_empty() {
329                    let url = value.as_str().and_then(|s| {
330                        let trimmed = s.trim();
331                        // Python reference adds ".com" suffix if URL doesn't have it
332                        if trimmed.is_empty() {
333                            None
334                        } else if trimmed.contains("://") || trimmed.contains('.') {
335                            Some(trimmed.to_string())
336                        } else {
337                            Some(format!("{}.com", trimmed))
338                        }
339                    });
340
341                    parties.push(Party {
342                        r#type: Some("organization".to_string()),
343                        role: Some("owner".to_string()),
344                        name: Some(name_str),
345                        email: None,
346                        url,
347                        organization: None,
348                        organization_url: None,
349                        timezone: None,
350                    });
351                }
352            }
353        } else if let Some(authors_str) = authors.as_str() {
354            // Authors as string
355            let authors_trimmed = authors_str.trim().to_string();
356            if !authors_trimmed.is_empty() {
357                parties.push(Party {
358                    r#type: Some("organization".to_string()),
359                    role: Some("owner".to_string()),
360                    name: Some(authors_trimmed),
361                    email: None,
362                    url: None,
363                    organization: None,
364                    organization_url: None,
365                    timezone: None,
366                });
367            }
368        }
369    }
370
371    parties
372}
373
374/// Extracts dependencies from dependencies dict.
375fn extract_dependencies(json: &Value) -> Vec<Dependency> {
376    let mut dependencies = Vec::new();
377
378    if let Some(deps) = json.get(FIELD_DEPENDENCIES)
379        && let Some(deps_obj) = deps.as_object()
380    {
381        for (name, requirement) in deps_obj {
382            let name_str = name.trim();
383            if name_str.is_empty() {
384                continue;
385            }
386
387            let requirement_str = requirement
388                .as_str()
389                .map(|s| s.trim().to_string())
390                .filter(|s| !s.is_empty());
391
392            let purl = Some(format!("pkg:cocoapods/{}", name_str));
393
394            dependencies.push(Dependency {
395                purl,
396                extracted_requirement: requirement_str,
397                scope: Some("runtime".to_string()),
398                is_runtime: Some(true),
399                is_optional: Some(false),
400                is_pinned: None,
401                is_direct: None,
402                resolved_package: None,
403                extra_data: None,
404            });
405        }
406    }
407
408    dependencies
409}
410
/// Gets the repository base URL from a VCS URL.
///
/// Trailing `/` characters are dropped first, then a single `.git` suffix is
/// removed if present. Callers append path segments such as `/tree/<version>`
/// or `/issues/`, so a trailing slash would otherwise produce `//` in the
/// derived URLs.
///
/// Returns `None` when the input is empty or nothing remains after
/// normalization (for example `"/"` or `".git"`).
fn get_repo_base_url(vcs_url: &str) -> Option<String> {
    let without_slash = vcs_url.trim_end_matches('/');

    // `strip_suffix` removes exactly one ".git", so a repository whose own
    // name ends in ".git" keeps it ("x.git.git" -> "x.git").
    let base = without_slash
        .strip_suffix(".git")
        .unwrap_or(without_slash);

    if base.is_empty() {
        None
    } else {
        Some(base.to_string())
    }
}
423
424/// Computes the hashed path prefix for CocoaPods Specs repository.
425///
426/// Uses MD5 hash of package name to generate the path prefix (first 3 chars).
427fn get_hashed_path(name: &str) -> Option<String> {
428    use md5::{Digest, Md5};
429
430    if name.is_empty() {
431        return None;
432    }
433
434    // Compute MD5 hash
435    let mut hasher = Md5::new();
436    hasher.update(name.as_bytes());
437    let result = hasher.finalize();
438    let hash_str = format!("{:x}", result);
439
440    if hash_str.len() >= 3 {
441        Some(format!(
442            "{}/{}/{}",
443            &hash_str[0..1],
444            &hash_str[1..2],
445            &hash_str[2..3]
446        ))
447    } else {
448        Some(hash_str)
449    }
450}
451
// Parser metadata passed to the crate-level `register_parser!` macro.
// NOTE(review): the arguments appear to be, in order: description, path
// glob patterns, package type, primary language, and documentation URL —
// confirm against the macro definition.
crate::register_parser!(
    "CocoaPods .podspec.json manifest",
    &["**/*.podspec.json"],
    "cocoapods",
    "Objective-C",
    Some("https://guides.cocoapods.org/syntax/podspec.html"),
);