Skip to main content

provenant/parsers/
swift_manifest_json.rs

1//! Parser for Swift Package Manager manifest files.
2//!
3//! Supports three input formats:
4//! - `Package.swift.json` - Pre-generated JSON (recommended for CI/CD)
5//! - `Package.swift.deplock` - DepLock JSON format
6//! - `Package.swift` - Raw Swift source (auto-generates JSON with caching)
7//!
8//! # Automatic JSON Generation (Enhancement over Python ScanCode)
9//!
10//! This Rust implementation includes automatic JSON generation from raw `Package.swift`
11//! files, which is an enhancement beyond the Python ScanCode reference implementation.
12//!
13//! **Python ScanCode behavior**: Requires users to manually run:
14//! ```bash
15//! swift package dump-package > Package.swift.json
16//! ```
17//!
18//! **Rust ScanCode behavior**: Automatically generates JSON when Swift toolchain available,
19//! with BLAKE3-based caching for performance.
20//!
21//! ## Design Decision: Graceful Degradation
22//!
23//! - **Swift toolchain available**: Automatically generates + caches JSON (~200ms first, <1ms cached)
24//! - **Swift toolchain unavailable**: Warns and skips file (no crash, CI/CD unaffected)
25//! - **Pre-generated JSON**: Always works, regardless of Swift availability
26//!
27//! This design allows:
28//! - ✅ Better UX for developers with Swift installed
29//! - ✅ No CI/CD complications (tests don't require Swift)
30//! - ✅ Backward compatibility (pre-generated JSON workflow unchanged)
//! - ✅ Feature parity maintained (Python behavior is a subset of Rust behavior)
32
33use std::collections::HashMap;
34use std::fs;
35use std::io::Write as _;
36use std::path::Path;
37use std::process::Command;
38
39use log::warn;
40use packageurl::PackageUrl;
41use serde_json::Value;
42
43use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
44
45use super::PackageParser;
46
47/// Parses Swift Package Manager manifest files with automatic JSON generation.
48///
49/// # Supported File Formats
50/// - `Package.swift.json` - Pre-generated JSON from `swift package dump-package`
51/// - `Package.swift.deplock` - JSON format from DepLock tool
52/// - `Package.swift` - Raw Swift source (auto-generates JSON if Swift available)
53///
54/// # Automatic JSON Generation
55///
56/// When scanning raw `Package.swift` files:
57/// 1. Checks BLAKE3-based cache for previously generated JSON
58/// 2. If cache miss, invokes `swift package dump-package` (requires Swift toolchain)
59/// 3. Caches result for future scans
60/// 4. Falls back gracefully if Swift unavailable (logs warning, returns empty package data)
61///
62/// # Performance
63/// - **Pre-generated JSON**: <1ms (direct file read)
64/// - **Raw Package.swift (cached)**: <1ms (cache hit)
65/// - **Raw Package.swift (first time)**: ~100-500ms (Swift toolchain execution + cache write)
66/// - **Raw Package.swift (no Swift)**: <1ms (immediate fallback)
67///
68/// # Example
69/// ```no_run
70/// use provenant::parsers::{PackageParser, SwiftManifestJsonParser};
71/// use std::path::Path;
72///
73/// // Works with pre-generated JSON
74/// let json_path = Path::new("Package.swift.json");
75/// let data1 = SwiftManifestJsonParser::extract_first_package(json_path);
76///
77/// // Also works with raw Package.swift (if Swift installed)
78/// let swift_path = Path::new("Package.swift");
79/// let data2 = SwiftManifestJsonParser::extract_first_package(swift_path);
80/// ```
81pub struct SwiftManifestJsonParser;
82
83impl PackageParser for SwiftManifestJsonParser {
84    const PACKAGE_TYPE: PackageType = PackageType::Swift;
85
86    fn extract_packages(path: &Path) -> Vec<PackageData> {
87        let filename = path.file_name().and_then(|n| n.to_str());
88
89        let is_json_file = filename
90            .map(|n| n.ends_with(".swift.json") || n.ends_with(".swift.deplock"))
91            .unwrap_or(false);
92        let is_raw_swift = filename.map(|n| n == "Package.swift").unwrap_or(false);
93
94        vec![if is_json_file {
95            let json_content = match read_swift_manifest_json(path) {
96                Ok(content) => content,
97                Err(e) => {
98                    warn!(
99                        "Failed to read or parse Swift manifest JSON at {:?}: {}",
100                        path, e
101                    );
102                    return vec![default_package_data()];
103                }
104            };
105            parse_swift_manifest(&json_content)
106        } else if is_raw_swift {
107            match dump_package_cached(path) {
108                Ok(json_str) => match serde_json::from_str::<Value>(&json_str) {
109                    Ok(json) => parse_swift_manifest(&json),
110                    Err(e) => {
111                        warn!(
112                            "Swift toolchain generated invalid JSON for {:?}: {}",
113                            path, e
114                        );
115                        default_package_data()
116                    }
117                },
118                Err(e) => {
119                    warn!(
120                        "Cannot auto-generate Package.swift.json for {:?}: {}. \
121                             Swift toolchain may not be installed. \
122                             To scan this file, manually run: swift package dump-package > Package.swift.json",
123                        path, e
124                    );
125                    default_package_data()
126                }
127            }
128        } else {
129            default_package_data()
130        }]
131    }
132
133    fn is_match(path: &Path) -> bool {
134        path.file_name()
135            .and_then(|name| name.to_str())
136            .is_some_and(|name| {
137                name.ends_with(".swift.json")
138                    || name.ends_with(".swift.deplock")
139                    || name == "Package.swift"
140            })
141    }
142}
143
144fn read_swift_manifest_json(path: &Path) -> Result<Value, String> {
145    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
146
147    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
148}
149
150fn parse_swift_manifest(manifest: &Value) -> PackageData {
151    let name = manifest
152        .get("name")
153        .and_then(|v| v.as_str())
154        .map(String::from);
155
156    let dependencies = get_dependencies(manifest.get("dependencies"));
157    let platforms = manifest.get("platforms").cloned();
158
159    let tools_version = manifest
160        .get("toolsVersion")
161        .and_then(|tv| tv.get("_version"))
162        .and_then(|v| v.as_str())
163        .map(String::from);
164
165    let mut extra_data = HashMap::new();
166    if let Some(platforms_val) = platforms {
167        extra_data.insert("platforms".to_string(), platforms_val);
168    }
169    if let Some(ref tv) = tools_version {
170        extra_data.insert(
171            "swift_tools_version".to_string(),
172            serde_json::Value::String(tv.clone()),
173        );
174    }
175
176    let purl = create_package_url(&name, &None);
177
178    PackageData {
179        package_type: Some(SwiftManifestJsonParser::PACKAGE_TYPE),
180        namespace: None,
181        name,
182        version: None,
183        qualifiers: None,
184        subpath: None,
185        primary_language: Some("Swift".to_string()),
186        description: None,
187        release_date: None,
188        parties: Vec::new(),
189        keywords: Vec::new(),
190        homepage_url: None,
191        download_url: None,
192        size: None,
193        sha1: None,
194        md5: None,
195        sha256: None,
196        sha512: None,
197        bug_tracking_url: None,
198        code_view_url: None,
199        vcs_url: None,
200        copyright: None,
201        holder: None,
202        declared_license_expression: None,
203        declared_license_expression_spdx: None,
204        license_detections: Vec::new(),
205        other_license_expression: None,
206        other_license_expression_spdx: None,
207        other_license_detections: Vec::new(),
208        extracted_license_statement: None,
209        notice_text: None,
210        source_packages: Vec::new(),
211        file_references: Vec::new(),
212        is_private: false,
213        is_virtual: false,
214        extra_data: if extra_data.is_empty() {
215            None
216        } else {
217            Some(extra_data)
218        },
219        dependencies,
220        repository_homepage_url: None,
221        repository_download_url: None,
222        api_data_url: None,
223        datasource_id: Some(DatasourceId::SwiftPackageManifestJson),
224        purl,
225    }
226}
227
228fn get_dependencies(dependencies: Option<&Value>) -> Vec<Dependency> {
229    let Some(deps_array) = dependencies.and_then(|v| v.as_array()) else {
230        return Vec::new();
231    };
232
233    let mut dependent_packages = Vec::new();
234
235    for dependency in deps_array {
236        let Some(source_control) = dependency.get("sourceControl").and_then(|v| v.as_array())
237        else {
238            continue;
239        };
240
241        let Some(source) = source_control.first() else {
242            continue;
243        };
244
245        let identity = source
246            .get("identity")
247            .and_then(|v| v.as_str())
248            .unwrap_or_default();
249
250        let (namespace, dep_name) = extract_namespace_and_name(source, identity);
251        let (version, is_pinned) = extract_version_requirement(source);
252        let purl = create_dependency_purl(&namespace, &dep_name, &version, is_pinned);
253
254        dependent_packages.push(Dependency {
255            purl: Some(purl),
256            extracted_requirement: version,
257            scope: Some("dependencies".to_string()),
258            is_runtime: Some(true),
259            is_optional: Some(false),
260            is_pinned: Some(is_pinned),
261            is_direct: Some(true),
262            resolved_package: None,
263            extra_data: None,
264        });
265    }
266
267    dependent_packages
268}
269
270fn extract_namespace_and_name(source: &Value, identity: &str) -> (Option<String>, String) {
271    let url = source
272        .get("location")
273        .and_then(|loc| loc.get("remote"))
274        .and_then(|remote| remote.as_array())
275        .and_then(|arr| arr.first())
276        .and_then(|first| first.get("urlString"))
277        .and_then(|v| v.as_str());
278
279    match url {
280        Some(url_str) => get_namespace_and_name(url_str),
281        None => (None, identity.to_string()),
282    }
283}
284
/// Parses a repository URL into `(namespace, name)`.
///
/// For `http://` and `https://` URLs the host becomes the first namespace
/// segment; any other input is treated as a plain path. Trailing slashes and
/// a trailing `.git` are ignored, in either order. The name is the last path
/// segment and the namespace is everything before it (`None` when there is
/// only one segment, e.g. a bare host or a bare name).
///
/// # Examples
///
/// - `https://github.com/apple/swift-argument-parser.git`
///   yields namespace=`"github.com/apple"`, name=`"swift-argument-parser"`
/// - `/local/pkgs/MyLib.git/` yields namespace=`"/local/pkgs"`, name=`"MyLib"`
/// - `https://github.com` yields namespace=`None`, name=`"github.com"`
pub fn get_namespace_and_name(url: &str) -> (Option<String>, String) {
    let without_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"));

    let (host, path) = match without_scheme {
        Some(rest) => {
            let rest = rest.trim_end_matches('/');
            match rest.split_once('/') {
                Some((host, path)) => (Some(host), path),
                None => (Some(rest), ""),
            }
        }
        None => (None, url),
    };

    // Trim slashes *before* looking for `.git`, so that inputs such as
    // `repo.git/` lose the suffix as well; trim again afterwards to cover
    // `repo/.git`.
    let path = path.trim_end_matches('/');
    let path = path
        .strip_suffix(".git")
        .unwrap_or(path)
        .trim_end_matches('/');

    let canonical = match host {
        // Host-only URL: avoid producing "host/" and, from it, an empty name.
        Some(host) if path.is_empty() => host.to_string(),
        Some(host) => format!("{host}/{path}"),
        None => path.to_string(),
    };

    match canonical.rsplit_once('/') {
        Some((namespace, name)) => (Some(namespace.to_string()), name.to_string()),
        None => (None, canonical),
    }
}
324
325/// Handles four requirement types:
326/// - `exact`: `["1.0.0"]` -> version="1.0.0", is_pinned=true
327/// - `range`: `[{"lowerBound": "1.0.0", "upperBound": "2.0.0"}]` -> version="vers:swift/>=1.0.0|<2.0.0", is_pinned=false
328/// - `branch`: `["main"]` -> version="main", is_pinned=false
329/// - `revision`: `["abc123"]` -> version="abc123", is_pinned=true
330fn extract_version_requirement(source: &Value) -> (Option<String>, bool) {
331    let Some(requirement) = source.get("requirement") else {
332        return (None, false);
333    };
334
335    if let Some(exact) = requirement.get("exact").and_then(|v| v.as_array())
336        && let Some(version) = exact.first().and_then(|v| v.as_str())
337    {
338        return (Some(version.to_string()), true);
339    }
340
341    if let Some(range) = requirement.get("range").and_then(|v| v.as_array())
342        && let Some(bound) = range.first()
343    {
344        let lower = bound.get("lowerBound").and_then(|v| v.as_str());
345        let upper = bound.get("upperBound").and_then(|v| v.as_str());
346        if let (Some(lb), Some(ub)) = (lower, upper) {
347            let vers = format!("vers:swift/>={lb}|<{ub}");
348            return (Some(vers), false);
349        }
350    }
351
352    if let Some(branch) = requirement.get("branch").and_then(|v| v.as_array())
353        && let Some(branch_name) = branch.first().and_then(|v| v.as_str())
354    {
355        return (Some(branch_name.to_string()), false);
356    }
357
358    if let Some(revision) = requirement.get("revision").and_then(|v| v.as_array())
359        && let Some(rev) = revision.first().and_then(|v| v.as_str())
360    {
361        return (Some(rev.to_string()), true);
362    }
363
364    (None, false)
365}
366
367fn create_dependency_purl(
368    namespace: &Option<String>,
369    name: &str,
370    version: &Option<String>,
371    is_pinned: bool,
372) -> String {
373    let mut purl = match PackageUrl::new(SwiftManifestJsonParser::PACKAGE_TYPE.as_str(), name) {
374        Ok(p) => p,
375        Err(e) => {
376            warn!(
377                "Failed to create PackageUrl for swift dependency '{}': {}",
378                name, e
379            );
380            return match (namespace, is_pinned.then_some(version.as_deref()).flatten()) {
381                (Some(ns), Some(v)) => format!("pkg:swift/{}/{}@{}", ns, name, v),
382                (Some(ns), None) => format!("pkg:swift/{}/{}", ns, name),
383                (None, Some(v)) => format!("pkg:swift/{}@{}", name, v),
384                (None, None) => format!("pkg:swift/{}", name),
385            };
386        }
387    };
388
389    if let Some(ns) = namespace
390        && let Err(e) = purl.with_namespace(ns)
391    {
392        warn!(
393            "Failed to set namespace '{}' for swift dependency '{}': {}",
394            ns, name, e
395        );
396    }
397
398    if is_pinned
399        && let Some(v) = version
400        && let Err(e) = purl.with_version(v)
401    {
402        warn!(
403            "Failed to set version '{}' for swift dependency '{}': {}",
404            v, name, e
405        );
406    }
407
408    purl.to_string()
409}
410
411fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
412    name.as_ref().and_then(|name| {
413        let mut package_url =
414            match PackageUrl::new(SwiftManifestJsonParser::PACKAGE_TYPE.as_str(), name) {
415                Ok(p) => p,
416                Err(e) => {
417                    warn!(
418                        "Failed to create PackageUrl for swift package '{}': {}",
419                        name, e
420                    );
421                    return None;
422                }
423            };
424
425        if let Some(v) = version
426            && let Err(e) = package_url.with_version(v)
427        {
428            warn!(
429                "Failed to set version '{}' for swift package '{}': {}",
430                v, name, e
431            );
432            return None;
433        }
434
435        Some(package_url.to_string())
436    })
437}
438
/// Runs `swift package dump-package` in `package_dir` and returns its output.
///
/// The command is executed with `package_dir` as its working directory and
/// its standard output is returned as the JSON text. `dump_package_cached()`
/// uses this function on a cache miss.
///
/// # Arguments
/// * `package_dir` - Directory containing Package.swift
///
/// # Returns
/// * `Ok(String)` - Standard output of the command
/// * `Err(String)` - Message describing why the command could not be started
///   (e.g. `swift` is not on PATH), exited unsuccessfully (includes the exit
///   code and trimmed stderr), or produced output that is not valid UTF-8
///
/// # Note
/// This function is public for testing purposes but is not intended for direct use.
/// Prefer `dump_package_cached()`, which adds caching.
pub fn invoke_swift_dump_package(package_dir: &Path) -> Result<String, String> {
    let mut command = Command::new("swift");
    command
        .arg("package")
        .arg("dump-package")
        .current_dir(package_dir);

    let output = match command.output() {
        Ok(output) => output,
        Err(e) => {
            return Err(format!(
                "Failed to execute 'swift package dump-package' in {:?}: {}. \
                 Is the Swift toolchain installed and available on PATH?",
                package_dir, e
            ));
        }
    };

    if output.status.success() {
        return String::from_utf8(output.stdout)
            .map_err(|e| format!("swift dump-package output is not valid UTF-8: {}", e));
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(format!(
        "'swift package dump-package' failed in {:?} (exit code: {:?}): {}",
        package_dir,
        output.status.code(),
        stderr.trim()
    ))
}
483
484/// Generates or retrieves cached Package.swift.json using BLAKE3 content hashing.
485///
486/// This is the primary entry point for converting raw Package.swift files to JSON.
487/// It implements a content-based caching strategy where the cache key is the BLAKE3
488/// hash of the Package.swift file contents.
489///
490/// # Caching Strategy
491///
492/// 1. **Cache Key**: BLAKE3 hash of Package.swift content (not file path)
493/// 2. **Cache Hit**: Returns cached JSON (<1ms)
494/// 3. **Cache Miss**: Executes `swift package dump-package`, validates JSON, caches result
495/// 4. **Cache Location**: System cache directory (e.g., ~/.cache/provenant/swift/)
496///
497/// # Performance
498/// - **Cache hit**: <1ms (single file read)
499/// - **Cache miss**: ~100-500ms (Swift toolchain execution + validation + cache write)
500///
501/// # Error Handling
502///
503/// Returns `Err` if:
504/// - Package.swift file cannot be read
505/// - Swift toolchain not installed or not on PATH
506/// - `swift package dump-package` command fails
507/// - Output is not valid JSON
508/// - Cannot determine or create cache directory
509///
510/// Cache write failures are logged but not returned as errors (graceful degradation).
511///
512/// # Arguments
513/// * `package_swift_path` - Path to Package.swift file
514///
515/// # Returns
516/// * `Ok(String)` - Valid JSON string (from cache or freshly generated)
517/// * `Err(String)` - Error message with context
518///
519/// # Example
520/// ```ignore
521/// use std::path::Path;
522///
523/// let swift_path = Path::new("path/to/Package.swift");
524/// match dump_package_cached(swift_path) {
525///     Ok(json) => println!("Got JSON: {}", json),
526///     Err(e) => eprintln!("Swift toolchain unavailable: {}", e),
527/// }
528/// ```
529pub fn dump_package_cached(package_swift_path: &Path) -> Result<String, String> {
530    let content = fs::read_to_string(package_swift_path).map_err(|e| {
531        format!(
532            "Failed to read Package.swift at {:?}: {}",
533            package_swift_path, e
534        )
535    })?;
536
537    let hash = blake3::hash(content.as_bytes()).to_hex().to_string();
538
539    let cache_dir = get_cache_dir()?;
540    let cache_file = cache_dir.join(format!("{}.json", hash));
541
542    if cache_file.exists() {
543        match fs::read_to_string(&cache_file) {
544            Ok(cached) => return Ok(cached),
545            Err(e) => {
546                warn!(
547                    "Failed to read cache file {:?}, regenerating: {}",
548                    cache_file, e
549                );
550            }
551        }
552    }
553
554    let parent_dir = package_swift_path.parent().ok_or_else(|| {
555        format!(
556            "Cannot determine parent directory of {:?}",
557            package_swift_path
558        )
559    })?;
560
561    let json_output = invoke_swift_dump_package(parent_dir)?;
562
563    serde_json::from_str::<Value>(&json_output)
564        .map_err(|e| format!("swift dump-package produced invalid JSON: {}", e))?;
565
566    if let Err(e) = write_cache_file(&cache_file, &json_output) {
567        warn!("Failed to write cache file {:?}: {}", cache_file, e);
568    }
569
570    Ok(json_output)
571}
572
573fn get_cache_dir() -> Result<std::path::PathBuf, String> {
574    let base = dirs_cache_dir().ok_or("Cannot determine cache directory")?;
575    let cache_dir = base.join("provenant").join("swift");
576
577    fs::create_dir_all(&cache_dir)
578        .map_err(|e| format!("Failed to create cache directory {:?}: {}", cache_dir, e))?;
579
580    Ok(cache_dir)
581}
582
/// Determines the base directory for cache files.
///
/// `XDG_CACHE_HOME` is used when it is set to an absolute path. Otherwise the
/// result is derived from the home directory: `Library/Caches` below it on
/// macOS and `.cache` below it elsewhere. Returns `None` when neither source
/// provides an absolute path.
///
/// The variable is read with `var_os`, so directories whose names are not
/// valid UTF-8 are accepted as well.
fn dirs_cache_dir() -> Option<std::path::PathBuf> {
    std::env::var_os("XDG_CACHE_HOME")
        .map(std::path::PathBuf::from)
        // Relative (including empty) values are ignored.
        .filter(|path| path.is_absolute())
        .or_else(|| {
            home_dir().map(|home| {
                if cfg!(target_os = "macos") {
                    home.join("Library").join("Caches")
                } else {
                    home.join(".cache")
                }
            })
        })
}

/// Returns the directory named by the `HOME` environment variable, provided
/// it is set to an absolute path.
///
/// Read with `var_os` so that a non-UTF-8 home directory is not rejected.
fn home_dir() -> Option<std::path::PathBuf> {
    std::env::var_os("HOME")
        .map(std::path::PathBuf::from)
        .filter(|path| path.is_absolute())
}
606
/// Writes `content` to `path` by way of a temporary file in the same directory.
///
/// The content is first written to `.tmp-<pid>-<file name>` next to `path`
/// and then renamed onto `path`, so readers never observe a partially written
/// cache entry. The temporary file is removed again if writing or renaming
/// fails.
///
/// Returns an error message when `path` has no parent directory or when the
/// temporary file cannot be created, written or renamed.
fn write_cache_file(path: &Path, content: &str) -> Result<(), String> {
    let parent = path.parent().ok_or("Cache file has no parent directory")?;

    // Write to temp then rename for atomicity
    let temp_path = parent.join(format!(
        ".tmp-{}-{}",
        std::process::id(),
        path.file_name().and_then(|n| n.to_str()).unwrap_or("cache")
    ));

    let mut file = fs::File::create(&temp_path)
        .map_err(|e| format!("Failed to create temp file {:?}: {}", temp_path, e))?;

    let write_result = file.write_all(content.as_bytes());
    // Close the handle before the file is renamed or removed.
    drop(file);

    if let Err(e) = write_result {
        // Do not leave a partially written temp file behind.
        let _ = fs::remove_file(&temp_path);
        return Err(format!(
            "Failed to write temp file {:?}: {}",
            temp_path, e
        ));
    }

    fs::rename(&temp_path, path).map_err(|e| {
        let _ = fs::remove_file(&temp_path);
        format!(
            "Failed to rename temp file {:?} to {:?}: {}",
            temp_path, path, e
        )
    })
}
633
/// Returns the `PackageData::default()` value.
///
/// `extract_packages` uses this as the result whenever a manifest cannot be
/// read, parsed, or generated, or when the file name is not recognized.
fn default_package_data() -> PackageData {
    PackageData::default()
}
637
// Parser registration via the crate-level `register_parser!` macro.
// NOTE(review): the macro definition is not visible in this file; the
// arguments appear to be, in order: a human-readable description, the path
// glob patterns handled by this parser, the package type, the primary
// language, and a documentation URL — confirm against the macro definition.
// NOTE(review): the patterns list exact `Package.swift*` names, whereas
// `is_match` accepts any file name ending in `.swift.json` or
// `.swift.deplock` — confirm which of the two is intended.
crate::register_parser!(
    "Swift Package Manager manifest (Package.swift, Package.swift.json, Package.swift.deplock)",
    &[
        "**/Package.swift",
        "**/Package.swift.json",
        "**/Package.swift.deplock"
    ],
    "swift",
    "Swift",
    Some("https://docs.swift.org/package-manager/PackageDescription/PackageDescription.html"),
);