Skip to main content

provenant/parsers/
swift_manifest_json.rs

1//! Parser for Swift Package Manager manifest files.
2//!
3//! Supports three input formats:
4//! - `Package.swift.json` - Pre-generated JSON (recommended for CI/CD)
5//! - `Package.swift.deplock` - DepLock JSON format
6//! - `Package.swift` - Raw Swift source (auto-generates JSON with caching)
7//!
8//! # Automatic JSON Generation (Enhancement over Python ScanCode)
9//!
10//! This Rust implementation includes automatic JSON generation from raw `Package.swift`
11//! files, which is an enhancement beyond the Python ScanCode reference implementation.
12//!
13//! **Python ScanCode behavior**: Requires users to manually run:
14//! ```bash
15//! swift package dump-package > Package.swift.json
16//! ```
17//!
18//! **Rust ScanCode behavior**: Automatically generates JSON when Swift toolchain available,
19//! with BLAKE3-based caching for performance.
20//!
21//! ## Design Decision: Graceful Degradation
22//!
23//! - **Swift toolchain available**: Automatically generates + caches JSON (~200ms first, <1ms cached)
24//! - **Swift toolchain unavailable**: Warns and skips file (no crash, CI/CD unaffected)
25//! - **Pre-generated JSON**: Always works, regardless of Swift availability
26//!
27//! This design allows:
28//! - ✅ Better UX for developers with Swift installed
29//! - ✅ No CI/CD complications (tests don't require Swift)
30//! - ✅ Backward compatibility (pre-generated JSON workflow unchanged)
31//! - ✅ Feature parity maintained (Python behavior is subset of Rust behavior)
32
33use std::collections::HashMap;
34use std::fs;
35use std::io::Write as _;
36use std::path::Path;
37use std::process::Command;
38
39use log::warn;
40use packageurl::PackageUrl;
41use serde_json::Value;
42
43use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
44
45use super::PackageParser;
46
/// Parses Swift Package Manager manifest files with automatic JSON generation.
///
/// This is a stateless marker type: all behavior lives in its [`PackageParser`]
/// implementation.
///
/// # Supported File Formats
/// - `Package.swift.json` - Pre-generated JSON from `swift package dump-package`
///   (any file name ending in `.swift.json` is accepted)
/// - `Package.swift.deplock` - JSON format from DepLock tool
///   (any file name ending in `.swift.deplock` is accepted)
/// - `Package.swift` - Raw Swift source (auto-generates JSON if Swift available);
///   the file name must match exactly
///
/// # Automatic JSON Generation
///
/// When scanning raw `Package.swift` files:
/// 1. Checks BLAKE3-based cache for previously generated JSON
/// 2. If cache miss, invokes `swift package dump-package` (requires Swift toolchain)
/// 3. Caches result for future scans
/// 4. Falls back gracefully if Swift unavailable (logs warning, returns empty package data)
///
/// # Output
///
/// `extract_packages` always returns exactly one `PackageData`. When the file
/// cannot be read, parsed, or generated, or when the file name is not one of the
/// supported formats, that single entry is the default (empty) package data.
///
/// # Performance
/// - **Pre-generated JSON**: <1ms (direct file read)
/// - **Raw Package.swift (cached)**: <1ms (cache hit)
/// - **Raw Package.swift (first time)**: ~100-500ms (Swift toolchain execution + cache write)
/// - **Raw Package.swift (no Swift)**: <1ms (immediate fallback)
///
/// # Example
/// ```no_run
/// use provenant::parsers::{PackageParser, SwiftManifestJsonParser};
/// use std::path::Path;
///
/// // Works with pre-generated JSON
/// let json_path = Path::new("Package.swift.json");
/// let data1 = SwiftManifestJsonParser::extract_first_package(json_path);
///
/// // Also works with raw Package.swift (if Swift installed)
/// let swift_path = Path::new("Package.swift");
/// let data2 = SwiftManifestJsonParser::extract_first_package(swift_path);
/// ```
pub struct SwiftManifestJsonParser;
82
83impl PackageParser for SwiftManifestJsonParser {
84    const PACKAGE_TYPE: PackageType = PackageType::Swift;
85
86    fn extract_packages(path: &Path) -> Vec<PackageData> {
87        let filename = path.file_name().and_then(|n| n.to_str());
88
89        let is_json_file = filename
90            .map(|n| n.ends_with(".swift.json") || n.ends_with(".swift.deplock"))
91            .unwrap_or(false);
92        let is_raw_swift = filename.map(|n| n == "Package.swift").unwrap_or(false);
93
94        vec![if is_json_file {
95            let json_content = match read_swift_manifest_json(path) {
96                Ok(content) => content,
97                Err(e) => {
98                    warn!(
99                        "Failed to read or parse Swift manifest JSON at {:?}: {}",
100                        path, e
101                    );
102                    return vec![default_package_data()];
103                }
104            };
105            parse_swift_manifest(&json_content)
106        } else if is_raw_swift {
107            match dump_package_cached(path) {
108                Ok(json_str) => match serde_json::from_str::<Value>(&json_str) {
109                    Ok(json) => parse_swift_manifest(&json),
110                    Err(e) => {
111                        warn!(
112                            "Swift toolchain generated invalid JSON for {:?}: {}",
113                            path, e
114                        );
115                        default_package_data()
116                    }
117                },
118                Err(e) => {
119                    warn!(
120                        "Cannot auto-generate Package.swift.json for {:?}: {}. \
121                             Swift toolchain may not be installed. \
122                             To scan this file, manually run: swift package dump-package > Package.swift.json",
123                        path, e
124                    );
125                    default_package_data()
126                }
127            }
128        } else {
129            default_package_data()
130        }]
131    }
132
133    fn is_match(path: &Path) -> bool {
134        path.file_name()
135            .and_then(|name| name.to_str())
136            .is_some_and(|name| {
137                name.ends_with(".swift.json")
138                    || name.ends_with(".swift.deplock")
139                    || name == "Package.swift"
140            })
141    }
142}
143
144fn read_swift_manifest_json(path: &Path) -> Result<Value, String> {
145    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
146
147    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
148}
149
150fn parse_swift_manifest(manifest: &Value) -> PackageData {
151    let name = manifest
152        .get("name")
153        .and_then(|v| v.as_str())
154        .map(String::from);
155
156    let dependencies = get_dependencies(manifest.get("dependencies"));
157    let platforms = manifest.get("platforms").cloned();
158
159    let tools_version = manifest
160        .get("toolsVersion")
161        .and_then(|tv| tv.get("_version"))
162        .and_then(|v| v.as_str())
163        .map(String::from);
164
165    let mut extra_data = HashMap::new();
166    if let Some(platforms_val) = platforms {
167        extra_data.insert("platforms".to_string(), platforms_val);
168    }
169    if let Some(ref tv) = tools_version {
170        extra_data.insert(
171            "swift_tools_version".to_string(),
172            serde_json::Value::String(tv.clone()),
173        );
174    }
175
176    let purl = create_package_url(&name, &None);
177
178    PackageData {
179        package_type: Some(SwiftManifestJsonParser::PACKAGE_TYPE),
180        namespace: None,
181        name,
182        version: None,
183        qualifiers: None,
184        subpath: None,
185        primary_language: Some("Swift".to_string()),
186        description: None,
187        release_date: None,
188        parties: Vec::new(),
189        keywords: Vec::new(),
190        homepage_url: None,
191        download_url: None,
192        size: None,
193        sha1: None,
194        md5: None,
195        sha256: None,
196        sha512: None,
197        bug_tracking_url: None,
198        code_view_url: None,
199        vcs_url: None,
200        copyright: None,
201        holder: None,
202        declared_license_expression: None,
203        declared_license_expression_spdx: None,
204        license_detections: Vec::new(),
205        other_license_expression: None,
206        other_license_expression_spdx: None,
207        other_license_detections: Vec::new(),
208        extracted_license_statement: None,
209        notice_text: None,
210        source_packages: Vec::new(),
211        file_references: Vec::new(),
212        is_private: false,
213        is_virtual: false,
214        extra_data: if extra_data.is_empty() {
215            None
216        } else {
217            Some(extra_data)
218        },
219        dependencies,
220        repository_homepage_url: None,
221        repository_download_url: None,
222        api_data_url: None,
223        datasource_id: Some(DatasourceId::SwiftPackageManifestJson),
224        purl,
225    }
226}
227
228fn get_dependencies(dependencies: Option<&Value>) -> Vec<Dependency> {
229    let Some(deps_array) = dependencies.and_then(|v| v.as_array()) else {
230        return Vec::new();
231    };
232
233    let mut dependent_packages = Vec::new();
234
235    for dependency in deps_array {
236        if let Some(dep) = parse_manifest_dependency(dependency) {
237            dependent_packages.push(dep);
238        }
239    }
240
241    dependent_packages
242}
243
244fn parse_manifest_dependency(dependency: &Value) -> Option<Dependency> {
245    if let Some(source_control) = dependency.get("sourceControl").and_then(|v| v.as_array())
246        && let Some(source) = source_control.first()
247    {
248        let identity = source
249            .get("identity")
250            .and_then(|v| v.as_str())
251            .unwrap_or_default();
252
253        let (namespace, dep_name) = extract_namespace_and_name(source, identity);
254        let (version, is_pinned, requirement_kind) = extract_version_requirement(source);
255        let purl = create_dependency_purl(&namespace, &dep_name, &version, is_pinned);
256        let mut extra_data = HashMap::from([
257            (
258                "dependency_kind".to_string(),
259                serde_json::Value::String("sourceControl".to_string()),
260            ),
261            (
262                "requirement_kind".to_string(),
263                serde_json::Value::String(requirement_kind.to_string()),
264            ),
265        ]);
266        if let Some(remote) = source
267            .get("location")
268            .and_then(|loc| loc.get("remote"))
269            .and_then(|remote| remote.as_array())
270            .and_then(|arr| arr.first())
271            .and_then(|first| first.get("urlString"))
272            .and_then(|v| v.as_str())
273        {
274            extra_data.insert(
275                "location".to_string(),
276                serde_json::Value::String(remote.to_string()),
277            );
278        }
279
280        return Some(Dependency {
281            purl: Some(purl),
282            extracted_requirement: version,
283            scope: Some("dependencies".to_string()),
284            is_runtime: None,
285            is_optional: Some(false),
286            is_pinned: Some(is_pinned),
287            is_direct: Some(true),
288            resolved_package: None,
289            extra_data: Some(extra_data),
290        });
291    }
292
293    if let Some(file_system) = dependency.get("fileSystem").and_then(|v| v.as_array())
294        && let Some(source) = file_system.first()
295    {
296        let identity = source
297            .get("identity")
298            .and_then(|v| v.as_str())
299            .or_else(|| source.get("name").and_then(|v| v.as_str()))
300            .unwrap_or_default();
301        if identity.is_empty() {
302            return None;
303        }
304
305        let dep_name = identity.to_string();
306        let purl = create_dependency_purl(&None, &dep_name, &None, false);
307        let mut extra_data = HashMap::from([(
308            "dependency_kind".to_string(),
309            serde_json::Value::String("fileSystem".to_string()),
310        )]);
311        if let Some(path) = source.get("path").and_then(|v| v.as_str()) {
312            extra_data.insert(
313                "path".to_string(),
314                serde_json::Value::String(path.to_string()),
315            );
316        }
317
318        return Some(Dependency {
319            purl: Some(purl),
320            extracted_requirement: None,
321            scope: Some("dependencies".to_string()),
322            is_runtime: None,
323            is_optional: Some(false),
324            is_pinned: Some(false),
325            is_direct: Some(true),
326            resolved_package: None,
327            extra_data: Some(extra_data),
328        });
329    }
330
331    None
332}
333
334fn extract_namespace_and_name(source: &Value, identity: &str) -> (Option<String>, String) {
335    let url = source
336        .get("location")
337        .and_then(|loc| loc.get("remote"))
338        .and_then(|remote| remote.as_array())
339        .and_then(|arr| arr.first())
340        .and_then(|first| first.get("urlString"))
341        .and_then(|v| v.as_str());
342
343    match url {
344        Some(url_str) => get_namespace_and_name(url_str),
345        None => (None, identity.to_string()),
346    }
347}
348
/// Parses a repository URL into (namespace, name).
///
/// For `http://` and `https://` URLs the scheme is dropped and the host becomes
/// the first namespace segment. Any other input is split on its last `/` as-is.
/// Trailing slashes and a trailing `.git` are removed in both cases, so
/// `repo.git` and `repo.git/` yield the same name.
///
/// When there is nothing to split (a bare name, or a URL consisting only of a
/// host), the namespace is `None` and the whole remaining text is the name.
///
/// Example: `https://github.com/apple/swift-argument-parser.git`
/// yields namespace=`"github.com/apple"`, name=`"swift-argument-parser"`
pub fn get_namespace_and_name(url: &str) -> (Option<String>, String) {
    let http_rest = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"));

    let (hostname, path) = match http_rest {
        Some(rest) => {
            let rest = rest.trim_end_matches('/');
            match rest.split_once('/') {
                Some((host, path)) => (Some(host), path),
                None => (Some(rest), ""),
            }
        }
        // Trim here as well, otherwise `repo.git/` would keep its `.git` suffix.
        None => (None, url.trim_end_matches('/')),
    };

    let clean_path = path
        .strip_suffix(".git")
        .unwrap_or(path)
        .trim_end_matches('/');

    let canonical = match hostname {
        // Host-only URL: do not append a separator, so the host is reported as
        // the name instead of producing an empty name.
        Some(host) if clean_path.is_empty() => host.to_string(),
        Some(host) => format!("{}/{}", host, clean_path),
        None => clean_path.to_string(),
    };

    match canonical.rsplit_once('/') {
        Some((ns, name)) => (Some(ns.to_string()), name.to_string()),
        None => (None, canonical),
    }
}
388
389/// Handles four requirement types:
390/// - `exact`: `["1.0.0"]` -> version="1.0.0", is_pinned=true
391/// - `range`: `[{"lowerBound": "1.0.0", "upperBound": "2.0.0"}]` -> version="vers:swift/>=1.0.0|<2.0.0", is_pinned=false
392/// - `branch`: `["main"]` -> version="main", is_pinned=false
393/// - `revision`: `["abc123"]` -> version="abc123", is_pinned=true
394fn extract_version_requirement(source: &Value) -> (Option<String>, bool, &'static str) {
395    let Some(requirement) = source.get("requirement") else {
396        return (None, false, "unknown");
397    };
398
399    if let Some(exact) = requirement.get("exact").and_then(|v| v.as_array())
400        && let Some(version) = exact.first().and_then(|v| v.as_str())
401    {
402        return (Some(version.to_string()), true, "exact");
403    }
404
405    if let Some(range) = requirement.get("range").and_then(|v| v.as_array())
406        && let Some(bound) = range.first()
407    {
408        let lower = bound.get("lowerBound").and_then(|v| v.as_str());
409        let upper = bound.get("upperBound").and_then(|v| v.as_str());
410        if let (Some(lb), Some(ub)) = (lower, upper) {
411            let vers = format!("vers:swift/>={lb}|<{ub}");
412            return (Some(vers), false, "range");
413        }
414    }
415
416    if let Some(branch) = requirement.get("branch").and_then(|v| v.as_array())
417        && let Some(branch_name) = branch.first().and_then(|v| v.as_str())
418    {
419        return (Some(branch_name.to_string()), false, "branch");
420    }
421
422    if let Some(revision) = requirement.get("revision").and_then(|v| v.as_array())
423        && let Some(rev) = revision.first().and_then(|v| v.as_str())
424    {
425        return (Some(rev.to_string()), true, "revision");
426    }
427
428    (None, false, "unknown")
429}
430
431fn create_dependency_purl(
432    namespace: &Option<String>,
433    name: &str,
434    version: &Option<String>,
435    is_pinned: bool,
436) -> String {
437    let mut purl = match PackageUrl::new(SwiftManifestJsonParser::PACKAGE_TYPE.as_str(), name) {
438        Ok(p) => p,
439        Err(e) => {
440            warn!(
441                "Failed to create PackageUrl for swift dependency '{}': {}",
442                name, e
443            );
444            return match (namespace, is_pinned.then_some(version.as_deref()).flatten()) {
445                (Some(ns), Some(v)) => format!("pkg:swift/{}/{}@{}", ns, name, v),
446                (Some(ns), None) => format!("pkg:swift/{}/{}", ns, name),
447                (None, Some(v)) => format!("pkg:swift/{}@{}", name, v),
448                (None, None) => format!("pkg:swift/{}", name),
449            };
450        }
451    };
452
453    if let Some(ns) = namespace
454        && let Err(e) = purl.with_namespace(ns)
455    {
456        warn!(
457            "Failed to set namespace '{}' for swift dependency '{}': {}",
458            ns, name, e
459        );
460    }
461
462    if is_pinned
463        && let Some(v) = version
464        && let Err(e) = purl.with_version(v)
465    {
466        warn!(
467            "Failed to set version '{}' for swift dependency '{}': {}",
468            v, name, e
469        );
470    }
471
472    purl.to_string()
473}
474
475fn create_package_url(name: &Option<String>, version: &Option<String>) -> Option<String> {
476    name.as_ref().and_then(|name| {
477        let mut package_url =
478            match PackageUrl::new(SwiftManifestJsonParser::PACKAGE_TYPE.as_str(), name) {
479                Ok(p) => p,
480                Err(e) => {
481                    warn!(
482                        "Failed to create PackageUrl for swift package '{}': {}",
483                        name, e
484                    );
485                    return None;
486                }
487            };
488
489        if let Some(v) = version
490            && let Err(e) = package_url.with_version(v)
491        {
492            warn!(
493                "Failed to set version '{}' for swift package '{}': {}",
494                v, name, e
495            );
496            return None;
497        }
498
499        Some(package_url.to_string())
500    })
501}
502
/// Invokes `swift package dump-package` to generate Package.swift.json.
///
/// Executes the Swift toolchain command to convert Package.swift into JSON format.
///
/// This function is used internally by `dump_package_cached()` to generate JSON
/// from raw Package.swift files. It requires the Swift toolchain to be installed
/// and available on PATH.
///
/// # Arguments
/// * `package_dir` - Directory containing Package.swift. An empty path is
///   treated as the current directory (`.`); this is what
///   `Path::new("Package.swift").parent()` produces for a bare relative file name.
///
/// # Returns
/// * `Ok(String)` - JSON string output from swift command
/// * `Err(String)` - Error message if Swift toolchain unavailable, the command
///   exits unsuccessfully, or its output is not valid UTF-8
///
/// # Note
/// This function is public for testing purposes but is not intended for direct use.
/// Use `dump_package_cached()` instead for automatic caching.
pub fn invoke_swift_dump_package(package_dir: &Path) -> Result<String, String> {
    // Spawning a process with an empty working directory fails, so normalise
    // the empty path to the current directory.
    let package_dir = if package_dir.as_os_str().is_empty() {
        Path::new(".")
    } else {
        package_dir
    };

    let output = Command::new("swift")
        .args(["package", "dump-package"])
        .current_dir(package_dir)
        .output()
        .map_err(|e| {
            format!(
                "Failed to execute 'swift package dump-package' in {:?}: {}. \
                 Is the Swift toolchain installed and available on PATH?",
                package_dir, e
            )
        })?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "'swift package dump-package' failed in {:?} (exit code: {:?}): {}",
            package_dir,
            output.status.code(),
            stderr.trim()
        ));
    }

    String::from_utf8(output.stdout)
        .map_err(|e| format!("swift dump-package output is not valid UTF-8: {}", e))
}
547
548/// Generates or retrieves cached Package.swift.json using BLAKE3 content hashing.
549///
550/// This is the primary entry point for converting raw Package.swift files to JSON.
551/// It implements a content-based caching strategy where the cache key is the BLAKE3
552/// hash of the Package.swift file contents.
553///
554/// # Caching Strategy
555///
556/// 1. **Cache Key**: BLAKE3 hash of Package.swift content (not file path)
557/// 2. **Cache Hit**: Returns cached JSON (<1ms)
558/// 3. **Cache Miss**: Executes `swift package dump-package`, validates JSON, caches result
559/// 4. **Cache Location**: System cache directory (e.g., ~/.cache/provenant/swift/)
560///
561/// # Performance
562/// - **Cache hit**: <1ms (single file read)
563/// - **Cache miss**: ~100-500ms (Swift toolchain execution + validation + cache write)
564///
565/// # Error Handling
566///
567/// Returns `Err` if:
568/// - Package.swift file cannot be read
569/// - Swift toolchain not installed or not on PATH
570/// - `swift package dump-package` command fails
571/// - Output is not valid JSON
572/// - Cannot determine or create cache directory
573///
574/// Cache write failures are logged but not returned as errors (graceful degradation).
575///
576/// # Arguments
577/// * `package_swift_path` - Path to Package.swift file
578///
579/// # Returns
580/// * `Ok(String)` - Valid JSON string (from cache or freshly generated)
581/// * `Err(String)` - Error message with context
582///
583/// # Example
584/// ```ignore
585/// use std::path::Path;
586///
587/// let swift_path = Path::new("path/to/Package.swift");
588/// match dump_package_cached(swift_path) {
589///     Ok(json) => println!("Got JSON: {}", json),
590///     Err(e) => eprintln!("Swift toolchain unavailable: {}", e),
591/// }
592/// ```
593pub fn dump_package_cached(package_swift_path: &Path) -> Result<String, String> {
594    let content = fs::read_to_string(package_swift_path).map_err(|e| {
595        format!(
596            "Failed to read Package.swift at {:?}: {}",
597            package_swift_path, e
598        )
599    })?;
600
601    let hash = blake3::hash(content.as_bytes()).to_hex().to_string();
602
603    let cache_dir = get_cache_dir()?;
604    let cache_file = cache_dir.join(format!("{}.json", hash));
605
606    if cache_file.exists() {
607        match fs::read_to_string(&cache_file) {
608            Ok(cached) => return Ok(cached),
609            Err(e) => {
610                warn!(
611                    "Failed to read cache file {:?}, regenerating: {}",
612                    cache_file, e
613                );
614            }
615        }
616    }
617
618    let parent_dir = package_swift_path.parent().ok_or_else(|| {
619        format!(
620            "Cannot determine parent directory of {:?}",
621            package_swift_path
622        )
623    })?;
624
625    let json_output = invoke_swift_dump_package(parent_dir)?;
626
627    serde_json::from_str::<Value>(&json_output)
628        .map_err(|e| format!("swift dump-package produced invalid JSON: {}", e))?;
629
630    if let Err(e) = write_cache_file(&cache_file, &json_output) {
631        warn!("Failed to write cache file {:?}: {}", cache_file, e);
632    }
633
634    Ok(json_output)
635}
636
637fn get_cache_dir() -> Result<std::path::PathBuf, String> {
638    let base = dirs_cache_dir().ok_or("Cannot determine cache directory")?;
639    let cache_dir = base.join("provenant").join("swift");
640
641    fs::create_dir_all(&cache_dir)
642        .map_err(|e| format!("Failed to create cache directory {:?}: {}", cache_dir, e))?;
643
644    Ok(cache_dir)
645}
646
647fn dirs_cache_dir() -> Option<std::path::PathBuf> {
648    if let Ok(xdg) = std::env::var("XDG_CACHE_HOME") {
649        let path = std::path::PathBuf::from(xdg);
650        if path.is_absolute() {
651            return Some(path);
652        }
653    }
654
655    home_dir().map(|home| {
656        if cfg!(target_os = "macos") {
657            home.join("Library").join("Caches")
658        } else {
659            home.join(".cache")
660        }
661    })
662}
663
/// Returns the user's home directory from the `HOME` environment variable.
///
/// Yields `None` when `HOME` is unset or is not an absolute path. The variable
/// is read with `var_os`, so a home directory whose path is not valid UTF-8 is
/// still returned rather than being treated as unset.
fn home_dir() -> Option<std::path::PathBuf> {
    std::env::var_os("HOME")
        .map(std::path::PathBuf::from)
        .filter(|p| p.is_absolute())
}
670
/// Writes `content` to `path` via a temporary file in the same directory.
///
/// The content is first written to `.tmp-<pid>-<file name>` next to `path` and
/// then renamed into place, so readers never observe a partially written cache
/// file. The temporary file is removed again if any step fails.
///
/// # Errors
/// Returns a descriptive message when `path` has no parent directory or when
/// creating, writing, or renaming the temporary file fails.
fn write_cache_file(path: &Path, content: &str) -> Result<(), String> {
    let parent = path.parent().ok_or("Cache file has no parent directory")?;

    // Write to temp then rename for atomicity
    let temp_path = parent.join(format!(
        ".tmp-{}-{}",
        std::process::id(),
        path.file_name().and_then(|n| n.to_str()).unwrap_or("cache")
    ));

    let outcome = fs::File::create(&temp_path)
        .map_err(|e| format!("Failed to create temp file {:?}: {}", temp_path, e))
        .and_then(|mut file| {
            // `file` is dropped (closed) at the end of this closure, before the rename.
            file.write_all(content.as_bytes())
                .map_err(|e| format!("Failed to write temp file {:?}: {}", temp_path, e))
        })
        .and_then(|()| {
            fs::rename(&temp_path, path).map_err(|e| {
                format!(
                    "Failed to rename temp file {:?} to {:?}: {}",
                    temp_path, path, e
                )
            })
        });

    // Best-effort cleanup on every failure path, so a failed write does not
    // leave a stray temp file behind in the cache directory.
    if outcome.is_err() {
        let _ = fs::remove_file(&temp_path);
    }

    outcome
}
697
698fn default_package_data() -> PackageData {
699    PackageData::default()
700}
701
// Registers this parser with the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; the arguments appear to be a
// human-readable description, the glob patterns of handled files, the package
// type, the primary language, and an optional documentation URL — confirm
// against the macro definition. The globs mirror the file names accepted by
// `SwiftManifestJsonParser::is_match` for the canonical `Package.swift*` names.
crate::register_parser!(
    "Swift Package Manager manifest (Package.swift, Package.swift.json, Package.swift.deplock)",
    &[
        "**/Package.swift",
        "**/Package.swift.json",
        "**/Package.swift.deplock"
    ],
    "swift",
    "Swift",
    Some("https://docs.swift.org/package-manager/PackageDescription/PackageDescription.html"),
);