Skip to main content

pacha/
uri.rs

1//! URI scheme for model references
2//!
3//! Supports multiple URI schemes for model resolution:
4//!
5//! - `pacha://model-name:version` - Pacha registry (local or remote)
6//! - `pacha://model-name:latest` - Latest version
7//! - `pacha://model-name@sha256:abc123` - Content-addressed
8//! - `pacha://model-name:production` - Stage alias
9//! - `file://./model.gguf` - Local file
10//! - `hf://meta-llama/Llama-3-8B` - HuggingFace Hub
11//!
12//! # Example
13//!
14//! ```
15//! use pacha::uri::{ModelUri, UriScheme};
16//!
17//! let uri = ModelUri::parse("pacha://llama3:8b-q4").unwrap();
18//! assert_eq!(uri.scheme, UriScheme::Pacha);
19//! assert_eq!(uri.name, "llama3");
20//! assert_eq!(uri.version.as_deref(), Some("8b-q4"));
21//! ```
22
23use crate::error::{PachaError, Result};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26use std::path::PathBuf;
27use std::str::FromStr;
28
29/// URI scheme for model references
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
31pub enum UriScheme {
32    /// Pacha registry (local or remote)
33    Pacha,
34    /// Local filesystem
35    File,
36    /// HuggingFace Hub
37    HuggingFace,
38}
39
40impl fmt::Display for UriScheme {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        match self {
43            Self::Pacha => write!(f, "pacha"),
44            Self::File => write!(f, "file"),
45            Self::HuggingFace => write!(f, "hf"),
46        }
47    }
48}
49
50impl FromStr for UriScheme {
51    type Err = PachaError;
52
53    fn from_str(s: &str) -> Result<Self> {
54        match s.to_lowercase().as_str() {
55            "pacha" => Ok(Self::Pacha),
56            "file" => Ok(Self::File),
57            "hf" | "huggingface" => Ok(Self::HuggingFace),
58            _ => Err(PachaError::InvalidUri(format!("Unknown scheme: {s}"))),
59        }
60    }
61}
62
63/// Version reference type
64#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
65pub enum VersionRef {
66    /// Semantic version (e.g., "1.0.0")
67    Version(String),
68    /// Tag (e.g., "latest", "production", "8b-q4")
69    Tag(String),
70    /// Content hash (e.g., "sha256:abc123")
71    Hash(String),
72}
73
74impl fmt::Display for VersionRef {
75    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76        match self {
77            Self::Version(v) | Self::Tag(v) => write!(f, "{v}"),
78            Self::Hash(h) => write!(f, "@{h}"),
79        }
80    }
81}
82
83/// Parsed model URI
84#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
85pub struct ModelUri {
86    /// URI scheme
87    pub scheme: UriScheme,
88    /// Model name or path
89    pub name: String,
90    /// Version reference (semantic version, tag, or hash)
91    pub version: Option<String>,
92    /// Content hash for content-addressed references
93    pub hash: Option<String>,
94    /// Remote registry host (if specified)
95    pub host: Option<String>,
96    /// File path within model (for HuggingFace URIs)
97    pub path: Option<String>,
98}
99
100impl ModelUri {
101    /// Parse a URI string into a `ModelUri`
102    ///
103    /// # Supported formats
104    ///
105    /// - `pacha://model:version`
106    /// - `pacha://host/model:version`
107    /// - `pacha://model@sha256:hash`
108    /// - `file://path/to/model.gguf`
109    /// - `hf://org/model`
110    /// - `model:version` (assumes pacha://)
111    /// - `./model.gguf` (assumes file://)
112    pub fn parse(uri: &str) -> Result<Self> {
113        let uri = uri.trim();
114
115        // Parse scheme://rest first
116        let (scheme, rest) = if let Some(idx) = uri.find("://") {
117            let scheme_str = &uri[..idx];
118            let rest = &uri[idx + 3..];
119            (UriScheme::from_str(scheme_str)?, rest)
120        } else if uri.starts_with("./") || uri.starts_with('/') {
121            // Bare relative or absolute paths - assume file://
122            return Ok(Self {
123                scheme: UriScheme::File,
124                name: uri.to_string(),
125                version: None,
126                hash: None,
127                host: None,
128                path: None,
129            });
130        } else if uri.ends_with(".gguf") || uri.ends_with(".safetensors") || uri.ends_with(".apr") {
131            // Bare model files - assume file://
132            return Ok(Self {
133                scheme: UriScheme::File,
134                name: uri.to_string(),
135                version: None,
136                hash: None,
137                host: None,
138                path: None,
139            });
140        } else if uri.contains(':') && !uri.contains('/') {
141            // Bare model:version format - assume pacha
142            (UriScheme::Pacha, uri)
143        } else {
144            return Err(PachaError::InvalidUri(format!("Cannot parse URI: {uri}")));
145        };
146
147        match scheme {
148            UriScheme::File => Self::parse_file_uri(rest),
149            UriScheme::HuggingFace => Self::parse_hf_uri(rest),
150            UriScheme::Pacha => Self::parse_pacha_uri(rest),
151        }
152    }
153
154    fn parse_file_uri(path: &str) -> Result<Self> {
155        Ok(Self {
156            scheme: UriScheme::File,
157            name: path.to_string(),
158            version: None,
159            hash: None,
160            host: None,
161            path: None,
162        })
163    }
164
165    fn parse_hf_uri(input: &str) -> Result<Self> {
166        // Formats:
167        // - hf://org/model
168        // - hf://org/model:revision
169        // - hf://org/model/path/to/file
170        // - hf://org/model:revision/path/to/file
171
172        // First, separate the model identifier from any path
173        // The model id is always "org/model" (exactly two segments)
174        let parts: Vec<&str> = input.splitn(3, '/').collect();
175
176        if parts.len() < 2 {
177            return Err(PachaError::InvalidUri(format!(
178                "HuggingFace URI must have format org/model: {}",
179                input
180            )));
181        }
182
183        let org = parts[0];
184        let model_and_rest = parts[1];
185
186        // Check for version in model part (e.g., "model:revision")
187        let (model, version) = if let Some(idx) = model_and_rest.find(':') {
188            (&model_and_rest[..idx], Some(model_and_rest[idx + 1..].to_string()))
189        } else {
190            (model_and_rest, None)
191        };
192
193        // Build the model name (org/model)
194        let name = format!("{org}/{model}");
195
196        // Get file path if present (third segment and beyond)
197        let file_path = if parts.len() > 2 { Some(parts[2].to_string()) } else { None };
198
199        Ok(Self {
200            scheme: UriScheme::HuggingFace,
201            name,
202            version,
203            hash: None,
204            host: None,
205            path: file_path,
206        })
207    }
208
209    fn parse_pacha_uri(rest: &str) -> Result<Self> {
210        // Check for host: pacha://host/model:version
211        let (host, model_part) = if let Some(idx) = rest.find('/') {
212            (Some(rest[..idx].to_string()), &rest[idx + 1..])
213        } else {
214            (None, rest)
215        };
216
217        // Check for hash: model@sha256:abc123
218        let (name_version, hash) = if let Some(idx) = model_part.find('@') {
219            let hash_part = &model_part[idx + 1..];
220            (&model_part[..idx], Some(hash_part.to_string()))
221        } else {
222            (model_part, None)
223        };
224
225        // Split name:version
226        let (name, version) = if let Some(idx) = name_version.rfind(':') {
227            (name_version[..idx].to_string(), Some(name_version[idx + 1..].to_string()))
228        } else {
229            (name_version.to_string(), None)
230        };
231
232        if name.is_empty() {
233            return Err(PachaError::InvalidUri("Empty model name".to_string()));
234        }
235
236        Ok(Self { scheme: UriScheme::Pacha, name, version, hash, host, path: None })
237    }
238
239    /// Check if this is a local file reference
240    pub fn is_local_file(&self) -> bool {
241        self.scheme == UriScheme::File
242    }
243
244    /// Check if this is a remote reference
245    pub fn is_remote(&self) -> bool {
246        self.host.is_some() || self.scheme == UriScheme::HuggingFace
247    }
248
249    /// Get the local file path (if scheme is File)
250    pub fn as_path(&self) -> Option<PathBuf> {
251        if self.scheme == UriScheme::File {
252            Some(PathBuf::from(&self.name))
253        } else {
254            None
255        }
256    }
257
258    /// Get version or default to "latest"
259    pub fn version_or_latest(&self) -> &str {
260        self.version.as_deref().unwrap_or("latest")
261    }
262}
263
264impl fmt::Display for ModelUri {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        write!(f, "{}://", self.scheme)?;
267
268        if let Some(ref host) = self.host {
269            write!(f, "{host}/")?;
270        }
271
272        write!(f, "{}", self.name)?;
273
274        if let Some(ref hash) = self.hash {
275            write!(f, "@{hash}")?;
276        } else if let Some(ref version) = self.version {
277            write!(f, ":{version}")?;
278        }
279
280        if let Some(ref path) = self.path {
281            write!(f, "/{path}")?;
282        }
283
284        Ok(())
285    }
286}
287
288impl FromStr for ModelUri {
289    type Err = PachaError;
290
291    fn from_str(s: &str) -> Result<Self> {
292        Self::parse(s)
293    }
294}
295
296// ============================================================================
297// TESTS - EXTREME TDD
298// ============================================================================
299
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking with the offending string if parsing fails.
    fn parsed(input: &str) -> ModelUri {
        ModelUri::parse(input)
            .unwrap_or_else(|err| panic!("failed to parse {input:?}: {err:?}"))
    }

    /// Asserts that parsing `input` and rendering it again yields `input`.
    fn assert_renders_as_written(input: &str) {
        assert_eq!(parsed(input).to_string(), input);
    }

    // -------------------------------------------------------------------------
    // UriScheme Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_uri_scheme_from_str() {
        let cases = [
            ("pacha", UriScheme::Pacha),
            ("PACHA", UriScheme::Pacha),
            ("file", UriScheme::File),
            ("hf", UriScheme::HuggingFace),
            ("huggingface", UriScheme::HuggingFace),
        ];
        for (input, expected) in cases {
            assert_eq!(UriScheme::from_str(input).unwrap(), expected);
        }
    }

    #[test]
    fn test_uri_scheme_from_str_invalid() {
        for input in ["unknown", ""] {
            assert!(UriScheme::from_str(input).is_err());
        }
    }

    #[test]
    fn test_uri_scheme_display() {
        let cases = [
            (UriScheme::Pacha, "pacha"),
            (UriScheme::File, "file"),
            (UriScheme::HuggingFace, "hf"),
        ];
        for (scheme, expected) in cases {
            assert_eq!(scheme.to_string(), expected);
        }
    }

    // -------------------------------------------------------------------------
    // ModelUri Pacha Scheme Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_parse_pacha_simple() {
        let uri = parsed("pacha://llama3:8b");
        assert_eq!(uri.scheme, UriScheme::Pacha);
        assert_eq!(uri.name, "llama3");
        assert_eq!(uri.version.as_deref(), Some("8b"));
        assert_eq!(uri.host, None);
        assert_eq!(uri.hash, None);
    }

    #[test]
    fn test_parse_pacha_with_host() {
        let uri = parsed("pacha://registry.example.com/llama3:1.0.0");
        assert_eq!(uri.scheme, UriScheme::Pacha);
        assert_eq!(uri.host.as_deref(), Some("registry.example.com"));
        assert_eq!(uri.name, "llama3");
        assert_eq!(uri.version.as_deref(), Some("1.0.0"));
    }

    #[test]
    fn test_parse_pacha_with_hash() {
        let uri = parsed("pacha://llama3@sha256:abc123def");
        assert_eq!(uri.scheme, UriScheme::Pacha);
        assert_eq!(uri.name, "llama3");
        assert_eq!(uri.hash.as_deref(), Some("sha256:abc123def"));
        assert_eq!(uri.version, None);
    }

    #[test]
    fn test_parse_pacha_no_version() {
        let uri = parsed("pacha://llama3");
        assert_eq!(uri.name, "llama3");
        assert_eq!(uri.version, None);
        assert_eq!(uri.version_or_latest(), "latest");
    }

    #[test]
    fn test_parse_pacha_stage_tag() {
        let uri = parsed("pacha://fraud-detector:production");
        assert_eq!(uri.name, "fraud-detector");
        assert_eq!(uri.version.as_deref(), Some("production"));
    }

    #[test]
    fn test_parse_bare_model_version() {
        let uri = parsed("llama3:8b-q4");
        assert_eq!(uri.scheme, UriScheme::Pacha);
        assert_eq!(uri.name, "llama3");
        assert_eq!(uri.version.as_deref(), Some("8b-q4"));
    }

    // -------------------------------------------------------------------------
    // ModelUri File Scheme Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_parse_file_uri() {
        let uri = parsed("file://./model.gguf");
        assert_eq!(uri.scheme, UriScheme::File);
        assert_eq!(uri.name, "./model.gguf");
        assert!(uri.is_local_file());
    }

    #[test]
    fn test_parse_file_absolute() {
        let uri = parsed("file:///home/user/model.gguf");
        assert_eq!(uri.scheme, UriScheme::File);
        assert_eq!(uri.name, "/home/user/model.gguf");
    }

    #[test]
    fn test_parse_bare_relative_path() {
        let uri = parsed("./models/llama.gguf");
        assert_eq!(uri.scheme, UriScheme::File);
        assert_eq!(uri.name, "./models/llama.gguf");
    }

    #[test]
    fn test_parse_bare_absolute_path() {
        let uri = parsed("/opt/models/llama.gguf");
        assert_eq!(uri.scheme, UriScheme::File);
        assert_eq!(uri.name, "/opt/models/llama.gguf");
    }

    #[test]
    fn test_parse_bare_gguf_file() {
        let uri = parsed("model.gguf");
        assert_eq!(uri.scheme, UriScheme::File);
        assert_eq!(uri.name, "model.gguf");
    }

    #[test]
    fn test_as_path() {
        let file_uri = parsed("file://./model.gguf");
        assert_eq!(file_uri.as_path(), Some(PathBuf::from("./model.gguf")));

        let registry_uri = parsed("pacha://llama3:8b");
        assert_eq!(registry_uri.as_path(), None);
    }

    // -------------------------------------------------------------------------
    // ModelUri HuggingFace Scheme Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_parse_hf_uri() {
        let uri = parsed("hf://meta-llama/Llama-3-8B");
        assert_eq!(uri.scheme, UriScheme::HuggingFace);
        assert_eq!(uri.name, "meta-llama/Llama-3-8B");
        assert_eq!(uri.version, None);
        assert!(uri.is_remote());
    }

    #[test]
    fn test_parse_hf_uri_with_revision() {
        let uri = parsed("hf://meta-llama/Llama-3-8B:main");
        assert_eq!(uri.scheme, UriScheme::HuggingFace);
        assert_eq!(uri.name, "meta-llama/Llama-3-8B");
        assert_eq!(uri.version.as_deref(), Some("main"));
    }

    #[test]
    fn test_parse_hf_uri_with_path() {
        let uri = parsed("hf://meta-llama/Llama-3-8B/config.json");
        assert_eq!(uri.scheme, UriScheme::HuggingFace);
        assert_eq!(uri.name, "meta-llama/Llama-3-8B");
        assert_eq!(uri.path.as_deref(), Some("config.json"));
        assert_eq!(uri.version, None);
    }

    #[test]
    fn test_parse_hf_uri_with_revision_and_path() {
        let uri = parsed("hf://meta-llama/Llama-3-8B:v2.0/model.safetensors");
        assert_eq!(uri.scheme, UriScheme::HuggingFace);
        assert_eq!(uri.name, "meta-llama/Llama-3-8B");
        assert_eq!(uri.version.as_deref(), Some("v2.0"));
        assert_eq!(uri.path.as_deref(), Some("model.safetensors"));
    }

    #[test]
    fn test_parse_hf_uri_invalid_format() {
        // A single segment cannot be split into org and model.
        assert!(ModelUri::parse("hf://model").is_err());
    }

    // -------------------------------------------------------------------------
    // ModelUri Display Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_display_pacha() {
        assert_renders_as_written("pacha://llama3:8b");
    }

    #[test]
    fn test_display_pacha_with_host() {
        assert_renders_as_written("pacha://registry.example.com/llama3:1.0.0");
    }

    #[test]
    fn test_display_pacha_with_hash() {
        assert_renders_as_written("pacha://llama3@sha256:abc123");
    }

    #[test]
    fn test_display_file() {
        assert_renders_as_written("file://./model.gguf");
    }

    #[test]
    fn test_display_hf() {
        assert_renders_as_written("hf://meta-llama/Llama-3-8B");
    }

    #[test]
    fn test_display_hf_with_path() {
        assert_renders_as_written("hf://meta-llama/Llama-3-8B/config.json");
    }

    #[test]
    fn test_display_hf_with_revision_and_path() {
        assert_renders_as_written("hf://meta-llama/Llama-3-8B:v2.0/model.safetensors");
    }

    // -------------------------------------------------------------------------
    // ModelUri Error Cases
    // -------------------------------------------------------------------------

    #[test]
    fn test_parse_empty_name() {
        assert!(ModelUri::parse("pacha://:8b").is_err());
    }

    #[test]
    fn test_parse_unknown_scheme() {
        assert!(ModelUri::parse("unknown://model").is_err());
    }

    // -------------------------------------------------------------------------
    // Roundtrip Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_roundtrip_pacha() {
        assert_renders_as_written("pacha://llama3:8b-q4");
    }

    #[test]
    fn test_roundtrip_file() {
        assert_renders_as_written("file:///opt/models/llama.gguf");
    }

    #[test]
    fn test_roundtrip_hf() {
        assert_renders_as_written("hf://meta-llama/Llama-3-8B:main");
    }

    // -------------------------------------------------------------------------
    // FromStr Trait Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_from_str_trait() {
        let uri = "pacha://llama3:8b".parse::<ModelUri>().unwrap();
        assert_eq!(uri.name, "llama3");
    }

    // -------------------------------------------------------------------------
    // Property Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_is_local_file() {
        let cases = [
            ("file://./model.gguf", true),
            ("./model.gguf", true),
            ("pacha://llama3:8b", false),
            ("hf://meta-llama/Llama-3", false),
        ];
        for (input, expected) in cases {
            assert_eq!(parsed(input).is_local_file(), expected, "input: {input}");
        }
    }

    #[test]
    fn test_is_remote() {
        let cases = [
            ("hf://meta-llama/Llama-3", true),
            ("pacha://registry.example.com/llama3:8b", true),
            ("pacha://llama3:8b", false),
            ("file://./model.gguf", false),
        ];
        for (input, expected) in cases {
            assert_eq!(parsed(input).is_remote(), expected, "input: {input}");
        }
    }
}