Skip to main content

pacha/
catalog.rs

1//! Model Catalog and Discovery
2//!
3//! Unified interface for discovering models across local and remote registries.
4//!
5//! ## Features
6//!
7//! - Search across multiple sources (local, remote, HuggingFace)
8//! - Filter by model type, size, task, quantization
9//! - Sort by popularity, size, date, name
10//! - Cached catalog for fast local queries
11//!
12//! ## Example
13//!
//! ```rust,ignore
//! use pacha::catalog::{CatalogEntry, ModelCatalog, ModelSource, SearchQuery, Task};
//!
//! let mut catalog = ModelCatalog::new();
//! catalog.add(
//!     CatalogEntry::new("llama3", "8b", ModelSource::Local).with_task(Task::TextGeneration),
//! );
//!
//! let results = catalog.search(
//!     &SearchQuery::new()
//!         .with_text("llama")
//!         .with_task(Task::TextGeneration)
//!         .with_max_size_gb(8.0),
//! );
//! ```
28
29use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31
32// ============================================================================
33// CAT-001: Catalog Entry
34// ============================================================================
35
/// A single model listing in the catalog.
///
/// Entries are usually created with [`CatalogEntry::new`], which fills in
/// `id` and `uri`, and then refined with the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CatalogEntry {
    /// Identifier; [`CatalogEntry::new`] sets it to `"{name}:{version}"`.
    ///
    /// The source is not part of the identifier, so two entries with the same
    /// name and version from different sources share the same `id`.
    pub id: String,
    /// Model name
    pub name: String,
    /// Model version
    pub version: String,
    /// Source (local, remote, huggingface)
    pub source: ModelSource,
    /// Model size in bytes (`0` until set with `with_size`)
    pub size_bytes: u64,
    /// Task category, if known
    pub task: Option<Task>,
    /// Architecture (e.g., "llama", "mistral", "phi")
    pub architecture: Option<String>,
    /// Quantization type if quantized
    pub quantization: Option<String>,
    /// Context length, if known
    pub context_length: Option<u32>,
    /// Number of parameters (approximate)
    pub parameters: Option<u64>,
    /// License
    pub license: Option<String>,
    /// Free-form description; included in text search
    pub description: Option<String>,
    /// Tags for filtering; also included in text search
    pub tags: Vec<String>,
    /// Download count (for popularity); `0` until set
    pub downloads: u64,
    /// Last updated timestamp.
    ///
    /// Date sorting compares these values as plain strings
    /// (presumably ISO-8601 so that string order is date order — TODO confirm).
    pub updated_at: Option<String>,
    /// URI for accessing the model.
    ///
    /// [`CatalogEntry::new`] derives it from the source:
    /// `pacha://{name}:{version}` (local),
    /// `pacha://{host}/{name}:{version}` (remote) or `hf://{name}` (HuggingFace).
    pub uri: String,
}
72
73impl CatalogEntry {
74    /// Create a new catalog entry
75    #[must_use]
76    pub fn new(name: impl Into<String>, version: impl Into<String>, source: ModelSource) -> Self {
77        let name = name.into();
78        let version = version.into();
79        let uri = match source {
80            ModelSource::Local => format!("pacha://{name}:{version}"),
81            ModelSource::Remote { ref host } => format!("pacha://{host}/{name}:{version}"),
82            ModelSource::HuggingFace => format!("hf://{name}"),
83        };
84
85        Self {
86            id: format!("{name}:{version}"),
87            name,
88            version,
89            source,
90            size_bytes: 0,
91            task: None,
92            architecture: None,
93            quantization: None,
94            context_length: None,
95            parameters: None,
96            license: None,
97            description: None,
98            tags: Vec::new(),
99            downloads: 0,
100            updated_at: None,
101            uri,
102        }
103    }
104
105    /// Set size in bytes
106    #[must_use]
107    pub fn with_size(mut self, bytes: u64) -> Self {
108        self.size_bytes = bytes;
109        self
110    }
111
112    /// Set task
113    #[must_use]
114    pub fn with_task(mut self, task: Task) -> Self {
115        self.task = Some(task);
116        self
117    }
118
119    /// Set architecture
120    #[must_use]
121    pub fn with_architecture(mut self, arch: impl Into<String>) -> Self {
122        self.architecture = Some(arch.into());
123        self
124    }
125
126    /// Set quantization
127    #[must_use]
128    pub fn with_quantization(mut self, quant: impl Into<String>) -> Self {
129        self.quantization = Some(quant.into());
130        self
131    }
132
133    /// Set context length
134    #[must_use]
135    pub fn with_context_length(mut self, length: u32) -> Self {
136        self.context_length = Some(length);
137        self
138    }
139
140    /// Set parameters
141    #[must_use]
142    pub fn with_parameters(mut self, params: u64) -> Self {
143        self.parameters = Some(params);
144        self
145    }
146
147    /// Set license
148    #[must_use]
149    pub fn with_license(mut self, license: impl Into<String>) -> Self {
150        self.license = Some(license.into());
151        self
152    }
153
154    /// Set description
155    #[must_use]
156    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
157        self.description = Some(desc.into());
158        self
159    }
160
161    /// Add tag
162    #[must_use]
163    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
164        self.tags.push(tag.into());
165        self
166    }
167
168    /// Add multiple tags
169    #[must_use]
170    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
171        self.tags.extend(tags);
172        self
173    }
174
175    /// Set downloads
176    #[must_use]
177    pub fn with_downloads(mut self, downloads: u64) -> Self {
178        self.downloads = downloads;
179        self
180    }
181
182    /// Get size in GB
183    #[must_use]
184    pub fn size_gb(&self) -> f64 {
185        self.size_bytes as f64 / (1024.0 * 1024.0 * 1024.0)
186    }
187
188    /// Check if model matches a text query
189    #[must_use]
190    pub fn matches_text(&self, query: &str) -> bool {
191        let query = query.to_lowercase();
192        self.name.to_lowercase().contains(&query)
193            || self.description.as_ref().is_some_and(|d| d.to_lowercase().contains(&query))
194            || self.tags.iter().any(|t| t.to_lowercase().contains(&query))
195            || self.architecture.as_ref().is_some_and(|a| a.to_lowercase().contains(&query))
196    }
197}
198
/// Where a catalog entry comes from.
///
/// The source decides the URI scheme built by `CatalogEntry::new` and the key
/// under which `ModelCatalog` indexes the entry.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ModelSource {
    /// Local Pacha registry (`pacha://{name}:{version}`)
    Local,
    /// Remote Pacha registry (`pacha://{host}/{name}:{version}`)
    Remote {
        /// Registry host
        host: String,
    },
    /// HuggingFace Hub (`hf://{name}`)
    HuggingFace,
}
212
/// Task category a model is intended for.
///
/// Used both as entry metadata and as a search filter; a human-readable label
/// is available through `Task::display_name`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Task {
    /// Text generation (LLMs)
    TextGeneration,
    /// Text classification
    TextClassification,
    /// Question answering
    QuestionAnswering,
    /// Summarization
    Summarization,
    /// Translation
    Translation,
    /// Image classification
    ImageClassification,
    /// Object detection
    ObjectDetection,
    /// Image generation
    ImageGeneration,
    /// Speech recognition
    SpeechRecognition,
    /// Text to speech
    TextToSpeech,
    /// Embedding generation
    Embedding,
    /// Code generation
    CodeGeneration,
    /// Multi-modal
    MultiModal,
    /// Other/unknown
    Other,
}
245
246impl Task {
247    /// Get display name
248    #[must_use]
249    pub const fn display_name(&self) -> &'static str {
250        match self {
251            Self::TextGeneration => "Text Generation",
252            Self::TextClassification => "Text Classification",
253            Self::QuestionAnswering => "Question Answering",
254            Self::Summarization => "Summarization",
255            Self::Translation => "Translation",
256            Self::ImageClassification => "Image Classification",
257            Self::ObjectDetection => "Object Detection",
258            Self::ImageGeneration => "Image Generation",
259            Self::SpeechRecognition => "Speech Recognition",
260            Self::TextToSpeech => "Text to Speech",
261            Self::Embedding => "Embedding",
262            Self::CodeGeneration => "Code Generation",
263            Self::MultiModal => "Multi-Modal",
264            Self::Other => "Other",
265        }
266    }
267}
268
269// ============================================================================
270// CAT-002: Search Query
271// ============================================================================
272
273/// Search query for catalog
274#[derive(Debug, Clone, Default)]
275pub struct SearchQuery {
276    /// Text search (name, description, tags)
277    pub text: Option<String>,
278    /// Filter by task
279    pub task: Option<Task>,
280    /// Filter by source
281    pub source: Option<ModelSource>,
282    /// Filter by architecture
283    pub architecture: Option<String>,
284    /// Filter by quantization
285    pub quantization: Option<String>,
286    /// Maximum size in GB
287    pub max_size_gb: Option<f64>,
288    /// Minimum size in GB
289    pub min_size_gb: Option<f64>,
290    /// Minimum context length
291    pub min_context_length: Option<u32>,
292    /// Filter by license
293    pub license: Option<String>,
294    /// Filter by tags (any match)
295    pub tags: Vec<String>,
296    /// Sort order
297    pub sort: SortOrder,
298    /// Maximum results
299    pub limit: usize,
300    /// Offset for pagination
301    pub offset: usize,
302}
303
304impl SearchQuery {
305    /// Create a new search query
306    #[must_use]
307    pub fn new() -> Self {
308        Self { limit: 50, ..Default::default() }
309    }
310
311    /// Set text search
312    #[must_use]
313    pub fn with_text(mut self, text: impl Into<String>) -> Self {
314        self.text = Some(text.into());
315        self
316    }
317
318    /// Filter by task
319    #[must_use]
320    pub fn with_task(mut self, task: Task) -> Self {
321        self.task = Some(task);
322        self
323    }
324
325    /// Filter by source
326    #[must_use]
327    pub fn with_source(mut self, source: ModelSource) -> Self {
328        self.source = Some(source);
329        self
330    }
331
332    /// Filter by architecture
333    #[must_use]
334    pub fn with_architecture(mut self, arch: impl Into<String>) -> Self {
335        self.architecture = Some(arch.into());
336        self
337    }
338
339    /// Filter by quantization
340    #[must_use]
341    pub fn with_quantization(mut self, quant: impl Into<String>) -> Self {
342        self.quantization = Some(quant.into());
343        self
344    }
345
346    /// Set maximum size
347    #[must_use]
348    pub fn with_max_size_gb(mut self, gb: f64) -> Self {
349        self.max_size_gb = Some(gb);
350        self
351    }
352
353    /// Set minimum size
354    #[must_use]
355    pub fn with_min_size_gb(mut self, gb: f64) -> Self {
356        self.min_size_gb = Some(gb);
357        self
358    }
359
360    /// Set minimum context length
361    #[must_use]
362    pub fn with_min_context_length(mut self, length: u32) -> Self {
363        self.min_context_length = Some(length);
364        self
365    }
366
367    /// Filter by license
368    #[must_use]
369    pub fn with_license(mut self, license: impl Into<String>) -> Self {
370        self.license = Some(license.into());
371        self
372    }
373
374    /// Add tag filter
375    #[must_use]
376    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
377        self.tags.push(tag.into());
378        self
379    }
380
381    /// Set sort order
382    #[must_use]
383    pub fn with_sort(mut self, sort: SortOrder) -> Self {
384        self.sort = sort;
385        self
386    }
387
388    /// Set result limit
389    #[must_use]
390    pub fn with_limit(mut self, limit: usize) -> Self {
391        self.limit = limit;
392        self
393    }
394
395    /// Set offset for pagination
396    #[must_use]
397    pub fn with_offset(mut self, offset: usize) -> Self {
398        self.offset = offset;
399        self
400    }
401
402    /// Check if an entry matches this query
403    #[must_use]
404    pub fn matches(&self, entry: &CatalogEntry) -> bool {
405        self.matches_text_filter(entry)
406            && self.matches_task_filter(entry)
407            && self.matches_source_filter(entry)
408            && self.matches_arch_filter(entry)
409            && self.matches_quant_filter(entry)
410            && self.matches_size_filter(entry)
411            && self.matches_context_filter(entry)
412            && self.matches_license_filter(entry)
413            && self.matches_tag_filter(entry)
414    }
415
416    fn matches_text_filter(&self, entry: &CatalogEntry) -> bool {
417        self.text.as_ref().map_or(true, |text| entry.matches_text(text))
418    }
419
420    fn matches_task_filter(&self, entry: &CatalogEntry) -> bool {
421        self.task.map_or(true, |task| entry.task == Some(task))
422    }
423
424    fn matches_source_filter(&self, entry: &CatalogEntry) -> bool {
425        self.source.as_ref().map_or(true, |source| &entry.source == source)
426    }
427
428    fn matches_arch_filter(&self, entry: &CatalogEntry) -> bool {
429        self.architecture.as_ref().map_or(true, |arch| entry.architecture.as_ref() == Some(arch))
430    }
431
432    fn matches_quant_filter(&self, entry: &CatalogEntry) -> bool {
433        self.quantization.as_ref().map_or(true, |quant| entry.quantization.as_ref() == Some(quant))
434    }
435
436    fn matches_size_filter(&self, entry: &CatalogEntry) -> bool {
437        let within_max = self.max_size_gb.map_or(true, |max| entry.size_gb() <= max);
438        let within_min = self.min_size_gb.map_or(true, |min| entry.size_gb() >= min);
439        within_max && within_min
440    }
441
442    fn matches_context_filter(&self, entry: &CatalogEntry) -> bool {
443        self.min_context_length.map_or(true, |min_ctx| entry.context_length.unwrap_or(0) >= min_ctx)
444    }
445
446    fn matches_license_filter(&self, entry: &CatalogEntry) -> bool {
447        self.license.as_ref().map_or(true, |lic| entry.license.as_ref() == Some(lic))
448    }
449
450    fn matches_tag_filter(&self, entry: &CatalogEntry) -> bool {
451        self.tags.is_empty() || self.tags.iter().any(|t| entry.tags.contains(t))
452    }
453}
454
/// Sort order for search results.
///
/// Optional keys (`updated_at`, `parameters`) are compared as `Option`s, so a
/// missing value orders before any present value: it comes first in the
/// ascending variants and last in the descending ones.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SortOrder {
    /// Sort by name (alphabetical, plain `String` comparison); the default
    #[default]
    Name,
    /// Sort by downloads (most popular first)
    Downloads,
    /// Sort by size (smallest first)
    SizeAsc,
    /// Sort by size (largest first)
    SizeDesc,
    /// Sort by date (newest first); `updated_at` is compared as a string
    DateDesc,
    /// Sort by date (oldest first); `updated_at` is compared as a string
    DateAsc,
    /// Sort by parameters (smallest first)
    ParametersAsc,
    /// Sort by parameters (largest first)
    ParametersDesc,
}
476
477// ============================================================================
478// CAT-003: Search Results
479// ============================================================================
480
/// One page of search results together with the paging information needed to
/// request the next page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResults {
    /// Matching entries on this page
    pub entries: Vec<CatalogEntry>,
    /// Total matches (before pagination)
    pub total: usize,
    /// Query offset this page starts at
    pub offset: usize,
    /// Query limit (maximum page size) that was requested
    pub limit: usize,
}
493
494impl SearchResults {
495    /// Create new search results
496    #[must_use]
497    pub fn new(entries: Vec<CatalogEntry>, total: usize, offset: usize, limit: usize) -> Self {
498        Self { entries, total, offset, limit }
499    }
500
501    /// Check if there are more results
502    #[must_use]
503    pub fn has_more(&self) -> bool {
504        self.offset + self.entries.len() < self.total
505    }
506
507    /// Get next page offset
508    #[must_use]
509    pub fn next_offset(&self) -> Option<usize> {
510        if self.has_more() {
511            Some(self.offset + self.limit)
512        } else {
513            None
514        }
515    }
516}
517
518// ============================================================================
519// CAT-004: Model Catalog
520// ============================================================================
521
/// Unified model catalog.
///
/// Holds all entries in insertion order plus two secondary indexes that map a
/// key to the positions of the matching entries in `entries`.
#[derive(Debug, Default)]
pub struct ModelCatalog {
    /// In-memory cache of catalog entries, in insertion order
    entries: Vec<CatalogEntry>,
    /// Index by name for fast lookup (name -> positions in `entries`)
    by_name: HashMap<String, Vec<usize>>,
    /// Index by source; keys are `"local"`, `"remote:{host}"` and
    /// `"huggingface"`
    by_source: HashMap<String, Vec<usize>>,
}
532
533impl ModelCatalog {
534    /// Create a new empty catalog
535    #[must_use]
536    pub fn new() -> Self {
537        Self::default()
538    }
539
540    /// Add an entry to the catalog
541    pub fn add(&mut self, entry: CatalogEntry) {
542        let idx = self.entries.len();
543
544        // Index by name
545        self.by_name.entry(entry.name.clone()).or_default().push(idx);
546
547        // Index by source
548        let source_key = match &entry.source {
549            ModelSource::Local => "local".to_string(),
550            ModelSource::Remote { host } => format!("remote:{host}"),
551            ModelSource::HuggingFace => "huggingface".to_string(),
552        };
553        self.by_source.entry(source_key).or_default().push(idx);
554
555        self.entries.push(entry);
556    }
557
558    /// Get total number of entries
559    #[must_use]
560    pub fn len(&self) -> usize {
561        self.entries.len()
562    }
563
564    /// Check if catalog is empty
565    #[must_use]
566    pub fn is_empty(&self) -> bool {
567        self.entries.is_empty()
568    }
569
570    /// Get entry by index
571    #[must_use]
572    pub fn get(&self, idx: usize) -> Option<&CatalogEntry> {
573        self.entries.get(idx)
574    }
575
576    /// Get entries by name
577    #[must_use]
578    pub fn get_by_name(&self, name: &str) -> Vec<&CatalogEntry> {
579        self.by_name
580            .get(name)
581            .map(|indices| indices.iter().filter_map(|&i| self.entries.get(i)).collect())
582            .unwrap_or_default()
583    }
584
585    /// Search the catalog
586    #[must_use]
587    pub fn search(&self, query: &SearchQuery) -> SearchResults {
588        // Filter
589        let mut matches: Vec<&CatalogEntry> =
590            self.entries.iter().filter(|e| query.matches(e)).collect();
591
592        let total = matches.len();
593
594        // Sort
595        match query.sort {
596            SortOrder::Name => matches.sort_by(|a, b| a.name.cmp(&b.name)),
597            SortOrder::Downloads => matches.sort_by(|a, b| b.downloads.cmp(&a.downloads)),
598            SortOrder::SizeAsc => matches.sort_by(|a, b| a.size_bytes.cmp(&b.size_bytes)),
599            SortOrder::SizeDesc => matches.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes)),
600            SortOrder::DateDesc => {
601                matches.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
602            }
603            SortOrder::DateAsc => {
604                matches.sort_by(|a, b| a.updated_at.cmp(&b.updated_at));
605            }
606            SortOrder::ParametersAsc => {
607                matches.sort_by(|a, b| a.parameters.cmp(&b.parameters));
608            }
609            SortOrder::ParametersDesc => {
610                matches.sort_by(|a, b| b.parameters.cmp(&a.parameters));
611            }
612        }
613
614        // Paginate
615        let entries: Vec<CatalogEntry> =
616            matches.into_iter().skip(query.offset).take(query.limit).cloned().collect();
617
618        SearchResults::new(entries, total, query.offset, query.limit)
619    }
620
621    /// List all unique architectures
622    #[must_use]
623    pub fn architectures(&self) -> Vec<String> {
624        let mut archs: Vec<_> =
625            self.entries.iter().filter_map(|e| e.architecture.clone()).collect();
626        archs.sort();
627        archs.dedup();
628        archs
629    }
630
631    /// List all unique tags
632    #[must_use]
633    pub fn tags(&self) -> Vec<String> {
634        let mut tags: Vec<_> = self.entries.iter().flat_map(|e| e.tags.clone()).collect();
635        tags.sort();
636        tags.dedup();
637        tags
638    }
639
640    /// List all unique licenses
641    #[must_use]
642    pub fn licenses(&self) -> Vec<String> {
643        let mut licenses: Vec<_> = self.entries.iter().filter_map(|e| e.license.clone()).collect();
644        licenses.sort();
645        licenses.dedup();
646        licenses
647    }
648
649    /// Get statistics
650    #[must_use]
651    pub fn stats(&self) -> CatalogStats {
652        let total_models = self.entries.len();
653        let total_size: u64 = self.entries.iter().map(|e| e.size_bytes).sum();
654
655        let local_count = self.by_source.get("local").map_or(0, Vec::len);
656        let hf_count = self.by_source.get("huggingface").map_or(0, Vec::len);
657        let remote_count = total_models - local_count - hf_count;
658
659        let by_task: HashMap<String, usize> = {
660            let mut map = HashMap::new();
661            for entry in &self.entries {
662                if let Some(task) = entry.task {
663                    *map.entry(task.display_name().to_string()).or_insert(0) += 1;
664                }
665            }
666            map
667        };
668
669        CatalogStats {
670            total_models,
671            total_size_bytes: total_size,
672            local_count,
673            remote_count,
674            huggingface_count: hf_count,
675            by_task,
676            unique_architectures: self.architectures().len(),
677            unique_tags: self.tags().len(),
678        }
679    }
680}
681
/// Catalog statistics, as computed by `ModelCatalog::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CatalogStats {
    /// Total number of models
    pub total_models: usize,
    /// Total size in bytes (sum over all entries)
    pub total_size_bytes: u64,
    /// Local models count
    pub local_count: usize,
    /// Remote models count (every entry that is neither local nor HuggingFace)
    pub remote_count: usize,
    /// HuggingFace models count
    pub huggingface_count: usize,
    /// Models by task, keyed by the task's display name; entries without a
    /// task are not counted
    pub by_task: HashMap<String, usize>,
    /// Number of unique architectures
    pub unique_architectures: usize,
    /// Number of unique tags
    pub unique_tags: usize,
}
702
703impl CatalogStats {
704    /// Get total size in GB
705    #[must_use]
706    pub fn total_size_gb(&self) -> f64 {
707        self.total_size_bytes as f64 / (1024.0 * 1024.0 * 1024.0)
708    }
709}
710
711// ============================================================================
712// Tests
713// ============================================================================
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718
719    // ========================================================================
720    // CAT-001: Entry Tests
721    // ========================================================================
722
723    #[test]
724    fn test_catalog_entry_new() {
725        let entry = CatalogEntry::new("llama3", "8b", ModelSource::Local);
726        assert_eq!(entry.name, "llama3");
727        assert_eq!(entry.version, "8b");
728        assert_eq!(entry.uri, "pacha://llama3:8b");
729    }
730
731    #[test]
732    fn test_catalog_entry_builder() {
733        let entry = CatalogEntry::new("mistral", "7b-q4", ModelSource::Local)
734            .with_size(4_000_000_000)
735            .with_task(Task::TextGeneration)
736            .with_architecture("mistral")
737            .with_quantization("Q4_K_M")
738            .with_context_length(8192)
739            .with_parameters(7_000_000_000)
740            .with_license("Apache-2.0")
741            .with_description("Mistral 7B quantized")
742            .with_tag("llm")
743            .with_downloads(10000);
744
745        assert_eq!(entry.size_bytes, 4_000_000_000);
746        assert_eq!(entry.task, Some(Task::TextGeneration));
747        assert_eq!(entry.architecture, Some("mistral".to_string()));
748        assert_eq!(entry.quantization, Some("Q4_K_M".to_string()));
749        assert_eq!(entry.context_length, Some(8192));
750        assert_eq!(entry.downloads, 10000);
751    }
752
753    #[test]
754    fn test_catalog_entry_size_gb() {
755        let entry =
756            CatalogEntry::new("test", "1.0", ModelSource::Local).with_size(4 * 1024 * 1024 * 1024); // 4 GB
757
758        assert!((entry.size_gb() - 4.0).abs() < 0.01);
759    }
760
761    #[test]
762    fn test_catalog_entry_matches_text() {
763        let entry = CatalogEntry::new("llama3-8b", "1.0", ModelSource::Local)
764            .with_description("Meta's Llama 3 model")
765            .with_tag("meta")
766            .with_architecture("llama");
767
768        assert!(entry.matches_text("llama"));
769        assert!(entry.matches_text("LLAMA")); // Case insensitive
770        assert!(entry.matches_text("meta"));
771        assert!(entry.matches_text("Meta's"));
772        assert!(!entry.matches_text("gpt"));
773    }
774
775    #[test]
776    fn test_model_source_remote() {
777        let entry = CatalogEntry::new(
778            "model",
779            "1.0",
780            ModelSource::Remote { host: "registry.example.com".to_string() },
781        );
782        assert_eq!(entry.uri, "pacha://registry.example.com/model:1.0");
783    }
784
785    #[test]
786    fn test_model_source_huggingface() {
787        let entry = CatalogEntry::new("meta-llama/Llama-2-7b", "main", ModelSource::HuggingFace);
788        assert_eq!(entry.uri, "hf://meta-llama/Llama-2-7b");
789    }
790
791    // ========================================================================
792    // CAT-002: Query Tests
793    // ========================================================================
794
795    #[test]
796    fn test_search_query_default() {
797        let query = SearchQuery::new();
798        assert!(query.text.is_none());
799        assert!(query.task.is_none());
800        assert_eq!(query.limit, 50);
801        assert_eq!(query.offset, 0);
802    }
803
804    #[test]
805    fn test_search_query_builder() {
806        let query = SearchQuery::new()
807            .with_text("llama")
808            .with_task(Task::TextGeneration)
809            .with_architecture("llama")
810            .with_max_size_gb(8.0)
811            .with_sort(SortOrder::Downloads)
812            .with_limit(10);
813
814        assert_eq!(query.text, Some("llama".to_string()));
815        assert_eq!(query.task, Some(Task::TextGeneration));
816        assert_eq!(query.architecture, Some("llama".to_string()));
817        assert_eq!(query.max_size_gb, Some(8.0));
818        assert_eq!(query.sort, SortOrder::Downloads);
819        assert_eq!(query.limit, 10);
820    }
821
822    #[test]
823    fn test_search_query_matches_text() {
824        let entry = CatalogEntry::new("llama3", "1.0", ModelSource::Local);
825        let query = SearchQuery::new().with_text("llama");
826
827        assert!(query.matches(&entry));
828
829        let query = SearchQuery::new().with_text("gpt");
830        assert!(!query.matches(&entry));
831    }
832
833    #[test]
834    fn test_search_query_matches_task() {
835        let entry =
836            CatalogEntry::new("test", "1.0", ModelSource::Local).with_task(Task::TextGeneration);
837
838        let query = SearchQuery::new().with_task(Task::TextGeneration);
839        assert!(query.matches(&entry));
840
841        let query = SearchQuery::new().with_task(Task::ImageClassification);
842        assert!(!query.matches(&entry));
843    }
844
845    #[test]
846    fn test_search_query_matches_size() {
847        let entry =
848            CatalogEntry::new("test", "1.0", ModelSource::Local).with_size(4 * 1024 * 1024 * 1024); // 4 GB
849
850        let query = SearchQuery::new().with_max_size_gb(8.0);
851        assert!(query.matches(&entry));
852
853        let query = SearchQuery::new().with_max_size_gb(2.0);
854        assert!(!query.matches(&entry));
855
856        let query = SearchQuery::new().with_min_size_gb(2.0);
857        assert!(query.matches(&entry));
858
859        let query = SearchQuery::new().with_min_size_gb(8.0);
860        assert!(!query.matches(&entry));
861    }
862
863    #[test]
864    fn test_search_query_matches_tags() {
865        let entry =
866            CatalogEntry::new("test", "1.0", ModelSource::Local).with_tag("llm").with_tag("meta");
867
868        let query = SearchQuery::new().with_tag("llm");
869        assert!(query.matches(&entry));
870
871        let query = SearchQuery::new().with_tag("gpt");
872        assert!(!query.matches(&entry));
873    }
874
875    // ========================================================================
876    // CAT-003: Results Tests
877    // ========================================================================
878
879    #[test]
880    fn test_search_results_has_more() {
881        // 100 total, got 10 entries starting at 0 -> more available
882        let entries: Vec<CatalogEntry> = (0..10)
883            .map(|i| CatalogEntry::new(format!("m{i}"), "1.0", ModelSource::Local))
884            .collect();
885        let results = SearchResults::new(entries, 100, 0, 10);
886        assert!(results.has_more());
887
888        // 10 total, got 10 entries starting at 0 -> no more
889        let entries: Vec<CatalogEntry> = (0..10)
890            .map(|i| CatalogEntry::new(format!("m{i}"), "1.0", ModelSource::Local))
891            .collect();
892        let results = SearchResults::new(entries, 10, 0, 10);
893        assert!(!results.has_more());
894    }
895
896    #[test]
897    fn test_search_results_next_offset() {
898        // 100 total, got 10 entries starting at 0 -> next offset is 10
899        let entries: Vec<CatalogEntry> = (0..10)
900            .map(|i| CatalogEntry::new(format!("m{i}"), "1.0", ModelSource::Local))
901            .collect();
902        let results = SearchResults::new(entries, 100, 0, 10);
903        assert_eq!(results.next_offset(), Some(10));
904
905        // 100 total, got 10 entries starting at 90 -> no next (at end)
906        let entries: Vec<CatalogEntry> = (0..10)
907            .map(|i| CatalogEntry::new(format!("m{i}"), "1.0", ModelSource::Local))
908            .collect();
909        let results = SearchResults::new(entries, 100, 90, 10);
910        assert_eq!(results.next_offset(), None);
911    }
912
913    // ========================================================================
914    // CAT-004: Catalog Tests
915    // ========================================================================
916
917    #[test]
918    fn test_catalog_new() {
919        let catalog = ModelCatalog::new();
920        assert!(catalog.is_empty());
921        assert_eq!(catalog.len(), 0);
922    }
923
924    #[test]
925    fn test_catalog_add() {
926        let mut catalog = ModelCatalog::new();
927        catalog.add(CatalogEntry::new("llama3", "8b", ModelSource::Local));
928
929        assert_eq!(catalog.len(), 1);
930        assert!(!catalog.is_empty());
931    }
932
933    #[test]
934    fn test_catalog_get_by_name() {
935        let mut catalog = ModelCatalog::new();
936        catalog.add(CatalogEntry::new("llama3", "8b", ModelSource::Local));
937        catalog.add(CatalogEntry::new("llama3", "70b", ModelSource::Local));
938        catalog.add(CatalogEntry::new("mistral", "7b", ModelSource::Local));
939
940        let entries = catalog.get_by_name("llama3");
941        assert_eq!(entries.len(), 2);
942
943        let entries = catalog.get_by_name("gpt");
944        assert!(entries.is_empty());
945    }
946
947    #[test]
948    fn test_catalog_search() {
949        let mut catalog = ModelCatalog::new();
950        catalog.add(
951            CatalogEntry::new("llama3-8b", "1.0", ModelSource::Local)
952                .with_task(Task::TextGeneration)
953                .with_downloads(1000),
954        );
955        catalog.add(
956            CatalogEntry::new("llama3-70b", "1.0", ModelSource::Local)
957                .with_task(Task::TextGeneration)
958                .with_downloads(500),
959        );
960        catalog.add(
961            CatalogEntry::new("clip", "1.0", ModelSource::Local)
962                .with_task(Task::ImageClassification),
963        );
964
965        // Search by text
966        let results = catalog.search(&SearchQuery::new().with_text("llama"));
967        assert_eq!(results.total, 2);
968
969        // Search by task
970        let results = catalog.search(&SearchQuery::new().with_task(Task::TextGeneration));
971        assert_eq!(results.total, 2);
972
973        // Sort by downloads
974        let results =
975            catalog.search(&SearchQuery::new().with_text("llama").with_sort(SortOrder::Downloads));
976        assert_eq!(results.entries[0].name, "llama3-8b"); // More downloads
977    }
978
979    #[test]
980    fn test_catalog_search_pagination() {
981        let mut catalog = ModelCatalog::new();
982        for i in 0..25 {
983            catalog.add(CatalogEntry::new(format!("model-{i}"), "1.0", ModelSource::Local));
984        }
985
986        let results = catalog.search(&SearchQuery::new().with_limit(10));
987        assert_eq!(results.entries.len(), 10);
988        assert_eq!(results.total, 25);
989        assert!(results.has_more());
990
991        let results = catalog.search(&SearchQuery::new().with_limit(10).with_offset(20));
992        assert_eq!(results.entries.len(), 5);
993        assert!(!results.has_more());
994    }
995
996    #[test]
997    fn test_catalog_architectures() {
998        let mut catalog = ModelCatalog::new();
999        catalog.add(CatalogEntry::new("m1", "1.0", ModelSource::Local).with_architecture("llama"));
1000        catalog
1001            .add(CatalogEntry::new("m2", "1.0", ModelSource::Local).with_architecture("mistral"));
1002        catalog.add(CatalogEntry::new("m3", "1.0", ModelSource::Local).with_architecture("llama"));
1003
1004        let archs = catalog.architectures();
1005        assert_eq!(archs.len(), 2);
1006        assert!(archs.contains(&"llama".to_string()));
1007        assert!(archs.contains(&"mistral".to_string()));
1008    }
1009
1010    #[test]
1011    fn test_catalog_stats() {
1012        let mut catalog = ModelCatalog::new();
1013        catalog.add(
1014            CatalogEntry::new("m1", "1.0", ModelSource::Local)
1015                .with_size(1024)
1016                .with_task(Task::TextGeneration),
1017        );
1018        catalog.add(
1019            CatalogEntry::new("m2", "1.0", ModelSource::HuggingFace)
1020                .with_size(2048)
1021                .with_task(Task::TextGeneration),
1022        );
1023
1024        let stats = catalog.stats();
1025        assert_eq!(stats.total_models, 2);
1026        assert_eq!(stats.total_size_bytes, 3072);
1027        assert_eq!(stats.local_count, 1);
1028        assert_eq!(stats.huggingface_count, 1);
1029    }
1030
1031    // ========================================================================
1032    // Task Tests
1033    // ========================================================================
1034
1035    #[test]
1036    fn test_task_display_name() {
1037        assert_eq!(Task::TextGeneration.display_name(), "Text Generation");
1038        assert_eq!(Task::CodeGeneration.display_name(), "Code Generation");
1039        assert_eq!(Task::ImageClassification.display_name(), "Image Classification");
1040    }
1041
1042    // ========================================================================
1043    // Serialization Tests
1044    // ========================================================================
1045
1046    #[test]
1047    fn test_catalog_entry_serialization() {
1048        let entry = CatalogEntry::new("llama3", "8b", ModelSource::Local)
1049            .with_task(Task::TextGeneration)
1050            .with_size(4_000_000_000);
1051
1052        let json = serde_json::to_string(&entry).unwrap();
1053        assert!(json.contains("llama3"));
1054        assert!(json.contains("TextGeneration"));
1055
1056        let parsed: CatalogEntry = serde_json::from_str(&json).unwrap();
1057        assert_eq!(parsed.name, "llama3");
1058    }
1059
1060    #[test]
1061    fn test_search_results_serialization() {
1062        let results = SearchResults::new(
1063            vec![CatalogEntry::new("test", "1.0", ModelSource::Local)],
1064            1,
1065            0,
1066            10,
1067        );
1068
1069        let json = serde_json::to_string(&results).unwrap();
1070        let parsed: SearchResults = serde_json::from_str(&json).unwrap();
1071
1072        assert_eq!(parsed.total, 1);
1073        assert_eq!(parsed.entries.len(), 1);
1074    }
1075
1076    #[test]
1077    fn test_catalog_stats_serialization() {
1078        let stats = CatalogStats {
1079            total_models: 100,
1080            total_size_bytes: 1024 * 1024 * 1024,
1081            local_count: 50,
1082            remote_count: 30,
1083            huggingface_count: 20,
1084            by_task: HashMap::from([("Text Generation".to_string(), 80)]),
1085            unique_architectures: 5,
1086            unique_tags: 10,
1087        };
1088
1089        let json = serde_json::to_string(&stats).unwrap();
1090        let parsed: CatalogStats = serde_json::from_str(&json).unwrap();
1091
1092        assert_eq!(parsed.total_models, 100);
1093    }
1094}