mecha10_cli/services/model_service.rs

//! Model management service for downloading and caching AI models from HuggingFace Hub
//!
//! This service provides a unified interface for:
//! - Downloading models from HuggingFace Hub using the model catalog
//! - Caching models to the project's models/ directory
//! - Listing available and installed models
//! - Managing model lifecycle (pull, remove, info)
//!
//! # Design
//!
//! - Models are defined in `model_catalog.toml`, embedded in the CLI binary (see the example entry below)
//! - Models are downloaded from HuggingFace Hub on first use (lazy loading)
//! - Models are cached to `<project_root>/models/<name>/model.onnx`
//! - The HF cache (`~/.cache/huggingface/hub/`) is also used, for offline support
//!
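//! As an illustration, a catalog entry might look like the following (hypothetical
//! repo and values; the real entries live in `model_catalog.toml` and map onto
//! [`ModelCatalogEntry`]):
//!
//! ```toml
//! [[models]]
//! name = "yolov8n"
//! description = "YOLOv8 nano object detector"
//! task = "object-detection"
//! repo = "some-org/yolov8-onnx"
//! filename = "yolov8n/model.onnx"
//! preprocessing_preset = "yolo"
//! quantize = { enabled = true, method = "dynamic_int8" }
//! ```
//!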
//! # Usage
//!
//! ```no_run
//! use mecha10_cli::services::ModelService;
//!
//! # async fn example() -> anyhow::Result<()> {
//! let service = ModelService::new()?;
//!
//! // List catalog models
//! let catalog = service.list_catalog()?;
//!
//! // Download a model (pull is async)
//! service.pull("yolov8n", None).await?;
//!
//! // List installed models
//! let installed = service.list_installed().await?;
//! # Ok(())
//! # }
//! ```

use anyhow::{Context, Result};
use hf_hub::api::tokio::Api;
use indicatif::{ProgressBar, ProgressStyle};
use mecha10_core::model::{CustomLabelsConfig, ModelConfig, PreprocessingConfig};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tokio::fs;

/// Model catalog entry from model_catalog.toml
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelCatalogEntry {
    /// Model identifier (e.g., "yolov8n-face", "mobilenet-v2")
    pub name: String,
    /// Human-readable description
    pub description: String,
    /// Task type (e.g., "object-detection", "image-classification")
    pub task: String,
    /// HuggingFace repository (e.g., "deepghs/yolo-face")
    pub repo: String,
    /// Filename within the repo (e.g., "yolov8n-face/model.onnx")
    pub filename: String,

    /// Optional preprocessing preset hint (e.g., "imagenet", "yolo", "coco")
    #[serde(default)]
    pub preprocessing_preset: Option<String>,

    /// Optional inline class list for models with few classes (e.g., ["face"] for face detection)
    #[serde(default)]
    pub classes: Vec<String>,

    /// Optional automatic quantization configuration
    #[serde(default)]
    pub quantize: Option<QuantizeConfig>,
}

/// Quantization configuration for automatic INT8 conversion
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizeConfig {
    /// Enable automatic quantization after download
    pub enabled: bool,
    /// Quantization method (currently only "dynamic_int8" supported)
    pub method: String,
}

/// Model catalog loaded from TOML
#[derive(Debug, Deserialize)]
struct ModelCatalog {
    models: Vec<ModelCatalogEntry>,
}

/// Preprocessing presets for common model families
#[derive(Debug, Clone, Copy)]
pub enum PreprocessingPreset {
    /// ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ImageNet,
    /// YOLO normalization (mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0] -> [0, 1])
    Yolo,
    /// COCO normalization (same as ImageNet)
    Coco,
    /// No normalization ([0, 255] range)
    Zero255,
}

impl PreprocessingPreset {
    /// Parse preset from string name
    pub fn from_name(name: &str) -> Result<Self> {
        match name.to_lowercase().as_str() {
            "imagenet" => Ok(Self::ImageNet),
            "yolo" => Ok(Self::Yolo),
            "coco" => Ok(Self::Coco),
            "zero255" | "0-255" => Ok(Self::Zero255),
            _ => anyhow::bail!("Unknown preprocessing preset: {}", name),
        }
    }

    /// Get preprocessing configuration for this preset
    pub fn to_config(self) -> PreprocessingConfig {
        match self {
            Self::ImageNet | Self::Coco => PreprocessingConfig {
                mean: [0.485, 0.456, 0.406],
                std: [0.229, 0.224, 0.225],
                channel_order: "RGB".to_string(),
            },
            Self::Yolo => PreprocessingConfig {
                mean: [0.0, 0.0, 0.0],
                std: [255.0, 255.0, 255.0],
                channel_order: "RGB".to_string(),
            },
            Self::Zero255 => PreprocessingConfig {
                mean: [0.0, 0.0, 0.0],
                std: [1.0, 1.0, 1.0],
                channel_order: "RGB".to_string(),
            },
        }
    }
}

/// HuggingFace preprocessor_config.json schema
#[derive(Debug, Clone, Deserialize)]
struct HFPreprocessorConfig {
    #[serde(default)]
    image_mean: Option<Vec<f32>>,
    #[serde(default)]
    image_std: Option<Vec<f32>>,
    #[serde(default)]
    size: Option<HFSize>,
    /// Some models (e.g., MobileNet) use crop_size instead of size
    #[serde(default)]
    crop_size: Option<HFSize>,
}

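/// Shape of the `size` / `crop_size` fields, which varies across HuggingFace repos:
/// a `{"height": ..., "width": ...}` map, a `{"shortest_edge": ...}` map, or a bare
/// integer for square inputs. `#[serde(untagged)]` tries the variants in this order.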
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum HFSize {
    Dict { height: u32, width: u32 },
    ShortestEdge { shortest_edge: u32 },
    Single(u32), // Square images
}

impl HFPreprocessorConfig {
    fn to_preprocessing(&self) -> PreprocessingConfig {
        PreprocessingConfig {
            mean: [
                self.image_mean.as_ref().and_then(|v| v.first()).copied().unwrap_or(0.0),
                self.image_mean.as_ref().and_then(|v| v.get(1)).copied().unwrap_or(0.0),
                self.image_mean.as_ref().and_then(|v| v.get(2)).copied().unwrap_or(0.0),
            ],
            std: [
                self.image_std.as_ref().and_then(|v| v.first()).copied().unwrap_or(1.0),
                self.image_std.as_ref().and_then(|v| v.get(1)).copied().unwrap_or(1.0),
                self.image_std.as_ref().and_then(|v| v.get(2)).copied().unwrap_or(1.0),
            ],
            channel_order: "RGB".to_string(),
        }
    }

    fn input_size(&self) -> Option<[u32; 2]> {
        // Check crop_size first (used by MobileNet, EfficientNet, etc.)
        // crop_size represents the actual model input dimensions
        if let Some(crop_size) = &self.crop_size {
            return match crop_size {
                HFSize::Dict { height, width } => Some([*width, *height]),
                HFSize::ShortestEdge { shortest_edge } => Some([*shortest_edge, *shortest_edge]),
                HFSize::Single(s) => Some([*s, *s]),
            };
        }

        // Fall back to size field
        match &self.size {
            Some(HFSize::Dict { height, width }) => Some([*width, *height]),
            Some(HFSize::ShortestEdge { shortest_edge }) => Some([*shortest_edge, *shortest_edge]),
            Some(HFSize::Single(s)) => Some([*s, *s]),
            None => None,
        }
    }
}

/// Installed model metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InstalledModel {
    /// Model name
    pub name: String,
    /// Local file path
    pub path: PathBuf,
    /// File size in bytes
    pub size: u64,
    /// Catalog entry if from catalog
    pub catalog_entry: Option<ModelCatalogEntry>,
}

/// Model management service
pub struct ModelService {
    /// HuggingFace API client
    api: Api,
    /// Model catalog
    catalog: Vec<ModelCatalogEntry>,
    /// Models directory (defaults to ./models)
    models_dir: PathBuf,
}

impl ModelService {
    /// Create a new ModelService with default settings
    #[allow(dead_code)]
    pub fn new() -> Result<Self> {
        Self::with_models_dir(PathBuf::from("models"))
    }

    /// Create a ModelService with custom models directory
    pub fn with_models_dir(models_dir: PathBuf) -> Result<Self> {
        let api = Api::new().context("Failed to initialize HuggingFace API")?;

        // Load embedded model catalog
        let catalog_toml = include_str!("../../model_catalog.toml");
        let catalog: ModelCatalog = toml::from_str(catalog_toml).context("Failed to parse model_catalog.toml")?;

        Ok(Self {
            api,
            catalog: catalog.models,
            models_dir,
        })
    }

    /// List all models in the catalog
    pub fn list_catalog(&self) -> Result<Vec<ModelCatalogEntry>> {
        Ok(self.catalog.clone())
    }

    /// Get catalog entry for a model by name
    pub fn get_catalog_entry(&self, name: &str) -> Option<&ModelCatalogEntry> {
        self.catalog.iter().find(|m| m.name == name)
    }

    /// List all installed models
    pub async fn list_installed(&self) -> Result<Vec<InstalledModel>> {
        // If the models directory doesn't exist yet, nothing is installed
        if !self.models_dir.exists() {
            return Ok(Vec::new());
        }

        let mut installed = Vec::new();
        let mut entries = fs::read_dir(&self.models_dir).await?;

        while let Some(entry) = entries.next_entry().await? {
            let path = entry.path();

            // Only look at directories (each model has its own directory)
            if !path.is_dir() {
                continue;
            }

            // Check if model.onnx exists in this directory
            let model_path = path.join("model.onnx");
            if !model_path.exists() {
                continue;
            }

            let metadata = fs::metadata(&model_path).await?;
            let size = metadata.len();

            // Extract model name from directory name
            let name = path
                .file_name()
                .and_then(|s| s.to_str())
                .unwrap_or("unknown")
                .to_string();

            // Try to find catalog entry
            let catalog_entry = self.get_catalog_entry(&name).cloned();

            installed.push(InstalledModel {
                name,
                path: model_path,
                size,
                catalog_entry,
            });
        }

        Ok(installed)
    }

    /// Download a model from the catalog
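    ///
    /// In outline, this downloads `model.onnx` via [`Self::pull_from_repo`],
    /// writes or downloads a labels file depending on the task and catalog entry,
    /// generates `config.json` with auto-detected preprocessing, and quantizes to
    /// INT8 if the catalog entry requests it.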
    pub async fn pull(&self, name: &str, progress: Option<&ProgressBar>) -> Result<PathBuf> {
        // Find catalog entry
        let entry = self
            .get_catalog_entry(name)
            .context(format!("Model '{}' not found in catalog", name))?;

        // Create model directory: models/<name>/
        let model_dir = self.models_dir.join(name);
        fs::create_dir_all(&model_dir).await?;

        // Download model.onnx
        let model_path = self
            .pull_from_repo(&entry.repo, &entry.filename, name, progress)
            .await?;

        // Download labels file or write inline classes
        if !entry.classes.is_empty() {
            // Write inline classes to labels.txt
            self.write_inline_labels(name, &entry.classes).await?;
        } else if entry.task == "object-detection" {
            // Try to download labels.json from HuggingFace repo (for COCO models)
            self.pull_labels_from_repo(entry, name, progress).await?;
        } else if entry.task == "image-classification" {
            // Try to download ImageNet labels for classification models
            self.pull_labels_file(name, "imagenet-labels.txt", progress).await?;
        }

        // Generate and write config.json with auto-detection
        self.generate_model_config(entry, &model_path, progress).await?;

        // Auto-quantize if configured
        if let Some(quantize_config) = &entry.quantize {
            if quantize_config.enabled {
                self.quantize_model(&model_path, quantize_config, progress).await?;
            }
        }

        if let Some(pb) = progress {
            pb.set_message(format!("✅ Model '{}' ready at {}", name, model_dir.display()));
        }

        Ok(model_path)
    }

    /// Download a custom model from a HuggingFace repository
    pub async fn pull_from_repo(
        &self,
        repo: &str,
        filename: &str,
        name: &str,
        progress: Option<&ProgressBar>,
    ) -> Result<PathBuf> {
        // Create model directory: models/<name>/
        let model_dir = self.models_dir.join(name);
        fs::create_dir_all(&model_dir).await?;

        // Output path: models/<name>/model.onnx
        let output_path = model_dir.join("model.onnx");

        // Check if already downloaded
        if output_path.exists() {
            if let Some(pb) = progress {
                pb.set_message(format!("Model '{}' already cached", name));
            }
            return Ok(output_path);
        }

        // Download from HuggingFace
        if let Some(pb) = progress {
            pb.set_style(
                ProgressStyle::default_spinner()
                    .template("{spinner:.green} {msg}")
                    .unwrap(),
            );
            pb.set_message(format!("Downloading {} from {}", name, repo));
        }

        // Use HF API to download file
        let repo_api = self.api.model(repo.to_string());
        let hf_cached_path = repo_api
            .get(filename)
            .await
            .context(format!("Failed to download {} from {}", filename, repo))?;

        // Copy from HF cache to project models directory
        fs::copy(&hf_cached_path, &output_path)
            .await
            .context("Failed to copy model to project directory")?;

        if let Some(pb) = progress {
            pb.set_message(format!("Downloaded {} successfully", name));
        }

        Ok(output_path)
    }

    /// Download a labels file (e.g., imagenet-labels.txt) into model directory
    async fn pull_labels_file(
        &self,
        model_name: &str,
        filename: &str,
        progress: Option<&ProgressBar>,
    ) -> Result<PathBuf> {
        // Output path: models/<name>/labels.txt
        let model_dir = self.models_dir.join(model_name);
        let output_path = model_dir.join("labels.txt");

        // Check if already downloaded
        if output_path.exists() {
            if let Some(pb) = progress {
                pb.set_message("Labels file already cached".to_string());
            }
            return Ok(output_path);
        }

        // Map known label files to download URLs
        let url = match filename {
            "imagenet-labels.txt" => "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
            _ => {
                // Unknown labels file: warn via the progress bar and skip the download
                if let Some(pb) = progress {
                    pb.set_message(format!("⚠️  Unknown labels file: {}, skipping", filename));
                }
                return Ok(output_path); // Return path even if not downloaded
            }
        };

        if let Some(pb) = progress {
            pb.set_message(format!("Downloading labels: {}", filename));
        }

        // Download using reqwest
        let client = reqwest::Client::new();
        let response = client
            .get(url)
            .send()
            .await
            .context(format!("Failed to download labels from {}", url))?;

        if !response.status().is_success() {
            anyhow::bail!("Failed to download labels: HTTP {}", response.status());
        }

        let content = response.text().await.context("Failed to read labels content")?;

        // Write to file
        fs::write(&output_path, content)
            .await
            .context("Failed to write labels file")?;

        if let Some(pb) = progress {
            pb.set_message(format!("Downloaded labels: {}", filename));
        }

        Ok(output_path)
    }

    /// Write inline classes to labels.txt
    async fn write_inline_labels(&self, model_name: &str, classes: &[String]) -> Result<()> {
        let model_dir = self.models_dir.join(model_name);
        let labels_path = model_dir.join("labels.txt");

        let content = classes.join("\n");
        fs::write(&labels_path, content)
            .await
            .context("Failed to write inline labels to labels.txt")?;

        Ok(())
    }

    /// Download labels.json from HuggingFace repo and convert to labels.txt
    async fn pull_labels_from_repo(
        &self,
        entry: &ModelCatalogEntry,
        model_name: &str,
        progress: Option<&ProgressBar>,
    ) -> Result<()> {
        let model_dir = self.models_dir.join(model_name);
        let labels_path = model_dir.join("labels.txt");

        // Check if already downloaded
        if labels_path.exists() {
            if let Some(pb) = progress {
                pb.set_message("Labels file already cached".to_string());
            }
            return Ok(());
        }

        // Extract directory path from filename (e.g., "yolov8n/model.onnx" -> "yolov8n")
        let model_dir_in_repo = entry.filename.rsplit_once('/').map(|(dir, _)| dir).unwrap_or("");

        if model_dir_in_repo.is_empty() {
            // No labels.json in repo root, skip
            return Ok(());
        }

        let labels_filename = format!("{}/labels.json", model_dir_in_repo);

        if let Some(pb) = progress {
            pb.set_message(format!("Downloading labels from {}", entry.repo));
        }

        // Download labels.json directly via HTTP
        let url = format!("https://huggingface.co/{}/raw/main/{}", entry.repo, labels_filename);

        let client = reqwest::Client::new();
        let response = match client.get(&url).send().await {
            Ok(resp) if resp.status().is_success() => resp,
            _ => {
                // labels.json not found, skip silently (model might use inline classes)
                return Ok(());
            }
        };

        let json_content = response.text().await.context("Failed to read labels.json response")?;

        let labels: Vec<String> = serde_json::from_str(&json_content).context("Failed to parse labels.json")?;

        // Write to labels.txt (one class per line)
        let content = labels.join("\n");
        fs::write(&labels_path, content)
            .await
            .context("Failed to write labels.txt")?;

        if let Some(pb) = progress {
            pb.set_message(format!("Downloaded {} class labels", labels.len()));
        }

        Ok(())
    }

    /// Fetch preprocessor_config.json from HuggingFace repo
    async fn fetch_hf_preprocessor_config(&self, repo: &str) -> Result<HFPreprocessorConfig> {
        let url = format!("https://huggingface.co/{}/raw/main/preprocessor_config.json", repo);

        let client = reqwest::Client::new();
        let response = client
            .get(&url)
            .send()
            .await
            .context(format!("Failed to fetch from {}", url))?;

        if !response.status().is_success() {
            anyhow::bail!(
                "HuggingFace preprocessor_config.json not found for {} (HTTP {})",
                repo,
                response.status()
            );
        }

        let config: HFPreprocessorConfig = response
            .json()
            .await
            .context("Failed to parse preprocessor_config.json")?;

        Ok(config)
    }

    /// Extract input size from ONNX model metadata
    ///
    /// Note: This is best-effort and may fail depending on the ONNX model format.
    /// Falls back to None if extraction fails.
    fn extract_input_size_from_onnx(&self, model_path: &Path) -> Option<[u32; 2]> {
        use ort::session::Session;

        // Best-effort extraction - don't fail if ONNX model is not parseable
        let session = Session::builder().ok()?.commit_from_file(model_path).ok()?;

        // Get first input tensor
        let _input = session.inputs.first()?;

        // Try to extract dimensions from the input type
        // Note: The ort API has changed between versions, so this is best-effort
        // For now, we'll return None and rely on HF config or presets
        // TODO: Update when ort API is stabilized

        None
    }

    /// Auto-detect preprocessing parameters with fallback priority:
    /// 1. HuggingFace preprocessor_config.json
    /// 2. Preprocessing preset from catalog
    /// 3. ONNX metadata (input size only, use sensible defaults for mean/std)
    async fn auto_detect_preprocessing(
        &self,
        entry: &ModelCatalogEntry,
        model_path: &Path,
        progress: Option<&ProgressBar>,
    ) -> Result<(PreprocessingConfig, [u32; 2])> {
        // Try HuggingFace preprocessor_config.json first
        if let Some(pb) = progress {
            pb.set_message(format!("🔍 Auto-detecting preprocessing for {}", entry.name));
        }

        if let Ok(hf_config) = self.fetch_hf_preprocessor_config(&entry.repo).await {
            tracing::debug!(
                "HF config: size={:?}, crop_size={:?}",
                hf_config.size,
                hf_config.crop_size
            );

            let preprocessing = hf_config.to_preprocessing();
            let input_size = hf_config.input_size().unwrap_or([224, 224]);

            tracing::debug!(
                "Detected preprocessing: mean={:?}, std={:?}, input_size={:?}",
                preprocessing.mean,
                preprocessing.std,
                input_size
            );

            if let Some(pb) = progress {
                pb.set_message(format!(
                    "✅ Auto-detected from HuggingFace (input_size={:?})",
                    input_size
                ));
            }

            return Ok((preprocessing, input_size));
        } else {
            tracing::debug!("Failed to fetch HuggingFace preprocessor config, falling back to preset");
        }

        // Try preprocessing preset
        if let Some(preset_name) = &entry.preprocessing_preset {
            if let Ok(preset) = PreprocessingPreset::from_name(preset_name) {
                let preprocessing = preset.to_config();

                // Try to get input_size from ONNX (best-effort)
                let input_size = self.extract_input_size_from_onnx(model_path).unwrap_or([224, 224]);

                if let Some(pb) = progress {
                    pb.set_message(format!(
                        "✅ Using preset '{}' (input_size={:?})",
                        preset_name, input_size
                    ));
                }

                return Ok((preprocessing, input_size));
            }
        }

        // Fallback: Try ONNX metadata for input_size, use sensible defaults
        let input_size = self.extract_input_size_from_onnx(model_path).unwrap_or([224, 224]);

        let preprocessing = PreprocessingConfig {
            mean: [0.0, 0.0, 0.0],
            std: [1.0, 1.0, 1.0],
            channel_order: "RGB".to_string(),
        };

        if let Some(pb) = progress {
            pb.set_message(format!(
                "⚠️  Using fallback preprocessing (input_size={:?}). Consider editing config.json",
                input_size
            ));
        }

        Ok((preprocessing, input_size))
    }

    /// Generate and write config.json for a model with auto-detected values
    async fn generate_model_config(
        &self,
        entry: &ModelCatalogEntry,
        model_path: &Path,
        progress: Option<&ProgressBar>,
    ) -> Result<()> {
        let model_dir = self.models_dir.join(&entry.name);
        let config_path = model_dir.join("config.json");

        // Auto-detect preprocessing
        let (preprocessing, input_size) = self.auto_detect_preprocessing(entry, model_path, progress).await?;

        // Determine num_classes
        let num_classes = if entry.task == "object-detection" {
            entry.classes.len().max(1)
        } else {
            1000 // Default for image classification (ImageNet)
        };

        // Convert catalog entry to ModelConfig
        let config = ModelConfig {
            name: entry.name.clone(),
            task: entry.task.clone(),
            repo: entry.repo.clone(),
            filename: entry.filename.clone(),
            input_size,
            preprocessing,
            num_classes,
            labels_file: "labels.txt".to_string(),
            custom_labels: CustomLabelsConfig::default(),
        };

        // Write config as pretty JSON
        let json = serde_json::to_string_pretty(&config).context("Failed to serialize model config")?;

        fs::write(&config_path, json)
            .await
            .context("Failed to write model config.json")?;

        if let Some(pb) = progress {
            pb.set_message(format!("📝 Wrote config to {}", config_path.display()));
        }

        Ok(())
    }

    /// Quantize a model to INT8 (calls embedded Python script)
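    ///
    /// Writes `model-int8.onnx` next to the input model. Requires a Python 3
    /// interpreter; if quantization fails, the error message suggests installing
    /// the `onnx` and `onnxruntime` packages via pip.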
    async fn quantize_model(
        &self,
        model_path: &Path,
        config: &QuantizeConfig,
        progress: Option<&ProgressBar>,
    ) -> Result<PathBuf> {
        let int8_path = model_path.with_file_name("model-int8.onnx");

        // Skip if already quantized
        if int8_path.exists() {
            if let Some(pb) = progress {
                pb.set_message("INT8 model already cached");
            }
            return Ok(int8_path);
        }

        if let Some(pb) = progress {
            pb.set_message("Quantizing model to INT8...");
        }

        match config.method.as_str() {
            "dynamic_int8" => {
                self.quantize_dynamic_int8(model_path, &int8_path).await?;
            }
            _ => {
                anyhow::bail!("Unsupported quantization method: {}", config.method);
            }
        }

        if let Some(pb) = progress {
            pb.set_message("✅ INT8 model ready");
        }

        Ok(int8_path)
    }

    /// Perform dynamic INT8 quantization using ONNX Runtime tools
    async fn quantize_dynamic_int8(&self, input: &Path, output: &Path) -> Result<()> {
        // Locate a Python 3 interpreter to run the quantization script
        let python = self.find_python()?;

        // Embed quantization script in binary
        let script = include_str!("../../scripts/quantize_int8.py");
        let script_path = std::env::temp_dir().join("mecha10_quantize_int8.py");
        fs::write(&script_path, script).await?;

        // Run Python quantization script
        let output_result = tokio::process::Command::new(&python)
            .arg(&script_path)
            .arg(input)
            .arg(output)
            .output()
            .await?;

        // Cleanup temp script
        let _ = fs::remove_file(&script_path).await;

        if !output_result.status.success() {
            let stderr = String::from_utf8_lossy(&output_result.stderr);
            anyhow::bail!(
                "Quantization failed: {}\n\nTip: Install with 'pip install onnx onnxruntime'",
                stderr
            );
        }

        Ok(())
    }

    /// Find Python 3 executable
    fn find_python(&self) -> Result<String> {
        for candidate in &["python3", "python"] {
            if which::which(candidate).is_ok() {
                return Ok(candidate.to_string());
            }
        }
        anyhow::bail!("Python 3 not found. Install with: brew install python3 (macOS) or apt install python3 (Linux)")
    }

    /// Remove an installed model
    pub async fn remove(&self, name: &str) -> Result<()> {
        let model_dir = self.models_dir.join(name);

        if !model_dir.exists() {
            anyhow::bail!("Model '{}' is not installed", name);
        }

        fs::remove_dir_all(&model_dir)
            .await
            .context(format!("Failed to remove model '{}'", name))?;

        Ok(())
    }

    /// Get the path to a model's ONNX file
    ///
    /// Note: This method is no longer used by CLI (replaced by node-runner in Phase 2).
    /// Kept for potential future use.
    #[allow(dead_code)]
    pub fn get_model_path(&self, name: &str) -> PathBuf {
        self.models_dir.join(name).join("model.onnx")
    }

    /// Check if a model is installed
    ///
    /// Note: This method is no longer used by CLI (replaced by node-runner in Phase 2).
    /// Kept for potential future use.
    #[allow(dead_code)]
    pub async fn is_installed(&self, name: &str) -> bool {
        let model_path = self.get_model_path(name);
        model_path.exists()
    }

    /// Get info about a model (catalog or installed)
    pub async fn info(&self, name: &str) -> Result<ModelInfo> {
        let catalog_entry = self.get_catalog_entry(name).cloned();
        let installed = self.list_installed().await?;
        let installed_info = installed.iter().find(|m| m.name == name).cloned();

        Ok(ModelInfo {
            name: name.to_string(),
            catalog_entry,
            installed_info,
        })
    }

    /// Validate that a model file looks like a valid ONNX file
    #[allow(dead_code)]
    pub async fn validate(&self, path: &Path) -> Result<bool> {
        // Basic validation: check the file exists and has a .onnx extension
        if !path.exists() {
            return Ok(false);
        }

        if path.extension().and_then(|s| s.to_str()) != Some("onnx") {
            return Ok(false);
        }

        // TODO: Add real ONNX format validation using the ort crate.
        // ONNX files are Protocol Buffers, so a proper check would parse the
        // protobuf header; for now we only verify the file is non-trivially sized.
        let bytes = fs::read(path).await?;

        Ok(bytes.len() > 4)
    }
}

/// Combined model information (catalog + installed)
#[derive(Debug, Clone, Serialize)]
pub struct ModelInfo {
    pub name: String,
    pub catalog_entry: Option<ModelCatalogEntry>,
    pub installed_info: Option<InstalledModel>,
}

impl ModelInfo {
    /// Check if model is installed
    #[allow(dead_code)]
    pub fn is_installed(&self) -> bool {
        self.installed_info.is_some()
    }

    /// Check if model is in catalog
    #[allow(dead_code)]
    pub fn is_in_catalog(&self) -> bool {
        self.catalog_entry.is_some()
    }
}
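
// A minimal sketch of unit tests for the pure helpers above (preset parsing and
// HF preprocessor-config interpretation). The JSON sample is hypothetical but
// mirrors the shapes `HFPreprocessorConfig` and `HFSize` are written to accept.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn preset_from_name_is_case_insensitive_and_rejects_unknown_names() {
        assert!(PreprocessingPreset::from_name("ImageNet").is_ok());
        assert!(PreprocessingPreset::from_name("YOLO").is_ok());
        assert!(PreprocessingPreset::from_name("not-a-preset").is_err());
    }

    #[test]
    fn hf_config_prefers_crop_size_over_size() {
        // crop_size is the actual model input, so it should win over the resize hint in `size`.
        let json = r#"{
            "image_mean": [0.485, 0.456, 0.406],
            "image_std": [0.229, 0.224, 0.225],
            "size": { "shortest_edge": 256 },
            "crop_size": { "height": 224, "width": 224 }
        }"#;
        let config: HFPreprocessorConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.input_size(), Some([224, 224]));

        let preprocessing = config.to_preprocessing();
        assert!((preprocessing.mean[0] - 0.485).abs() < 1e-6);
        assert_eq!(preprocessing.channel_order, "RGB");
    }
}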