offline_intelligence/model_runtime/
mod.rs

1//! Model Runtime Abstraction Layer
2//!
3//! Provides a unified interface for hosting multiple model formats (GGUF, ONNX, 
4//! TensorRT, Safetensors, GGML, CoreML) through a trait-based runtime system.
5//!
6//! Architecture:
7//! - Each model format has its own runtime adapter
8//! - All runtimes expose an OpenAI-compatible HTTP API
9//! - Maintains a 1-hop architecture: Rust → HTTP → Runtime Server
10//! - Automatic format detection from the file extension
11
// Submodule declarations for the runtime layer.
// `runtime_trait` is where the shared items re-exported further down
// (`ModelRuntime`, `ModelFormat`, `RuntimeConfig`, `InferenceRequest`,
// `InferenceResponse`) are defined.
12pub mod runtime_trait;
// One submodule per model format listed in the module-level docs:
// GGUF, ONNX, TensorRT, Safetensors, GGML, CoreML.
13pub mod gguf_runtime;
14pub mod onnx_runtime;
15pub mod tensorrt_runtime;
16pub mod safetensors_runtime;
17pub mod ggml_runtime;
18pub mod coreml_runtime;
// Supporting submodules; the items re-exported from them below are
// `FormatDetector`, `HardwareCapabilities`, and `RuntimeManager`.
19pub mod format_detector;
20pub mod platform_detector;
21pub mod runtime_manager;
22
// Public re-exports: these let callers name the items directly from this
// module rather than through the submodule that defines them.
23pub use runtime_trait::{ModelRuntime, ModelFormat, RuntimeConfig, InferenceRequest, InferenceResponse};
// One runtime type re-exported from each per-format submodule.
// NOTE(review): presumably each of these implements `ModelRuntime` — confirm
// against the individual runtime modules.
24pub use gguf_runtime::GGUFRuntime;
25pub use onnx_runtime::ONNXRuntime;
26pub use tensorrt_runtime::TensorRTRuntime;
27pub use safetensors_runtime::SafetensorsRuntime;
28pub use ggml_runtime::GGMLRuntime;
29pub use coreml_runtime::CoreMLRuntime;
// Re-exports from the supporting submodules.
30pub use format_detector::FormatDetector;
31pub use platform_detector::HardwareCapabilities;
32pub use runtime_manager::RuntimeManager;
offline_intelligence/model_runtime/mod.rs

offline_intelligence/model_runtime/
mod.rs