Skip to main content

batuta/serve/
mod.rs

1//! Model Serving Ecosystem
2//!
3//! Unified interface for local and remote model serving across the ML ecosystem.
4//!
5//! ## Components
6//!
7//! - `ChatTemplateEngine` - Unified prompt templating (Llama2, Mistral, ChatML)
8//! - `BackendSelector` - Intelligent backend selection with privacy tiers
9//! - `CostCircuitBreaker` - Daily budget limits to prevent runaway costs
10//! - `ContextManager` - Automatic token counting and truncation
11//! - `FailoverManager` - Stateful streaming failover with context preservation
12//! - `SpilloverRouter` - Hybrid cloud spillover routing
13//! - `LambdaDeployer` - AWS Lambda inference deployment
14//!
15//! ## Toyota Way Principles
16//!
17//! - Standardized Work: Chat templates ensure consistent model interaction
18//! - Poka-Yoke: Privacy gates prevent accidental data leakage
19//! - Jidoka: Stateful failover maintains context on errors
20//! - Muda Elimination: Cost circuit breakers prevent waste
21
22pub mod backends; // backend selection: `BackendSelector`, `ServingBackend`, privacy/latency tiers
23#[cfg(feature = "banco")] // the module on the next line is compiled only with the `banco` feature
24pub mod banco;
25pub mod circuit_breaker; // cost budget limits: `CostCircuitBreaker`, `TokenPricing`
26pub mod context; // token counting and truncation: `ContextManager`, `TruncationStrategy`
27pub mod failover; // streaming failover: `FailoverManager`, `StreamingContext`
28pub mod lambda; // AWS Lambda inference deployment: `LambdaDeployer`, `LambdaConfig`
29pub mod router; // hybrid cloud spillover routing: `SpilloverRouter`, `RoutingDecision`
30pub mod templates; // prompt templating: `ChatTemplateEngine`, `TemplateFormat`
31
32// Re-export key types for convenience
33pub use backends::{BackendSelector, LatencyTier, PrivacyTier, ServingBackend};
34pub use circuit_breaker::{CircuitBreakerConfig, CostCircuitBreaker, TokenPricing};
35pub use context::{ContextManager, ContextWindow, TokenEstimator, TruncationStrategy};
36pub use failover::{FailoverConfig, FailoverManager, StreamingContext};
37pub use lambda::{LambdaConfig, LambdaDeployer, LambdaRuntime};
38pub use router::{RejectReason, RouterConfig, RoutingDecision, SpilloverRouter};
39pub use templates::{ChatMessage, ChatTemplateEngine, Role, TemplateFormat};