pub mod input;
pub use input::{build_routed_pipeline, build_routed_pipeline_with_preprocessor};
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use dynamo_runtime::{discovery::ModelCardInstanceId, pipeline::RouterMode};
use crate::{
backend::ExecutionContext, discovery::LoadThresholdConfig, engines::StreamingEngine,
kv_router::KvRouterConfig, local_model::LocalModel, model_card::ModelDeploymentCard,
types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine,
};
/// Shared, thread-safe callback that asynchronously produces a chat-completions engine.
///
/// The callback is stored behind an [`Arc`] and is `Send + Sync`. It is called with a
/// [`ModelCardInstanceId`] and a [`ModelDeploymentCard`] and returns a pinned, boxed,
/// `Send` future that resolves to
/// `anyhow::Result<OpenAIChatCompletionsStreamingEngine>`.
///
/// NOTE(review): presumably invoked once per discovered model instance to build its
/// chat engine; the call site is not visible in this file, so confirm against callers.
pub type ChatEngineFactoryCallback = Arc<
dyn Fn(
ModelCardInstanceId,
ModelDeploymentCard,
) -> Pin<
Box<dyn Future<Output = anyhow::Result<OpenAIChatCompletionsStreamingEngine>> + Send>,
> + Send
+ Sync,
>;
/// Routing-related settings grouped into a single value.
///
/// Every field is public, and the type derives `Default`, so it can be built either
/// field-by-field, via `RouterConfig::default()`, or through [`RouterConfig::new`] and
/// the `with_*` builder methods.
#[derive(Debug, Clone, Default)]
pub struct RouterConfig {
/// Routing mode to use (see [`RouterMode`] for the available modes).
pub router_mode: RouterMode,
/// Settings for the KV router.
/// NOTE(review): presumably only consulted when `router_mode` selects KV routing;
/// confirm against the consumer of this struct.
pub kv_router_config: KvRouterConfig,
/// Load-threshold settings (see [`LoadThresholdConfig`]).
pub load_threshold_config: LoadThresholdConfig,
/// Decode-fallback flag; [`RouterConfig::new`] initialises it to `false`.
/// NOTE(review): the exact fallback behaviour is defined by the consumer and is not
/// visible in this file.
pub decode_fallback: bool,
}
impl RouterConfig {
    /// Creates a configuration from the routing mode and the KV-router settings.
    ///
    /// The remaining fields start from fixed values: `load_threshold_config` is
    /// `LoadThresholdConfig::default()` and `decode_fallback` is `false`.
    pub fn new(router_mode: RouterMode, kv_router_config: KvRouterConfig) -> Self {
        let load_threshold_config = LoadThresholdConfig::default();
        let decode_fallback = false;
        Self {
            router_mode,
            kv_router_config,
            load_threshold_config,
            decode_fallback,
        }
    }

    /// Returns this configuration with `load_threshold_config` replaced by `config`.
    ///
    /// Consumes `self`; all other fields are carried over unchanged.
    pub fn with_load_threshold_config(self, config: LoadThresholdConfig) -> Self {
        Self {
            load_threshold_config: config,
            ..self
        }
    }

    /// Returns this configuration with `decode_fallback` set to the given value.
    ///
    /// Consumes `self`; all other fields are carried over unchanged.
    pub fn with_decode_fallback(self, decode_fallback: bool) -> Self {
        Self {
            decode_fallback,
            ..self
        }
    }
}
/// Describes which kind of engine backs a model and the data needed to use it.
///
/// Every variant carries a boxed [`LocalModel`], which
/// [`EngineConfig::local_model`] exposes uniformly.
/// NOTE(review): the `Box` presumably keeps the enum itself small; confirm the intent.
#[derive(Clone)]
pub enum EngineConfig {
/// No engine is held in this value; only the model and an optional factory.
/// NOTE(review): the name suggests engines are discovered at runtime; confirm
/// against the code that consumes this variant.
Dynamic {
/// Model associated with this configuration.
model: Box<LocalModel>,
/// Optional callback for building chat-completions engines; `None` when no custom
/// factory was supplied.
chat_engine_factory: Option<ChatEngineFactoryCallback>,
},
/// An engine held directly as a shared [`StreamingEngine`] trait object.
/// NOTE(review): the name suggests a text-in/text-out engine running in this
/// process; confirm against the consumer.
InProcessText {
/// Shared handle to the streaming engine.
engine: Arc<dyn StreamingEngine>,
/// Model associated with this configuration.
model: Box<LocalModel>,
},
/// An engine held as an [`ExecutionContext`].
/// NOTE(review): the name suggests a token-level engine running in this process;
/// confirm against the consumer.
InProcessTokens {
/// Execution context used as the engine.
engine: ExecutionContext,
/// Model associated with this configuration.
model: Box<LocalModel>,
/// NOTE(review): presumably marks this engine as a prefill worker; semantics are
/// defined by the consumer and are not visible in this file.
is_prefill: bool,
},
}
impl EngineConfig {
pub fn local_model(&self) -> &LocalModel {
use EngineConfig::*;
match self {
Dynamic { model, .. } => model,
InProcessText { model, .. } => model,
InProcessTokens { model, .. } => model,
}
}
pub fn chat_engine_factory(&self) -> Option<&ChatEngineFactoryCallback> {
match self {
EngineConfig::Dynamic {
chat_engine_factory,
..
} => chat_engine_factory.as_ref(),
_ => None,
}
}
}