use std::sync::Arc;
use anyhow::Result;
use dynamo_runtime::{
pipeline::{AsyncEngine, ManyOut, PushRouter, SingleIn},
protocols::annotated::Annotated,
};
use crate::{
kv_router::KvPushRouter,
protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest},
};
/// Router backend used for prefill requests, abstracting over the two
/// supported routing implementations.
///
/// Each variant holds its router behind an [`Arc`], so the derived `Clone`
/// only bumps a reference count and every clone shares the same router.
#[derive(Clone)]
pub(super) enum InnerPrefillRouter {
    /// Routes through a shared [`KvPushRouter`].
    // NOTE(review): presumably this variant picks workers based on KV-cache
    // state — confirm against `kv_router`.
    KvRouter(Arc<KvPushRouter>),
    /// Routes through a shared [`PushRouter`] that takes a
    /// [`PreprocessedRequest`] and yields `Annotated<LLMEngineOutput>` items.
    SimpleRouter(Arc<PushRouter<PreprocessedRequest, Annotated<LLMEngineOutput>>>),
}
impl InnerPrefillRouter {
    /// Forwards `request` to the wrapped router and returns its response stream.
    ///
    /// Dispatch depends on the variant:
    ///
    /// * `KvRouter`: always calls the router's `generate`; `target_worker`
    ///   is not consulted on this path.
    /// * `SimpleRouter`: calls `direct` with the given worker id when
    ///   `target_worker` is `Some`, otherwise falls back to `generate`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying router call produces.
    pub(super) async fn generate_to_worker(
        &self,
        request: SingleIn<PreprocessedRequest>,
        target_worker: Option<u64>,
    ) -> Result<ManyOut<Annotated<LLMEngineOutput>>> {
        match self {
            // NOTE(review): the explicit target is ignored here; presumably the
            // KV router resolves the destination itself — confirm with callers.
            Self::KvRouter(kv) => kv.generate(request).await,
            Self::SimpleRouter(simple) => match target_worker {
                Some(worker_id) => simple.direct(request, worker_id).await,
                None => simple.generate(request).await,
            },
        }
    }

    /// Asks the wrapped router which worker it would pick next.
    ///
    /// Only the `SimpleRouter` variant supports this and returns the result
    /// of its own `select_next_worker`; the `KvRouter` variant always
    /// returns `None`.
    pub(super) fn select_next_worker(&self) -> Option<u64> {
        match self {
            Self::KvRouter(_) => None,
            Self::SimpleRouter(simple) => simple.select_next_worker(),
        }
    }
}