Struct ModelRouter

Source

pub struct ModelRouter<C, S, A> { /* private fields */ }

Expand description

Routes each request to a model tier chosen by an LLM classifier.

A ModelRouter wraps a classifier provider plus three tier providers and, for every request, makes one extra classifier call to decide whether the work is Simple, Moderate, or Complex, then dispatches to the fast, capable, or advanced provider respectively. If the classifier call fails (error or rate limit), the router conservatively treats the request as Complex rather than risk under-serving it (see classify).

ModelRouter itself implements LlmProvider, so it can be passed anywhere a &dyn LlmProvider is expected (run_structured, RefreshingProvider, etc.). chat classifies then routes; the streaming chat_stream classifies first, then streams the chosen tier.

Note: the fast and capable tiers currently share one provider type S (only advanced has its own type A), so mixing, e.g., a Gemini fast tier with an OpenAI capable tier requires putting both behind the same concrete type. Use Arc<dyn LlmProvider> for all three tiers to mix providers freely.

Struct ModelRouter Copy item path

Implementations§

impl<C, S, A> ModelRouter<C, S, A> where C: LlmProvider, S: LlmProvider, A: LlmProvider,

pub const fn new( classifier: C, fast: S, capable: S, advanced: A, ) -> ModelRouter<C, S, A>

pub async fn classify( &self, request: &ChatRequest, ) -> Result<TaskComplexity, Error>

§Errors

pub async fn route(&self, request: ChatRequest) -> Result<ChatOutcome, Error>

§Errors

pub async fn route_with_tier( &self, request: ChatRequest, tier: ModelTier, ) -> Result<ChatOutcome, Error>

§Errors

pub const fn fast_provider(&self) -> &S

pub const fn capable_provider(&self) -> &S

pub const fn advanced_provider(&self) -> &A

Trait Implementations§

impl<C, S, A> LlmProvider for ModelRouter<C, S, A> where C: LlmProvider, S: LlmProvider, A: LlmProvider,

fn model(&self) -> &str

fn provider(&self) -> &'static str

fn chat<'life0, 'async_trait>( &'life0 self, request: ChatRequest, ) -> Pin<Box<dyn Future<Output = Result<ChatOutcome, Error>> + Send + 'async_trait>> where 'life0: 'async_trait, ModelRouter<C, S, A>: 'async_trait,

fn chat_stream( &self, request: ChatRequest, ) -> Pin<Box<dyn Stream<Item = Result<StreamDelta, Error>> + Send + '_>>

fn configured_thinking(&self) -> Option<&ThinkingConfig>

fn capabilities(&self) -> Option<&'static ModelCapabilities>

fn validate_thinking_config( &self, thinking: Option<&ThinkingConfig>, ) -> Result<(), Error>

fn resolve_thinking_config( &self, request_thinking: Option<&ThinkingConfig>, ) -> Result<Option<ThinkingConfig>, Error>

fn default_max_tokens(&self) -> u32

fn structured_output_support(&self) -> StructuredOutputSupport

Auto Trait Implementations§

impl<C, S, A> Freeze for ModelRouter<C, S, A> where C: Freeze, S: Freeze, A: Freeze,

impl<C, S, A> RefUnwindSafe for ModelRouter<C, S, A> where C: RefUnwindSafe, S: RefUnwindSafe, A: RefUnwindSafe,

impl<C, S, A> Send for ModelRouter<C, S, A> where C: Send, S: Send, A: Send,

impl<C, S, A> Sync for ModelRouter<C, S, A> where C: Sync, S: Sync, A: Sync,

impl<C, S, A> Unpin for ModelRouter<C, S, A> where C: Unpin, S: Unpin, A: Unpin,

impl<C, S, A> UnsafeUnpin for ModelRouter<C, S, A> where C: UnsafeUnpin, S: UnsafeUnpin, A: UnsafeUnpin,

impl<C, S, A> UnwindSafe for ModelRouter<C, S, A> where C: UnwindSafe, S: UnwindSafe, A: UnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> FutureExt for T

fn with_context(self, otel_cx: Context) -> WithContext<Self>

fn with_current_context(self) -> WithContext<Self>

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Sized + Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Sized + Policy<B, E>, P: Policy<B, E>,

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct ModelRouter

impl<C, S, A> ModelRouter<C, S, A>
where C: LlmProvider, S: LlmProvider, A: LlmProvider,

impl<C, S, A> LlmProvider for ModelRouter<C, S, A>
where C: LlmProvider, S: LlmProvider, A: LlmProvider,

fn chat<'life0, 'async_trait>( &'life0 self, request: ChatRequest, ) -> Pin<Box<dyn Future<Output = Result<ChatOutcome, Error>> + Send + 'async_trait>>
where 'life0: 'async_trait, ModelRouter<C, S, A>: 'async_trait,

impl<C, S, A> Freeze for ModelRouter<C, S, A>
where C: Freeze, S: Freeze, A: Freeze,

impl<C, S, A> RefUnwindSafe for ModelRouter<C, S, A>
where C: RefUnwindSafe, S: RefUnwindSafe, A: RefUnwindSafe,

impl<C, S, A> Send for ModelRouter<C, S, A>
where C: Send, S: Send, A: Send,

impl<C, S, A> Sync for ModelRouter<C, S, A>
where C: Sync, S: Sync, A: Sync,

impl<C, S, A> Unpin for ModelRouter<C, S, A>
where C: Unpin, S: Unpin, A: Unpin,

impl<C, S, A> UnsafeUnpin for ModelRouter<C, S, A>
where C: UnsafeUnpin, S: UnsafeUnpin, A: UnsafeUnpin,

impl<C, S, A> UnwindSafe for ModelRouter<C, S, A>
where C: UnwindSafe, S: UnwindSafe, A: UnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,