use crate::{
ApiError, BoxFuture, ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, Error,
Prefix, storage_key,
};
use std::time::Instant;
/// Per-request metadata passed by reference to the `Extension` hooks
/// (`on_request`, `on_response`, `on_chunk`, `on_error`).
///
/// Field meanings below are inferred from names and types only; the code
/// that populates this struct is not visible here.
#[derive(Clone, Debug)]
pub struct RequestContext {
/// Identifier for this request (presumably unique per request — confirm at the construction site).
pub request_id: String,
/// Model name associated with the request.
pub model: String,
/// Provider name associated with the request.
pub provider: String,
/// Optional key name; presumably the name of the API key in use, `None` when not applicable — TODO confirm.
pub key_name: Option<String>,
/// Streaming flag; presumably `true` when the response is delivered as chunks — confirm against caller.
pub is_stream: bool,
/// Monotonic timestamp; presumably captured when handling began, so `started_at.elapsed()` gives the request latency — confirm.
pub started_at: Instant,
}
/// Error value returned by `Extension::on_request`, pairing a numeric status
/// with an `ApiError` payload.
pub struct ExtensionError {
/// Numeric status; presumably an HTTP status code sent to the client — TODO confirm.
pub status: u16,
/// Error payload built via `ApiError::new` in `ExtensionError::new`.
pub body: ApiError,
}
impl ExtensionError {
    /// Creates an `ExtensionError` with the given `status` and an `ApiError`
    /// body.
    ///
    /// `message` and `kind` are forwarded, unchanged and in that order, to
    /// `ApiError::new`; the resulting value becomes the `body` field.
    pub fn new(status: u16, message: impl Into<String>, kind: impl Into<String>) -> Self {
        let body = ApiError::new(message, kind);
        Self { status, body }
    }
}
/// Hook interface for extensions.
///
/// Implementors must be `Send + Sync` and must provide `name` and `prefix`.
/// Every other method has a default implementation that does nothing, so an
/// extension only overrides the hooks it needs.
///
/// NOTE(review): when and in what order the hooks are invoked is decided by
/// the caller, which is not visible in this file.
pub trait Extension: Send + Sync {
    /// Returns the name of this extension.
    fn name(&self) -> &str;

    /// Returns the `Prefix` used by the default `storage_key` implementation.
    fn prefix(&self) -> Prefix;

    /// Builds a storage key for `suffix` by passing this extension's prefix
    /// and the suffix to the crate-level `storage_key` function.
    fn storage_key(&self, suffix: &[u8]) -> Vec<u8> {
        let prefix = self.prefix();
        storage_key(&prefix, suffix)
    }

    /// Cache-lookup hook.
    ///
    /// The default implementation resolves to `None`. Presumably returning
    /// `Some(response)` lets the caller serve that response instead of
    /// contacting the provider — confirm against the caller.
    fn on_cache_lookup(
        &self,
        _request: &ChatCompletionRequest,
    ) -> BoxFuture<'_, Option<ChatCompletionResponse>> {
        Box::pin(async { None })
    }

    /// Request hook.
    ///
    /// The default implementation resolves to `Ok(())`. An implementation
    /// may resolve to an `ExtensionError` instead; presumably the caller then
    /// rejects the request with that status and body — confirm.
    fn on_request(&self, _ctx: &RequestContext) -> BoxFuture<'_, Result<(), ExtensionError>> {
        Box::pin(async { Ok(()) })
    }

    /// Response hook, given the request context, the request, and a complete
    /// response.
    ///
    /// The default implementation does nothing.
    fn on_response(
        &self,
        _ctx: &RequestContext,
        _request: &ChatCompletionRequest,
        _response: &ChatCompletionResponse,
    ) -> BoxFuture<'_, ()> {
        Box::pin(async {})
    }

    /// Chunk hook, given the request context and a single
    /// `ChatCompletionChunk`.
    ///
    /// The default implementation does nothing.
    fn on_chunk(&self, _ctx: &RequestContext, _chunk: &ChatCompletionChunk) -> BoxFuture<'_, ()> {
        Box::pin(async {})
    }

    /// Error hook, given the request context and the `Error` that occurred.
    ///
    /// The default implementation does nothing.
    fn on_error(&self, _ctx: &RequestContext, _error: &Error) -> BoxFuture<'_, ()> {
        Box::pin(async {})
    }
}