alith_interface/llms/mod.rs

use crate::requests::{
    completion::{
        error::CompletionError, request::CompletionRequest, response::CompletionResponse,
    },
    embeddings::{EmbeddingsError, EmbeddingsRequest, EmbeddingsResponse},
    logit_bias::LogitBias,
};
use alith_models::tokenizer::Tokenizer;
use alith_prompt::{LLMPrompt, PromptTokenizer};
pub mod api;
pub mod local;
use api::anthropic::AnthropicBackend;
use api::generic_openai::GenericApiBackend;
use api::openai::OpenAIBackend;
use std::sync::Arc;

/// The set of supported API backends. Each variant wraps the concrete
/// client for that provider.
pub enum LLMBackend {
    OpenAI(OpenAIBackend),
    Anthropic(AnthropicBackend),
    GenericApi(GenericApiBackend),
}

impl LLMBackend {
    /// Dispatches a completion request to the underlying backend.
    pub(crate) async fn completion_request(
        &self,
        request: &CompletionRequest,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        match self {
            LLMBackend::OpenAI(b) => b.completion_request(request).await,
            LLMBackend::Anthropic(b) => b.completion_request(request).await,
            LLMBackend::GenericApi(b) => b.completion_request(request).await,
        }
    }

    /// Dispatches an embeddings request. Only the OpenAI-compatible
    /// backends implement embeddings.
    pub(crate) async fn embeddings_request(
        &self,
        request: &EmbeddingsRequest,
    ) -> crate::Result<EmbeddingsResponse, EmbeddingsError> {
        match self {
            LLMBackend::OpenAI(b) => b.embeddings_request(request).await,
            LLMBackend::GenericApi(b) => b.embeddings_request(request).await,
            _ => unimplemented!(),
        }
    }

    /// Clears any cached prompt by sending a request with prompt caching
    /// disabled and zero response tokens requested.
    pub async fn clear_cache(
        self: &Arc<Self>,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        let mut request = CompletionRequest::new(Arc::clone(self));
        request.config.cache_prompt = false;
        request.config.requested_response_tokens = Some(0);
        request.request().await
    }

    /// Caches `prompt` by sending it with prompt caching enabled and zero
    /// response tokens requested.
    pub async fn set_cache(
        self: &Arc<Self>,
        prompt: &LLMPrompt,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        let mut request = CompletionRequest::new(Arc::clone(self));
        request.config.cache_prompt = true;
        request.prompt = prompt.clone();
        request.config.requested_response_tokens = Some(0);
        request.request().await
    }

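    // Illustrative sketch (comments only, not part of the API surface): the
    // two methods above can be used as a pair. Here `backend` is assumed to
    // be an `Arc<LLMBackend>` built elsewhere and `prompt` a populated
    // `LLMPrompt`.
    //
    //     backend.set_cache(&prompt).await?;  // prime the prompt cache
    //     // ... issue requests that reuse the cached prompt ...
    //     backend.clear_cache().await?;       // drop the cached prompt
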
    /// Creates a new API prompt configured with this backend's tokenizer and
    /// its per-message/per-name token overheads.
    pub fn new_prompt(&self) -> LLMPrompt {
        match self {
            LLMBackend::OpenAI(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
            LLMBackend::Anthropic(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
            LLMBackend::GenericApi(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
        }
    }

    /// Returns the total number of tokens the given prompt will consume.
    pub fn get_total_prompt_tokens(&self, prompt: &LLMPrompt) -> crate::Result<u64> {
        match self {
            LLMBackend::OpenAI(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
            LLMBackend::Anthropic(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
            LLMBackend::GenericApi(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
        }
    }

    pub fn model_id(&self) -> &str {
        match self {
            LLMBackend::OpenAI(b) => &b.model.model_base.model_id,
            LLMBackend::Anthropic(b) => &b.model.model_base.model_id,
            LLMBackend::GenericApi(b) => &b.model.model_base.model_id,
        }
    }

    pub fn model_ctx_size(&self) -> u64 {
        match self {
            LLMBackend::OpenAI(b) => b.model.model_base.model_ctx_size,
            LLMBackend::Anthropic(b) => b.model.model_base.model_ctx_size,
            LLMBackend::GenericApi(b) => b.model.model_base.model_ctx_size,
        }
    }

    pub fn inference_ctx_size(&self) -> u64 {
        match self {
            LLMBackend::OpenAI(b) => b.model.model_base.inference_ctx_size,
            LLMBackend::Anthropic(b) => b.model.model_base.inference_ctx_size,
            LLMBackend::GenericApi(b) => b.model.model_base.inference_ctx_size,
        }
    }

    pub fn tokenizer(&self) -> &Arc<Tokenizer> {
        match self {
            LLMBackend::OpenAI(b) => &b.model.model_base.tokenizer,
            LLMBackend::Anthropic(b) => &b.model.model_base.tokenizer,
            LLMBackend::GenericApi(b) => &b.model.model_base.tokenizer,
        }
    }

    // Clones the model tokenizer and coerces it to the `PromptTokenizer`
    // trait object expected by `alith_prompt`.
    fn prompt_tokenizer(&self) -> Arc<dyn PromptTokenizer> {
        match self {
            LLMBackend::OpenAI(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
            LLMBackend::Anthropic(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
            LLMBackend::GenericApi(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
        }
    }

    /// Builds the backend-specific logit bias in place, if one is set.
    /// Anthropic does not support logit bias, so that arm is unreachable.
    pub fn build_logit_bias(&self, logit_bias: &mut Option<LogitBias>) -> crate::Result<()> {
        if let Some(logit_bias) = logit_bias {
            match self {
                LLMBackend::OpenAI(_) => logit_bias.build_openai(self.tokenizer())?,
                LLMBackend::Anthropic(_) => unreachable!("Anthropic does not support logit bias"),
                LLMBackend::GenericApi(_) => logit_bias.build_openai(self.tokenizer())?,
            };
        }
        Ok(())
    }

    /// Returns the OpenAI backend, or errors if this is a different backend.
    pub fn openai(&self) -> crate::Result<&api::openai::OpenAIBackend> {
        match self {
            LLMBackend::OpenAI(b) => Ok(b),
            _ => crate::bail!("Backend is not openai"),
        }
    }

    /// Returns the Anthropic backend, or errors if this is a different backend.
    pub fn anthropic(&self) -> crate::Result<&api::anthropic::AnthropicBackend> {
        match self {
            LLMBackend::Anthropic(b) => Ok(b),
            _ => crate::bail!("Backend is not anthropic"),
        }
    }

    /// Returns the generic OpenAI-compatible backend, or errors if this is a
    /// different backend.
    pub fn generic_api(&self) -> crate::Result<&api::generic_openai::GenericApiBackend> {
        match self {
            LLMBackend::GenericApi(b) => Ok(b),
            _ => crate::bail!("Backend is not generic_api"),
        }
    }

    /// Shuts down the backend. Currently a no-op for every API backend.
    pub fn shutdown(&self) {
        match self {
            LLMBackend::OpenAI(_) => (),
            LLMBackend::Anthropic(_) => (),
            LLMBackend::GenericApi(_) => (),
        }
    }
}
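
// --- Illustrative usage (sketch only; not compiled as part of this module) ---
//
// Within the crate, a completion is issued by building a `CompletionRequest`
// around an `Arc<LLMBackend>`, just as `set_cache`/`clear_cache` do above.
// How the backend itself is constructed is out of scope for this file.
//
//     let backend: Arc<LLMBackend> = /* built elsewhere */;
//     let prompt = backend.new_prompt();
//     // ... populate the prompt via the `alith_prompt::LLMPrompt` API ...
//     let total = backend.get_total_prompt_tokens(&prompt)?;
//     assert!(total <= backend.model_ctx_size());
//     let mut request = CompletionRequest::new(Arc::clone(&backend));
//     request.prompt = prompt;
//     let response = request.request().await?;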