alith_interface/llms/mod.rs

use crate::requests::{
    completion::{
        error::CompletionError, request::CompletionRequest, response::CompletionResponse,
    },
    embeddings::{EmbeddingsError, EmbeddingsRequest, EmbeddingsResponse},
    logit_bias::LogitBias,
};
use alith_models::tokenizer::Tokenizer;
use alith_prompt::{LLMPrompt, PromptTokenizer};
pub mod api;
pub mod local;
use api::anthropic::AnthropicBackend;
use api::generic_openai::GenericApiBackend;
use api::openai::OpenAIBackend;
use std::sync::Arc;

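/// Dispatch enum over the supported API backends. Each variant wraps the
/// backend-specific client plus its model metadata; the methods on this type
/// match on the variant so callers can stay backend-agnostic.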
pub enum LLMBackend {
    OpenAI(OpenAIBackend),
    Anthropic(AnthropicBackend),
    GenericApi(GenericApiBackend),
}

impl LLMBackend {
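    /// Forwards a completion request to whichever backend this handle wraps.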
    pub(crate) async fn completion_request(
        &self,
        request: &CompletionRequest,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        match self {
            LLMBackend::OpenAI(b) => b.completion_request(request).await,
            LLMBackend::Anthropic(b) => b.completion_request(request).await,
            LLMBackend::GenericApi(b) => b.completion_request(request).await,
        }
    }

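    /// Forwards an embeddings request to the wrapped backend. Only the
    /// OpenAI-compatible backends implement embeddings; the remaining arm
    /// panics (see below).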
    pub(crate) async fn embeddings_request(
        &self,
        request: &EmbeddingsRequest,
    ) -> crate::Result<EmbeddingsResponse, EmbeddingsError> {
        match self {
            LLMBackend::OpenAI(b) => b.embeddings_request(request).await,
            LLMBackend::GenericApi(b) => b.embeddings_request(request).await,
            // The Anthropic API does not offer an embeddings endpoint, so
            // reaching this arm panics.
            _ => unimplemented!(),
        }
    }

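    /// Clears any cached prompt on the backend by sending an empty request
    /// with `cache_prompt` disabled and zero requested response tokens.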
    pub async fn clear_cache(
        self: &Arc<Self>,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        let mut request = CompletionRequest::new(Arc::clone(self));
        request.config.cache_prompt = false;
        request.config.requested_response_tokens = Some(0);
        request.request().await
    }

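    /// Warms the prompt cache: sends the given prompt with `cache_prompt`
    /// enabled and zero requested response tokens, so the prompt is processed
    /// and cached without generating output.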
    pub async fn set_cache(
        self: &Arc<Self>,
        prompt: &LLMPrompt,
    ) -> crate::Result<CompletionResponse, CompletionError> {
        let mut request = CompletionRequest::new(Arc::clone(self));
        request.config.cache_prompt = true;
        request.prompt = prompt.clone();
        request.config.requested_response_tokens = Some(0);
        request.request().await
    }

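    /// Builds a new [`LLMPrompt`] for this backend's chat format, using the
    /// backend's tokenizer and its per-message and per-name token overheads.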
    pub fn new_prompt(&self) -> LLMPrompt {
        match self {
            LLMBackend::OpenAI(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
            LLMBackend::Anthropic(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
            LLMBackend::GenericApi(b) => LLMPrompt::new_api_prompt(
                self.prompt_tokenizer(),
                Some(b.model.tokens_per_message),
                b.model.tokens_per_name,
            ),
        }
    }

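    /// Returns the total token count of the prompt as it would be sent to
    /// this backend's API.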
    pub fn get_total_prompt_tokens(&self, prompt: &LLMPrompt) -> crate::Result<u64> {
        match self {
            LLMBackend::OpenAI(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
            LLMBackend::Anthropic(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
            LLMBackend::GenericApi(_) => prompt.api_prompt()?.get_total_prompt_tokens(),
        }
    }

    pub fn model_id(&self) -> &str {
        match self {
            LLMBackend::OpenAI(b) => &b.model.model_base.model_id,
            LLMBackend::Anthropic(b) => &b.model.model_base.model_id,
            LLMBackend::GenericApi(b) => &b.model.model_base.model_id,
        }
    }

    pub fn model_ctx_size(&self) -> u64 {
        match self {
            LLMBackend::OpenAI(b) => b.model.model_base.model_ctx_size,
            LLMBackend::Anthropic(b) => b.model.model_base.model_ctx_size,
            LLMBackend::GenericApi(b) => b.model.model_base.model_ctx_size,
        }
    }

    pub fn inference_ctx_size(&self) -> u64 {
        match self {
            LLMBackend::OpenAI(b) => b.model.model_base.inference_ctx_size,
            LLMBackend::Anthropic(b) => b.model.model_base.inference_ctx_size,
            LLMBackend::GenericApi(b) => b.model.model_base.inference_ctx_size,
        }
    }

    pub fn tokenizer(&self) -> &Arc<Tokenizer> {
        match self {
            LLMBackend::OpenAI(b) => &b.model.model_base.tokenizer,
            LLMBackend::Anthropic(b) => &b.model.model_base.tokenizer,
            LLMBackend::GenericApi(b) => &b.model.model_base.tokenizer,
        }
    }

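    /// Exposes the backend tokenizer as the `PromptTokenizer` trait object
    /// expected by `alith_prompt`.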
    fn prompt_tokenizer(&self) -> Arc<dyn PromptTokenizer> {
        match self {
            LLMBackend::OpenAI(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
            LLMBackend::Anthropic(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
            LLMBackend::GenericApi(b) => {
                Arc::clone(&b.model.model_base.tokenizer) as Arc<dyn PromptTokenizer>
            }
        }
    }

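    /// Builds a pending logit bias into the form the backend expects, if one
    /// was set on the request. Anthropic requests never carry a logit bias,
    /// so that arm is unreachable.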
    pub fn build_logit_bias(&self, logit_bias: &mut Option<LogitBias>) -> crate::Result<()> {
        if let Some(logit_bias) = logit_bias {
            match self {
                LLMBackend::OpenAI(_) => logit_bias.build_openai(self.tokenizer())?,
                LLMBackend::Anthropic(_) => unreachable!("Anthropic does not support logit bias"),
                LLMBackend::GenericApi(_) => logit_bias.build_openai(self.tokenizer())?,
            };
        }
        Ok(())
    }

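    /// Borrows the inner OpenAI backend, or errors if this handle wraps a
    /// different variant. The `anthropic` and `generic_api` accessors below
    /// follow the same pattern.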
    pub fn openai(&self) -> crate::Result<&api::openai::OpenAIBackend> {
        match self {
            LLMBackend::OpenAI(b) => Ok(b),
            _ => crate::bail!("Backend is not openai"),
        }
    }

    pub fn anthropic(&self) -> crate::Result<&api::anthropic::AnthropicBackend> {
        match self {
            LLMBackend::Anthropic(b) => Ok(b),
            _ => crate::bail!("Backend is not anthropic"),
        }
    }

    pub fn generic_api(&self) -> crate::Result<&api::generic_openai::GenericApiBackend> {
        match self {
            LLMBackend::GenericApi(b) => Ok(b),
            _ => crate::bail!("Backend is not generic_api"),
        }
    }

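    /// Shuts the backend down. All current variants are remote APIs with
    /// nothing to release, so every arm is a no-op.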
    pub fn shutdown(&self) {
        match self {
            LLMBackend::OpenAI(_) => (),
            LLMBackend::Anthropic(_) => (),
            LLMBackend::GenericApi(_) => (),
        }
    }
}