litellm-rs 0.1.1

A high-performance AI Gateway written in Rust, providing OpenAI-compatible APIs with intelligent routing, load balancing, and enterprise features.

//! AI API endpoints (OpenAI compatible)
//!
//! This module provides OpenAI-compatible API endpoints for AI services.

#![allow(dead_code)]

use crate::core::models::openai::{
    ChatCompletionRequest, CompletionRequest, EmbeddingRequest, ImageGenerationRequest,
    ModelListResponse,
};
use crate::core::models::{ApiKey, RequestContext, User};
use crate::server::AppState;
use crate::server::routes::{ApiResponse, errors};
use crate::utils::validation::RequestValidator;
use actix_web::http::header::HeaderMap;
use actix_web::{HttpRequest, HttpResponse, Result as ActixResult, web};

use serde::Deserialize;
use tracing::{debug, error, info, warn};

/// Audio speech generation request
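///
/// A body this struct deserializes, as a sketch mirroring OpenAI's
/// `/v1/audio/speech` wire format (values are illustrative):
///
/// ```ignore
/// let body = serde_json::json!({
///     "input": "Hello, world",
///     "voice": "alloy",
///     "response_format": "mp3",
///     "speed": 1.0
/// });
/// let request: AudioSpeechRequest = serde_json::from_value(body)?;
/// ```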
#[derive(Debug, Deserialize)]
struct AudioSpeechRequest {
    /// Text to convert to speech
    pub input: String,
    /// Voice to use for speech generation
    pub voice: String,
    /// Audio format (mp3, opus, aac, flac)
    pub response_format: Option<String>,
    /// Speed of speech (0.25 to 4.0)
    pub speed: Option<f32>,
}

/// Configure AI API routes
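///
/// A minimal sketch of mounting these routes on an Actix app (assumes an
/// `AppState` value has been built elsewhere):
///
/// ```ignore
/// use actix_web::{App, web};
///
/// let app = App::new()
///     .app_data(web::Data::new(app_state))
///     .configure(configure_routes);
/// ```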
pub fn configure_routes(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::scope("/v1")
            // Chat completions
            .route("/chat/completions", web::post().to(chat_completions))
            // Text completions (legacy)
            .route("/completions", web::post().to(completions))
            // Embeddings
            .route("/embeddings", web::post().to(embeddings))
            // Image generation
            .route("/images/generations", web::post().to(image_generations))
            // Models
            .route("/models", web::get().to(list_models))
            .route("/models/{model_id}", web::get().to(get_model))
            // Audio (placeholder implementations for now)
            .route(
                "/audio/transcriptions",
                web::post().to(audio_transcriptions),
            )
            .route("/audio/translations", web::post().to(audio_translations))
            .route("/audio/speech", web::post().to(audio_speech)),
    );
}

/// Chat completions endpoint
///
/// OpenAI-compatible chat completions API that supports streaming and non-streaming responses.
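///
/// The accepted body mirrors OpenAI's `/v1/chat/completions` request; a
/// sketch (model name and message content are illustrative):
///
/// ```ignore
/// let body = serde_json::json!({
///     "model": "gpt-4",
///     "messages": [{"role": "user", "content": "Hello"}],
///     "stream": false
/// });
/// ```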
pub async fn chat_completions(
    state: web::Data<AppState>,
    req: HttpRequest,
    request: web::Json<ChatCompletionRequest>,
) -> ActixResult<HttpResponse> {
    info!("Chat completion request for model: {}", request.model);

    // Get request context from middleware
    let context = get_request_context(&req)?;

    // Validate request
    if let Err(e) = RequestValidator::validate_chat_completion_request(
        &request.model,
        &request.messages,
        request.max_tokens,
        request.temperature,
    ) {
        warn!("Invalid chat completion request: {}", e);
        return Ok(errors::validation_error(&e.to_string()));
    }

    // Check if streaming is requested
    if request.stream.unwrap_or(false) {
        // Handle streaming request
        handle_streaming_chat_completion(state.get_ref().clone(), request.into_inner(), context)
            .await
    } else {
        // Handle non-streaming request
        match state
            .router
            .route_chat_completion(request.into_inner(), context)
            .await
        {
            Ok(response) => Ok(HttpResponse::Ok().json(response)),
            Err(e) => {
                error!("Chat completion error: {}", e);
                Ok(errors::gateway_error_to_response(e))
            }
        }
    }
}

/// Text completions endpoint (legacy)
///
/// OpenAI-compatible text completions API for backward compatibility.
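///
/// Accepts OpenAI's legacy `/v1/completions` body; a sketch assuming the
/// usual `model`/`prompt` fields:
///
/// ```ignore
/// let body = serde_json::json!({
///     "model": "gpt-3.5-turbo-instruct",
///     "prompt": "Say hello"
/// });
/// ```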
pub async fn completions(
    state: web::Data<AppState>,
    req: HttpRequest,
    request: web::Json<CompletionRequest>,
) -> ActixResult<HttpResponse> {
    info!("Text completion request for model: {}", request.model);

    // Get request context from middleware
    let context = get_request_context(&req)?;

    // Route request through the core router
    match state
        .router
        .route_completion(request.into_inner(), context)
        .await
    {
        Ok(response) => Ok(HttpResponse::Ok().json(response)),
        Err(e) => {
            error!("Text completion error: {}", e);
            Ok(errors::gateway_error_to_response(e))
        }
    }
}

/// Embeddings endpoint
///
/// OpenAI-compatible embeddings API for generating text embeddings.
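///
/// The accepted body mirrors OpenAI's `/v1/embeddings` request; a sketch
/// (the model name is illustrative):
///
/// ```ignore
/// let body = serde_json::json!({
///     "model": "text-embedding-ada-002",
///     "input": "The quick brown fox"
/// });
/// ```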
pub async fn embeddings(
    state: web::Data<AppState>,
    req: HttpRequest,
    request: web::Json<EmbeddingRequest>,
) -> ActixResult<HttpResponse> {
    info!("Embedding request for model: {}", request.model);

    // Get request context from middleware
    let context = get_request_context(&req)?;

    // Route request through the core router
    match state
        .router
        .route_embedding(request.into_inner(), context)
        .await
    {
        Ok(response) => Ok(HttpResponse::Ok().json(response)),
        Err(e) => {
            error!("Embedding error: {}", e);
            Ok(errors::gateway_error_to_response(e))
        }
    }
}

/// Image generation endpoint
///
/// OpenAI-compatible image generation API.
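///
/// Accepts OpenAI's `/v1/images/generations` body; a sketch assuming the
/// usual `prompt`/`n`/`size` fields (`model` is optional here):
///
/// ```ignore
/// let body = serde_json::json!({
///     "prompt": "A watercolor fox",
///     "n": 1,
///     "size": "1024x1024"
/// });
/// ```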
async fn image_generations(
    state: web::Data<AppState>,
    req: HttpRequest,
    request: web::Json<ImageGenerationRequest>,
) -> ActixResult<HttpResponse> {
    info!("Image generation request for model: {:?}", request.model);

    // Get request context from middleware
    let context = get_request_context(&req)?;

    // Route request through the core router
    match state
        .router
        .route_image_generation(request.into_inner(), context)
        .await
    {
        Ok(response) => Ok(HttpResponse::Ok().json(response)),
        Err(e) => {
            error!("Image generation error: {}", e);
            Ok(errors::gateway_error_to_response(e))
        }
    }
}

/// List available models
///
/// Returns a list of available AI models across all configured providers.
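///
/// The response uses OpenAI's list envelope: `{"object": "list", "data": [...]}`.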
pub async fn list_models(state: web::Data<AppState>) -> ActixResult<HttpResponse> {
    debug!("Listing available models");

    match state.router.list_models().await {
        Ok(models) => {
            let response = ModelListResponse {
                object: "list".to_string(),
                data: models,
            };
            Ok(HttpResponse::Ok().json(response))
        }
        Err(e) => {
            error!("Failed to list models: {}", e);
            Ok(HttpResponse::InternalServerError()
                .json(ApiResponse::<()>::error("Failed to list models".to_string())))
        }
    }
}

/// Get specific model information
///
/// Returns detailed information about a specific model.
async fn get_model(
    state: web::Data<AppState>,
    model_id: web::Path<String>,
) -> ActixResult<HttpResponse> {
    debug!("Getting model info for: {}", model_id);

    match state.router.get_model(&model_id).await {
        Ok(Some(model)) => Ok(HttpResponse::Ok().json(model)),
        Ok(None) => {
            Ok(HttpResponse::NotFound()
                .json(ApiResponse::<()>::error("Model not found".to_string())))
        }
        Err(e) => {
            error!("Failed to get model {}: {}", model_id, e);
            Ok(HttpResponse::InternalServerError()
                .json(ApiResponse::<()>::error("Failed to retrieve model".to_string())))
        }
    }
}

/// Audio transcriptions endpoint
async fn audio_transcriptions(
    _state: web::Data<AppState>,
    req: HttpRequest,
    _payload: web::Payload,
) -> ActixResult<HttpResponse> {
    info!("Audio transcriptions request");

    // Get request context (user, API key, etc.)
    let _context = match get_request_context(&req) {
        Ok(ctx) => ctx,
        Err(_) => {
            return Ok(HttpResponse::Unauthorized()
                .json(ApiResponse::<()>::error("Unauthorized".to_string())));
        }
    };

    // For now, return a placeholder response indicating the feature is in development
    let response = serde_json::json!({
        "text": "Audio transcription feature is in development. This endpoint will support OpenAI-compatible audio transcription APIs.",
        "language": "en",
        "duration": 0.0,
        "segments": []
    });

    Ok(HttpResponse::Ok().json(ApiResponse::success(response)))
}

/// Audio translations endpoint
async fn audio_translations(
    _state: web::Data<AppState>,
    req: HttpRequest,
    _payload: web::Payload,
) -> ActixResult<HttpResponse> {
    info!("Audio translations request");

    // Get request context (user, API key, etc.)
    let _context = match get_request_context(&req) {
        Ok(ctx) => ctx,
        Err(_) => {
            return Ok(HttpResponse::Unauthorized()
                .json(ApiResponse::<()>::error("Unauthorized".to_string())));
        }
    };

    // For now, return a placeholder response indicating the feature is in development
    let response = serde_json::json!({
        "text": "Audio translation feature is in development. This endpoint will support OpenAI-compatible audio translation APIs.",
        "language": "en",
        "duration": 0.0,
        "segments": []
    });

    Ok(HttpResponse::Ok().json(ApiResponse::success(response)))
}

/// Audio speech endpoint
async fn audio_speech(
    _state: web::Data<AppState>,
    req: HttpRequest,
    request: web::Json<AudioSpeechRequest>,
) -> ActixResult<HttpResponse> {
    // Log only a short prefix of the input; take whole chars rather than
    // byte-slicing so a multi-byte UTF-8 character can never cause a panic.
    let preview: String = request.input.chars().take(50).collect();
    info!("Audio speech request for text: {}", preview);

    // Get request context (user, API key, etc.)
    let _context = match get_request_context(&req) {
        Ok(ctx) => ctx,
        Err(_) => {
            return Ok(HttpResponse::Unauthorized()
                .json(ApiResponse::<()>::error("Unauthorized".to_string())));
        }
    };

    // For now, return a placeholder audio response
    // In a real implementation, this would generate actual audio using TTS providers
    let audio_data = vec![0u8; 1024]; // Placeholder audio data

    Ok(HttpResponse::Ok()
        .content_type("audio/mpeg")
        .body(audio_data))
}

/// Get request context from headers and middleware extensions
fn get_request_context(req: &HttpRequest) -> ActixResult<RequestContext> {
    // In a real implementation, this would extract the context from request extensions
    // that were set by the authentication middleware
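    //
    // A sketch of that extraction (assumes the middleware stores a cloneable
    // `RequestContext` in the request's extensions):
    //
    //     if let Some(ctx) = req.extensions().get::<RequestContext>() {
    //         return Ok(ctx.clone());
    //     }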
    let mut context = RequestContext::new();

    // Extract request ID
    if let Some(request_id) = req.headers().get("x-request-id") {
        if let Ok(id) = request_id.to_str() {
            context.request_id = id.to_string();
        }
    }

    // Extract user agent
    if let Some(user_agent) = req.headers().get("user-agent") {
        if let Ok(agent) = user_agent.to_str() {
            context.user_agent = Some(agent.to_string());
        }
    }

    Ok(context)
}

/// Extract user from request extensions
fn get_authenticated_user(_headers: &HeaderMap) -> Option<User> {
    // In a real implementation, this would extract the user from request extensions
    // that were set by the authentication middleware
    None
}

/// Extract API key from request extensions
fn get_authenticated_api_key(_headers: &HeaderMap) -> Option<ApiKey> {
    // In a real implementation, this would extract the API key from request extensions
    // that were set by the authentication middleware
    None
}

/// Check if user has permission for the requested operation
fn check_permission(user: Option<&User>, api_key: Option<&ApiKey>, _operation: &str) -> bool {
    // In a real implementation, this would check permissions through the RBAC system
    // For now, assume all authenticated requests are allowed
    user.is_some() || api_key.is_some()
}

/// Log API usage for billing and analytics
async fn log_api_usage(
    _state: &AppState,
    context: &RequestContext,
    model: &str,
    tokens_used: u32,
    cost: f64,
) {
    // In a real implementation, this would log usage to the database
    debug!(
        "API usage: user_id={:?}, model={}, tokens={}, cost={}",
        context.user_id, model, tokens_used, cost
    );
}

/// Handle streaming chat completion
async fn handle_streaming_chat_completion(
    _state: AppState,
    _request: ChatCompletionRequest,
    _context: RequestContext,
) -> ActixResult<HttpResponse> {
    // TODO: Implement streaming support
    // For now, return an error indicating streaming is not yet supported
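    //
    // A possible shape once provider streaming lands (sketch only): map
    // provider chunks into SSE `data:` frames and return them via
    // `HttpResponse::Ok().content_type("text/event-stream").streaming(stream)`,
    // finishing with the `data: [DONE]` sentinel that OpenAI clients expect.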
    error!("Streaming is not yet implemented");
    Ok(errors::validation_error("Streaming is not yet implemented"))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_request_context() {
        // This test would need a mock HttpRequest in a real implementation
        // For now, we'll test the basic functionality
        let context = RequestContext::new();
        assert!(!context.request_id.is_empty());
        assert!(context.user_agent.is_none());
    }

    #[test]
    fn test_check_permission() {
        // Test with no authentication
        assert!(!check_permission(None, None, "chat"));

        // Test with user (would need actual User instance in real test)
        // assert!(check_permission(Some(&user), None, "chat"));
    }

    #[tokio::test]
    async fn test_log_api_usage() {
        // This would require actual state in a real test
        // For now, just test that the function doesn't panic
        let _context = RequestContext::new();
        // log_api_usage(&state, &context, "gpt-4", 100, 0.002).await;
    }
}