llm_edge_proxy/server/routes.rs

//! Route handlers for the HTTP server

use axum::{
    extract::State,
    http::StatusCode,
    response::{IntoResponse, Response},
    Json,
};
use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::{info, instrument};

use crate::error::ProxyResult;
use crate::Config;

/// Health check response
#[derive(Serialize)]
pub struct HealthResponse {
    pub status: String,
    pub timestamp: String,
    pub version: String,
}

/// Health check endpoint
#[instrument(name = "health_check")]
pub async fn health_check() -> Json<HealthResponse> {
    info!("Health check requested");

    Json(HealthResponse {
        status: "healthy".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

/// Readiness check - checks if service is ready to accept traffic
#[instrument(name = "readiness_check")]
pub async fn readiness_check() -> Json<HealthResponse> {
    // In a full implementation, this would check:
    // - Database connectivity
    // - Cache availability
    // - Downstream service health
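    //
    // One possible shape for that (sketch only; `check_database` and
    // `check_cache` are hypothetical helpers, not part of this crate):
    //
    //     let dependencies_ok = check_database().await && check_cache().await;
    //     let status = if dependencies_ok { "ready" } else { "not_ready" };
    //
    // and the computed status would replace the constant below.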

    Json(HealthResponse {
        status: "ready".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

/// Liveness check - simple check if service is running
#[instrument(name = "liveness_check")]
pub async fn liveness_check() -> Json<HealthResponse> {
    Json(HealthResponse {
        status: "alive".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

/// Prometheus metrics endpoint
#[instrument(name = "metrics")]
pub async fn metrics() -> Response {
    // In a full implementation, this would use the prometheus crate
    // to collect and export metrics
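    //
    // A sketch of what that might look like with the `prometheus` crate
    // (assumes metrics are registered with the default registry and that the
    // `prometheus::Encoder` trait is in scope):
    //
    //     let encoder = prometheus::TextEncoder::new();
    //     let metric_families = prometheus::gather();
    //     let mut buffer = Vec::new();
    //     encoder.encode(&metric_families, &mut buffer).unwrap();
    //
    // For now, a static placeholder in the Prometheus text format is returned.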
    let metrics_text = r#"# HELP llm_requests_total Total number of LLM requests
# TYPE llm_requests_total counter
llm_requests_total{provider="openai",status="success"} 0

# HELP llm_request_duration_seconds Request duration in seconds
# TYPE llm_request_duration_seconds histogram
llm_request_duration_seconds_bucket{le="0.005"} 0
llm_request_duration_seconds_bucket{le="0.01"} 0
llm_request_duration_seconds_bucket{le="0.025"} 0
llm_request_duration_seconds_bucket{le="0.05"} 0
llm_request_duration_seconds_bucket{le="0.1"} 0
llm_request_duration_seconds_bucket{le="+Inf"} 0
llm_request_duration_seconds_sum 0
llm_request_duration_seconds_count 0

# HELP llm_cache_hit_total Total cache hits
# TYPE llm_cache_hit_total counter
llm_cache_hit_total 0
"#;

    (
        StatusCode::OK,
        [("content-type", "text/plain; version=0.0.4")],
        metrics_text,
    )
        .into_response()
}

/// Chat completions request (OpenAI-compatible)
#[derive(Debug, Deserialize)]
pub struct ChatCompletionRequest {
    pub model: String,
    pub messages: Vec<ChatMessage>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub max_tokens: Option<u32>,
    #[serde(default)]
    pub stream: bool,
}

#[derive(Debug, Deserialize, Serialize)]
pub struct ChatMessage {
    pub role: String,
    pub content: String,
}

/// Chat completions response
#[derive(Debug, Serialize)]
pub struct ChatCompletionResponse {
    pub id: String,
    pub object: String,
    pub created: i64,
    pub model: String,
    pub choices: Vec<ChatChoice>,
    pub usage: Usage,
}

#[derive(Debug, Serialize)]
pub struct ChatChoice {
    pub index: u32,
    pub message: ChatMessage,
    pub finish_reason: String,
}

#[derive(Debug, Serialize)]
pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}

/// OpenAI-compatible chat completions endpoint
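///
/// Accepts an OpenAI-style request body; the values below are illustrative
/// (any model string is accepted at this layer):
///
/// ```json
/// {
///   "model": "gpt-4o",
///   "messages": [{"role": "user", "content": "Hello"}],
///   "temperature": 0.7,
///   "stream": false
/// }
/// ```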
#[instrument(name = "chat_completions", skip(_config, request))]
pub async fn chat_completions(
    State(_config): State<Config>,
    Json(request): Json<ChatCompletionRequest>,
) -> ProxyResult<Json<ChatCompletionResponse>> {
    info!(
        model = %request.model,
        message_count = request.messages.len(),
        stream = request.stream,
        "Processing chat completion request"
    );

    // For now, return a mock response
    // In Layer 2, this will be routed to actual providers
    let response = ChatCompletionResponse {
        id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
        object: "chat.completion".to_string(),
        created: chrono::Utc::now().timestamp(),
        model: request.model.clone(),
        choices: vec![ChatChoice {
            index: 0,
            message: ChatMessage {
                role: "assistant".to_string(),
                content: "This is a mock response from LLM Edge Agent Layer 1. Provider integration will be added in Layer 2.".to_string(),
            },
            finish_reason: "stop".to_string(),
        }],
        usage: Usage {
            prompt_tokens: 10,
            completion_tokens: 20,
            total_tokens: 30,
        },
    };

    Ok(Json(response))
}

/// Legacy completions endpoint
#[instrument(name = "completions", skip(_config, request))]
pub async fn completions(
    State(_config): State<Config>,
    Json(request): Json<serde_json::Value>,
) -> ProxyResult<Json<serde_json::Value>> {
    info!("Processing legacy completion request");

    // Mock response
    Ok(Json(json!({
        "id": format!("cmpl-{}", uuid::Uuid::new_v4()),
        "object": "text_completion",
        "created": chrono::Utc::now().timestamp(),
        "model": request.get("model").and_then(|v| v.as_str()).unwrap_or("unknown"),
        "choices": [
            {
                "text": "Mock completion response",
                "index": 0,
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": 5,
            "completion_tokens": 10,
            "total_tokens": 15
        }
    })))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_health_check() {
        let response = health_check().await;
        assert_eq!(response.0.status, "healthy");
    }

    #[tokio::test]
    async fn test_readiness_check() {
        let response = readiness_check().await;
        assert_eq!(response.0.status, "ready");
    }
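
    // Additional coverage (sketch): the liveness handler and the serde
    // defaults on the request type. Both only exercise code in this module.
    #[tokio::test]
    async fn test_liveness_check() {
        let response = liveness_check().await;
        assert_eq!(response.0.status, "alive");
    }

    #[test]
    fn test_chat_completion_request_defaults() {
        // Optional fields and `stream` fall back to their serde defaults
        // when omitted from the JSON body.
        let request: ChatCompletionRequest = serde_json::from_str(
            r#"{"model": "test-model", "messages": [{"role": "user", "content": "hi"}]}"#,
        )
        .unwrap();
        assert_eq!(request.model, "test-model");
        assert!(request.temperature.is_none());
        assert!(request.max_tokens.is_none());
        assert!(!request.stream);
    }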
}