llm_edge_proxy/server/routes.rs

use axum::{
    extract::State,
    http::StatusCode,
    response::{IntoResponse, Response},
    Json,
};
use serde::{Deserialize, Serialize};
use serde_json::json;
use tracing::{info, instrument};

use crate::error::ProxyResult;
use crate::Config;

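/// Response body shared by the health, readiness, and liveness endpoints.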
#[derive(Serialize)]
pub struct HealthResponse {
    pub status: String,
    pub timestamp: String,
    pub version: String,
}

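/// Basic health check: reports "healthy" along with the current timestamp and crate version.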
#[instrument(name = "health_check")]
pub async fn health_check() -> Json<HealthResponse> {
    info!("Health check requested");

    Json(HealthResponse {
        status: "healthy".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

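/// Readiness probe; currently always reports "ready".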
#[instrument(name = "readiness_check")]
pub async fn readiness_check() -> Json<HealthResponse> {
    Json(HealthResponse {
        status: "ready".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

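/// Liveness probe; currently always reports "alive".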
#[instrument(name = "liveness_check")]
pub async fn liveness_check() -> Json<HealthResponse> {
    Json(HealthResponse {
        status: "alive".to_string(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

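/// Prometheus text-format metrics endpoint. The exposition below is a static
/// placeholder: every counter and histogram bucket is currently zero.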
#[instrument(name = "metrics")]
pub async fn metrics() -> Response {
    let metrics_text = r#"# HELP llm_requests_total Total number of LLM requests
# TYPE llm_requests_total counter
llm_requests_total{provider="openai",status="success"} 0

# HELP llm_request_duration_seconds Request duration in seconds
# TYPE llm_request_duration_seconds histogram
llm_request_duration_seconds_bucket{le="0.005"} 0
llm_request_duration_seconds_bucket{le="0.01"} 0
llm_request_duration_seconds_bucket{le="0.025"} 0
llm_request_duration_seconds_bucket{le="0.05"} 0
llm_request_duration_seconds_bucket{le="0.1"} 0
llm_request_duration_seconds_bucket{le="+Inf"} 0
llm_request_duration_seconds_sum 0
llm_request_duration_seconds_count 0

# HELP llm_cache_hit_total Total cache hits
# TYPE llm_cache_hit_total counter
llm_cache_hit_total 0
"#;

    (
        StatusCode::OK,
        [("content-type", "text/plain; version=0.0.4")],
        metrics_text,
    )
        .into_response()
}

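/// Incoming chat completion request (OpenAI-compatible shape); the optional
/// fields fall back to their serde defaults when omitted.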
#[derive(Debug, Deserialize)]
pub struct ChatCompletionRequest {
    pub model: String,
    pub messages: Vec<ChatMessage>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub max_tokens: Option<u32>,
    #[serde(default)]
    pub stream: bool,
}

#[derive(Debug, Deserialize, Serialize)]
pub struct ChatMessage {
    pub role: String,
    pub content: String,
}

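/// OpenAI-style chat completion response envelope returned to the client.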
#[derive(Debug, Serialize)]
pub struct ChatCompletionResponse {
    pub id: String,
    pub object: String,
    pub created: i64,
    pub model: String,
    pub choices: Vec<ChatChoice>,
    pub usage: Usage,
}

#[derive(Debug, Serialize)]
pub struct ChatChoice {
    pub index: u32,
    pub message: ChatMessage,
    pub finish_reason: String,
}

#[derive(Debug, Serialize)]
pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}

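/// Chat completion handler. Layer 1 returns a mock response with fixed token
/// counts; provider integration arrives in Layer 2 (see the mock message below).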
#[instrument(name = "chat_completions", skip(_config, request))]
pub async fn chat_completions(
    State(_config): State<Config>,
    Json(request): Json<ChatCompletionRequest>,
) -> ProxyResult<Json<ChatCompletionResponse>> {
    info!(
        model = %request.model,
        message_count = request.messages.len(),
        stream = request.stream,
        "Processing chat completion request"
    );

    let response = ChatCompletionResponse {
        id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
        object: "chat.completion".to_string(),
        created: chrono::Utc::now().timestamp(),
        model: request.model.clone(),
        choices: vec![ChatChoice {
            index: 0,
            message: ChatMessage {
                role: "assistant".to_string(),
                content: "This is a mock response from LLM Edge Agent Layer 1. Provider integration will be added in Layer 2.".to_string(),
            },
            finish_reason: "stop".to_string(),
        }],
        usage: Usage {
            prompt_tokens: 10,
            completion_tokens: 20,
            total_tokens: 30,
        },
    };

    Ok(Json(response))
}

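/// Legacy text-completion handler; accepts an arbitrary JSON body and echoes a
/// mock `text_completion` response.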
#[instrument(name = "completions")]
pub async fn completions(
    State(_config): State<Config>,
    Json(request): Json<serde_json::Value>,
) -> ProxyResult<Json<serde_json::Value>> {
    info!("Processing legacy completion request");

    Ok(Json(json!({
        "id": format!("cmpl-{}", uuid::Uuid::new_v4()),
        "object": "text_completion",
        "created": chrono::Utc::now().timestamp(),
        "model": request.get("model").and_then(|v| v.as_str()).unwrap_or("unknown"),
        "choices": [
            {
                "text": "Mock completion response",
                "index": 0,
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": 5,
            "completion_tokens": 10,
            "total_tokens": 15
        }
    })))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_health_check() {
        let response = health_check().await;
        assert_eq!(response.0.status, "healthy");
    }

    #[tokio::test]
    async fn test_readiness_check() {
        let response = readiness_check().await;
        assert_eq!(response.0.status, "ready");
    }
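
    // Illustrative extra tests: the liveness probe mirrors the other checks, and
    // ChatCompletionRequest's optional fields fall back to their serde defaults
    // when omitted from the JSON body.
    #[tokio::test]
    async fn test_liveness_check() {
        let response = liveness_check().await;
        assert_eq!(response.0.status, "alive");
    }

    #[test]
    fn test_chat_completion_request_defaults() {
        let body = r#"{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}"#;
        let request: ChatCompletionRequest = serde_json::from_str(body).unwrap();
        assert_eq!(request.model, "gpt-4");
        assert!(!request.stream);
        assert!(request.temperature.is_none());
        assert!(request.max_tokens.is_none());
    }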
}