rustapi_toon/llm_response.rs

//! # LLM-Optimized Response Wrapper
//!
//! Provides `LlmResponse<T>` for AI/LLM endpoints with automatic
//! token counting and format optimization.
//!
//! ## Features
//!
//! - Automatic content negotiation (JSON vs TOON)
//! - Token counting headers
//! - Token savings calculation
//!
//! ## Response Headers
//!
//! - `X-Token-Count-JSON`: Estimated token count in JSON format
//! - `X-Token-Count-TOON`: Estimated token count in TOON format
//! - `X-Token-Savings`: Percentage of tokens saved with TOON
//! - `X-Format-Used`: Which format was actually served (`json` or `toon`)
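//!
//! For example, a TOON response might carry headers like these (token
//! counts are illustrative, and the `Content-Type` value is whatever
//! `TOON_CONTENT_TYPE` resolves to):
//!
//! ```text
//! HTTP/1.1 200 OK
//! x-format-used: toon
//! x-token-count-json: 182
//! x-token-count-toon: 121
//! x-token-savings: 33.52%
//! ```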
//!
//! ## Example
//!
//! ```rust,ignore
//! use rustapi_rs::prelude::*;
//! use rustapi_rs::toon::{LlmResponse, AcceptHeader};
//!
//! #[derive(Serialize)]
//! struct ChatResponse {
//!     messages: Vec<Message>,
//! }
//!
//! async fn chat(accept: AcceptHeader) -> LlmResponse<ChatResponse> {
//!     let response = ChatResponse {
//!         messages: vec![...],
//!     };
//!     LlmResponse::new(response, accept.preferred)
//! }
//! ```

use crate::{OutputFormat, JSON_CONTENT_TYPE, TOON_CONTENT_TYPE};
use bytes::Bytes;
use http::{header, StatusCode};
use http_body_util::Full;
use rustapi_core::{ApiError, IntoResponse, Response};
use rustapi_openapi::{
    MediaType, Operation, OperationModifier, ResponseModifier, ResponseSpec, SchemaRef,
};
use serde::Serialize;
use std::collections::HashMap;

/// Header name for JSON token count
pub const X_TOKEN_COUNT_JSON: &str = "x-token-count-json";
/// Header name for TOON token count
pub const X_TOKEN_COUNT_TOON: &str = "x-token-count-toon";
/// Header name for token savings percentage
pub const X_TOKEN_SAVINGS: &str = "x-token-savings";
/// Header name for the format used
pub const X_FORMAT_USED: &str = "x-format-used";

/// LLM-optimized response wrapper with token counting.
///
/// This wrapper automatically:
/// 1. Serializes to the requested format (JSON or TOON)
/// 2. Calculates estimated token counts for both formats
/// 3. Adds informative headers about token usage
///
/// ## Token Estimation
///
/// Token counts are estimated using a simple heuristic:
/// - ~4 characters per token (GPT-3/4 average)
///
/// For more accurate counts, use a proper tokenizer.
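///
/// A more accurate count can be obtained by swapping in a real tokenizer.
/// A minimal sketch, assuming the `tiktoken-rs` crate is added as a
/// dependency (it is not part of this crate):
///
/// ```rust,ignore
/// use tiktoken_rs::cl100k_base;
///
/// fn count_tokens(text: &str) -> usize {
///     // cl100k_base is the BPE used by GPT-3.5/GPT-4-era models.
///     let bpe = cl100k_base().expect("failed to load BPE");
///     bpe.encode_with_special_tokens(text).len()
/// }
/// ```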
///
/// ## Example
///
/// ```rust,ignore
/// use rustapi_rs::prelude::*;
/// use rustapi_rs::toon::{LlmResponse, AcceptHeader, OutputFormat};
///
/// #[derive(Serialize)]
/// struct ApiData {
///     items: Vec<Item>,
/// }
///
/// // With content negotiation
/// async fn get_items(accept: AcceptHeader) -> LlmResponse<ApiData> {
///     let data = ApiData { items: vec![...] };
///     LlmResponse::new(data, accept.preferred)
/// }
///
/// // Always TOON format
/// async fn get_items_toon() -> LlmResponse<ApiData> {
///     let data = ApiData { items: vec![...] };
///     LlmResponse::toon(data)
/// }
/// ```
#[derive(Debug, Clone)]
pub struct LlmResponse<T> {
    data: T,
    format: OutputFormat,
    include_token_headers: bool,
}

impl<T> LlmResponse<T> {
    /// Create a new LLM response with the specified format.
    pub fn new(data: T, format: OutputFormat) -> Self {
        Self {
            data,
            format,
            include_token_headers: true,
        }
    }

    /// Create a JSON-formatted LLM response.
    pub fn json(data: T) -> Self {
        Self::new(data, OutputFormat::Json)
    }

    /// Create a TOON-formatted LLM response.
    pub fn toon(data: T) -> Self {
        Self::new(data, OutputFormat::Toon)
    }

    /// Disable token counting headers.
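    ///
    /// Note that both formats are still serialized internally; this only
    /// omits the `X-Token-*` response headers:
    ///
    /// ```rust,ignore
    /// let response = LlmResponse::toon(data).without_token_headers();
    /// ```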
    pub fn without_token_headers(mut self) -> Self {
        self.include_token_headers = false;
        self
    }

    /// Enable token counting headers (default).
    pub fn with_token_headers(mut self) -> Self {
        self.include_token_headers = true;
        self
    }
}

/// Estimate token count using a simple length-based heuristic:
/// ~4 characters per token (GPT-3/4 average).
fn estimate_tokens(text: &str) -> usize {
    // Simple heuristic: ~4 chars per token, which accounts for whitespace
    // and punctuation overhead. Byte length is used as a cheap proxy for
    // character count; the two are nearly identical for the mostly-ASCII
    // output of the serializers.
    let byte_count = text.len();
    byte_count.div_ceil(4) // Round up
}

/// Calculate token savings percentage, rounded to two decimal places.
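/// For example, 100 JSON tokens vs. 70 TOON tokens yields 30.00% savings.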
fn calculate_savings(json_tokens: usize, toon_tokens: usize) -> f64 {
    if json_tokens == 0 {
        return 0.0;
    }
    let savings = json_tokens.saturating_sub(toon_tokens) as f64 / json_tokens as f64 * 100.0;
    (savings * 100.0).round() / 100.0 // Round to 2 decimal places
}

impl<T: Serialize> IntoResponse for LlmResponse<T> {
    fn into_response(self) -> Response {
        // Serialize to both formats up front: one becomes the body, and
        // both feed the token-count comparison.
        let json_result = serde_json::to_string(&self.data);
        let toon_result = toon_format::encode_default(&self.data);

        // Calculate token counts if enabled
        let (json_tokens, toon_tokens, savings) = if self.include_token_headers {
            let json_tokens = json_result
                .as_ref()
                .map(|s| estimate_tokens(s))
                .unwrap_or(0);
            let toon_tokens = toon_result
                .as_ref()
                .map(|s| estimate_tokens(s))
                .unwrap_or(0);
            let savings = calculate_savings(json_tokens, toon_tokens);
            (Some(json_tokens), Some(toon_tokens), Some(savings))
        } else {
            (None, None, None)
        };

        // Pick the body in the requested format
        let (body, content_type) = match self.format {
            OutputFormat::Json => match json_result {
                Ok(json) => (json, JSON_CONTENT_TYPE),
                Err(e) => {
                    tracing::error!("Failed to serialize to JSON: {}", e);
                    return ApiError::internal(format!("JSON serialization error: {}", e))
                        .into_response();
                }
            },
            OutputFormat::Toon => match toon_result {
                Ok(toon) => (toon, TOON_CONTENT_TYPE),
                Err(e) => {
                    tracing::error!("Failed to serialize to TOON: {}", e);
                    return ApiError::internal(format!("TOON serialization error: {}", e))
                        .into_response();
                }
            },
        };

        // Build response with headers
        let mut builder = http::Response::builder()
            .status(StatusCode::OK)
            .header(header::CONTENT_TYPE, content_type)
            .header(
                X_FORMAT_USED,
                match self.format {
                    OutputFormat::Json => "json",
                    OutputFormat::Toon => "toon",
                },
            );

        // Token counting headers
        if let Some(json_tokens) = json_tokens {
            builder = builder.header(X_TOKEN_COUNT_JSON, json_tokens.to_string());
        }
        if let Some(toon_tokens) = toon_tokens {
            builder = builder.header(X_TOKEN_COUNT_TOON, toon_tokens.to_string());
        }
        if let Some(savings) = savings {
            builder = builder.header(X_TOKEN_SAVINGS, format!("{:.2}%", savings));
        }

        // All header names and values above are statically valid, so
        // building the response cannot fail.
        builder.body(Full::new(Bytes::from(body))).unwrap()
    }
}

// OpenAPI support
impl<T: Send> OperationModifier for LlmResponse<T> {
    fn update_operation(_op: &mut Operation) {
        // LlmResponse is a response type; no request-body modification needed
    }
}

impl<T: Serialize> ResponseModifier for LlmResponse<T> {
    fn update_response(op: &mut Operation) {
        let mut content = HashMap::new();

        // JSON response
        content.insert(
            JSON_CONTENT_TYPE.to_string(),
            MediaType {
                schema: SchemaRef::Inline(serde_json::json!({
                    "type": "object",
                    "description": "JSON formatted response with token counting headers"
                })),
            },
        );

        // TOON response
        content.insert(
            TOON_CONTENT_TYPE.to_string(),
            MediaType {
                schema: SchemaRef::Inline(serde_json::json!({
                    "type": "string",
                    "description": "TOON (Token-Oriented Object Notation) formatted response with token counting headers"
                })),
            },
        );

        let response = ResponseSpec {
            description: "LLM-optimized response with token counting headers (X-Token-Count-JSON, X-Token-Count-TOON, X-Token-Savings, X-Format-Used)".to_string(),
            content: Some(content),
        };
        op.responses.insert("200".to_string(), response);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    #[derive(Serialize)]
    struct TestData {
        id: u64,
        name: String,
        active: bool,
    }

    #[test]
    fn test_estimate_tokens() {
        // ~4 chars per token
        assert_eq!(estimate_tokens(""), 0);
        assert_eq!(estimate_tokens("test"), 1); // 4 chars = 1 token
        assert_eq!(estimate_tokens("hello world"), 3); // 11 chars = ~3 tokens
        assert_eq!(estimate_tokens("a"), 1); // rounds up
    }

    #[test]
    fn test_calculate_savings() {
        assert_eq!(calculate_savings(100, 70), 30.0);
        assert_eq!(calculate_savings(100, 80), 20.0);
        assert_eq!(calculate_savings(100, 100), 0.0);
        assert_eq!(calculate_savings(0, 0), 0.0);
    }

    #[test]
    fn test_llm_response_json_format() {
        let data = TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        };
        let response = LlmResponse::json(data);
        assert!(matches!(response.format, OutputFormat::Json));
    }

    #[test]
    fn test_llm_response_toon_format() {
        let data = TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        };
        let response = LlmResponse::toon(data);
        assert!(matches!(response.format, OutputFormat::Toon));
    }

    #[test]
    fn test_llm_response_without_headers() {
        let data = TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        };
        let response = LlmResponse::json(data).without_token_headers();
        assert!(!response.include_token_headers);
    }

    #[test]
    fn test_llm_response_with_headers() {
        let data = TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        };
        let response = LlmResponse::toon(data)
            .without_token_headers()
            .with_token_headers();
        assert!(response.include_token_headers);
    }
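
    // A minimal end-to-end sketch: it assumes `rustapi_core::Response`
    // exposes the standard `http::Response` accessors (`status()`,
    // `headers()`), which the builder usage in `into_response` implies.
    #[test]
    fn test_into_response_token_headers() {
        let data = TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        };
        let response = LlmResponse::json(data).into_response();
        assert_eq!(response.status(), StatusCode::OK);
        let format = response.headers().get(X_FORMAT_USED).unwrap();
        assert_eq!(format.to_str().unwrap(), "json");
        assert!(response.headers().contains_key(X_TOKEN_COUNT_JSON));
        assert!(response.headers().contains_key(X_TOKEN_COUNT_TOON));
        assert!(response.headers().contains_key(X_TOKEN_SAVINGS));
    }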
}