// rustapi_toon/llm_response.rs
1//! # LLM-Optimized Response Wrapper
2//!
3//! Provides `LlmResponse<T>` for AI/LLM endpoints with automatic
4//! token counting and format optimization.
5//!
6//! ## Features
7//!
8//! - Automatic content negotiation (JSON vs TOON)
9//! - Token counting headers
10//! - Token savings calculation
11//!
12//! ## Response Headers
13//!
14//! - `X-Token-Count-JSON`: Estimated token count in JSON format
15//! - `X-Token-Count-TOON`: Estimated token count in TOON format
16//! - `X-Token-Savings`: Percentage of tokens saved with TOON
17//!
18//! ## Example
19//!
20//! ```rust,ignore
21//! use rustapi_rs::prelude::*;
22//! use rustapi_rs::toon::{LlmResponse, AcceptHeader};
23//!
24//! #[derive(Serialize)]
25//! struct ChatResponse {
26//!     messages: Vec<Message>,
27//! }
28//!
29//! async fn chat(accept: AcceptHeader) -> LlmResponse<ChatResponse> {
30//!     let response = ChatResponse {
31//!         messages: vec![...],
32//!     };
33//!     LlmResponse::new(response, accept.preferred)
34//! }
35//! ```
36
37use crate::{OutputFormat, JSON_CONTENT_TYPE, TOON_CONTENT_TYPE};
38use http::{header, StatusCode};
39use rustapi_core::{ApiError, IntoResponse, Response};
40use rustapi_openapi::{
41    MediaType, Operation, OperationModifier, ResponseModifier, ResponseSpec, SchemaRef,
42};
43use serde::Serialize;
44use std::collections::BTreeMap;
45
/// Header name for the estimated JSON token count (lowercase wire form).
pub const X_TOKEN_COUNT_JSON: &str = "x-token-count-json";
/// Header name for the estimated TOON token count (lowercase wire form).
pub const X_TOKEN_COUNT_TOON: &str = "x-token-count-toon";
/// Header name for token savings percentage; value is formatted as "{:.2}%".
pub const X_TOKEN_SAVINGS: &str = "x-token-savings";
/// Header name for the format actually used; value is "json" or "toon".
pub const X_FORMAT_USED: &str = "x-format-used";
54
/// LLM-optimized response wrapper with token counting.
///
/// This wrapper automatically:
/// 1. Serializes to the requested format (JSON or TOON)
/// 2. Calculates estimated token counts for both formats
/// 3. Adds informative headers about token usage
///
/// ## Token Estimation
///
/// Token counts are estimated using a simple heuristic:
/// - ~4 characters per token (GPT-3/4 average)
///
/// For more accurate counts, use a proper tokenizer.
///
/// ## Example
///
/// ```rust,ignore
/// use rustapi_rs::prelude::*;
/// use rustapi_rs::toon::{LlmResponse, AcceptHeader, OutputFormat};
///
/// #[derive(Serialize)]
/// struct ApiData {
///     items: Vec<Item>,
/// }
///
/// // With content negotiation
/// async fn get_items(accept: AcceptHeader) -> LlmResponse<ApiData> {
///     let data = ApiData { items: vec![...] };
///     LlmResponse::new(data, accept.preferred)
/// }
///
/// // Always TOON format
/// async fn get_items_toon() -> LlmResponse<ApiData> {
///     let data = ApiData { items: vec![...] };
///     LlmResponse::toon(data)
/// }
/// ```
#[derive(Debug, Clone)]
pub struct LlmResponse<T> {
    // Payload serialized into the response body.
    data: T,
    // Body format actually produced (JSON or TOON).
    format: OutputFormat,
    // Whether the X-Token-* headers are emitted; defaults to true in `new`.
    include_token_headers: bool,
}
98
99impl<T> LlmResponse<T> {
100    /// Create a new LLM response with the specified format.
101    pub fn new(data: T, format: OutputFormat) -> Self {
102        Self {
103            data,
104            format,
105            include_token_headers: true,
106        }
107    }
108
109    /// Create a JSON-formatted LLM response.
110    pub fn json(data: T) -> Self {
111        Self::new(data, OutputFormat::Json)
112    }
113
114    /// Create a TOON-formatted LLM response.
115    pub fn toon(data: T) -> Self {
116        Self::new(data, OutputFormat::Toon)
117    }
118
119    /// Disable token counting headers.
120    pub fn without_token_headers(mut self) -> Self {
121        self.include_token_headers = false;
122        self
123    }
124
125    /// Enable token counting headers (default).
126    pub fn with_token_headers(mut self) -> Self {
127        self.include_token_headers = true;
128        self
129    }
130}
131
/// Estimate the LLM token count of `text`.
///
/// Uses the common ~4-characters-per-token heuristic (GPT-3/4 average),
/// rounding up so any non-empty input counts as at least one token.
///
/// Note: `str::len()` is the UTF-8 *byte* length, not the character
/// count, so multi-byte characters weigh more than one "character".
/// That is acceptable for a rough estimate of mostly-ASCII JSON/TOON
/// payloads; for accurate counts, use a proper tokenizer.
fn estimate_tokens(text: &str) -> usize {
    // Byte length divided by 4, rounded up.
    text.len().div_ceil(4)
}
140
/// Percentage of tokens saved by TOON relative to JSON, rounded to two
/// decimal places.
///
/// Returns 0.0 when `json_tokens` is zero (no division by zero), and
/// clamps to 0.0 when TOON is larger than JSON (saturating subtraction
/// never goes negative).
fn calculate_savings(json_tokens: usize, toon_tokens: usize) -> f64 {
    match json_tokens {
        0 => 0.0,
        total => {
            let saved = json_tokens.saturating_sub(toon_tokens) as f64;
            let percent = saved / total as f64 * 100.0;
            // Round to 2 decimal places.
            (percent * 100.0).round() / 100.0
        }
    }
}
149
150impl<T: Serialize> IntoResponse for LlmResponse<T> {
151    fn into_response(self) -> Response {
152        // Always serialize to both formats for token counting
153        let json_result = serde_json::to_string(&self.data);
154        let toon_result = toon_format::encode_default(&self.data);
155
156        // Calculate token counts if enabled
157        let (json_tokens, toon_tokens, savings) = if self.include_token_headers {
158            let json_tokens = json_result
159                .as_ref()
160                .map(|s| estimate_tokens(s))
161                .unwrap_or(0);
162            let toon_tokens = toon_result
163                .as_ref()
164                .map(|s| estimate_tokens(s))
165                .unwrap_or(0);
166            let savings = calculate_savings(json_tokens, toon_tokens);
167            (Some(json_tokens), Some(toon_tokens), Some(savings))
168        } else {
169            (None, None, None)
170        };
171
172        // Serialize to the requested format
173        let (body, content_type) = match self.format {
174            OutputFormat::Json => match json_result {
175                Ok(json) => (json, JSON_CONTENT_TYPE),
176                Err(e) => {
177                    tracing::error!("Failed to serialize to JSON: {}", e);
178                    return ApiError::internal(format!("JSON serialization error: {}", e))
179                        .into_response();
180                }
181            },
182            OutputFormat::Toon => match toon_result {
183                Ok(toon) => (toon, TOON_CONTENT_TYPE),
184                Err(e) => {
185                    tracing::error!("Failed to serialize to TOON: {}", e);
186                    return ApiError::internal(format!("TOON serialization error: {}", e))
187                        .into_response();
188                }
189            },
190        };
191
192        // Build response with headers
193        let mut builder = http::Response::builder()
194            .status(StatusCode::OK)
195            .header(header::CONTENT_TYPE, content_type)
196            .header(
197                X_FORMAT_USED,
198                match self.format {
199                    OutputFormat::Json => "json",
200                    OutputFormat::Toon => "toon",
201                },
202            );
203
204        // Token counting headers
205        if let Some(json_tokens) = json_tokens {
206            builder = builder.header(X_TOKEN_COUNT_JSON, json_tokens.to_string());
207        }
208        if let Some(toon_tokens) = toon_tokens {
209            builder = builder.header(X_TOKEN_COUNT_TOON, toon_tokens.to_string());
210        }
211        if let Some(savings) = savings {
212            builder = builder.header(X_TOKEN_SAVINGS, format!("{:.2}%", savings));
213        }
214
215        builder
216            .body(rustapi_core::ResponseBody::from(body))
217            .unwrap()
218    }
219}
220
// OpenAPI support
impl<T: Send> OperationModifier for LlmResponse<T> {
    /// No-op: `LlmResponse` is a response-side type and contributes
    /// nothing to the request side of the operation (see
    /// `ResponseModifier` below for the response schema).
    fn update_operation(_op: &mut Operation) {
        // LlmResponse is a response type, no request body modification needed
    }
}
227
228impl<T: Serialize> ResponseModifier for LlmResponse<T> {
229    fn update_response(op: &mut Operation) {
230        let mut content = BTreeMap::new();
231
232        // JSON response
233        content.insert(
234            JSON_CONTENT_TYPE.to_string(),
235            MediaType {
236                schema: Some(SchemaRef::Inline(serde_json::json!({
237                    "type": "object",
238                    "description": "JSON formatted response with token counting headers"
239                }))),
240                example: None,
241            },
242        );
243
244        // TOON response
245        content.insert(
246            TOON_CONTENT_TYPE.to_string(),
247            MediaType {
248                schema: Some(SchemaRef::Inline(serde_json::json!({
249                    "type": "string",
250                    "description": "TOON (Token-Oriented Object Notation) formatted response with token counting headers"
251                }))),
252                example: None,
253            },
254        );
255
256        let response = ResponseSpec {
257            description: "LLM-optimized response with token counting headers (X-Token-Count-JSON, X-Token-Count-TOON, X-Token-Savings)".to_string(),
258            content,
259            headers: BTreeMap::new(),
260        };
261        op.responses.insert("200".to_string(), response);
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    #[derive(Serialize)]
    struct TestData {
        id: u64,
        name: String,
        active: bool,
    }

    /// Shared fixture used by the wrapper tests.
    fn sample() -> TestData {
        TestData {
            id: 1,
            name: "Test".to_string(),
            active: true,
        }
    }

    #[test]
    fn test_estimate_tokens() {
        // ~4 chars per token, rounded up.
        let cases = [("", 0), ("test", 1), ("hello world", 3), ("a", 1)];
        for (text, expected) in cases {
            assert_eq!(estimate_tokens(text), expected, "input: {:?}", text);
        }
    }

    #[test]
    fn test_calculate_savings() {
        assert_eq!(calculate_savings(100, 70), 30.0);
        assert_eq!(calculate_savings(100, 80), 20.0);
        assert_eq!(calculate_savings(100, 100), 0.0);
        assert_eq!(calculate_savings(0, 0), 0.0);
    }

    #[test]
    fn test_llm_response_json_format() {
        let response = LlmResponse::json(sample());
        assert!(matches!(response.format, OutputFormat::Json));
    }

    #[test]
    fn test_llm_response_toon_format() {
        let response = LlmResponse::toon(sample());
        assert!(matches!(response.format, OutputFormat::Toon));
    }

    #[test]
    fn test_llm_response_without_headers() {
        let response = LlmResponse::json(sample()).without_token_headers();
        assert!(!response.include_token_headers);
    }

    #[test]
    fn test_llm_response_with_headers() {
        // Re-enabling after disabling restores the default.
        let response = LlmResponse::toon(sample())
            .without_token_headers()
            .with_token_headers();
        assert!(response.include_token_headers);
    }
}
339}