octoroute 1.0.0

Intelligent multi-model router for self-hosted LLMs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
//! OpenAI-compatible chat completions handler
//!
//! Handles POST /v1/chat/completions requests (both streaming and non-streaming).

use crate::error::AppError;
use crate::handlers::AppState;
use crate::middleware::RequestId;
use crate::shared::query::{
    QueryConfig, SamplingParams, execute_query_with_retry, query_model, record_routing_metrics,
};
use axum::{
    Extension, Json,
    extract::State,
    http::{HeaderName, HeaderValue},
    response::{IntoResponse, Response},
};

use super::extractor::OpenAiJson;
use super::find_endpoint_by_name;
use super::types::{
    ChatCompletion, ChatCompletionRequest, ModelChoice, TimestampResult, current_timestamp,
};

/// Custom header for surfacing non-fatal warnings to OpenAI API clients.
///
/// This header is added when the request succeeded but there were issues
/// that operators and clients should be aware of (e.g., health tracking failures).
///
/// The header value is a semicolon-separated list of warning messages.
/// Semicolons are used instead of commas since warning messages may contain commas.
///
/// Keep this value lowercase: it is passed to `HeaderName::from_static`, which
/// (per the `http` crate documentation) panics on names that are not valid
/// lowercase header names.
pub const X_OCTOROUTE_WARNING: &str = "x-octoroute-warning";

/// Serialize `body` as a JSON response and attach any warnings as a header.
///
/// When `warnings` is empty the plain JSON response is returned unchanged.
/// Otherwise the messages are joined with `"; "`, made header-safe (control
/// characters become spaces, non-ASCII characters become `'?'`), capped at
/// 500 bytes (497 bytes of content plus `"..."`), and stored under the
/// `X-Octoroute-Warning` header. Should the sanitized text still be rejected
/// by `HeaderValue::from_str`, a generic ASCII fallback carrying the warning
/// count is inserted instead and the rejected text is logged.
fn build_response_with_warnings<T: serde::Serialize>(body: T, warnings: &[String]) -> Response {
    let response = Json(body).into_response();
    if warnings.is_empty() {
        return response;
    }

    // "; " is the separator because individual messages may themselves contain commas.
    let joined = warnings.join("; ");

    // Header values must be visible ASCII (RFC 7230 Section 3.2.6). Rather than
    // dropping the whole warning, rewrite each offending character in place so
    // the remaining text stays readable.
    let mut header_text = String::with_capacity(joined.len());
    for ch in joined.chars() {
        header_text.push(match ch {
            // Checked first, so non-ASCII control characters also map to a space.
            c if c.is_control() => ' ',
            c if !c.is_ascii() => '?',
            c => c,
        });
    }

    // Every char is ASCII at this point, so byte offsets are char offsets and
    // cutting at byte 497 can never split a character.
    if header_text.len() > 500 {
        header_text.truncate(497);
        header_text.push_str("...");
    }

    let header_name = HeaderName::from_static(X_OCTOROUTE_WARNING);
    let (mut parts, payload) = response.into_parts();

    match HeaderValue::from_str(&header_text) {
        Ok(value) => {
            parts.headers.insert(header_name, value);
        }
        Err(_) => {
            // Not expected after sanitization; kept as a defensive path so a
            // rejected value still yields some warning header for the client.
            tracing::warn!(
                original_warning = %header_text,
                warning_length = header_text.len(),
                warnings_count = warnings.len(),
                "Warning header contains invalid HTTP characters even after sanitization, using fallback. \
                Original warning logged for debugging."
            );
            let generic = format!(
                "health-tracking-degraded; warnings-count={}",
                warnings.len()
            );
            // The formatted text is plain ASCII; the static value is a last resort.
            let value = HeaderValue::from_str(&generic)
                .unwrap_or_else(|_| HeaderValue::from_static("health-tracking-degraded"));
            parts.headers.insert(header_name, value);
        }
    }

    Response::from_parts(parts, payload)
}

/// POST /v1/chat/completions handler
///
/// OpenAI-compatible chat completions endpoint. Supports:
/// - Model selection via tier names (auto, fast, balanced, deep) or specific model names
/// - Automatic task type inference for routing
/// - Both streaming (SSE) and non-streaming responses
///
/// # Model Selection
///
/// The `model` field can be:
/// - `"auto"` - Use LLM/hybrid routing to select tier
/// - `"fast"` / `"balanced"` / `"deep"` - Route directly to that tier
/// - Specific model name (e.g., `"qwen3-8b"`) - Find endpoint by name, bypass routing
///
/// # Response Format
///
/// **Non-streaming** (`stream: false` or omitted):
/// Returns OpenAI-compatible JSON response with:
/// - `id`: Unique completion ID
/// - `object`: "chat.completion"
/// - `created`: Unix timestamp
/// - `model`: Name of model used
/// - `choices`: Array with assistant message and finish_reason
/// - `usage`: Token usage statistics (estimated)
///
/// **Streaming** (`stream: true`):
/// Returns Server-Sent Events stream with chunks containing:
/// - Initial: role announcement (`delta.role: "assistant"`)
/// - Content: text deltas (`delta.content: "..."`)
/// - Finish: completion signal (`finish_reason: "stop"`)
/// - Done: `data: [DONE]`
///
/// # Warnings
///
/// Non-fatal problems on the non-streaming paths (a failed `mark_success`
/// health update for a specific-model query, warnings returned by
/// `execute_query_with_retry`, or a warning from `current_timestamp`) do not
/// fail the request; they are passed to `build_response_with_warnings`, which
/// reports them in the `X-Octoroute-Warning` response header.
///
/// # Errors
///
/// Returns an `AppError` when:
/// - the streaming handler fails (its result is returned as-is),
/// - `find_endpoint_by_name` cannot resolve a specific model name,
/// - `query_model` fails for a specific-model request (after attempting to
///   mark the endpoint as failed),
/// - the router fails to produce a decision for `"auto"`,
/// - `execute_query_with_retry` fails for a tier-based request.
pub async fn handler(
    State(state): State<AppState>,
    Extension(request_id): Extension<RequestId>,
    OpenAiJson(request): OpenAiJson<ChatCompletionRequest>,
) -> Result<Response, AppError> {
    tracing::debug!(
        request_id = %request_id,
        model = ?request.model(),
        messages_count = request.messages().len(),
        stream = request.stream(),
        "Received chat completions request"
    );

    // Dispatch to streaming handler if requested
    if request.stream() {
        return super::streaming::handler(State(state), Extension(request_id), Json(request)).await;
    }

    // Convert messages to a single prompt for routing and query
    let prompt = request.to_prompt_string();
    // Character count (not byte length) of the prompt; handed to `ChatCompletion::new` below.
    let prompt_chars = prompt.chars().count();

    // Extract sampling parameters from request (overrides endpoint defaults)
    let sampling_params = SamplingParams {
        temperature: request.temperature(),
        max_tokens: request.max_tokens(),
    };

    // Handle specific model requests differently - query the exact endpoint requested
    if let ModelChoice::Specific(name) = request.model() {
        // Find and use the specific endpoint (no tier selection)
        let (tier, endpoint) = find_endpoint_by_name(state.config(), name)?;

        tracing::info!(
            request_id = %request_id,
            model_name = %name,
            endpoint_name = %endpoint.name(),
            target_tier = ?tier,
            "Specific model selection - querying endpoint directly"
        );

        // Record routing metrics for observability parity with tier-based routing
        // Creates a synthetic RoutingDecision since no actual routing occurred
        let decision =
            crate::router::RoutingDecision::new(tier, crate::router::RoutingStrategy::Rule);
        record_routing_metrics(&state, &decision, 0.0, request_id);

        // Query the specific endpoint directly (no retry to different endpoints)
        let timeout_seconds = state.config().timeout_for_tier(tier);
        // NOTE(review): the two literal `1` arguments look like "attempt 1 of 1"
        // for this single, non-retried query - confirm against `query_model`.
        let content = match query_model(
            &endpoint,
            &prompt,
            timeout_seconds,
            request_id,
            1,
            1,
            Some(&sampling_params),
        )
        .await
        {
            Ok(content) => content,
            Err(e) => {
                // Mark endpoint as failed for health tracking (parity with tier-based routing)
                // A failure of the health update itself is only logged and counted;
                // the original query error is what gets returned to the caller.
                if let Err(health_err) = state
                    .selector()
                    .health_checker()
                    .mark_failure(endpoint.name())
                    .await
                {
                    tracing::warn!(
                        request_id = %request_id,
                        endpoint_name = %endpoint.name(),
                        query_error = %e,
                        health_error = %health_err,
                        "Health tracking failed while marking endpoint failure"
                    );
                    state
                        .metrics()
                        .health_tracking_failure(endpoint.name(), health_err.error_type());
                }
                return Err(e);
            }
        };

        // Record model invocation for observability (same as tier-based routing)
        let tier_enum = match tier {
            crate::router::TargetModel::Fast => crate::metrics::Tier::Fast,
            crate::router::TargetModel::Balanced => crate::metrics::Tier::Balanced,
            crate::router::TargetModel::Deep => crate::metrics::Tier::Deep,
        };
        // A metrics failure is logged and counted but never fails the request.
        if let Err(e) = state.metrics().record_model_invocation(tier_enum) {
            state
                .metrics()
                .metrics_recording_failure("record_model_invocation");
            tracing::error!(
                request_id = %request_id,
                error = %e,
                tier = ?tier_enum,
                "Metrics recording failed. Observability degraded but request continues."
            );
        }

        // Mark endpoint as healthy on success, collect warnings
        let mut warnings: Vec<String> = Vec::new();
        if let Err(e) = state
            .selector()
            .health_checker()
            .mark_success(endpoint.name())
            .await
        {
            tracing::warn!(
                request_id = %request_id,
                endpoint_name = %endpoint.name(),
                error = %e,
                "Health tracking failed for specific model query"
            );
            // Record in metrics for observability parity with tier-based routing
            state
                .metrics()
                .health_tracking_failure(endpoint.name(), e.error_type());
            // Surface to client via warning header
            warnings.push(format!(
                "Health tracking failed: {} (endpoint health state may be stale)",
                e
            ));
        }

        // The timestamp helper may itself report a warning; forward it to the client too.
        let TimestampResult {
            timestamp: created,
            warning: clock_warning,
        } = current_timestamp(Some(state.metrics().as_ref()), Some(&request_id));
        if let Some(w) = clock_warning {
            warnings.push(w);
        }
        let response =
            ChatCompletion::new(content, endpoint.name().to_string(), prompt_chars, created);

        tracing::info!(
            request_id = %request_id,
            model = %response.model,
            response_length = response.choices[0].message.content().len(),
            warnings_count = warnings.len(),
            "Chat completion successful (specific model)"
        );

        return Ok(build_response_with_warnings(response, &warnings));
    }

    // For tier-based routing (auto, fast, balanced, deep)
    let decision = match request.model() {
        ModelChoice::Auto => {
            // Use router to determine tier (auto-detection)
            let metadata = request.to_route_metadata();
            // Time only the routing call so the recorded duration excludes the model query.
            let routing_start = std::time::Instant::now();
            let decision = state
                .router()
                .route(&prompt, &metadata, state.selector())
                .await?;
            let routing_duration_ms = routing_start.elapsed().as_secs_f64() * 1000.0;

            tracing::info!(
                request_id = %request_id,
                target_tier = ?decision.target(),
                routing_strategy = ?decision.strategy(),
                routing_duration_ms = %routing_duration_ms,
                "Routing decision made (auto)"
            );

            record_routing_metrics(&state, &decision, routing_duration_ms, request_id);
            decision
        }
        ModelChoice::Fast | ModelChoice::Balanced | ModelChoice::Deep => {
            // Direct tier selection (bypass routing)
            // Convert model choice to target tier - match arm guarantees this succeeds
            let tier = match request.model() {
                ModelChoice::Fast => crate::router::TargetModel::Fast,
                ModelChoice::Balanced => crate::router::TargetModel::Balanced,
                ModelChoice::Deep => crate::router::TargetModel::Deep,
                _ => unreachable!("outer match arm guarantees Fast/Balanced/Deep"),
            };
            let decision =
                crate::router::RoutingDecision::new(tier, crate::router::RoutingStrategy::Rule);

            tracing::info!(
                request_id = %request_id,
                target_tier = ?tier,
                "Direct tier selection (no routing)"
            );

            // Record metrics for observability parity with auto-routing
            // Duration is 0.0 since no actual routing computation happens
            record_routing_metrics(&state, &decision, 0.0, request_id);
            decision
        }
        // Specific-model requests returned from the `if let` block earlier in this function.
        ModelChoice::Specific(_) => unreachable!("handled above"),
    };

    // Execute query with retry logic (selects from tier)
    let config = QueryConfig::default();
    let result = execute_query_with_retry(
        &state,
        &decision,
        &prompt,
        request_id,
        &config,
        Some(&sampling_params),
    )
    .await?;

    // Use the endpoint that was actually selected
    let response_model = result.endpoint.name().to_string();

    // Collect all warnings (from query + clock)
    let mut warnings = result.warnings;
    let TimestampResult {
        timestamp: created,
        warning: clock_warning,
    } = current_timestamp(Some(state.metrics().as_ref()), Some(&request_id));
    if let Some(w) = clock_warning {
        warnings.push(w);
    }

    // Build OpenAI-compatible response
    let response = ChatCompletion::new(result.content, response_model, prompt_chars, created);

    tracing::info!(
        request_id = %request_id,
        model = %response.model,
        response_length = response.choices[0].message.content().len(),
        warnings_count = warnings.len(),
        "Chat completion successful"
    );

    // Return response with warning header if there were non-fatal issues
    Ok(build_response_with_warnings(response, &warnings))
}

#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;

    // NOTE: Non-ASCII fixtures below are written as `\u{...}` escapes so this
    // module stays pure ASCII and the test data cannot be corrupted by a
    // mis-decoded source file.
    //
    //   U+00E9  Latin small letter e with acute (2 bytes in UTF-8)
    //   U+4E2D  CJK ideograph "middle"          (3 bytes in UTF-8)
    //   U+5317 U+4EAC  "Beijing"                (3 bytes each in UTF-8)
    //   U+1F991 squid emoji                     (4 bytes in UTF-8)
    //   U+1F534 / U+1F7E1 / U+1F7E2  red / yellow / green circle emoji

    // -------------------------------------------------------------------------
    // Warning Header Truncation Tests
    // -------------------------------------------------------------------------

    /// Helper to extract the warning sanitization and truncation logic for testing.
    /// Returns the sanitized and truncated warning string that would be used in the header.
    /// This mirrors the full logic in `build_response_with_warnings`:
    /// 1. Join warnings with "; "
    /// 2. Sanitize non-ASCII -> '?' and control chars -> ' '
    /// 3. Truncate to 500 bytes (safe because all chars are ASCII after sanitization)
    fn truncate_warning_for_header(warnings: &[String]) -> String {
        let warning_value = warnings.join("; ");

        // Sanitize (mirrors production): non-ASCII -> '?', control -> ' '
        let warning_value: String = warning_value
            .chars()
            .map(|c| {
                if c.is_control() && c != ' ' {
                    ' '
                } else if !c.is_ascii() {
                    '?'
                } else {
                    c
                }
            })
            .collect();

        // Truncate using byte length (safe because all chars are now ASCII)
        if warning_value.len() > 500 {
            format!("{}...", &warning_value[..497])
        } else {
            warning_value
        }
    }

    #[test]
    fn test_warning_truncation_ascii_only() {
        // ASCII-only string, truncation is safe at any byte boundary
        let long_warning = "a".repeat(600);
        let result = truncate_warning_for_header(&[long_warning]);

        assert_eq!(result.len(), 500); // 497 + "..."
        assert!(result.ends_with("..."));
        assert!(result.is_char_boundary(result.len())); // Valid UTF-8
    }

    #[test]
    fn test_warning_truncation_sanitizes_multibyte_to_placeholder() {
        // Multi-byte chars (like emoji) are sanitized to '?' BEFORE truncation.
        // This test verifies the sanitization + truncation pipeline.

        // Strategy: Fill with ASCII up to position 495, then add a 4-byte emoji
        let prefix = "x".repeat(495);
        // Squid emoji: 4 bytes in UTF-8, but becomes 1 char '?' after sanitization
        let emoji = "\u{1F991}";
        let suffix = "y".repeat(100);
        let warning = format!("{}{}{}", prefix, emoji, suffix);

        let result = truncate_warning_for_header(&[warning]);

        // The emoji should have been sanitized to '?'
        // Total: 495 + 1 + 100 = 596 chars (all ASCII after sanitization)
        // Should be truncated to 497 + "..." = 500 chars

        // Result must be valid UTF-8 (trivially true since all ASCII)
        assert!(
            std::str::from_utf8(result.as_bytes()).is_ok(),
            "Truncated warning must be valid UTF-8"
        );

        // Result should end with "..."
        assert!(
            result.ends_with("..."),
            "Truncated warning should end with ..."
        );

        // Result should be exactly 500 chars (byte length equals char count for ASCII)
        assert_eq!(
            result.len(),
            500,
            "Truncated warning should be exactly 500 bytes"
        );
        assert!(
            result.is_ascii(),
            "Result should be ASCII after sanitization"
        );
    }

    #[test]
    fn test_warning_truncation_chinese_characters_become_placeholders() {
        // Chinese characters are 3 bytes each in UTF-8, but get sanitized to '?'
        // After sanitization: 600 '?' chars = 600 bytes
        // Truncation: 497 + "..." = 500 bytes
        let chinese = "\u{4E2D}".repeat(600); // 600 chars, 1800 bytes
        let result = truncate_warning_for_header(&[chinese]);

        // Must be valid UTF-8 (trivially true - all '?' and '.')
        assert!(
            std::str::from_utf8(result.as_bytes()).is_ok(),
            "Truncated text must be valid UTF-8"
        );

        assert!(result.ends_with("..."));
        // After sanitization: 600 '?' chars, truncated to 497 + "..." = 500 bytes
        assert_eq!(result.len(), 500);
        assert!(
            result.is_ascii(),
            "Result should be ASCII after sanitization"
        );
        // First 497 chars should all be '?'
        assert!(
            result[..497].chars().all(|c| c == '?'),
            "Chinese chars should become '?'"
        );
    }

    #[test]
    fn test_warning_truncation_mixed_multibyte_sanitizes_non_ascii() {
        // Mix of 1-byte (ASCII), 2-byte (U+00E9), 3-byte (U+4E2D), and 4-byte (U+1F991) chars
        // Non-ASCII chars get sanitized to '?' before truncation
        let mixed = format!(
            "{}{}{}{}{}",
            "a".repeat(200),         // 200 chars, stays 'a'
            "\u{E9}".repeat(150),    // 150 chars, becomes '?'
            "\u{4E2D}".repeat(100),  // 100 chars, becomes '?'
            "\u{1F991}".repeat(50),  // 50 chars, becomes '?'
            "z".repeat(50)           // 50 chars, stays 'z'
        ); // Total after sanitization: 550 ASCII chars

        assert!(
            mixed.chars().count() > 500,
            "Test setup: need >500 chars to trigger truncation"
        );

        let result = truncate_warning_for_header(&[mixed]);

        assert!(
            std::str::from_utf8(result.as_bytes()).is_ok(),
            "Truncated mixed text must be valid UTF-8"
        );
        assert!(result.ends_with("..."));
        // After sanitization, all 550 chars become ASCII, truncated to 500 bytes
        assert_eq!(result.len(), 500);
        assert!(
            result.is_ascii(),
            "Result should be ASCII after sanitization"
        );

        // Verify structure: first 200 'a', then '?' (from the sanitized non-ASCII run)
        assert!(
            result.starts_with(&"a".repeat(200)),
            "First 200 should be 'a'"
        );
        assert!(
            result[200..201].chars().all(|c| c == '?'),
            "After 'a's should be '?' from sanitized \u{E9}"
        );
    }

    #[test]
    fn test_warning_under_limit_not_truncated() {
        let short = "This is a short warning";
        let result = truncate_warning_for_header(&[short.to_string()]);

        assert_eq!(result, short);
        assert!(!result.ends_with("..."));
    }

    #[test]
    fn test_warning_exactly_500_chars_not_truncated() {
        let exactly_500 = "a".repeat(500);
        let result = truncate_warning_for_header(std::slice::from_ref(&exactly_500));

        assert_eq!(result, exactly_500);
        assert!(!result.ends_with("..."));
    }

    #[test]
    fn test_warning_501_chars_gets_truncated() {
        let chars_501 = "a".repeat(501);
        let result = truncate_warning_for_header(&[chars_501]);

        assert!(result.ends_with("..."));
        assert_eq!(result.chars().count(), 500);
    }

    // -------------------------------------------------------------------------
    // Invalid Header Character Fallback Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_build_response_with_valid_warning_header() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        let warnings = vec!["valid warning message".to_string()];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_some(), "Warning header should be present");
        assert_eq!(header.unwrap().to_str().unwrap(), "valid warning message");
    }

    #[test]
    fn test_build_response_with_newline_sanitizes() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        // Newline is invalid in HTTP headers - should be sanitized to space
        let warnings = vec!["warning with\nnewline".to_string()];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_some(), "Warning header should still be present");
        // Newline should be replaced with space, preserving the message
        assert_eq!(header.unwrap().to_str().unwrap(), "warning with newline");
    }

    #[test]
    fn test_build_response_with_control_char_sanitizes() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        // Null byte is invalid in HTTP headers - should be sanitized to space
        let warnings = vec!["warning with\x00null".to_string()];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_some());
        // Control characters should be replaced with spaces, preserving the message
        assert_eq!(header.unwrap().to_str().unwrap(), "warning with null");
    }

    #[test]
    fn test_build_response_empty_warnings_no_header() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        let warnings: Vec<String> = vec![];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_none(), "No warning header when warnings empty");
    }

    #[test]
    fn test_build_response_with_non_ascii_sanitizes() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        // Non-ASCII characters (two Chinese characters and a squid emoji) should be
        // sanitized to preserve the message
        let warnings = vec!["Health check failed for \u{5317}\u{4EAC}-server \u{1F991}".to_string()];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_some(), "Warning header should be present");

        let value = header.unwrap().to_str().unwrap();
        // Should NOT fall back to generic message - should preserve content
        assert_ne!(
            value, "health-tracking-degraded",
            "Should not fall back to generic message, should sanitize non-ASCII"
        );
        // Should contain the sanitized message
        assert!(
            value.contains("Health check failed"),
            "Should preserve ASCII content. Got: {}",
            value
        );
        // Each non-ASCII character maps to exactly one '?' placeholder
        assert_eq!(
            value, "Health check failed for ??-server ?",
            "Each non-ASCII char should become a single '?'"
        );
    }

    #[test]
    fn test_build_response_with_emoji_sanitizes() {
        use axum::http::HeaderName;

        let body = serde_json::json!({"test": "value"});
        // Emoji (red, yellow, green circles) should be sanitized, each replaced
        // with a placeholder
        let warnings = vec!["Error \u{1F534} warning \u{1F7E1} info \u{1F7E2}".to_string()];

        let response = build_response_with_warnings(body, &warnings);

        let header = response
            .headers()
            .get(HeaderName::from_static(X_OCTOROUTE_WARNING));
        assert!(header.is_some());

        let value = header.unwrap().to_str().unwrap();
        // Should preserve ASCII content
        assert!(
            value.contains("Error"),
            "Should preserve 'Error'. Got: {}",
            value
        );
        assert!(
            value.contains("warning"),
            "Should preserve 'warning'. Got: {}",
            value
        );
        assert!(
            value.contains("info"),
            "Should preserve 'info'. Got: {}",
            value
        );
    }
}