Skip to main content

subx_cli/services/ai/
error_sanitizer.rs

//! Error message sanitization helpers for AI service clients.
//!
//! These helpers prevent leaking sensitive information (overly long error
//! bodies, URL query strings that may contain API keys or tokens) into
//! user-facing error messages.
6
7use std::sync::OnceLock;
8
9use regex::Regex;
10
/// Default cap on how much of an upstream error body is preserved.
///
/// When passed as `max_len` to [`truncate_error_body`], the unit is bytes of
/// UTF-8, not Unicode scalar values.
pub const DEFAULT_ERROR_BODY_MAX_LEN: usize = 500;

/// Shorten an error response body so that at most `max_len` bytes of it remain.
///
/// A body whose byte length is already within `max_len` is returned as an
/// unchanged copy. Otherwise the body is cut at the largest UTF-8 character
/// boundary that does not exceed `max_len`, and `"... (truncated)"` is
/// appended. The marker is not counted against `max_len`, so the returned
/// string may be longer than `max_len` bytes.
pub fn truncate_error_body(body: &str, max_len: usize) -> String {
    if body.len() > max_len {
        // Search backwards from `max_len` for a byte offset that does not fall
        // inside a multi-byte character. Offset 0 is always a boundary, so the
        // search always succeeds; `unwrap_or(0)` only avoids a panic path.
        let end = (0..=max_len)
            .rev()
            .find(|&idx| body.is_char_boundary(idx))
            .unwrap_or(0);

        let mut shortened = String::from(&body[..end]);
        shortened.push_str("... (truncated)");
        shortened
    } else {
        body.to_owned()
    }
}
29
30fn url_regex() -> &'static Regex {
31    static RE: OnceLock<Regex> = OnceLock::new();
32    RE.get_or_init(|| Regex::new(r"(https?://[^\s?#]+)\?[^\s]*").expect("valid URL regex"))
33}
34
35/// Strip query parameters from any URLs appearing in an error message.
36///
37/// Query strings can contain sensitive material such as API keys or session
38/// tokens (e.g. Azure AD access tokens passed via `?api-key=...`). This
39/// helper removes everything from the `?` up to the next whitespace
40/// character, leaving the URL path intact for debugging purposes.
41pub fn sanitize_url_in_error(msg: &str) -> String {
42    url_regex().replace_all(msg, "$1").into_owned()
43}
44
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker that `truncate_error_body` appends whenever it shortens its input.
    const SUFFIX: &str = "... (truncated)";

    /// A body that already fits within the limit comes back verbatim.
    #[test]
    fn truncate_shorter_than_limit_is_unchanged() {
        let result = truncate_error_body("abc", 500);
        assert_eq!(result, "abc");
    }

    /// An oversized ASCII body keeps exactly `max_len` bytes plus the marker.
    #[test]
    fn truncate_longer_is_cut_and_marked() {
        let oversized = "a".repeat(600);
        let result = truncate_error_body(&oversized, 500);
        assert!(result.ends_with(SUFFIX));
        assert_eq!(result.len(), 500 + SUFFIX.len());
    }

    /// Cutting a multi-byte body must not split a character.
    #[test]
    fn truncate_respects_utf8_boundaries() {
        // "漢" is 3 bytes in UTF-8, so byte offset 500 (= 166 * 3 + 2) lands
        // in the middle of a character and the cut has to back up.
        let multibyte = "漢".repeat(300);
        let result = truncate_error_body(&multibyte, 500);
        assert!(result.ends_with(SUFFIX));
        // Slicing off a boundary would have panicked before reaching here.
        assert!(result.contains('漢'));
    }

    /// The query string, including the secret it carries, is removed.
    #[test]
    fn sanitize_strips_query_string() {
        let raw = "request failed for https://api.example.com/v1/chat?api-key=sk-test-key-12345";
        let cleaned = sanitize_url_in_error(raw);
        assert_eq!(cleaned, "request failed for https://api.example.com/v1/chat");
        assert!(!cleaned.contains("sk-test-key"));
    }

    /// A URL without a query string passes through untouched.
    #[test]
    fn sanitize_leaves_urls_without_query_alone() {
        let raw = "timeout on https://api.example.com/v1/chat";
        let cleaned = sanitize_url_in_error(raw);
        assert_eq!(cleaned, raw);
    }

    /// Only the query is dropped; text after the following whitespace survives.
    #[test]
    fn sanitize_preserves_trailing_text_after_whitespace() {
        let raw = "error at https://x.test/a?token=secret while retrying";
        let cleaned = sanitize_url_in_error(raw);
        assert_eq!(cleaned, "error at https://x.test/a while retrying");
    }
}