Skip to main content

zerobox_utils_string/
lib.rs

1mod json;
2mod truncate;
3
4pub use json::to_ascii_json_string;
5pub use truncate::approx_bytes_for_tokens;
6pub use truncate::approx_token_count;
7pub use truncate::approx_tokens_from_byte_count;
8pub use truncate::truncate_middle_chars;
9pub use truncate::truncate_middle_with_token_budget;
10
/// Returns the longest prefix of `s` that occupies at most `maxb` bytes and
/// ends on a UTF-8 character boundary.
///
/// When `s` already fits within `maxb` bytes it is returned unchanged. The
/// result may be empty if even the first character is wider than `maxb`.
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if maxb >= s.len() {
        return s;
    }
    // `maxb` is strictly inside the string here. Step backwards until the cut
    // lands on a character boundary; index 0 always is one, so the loop ends.
    let mut cut = maxb;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    &s[..cut]
}
27
/// Sanitize a tag value to comply with metric tag validation rules:
/// only ASCII alphanumeric, '.', '_', '-', and '/' are allowed.
///
/// Every other character is replaced with `_`, leading and trailing
/// underscores are stripped, and the result is capped at 256 bytes. If no
/// ASCII alphanumeric character survives, `"unspecified"` is returned.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;
    let sanitized: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/') {
                ch
            } else {
                '_'
            }
        })
        .collect();
    let trimmed = sanitized.trim_matches('_');
    // `sanitized` contains only ASCII, so any byte index is a char boundary;
    // `get` yields `None` only when the value is already within the limit.
    let capped = trimmed.get(..MAX_LEN).unwrap_or(trimmed);
    // Cutting can expose a trailing '_' that the first trim could not see, so
    // trim the end again. This is a no-op when nothing was cut.
    let capped = capped.trim_end_matches('_');
    // Validate what is actually returned: the cut may have removed the only
    // alphanumeric characters the value had.
    if !capped.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        return "unspecified".to_string();
    }
    capped.to_string()
}
52
53/// Find all UUIDs in a string.
54#[allow(clippy::unwrap_used)]
55pub fn find_uuids(s: &str) -> Vec<String> {
56    static RE: std::sync::OnceLock<regex_lite::Regex> = std::sync::OnceLock::new();
57    let re = RE.get_or_init(|| {
58        regex_lite::Regex::new(
59            r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}",
60        )
61        .unwrap() // Unwrap is safe thanks to the tests.
62    });
63
64    re.find_iter(s).map(|m| m.as_str().to_string()).collect()
65}
66
67/// Convert a markdown-style `#L..` location suffix into a terminal-friendly
68/// `:line[:column][-line[:column]]` suffix.
69pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
70    let fragment = suffix.strip_prefix('#')?;
71    let (start, end) = match fragment.split_once('-') {
72        Some((start, end)) => (start, Some(end)),
73        None => (fragment, None),
74    };
75    let (start_line, start_column) = parse_markdown_hash_location_point(start)?;
76    let mut normalized = String::from(":");
77    normalized.push_str(start_line);
78    if let Some(column) = start_column {
79        normalized.push(':');
80        normalized.push_str(column);
81    }
82    if let Some(end) = end {
83        let (end_line, end_column) = parse_markdown_hash_location_point(end)?;
84        normalized.push('-');
85        normalized.push_str(end_line);
86        if let Some(column) = end_column {
87            normalized.push(':');
88            normalized.push_str(column);
89        }
90    }
91    Some(normalized)
92}
93
/// Parse a single `L<line>` or `L<line>C<column>` point.
///
/// Returns the line text and, when a `C` separator is present, the column
/// text, both borrowed from `point`.
///
/// Returns `None` when `point` does not start with `L`, or when the line or
/// column is empty or contains anything other than ASCII digits. Without this
/// check, inputs such as `"L"`, `"Lfoo"` or `"L74C"` would be accepted and
/// passed on verbatim.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    /// True for a non-empty run of ASCII digits.
    fn is_number(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    let point = point.strip_prefix('L')?;
    let (line, column) = match point.split_once('C') {
        Some((line, column)) => (line, Some(column)),
        None => (point, None),
    };
    if !is_number(line) {
        return None;
    }
    if let Some(column) = column {
        if !is_number(column) {
            return None;
        }
    }
    Some((line, column))
}
101
#[cfg(test)]
#[allow(warnings, clippy::all)]
mod tests {
    use super::find_uuids;
    use super::normalize_markdown_hash_location_suffix;
    use super::sanitize_metric_tag_value;
    use pretty_assertions::assert_eq;

    // Two UUIDs separated by other text are both reported, in input order,
    // even when one is immediately followed by more characters.
    #[test]
    fn find_uuids_finds_multiple() {
        let found = find_uuids(
            "x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef",
        );
        let expected = vec![
            String::from("00112233-4455-6677-8899-aabbccddeeff"),
            String::from("12345678-90ab-cdef-0123-456789abcdef"),
        ];
        assert_eq!(found, expected);
    }

    // Hyphenated text with the wrong group shape yields no matches.
    #[test]
    fn find_uuids_ignores_invalid() {
        let found = find_uuids("not-a-uuid-1234-5678-9abc-def0-123456789abc");
        assert_eq!(found, Vec::<String>::new());
    }

    // A multi-byte character before the UUID and extra hex digits after it
    // do not disturb the match.
    #[test]
    fn find_uuids_handles_non_ascii_without_overlap() {
        let found = find_uuids("🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc");
        let expected = vec![String::from("55e5d6f7-8a7f-4d2a-8d88-123456789012")];
        assert_eq!(found, expected);
    }

    // A value with no alphanumeric characters collapses to the placeholder.
    #[test]
    fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
        assert_eq!(sanitize_metric_tag_value("///"), "unspecified");
    }

    // Disallowed characters become '_' and the trailing one is trimmed.
    #[test]
    fn sanitize_metric_tag_value_replaces_invalid_chars() {
        assert_eq!(sanitize_metric_tag_value("bad value!"), "bad_value");
    }

    // A single line/column point.
    #[test]
    fn normalize_markdown_hash_location_suffix_converts_single_location() {
        let normalized = normalize_markdown_hash_location_suffix("#L74C3");
        assert_eq!(normalized, Some(String::from(":74:3")));
    }

    // A start-end range with columns on both points.
    #[test]
    fn normalize_markdown_hash_location_suffix_converts_ranges() {
        let normalized = normalize_markdown_hash_location_suffix("#L74C3-L76C9");
        assert_eq!(normalized, Some(String::from(":74:3-76:9")));
    }
}