zerobox_utils_string/
lib.rs1mod json;
2mod truncate;
3
4pub use json::to_ascii_json_string;
5pub use truncate::approx_bytes_for_tokens;
6pub use truncate::approx_token_count;
7pub use truncate::approx_tokens_from_byte_count;
8pub use truncate::truncate_middle_chars;
9pub use truncate::truncate_middle_with_token_budget;
10
/// Returns the longest prefix of `s` that is at most `maxb` bytes long and
/// ends on a UTF-8 character boundary.
///
/// If `s` already fits within `maxb` bytes it is returned unchanged. Otherwise
/// the cut point is moved backwards from `maxb` until it no longer splits a
/// character, so the result may be shorter than `maxb` bytes (and is empty
/// when even the first character does not fit).
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    // Here `maxb < s.len()`, so `maxb` is a valid offset to probe. A UTF-8
    // character is at most four bytes long, so this steps back at most three
    // times; offset 0 is always a boundary, which guarantees termination.
    let mut end = maxb;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
27
/// Produces a sanitized copy of `value` for use as a metric tag value.
///
/// Every character other than an ASCII letter, an ASCII digit, `.`, `_`, `-`
/// or `/` is replaced with `_`, and leading/trailing underscores are then
/// stripped. If what remains contains no ASCII alphanumeric character (which
/// includes the empty case), the literal `"unspecified"` is returned instead.
/// Otherwise the result is cut to at most 256 bytes.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;
    const FALLBACK: &str = "unspecified";

    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/');
        cleaned.push(if allowed { ch } else { '_' });
    }

    let core = cleaned.trim_matches('_');
    // An empty string has no alphanumerics either, so this one check covers
    // both the "nothing left" and the "only punctuation left" cases.
    if !core.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        return FALLBACK.to_string();
    }

    // `core` is pure ASCII after the replacement pass, so every byte offset
    // is a character boundary; `get` only yields `None` when `core` is
    // shorter than `MAX_LEN`, in which case it is kept whole.
    core.get(..MAX_LEN).unwrap_or(core).to_string()
}
52
53#[allow(clippy::unwrap_used)]
55pub fn find_uuids(s: &str) -> Vec<String> {
56 static RE: std::sync::OnceLock<regex_lite::Regex> = std::sync::OnceLock::new();
57 let re = RE.get_or_init(|| {
58 regex_lite::Regex::new(
59 r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}",
60 )
61 .unwrap() });
63
64 re.find_iter(s).map(|m| m.as_str().to_string()).collect()
65}
66
/// Converts a markdown-style `#L…` location fragment into a
/// `:line[:column][-line[:column]]` suffix.
///
/// Accepted inputs are `#L<line>`, `#L<line>C<column>`, and a range made of
/// two such points joined by `-` (for example `#L74C3-L76C9`, which becomes
/// `:74:3-76:9`). Each `<line>` and `<column>` must be a non-empty run of
/// ASCII digits.
///
/// Returns `None` when `suffix` does not start with `#`, when a point does
/// not start with `L`, or when a line/column component is empty or contains
/// anything other than ASCII digits.
pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
    let fragment = suffix.strip_prefix('#')?;
    // Only the first `-` separates start from end; any further `-` ends up in
    // the end point and is rejected by the digit validation below.
    let (start, end) = match fragment.split_once('-') {
        Some((start, end)) => (start, Some(end)),
        None => (fragment, None),
    };

    let (start_line, start_column) = parse_markdown_hash_location_point(start)?;
    let mut normalized = String::with_capacity(suffix.len());
    normalized.push(':');
    normalized.push_str(start_line);
    if let Some(column) = start_column {
        normalized.push(':');
        normalized.push_str(column);
    }

    if let Some(end) = end {
        let (end_line, end_column) = parse_markdown_hash_location_point(end)?;
        normalized.push('-');
        normalized.push_str(end_line);
        if let Some(column) = end_column {
            normalized.push(':');
            normalized.push_str(column);
        }
    }

    Some(normalized)
}

/// Splits a single `L<line>` or `L<line>C<column>` point into its line and
/// optional column, both returned as borrowed digit strings.
///
/// Returns `None` if the `L` prefix is missing or if either component is
/// empty or not made up solely of ASCII digits.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    fn is_decimal(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    let point = point.strip_prefix('L')?;
    match point.split_once('C') {
        Some((line, column)) if is_decimal(line) && is_decimal(column) => {
            Some((line, Some(column)))
        }
        None if is_decimal(point) => Some((point, None)),
        // Anything else would emit an empty or non-numeric location.
        _ => None,
    }
}
101
102#[cfg(test)]
103#[allow(warnings, clippy::all)]
104mod tests {
105 use super::find_uuids;
106 use super::normalize_markdown_hash_location_suffix;
107 use super::sanitize_metric_tag_value;
108 use pretty_assertions::assert_eq;
109
110 #[test]
111 fn find_uuids_finds_multiple() {
112 let input =
113 "x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef";
114 assert_eq!(
115 find_uuids(input),
116 vec![
117 "00112233-4455-6677-8899-aabbccddeeff".to_string(),
118 "12345678-90ab-cdef-0123-456789abcdef".to_string(),
119 ]
120 );
121 }
122
123 #[test]
124 fn find_uuids_ignores_invalid() {
125 let input = "not-a-uuid-1234-5678-9abc-def0-123456789abc";
126 assert_eq!(find_uuids(input), Vec::<String>::new());
127 }
128
129 #[test]
130 fn find_uuids_handles_non_ascii_without_overlap() {
131 let input = "🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc";
132 assert_eq!(
133 find_uuids(input),
134 vec!["55e5d6f7-8a7f-4d2a-8d88-123456789012".to_string()]
135 );
136 }
137
138 #[test]
139 fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
140 let msg = "///";
141 assert_eq!(sanitize_metric_tag_value(msg), "unspecified");
142 }
143
144 #[test]
145 fn sanitize_metric_tag_value_replaces_invalid_chars() {
146 let msg = "bad value!";
147 assert_eq!(sanitize_metric_tag_value(msg), "bad_value");
148 }
149
150 #[test]
151 fn normalize_markdown_hash_location_suffix_converts_single_location() {
152 assert_eq!(
153 normalize_markdown_hash_location_suffix("#L74C3"),
154 Some(":74:3".to_string())
155 );
156 }
157
158 #[test]
159 fn normalize_markdown_hash_location_suffix_converts_ranges() {
160 assert_eq!(
161 normalize_markdown_hash_location_suffix("#L74C3-L76C9"),
162 Some(":74:3-76:9".to_string())
163 );
164 }
165}