zerobox_utils_string/
lib.rs1mod truncate;
2
3pub use truncate::approx_bytes_for_tokens;
4pub use truncate::approx_token_count;
5pub use truncate::approx_tokens_from_byte_count;
6pub use truncate::truncate_middle_chars;
7pub use truncate::truncate_middle_with_token_budget;
8
/// Returns the longest prefix of `s` that is at most `maxb` bytes long and
/// ends on a UTF-8 character boundary.
///
/// When `s` already fits within `maxb` bytes it is returned unchanged. A
/// multi-byte character that would straddle the limit is dropped entirely, so
/// the result can be shorter than `maxb` bytes (and is empty when not even the
/// first character fits).
#[inline]
pub fn take_bytes_at_char_boundary(s: &str, maxb: usize) -> &str {
    if s.len() <= maxb {
        return s;
    }
    // Here `maxb < s.len()`, so every probed index is in range. A UTF-8
    // character is at most four bytes long, so walking back from `maxb` takes
    // at most three steps instead of scanning the whole prefix. Index 0 is
    // always a boundary, which guarantees termination without underflow.
    let mut end = maxb;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
25
/// Normalizes `value` into a string usable as a metric tag value.
///
/// The steps are applied in this order:
///
/// 1. Every character other than an ASCII letter or digit, `.`, `_`, `-` or
///    `/` is replaced with `_`.
/// 2. Leading and trailing `_` are removed.
/// 3. The result is capped at 256 bytes, and any `_` left dangling at the cut
///    is removed.
/// 4. If no ASCII alphanumeric character remains, `"unspecified"` is returned.
///
/// The cap is applied *before* the final validation so that the returned
/// value always satisfies the same rules regardless of the input length.
pub fn sanitize_metric_tag_value(value: &str) -> String {
    const MAX_LEN: usize = 256;
    const FALLBACK: &str = "unspecified";

    let sanitized: String = value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/') {
                ch
            } else {
                '_'
            }
        })
        .collect();

    // Trim the edges first so leading filler does not consume the length budget.
    let trimmed = sanitized.trim_matches('_');

    // `sanitized` only contains ASCII, so `MAX_LEN` is always a char boundary
    // when it is in range; `get` yields `None` only for shorter strings.
    let capped = trimmed.get(..MAX_LEN).unwrap_or(trimmed);

    // Cutting can expose a trailing `_` or drop the only alphanumeric
    // characters, so re-trim and validate the final value, not the uncut one.
    let capped = capped.trim_end_matches('_');

    if capped.chars().any(|ch| ch.is_ascii_alphanumeric()) {
        capped.to_string()
    } else {
        FALLBACK.to_string()
    }
}
50
51#[allow(clippy::unwrap_used)]
53pub fn find_uuids(s: &str) -> Vec<String> {
54 static RE: std::sync::OnceLock<regex_lite::Regex> = std::sync::OnceLock::new();
55 let re = RE.get_or_init(|| {
56 regex_lite::Regex::new(
57 r"[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}",
58 )
59 .unwrap() });
61
62 re.find_iter(s).map(|m| m.as_str().to_string()).collect()
63}
64
65pub fn normalize_markdown_hash_location_suffix(suffix: &str) -> Option<String> {
68 let fragment = suffix.strip_prefix('#')?;
69 let (start, end) = match fragment.split_once('-') {
70 Some((start, end)) => (start, Some(end)),
71 None => (fragment, None),
72 };
73 let (start_line, start_column) = parse_markdown_hash_location_point(start)?;
74 let mut normalized = String::from(":");
75 normalized.push_str(start_line);
76 if let Some(column) = start_column {
77 normalized.push(':');
78 normalized.push_str(column);
79 }
80 if let Some(end) = end {
81 let (end_line, end_column) = parse_markdown_hash_location_point(end)?;
82 normalized.push('-');
83 normalized.push_str(end_line);
84 if let Some(column) = end_column {
85 normalized.push(':');
86 normalized.push_str(column);
87 }
88 }
89 Some(normalized)
90}
91
/// Splits one location point such as `L74` or `L74C3` into its line and
/// optional column components, returned as sub-slices of `point`.
///
/// Returns `None` when `point` does not start with `L`, or when the line or
/// the column (if a `C` is present) is empty or contains anything other than
/// ASCII digits. Rejecting those inputs keeps malformed fragments such as
/// `L`, `Lfoo` or `L74C` from being turned into a bogus location.
fn parse_markdown_hash_location_point(point: &str) -> Option<(&str, Option<&str>)> {
    /// True when `text` is a non-empty run of ASCII digits.
    fn is_number(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|byte| byte.is_ascii_digit())
    }

    let rest = point.strip_prefix('L')?;
    let (line, column) = match rest.split_once('C') {
        Some((line, column)) => (line, Some(column)),
        None => (rest, None),
    };

    if !is_number(line) {
        return None;
    }
    if let Some(column) = column {
        // A second `C` ends up inside `column` and is rejected here as well.
        if !is_number(column) {
            return None;
        }
    }
    Some((line, column))
}
99
/// Unit tests for the string helpers defined in this file.
// NOTE(review): the blanket `allow` below is kept as found; consider narrowing
// it to the specific lints the tests actually need.
#[cfg(test)]
#[allow(warnings, clippy::all)]
mod tests {
    use super::find_uuids;
    use super::normalize_markdown_hash_location_suffix;
    use super::sanitize_metric_tag_value;
    use super::take_bytes_at_char_boundary;
    use pretty_assertions::assert_eq;

    #[test]
    fn find_uuids_finds_multiple() {
        let input =
            "x 00112233-4455-6677-8899-aabbccddeeff-k y 12345678-90ab-cdef-0123-456789abcdef";
        assert_eq!(
            find_uuids(input),
            vec![
                "00112233-4455-6677-8899-aabbccddeeff".to_string(),
                "12345678-90ab-cdef-0123-456789abcdef".to_string(),
            ]
        );
    }

    #[test]
    fn find_uuids_ignores_invalid() {
        let input = "not-a-uuid-1234-5678-9abc-def0-123456789abc";
        assert_eq!(find_uuids(input), Vec::<String>::new());
    }

    #[test]
    fn find_uuids_handles_non_ascii_without_overlap() {
        let input = "🙂 55e5d6f7-8a7f-4d2a-8d88-123456789012abc";
        assert_eq!(
            find_uuids(input),
            vec!["55e5d6f7-8a7f-4d2a-8d88-123456789012".to_string()]
        );
    }

    #[test]
    fn sanitize_metric_tag_value_trims_and_fills_unspecified() {
        let msg = "///";
        assert_eq!(sanitize_metric_tag_value(msg), "unspecified");
    }

    #[test]
    fn sanitize_metric_tag_value_replaces_invalid_chars() {
        let msg = "bad value!";
        assert_eq!(sanitize_metric_tag_value(msg), "bad_value");
    }

    #[test]
    fn sanitize_metric_tag_value_keeps_allowed_punctuation() {
        let msg = "svc.name-1/a_b";
        assert_eq!(sanitize_metric_tag_value(msg), "svc.name-1/a_b");
    }

    #[test]
    fn sanitize_metric_tag_value_caps_length() {
        let msg = "a".repeat(300);
        assert_eq!(sanitize_metric_tag_value(&msg), "a".repeat(256));
    }

    #[test]
    fn take_bytes_at_char_boundary_returns_whole_string_when_it_fits() {
        assert_eq!(take_bytes_at_char_boundary("hello", 5), "hello");
        assert_eq!(take_bytes_at_char_boundary("hello", 64), "hello");
        assert_eq!(take_bytes_at_char_boundary("", 0), "");
    }

    #[test]
    fn take_bytes_at_char_boundary_truncates_ascii() {
        assert_eq!(take_bytes_at_char_boundary("hello", 3), "hel");
        assert_eq!(take_bytes_at_char_boundary("hello", 0), "");
    }

    #[test]
    fn take_bytes_at_char_boundary_never_splits_a_character() {
        // The emoji is four bytes long, so smaller budgets cannot include it.
        assert_eq!(take_bytes_at_char_boundary("🙂x", 3), "");
        assert_eq!(take_bytes_at_char_boundary("🙂x", 4), "🙂");
        assert_eq!(take_bytes_at_char_boundary("a🙂", 4), "a");
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_converts_single_location() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L74C3"),
            Some(":74:3".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_converts_ranges() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L74C3-L76C9"),
            Some(":74:3-76:9".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_accepts_line_only() {
        assert_eq!(
            normalize_markdown_hash_location_suffix("#L74"),
            Some(":74".to_string())
        );
    }

    #[test]
    fn normalize_markdown_hash_location_suffix_rejects_missing_markers() {
        // No leading `#`.
        assert_eq!(normalize_markdown_hash_location_suffix("L74C3"), None);
        // Start point without the `L` marker.
        assert_eq!(normalize_markdown_hash_location_suffix("#74"), None);
        // End point without the `L` marker.
        assert_eq!(normalize_markdown_hash_location_suffix("#L74-76"), None);
    }
}