1use super::types::{
6 CaseFoldPool, Fnv1aHasher, FrequencyPool, GenerationalPool, InternedSlice, InternedString,
7 NormForm, NormalizedPool, PoolDiff, PoolGrowthEstimator, PoolPartition, PoolSnapshot,
8 PoolSortedView, PoolStatistics, PoolWriter, PrefixPool, Rope, RopeNode, StringCategory,
9 StringIndex, StringPool, TriePool,
10};
11use std::fmt;
12
13#[cfg(test)]
14mod tests {
15 use super::*;
16 #[test]
17 fn test_interned_string_raw_roundtrip() {
18 let is = InternedString::from_raw(42);
19 assert_eq!(is.raw_index(), 42);
20 }
21 #[test]
22 fn test_interned_string_display() {
23 let is = InternedString::from_raw(7);
24 assert_eq!(format!("{}", is), "#7");
25 }
26 #[test]
27 fn test_interned_string_debug() {
28 let is = InternedString::from_raw(3);
29 assert_eq!(format!("{:?}", is), "InternedString(3)");
30 }
31 #[test]
32 fn test_pool_intern_and_resolve() {
33 let mut pool = StringPool::new();
34 let id = pool.intern("hello");
35 assert_eq!(pool.resolve(id), Some("hello"));
36 }
37 #[test]
38 fn test_pool_deduplication() {
39 let mut pool = StringPool::new();
40 let id1 = pool.intern("hello");
41 let id2 = pool.intern("hello");
42 assert_eq!(id1, id2);
43 assert_eq!(pool.len(), 1);
44 }
45 #[test]
46 fn test_pool_multiple_strings() {
47 let mut pool = StringPool::new();
48 let a = pool.intern("alpha");
49 let b = pool.intern("beta");
50 let c = pool.intern("gamma");
51 assert_ne!(a, b);
52 assert_ne!(b, c);
53 assert_eq!(pool.len(), 3);
54 assert_eq!(pool.resolve(a), Some("alpha"));
55 assert_eq!(pool.resolve(b), Some("beta"));
56 assert_eq!(pool.resolve(c), Some("gamma"));
57 }
58 #[test]
59 fn test_pool_lookup() {
60 let mut pool = StringPool::new();
61 pool.intern("test");
62 assert!(pool.lookup("test").is_some());
63 assert!(pool.lookup("nope").is_none());
64 }
65 #[test]
66 fn test_pool_contains() {
67 let mut pool = StringPool::new();
68 pool.intern("present");
69 assert!(pool.contains("present"));
70 assert!(!pool.contains("absent"));
71 }
72 #[test]
73 fn test_pool_intern_bulk() {
74 let mut pool = StringPool::new();
75 let ids = pool.intern_bulk(&["a", "b", "c", "a"]);
76 assert_eq!(ids.len(), 4);
77 assert_eq!(ids[0], ids[3]);
78 assert_eq!(pool.len(), 3);
79 }
80 #[test]
81 fn test_pool_intern_iter() {
82 let mut pool = StringPool::new();
83 let words = vec!["foo", "bar", "baz"];
84 let ids = pool.intern_iter(words);
85 assert_eq!(ids.len(), 3);
86 assert_eq!(pool.resolve(ids[1]), Some("bar"));
87 }
88 #[test]
89 fn test_pool_statistics_basic() {
90 let mut pool = StringPool::new();
91 pool.intern("hello");
92 pool.intern("world");
93 pool.intern("hello");
94 let stats = pool.statistics();
95 assert_eq!(stats.unique_count, 2);
96 assert_eq!(stats.unique_bytes, 10);
97 assert_eq!(stats.total_intern_requests, 3);
98 assert_eq!(stats.total_requested_bytes, 15);
99 assert_eq!(stats.bytes_saved(), 5);
100 }
101 #[test]
102 fn test_pool_statistics_dedup_ratio() {
103 let stats = PoolStatistics {
104 unique_count: 2,
105 unique_bytes: 10,
106 total_intern_requests: 4,
107 total_requested_bytes: 20,
108 };
109 assert!((stats.dedup_ratio() - 0.5).abs() < f64::EPSILON);
110 }
111 #[test]
112 fn test_pool_statistics_avg_len() {
113 let stats = PoolStatistics {
114 unique_count: 5,
115 unique_bytes: 50,
116 total_intern_requests: 10,
117 total_requested_bytes: 100,
118 };
119 assert!((stats.avg_string_len() - 10.0).abs() < f64::EPSILON);
120 }
121 #[test]
122 fn test_pool_statistics_display() {
123 let mut pool = StringPool::new();
124 pool.intern("test");
125 let s = format!("{}", pool.statistics());
126 assert!(s.contains("unique: 1"));
127 }
128 #[test]
129 fn test_snapshot_roundtrip() {
130 let mut pool = StringPool::new();
131 pool.intern("alpha");
132 pool.intern("beta");
133 pool.intern("gamma");
134 let snap = pool.snapshot();
135 assert_eq!(snap.len(), 3);
136 assert_eq!(snap.get(InternedString::from_raw(1)), Some("beta"));
137 }
138 #[test]
139 fn test_snapshot_restore() {
140 let mut pool = StringPool::new();
141 let id_a = pool.intern("one");
142 let _id_b = pool.intern("two");
143 let snap = pool.snapshot();
144 let restored = snap.restore();
145 assert_eq!(restored.len(), 2);
146 assert_eq!(restored.resolve(id_a), Some("one"));
147 }
148 #[test]
149 fn test_snapshot_encode_decode() {
150 let mut pool = StringPool::new();
151 pool.intern("hello");
152 pool.intern("world");
153 let snap = pool.snapshot();
154 let encoded = snap.encode();
155 let decoded = PoolSnapshot::decode(&encoded).expect("test operation should succeed");
156 assert_eq!(snap, decoded);
157 }
158 #[test]
159 fn test_snapshot_decode_empty() {
160 let snap = PoolSnapshot { strings: vec![] };
161 let encoded = snap.encode();
162 let decoded = PoolSnapshot::decode(&encoded).expect("test operation should succeed");
163 assert!(decoded.is_empty());
164 }
165 #[test]
166 fn test_snapshot_decode_invalid() {
167 assert!(PoolSnapshot::decode(&[0, 1]).is_none());
168 }
169 #[test]
170 fn test_pool_clear() {
171 let mut pool = StringPool::new();
172 pool.intern("a");
173 pool.intern("b");
174 pool.clear();
175 assert!(pool.is_empty());
176 assert!(!pool.contains("a"));
177 }
178 #[test]
179 fn test_pool_merge() {
180 let mut pool1 = StringPool::new();
181 pool1.intern("alpha");
182 pool1.intern("beta");
183 let mut pool2 = StringPool::new();
184 pool2.intern("beta");
185 pool2.intern("gamma");
186 let mapping = pool1.merge(&pool2);
187 assert_eq!(pool1.len(), 3);
188 assert_eq!(pool1.resolve(mapping[0]), Some("beta"));
189 assert_eq!(pool1.resolve(mapping[1]), Some("gamma"));
190 }
191 #[test]
192 fn test_pool_iter() {
193 let mut pool = StringPool::new();
194 pool.intern("x");
195 pool.intern("y");
196 let pairs: Vec<_> = pool.iter().collect();
197 assert_eq!(pairs.len(), 2);
198 assert_eq!(pairs[0].1, "x");
199 assert_eq!(pairs[1].1, "y");
200 }
201 #[test]
202 fn test_pool_with_capacity() {
203 let pool = StringPool::with_capacity(100);
204 assert!(pool.is_empty());
205 }
206 #[test]
207 fn test_pool_debug() {
208 let mut pool = StringPool::new();
209 pool.intern("test");
210 let dbg = format!("{:?}", pool);
211 assert!(dbg.contains("StringPool(1 strings)"));
212 }
213 #[test]
214 fn test_pool_empty_string() {
215 let mut pool = StringPool::new();
216 let id = pool.intern("");
217 assert_eq!(pool.resolve(id), Some(""));
218 assert_eq!(pool.statistics().unique_bytes, 0);
219 }
220 #[test]
221 fn test_snapshot_total_bytes() {
222 let mut pool = StringPool::new();
223 pool.intern("abc");
224 pool.intern("de");
225 let snap = pool.snapshot();
226 assert_eq!(snap.total_bytes(), 5);
227 }
228 #[test]
229 fn test_interned_string_ordering() {
230 let a = InternedString::from_raw(1);
231 let b = InternedString::from_raw(2);
232 assert!(a < b);
233 }
234}
235#[allow(dead_code)]
236pub(super) fn rope_node_len(node: &RopeNode) -> usize {
237 match node {
238 RopeNode::Leaf(s) => s.len(),
239 RopeNode::Concat(_, _, len) => *len,
240 }
241}
242#[allow(dead_code)]
243pub(super) fn rope_collect(node: &RopeNode, buf: &mut String) {
244 match node {
245 RopeNode::Leaf(s) => buf.push_str(s),
246 RopeNode::Concat(l, r, _) => {
247 rope_collect(l, buf);
248 rope_collect(r, buf);
249 }
250 }
251}
252#[allow(dead_code)]
253pub(super) fn rope_depth(node: &RopeNode) -> usize {
254 match node {
255 RopeNode::Leaf(_) => 1,
256 RopeNode::Concat(l, r, _) => 1 + rope_depth(l).max(rope_depth(r)),
257 }
258}
259#[allow(dead_code)]
260pub(super) fn rope_fmt(node: &RopeNode, f: &mut fmt::Formatter<'_>) -> fmt::Result {
261 match node {
262 RopeNode::Leaf(s) => f.write_str(s),
263 RopeNode::Concat(l, r, _) => {
264 rope_fmt(l, f)?;
265 rope_fmt(r, f)
266 }
267 }
268}
269#[allow(dead_code)]
271pub(super) fn is_combining(c: char) -> bool {
272 let cp = c as u32;
273 (0x0300..=0x036F).contains(&cp) || (0x20D0..=0x20FF).contains(&cp)
274}
275#[allow(dead_code)]
277pub(super) fn compat_fold(c: char) -> char {
278 match c {
279 '\u{FF01}'..='\u{FF5E}' => char::from_u32(c as u32 - 0xFF01 + 0x0021).unwrap_or(c),
280 '\u{2070}' => '0',
281 '\u{00B9}' => '1',
282 '\u{00B2}' => '2',
283 '\u{00B3}' => '3',
284 '\u{2074}'..='\u{2079}' => char::from_u32(c as u32 - 0x2074 + 0x0034).unwrap_or(c),
285 _ => c,
286 }
287}
288#[allow(dead_code)]
290pub fn classify_string(s: &str) -> StringCategory {
291 if s.is_empty() {
292 return StringCategory::Empty;
293 }
294 if s.chars().all(|c| c.is_ascii_whitespace()) {
295 return StringCategory::Whitespace;
296 }
297 if s.chars().all(|c| c.is_ascii_digit()) {
298 return StringCategory::Numeric;
299 }
300 if s.chars().all(|c| c.is_ascii_alphabetic()) {
301 return StringCategory::Alpha;
302 }
303 if s.chars().all(|c| c.is_ascii_alphanumeric()) {
304 return StringCategory::AlphaNum;
305 }
306 let mut chars = s.chars();
307 if let Some(first) = chars.next() {
308 if (first.is_ascii_alphabetic() || first == '_')
309 && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
310 {
311 return StringCategory::Identifier;
312 }
313 }
314 StringCategory::Mixed
315}
316pub(super) const FNV_OFFSET_BASIS: u64 = 14695981039346656037;
317pub(super) const FNV_PRIME: u64 = 1099511628211;
318#[allow(dead_code)]
320pub fn tokenize_and_intern(pool: &mut StringPool, s: &str, delimiter: char) -> Vec<InternedString> {
321 s.split(delimiter)
322 .filter(|piece| !piece.is_empty())
323 .map(|piece| pool.intern(piece))
324 .collect()
325}
326#[allow(dead_code)]
328pub fn join_interned(pool: &StringPool, ids: &[InternedString], sep: &str) -> String {
329 ids.iter()
330 .filter_map(|id| pool.resolve(*id))
331 .collect::<Vec<_>>()
332 .join(sep)
333}
334#[cfg(test)]
335mod tests_extended {
336 use super::*;
337 #[test]
338 fn test_case_fold_pool_basic() {
339 let mut pool = CaseFoldPool::new();
340 let id1 = pool.intern("Hello");
341 let id2 = pool.intern("HELLO");
342 let id3 = pool.intern("hello");
343 assert_eq!(id1, id2);
344 assert_eq!(id2, id3);
345 assert_eq!(pool.resolve(id1), Some("hello"));
346 }
347 #[test]
348 fn test_case_fold_pool_contains() {
349 let mut pool = CaseFoldPool::new();
350 pool.intern("Rust");
351 assert!(pool.contains("rust"));
352 assert!(pool.contains("RUST"));
353 assert!(!pool.contains("python"));
354 }
355 #[test]
356 fn test_case_fold_pool_len() {
357 let mut pool = CaseFoldPool::new();
358 pool.intern("Alpha");
359 pool.intern("alpha");
360 pool.intern("ALPHA");
361 assert_eq!(pool.len(), 1);
362 }
363 #[test]
364 fn test_prefix_pool_longest_prefix() {
365 let mut pool = PrefixPool::new();
366 pool.intern("foo");
367 pool.intern("foobar");
368 pool.intern("foobarbaz");
369 let lp = pool
370 .longest_prefix("foobarbazqux")
371 .expect("test operation should succeed");
372 assert_eq!(pool.resolve(lp), Some("foobarbaz"));
373 }
374 #[test]
375 fn test_prefix_pool_all_prefixes() {
376 let mut pool = PrefixPool::new();
377 pool.intern("a");
378 pool.intern("ab");
379 pool.intern("abc");
380 pool.intern("xyz");
381 let prefixes = pool.all_prefixes("abcdef");
382 let strs: Vec<&str> = prefixes
383 .iter()
384 .map(|id| pool.resolve(*id).expect("lookup should succeed"))
385 .collect();
386 assert!(strs.contains(&"abc"));
387 assert!(strs.contains(&"ab"));
388 assert!(strs.contains(&"a"));
389 assert!(!strs.contains(&"xyz"));
390 assert_eq!(strs[0], "abc");
391 }
392 #[test]
393 fn test_prefix_pool_no_prefix() {
394 let mut pool = PrefixPool::new();
395 pool.intern("xyz");
396 assert!(pool.longest_prefix("abc").is_none());
397 }
398 #[test]
399 fn test_rope_basic() {
400 let r = Rope::from_str("hello");
401 assert_eq!(r.len(), 5);
402 assert_eq!(r.to_string(), "hello");
403 }
404 #[test]
405 fn test_rope_concat() {
406 let a = Rope::from_str("hello");
407 let b = Rope::from_str(", world");
408 let c = a.concat(b);
409 assert_eq!(c.to_string(), "hello, world");
410 assert_eq!(c.len(), 12);
411 }
412 #[test]
413 fn test_rope_append_str() {
414 let r = Rope::from_str("foo").append_str("bar").append_str("baz");
415 assert_eq!(r.to_string(), "foobarbaz");
416 }
417 #[test]
418 fn test_rope_empty() {
419 let r = Rope::new();
420 assert!(r.is_empty());
421 assert_eq!(r.to_string(), "");
422 }
423 #[test]
424 fn test_rope_concat_empty() {
425 let a = Rope::new();
426 let b = Rope::from_str("hello");
427 let c = a.concat(b);
428 assert_eq!(c.to_string(), "hello");
429 }
430 #[test]
431 fn test_rope_depth() {
432 let r = Rope::from_str("a")
433 .concat(Rope::from_str("b"))
434 .concat(Rope::from_str("c"));
435 assert!(r.depth() >= 2);
436 }
437 #[test]
438 fn test_rope_display() {
439 let r = Rope::from_str("hello").concat(Rope::from_str(" world"));
440 let s = format!("{}", r);
441 assert_eq!(s, "hello world");
442 }
443 #[test]
444 fn test_normalized_pool_nfc() {
445 let mut pool = NormalizedPool::new(NormForm::Nfc);
446 let id = pool.intern("hello");
447 assert_eq!(pool.resolve(id), Some("hello"));
448 }
449 #[test]
450 fn test_normalized_pool_nfkc_fullwidth() {
451 let mut pool = NormalizedPool::new(NormForm::Nfkc);
452 let s = "\u{FF01}";
453 let id = pool.intern(s);
454 let resolved = pool.resolve(id).expect("lookup should succeed");
455 assert_eq!(resolved, "!");
456 }
457 #[test]
458 fn test_normalized_pool_len() {
459 let mut pool = NormalizedPool::new(NormForm::Nfc);
460 pool.intern("test");
461 pool.intern("test");
462 assert_eq!(pool.len(), 1);
463 }
464 #[test]
465 fn test_generational_pool_basic() {
466 let mut pool = GenerationalPool::new();
467 let h = pool.intern("hello");
468 assert_eq!(pool.resolve(h), Some("hello"));
469 assert!(pool.is_valid(h));
470 }
471 #[test]
472 fn test_generational_pool_release() {
473 let mut pool = GenerationalPool::new();
474 let h = pool.intern("hello");
475 pool.release(h);
476 assert!(!pool.is_valid(h));
477 }
478 #[test]
479 fn test_generational_pool_compact() {
480 let mut pool = GenerationalPool::new();
481 let h_live = pool.intern("live");
482 let h_dead = pool.intern("dead");
483 pool.release(h_dead);
484 let removed = pool.compact();
485 assert_eq!(removed, 1);
486 assert!(!pool.is_valid(h_live));
487 assert_eq!(pool.generation(), 1);
488 assert_eq!(pool.live_count(), 1);
489 }
490 #[test]
491 fn test_generational_pool_stale_resolve() {
492 let mut pool = GenerationalPool::new();
493 let h = pool.intern("test");
494 pool.compact();
495 assert!(pool.resolve(h).is_none());
496 }
497 #[test]
498 fn test_string_index_prefix() {
499 let mut pool = StringPool::new();
500 pool.intern("apple");
501 pool.intern("application");
502 pool.intern("banana");
503 pool.intern("apply");
504 let index = StringIndex::build(&pool);
505 let results = index.find_prefix("app");
506 let strs: Vec<&str> = results
507 .iter()
508 .map(|id| pool.resolve(*id).expect("lookup should succeed"))
509 .collect();
510 assert!(strs.contains(&"apple"));
511 assert!(strs.contains(&"application"));
512 assert!(strs.contains(&"apply"));
513 assert!(!strs.contains(&"banana"));
514 }
515 #[test]
516 fn test_string_index_suffix() {
517 let mut pool = StringPool::new();
518 pool.intern("test");
519 pool.intern("best");
520 pool.intern("rest");
521 pool.intern("apple");
522 let index = StringIndex::build(&pool);
523 let results = index.find_suffix("est");
524 assert_eq!(results.len(), 3);
525 }
526 #[test]
527 fn test_string_index_contains() {
528 let mut pool = StringPool::new();
529 pool.intern("hello");
530 pool.intern("world");
531 pool.intern("ell");
532 let index = StringIndex::build(&pool);
533 let results = index.find_contains("ell");
534 let strs: Vec<&str> = results
535 .iter()
536 .map(|id| pool.resolve(*id).expect("lookup should succeed"))
537 .collect();
538 assert!(strs.contains(&"hello"));
539 assert!(strs.contains(&"ell"));
540 assert!(!strs.contains(&"world"));
541 }
542 #[test]
543 fn test_pool_diff_compute() {
544 let mut old_pool = StringPool::new();
545 old_pool.intern("alpha");
546 old_pool.intern("beta");
547 let old_snap = old_pool.snapshot();
548 let mut new_pool = StringPool::new();
549 new_pool.intern("beta");
550 new_pool.intern("gamma");
551 let new_snap = new_pool.snapshot();
552 let diff = PoolDiff::compute(&old_snap, &new_snap);
553 assert!(diff.added.contains(&"gamma".to_string()));
554 assert!(diff.removed.contains(&"alpha".to_string()));
555 assert!(diff.common.contains(&"beta".to_string()));
556 }
557 #[test]
558 fn test_pool_diff_empty() {
559 let mut pool = StringPool::new();
560 pool.intern("x");
561 let snap = pool.snapshot();
562 let diff = PoolDiff::compute(&snap, &snap);
563 assert!(diff.is_empty());
564 }
565 #[test]
566 fn test_classify_string() {
567 assert_eq!(classify_string(""), StringCategory::Empty);
568 assert_eq!(classify_string(" "), StringCategory::Whitespace);
569 assert_eq!(classify_string("123"), StringCategory::Numeric);
570 assert_eq!(classify_string("abc"), StringCategory::Alpha);
571 assert_eq!(classify_string("abc123"), StringCategory::AlphaNum);
572 assert_eq!(classify_string("my_var"), StringCategory::Identifier);
573 assert_eq!(classify_string("hello world!"), StringCategory::Mixed);
574 }
575 #[test]
576 fn test_frequency_pool_basic() {
577 let mut pool = FrequencyPool::new();
578 pool.intern("hello");
579 pool.intern("hello");
580 pool.intern("world");
581 let id_hello = pool.pool().lookup("hello").expect("lookup should succeed");
582 let id_world = pool.pool().lookup("world").expect("lookup should succeed");
583 assert_eq!(pool.frequency(id_hello), 2);
584 assert_eq!(pool.frequency(id_world), 1);
585 }
586 #[test]
587 fn test_frequency_pool_top_k() {
588 let mut pool = FrequencyPool::new();
589 for _ in 0..5 {
590 pool.intern("a");
591 }
592 for _ in 0..3 {
593 pool.intern("b");
594 }
595 for _ in 0..1 {
596 pool.intern("c");
597 }
598 let top = pool.top_k(2);
599 let top_strs: Vec<&str> = top
600 .iter()
601 .map(|(id, _)| pool.resolve(*id).expect("lookup should succeed"))
602 .collect();
603 assert_eq!(top_strs[0], "a");
604 assert_eq!(top_strs[1], "b");
605 }
606 #[test]
607 fn test_frequency_pool_total_calls() {
608 let mut pool = FrequencyPool::new();
609 pool.intern("x");
610 pool.intern("x");
611 pool.intern("y");
612 assert_eq!(pool.total_calls(), 3);
613 }
614 #[test]
615 fn test_fnv1a_consistency() {
616 let h1 = Fnv1aHasher::hash_str("hello");
617 let h2 = Fnv1aHasher::hash_str("hello");
618 assert_eq!(h1, h2);
619 }
620 #[test]
621 fn test_fnv1a_different_strings() {
622 let h1 = Fnv1aHasher::hash_str("hello");
623 let h2 = Fnv1aHasher::hash_str("world");
624 assert_ne!(h1, h2);
625 }
626 #[test]
627 fn test_fnv1a_32() {
628 let h = Fnv1aHasher::hash_str_32("test");
629 assert_ne!(h, 0);
630 }
631 #[test]
632 fn test_fnv1a_empty() {
633 let h = Fnv1aHasher::hash_str("");
634 assert_eq!(h, FNV_OFFSET_BASIS);
635 }
636 #[test]
637 fn test_pool_writer() {
638 let mut pool = StringPool::new();
639 pool.intern("alpha");
640 pool.intern("beta");
641 let out = PoolWriter::write_to_string(&pool);
642 assert!(out.contains("alpha"));
643 assert!(out.contains("beta"));
644 }
645 #[test]
646 fn test_tokenize_and_intern() {
647 let mut pool = StringPool::new();
648 let ids = tokenize_and_intern(&mut pool, "foo,bar,baz,foo", ',');
649 assert_eq!(ids.len(), 4);
650 assert_eq!(ids[0], ids[3]);
651 assert_eq!(pool.len(), 3);
652 }
653 #[test]
654 fn test_join_interned() {
655 let mut pool = StringPool::new();
656 let id_a = pool.intern("hello");
657 let id_b = pool.intern("world");
658 let joined = join_interned(&pool, &[id_a, id_b], ", ");
659 assert_eq!(joined, "hello, world");
660 }
661 #[test]
662 fn test_join_interned_empty() {
663 let pool = StringPool::new();
664 let joined = join_interned(&pool, &[], ", ");
665 assert_eq!(joined, "");
666 }
667}
668#[allow(dead_code)]
670pub fn levenshtein_distance(a: &str, b: &str) -> usize {
671 let a_chars: Vec<char> = a.chars().collect();
672 let b_chars: Vec<char> = b.chars().collect();
673 let m = a_chars.len();
674 let n = b_chars.len();
675 if m == 0 {
676 return n;
677 }
678 if n == 0 {
679 return m;
680 }
681 let mut dp: Vec<usize> = (0..=n).collect();
682 for i in 1..=m {
683 let mut prev = dp[0];
684 dp[0] = i;
685 for j in 1..=n {
686 let temp = dp[j];
687 dp[j] = if a_chars[i - 1] == b_chars[j - 1] {
688 prev
689 } else {
690 1 + prev.min(dp[j]).min(dp[j - 1])
691 };
692 prev = temp;
693 }
694 }
695 dp[n]
696}
697#[allow(dead_code)]
699pub fn normalized_similarity(a: &str, b: &str) -> f64 {
700 let max_len = a.chars().count().max(b.chars().count());
701 if max_len == 0 {
702 return 1.0;
703 }
704 let dist = levenshtein_distance(a, b);
705 1.0 - dist as f64 / max_len as f64
706}
707#[allow(dead_code)]
709pub fn fuzzy_search(
710 pool: &StringPool,
711 query: &str,
712 max_distance: usize,
713) -> Vec<(InternedString, usize)> {
714 pool.iter()
715 .filter_map(|(id, s)| {
716 let dist = levenshtein_distance(query, s);
717 if dist <= max_distance {
718 Some((id, dist))
719 } else {
720 None
721 }
722 })
723 .collect()
724}
725#[allow(dead_code)]
727pub fn pool_checksum(snap: &PoolSnapshot) -> u32 {
728 let encoded = snap.encode();
729 encoded
730 .iter()
731 .fold(0u32, |acc, &b| acc.wrapping_add(b as u32))
732}
733#[allow(dead_code)]
735pub fn validate_checksum(snap: &PoolSnapshot, expected: u32) -> bool {
736 pool_checksum(snap) == expected
737}
738#[cfg(test)]
739mod tests_extended2 {
740 use super::*;
741 #[test]
742 fn test_trie_pool_insert_and_find() {
743 let mut pool = TriePool::new();
744 pool.insert("apple");
745 pool.insert("application");
746 pool.insert("apply");
747 pool.insert("banana");
748 let results = pool.find_prefix("app");
749 assert_eq!(results.len(), 3);
750 let no_results = pool.find_prefix("xyz");
751 assert!(no_results.is_empty());
752 }
753 #[test]
754 fn test_trie_pool_exact() {
755 let mut pool = TriePool::new();
756 pool.insert("hello");
757 assert!(pool.get("hello").is_some());
758 assert!(pool.get("hell").is_none());
759 }
760 #[test]
761 fn test_trie_pool_dedup() {
762 let mut pool = TriePool::new();
763 let id1 = pool.insert("test");
764 let id2 = pool.insert("test");
765 assert_eq!(id1, id2);
766 assert_eq!(pool.len(), 1);
767 }
768 #[test]
769 fn test_growth_estimator_basic() {
770 let mut est = PoolGrowthEstimator::new(5);
771 for i in 0..10 {
772 est.record(i * 5);
773 }
774 let growth = est.avg_growth();
775 assert!((growth - 5.0).abs() < f64::EPSILON);
776 }
777 #[test]
778 fn test_growth_estimator_estimate() {
779 let mut est = PoolGrowthEstimator::new(5);
780 est.record(0);
781 est.record(10);
782 est.record(20);
783 let future = est.estimate_after(3);
784 assert!((future - 50.0).abs() < f64::EPSILON);
785 }
786 #[test]
787 fn test_growth_estimator_no_history() {
788 let est = PoolGrowthEstimator::new(5);
789 assert_eq!(est.avg_growth(), 0.0);
790 assert_eq!(est.estimate_after(5), 0.0);
791 }
792 #[test]
793 fn test_levenshtein_identical() {
794 assert_eq!(levenshtein_distance("hello", "hello"), 0);
795 }
796 #[test]
797 fn test_levenshtein_empty() {
798 assert_eq!(levenshtein_distance("", "abc"), 3);
799 assert_eq!(levenshtein_distance("abc", ""), 3);
800 }
801 #[test]
802 fn test_levenshtein_one_edit() {
803 assert_eq!(levenshtein_distance("kitten", "sitten"), 1);
804 }
805 #[test]
806 fn test_levenshtein_classic() {
807 assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
808 }
809 #[test]
810 fn test_normalized_similarity_identical() {
811 assert!((normalized_similarity("abc", "abc") - 1.0).abs() < f64::EPSILON);
812 }
813 #[test]
814 fn test_normalized_similarity_empty() {
815 assert!((normalized_similarity("", "") - 1.0).abs() < f64::EPSILON);
816 }
817 #[test]
818 fn test_fuzzy_search() {
819 let mut pool = StringPool::new();
820 pool.intern("hello");
821 pool.intern("helo");
822 pool.intern("world");
823 let results = fuzzy_search(&pool, "hello", 1);
824 let strs: Vec<&str> = results
825 .iter()
826 .map(|(id, _)| pool.resolve(*id).expect("lookup should succeed"))
827 .collect();
828 assert!(strs.contains(&"hello"));
829 assert!(strs.contains(&"helo"));
830 assert!(!strs.contains(&"world"));
831 }
832 #[test]
833 fn test_pool_partition() {
834 let mut pool = StringPool::new();
835 pool.intern("foo");
836 pool.intern("foobar");
837 pool.intern("bar");
838 let part = PoolPartition::by(&pool, |s| s.starts_with("foo"));
839 assert_eq!(part.matching_count(), 2);
840 assert_eq!(part.non_matching_count(), 1);
841 }
842 #[test]
843 fn test_interned_slice_resolve() {
844 let mut pool = StringPool::new();
845 let id = pool.intern("hello world");
846 let slice = InternedSlice::new(id, 6, 11);
847 assert_eq!(slice.resolve(&pool), Some("world"));
848 }
849 #[test]
850 fn test_interned_slice_empty() {
851 let mut pool = StringPool::new();
852 let id = pool.intern("test");
853 let slice = InternedSlice::new(id, 2, 2);
854 assert!(slice.is_empty());
855 }
856 #[test]
857 fn test_interned_slice_out_of_bounds() {
858 let mut pool = StringPool::new();
859 let id = pool.intern("hi");
860 let slice = InternedSlice::new(id, 0, 100);
861 assert!(slice.resolve(&pool).is_none());
862 }
863 #[test]
864 fn test_pool_checksum_deterministic() {
865 let mut pool = StringPool::new();
866 pool.intern("alpha");
867 pool.intern("beta");
868 let snap = pool.snapshot();
869 let c1 = pool_checksum(&snap);
870 let c2 = pool_checksum(&snap);
871 assert_eq!(c1, c2);
872 }
873 #[test]
874 fn test_pool_checksum_validate() {
875 let mut pool = StringPool::new();
876 pool.intern("test");
877 let snap = pool.snapshot();
878 let checksum = pool_checksum(&snap);
879 assert!(validate_checksum(&snap, checksum));
880 assert!(!validate_checksum(&snap, checksum.wrapping_add(1)));
881 }
882}
883#[allow(dead_code)]
886pub fn is_rotation(a: &str, b: &str) -> bool {
887 if a.len() != b.len() {
888 return false;
889 }
890 if a.is_empty() {
891 return true;
892 }
893 let doubled = format!("{}{}", a, a);
894 doubled.contains(b)
895}
896#[allow(dead_code)]
898pub fn is_palindrome(s: &str) -> bool {
899 let bytes = s.as_bytes();
900 let n = bytes.len();
901 if n == 0 {
902 return true;
903 }
904 let mut i = 0;
905 let mut j = n - 1;
906 while i < j {
907 if bytes[i] != bytes[j] {
908 return false;
909 }
910 i += 1;
911 j -= 1;
912 }
913 true
914}
915#[allow(dead_code)]
917pub fn reverse_str(s: &str) -> String {
918 s.chars().rev().collect()
919}
920#[allow(dead_code)]
922pub fn capitalize(s: &str) -> String {
923 let mut iter = s.chars();
924 match iter.next() {
925 None => String::new(),
926 Some(first) => {
927 let upper: String = first.to_uppercase().collect();
928 upper + iter.as_str()
929 }
930 }
931}
932#[allow(dead_code)]
934pub fn to_title_case(s: &str) -> String {
935 s.split_whitespace()
936 .map(capitalize)
937 .collect::<Vec<_>>()
938 .join(" ")
939}
940#[allow(dead_code)]
942pub fn count_occurrences(text: &str, pattern: &str) -> usize {
943 if pattern.is_empty() {
944 return 0;
945 }
946 let mut count = 0;
947 let mut start = 0;
948 while let Some(pos) = text[start..].find(pattern) {
949 count += 1;
950 start += pos + pattern.len();
951 }
952 count
953}
954#[allow(dead_code)]
956pub fn truncate_str(s: &str, max_chars: usize, ellipsis: &str) -> String {
957 let char_count = s.chars().count();
958 if char_count <= max_chars {
959 s.to_string()
960 } else {
961 let elided = max_chars.saturating_sub(ellipsis.chars().count());
962 let truncated: String = s.chars().take(elided).collect();
963 truncated + ellipsis
964 }
965}
966#[cfg(test)]
967mod tests_string_utils {
968 use super::*;
969 #[test]
970 fn test_is_rotation() {
971 assert!(is_rotation("abcde", "cdeab"));
972 assert!(!is_rotation("abcde", "abced"));
973 assert!(is_rotation("", ""));
974 }
975 #[test]
976 fn test_is_palindrome() {
977 assert!(is_palindrome("racecar"));
978 assert!(is_palindrome(""));
979 assert!(!is_palindrome("hello"));
980 }
981 #[test]
982 fn test_reverse_str() {
983 assert_eq!(reverse_str("hello"), "olleh");
984 assert_eq!(reverse_str(""), "");
985 }
986 #[test]
987 fn test_capitalize() {
988 assert_eq!(capitalize("hello"), "Hello");
989 assert_eq!(capitalize(""), "");
990 assert_eq!(capitalize("WORLD"), "WORLD");
991 }
992 #[test]
993 fn test_to_title_case() {
994 assert_eq!(to_title_case("hello world"), "Hello World");
995 assert_eq!(to_title_case("foo bar baz"), "Foo Bar Baz");
996 }
997 #[test]
998 fn test_count_occurrences() {
999 assert_eq!(count_occurrences("hello world hello", "hello"), 2);
1000 assert_eq!(count_occurrences("aaa", "aa"), 1);
1001 assert_eq!(count_occurrences("test", ""), 0);
1002 }
1003 #[test]
1004 fn test_truncate_str() {
1005 assert_eq!(truncate_str("hello world", 5, "..."), "he...");
1006 assert_eq!(truncate_str("hi", 10, "..."), "hi");
1007 assert_eq!(truncate_str("abcde", 5, "..."), "abcde");
1008 }
1009}
1010#[cfg(test)]
1011mod tests_sorted_view {
1012 use super::*;
1013 #[test]
1014 fn test_sorted_view_order() {
1015 let mut pool = StringPool::new();
1016 pool.intern("zebra");
1017 pool.intern("apple");
1018 pool.intern("mango");
1019 let view = PoolSortedView::build(&pool);
1020 let strs: Vec<&str> = view.iter().map(|(_, s)| s).collect();
1021 assert_eq!(strs, vec!["apple", "mango", "zebra"]);
1022 }
1023 #[test]
1024 fn test_sorted_view_len() {
1025 let mut pool = StringPool::new();
1026 pool.intern("a");
1027 pool.intern("b");
1028 let view = PoolSortedView::build(&pool);
1029 assert_eq!(view.len(), 2);
1030 }
1031}