1#![forbid(unsafe_code)]
2
3use crate::buffer::Buffer;
47use crate::cell::GraphemeId;
48use std::collections::HashMap;
49
50#[derive(Debug, Clone)]
52struct GraphemeSlot {
53 text: String,
55 #[allow(dead_code)]
58 width: u8,
59 refcount: u32,
61}
62
63#[derive(Debug, Clone)]
67pub struct GraphemePool {
68 slots: Vec<Option<GraphemeSlot>>,
70 lookup: HashMap<String, GraphemeId>,
72 free_list: Vec<u32>,
74}
75
76impl GraphemePool {
77 pub fn new() -> Self {
79 Self {
80 slots: Vec::new(),
81 lookup: HashMap::new(),
82 free_list: Vec::new(),
83 }
84 }
85
86 pub fn with_capacity(capacity: usize) -> Self {
88 Self {
89 slots: Vec::with_capacity(capacity),
90 lookup: HashMap::with_capacity(capacity),
91 free_list: Vec::new(),
92 }
93 }
94
95 pub fn len(&self) -> usize {
97 self.slots.len().saturating_sub(self.free_list.len())
98 }
99
100 pub fn is_empty(&self) -> bool {
102 self.len() == 0
103 }
104
105 pub fn capacity(&self) -> usize {
107 self.slots.capacity()
108 }
109
110 pub fn intern(&mut self, text: &str, width: u8) -> GraphemeId {
124 assert!(width <= GraphemeId::MAX_WIDTH, "width overflow");
125
126 if let Some(&id) = self.lookup.get(text) {
128 debug_assert_eq!(
129 id.width() as u8,
130 width,
131 "intern() called with different width for the same text {:?}: existing={}, new={}",
132 text,
133 id.width(),
134 width
135 );
136 self.retain(id);
137 return id;
138 }
139
140 let slot_idx = self.alloc_slot();
142 let id = GraphemeId::new(slot_idx, width);
143
144 let slot = GraphemeSlot {
146 text: text.to_string(),
147 width,
148 refcount: 1,
149 };
150
151 if (slot_idx as usize) < self.slots.len() {
152 self.slots[slot_idx as usize] = Some(slot);
153 } else {
154 debug_assert_eq!(slot_idx as usize, self.slots.len());
155 self.slots.push(Some(slot));
156 }
157
158 self.lookup.insert(text.to_string(), id);
159 id
160 }
161
162 pub fn get(&self, id: GraphemeId) -> Option<&str> {
166 self.slots
167 .get(id.slot())
168 .and_then(|slot| slot.as_ref())
169 .map(|slot| slot.text.as_str())
170 }
171
172 pub fn retain(&mut self, id: GraphemeId) {
176 if let Some(Some(slot)) = self.slots.get_mut(id.slot()) {
177 slot.refcount = slot.refcount.saturating_add(1);
178 }
179 }
180
181 pub fn release(&mut self, id: GraphemeId) {
186 let slot_idx = id.slot();
187 if let Some(Some(slot)) = self.slots.get_mut(slot_idx) {
188 slot.refcount = slot.refcount.saturating_sub(1);
189 if slot.refcount == 0 {
190 self.lookup.remove(&slot.text);
192 self.slots[slot_idx] = None;
194 self.free_list.push(slot_idx as u32);
196 }
197 }
198 }
199
200 pub fn refcount(&self, id: GraphemeId) -> u32 {
204 self.slots
205 .get(id.slot())
206 .and_then(|slot| slot.as_ref())
207 .map(|slot| slot.refcount)
208 .unwrap_or(0)
209 }
210
211 pub fn clear(&mut self) {
213 self.slots.clear();
214 self.lookup.clear();
215 self.free_list.clear();
216 }
217
218 fn alloc_slot(&mut self) -> u32 {
220 if let Some(idx) = self.free_list.pop() {
221 idx
222 } else {
223 let idx = self.slots.len() as u32;
224 assert!(
225 idx <= GraphemeId::MAX_SLOT,
226 "grapheme pool capacity exceeded"
227 );
228 idx
229 }
230 }
231
232 pub fn gc(&mut self, buffers: &[&Buffer]) {
242 for slot in self.slots.iter_mut().flatten() {
244 slot.refcount = 0;
245 }
246
247 for buf in buffers {
249 for cell in buf.cells() {
250 if let Some(id) = cell.content.grapheme_id() {
251 if let Some(Some(slot)) = self.slots.get_mut(id.slot()) {
254 slot.refcount = slot.refcount.saturating_add(1);
255 }
256 }
257 }
258 }
259
260 let mut keys_to_remove = Vec::new();
263
264 for (idx, slot_opt) in self.slots.iter_mut().enumerate() {
265 let should_free = slot_opt.as_ref().is_some_and(|s| s.refcount == 0);
267
268 if should_free {
269 if let Some(dead_slot) = slot_opt.take() {
271 keys_to_remove.push(dead_slot.text);
272 self.free_list.push(idx as u32);
273 }
274 }
275 }
276
277 for text in keys_to_remove {
278 self.lookup.remove(&text);
279 }
280 }
281}
282
283impl Default for GraphemePool {
284 fn default() -> Self {
285 Self::new()
286 }
287}
288
289#[cfg(test)]
290mod tests {
291 use super::*;
292
293 #[test]
294 fn intern_and_get() {
295 let mut pool = GraphemePool::new();
296 let id = pool.intern("๐จโ๐ฉโ๐งโ๐ฆ", 2);
297
298 assert_eq!(pool.get(id), Some("๐จโ๐ฉโ๐งโ๐ฆ"));
299 assert_eq!(id.width(), 2);
300 }
301
302 #[test]
303 fn deduplication() {
304 let mut pool = GraphemePool::new();
305 let id1 = pool.intern("๐", 2);
306 let id2 = pool.intern("๐", 2);
307
308 assert_eq!(id1, id2);
310 assert_eq!(pool.refcount(id1), 2);
312 assert_eq!(pool.len(), 1);
314 }
315
316 #[test]
317 fn retain_and_release() {
318 let mut pool = GraphemePool::new();
319 let id = pool.intern("๐", 2);
320 assert_eq!(pool.refcount(id), 1);
321
322 pool.retain(id);
323 assert_eq!(pool.refcount(id), 2);
324
325 pool.release(id);
326 assert_eq!(pool.refcount(id), 1);
327
328 pool.release(id);
329 assert_eq!(pool.get(id), None);
331 assert_eq!(pool.len(), 0);
332 }
333
334 #[test]
335 fn slot_reuse() {
336 let mut pool = GraphemePool::new();
337
338 let id1 = pool.intern("A", 1);
340 pool.release(id1);
341 assert_eq!(pool.len(), 0);
342
343 let id2 = pool.intern("B", 1);
345 assert_eq!(id1.slot(), id2.slot());
346 assert_eq!(pool.get(id2), Some("B"));
347 }
348
349 #[test]
350 fn empty_pool() {
351 let pool = GraphemePool::new();
352 assert!(pool.is_empty());
353 assert_eq!(pool.len(), 0);
354 }
355
356 #[test]
357 fn multiple_graphemes() {
358 let mut pool = GraphemePool::new();
359
360 let id1 = pool.intern("๐จโ๐ป", 2);
361 let id2 = pool.intern("๐ฉโ๐ฌ", 2);
362 let id3 = pool.intern("๐ง๐ฝโ๐", 2);
363
364 assert_eq!(pool.len(), 3);
365 assert_ne!(id1, id2);
366 assert_ne!(id2, id3);
367
368 assert_eq!(pool.get(id1), Some("๐จโ๐ป"));
369 assert_eq!(pool.get(id2), Some("๐ฉโ๐ฌ"));
370 assert_eq!(pool.get(id3), Some("๐ง๐ฝโ๐"));
371 }
372
373 #[test]
374 fn width_preserved() {
375 let mut pool = GraphemePool::new();
376
377 let id1 = pool.intern("๐", 2);
379 let id2 = pool.intern("A", 1);
380 let id3 = pool.intern("ๆฅ", 2);
381
382 assert_eq!(id1.width(), 2);
383 assert_eq!(id2.width(), 1);
384 assert_eq!(id3.width(), 2);
385 }
386
387 #[test]
388 fn clear_pool() {
389 let mut pool = GraphemePool::new();
390 pool.intern("A", 1);
391 pool.intern("B", 1);
392 pool.intern("C", 1);
393
394 assert_eq!(pool.len(), 3);
395
396 pool.clear();
397 assert!(pool.is_empty());
398 }
399
400 #[test]
401 fn invalid_id_returns_none() {
402 let pool = GraphemePool::new();
403 let fake_id = GraphemeId::new(999, 1);
404 assert_eq!(pool.get(fake_id), None);
405 }
406
407 #[test]
408 fn release_invalid_id_is_safe() {
409 let mut pool = GraphemePool::new();
410 let fake_id = GraphemeId::new(999, 1);
411 pool.release(fake_id); }
413
414 #[test]
415 fn retain_invalid_id_is_safe() {
416 let mut pool = GraphemePool::new();
417 let fake_id = GraphemeId::new(999, 1);
418 pool.retain(fake_id); }
420
421 #[test]
422 #[should_panic(expected = "width overflow")]
423 fn width_overflow_panics() {
424 let mut pool = GraphemePool::new();
425 pool.intern("X", 128); }
427
428 #[test]
429 fn with_capacity() {
430 let pool = GraphemePool::with_capacity(100);
431 assert!(pool.capacity() >= 100);
432 assert!(pool.is_empty());
433 }
434
435 mod gc_tests {
436 use super::*;
437 use crate::buffer::Buffer;
438 use crate::cell::{Cell, CellContent};
439
440 fn buf_with_grapheme(id: GraphemeId) -> Buffer {
442 let mut buf = Buffer::new(4, 1);
443 let content = CellContent::from_grapheme(id);
444 buf.set(0, 0, Cell::new(content));
445 buf
446 }
447
448 #[test]
449 fn gc_retains_referenced_grapheme() {
450 let mut pool = GraphemePool::new();
451 let id = pool.intern("๐", 2);
452
453 let buf = buf_with_grapheme(id);
454 pool.gc(&[&buf]);
455
456 assert_eq!(pool.get(id), Some("๐"));
457 assert_eq!(pool.refcount(id), 1);
458 }
459
460 #[test]
461 fn gc_frees_unreferenced_grapheme() {
462 let mut pool = GraphemePool::new();
463 let id = pool.intern("๐", 2);
464
465 let buf = Buffer::new(4, 1);
467 pool.gc(&[&buf]);
468
469 assert_eq!(pool.get(id), None);
470 assert_eq!(pool.refcount(id), 0);
471 assert!(pool.is_empty());
472 }
473
474 #[test]
475 fn gc_with_multiple_buffers() {
476 let mut pool = GraphemePool::new();
477 let id1 = pool.intern("๐", 2);
478 let id2 = pool.intern("๐งช", 2);
479 let id3 = pool.intern("๐ฅ", 2);
480
481 let buf1 = buf_with_grapheme(id1);
483 let buf2 = buf_with_grapheme(id3);
484
485 pool.gc(&[&buf1, &buf2]);
486
487 assert_eq!(pool.get(id1), Some("๐"));
488 assert_eq!(pool.get(id2), None); assert_eq!(pool.get(id3), Some("๐ฅ"));
490 assert_eq!(pool.len(), 2);
491 }
492
493 #[test]
494 fn gc_with_multiple_references_in_buffer() {
495 let mut pool = GraphemePool::new();
496 let id = pool.intern("๐จโ๐ฉโ๐ง", 2);
497
498 let mut buf = Buffer::new(4, 1);
500 let content = CellContent::from_grapheme(id);
501 buf.set(0, 0, Cell::new(content));
502 buf.set(2, 0, Cell::new(content));
503
504 pool.gc(&[&buf]);
505
506 assert_eq!(pool.get(id), Some("๐จโ๐ฉโ๐ง"));
507 assert_eq!(pool.refcount(id), 2);
508 }
509
510 #[test]
511 fn gc_with_empty_pool() {
512 let mut pool = GraphemePool::new();
513 let buf = Buffer::new(4, 1);
514 pool.gc(&[&buf]); assert!(pool.is_empty());
516 }
517
518 #[test]
519 fn gc_with_no_buffers() {
520 let mut pool = GraphemePool::new();
521 let id = pool.intern("test", 1);
522 pool.gc(&[]);
523 assert_eq!(pool.get(id), None);
525 assert!(pool.is_empty());
526 }
527
528 #[test]
529 fn gc_freed_slots_are_reusable() {
530 let mut pool = GraphemePool::new();
531 let id1 = pool.intern("A", 1);
532 let _id2 = pool.intern("B", 1);
533 let slot1 = id1.slot();
534
535 let buf = buf_with_grapheme(id1);
537 pool.gc(&[&buf]);
538
539 let id3 = pool.intern("C", 1);
541 assert_eq!(pool.get(id3), Some("C"));
543 assert_eq!(pool.len(), 2); assert_eq!(pool.get(id1), Some("A"));
547 assert_eq!(id1.slot(), slot1);
548 }
549
550 #[test]
551 fn gc_resets_refcounts_accurately() {
552 let mut pool = GraphemePool::new();
553 let id = pool.intern("๐", 2);
554
555 pool.retain(id);
557 pool.retain(id);
558 assert_eq!(pool.refcount(id), 3);
559
560 let buf = buf_with_grapheme(id);
562 pool.gc(&[&buf]);
563
564 assert_eq!(pool.refcount(id), 1);
566 }
567
568 #[test]
569 fn gc_lookup_table_stays_consistent() {
570 let mut pool = GraphemePool::new();
571 let _id1 = pool.intern("A", 1);
572 let id2 = pool.intern("B", 1);
573
574 let buf = buf_with_grapheme(id2);
576 pool.gc(&[&buf]);
577
578 let id_new = pool.intern("A", 1);
580 assert_eq!(pool.get(id_new), Some("A"));
581
582 let id_b2 = pool.intern("B", 1);
584 assert_eq!(id_b2, id2);
585 }
586 }
587
588 mod property {
589 use super::*;
590 use proptest::prelude::*;
591
592 fn arb_grapheme() -> impl Strategy<Value = String> {
594 prop::string::string_regex(".{1,8}")
595 .unwrap()
596 .prop_filter("non-empty", |s| !s.is_empty())
597 }
598
599 fn arb_width() -> impl Strategy<Value = u8> {
601 0u8..=GraphemeId::MAX_WIDTH
602 }
603
604 proptest! {
605 #![proptest_config(ProptestConfig::with_cases(256))]
606
607 #[test]
609 fn intern_get_roundtrip(s in arb_grapheme(), w in arb_width()) {
610 let mut pool = GraphemePool::new();
611 let id = pool.intern(&s, w);
612 prop_assert_eq!(pool.get(id), Some(s.as_str()));
613 }
614
615 #[test]
617 fn intern_preserves_width(s in arb_grapheme(), w in arb_width()) {
618 let mut pool = GraphemePool::new();
619 let id = pool.intern(&s, w);
620 prop_assert_eq!(id.width(), w as usize);
621 }
622
623 #[test]
625 fn deduplication_same_id(s in arb_grapheme(), w in arb_width()) {
626 let mut pool = GraphemePool::new();
627 let id1 = pool.intern(&s, w);
628 let id2 = pool.intern(&s, w);
629 prop_assert_eq!(id1, id2);
630 prop_assert_eq!(pool.len(), 1);
631 }
632
633 #[test]
635 fn deduplication_refcount(s in arb_grapheme(), w in arb_width(), extra in 0u32..10) {
636 let mut pool = GraphemePool::new();
637 let id = pool.intern(&s, w);
638 for _ in 0..extra {
639 pool.intern(&s, w);
640 }
641 prop_assert_eq!(pool.refcount(id), 1 + extra);
642 }
643
644 #[test]
646 fn retain_release_refcount(
647 s in arb_grapheme(),
648 w in arb_width(),
649 retains in 0u32..10,
650 releases in 0u32..10
651 ) {
652 let mut pool = GraphemePool::new();
653 let id = pool.intern(&s, w);
654 for _ in 0..retains {
656 pool.retain(id);
657 }
658 let expected_after_retain = 1 + retains;
659 prop_assert_eq!(pool.refcount(id), expected_after_retain);
660
661 let actual_releases = releases.min(expected_after_retain - 1);
662 for _ in 0..actual_releases {
663 pool.release(id);
664 }
665 prop_assert_eq!(pool.refcount(id), expected_after_retain - actual_releases);
666 prop_assert_eq!(pool.get(id), Some(s.as_str()));
668 }
669
670 #[test]
672 fn release_to_zero_frees(s in arb_grapheme(), w in arb_width(), extra in 0u32..5) {
673 let mut pool = GraphemePool::new();
674 let id = pool.intern(&s, w);
675 for _ in 0..extra {
676 pool.retain(id);
677 }
678 for _ in 0..=extra {
680 pool.release(id);
681 }
682 prop_assert_eq!(pool.get(id), None);
683 prop_assert_eq!(pool.refcount(id), 0);
684 prop_assert!(pool.is_empty());
685 }
686
687 #[test]
689 fn slot_reuse_after_free(
690 s1 in arb_grapheme(),
691 s2 in arb_grapheme(),
692 w in arb_width()
693 ) {
694 let mut pool = GraphemePool::new();
695 let id1 = pool.intern(&s1, w);
696 let slot1 = id1.slot();
697 pool.release(id1);
698
699 let id2 = pool.intern(&s2, w);
701 prop_assert_eq!(id2.slot(), slot1);
702 prop_assert_eq!(pool.get(id2), Some(s2.as_str()));
703 }
704
705 #[test]
707 fn len_invariant(count in 1usize..20) {
708 let mut pool = GraphemePool::new();
709 let mut ids = Vec::new();
710 for i in 0..count {
711 let s = format!("g{i}");
712 ids.push(pool.intern(&s, 1));
713 }
714 prop_assert_eq!(pool.len(), count);
715
716 let release_count = count / 2;
718 for id in &ids[..release_count] {
719 pool.release(*id);
720 }
721 prop_assert_eq!(pool.len(), count - release_count);
722 }
723
724 #[test]
726 fn distinct_strings_distinct_ids(count in 2usize..15) {
727 let mut pool = GraphemePool::new();
728 let mut ids = Vec::new();
729 for i in 0..count {
730 let s = format!("unique_{i}");
731 ids.push(pool.intern(&s, 1));
732 }
733 for i in 0..ids.len() {
735 for j in (i + 1)..ids.len() {
736 prop_assert_ne!(ids[i], ids[j]);
737 }
738 }
739 }
740
741 #[test]
743 fn clear_resets_all(count in 1usize..20) {
744 let mut pool = GraphemePool::new();
745 let mut ids = Vec::new();
746 for i in 0..count {
747 let s = format!("c{i}");
748 ids.push(pool.intern(&s, 1));
749 }
750 pool.clear();
751 prop_assert!(pool.is_empty());
752 prop_assert_eq!(pool.len(), 0);
753 for id in &ids {
754 prop_assert_eq!(pool.get(*id), None);
755 }
756 }
757
758 #[test]
762 fn positive_refcount_implies_valid_slot(
763 count in 1usize..10,
764 retains in proptest::collection::vec(0u32..5, 1..10),
765 ) {
766 let mut pool = GraphemePool::new();
767 let mut ids = Vec::new();
768 for i in 0..count {
769 let s = format!("inv_{i}");
770 ids.push(pool.intern(&s, 1));
771 }
772
773 for (i, &extra) in retains.iter().enumerate() {
775 let id = ids[i % count];
776 for _ in 0..extra {
777 pool.retain(id);
778 }
779 }
780
781 for (i, &id) in ids.iter().enumerate() {
783 let rc = pool.refcount(id);
784 if rc > 0 {
785 prop_assert!(pool.get(id).is_some(),
786 "slot {} has refcount {} but get() returned None", i, rc);
787 }
788 }
789 }
790
791 #[test]
793 fn release_decrements_by_one(s in arb_grapheme(), w in arb_width(), retains in 1u32..8) {
794 let mut pool = GraphemePool::new();
795 let id = pool.intern(&s, w);
796 for _ in 0..retains {
797 pool.retain(id);
798 }
799 let rc_before = pool.refcount(id);
800 pool.release(id);
801 let rc_after = pool.refcount(id);
802 prop_assert_eq!(rc_after, rc_before - 1,
803 "release should decrement refcount by exactly 1");
804 }
805
806 #[test]
808 fn over_release_does_not_corrupt(count in 1usize..5) {
809 let mut pool = GraphemePool::new();
810 let mut ids = Vec::new();
811 for i in 0..count {
812 let s = format!("or_{i}");
813 ids.push(pool.intern(&s, 1));
814 }
815
816 let victim = ids[0];
818 pool.release(victim);
819 prop_assert_eq!(pool.refcount(victim), 0);
820 prop_assert_eq!(pool.get(victim), None);
821
822 pool.release(victim);
824 prop_assert_eq!(pool.refcount(victim), 0);
825
826 for &id in &ids[1..] {
828 prop_assert!(pool.get(id).is_some(),
829 "over-release corrupted unrelated slot");
830 prop_assert!(pool.refcount(id) > 0);
831 }
832 }
833
834 #[test]
836 fn cross_pool_id_is_invalid(s in arb_grapheme(), w in arb_width()) {
837 let mut pool_a = GraphemePool::new();
838 let pool_b = GraphemePool::new();
839 let id = pool_a.intern(&s, w);
840
841 prop_assert_eq!(pool_b.get(id), None,
843 "GraphemeId from pool A should not be valid in pool B");
844 }
845 }
846 }
847}