1use std::collections::HashMap;
6
7use super::functions::{
8 compat_fold, is_combining, rope_collect, rope_depth, rope_node_len, FNV_OFFSET_BASIS, FNV_PRIME,
9};
10
#[allow(dead_code)]
/// A node in a character trie over interned strings.
struct TrieNode {
    // One child per distinct next character; boxed to keep the node size fixed.
    children: HashMap<char, Box<TrieNode>>,
    // `Some(id)` when a complete interned string terminates at this node.
    terminal: Option<InternedString>,
}
17#[allow(dead_code)]
18impl TrieNode {
19 fn new() -> Self {
20 Self {
21 children: HashMap::new(),
22 terminal: None,
23 }
24 }
25}
#[allow(dead_code)]
/// Incremental FNV-1a hasher over a 64-bit state.
#[derive(Clone, Debug)]
pub struct Fnv1aHasher {
    // Running hash value; seeded with `FNV_OFFSET_BASIS` by `new`.
    state: u64,
}
32#[allow(dead_code)]
33impl Fnv1aHasher {
34 pub fn new() -> Self {
36 Self {
37 state: FNV_OFFSET_BASIS,
38 }
39 }
40 pub fn write_byte(&mut self, byte: u8) {
42 self.state ^= byte as u64;
43 self.state = self.state.wrapping_mul(FNV_PRIME);
44 }
45 pub fn write_str(&mut self, s: &str) {
47 for byte in s.as_bytes() {
48 self.write_byte(*byte);
49 }
50 }
51 pub fn finish(&self) -> u64 {
53 self.state
54 }
55 pub fn hash_str(s: &str) -> u64 {
57 let mut h = Self::new();
58 h.write_str(s);
59 h.finish()
60 }
61 pub fn hash_str_32(s: &str) -> u32 {
63 let h = Self::hash_str(s);
64 ((h >> 32) ^ h) as u32
65 }
66}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
/// Counters maintained by `StringPool` across intern requests.
pub struct PoolStatistics {
    // Number of distinct strings currently stored.
    pub unique_count: usize,
    // Total bytes of the distinct stored strings.
    pub unique_bytes: usize,
    // Number of `intern` calls, including duplicates.
    pub total_intern_requests: usize,
    // Bytes requested across all `intern` calls, including duplicates.
    pub total_requested_bytes: usize,
}
79impl PoolStatistics {
80 pub fn bytes_saved(&self) -> usize {
82 self.total_requested_bytes.saturating_sub(self.unique_bytes)
83 }
84 pub fn dedup_ratio(&self) -> f64 {
86 if self.total_requested_bytes == 0 {
87 0.0
88 } else {
89 self.bytes_saved() as f64 / self.total_requested_bytes as f64
90 }
91 }
92 pub fn avg_string_len(&self) -> f64 {
94 if self.unique_count == 0 {
95 0.0
96 } else {
97 self.unique_bytes as f64 / self.unique_count as f64
98 }
99 }
100}
/// Deduplicating string interner: each distinct string is stored once and
/// addressed by a stable `InternedString` index.
pub struct StringPool {
    // Reverse lookup from string content to its index in `strings`.
    map: HashMap<String, u32>,
    // Stored strings in insertion order; the index is the interned handle.
    pub(super) strings: Vec<String>,
    // Running counters; see `PoolStatistics`.
    stats: PoolStatistics,
}
113impl StringPool {
114 pub fn new() -> Self {
116 Self {
117 map: HashMap::new(),
118 strings: Vec::new(),
119 stats: PoolStatistics::default(),
120 }
121 }
122 pub fn with_capacity(cap: usize) -> Self {
124 Self {
125 map: HashMap::with_capacity(cap),
126 strings: Vec::with_capacity(cap),
127 stats: PoolStatistics::default(),
128 }
129 }
130 pub fn intern(&mut self, s: &str) -> InternedString {
133 self.stats.total_intern_requests += 1;
134 self.stats.total_requested_bytes += s.len();
135 if let Some(&idx) = self.map.get(s) {
136 return InternedString { index: idx };
137 }
138 let idx = self.strings.len() as u32;
139 self.strings.push(s.to_string());
140 self.map.insert(s.to_string(), idx);
141 self.stats.unique_count += 1;
142 self.stats.unique_bytes += s.len();
143 InternedString { index: idx }
144 }
145 pub fn intern_bulk(&mut self, strs: &[&str]) -> Vec<InternedString> {
147 strs.iter().map(|s| self.intern(s)).collect()
148 }
149 pub fn intern_iter<'a, I>(&mut self, iter: I) -> Vec<InternedString>
151 where
152 I: IntoIterator<Item = &'a str>,
153 {
154 iter.into_iter().map(|s| self.intern(s)).collect()
155 }
156 pub fn resolve(&self, id: InternedString) -> Option<&str> {
158 self.strings.get(id.index as usize).map(|s| s.as_str())
159 }
160 pub fn lookup(&self, s: &str) -> Option<InternedString> {
163 self.map.get(s).map(|&idx| InternedString { index: idx })
164 }
165 pub fn contains(&self, s: &str) -> bool {
167 self.map.contains_key(s)
168 }
169 pub fn len(&self) -> usize {
171 self.strings.len()
172 }
173 pub fn is_empty(&self) -> bool {
175 self.strings.is_empty()
176 }
177 pub fn statistics(&self) -> &PoolStatistics {
179 &self.stats
180 }
181 pub fn snapshot(&self) -> PoolSnapshot {
183 PoolSnapshot {
184 strings: self.strings.clone(),
185 }
186 }
187 pub fn iter(&self) -> impl Iterator<Item = (InternedString, &str)> {
189 self.strings
190 .iter()
191 .enumerate()
192 .map(|(i, s)| (InternedString { index: i as u32 }, s.as_str()))
193 }
194 pub fn clear(&mut self) {
196 self.map.clear();
197 self.strings.clear();
198 self.stats.unique_count = 0;
199 self.stats.unique_bytes = 0;
200 }
201 pub fn merge(&mut self, other: &StringPool) -> Vec<InternedString> {
204 other.strings.iter().map(|s| self.intern(s)).collect()
205 }
206}
#[allow(dead_code)]
/// Handle issued by `GenerationalPool`: a pool index tagged with the pool
/// generation it was issued in. Handles from older generations are rejected.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct GenerationalString {
    // Index into the pool's string table at issue time.
    index: u32,
    // Pool generation at issue time; bumped by `GenerationalPool::compact`.
    generation: u16,
}
#[allow(dead_code)]
impl GenerationalString {
    /// The raw pool index this handle refers to.
    pub fn index(self) -> u32 {
        self.index
    }
    /// The pool generation this handle was issued in.
    pub fn generation(self) -> u16 {
        self.generation
    }
}
#[allow(dead_code)]
/// Coarse classification of a string's character content.
// NOTE(review): no classifier producing these values is visible in this
// chunk; the variant meanings are inferred from their names — confirm
// against the code that assigns them.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StringCategory {
    AlphaNum,
    Alpha,
    Numeric,
    Whitespace,
    Empty,
    Identifier,
    Mixed,
}
#[allow(dead_code)]
/// String pool augmented with a character trie for prefix queries.
pub struct TriePool {
    // Trie over all inserted strings; terminals hold interned ids.
    root: TrieNode,
    // Backing interner providing ids and text storage.
    pool: StringPool,
}
251#[allow(dead_code)]
252impl TriePool {
253 pub fn new() -> Self {
255 Self {
256 root: TrieNode::new(),
257 pool: StringPool::new(),
258 }
259 }
260 pub fn insert(&mut self, s: &str) -> InternedString {
262 let id = self.pool.intern(s);
263 let mut node = &mut self.root;
264 for ch in s.chars() {
265 node = node
266 .children
267 .entry(ch)
268 .or_insert_with(|| Box::new(TrieNode::new()));
269 }
270 node.terminal = Some(id);
271 id
272 }
273 fn collect_all(node: &TrieNode, result: &mut Vec<InternedString>) {
275 if let Some(id) = node.terminal {
276 result.push(id);
277 }
278 for child in node.children.values() {
279 Self::collect_all(child, result);
280 }
281 }
282 pub fn find_prefix(&self, prefix: &str) -> Vec<InternedString> {
284 let mut node = &self.root;
285 for ch in prefix.chars() {
286 match node.children.get(&ch) {
287 Some(child) => node = child,
288 None => return Vec::new(),
289 }
290 }
291 let mut result = Vec::new();
292 Self::collect_all(node, &mut result);
293 result
294 }
295 pub fn get(&self, s: &str) -> Option<InternedString> {
297 self.pool.lookup(s)
298 }
299 pub fn resolve(&self, id: InternedString) -> Option<&str> {
301 self.pool.resolve(id)
302 }
303 pub fn len(&self) -> usize {
305 self.pool.len()
306 }
307 pub fn is_empty(&self) -> bool {
309 self.pool.is_empty()
310 }
311}
#[allow(dead_code)]
/// Sliding-window estimator that extrapolates pool growth linearly from
/// recent size observations.
pub struct PoolGrowthEstimator {
    // Recent size observations, oldest first; trimmed by `record`.
    history: Vec<usize>,
    // Target number of retained observations (at least 1).
    window: usize,
}
318#[allow(dead_code)]
319impl PoolGrowthEstimator {
320 pub fn new(window: usize) -> Self {
322 Self {
323 history: Vec::new(),
324 window: window.max(1),
325 }
326 }
327 pub fn record(&mut self, size: usize) {
329 self.history.push(size);
330 if self.history.len() > self.window * 2 {
331 let drain_to = self.history.len() - self.window;
332 self.history.drain(..drain_to);
333 }
334 }
335 pub fn avg_growth(&self) -> f64 {
337 if self.history.len() < 2 {
338 return 0.0;
339 }
340 let _n = self.history.len();
341 let deltas: Vec<f64> = self
342 .history
343 .windows(2)
344 .map(|w| w[1] as f64 - w[0] as f64)
345 .collect();
346 deltas.iter().sum::<f64>() / deltas.len() as f64
347 }
348 pub fn estimate_after(&self, n: usize) -> f64 {
350 let last = self.history.last().copied().unwrap_or(0) as f64;
351 last + self.avg_growth() * n as f64
352 }
353 pub fn observation_count(&self) -> usize {
355 self.history.len()
356 }
357}
#[allow(dead_code)]
/// A rope: a binary tree of string fragments supporting concatenation
/// without copying text. `root` is `None` for the empty rope.
#[derive(Clone, Debug)]
pub struct Rope {
    pub(super) root: Option<RopeNode>,
}
367#[allow(dead_code)]
368impl Rope {
369 pub fn new() -> Self {
371 Self { root: None }
372 }
373 pub fn from_str(s: &str) -> Self {
375 if s.is_empty() {
376 Self { root: None }
377 } else {
378 Self {
379 root: Some(RopeNode::Leaf(s.to_string())),
380 }
381 }
382 }
383 pub fn len(&self) -> usize {
385 match &self.root {
386 None => 0,
387 Some(node) => rope_node_len(node),
388 }
389 }
390 pub fn is_empty(&self) -> bool {
392 self.len() == 0
393 }
394 pub fn concat(self, other: Rope) -> Rope {
396 match (self.root, other.root) {
397 (None, r) => Rope { root: r },
398 (l, None) => Rope { root: l },
399 (Some(l), Some(r)) => {
400 let total = rope_node_len(&l) + rope_node_len(&r);
401 Rope {
402 root: Some(RopeNode::Concat(Box::new(l), Box::new(r), total)),
403 }
404 }
405 }
406 }
407 pub fn append_str(self, s: &str) -> Rope {
409 self.concat(Rope::from_str(s))
410 }
411 pub fn collect_string(&self) -> String {
416 use std::fmt::Write;
418 let mut buf = String::with_capacity(self.len());
419 if let Some(node) = &self.root {
420 rope_collect(node, &mut buf);
421 }
422 buf
423 }
424 pub fn depth(&self) -> usize {
426 match &self.root {
427 None => 0,
428 Some(node) => rope_depth(node),
429 }
430 }
431}
#[allow(dead_code)]
/// Set difference between two pool snapshots, compared by string content.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PoolDiff {
    // Strings present only in the newer snapshot.
    pub added: Vec<String>,
    // Strings present only in the older snapshot.
    pub removed: Vec<String>,
    // Strings present in both snapshots.
    pub common: Vec<String>,
}
443#[allow(dead_code)]
444impl PoolDiff {
445 pub fn compute(old: &PoolSnapshot, new: &PoolSnapshot) -> Self {
447 use std::collections::HashSet;
448 let old_set: HashSet<&str> = old.strings.iter().map(|s| s.as_str()).collect();
449 let new_set: HashSet<&str> = new.strings.iter().map(|s| s.as_str()).collect();
450 let added = new_set
451 .difference(&old_set)
452 .map(|s| s.to_string())
453 .collect();
454 let removed = old_set
455 .difference(&new_set)
456 .map(|s| s.to_string())
457 .collect();
458 let common = old_set
459 .intersection(&new_set)
460 .map(|s| s.to_string())
461 .collect();
462 Self {
463 added,
464 removed,
465 common,
466 }
467 }
468 pub fn added_count(&self) -> usize {
470 self.added.len()
471 }
472 pub fn removed_count(&self) -> usize {
474 self.removed.len()
475 }
476 pub fn is_empty(&self) -> bool {
478 self.added.is_empty() && self.removed.is_empty()
479 }
480}
#[allow(dead_code)]
/// Case-insensitive interner: strings are lowercased before interning, so
/// inputs differing only in case share one entry.
pub struct CaseFoldPool {
    // Backing pool; stores only lowercased strings.
    inner: StringPool,
}
487#[allow(dead_code)]
488impl CaseFoldPool {
489 pub fn new() -> Self {
491 Self {
492 inner: StringPool::new(),
493 }
494 }
495 pub fn intern(&mut self, s: &str) -> InternedString {
497 let folded = s.to_lowercase();
498 self.inner.intern(&folded)
499 }
500 pub fn resolve(&self, id: InternedString) -> Option<&str> {
502 self.inner.resolve(id)
503 }
504 pub fn contains(&self, s: &str) -> bool {
506 self.inner.contains(&s.to_lowercase())
507 }
508 pub fn len(&self) -> usize {
510 self.inner.len()
511 }
512 pub fn is_empty(&self) -> bool {
514 self.inner.is_empty()
515 }
516 pub fn inner(&self) -> &StringPool {
518 &self.inner
519 }
520}
#[allow(dead_code)]
/// Normalization form selector for `NormalizedPool`. Named after the Unicode
/// normalization forms; in this module's simplified `normalize`, the two
/// canonical forms behave identically (strip combining marks) and the two
/// compatibility forms additionally apply `compat_fold`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NormForm {
    Nfc,
    Nfd,
    Nfkc,
    Nfkd,
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
/// Stable, copyable handle to a string interned in a `StringPool`. The index
/// is only meaningful against the pool that issued it.
// NOTE(review): `InternedSlice` derives `Debug` and embeds this type, which
// requires `InternedString: Debug` — confirm a manual `Debug` impl exists
// elsewhere in the file, or add `Debug` to this derive list.
pub struct InternedString {
    pub(super) index: u32,
}
impl InternedString {
    /// Reconstructs a handle from a raw index. The caller is responsible for
    /// ensuring `index` is valid for the pool it will be used against.
    pub fn from_raw(index: u32) -> Self {
        Self { index }
    }
    /// Exposes the raw pool index backing this handle.
    pub fn raw_index(self) -> u32 {
        self.index
    }
}
#[derive(Clone, Debug, PartialEq, Eq)]
/// Immutable copy of a pool's contents, preserving insertion order so that
/// interned indices remain meaningful against the snapshot.
pub struct PoolSnapshot {
    pub strings: Vec<String>,
}
562impl PoolSnapshot {
563 pub fn len(&self) -> usize {
565 self.strings.len()
566 }
567 pub fn is_empty(&self) -> bool {
569 self.strings.is_empty()
570 }
571 pub fn get(&self, id: InternedString) -> Option<&str> {
573 self.strings.get(id.index as usize).map(|s| s.as_str())
574 }
575 pub fn restore(&self) -> StringPool {
577 let mut pool = StringPool::new();
578 for s in &self.strings {
579 pool.intern(s);
580 }
581 pool
582 }
583 pub fn total_bytes(&self) -> usize {
585 self.strings.iter().map(|s| s.len()).sum()
586 }
587 pub fn encode(&self) -> Vec<u8> {
589 let mut buf = Vec::new();
590 let count = self.strings.len() as u32;
591 buf.extend_from_slice(&count.to_le_bytes());
592 for s in &self.strings {
593 let len = s.len() as u32;
594 buf.extend_from_slice(&len.to_le_bytes());
595 buf.extend_from_slice(s.as_bytes());
596 }
597 buf
598 }
599 pub fn decode(data: &[u8]) -> Option<Self> {
601 if data.len() < 4 {
602 return None;
603 }
604 let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
605 let mut pos = 4;
606 let mut strings = Vec::with_capacity(count);
607 for _ in 0..count {
608 if pos + 4 > data.len() {
609 return None;
610 }
611 let len = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]])
612 as usize;
613 pos += 4;
614 if pos + len > data.len() {
615 return None;
616 }
617 let s = std::str::from_utf8(&data[pos..pos + len]).ok()?;
618 strings.push(s.to_string());
619 pos += len;
620 }
621 Some(Self { strings })
622 }
623}
#[allow(dead_code)]
/// Interner that normalizes strings (per `NormForm`) before pooling them,
/// so inputs with the same normalized form share one entry.
pub struct NormalizedPool {
    // Which normalization to apply in `normalize`.
    form: NormForm,
    // Backing pool; stores only normalized strings.
    inner: StringPool,
}
631#[allow(dead_code)]
632impl NormalizedPool {
633 pub fn new(form: NormForm) -> Self {
635 Self {
636 form,
637 inner: StringPool::new(),
638 }
639 }
640 pub fn intern(&mut self, s: &str) -> InternedString {
642 let normalized = self.normalize(s);
643 self.inner.intern(&normalized)
644 }
645 pub fn normalize(&self, s: &str) -> String {
647 match self.form {
648 NormForm::Nfc | NormForm::Nfd => s.chars().filter(|c| !is_combining(*c)).collect(),
649 NormForm::Nfkc | NormForm::Nfkd => s
650 .chars()
651 .filter(|c| !is_combining(*c))
652 .map(compat_fold)
653 .collect(),
654 }
655 }
656 pub fn resolve(&self, id: InternedString) -> Option<&str> {
658 self.inner.resolve(id)
659 }
660 pub fn len(&self) -> usize {
662 self.inner.len()
663 }
664 pub fn is_empty(&self) -> bool {
666 self.inner.is_empty()
667 }
668}
#[allow(dead_code)]
/// Internal rope tree node.
#[derive(Clone, Debug)]
pub(super) enum RopeNode {
    // A contiguous chunk of text.
    Leaf(String),
    // Left and right subtrees plus the cached total byte length of both
    // sides (computed in `Rope::concat`).
    Concat(Box<RopeNode>, Box<RopeNode>, usize),
}
#[allow(dead_code)]
/// Interner with linear-scan prefix queries over its contents.
pub struct PrefixPool {
    // Backing interner; queries scan it in insertion order.
    pool: StringPool,
}
682#[allow(dead_code)]
683impl PrefixPool {
684 pub fn new() -> Self {
686 Self {
687 pool: StringPool::new(),
688 }
689 }
690 pub fn intern(&mut self, s: &str) -> InternedString {
692 self.pool.intern(s)
693 }
694 pub fn longest_prefix(&self, s: &str) -> Option<InternedString> {
696 let mut best: Option<InternedString> = None;
697 let mut best_len = 0usize;
698 for (id, interned) in self.pool.iter() {
699 if s.starts_with(interned) && interned.len() >= best_len {
700 best_len = interned.len();
701 best = Some(id);
702 }
703 }
704 best
705 }
706 pub fn all_prefixes(&self, s: &str) -> Vec<InternedString> {
708 let mut result: Vec<(usize, InternedString)> = self
709 .pool
710 .iter()
711 .filter(|(_, interned)| s.starts_with(*interned))
712 .map(|(id, interned)| (interned.len(), id))
713 .collect();
714 result.sort_by(|a, b| b.0.cmp(&a.0));
715 result.into_iter().map(|(_, id)| id).collect()
716 }
717 pub fn resolve(&self, id: InternedString) -> Option<&str> {
719 self.pool.resolve(id)
720 }
721 pub fn len(&self) -> usize {
723 self.pool.len()
724 }
725 pub fn is_empty(&self) -> bool {
727 self.pool.is_empty()
728 }
729}
#[allow(dead_code)]
/// Sorted index over a pool's contents, built once for repeated queries.
pub struct StringIndex {
    // `(text, handle)` pairs sorted lexicographically by text; enables
    // binary search for prefix queries.
    sorted: Vec<(String, InternedString)>,
}
736#[allow(dead_code)]
737impl StringIndex {
738 pub fn build(pool: &StringPool) -> Self {
740 let mut sorted: Vec<(String, InternedString)> =
741 pool.iter().map(|(id, s)| (s.to_string(), id)).collect();
742 sorted.sort_by(|a, b| a.0.cmp(&b.0));
743 Self { sorted }
744 }
745 pub fn find_prefix(&self, prefix: &str) -> Vec<InternedString> {
747 let start = self.sorted.partition_point(|(s, _)| s.as_str() < prefix);
748 self.sorted[start..]
749 .iter()
750 .take_while(|(s, _)| s.starts_with(prefix))
751 .map(|(_, id)| *id)
752 .collect()
753 }
754 pub fn find_suffix(&self, suffix: &str) -> Vec<InternedString> {
756 self.sorted
757 .iter()
758 .filter(|(s, _)| s.ends_with(suffix))
759 .map(|(_, id)| *id)
760 .collect()
761 }
762 pub fn find_contains(&self, sub: &str) -> Vec<InternedString> {
764 self.sorted
765 .iter()
766 .filter(|(s, _)| s.contains(sub))
767 .map(|(_, id)| *id)
768 .collect()
769 }
770 pub fn len(&self) -> usize {
772 self.sorted.len()
773 }
774 pub fn is_empty(&self) -> bool {
776 self.sorted.is_empty()
777 }
778}
#[allow(dead_code)]
/// Stateless helpers that render a `StringPool`'s contents and statistics
/// as text.
pub struct PoolWriter;
#[allow(dead_code)]
impl PoolWriter {
    /// Renders one `index: "string"` line per pool entry, in insertion order.
    pub fn write_to_string(pool: &StringPool) -> String {
        let mut out = String::new();
        for (id, s) in pool.iter() {
            out.push_str(&format!("{}: {:?}\n", id.raw_index(), s));
        }
        out
    }
    /// Renders the pool's statistics via `Display`.
    // NOTE(review): `{}` requires `impl Display for PoolStatistics`, but the
    // struct only derives Debug in this chunk — confirm a manual Display
    // impl exists elsewhere in the file, or switch this to `{:?}`.
    pub fn write_stats_to_string(pool: &StringPool) -> String {
        format!("{}", pool.statistics())
    }
}
#[allow(dead_code)]
/// Result of splitting a pool's handles by a predicate over their text.
pub struct PoolPartition {
    // Handles whose string satisfied the predicate.
    pub matching: Vec<InternedString>,
    // Handles whose string did not satisfy the predicate.
    pub non_matching: Vec<InternedString>,
}
803#[allow(dead_code)]
804impl PoolPartition {
805 pub fn by<F>(pool: &StringPool, mut pred: F) -> Self
807 where
808 F: FnMut(&str) -> bool,
809 {
810 let mut matching = Vec::new();
811 let mut non_matching = Vec::new();
812 for (id, s) in pool.iter() {
813 if pred(s) {
814 matching.push(id);
815 } else {
816 non_matching.push(id);
817 }
818 }
819 Self {
820 matching,
821 non_matching,
822 }
823 }
824 pub fn matching_count(&self) -> usize {
826 self.matching.len()
827 }
828 pub fn non_matching_count(&self) -> usize {
830 self.non_matching.len()
831 }
832}
#[allow(dead_code)]
/// A byte-range view into an interned string, addressed by pool handle plus
/// `start..end` byte offsets.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct InternedSlice {
    // Handle of the string this slice views.
    pub base: InternedString,
    // Inclusive start byte offset into the base string.
    pub start: u32,
    // Exclusive end byte offset into the base string.
    pub end: u32,
}
841#[allow(dead_code)]
842impl InternedSlice {
843 pub fn new(base: InternedString, start: u32, end: u32) -> Self {
845 Self { base, start, end }
846 }
847 pub fn len(&self) -> usize {
849 (self.end - self.start) as usize
850 }
851 pub fn is_empty(&self) -> bool {
853 self.start >= self.end
854 }
855 pub fn resolve<'a>(&self, pool: &'a StringPool) -> Option<&'a str> {
857 let base = pool.resolve(self.base)?;
858 let start = self.start as usize;
859 let end = self.end as usize;
860 if end > base.len() {
861 return None;
862 }
863 base.get(start..end)
864 }
865}
#[allow(dead_code)]
/// Reference-counted interner whose handles are invalidated wholesale by
/// bumping a generation counter during `compact`.
pub struct GenerationalPool {
    // Backing interner.
    pool: StringPool,
    // Current generation; handles from other generations are rejected.
    generation: u16,
    // Per-index reference counts, parallel to the pool's string table.
    ref_counts: Vec<u32>,
}
#[allow(dead_code)]
impl GenerationalPool {
    /// Creates an empty pool at generation 0.
    pub fn new() -> Self {
        Self {
            pool: StringPool::new(),
            generation: 0,
            ref_counts: Vec::new(),
        }
    }
    /// Interns `s`, bumps its reference count, and returns a handle tagged
    /// with the current generation.
    pub fn intern(&mut self, s: &str) -> GenerationalString {
        let id = self.pool.intern(s);
        let idx = id.raw_index() as usize;
        // Grow the refcount table lazily so it stays parallel to the pool.
        while self.ref_counts.len() <= idx {
            self.ref_counts.push(0);
        }
        // Saturating: the count pins at u32::MAX rather than wrapping.
        self.ref_counts[idx] = self.ref_counts[idx].saturating_add(1);
        GenerationalString {
            index: id.raw_index(),
            generation: self.generation,
        }
    }
    /// Drops one reference held by `handle`. Handles from older generations
    /// are ignored, as are indices the refcount table has never covered.
    pub fn release(&mut self, handle: GenerationalString) {
        if handle.generation != self.generation {
            return;
        }
        let idx = handle.index as usize;
        if idx < self.ref_counts.len() {
            // Saturating: over-releasing stops at 0 instead of underflowing.
            self.ref_counts[idx] = self.ref_counts[idx].saturating_sub(1);
        }
    }
    /// Resolves a handle to its text; `None` for stale (old-generation) or
    /// out-of-range handles.
    pub fn resolve(&self, handle: GenerationalString) -> Option<&str> {
        if handle.generation != self.generation {
            return None;
        }
        self.pool.resolve(InternedString::from_raw(handle.index))
    }
    /// True when the handle is current-generation and still has live
    /// references.
    pub fn is_valid(&self, handle: GenerationalString) -> bool {
        if handle.generation != self.generation {
            return false;
        }
        let idx = handle.index as usize;
        idx < self.ref_counts.len() && self.ref_counts[idx] > 0
    }
    /// Rebuilds the pool keeping only strings with a positive refcount,
    /// then bumps the generation so every outstanding handle — including
    /// ones to surviving strings, whose indices may have shifted — becomes
    /// stale. Returns how many strings were dropped.
    pub fn compact(&mut self) -> usize {
        let mut new_pool = StringPool::new();
        let mut new_refs: Vec<u32> = Vec::new();
        let mut removed = 0usize;
        // Iteration is in insertion order, so survivors keep their relative
        // order while being re-interned at new (compacted) indices.
        for (id, s) in self.pool.iter() {
            let idx = id.raw_index() as usize;
            let rc = if idx < self.ref_counts.len() {
                self.ref_counts[idx]
            } else {
                0
            };
            if rc > 0 {
                let new_id = new_pool.intern(s);
                let new_idx = new_id.raw_index() as usize;
                while new_refs.len() <= new_idx {
                    new_refs.push(0);
                }
                // Carry the refcount across to the new index.
                new_refs[new_idx] = rc;
            } else {
                removed += 1;
            }
        }
        self.pool = new_pool;
        self.ref_counts = new_refs;
        // Wrapping: after 65536 compactions the counter reuses old values,
        // at which point very stale handles could validate again.
        self.generation = self.generation.wrapping_add(1);
        removed
    }
    /// The current generation number.
    pub fn generation(&self) -> u16 {
        self.generation
    }
    /// Number of strings with at least one live reference.
    pub fn live_count(&self) -> usize {
        self.ref_counts.iter().filter(|&&rc| rc > 0).count()
    }
    /// Total strings stored, live or not.
    pub fn total_count(&self) -> usize {
        self.pool.len()
    }
}
#[allow(dead_code)]
/// Interner that also counts how many times each string has been interned.
pub struct FrequencyPool {
    // Backing interner.
    pool: StringPool,
    // Per-index intern counts, parallel to the pool's string table.
    counts: Vec<u64>,
}
972#[allow(dead_code)]
973impl FrequencyPool {
974 pub fn new() -> Self {
976 Self {
977 pool: StringPool::new(),
978 counts: Vec::new(),
979 }
980 }
981 pub fn intern(&mut self, s: &str) -> InternedString {
983 let id = self.pool.intern(s);
984 let idx = id.raw_index() as usize;
985 while self.counts.len() <= idx {
986 self.counts.push(0);
987 }
988 self.counts[idx] += 1;
989 id
990 }
991 pub fn frequency(&self, id: InternedString) -> u64 {
993 self.counts
994 .get(id.raw_index() as usize)
995 .copied()
996 .unwrap_or(0)
997 }
998 pub fn top_k(&self, k: usize) -> Vec<(InternedString, u64)> {
1000 let mut pairs: Vec<(InternedString, u64)> = self
1001 .pool
1002 .iter()
1003 .map(|(id, _)| (id, self.frequency(id)))
1004 .collect();
1005 pairs.sort_by(|a, b| b.1.cmp(&a.1));
1006 pairs.truncate(k);
1007 pairs
1008 }
1009 pub fn resolve(&self, id: InternedString) -> Option<&str> {
1011 self.pool.resolve(id)
1012 }
1013 pub fn pool(&self) -> &StringPool {
1015 &self.pool
1016 }
1017 pub fn total_calls(&self) -> u64 {
1019 self.counts.iter().sum()
1020 }
1021 pub fn len(&self) -> usize {
1023 self.pool.len()
1024 }
1025 pub fn is_empty(&self) -> bool {
1027 self.pool.is_empty()
1028 }
1029}
#[allow(dead_code)]
/// Owned, lexicographically sorted copy of a pool's contents.
pub struct PoolSortedView {
    // `(handle, text)` pairs sorted by text.
    entries: Vec<(InternedString, String)>,
}
1035#[allow(dead_code)]
1036impl PoolSortedView {
1037 pub fn build(pool: &StringPool) -> Self {
1039 let mut entries: Vec<(InternedString, String)> =
1040 pool.iter().map(|(id, s)| (id, s.to_string())).collect();
1041 entries.sort_by(|a, b| a.1.cmp(&b.1));
1042 Self { entries }
1043 }
1044 pub fn iter(&self) -> impl Iterator<Item = (InternedString, &str)> {
1046 self.entries.iter().map(|(id, s)| (*id, s.as_str()))
1047 }
1048 pub fn len(&self) -> usize {
1050 self.entries.len()
1051 }
1052 pub fn is_empty(&self) -> bool {
1054 self.entries.is_empty()
1055 }
1056}