Skip to main content

oxilean_runtime/string_pool/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use std::collections::HashMap;
6
7use super::functions::{
8    compat_fold, is_combining, rope_collect, rope_depth, rope_node_len, FNV_OFFSET_BASIS, FNV_PRIME,
9};
10
11/// A node in the trie.
12#[allow(dead_code)]
13struct TrieNode {
14    children: HashMap<char, Box<TrieNode>>,
15    terminal: Option<InternedString>,
16}
17#[allow(dead_code)]
18impl TrieNode {
19    fn new() -> Self {
20        Self {
21            children: HashMap::new(),
22            terminal: None,
23        }
24    }
25}
26/// An FNV-1a hash state for fast string hashing.
27#[allow(dead_code)]
28#[derive(Clone, Debug)]
29pub struct Fnv1aHasher {
30    state: u64,
31}
32#[allow(dead_code)]
33impl Fnv1aHasher {
34    /// Create a new hasher.
35    pub fn new() -> Self {
36        Self {
37            state: FNV_OFFSET_BASIS,
38        }
39    }
40    /// Feed a byte into the hash.
41    pub fn write_byte(&mut self, byte: u8) {
42        self.state ^= byte as u64;
43        self.state = self.state.wrapping_mul(FNV_PRIME);
44    }
45    /// Feed a string into the hash.
46    pub fn write_str(&mut self, s: &str) {
47        for byte in s.as_bytes() {
48            self.write_byte(*byte);
49        }
50    }
51    /// Get the final hash value.
52    pub fn finish(&self) -> u64 {
53        self.state
54    }
55    /// Hash a string in one call.
56    pub fn hash_str(s: &str) -> u64 {
57        let mut h = Self::new();
58        h.write_str(s);
59        h.finish()
60    }
61    /// Hash a string, returning a u32 by xor-folding.
62    pub fn hash_str_32(s: &str) -> u32 {
63        let h = Self::hash_str(s);
64        ((h >> 32) ^ h) as u32
65    }
66}
67/// Statistics for a [`StringPool`].
68#[derive(Clone, Debug, Default, PartialEq, Eq)]
69pub struct PoolStatistics {
70    /// Number of unique strings stored.
71    pub unique_count: usize,
72    /// Total bytes of unique string data stored.
73    pub unique_bytes: usize,
74    /// Total number of intern requests (including duplicates).
75    pub total_intern_requests: usize,
76    /// Total bytes that would have been stored without deduplication.
77    pub total_requested_bytes: usize,
78}
79impl PoolStatistics {
80    /// Bytes saved by deduplication.
81    pub fn bytes_saved(&self) -> usize {
82        self.total_requested_bytes.saturating_sub(self.unique_bytes)
83    }
84    /// Deduplication ratio (0.0 to 1.0). Returns 0.0 if no bytes were requested.
85    pub fn dedup_ratio(&self) -> f64 {
86        if self.total_requested_bytes == 0 {
87            0.0
88        } else {
89            self.bytes_saved() as f64 / self.total_requested_bytes as f64
90        }
91    }
92    /// Average string length.
93    pub fn avg_string_len(&self) -> f64 {
94        if self.unique_count == 0 {
95            0.0
96        } else {
97            self.unique_bytes as f64 / self.unique_count as f64
98        }
99    }
100}
101/// A string interning pool.
102///
103/// Stores each unique string exactly once and returns lightweight handles
104/// ([`InternedString`]) for fast comparison.
105pub struct StringPool {
106    /// Map from string content to its interned index.
107    map: HashMap<String, u32>,
108    /// Index-ordered storage of all unique strings.
109    pub(super) strings: Vec<String>,
110    /// Running statistics.
111    stats: PoolStatistics,
112}
113impl StringPool {
114    /// Create an empty string pool.
115    pub fn new() -> Self {
116        Self {
117            map: HashMap::new(),
118            strings: Vec::new(),
119            stats: PoolStatistics::default(),
120        }
121    }
122    /// Create a pool pre-sized for the given capacity.
123    pub fn with_capacity(cap: usize) -> Self {
124        Self {
125            map: HashMap::with_capacity(cap),
126            strings: Vec::with_capacity(cap),
127            stats: PoolStatistics::default(),
128        }
129    }
130    /// Intern a string. If the string is already in the pool, returns the
131    /// existing handle. Otherwise inserts it and returns a new handle.
132    pub fn intern(&mut self, s: &str) -> InternedString {
133        self.stats.total_intern_requests += 1;
134        self.stats.total_requested_bytes += s.len();
135        if let Some(&idx) = self.map.get(s) {
136            return InternedString { index: idx };
137        }
138        let idx = self.strings.len() as u32;
139        self.strings.push(s.to_string());
140        self.map.insert(s.to_string(), idx);
141        self.stats.unique_count += 1;
142        self.stats.unique_bytes += s.len();
143        InternedString { index: idx }
144    }
145    /// Intern multiple strings at once, returning handles in the same order.
146    pub fn intern_bulk(&mut self, strs: &[&str]) -> Vec<InternedString> {
147        strs.iter().map(|s| self.intern(s)).collect()
148    }
149    /// Intern strings from an iterator.
150    pub fn intern_iter<'a, I>(&mut self, iter: I) -> Vec<InternedString>
151    where
152        I: IntoIterator<Item = &'a str>,
153    {
154        iter.into_iter().map(|s| self.intern(s)).collect()
155    }
156    /// Resolve an interned string back to its content.
157    pub fn resolve(&self, id: InternedString) -> Option<&str> {
158        self.strings.get(id.index as usize).map(|s| s.as_str())
159    }
160    /// Look up an interned string by content. Returns `None` if the string
161    /// has not been interned.
162    pub fn lookup(&self, s: &str) -> Option<InternedString> {
163        self.map.get(s).map(|&idx| InternedString { index: idx })
164    }
165    /// Check whether a string has been interned.
166    pub fn contains(&self, s: &str) -> bool {
167        self.map.contains_key(s)
168    }
169    /// Number of unique strings in the pool.
170    pub fn len(&self) -> usize {
171        self.strings.len()
172    }
173    /// Whether the pool is empty.
174    pub fn is_empty(&self) -> bool {
175        self.strings.is_empty()
176    }
177    /// Get pool statistics.
178    pub fn statistics(&self) -> &PoolStatistics {
179        &self.stats
180    }
181    /// Create a snapshot of the pool for serialization.
182    pub fn snapshot(&self) -> PoolSnapshot {
183        PoolSnapshot {
184            strings: self.strings.clone(),
185        }
186    }
187    /// Iterate over all interned strings with their handles.
188    pub fn iter(&self) -> impl Iterator<Item = (InternedString, &str)> {
189        self.strings
190            .iter()
191            .enumerate()
192            .map(|(i, s)| (InternedString { index: i as u32 }, s.as_str()))
193    }
194    /// Clear the pool, removing all interned strings.
195    pub fn clear(&mut self) {
196        self.map.clear();
197        self.strings.clear();
198        self.stats.unique_count = 0;
199        self.stats.unique_bytes = 0;
200    }
201    /// Merge another pool into this one. Returns a mapping from old indices to
202    /// new indices.
203    pub fn merge(&mut self, other: &StringPool) -> Vec<InternedString> {
204        other.strings.iter().map(|s| self.intern(s)).collect()
205    }
206}
207/// A handle that includes a generation marker for garbage-collection-friendly
208/// use cases. The generation allows detecting stale handles after pool compaction.
209#[allow(dead_code)]
210#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
211pub struct GenerationalString {
212    index: u32,
213    generation: u16,
214}
215#[allow(dead_code)]
216impl GenerationalString {
217    /// The raw index component.
218    pub fn index(self) -> u32 {
219        self.index
220    }
221    /// The generation component.
222    pub fn generation(self) -> u16 {
223        self.generation
224    }
225}
226/// A simple categorization of a string's character content.
227#[allow(dead_code)]
228#[derive(Clone, Copy, Debug, PartialEq, Eq)]
229pub enum StringCategory {
230    /// All ASCII alphanumeric.
231    AlphaNum,
232    /// All ASCII letters.
233    Alpha,
234    /// All ASCII digits.
235    Numeric,
236    /// Contains only whitespace.
237    Whitespace,
238    /// Empty string.
239    Empty,
240    /// Identifier-like (starts with letter/underscore, rest alphanumeric/underscore).
241    Identifier,
242    /// Other.
243    Mixed,
244}
245/// A string pool backed by a trie for fast prefix enumeration.
246#[allow(dead_code)]
247pub struct TriePool {
248    root: TrieNode,
249    pool: StringPool,
250}
251#[allow(dead_code)]
252impl TriePool {
253    /// Create an empty trie pool.
254    pub fn new() -> Self {
255        Self {
256            root: TrieNode::new(),
257            pool: StringPool::new(),
258        }
259    }
260    /// Insert a string into the trie pool.
261    pub fn insert(&mut self, s: &str) -> InternedString {
262        let id = self.pool.intern(s);
263        let mut node = &mut self.root;
264        for ch in s.chars() {
265            node = node
266                .children
267                .entry(ch)
268                .or_insert_with(|| Box::new(TrieNode::new()));
269        }
270        node.terminal = Some(id);
271        id
272    }
273    /// Collect all strings in the subtrie rooted at `node`.
274    fn collect_all(node: &TrieNode, result: &mut Vec<InternedString>) {
275        if let Some(id) = node.terminal {
276            result.push(id);
277        }
278        for child in node.children.values() {
279            Self::collect_all(child, result);
280        }
281    }
282    /// Find all strings with the given prefix.
283    pub fn find_prefix(&self, prefix: &str) -> Vec<InternedString> {
284        let mut node = &self.root;
285        for ch in prefix.chars() {
286            match node.children.get(&ch) {
287                Some(child) => node = child,
288                None => return Vec::new(),
289            }
290        }
291        let mut result = Vec::new();
292        Self::collect_all(node, &mut result);
293        result
294    }
295    /// Exact lookup.
296    pub fn get(&self, s: &str) -> Option<InternedString> {
297        self.pool.lookup(s)
298    }
299    /// Resolve an interned string.
300    pub fn resolve(&self, id: InternedString) -> Option<&str> {
301        self.pool.resolve(id)
302    }
303    /// Number of strings.
304    pub fn len(&self) -> usize {
305        self.pool.len()
306    }
307    /// Whether the pool is empty.
308    pub fn is_empty(&self) -> bool {
309        self.pool.is_empty()
310    }
311}
312/// Estimates future pool growth based on historical intern rates.
313#[allow(dead_code)]
314pub struct PoolGrowthEstimator {
315    history: Vec<usize>,
316    window: usize,
317}
318#[allow(dead_code)]
319impl PoolGrowthEstimator {
320    /// Create a new estimator with the given smoothing window.
321    pub fn new(window: usize) -> Self {
322        Self {
323            history: Vec::new(),
324            window: window.max(1),
325        }
326    }
327    /// Record the current pool size.
328    pub fn record(&mut self, size: usize) {
329        self.history.push(size);
330        if self.history.len() > self.window * 2 {
331            let drain_to = self.history.len() - self.window;
332            self.history.drain(..drain_to);
333        }
334    }
335    /// Estimate the average growth rate (strings per observation).
336    pub fn avg_growth(&self) -> f64 {
337        if self.history.len() < 2 {
338            return 0.0;
339        }
340        let _n = self.history.len();
341        let deltas: Vec<f64> = self
342            .history
343            .windows(2)
344            .map(|w| w[1] as f64 - w[0] as f64)
345            .collect();
346        deltas.iter().sum::<f64>() / deltas.len() as f64
347    }
348    /// Estimate the pool size after `n` more observations.
349    pub fn estimate_after(&self, n: usize) -> f64 {
350        let last = self.history.last().copied().unwrap_or(0) as f64;
351        last + self.avg_growth() * n as f64
352    }
353    /// Number of recorded observations.
354    pub fn observation_count(&self) -> usize {
355        self.history.len()
356    }
357}
358/// A rope data structure for efficient large string construction.
359///
360/// A rope is a binary tree of string slices that supports O(log n)
361/// concatenation. Flattening to a `String` is O(n).
362#[allow(dead_code)]
363#[derive(Clone, Debug)]
364pub struct Rope {
365    pub(super) root: Option<RopeNode>,
366}
367#[allow(dead_code)]
368impl Rope {
369    /// Create an empty rope.
370    pub fn new() -> Self {
371        Self { root: None }
372    }
373    /// Create a rope from a string.
374    pub fn from_str(s: &str) -> Self {
375        if s.is_empty() {
376            Self { root: None }
377        } else {
378            Self {
379                root: Some(RopeNode::Leaf(s.to_string())),
380            }
381        }
382    }
383    /// Total byte length.
384    pub fn len(&self) -> usize {
385        match &self.root {
386            None => 0,
387            Some(node) => rope_node_len(node),
388        }
389    }
390    /// Whether the rope is empty.
391    pub fn is_empty(&self) -> bool {
392        self.len() == 0
393    }
394    /// Concatenate two ropes.
395    pub fn concat(self, other: Rope) -> Rope {
396        match (self.root, other.root) {
397            (None, r) => Rope { root: r },
398            (l, None) => Rope { root: l },
399            (Some(l), Some(r)) => {
400                let total = rope_node_len(&l) + rope_node_len(&r);
401                Rope {
402                    root: Some(RopeNode::Concat(Box::new(l), Box::new(r), total)),
403                }
404            }
405        }
406    }
407    /// Append a string slice to the rope.
408    pub fn append_str(self, s: &str) -> Rope {
409        self.concat(Rope::from_str(s))
410    }
411    /// Flatten the rope into a `String`.
412    ///
413    /// Note: This uses the `Display` implementation which performs
414    /// the same in-order traversal of the rope tree.
415    pub fn collect_string(&self) -> String {
416        // Use Display trait (which does the same rope traversal)
417        use std::fmt::Write;
418        let mut buf = String::with_capacity(self.len());
419        if let Some(node) = &self.root {
420            rope_collect(node, &mut buf);
421        }
422        buf
423    }
424    /// Depth of the rope tree (for balance diagnostics).
425    pub fn depth(&self) -> usize {
426        match &self.root {
427            None => 0,
428            Some(node) => rope_depth(node),
429        }
430    }
431}
432/// The result of comparing two pool snapshots.
433#[allow(dead_code)]
434#[derive(Clone, Debug, PartialEq, Eq)]
435pub struct PoolDiff {
436    /// Strings present in `new` but not in `old`.
437    pub added: Vec<String>,
438    /// Strings present in `old` but not in `new`.
439    pub removed: Vec<String>,
440    /// Strings present in both.
441    pub common: Vec<String>,
442}
443#[allow(dead_code)]
444impl PoolDiff {
445    /// Compute the diff between two snapshots.
446    pub fn compute(old: &PoolSnapshot, new: &PoolSnapshot) -> Self {
447        use std::collections::HashSet;
448        let old_set: HashSet<&str> = old.strings.iter().map(|s| s.as_str()).collect();
449        let new_set: HashSet<&str> = new.strings.iter().map(|s| s.as_str()).collect();
450        let added = new_set
451            .difference(&old_set)
452            .map(|s| s.to_string())
453            .collect();
454        let removed = old_set
455            .difference(&new_set)
456            .map(|s| s.to_string())
457            .collect();
458        let common = old_set
459            .intersection(&new_set)
460            .map(|s| s.to_string())
461            .collect();
462        Self {
463            added,
464            removed,
465            common,
466        }
467    }
468    /// Number of added strings.
469    pub fn added_count(&self) -> usize {
470        self.added.len()
471    }
472    /// Number of removed strings.
473    pub fn removed_count(&self) -> usize {
474        self.removed.len()
475    }
476    /// Whether the two snapshots are identical.
477    pub fn is_empty(&self) -> bool {
478        self.added.is_empty() && self.removed.is_empty()
479    }
480}
481/// A string pool that normalizes strings to lowercase before interning,
482/// enabling case-insensitive comparisons via cheap integer equality.
483#[allow(dead_code)]
484pub struct CaseFoldPool {
485    inner: StringPool,
486}
487#[allow(dead_code)]
488impl CaseFoldPool {
489    /// Create an empty case-folding pool.
490    pub fn new() -> Self {
491        Self {
492            inner: StringPool::new(),
493        }
494    }
495    /// Intern a string after folding to lowercase.
496    pub fn intern(&mut self, s: &str) -> InternedString {
497        let folded = s.to_lowercase();
498        self.inner.intern(&folded)
499    }
500    /// Resolve back to the canonicalized (lowercase) form.
501    pub fn resolve(&self, id: InternedString) -> Option<&str> {
502        self.inner.resolve(id)
503    }
504    /// Check whether the lowercase version of `s` has been interned.
505    pub fn contains(&self, s: &str) -> bool {
506        self.inner.contains(&s.to_lowercase())
507    }
508    /// Number of unique strings.
509    pub fn len(&self) -> usize {
510        self.inner.len()
511    }
512    /// Whether the pool is empty.
513    pub fn is_empty(&self) -> bool {
514        self.inner.is_empty()
515    }
516    /// Access the inner pool.
517    pub fn inner(&self) -> &StringPool {
518        &self.inner
519    }
520}
521/// Normalization form for Unicode strings.
522#[allow(dead_code)]
523#[derive(Clone, Copy, Debug, PartialEq, Eq)]
524pub enum NormForm {
525    /// Decompose then recompose (canonical composition).
526    Nfc,
527    /// Canonical decomposition only.
528    Nfd,
529    /// Compatibility decomposition then recompose.
530    Nfkc,
531    /// Compatibility decomposition only.
532    Nfkd,
533}
534/// A handle to an interned string. This is a lightweight index into a [`StringPool`].
535///
536/// Two `InternedString` values that came from the same pool compare equal
537/// if and only if they refer to the same underlying string content.
538#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
539pub struct InternedString {
540    /// Index into the pool's string storage.
541    pub(super) index: u32,
542}
543impl InternedString {
544    /// Create an interned string from a raw index.
545    /// Intended for deserialization; use [`StringPool::intern`] for normal usage.
546    pub fn from_raw(index: u32) -> Self {
547        Self { index }
548    }
549    /// The raw index of this interned string.
550    pub fn raw_index(self) -> u32 {
551        self.index
552    }
553}
554/// A serializable snapshot of a [`StringPool`].
555///
556/// Can be used to persist the pool to disk or transfer it across processes.
557#[derive(Clone, Debug, PartialEq, Eq)]
558pub struct PoolSnapshot {
559    /// The interned strings, in index order.
560    pub strings: Vec<String>,
561}
562impl PoolSnapshot {
563    /// Number of strings in the snapshot.
564    pub fn len(&self) -> usize {
565        self.strings.len()
566    }
567    /// Whether the snapshot is empty.
568    pub fn is_empty(&self) -> bool {
569        self.strings.is_empty()
570    }
571    /// Get a string by its interned index.
572    pub fn get(&self, id: InternedString) -> Option<&str> {
573        self.strings.get(id.index as usize).map(|s| s.as_str())
574    }
575    /// Restore a [`StringPool`] from this snapshot.
576    pub fn restore(&self) -> StringPool {
577        let mut pool = StringPool::new();
578        for s in &self.strings {
579            pool.intern(s);
580        }
581        pool
582    }
583    /// Total bytes of string data in the snapshot.
584    pub fn total_bytes(&self) -> usize {
585        self.strings.iter().map(|s| s.len()).sum()
586    }
587    /// Encode the snapshot as a single byte vector (length-prefixed strings).
588    pub fn encode(&self) -> Vec<u8> {
589        let mut buf = Vec::new();
590        let count = self.strings.len() as u32;
591        buf.extend_from_slice(&count.to_le_bytes());
592        for s in &self.strings {
593            let len = s.len() as u32;
594            buf.extend_from_slice(&len.to_le_bytes());
595            buf.extend_from_slice(s.as_bytes());
596        }
597        buf
598    }
599    /// Decode a snapshot from bytes produced by [`Self::encode`].
600    pub fn decode(data: &[u8]) -> Option<Self> {
601        if data.len() < 4 {
602            return None;
603        }
604        let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
605        let mut pos = 4;
606        let mut strings = Vec::with_capacity(count);
607        for _ in 0..count {
608            if pos + 4 > data.len() {
609                return None;
610            }
611            let len = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]])
612                as usize;
613            pos += 4;
614            if pos + len > data.len() {
615                return None;
616            }
617            let s = std::str::from_utf8(&data[pos..pos + len]).ok()?;
618            strings.push(s.to_string());
619            pos += len;
620        }
621        Some(Self { strings })
622    }
623}
624/// A pool that interns strings after Unicode normalization.
625/// Uses a simple ASCII-safe approximation for common cases.
626#[allow(dead_code)]
627pub struct NormalizedPool {
628    form: NormForm,
629    inner: StringPool,
630}
631#[allow(dead_code)]
632impl NormalizedPool {
633    /// Create a normalized pool with the given normalization form.
634    pub fn new(form: NormForm) -> Self {
635        Self {
636            form,
637            inner: StringPool::new(),
638        }
639    }
640    /// Intern a string after normalization.
641    pub fn intern(&mut self, s: &str) -> InternedString {
642        let normalized = self.normalize(s);
643        self.inner.intern(&normalized)
644    }
645    /// Normalize a string according to the pool's form.
646    pub fn normalize(&self, s: &str) -> String {
647        match self.form {
648            NormForm::Nfc | NormForm::Nfd => s.chars().filter(|c| !is_combining(*c)).collect(),
649            NormForm::Nfkc | NormForm::Nfkd => s
650                .chars()
651                .filter(|c| !is_combining(*c))
652                .map(compat_fold)
653                .collect(),
654        }
655    }
656    /// Resolve an interned string.
657    pub fn resolve(&self, id: InternedString) -> Option<&str> {
658        self.inner.resolve(id)
659    }
660    /// Pool length.
661    pub fn len(&self) -> usize {
662        self.inner.len()
663    }
664    /// Whether the pool is empty.
665    pub fn is_empty(&self) -> bool {
666        self.inner.is_empty()
667    }
668}
669/// A node in the rope tree.
670#[allow(dead_code)]
671#[derive(Clone, Debug)]
672pub(super) enum RopeNode {
673    Leaf(String),
674    Concat(Box<RopeNode>, Box<RopeNode>, usize),
675}
676/// A string pool that also supports finding the longest interned prefix
677/// of a given string.
678#[allow(dead_code)]
679pub struct PrefixPool {
680    pool: StringPool,
681}
682#[allow(dead_code)]
683impl PrefixPool {
684    /// Create a new prefix pool.
685    pub fn new() -> Self {
686        Self {
687            pool: StringPool::new(),
688        }
689    }
690    /// Intern a string.
691    pub fn intern(&mut self, s: &str) -> InternedString {
692        self.pool.intern(s)
693    }
694    /// Find the longest interned string that is a prefix of `s`.
695    pub fn longest_prefix(&self, s: &str) -> Option<InternedString> {
696        let mut best: Option<InternedString> = None;
697        let mut best_len = 0usize;
698        for (id, interned) in self.pool.iter() {
699            if s.starts_with(interned) && interned.len() >= best_len {
700                best_len = interned.len();
701                best = Some(id);
702            }
703        }
704        best
705    }
706    /// All interned strings that are prefixes of `s`, ordered by length descending.
707    pub fn all_prefixes(&self, s: &str) -> Vec<InternedString> {
708        let mut result: Vec<(usize, InternedString)> = self
709            .pool
710            .iter()
711            .filter(|(_, interned)| s.starts_with(*interned))
712            .map(|(id, interned)| (interned.len(), id))
713            .collect();
714        result.sort_by(|a, b| b.0.cmp(&a.0));
715        result.into_iter().map(|(_, id)| id).collect()
716    }
717    /// Resolve an interned string.
718    pub fn resolve(&self, id: InternedString) -> Option<&str> {
719        self.pool.resolve(id)
720    }
721    /// Number of interned strings.
722    pub fn len(&self) -> usize {
723        self.pool.len()
724    }
725    /// Whether the pool is empty.
726    pub fn is_empty(&self) -> bool {
727        self.pool.is_empty()
728    }
729}
730/// A sorted index over an existing `StringPool` for fast prefix searches.
731#[allow(dead_code)]
732pub struct StringIndex {
733    /// Sorted (string_content, InternedString) pairs.
734    sorted: Vec<(String, InternedString)>,
735}
736#[allow(dead_code)]
737impl StringIndex {
738    /// Build the index from a pool.
739    pub fn build(pool: &StringPool) -> Self {
740        let mut sorted: Vec<(String, InternedString)> =
741            pool.iter().map(|(id, s)| (s.to_string(), id)).collect();
742        sorted.sort_by(|a, b| a.0.cmp(&b.0));
743        Self { sorted }
744    }
745    /// Find all interned strings with the given prefix.
746    pub fn find_prefix(&self, prefix: &str) -> Vec<InternedString> {
747        let start = self.sorted.partition_point(|(s, _)| s.as_str() < prefix);
748        self.sorted[start..]
749            .iter()
750            .take_while(|(s, _)| s.starts_with(prefix))
751            .map(|(_, id)| *id)
752            .collect()
753    }
754    /// Find all interned strings with the given suffix.
755    pub fn find_suffix(&self, suffix: &str) -> Vec<InternedString> {
756        self.sorted
757            .iter()
758            .filter(|(s, _)| s.ends_with(suffix))
759            .map(|(_, id)| *id)
760            .collect()
761    }
762    /// Find all interned strings containing the given substring.
763    pub fn find_contains(&self, sub: &str) -> Vec<InternedString> {
764        self.sorted
765            .iter()
766            .filter(|(s, _)| s.contains(sub))
767            .map(|(_, id)| *id)
768            .collect()
769    }
770    /// Number of entries in the index.
771    pub fn len(&self) -> usize {
772        self.sorted.len()
773    }
774    /// Whether the index is empty.
775    pub fn is_empty(&self) -> bool {
776        self.sorted.is_empty()
777    }
778}
779/// Writes the contents of a pool to a `Write` sink in a human-readable format.
780#[allow(dead_code)]
781pub struct PoolWriter;
782#[allow(dead_code)]
783impl PoolWriter {
784    /// Write pool contents to a string.
785    pub fn write_to_string(pool: &StringPool) -> String {
786        let mut out = String::new();
787        for (id, s) in pool.iter() {
788            out.push_str(&format!("{}: {:?}\n", id.raw_index(), s));
789        }
790        out
791    }
792    /// Write pool statistics to a string.
793    pub fn write_stats_to_string(pool: &StringPool) -> String {
794        format!("{}", pool.statistics())
795    }
796}
797/// The result of partitioning a pool's strings.
798#[allow(dead_code)]
799pub struct PoolPartition {
800    pub matching: Vec<InternedString>,
801    pub non_matching: Vec<InternedString>,
802}
803#[allow(dead_code)]
804impl PoolPartition {
805    /// Partition all strings in a pool by predicate.
806    pub fn by<F>(pool: &StringPool, mut pred: F) -> Self
807    where
808        F: FnMut(&str) -> bool,
809    {
810        let mut matching = Vec::new();
811        let mut non_matching = Vec::new();
812        for (id, s) in pool.iter() {
813            if pred(s) {
814                matching.push(id);
815            } else {
816                non_matching.push(id);
817            }
818        }
819        Self {
820            matching,
821            non_matching,
822        }
823    }
824    /// Number of matching strings.
825    pub fn matching_count(&self) -> usize {
826        self.matching.len()
827    }
828    /// Number of non-matching strings.
829    pub fn non_matching_count(&self) -> usize {
830        self.non_matching.len()
831    }
832}
833/// An interned sub-slice: stores a base interned string plus a byte range.
834#[allow(dead_code)]
835#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
836pub struct InternedSlice {
837    pub base: InternedString,
838    pub start: u32,
839    pub end: u32,
840}
841#[allow(dead_code)]
842impl InternedSlice {
843    /// Create a new interned slice.
844    pub fn new(base: InternedString, start: u32, end: u32) -> Self {
845        Self { base, start, end }
846    }
847    /// Get the byte length of the slice.
848    pub fn len(&self) -> usize {
849        (self.end - self.start) as usize
850    }
851    /// Whether the slice is empty.
852    pub fn is_empty(&self) -> bool {
853        self.start >= self.end
854    }
855    /// Resolve the slice against a pool.
856    pub fn resolve<'a>(&self, pool: &'a StringPool) -> Option<&'a str> {
857        let base = pool.resolve(self.base)?;
858        let start = self.start as usize;
859        let end = self.end as usize;
860        if end > base.len() {
861            return None;
862        }
863        base.get(start..end)
864    }
865}
866/// A string pool that supports generational compaction.
867///
868/// When `compact` is called, unreferenced strings are removed and the
869/// generation counter is incremented, invalidating old handles.
870#[allow(dead_code)]
871pub struct GenerationalPool {
872    pool: StringPool,
873    generation: u16,
874    ref_counts: Vec<u32>,
875}
876#[allow(dead_code)]
877impl GenerationalPool {
878    /// Create an empty generational pool.
879    pub fn new() -> Self {
880        Self {
881            pool: StringPool::new(),
882            generation: 0,
883            ref_counts: Vec::new(),
884        }
885    }
886    /// Intern a string and return a generational handle.
887    pub fn intern(&mut self, s: &str) -> GenerationalString {
888        let id = self.pool.intern(s);
889        let idx = id.raw_index() as usize;
890        while self.ref_counts.len() <= idx {
891            self.ref_counts.push(0);
892        }
893        self.ref_counts[idx] = self.ref_counts[idx].saturating_add(1);
894        GenerationalString {
895            index: id.raw_index(),
896            generation: self.generation,
897        }
898    }
899    /// Release a handle, decrementing its reference count.
900    pub fn release(&mut self, handle: GenerationalString) {
901        if handle.generation != self.generation {
902            return;
903        }
904        let idx = handle.index as usize;
905        if idx < self.ref_counts.len() {
906            self.ref_counts[idx] = self.ref_counts[idx].saturating_sub(1);
907        }
908    }
909    /// Resolve a handle to its string content.
910    pub fn resolve(&self, handle: GenerationalString) -> Option<&str> {
911        if handle.generation != self.generation {
912            return None;
913        }
914        self.pool.resolve(InternedString::from_raw(handle.index))
915    }
916    /// Check if a handle is still valid (correct generation and non-zero refcount).
917    pub fn is_valid(&self, handle: GenerationalString) -> bool {
918        if handle.generation != self.generation {
919            return false;
920        }
921        let idx = handle.index as usize;
922        idx < self.ref_counts.len() && self.ref_counts[idx] > 0
923    }
924    /// Compact the pool: remove strings with zero reference count.
925    /// All handles from the previous generation are invalidated.
926    pub fn compact(&mut self) -> usize {
927        let mut new_pool = StringPool::new();
928        let mut new_refs: Vec<u32> = Vec::new();
929        let mut removed = 0usize;
930        for (id, s) in self.pool.iter() {
931            let idx = id.raw_index() as usize;
932            let rc = if idx < self.ref_counts.len() {
933                self.ref_counts[idx]
934            } else {
935                0
936            };
937            if rc > 0 {
938                let new_id = new_pool.intern(s);
939                let new_idx = new_id.raw_index() as usize;
940                while new_refs.len() <= new_idx {
941                    new_refs.push(0);
942                }
943                new_refs[new_idx] = rc;
944            } else {
945                removed += 1;
946            }
947        }
948        self.pool = new_pool;
949        self.ref_counts = new_refs;
950        self.generation = self.generation.wrapping_add(1);
951        removed
952    }
953    /// Current generation counter.
954    pub fn generation(&self) -> u16 {
955        self.generation
956    }
957    /// Number of live strings (with ref_count > 0).
958    pub fn live_count(&self) -> usize {
959        self.ref_counts.iter().filter(|&&rc| rc > 0).count()
960    }
961    /// Total strings in the pool (including those with rc=0).
962    pub fn total_count(&self) -> usize {
963        self.pool.len()
964    }
965}
966/// A string pool that tracks how often each string is interned.
967#[allow(dead_code)]
968pub struct FrequencyPool {
969    pool: StringPool,
970    counts: Vec<u64>,
971}
972#[allow(dead_code)]
973impl FrequencyPool {
974    /// Create an empty frequency pool.
975    pub fn new() -> Self {
976        Self {
977            pool: StringPool::new(),
978            counts: Vec::new(),
979        }
980    }
981    /// Intern a string and increment its frequency.
982    pub fn intern(&mut self, s: &str) -> InternedString {
983        let id = self.pool.intern(s);
984        let idx = id.raw_index() as usize;
985        while self.counts.len() <= idx {
986            self.counts.push(0);
987        }
988        self.counts[idx] += 1;
989        id
990    }
991    /// Get the frequency of an interned string.
992    pub fn frequency(&self, id: InternedString) -> u64 {
993        self.counts
994            .get(id.raw_index() as usize)
995            .copied()
996            .unwrap_or(0)
997    }
998    /// Get the top-k most frequent strings.
999    pub fn top_k(&self, k: usize) -> Vec<(InternedString, u64)> {
1000        let mut pairs: Vec<(InternedString, u64)> = self
1001            .pool
1002            .iter()
1003            .map(|(id, _)| (id, self.frequency(id)))
1004            .collect();
1005        pairs.sort_by(|a, b| b.1.cmp(&a.1));
1006        pairs.truncate(k);
1007        pairs
1008    }
1009    /// Resolve an interned string.
1010    pub fn resolve(&self, id: InternedString) -> Option<&str> {
1011        self.pool.resolve(id)
1012    }
1013    /// Inner pool reference.
1014    pub fn pool(&self) -> &StringPool {
1015        &self.pool
1016    }
1017    /// Total number of intern calls (sum of all frequencies).
1018    pub fn total_calls(&self) -> u64 {
1019        self.counts.iter().sum()
1020    }
1021    /// Number of unique strings.
1022    pub fn len(&self) -> usize {
1023        self.pool.len()
1024    }
1025    /// Whether the pool is empty.
1026    pub fn is_empty(&self) -> bool {
1027        self.pool.is_empty()
1028    }
1029}
1030/// A sorted snapshot of pool strings for deterministic output.
1031#[allow(dead_code)]
1032pub struct PoolSortedView {
1033    entries: Vec<(InternedString, String)>,
1034}
1035#[allow(dead_code)]
1036impl PoolSortedView {
1037    /// Build a sorted view from a pool (alphabetical order).
1038    pub fn build(pool: &StringPool) -> Self {
1039        let mut entries: Vec<(InternedString, String)> =
1040            pool.iter().map(|(id, s)| (id, s.to_string())).collect();
1041        entries.sort_by(|a, b| a.1.cmp(&b.1));
1042        Self { entries }
1043    }
1044    /// Iterate in sorted order.
1045    pub fn iter(&self) -> impl Iterator<Item = (InternedString, &str)> {
1046        self.entries.iter().map(|(id, s)| (*id, s.as_str()))
1047    }
1048    /// Number of entries.
1049    pub fn len(&self) -> usize {
1050        self.entries.len()
1051    }
1052    /// Whether the view is empty.
1053    pub fn is_empty(&self) -> bool {
1054        self.entries.is_empty()
1055    }
1056}