Skip to main content

command_trie/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(missing_docs)]
3#![warn(missing_debug_implementations)]
4
5use std::fmt;
6use std::iter::FusedIterator;
7
8// ============================================================
9// UTF-8 helpers
10// ============================================================
11//
12// All keys enter the trie as `&str`, i.e. valid UTF-8. The builder splits
13// edges only on **char boundaries** (see `lcp` and `BuilderNode::child_index`
14// below), so every edge label — and therefore every concatenation of edge
15// labels reachable from a frozen node — is itself valid UTF-8. The helpers
16// below centralize this invariant; read paths use them in place of a checked
17// `from_utf8(...).expect(...)` to skip a redundant validation pass.
18
19/// # Safety
20/// Caller must ensure `bytes` is either an edge label produced by
21/// `CommandTrieBuilder::build` or a concatenation of such labels following
22/// parent → child edges. Both are guaranteed valid UTF-8 by the char-aligned
23/// split invariant.
24#[inline]
25unsafe fn utf8_str_unchecked(bytes: &[u8]) -> &str {
26    debug_assert!(std::str::from_utf8(bytes).is_ok());
27    // SAFETY: char-aligned splits keep every edge label valid UTF-8; the
28    // concatenation of valid UTF-8 fragments is itself valid UTF-8.
29    unsafe { std::str::from_utf8_unchecked(bytes) }
30}
31
32/// # Safety
33/// Same contract as [`utf8_str_unchecked`].
34#[inline]
35unsafe fn utf8_string_unchecked(bytes: Vec<u8>) -> String {
36    debug_assert!(std::str::from_utf8(&bytes).is_ok());
37    // SAFETY: see [`utf8_str_unchecked`].
38    unsafe { String::from_utf8_unchecked(bytes) }
39}
40
41/// Byte length of the UTF-8 character whose leading byte is `b`. For valid
42/// UTF-8 leading bytes the result is 1..=4. For continuation bytes (`0b10xxxxxx`)
43/// or otherwise-invalid leading bytes the result is 1, which causes lookup to
44/// fall through without panicking — invalid bytes can't be in any stored key.
45#[inline]
46fn utf8_char_len(b: u8) -> usize {
47    // ASCII or continuation byte: 1 (continuation is invalid as a leader,
48    // but returning 1 makes the lookup fall through without panicking).
49    if b < 0xC0 {
50        1
51    } else if b < 0xE0 {
52        2
53    } else if b < 0xF0 {
54        3
55    } else {
56        4
57    }
58}
59
60// ============================================================
61// Builder
62// ============================================================
63
64/// Mutable trie used during the build phase.
65///
66/// Call [`Self::build`] to produce a frozen [`CommandTrie`] when you are
67/// done inserting.
68#[derive(Clone)]
69pub struct CommandTrieBuilder<T> {
70    root: BuilderNode<T>,
71    len: usize,
72}
73
74#[derive(Clone)]
75struct BuilderNode<T> {
76    /// Bytes consumed by the edge from this node's parent. Empty only for the root.
77    label: Box<[u8]>,
78    value: Option<T>,
79    /// Children, kept sorted by `label[0]`. First bytes are unique among siblings.
80    children: Vec<BuilderNode<T>>,
81}
82
83impl<T> Default for CommandTrieBuilder<T> {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl<T> CommandTrieBuilder<T> {
90    /// Create a new, empty builder.
91    #[must_use]
92    pub fn new() -> Self {
93        Self {
94            root: BuilderNode {
95                label: Box::from(&[][..]),
96                value: None,
97                children: Vec::new(),
98            },
99            len: 0,
100        }
101    }
102
103    /// Number of entries currently in the builder.
104    #[must_use]
105    pub fn len(&self) -> usize {
106        self.len
107    }
108
109    /// Returns `true` when no entries have been inserted.
110    #[must_use]
111    pub fn is_empty(&self) -> bool {
112        self.len == 0
113    }
114
115    /// Remove every entry, returning the builder to its initial state.
116    pub fn clear(&mut self) {
117        *self = Self::new();
118    }
119
120    /// Insert `key` with `value`. Returns the previous value, if any.
121    ///
122    /// Any `&str` is accepted; the radix splits on UTF-8 char boundaries so
123    /// every internal edge label remains valid UTF-8.
124    pub fn insert(&mut self, key: &str, value: T) -> Option<T> {
125        let prev = self.root.insert(key.as_bytes(), value);
126        if prev.is_none() {
127            self.len += 1;
128        }
129        prev
130    }
131
132    /// Remove `key` and return its value, if present. Maintains the radix
133    /// invariant by pruning empty branches and merging single-child
134    /// passthroughs with their parent edge.
135    pub fn remove(&mut self, key: &str) -> Option<T> {
136        let v = self.root.remove(key.as_bytes())?;
137        self.len -= 1;
138        Some(v)
139    }
140
141    /// Exact lookup against the builder.
142    #[must_use]
143    pub fn get(&self, key: &str) -> Option<&T> {
144        let mut node = &self.root;
145        let mut rem = key.as_bytes();
146        loop {
147            if rem.is_empty() {
148                return node.value.as_ref();
149            }
150            let child = node.find_child(rem)?;
151            if !rem.starts_with(&child.label) {
152                return None;
153            }
154            rem = &rem[child.label.len()..];
155            node = child;
156        }
157    }
158
159    /// Returns `true` when `key` is present in the builder.
160    #[must_use]
161    pub fn contains(&self, key: &str) -> bool {
162        self.get(key).is_some()
163    }
164
165    /// Freeze the builder into a compact, read-only [`CommandTrie`].
166    ///
167    /// Performs one DFS over the builder trie and writes the result into
168    /// four contiguous slabs (`nodes`, `labels`, `children`,
169    /// `child_first_bytes`). After this call the frozen trie has exactly
170    /// four heap allocations regardless of trie size.
171    ///
172    /// # Panics
173    /// If the trie's total node count, total label bytes, or total edge
174    /// count would exceed [`u16::MAX`] (65,535). The frozen representation
175    /// stores every offset and node id as a `u16` to halve the structural
176    /// slabs. The worst case for `N` inserted entries is `2N + 1` nodes,
177    /// so this caps the trie at roughly **32,767 entries** — enough for
178    /// any realistic command set.
179    pub fn build(self) -> CommandTrie<T> {
180        let len = self.len;
181        let mut nodes: Vec<FrozenNode<T>> = Vec::new();
182        let mut labels: Vec<u8> = Vec::new();
183        let mut children: Vec<NodeId> = Vec::new();
184        let mut child_first_bytes: Vec<u8> = Vec::new();
185        build_visit(
186            self.root,
187            &mut nodes,
188            &mut labels,
189            &mut children,
190            &mut child_first_bytes,
191        );
192        CommandTrie {
193            nodes: nodes.into_boxed_slice(),
194            labels: labels.into_boxed_slice(),
195            children: children.into_boxed_slice(),
196            child_first_bytes: child_first_bytes.into_boxed_slice(),
197            len,
198        }
199    }
200}
201
202/// Pre-order DFS: visit a node, emit its `FrozenNode` slot, reserve a
203/// contiguous range of child-id slots, then recurse and fill those slots.
204fn build_visit<T>(
205    node: BuilderNode<T>,
206    nodes: &mut Vec<FrozenNode<T>>,
207    labels: &mut Vec<u8>,
208    children: &mut Vec<NodeId>,
209    child_first_bytes: &mut Vec<u8>,
210) -> NodeId {
211    let id = u16_or_panic(nodes.len());
212    let label_start = u16_or_panic(labels.len());
213    let label_len = u16_or_panic(node.label.len());
214    labels.extend_from_slice(&node.label);
215
216    nodes.push(FrozenNode {
217        label_start,
218        label_len,
219        children_start: 0,
220        children_len: 0,
221        value: node.value,
222    });
223
224    // Reserve a contiguous run of child-id slots BEFORE recursing, so all of
225    // this node's children end up adjacent in `children` regardless of how
226    // many descendants each child contributes.
227    let n_children = u16_or_panic(node.children.len());
228    let children_start = u16_or_panic(children.len());
229    for _ in 0..n_children {
230        children.push(0);
231        child_first_bytes.push(0);
232    }
233    for (i, child) in node.children.into_iter().enumerate() {
234        // Capture the child's first byte BEFORE recursing, since `child` is
235        // about to be moved into `build_visit`. Non-root children always have
236        // a non-empty label.
237        let first = child.label[0];
238        let slot = children_start as usize + i;
239        let child_id = build_visit(child, nodes, labels, children, child_first_bytes);
240        children[slot] = child_id;
241        child_first_bytes[slot] = first;
242    }
243
244    let n = &mut nodes[id as usize];
245    n.children_start = children_start;
246    n.children_len = n_children;
247    id
248}
249
250/// Convert a `usize` index/length to `u16`, panicking with a clear message
251/// if the documented size cap (see [`FrozenNode`]) is exceeded. Used during
252/// `build` to turn what would otherwise be silent `as u16` truncation into
253/// an explicit failure.
254#[inline]
255fn u16_or_panic(n: usize) -> u16 {
256    u16::try_from(n).expect("command-trie size exceeds u16::MAX (see FrozenNode docs)")
257}
258
259impl<T> BuilderNode<T> {
260    /// Find the child whose label starts with the same UTF-8 char as `rem`.
261    ///
262    /// `rem` must be non-empty and start with a valid UTF-8 leading byte
263    /// (always true when called with a slice of an inserted `&str` or with
264    /// remaining query bytes from `Self::get`).
265    fn find_child(&self, rem: &[u8]) -> Option<&BuilderNode<T>> {
266        let idx = self.child_index(rem).ok()?;
267        Some(&self.children[idx])
268    }
269
270    /// Binary search for the child whose label starts with the same UTF-8 char
271    /// as `rem`, returning the index or where it would be inserted.
272    ///
273    /// Children are sorted by their first char's UTF-8 bytes; this is identical
274    /// to first-byte order for valid UTF-8, so the comparison only needs to
275    /// look at each child's leading-char bytes (1..=4 bytes) against the
276    /// needle's leading-char bytes.
277    ///
278    /// ASCII fast path: when the needle's first byte is `< 0x80` it *is* the
279    /// whole leading char, and an ASCII first byte is unique among siblings
280    /// (char-aligned splits guarantee distinct sibling leading chars, and an
281    /// ASCII byte cannot collide with any multi-byte first byte which is
282    /// always `>= 0xC0`). A plain `binary_search_by_key` on the first byte
283    /// suffices and matches the pre-UTF-8 cost.
284    fn child_index(&self, rem: &[u8]) -> Result<usize, usize> {
285        let first = rem[0];
286        if first < 0x80 {
287            return self.children.binary_search_by_key(&first, |c| c.label[0]);
288        }
289        let needle_len = utf8_char_len(first).min(rem.len());
290        let needle = &rem[..needle_len];
291        self.children.binary_search_by(|c| {
292            let cn = utf8_char_len(c.label[0]).min(c.label.len());
293            c.label[..cn].cmp(needle)
294        })
295    }
296
297    fn insert(&mut self, rem: &[u8], value: T) -> Option<T> {
298        if rem.is_empty() {
299            return self.value.replace(value);
300        }
301        match self.child_index(rem) {
302            Err(at) => {
303                self.children.insert(
304                    at,
305                    BuilderNode {
306                        label: Box::from(rem),
307                        value: Some(value),
308                        children: Vec::new(),
309                    },
310                );
311                None
312            }
313            Ok(idx) => {
314                let child = &mut self.children[idx];
315                let common = lcp(&child.label, rem);
316                if common == child.label.len() {
317                    return child.insert(&rem[common..], value);
318                }
319                // Split this edge at `common`. `common` is char-aligned by `lcp`,
320                // so both halves are valid UTF-8.
321                let old_label = std::mem::replace(&mut child.label, Box::from(&rem[..common]));
322                let old_value = child.value.take();
323                let old_children = std::mem::take(&mut child.children);
324                let existing = BuilderNode {
325                    label: Box::from(&old_label[common..]),
326                    value: old_value,
327                    children: old_children,
328                };
329                if common == rem.len() {
330                    child.value = Some(value);
331                    child.children = vec![existing];
332                } else {
333                    let new_node = BuilderNode {
334                        label: Box::from(&rem[common..]),
335                        value: Some(value),
336                        children: Vec::new(),
337                    };
338                    // Sort by first byte (equivalent to first-char lex order for
339                    // valid UTF-8). The two leading chars differ (otherwise `lcp`
340                    // would have extended `common`), so first bytes differ unless
341                    // the chars share a leading UTF-8 byte — in which case the
342                    // tie is broken by the next byte.
343                    child.children = if existing.label[..].cmp(&new_node.label[..])
344                        == std::cmp::Ordering::Less
345                    {
346                        vec![existing, new_node]
347                    } else {
348                        vec![new_node, existing]
349                    };
350                }
351                None
352            }
353        }
354    }
355
356    fn remove(&mut self, rem: &[u8]) -> Option<T> {
357        if rem.is_empty() {
358            return self.value.take();
359        }
360        let idx = self.child_index(rem).ok()?;
361        if !rem.starts_with(&self.children[idx].label) {
362            return None;
363        }
364        let label_len = self.children[idx].label.len();
365        let removed = self.children[idx].remove(&rem[label_len..])?;
366
367        let child = &self.children[idx];
368        if child.value.is_none() {
369            if child.children.is_empty() {
370                self.children.remove(idx);
371            } else if child.children.len() == 1 {
372                let mut removed_child = self.children.remove(idx);
373                let mut grandchild = removed_child.children.pop().unwrap();
374                let mut merged =
375                    Vec::with_capacity(removed_child.label.len() + grandchild.label.len());
376                merged.extend_from_slice(&removed_child.label);
377                merged.extend_from_slice(&grandchild.label);
378                grandchild.label = merged.into_boxed_slice();
379                self.children.insert(idx, grandchild);
380            }
381        }
382        Some(removed)
383    }
384}
385
386/// Longest common **char-aligned** byte prefix of `a` and `b`.
387///
388/// Both inputs are assumed to be valid UTF-8 (they come from `&str`s or from
389/// previously char-aligned edge labels). The byte-LCP is computed normally,
390/// then truncated back to the nearest char boundary. This ensures that any
391/// radix split using the returned length never bisects a multi-byte codepoint.
392fn lcp(a: &[u8], b: &[u8]) -> usize {
393    let mut i = a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count();
394    // If the first differing byte (or one-past-end in either input) is a
395    // UTF-8 continuation byte in `a`, back up to the codepoint boundary.
396    // For valid UTF-8 the matching prefix in `b` is identical, so checking
397    // either side is sufficient.
398    while i > 0 && i < a.len() && (a[i] & 0xC0) == 0x80 {
399        i -= 1;
400    }
401    i
402}
403
404impl<K: AsRef<str>, T> FromIterator<(K, T)> for CommandTrieBuilder<T> {
405    /// Build a [`CommandTrieBuilder`] from an iterator of `(key, value)` pairs.
406    fn from_iter<I: IntoIterator<Item = (K, T)>>(iter: I) -> Self {
407        let mut t = Self::new();
408        t.extend(iter);
409        t
410    }
411}
412
413impl<K: AsRef<str>, T> Extend<(K, T)> for CommandTrieBuilder<T> {
414    /// Insert each `(key, value)` from `iter`.
415    fn extend<I: IntoIterator<Item = (K, T)>>(&mut self, iter: I) {
416        for (k, v) in iter {
417            self.insert(k.as_ref(), v);
418        }
419    }
420}
421
422impl<T: fmt::Debug> fmt::Debug for CommandTrieBuilder<T> {
423    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
424        f.debug_struct("CommandTrieBuilder")
425            .field("len", &self.len)
426            .field("root", &self.root)
427            .finish()
428    }
429}
430
431impl<T: fmt::Debug> fmt::Debug for BuilderNode<T> {
432    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433        f.debug_struct("BuilderNode")
434            // SAFETY: `label` is a char-aligned slice of an inserted `&str`,
435            // so it is valid UTF-8.
436            .field("label", &unsafe { utf8_str_unchecked(&self.label) })
437            .field("value", &self.value)
438            .field("children", &self.children)
439            .finish()
440    }
441}
442
443// ============================================================
444// Frozen trie
445// ============================================================
446
447type NodeId = u16;
448const ROOT: NodeId = 0;
449
450/// Compact, read-only radix trie produced by [`CommandTrieBuilder::build`].
451///
452/// All storage lives in four boxed slices (`nodes`, `labels`, `children`,
453/// `child_first_bytes`). All query methods are non-allocating in their
454/// lookup paths; only methods that materialize keys (e.g. `Iter::next`)
455/// allocate.
456#[derive(Clone)]
457pub struct CommandTrie<T> {
458    /// `nodes[0]` is the root. Pre-order DFS layout from `build`.
459    nodes: Box<[FrozenNode<T>]>,
460    /// All edge labels concatenated; sliced by `(label_start, label_len)`.
461    labels: Box<[u8]>,
462    /// All child-id lists concatenated; each node's children live in a
463    /// contiguous run sorted by the first byte of their label.
464    children: Box<[NodeId]>,
465    /// Parallel to `children`: the first byte of each child's label.
466    /// Lets `find_child` binary-search a tight `&[u8]` slice instead of
467    /// chasing two pointer indirections per probe (`nodes` → `labels`).
468    child_first_bytes: Box<[u8]>,
469    len: usize,
470}
471
472#[derive(Clone)]
473struct FrozenNode<T> {
474    // All four offsets/lengths are `u16`; the builder consequently rejects
475    // any trie whose total node count, total label bytes, or total edge
476    // count would exceed `u16::MAX` (65,535). The worst-case node count
477    // for `N` inserted entries is `2N + 1`, so this caps the trie at
478    // roughly **32,767 entries** — enough for any realistic command set.
479    //
480    // Storing the `start + len` pair (rather than just the end and
481    // deriving start from the previous node) keeps `label_of` and
482    // `children_of` to a single `FrozenNode` load per access, which
483    // matters on the lookup hot path.
484    label_start: u16,
485    label_len: u16,
486    children_start: u16,
487    children_len: u16,
488    value: Option<T>,
489}
490
491impl<T> CommandTrie<T> {
492    // SAFETY NOTE FOR INTERNAL UNCHECKED ACCESSES
493    // -------------------------------------------
494    // The following helpers (`label_of`, `children_of`, `value_of`,
495    // `find_child`) use `get_unchecked` on `nodes`, `labels`, `children`,
496    // and `child_first_bytes`. The build invariants make every index they
497    // compute provably in-bounds:
498    //
499    //   * `nodes` always contains at least the root after `build`, so
500    //     `ROOT = 0` is a valid index.
501    //   * Every `NodeId` stored in `children` was emitted by `build_visit`
502    //     as `nodes.len()` at the moment of the matching `nodes.push`, so
503    //     it is always `< nodes.len()` in the finished trie.
504    //   * `(label_start, label_len)` and `(children_start, children_len)`
505    //     for each node describe ranges that `build_visit` pushed into
506    //     `labels` / `children` / `child_first_bytes` *before* writing the
507    //     node's fields. They are always in-bounds.
508    //   * `child_first_bytes.len() == children.len()` by construction.
509    //
510    // External index-typed APIs (e.g. `descend_to_node`'s key bytes) are
511    // not affected; they still use safe slicing.
512
513    /// Number of entries stored in the trie.
514    #[must_use]
515    pub fn len(&self) -> usize {
516        self.len
517    }
518
519    /// Returns `true` when the trie holds no entries.
520    #[must_use]
521    pub fn is_empty(&self) -> bool {
522        self.len == 0
523    }
524
525    #[inline]
526    fn label_of(&self, id: NodeId) -> &[u8] {
527        // SAFETY: see the SAFETY NOTE at the top of this impl.
528        unsafe {
529            let n = self.nodes.get_unchecked(id as usize);
530            let start = n.label_start as usize;
531            let end = start + n.label_len as usize;
532            self.labels.get_unchecked(start..end)
533        }
534    }
535
536    #[inline]
537    fn children_of(&self, id: NodeId) -> &[NodeId] {
538        // SAFETY: see the SAFETY NOTE at the top of this impl.
539        unsafe {
540            let n = self.nodes.get_unchecked(id as usize);
541            let start = n.children_start as usize;
542            let end = start + n.children_len as usize;
543            self.children.get_unchecked(start..end)
544        }
545    }
546
547    #[inline]
548    fn value_of(&self, id: NodeId) -> Option<&T> {
549        // SAFETY: see the SAFETY NOTE at the top of this impl.
550        unsafe { self.nodes.get_unchecked(id as usize).value.as_ref() }
551    }
552
553    /// Returns the child of `parent` whose label starts with the same UTF-8
554    /// char as `rem`, if any.
555    ///
556    /// `rem` must be non-empty and begin with a valid UTF-8 leading byte; this
557    /// is always true when called with a slice of a `&str`. The fast path is
558    /// pure ASCII: the leading byte is `< 0x80`, unique among siblings, and
559    /// binary search on `child_first_bytes` returns the match directly with
560    /// no further work — equivalent to the pre-UTF-8 implementation. For
561    /// multi-byte chars two siblings can share a leading byte (e.g.
562    /// `'é'`=`C3 A9` and `'ê'`=`C3 AA`), so on a hit we widen to the
563    /// equal-byte run and check the full leading char against each candidate.
564    #[inline]
565    fn find_child(&self, parent: NodeId, rem: &[u8]) -> Option<NodeId> {
566        // SAFETY: see the SAFETY NOTE at the top of this impl.
567        unsafe {
568            let n = self.nodes.get_unchecked(parent as usize);
569            let start = n.children_start as usize;
570            let end = start + n.children_len as usize;
571            let first = *rem.get_unchecked(0);
572            let slab = self.child_first_bytes.get_unchecked(start..end);
573            // Binary-search the dense first-byte slab — typically one cache
574            // line — instead of dereferencing `nodes` then `labels` per probe.
575            let idx = slab.binary_search(&first).ok()?;
576            // Hot path: ASCII first byte is unique among siblings (char-aligned
577            // splits guarantee siblings have distinct leading chars, and an
578            // ASCII byte cannot share its slot with any other char's leading
579            // byte). No UTF-8 work needed.
580            if first < 0x80 {
581                return Some(*self.children.get_unchecked(start + idx));
582            }
583            // Cold path: multi-byte char. Disambiguate against the equal-byte
584            // run by comparing the full leading-char bytes.
585            self.find_child_multibyte(start, slab, idx, first, rem)
586        }
587    }
588
589    /// Cold-path disambiguation for multi-byte leading chars. Outlined so the
590    /// ASCII fast path in `find_child` stays small enough to inline cleanly.
591    #[cold]
592    #[inline(never)]
593    fn find_child_multibyte(
594        &self,
595        start: usize,
596        slab: &[u8],
597        idx: usize,
598        first: u8,
599        rem: &[u8],
600    ) -> Option<NodeId> {
601        let clen = utf8_char_len(first);
602        // `rem` is always a suffix of a valid `&str` cut at an edge-label
603        // boundary, which is itself char-aligned. A leader byte at `rem[0]`
604        // therefore implies `rem.len() >= clen`. We assert this in debug
605        // builds; the SAFETY block below relies on it.
606        debug_assert!(rem.len() >= clen);
607        // SAFETY: see the SAFETY NOTE at the top of this impl; the debug
608        // assertion above and the loop guards make each unchecked index
609        // in-bounds.
610        unsafe {
611            let needle = rem.get_unchecked(..clen);
612            // Walk the (usually length-1, rarely up to 4) run of siblings
613            // sharing this leading byte, comparing full leading-char bytes.
614            let mut lo = idx;
615            while lo > 0 && *slab.get_unchecked(lo - 1) == first {
616                lo -= 1;
617            }
618            let mut i = lo;
619            while i < slab.len() && *slab.get_unchecked(i) == first {
620                let child = *self.children.get_unchecked(start + i);
621                let lbl = self.label_of(child);
622                if lbl.len() >= clen && lbl.get_unchecked(..clen) == needle {
623                    return Some(child);
624                }
625                i += 1;
626            }
627            None
628        }
629    }
630
631    /// Exact lookup.
632    #[must_use]
633    pub fn get(&self, key: &str) -> Option<&T> {
634        let mut node = ROOT;
635        let mut rem = key.as_bytes();
636        loop {
637            if rem.is_empty() {
638                return self.value_of(node);
639            }
640            let child = self.find_child(node, rem)?;
641            let lbl = self.label_of(child);
642            if !rem.starts_with(lbl) {
643                return None;
644            }
645            rem = &rem[lbl.len()..];
646            node = child;
647        }
648    }
649
650    /// Returns `true` when `key` is present in the trie.
651    #[must_use]
652    pub fn contains(&self, key: &str) -> bool {
653        self.get(key).is_some()
654    }
655
656    /// Longest stored key that is a prefix of `input`, with its value.
657    ///
658    /// The returned `&str` is a slice of `input`; its `len()` is the number
659    /// of bytes consumed.
660    #[must_use]
661    pub fn longest_prefix_match<'a>(&self, input: &'a str) -> Option<(&'a str, &T)> {
662        let bytes = input.as_bytes();
663        let mut node = ROOT;
664        let mut consumed = 0usize;
665        // Track the deepest ancestor with a value as `(consumed_bytes, value_ref)`
666        // so we never need to re-`unwrap` a known-Some lookup at the end.
667        let mut best: Option<(usize, &T)> = None;
668        loop {
669            if let Some(v) = self.value_of(node) {
670                best = Some((consumed, v));
671            }
672            let rem = &bytes[consumed..];
673            if rem.is_empty() {
674                break;
675            }
676            let Some(child) = self.find_child(node, rem) else {
677                break;
678            };
679            let lbl = self.label_of(child);
680            if !rem.starts_with(lbl) {
681                break;
682            }
683            consumed += lbl.len();
684            node = child;
685        }
686        best.map(|(n, v)| (&input[..n], v))
687    }
688
689    /// Returns true if any stored key starts with `prefix`.
690    #[must_use]
691    pub fn contains_prefix(&self, prefix: &str) -> bool {
692        match self.descend_to_node(prefix.as_bytes()) {
693            Some(node) => self.value_of(node).is_some() || !self.children_of(node).is_empty(),
694            None => false,
695        }
696    }
697
698    /// Walks down the trie consuming `prefix` and returns the node it lands
699    /// on (which may be one whose edge label extends past `prefix`). Allocates
700    /// nothing. Used by the hot "does anything match?" / "how many match?"
701    /// paths that don't care about reconstructing the prefix string.
702    fn descend_to_node(&self, mut rem: &[u8]) -> Option<NodeId> {
703        let mut node = ROOT;
704        while !rem.is_empty() {
705            let child = self.find_child(node, rem)?;
706            let lbl = self.label_of(child);
707            if rem.len() >= lbl.len() {
708                if !rem.starts_with(lbl) {
709                    return None;
710                }
711                rem = &rem[lbl.len()..];
712                node = child;
713            } else {
714                if !lbl.starts_with(rem) {
715                    return None;
716                }
717                node = child;
718                break;
719            }
720        }
721        Some(node)
722    }
723
724    /// Like [`Self::descend_to_node`] but also returns the bytes from the
725    /// trie root to the landing node — needed by [`Self::subtrie`] which
726    /// exposes them as `common_prefix`.
727    fn descend_to_prefix(&self, mut rem: &[u8]) -> Option<(NodeId, Vec<u8>)> {
728        let mut node = ROOT;
729        let mut path: Vec<u8> = Vec::with_capacity(rem.len());
730        while !rem.is_empty() {
731            let child = self.find_child(node, rem)?;
732            let lbl = self.label_of(child);
733            if rem.len() >= lbl.len() {
734                if !rem.starts_with(lbl) {
735                    return None;
736                }
737                path.extend_from_slice(lbl);
738                rem = &rem[lbl.len()..];
739                node = child;
740            } else {
741                if !lbl.starts_with(rem) {
742                    return None;
743                }
744                path.extend_from_slice(lbl);
745                node = child;
746                break;
747            }
748        }
749        Some((node, path))
750    }
751
752    /// Iterator over all `(key, value)` pairs in alphabetical order.
753    ///
754    /// Each item allocates a fresh `String` for the key. For hot loops
755    /// prefer [`Self::for_each`], which reuses an internal buffer.
756    #[must_use]
757    pub fn iter(&self) -> Iter<'_, T> {
758        Iter::new(self, ROOT, Vec::new())
759    }
760
761    /// Visit every `(key, value)` pair in alphabetical order without
762    /// allocating per match.
763    pub fn for_each(&self, mut f: impl FnMut(&str, &T)) {
764        let mut buf = Vec::new();
765        for_each_descendants(self, ROOT, &mut buf, &mut f);
766    }
767
768    /// View of the entries whose key starts with `prefix`.
769    ///
770    /// Returns `None` if no entry has that prefix. Prefer this when you need
771    /// to ask multiple questions about the same prefix.
772    #[must_use]
773    pub fn subtrie<'a>(&'a self, prefix: &str) -> Option<SubTrie<'a, T>> {
774        let (mut node, mut path) = self.descend_to_prefix(prefix.as_bytes())?;
775        if self.value_of(node).is_none() && self.children_of(node).is_empty() {
776            return None;
777        }
778        // Extend the shared prefix through any unambiguous (single-child,
779        // no-value) passthrough chain.
780        loop {
781            let kids = self.children_of(node);
782            if self.value_of(node).is_none() && kids.len() == 1 {
783                let child = kids[0];
784                path.extend_from_slice(self.label_of(child));
785                node = child;
786            } else {
787                break;
788            }
789        }
790        Some(SubTrie {
791            trie: self,
792            node,
793            query_len: prefix.len(),
794            common_prefix: path,
795        })
796    }
797
798    /// All `(key, value)` pairs whose key starts with `prefix`.
799    ///
800    /// Allocates a `Vec` and a `String` per match; for hot paths prefer
801    /// [`Self::for_each_completion`].
802    #[must_use]
803    pub fn completions<'a>(&'a self, prefix: &str) -> Vec<(String, &'a T)> {
804        match self.subtrie(prefix) {
805            Some(sub) => sub.into_iter().collect(),
806            None => Vec::new(),
807        }
808    }
809
810    /// Number of entries whose key starts with `prefix`. Allocation-free.
811    #[must_use]
812    pub fn count_completions(&self, prefix: &str) -> usize {
813        match self.descend_to_node(prefix.as_bytes()) {
814            Some(node) => count_values(self, node),
815            None => 0,
816        }
817    }
818
819    /// Longest string `s` such that every key matching `prefix` also starts
820    /// with `s`. Always `s.starts_with(prefix)`; may extend past `prefix`
821    /// when only one branch is reachable. `None` if no entries match.
822    ///
823    /// Allocates exactly one `String` (the returned value).
824    #[must_use]
825    pub fn completion_prefix(&self, prefix: &str) -> Option<String> {
826        let mut rem = prefix.as_bytes();
827        let mut node = ROOT;
828        let mut buf: Vec<u8> = Vec::with_capacity(rem.len());
829        // Inline descent so we collect bytes into a single buffer that becomes
830        // the returned String -- no intermediate `SubTrie` allocation.
831        while !rem.is_empty() {
832            let child = self.find_child(node, rem)?;
833            let lbl = self.label_of(child);
834            if rem.len() >= lbl.len() {
835                if !rem.starts_with(lbl) {
836                    return None;
837                }
838                buf.extend_from_slice(lbl);
839                rem = &rem[lbl.len()..];
840                node = child;
841            } else {
842                if !lbl.starts_with(rem) {
843                    return None;
844                }
845                buf.extend_from_slice(lbl);
846                node = child;
847                break;
848            }
849        }
850        if self.value_of(node).is_none() && self.children_of(node).is_empty() {
851            return None;
852        }
853        // Extend through unambiguous passthrough chains.
854        while self.value_of(node).is_none() && self.children_of(node).len() == 1 {
855            let child = self.children_of(node)[0];
856            buf.extend_from_slice(self.label_of(child));
857            node = child;
858        }
859        // SAFETY: `buf` is a concatenation of char-aligned edge labels, all valid UTF-8.
860        Some(unsafe { utf8_string_unchecked(buf) })
861    }
862
863    /// Visit every completion of `prefix` without allocating per match.
864    pub fn for_each_completion(&self, prefix: &str, mut f: impl FnMut(&str, &T)) {
865        if let Some(sub) = self.subtrie(prefix) {
866            sub.for_each(&mut f);
867        }
868    }
869}
870
871impl<T: fmt::Debug> fmt::Debug for CommandTrie<T> {
872    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
873        // `child_first_bytes` is a derived lookup index over `children`;
874        // intentionally omitted from Debug output.
875        f.debug_struct("CommandTrie")
876            .field("len", &self.len)
877            .field("nodes", &self.nodes.len())
878            .field("labels_bytes", &self.labels.len())
879            .field("children_edges", &self.children.len())
880            .finish_non_exhaustive()
881    }
882}
883
884impl<T> Default for CommandTrie<T> {
885    /// An empty, frozen trie (equivalent to `CommandTrieBuilder::new().build()`).
886    fn default() -> Self {
887        CommandTrieBuilder::new().build()
888    }
889}
890
891impl<'a, T> IntoIterator for &'a CommandTrie<T> {
892    type Item = (String, &'a T);
893    type IntoIter = Iter<'a, T>;
894    fn into_iter(self) -> Self::IntoIter {
895        self.iter()
896    }
897}
898
899// ============================================================
900// SubTrie view
901// ============================================================
902
903/// View over the subset of a [`CommandTrie`] whose keys share a common prefix.
904///
905/// Construct via [`CommandTrie::subtrie`]. Guaranteed non-empty.
906#[derive(Clone)]
907pub struct SubTrie<'a, T> {
908    trie: &'a CommandTrie<T>,
909    /// Deepest node such that every entry in this view lives in its subtrie.
910    node: NodeId,
911    /// Length in bytes of the original `prefix` argument to [`CommandTrie::subtrie`].
912    /// Always `<= common_prefix.len()`. Used by [`SubTrie::extension`].
913    query_len: usize,
914    /// Bytes from the trie root to `node`; also the longest common prefix
915    /// shared by every entry in this view.
916    common_prefix: Vec<u8>,
917}
918
919impl<'a, T> SubTrie<'a, T> {
920    /// Longest prefix shared by every entry in the view.
921    #[must_use]
922    pub fn common_prefix(&self) -> &str {
923        // SAFETY: `common_prefix` is a concatenation of char-aligned edge labels.
924        unsafe { utf8_str_unchecked(&self.common_prefix) }
925    }
926
927    /// Bytes between the originally queried prefix and [`Self::common_prefix`].
928    ///
929    /// This is exactly what a fish-style line editor should splice into the
930    /// buffer on TAB: the unambiguous auto-extension implied by what the user
931    /// has typed so far. Empty when the typed prefix is already at a branch
932    /// point (caller should then display the menu of completions).
933    #[must_use]
934    pub fn extension(&self) -> &str {
935        // SAFETY: `self.query_len` is the byte length of the originally queried
936        // `&str`, a char boundary; `common_prefix` is valid UTF-8 in full, so
937        // the suffix is also valid UTF-8.
938        unsafe { utf8_str_unchecked(&self.common_prefix[self.query_len..]) }
939    }
940
941    /// `true` when exactly one entry matches — the caller can commit it and
942    /// stop prompting. O(1).
943    #[must_use]
944    pub fn is_unique(&self) -> bool {
945        self.trie.value_of(self.node).is_some() && self.trie.children_of(self.node).is_empty()
946    }
947
948    /// Value at this view's deepest node, when that node itself holds a value
949    /// (i.e. [`Self::common_prefix`] is itself a stored key). Returns `None`
950    /// for a pure branch-point view.
951    #[must_use]
952    pub fn value(&self) -> Option<&'a T> {
953        self.trie.value_of(self.node)
954    }
955
956    /// The single matching value when this view contains exactly one entry,
957    /// else `None`. Combines [`Self::is_unique`] and [`Self::value`]; spares
958    /// the caller a follow-up `trie.get(sub.common_prefix())`.
959    #[must_use]
960    pub fn unique_value(&self) -> Option<&'a T> {
961        if self.is_unique() {
962            self.value()
963        } else {
964            None
965        }
966    }
967
968    /// Number of entries in the view. Walks the subtrie.
969    #[must_use]
970    pub fn len(&self) -> usize {
971        count_values(self.trie, self.node)
972    }
973
974    /// Always `false` — a `SubTrie` only exists when at least one entry matches.
975    #[must_use]
976    pub fn is_empty(&self) -> bool {
977        false
978    }
979
980    /// Iterator over `(key, value)` pairs in this view, alphabetically.
981    ///
982    /// Clones `common_prefix` into the iterator's buffer. If you only walk
983    /// the view once, prefer `subtrie.into_iter()`, which moves the buffer
984    /// in instead.
985    #[must_use]
986    pub fn iter(&self) -> Iter<'a, T> {
987        Iter::new(self.trie, self.node, self.common_prefix.clone())
988    }
989
990    /// Visit every entry in the view without allocating per match.
991    pub fn for_each(&self, mut f: impl FnMut(&str, &T)) {
992        let mut buf = self.common_prefix.clone();
993        // The starting node's label is already in `common_prefix`, so emit
994        // its value here and recurse into children directly.
995        if let Some(v) = self.trie.value_of(self.node) {
996            // SAFETY: `buf` clones `common_prefix`, all char-aligned UTF-8.
997            f(unsafe { utf8_str_unchecked(&buf) }, v);
998        }
999        for &child in self.trie.children_of(self.node) {
1000            for_each_descendants(self.trie, child, &mut buf, &mut f);
1001        }
1002    }
1003}
1004
1005impl<'a, T> IntoIterator for &SubTrie<'a, T> {
1006    type Item = (String, &'a T);
1007    type IntoIter = Iter<'a, T>;
1008    fn into_iter(self) -> Self::IntoIter {
1009        self.iter()
1010    }
1011}
1012
1013impl<'a, T> IntoIterator for SubTrie<'a, T> {
1014    type Item = (String, &'a T);
1015    type IntoIter = Iter<'a, T>;
1016    /// Moves `common_prefix` into the iterator's buffer, avoiding the clone
1017    /// that `(&subtrie).into_iter()` would perform.
1018    fn into_iter(self) -> Self::IntoIter {
1019        Iter::new(self.trie, self.node, self.common_prefix)
1020    }
1021}
1022
1023impl<T> fmt::Debug for SubTrie<'_, T> {
1024    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1025        f.debug_struct("SubTrie")
1026            .field("common_prefix", &self.common_prefix())
1027            .field("is_unique", &self.is_unique())
1028            .finish()
1029    }
1030}
1031
1032// ============================================================
1033// Iterator
1034// ============================================================
1035
1036/// Iterator yielding `(key, value)` pairs from a [`CommandTrie`] or [`SubTrie`].
1037///
1038/// Implements [`ExactSizeIterator`]: the remaining count is computed once at
1039/// construction (one DFS over the starting subtrie) and decremented per item,
1040/// so `collect::<Vec<_>>()` reserves the exact capacity up front.
1041pub struct Iter<'a, T> {
1042    trie: &'a CommandTrie<T>,
1043    stack: Vec<Frame>,
1044    path: Vec<u8>,
1045    /// On the first call, emit the starting node's own value (if any).
1046    /// Its label is already represented in `path`, so it can't be handled by
1047    /// the normal `Frame::Enter` step.
1048    pending_root: Option<NodeId>,
1049    /// Exact number of `(key, value)` pairs still to be yielded.
1050    remaining: usize,
1051}
1052
1053enum Frame {
1054    /// Push the node's label onto `path`, then emit its value (if any), then
1055    /// schedule its children.
1056    Enter(NodeId),
1057    /// Pop `usize` bytes from `path`. Width matches `FrozenNode::label_len`;
1058    /// see the note there about the `u16` size cap.
1059    Exit(u16),
1060}
1061
1062impl<'a, T> Iter<'a, T> {
1063    fn new(trie: &'a CommandTrie<T>, root: NodeId, initial_path: Vec<u8>) -> Self {
1064        let mut stack = Vec::new();
1065        // Push children in reverse so the first child pops first → left-to-right.
1066        let kids = trie.children_of(root);
1067        for &child in kids.iter().rev() {
1068            stack.push(Frame::Enter(child));
1069        }
1070        let pending_root = if trie.value_of(root).is_some() {
1071            Some(root)
1072        } else {
1073            None
1074        };
1075        let remaining = count_values(trie, root);
1076        Self {
1077            trie,
1078            stack,
1079            path: initial_path,
1080            pending_root,
1081            remaining,
1082        }
1083    }
1084}
1085
1086impl<'a, T> Iterator for Iter<'a, T> {
1087    type Item = (String, &'a T);
1088
1089    fn next(&mut self) -> Option<Self::Item> {
1090        if let Some(id) = self.pending_root.take() {
1091            let v = self.trie.value_of(id).expect("pending_root has a value");
1092            self.remaining -= 1;
1093            // SAFETY: `path` holds the starting node's bytes, a char-aligned
1094            // concatenation of edge labels — valid UTF-8.
1095            return Some((unsafe { utf8_string_unchecked(self.path.clone()) }, v));
1096        }
1097        while let Some(frame) = self.stack.pop() {
1098            match frame {
1099                Frame::Exit(n) => {
1100                    let new_len = self.path.len() - n as usize;
1101                    self.path.truncate(new_len);
1102                }
1103                Frame::Enter(node) => {
1104                    let lbl = self.trie.label_of(node);
1105                    self.path.extend_from_slice(lbl);
1106                    // SAFETY of cast: `lbl` came from `FrozenNode::label_len: u16`,
1107                    // so its length always fits in `u16`.
1108                    self.stack
1109                        .push(Frame::Exit(u16::try_from(lbl.len()).unwrap()));
1110                    for &child in self.trie.children_of(node).iter().rev() {
1111                        self.stack.push(Frame::Enter(child));
1112                    }
1113                    if let Some(v) = self.trie.value_of(node) {
1114                        self.remaining -= 1;
1115                        // SAFETY: `path` is the concatenation of char-aligned edge labels — valid UTF-8.
1116                        return Some((unsafe { utf8_string_unchecked(self.path.clone()) }, v));
1117                    }
1118                }
1119            }
1120        }
1121        None
1122    }
1123
1124    #[inline]
1125    fn size_hint(&self) -> (usize, Option<usize>) {
1126        (self.remaining, Some(self.remaining))
1127    }
1128}
1129
1130impl<T> ExactSizeIterator for Iter<'_, T> {
1131    #[inline]
1132    fn len(&self) -> usize {
1133        self.remaining
1134    }
1135}
1136
1137impl<T> FusedIterator for Iter<'_, T> {}
1138
1139impl<T> fmt::Debug for Iter<'_, T> {
1140    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1141        f.debug_struct("Iter")
1142            .field("remaining_frames", &self.stack.len())
1143            .finish()
1144    }
1145}
1146
1147// ============================================================
1148// Internal walkers
1149// ============================================================
1150
1151fn for_each_descendants<T>(
1152    trie: &CommandTrie<T>,
1153    node: NodeId,
1154    buf: &mut Vec<u8>,
1155    f: &mut impl FnMut(&str, &T),
1156) {
1157    let prev = buf.len();
1158    buf.extend_from_slice(trie.label_of(node));
1159    if let Some(v) = trie.value_of(node) {
1160        // SAFETY: `buf` is the concatenation of char-aligned edge labels — valid UTF-8.
1161        f(unsafe { utf8_str_unchecked(buf) }, v);
1162    }
1163    for &child in trie.children_of(node) {
1164        for_each_descendants(trie, child, buf, f);
1165    }
1166    buf.truncate(prev);
1167}
1168
1169fn count_values<T>(trie: &CommandTrie<T>, node: NodeId) -> usize {
1170    let mut n = usize::from(trie.value_of(node).is_some());
1171    for &child in trie.children_of(node) {
1172        n += count_values(trie, child);
1173    }
1174    n
1175}
1176
1177// ============================================================
1178// Tests
1179// ============================================================
1180
1181#[cfg(test)]
1182mod tests {
1183    use super::*;
1184
1185    /// Convenience for tests that don't care about the build step.
1186    fn build_from<'a, I: IntoIterator<Item = (&'a str, i32)>>(items: I) -> CommandTrie<i32> {
1187        let mut b = CommandTrieBuilder::new();
1188        for (k, v) in items {
1189            b.insert(k, v);
1190        }
1191        b.build()
1192    }
1193
1194    // -------- Builder behavior --------
1195
1196    #[test]
1197    fn builder_insert_overwrite_remove() {
1198        let mut b = CommandTrieBuilder::new();
1199        assert_eq!(b.insert("commit", 1), None);
1200        assert_eq!(b.insert("commit", 2), Some(1));
1201        assert_eq!(b.get("commit"), Some(&2));
1202        assert!(b.contains("commit"));
1203        assert_eq!(b.remove("commit"), Some(2));
1204        assert_eq!(b.remove("commit"), None);
1205        assert!(b.is_empty());
1206    }
1207
1208    #[test]
1209    fn builder_remove_prunes_and_merges() {
1210        let mut b = CommandTrieBuilder::new();
1211        b.insert("command", 1);
1212        b.insert("commit", 2);
1213        b.insert("comm", 3);
1214        assert_eq!(b.remove("comm"), Some(3));
1215        assert_eq!(b.remove("commit"), Some(2));
1216        assert_eq!(b.get("command"), Some(&1));
1217        assert_eq!(b.len(), 1);
1218    }
1219
1220    #[test]
1221    fn builder_from_iter_and_extend() {
1222        let b: CommandTrieBuilder<i32> = [("a", 1), ("ab", 2), ("abc", 3)].into_iter().collect();
1223        assert_eq!(b.len(), 3);
1224        assert_eq!(b.get("ab"), Some(&2));
1225    }
1226
1227    #[test]
1228    fn builder_get_diverges_mid_edge() {
1229        // First byte matches a child but the label diverges: covers
1230        // the `!rem.starts_with(&child.label)` path in BuilderNode::get.
1231        let mut b = CommandTrieBuilder::new();
1232        b.insert("command", 1);
1233        assert_eq!(b.get("comx"), None);
1234        assert_eq!(b.get("c"), None);
1235    }
1236
1237    // -------- Frozen trie: basic queries --------
1238
1239    #[test]
1240    fn frozen_get() {
1241        let t = build_from([("commit", 1), ("command", 2)]);
1242        assert_eq!(t.get("commit"), Some(&1));
1243        assert_eq!(t.get("command"), Some(&2));
1244        assert_eq!(t.get("comm"), None);
1245        assert_eq!(t.get("commits"), None);
1246        assert_eq!(t.get(""), None);
1247        assert!(t.contains("commit"));
1248        assert!(!t.contains("comm"));
1249    }
1250
1251    #[test]
1252    fn frozen_query_accepts_non_ascii_keys() {
1253        // Read methods accept `&str` and must remain safe (no panic, no UB)
1254        // when queried with multi-byte UTF-8 input against an ASCII-only
1255        // trie. The queries below simply cannot match any stored edge and
1256        // should return the empty answer rather than panicking.
1257        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1258
1259        // Multi-byte first byte: 0xC3 ('é' starts with 0xC3 0xA9) is > 0x7F
1260        // and cannot index any child edge.
1261        assert_eq!(t.get("café"), None);
1262        assert!(!t.contains("café"));
1263
1264        // Non-ASCII appearing after a valid ASCII prefix.
1265        assert_eq!(t.get("commité"), None);
1266        assert_eq!(t.get("comméand"), None);
1267
1268        // Pure non-ASCII / emoji.
1269        assert_eq!(t.get("🦀"), None);
1270        assert_eq!(t.get("π"), None);
1271
1272        // Prefix-flavored queries behave the same: no match, no panic.
1273        assert!(!t.contains_prefix("café"));
1274        assert_eq!(t.count_completions("café"), 0);
1275        assert!(t.completions("café").is_empty());
1276        assert_eq!(t.completion_prefix("café"), None);
1277        assert!(t.subtrie("café").is_none());
1278        assert_eq!(t.longest_prefix_match("café"), None);
1279
1280        // Non-ASCII tail after a real command still finds the command via
1281        // longest-prefix match -- the ASCII prefix is consumed cleanly and
1282        // the multi-byte suffix simply fails to extend any edge.
1283        assert_eq!(t.longest_prefix_match("commit é"), Some(("commit", &1)),);
1284    }
1285
1286    #[test]
1287    fn frozen_empty_trie() {
1288        let t = CommandTrieBuilder::<i32>::new().build();
1289        assert_eq!(t.len(), 0);
1290        assert!(t.is_empty());
1291        assert_eq!(t.get(""), None);
1292        assert_eq!(t.get("anything"), None);
1293        assert!(!t.contains_prefix(""));
1294        assert!(t.subtrie("").is_none());
1295        assert_eq!(t.completion_prefix(""), None);
1296        assert_eq!(t.count_completions(""), 0);
1297        assert!(t.completions("").is_empty());
1298        assert_eq!(t.iter().count(), 0);
1299    }
1300
1301    #[test]
1302    fn frozen_longest_prefix_match() {
1303        let t = build_from([("git", 1), ("git-status", 2)]);
1304        assert_eq!(
1305            t.longest_prefix_match("git-status --short"),
1306            Some(("git-status", &2))
1307        );
1308        assert_eq!(t.longest_prefix_match("git foo"), Some(("git", &1)));
1309        assert_eq!(t.longest_prefix_match("git"), Some(("git", &1)));
1310        assert_eq!(t.longest_prefix_match("gi"), None);
1311        assert_eq!(t.longest_prefix_match("zzz"), None);
1312        assert_eq!(t.longest_prefix_match(""), None);
1313        // Returned slice's len() is the consumed byte count.
1314        let (matched, _) = t.longest_prefix_match("git-status xyz").unwrap();
1315        assert_eq!(matched.len(), 10);
1316    }
1317
1318    #[test]
1319    fn frozen_contains_prefix() {
1320        let t = build_from([("commit", 1)]);
1321        assert!(t.contains_prefix(""));
1322        assert!(t.contains_prefix("c"));
1323        assert!(t.contains_prefix("comm"));
1324        assert!(t.contains_prefix("commit"));
1325        assert!(!t.contains_prefix("commits"));
1326        assert!(!t.contains_prefix("d"));
1327    }
1328
1329    // -------- Frozen trie: completions / subtrie --------
1330
1331    #[test]
1332    fn frozen_completions() {
1333        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1334        let mut got = t.completions("comm");
1335        got.sort_by(|a, b| a.0.cmp(&b.0));
1336        assert_eq!(
1337            got,
1338            vec![("command".to_string(), &2), ("commit".to_string(), &1)]
1339        );
1340        assert_eq!(t.completions("").len(), 4);
1341        assert!(t.completions("xyz").is_empty());
1342    }
1343
1344    #[test]
1345    fn frozen_completions_prefix_ends_mid_edge() {
1346        let t = build_from([("command", 1)]);
1347        let got = t.completions("co");
1348        assert_eq!(got, vec![("command".to_string(), &1)]);
1349    }
1350
1351    #[test]
1352    fn frozen_completion_prefix_extends_past_query() {
1353        let t = build_from([("command", 1), ("commit", 2)]);
1354        assert_eq!(t.completion_prefix("c").as_deref(), Some("comm"));
1355
1356        let t = build_from([("command", 1)]);
1357        assert_eq!(t.completion_prefix("").as_deref(), Some("command"));
1358        assert_eq!(t.completion_prefix("c").as_deref(), Some("command"));
1359        assert_eq!(t.completion_prefix("commits"), None);
1360    }
1361
1362    #[test]
1363    fn frozen_subtrie_views() {
1364        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1365        let sub = t.subtrie("comm").unwrap();
1366        assert_eq!(sub.common_prefix(), "comm");
1367        assert_eq!(sub.len(), 2);
1368        assert!(!sub.is_empty());
1369
1370        let mut via_iter: Vec<(String, i32)> = sub.iter().map(|(k, v)| (k, *v)).collect();
1371        via_iter.sort();
1372        let mut via_for_each: Vec<(String, i32)> = Vec::new();
1373        sub.for_each(|k, v| via_for_each.push((k.to_string(), *v)));
1374        via_for_each.sort();
1375        assert_eq!(via_iter, via_for_each);
1376        assert_eq!(
1377            via_iter,
1378            vec![("command".to_string(), 2), ("commit".to_string(), 1)]
1379        );
1380
1381        // by-value IntoIterator
1382        let owned: Vec<_> = sub.into_iter().collect();
1383        assert_eq!(owned.len(), 2);
1384    }
1385
1386    #[test]
1387    fn frozen_subtrie_on_exact_leaf() {
1388        let t = build_from([("commit", 1), ("command", 2)]);
1389        let sub = t.subtrie("commit").unwrap();
1390        assert_eq!(sub.common_prefix(), "commit");
1391        assert_eq!(sub.len(), 1);
1392    }
1393
1394    #[test]
1395    fn frozen_subtrie_extension_and_is_unique() {
1396        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1397
1398        // Branch point: extension is empty, more than one match.
1399        let sub = t.subtrie("c").unwrap();
1400        assert_eq!(sub.common_prefix(), "c");
1401        assert_eq!(sub.extension(), "");
1402        assert!(!sub.is_unique());
1403
1404        // Passthrough extends the LCP past the typed prefix.
1405        let sub = t.subtrie("co").unwrap();
1406        assert_eq!(sub.common_prefix(), "co");
1407        assert_eq!(sub.extension(), "");
1408        assert!(!sub.is_unique());
1409
1410        let sub = t.subtrie("comma").unwrap();
1411        assert_eq!(sub.common_prefix(), "command");
1412        assert_eq!(sub.extension(), "nd");
1413        assert!(sub.is_unique());
1414
1415        // Typing exactly one full command yields a unique view with no extension.
1416        let sub = t.subtrie("clone").unwrap();
1417        assert_eq!(sub.extension(), "");
1418        assert!(sub.is_unique());
1419
1420        // A value-bearing internal node bounds the LCP at its own key.
1421        let t2 = build_from([("git", 1), ("github", 2)]);
1422        let sub = t2.subtrie("gi").unwrap();
1423        assert_eq!(sub.common_prefix(), "git");
1424        assert_eq!(sub.extension(), "t");
1425        assert!(!sub.is_unique()); // "git" itself plus "github"
1426    }
1427
1428    #[test]
1429    fn frozen_subtrie_value_and_unique_value() {
1430        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1431
1432        // Branch-point view: no value at the deepest node.
1433        let sub = t.subtrie("com").unwrap();
1434        assert_eq!(sub.value(), None);
1435        assert_eq!(sub.unique_value(), None);
1436
1437        // Unique view: value() and unique_value() both return the entry.
1438        let sub = t.subtrie("commi").unwrap();
1439        assert_eq!(sub.common_prefix(), "commit");
1440        assert_eq!(sub.value(), Some(&1));
1441        assert_eq!(sub.unique_value(), Some(&1));
1442
1443        // Value-bearing internal node: value() exposes it; unique_value() does not.
1444        let t2 = build_from([("git", 10), ("github", 20)]);
1445        let sub = t2.subtrie("gi").unwrap();
1446        assert_eq!(sub.common_prefix(), "git");
1447        assert_eq!(sub.value(), Some(&10));
1448        assert_eq!(sub.unique_value(), None);
1449    }
1450
1451    #[test]
1452    fn frozen_iter_alphabetical() {
1453        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1454        let got: Vec<_> = t.iter().map(|(k, v)| (k, *v)).collect();
1455        assert_eq!(
1456            got,
1457            vec![
1458                ("clone".to_string(), 4),
1459                ("command".to_string(), 2),
1460                ("commit".to_string(), 1),
1461                ("config".to_string(), 3),
1462            ]
1463        );
1464
1465        // for-loop sugar via IntoIterator for &CommandTrie
1466        let mut n = 0;
1467        for _ in &t {
1468            n += 1;
1469        }
1470        assert_eq!(n, 4);
1471    }
1472
1473    #[test]
1474    fn frozen_for_each_no_alloc() {
1475        let t = build_from([("a", 1), ("ab", 2), ("abc", 3)]);
1476        let mut got: Vec<(String, i32)> = Vec::new();
1477        t.for_each(|k, v| got.push((k.to_string(), *v)));
1478        got.sort();
1479        assert_eq!(
1480            got,
1481            vec![
1482                ("a".to_string(), 1),
1483                ("ab".to_string(), 2),
1484                ("abc".to_string(), 3),
1485            ]
1486        );
1487    }
1488
1489    #[test]
1490    fn frozen_count_and_for_each_completion() {
1491        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1492        assert_eq!(t.count_completions("c"), 4);
1493        assert_eq!(t.count_completions("comm"), 2);
1494        assert_eq!(t.count_completions("commit"), 1);
1495        assert_eq!(t.count_completions("z"), 0);
1496
1497        let mut got: Vec<String> = Vec::new();
1498        t.for_each_completion("comm", |k, _| got.push(k.to_string()));
1499        got.sort();
1500        assert_eq!(got, vec!["command".to_string(), "commit".to_string()]);
1501    }
1502
1503    // -------- Layout invariants --------
1504
1505    #[test]
1506    fn build_packs_into_four_allocations() {
1507        let t = build_from([
1508            ("add", 0),
1509            ("alias", 1),
1510            ("branch", 2),
1511            ("checkout", 3),
1512            ("cherry-pick", 4),
1513            ("clean", 5),
1514            ("clone", 6),
1515            ("commit", 7),
1516            ("command", 8),
1517            ("config", 9),
1518        ]);
1519        // Sanity: round-trip all keys.
1520        for (i, k) in [
1521            "add",
1522            "alias",
1523            "branch",
1524            "checkout",
1525            "cherry-pick",
1526            "clean",
1527            "clone",
1528            "commit",
1529            "command",
1530            "config",
1531        ]
1532        .iter()
1533        .enumerate()
1534        {
1535            assert_eq!(t.get(k), Some(&(i as i32)));
1536        }
1537        // `child_first_bytes` is parallel to `children` and reflects the
1538        // first byte of each child's label.
1539        assert_eq!(t.child_first_bytes.len(), t.children.len());
1540        for (i, &child) in t.children.iter().enumerate() {
1541            let lbl0 = t.label_of(child)[0];
1542            assert_eq!(t.child_first_bytes[i], lbl0);
1543        }
1544        // Children of any node form a contiguous, sorted-by-first-byte slice.
1545        for id in 0..t.nodes.len() as NodeId {
1546            let kids = t.children_of(id);
1547            for w in kids.windows(2) {
1548                let a = t.label_of(w[0])[0];
1549                let b = t.label_of(w[1])[0];
1550                assert!(a < b, "siblings not sorted at node {id}: {a} >= {b}");
1551            }
1552        }
1553    }
1554
1555    // -------- Compile-time guarantees --------
1556
1557    const _: fn() = || {
1558        fn assert_send_sync<X: Send + Sync>() {}
1559        assert_send_sync::<CommandTrieBuilder<i32>>();
1560        assert_send_sync::<CommandTrie<i32>>();
1561        assert_send_sync::<SubTrie<'static, i32>>();
1562        assert_send_sync::<Iter<'static, i32>>();
1563    };
1564
1565    // -------- Defaults, Debug, misc trait impls --------
1566
1567    #[test]
1568    fn builder_default_and_clear() {
1569        let mut b: CommandTrieBuilder<i32> = CommandTrieBuilder::default();
1570        assert!(b.is_empty());
1571        b.insert("a", 1);
1572        b.insert("ab", 2);
1573        assert_eq!(b.len(), 2);
1574        b.clear();
1575        assert!(b.is_empty());
1576        assert_eq!(b.get("a"), None);
1577    }
1578
1579    #[test]
1580    fn trie_default_is_empty() {
1581        let t: CommandTrie<i32> = CommandTrie::default();
1582        assert!(t.is_empty());
1583        assert_eq!(t.get("anything"), None);
1584        assert_eq!(t.iter().count(), 0);
1585    }
1586
1587    #[test]
1588    fn debug_impls_render() {
1589        let mut b = CommandTrieBuilder::new();
1590        b.insert("commit", 1);
1591        b.insert("command", 2);
1592        // BuilderNode debug runs via the builder's children field.
1593        let s = format!("{b:?}");
1594        assert!(s.contains("CommandTrieBuilder"));
1595        assert!(s.contains("BuilderNode"));
1596
1597        let t = b.build();
1598        let s = format!("{t:?}");
1599        assert!(s.contains("CommandTrie"));
1600        assert!(s.contains("len"));
1601
1602        let sub = t.subtrie("comm").unwrap();
1603        let s = format!("{sub:?}");
1604        assert!(s.contains("SubTrie"));
1605        assert!(s.contains("comm"));
1606
1607        let it = t.iter();
1608        let s = format!("{it:?}");
1609        assert!(s.contains("Iter"));
1610    }
1611
1612    #[test]
1613    fn subtrie_ref_into_iter() {
1614        let t = build_from([("commit", 1), ("command", 2)]);
1615        let sub = t.subtrie("comm").unwrap();
1616        // Exercise IntoIterator for &SubTrie (clones common_prefix).
1617        let from_ref: Vec<_> = (&sub).into_iter().collect();
1618        assert_eq!(from_ref.len(), 2);
1619        // sub is still usable afterwards.
1620        assert_eq!(sub.len(), 2);
1621    }
1622
1623    #[test]
1624    fn subtrie_for_each_emits_starting_node_value() {
1625        // The starting node itself carries a value: covers the
1626        // "emit value at root of view" branch in SubTrie::for_each.
1627        let t = build_from([("git", 10), ("github", 20)]);
1628        let sub = t.subtrie("git").unwrap();
1629        let mut got: Vec<(String, i32)> = Vec::new();
1630        sub.for_each(|k, v| got.push((k.to_string(), *v)));
1631        got.sort();
1632        assert_eq!(
1633            got,
1634            vec![("git".to_string(), 10), ("github".to_string(), 20)]
1635        );
1636    }
1637
1638    #[test]
1639    fn insert_32k_entries_no_panic() {
1640        // Design constraint: the frozen trie's `u16`-indexed slabs cap the
1641        // worst-case node count at `u16::MAX = 65,535`, which corresponds to
1642        // roughly `32,767` entries (worst case: `2N + 1` nodes per `N` entries).
1643        //
1644        // Build a dense corpus at that size using realistic command-name-style
1645        // keys (think `print -rl -- ${(k)commands}` on a busy `$PATH`): a few
1646        // dozen tool prefixes, a vocabulary of subcommand stems, then a
1647        // numeric suffix to fan out to the target count. Average key length
1648        // is ~16 bytes, which is closer to real CLI completion sets than
1649        // the original 3-byte base-62 indices.
1650        const PREFIXES: &[&str] = &[
1651            "git-",
1652            "cargo-",
1653            "docker-",
1654            "kubectl-",
1655            "npm-",
1656            "pip-",
1657            "rustup-",
1658            "systemctl-",
1659            "journalctl-",
1660            "ip-",
1661            "nmcli-",
1662            "brew-",
1663        ];
1664        const STEMS: &[&str] = &[
1665            "list", "get", "set", "show", "describe", "create", "delete", "update", "apply",
1666            "watch", "rollout", "exec", "logs", "status", "info", "config", "scale", "patch",
1667            "expose", "annotate",
1668        ];
1669        // 12 prefixes × 20 stems × 134 buckets = 32,160 unique keys.
1670        const BUCKETS: u32 = 134;
1671        const N: u32 = PREFIXES.len() as u32 * STEMS.len() as u32 * BUCKETS;
1672        const _: () = assert!(N >= 32_000, "test corpus must hit the documented ~32k cap");
1673
1674        fn key(n: u32, buf: &mut String) {
1675            use std::fmt::Write;
1676            buf.clear();
1677            let p = (n as usize) % PREFIXES.len();
1678            let s = ((n as usize) / PREFIXES.len()) % STEMS.len();
1679            let bucket = (n as usize) / (PREFIXES.len() * STEMS.len());
1680            buf.push_str(PREFIXES[p]);
1681            buf.push_str(STEMS[s]);
1682            buf.push('-');
1683            write!(buf, "{bucket:03}").unwrap();
1684        }
1685
1686        let mut b: CommandTrieBuilder<u32> = CommandTrieBuilder::new();
1687        let mut buf = String::new();
1688        for i in 0..N {
1689            key(i, &mut buf);
1690            b.insert(&buf, i);
1691        }
1692        assert_eq!(b.len(), N as usize);
1693
1694        let t = b.build();
1695        assert_eq!(t.len(), N as usize);
1696
1697        // Spot-check a handful of lookups across the range.
1698        for &i in &[0u32, 1, 11, 12, 239, 240, 1023, 12345, N / 2, N - 1] {
1699            key(i, &mut buf);
1700            assert_eq!(t.get(&buf), Some(&i), "lookup failed for {i}");
1701        }
1702    }
1703
1704    #[test]
1705    fn iter_is_fused() {
1706        fn assert_fused<I: FusedIterator>(_: &I) {}
1707        let t = CommandTrieBuilder::<i32>::new().build();
1708        let it = t.iter();
1709        assert_fused(&it);
1710    }
1711
1712    #[test]
1713    fn iter_is_exact_size() {
1714        fn assert_exact<I: ExactSizeIterator>(_: &I) {}
1715
1716        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1717
1718        let it = t.iter();
1719        assert_exact(&it);
1720        assert_eq!(it.len(), 4);
1721        assert_eq!(it.size_hint(), (4, Some(4)));
1722
1723        // `len` decreases monotonically and reaches 0 at exhaustion.
1724        let mut it = t.iter();
1725        for expected in (0..4).rev() {
1726            assert!(it.next().is_some());
1727            assert_eq!(it.len(), expected);
1728            assert_eq!(it.size_hint(), (expected, Some(expected)));
1729        }
1730        assert!(it.next().is_none());
1731        assert_eq!(it.len(), 0);
1732
1733        // SubTrie iterators report the subtrie's exact size, including a
1734        // value at the starting node.
1735        let t2 = build_from([("git", 10), ("github", 20), ("gitlab", 30)]);
1736        let sub = t2.subtrie("git").unwrap();
1737        let it = sub.iter();
1738        assert_eq!(it.len(), 3);
1739        let collected: Vec<_> = sub.into_iter().collect();
1740        assert_eq!(collected.len(), 3);
1741
1742        // Empty trie: len() == 0 up front.
1743        let empty: CommandTrie<i32> = CommandTrieBuilder::new().build();
1744        assert_eq!(empty.iter().len(), 0);
1745    }
1746
1747    // -------- Fuzz vs BTreeMap (rebuild after every op) --------
1748
1749    #[test]
1750    fn fuzz_against_btreemap() {
1751        use std::collections::BTreeMap;
1752
1753        let mut state: u64 = 0x_dead_beef_cafe_f00d;
1754        let mut rand = || {
1755            state = state
1756                .wrapping_mul(6364136223846793005)
1757                .wrapping_add(1442695040888963407);
1758            state
1759        };
1760
1761        let keys = [
1762            "", "a", "ab", "abc", "abd", "abcd", "abce", "b", "ba", "bar", "baz", "c", "co", "com",
1763            "comm", "command", "commit", "config", "x", "xy", "xyz",
1764        ];
1765        let probe_prefixes = [
1766            "", "a", "ab", "abc", "abz", "b", "c", "comm", "com", "x", "z",
1767        ];
1768
1769        let mut builder: CommandTrieBuilder<i32> = CommandTrieBuilder::new();
1770        let mut model: BTreeMap<String, i32> = BTreeMap::new();
1771
1772        for op in 0..500 {
1773            let r = rand();
1774            let key = keys[(r as usize) % keys.len()];
1775
1776            if (r >> 32) % 4 == 0 {
1777                let b_prev = builder.remove(key);
1778                let m_prev = model.remove(key);
1779                assert_eq!(b_prev, m_prev, "remove({key:?}) at op {op}");
1780            } else {
1781                let v = (r >> 8) as i32;
1782                let b_prev = builder.insert(key, v);
1783                let m_prev = model.insert(key.to_string(), v);
1784                assert_eq!(b_prev, m_prev, "insert({key:?}, {v}) at op {op}");
1785            }
1786
1787            assert_eq!(builder.len(), model.len(), "len at op {op}");
1788
1789            // Rebuild and validate the frozen trie's read API.
1790            let trie = builder.clone().build();
1791            assert_eq!(trie.len(), model.len());
1792
1793            for k in &keys {
1794                assert_eq!(trie.get(k), model.get(*k), "get({k:?}) at op {op}");
1795                assert_eq!(trie.contains(k), model.contains_key(*k));
1796            }
1797
1798            for pfx in &probe_prefixes {
1799                let mut from_trie: Vec<String> =
1800                    trie.completions(pfx).into_iter().map(|(k, _)| k).collect();
1801                from_trie.sort();
1802                let mut from_model: Vec<String> = model
1803                    .keys()
1804                    .filter(|k| k.starts_with(pfx))
1805                    .cloned()
1806                    .collect();
1807                from_model.sort();
1808                assert_eq!(from_trie, from_model, "completions({pfx:?}) at op {op}");
1809
1810                assert_eq!(
1811                    trie.count_completions(pfx),
1812                    from_model.len(),
1813                    "count_completions({pfx:?}) at op {op}"
1814                );
1815
1816                if let Some(cp) = trie.completion_prefix(pfx) {
1817                    assert!(cp.starts_with(pfx));
1818                    for k in &from_model {
1819                        assert!(k.starts_with(&cp));
1820                    }
1821                } else {
1822                    assert!(from_model.is_empty());
1823                }
1824            }
1825
1826            let from_trie: Vec<(String, i32)> = trie.iter().map(|(k, v)| (k, *v)).collect();
1827            let from_model: Vec<(String, i32)> =
1828                model.iter().map(|(k, v)| (k.clone(), *v)).collect();
1829            assert_eq!(from_trie, from_model, "iter at op {op}");
1830        }
1831    }
1832
1833    // ---------------------------------------------------------------------
1834    // UTF-8 tests
1835    //
1836    // Splits in the radix trie are char-aligned, so any concatenation of
1837    // edge labels is valid UTF-8. These tests cover both the easy cases
1838    // (codepoints whose first byte is unique among siblings) and the
1839    // harder case where two siblings share a leading byte (e.g. `'é'` and
1840    // `'ê'`, both starting with `0xC3`), which exercises the
1841    // equal-byte-run walk in `find_child`.
1842    // ---------------------------------------------------------------------
1843
1844    #[test]
1845    fn utf8_basic_insert_get() {
1846        let t = build_from([
1847            ("café", 1),
1848            ("über", 2),
1849            ("naïve", 3),
1850            ("naïveté", 4),
1851            ("🦀", 5),
1852            ("π", 6),
1853        ]);
1854        assert_eq!(t.get("café"), Some(&1));
1855        assert_eq!(t.get("über"), Some(&2));
1856        assert_eq!(t.get("naïve"), Some(&3));
1857        assert_eq!(t.get("naïveté"), Some(&4));
1858        assert_eq!(t.get("🦀"), Some(&5));
1859        assert_eq!(t.get("π"), Some(&6));
1860        assert_eq!(t.get("cafe"), None);
1861        assert_eq!(t.get("naïv"), None);
1862    }
1863
1864    #[test]
1865    fn utf8_shared_first_byte_siblings() {
1866        // 'é' = C3 A9, 'ê' = C3 AA, 'è' = C3 A8 — all share leading byte
1867        // 0xC3, so the frozen find_child must walk the equal-byte run
1868        // rather than trust binary_search alone.
1869        let t = build_from([("éa", 1), ("êb", 2), ("èc", 3), ("ad", 4)]);
1870        assert_eq!(t.get("éa"), Some(&1));
1871        assert_eq!(t.get("êb"), Some(&2));
1872        assert_eq!(t.get("èc"), Some(&3));
1873        assert_eq!(t.get("ad"), Some(&4));
1874        assert_eq!(t.get("éb"), None);
1875        assert_eq!(t.get("ê"), None);
1876        assert!(t.contains_prefix("é"));
1877        assert!(t.contains_prefix("ê"));
1878        assert!(!t.contains_prefix("ë"));
1879    }
1880
1881    #[test]
1882    fn utf8_split_at_shared_codepoint() {
1883        // Both keys start with 'é' (C3 A9), then diverge at the *next*
1884        // char. The builder must place the split at byte 2 (after 'é'),
1885        // not inside it.
1886        let t = build_from([("éa", 1), ("éb", 2)]);
1887        assert_eq!(t.get("éa"), Some(&1));
1888        assert_eq!(t.get("éb"), Some(&2));
1889        let sub = t.subtrie("é").expect("prefix 'é' should exist");
1890        assert_eq!(sub.common_prefix(), "é");
1891        assert_eq!(sub.len(), 2);
1892    }
1893
1894    #[test]
1895    fn utf8_sort_order_matches_btreemap() {
1896        use std::collections::BTreeMap;
1897        let pairs: Vec<(&str, i32)> = vec![
1898            ("apple", 1),
1899            ("café", 2),
1900            ("cab", 3),
1901            ("über", 4),
1902            ("ünder", 5),
1903            ("naïve", 6),
1904            ("naive", 7),
1905            ("🦀rust", 8),
1906            ("🦀", 9),
1907            ("π", 10),
1908            ("zoo", 11),
1909        ];
1910        let model: BTreeMap<&str, i32> = pairs.iter().copied().collect();
1911        let t = build_from(pairs.iter().copied());
1912        let from_trie: Vec<(String, i32)> = t.iter().map(|(k, v)| (k, *v)).collect();
1913        let from_model: Vec<(String, i32)> =
1914            model.iter().map(|(k, v)| (k.to_string(), *v)).collect();
1915        assert_eq!(from_trie, from_model);
1916    }
1917
1918    #[test]
1919    fn utf8_completion_prefix_extends_through_char() {
1920        // The only key starting with "n" is "naïveté", so completion_prefix
1921        // should extend all the way through the multi-byte 'ï'.
1922        let t = build_from([("naïveté", 1), ("zzz", 2)]);
1923        assert_eq!(t.completion_prefix("n").as_deref(), Some("naïveté"));
1924        // Querying with a prefix that ends mid-char is impossible for a
1925        // `&str` input, but querying with a prefix that ends *exactly* on
1926        // a char boundary inside the key must work:
1927        assert_eq!(t.completion_prefix("naï").as_deref(), Some("naïveté"));
1928    }
1929
1930    #[test]
1931    fn utf8_longest_prefix_match() {
1932        let t = build_from([("café", 1), ("ca", 2)]);
1933        assert_eq!(t.longest_prefix_match("café au lait"), Some(("café", &1)));
1934        assert_eq!(t.longest_prefix_match("cab"), Some(("ca", &2)));
1935        // "caf" sits between "ca" and "café"; only "ca" is a stored key.
1936        assert_eq!(t.longest_prefix_match("caf"), Some(("ca", &2)));
1937    }
1938
1939    #[test]
1940    fn utf8_remove_and_reinsert() {
1941        let mut b = CommandTrieBuilder::new();
1942        b.insert("café", 1);
1943        b.insert("naïve", 2);
1944        assert_eq!(b.remove("café"), Some(1));
1945        assert_eq!(b.get("café"), None);
1946        assert_eq!(b.get("naïve"), Some(&2));
1947        b.insert("café", 11);
1948        let t = b.build();
1949        assert_eq!(t.get("café"), Some(&11));
1950        assert_eq!(t.get("naïve"), Some(&2));
1951    }
1952
1953    #[test]
1954    fn utf8_iter_roundtrip_emoji_heavy() {
1955        let keys = ["🦀", "🦀rust", "🦀🦀", "🔥", "🔥fire", "ascii"];
1956        let mut b = CommandTrieBuilder::new();
1957        for (i, k) in keys.iter().enumerate() {
1958            b.insert(k, i as i32);
1959        }
1960        let t = b.build();
1961        for (i, k) in keys.iter().enumerate() {
1962            assert_eq!(t.get(k), Some(&(i as i32)), "lookup {k}");
1963        }
1964        // round-trip
1965        let collected: Vec<String> = t.iter().map(|(k, _)| k).collect();
1966        let mut expected: Vec<String> = keys.iter().map(|s| s.to_string()).collect();
1967        expected.sort();
1968        assert_eq!(collected, expected);
1969    }
1970
1971    #[test]
1972    fn utf8_three_byte_char_paths() {
1973        // '中' = E4 B8 AD — exercises the 3-byte arm of `utf8_char_len`
1974        // through both the builder cold path and the frozen multi-byte
1975        // lookup path.
1976        let mut b = CommandTrieBuilder::new();
1977        b.insert("中", 1);
1978        b.insert("中a", 2);
1979        b.insert("中b", 3);
1980        b.insert("間", 4);
1981        let t = b.build();
1982        assert_eq!(t.get("中"), Some(&1));
1983        assert_eq!(t.get("中a"), Some(&2));
1984        assert_eq!(t.get("中b"), Some(&3));
1985        assert_eq!(t.get("間"), Some(&4));
1986        assert_eq!(t.get("中c"), None);
1987        assert_eq!(t.longest_prefix_match("中ax"), Some(("中a", &2)),);
1988    }
1989
1990    #[test]
1991    fn utf8_lcp_backs_off_into_codepoint_boundary() {
1992        // "Xé" (58 C3 A9) and "Xè" (58 C3 A8) share an ASCII 'X' then
1993        // diverge inside the second byte of a 2-byte codepoint. The byte
1994        // LCP is 2, but the char-aligned `lcp` must back off to 1 so the
1995        // split point lands on a codepoint boundary.
1996        let mut b = CommandTrieBuilder::new();
1997        b.insert("Xé", 1);
1998        b.insert("Xè", 2);
1999        let t = b.build();
2000        assert_eq!(t.get("Xé"), Some(&1));
2001        assert_eq!(t.get("Xè"), Some(&2));
2002        // The shared edge label must be "X", not "X" + a stray leading
2003        // byte of the multi-byte char; iteration order confirms both
2004        // entries are reachable and distinct.
2005        let keys: Vec<String> = t.iter().map(|(k, _)| k).collect();
2006        assert_eq!(keys, vec!["Xè".to_string(), "Xé".to_string()]);
2007    }
2008}