Skip to main content

command_trie/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3#![warn(missing_docs)]
4#![warn(missing_debug_implementations)]
5
6extern crate alloc;
7
8use alloc::boxed::Box;
9use alloc::string::String;
10use alloc::vec;
11use alloc::vec::Vec;
12use core::fmt;
13use core::iter::FusedIterator;
14
15// ============================================================
16// UTF-8 helpers
17// ============================================================
18//
19// All keys enter the trie as `&str`, i.e. valid UTF-8. The builder splits
20// edges only on **char boundaries** (see `lcp` and `BuilderNode::child_index`
21// below), so every edge label — and therefore every concatenation of edge
22// labels reachable from a frozen node — is itself valid UTF-8. The helpers
23// below centralize this invariant; read paths use them in place of a checked
24// `from_utf8(...).expect(...)` to skip a redundant validation pass.
25
26/// # Safety
27/// Caller must ensure `bytes` is either an edge label produced by
28/// `CommandTrieBuilder::build` or a concatenation of such labels following
29/// parent → child edges. Both are guaranteed valid UTF-8 by the char-aligned
30/// split invariant.
31#[inline]
32unsafe fn utf8_str_unchecked(bytes: &[u8]) -> &str {
33    debug_assert!(core::str::from_utf8(bytes).is_ok());
34    // SAFETY: char-aligned splits keep every edge label valid UTF-8; the
35    // concatenation of valid UTF-8 fragments is itself valid UTF-8.
36    unsafe { core::str::from_utf8_unchecked(bytes) }
37}
38
39/// # Safety
40/// Same contract as [`utf8_str_unchecked`].
41#[inline]
42unsafe fn utf8_string_unchecked(bytes: Vec<u8>) -> String {
43    debug_assert!(core::str::from_utf8(&bytes).is_ok());
44    // SAFETY: see [`utf8_str_unchecked`].
45    unsafe { String::from_utf8_unchecked(bytes) }
46}
47
48/// Byte length of the UTF-8 character whose leading byte is `b`. For valid
49/// UTF-8 leading bytes the result is 1..=4. For continuation bytes (`0b10xxxxxx`)
50/// or otherwise-invalid leading bytes the result is 1, which causes lookup to
51/// fall through without panicking — invalid bytes can't be in any stored key.
52#[inline]
53fn utf8_char_len(b: u8) -> usize {
54    // ASCII or continuation byte: 1 (continuation is invalid as a leader,
55    // but returning 1 makes the lookup fall through without panicking).
56    if b < 0xC0 {
57        1
58    } else if b < 0xE0 {
59        2
60    } else if b < 0xF0 {
61        3
62    } else {
63        4
64    }
65}
66
67// ============================================================
68// Builder
69// ============================================================
70
71/// Mutable trie used during the build phase.
72///
73/// Call [`Self::build`] to produce a frozen [`CommandTrie`] when you are
74/// done inserting.
75#[derive(Clone)]
76pub struct CommandTrieBuilder<T> {
77    root: BuilderNode<T>,
78    len: usize,
79}
80
81#[derive(Clone)]
82struct BuilderNode<T> {
83    /// Bytes consumed by the edge from this node's parent. Empty only for the root.
84    label: Box<[u8]>,
85    value: Option<T>,
86    /// Children, kept sorted by `label[0]`. First bytes are unique among siblings.
87    children: Vec<BuilderNode<T>>,
88}
89
90impl<T> Default for CommandTrieBuilder<T> {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl<T> CommandTrieBuilder<T> {
97    /// Create a new, empty builder.
98    #[must_use]
99    pub fn new() -> Self {
100        Self {
101            root: BuilderNode {
102                label: Box::from(&[][..]),
103                value: None,
104                children: Vec::new(),
105            },
106            len: 0,
107        }
108    }
109
110    /// Number of entries currently in the builder.
111    #[must_use]
112    pub fn len(&self) -> usize {
113        self.len
114    }
115
116    /// Returns `true` when no entries have been inserted.
117    #[must_use]
118    pub fn is_empty(&self) -> bool {
119        self.len == 0
120    }
121
122    /// Remove every entry, returning the builder to its initial state.
123    pub fn clear(&mut self) {
124        *self = Self::new();
125    }
126
127    /// Insert `key` with `value`. Returns the previous value, if any.
128    ///
129    /// Any `&str` is accepted; the radix splits on UTF-8 char boundaries so
130    /// every internal edge label remains valid UTF-8.
131    pub fn insert(&mut self, key: &str, value: T) -> Option<T> {
132        let prev = self.root.insert(key.as_bytes(), value);
133        if prev.is_none() {
134            self.len += 1;
135        }
136        prev
137    }
138
139    /// Remove `key` and return its value, if present. Maintains the radix
140    /// invariant by pruning empty branches and merging single-child
141    /// passthroughs with their parent edge.
142    pub fn remove(&mut self, key: &str) -> Option<T> {
143        let v = self.root.remove(key.as_bytes())?;
144        self.len -= 1;
145        Some(v)
146    }
147
148    /// Exact lookup against the builder.
149    #[must_use]
150    pub fn get(&self, key: &str) -> Option<&T> {
151        let mut node = &self.root;
152        let mut rem = key.as_bytes();
153        loop {
154            if rem.is_empty() {
155                return node.value.as_ref();
156            }
157            let child = node.find_child(rem)?;
158            if !rem.starts_with(&child.label) {
159                return None;
160            }
161            rem = &rem[child.label.len()..];
162            node = child;
163        }
164    }
165
166    /// Returns `true` when `key` is present in the builder.
167    #[must_use]
168    pub fn contains(&self, key: &str) -> bool {
169        self.get(key).is_some()
170    }
171
172    /// Freeze the builder into a compact, read-only [`CommandTrie`].
173    ///
174    /// Performs one DFS over the builder trie and writes the result into
175    /// four contiguous slabs (`nodes`, `labels`, `children`,
176    /// `child_first_bytes`). After this call the frozen trie has exactly
177    /// four heap allocations regardless of trie size.
178    ///
179    /// # Panics
180    /// If the trie's total node count, total label bytes, or total edge
181    /// count would exceed [`u16::MAX`] (65,535). The frozen representation
182    /// stores every offset and node id as a `u16` to halve the structural
183    /// slabs. The worst case for `N` inserted entries is `2N + 1` nodes,
184    /// so this caps the trie at roughly **32,767 entries** — enough for
185    /// any realistic command set.
186    pub fn build(self) -> CommandTrie<T> {
187        let len = self.len;
188        let mut nodes: Vec<FrozenNode<T>> = Vec::new();
189        let mut labels: Vec<u8> = Vec::new();
190        let mut children: Vec<NodeId> = Vec::new();
191        let mut child_first_bytes: Vec<u8> = Vec::new();
192        build_visit(
193            self.root,
194            &mut nodes,
195            &mut labels,
196            &mut children,
197            &mut child_first_bytes,
198        );
199        CommandTrie {
200            nodes: nodes.into_boxed_slice(),
201            labels: labels.into_boxed_slice(),
202            children: children.into_boxed_slice(),
203            child_first_bytes: child_first_bytes.into_boxed_slice(),
204            len,
205        }
206    }
207}
208
209/// Pre-order DFS: visit a node, emit its `FrozenNode` slot, reserve a
210/// contiguous range of child-id slots, then recurse and fill those slots.
211fn build_visit<T>(
212    node: BuilderNode<T>,
213    nodes: &mut Vec<FrozenNode<T>>,
214    labels: &mut Vec<u8>,
215    children: &mut Vec<NodeId>,
216    child_first_bytes: &mut Vec<u8>,
217) -> NodeId {
218    let id = u16_or_panic(nodes.len());
219    let label_start = u16_or_panic(labels.len());
220    let label_len = u16_or_panic(node.label.len());
221    labels.extend_from_slice(&node.label);
222
223    nodes.push(FrozenNode {
224        label_start,
225        label_len,
226        children_start: 0,
227        children_len: 0,
228        value: node.value,
229    });
230
231    // Reserve a contiguous run of child-id slots BEFORE recursing, so all of
232    // this node's children end up adjacent in `children` regardless of how
233    // many descendants each child contributes.
234    let n_children = u16_or_panic(node.children.len());
235    let children_start = u16_or_panic(children.len());
236    for _ in 0..n_children {
237        children.push(0);
238        child_first_bytes.push(0);
239    }
240    for (i, child) in node.children.into_iter().enumerate() {
241        // Capture the child's first byte BEFORE recursing, since `child` is
242        // about to be moved into `build_visit`. Non-root children always have
243        // a non-empty label.
244        let first = child.label[0];
245        let slot = children_start as usize + i;
246        let child_id = build_visit(child, nodes, labels, children, child_first_bytes);
247        children[slot] = child_id;
248        child_first_bytes[slot] = first;
249    }
250
251    let n = &mut nodes[id as usize];
252    n.children_start = children_start;
253    n.children_len = n_children;
254    id
255}
256
257/// Convert a `usize` index/length to `u16`, panicking with a clear message
258/// if the documented size cap (see [`FrozenNode`]) is exceeded. Used during
259/// `build` to turn what would otherwise be silent `as u16` truncation into
260/// an explicit failure.
261#[inline]
262fn u16_or_panic(n: usize) -> u16 {
263    u16::try_from(n).expect("command-trie size exceeds u16::MAX (see FrozenNode docs)")
264}
265
266impl<T> BuilderNode<T> {
267    /// Find the child whose label starts with the same UTF-8 char as `rem`.
268    ///
269    /// `rem` must be non-empty and start with a valid UTF-8 leading byte
270    /// (always true when called with a slice of an inserted `&str` or with
271    /// remaining query bytes from `Self::get`).
272    fn find_child(&self, rem: &[u8]) -> Option<&BuilderNode<T>> {
273        let idx = self.child_index(rem).ok()?;
274        Some(&self.children[idx])
275    }
276
277    /// Binary search for the child whose label starts with the same UTF-8 char
278    /// as `rem`, returning the index or where it would be inserted.
279    ///
280    /// Children are sorted by their first char's UTF-8 bytes; this is identical
281    /// to first-byte order for valid UTF-8, so the comparison only needs to
282    /// look at each child's leading-char bytes (1..=4 bytes) against the
283    /// needle's leading-char bytes.
284    ///
285    /// ASCII fast path: when the needle's first byte is `< 0x80` it *is* the
286    /// whole leading char, and an ASCII first byte is unique among siblings
287    /// (char-aligned splits guarantee distinct sibling leading chars, and an
288    /// ASCII byte cannot collide with any multi-byte first byte which is
289    /// always `>= 0xC0`). A plain `binary_search_by_key` on the first byte
290    /// suffices and matches the pre-UTF-8 cost.
291    fn child_index(&self, rem: &[u8]) -> Result<usize, usize> {
292        let first = rem[0];
293        if first < 0x80 {
294            return self.children.binary_search_by_key(&first, |c| c.label[0]);
295        }
296        let needle_len = utf8_char_len(first).min(rem.len());
297        let needle = &rem[..needle_len];
298        self.children.binary_search_by(|c| {
299            let cn = utf8_char_len(c.label[0]).min(c.label.len());
300            c.label[..cn].cmp(needle)
301        })
302    }
303
304    fn insert(&mut self, rem: &[u8], value: T) -> Option<T> {
305        if rem.is_empty() {
306            return self.value.replace(value);
307        }
308        match self.child_index(rem) {
309            Err(at) => {
310                self.children.insert(
311                    at,
312                    BuilderNode {
313                        label: Box::from(rem),
314                        value: Some(value),
315                        children: Vec::new(),
316                    },
317                );
318                None
319            }
320            Ok(idx) => {
321                let child = &mut self.children[idx];
322                let common = lcp(&child.label, rem);
323                if common == child.label.len() {
324                    return child.insert(&rem[common..], value);
325                }
326                // Split this edge at `common`. `common` is char-aligned by `lcp`,
327                // so both halves are valid UTF-8.
328                let old_label = core::mem::replace(&mut child.label, Box::from(&rem[..common]));
329                let old_value = child.value.take();
330                let old_children = core::mem::take(&mut child.children);
331                let existing = BuilderNode {
332                    label: Box::from(&old_label[common..]),
333                    value: old_value,
334                    children: old_children,
335                };
336                if common == rem.len() {
337                    child.value = Some(value);
338                    child.children = vec![existing];
339                } else {
340                    let new_node = BuilderNode {
341                        label: Box::from(&rem[common..]),
342                        value: Some(value),
343                        children: Vec::new(),
344                    };
345                    // Sort by first byte (equivalent to first-char lex order for
346                    // valid UTF-8). The two leading chars differ (otherwise `lcp`
347                    // would have extended `common`), so first bytes differ unless
348                    // the chars share a leading UTF-8 byte — in which case the
349                    // tie is broken by the next byte.
350                    child.children = if existing.label[..].cmp(&new_node.label[..])
351                        == core::cmp::Ordering::Less
352                    {
353                        vec![existing, new_node]
354                    } else {
355                        vec![new_node, existing]
356                    };
357                }
358                None
359            }
360        }
361    }
362
363    fn remove(&mut self, rem: &[u8]) -> Option<T> {
364        if rem.is_empty() {
365            return self.value.take();
366        }
367        let idx = self.child_index(rem).ok()?;
368        if !rem.starts_with(&self.children[idx].label) {
369            return None;
370        }
371        let label_len = self.children[idx].label.len();
372        let removed = self.children[idx].remove(&rem[label_len..])?;
373
374        let child = &self.children[idx];
375        if child.value.is_none() {
376            if child.children.is_empty() {
377                self.children.remove(idx);
378            } else if child.children.len() == 1 {
379                let mut removed_child = self.children.remove(idx);
380                let mut grandchild = removed_child.children.pop().unwrap();
381                let mut merged =
382                    Vec::with_capacity(removed_child.label.len() + grandchild.label.len());
383                merged.extend_from_slice(&removed_child.label);
384                merged.extend_from_slice(&grandchild.label);
385                grandchild.label = merged.into_boxed_slice();
386                self.children.insert(idx, grandchild);
387            }
388        }
389        Some(removed)
390    }
391}
392
393/// Longest common **char-aligned** byte prefix of `a` and `b`.
394///
395/// Both inputs are assumed to be valid UTF-8 (they come from `&str`s or from
396/// previously char-aligned edge labels). The byte-LCP is computed normally,
397/// then truncated back to the nearest char boundary. This ensures that any
398/// radix split using the returned length never bisects a multi-byte codepoint.
399fn lcp(a: &[u8], b: &[u8]) -> usize {
400    let mut i = a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count();
401    // If the first differing byte (or one-past-end in either input) is a
402    // UTF-8 continuation byte in `a`, back up to the codepoint boundary.
403    // For valid UTF-8 the matching prefix in `b` is identical, so checking
404    // either side is sufficient.
405    while i > 0 && i < a.len() && (a[i] & 0xC0) == 0x80 {
406        i -= 1;
407    }
408    i
409}
410
411impl<K: AsRef<str>, T> FromIterator<(K, T)> for CommandTrieBuilder<T> {
412    /// Build a [`CommandTrieBuilder`] from an iterator of `(key, value)` pairs.
413    fn from_iter<I: IntoIterator<Item = (K, T)>>(iter: I) -> Self {
414        let mut t = Self::new();
415        t.extend(iter);
416        t
417    }
418}
419
420impl<K: AsRef<str>, T> Extend<(K, T)> for CommandTrieBuilder<T> {
421    /// Insert each `(key, value)` from `iter`.
422    fn extend<I: IntoIterator<Item = (K, T)>>(&mut self, iter: I) {
423        for (k, v) in iter {
424            self.insert(k.as_ref(), v);
425        }
426    }
427}
428
429impl<T: fmt::Debug> fmt::Debug for CommandTrieBuilder<T> {
430    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
431        f.debug_struct("CommandTrieBuilder")
432            .field("len", &self.len)
433            .field("root", &self.root)
434            .finish()
435    }
436}
437
438impl<T: fmt::Debug> fmt::Debug for BuilderNode<T> {
439    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
440        f.debug_struct("BuilderNode")
441            // SAFETY: `label` is a char-aligned slice of an inserted `&str`,
442            // so it is valid UTF-8.
443            .field("label", &unsafe { utf8_str_unchecked(&self.label) })
444            .field("value", &self.value)
445            .field("children", &self.children)
446            .finish()
447    }
448}
449
450// ============================================================
451// Frozen trie
452// ============================================================
453
454type NodeId = u16;
455const ROOT: NodeId = 0;
456
457/// Compact, read-only radix trie produced by [`CommandTrieBuilder::build`].
458///
459/// All storage lives in four boxed slices (`nodes`, `labels`, `children`,
460/// `child_first_bytes`). All query methods are non-allocating in their
461/// lookup paths; only methods that materialize keys (e.g. `Iter::next`)
462/// allocate.
463#[derive(Clone)]
464pub struct CommandTrie<T> {
465    /// `nodes[0]` is the root. Pre-order DFS layout from `build`.
466    nodes: Box<[FrozenNode<T>]>,
467    /// All edge labels concatenated; sliced by `(label_start, label_len)`.
468    labels: Box<[u8]>,
469    /// All child-id lists concatenated; each node's children live in a
470    /// contiguous run sorted by the first byte of their label.
471    children: Box<[NodeId]>,
472    /// Parallel to `children`: the first byte of each child's label.
473    /// Lets `find_child` binary-search a tight `&[u8]` slice instead of
474    /// chasing two pointer indirections per probe (`nodes` → `labels`).
475    child_first_bytes: Box<[u8]>,
476    len: usize,
477}
478
479#[derive(Clone)]
480struct FrozenNode<T> {
481    // All four offsets/lengths are `u16`; the builder consequently rejects
482    // any trie whose total node count, total label bytes, or total edge
483    // count would exceed `u16::MAX` (65,535). The worst-case node count
484    // for `N` inserted entries is `2N + 1`, so this caps the trie at
485    // roughly **32,767 entries** — enough for any realistic command set.
486    //
487    // Storing the `start + len` pair (rather than just the end and
488    // deriving start from the previous node) keeps `label_of` and
489    // `children_of` to a single `FrozenNode` load per access, which
490    // matters on the lookup hot path.
491    label_start: u16,
492    label_len: u16,
493    children_start: u16,
494    children_len: u16,
495    value: Option<T>,
496}
497
498impl<T> CommandTrie<T> {
499    // SAFETY NOTE FOR INTERNAL UNCHECKED ACCESSES
500    // -------------------------------------------
501    // The following helpers (`label_of`, `children_of`, `value_of`,
502    // `find_child`) use `get_unchecked` on `nodes`, `labels`, `children`,
503    // and `child_first_bytes`. The build invariants make every index they
504    // compute provably in-bounds:
505    //
506    //   * `nodes` always contains at least the root after `build`, so
507    //     `ROOT = 0` is a valid index.
508    //   * Every `NodeId` stored in `children` was emitted by `build_visit`
509    //     as `nodes.len()` at the moment of the matching `nodes.push`, so
510    //     it is always `< nodes.len()` in the finished trie.
511    //   * `(label_start, label_len)` and `(children_start, children_len)`
512    //     for each node describe ranges that `build_visit` pushed into
513    //     `labels` / `children` / `child_first_bytes` *before* writing the
514    //     node's fields. They are always in-bounds.
515    //   * `child_first_bytes.len() == children.len()` by construction.
516    //
517    // External index-typed APIs (e.g. `descend_to_node`'s key bytes) are
518    // not affected; they still use safe slicing.
519
520    /// Number of entries stored in the trie.
521    #[must_use]
522    pub fn len(&self) -> usize {
523        self.len
524    }
525
526    /// Returns `true` when the trie holds no entries.
527    #[must_use]
528    pub fn is_empty(&self) -> bool {
529        self.len == 0
530    }
531
532    #[inline]
533    fn label_of(&self, id: NodeId) -> &[u8] {
534        // SAFETY: see the SAFETY NOTE at the top of this impl.
535        unsafe {
536            let n = self.nodes.get_unchecked(id as usize);
537            let start = n.label_start as usize;
538            let end = start + n.label_len as usize;
539            self.labels.get_unchecked(start..end)
540        }
541    }
542
543    #[inline]
544    fn children_of(&self, id: NodeId) -> &[NodeId] {
545        // SAFETY: see the SAFETY NOTE at the top of this impl.
546        unsafe {
547            let n = self.nodes.get_unchecked(id as usize);
548            let start = n.children_start as usize;
549            let end = start + n.children_len as usize;
550            self.children.get_unchecked(start..end)
551        }
552    }
553
554    #[inline]
555    fn value_of(&self, id: NodeId) -> Option<&T> {
556        // SAFETY: see the SAFETY NOTE at the top of this impl.
557        unsafe { self.nodes.get_unchecked(id as usize).value.as_ref() }
558    }
559
560    /// Returns the child of `parent` whose label starts with the same UTF-8
561    /// char as `rem`, if any.
562    ///
563    /// `rem` must be non-empty and begin with a valid UTF-8 leading byte; this
564    /// is always true when called with a slice of a `&str`. The fast path is
565    /// pure ASCII: the leading byte is `< 0x80`, unique among siblings, and
566    /// binary search on `child_first_bytes` returns the match directly with
567    /// no further work — equivalent to the pre-UTF-8 implementation. For
568    /// multi-byte chars two siblings can share a leading byte (e.g.
569    /// `'é'`=`C3 A9` and `'ê'`=`C3 AA`), so on a hit we widen to the
570    /// equal-byte run and check the full leading char against each candidate.
571    #[inline]
572    fn find_child(&self, parent: NodeId, rem: &[u8]) -> Option<NodeId> {
573        // SAFETY: see the SAFETY NOTE at the top of this impl.
574        unsafe {
575            let n = self.nodes.get_unchecked(parent as usize);
576            let start = n.children_start as usize;
577            let end = start + n.children_len as usize;
578            let first = *rem.get_unchecked(0);
579            let slab = self.child_first_bytes.get_unchecked(start..end);
580            // Binary-search the dense first-byte slab — typically one cache
581            // line — instead of dereferencing `nodes` then `labels` per probe.
582            let idx = slab.binary_search(&first).ok()?;
583            // Hot path: ASCII first byte is unique among siblings (char-aligned
584            // splits guarantee siblings have distinct leading chars, and an
585            // ASCII byte cannot share its slot with any other char's leading
586            // byte). No UTF-8 work needed.
587            if first < 0x80 {
588                return Some(*self.children.get_unchecked(start + idx));
589            }
590            // Cold path: multi-byte char. Disambiguate against the equal-byte
591            // run by comparing the full leading-char bytes.
592            self.find_child_multibyte(start, slab, idx, first, rem)
593        }
594    }
595
596    /// Cold-path disambiguation for multi-byte leading chars. Outlined so the
597    /// ASCII fast path in `find_child` stays small enough to inline cleanly.
598    #[cold]
599    #[inline(never)]
600    fn find_child_multibyte(
601        &self,
602        start: usize,
603        slab: &[u8],
604        idx: usize,
605        first: u8,
606        rem: &[u8],
607    ) -> Option<NodeId> {
608        let clen = utf8_char_len(first);
609        // `rem` is always a suffix of a valid `&str` cut at an edge-label
610        // boundary, which is itself char-aligned. A leader byte at `rem[0]`
611        // therefore implies `rem.len() >= clen`. We assert this in debug
612        // builds; the SAFETY block below relies on it.
613        debug_assert!(rem.len() >= clen);
614        // SAFETY: see the SAFETY NOTE at the top of this impl; the debug
615        // assertion above and the loop guards make each unchecked index
616        // in-bounds.
617        unsafe {
618            let needle = rem.get_unchecked(..clen);
619            // Walk the (usually length-1, rarely up to 4) run of siblings
620            // sharing this leading byte, comparing full leading-char bytes.
621            let mut lo = idx;
622            while lo > 0 && *slab.get_unchecked(lo - 1) == first {
623                lo -= 1;
624            }
625            let mut i = lo;
626            while i < slab.len() && *slab.get_unchecked(i) == first {
627                let child = *self.children.get_unchecked(start + i);
628                let lbl = self.label_of(child);
629                if lbl.len() >= clen && lbl.get_unchecked(..clen) == needle {
630                    return Some(child);
631                }
632                i += 1;
633            }
634            None
635        }
636    }
637
638    /// Exact lookup.
639    #[must_use]
640    pub fn get(&self, key: &str) -> Option<&T> {
641        let mut node = ROOT;
642        let mut rem = key.as_bytes();
643        loop {
644            if rem.is_empty() {
645                return self.value_of(node);
646            }
647            let child = self.find_child(node, rem)?;
648            let lbl = self.label_of(child);
649            if !rem.starts_with(lbl) {
650                return None;
651            }
652            rem = &rem[lbl.len()..];
653            node = child;
654        }
655    }
656
657    /// Returns `true` when `key` is present in the trie.
658    #[must_use]
659    pub fn contains(&self, key: &str) -> bool {
660        self.get(key).is_some()
661    }
662
663    /// Longest stored key that is a prefix of `input`, with its value.
664    ///
665    /// The returned `&str` is a slice of `input`; its `len()` is the number
666    /// of bytes consumed.
667    #[must_use]
668    pub fn longest_prefix_match<'a>(&self, input: &'a str) -> Option<(&'a str, &T)> {
669        let bytes = input.as_bytes();
670        let mut node = ROOT;
671        let mut consumed = 0usize;
672        // Track the deepest ancestor with a value as `(consumed_bytes, value_ref)`
673        // so we never need to re-`unwrap` a known-Some lookup at the end.
674        let mut best: Option<(usize, &T)> = None;
675        loop {
676            if let Some(v) = self.value_of(node) {
677                best = Some((consumed, v));
678            }
679            let rem = &bytes[consumed..];
680            if rem.is_empty() {
681                break;
682            }
683            let Some(child) = self.find_child(node, rem) else {
684                break;
685            };
686            let lbl = self.label_of(child);
687            if !rem.starts_with(lbl) {
688                break;
689            }
690            consumed += lbl.len();
691            node = child;
692        }
693        best.map(|(n, v)| (&input[..n], v))
694    }
695
696    /// Returns true if any stored key starts with `prefix`.
697    #[must_use]
698    pub fn contains_prefix(&self, prefix: &str) -> bool {
699        match self.descend_to_node(prefix.as_bytes()) {
700            Some(node) => self.value_of(node).is_some() || !self.children_of(node).is_empty(),
701            None => false,
702        }
703    }
704
705    /// Walks down the trie consuming `prefix` and returns the node it lands
706    /// on (which may be one whose edge label extends past `prefix`). Allocates
707    /// nothing. Used by the hot "does anything match?" / "how many match?"
708    /// paths that don't care about reconstructing the prefix string.
709    fn descend_to_node(&self, mut rem: &[u8]) -> Option<NodeId> {
710        let mut node = ROOT;
711        while !rem.is_empty() {
712            let child = self.find_child(node, rem)?;
713            let lbl = self.label_of(child);
714            if rem.len() >= lbl.len() {
715                if !rem.starts_with(lbl) {
716                    return None;
717                }
718                rem = &rem[lbl.len()..];
719                node = child;
720            } else {
721                if !lbl.starts_with(rem) {
722                    return None;
723                }
724                node = child;
725                break;
726            }
727        }
728        Some(node)
729    }
730
731    /// Like [`Self::descend_to_node`] but also returns the bytes from the
732    /// trie root to the landing node — needed by [`Self::subtrie`] which
733    /// exposes them as `common_prefix`.
734    fn descend_to_prefix(&self, mut rem: &[u8]) -> Option<(NodeId, Vec<u8>)> {
735        let mut node = ROOT;
736        let mut path: Vec<u8> = Vec::with_capacity(rem.len());
737        while !rem.is_empty() {
738            let child = self.find_child(node, rem)?;
739            let lbl = self.label_of(child);
740            if rem.len() >= lbl.len() {
741                if !rem.starts_with(lbl) {
742                    return None;
743                }
744                path.extend_from_slice(lbl);
745                rem = &rem[lbl.len()..];
746                node = child;
747            } else {
748                if !lbl.starts_with(rem) {
749                    return None;
750                }
751                path.extend_from_slice(lbl);
752                node = child;
753                break;
754            }
755        }
756        Some((node, path))
757    }
758
759    /// Iterator over all `(key, value)` pairs in alphabetical order.
760    ///
761    /// Each item allocates a fresh `String` for the key. For hot loops
762    /// prefer [`Self::for_each`], which reuses an internal buffer.
763    #[must_use]
764    pub fn iter(&self) -> Iter<'_, T> {
765        Iter::new(self, ROOT, Vec::new())
766    }
767
768    /// Visit every `(key, value)` pair in alphabetical order without
769    /// allocating per match.
770    pub fn for_each(&self, mut f: impl FnMut(&str, &T)) {
771        let mut buf = Vec::new();
772        for_each_descendants(self, ROOT, &mut buf, &mut f);
773    }
774
775    /// View of the entries whose key starts with `prefix`.
776    ///
777    /// Returns `None` if no entry has that prefix. Prefer this when you need
778    /// to ask multiple questions about the same prefix.
779    #[must_use]
780    pub fn subtrie<'a>(&'a self, prefix: &str) -> Option<SubTrie<'a, T>> {
781        let (mut node, mut path) = self.descend_to_prefix(prefix.as_bytes())?;
782        if self.value_of(node).is_none() && self.children_of(node).is_empty() {
783            return None;
784        }
785        // Extend the shared prefix through any unambiguous (single-child,
786        // no-value) passthrough chain.
787        loop {
788            let kids = self.children_of(node);
789            if self.value_of(node).is_none() && kids.len() == 1 {
790                let child = kids[0];
791                path.extend_from_slice(self.label_of(child));
792                node = child;
793            } else {
794                break;
795            }
796        }
797        Some(SubTrie {
798            trie: self,
799            node,
800            query_len: prefix.len(),
801            common_prefix: path,
802        })
803    }
804
805    /// All `(key, value)` pairs whose key starts with `prefix`.
806    ///
807    /// Allocates a `Vec` and a `String` per match; for hot paths prefer
808    /// [`Self::for_each_completion`].
809    #[must_use]
810    pub fn completions<'a>(&'a self, prefix: &str) -> Vec<(String, &'a T)> {
811        match self.subtrie(prefix) {
812            Some(sub) => sub.into_iter().collect(),
813            None => Vec::new(),
814        }
815    }
816
817    /// Number of entries whose key starts with `prefix`. Allocation-free.
818    #[must_use]
819    pub fn count_completions(&self, prefix: &str) -> usize {
820        match self.descend_to_node(prefix.as_bytes()) {
821            Some(node) => count_values(self, node),
822            None => 0,
823        }
824    }
825
826    /// Longest string `s` such that every key matching `prefix` also starts
827    /// with `s`. Always `s.starts_with(prefix)`; may extend past `prefix`
828    /// when only one branch is reachable. `None` if no entries match.
829    ///
830    /// Allocates exactly one `String` (the returned value).
831    #[must_use]
832    pub fn completion_prefix(&self, prefix: &str) -> Option<String> {
833        let mut rem = prefix.as_bytes();
834        let mut node = ROOT;
835        let mut buf: Vec<u8> = Vec::with_capacity(rem.len());
836        // Inline descent so we collect bytes into a single buffer that becomes
837        // the returned String -- no intermediate `SubTrie` allocation.
838        while !rem.is_empty() {
839            let child = self.find_child(node, rem)?;
840            let lbl = self.label_of(child);
841            if rem.len() >= lbl.len() {
842                if !rem.starts_with(lbl) {
843                    return None;
844                }
845                buf.extend_from_slice(lbl);
846                rem = &rem[lbl.len()..];
847                node = child;
848            } else {
849                if !lbl.starts_with(rem) {
850                    return None;
851                }
852                buf.extend_from_slice(lbl);
853                node = child;
854                break;
855            }
856        }
857        if self.value_of(node).is_none() && self.children_of(node).is_empty() {
858            return None;
859        }
860        // Extend through unambiguous passthrough chains.
861        while self.value_of(node).is_none() && self.children_of(node).len() == 1 {
862            let child = self.children_of(node)[0];
863            buf.extend_from_slice(self.label_of(child));
864            node = child;
865        }
866        // SAFETY: `buf` is a concatenation of char-aligned edge labels, all valid UTF-8.
867        Some(unsafe { utf8_string_unchecked(buf) })
868    }
869
870    /// Visit every completion of `prefix` without allocating per match.
871    pub fn for_each_completion(&self, prefix: &str, mut f: impl FnMut(&str, &T)) {
872        if let Some(sub) = self.subtrie(prefix) {
873            sub.for_each(&mut f);
874        }
875    }
876}
877
878impl<T: fmt::Debug> fmt::Debug for CommandTrie<T> {
879    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
880        // `child_first_bytes` is a derived lookup index over `children`;
881        // intentionally omitted from Debug output.
882        f.debug_struct("CommandTrie")
883            .field("len", &self.len)
884            .field("nodes", &self.nodes.len())
885            .field("labels_bytes", &self.labels.len())
886            .field("children_edges", &self.children.len())
887            .finish_non_exhaustive()
888    }
889}
890
891impl<T> Default for CommandTrie<T> {
892    /// An empty, frozen trie (equivalent to `CommandTrieBuilder::new().build()`).
893    fn default() -> Self {
894        CommandTrieBuilder::new().build()
895    }
896}
897
898impl<'a, T> IntoIterator for &'a CommandTrie<T> {
899    type Item = (String, &'a T);
900    type IntoIter = Iter<'a, T>;
901    fn into_iter(self) -> Self::IntoIter {
902        self.iter()
903    }
904}
905
906// ============================================================
907// SubTrie view
908// ============================================================
909
910/// View over the subset of a [`CommandTrie`] whose keys share a common prefix.
911///
912/// Construct via [`CommandTrie::subtrie`]. Guaranteed non-empty.
913#[derive(Clone)]
914pub struct SubTrie<'a, T> {
915    trie: &'a CommandTrie<T>,
916    /// Deepest node such that every entry in this view lives in its subtrie.
917    node: NodeId,
918    /// Length in bytes of the original `prefix` argument to [`CommandTrie::subtrie`].
919    /// Always `<= common_prefix.len()`. Used by [`SubTrie::extension`].
920    query_len: usize,
921    /// Bytes from the trie root to `node`; also the longest common prefix
922    /// shared by every entry in this view.
923    common_prefix: Vec<u8>,
924}
925
926impl<'a, T> SubTrie<'a, T> {
927    /// Longest prefix shared by every entry in the view.
928    #[must_use]
929    pub fn common_prefix(&self) -> &str {
930        // SAFETY: `common_prefix` is a concatenation of char-aligned edge labels.
931        unsafe { utf8_str_unchecked(&self.common_prefix) }
932    }
933
934    /// Bytes between the originally queried prefix and [`Self::common_prefix`].
935    ///
936    /// This is exactly what a fish-style line editor should splice into the
937    /// buffer on TAB: the unambiguous auto-extension implied by what the user
938    /// has typed so far. Empty when the typed prefix is already at a branch
939    /// point (caller should then display the menu of completions).
940    #[must_use]
941    pub fn extension(&self) -> &str {
942        // SAFETY: `self.query_len` is the byte length of the originally queried
943        // `&str`, a char boundary; `common_prefix` is valid UTF-8 in full, so
944        // the suffix is also valid UTF-8.
945        unsafe { utf8_str_unchecked(&self.common_prefix[self.query_len..]) }
946    }
947
948    /// `true` when exactly one entry matches — the caller can commit it and
949    /// stop prompting. O(1).
950    #[must_use]
951    pub fn is_unique(&self) -> bool {
952        self.trie.value_of(self.node).is_some() && self.trie.children_of(self.node).is_empty()
953    }
954
955    /// Value at this view's deepest node, when that node itself holds a value
956    /// (i.e. [`Self::common_prefix`] is itself a stored key). Returns `None`
957    /// for a pure branch-point view.
958    #[must_use]
959    pub fn value(&self) -> Option<&'a T> {
960        self.trie.value_of(self.node)
961    }
962
963    /// The single matching value when this view contains exactly one entry,
964    /// else `None`. Combines [`Self::is_unique`] and [`Self::value`]; spares
965    /// the caller a follow-up `trie.get(sub.common_prefix())`.
966    #[must_use]
967    pub fn unique_value(&self) -> Option<&'a T> {
968        if self.is_unique() {
969            self.value()
970        } else {
971            None
972        }
973    }
974
975    /// Number of entries in the view. Walks the subtrie.
976    #[must_use]
977    pub fn len(&self) -> usize {
978        count_values(self.trie, self.node)
979    }
980
981    /// Always `false` — a `SubTrie` only exists when at least one entry matches.
982    #[must_use]
983    pub fn is_empty(&self) -> bool {
984        false
985    }
986
987    /// Iterator over `(key, value)` pairs in this view, alphabetically.
988    ///
989    /// Clones `common_prefix` into the iterator's buffer. If you only walk
990    /// the view once, prefer `subtrie.into_iter()`, which moves the buffer
991    /// in instead.
992    #[must_use]
993    pub fn iter(&self) -> Iter<'a, T> {
994        Iter::new(self.trie, self.node, self.common_prefix.clone())
995    }
996
997    /// Visit every entry in the view without allocating per match.
998    pub fn for_each(&self, mut f: impl FnMut(&str, &T)) {
999        let mut buf = self.common_prefix.clone();
1000        // The starting node's label is already in `common_prefix`, so emit
1001        // its value here and recurse into children directly.
1002        if let Some(v) = self.trie.value_of(self.node) {
1003            // SAFETY: `buf` clones `common_prefix`, all char-aligned UTF-8.
1004            f(unsafe { utf8_str_unchecked(&buf) }, v);
1005        }
1006        for &child in self.trie.children_of(self.node) {
1007            for_each_descendants(self.trie, child, &mut buf, &mut f);
1008        }
1009    }
1010}
1011
1012impl<'a, T> IntoIterator for &SubTrie<'a, T> {
1013    type Item = (String, &'a T);
1014    type IntoIter = Iter<'a, T>;
1015    fn into_iter(self) -> Self::IntoIter {
1016        self.iter()
1017    }
1018}
1019
1020impl<'a, T> IntoIterator for SubTrie<'a, T> {
1021    type Item = (String, &'a T);
1022    type IntoIter = Iter<'a, T>;
1023    /// Moves `common_prefix` into the iterator's buffer, avoiding the clone
1024    /// that `(&subtrie).into_iter()` would perform.
1025    fn into_iter(self) -> Self::IntoIter {
1026        Iter::new(self.trie, self.node, self.common_prefix)
1027    }
1028}
1029
1030impl<T> fmt::Debug for SubTrie<'_, T> {
1031    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1032        f.debug_struct("SubTrie")
1033            .field("common_prefix", &self.common_prefix())
1034            .field("is_unique", &self.is_unique())
1035            .finish()
1036    }
1037}
1038
1039// ============================================================
1040// Iterator
1041// ============================================================
1042
1043/// Iterator yielding `(key, value)` pairs from a [`CommandTrie`] or [`SubTrie`].
1044///
1045/// Implements [`ExactSizeIterator`]: the remaining count is computed once at
1046/// construction (one DFS over the starting subtrie) and decremented per item,
1047/// so `collect::<Vec<_>>()` reserves the exact capacity up front.
1048pub struct Iter<'a, T> {
1049    trie: &'a CommandTrie<T>,
1050    stack: Vec<Frame>,
1051    path: Vec<u8>,
1052    /// On the first call, emit the starting node's own value (if any).
1053    /// Its label is already represented in `path`, so it can't be handled by
1054    /// the normal `Frame::Enter` step.
1055    pending_root: Option<NodeId>,
1056    /// Exact number of `(key, value)` pairs still to be yielded.
1057    remaining: usize,
1058}
1059
1060enum Frame {
1061    /// Push the node's label onto `path`, then emit its value (if any), then
1062    /// schedule its children.
1063    Enter(NodeId),
1064    /// Pop `usize` bytes from `path`. Width matches `FrozenNode::label_len`;
1065    /// see the note there about the `u16` size cap.
1066    Exit(u16),
1067}
1068
1069impl<'a, T> Iter<'a, T> {
1070    fn new(trie: &'a CommandTrie<T>, root: NodeId, initial_path: Vec<u8>) -> Self {
1071        let mut stack = Vec::new();
1072        // Push children in reverse so the first child pops first → left-to-right.
1073        let kids = trie.children_of(root);
1074        for &child in kids.iter().rev() {
1075            stack.push(Frame::Enter(child));
1076        }
1077        let pending_root = if trie.value_of(root).is_some() {
1078            Some(root)
1079        } else {
1080            None
1081        };
1082        let remaining = count_values(trie, root);
1083        Self {
1084            trie,
1085            stack,
1086            path: initial_path,
1087            pending_root,
1088            remaining,
1089        }
1090    }
1091}
1092
1093impl<'a, T> Iterator for Iter<'a, T> {
1094    type Item = (String, &'a T);
1095
1096    fn next(&mut self) -> Option<Self::Item> {
1097        if let Some(id) = self.pending_root.take() {
1098            let v = self.trie.value_of(id).expect("pending_root has a value");
1099            self.remaining -= 1;
1100            // SAFETY: `path` holds the starting node's bytes, a char-aligned
1101            // concatenation of edge labels — valid UTF-8.
1102            return Some((unsafe { utf8_string_unchecked(self.path.clone()) }, v));
1103        }
1104        while let Some(frame) = self.stack.pop() {
1105            match frame {
1106                Frame::Exit(n) => {
1107                    let new_len = self.path.len() - n as usize;
1108                    self.path.truncate(new_len);
1109                }
1110                Frame::Enter(node) => {
1111                    let lbl = self.trie.label_of(node);
1112                    self.path.extend_from_slice(lbl);
1113                    // SAFETY of cast: `lbl` came from `FrozenNode::label_len: u16`,
1114                    // so its length always fits in `u16`.
1115                    self.stack
1116                        .push(Frame::Exit(u16::try_from(lbl.len()).unwrap()));
1117                    for &child in self.trie.children_of(node).iter().rev() {
1118                        self.stack.push(Frame::Enter(child));
1119                    }
1120                    if let Some(v) = self.trie.value_of(node) {
1121                        self.remaining -= 1;
1122                        // SAFETY: `path` is the concatenation of char-aligned edge labels — valid UTF-8.
1123                        return Some((unsafe { utf8_string_unchecked(self.path.clone()) }, v));
1124                    }
1125                }
1126            }
1127        }
1128        None
1129    }
1130
1131    #[inline]
1132    fn size_hint(&self) -> (usize, Option<usize>) {
1133        (self.remaining, Some(self.remaining))
1134    }
1135}
1136
1137impl<T> ExactSizeIterator for Iter<'_, T> {
1138    #[inline]
1139    fn len(&self) -> usize {
1140        self.remaining
1141    }
1142}
1143
1144impl<T> FusedIterator for Iter<'_, T> {}
1145
1146impl<T> fmt::Debug for Iter<'_, T> {
1147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1148        f.debug_struct("Iter")
1149            .field("remaining_frames", &self.stack.len())
1150            .finish()
1151    }
1152}
1153
1154// ============================================================
1155// Internal walkers
1156// ============================================================
1157
1158fn for_each_descendants<T>(
1159    trie: &CommandTrie<T>,
1160    node: NodeId,
1161    buf: &mut Vec<u8>,
1162    f: &mut impl FnMut(&str, &T),
1163) {
1164    let prev = buf.len();
1165    buf.extend_from_slice(trie.label_of(node));
1166    if let Some(v) = trie.value_of(node) {
1167        // SAFETY: `buf` is the concatenation of char-aligned edge labels — valid UTF-8.
1168        f(unsafe { utf8_str_unchecked(buf) }, v);
1169    }
1170    for &child in trie.children_of(node) {
1171        for_each_descendants(trie, child, buf, f);
1172    }
1173    buf.truncate(prev);
1174}
1175
1176fn count_values<T>(trie: &CommandTrie<T>, node: NodeId) -> usize {
1177    let mut n = usize::from(trie.value_of(node).is_some());
1178    for &child in trie.children_of(node) {
1179        n += count_values(trie, child);
1180    }
1181    n
1182}
1183
1184// ============================================================
1185// Tests
1186// ============================================================
1187
1188#[cfg(test)]
1189mod tests {
1190    use super::*;
1191    use alloc::format;
1192    use alloc::string::ToString;
1193
1194    /// Convenience for tests that don't care about the build step.
1195    fn build_from<'a, I: IntoIterator<Item = (&'a str, i32)>>(items: I) -> CommandTrie<i32> {
1196        let mut b = CommandTrieBuilder::new();
1197        for (k, v) in items {
1198            b.insert(k, v);
1199        }
1200        b.build()
1201    }
1202
1203    // -------- Builder behavior --------
1204
1205    #[test]
1206    fn builder_insert_overwrite_remove() {
1207        let mut b = CommandTrieBuilder::new();
1208        assert_eq!(b.insert("commit", 1), None);
1209        assert_eq!(b.insert("commit", 2), Some(1));
1210        assert_eq!(b.get("commit"), Some(&2));
1211        assert!(b.contains("commit"));
1212        assert_eq!(b.remove("commit"), Some(2));
1213        assert_eq!(b.remove("commit"), None);
1214        assert!(b.is_empty());
1215    }
1216
1217    #[test]
1218    fn builder_remove_prunes_and_merges() {
1219        let mut b = CommandTrieBuilder::new();
1220        b.insert("command", 1);
1221        b.insert("commit", 2);
1222        b.insert("comm", 3);
1223        assert_eq!(b.remove("comm"), Some(3));
1224        assert_eq!(b.remove("commit"), Some(2));
1225        assert_eq!(b.get("command"), Some(&1));
1226        assert_eq!(b.len(), 1);
1227    }
1228
1229    #[test]
1230    fn builder_from_iter_and_extend() {
1231        let b: CommandTrieBuilder<i32> = [("a", 1), ("ab", 2), ("abc", 3)].into_iter().collect();
1232        assert_eq!(b.len(), 3);
1233        assert_eq!(b.get("ab"), Some(&2));
1234    }
1235
1236    #[test]
1237    fn builder_get_diverges_mid_edge() {
1238        // First byte matches a child but the label diverges: covers
1239        // the `!rem.starts_with(&child.label)` path in BuilderNode::get.
1240        let mut b = CommandTrieBuilder::new();
1241        b.insert("command", 1);
1242        assert_eq!(b.get("comx"), None);
1243        assert_eq!(b.get("c"), None);
1244    }
1245
1246    // -------- Frozen trie: basic queries --------
1247
1248    #[test]
1249    fn frozen_get() {
1250        let t = build_from([("commit", 1), ("command", 2)]);
1251        assert_eq!(t.get("commit"), Some(&1));
1252        assert_eq!(t.get("command"), Some(&2));
1253        assert_eq!(t.get("comm"), None);
1254        assert_eq!(t.get("commits"), None);
1255        assert_eq!(t.get(""), None);
1256        assert!(t.contains("commit"));
1257        assert!(!t.contains("comm"));
1258    }
1259
1260    #[test]
1261    fn frozen_query_accepts_non_ascii_keys() {
1262        // Read methods accept `&str` and must remain safe (no panic, no UB)
1263        // when queried with multi-byte UTF-8 input against an ASCII-only
1264        // trie. The queries below simply cannot match any stored edge and
1265        // should return the empty answer rather than panicking.
1266        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1267
1268        // Multi-byte first byte: 0xC3 ('é' starts with 0xC3 0xA9) is > 0x7F
1269        // and cannot index any child edge.
1270        assert_eq!(t.get("café"), None);
1271        assert!(!t.contains("café"));
1272
1273        // Non-ASCII appearing after a valid ASCII prefix.
1274        assert_eq!(t.get("commité"), None);
1275        assert_eq!(t.get("comméand"), None);
1276
1277        // Pure non-ASCII / emoji.
1278        assert_eq!(t.get("🦀"), None);
1279        assert_eq!(t.get("π"), None);
1280
1281        // Prefix-flavored queries behave the same: no match, no panic.
1282        assert!(!t.contains_prefix("café"));
1283        assert_eq!(t.count_completions("café"), 0);
1284        assert!(t.completions("café").is_empty());
1285        assert_eq!(t.completion_prefix("café"), None);
1286        assert!(t.subtrie("café").is_none());
1287        assert_eq!(t.longest_prefix_match("café"), None);
1288
1289        // Non-ASCII tail after a real command still finds the command via
1290        // longest-prefix match -- the ASCII prefix is consumed cleanly and
1291        // the multi-byte suffix simply fails to extend any edge.
1292        assert_eq!(t.longest_prefix_match("commit é"), Some(("commit", &1)),);
1293    }
1294
1295    #[test]
1296    fn frozen_empty_trie() {
1297        let t = CommandTrieBuilder::<i32>::new().build();
1298        assert_eq!(t.len(), 0);
1299        assert!(t.is_empty());
1300        assert_eq!(t.get(""), None);
1301        assert_eq!(t.get("anything"), None);
1302        assert!(!t.contains_prefix(""));
1303        assert!(t.subtrie("").is_none());
1304        assert_eq!(t.completion_prefix(""), None);
1305        assert_eq!(t.count_completions(""), 0);
1306        assert!(t.completions("").is_empty());
1307        assert_eq!(t.iter().count(), 0);
1308    }
1309
1310    #[test]
1311    fn frozen_longest_prefix_match() {
1312        let t = build_from([("git", 1), ("git-status", 2)]);
1313        assert_eq!(
1314            t.longest_prefix_match("git-status --short"),
1315            Some(("git-status", &2))
1316        );
1317        assert_eq!(t.longest_prefix_match("git foo"), Some(("git", &1)));
1318        assert_eq!(t.longest_prefix_match("git"), Some(("git", &1)));
1319        assert_eq!(t.longest_prefix_match("gi"), None);
1320        assert_eq!(t.longest_prefix_match("zzz"), None);
1321        assert_eq!(t.longest_prefix_match(""), None);
1322        // Returned slice's len() is the consumed byte count.
1323        let (matched, _) = t.longest_prefix_match("git-status xyz").unwrap();
1324        assert_eq!(matched.len(), 10);
1325    }
1326
1327    #[test]
1328    fn frozen_contains_prefix() {
1329        let t = build_from([("commit", 1)]);
1330        assert!(t.contains_prefix(""));
1331        assert!(t.contains_prefix("c"));
1332        assert!(t.contains_prefix("comm"));
1333        assert!(t.contains_prefix("commit"));
1334        assert!(!t.contains_prefix("commits"));
1335        assert!(!t.contains_prefix("d"));
1336    }
1337
1338    // -------- Frozen trie: completions / subtrie --------
1339
1340    #[test]
1341    fn frozen_completions() {
1342        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1343        let mut got = t.completions("comm");
1344        got.sort_by(|a, b| a.0.cmp(&b.0));
1345        assert_eq!(
1346            got,
1347            vec![("command".to_string(), &2), ("commit".to_string(), &1)]
1348        );
1349        assert_eq!(t.completions("").len(), 4);
1350        assert!(t.completions("xyz").is_empty());
1351    }
1352
1353    #[test]
1354    fn frozen_completions_prefix_ends_mid_edge() {
1355        let t = build_from([("command", 1)]);
1356        let got = t.completions("co");
1357        assert_eq!(got, vec![("command".to_string(), &1)]);
1358    }
1359
1360    #[test]
1361    fn frozen_completion_prefix_extends_past_query() {
1362        let t = build_from([("command", 1), ("commit", 2)]);
1363        assert_eq!(t.completion_prefix("c").as_deref(), Some("comm"));
1364
1365        let t = build_from([("command", 1)]);
1366        assert_eq!(t.completion_prefix("").as_deref(), Some("command"));
1367        assert_eq!(t.completion_prefix("c").as_deref(), Some("command"));
1368        assert_eq!(t.completion_prefix("commits"), None);
1369    }
1370
1371    #[test]
1372    fn frozen_subtrie_views() {
1373        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1374        let sub = t.subtrie("comm").unwrap();
1375        assert_eq!(sub.common_prefix(), "comm");
1376        assert_eq!(sub.len(), 2);
1377        assert!(!sub.is_empty());
1378
1379        let mut via_iter: Vec<(String, i32)> = sub.iter().map(|(k, v)| (k, *v)).collect();
1380        via_iter.sort();
1381        let mut via_for_each: Vec<(String, i32)> = Vec::new();
1382        sub.for_each(|k, v| via_for_each.push((k.to_string(), *v)));
1383        via_for_each.sort();
1384        assert_eq!(via_iter, via_for_each);
1385        assert_eq!(
1386            via_iter,
1387            vec![("command".to_string(), 2), ("commit".to_string(), 1)]
1388        );
1389
1390        // by-value IntoIterator
1391        let owned: Vec<_> = sub.into_iter().collect();
1392        assert_eq!(owned.len(), 2);
1393    }
1394
1395    #[test]
1396    fn frozen_subtrie_on_exact_leaf() {
1397        let t = build_from([("commit", 1), ("command", 2)]);
1398        let sub = t.subtrie("commit").unwrap();
1399        assert_eq!(sub.common_prefix(), "commit");
1400        assert_eq!(sub.len(), 1);
1401    }
1402
1403    #[test]
1404    fn frozen_subtrie_extension_and_is_unique() {
1405        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1406
1407        // Branch point: extension is empty, more than one match.
1408        let sub = t.subtrie("c").unwrap();
1409        assert_eq!(sub.common_prefix(), "c");
1410        assert_eq!(sub.extension(), "");
1411        assert!(!sub.is_unique());
1412
1413        // Passthrough extends the LCP past the typed prefix.
1414        let sub = t.subtrie("co").unwrap();
1415        assert_eq!(sub.common_prefix(), "co");
1416        assert_eq!(sub.extension(), "");
1417        assert!(!sub.is_unique());
1418
1419        let sub = t.subtrie("comma").unwrap();
1420        assert_eq!(sub.common_prefix(), "command");
1421        assert_eq!(sub.extension(), "nd");
1422        assert!(sub.is_unique());
1423
1424        // Typing exactly one full command yields a unique view with no extension.
1425        let sub = t.subtrie("clone").unwrap();
1426        assert_eq!(sub.extension(), "");
1427        assert!(sub.is_unique());
1428
1429        // A value-bearing internal node bounds the LCP at its own key.
1430        let t2 = build_from([("git", 1), ("github", 2)]);
1431        let sub = t2.subtrie("gi").unwrap();
1432        assert_eq!(sub.common_prefix(), "git");
1433        assert_eq!(sub.extension(), "t");
1434        assert!(!sub.is_unique()); // "git" itself plus "github"
1435    }
1436
1437    #[test]
1438    fn frozen_subtrie_value_and_unique_value() {
1439        let t = build_from([("commit", 1), ("command", 2), ("config", 3)]);
1440
1441        // Branch-point view: no value at the deepest node.
1442        let sub = t.subtrie("com").unwrap();
1443        assert_eq!(sub.value(), None);
1444        assert_eq!(sub.unique_value(), None);
1445
1446        // Unique view: value() and unique_value() both return the entry.
1447        let sub = t.subtrie("commi").unwrap();
1448        assert_eq!(sub.common_prefix(), "commit");
1449        assert_eq!(sub.value(), Some(&1));
1450        assert_eq!(sub.unique_value(), Some(&1));
1451
1452        // Value-bearing internal node: value() exposes it; unique_value() does not.
1453        let t2 = build_from([("git", 10), ("github", 20)]);
1454        let sub = t2.subtrie("gi").unwrap();
1455        assert_eq!(sub.common_prefix(), "git");
1456        assert_eq!(sub.value(), Some(&10));
1457        assert_eq!(sub.unique_value(), None);
1458    }
1459
1460    #[test]
1461    fn frozen_iter_alphabetical() {
1462        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1463        let got: Vec<_> = t.iter().map(|(k, v)| (k, *v)).collect();
1464        assert_eq!(
1465            got,
1466            vec![
1467                ("clone".to_string(), 4),
1468                ("command".to_string(), 2),
1469                ("commit".to_string(), 1),
1470                ("config".to_string(), 3),
1471            ]
1472        );
1473
1474        // for-loop sugar via IntoIterator for &CommandTrie
1475        let mut n = 0;
1476        for _ in &t {
1477            n += 1;
1478        }
1479        assert_eq!(n, 4);
1480    }
1481
1482    #[test]
1483    fn frozen_for_each_no_alloc() {
1484        let t = build_from([("a", 1), ("ab", 2), ("abc", 3)]);
1485        let mut got: Vec<(String, i32)> = Vec::new();
1486        t.for_each(|k, v| got.push((k.to_string(), *v)));
1487        got.sort();
1488        assert_eq!(
1489            got,
1490            vec![
1491                ("a".to_string(), 1),
1492                ("ab".to_string(), 2),
1493                ("abc".to_string(), 3),
1494            ]
1495        );
1496    }
1497
1498    #[test]
1499    fn frozen_count_and_for_each_completion() {
1500        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1501        assert_eq!(t.count_completions("c"), 4);
1502        assert_eq!(t.count_completions("comm"), 2);
1503        assert_eq!(t.count_completions("commit"), 1);
1504        assert_eq!(t.count_completions("z"), 0);
1505
1506        let mut got: Vec<String> = Vec::new();
1507        t.for_each_completion("comm", |k, _| got.push(k.to_string()));
1508        got.sort();
1509        assert_eq!(got, vec!["command".to_string(), "commit".to_string()]);
1510    }
1511
1512    // -------- Layout invariants --------
1513
1514    #[test]
1515    fn build_packs_into_four_allocations() {
1516        let t = build_from([
1517            ("add", 0),
1518            ("alias", 1),
1519            ("branch", 2),
1520            ("checkout", 3),
1521            ("cherry-pick", 4),
1522            ("clean", 5),
1523            ("clone", 6),
1524            ("commit", 7),
1525            ("command", 8),
1526            ("config", 9),
1527        ]);
1528        // Sanity: round-trip all keys.
1529        for (i, k) in [
1530            "add",
1531            "alias",
1532            "branch",
1533            "checkout",
1534            "cherry-pick",
1535            "clean",
1536            "clone",
1537            "commit",
1538            "command",
1539            "config",
1540        ]
1541        .iter()
1542        .enumerate()
1543        {
1544            assert_eq!(t.get(k), Some(&(i as i32)));
1545        }
1546        // `child_first_bytes` is parallel to `children` and reflects the
1547        // first byte of each child's label.
1548        assert_eq!(t.child_first_bytes.len(), t.children.len());
1549        for (i, &child) in t.children.iter().enumerate() {
1550            let lbl0 = t.label_of(child)[0];
1551            assert_eq!(t.child_first_bytes[i], lbl0);
1552        }
1553        // Children of any node form a contiguous, sorted-by-first-byte slice.
1554        for id in 0..t.nodes.len() as NodeId {
1555            let kids = t.children_of(id);
1556            for w in kids.windows(2) {
1557                let a = t.label_of(w[0])[0];
1558                let b = t.label_of(w[1])[0];
1559                assert!(a < b, "siblings not sorted at node {id}: {a} >= {b}");
1560            }
1561        }
1562    }
1563
1564    // -------- Compile-time guarantees --------
1565
1566    const _: fn() = || {
1567        fn assert_send_sync<X: Send + Sync>() {}
1568        assert_send_sync::<CommandTrieBuilder<i32>>();
1569        assert_send_sync::<CommandTrie<i32>>();
1570        assert_send_sync::<SubTrie<'static, i32>>();
1571        assert_send_sync::<Iter<'static, i32>>();
1572    };
1573
1574    // -------- Defaults, Debug, misc trait impls --------
1575
1576    #[test]
1577    fn builder_default_and_clear() {
1578        let mut b: CommandTrieBuilder<i32> = CommandTrieBuilder::default();
1579        assert!(b.is_empty());
1580        b.insert("a", 1);
1581        b.insert("ab", 2);
1582        assert_eq!(b.len(), 2);
1583        b.clear();
1584        assert!(b.is_empty());
1585        assert_eq!(b.get("a"), None);
1586    }
1587
1588    #[test]
1589    fn trie_default_is_empty() {
1590        let t: CommandTrie<i32> = CommandTrie::default();
1591        assert!(t.is_empty());
1592        assert_eq!(t.get("anything"), None);
1593        assert_eq!(t.iter().count(), 0);
1594    }
1595
1596    #[test]
1597    fn debug_impls_render() {
1598        let mut b = CommandTrieBuilder::new();
1599        b.insert("commit", 1);
1600        b.insert("command", 2);
1601        // BuilderNode debug runs via the builder's children field.
1602        let s = format!("{b:?}");
1603        assert!(s.contains("CommandTrieBuilder"));
1604        assert!(s.contains("BuilderNode"));
1605
1606        let t = b.build();
1607        let s = format!("{t:?}");
1608        assert!(s.contains("CommandTrie"));
1609        assert!(s.contains("len"));
1610
1611        let sub = t.subtrie("comm").unwrap();
1612        let s = format!("{sub:?}");
1613        assert!(s.contains("SubTrie"));
1614        assert!(s.contains("comm"));
1615
1616        let it = t.iter();
1617        let s = format!("{it:?}");
1618        assert!(s.contains("Iter"));
1619    }
1620
1621    #[test]
1622    fn subtrie_ref_into_iter() {
1623        let t = build_from([("commit", 1), ("command", 2)]);
1624        let sub = t.subtrie("comm").unwrap();
1625        // Exercise IntoIterator for &SubTrie (clones common_prefix).
1626        let from_ref: Vec<_> = (&sub).into_iter().collect();
1627        assert_eq!(from_ref.len(), 2);
1628        // sub is still usable afterwards.
1629        assert_eq!(sub.len(), 2);
1630    }
1631
1632    #[test]
1633    fn subtrie_for_each_emits_starting_node_value() {
1634        // The starting node itself carries a value: covers the
1635        // "emit value at root of view" branch in SubTrie::for_each.
1636        let t = build_from([("git", 10), ("github", 20)]);
1637        let sub = t.subtrie("git").unwrap();
1638        let mut got: Vec<(String, i32)> = Vec::new();
1639        sub.for_each(|k, v| got.push((k.to_string(), *v)));
1640        got.sort();
1641        assert_eq!(
1642            got,
1643            vec![("git".to_string(), 10), ("github".to_string(), 20)]
1644        );
1645    }
1646
1647    #[test]
1648    fn insert_32k_entries_no_panic() {
1649        // Design constraint: the frozen trie's `u16`-indexed slabs cap the
1650        // worst-case node count at `u16::MAX = 65,535`, which corresponds to
1651        // roughly `32,767` entries (worst case: `2N + 1` nodes per `N` entries).
1652        //
1653        // Build a dense corpus at that size using realistic command-name-style
1654        // keys (think `print -rl -- ${(k)commands}` on a busy `$PATH`): a few
1655        // dozen tool prefixes, a vocabulary of subcommand stems, then a
1656        // numeric suffix to fan out to the target count. Average key length
1657        // is ~16 bytes, which is closer to real CLI completion sets than
1658        // the original 3-byte base-62 indices.
1659        const PREFIXES: &[&str] = &[
1660            "git-",
1661            "cargo-",
1662            "docker-",
1663            "kubectl-",
1664            "npm-",
1665            "pip-",
1666            "rustup-",
1667            "systemctl-",
1668            "journalctl-",
1669            "ip-",
1670            "nmcli-",
1671            "brew-",
1672        ];
1673        const STEMS: &[&str] = &[
1674            "list", "get", "set", "show", "describe", "create", "delete", "update", "apply",
1675            "watch", "rollout", "exec", "logs", "status", "info", "config", "scale", "patch",
1676            "expose", "annotate",
1677        ];
1678        // 12 prefixes × 20 stems × 134 buckets = 32,160 unique keys.
1679        const BUCKETS: u32 = 134;
1680        const N: u32 = PREFIXES.len() as u32 * STEMS.len() as u32 * BUCKETS;
1681        const _: () = assert!(N >= 32_000, "test corpus must hit the documented ~32k cap");
1682
1683        fn key(n: u32, buf: &mut String) {
1684            use core::fmt::Write;
1685            buf.clear();
1686            let p = (n as usize) % PREFIXES.len();
1687            let s = ((n as usize) / PREFIXES.len()) % STEMS.len();
1688            let bucket = (n as usize) / (PREFIXES.len() * STEMS.len());
1689            buf.push_str(PREFIXES[p]);
1690            buf.push_str(STEMS[s]);
1691            buf.push('-');
1692            write!(buf, "{bucket:03}").unwrap();
1693        }
1694
1695        let mut b: CommandTrieBuilder<u32> = CommandTrieBuilder::new();
1696        let mut buf = String::new();
1697        for i in 0..N {
1698            key(i, &mut buf);
1699            b.insert(&buf, i);
1700        }
1701        assert_eq!(b.len(), N as usize);
1702
1703        let t = b.build();
1704        assert_eq!(t.len(), N as usize);
1705
1706        // Spot-check a handful of lookups across the range.
1707        for &i in &[0u32, 1, 11, 12, 239, 240, 1023, 12345, N / 2, N - 1] {
1708            key(i, &mut buf);
1709            assert_eq!(t.get(&buf), Some(&i), "lookup failed for {i}");
1710        }
1711    }
1712
1713    #[test]
1714    fn iter_is_fused() {
1715        fn assert_fused<I: FusedIterator>(_: &I) {}
1716        let t = CommandTrieBuilder::<i32>::new().build();
1717        let it = t.iter();
1718        assert_fused(&it);
1719    }
1720
1721    #[test]
1722    fn iter_is_exact_size() {
1723        fn assert_exact<I: ExactSizeIterator>(_: &I) {}
1724
1725        let t = build_from([("commit", 1), ("command", 2), ("config", 3), ("clone", 4)]);
1726
1727        let it = t.iter();
1728        assert_exact(&it);
1729        assert_eq!(it.len(), 4);
1730        assert_eq!(it.size_hint(), (4, Some(4)));
1731
1732        // `len` decreases monotonically and reaches 0 at exhaustion.
1733        let mut it = t.iter();
1734        for expected in (0..4).rev() {
1735            assert!(it.next().is_some());
1736            assert_eq!(it.len(), expected);
1737            assert_eq!(it.size_hint(), (expected, Some(expected)));
1738        }
1739        assert!(it.next().is_none());
1740        assert_eq!(it.len(), 0);
1741
1742        // SubTrie iterators report the subtrie's exact size, including a
1743        // value at the starting node.
1744        let t2 = build_from([("git", 10), ("github", 20), ("gitlab", 30)]);
1745        let sub = t2.subtrie("git").unwrap();
1746        let it = sub.iter();
1747        assert_eq!(it.len(), 3);
1748        let collected: Vec<_> = sub.into_iter().collect();
1749        assert_eq!(collected.len(), 3);
1750
1751        // Empty trie: len() == 0 up front.
1752        let empty: CommandTrie<i32> = CommandTrieBuilder::new().build();
1753        assert_eq!(empty.iter().len(), 0);
1754    }
1755
1756    // -------- Fuzz vs BTreeMap (rebuild after every op) --------
1757
1758    #[test]
1759    fn fuzz_against_btreemap() {
1760        use alloc::collections::BTreeMap;
1761
1762        let mut state: u64 = 0x_dead_beef_cafe_f00d;
1763        let mut rand = || {
1764            state = state
1765                .wrapping_mul(6364136223846793005)
1766                .wrapping_add(1442695040888963407);
1767            state
1768        };
1769
1770        let keys = [
1771            "", "a", "ab", "abc", "abd", "abcd", "abce", "b", "ba", "bar", "baz", "c", "co", "com",
1772            "comm", "command", "commit", "config", "x", "xy", "xyz",
1773        ];
1774        let probe_prefixes = [
1775            "", "a", "ab", "abc", "abz", "b", "c", "comm", "com", "x", "z",
1776        ];
1777
1778        let mut builder: CommandTrieBuilder<i32> = CommandTrieBuilder::new();
1779        let mut model: BTreeMap<String, i32> = BTreeMap::new();
1780
1781        for op in 0..500 {
1782            let r = rand();
1783            let key = keys[(r as usize) % keys.len()];
1784
1785            if (r >> 32) % 4 == 0 {
1786                let b_prev = builder.remove(key);
1787                let m_prev = model.remove(key);
1788                assert_eq!(b_prev, m_prev, "remove({key:?}) at op {op}");
1789            } else {
1790                let v = (r >> 8) as i32;
1791                let b_prev = builder.insert(key, v);
1792                let m_prev = model.insert(key.to_string(), v);
1793                assert_eq!(b_prev, m_prev, "insert({key:?}, {v}) at op {op}");
1794            }
1795
1796            assert_eq!(builder.len(), model.len(), "len at op {op}");
1797
1798            // Rebuild and validate the frozen trie's read API.
1799            let trie = builder.clone().build();
1800            assert_eq!(trie.len(), model.len());
1801
1802            for k in &keys {
1803                assert_eq!(trie.get(k), model.get(*k), "get({k:?}) at op {op}");
1804                assert_eq!(trie.contains(k), model.contains_key(*k));
1805            }
1806
1807            for pfx in &probe_prefixes {
1808                let mut from_trie: Vec<String> =
1809                    trie.completions(pfx).into_iter().map(|(k, _)| k).collect();
1810                from_trie.sort();
1811                let mut from_model: Vec<String> = model
1812                    .keys()
1813                    .filter(|k| k.starts_with(pfx))
1814                    .cloned()
1815                    .collect();
1816                from_model.sort();
1817                assert_eq!(from_trie, from_model, "completions({pfx:?}) at op {op}");
1818
1819                assert_eq!(
1820                    trie.count_completions(pfx),
1821                    from_model.len(),
1822                    "count_completions({pfx:?}) at op {op}"
1823                );
1824
1825                if let Some(cp) = trie.completion_prefix(pfx) {
1826                    assert!(cp.starts_with(pfx));
1827                    for k in &from_model {
1828                        assert!(k.starts_with(&cp));
1829                    }
1830                } else {
1831                    assert!(from_model.is_empty());
1832                }
1833            }
1834
1835            let from_trie: Vec<(String, i32)> = trie.iter().map(|(k, v)| (k, *v)).collect();
1836            let from_model: Vec<(String, i32)> =
1837                model.iter().map(|(k, v)| (k.clone(), *v)).collect();
1838            assert_eq!(from_trie, from_model, "iter at op {op}");
1839        }
1840    }
1841
1842    // ---------------------------------------------------------------------
1843    // UTF-8 tests
1844    //
1845    // Splits in the radix trie are char-aligned, so any concatenation of
1846    // edge labels is valid UTF-8. These tests cover both the easy cases
1847    // (codepoints whose first byte is unique among siblings) and the
1848    // harder case where two siblings share a leading byte (e.g. `'é'` and
1849    // `'ê'`, both starting with `0xC3`), which exercises the
1850    // equal-byte-run walk in `find_child`.
1851    // ---------------------------------------------------------------------
1852
1853    #[test]
1854    fn utf8_basic_insert_get() {
1855        let t = build_from([
1856            ("café", 1),
1857            ("über", 2),
1858            ("naïve", 3),
1859            ("naïveté", 4),
1860            ("🦀", 5),
1861            ("π", 6),
1862        ]);
1863        assert_eq!(t.get("café"), Some(&1));
1864        assert_eq!(t.get("über"), Some(&2));
1865        assert_eq!(t.get("naïve"), Some(&3));
1866        assert_eq!(t.get("naïveté"), Some(&4));
1867        assert_eq!(t.get("🦀"), Some(&5));
1868        assert_eq!(t.get("π"), Some(&6));
1869        assert_eq!(t.get("cafe"), None);
1870        assert_eq!(t.get("naïv"), None);
1871    }
1872
1873    #[test]
1874    fn utf8_shared_first_byte_siblings() {
1875        // 'é' = C3 A9, 'ê' = C3 AA, 'è' = C3 A8 — all share leading byte
1876        // 0xC3, so the frozen find_child must walk the equal-byte run
1877        // rather than trust binary_search alone.
1878        let t = build_from([("éa", 1), ("êb", 2), ("èc", 3), ("ad", 4)]);
1879        assert_eq!(t.get("éa"), Some(&1));
1880        assert_eq!(t.get("êb"), Some(&2));
1881        assert_eq!(t.get("èc"), Some(&3));
1882        assert_eq!(t.get("ad"), Some(&4));
1883        assert_eq!(t.get("éb"), None);
1884        assert_eq!(t.get("ê"), None);
1885        assert!(t.contains_prefix("é"));
1886        assert!(t.contains_prefix("ê"));
1887        assert!(!t.contains_prefix("ë"));
1888    }
1889
1890    #[test]
1891    fn utf8_split_at_shared_codepoint() {
1892        // Both keys start with 'é' (C3 A9), then diverge at the *next*
1893        // char. The builder must place the split at byte 2 (after 'é'),
1894        // not inside it.
1895        let t = build_from([("éa", 1), ("éb", 2)]);
1896        assert_eq!(t.get("éa"), Some(&1));
1897        assert_eq!(t.get("éb"), Some(&2));
1898        let sub = t.subtrie("é").expect("prefix 'é' should exist");
1899        assert_eq!(sub.common_prefix(), "é");
1900        assert_eq!(sub.len(), 2);
1901    }
1902
1903    #[test]
1904    fn utf8_sort_order_matches_btreemap() {
1905        use alloc::collections::BTreeMap;
1906        let pairs: Vec<(&str, i32)> = vec![
1907            ("apple", 1),
1908            ("café", 2),
1909            ("cab", 3),
1910            ("über", 4),
1911            ("ünder", 5),
1912            ("naïve", 6),
1913            ("naive", 7),
1914            ("🦀rust", 8),
1915            ("🦀", 9),
1916            ("π", 10),
1917            ("zoo", 11),
1918        ];
1919        let model: BTreeMap<&str, i32> = pairs.iter().copied().collect();
1920        let t = build_from(pairs.iter().copied());
1921        let from_trie: Vec<(String, i32)> = t.iter().map(|(k, v)| (k, *v)).collect();
1922        let from_model: Vec<(String, i32)> =
1923            model.iter().map(|(k, v)| (k.to_string(), *v)).collect();
1924        assert_eq!(from_trie, from_model);
1925    }
1926
1927    #[test]
1928    fn utf8_completion_prefix_extends_through_char() {
1929        // The only key starting with "n" is "naïveté", so completion_prefix
1930        // should extend all the way through the multi-byte 'ï'.
1931        let t = build_from([("naïveté", 1), ("zzz", 2)]);
1932        assert_eq!(t.completion_prefix("n").as_deref(), Some("naïveté"));
1933        // Querying with a prefix that ends mid-char is impossible for a
1934        // `&str` input, but querying with a prefix that ends *exactly* on
1935        // a char boundary inside the key must work:
1936        assert_eq!(t.completion_prefix("naï").as_deref(), Some("naïveté"));
1937    }
1938
1939    #[test]
1940    fn utf8_longest_prefix_match() {
1941        let t = build_from([("café", 1), ("ca", 2)]);
1942        assert_eq!(t.longest_prefix_match("café au lait"), Some(("café", &1)));
1943        assert_eq!(t.longest_prefix_match("cab"), Some(("ca", &2)));
1944        // "caf" sits between "ca" and "café"; only "ca" is a stored key.
1945        assert_eq!(t.longest_prefix_match("caf"), Some(("ca", &2)));
1946    }
1947
1948    #[test]
1949    fn utf8_remove_and_reinsert() {
1950        let mut b = CommandTrieBuilder::new();
1951        b.insert("café", 1);
1952        b.insert("naïve", 2);
1953        assert_eq!(b.remove("café"), Some(1));
1954        assert_eq!(b.get("café"), None);
1955        assert_eq!(b.get("naïve"), Some(&2));
1956        b.insert("café", 11);
1957        let t = b.build();
1958        assert_eq!(t.get("café"), Some(&11));
1959        assert_eq!(t.get("naïve"), Some(&2));
1960    }
1961
1962    #[test]
1963    fn utf8_iter_roundtrip_emoji_heavy() {
1964        let keys = ["🦀", "🦀rust", "🦀🦀", "🔥", "🔥fire", "ascii"];
1965        let mut b = CommandTrieBuilder::new();
1966        for (i, k) in keys.iter().enumerate() {
1967            b.insert(k, i as i32);
1968        }
1969        let t = b.build();
1970        for (i, k) in keys.iter().enumerate() {
1971            assert_eq!(t.get(k), Some(&(i as i32)), "lookup {k}");
1972        }
1973        // round-trip
1974        let collected: Vec<String> = t.iter().map(|(k, _)| k).collect();
1975        let mut expected: Vec<String> = keys.iter().map(|s| s.to_string()).collect();
1976        expected.sort();
1977        assert_eq!(collected, expected);
1978    }
1979
1980    #[test]
1981    fn utf8_three_byte_char_paths() {
1982        // '中' = E4 B8 AD — exercises the 3-byte arm of `utf8_char_len`
1983        // through both the builder cold path and the frozen multi-byte
1984        // lookup path.
1985        let mut b = CommandTrieBuilder::new();
1986        b.insert("中", 1);
1987        b.insert("中a", 2);
1988        b.insert("中b", 3);
1989        b.insert("間", 4);
1990        let t = b.build();
1991        assert_eq!(t.get("中"), Some(&1));
1992        assert_eq!(t.get("中a"), Some(&2));
1993        assert_eq!(t.get("中b"), Some(&3));
1994        assert_eq!(t.get("間"), Some(&4));
1995        assert_eq!(t.get("中c"), None);
1996        assert_eq!(t.longest_prefix_match("中ax"), Some(("中a", &2)),);
1997    }
1998
1999    #[test]
2000    fn utf8_lcp_backs_off_into_codepoint_boundary() {
2001        // "Xé" (58 C3 A9) and "Xè" (58 C3 A8) share an ASCII 'X' then
2002        // diverge inside the second byte of a 2-byte codepoint. The byte
2003        // LCP is 2, but the char-aligned `lcp` must back off to 1 so the
2004        // split point lands on a codepoint boundary.
2005        let mut b = CommandTrieBuilder::new();
2006        b.insert("Xé", 1);
2007        b.insert("Xè", 2);
2008        let t = b.build();
2009        assert_eq!(t.get("Xé"), Some(&1));
2010        assert_eq!(t.get("Xè"), Some(&2));
2011        // The shared edge label must be "X", not "X" + a stray leading
2012        // byte of the multi-byte char; iteration order confirms both
2013        // entries are reachable and distinct.
2014        let keys: Vec<String> = t.iter().map(|(k, _)| k).collect();
2015        assert_eq!(keys, vec!["Xè".to_string(), "Xé".to_string()]);
2016    }
2017}