Skip to main content

smoothutf8/
lib.rs

1//! Portable, formally verified UTF-8 validation.
2//!
3//! This crate provides two entry points for validating that a byte sequence is
4//! well-formed UTF-8:
5//!
6//! - [`verify`] takes a plain `&[u8]` and is fully safe. It uses an 8-byte
7//!   SWAR (SIMD-within-a-register) ASCII fast path, and handles the final
8//!   partial chunk with overlapping in-bounds loads so that it never reads
9//!   past the slice.
10//!
11//! - [`SlackBuf`] is the safe wrapper for zero-copy parsers that maintain at
12//!   least [`SLACK`] readable bytes after every logical field (the "eps-copy"
13//!   pattern used by hyperpb and UPB). The padding invariant is established
14//!   once at the buffer level — typically via `SlackBuf::new_add_slack`
15//!   (with `feature = "alloc"`) or [`SlackBuf::new_embedded_slack`] — and
16//!   per-field
17//!   [`verify`](SlackBuf::verify) / [`to_str`](SlackBuf::to_str) /
18//!   [`le_u32`](SlackBuf::le_u32) calls are then safe and skip the per-string
19//!   tail-handling cost.
20//!
21//!   [`verify_with_slack`] is the underlying `unsafe` per-call entry point;
22//!   prefer [`SlackBuf`] for new code.
23//!
24//! Both share the same multi-byte path: a shift-encoded DFA (the
25//! Vognsen/Langdale encoding of Höhrmann's UTF-8 automaton).
26//!
27//! The crate is `#![no_std]` and zero-dependency by default.
28//!
29//! # Features
30//!
31//! - **`alloc`** — adds `SlackBuf::new_add_slack`, which appends the padding
32//!   to a `Vec<u8>` itself. Everything else stays no-alloc.
33//! - **`simdutf8`** — delegate inputs ≥ 128 bytes to
34//!   [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8). Adds one
35//!   dependency. Below the threshold the verified path runs.
36//! - Building with `-C target-cpu=x86-64-v3` (or `native` on a Haswell+
37//!   machine) enables a 32-byte/iteration `movemask` ASCII prefix scan
38//!   (no runtime dispatch; not covered by the proofs below) and BMI2
39//!   `shrx` for the shift-DFA (~+40% on the multibyte path).
40//! - **`verus`** is verification-only (CI); it does not change runtime
41//!   behaviour and is not intended to be combined with `simdutf8`.
42//!
43//! # Verification
44//!
45//! Under `--features verus` (portable 64-bit build, no `simdutf8`/`avx2`),
46//! Verus proves **functional correctness**: [`verify`] and [`verify_with_slack`]
47//! carry `ensures ret == is_valid_utf8(b@)`, where `spec::is_valid_utf8`
48//! is a direct transcription of Unicode §3.9 Table 3-7. Every bit-trick in
49//! the SWAR fast path and the multi-byte decoder is connected to that table
50//! by a `by(bit_vector)` lemma; nothing is `assume`d. Differential testing
51//! against [`core::str::from_utf8`] (proptest, libfuzzer) remains as a
52//! cross-check on the trusted leaf-load specs.
53//!
54//! Memory safety of the raw-pointer loads — the only `unsafe` on the
55//! verified path — is checked by two tools targeting complementary parts:
56//!
57//! - **Verus** (SMT-backed; enable with `--features verus`) verifies the
58//!   slice-typed core end-to-end, including the multibyte state machine.
59//!   The leaf load helpers are `external_body` with the spec
60//!   `ret == pack64(buf@, at)` (and `pack32`/`pack16` for the sub-word
61//!   loads) — the standard little-endian load contract.
62//!
63//! - **[RefinedRust]** (Rocq-backed; build with `--cfg rr`) verifies exactly
64//!   those leaf bodies in `raw`: that `ptr.add(at).cast().read_unaligned()`
65//!   is sound given separating ownership of `n` bytes with `at + N ≤ n`. The
66//!   slice-typed core is out of its reach (it does not currently model Rust
67//!   slices).
68//!
69//! Each tool's trusted base is what the other proves. The connecting step —
70//! that `&[u8]::as_ptr()` yields a pointer valid for `len()` initialized
71//! bytes — is the standard-library contract for slices. The
72//! `simdutf8`-feature delegation path, the `cfg(avx2)` prefix scan, and the
73//! `core::str::from_utf8` delegation on 32-bit targets are *not* covered by
74//! these proofs. [`from_utf8`]'s call to `from_utf8_unchecked` is justified by
75//! the functional-correctness proof of [`verify`], on the assumption that
76//! `spec::is_valid_utf8` coincides with Rust's `str` invariant — both are
77//! Unicode §3.9, but neither tool checks that equivalence.
78//!
79//! This crate is bool-only by design. Callers needing the byte position of a
80//! validation error should use [`core::str::from_utf8`].
81//!
82//! [RefinedRust]: https://plv.mpi-sws.org/refinedrust/
83
84#![no_std]
85// Verus's driver injects `stmt_expr_attributes` and friends via `-Zcrate-attr`;
86// `proc_macro_hygiene` is additionally needed for `#[verus_spec(invariant)]`
87// on `while` loops (see rust_verify_test/tests/common/mod.rs). The `verus`
88// feature is verification-only and is never built under stock rustc.
89#![cfg_attr(feature = "verus", feature(proc_macro_hygiene))]
90#![deny(unsafe_op_in_unsafe_fn)]
91#![warn(missing_docs)]
92// The load helpers below are one or two instructions each and sit in the hot
93// loop; a non-inlined call boundary there would dominate the work. The
94// `#[inline(always)]` is load-bearing, not decorative.
95#![allow(clippy::inline_always)]
96// RefinedRust attribute tool registration. The `rr` cfg is set only when
97// running under the RefinedRust frontend; on a normal build these are no-ops.
98#![cfg_attr(rr, feature(register_tool))]
99#![cfg_attr(rr, feature(custom_inner_attributes))]
100#![cfg_attr(rr, register_tool(rr))]
101#![cfg_attr(rr, rr::package("smooth_utf8"))]
102#![cfg_attr(rr, rr::coq_prefix("smooth_utf8"))]
103#![cfg_attr(rr, rr::include("ptr"))]
104
105#[cfg(feature = "alloc")]
106extern crate alloc;
107
108use core::ops::Range;
109
110/// `debug_assert!` that compiles out under Verus (which does not model
111/// panics; the Verus `requires` clause states the same condition).
112macro_rules! da {
113    ($($t:tt)*) => { #[cfg(not(feature = "verus"))] { core::debug_assert!($($t)*); } };
114}
115
116#[cfg(feature = "verus")]
117use spec::*;
118#[cfg(feature = "verus")]
119use verus_builtin_macros::{proof, verus_spec, verus_verify};
120#[cfg(feature = "verus")]
121#[allow(unused_imports)]
122use vstd::prelude::*;
123
124mod ascii_skip;
125mod raw;
126#[cfg(feature = "verus")]
127pub mod spec;
128
129/// Inputs of this length or longer are delegated to `simdutf8` when the
130/// `simdutf8` feature is enabled. Below it, the verified SWAR/slack path
131/// is faster (no runtime dispatch); at and above, simdutf8's Keiser–Lemire
132/// SIMD validator wins, decisively on mixed input. On aarch64 simdutf8's
133/// NEON kernel overtakes from ~64 B (vs ~128 B for AVX2 on x86), so the
134/// threshold is lowered there.
135#[cfg(all(feature = "simdutf8", not(target_arch = "aarch64")))]
136const LONG_THRESHOLD: usize = 128;
137#[cfg(all(feature = "simdutf8", target_arch = "aarch64"))]
138const LONG_THRESHOLD: usize = 64;
139
140/// Mask with the high bit of every byte set.
141#[cfg_attr(feature = "verus", verus_verify)]
142#[allow(clippy::unreadable_literal)] // grouping by byte is the readable form here
143const SIGN_BITS: u64 = 0x8080_8080_8080_8080;
144
145/// Number of readable bytes that must follow the logical end of the input
146/// passed to [`verify_with_slack`].
147///
148/// The current implementation reads at most 7 bytes past the logical end; the
149/// constant is rounded up to 8 to keep the contract simple and leave headroom.
150#[cfg_attr(feature = "verus", verus_verify)]
151pub const SLACK: usize = 8;
152
153/// Returns `true` if `b` is well-formed UTF-8.
154///
155/// This is functionally equivalent to `core::str::from_utf8(b).is_ok()`, but
156/// is tuned for the short, mostly-ASCII strings typical of serialized
157/// protocols. It never reads outside `b`: partial chunks are covered by
158/// overlapping in-bounds loads instead of over-reads or stack copies.
159///
160/// With `feature = "simdutf8"`, inputs at or above a per-arch threshold
161/// (128 bytes on x86-64, 64 on aarch64) are delegated to
162/// [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8).
163///
164/// ```
165/// assert!(smoothutf8::verify("hello, 世界! 🌍".as_bytes()));
166/// assert!(!smoothutf8::verify(&[0xC0, 0x80])); // overlong NUL
167/// ```
168#[cfg_attr(feature = "verus", verus_spec(ret =>
169    ensures ret == is_valid_utf8(b@),
170))]
171#[inline]
172#[must_use]
173pub fn verify(b: &[u8]) -> bool {
174    #[cfg(feature = "simdutf8")]
175    if b.len() >= LONG_THRESHOLD {
176        return verify_long(b, 0, b.len());
177    }
178    #[cfg(feature = "verus")]
179    proof! { assert(b@.subrange(0, b@.len() as int) =~= b@); }
180    // SAFETY: the range is `0..b.len()`, so `start <= end` and
181    // `end + 0 <= b.len()` hold trivially.
182    unsafe { verify_impl::<0>(b, 0..b.len()) }
183}
184
185/// The `simdutf8` delegation, outlined so it does not count against the
186/// public entry points' inline cost: with the call and its slice-panic
187/// plumbing in the hot body, LLVM stops inlining `verify`/`verify_with_slack`
188/// into callers, and the call boundary alone costs more than validating a
189/// short input (the same mechanism as the 0.2.1 inline-partition fix, one
190/// level up; see CHANGELOG 0.2.3 for the measurements).
191///
192/// Takes `buf` plus indices rather than a pre-built slice so the range
193/// check and its panic path live here, not in the inlined caller bodies —
194/// re-slicing at the call sites would reintroduce the regression. `#[cold]`
195/// is intentional even for long-input-heavy callers: the branch-weight bias
196/// costs at most one mispredict, amortized over a threshold's worth of SIMD
197/// validation, while short calls are the latency-sensitive ones.
198#[cfg(feature = "simdutf8")]
199#[cold]
200#[inline(never)]
201fn verify_long(buf: &[u8], start: usize, end: usize) -> bool {
202    simdutf8::basic::from_utf8(&buf[start..end]).is_ok()
203}
204
205/// Returns `Some(b as &str)` if `b` is well-formed UTF-8.
206///
207/// Single-scan: validates with [`verify`] and converts via
208/// [`core::str::from_utf8_unchecked`] on success. Drop-in replacement for
209/// `core::str::from_utf8(b).ok()`; callers needing the byte position of a
210/// validation error should use [`core::str::from_utf8`] directly.
211///
212/// ```
213/// assert_eq!(smoothutf8::from_utf8(b"abc"), Some("abc"));
214/// assert_eq!(smoothutf8::from_utf8(&[0xFF]), None);
215/// ```
216#[inline]
217#[must_use]
218pub fn from_utf8(b: &[u8]) -> Option<&str> {
219    if verify(b) {
220        // SAFETY: `verify` returned true, so `b` is valid UTF-8.
221        Some(unsafe { core::str::from_utf8_unchecked(b) })
222    } else {
223        None
224    }
225}
226
227/// Renamed to [`from_utf8`].
228#[deprecated(since = "0.2.2", note = "renamed to `from_utf8`")]
229#[inline]
230#[must_use]
231pub fn to_str(b: &[u8]) -> Option<&str> {
232    from_utf8(b)
233}
234
235/// Returns `true` if `buf[range]` is well-formed UTF-8, using the slack-buffer
236/// fast path.
237///
238/// Prefer [`SlackBuf`] for new code: it hoists this function's per-call
239/// `unsafe` precondition to a buffer-level type invariant, so per-field
240/// validation is safe.
241///
242/// This variant performs unaligned 8-byte loads that may read up to
243/// [`SLACK`] − 1 bytes past `range.end`. Those bytes are masked off and never
244/// influence the result; they need only be *readable*.
245///
246/// # Safety
247///
248/// The caller must guarantee all of the following:
249///
250/// - `range.start <= range.end`
251/// - `range.end + SLACK <= buf.len()`
252///
253/// Because `buf` is a slice, satisfying the second condition is sufficient to
254/// keep every load within the allocation that `buf` points into; the over-read
255/// stays inside `buf` and is therefore not an out-of-bounds memory access at
256/// the machine level. Violating either condition is undefined behaviour.
257///
258/// This is why the function takes the full backing buffer plus a logical
259/// `range`, rather than a pre-sliced `&buf[range]`: the slack bytes past
260/// `range.end` must remain part of the slice so that reading them is sound.
261///
262/// With `feature = "simdutf8"`, inputs at or above a per-arch threshold
263/// (128 bytes on x86-64, 64 on aarch64) are delegated to
264/// [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8) (the slack region
265/// is not used on that path).
266///
267/// # Examples
268///
269/// ```
270/// use smoothutf8::{verify_with_slack, SLACK};
271/// let mut buf = b"field-value".to_vec();
272/// let end = buf.len();
273/// buf.resize(end + SLACK, 0); // your decoder's eps-copy padding
274/// // SAFETY: `0 <= end` and `end + SLACK == buf.len()`.
275/// assert!(unsafe { verify_with_slack(&buf, 0..end) });
276/// ```
277#[cfg_attr(feature = "verus", verus_spec(ret =>
278    requires
279        range.start <= range.end,
280        range.end + SLACK <= buf@.len(),
281    ensures
282        ret == is_valid_utf8(buf@.subrange(range.start as int, range.end as int)),
283))]
284#[inline]
285#[must_use]
286pub unsafe fn verify_with_slack(buf: &[u8], range: Range<usize>) -> bool {
287    da!(range.start <= range.end);
288    da!(range.end.saturating_add(SLACK) <= buf.len());
289    #[cfg(feature = "simdutf8")]
290    if range.end - range.start >= LONG_THRESHOLD {
291        return verify_long(buf, range.start, range.end);
292    }
293    // SAFETY: `range.start <= range.end` and `range.end + SLACK <= buf.len()`
294    // are this function's own documented contract.
295    unsafe { verify_impl::<SLACK>(buf, range) }
296}
297
298// -- SlackBuf: safe wrapper over the slack-buffer invariant ------------------
299
300pub use slack_buf::SlackBuf;
301
302mod slack_buf {
303    #[cfg(feature = "verus")]
304    use super::{is_valid_utf8, verus_spec, verus_verify};
305    use super::{verify_with_slack, Range, SLACK};
306    #[cfg(feature = "verus")]
307    #[allow(unused_imports)]
308    use vstd::prelude::*;
309
310    /// A borrowed byte buffer whose final [`SLACK`] bytes are padding.
311    ///
312    /// Any read of width up to `SLACK` starting at a position `≤`
313    /// [`payload_len`](Self::payload_len) stays in bounds, so per-field
314    /// validation and fixed-width loads need no `unsafe` at the call site.
315    #[cfg_attr(feature = "verus", verus_verify)]
316    #[repr(transparent)]
317    #[derive(Clone, Copy)]
318    #[cfg_attr(not(feature = "verus"), derive(Debug))]
319    // Field private to this module: every construction goes through a
320    // constructor that establishes the `len >= SLACK` invariant.
321    pub struct SlackBuf<'a>(&'a [u8]);
322
323    // -- Verus-verified surface ---------------------------------------------
324    //
325    // `verus_spec` on inherent-impl methods must be the bare attribute name
326    // (the outer `verus_verify` proc-macro matches it literally and does not
327    // unwrap `cfg_attr` or path-qualified forms), so the verus impl is a
328    // separate `cfg`-gated block. The non-verus impl below is the runtime
329    // build; bodies are kept in sync.
330    #[cfg(feature = "verus")]
331    verus_builtin_macros::verus! {
332        impl<'a> View for SlackBuf<'a> {
333            type V = Seq<u8>;
334            closed spec fn view(&self) -> Seq<u8> { self.0@ }
335        }
336    }
337
338    #[cfg(feature = "verus")]
339    #[verus_verify]
340    #[allow(missing_docs)] // the runtime impl below carries the docs
341    impl<'a> SlackBuf<'a> {
342        #[verus_spec(ret =>
343            requires buf@.len() >= SLACK,
344            ensures ret@ == buf@,
345        )]
346        pub unsafe fn new_unchecked(buf: &'a [u8]) -> Self {
347            Self(buf)
348        }
349
350        #[verus_spec(ret =>
351            requires buf@.len() >= SLACK,
352            ensures ret@ == buf@,
353        )]
354        pub fn new_embedded_slack(buf: &'a [u8]) -> Self {
355            Self(buf)
356        }
357
358        #[verus_spec(ret =>
359            ensures ret@ == self@,
360        )]
361        pub fn as_bytes(&self) -> &'a [u8] {
362            self.0
363        }
364
365        #[verus_spec(ret =>
366            requires self@.len() >= SLACK,
367            ensures ret == self@.len() - SLACK,
368        )]
369        pub fn payload_len(&self) -> usize {
370            self.0.len() - SLACK
371        }
372
373        #[verus_spec(ret =>
374            requires
375                range.start <= range.end,
376                range.end + SLACK <= self@.len(),
377            ensures
378                ret == is_valid_utf8(self@.subrange(range.start as int, range.end as int)),
379        )]
380        pub fn verify(&self, range: Range<usize>) -> bool {
381            unsafe { verify_with_slack(self.0, range) }
382        }
383    }
384
385    // -- runtime (non-verus) surface ---------------------------------------
386
387    // `le_u32`'s SAFETY argument relies on this; pin it at compile time so a
388    // future change to `SLACK` cannot silently make the load OOB.
389    #[cfg(not(feature = "verus"))]
390    const _: () = assert!(SLACK >= 4);
391
392    #[cfg(not(feature = "verus"))]
393    impl<'a> SlackBuf<'a> {
394        /// Wraps `buf` without checking the length invariant.
395        ///
396        /// # Safety
397        /// `buf.len() >= SLACK`.
398        #[inline]
399        #[must_use]
400        pub const unsafe fn new_unchecked(buf: &'a [u8]) -> Self {
401            debug_assert!(buf.len() >= SLACK);
402            Self(buf)
403        }
404
405        /// Wraps `buf`, or returns `None` if `buf.len() < SLACK`.
406        #[inline]
407        #[must_use]
408        pub const fn new(buf: &'a [u8]) -> Option<Self> {
409            if buf.len() >= SLACK {
410                Some(Self(buf))
411            } else {
412                None
413            }
414        }
415
416        /// Wraps `buf`, treating its final [`SLACK`] bytes as padding.
417        ///
418        /// This is the alloc-free constructor for callers that have already
419        /// padded the buffer themselves. With the `alloc` feature,
420        #[cfg_attr(feature = "alloc", doc = " [`new_add_slack`](Self::new_add_slack)")]
421        #[cfg_attr(not(feature = "alloc"), doc = " `new_add_slack`")]
422        /// appends the padding for you.
423        ///
424        /// # Panics
425        /// If `buf.len() < SLACK`.
426        #[inline]
427        #[must_use]
428        #[track_caller]
429        pub const fn new_embedded_slack(buf: &'a [u8]) -> Self {
430            assert!(buf.len() >= SLACK, "buf must carry SLACK trailing bytes");
431            Self(buf)
432        }
433
434        /// Appends [`SLACK`] zero bytes to `v` and wraps the result.
435        ///
436        /// `v` is borrowed for `'a`: it cannot be grown or otherwise mutated
437        /// while the returned `SlackBuf` is alive, and on return its length is
438        /// `original_len + SLACK`. May reallocate; reserve `SLACK` extra
439        /// capacity before filling `v` if that matters.
440        #[cfg(feature = "alloc")]
441        #[inline]
442        #[must_use]
443        pub fn new_add_slack(v: &'a mut alloc::vec::Vec<u8>) -> Self {
444            v.extend_from_slice(&[0u8; SLACK]);
445            Self(v.as_slice())
446        }
447
448        /// The full backing slice, including the trailing slack bytes.
449        #[inline]
450        #[must_use]
451        pub const fn as_bytes(&self) -> &'a [u8] {
452            self.0
453        }
454
455        /// `as_bytes().len() - SLACK`: the largest valid `range.end` for
456        /// [`verify`](Self::verify) and the largest valid `at` for the
457        /// fixed-width loads.
458        #[inline]
459        #[must_use]
460        pub const fn payload_len(&self) -> usize {
461            self.0.len() - SLACK
462        }
463
464        /// Returns `true` if `self.as_bytes()[range]` is well-formed UTF-8,
465        /// using the slack-buffer fast path.
466        ///
467        /// # Panics
468        /// If `range.start > range.end` or `range.end > self.payload_len()`.
469        #[inline]
470        #[must_use]
471        pub fn verify(&self, range: Range<usize>) -> bool {
472            // One combined assert (not two) so there is a single panic call
473            // site and the function stays prologue-free for the tail call.
474            assert!(range.start <= range.end && range.end <= self.payload_len());
475            // SAFETY: the assert establishes both of `verify_with_slack`'s
476            // preconditions (`start <= end` and `end + SLACK <= len`, given the
477            // type invariant `len >= SLACK`).
478            unsafe { verify_with_slack(self.0, range) }
479        }
480
481        /// Returns `Some(self.as_bytes()[range] as &str)` if the range is
482        /// well-formed UTF-8. Single-scan; the returned slice borrows the
483        /// backing buffer for `'a`.
484        ///
485        /// # Panics
486        /// Same conditions as [`verify`](Self::verify).
487        #[inline]
488        #[must_use]
489        pub fn to_str(&self, range: Range<usize>) -> Option<&'a str> {
490            if self.verify(range.clone()) {
491                // SAFETY: `verify`'s asserts established
492                // `range.start <= range.end <= len - SLACK <= len`, so
493                // `get_unchecked(range)` is in-bounds; `verify` returned true,
494                // so the bytes are valid UTF-8.
495                Some(unsafe { core::str::from_utf8_unchecked(self.0.get_unchecked(range)) })
496            } else {
497                None
498            }
499        }
500
501        /// Loads 4 bytes at `at..at+4` as a little-endian `u32`. The slack
502        /// invariant (`SLACK >= 4`) makes this a single unaligned load with no
503        /// tail handling.
504        ///
505        /// # Panics
506        /// If `at > self.payload_len()`.
507        #[inline]
508        #[must_use]
509        pub fn le_u32(&self, at: usize) -> u32 {
510            assert!(at <= self.payload_len());
511            // SAFETY: `at <= len - SLACK` and `SLACK >= 4` (const-asserted
512            // above), so `at + 4 <= len`; `as_ptr()` carries provenance over
513            // the full slice and `read_unaligned` avoids alignment UB.
514            u32::from_le(unsafe { self.0.as_ptr().add(at).cast::<u32>().read_unaligned() })
515        }
516    }
517}
518
519// -- leaf loads (Verus-trusted, RefinedRust-verified) ------------------------
520
521/// Load 8 bytes at `buf[at..at+8]` as a little-endian `u64`.
522///
523/// # Safety
524/// `at + 8 <= buf.len()`.
525#[cfg_attr(feature = "verus", verus_verify(external_body))]
526#[cfg_attr(feature = "verus", verus_spec(ret =>
527    requires at + 8 <= buf@.len(),
528    ensures ret == pack64(buf@, at as int),
529))]
530#[inline(always)]
531unsafe fn load64(buf: &[u8], at: usize) -> u64 {
532    da!(at + 8 <= buf.len());
533    // SAFETY: `at + 8 <= buf.len()`, so `buf.as_ptr()` is valid for `at + 8`
534    // bytes; this is exactly `load64_raw`'s precondition.
535    unsafe { raw::load64_raw(buf.as_ptr(), at) }
536}
537
538/// Load 4 bytes at `buf[at..at+4]` as a little-endian `u32`.
539///
540/// # Safety
541/// `at + 4 <= buf.len()`.
542#[cfg_attr(feature = "verus", verus_verify(external_body))]
543#[cfg_attr(feature = "verus", verus_spec(ret =>
544    requires at + 4 <= buf@.len(),
545    ensures ret == pack32(buf@, at as int),
546))]
547#[inline(always)]
548unsafe fn load32(buf: &[u8], at: usize) -> u32 {
549    da!(at + 4 <= buf.len());
550    // SAFETY: `at + 4 <= buf.len()`; see `load64`.
551    unsafe { raw::load32_raw(buf.as_ptr(), at) }
552}
553
554/// Load 2 bytes at `buf[at..at+2]` as a little-endian `u16`.
555///
556/// # Safety
557/// `at + 2 <= buf.len()`.
558#[cfg_attr(feature = "verus", verus_verify(external_body))]
559#[cfg_attr(feature = "verus", verus_spec(ret =>
560    requires at + 2 <= buf@.len(),
561    ensures ret == pack16(buf@, at as int),
562))]
563#[inline(always)]
564unsafe fn load16(buf: &[u8], at: usize) -> u16 {
565    da!(at + 2 <= buf.len());
566    // SAFETY: `at + 2 <= buf.len()`; see `load64`.
567    unsafe { raw::load16_raw(buf.as_ptr(), at) }
568}
569
570/// Load 1 byte at `buf[at]`.
571///
572/// # Safety
573/// `at < buf.len()`.
574#[cfg_attr(feature = "verus", verus_verify(external_body))]
575#[cfg_attr(feature = "verus", verus_spec(ret =>
576    requires at < buf@.len(),
577    ensures ret == buf@[at as int],
578))]
579#[inline(always)]
580unsafe fn load8(buf: &[u8], at: usize) -> u8 {
581    da!(at < buf.len());
582    // SAFETY: `at < buf.len()`; see `load64`.
583    unsafe { raw::load8_raw(buf.as_ptr(), at) }
584}
585
586// -- implementation ----------------------------------------------------------
587
588/// Ranges shorter than 8 bytes: no `ascii_skip`, no word loop.
589///
590/// With slack (`PAD >= 8`) the whole range is covered by one unconditional
591/// 8-byte load whose bytes past `end` are masked off. Without slack, a pair
592/// of overlapping in-bounds sub-word loads covers the range; OR-ing them is
593/// sound for the sign-bit test because duplicated bytes contribute the same
594/// sign bits twice. Neither shape stores to the stack — the variable-length
595/// zero-padded copy this replaces compiled to a libc `memcpy` call plus a
596/// store-to-load-forwarding stall, which dominated short-input latency.
597///
598/// # Safety
599/// `start <= end`, `end - start < 8`, and `end + PAD <= buf.len()`.
600/// (Machine-checked as `requires` under `--features verus`.)
601#[cfg_attr(feature = "verus", verus_spec(ret =>
602    requires
603        start <= end,
604        end - start < 8,
605        end + PAD <= buf@.len(),
606    ensures
607        ret == is_valid_utf8(buf@.subrange(start as int, end as int)),
608))]
609#[inline(always)]
610unsafe fn verify_short<const PAD: usize>(buf: &[u8], start: usize, end: usize) -> bool {
611    da!(start <= end && end - start < 8 && end.saturating_add(PAD) <= buf.len());
612    if PAD >= 8 {
613        if start == end {
614            #[cfg(feature = "verus")]
615            proof! { assert(buf@.subrange(start as int, end as int).len() == 0); }
616            return true;
617        }
618        // 1..=7, so the mask shift below is in `8..=56` — never 0 or 64.
619        let left = end - start;
620        // SAFETY: `start < end` and `end + PAD <= buf.len()` with `PAD >= 8`
621        // give `start + 8 < end + PAD <= buf.len()`; the bytes read past
622        // `end` land in the slack and are masked off below.
623        let bytes = unsafe { load64(buf, start) };
624        let mask = SIGN_BITS >> ((8 - left) * 8);
625        if bytes & mask != 0 {
626            return verify_multibyte(buf, start, end);
627        }
628        #[cfg(feature = "verus")]
629        proof! {
630            assert forall |j: int| 0 <= j < left
631                implies #[trigger] byte64(bytes, j) == buf@[start + j] by {
632                lemma_pack64_byte(buf@, start as int, j);
633            };
634            assert(mask == sign_mask(left as int));
635            lemma_mask_zero_ascii(buf@, start as int, left as int, bytes);
636            lemma_ascii_valid(buf@, start as int, end as int);
637        }
638        return true;
639    }
640    if end - start >= 4 {
641        // SAFETY: `end + PAD <= buf.len()` gives `end <= buf.len()`, and
642        // `start + 4 <= end` in this branch, so both loads stay inside
643        // `[start, end)`; they overlap when `end - start < 8`, which the
644        // sign-bit OR absorbs.
645        let lo = unsafe { load32(buf, start) };
646        let hi = unsafe { load32(buf, end - 4) };
647        if (lo | hi) & 0x8080_8080 != 0 {
648            return verify_multibyte(buf, start, end);
649        }
650        #[cfg(feature = "verus")]
651        proof! {
652            assert((lo | hi) & 0x8080_8080u32 == 0
653                ==> lo & 0x8080_8080u32 == 0 && hi & 0x8080_8080u32 == 0) by (bit_vector);
654            lemma_signbits4(buf@, start as int);
655            lemma_signbits4(buf@, end as int - 4);
656            assert(all_ascii(buf@, start as int, end as int));
657            lemma_ascii_valid(buf@, start as int, end as int);
658        }
659        return true;
660    }
661    if end - start >= 2 {
662        // SAFETY: `end <= buf.len()` as above, and `start + 2 <= end` in
663        // this branch, so both loads stay inside `[start, end)`.
664        let lo = unsafe { load16(buf, start) };
665        let hi = unsafe { load16(buf, end - 2) };
666        if (lo | hi) & 0x8080 != 0 {
667            return verify_multibyte(buf, start, end);
668        }
669        #[cfg(feature = "verus")]
670        proof! {
671            assert((lo | hi) & 0x8080u16 == 0
672                ==> lo & 0x8080u16 == 0 && hi & 0x8080u16 == 0) by (bit_vector);
673            lemma_signbits2(buf@, start as int);
674            lemma_signbits2(buf@, end as int - 2);
675            assert(all_ascii(buf@, start as int, end as int));
676            lemma_ascii_valid(buf@, start as int, end as int);
677        }
678        return true;
679    }
680    if start < end {
681        // end - start == 1
682        // SAFETY: `start < end <= buf.len()`.
683        let b = unsafe { load8(buf, start) };
684        if b >= 0x80 {
685            return verify_multibyte(buf, start, end);
686        }
687        #[cfg(feature = "verus")]
688        proof! {
689            assert(all_ascii(buf@, start as int, end as int));
690            lemma_ascii_valid(buf@, start as int, end as int);
691        }
692        return true;
693    }
694    #[cfg(feature = "verus")]
695    proof! { assert(buf@.subrange(start as int, end as int).len() == 0); }
696    true
697}
698
699/// # Safety
700/// `range.start <= range.end` and `range.end + PAD <= buf.len()`.
701/// (Machine-checked as `requires` under `--features verus`.)
702#[cfg_attr(feature = "verus", verus_spec(ret =>
703    requires
704        range.start <= range.end,
705        range.end + PAD <= buf@.len(),
706    ensures
707        ret == is_valid_utf8(buf@.subrange(range.start as int, range.end as int)),
708))]
709// `inline(always)`: with two call sites in a binary (e.g. a caller using both
710// `verify_with_slack` and `SlackBuf::verify`), plain `#[inline]` lets LLVM
711// out-line this — and the call boundary then dominates the work on short
712// inputs (~2 ns of prologue vs ~1.5 ns of actual validation at 8 B).
713#[inline(always)]
714unsafe fn verify_impl<const PAD: usize>(buf: &[u8], range: Range<usize>) -> bool {
715    let start = range.start;
716    let end = range.end;
717    da!(start <= end && end.saturating_add(PAD) <= buf.len());
718    if end - start < 8 {
719        // SAFETY: `end - start < 8` was just checked; `start <= end` and
720        // `end + PAD <= buf.len()` are this function's own contract.
721        return unsafe { verify_short::<PAD>(buf, start, end) };
722    }
723    let mut p = start;
724
725    // ---- ASCII fast path: full STEP-byte blocks ---------------------------
726    // Portable SWAR (16 B/iter) by default; AVX2 or NEON (32 B/iter) when
727    // available. Returns at the first non-ASCII block or with `< STEP` left.
728    p = ascii_skip::skip(buf, p, end);
729    if end - p >= ascii_skip::STEP {
730        #[cfg(feature = "verus")]
731        proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
732        return verify_multibyte(buf, p, end);
733    }
734
735    // ---- ASCII fast path: remaining full 8-byte words ---------------------
736    // At most one iteration when `STEP == 16`; up to three when `STEP == 32`.
737    // (A 16-byte-pair variant of this loop was tried and regressed Neoverse-V2
738    // by 10-40% at 8-128 B — the extra tail branches cost more than the saved
739    // test on a path the 8-byte loop already runs at one cycle per word.)
740    #[cfg_attr(feature = "verus", verus_spec(
741        invariant
742            start == range.start, end == range.end,
743            start <= p, p <= end, end + PAD <= buf@.len(),
744            all_ascii(buf@, start as int, p as int),
745        decreases end - p
746    ))]
747    while end - p >= 8 {
748        // SAFETY: `p + 8 <= end <= buf.len()` (the latter from
749        // `end + PAD <= buf.len()`, `PAD >= 0`).
750        let bytes = unsafe { load64(buf, p) };
751        if bytes & SIGN_BITS != 0 {
752            #[cfg(feature = "verus")]
753            proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
754            return verify_multibyte(buf, p, end);
755        }
756        #[cfg(feature = "verus")]
757        proof! {
758            lemma_signbits8(buf@, p as int);
759            lemma_ascii_extend(buf@, start as int, p as int, p as int + 8);
760        }
761        p += 8;
762    }
763
764    // ---- ASCII fast path: 1..=7 trailing bytes via the last-8-byte window -
765    if p < end {
766        // SAFETY: `end >= start + 8 >= 8` and `end <= buf.len()` (from
767        // `end + PAD <= buf.len()`, `PAD >= 0`).
768        let bytes = unsafe { load64(buf, end - 8) };
769        if bytes & SIGN_BITS != 0 {
770            #[cfg(feature = "verus")]
771            proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
772            return verify_multibyte(buf, p, end);
773        }
774        #[cfg(feature = "verus")]
775        proof! {
776            lemma_signbits8(buf@, end as int - 8);
777            // `[p, end) ⊆ [end-8, end)` since `end - p <= 8`.
778            assert(all_ascii(buf@, p as int, end as int));
779            lemma_ascii_extend(buf@, start as int, p as int, end as int);
780            lemma_ascii_valid(buf@, start as int, end as int);
781        }
782    } else {
783        #[cfg(feature = "verus")]
784        proof! { lemma_ascii_valid(buf@, start as int, end as int); }
785    }
786
787    true
788}
789
790// -- Höhrmann-style UTF-8 DFA -----------------------------------------------
791//
792// The shift-DFA's per-byte step is `(ROW[byte] >> state) & 63` — a 64-bit
793// variable shift. That's one instruction on 64-bit targets but 7-13 on 32-bit
794// (Cortex-M, i686), where it ends up slower than the standard library's
795// branchy validator. On non-64-bit targets `verify_multibyte` delegates to
796// `core::str::from_utf8` instead, and the tables below are dead `const`s
797// (no runtime footprint). The ASCII fast path above is kept regardless.
798//
799// `target_pointer_width` is a proxy for "has native u64 ops". wasm32 is opted
800// in explicitly: it has native `i64.shr_u`, so the DFA runs at full speed even
801// though pointers are 32-bit.
802
803#[cfg(not(any(target_pointer_width = "64", target_arch = "wasm32")))]
804fn verify_multibyte(buf: &[u8], start: usize, end: usize) -> bool {
805    core::str::from_utf8(&buf[start..end]).is_ok()
806}
807
808// Byte → class (12 classes) and state×class → next-state tables. State values
809// are pre-multiplied by 12 so the transition is a single add+load. Tables are
810// the canonical Höhrmann set (also used by `bstr`); see
811// <https://bjoern.hoehrmann.de/utf-8/decoder/dfa/>.
812//
813// `TRANS` is padded from 108 to 256 entries (with REJECT) so that indexing by
814// a `u8` sum lets the compiler elide the bounds check.
815
816#[allow(dead_code)] // documentation for `TRANS`; the hot path uses `DFA_ACCEPT`
817const ACCEPT: u8 = 12;
818#[allow(dead_code)]
819const REJECT: u8 = 0;
820
821#[rustfmt::skip]
822const CLASS: [u8; 256] = [
823     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
824     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
825     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
826     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
827     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
828     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
829     8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
830    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
831];
832
833#[rustfmt::skip]
834const TRANS: [u8; 256] = [
835     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
836    12, 0,24,36,60,96,84, 0, 0, 0,48,72,
837     0,12, 0, 0, 0, 0, 0,12, 0,12, 0, 0,
838     0,24, 0, 0, 0, 0, 0,24, 0,24, 0, 0,
839     0, 0, 0, 0, 0, 0, 0,24, 0, 0, 0, 0,
840     0,24, 0, 0, 0, 0, 0, 0, 0,24, 0, 0,
841     0, 0, 0, 0, 0, 0, 0,36, 0,36, 0, 0,
842     0,36, 0, 0, 0, 0, 0,36, 0,36, 0, 0,
843     0,36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
844    // padding (REJECT) so that a u8 index elides the bounds check
845     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
846     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
847     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
848     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
849     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
850];
851
852/// Shift-based DFA row table: `ROW[byte]` packs the next state for *every*
853/// current state into one `u64`, at 6-bit stride. The transition is then
854/// `state' = (ROW[byte] >> state) & 63` — the only load depends on `byte`
855/// (off the critical path), and the state→state' chain is one shift + one
856/// mask. This is the Vognsen/Langdale encoding of the Höhrmann automaton.
857///
858/// State values are `index * 6` (so they double as shift amounts):
859/// REJECT = 0, ACCEPT = 6, intermediates 12..=48. Nine states × 6 bits = 54
860/// bits used per row.
861const fn build_rows() -> [u64; 256] {
862    let mut row = [0u64; 256];
863    let mut b = 0usize;
864    while b < 256 {
865        let class = CLASS[b] as usize;
866        let mut s = 0usize;
867        while s < 9 {
868            // `TRANS` next-state in the {0,12,24,..,96} encoding → index → ×6.
869            let next = (TRANS[s * 12 + class] / 12) as u64 * 6;
870            row[b] |= next << (s * 6);
871            s += 1;
872        }
873        b += 1;
874    }
875    row
876}
877const ROW: [u64; 256] = build_rows();
878
879/// Shift-DFA state values (`index * 6`, so each is its own shift amount).
880/// Names follow the Unicode §3.9 Table 3-7 partial-match position each
881/// represents; the proof's per-state-meaning lemmas reference these.
882mod state {
883    /// Absorbing reject.
884    pub const REJECT: u64 = 0;
885    /// At a sequence boundary: input so far is well-formed.
886    pub const ACCEPT: u64 = 6;
887    /// One continuation byte (`80..=BF`) remaining.
888    pub const C1: u64 = 12;
889    /// Two continuation bytes remaining.
890    pub const C2: u64 = 18;
891    /// After `E0`: next byte must be `A0..=BF`, then one continuation.
892    pub const E0: u64 = 24;
893    /// After `ED`: next byte must be `80..=9F`, then one continuation.
894    pub const ED: u64 = 30;
895    /// After `F0`: next byte must be `90..=BF`, then two continuations.
896    pub const F0: u64 = 36;
897    /// Three continuation bytes remaining (after `F1..=F3`).
898    pub const C3: u64 = 42;
899    /// After `F4`: next byte must be `80..=8F`, then two continuations.
900    pub const F4: u64 = 48;
901}
902#[cfg_attr(feature = "verus", verus_verify)]
903const DFA_ACCEPT: u64 = 6;
904#[cfg_attr(feature = "verus", verus_verify)]
905const DFA_REJECT: u64 = 0;
906const _: [(); 0] = [(); (DFA_ACCEPT != state::ACCEPT) as usize];
907const _: [(); 0] = [(); (DFA_REJECT != state::REJECT) as usize];
908const _: [(); 0] = [(); (state::F4 + 6 > 64) as usize]; // 9 states fit in u64
909
910// Compile-time check that the shift encoding agrees with `TRANS`/`CLASS` at
911// the row that drives state assignment (ACCEPT × every lead-byte class). The
912// full 2304-cell equivalence is the Verus `by(compute)` obligation.
913macro_rules! row_check {
914    ($($byte:literal -> $state:path,)*) => {
915        $(const _: [(); 0] = [(); ((ROW[$byte] >> state::ACCEPT) & 63 != $state) as usize];)*
916    };
917}
918row_check! {
919    0x41 -> state::ACCEPT, 0x80 -> state::REJECT, 0xC1 -> state::REJECT,
920    0xC2 -> state::C1,     0xE0 -> state::E0,     0xE1 -> state::C2,
921    0xED -> state::ED,     0xEE -> state::C2,     0xF0 -> state::F0,
922    0xF1 -> state::C3,     0xF4 -> state::F4,     0xF5 -> state::REJECT,
923}
924
925/// `ROW[byte]`. The Verus spec is [`spec_row`]; the compile-time
926/// `_CHECK_SPEC_ROW` assertion below validates `ROW[b]` against a literal
927/// transcription of `spec_row` for all 256 bytes, so the residual trusted
928/// step is the visually-auditable match between that literal and
929/// `spec::spec_row` (a `spec fn`, not callable from const-eval).
930#[cfg_attr(feature = "verus", verus_verify(external_body))]
931#[cfg_attr(feature = "verus", verus_spec(ret =>
932    ensures ret == spec_row(byte),
933))]
934#[inline(always)]
935#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
936const fn row(byte: u8) -> u64 {
937    ROW[byte as usize]
938}
939
940/// Exhaustive const-eval check that `row`'s body agrees with its Verus spec.
941#[allow(clippy::unreadable_literal, clippy::cast_possible_truncation)]
942const _CHECK_SPEC_ROW: () = {
943    let mut b = 0usize;
944    while b < 256 {
945        let bb = b as u8;
946        // Literal transcription of `spec::spec_row`.
947        let spec: u64 = if bb <= 0x7F {
948            0x0000000000000180
949        } else if bb <= 0x8F {
950            0x0012480300306000
951        } else if bb <= 0x9F {
952            0x0000492300306000
953        } else if bb <= 0xBF {
954            0x000049200C306000
955        } else if bb <= 0xC1 {
956            0
957        } else if bb <= 0xDF {
958            0x0000000000000300
959        } else if bb == 0xE0 {
960            0x0000000000000600
961        } else if bb <= 0xEC {
962            0x0000000000000480
963        } else if bb == 0xED {
964            0x0000000000000780
965        } else if bb <= 0xEF {
966            0x0000000000000480
967        } else if bb == 0xF0 {
968            0x0000000000000900
969        } else if bb <= 0xF3 {
970            0x0000000000000A80
971        } else if bb == 0xF4 {
972            0x0000000000000C00
973        } else {
974            0
975        };
976        assert!(ROW[b] == spec);
977        b += 1;
978    }
979};
980
981/// One shift-DFA step. Critical-path latency: shr + and (≈2 cyc); the
982/// `ROW[byte]` load depends only on the input byte.
983#[cfg_attr(feature = "verus", verus_verify)]
984#[cfg_attr(feature = "verus", verus_spec(ret =>
985    requires is_state(state),
986    ensures ret == spec_step(state, byte), is_state(ret),
987))]
988#[inline(always)]
989#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
990const fn step(state: u64, byte: u8) -> u64 {
991    #[cfg(feature = "verus")]
992    proof! { lemma_row_step(state, byte); }
993    (row(byte) >> (state & 63)) & 63
994}
995
996/// Slow path: at least one byte at or after `start` has its high bit set.
997///
998/// Functional contract: returns `is_valid_utf8(buf[start..end])`.
999#[cfg_attr(feature = "verus", verus_verify)]
1000#[cfg_attr(feature = "verus", verus_spec(ret =>
1001    requires
1002        start < end,
1003        end <= buf@.len(),
1004    ensures
1005        ret == is_valid_utf8(buf@.subrange(start as int, end as int)),
1006))]
1007#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
1008// Not `#[inline]`: this is the cold path (reached only when the ASCII fast
1009// path finds a high bit). With `verify_impl` `inline(always)`, leaving this
1010// out-of-line keeps the per-call-site inlined body to ~40 instructions
1011// instead of ~250.
1012#[allow(clippy::cast_possible_truncation)] // `(w >> 8k) as u8` is byte extraction
1013#[allow(clippy::too_many_lines)] // proof annotations
1014fn verify_multibyte(buf: &[u8], start: usize, end: usize) -> bool {
1015    let mut state = DFA_ACCEPT;
1016    let mut p = start;
1017    #[cfg(feature = "verus")]
1018    proof! { assert(buf@.subrange(start as int, start as int).len() == 0); }
1019
1020    // Full 8-byte chunks: one unaligned load, eight unrolled DFA steps.
1021    #[cfg_attr(feature = "verus", verus_spec(
1022        invariant
1023            start <= p, p <= end, end <= buf@.len(),
1024            is_state(state),
1025            state == run(ST_ACCEPT, buf@.subrange(start as int, p as int)),
1026        decreases end - p
1027    ))]
1028    while end - p >= 8 {
1029        // SAFETY: `p + 8 <= end <= buf.len()` (this function's contract).
1030        let w = unsafe { load64(buf, p) };
1031        // ASCII re-skip: if we are between codepoints and all eight bytes are
1032        // ASCII, the DFA would walk ACCEPT→ACCEPT eight times. One masked
1033        // compare per chunk; always-false on dense multibyte (predictable).
1034        if state == DFA_ACCEPT && w & SIGN_BITS == 0 {
1035            #[cfg(feature = "verus")]
1036            proof! {
1037                lemma_signbits8(buf@, p as int);
1038                lemma_ascii_valid(buf@, p as int, p as int + 8);
1039                lemma_run_valid(buf@.subrange(p as int, p as int + 8));
1040                lemma_run_join(ST_ACCEPT, buf@, start as int, p as int, p as int + 8);
1041            }
1042            p += 8;
1043            continue;
1044        }
1045        #[cfg(feature = "verus")]
1046        proof! {
1047            assert(w >> 0u64 == w) by (bit_vector);
1048            lemma_chunk_snoc(buf@, start as int, p as int, w, 0, state);
1049        }
1050        state = step(state, w as u8);
1051        #[cfg(feature = "verus")]
1052        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 1, state); }
1053        state = step(state, (w >> 8) as u8);
1054        #[cfg(feature = "verus")]
1055        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 2, state); }
1056        state = step(state, (w >> 16) as u8);
1057        #[cfg(feature = "verus")]
1058        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 3, state); }
1059        state = step(state, (w >> 24) as u8);
1060        #[cfg(feature = "verus")]
1061        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 4, state); }
1062        state = step(state, (w >> 32) as u8);
1063        #[cfg(feature = "verus")]
1064        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 5, state); }
1065        state = step(state, (w >> 40) as u8);
1066        #[cfg(feature = "verus")]
1067        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 6, state); }
1068        state = step(state, (w >> 48) as u8);
1069        #[cfg(feature = "verus")]
1070        proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 7, state); }
1071        state = step(state, (w >> 56) as u8);
1072        p += 8;
1073        if state == DFA_REJECT {
1074            #[cfg(feature = "verus")]
1075            proof! {
1076                lemma_run_join(ST_ACCEPT, buf@, start as int, p as int, end as int);
1077                lemma_run_reject(buf@.subrange(p as int, end as int));
1078                lemma_run_valid(buf@.subrange(start as int, end as int));
1079            }
1080            return false;
1081        }
1082    }
1083    // Tail: 0..=7 bytes.
1084    #[cfg_attr(feature = "verus", verus_spec(
1085        invariant
1086            start <= p, p <= end, end <= buf@.len(),
1087            is_state(state),
1088            state == run(ST_ACCEPT, buf@.subrange(start as int, p as int)),
1089        decreases end - p
1090    ))]
1091    while p < end {
1092        // SAFETY: `p < end <= buf.len()` (this function's contract).
1093        state = step(state, unsafe { load8(buf, p) });
1094        #[cfg(feature = "verus")]
1095        proof! { lemma_run_snoc(ST_ACCEPT, buf@, start as int, p as int); }
1096        p += 1;
1097    }
1098    #[cfg(feature = "verus")]
1099    proof! { lemma_run_valid(buf@.subrange(start as int, end as int)); }
1100    state == DFA_ACCEPT
1101}
1102
1103// -- tests -------------------------------------------------------------------
1104
1105#[cfg(test)]
1106mod tests;