smoothutf8/lib.rs
1//! Portable, formally verified UTF-8 validation.
2//!
3//! This crate provides two entry points for validating that a byte sequence is
4//! well-formed UTF-8:
5//!
6//! - [`verify`] takes a plain `&[u8]` and is fully safe. It uses an 8-byte
7//! SWAR (SIMD-within-a-register) ASCII fast path, and handles the final
8//! partial chunk with overlapping in-bounds loads so that it never reads
9//! past the slice.
10//!
11//! - [`SlackBuf`] is the safe wrapper for zero-copy parsers that maintain at
12//! least [`SLACK`] readable bytes after every logical field (the "eps-copy"
13//! pattern used by hyperpb and UPB). The padding invariant is established
14//! once at the buffer level — typically via `SlackBuf::new_add_slack`
15//! (with `feature = "alloc"`) or [`SlackBuf::new_embedded_slack`] — and
16//! per-field
17//! [`verify`](SlackBuf::verify) / [`to_str`](SlackBuf::to_str) /
18//! [`le_u32`](SlackBuf::le_u32) calls are then safe and skip the per-string
19//! tail-handling cost.
20//!
21//! [`verify_with_slack`] is the underlying `unsafe` per-call entry point;
22//! prefer [`SlackBuf`] for new code.
23//!
24//! Both share the same multi-byte path: a shift-encoded DFA (the
25//! Vognsen/Langdale encoding of Höhrmann's UTF-8 automaton).
26//!
27//! The crate is `#![no_std]` and zero-dependency by default.
28//!
29//! # Features
30//!
31//! - **`alloc`** — adds `SlackBuf::new_add_slack`, which appends the padding
32//! to a `Vec<u8>` itself. Everything else stays no-alloc.
33//! - **`simdutf8`** — delegate inputs ≥ 128 bytes to
34//! [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8). Adds one
35//! dependency. Below the threshold the verified path runs.
36//! - Building with `-C target-cpu=x86-64-v3` (or `native` on a Haswell+
37//! machine) enables a 32-byte/iteration `movemask` ASCII prefix scan
38//! (no runtime dispatch; not covered by the proofs below) and BMI2
39//! `shrx` for the shift-DFA (~+40% on the multibyte path).
40//! - **`verus`** is verification-only (CI); it does not change runtime
41//! behaviour and is not intended to be combined with `simdutf8`.
42//!
43//! # Verification
44//!
45//! Under `--features verus` (portable 64-bit build, no `simdutf8`/`avx2`),
46//! Verus proves **functional correctness**: [`verify`] and [`verify_with_slack`]
47//! carry `ensures ret == is_valid_utf8(b@)`, where `spec::is_valid_utf8`
48//! is a direct transcription of Unicode §3.9 Table 3-7. Every bit-trick in
49//! the SWAR fast path and the multi-byte decoder is connected to that table
50//! by a `by(bit_vector)` lemma; nothing is `assume`d. Differential testing
51//! against [`core::str::from_utf8`] (proptest, libfuzzer) remains as a
52//! cross-check on the trusted leaf-load specs.
53//!
54//! Memory safety of the raw-pointer loads — the only `unsafe` on the
55//! verified path — is checked by two tools targeting complementary parts:
56//!
57//! - **Verus** (SMT-backed; enable with `--features verus`) verifies the
58//! slice-typed core end-to-end, including the multibyte state machine.
59//! The leaf load helpers are `external_body` with the spec
60//! `ret == pack64(buf@, at)` (and `pack32`/`pack16` for the sub-word
61//! loads) — the standard little-endian load contract.
62//!
63//! - **[RefinedRust]** (Rocq-backed; build with `--cfg rr`) verifies exactly
64//! those leaf bodies in `raw`: that `ptr.add(at).cast().read_unaligned()`
65//! is sound given separating ownership of `n` bytes with `at + N ≤ n`. The
66//! slice-typed core is out of its reach (it does not currently model Rust
67//! slices).
68//!
69//! Each tool's trusted base is what the other proves. The connecting step —
70//! that `&[u8]::as_ptr()` yields a pointer valid for `len()` initialized
71//! bytes — is the standard-library contract for slices. The
72//! `simdutf8`-feature delegation path, the `cfg(avx2)` prefix scan, and the
73//! `core::str::from_utf8` delegation on 32-bit targets are *not* covered by
74//! these proofs. [`from_utf8`]'s call to `from_utf8_unchecked` is justified by
75//! the functional-correctness proof of [`verify`], on the assumption that
76//! `spec::is_valid_utf8` coincides with Rust's `str` invariant — both are
77//! Unicode §3.9, but neither tool checks that equivalence.
78//!
79//! This crate is bool-only by design. Callers needing the byte position of a
80//! validation error should use [`core::str::from_utf8`].
81//!
82//! [RefinedRust]: https://plv.mpi-sws.org/refinedrust/
83
84#![no_std]
85// Verus's driver injects `stmt_expr_attributes` and friends via `-Zcrate-attr`;
86// `proc_macro_hygiene` is additionally needed for `#[verus_spec(invariant)]`
87// on `while` loops (see rust_verify_test/tests/common/mod.rs). The `verus`
88// feature is verification-only and is never built under stock rustc.
89#![cfg_attr(feature = "verus", feature(proc_macro_hygiene))]
90#![deny(unsafe_op_in_unsafe_fn)]
91#![warn(missing_docs)]
92// The load helpers below are one or two instructions each and sit in the hot
93// loop; a non-inlined call boundary there would dominate the work. The
94// `#[inline(always)]` is load-bearing, not decorative.
95#![allow(clippy::inline_always)]
96// RefinedRust attribute tool registration. The `rr` cfg is set only when
97// running under the RefinedRust frontend; on a normal build these are no-ops.
98#![cfg_attr(rr, feature(register_tool))]
99#![cfg_attr(rr, feature(custom_inner_attributes))]
100#![cfg_attr(rr, register_tool(rr))]
101#![cfg_attr(rr, rr::package("smooth_utf8"))]
102#![cfg_attr(rr, rr::coq_prefix("smooth_utf8"))]
103#![cfg_attr(rr, rr::include("ptr"))]
104
105#[cfg(feature = "alloc")]
106extern crate alloc;
107
108use core::ops::Range;
109
110/// `debug_assert!` that compiles out under Verus (which does not model
111/// panics; the Verus `requires` clause states the same condition).
112macro_rules! da {
113 ($($t:tt)*) => { #[cfg(not(feature = "verus"))] { core::debug_assert!($($t)*); } };
114}
115
116#[cfg(feature = "verus")]
117use spec::*;
118#[cfg(feature = "verus")]
119use verus_builtin_macros::{proof, verus_spec, verus_verify};
120#[cfg(feature = "verus")]
121#[allow(unused_imports)]
122use vstd::prelude::*;
123
124mod ascii_skip;
125mod raw;
126#[cfg(feature = "verus")]
127pub mod spec;
128
129/// Inputs of this length or longer are delegated to `simdutf8` when the
130/// `simdutf8` feature is enabled. Below it, the verified SWAR/slack path
131/// is faster (no runtime dispatch); at and above, simdutf8's Keiser–Lemire
132/// SIMD validator wins, decisively on mixed input. On aarch64 simdutf8's
133/// NEON kernel overtakes from ~64 B (vs ~128 B for AVX2 on x86), so the
134/// threshold is lowered there.
135#[cfg(all(feature = "simdutf8", not(target_arch = "aarch64")))]
136const LONG_THRESHOLD: usize = 128;
137#[cfg(all(feature = "simdutf8", target_arch = "aarch64"))]
138const LONG_THRESHOLD: usize = 64;
139
140/// Mask with the high bit of every byte set.
141#[cfg_attr(feature = "verus", verus_verify)]
142#[allow(clippy::unreadable_literal)] // grouping by byte is the readable form here
143const SIGN_BITS: u64 = 0x8080_8080_8080_8080;
144
145/// Number of readable bytes that must follow the logical end of the input
146/// passed to [`verify_with_slack`].
147///
148/// The current implementation reads at most 7 bytes past the logical end; the
149/// constant is rounded up to 8 to keep the contract simple and leave headroom.
150#[cfg_attr(feature = "verus", verus_verify)]
151pub const SLACK: usize = 8;
152
153/// Returns `true` if `b` is well-formed UTF-8.
154///
155/// This is functionally equivalent to `core::str::from_utf8(b).is_ok()`, but
156/// is tuned for the short, mostly-ASCII strings typical of serialized
157/// protocols. It never reads outside `b`: partial chunks are covered by
158/// overlapping in-bounds loads instead of over-reads or stack copies.
159///
160/// With `feature = "simdutf8"`, inputs at or above a per-arch threshold
161/// (128 bytes on x86-64, 64 on aarch64) are delegated to
162/// [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8).
163///
164/// ```
165/// assert!(smoothutf8::verify("hello, 世界! 🌍".as_bytes()));
166/// assert!(!smoothutf8::verify(&[0xC0, 0x80])); // overlong NUL
167/// ```
168#[cfg_attr(feature = "verus", verus_spec(ret =>
169 ensures ret == is_valid_utf8(b@),
170))]
171#[inline]
172#[must_use]
173pub fn verify(b: &[u8]) -> bool {
174 #[cfg(feature = "simdutf8")]
175 if b.len() >= LONG_THRESHOLD {
176 return verify_long(b, 0, b.len());
177 }
178 #[cfg(feature = "verus")]
179 proof! { assert(b@.subrange(0, b@.len() as int) =~= b@); }
180 // SAFETY: the range is `0..b.len()`, so `start <= end` and
181 // `end + 0 <= b.len()` hold trivially.
182 unsafe { verify_impl::<0>(b, 0..b.len()) }
183}
184
185/// The `simdutf8` delegation, outlined so it does not count against the
186/// public entry points' inline cost: with the call and its slice-panic
187/// plumbing in the hot body, LLVM stops inlining `verify`/`verify_with_slack`
188/// into callers, and the call boundary alone costs more than validating a
189/// short input (the same mechanism as the 0.2.1 inline-partition fix, one
190/// level up; see CHANGELOG 0.2.3 for the measurements).
191///
192/// Takes `buf` plus indices rather than a pre-built slice so the range
193/// check and its panic path live here, not in the inlined caller bodies —
194/// re-slicing at the call sites would reintroduce the regression. `#[cold]`
195/// is intentional even for long-input-heavy callers: the branch-weight bias
196/// costs at most one mispredict, amortized over a threshold's worth of SIMD
197/// validation, while short calls are the latency-sensitive ones.
198#[cfg(feature = "simdutf8")]
199#[cold]
200#[inline(never)]
201fn verify_long(buf: &[u8], start: usize, end: usize) -> bool {
202 simdutf8::basic::from_utf8(&buf[start..end]).is_ok()
203}
204
205/// Returns `Some(b as &str)` if `b` is well-formed UTF-8.
206///
207/// Single-scan: validates with [`verify`] and converts via
208/// [`core::str::from_utf8_unchecked`] on success. Drop-in replacement for
209/// `core::str::from_utf8(b).ok()`; callers needing the byte position of a
210/// validation error should use [`core::str::from_utf8`] directly.
211///
212/// ```
213/// assert_eq!(smoothutf8::from_utf8(b"abc"), Some("abc"));
214/// assert_eq!(smoothutf8::from_utf8(&[0xFF]), None);
215/// ```
216#[inline]
217#[must_use]
218pub fn from_utf8(b: &[u8]) -> Option<&str> {
219 if verify(b) {
220 // SAFETY: `verify` returned true, so `b` is valid UTF-8.
221 Some(unsafe { core::str::from_utf8_unchecked(b) })
222 } else {
223 None
224 }
225}
226
227/// Renamed to [`from_utf8`].
228#[deprecated(since = "0.2.2", note = "renamed to `from_utf8`")]
229#[inline]
230#[must_use]
231pub fn to_str(b: &[u8]) -> Option<&str> {
232 from_utf8(b)
233}
234
235/// Returns `true` if `buf[range]` is well-formed UTF-8, using the slack-buffer
236/// fast path.
237///
238/// Prefer [`SlackBuf`] for new code: it hoists this function's per-call
239/// `unsafe` precondition to a buffer-level type invariant, so per-field
240/// validation is safe.
241///
242/// This variant performs unaligned 8-byte loads that may read up to
243/// [`SLACK`] − 1 bytes past `range.end`. Those bytes are masked off and never
244/// influence the result; they need only be *readable*.
245///
246/// # Safety
247///
248/// The caller must guarantee all of the following:
249///
250/// - `range.start <= range.end`
251/// - `range.end + SLACK <= buf.len()`
252///
253/// Because `buf` is a slice, satisfying the second condition is sufficient to
254/// keep every load within the allocation that `buf` points into; the over-read
255/// stays inside `buf` and is therefore not an out-of-bounds memory access at
256/// the machine level. Violating either condition is undefined behaviour.
257///
258/// This is why the function takes the full backing buffer plus a logical
259/// `range`, rather than a pre-sliced `&buf[range]`: the slack bytes past
260/// `range.end` must remain part of the slice so that reading them is sound.
261///
262/// With `feature = "simdutf8"`, inputs at or above a per-arch threshold
263/// (128 bytes on x86-64, 64 on aarch64) are delegated to
264/// [`simdutf8::basic::from_utf8`](https://docs.rs/simdutf8) (the slack region
265/// is not used on that path).
266///
267/// # Examples
268///
269/// ```
270/// use smoothutf8::{verify_with_slack, SLACK};
271/// let mut buf = b"field-value".to_vec();
272/// let end = buf.len();
273/// buf.resize(end + SLACK, 0); // your decoder's eps-copy padding
274/// // SAFETY: `0 <= end` and `end + SLACK == buf.len()`.
275/// assert!(unsafe { verify_with_slack(&buf, 0..end) });
276/// ```
277#[cfg_attr(feature = "verus", verus_spec(ret =>
278 requires
279 range.start <= range.end,
280 range.end + SLACK <= buf@.len(),
281 ensures
282 ret == is_valid_utf8(buf@.subrange(range.start as int, range.end as int)),
283))]
284#[inline]
285#[must_use]
286pub unsafe fn verify_with_slack(buf: &[u8], range: Range<usize>) -> bool {
287 da!(range.start <= range.end);
288 da!(range.end.saturating_add(SLACK) <= buf.len());
289 #[cfg(feature = "simdutf8")]
290 if range.end - range.start >= LONG_THRESHOLD {
291 return verify_long(buf, range.start, range.end);
292 }
293 // SAFETY: `range.start <= range.end` and `range.end + SLACK <= buf.len()`
294 // are this function's own documented contract.
295 unsafe { verify_impl::<SLACK>(buf, range) }
296}
297
298// -- SlackBuf: safe wrapper over the slack-buffer invariant ------------------
299
300pub use slack_buf::SlackBuf;
301
302mod slack_buf {
303 #[cfg(feature = "verus")]
304 use super::{is_valid_utf8, verus_spec, verus_verify};
305 use super::{verify_with_slack, Range, SLACK};
306 #[cfg(feature = "verus")]
307 #[allow(unused_imports)]
308 use vstd::prelude::*;
309
310 /// A borrowed byte buffer whose final [`SLACK`] bytes are padding.
311 ///
312 /// Any read of width up to `SLACK` starting at a position `≤`
313 /// [`payload_len`](Self::payload_len) stays in bounds, so per-field
314 /// validation and fixed-width loads need no `unsafe` at the call site.
315 #[cfg_attr(feature = "verus", verus_verify)]
316 #[repr(transparent)]
317 #[derive(Clone, Copy)]
318 #[cfg_attr(not(feature = "verus"), derive(Debug))]
319 // Field private to this module: every construction goes through a
320 // constructor that establishes the `len >= SLACK` invariant.
321 pub struct SlackBuf<'a>(&'a [u8]);
322
323 // -- Verus-verified surface ---------------------------------------------
324 //
325 // `verus_spec` on inherent-impl methods must be the bare attribute name
326 // (the outer `verus_verify` proc-macro matches it literally and does not
327 // unwrap `cfg_attr` or path-qualified forms), so the verus impl is a
328 // separate `cfg`-gated block. The non-verus impl below is the runtime
329 // build; bodies are kept in sync.
330 #[cfg(feature = "verus")]
331 verus_builtin_macros::verus! {
332 impl<'a> View for SlackBuf<'a> {
333 type V = Seq<u8>;
334 closed spec fn view(&self) -> Seq<u8> { self.0@ }
335 }
336 }
337
338 #[cfg(feature = "verus")]
339 #[verus_verify]
340 #[allow(missing_docs)] // the runtime impl below carries the docs
341 impl<'a> SlackBuf<'a> {
342 #[verus_spec(ret =>
343 requires buf@.len() >= SLACK,
344 ensures ret@ == buf@,
345 )]
346 pub unsafe fn new_unchecked(buf: &'a [u8]) -> Self {
347 Self(buf)
348 }
349
350 #[verus_spec(ret =>
351 requires buf@.len() >= SLACK,
352 ensures ret@ == buf@,
353 )]
354 pub fn new_embedded_slack(buf: &'a [u8]) -> Self {
355 Self(buf)
356 }
357
358 #[verus_spec(ret =>
359 ensures ret@ == self@,
360 )]
361 pub fn as_bytes(&self) -> &'a [u8] {
362 self.0
363 }
364
365 #[verus_spec(ret =>
366 requires self@.len() >= SLACK,
367 ensures ret == self@.len() - SLACK,
368 )]
369 pub fn payload_len(&self) -> usize {
370 self.0.len() - SLACK
371 }
372
373 #[verus_spec(ret =>
374 requires
375 range.start <= range.end,
376 range.end + SLACK <= self@.len(),
377 ensures
378 ret == is_valid_utf8(self@.subrange(range.start as int, range.end as int)),
379 )]
380 pub fn verify(&self, range: Range<usize>) -> bool {
381 unsafe { verify_with_slack(self.0, range) }
382 }
383 }
384
385 // -- runtime (non-verus) surface ---------------------------------------
386
387 // `le_u32`'s SAFETY argument relies on this; pin it at compile time so a
388 // future change to `SLACK` cannot silently make the load OOB.
389 #[cfg(not(feature = "verus"))]
390 const _: () = assert!(SLACK >= 4);
391
392 #[cfg(not(feature = "verus"))]
393 impl<'a> SlackBuf<'a> {
394 /// Wraps `buf` without checking the length invariant.
395 ///
396 /// # Safety
397 /// `buf.len() >= SLACK`.
398 #[inline]
399 #[must_use]
400 pub const unsafe fn new_unchecked(buf: &'a [u8]) -> Self {
401 debug_assert!(buf.len() >= SLACK);
402 Self(buf)
403 }
404
405 /// Wraps `buf`, or returns `None` if `buf.len() < SLACK`.
406 #[inline]
407 #[must_use]
408 pub const fn new(buf: &'a [u8]) -> Option<Self> {
409 if buf.len() >= SLACK {
410 Some(Self(buf))
411 } else {
412 None
413 }
414 }
415
416 /// Wraps `buf`, treating its final [`SLACK`] bytes as padding.
417 ///
418 /// This is the alloc-free constructor for callers that have already
419 /// padded the buffer themselves. With the `alloc` feature,
420 #[cfg_attr(feature = "alloc", doc = " [`new_add_slack`](Self::new_add_slack)")]
421 #[cfg_attr(not(feature = "alloc"), doc = " `new_add_slack`")]
422 /// appends the padding for you.
423 ///
424 /// # Panics
425 /// If `buf.len() < SLACK`.
426 #[inline]
427 #[must_use]
428 #[track_caller]
429 pub const fn new_embedded_slack(buf: &'a [u8]) -> Self {
430 assert!(buf.len() >= SLACK, "buf must carry SLACK trailing bytes");
431 Self(buf)
432 }
433
434 /// Appends [`SLACK`] zero bytes to `v` and wraps the result.
435 ///
436 /// `v` is borrowed for `'a`: it cannot be grown or otherwise mutated
437 /// while the returned `SlackBuf` is alive, and on return its length is
438 /// `original_len + SLACK`. May reallocate; reserve `SLACK` extra
439 /// capacity before filling `v` if that matters.
440 #[cfg(feature = "alloc")]
441 #[inline]
442 #[must_use]
443 pub fn new_add_slack(v: &'a mut alloc::vec::Vec<u8>) -> Self {
444 v.extend_from_slice(&[0u8; SLACK]);
445 Self(v.as_slice())
446 }
447
448 /// The full backing slice, including the trailing slack bytes.
449 #[inline]
450 #[must_use]
451 pub const fn as_bytes(&self) -> &'a [u8] {
452 self.0
453 }
454
455 /// `as_bytes().len() - SLACK`: the largest valid `range.end` for
456 /// [`verify`](Self::verify) and the largest valid `at` for the
457 /// fixed-width loads.
458 #[inline]
459 #[must_use]
460 pub const fn payload_len(&self) -> usize {
461 self.0.len() - SLACK
462 }
463
464 /// Returns `true` if `self.as_bytes()[range]` is well-formed UTF-8,
465 /// using the slack-buffer fast path.
466 ///
467 /// # Panics
468 /// If `range.start > range.end` or `range.end > self.payload_len()`.
469 #[inline]
470 #[must_use]
471 pub fn verify(&self, range: Range<usize>) -> bool {
472 // One combined assert (not two) so there is a single panic call
473 // site and the function stays prologue-free for the tail call.
474 assert!(range.start <= range.end && range.end <= self.payload_len());
475 // SAFETY: the assert establishes both of `verify_with_slack`'s
476 // preconditions (`start <= end` and `end + SLACK <= len`, given the
477 // type invariant `len >= SLACK`).
478 unsafe { verify_with_slack(self.0, range) }
479 }
480
481 /// Returns `Some(self.as_bytes()[range] as &str)` if the range is
482 /// well-formed UTF-8. Single-scan; the returned slice borrows the
483 /// backing buffer for `'a`.
484 ///
485 /// # Panics
486 /// Same conditions as [`verify`](Self::verify).
487 #[inline]
488 #[must_use]
489 pub fn to_str(&self, range: Range<usize>) -> Option<&'a str> {
490 if self.verify(range.clone()) {
491 // SAFETY: `verify`'s asserts established
492 // `range.start <= range.end <= len - SLACK <= len`, so
493 // `get_unchecked(range)` is in-bounds; `verify` returned true,
494 // so the bytes are valid UTF-8.
495 Some(unsafe { core::str::from_utf8_unchecked(self.0.get_unchecked(range)) })
496 } else {
497 None
498 }
499 }
500
501 /// Loads 4 bytes at `at..at+4` as a little-endian `u32`. The slack
502 /// invariant (`SLACK >= 4`) makes this a single unaligned load with no
503 /// tail handling.
504 ///
505 /// # Panics
506 /// If `at > self.payload_len()`.
507 #[inline]
508 #[must_use]
509 pub fn le_u32(&self, at: usize) -> u32 {
510 assert!(at <= self.payload_len());
511 // SAFETY: `at <= len - SLACK` and `SLACK >= 4` (const-asserted
512 // above), so `at + 4 <= len`; `as_ptr()` carries provenance over
513 // the full slice and `read_unaligned` avoids alignment UB.
514 u32::from_le(unsafe { self.0.as_ptr().add(at).cast::<u32>().read_unaligned() })
515 }
516 }
517}
518
519// -- leaf loads (Verus-trusted, RefinedRust-verified) ------------------------
520
521/// Load 8 bytes at `buf[at..at+8]` as a little-endian `u64`.
522///
523/// # Safety
524/// `at + 8 <= buf.len()`.
525#[cfg_attr(feature = "verus", verus_verify(external_body))]
526#[cfg_attr(feature = "verus", verus_spec(ret =>
527 requires at + 8 <= buf@.len(),
528 ensures ret == pack64(buf@, at as int),
529))]
530#[inline(always)]
531unsafe fn load64(buf: &[u8], at: usize) -> u64 {
532 da!(at + 8 <= buf.len());
533 // SAFETY: `at + 8 <= buf.len()`, so `buf.as_ptr()` is valid for `at + 8`
534 // bytes; this is exactly `load64_raw`'s precondition.
535 unsafe { raw::load64_raw(buf.as_ptr(), at) }
536}
537
538/// Load 4 bytes at `buf[at..at+4]` as a little-endian `u32`.
539///
540/// # Safety
541/// `at + 4 <= buf.len()`.
542#[cfg_attr(feature = "verus", verus_verify(external_body))]
543#[cfg_attr(feature = "verus", verus_spec(ret =>
544 requires at + 4 <= buf@.len(),
545 ensures ret == pack32(buf@, at as int),
546))]
547#[inline(always)]
548unsafe fn load32(buf: &[u8], at: usize) -> u32 {
549 da!(at + 4 <= buf.len());
550 // SAFETY: `at + 4 <= buf.len()`; see `load64`.
551 unsafe { raw::load32_raw(buf.as_ptr(), at) }
552}
553
554/// Load 2 bytes at `buf[at..at+2]` as a little-endian `u16`.
555///
556/// # Safety
557/// `at + 2 <= buf.len()`.
558#[cfg_attr(feature = "verus", verus_verify(external_body))]
559#[cfg_attr(feature = "verus", verus_spec(ret =>
560 requires at + 2 <= buf@.len(),
561 ensures ret == pack16(buf@, at as int),
562))]
563#[inline(always)]
564unsafe fn load16(buf: &[u8], at: usize) -> u16 {
565 da!(at + 2 <= buf.len());
566 // SAFETY: `at + 2 <= buf.len()`; see `load64`.
567 unsafe { raw::load16_raw(buf.as_ptr(), at) }
568}
569
570/// Load 1 byte at `buf[at]`.
571///
572/// # Safety
573/// `at < buf.len()`.
574#[cfg_attr(feature = "verus", verus_verify(external_body))]
575#[cfg_attr(feature = "verus", verus_spec(ret =>
576 requires at < buf@.len(),
577 ensures ret == buf@[at as int],
578))]
579#[inline(always)]
580unsafe fn load8(buf: &[u8], at: usize) -> u8 {
581 da!(at < buf.len());
582 // SAFETY: `at < buf.len()`; see `load64`.
583 unsafe { raw::load8_raw(buf.as_ptr(), at) }
584}
585
586// -- implementation ----------------------------------------------------------
587
588/// Ranges shorter than 8 bytes: no `ascii_skip`, no word loop.
589///
590/// With slack (`PAD >= 8`) the whole range is covered by one unconditional
591/// 8-byte load whose bytes past `end` are masked off. Without slack, a pair
592/// of overlapping in-bounds sub-word loads covers the range; OR-ing them is
593/// sound for the sign-bit test because duplicated bytes contribute the same
594/// sign bits twice. Neither shape stores to the stack — the variable-length
595/// zero-padded copy this replaces compiled to a libc `memcpy` call plus a
596/// store-to-load-forwarding stall, which dominated short-input latency.
597///
598/// # Safety
599/// `start <= end`, `end - start < 8`, and `end + PAD <= buf.len()`.
600/// (Machine-checked as `requires` under `--features verus`.)
601#[cfg_attr(feature = "verus", verus_spec(ret =>
602 requires
603 start <= end,
604 end - start < 8,
605 end + PAD <= buf@.len(),
606 ensures
607 ret == is_valid_utf8(buf@.subrange(start as int, end as int)),
608))]
609#[inline(always)]
610unsafe fn verify_short<const PAD: usize>(buf: &[u8], start: usize, end: usize) -> bool {
611 da!(start <= end && end - start < 8 && end.saturating_add(PAD) <= buf.len());
612 if PAD >= 8 {
613 if start == end {
614 #[cfg(feature = "verus")]
615 proof! { assert(buf@.subrange(start as int, end as int).len() == 0); }
616 return true;
617 }
618 // 1..=7, so the mask shift below is in `8..=56` — never 0 or 64.
619 let left = end - start;
620 // SAFETY: `start < end` and `end + PAD <= buf.len()` with `PAD >= 8`
621 // give `start + 8 < end + PAD <= buf.len()`; the bytes read past
622 // `end` land in the slack and are masked off below.
623 let bytes = unsafe { load64(buf, start) };
624 let mask = SIGN_BITS >> ((8 - left) * 8);
625 if bytes & mask != 0 {
626 return verify_multibyte(buf, start, end);
627 }
628 #[cfg(feature = "verus")]
629 proof! {
630 assert forall |j: int| 0 <= j < left
631 implies #[trigger] byte64(bytes, j) == buf@[start + j] by {
632 lemma_pack64_byte(buf@, start as int, j);
633 };
634 assert(mask == sign_mask(left as int));
635 lemma_mask_zero_ascii(buf@, start as int, left as int, bytes);
636 lemma_ascii_valid(buf@, start as int, end as int);
637 }
638 return true;
639 }
640 if end - start >= 4 {
641 // SAFETY: `end + PAD <= buf.len()` gives `end <= buf.len()`, and
642 // `start + 4 <= end` in this branch, so both loads stay inside
643 // `[start, end)`; they overlap when `end - start < 8`, which the
644 // sign-bit OR absorbs.
645 let lo = unsafe { load32(buf, start) };
646 let hi = unsafe { load32(buf, end - 4) };
647 if (lo | hi) & 0x8080_8080 != 0 {
648 return verify_multibyte(buf, start, end);
649 }
650 #[cfg(feature = "verus")]
651 proof! {
652 assert((lo | hi) & 0x8080_8080u32 == 0
653 ==> lo & 0x8080_8080u32 == 0 && hi & 0x8080_8080u32 == 0) by (bit_vector);
654 lemma_signbits4(buf@, start as int);
655 lemma_signbits4(buf@, end as int - 4);
656 assert(all_ascii(buf@, start as int, end as int));
657 lemma_ascii_valid(buf@, start as int, end as int);
658 }
659 return true;
660 }
661 if end - start >= 2 {
662 // SAFETY: `end <= buf.len()` as above, and `start + 2 <= end` in
663 // this branch, so both loads stay inside `[start, end)`.
664 let lo = unsafe { load16(buf, start) };
665 let hi = unsafe { load16(buf, end - 2) };
666 if (lo | hi) & 0x8080 != 0 {
667 return verify_multibyte(buf, start, end);
668 }
669 #[cfg(feature = "verus")]
670 proof! {
671 assert((lo | hi) & 0x8080u16 == 0
672 ==> lo & 0x8080u16 == 0 && hi & 0x8080u16 == 0) by (bit_vector);
673 lemma_signbits2(buf@, start as int);
674 lemma_signbits2(buf@, end as int - 2);
675 assert(all_ascii(buf@, start as int, end as int));
676 lemma_ascii_valid(buf@, start as int, end as int);
677 }
678 return true;
679 }
680 if start < end {
681 // end - start == 1
682 // SAFETY: `start < end <= buf.len()`.
683 let b = unsafe { load8(buf, start) };
684 if b >= 0x80 {
685 return verify_multibyte(buf, start, end);
686 }
687 #[cfg(feature = "verus")]
688 proof! {
689 assert(all_ascii(buf@, start as int, end as int));
690 lemma_ascii_valid(buf@, start as int, end as int);
691 }
692 return true;
693 }
694 #[cfg(feature = "verus")]
695 proof! { assert(buf@.subrange(start as int, end as int).len() == 0); }
696 true
697}
698
699/// # Safety
700/// `range.start <= range.end` and `range.end + PAD <= buf.len()`.
701/// (Machine-checked as `requires` under `--features verus`.)
702#[cfg_attr(feature = "verus", verus_spec(ret =>
703 requires
704 range.start <= range.end,
705 range.end + PAD <= buf@.len(),
706 ensures
707 ret == is_valid_utf8(buf@.subrange(range.start as int, range.end as int)),
708))]
709// `inline(always)`: with two call sites in a binary (e.g. a caller using both
710// `verify_with_slack` and `SlackBuf::verify`), plain `#[inline]` lets LLVM
711// out-line this — and the call boundary then dominates the work on short
712// inputs (~2 ns of prologue vs ~1.5 ns of actual validation at 8 B).
713#[inline(always)]
714unsafe fn verify_impl<const PAD: usize>(buf: &[u8], range: Range<usize>) -> bool {
715 let start = range.start;
716 let end = range.end;
717 da!(start <= end && end.saturating_add(PAD) <= buf.len());
718 if end - start < 8 {
719 // SAFETY: `end - start < 8` was just checked; `start <= end` and
720 // `end + PAD <= buf.len()` are this function's own contract.
721 return unsafe { verify_short::<PAD>(buf, start, end) };
722 }
723 let mut p = start;
724
725 // ---- ASCII fast path: full STEP-byte blocks ---------------------------
726 // Portable SWAR (16 B/iter) by default; AVX2 or NEON (32 B/iter) when
727 // available. Returns at the first non-ASCII block or with `< STEP` left.
728 p = ascii_skip::skip(buf, p, end);
729 if end - p >= ascii_skip::STEP {
730 #[cfg(feature = "verus")]
731 proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
732 return verify_multibyte(buf, p, end);
733 }
734
735 // ---- ASCII fast path: remaining full 8-byte words ---------------------
736 // At most one iteration when `STEP == 16`; up to three when `STEP == 32`.
737 // (A 16-byte-pair variant of this loop was tried and regressed Neoverse-V2
738 // by 10-40% at 8-128 B — the extra tail branches cost more than the saved
739 // test on a path the 8-byte loop already runs at one cycle per word.)
740 #[cfg_attr(feature = "verus", verus_spec(
741 invariant
742 start == range.start, end == range.end,
743 start <= p, p <= end, end + PAD <= buf@.len(),
744 all_ascii(buf@, start as int, p as int),
745 decreases end - p
746 ))]
747 while end - p >= 8 {
748 // SAFETY: `p + 8 <= end <= buf.len()` (the latter from
749 // `end + PAD <= buf.len()`, `PAD >= 0`).
750 let bytes = unsafe { load64(buf, p) };
751 if bytes & SIGN_BITS != 0 {
752 #[cfg(feature = "verus")]
753 proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
754 return verify_multibyte(buf, p, end);
755 }
756 #[cfg(feature = "verus")]
757 proof! {
758 lemma_signbits8(buf@, p as int);
759 lemma_ascii_extend(buf@, start as int, p as int, p as int + 8);
760 }
761 p += 8;
762 }
763
764 // ---- ASCII fast path: 1..=7 trailing bytes via the last-8-byte window -
765 if p < end {
766 // SAFETY: `end >= start + 8 >= 8` and `end <= buf.len()` (from
767 // `end + PAD <= buf.len()`, `PAD >= 0`).
768 let bytes = unsafe { load64(buf, end - 8) };
769 if bytes & SIGN_BITS != 0 {
770 #[cfg(feature = "verus")]
771 proof! { lemma_ascii_prefix_iff(buf@, start as int, p as int, end as int); }
772 return verify_multibyte(buf, p, end);
773 }
774 #[cfg(feature = "verus")]
775 proof! {
776 lemma_signbits8(buf@, end as int - 8);
777 // `[p, end) ⊆ [end-8, end)` since `end - p <= 8`.
778 assert(all_ascii(buf@, p as int, end as int));
779 lemma_ascii_extend(buf@, start as int, p as int, end as int);
780 lemma_ascii_valid(buf@, start as int, end as int);
781 }
782 } else {
783 #[cfg(feature = "verus")]
784 proof! { lemma_ascii_valid(buf@, start as int, end as int); }
785 }
786
787 true
788}
789
790// -- Höhrmann-style UTF-8 DFA -----------------------------------------------
791//
792// The shift-DFA's per-byte step is `(ROW[byte] >> state) & 63` — a 64-bit
793// variable shift. That's one instruction on 64-bit targets but 7-13 on 32-bit
794// (Cortex-M, i686), where it ends up slower than the standard library's
795// branchy validator. On non-64-bit targets `verify_multibyte` delegates to
796// `core::str::from_utf8` instead, and the tables below are dead `const`s
797// (no runtime footprint). The ASCII fast path above is kept regardless.
798//
799// `target_pointer_width` is a proxy for "has native u64 ops". wasm32 is opted
800// in explicitly: it has native `i64.shr_u`, so the DFA runs at full speed even
801// though pointers are 32-bit.
802
803#[cfg(not(any(target_pointer_width = "64", target_arch = "wasm32")))]
804fn verify_multibyte(buf: &[u8], start: usize, end: usize) -> bool {
805 core::str::from_utf8(&buf[start..end]).is_ok()
806}
807
808// Byte → class (12 classes) and state×class → next-state tables. State values
809// are pre-multiplied by 12 so the transition is a single add+load. Tables are
810// the canonical Höhrmann set (also used by `bstr`); see
811// <https://bjoern.hoehrmann.de/utf-8/decoder/dfa/>.
812//
813// `TRANS` is padded from 108 to 256 entries (with REJECT) so that indexing by
814// a `u8` sum lets the compiler elide the bounds check.
815
816#[allow(dead_code)] // documentation for `TRANS`; the hot path uses `DFA_ACCEPT`
817const ACCEPT: u8 = 12;
818#[allow(dead_code)]
819const REJECT: u8 = 0;
820
821#[rustfmt::skip]
822const CLASS: [u8; 256] = [
823 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
824 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
825 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
826 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
827 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
828 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
829 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
830 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
831];
832
833#[rustfmt::skip]
834const TRANS: [u8; 256] = [
835 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
836 12, 0,24,36,60,96,84, 0, 0, 0,48,72,
837 0,12, 0, 0, 0, 0, 0,12, 0,12, 0, 0,
838 0,24, 0, 0, 0, 0, 0,24, 0,24, 0, 0,
839 0, 0, 0, 0, 0, 0, 0,24, 0, 0, 0, 0,
840 0,24, 0, 0, 0, 0, 0, 0, 0,24, 0, 0,
841 0, 0, 0, 0, 0, 0, 0,36, 0,36, 0, 0,
842 0,36, 0, 0, 0, 0, 0,36, 0,36, 0, 0,
843 0,36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
844 // padding (REJECT) so that a u8 index elides the bounds check
845 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
846 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
847 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
848 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
849 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
850];
851
852/// Shift-based DFA row table: `ROW[byte]` packs the next state for *every*
853/// current state into one `u64`, at 6-bit stride. The transition is then
854/// `state' = (ROW[byte] >> state) & 63` — the only load depends on `byte`
855/// (off the critical path), and the state→state' chain is one shift + one
856/// mask. This is the Vognsen/Langdale encoding of the Höhrmann automaton.
857///
858/// State values are `index * 6` (so they double as shift amounts):
859/// REJECT = 0, ACCEPT = 6, intermediates 12..=48. Nine states × 6 bits = 54
860/// bits used per row.
861const fn build_rows() -> [u64; 256] {
862 let mut row = [0u64; 256];
863 let mut b = 0usize;
864 while b < 256 {
865 let class = CLASS[b] as usize;
866 let mut s = 0usize;
867 while s < 9 {
868 // `TRANS` next-state in the {0,12,24,..,96} encoding → index → ×6.
869 let next = (TRANS[s * 12 + class] / 12) as u64 * 6;
870 row[b] |= next << (s * 6);
871 s += 1;
872 }
873 b += 1;
874 }
875 row
876}
877const ROW: [u64; 256] = build_rows();
878
879/// Shift-DFA state values (`index * 6`, so each is its own shift amount).
880/// Names follow the Unicode §3.9 Table 3-7 partial-match position each
881/// represents; the proof's per-state-meaning lemmas reference these.
882mod state {
883 /// Absorbing reject.
884 pub const REJECT: u64 = 0;
885 /// At a sequence boundary: input so far is well-formed.
886 pub const ACCEPT: u64 = 6;
887 /// One continuation byte (`80..=BF`) remaining.
888 pub const C1: u64 = 12;
889 /// Two continuation bytes remaining.
890 pub const C2: u64 = 18;
891 /// After `E0`: next byte must be `A0..=BF`, then one continuation.
892 pub const E0: u64 = 24;
893 /// After `ED`: next byte must be `80..=9F`, then one continuation.
894 pub const ED: u64 = 30;
895 /// After `F0`: next byte must be `90..=BF`, then two continuations.
896 pub const F0: u64 = 36;
897 /// Three continuation bytes remaining (after `F1..=F3`).
898 pub const C3: u64 = 42;
899 /// After `F4`: next byte must be `80..=8F`, then two continuations.
900 pub const F4: u64 = 48;
901}
902#[cfg_attr(feature = "verus", verus_verify)]
903const DFA_ACCEPT: u64 = 6;
904#[cfg_attr(feature = "verus", verus_verify)]
905const DFA_REJECT: u64 = 0;
906const _: [(); 0] = [(); (DFA_ACCEPT != state::ACCEPT) as usize];
907const _: [(); 0] = [(); (DFA_REJECT != state::REJECT) as usize];
908const _: [(); 0] = [(); (state::F4 + 6 > 64) as usize]; // 9 states fit in u64
909
910// Compile-time check that the shift encoding agrees with `TRANS`/`CLASS` at
911// the row that drives state assignment (ACCEPT × every lead-byte class). The
912// full 2304-cell equivalence is the Verus `by(compute)` obligation.
913macro_rules! row_check {
914 ($($byte:literal -> $state:path,)*) => {
915 $(const _: [(); 0] = [(); ((ROW[$byte] >> state::ACCEPT) & 63 != $state) as usize];)*
916 };
917}
918row_check! {
919 0x41 -> state::ACCEPT, 0x80 -> state::REJECT, 0xC1 -> state::REJECT,
920 0xC2 -> state::C1, 0xE0 -> state::E0, 0xE1 -> state::C2,
921 0xED -> state::ED, 0xEE -> state::C2, 0xF0 -> state::F0,
922 0xF1 -> state::C3, 0xF4 -> state::F4, 0xF5 -> state::REJECT,
923}
924
925/// `ROW[byte]`. The Verus spec is [`spec_row`]; the compile-time
926/// `_CHECK_SPEC_ROW` assertion below validates `ROW[b]` against a literal
927/// transcription of `spec_row` for all 256 bytes, so the residual trusted
928/// step is the visually-auditable match between that literal and
929/// `spec::spec_row` (a `spec fn`, not callable from const-eval).
930#[cfg_attr(feature = "verus", verus_verify(external_body))]
931#[cfg_attr(feature = "verus", verus_spec(ret =>
932 ensures ret == spec_row(byte),
933))]
934#[inline(always)]
935#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
936const fn row(byte: u8) -> u64 {
937 ROW[byte as usize]
938}
939
940/// Exhaustive const-eval check that `row`'s body agrees with its Verus spec.
941#[allow(clippy::unreadable_literal, clippy::cast_possible_truncation)]
942const _CHECK_SPEC_ROW: () = {
943 let mut b = 0usize;
944 while b < 256 {
945 let bb = b as u8;
946 // Literal transcription of `spec::spec_row`.
947 let spec: u64 = if bb <= 0x7F {
948 0x0000000000000180
949 } else if bb <= 0x8F {
950 0x0012480300306000
951 } else if bb <= 0x9F {
952 0x0000492300306000
953 } else if bb <= 0xBF {
954 0x000049200C306000
955 } else if bb <= 0xC1 {
956 0
957 } else if bb <= 0xDF {
958 0x0000000000000300
959 } else if bb == 0xE0 {
960 0x0000000000000600
961 } else if bb <= 0xEC {
962 0x0000000000000480
963 } else if bb == 0xED {
964 0x0000000000000780
965 } else if bb <= 0xEF {
966 0x0000000000000480
967 } else if bb == 0xF0 {
968 0x0000000000000900
969 } else if bb <= 0xF3 {
970 0x0000000000000A80
971 } else if bb == 0xF4 {
972 0x0000000000000C00
973 } else {
974 0
975 };
976 assert!(ROW[b] == spec);
977 b += 1;
978 }
979};
980
981/// One shift-DFA step. Critical-path latency: shr + and (≈2 cyc); the
982/// `ROW[byte]` load depends only on the input byte.
983#[cfg_attr(feature = "verus", verus_verify)]
984#[cfg_attr(feature = "verus", verus_spec(ret =>
985 requires is_state(state),
986 ensures ret == spec_step(state, byte), is_state(ret),
987))]
988#[inline(always)]
989#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
990const fn step(state: u64, byte: u8) -> u64 {
991 #[cfg(feature = "verus")]
992 proof! { lemma_row_step(state, byte); }
993 (row(byte) >> (state & 63)) & 63
994}
995
996/// Slow path: at least one byte at or after `start` has its high bit set.
997///
998/// Functional contract: returns `is_valid_utf8(buf[start..end])`.
999#[cfg_attr(feature = "verus", verus_verify)]
1000#[cfg_attr(feature = "verus", verus_spec(ret =>
1001 requires
1002 start < end,
1003 end <= buf@.len(),
1004 ensures
1005 ret == is_valid_utf8(buf@.subrange(start as int, end as int)),
1006))]
1007#[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))]
1008// Not `#[inline]`: this is the cold path (reached only when the ASCII fast
1009// path finds a high bit). With `verify_impl` `inline(always)`, leaving this
1010// out-of-line keeps the per-call-site inlined body to ~40 instructions
1011// instead of ~250.
1012#[allow(clippy::cast_possible_truncation)] // `(w >> 8k) as u8` is byte extraction
1013#[allow(clippy::too_many_lines)] // proof annotations
1014fn verify_multibyte(buf: &[u8], start: usize, end: usize) -> bool {
1015 let mut state = DFA_ACCEPT;
1016 let mut p = start;
1017 #[cfg(feature = "verus")]
1018 proof! { assert(buf@.subrange(start as int, start as int).len() == 0); }
1019
1020 // Full 8-byte chunks: one unaligned load, eight unrolled DFA steps.
1021 #[cfg_attr(feature = "verus", verus_spec(
1022 invariant
1023 start <= p, p <= end, end <= buf@.len(),
1024 is_state(state),
1025 state == run(ST_ACCEPT, buf@.subrange(start as int, p as int)),
1026 decreases end - p
1027 ))]
1028 while end - p >= 8 {
1029 // SAFETY: `p + 8 <= end <= buf.len()` (this function's contract).
1030 let w = unsafe { load64(buf, p) };
1031 // ASCII re-skip: if we are between codepoints and all eight bytes are
1032 // ASCII, the DFA would walk ACCEPT→ACCEPT eight times. One masked
1033 // compare per chunk; always-false on dense multibyte (predictable).
1034 if state == DFA_ACCEPT && w & SIGN_BITS == 0 {
1035 #[cfg(feature = "verus")]
1036 proof! {
1037 lemma_signbits8(buf@, p as int);
1038 lemma_ascii_valid(buf@, p as int, p as int + 8);
1039 lemma_run_valid(buf@.subrange(p as int, p as int + 8));
1040 lemma_run_join(ST_ACCEPT, buf@, start as int, p as int, p as int + 8);
1041 }
1042 p += 8;
1043 continue;
1044 }
1045 #[cfg(feature = "verus")]
1046 proof! {
1047 assert(w >> 0u64 == w) by (bit_vector);
1048 lemma_chunk_snoc(buf@, start as int, p as int, w, 0, state);
1049 }
1050 state = step(state, w as u8);
1051 #[cfg(feature = "verus")]
1052 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 1, state); }
1053 state = step(state, (w >> 8) as u8);
1054 #[cfg(feature = "verus")]
1055 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 2, state); }
1056 state = step(state, (w >> 16) as u8);
1057 #[cfg(feature = "verus")]
1058 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 3, state); }
1059 state = step(state, (w >> 24) as u8);
1060 #[cfg(feature = "verus")]
1061 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 4, state); }
1062 state = step(state, (w >> 32) as u8);
1063 #[cfg(feature = "verus")]
1064 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 5, state); }
1065 state = step(state, (w >> 40) as u8);
1066 #[cfg(feature = "verus")]
1067 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 6, state); }
1068 state = step(state, (w >> 48) as u8);
1069 #[cfg(feature = "verus")]
1070 proof! { lemma_chunk_snoc(buf@, start as int, p as int, w, 7, state); }
1071 state = step(state, (w >> 56) as u8);
1072 p += 8;
1073 if state == DFA_REJECT {
1074 #[cfg(feature = "verus")]
1075 proof! {
1076 lemma_run_join(ST_ACCEPT, buf@, start as int, p as int, end as int);
1077 lemma_run_reject(buf@.subrange(p as int, end as int));
1078 lemma_run_valid(buf@.subrange(start as int, end as int));
1079 }
1080 return false;
1081 }
1082 }
1083 // Tail: 0..=7 bytes.
1084 #[cfg_attr(feature = "verus", verus_spec(
1085 invariant
1086 start <= p, p <= end, end <= buf@.len(),
1087 is_state(state),
1088 state == run(ST_ACCEPT, buf@.subrange(start as int, p as int)),
1089 decreases end - p
1090 ))]
1091 while p < end {
1092 // SAFETY: `p < end <= buf.len()` (this function's contract).
1093 state = step(state, unsafe { load8(buf, p) });
1094 #[cfg(feature = "verus")]
1095 proof! { lemma_run_snoc(ST_ACCEPT, buf@, start as int, p as int); }
1096 p += 1;
1097 }
1098 #[cfg(feature = "verus")]
1099 proof! { lemma_run_valid(buf@.subrange(start as int, end as int)); }
1100 state == DFA_ACCEPT
1101}
1102
1103// -- tests -------------------------------------------------------------------
1104
1105#[cfg(test)]
1106mod tests;