forbidden-strings 0.1.8

Out-of-band scanner for forbidden literal strings and regex patterns. Gitignore-aware, fast, dependency-light: built for CI deny-listing of leaked credentials and banned tokens.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
// What:     Unit tests for `super::atom::walk_literal_bytes`. Lives
//           in a sibling module so it can reach the `pub(super)`
//           function via `super::atom::...` (sibling modules under
//           the same parent share that visibility scope).
// Why:      Compile-time gated by `#[cfg(test)]` in the parent module
//           (`rules.rs`), so this file contributes nothing to the
//           release binary. Keeping tests in a separate file (rather
//           than inline `mod tests` in `atom.rs`) preserves the
//           production file's focus.
// TS map:   `import { walkLiteralBytes } from "./atom"; describe(...)`
//           in a `*.test.ts` file with Vitest/Jest.
//
// In TS you'd write (pseudocode):
// ```ts
// import { walkLiteralBytes } from "./atom";
// describe("walkLiteralBytes", () => { ... });
// ```

// What:     `use super::atom::walk_literal_bytes;` brings the
//           function under test into scope. `super` refers to the
//           parent module (`crate::rules`); `atom` is its sibling
//           submodule.
// Why:      Avoid writing the full path at every call site.
// TS map:   `import { walkLiteralBytes } from "./atom";`.
//
// In TS you'd write (pseudocode):
// ```ts
// import { walkLiteralBytes } from "./atom";
// ```
use super::atom::walk_literal_bytes;

// What:     `struct Case { ... }` is a record type with four
//           BORROWED fields (every field is a `&'static` reference;
//           the struct owns no heap data):
//           - `input: &'static str`. A borrowed string slice baked
//             into the binary at compile time. Sibling: `String`,
//             which would be heap-allocated and owned. We use
//             `&'static str` because all our test cases are literals.
//           - `expected_out: &'static str`. Same.
//           - `expected_remainder: &'static str`. Same.
//           - `expected_out_bytes: &'static [u8]`. A borrowed slice
//             of bytes (NOT a `Vec<u8>` which would be owned/heap).
//             We use this to assert the exact UTF-8 byte sequence
//             of `out`, spelling the expectation out byte by byte
//             so a mojibake regression is obvious in the fixture.
// Why:      Group the four fixture values per case so each test
//           stays one-fixture-per-case. Named fields rather than a
//           tuple because four positional fields would be illegible.
// TS map:   `type Case = { input: string; expectedOut: string;
//           expectedRemainder: string; expectedOutBytes: Uint8Array; };`.
//
// In TS you'd write (pseudocode):
// ```ts
// type Case = { input: string; expectedOut: string;
//   expectedRemainder: string; expectedOutBytes: Uint8Array; };
// ```
struct Case {
    // Pattern text handed to `walk_literal_bytes` as its first argument.
    input: &'static str,
    // Text `run_case` expects in the `out` buffer after the call.
    expected_out: &'static str,
    // Text `run_case` expects in the `remainder` out-parameter after the call.
    expected_remainder: &'static str,
    // Exact bytes `run_case` expects from `out.as_bytes()`.
    expected_out_bytes: &'static [u8],
}

// What:     `fn run_case(case: &Case)` runs one test case. Takes a
//           shared (read-only) borrow of the `Case`; we only read
//           from it.
// Why:      Factor out the arrange-act-assert boilerplate so each
//           `#[test]` function is one line.
// TS map:   `function runCase(case: Case): void { ... }`.
//
// In TS you'd write (pseudocode):
// ```ts
// function runCase(c: Case): void { ... }
// ```
fn run_case(case: &Case) {
    // What:     `let mut out = String::new();`. `String::new()` is
    //           the zero-arg constructor for `String` -- a
    //           heap-allocated growable owned UTF-8 buffer (siblings:
    //           `&str` borrowed slice; `Vec<u8>` raw byte buffer
    //           without UTF-8 invariant). `mut` because
    //           `walk_literal_bytes` will push into it.
    // Why:      Output sink for the walker.
    // TS map:   `let out = "";`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // let out = "";
    // ```
    let mut out = String::new();
    // What:     `let mut remainder = case.input;`. `case.input` is
    //           a `&'static str`; we copy that borrow (cheap, `&str`
    //           is `Copy`) into a new `mut`-able binding so we can
    //           pass `&mut remainder` to the walker. Initial value
    //           is irrelevant -- the walker overwrites it before we
    //           inspect it.
    // Why:      The walker writes the un-walked tail into this
    //           binding via the `&mut &str` out-parameter.
    // TS map:   `let remainder = "";` plus a wrapper object to hand
    //           a mutable reference to the function.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const remainderRef = { value: "" };
    // ```
    let mut remainder = case.input;
    // What:     `walk_literal_bytes(case.input, &mut out, &mut remainder);`.
    //           Three arguments: `case.input` passed by value (cheap
    //           `&str` copy), then `&mut out` and `&mut remainder`
    //           which are mutable BORROWS -- "I am lending you
    //           write access to this binding for the duration of
    //           the call." The walker may modify both.
    // Why:      Exercise the unit under test.
    // TS map:   `walkLiteralBytes(case.input, outRef, remainderRef);`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // walkLiteralBytes(case.input, outRef, remainderRef);
    // ```
    walk_literal_bytes(case.input, &mut out, &mut remainder);
    // What:     `assert_eq!(out, case.expected_out, "...")`. Macro
    //           that panics if its first two arguments compare
    //           unequal under `PartialEq`. Optional trailing
    //           arguments are a format-string and values for the
    //           panic message. Rust auto-implements `PartialEq`
    //           between `String` and `&str`.
    // Why:      String-equality check; the format message identifies
    //           which case failed when run as part of the table.
    // TS map:   `expect(out).toBe(case.expectedOut);`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // expect(out).toBe(case.expectedOut);
    // ```
    assert_eq!(
        out, case.expected_out,
        "out mismatch for input {:?}",
        case.input
    );
    // What:     `out.as_bytes()` returns a `&[u8]` view of the
    //           string's underlying bytes WITHOUT copying. The
    //           lifetime of the returned slice is tied to `out`.
    // Why:      Byte-level equality protects against silent
    //           regressions: the bug we just fixed produced 6
    //           mojibake bytes for em-dash; a future regression
    //           that re-introduces it would fail this assertion
    //           even if some `==`-equivalent representation
    //           accidentally compared equal.
    // TS map:   `[...new TextEncoder().encode(out)]`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // expect([...new TextEncoder().encode(out)]).toEqual(
    //   [...case.expectedOutBytes],
    // );
    // ```
    assert_eq!(
        out.as_bytes(),
        case.expected_out_bytes,
        "byte mismatch for input {:?}",
        case.input
    );
    // What:     Same `assert_eq!` macro; checks the un-walked tail.
    // Why:      Confirms the walker stopped at the expected
    //           position (start of metacharacter, or end of input).
    // TS map:   `expect(remainder).toBe(case.expectedRemainder);`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // expect(remainder).toBe(case.expectedRemainder);
    // ```
    assert_eq!(
        remainder, case.expected_remainder,
        "remainder mismatch for input {:?}",
        case.input
    );
}

// What:     `#[test]` attribute marks the function as a unit test;
//           `cargo test` collects every `#[test]` and invokes it.
//           The function name shows up in the test runner output.
// Why:      Per-case `#[test]` functions (rather than one mega-test
//           that loops the whole table) so a failure pinpoints the
//           specific bug-shape that broke. The `Case` struct + `run_case`
//           helper keeps each function to one line.
// TS map:   `test("walks em-dash leading", () => { runCase(...); });`.
//
// In TS you'd write (pseudocode):
// ```ts
// test("walks em-dash leading", () => { runCase({ ... }); });
// ```
#[test]
fn walks_em_dash_leading() {
    // What:     Build the fixture as a named local, then lend it to
    //           `run_case`. The input is `—` (U+2014) followed by
    //           `password`. `expected_out_bytes` is a byte-string
    //           literal (`b"..."`): each `\xHH` is exactly one byte,
    //           so it spells the em-dash's three UTF-8 bytes
    //           (`e2 80 94`) followed by ASCII.
    // Why:      Headline regression case: a pattern that begins
    //           with a 3-byte character must come out byte-for-byte
    //           unchanged with nothing left un-walked. The bug this
    //           guards against reportedly emitted 6 mojibake bytes
    //           for the em-dash.
    // TS map:   `runCase({ input: "—password", expectedOut: "—password",
    //           expectedRemainder: "", expectedOutBytes: <e2 80 94 + "password"> });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "—password", expectedOut: "—password",
    //   expectedRemainder: "", expectedOutBytes: new Uint8Array([
    //     0xe2, 0x80, 0x94, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64,
    //   ]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "—password",
        expected_out: "—password",
        expected_out_bytes: b"\xe2\x80\x94password",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_escaped_em_dash() {
    // What:     The Rust literal `"\\—rest"` is one backslash
    //           (1 byte), the em-dash (3 UTF-8 bytes) and `rest`
    //           (4 bytes): 8 bytes in total. The fixture expects the
    //           backslash to be dropped and `—rest` to be emitted,
    //           i.e. `\X` is expected to yield literal `X`.
    // Why:      Regression case for an escaped multi-byte character.
    //           Reportedly the earlier escape branch pushed a single
    //           `u8` cast to `char`, which mangles any character
    //           wider than one byte.
    // TS map:   `runCase({ input: "\\—rest", expectedOut: "—rest",
    //           expectedRemainder: "", expectedOutBytes: <em-dash + "rest"> });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "\\—rest", expectedOut: "—rest",
    //   expectedRemainder: "",
    //   expectedOutBytes: new Uint8Array([0xe2, 0x80, 0x94, 0x72, 0x65, 0x73, 0x74]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "\\—rest",
        expected_out: "—rest",
        expected_out_bytes: b"\xe2\x80\x94rest",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_pure_ascii_regression() {
    // What:     All-ASCII input with no metacharacters. The fixture
    //           expects the whole text to be emitted unchanged
    //           (`b"hello world"` is its 11 ASCII bytes) and an
    //           empty remainder.
    // Why:      Guards the common path: a Unicode-motivated rewrite
    //           must not disturb plain ASCII patterns.
    // TS map:   `runCase({ input: "hello world", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "hello world", expectedOut: "hello world",
    //   expectedRemainder: "",
    //   expectedOutBytes: new TextEncoder().encode("hello world") };
    // runCase(c);
    // ```
    let case = Case {
        input: "hello world",
        expected_out: "hello world",
        expected_out_bytes: b"hello world",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_pipe_breaks_at_alternation() {
    // What:     Input `"foo|bar"`. The fixture expects `foo` to be
    //           emitted and the walk to stop at the `|`, leaving
    //           `|bar` as the remainder.
    // Why:      Stopping at alternation matters for soundness: if
    //           `foo` were treated as a required literal of
    //           `foo|bar`, content matching only `bar` would be
    //           missed. NOTE(review): the original notes name
    //           `extract_required_prefix` as the affected caller --
    //           confirm, as it is not visible here.
    // TS map:   `runCase({ input: "foo|bar", expectedOut: "foo",
    //           expectedRemainder: "|bar", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "foo|bar", expectedOut: "foo",
    //   expectedRemainder: "|bar", expectedOutBytes: new Uint8Array([0x66, 0x6f, 0x6f]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "foo|bar",
        expected_out: "foo",
        expected_out_bytes: b"foo",
        expected_remainder: "|bar",
    };
    run_case(&case);
}

#[test]
fn walks_metacharacter_breaks() {
    // What:     Input `"foo.*bar"`. The fixture expects `foo` to be
    //           emitted and the walk to stop at the regex
    //           metacharacter `.`, leaving `.*bar` as the remainder.
    // Why:      Confirms metacharacter detection. Reportedly the
    //           walker's comparisons moved from byte literals
    //           (`b'.'`) to char literals (`'.'`); this pins that
    //           the observable behaviour is the same.
    // TS map:   `runCase({ input: "foo.*bar", expectedOut: "foo",
    //           expectedRemainder: ".*bar", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "foo.*bar", expectedOut: "foo",
    //   expectedRemainder: ".*bar", expectedOutBytes: new Uint8Array([0x66, 0x6f, 0x6f]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "foo.*bar",
        expected_out: "foo",
        expected_out_bytes: b"foo",
        expected_remainder: ".*bar",
    };
    run_case(&case);
}

#[test]
fn walks_escape_underscore_regression() {
    // What:     The Rust literal `"\\_foo"` is a backslash followed
    //           by `_foo`. The fixture expects the escape `\_` to
    //           yield a literal `_`, so the output is `_foo` with
    //           an empty remainder.
    // Why:      `\_` extraction is described as an important case
    //           for betterleaks-shape rules; this pins it so a
    //           rewrite of the walker cannot silently drop it.
    // TS map:   `runCase({ input: "\\_foo", expectedOut: "_foo",
    //           expectedRemainder: "", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "\\_foo", expectedOut: "_foo",
    //   expectedRemainder: "", expectedOutBytes: new Uint8Array([0x5f, 0x66, 0x6f, 0x6f]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "\\_foo",
        expected_out: "_foo",
        expected_out_bytes: b"_foo",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_alphanumeric_escape_breaks() {
    // What:     The Rust literal `"foo\\dbar"` is `foo`, then the
    //           regex escape `\d`, then `bar`. The fixture expects
    //           `foo` to be emitted and the walk to stop before the
    //           backslash, leaving `\dbar` as the remainder.
    // Why:      A backslash followed by an ASCII alphanumeric is a
    //           regex class or escape, not a literal character, so
    //           it must end the literal run. Pins that this
    //           behaviour is unchanged by the walker rewrite.
    // TS map:   `runCase({ input: "foo\\dbar", expectedOut: "foo",
    //           expectedRemainder: "\\dbar", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "foo\\dbar", expectedOut: "foo",
    //   expectedRemainder: "\\dbar", expectedOutBytes: new Uint8Array([0x66, 0x6f, 0x6f]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "foo\\dbar",
        expected_out: "foo",
        expected_out_bytes: b"foo",
        expected_remainder: "\\dbar",
    };
    run_case(&case);
}

#[test]
fn walks_em_dash_then_metacharacter() {
    // What:     Input `"—.*"` -- em-dash followed by metacharacter
    //           `.`. The walker should consume the em-dash and
    //           stop at `.`. Remainder is `.*`. `expected_out` and
    //           `expected_out_bytes` describe the SAME output (the
    //           em-dash, whose UTF-8 encoding is `e2 80 94`);
    //           `run_case` asserts both, so they must agree.
    // Why:      Cross-cutting case: confirms the walk advances past
    //           the whole multi-byte em-dash before the next
    //           character is evaluated as a potential
    //           metacharacter. Advancing by a single byte after `—`
    //           would land mid-character, and slicing a `&str` off
    //           a char boundary panics.
    // TS map:   `runCase({ input: "—.*", expectedOut: "—",
    //           expectedRemainder: ".*", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // runCase({ input: "—.*", expectedOut: "—",
    //   expectedRemainder: ".*", expectedOutBytes: new Uint8Array([0xe2, 0x80, 0x94]) });
    // ```
    run_case(&Case {
        input: "—.*",
        // Fixed: this was `""`, which contradicted the byte-level
        // expectation below and made the test impossible to pass.
        expected_out: "—",
        expected_remainder: ".*",
        expected_out_bytes: b"\xe2\x80\x94",
    });
}

#[test]
fn walks_two_byte_utf8_leading() {
    // What:     Input starts with `é` (U+00E9), whose UTF-8
    //           encoding is the 2 bytes `c3 a9`, followed by `cret`.
    //           The fixture expects the whole input to be emitted
    //           unchanged with an empty remainder.
    // Why:      Covers the 2-byte UTF-8 width. Casting each byte to
    //           `char` separately would turn `0xc3` into U+00C3
    //           (`Ã`) and `0xa9` into U+00A9 (`©`): two wrong code
    //           points that re-encode to 4 bytes instead of the
    //           original 2.
    // TS map:   `runCase({ input: "écret", expectedOut: "écret", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "écret", expectedOut: "écret",
    //   expectedRemainder: "",
    //   expectedOutBytes: new Uint8Array([0xc3, 0xa9, 0x63, 0x72, 0x65, 0x74]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "écret",
        expected_out: "écret",
        expected_out_bytes: b"\xc3\xa9cret",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_four_byte_utf8_leading() {
    // What:     Input starts with `🔑` (U+1F511), whose UTF-8
    //           encoding is the 4 bytes `f0 9f 94 91`, followed by
    //           `secret`. The fixture expects the whole input to be
    //           emitted unchanged with an empty remainder.
    // Why:      Covers the widest UTF-8 encoding (4 bytes). A
    //           byte-at-a-time cast to `char` would map each of the
    //           4 bytes to its own U+0080..U+00FF code point, each
    //           re-encoding to 2 bytes: 8 mojibake bytes in total.
    // TS map:   `runCase({ input: "🔑secret", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "🔑secret", expectedOut: "🔑secret",
    //   expectedRemainder: "",
    //   expectedOutBytes: new Uint8Array([0xf0, 0x9f, 0x94, 0x91, 0x73, 0x65, 0x63, 0x72, 0x65, 0x74]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "🔑secret",
        expected_out: "🔑secret",
        expected_out_bytes: b"\xf0\x9f\x94\x91secret",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_escaped_emoji() {
    // What:     The Rust literal `"\\🔑rest"` is a backslash, the
    //           4-byte emoji `🔑`, then `rest`. The fixture expects
    //           the escape to yield the literal emoji and the walk
    //           to continue through `rest`, leaving an empty
    //           remainder.
    // Why:      Stresses the escape path with the widest character:
    //           after `\X` the walk must advance by the full
    //           encoded width of `X`. Advancing by a fixed single
    //           byte would land inside the emoji, and slicing a
    //           `&str` off a char boundary panics.
    // TS map:   `runCase({ input: "\\🔑rest", expectedOut: "🔑rest", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "\\🔑rest", expectedOut: "🔑rest",
    //   expectedRemainder: "",
    //   expectedOutBytes: new Uint8Array([0xf0, 0x9f, 0x94, 0x91, 0x72, 0x65, 0x73, 0x74]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "\\🔑rest",
        expected_out: "🔑rest",
        expected_out_bytes: b"\xf0\x9f\x94\x91rest",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_empty_input() {
    // What:     Empty input `""`. The fixture expects nothing to be
    //           emitted (`b""` is the empty byte slice) and an
    //           empty remainder.
    // Why:      Edge case: a caller may hand over an empty `&str`
    //           once a prior atom has consumed everything. The
    //           walker must not panic and must leave both outputs
    //           empty.
    // TS map:   `runCase({ input: "", expectedOut: "", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "", expectedOut: "",
    //   expectedRemainder: "", expectedOutBytes: new Uint8Array([]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "",
        expected_out: "",
        expected_out_bytes: b"",
        expected_remainder: "",
    };
    run_case(&case);
}

#[test]
fn walks_trailing_backslash() {
    // What:     The Rust literal `"\\"` is a single backslash with
    //           nothing after it. The fixture expects nothing to be
    //           emitted and the remainder to still be that lone
    //           `\`.
    // Why:      A dangling escape has no character to escape, so
    //           the walk must stop in front of it instead of
    //           consuming it. NOTE(review): the original notes tie
    //           this to a `let ... else { break; }` in the walker
    //           -- confirm, as the walker is not visible here.
    // TS map:   `runCase({ input: "\\", expectedOut: "",
    //           expectedRemainder: "\\", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "\\", expectedOut: "",
    //   expectedRemainder: "\\", expectedOutBytes: new Uint8Array([]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "\\",
        expected_out: "",
        expected_out_bytes: b"",
        expected_remainder: "\\",
    };
    run_case(&case);
}

#[test]
fn walks_mixed_widths_consecutive() {
    // What:     Input `"a—é🔑z"` strings together a 1-byte ASCII
    //           letter, the 3-byte em-dash, the 2-byte `é`, the
    //           4-byte `🔑` and another ASCII letter. None is a
    //           metacharacter, so the fixture expects the whole
    //           input to be emitted byte-for-byte (11 bytes) with
    //           an empty remainder.
    // Why:      Stresses position tracking across every UTF-8
    //           width back to back: after each character the walk
    //           must land on the next char boundary whatever the
    //           previous width was. Drift would show up as a panic
    //           or as mojibake in the following character.
    // TS map:   `runCase({ input: "a—é🔑z", expectedOut: "a—é🔑z", ... });`.
    //
    // In TS you'd write (pseudocode):
    // ```ts
    // const c = { input: "a—é🔑z", expectedOut: "a—é🔑z",
    //   expectedRemainder: "",
    //   expectedOutBytes: new Uint8Array([
    //     0x61, 0xe2, 0x80, 0x94, 0xc3, 0xa9, 0xf0, 0x9f, 0x94, 0x91, 0x7a,
    //   ]) };
    // runCase(c);
    // ```
    let case = Case {
        input: "a—é🔑z",
        expected_out: "a—é🔑z",
        expected_out_bytes: b"a\xe2\x80\x94\xc3\xa9\xf0\x9f\x94\x91z",
        expected_remainder: "",
    };
    run_case(&case);
}