mrrc 0.8.2

A Rust library for reading, writing, and manipulating MARC bibliographic records in ISO 2709 binary format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
# Error-handling test coverage manifest.
#
# Documentation-as-spec for mrrc's error handling: every documented
# error code in docs/reference/error-codes.md is paired here with one
# or more cases that should trigger it. The harness
# (tests/error_coverage.rs and the Python equivalent) reads this
# manifest and asserts the documented variant, code, slug, and
# positional context fire when the parser exercises the trigger —
# not when an error is constructed in test code.
#
# A single code can have multiple cases when the docs describe
# multiple distinct trigger patterns (e.g., E101 fires on both
# "non-digit length in a directory entry" and "missing field
# terminator before base address"). Each case is exercised
# independently; wiring is tracked per case, not per code, so the
# coverage tally reflects the granularity of the spec.
#
# Per-trigger coverage convention: when a single MarcError variant has
# multiple distinct production fire sites (different functions, files,
# or conditions), each fire site gets its own manifest case — even
# when the variant slug is shared. E404 `WriterError` is the canonical
# example: three cases (size cap, non-3-ASCII tag, finished-writer
# reuse) share the slug `record_too_large_for_iso2709` but each
# exercises a distinct path. The harness's `exercise_writer` /
# `exercise_accessor` helpers branch on the case id to pick the right
# driver.
# Adding a new fire site for an existing variant requires both a new
# `[[case]]` here and a branch in the relevant `exercise_*` helper.
#
# Cases marked `wired = false` describe trigger patterns where the
# parser does not currently produce the documented variant for the
# given input. The `skip_reason` field states what happens today, in
# technical terms, so a contributor implementing the missing detection
# knows what they are replacing. Adding a new error code requires
# adding a case here AND a fixture (or other trigger mechanism).
#
# Schema:
#   id               unique identifier "<code>_<short_trigger>"; used
#                    as the test display name and disambiguates cases
#                    that share a code
#   code             "Exxx" identifier (stable across releases)
#   variant          MarcError variant name (Rust)
#   slug             snake_case slug (stable across releases)
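#   trigger_kind     how the harness exercises the trigger:
#                      "parse_iso2709"  — feed bytes to MarcReader
#                      "parse_marcxml"  — feed string to marcxml_to_record
#                      "parse_marcjson" — feed string to marcjson_to_record
#                      "io_error"       — inject a Read source that errors
#                      "recovery_cap"   — drive lenient mode past max_errors
#                      "accessor"       — call a Record accessor on a parsed record
#                      "writer"         — construct a record and attempt to write it
#                    Cases in this manifest also use these kinds:
#                      "parse_iso2709_lenient" — feed bytes to MarcReader in
#                          lenient mode; the error is read from record.errors
#                      "parse_authority" — feed bytes to AuthorityMarcReader
#                      "io_error_parse_path" — Read source that errors
#                          mid-record rather than on the first read
#                      "programmatic_validator" — build a Leader in test
#                          code and call RecordStructureValidator directly
#                    The authoritative list of trigger_kinds the Rust
#                    harness exercises is the module docstring in
#                    tests/error_coverage.rs. A kind the harness can't
#                    drive (e.g. parse_marcjson — no Rust str-to-Record
#                    entry point; covered in the Python harness instead)
#                    skips with a manifest-driven reason.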
#   trigger_fixture  path to fixture bytes (parse_iso2709) or text
#                    (parse_marcxml / parse_marcjson) that should fire
#                    `code`. Required for parse_* kinds; optional for
#                    others (their semantics are described in the
#                    case's `description` and `skip_reason`).
#   description      one-line description of the malformation or trigger
#   expected_context positional fields the variant must populate when
#                    fired (per docs/reference/error-codes.md)
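#   recovery_modes   recovery modes the harness exercises on this case
#                    (parse kinds only; declared so the harness can
#                    extend without manifest churn)
#   validation_level optional; set to "strict_marc" on cases whose
#                    check only runs at that validation level (e.g.
#                    the E002 leader value-set checks). Omitted on
#                    cases that do not need it.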
#   wired            true if the parser currently emits the documented
#                    variant for this trigger
#   skip_reason      required when wired = false; describes current
#                    behavior in technical terms

schema_version = 1

# === Stream / leader (E0xx) =========================================

# E001: leader bytes 0-4 not five ASCII digits.
[[case]]
id = "e001_record_length_non_digit"
code = "E001"
variant = "RecordLengthInvalid"
slug = "record_length_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e001_record_length_non_digit.bin"
description = "Leader byte 0 ('0' of '00150') replaced with 'X'."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E001: leader bytes 0-4 parse but the resulting length is < 24 (the
# leader alone is 24 bytes, so any smaller value is impossible).
# Distinct production path from the non-digit case above: parsed via
# Leader::validate_for_reading after Leader::from_bytes succeeds.
[[case]]
id = "e001_record_length_below_24"
code = "E001"
variant = "RecordLengthInvalid"
slug = "record_length_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e001_record_length_below_24.bin"
description = "Leader bytes 0-4 set to '00010' (decimal 10 < 24-byte minimum)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E002: leader byte 10 (indicator count, normally '2') is non-digit.
# Audit note: docs/reference/error-codes.md describes E002 as also
# firing on malformations like "reserved bytes 20-23 not '4500'" or
# "encoding indicator out of range", but the current parser does not
# inspect those bytes. Today's E002 path covers indicator count parse
# failure, subfield code count parse failure, and (overlapping with
# the docs-claimed E001/E003 paths) record_length < 24 and
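# data_base_address < 24. The reserved-field check is a docs-vs-code
# gap separate from the wiring work for E001/E003/E004.
# NOTE(review): parts of this audit note look stale relative to cases in
# this manifest — e001_record_length_below_24 and
# e003_base_address_below_24 are wired as E001/E003, and the strict_marc
# leader cases below (e.g. e002_invalid_character_coding) do inspect
# leader value sets. Confirm against the parser and update.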
[[case]]
id = "e002_indicator_count_non_digit"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_indicator_count_non_digit.bin"
description = "Leader byte 10 (indicator count, normally '2') replaced with 'X'."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
wired = true

# E002: leader byte 5 (record_status) is byte-valid but not in the MARC 21
# allowed set {a, c, d, n, p}. RecordStructureValidator runs only at
# validation_level=strict_marc and reports this as InvalidLeader.
[[case]]
id = "e002_invalid_record_status"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_record_status.bin"
description = "Leader byte 5 (record status) is 'x' (not in {a, c, d, n, p})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 6 (type of record) outside MARC 21
# allowed set.
[[case]]
id = "e002_invalid_record_type"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_record_type.bin"
description = "Leader byte 6 (type of record) is 'q' (not in the documented set)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 7 (bibliographic level) outside
# {a, b, c, d, i, m, s}.
[[case]]
id = "e002_invalid_bibliographic_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_bibliographic_level.bin"
description = "Leader byte 7 (bibliographic level) is 'q' (not in {a, b, c, d, i, m, s})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 8 (control record type) outside
# {' ', 'a'}.
[[case]]
id = "e002_invalid_control_record_type"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_control_record_type.bin"
description = "Leader byte 8 (control record type) is 'q' (not in {' ', 'a'})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 9 (character coding) outside {' ', 'a'}.
[[case]]
id = "e002_invalid_character_coding"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_character_coding.bin"
description = "Leader byte 9 (character coding) is 'q' (not in {' ', 'a'})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 10 (indicator count) is a byte-valid
# digit but != 2 (MARC 21 requires exactly 2). Distinct from the
# structural e002_indicator_count_non_digit case, which fails the
# digit check in Leader::from_bytes.
[[case]]
id = "e002_indicator_count_not_two"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_indicator_count_not_two.bin"
description = "Leader byte 10 (indicator count) is '3' (digit but MARC 21 requires 2)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 11 (subfield code count) is a
# byte-valid digit but != 2.
[[case]]
id = "e002_subfield_code_count_not_two"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_subfield_code_count_not_two.bin"
description = "Leader byte 11 (subfield code count) is '3' (digit but MARC 21 requires 2)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 17 (encoding level) outside the MARC 21
# allowed set.
[[case]]
id = "e002_invalid_encoding_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_encoding_level.bin"
description = "Leader byte 17 (encoding level) is 'q' (not in {' ', 1-5, 7, 8, u, z})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 18 (cataloging form) outside
# {' ', a, c, i, n, u}.
[[case]]
id = "e002_invalid_cataloging_form"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_cataloging_form.bin"
description = "Leader byte 18 (cataloging form) is 'q' (not in {' ', a, c, i, n, u})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 19 (multipart level) outside
# {' ', a, b, c}.
[[case]]
id = "e002_invalid_multipart_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_multipart_level.bin"
description = "Leader byte 19 (multipart level) is 'q' (not in {' ', a, b, c})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (defensive) — data_base_address > 99999. Unreachable from any
# 5-digit-ASCII-parsed leader (max value 99999), but reachable via
# programmatic Leader construction (public struct, public fields).
# The defensive check at record_validation.rs:91-96 guards against
# this constructed state and is exercised here by building a Leader
# with the bad value and invoking RecordStructureValidator directly.
[[case]]
id = "e002_data_base_address_overflow_programmatic"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
# No trigger_fixture: the trigger is a Leader built in test code, not
# parsed bytes.
trigger_kind = "programmatic_validator"
description = "Construct Leader with data_base_address = 100_000 and invoke RecordStructureValidator::validate_leader directly."
# NOTE(review): presumably empty because the validator is called
# directly on a constructed Leader, so no stream position exists —
# confirm.
expected_context = []
recovery_modes = ["strict"]
wired = true

# E003: leader bytes 12-16 not five ASCII digits.
[[case]]
id = "e003_base_address_non_digit"
code = "E003"
variant = "BaseAddressInvalid"
slug = "base_address_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e003_base_address_non_digit.bin"
description = "Leader byte 12 ('0' of base address '00061') replaced with 'X'."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E003: leader bytes 12-16 parse but the resulting base address is
# < 24 (the leader alone is 24 bytes). Distinct production path from
# the non-digit case above: parsed via Leader::validate_for_reading
# after Leader::from_bytes succeeds.
[[case]]
id = "e003_base_address_below_24"
code = "E003"
variant = "BaseAddressInvalid"
slug = "base_address_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e003_base_address_below_24.bin"
description = "Leader bytes 12-16 set to '00020' (decimal 20 < 24-byte leader minimum)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E004: base address claims a value past the available bytes.
[[case]]
id = "e004_base_address_past_record"
code = "E004"
variant = "BaseAddressNotFound"
slug = "base_address_not_found"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e004_base_address_past_record.bin"
description = "Leader bytes 12-16 set to '99999' (> 150-byte record length)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E005: stream EOF before reading the leader-claimed record length.
[[case]]
id = "e005_truncated_record"
code = "E005"
variant = "TruncatedRecord"
slug = "truncated_record"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e005_truncated_record.bin"
description = "Record claims 150 bytes; stream ends after 100."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E005 lenient skeleton path: the truncation that raises in the strict
# case above also fires in lenient mode, but as a non-raised error
# pushed onto the yielded record's `errors: Arc<Vec<MarcError>>`.
# Distinct fire behavior from the strict path, so a separate case.
[[case]]
id = "e005_truncated_record_lenient"
code = "E005"
variant = "TruncatedRecord"
slug = "truncated_record"
trigger_kind = "parse_iso2709_lenient"
trigger_fixture = "tests/data/error_fixtures/e005_truncated_record.bin"
description = "Same truncated fixture as the strict case; lenient mode pushes E005 onto record.errors instead of raising."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["lenient"]
wired = true

# E006: byte at the leader's claimed end position is not 0x1D.
[[case]]
id = "e006_no_record_terminator"
code = "E006"
variant = "EndOfRecordNotFound"
slug = "end_of_record_not_found"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e006_no_record_terminator.bin"
description = "Final byte (0x1D RECORD_TERMINATOR) replaced with 0x00."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E007: underlying I/O error from the reader source. Two distinct paths
# construct MarcError::IoError, each covered by a case below:
#   - raw-io / leader boundary: the read fails before the parser has
#     begun a record (read_leader_bytes), so no positional context is
#     available and the context-free From<io::Error> fallback is used.
#   - parse path: the read fails mid-record in read_record_data, which
#     enriches the error via ParseContext::err_io with the in-progress
#     record's record_index, byte_offset, and source_name.
[[case]]
id = "e007_io_failure"
code = "E007"
variant = "IoError"
slug = "io_error"
trigger_kind = "io_error"
description = "Reader source errors on the first read (leader boundary); no positional context available."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E007 parse path: the source errors mid-record (in read_record_data),
# so the IoError is expected to carry record_index, byte_offset, and
# source_name rather than the context-free fallback.
[[case]]
id = "e007_io_failure_parse_path"
code = "E007"
variant = "IoError"
slug = "io_error"
trigger_kind = "io_error_parse_path"
description = "Reader source errors mid-record while reading the data area; IoError carries the in-progress record's context."
expected_context = ["record_index", "byte_offset", "source_name"]
recovery_modes = ["strict"]
wired = true

# E099: lenient/permissive recovered-error cap exceeded. Wired in
# MarcReader::read_record (constructs MarcError::FatalReaderError when
# the per-stream cap is hit); the harness exercises it once a
# trigger_kind=recovery_cap mechanism lands.
# NOTE(review): the schema header lists "recovery_cap" among the
# trigger kinds, while the comment above says the harness will exercise
# this case once such a mechanism lands — confirm whether the harness
# drives it today.
[[case]]
id = "e099_recovery_cap_exceeded"
code = "E099"
variant = "FatalReaderError"
slug = "fatal_reader_error"
trigger_kind = "recovery_cap"
description = "Stream of malformed records exceeds MarcReader::with_max_errors(N)."
expected_context = []
recovery_modes = ["lenient", "permissive"]
wired = true

# === Directory / field header (E1xx) ================================

# E101: directory has no FIELD_TERMINATOR before base address.
[[case]]
id = "e101_no_field_terminator"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_no_terminator.bin"
description = "Byte 60 (FIELD_TERMINATOR 0x1E ending the directory) replaced with '0', producing a partial trailing entry."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry length field contains a non-digit byte.
[[case]]
id = "e101_non_digit_length"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_non_digit_length.bin"
description = "Directory entry length field 'X025' (was '0025') for the 100 entry."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry tag contains a non-ASCII byte. Tags are 3 ASCII
# bytes per the codec; lossy UTF-8 conversion would replace the byte
# with U+FFFD (3 bytes), producing a tag the writer can't fit back into
# the directory's fixed-width tag field. Round-trip-breaking; the
# error-classification fuzz target's round-trip assertion surfaces it.
[[case]]
id = "e101_non_ascii_tag"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_non_ascii_tag.bin"
description = "Directory entry 1 tag's first byte replaced with 0xCC (non-ASCII)."
# field_tag omitted: the tag bytes are themselves the malformation, so the
# parser does not set current_field_tag on this path (unlike the E106
# data-field guard, where the tag is known).
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry start-position byte is non-digit. Distinct
# production path from non_digit_length (mutates the length field's
# bytes 3-6 of the entry) — this mutates the start-position field's
# bytes 7-11 of the entry. Different code branch in the directory walker.
[[case]]
id = "e101_non_digit_start"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_non_digit_start.bin"
description = "Directory entry 1 start-position byte 7 of 11 set to 'X' (first byte of the 5-byte start field)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E105: accessor lookup for a tag the record does not contain.
[[case]]
id = "e105_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_book.mrc"
description = "Parse simple_book.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E105 from AuthorityRecord — the same accessor pattern fires from the
# authority record type via the parity API.
[[case]]
id = "e105_authority_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_authority.mrc"
description = "Parse simple_authority.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E105 from HoldingsRecord — the same accessor pattern fires from the
# holdings record type via the parity API.
[[case]]
id = "e105_holdings_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_holdings.mrc"
description = "Parse simple_holdings.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E106: directory entry claims a field length larger than the data area.
[[case]]
id = "e106_field_length_past_data"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e106_field_length_past_data.bin"
description = "Directory entry for tag 100 claims length '9999' (was '0025'); declared field bytes extend past the data area."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106: byte after data-field indicators is not the subfield delimiter.
# Bibliographic reader (SubfieldStructureMode::Strict) fires this in
# parse_subfields; authority and holdings tolerate the same byte under
# SubfieldStructureMode::Permissive.
[[case]]
id = "e106_expected_subfield_delimiter"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e106_expected_subfield_delimiter.bin"
description = "Byte 63 (first subfield delimiter 0x1F of the 100 field) replaced with 'X'."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106 authority field-too-short: the documented per-reader minimum
# guard ("authority returns Err when field_bytes.len() < 2") fires
# from AuthorityMarcReader when a *data* field is below the 2-byte
# indicator minimum. The guard runs only on data fields — control
# fields (001-009) decode on a separate path and are exempt — so the
# fixture mutates the 100 heading field's directory length to '0001'.
# An earlier trial that mutated the 005 control field could never trip
# it, which is what the data-field-only gating predicts.
[[case]]
id = "e106_authority_field_too_short"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_authority"
trigger_fixture = "tests/data/error_fixtures/e106_authority_field_too_short.bin"
description = "Authority record's 100 data-field directory length set to '0001' (1 byte; below the 2-byte indicator minimum)."
expected_context = ["record_index", "byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106 recovery path: when a truncated record drops into try_recover_record
# in lenient mode, the recovery directory walker calls parse_4digits /
# parse_5digits directly. A non-digit length/start byte there fires E106
# InvalidField (via MarcError::invalid_field_msg) — distinct from the
# main parser path's E101 DirectoryInvalid for the same kind of malformed
# directory byte. The fixture combines truncation (so the recovery path
# runs) with a malformed length field (so parse_4digits errors). Both
# E005 and E106 land on record.errors; the harness picks the E106 one.
[[case]]
id = "e106_recovery_invalid_field"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709_lenient"
trigger_fixture = "tests/data/error_fixtures/e106_recovery_invalid_field.bin"
description = "Leader claims 999-byte record; only the directory (with a non-digit length field 'ABCD') is provided. Truncation routes through try_recover_record, which calls parse_4digits and pushes InvalidField."
# NOTE(review): unlike the other E106 cases, no positional context is
# asserted here — presumably the recovery path's
# MarcError::invalid_field_msg does not populate it; confirm.
expected_context = []
recovery_modes = ["lenient"]
wired = true

# === Subfield / indicator (E2xx) ====================================

# E201: indicator byte not digit/space.
[[case]]
id = "e201_bad_indicator_245"
code = "E201"
variant = "InvalidIndicator"
slug = "invalid_indicator"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e201_bad_indicator.bin"
description = "First indicator byte of field 245 is ':' (not digit/space)."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "indicator_position",
    "found",
    "expected",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E201: indicator byte is digit-valid but violates the per-tag MARC 21 rule
# for the field. IndicatorValidator runs in addition to the universal
# byte-validity check at strict_marc.
[[case]]
id = "e201_per_tag_indicator_245"
code = "E201"
variant = "InvalidIndicator"
slug = "invalid_indicator"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e201_per_tag_indicator_245.bin"
description = "First indicator of field 245 is '9' (byte-valid digit but per-tag rule allows only 0/1)."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "indicator_position",
    "found",
    "expected",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E202: subfield code byte not printable ASCII.
[[case]]
id = "e202_non_printable_subfield_code"
code = "E202"
variant = "BadSubfieldCode"
slug = "bad_subfield_code"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e202_non_printable_subfield_code.bin"
description = "Byte 64 (subfield code 'a' after 0x1F at byte 63) replaced with 0x00."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "subfield_code",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# === Encoding (E3xx) ================================================

# E301: subfield value contains invalid UTF-8 bytes.
[[case]]
id = "e301_invalid_utf8_in_subfield"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_subfield.bin"
description = "Byte 70 (inside the 100$a 'Fitzgerald' subfield value) replaced with 0xFF."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E301: invalid UTF-8 in a bibliographic CONTROL field. This is a
# distinct production path from the subfield case
# (e301_invalid_utf8_in_subfield): it fires from iso2709_skeleton.rs's
# control-field reader at strict_marc, not from parse_subfields.
[[case]]
id = "e301_invalid_utf8_in_control_field"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_control_field.bin"
description = "Byte 49 (first byte of 008 control field in with_control_fields.mrc) replaced with 0xFF."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E301: invalid UTF-8 in an authority CONTROL field at strict_marc.
# The fixture's leader is the simple_authority.mrc leader with position
# 18 (punctuation policy) set to ' ' so that it conforms to the MARC 21
# Authority Format allowed-value sets. With a conforming leader, the
# per-record-type leader validators in `RecordStructureValidator` let
# the parse proceed past the leader, and E301 then fires on the
# corrupted control-field byte (0xFF at offset 73).
[[case]]
id = "e301_invalid_utf8_in_authority_control_field"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_authority"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_authority_control_field.bin"
description = "Byte 73 (first byte of first control field in simple_authority.mrc) replaced with 0xFF; leader position 18 set to ' ' so the MARC 21 Authority Format allowed-value sets accept the leader at strict_marc."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# === Serialization (E4xx) ===========================================

# E401: malformed MARCXML (mismatched closing tag). Triggers the
# read_event_into error wrapping in marcxml.rs (lines 157/252/454/497);
# the mismatch is reported by whichever read_event_into call is active
# when the bad tag is consumed.
# NOTE(review): the line numbers are a snapshot and may have drifted —
# confirm against the current marcxml.rs.
[[case]]
id = "e401_marcxml_close_tag_mismatch"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_malformed_marcxml.xml"
description = "Subfield 'a' closing tag is </WRONG> instead of </subfield>."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: input is well-formed XML but contains no <record> element.
# Triggers marcxml.rs:458 (Custom error raised after the outer event
# loop exhausts its input without finding a <record> Start event).
# NOTE(review): the line number is a snapshot and may have drifted —
# confirm against the current marcxml.rs.
[[case]]
id = "e401_no_record_element"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_no_record_element.xml"
description = "Well-formed XML wrapper with no <record> element inside."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: invalid numeric character reference in subfield text. Triggers
# marcxml.rs:278 (resolve_char_ref returns Err for out-of-range values
# such as &#xFFFFFFF;).
# NOTE(review): the line number is a snapshot and may have drifted —
# confirm against the current marcxml.rs.
[[case]]
id = "e401_bad_char_reference"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_bad_char_reference.xml"
description = "Subfield 'a' text contains &#xFFFFFFF; (overflowing character reference)."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: input truncated mid-subfield (the file ends inside <subfield>
# before any closing tag). Triggers marcxml.rs:301 (Custom error
# "unexpected EOF inside <subfield>" from read_leaf_text's EOF
# handler).
# NOTE(review): the line number is a snapshot and may have drifted —
# confirm against the current marcxml.rs.
[[case]]
id = "e401_unexpected_eof_in_subfield"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_unexpected_eof_in_subfield.xml"
description = "XML truncated mid-text inside <subfield>; no closing tag."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E402: malformed MARCJSON (truncated JSON).
# The fixture's JSON document ends mid-string inside a subfield value.
[[case]]
id = "e402_marcjson_truncated"
code = "E402"
variant = "JsonError"
slug = "marcjson_invalid"
trigger_kind = "parse_marcjson"
trigger_fixture = "tests/data/error_fixtures/e402_malformed_marcjson.json"
description = "JSON document is truncated mid-string in a subfield value."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E404: writer attempts to serialize a record exceeding the ISO 2709
# length limit. Wired in the writer path (MARCWriter and analogues
# construct MarcError::WriterError when total length or base address
# exceeds 99999); the harness exercises it once a trigger_kind=writer
# mechanism lands.
# NOTE(review): this case already declares trigger_kind = "writer" and
# wired = true — confirm whether the "once ... lands" wording is still
# accurate or is left over from before the writer trigger existed.
# No trigger_fixture is given: per `description`, the oversized record
# is constructed rather than read from a file.
[[case]]
id = "e404_record_too_large_for_iso2709"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Construct a record whose total length or base address exceeds 99999 and attempt to serialize it."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true

# E404 also fires from validate_directory_tag when a Field's tag is not
# 3 ASCII bytes. This is a distinct production trigger from the size cap
# (e404_record_too_large_for_iso2709); per the per-trigger coverage
# convention, each fire site gets its own manifest case even when the
# variant slug is shared — hence the "record_too_large_for_iso2709" slug
# on a case that has nothing to do with record size.
[[case]]
id = "e404_writer_non_ascii_tag"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Construct a record with a field whose tag is not 3 ASCII bytes and attempt to serialize it."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true

# E404 also fires from MarcWriter::write_record when called after
# finish(). Reachable from Rust but not from the Python `mrrc.MARCWriter`
# wrapper (which short-circuits via PyRuntimeError before reaching the
# Rust writer).
# The slug is the one used by the other E404 writer cases, and
# expected_context is empty for this case.
[[case]]
id = "e404_writer_finished_writer_reuse"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Call MarcWriter::finish(), then attempt another write_record on the same writer."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E404 (defensive): check_iso2709_size's base_address > 99999 guard.
# Unreachable from the writer's normal control flow (base_address <=
# record_length, so the record_length > 99999 check fires first), but
# reachable via direct invocation of the public helper. Exercised here
# by calling check_iso2709_size with record_length=1 and
# base_address=100_000, which isolates the base_address branch.
# Uses its own trigger_kind ("programmatic_writer_check") rather than
# "writer" because the helper is called directly.
[[case]]
id = "e404_check_iso2709_size_base_address_overflow_programmatic"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "programmatic_writer_check"
description = "Call iso2709::check_iso2709_size(record_length=1, base_address=100_000) directly to exercise the base_address > 99999 defensive branch."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true