kindling-mobi 0.14.5

Kindle MOBI/AZW3 builder for dictionaries, books, and comics. Drop-in kindlegen replacement.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
/// Structural MOBI/AZW3 dumper.
///
/// Parses a finished MOBI file and emits one line per semantic field in
/// `section.field = value` form so `diff -u` surfaces structural differences
/// instead of an offset cascade. Text and image record contents are
/// intentionally summarized (length + 4-byte magic only) to keep diffs
/// focused on index / header / EXTH / INDX state.
///
/// This is a read-only parser that duplicates a fair bit of mobi_check's
/// PalmDB and MOBI header walking. The duplication is intentional: the
/// dumper needs a different output shape and we'd rather iterate on it
/// without perturbing the readback checker. If the overlap becomes painful
/// we can factor both onto a shared primitives module later.

use std::fmt::Write as _;
use std::io;
use std::path::Path;

// ---------------------------------------------------------------------
// Byte readers
// ---------------------------------------------------------------------

fn read_u16_be(data: &[u8], offset: usize) -> Option<u16> {
    data.get(offset..offset + 2)
        .map(|s| u16::from_be_bytes([s[0], s[1]]))
}

fn read_u32_be(data: &[u8], offset: usize) -> Option<u32> {
    data.get(offset..offset + 4)
        .map(|s| u32::from_be_bytes([s[0], s[1], s[2], s[3]]))
}

/// Render the first 4 bytes of a buffer as either a quoted ASCII magic
/// string or a hex literal if any byte is non-printable.
fn magic_or_hex(buf: &[u8]) -> String {
    if buf.len() < 4 {
        let mut s = String::from("0x");
        for b in buf {
            let _ = write!(s, "{:02X}", b);
        }
        return s;
    }
    let head = &buf[..4];
    if head.iter().all(|&b| (0x20..0x7F).contains(&b)) {
        format!("\"{}\"", String::from_utf8_lossy(head))
    } else {
        format!(
            "0x{:02X}{:02X}{:02X}{:02X}",
            head[0], head[1], head[2], head[3]
        )
    }
}

/// Render a byte slice as a hex literal prefixed with `0x`.
fn to_hex(buf: &[u8]) -> String {
    let mut s = String::from("0x");
    for b in buf {
        let _ = write!(s, "{:02X}", b);
    }
    s
}

/// Pretty-print a u32 that may encode "no such record" as 0xFFFFFFFF.
fn opt_record_idx(v: u32) -> String {
    if v == 0xFFFFFFFF {
        "NONE".to_string()
    } else {
        v.to_string()
    }
}

/// Escape a string for dump output. Newlines, tabs, non-printables, quotes,
/// and backslashes are rendered as escapes so the output stays single-line.
fn quote_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                let _ = write!(out, "\\x{:02X}", c as u32);
            }
            c => out.push(c),
        }
    }
    out.push('"');
    out
}

// ---------------------------------------------------------------------
// PalmDB
// ---------------------------------------------------------------------

struct PalmDb {
    /// Byte offset in the file where each record begins.
    offsets: Vec<u32>,
    num_records: usize,
}

impl PalmDb {
    fn record<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a [u8]> {
        let start = *self.offsets.get(idx)? as usize;
        let end = if idx + 1 < self.num_records {
            self.offsets[idx + 1] as usize
        } else {
            data.len()
        };
        data.get(start..end)
    }
}

fn parse_palmdb(data: &[u8]) -> io::Result<PalmDb> {
    if data.len() < 78 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("PalmDB header truncated: {} bytes", data.len()),
        ));
    }
    let num_records = read_u16_be(data, 76).unwrap_or(0) as usize;
    let list_end = 78 + num_records * 8;
    if data.len() < list_end {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "PalmDB record list truncated",
        ));
    }
    let mut offsets = Vec::with_capacity(num_records);
    for i in 0..num_records {
        offsets.push(read_u32_be(data, 78 + i * 8).unwrap_or(0));
    }
    Ok(PalmDb { offsets, num_records })
}

fn dump_palmdb_header(out: &mut String, data: &[u8], palmdb: &PalmDb) {
    // PalmDB name is a 32-byte null-terminated field.
    let name_end = data[..32].iter().position(|&b| b == 0).unwrap_or(32);
    let name = String::from_utf8_lossy(&data[..name_end]).into_owned();
    let _ = writeln!(out, "palmdb.name = {}", quote_str(&name));

    let attrs = read_u16_be(data, 32).unwrap_or(0);
    let version = read_u16_be(data, 34).unwrap_or(0);
    let ctime = read_u32_be(data, 36).unwrap_or(0);
    let mtime = read_u32_be(data, 40).unwrap_or(0);
    let btime = read_u32_be(data, 44).unwrap_or(0);
    let mod_num = read_u32_be(data, 48).unwrap_or(0);
    let _appinfo = read_u32_be(data, 52).unwrap_or(0);
    let _sortinfo = read_u32_be(data, 56).unwrap_or(0);
    let ptype = String::from_utf8_lossy(&data[60..64]).into_owned();
    let creator = String::from_utf8_lossy(&data[64..68]).into_owned();
    let uid = read_u32_be(data, 68).unwrap_or(0);
    let _next_rec_list = read_u32_be(data, 72).unwrap_or(0);

    let _ = writeln!(out, "palmdb.attributes = 0x{:04X}", attrs);
    let _ = writeln!(out, "palmdb.version = {}", version);
    let _ = writeln!(out, "palmdb.ctime = {}", ctime);
    let _ = writeln!(out, "palmdb.mtime = {}", mtime);
    let _ = writeln!(out, "palmdb.btime = {}", btime);
    let _ = writeln!(out, "palmdb.mod_num = {}", mod_num);
    let _ = writeln!(out, "palmdb.type = {}", quote_str(&ptype));
    let _ = writeln!(out, "palmdb.creator = {}", quote_str(&creator));
    let _ = writeln!(out, "palmdb.uid = {}", uid);
    let _ = writeln!(out, "palmdb.rec_count = {}", palmdb.num_records);
}

// ---------------------------------------------------------------------
// MOBI section (PalmDOC + MOBI header + EXTH)
// ---------------------------------------------------------------------

struct MobiHeader {
    header_length: u32,
    exth: Vec<(u32, Vec<u8>)>,
    file_version: u32,
}

fn dump_mobi_section(
    out: &mut String,
    label: &str,
    record0: &[u8],
    record_idx: usize,
) -> Option<MobiHeader> {
    if record0.len() < 16 + 24 {
        let _ = writeln!(
            out,
            "{}.error = \"record {} too small for PalmDOC+MOBI header ({} bytes)\"",
            label,
            record_idx,
            record0.len()
        );
        return None;
    }

    // PalmDOC header (16 bytes at offset 0).
    let compression = read_u16_be(record0, 0).unwrap_or(0);
    // offset 2: unused
    let text_length = read_u32_be(record0, 4).unwrap_or(0);
    let text_record_count = read_u16_be(record0, 8).unwrap_or(0);
    let text_record_size = read_u16_be(record0, 10).unwrap_or(0);
    let encryption = read_u16_be(record0, 12).unwrap_or(0);

    let _ = writeln!(out, "palmdoc.compression = {}", compression);
    let _ = writeln!(out, "palmdoc.text_length = {}", text_length);
    let _ = writeln!(out, "palmdoc.text_record_count = {}", text_record_count);
    let _ = writeln!(out, "palmdoc.text_record_size = {}", text_record_size);
    let _ = writeln!(out, "palmdoc.encryption = {}", encryption);

    // MOBI header at offset 16.
    if &record0[16..20] != b"MOBI" {
        let _ = writeln!(
            out,
            "{}.mobi.identifier_raw = {}",
            label,
            magic_or_hex(&record0[16..20])
        );
        return None;
    }

    let header_length = read_u32_be(record0, 20).unwrap_or(0);
    let mobi_type = read_u32_be(record0, 24).unwrap_or(0);
    let text_encoding = read_u32_be(record0, 28).unwrap_or(0);
    let unique_id = read_u32_be(record0, 32).unwrap_or(0);
    let file_version = read_u32_be(record0, 36).unwrap_or(0);
    let orth_index = read_u32_be(record0, 40).unwrap_or(0);
    let infl_index = read_u32_be(record0, 44).unwrap_or(0);
    let names_index = read_u32_be(record0, 48).unwrap_or(0);
    let keys_index = read_u32_be(record0, 52).unwrap_or(0);
    let _extra0 = read_u32_be(record0, 56).unwrap_or(0);
    let _extra1 = read_u32_be(record0, 60).unwrap_or(0);
    let _extra2 = read_u32_be(record0, 64).unwrap_or(0);
    let _extra3 = read_u32_be(record0, 68).unwrap_or(0);
    let first_non_text_index = read_u32_be(record0, 72).unwrap_or(0);
    let full_name_offset = read_u32_be(record0, 76).unwrap_or(0);
    let full_name_length = read_u32_be(record0, 80).unwrap_or(0);
    let locale = read_u32_be(record0, 84).unwrap_or(0);
    let dict_input_lang = read_u32_be(record0, 88).unwrap_or(0);
    let dict_output_lang = read_u32_be(record0, 92).unwrap_or(0);
    let min_version = read_u32_be(record0, 96).unwrap_or(0);
    let first_image_index = read_u32_be(record0, 100).unwrap_or(0);
    let huff_rec_index = read_u32_be(record0, 104).unwrap_or(0);
    let huff_rec_count = read_u32_be(record0, 108).unwrap_or(0);
    let datp_rec_index = read_u32_be(record0, 112).unwrap_or(0);
    let datp_rec_count = read_u32_be(record0, 116).unwrap_or(0);
    let exth_flags = read_u32_be(record0, 120).unwrap_or(0);
    // offsets 124..172: 32 bytes unknown + 8 bytes reserved area
    let drm_offset = read_u32_be(record0, 168).unwrap_or(0);
    let drm_count = read_u32_be(record0, 172).unwrap_or(0);
    let drm_size = read_u32_be(record0, 176).unwrap_or(0);
    let drm_flags = read_u32_be(record0, 180).unwrap_or(0);
    // offsets 184..192 unused
    let first_text_index = read_u16_be(record0, 192).unwrap_or(0);
    let last_text_index = read_u16_be(record0, 194).unwrap_or(0);
    let fdst_index = read_u32_be(record0, 192).unwrap_or(0); // KF8 alias for first/last
    let fdst_section_count = read_u32_be(record0, 196).unwrap_or(0);
    let fcis_index = read_u32_be(record0, 200).unwrap_or(0);
    let fcis_count = read_u32_be(record0, 204).unwrap_or(0);
    let flis_index = read_u32_be(record0, 208).unwrap_or(0);
    let flis_count = read_u32_be(record0, 212).unwrap_or(0);
    let srcs_index = read_u32_be(record0, 224).unwrap_or(0);
    let srcs_count = read_u32_be(record0, 228).unwrap_or(0);
    // extra_record_flags: MOBI-header offset 224 (= record0 offset 240) as u32.
    // libmobi docs often say u16 at MOBI+242, but both kindlegen and kindling
    // actually write a full u32 at MOBI+224. Verified by dumping the raw
    // bytes of kindlegen_reference.mobi.
    let extra_record_flags = read_u32_be(record0, 240).unwrap_or(0);
    let primary_index_record = read_u32_be(record0, 244).unwrap_or(0);
    // KF8-specific extensions at offsets 248 and beyond (relative to MOBI
    // header start, which is record0+16, so record0 offsets add 16).
    // NOTE: older libmobi docs show the KF8 sect/skel/datp/oth at header
    // offsets 232-248, which in record0 coords is 248-264. We try both
    // documented slots; the one we trust most is the libmobi definitions.
    let fragment_index = read_u32_be(record0, 16 + 232).unwrap_or(0xFFFFFFFF);
    let skeleton_index = read_u32_be(record0, 16 + 236).unwrap_or(0xFFFFFFFF);
    let datp_index = read_u32_be(record0, 16 + 240).unwrap_or(0xFFFFFFFF);
    let oth_index = read_u32_be(record0, 16 + 244).unwrap_or(0xFFFFFFFF);

    let _ = writeln!(out, "mobi.identifier = \"MOBI\"");
    let _ = writeln!(out, "mobi.header_length = {}", header_length);
    let _ = writeln!(out, "mobi.mobi_type = {}", mobi_type);
    let _ = writeln!(out, "mobi.text_encoding = {}", text_encoding);
    let _ = writeln!(out, "mobi.unique_id = {}", unique_id);
    let _ = writeln!(out, "mobi.file_version = {}", file_version);
    let _ = writeln!(out, "mobi.orth_index = {}", opt_record_idx(orth_index));
    let _ = writeln!(out, "mobi.infl_index = {}", opt_record_idx(infl_index));
    let _ = writeln!(out, "mobi.names_index = {}", opt_record_idx(names_index));
    let _ = writeln!(out, "mobi.keys_index = {}", opt_record_idx(keys_index));
    let _ = writeln!(
        out,
        "mobi.first_non_text_index = {}",
        opt_record_idx(first_non_text_index)
    );
    let _ = writeln!(out, "mobi.first_text_index = {}", first_text_index);
    let _ = writeln!(out, "mobi.last_text_index = {}", last_text_index);
    let _ = writeln!(out, "mobi.full_name_offset = {}", full_name_offset);
    let _ = writeln!(out, "mobi.full_name_length = {}", full_name_length);

    // Decode full_name — offset is relative to MOBI header start (record0+16).
    let name_abs = 16 + full_name_offset as usize;
    let name_end = name_abs + full_name_length as usize;
    let full_name = if name_end <= record0.len() {
        String::from_utf8_lossy(&record0[name_abs..name_end]).into_owned()
    } else {
        String::new()
    };
    let _ = writeln!(out, "mobi.full_name = {}", quote_str(&full_name));

    let _ = writeln!(out, "mobi.locale = {}", locale);
    let _ = writeln!(out, "mobi.dict_input_lang = {}", dict_input_lang);
    let _ = writeln!(out, "mobi.dict_output_lang = {}", dict_output_lang);
    let _ = writeln!(out, "mobi.min_version = {}", min_version);
    let _ = writeln!(
        out,
        "mobi.first_image_index = {}",
        opt_record_idx(first_image_index)
    );
    let _ = writeln!(
        out,
        "mobi.huff_rec_index = {}",
        opt_record_idx(huff_rec_index)
    );
    let _ = writeln!(out, "mobi.huff_rec_count = {}", huff_rec_count);
    let _ = writeln!(
        out,
        "mobi.datp_rec_index = {}",
        opt_record_idx(datp_rec_index)
    );
    let _ = writeln!(out, "mobi.datp_rec_count = {}", datp_rec_count);
    let _ = writeln!(out, "mobi.exth_flags = 0x{:08X}", exth_flags);
    let _ = writeln!(out, "mobi.drm_offset = 0x{:08X}", drm_offset);
    let _ = writeln!(out, "mobi.drm_count = {}", drm_count);
    let _ = writeln!(out, "mobi.drm_size = {}", drm_size);
    let _ = writeln!(out, "mobi.drm_flags = 0x{:08X}", drm_flags);
    let _ = writeln!(out, "mobi.fdst_index = {}", opt_record_idx(fdst_index));
    let _ = writeln!(out, "mobi.fdst_section_count = {}", fdst_section_count);
    let _ = writeln!(out, "mobi.fcis_index = {}", opt_record_idx(fcis_index));
    let _ = writeln!(out, "mobi.fcis_count = {}", fcis_count);
    let _ = writeln!(out, "mobi.flis_index = {}", opt_record_idx(flis_index));
    let _ = writeln!(out, "mobi.flis_count = {}", flis_count);
    let _ = writeln!(out, "mobi.srcs_index = {}", opt_record_idx(srcs_index));
    let _ = writeln!(out, "mobi.srcs_count = {}", srcs_count);
    let _ = writeln!(out, "mobi.extra_record_flags = 0x{:08X}", extra_record_flags);
    let _ = writeln!(
        out,
        "mobi.primary_index_record = {}",
        opt_record_idx(primary_index_record)
    );
    let _ = writeln!(
        out,
        "mobi.fragment_index = {}",
        opt_record_idx(fragment_index)
    );
    let _ = writeln!(
        out,
        "mobi.skeleton_index = {}",
        opt_record_idx(skeleton_index)
    );
    let _ = writeln!(out, "mobi.datp_index = {}", opt_record_idx(datp_index));
    let _ = writeln!(out, "mobi.oth_index = {}", opt_record_idx(oth_index));

    // EXTH parsing. EXTH block starts at MOBI header end = 16 + header_len.
    let exth_off = 16 + header_length as usize;
    let mut exth: Vec<(u32, Vec<u8>)> = Vec::new();
    if exth_off + 12 <= record0.len() && &record0[exth_off..exth_off + 4] == b"EXTH" {
        let exth_len = read_u32_be(record0, exth_off + 4).unwrap_or(0) as usize;
        let exth_count = read_u32_be(record0, exth_off + 8).unwrap_or(0) as usize;
        let _ = writeln!(out, "exth.length = {}", exth_len);
        let _ = writeln!(out, "exth.count = {}", exth_count);

        let exth_end = exth_off + exth_len;
        let mut pos = exth_off + 12;
        for _ in 0..exth_count {
            if pos + 8 > exth_end || pos + 8 > record0.len() {
                break;
            }
            let rtype = read_u32_be(record0, pos).unwrap_or(0);
            let rlen = read_u32_be(record0, pos + 4).unwrap_or(0) as usize;
            if rlen < 8 || pos + rlen > record0.len() {
                break;
            }
            let payload = record0[pos + 8..pos + rlen].to_vec();
            exth.push((rtype, payload));
            pos += rlen;
        }

        // Sort EXTH by type for stable diff output, then by order of appearance
        // within the same type (some types appear multiple times, e.g. 101
        // contributors).
        let mut sorted: Vec<(usize, &(u32, Vec<u8>))> = exth.iter().enumerate().collect();
        sorted.sort_by_key(|(i, (t, _))| (*t, *i));
        for (_, (rtype, payload)) in sorted {
            dump_exth_record(out, *rtype, payload);
        }
    } else {
        let _ = writeln!(out, "exth.present = false");
    }

    Some(MobiHeader {
        header_length,
        exth,
        file_version,
    })
}

/// Emit lines for a single EXTH record. String-valued records emit a
/// `.value` line; numeric u32-valued records emit a `.value_u32` line; other
/// records emit `.value_hex`.
fn dump_exth_record(out: &mut String, rtype: u32, payload: &[u8]) {
    let _ = writeln!(out, "exth[{}].name = {}", rtype, quote_str(exth_type_name(rtype)));
    let _ = writeln!(out, "exth[{}].value_len = {}", rtype, payload.len());

    if is_numeric_exth(rtype) && payload.len() == 4 {
        let v = u32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]);
        let _ = writeln!(out, "exth[{}].value_u32 = {}", rtype, v);
        return;
    }

    if is_string_exth(rtype) {
        if let Ok(s) = std::str::from_utf8(payload) {
            let _ = writeln!(out, "exth[{}].value = {}", rtype, quote_str(s));
            return;
        }
    }

    let _ = writeln!(out, "exth[{}].value_hex = {}", rtype, to_hex(payload));
}

/// Well-known EXTH types. Mirrors the libmobi + MobileRead wiki names.
fn exth_type_name(rtype: u32) -> &'static str {
    match rtype {
        100 => "creator",
        101 => "publisher",
        103 => "description",
        104 => "isbn",
        105 => "subject",
        106 => "publishingdate",
        108 => "contributor",
        109 => "rights",
        112 => "source",
        113 => "asin",
        114 => "versionnumber",
        115 => "sample",
        116 => "startreading",
        117 => "adult",
        118 => "retail_price",
        119 => "retail_price_currency",
        121 => "kf8boundary",
        125 => "kf8_count_resources_fonts",
        129 => "kf8_cover_uri",
        131 => "kf8_unknown131",
        201 => "coveroffset",
        202 => "thumboffset",
        203 => "hasfakecover",
        204 => "creatorsoftware",
        205 => "creatormajor",
        206 => "creatorminor",
        207 => "creatorbuild",
        208 => "watermark",
        209 => "tamperkey",
        300 => "fontsignature",
        401 => "clippinglimit",
        402 => "publisherlimit",
        403 => "unknown403",
        404 => "ttsdisable",
        406 => "rental_expiration",
        450 => "unknown450",
        451 => "unknown451",
        452 => "unknown452",
        453 => "unknown453",
        501 => "cde_content_type",
        502 => "lastupdatetime",
        503 => "updatedtitle",
        504 => "asin2",
        524 => "language",
        525 => "writingmode",
        526 => "nominalpagecount",
        527 => "pageprogressiondirection",
        528 => "overrideslhfeatures",
        529 => "originalsourcedesc",
        534 => "inputsourcetype",
        535 => "creatorbuildrev",
        536 => "containerinfo",
        538 => "originalresolution",
        542 => "unknown542",
        _ => "unknown",
    }
}

fn is_string_exth(rtype: u32) -> bool {
    matches!(
        rtype,
        100 | 101
            | 103
            | 104
            | 105
            | 106
            | 108
            | 109
            | 112
            | 113
            | 117
            | 118
            | 119
            | 129
            | 208
            | 501
            | 503
            | 504
            | 524
            | 525
            | 527
            | 529
            | 534
            | 535
            | 536
            | 538
    )
}

fn is_numeric_exth(rtype: u32) -> bool {
    matches!(
        rtype,
        115 | 116
            | 121
            | 125
            | 131
            | 201
            | 202
            | 203
            | 204
            | 205
            | 206
            | 207
            | 401
            | 402
            | 403
            | 404
            | 406
            | 502
            | 526
    )
}

// ---------------------------------------------------------------------
// INDX record dumping
// ---------------------------------------------------------------------

/// One entry in a parsed TAGX block. The bitmask identifies which bit(s)
/// of the entry control byte indicate that this tag is present; for most
/// scalar tags num_values=1 and the mask picks a single bit, but ARRAY
/// tags reserve a multi-bit slot that stores the element count.
#[derive(Clone, Debug)]
struct TagDef {
    tag_id: u8,
    num_values: u8,
    mask: u8,
    _end_flag: u8,
}

/// Schema for a data-entry block: the list of tag definitions taken from
/// a paired primary INDX's TAGX plus the control-byte count that governs
/// how many leading control bytes each entry starts with.
#[derive(Clone, Debug, Default)]
struct TagxSchema {
    tags: Vec<TagDef>,
    control_byte_count: u32,
}

/// Parse a TAGX block starting at `tagx_start`. Returns the parsed schema
/// plus the length field so the caller can skip forward.
fn parse_tagx(rec: &[u8], tagx_start: usize) -> Option<(TagxSchema, usize)> {
    if tagx_start + 12 > rec.len() || &rec[tagx_start..tagx_start + 4] != b"TAGX" {
        return None;
    }
    let tagx_len = read_u32_be(rec, tagx_start + 4)? as usize;
    let control_byte_count = read_u32_be(rec, tagx_start + 8)?;
    let mut tags = Vec::new();
    let mut i = tagx_start + 12;
    let tagx_end = (tagx_start + tagx_len).min(rec.len());
    while i + 4 <= tagx_end {
        let tag_id = rec[i];
        let num_values = rec[i + 1];
        let mask = rec[i + 2];
        let end_flag = rec[i + 3];
        tags.push(TagDef {
            tag_id,
            num_values,
            mask,
            _end_flag: end_flag,
        });
        i += 4;
        if tag_id == 0 && mask == 0 && end_flag == 1 {
            break;
        }
    }
    Some((
        TagxSchema {
            tags,
            control_byte_count,
        },
        tagx_len,
    ))
}

/// Decode an inverted variable-length integer used by kindlegen for INDX
/// data-record tag values. Each byte carries 7 data bits in big-endian
/// order; the HIGH bit indicates the LAST byte (high bit SET = stop).
/// This is the inverse of the forward VWI convention libmobi uses for
/// header counts; it matches vwi::encode_vwi_inv in this crate.
/// Returns the decoded value and the number of bytes consumed.
fn read_vwi(bytes: &[u8]) -> Option<(u32, usize)> {
    let mut value: u32 = 0;
    for (i, &b) in bytes.iter().enumerate() {
        value = (value << 7) | (b & 0x7F) as u32;
        if b & 0x80 != 0 {
            return Some((value, i + 1));
        }
    }
    None
}

/// Decode an INDX data-entry tag block given a TAGX schema.
///
/// Returns a vec of (tag_id, Vec<u32>) pairs. For tags present via a
/// single-bit mask the Vec has `num_values` entries; for tags using a
/// multi-bit mask (ARRAY tags), the count is `(control_byte_mask_slot /
/// shift) * num_values`.
///
/// `control_bytes` is the leading control-byte run (1 byte for
/// control_byte_count=1). `tag_stream` is the VWI-encoded bytes following
/// the control bytes.
fn decode_entry_tags(
    control_bytes: &[u8],
    tag_stream: &[u8],
    schema: &TagxSchema,
) -> Option<Vec<(u8, Vec<u32>)>> {
    let control = *control_bytes.first()?;
    let mut values: Vec<(u8, Vec<u32>)> = Vec::new();
    let mut cursor = 0usize;

    for td in &schema.tags {
        if td.tag_id == 0 && td.mask == 0 {
            continue;
        }
        if td.mask == 0 {
            continue;
        }

        // Check whether this tag's slot is non-zero in the control byte.
        // For a single-bit mask (e.g. 0x01), this is a boolean presence
        // check. For multi-bit masks (e.g. 0x04 with bits [2:3]), the
        // slot value is the array count.
        let slot = control & td.mask;
        if slot == 0 {
            continue;
        }
        let shift = td.mask.trailing_zeros();
        let count_multiplier = (slot >> shift) as usize;
        let total_values = count_multiplier * td.num_values as usize;

        let mut decoded = Vec::with_capacity(total_values);
        for _ in 0..total_values {
            if cursor >= tag_stream.len() {
                return None;
            }
            let (v, n) = read_vwi(&tag_stream[cursor..])?;
            decoded.push(v);
            cursor += n;
        }
        values.push((td.tag_id, decoded));
    }

    Some(values)
}

/// Dump an INDX record at `rec_idx`. Emits all fixed-header fields, the
/// ORDT2 codepoint list (if any), TAGX definitions (if any), and every
/// routing or data entry with decoded labels and tag values.
///
/// `paired_schema` is the TAGX schema taken from this record's primary
/// (for data records, generation=1) or from the record itself (for
/// primaries, generation=0). When None, tag decoding falls back to raw
/// hex.
fn dump_indx_record(
    out: &mut String,
    rec_idx: usize,
    rec: &[u8],
    paired_schema: Option<&TagxSchema>,
    paired_ordt2: Option<&[u32]>,
) {
    let label = format!("indx[{}]", rec_idx);
    if rec.len() < 192 || &rec[..4] != b"INDX" {
        let _ = writeln!(out, "{}.length = {}", label, rec.len());
        let _ = writeln!(out, "{}.magic = {}", label, magic_or_hex(rec));
        return;
    }

    let header_length = read_u32_be(rec, 4).unwrap_or(0);
    let index_type = read_u32_be(rec, 8).unwrap_or(0);
    let gen_number = read_u32_be(rec, 12).unwrap_or(0);
    // offset 16 is a kindlegen-constant (2)
    let const16 = read_u32_be(rec, 16).unwrap_or(0);
    let idxt_offset = read_u32_be(rec, 20).unwrap_or(0);
    let idxt_count = read_u32_be(rec, 24).unwrap_or(0);
    let encoding = read_u32_be(rec, 28).unwrap_or(0);
    let language = read_u32_be(rec, 32).unwrap_or(0);
    let total_entry_count = read_u32_be(rec, 36).unwrap_or(0);
    let ordt_offset_legacy = read_u32_be(rec, 40).unwrap_or(0);
    let ligt_offset = read_u32_be(rec, 44).unwrap_or(0);
    let ligt_entries = read_u32_be(rec, 48).unwrap_or(0);
    let cncx_records_count = read_u32_be(rec, 52).unwrap_or(0);
    // offsets 56..164 are reserved/unknown in most writers
    let ordt_type = read_u32_be(rec, 164).unwrap_or(0);
    let ordt_entries_count = read_u32_be(rec, 168).unwrap_or(0);
    let ordt1_offset = read_u32_be(rec, 172).unwrap_or(0);
    let ordt2_offset = read_u32_be(rec, 176).unwrap_or(0);
    let tagx_offset = read_u32_be(rec, 180).unwrap_or(0);

    let _ = writeln!(out, "{}.identifier = \"INDX\"", label);
    let _ = writeln!(out, "{}.length = {}", label, rec.len());
    let _ = writeln!(out, "{}.header_length = {}", label, header_length);
    let _ = writeln!(out, "{}.index_type = {}", label, index_type);
    let _ = writeln!(out, "{}.generation = {}", label, gen_number);
    let _ = writeln!(out, "{}.const16 = {}", label, const16);
    let _ = writeln!(out, "{}.idxt_offset = {}", label, idxt_offset);
    let _ = writeln!(out, "{}.idxt_count = {}", label, idxt_count);
    let _ = writeln!(out, "{}.encoding = {}", label, encoding);
    let _ = writeln!(out, "{}.language = {}", label, language);
    let _ = writeln!(out, "{}.total_entry_count = {}", label, total_entry_count);
    let _ = writeln!(
        out,
        "{}.ordt_offset_legacy = {}",
        label, ordt_offset_legacy
    );
    let _ = writeln!(out, "{}.ligt_offset = {}", label, ligt_offset);
    let _ = writeln!(out, "{}.ligt_entries = {}", label, ligt_entries);
    let _ = writeln!(
        out,
        "{}.cncx_records_count = {}",
        label, cncx_records_count
    );
    let _ = writeln!(out, "{}.ordt_type = {}", label, ordt_type);
    let _ = writeln!(
        out,
        "{}.ordt_entries_count = {}",
        label, ordt_entries_count
    );
    let _ = writeln!(out, "{}.ordt1_offset = {}", label, ordt1_offset);
    let _ = writeln!(out, "{}.ordt2_offset = {}", label, ordt2_offset);
    let _ = writeln!(out, "{}.tagx_offset_field = {}", label, tagx_offset);

    // ORDT2 codepoint table decoding. libmobi's `mobi_parse_ordt` reads
    // N u16 BE codepoints starting at ordt2_offset + 4 (skipping the
    // ORDT2 magic). kindling places the ORDT2 magic directly at
    // ordt2_offset so we skip 4 bytes.
    let ordt2: Option<Vec<u32>> = if ordt_type == 1 && ordt_entries_count > 0 {
        let start = ordt2_offset as usize + 4;
        let end = start + 2 * ordt_entries_count as usize;
        if end <= rec.len() {
            let mut cps = Vec::with_capacity(ordt_entries_count as usize);
            for i in 0..ordt_entries_count as usize {
                let cp = read_u16_be(rec, start + 2 * i).unwrap_or(0) as u32;
                cps.push(cp);
            }
            Some(cps)
        } else {
            None
        }
    } else {
        None
    };

    if let Some(cps) = &ordt2 {
        let _ = writeln!(out, "{}.ordt2_entries = {}", label, cps.len());
        // Emit one line per codepoint so a diff can see exactly which
        // codepoint moved where in the table.
        for (i, cp) in cps.iter().enumerate() {
            let _ = writeln!(out, "{}.ordt2[{}] = U+{:04X}", label, i, cp);
        }
    }

    // TAGX parsing. Kindlegen places TAGX right after the primary header
    // region at rec offset `header_length` — the declared header_length
    // already includes the 7-byte "default" string for sub-index 1
    // (header_length=199), so the TAGX start is simply `header_length`.
    // Data records (generation=1) have no TAGX of their own.
    //
    // Note: older versions of this dumper computed tagx_start as
    // `header_length + default_str_len`, which double-counted the
    // default string for header_length=199 and silently skipped the
    // TAGX block on the orth primary (indx[3]).
    let own_schema = if gen_number == 0 {
        let tagx_start = header_length as usize;
        if let Some((schema, tagx_len)) = parse_tagx(rec, tagx_start) {
            let _ = writeln!(out, "{}.tagx.length = {}", label, tagx_len);
            let _ = writeln!(
                out,
                "{}.tagx.control_byte_count = {}",
                label, schema.control_byte_count
            );
            for (idx, td) in schema.tags.iter().enumerate() {
                let _ = writeln!(
                    out,
                    "{}.tagx[{}] = (tag_id={}, num_values={}, bitmask=0x{:02X}, end={})",
                    label, idx, td.tag_id, td.num_values, td.mask, td._end_flag
                );
            }
            Some(schema)
        } else {
            None
        }
    } else {
        None
    };

    // IDXT block: read the offsets of each entry.
    let idxt_off = idxt_offset as usize;
    let mut entry_offsets: Vec<usize> = Vec::new();
    if idxt_off + 4 <= rec.len() && &rec[idxt_off..idxt_off + 4] == b"IDXT" {
        let _ = writeln!(out, "{}.idxt.magic = \"IDXT\"", label);
        for i in 0..idxt_count as usize {
            let off = match read_u16_be(rec, idxt_off + 4 + 2 * i) {
                Some(v) => v as usize,
                None => break,
            };
            entry_offsets.push(off);
        }
        // Emit the IDXT offsets as a compact list for easy visual diffing.
        let offsets_list: Vec<String> =
            entry_offsets.iter().map(|o| format!("0x{:04X}", o)).collect();
        let _ = writeln!(
            out,
            "{}.idxt_offsets = [{}]",
            label,
            offsets_list.join(", ")
        );
    }

    // For primaries, entries are routing records (label_len u8, label
    // bytes, u16 count). For data records (generation=1), each entry is
    // a label + control byte(s) + VWI-encoded tag values decoded via
    // the paired primary's TAGX schema.
    //
    // Label decoding uses this record's own ORDT2 table when present;
    // data records don't carry their own ORDT2, so we fall back to the
    // paired primary's table.
    let effective_ordt2 = ordt2.as_deref().or(paired_ordt2);
    let effective_schema = if gen_number == 0 {
        own_schema.as_ref()
    } else {
        paired_schema
    };

    for (i, &off) in entry_offsets.iter().enumerate() {
        let end = if i + 1 < entry_offsets.len() {
            entry_offsets[i + 1]
        } else {
            idxt_off
        };
        if off >= rec.len() || end > rec.len() || end < off {
            continue;
        }
        let entry = &rec[off..end];
        let decoded =
            decode_indx_entry(entry, gen_number, effective_ordt2, effective_schema);
        let _ = writeln!(out, "{}.entries[{}] = {}", label, i, decoded);
    }
}

/// Decode a single INDX entry. For routing records (generation=0) the
/// layout is [u8 label_len][label_bytes][u16 BE count]. For data records
/// (generation=1) the layout is [u8 byte0 = prefix_len<<5 | new_len]
/// [label_bytes][u8 control_byte(s)][VWI-encoded tag values...].
///
/// When `ordt2` is Some and the record uses ORDT2 encoding (ordt_type=1),
/// each label byte is looked up in the codepoint table to produce UTF-8.
/// Otherwise we try UTF-16BE, then fall back to hex. When a data record
/// has a paired TAGX `schema`, the tag bytes are decoded into explicit
/// `tag[N] = [values]` fragments; when decoding fails (truncated /
/// schema mismatch), we fall back to raw hex with a `# decode_failed`
/// trailing comment.
fn decode_indx_entry(
    entry: &[u8],
    generation: u32,
    ordt2: Option<&[u32]>,
    schema: Option<&TagxSchema>,
) -> String {
    if entry.is_empty() {
        return "(empty)".to_string();
    }

    if generation == 0 {
        // Routing entry: one-byte length, label, u16 BE count.
        let label_len = entry[0] as usize;
        if 1 + label_len > entry.len() {
            return format!("(truncated routing, raw={})", to_hex(entry));
        }
        let label_bytes = &entry[1..1 + label_len];
        let label = decode_label(label_bytes, ordt2);
        let count_bytes = &entry[1 + label_len..];
        let count = if count_bytes.len() >= 2 {
            u16::from_be_bytes([count_bytes[0], count_bytes[1]]) as u32
        } else {
            0
        };
        format!(
            "{{ routing_label = {}, label_bytes = {}, record_count = {} }}",
            quote_str(&label),
            to_hex(label_bytes),
            count
        )
    } else {
        // Data entry: prefix_len (3 bits) | new_len (5 bits), label,
        // control_byte_count bytes of control, VWI-encoded tag values.
        let byte0 = entry[0];
        let prefix_len = (byte0 >> 5) & 0x07;
        let new_len = (byte0 & 0x1F) as usize;
        if 1 + new_len + 1 > entry.len() {
            return format!("(truncated data, raw={})", to_hex(entry));
        }
        let label_bytes = &entry[1..1 + new_len];
        let label = decode_label(label_bytes, ordt2);

        let control_byte_count = schema.map(|s| s.control_byte_count as usize).unwrap_or(1);
        let control_start = 1 + new_len;
        let control_end = control_start + control_byte_count;
        if control_end > entry.len() {
            return format!(
                "{{ label = {}, label_bytes = {}, prefix_len = {}, new_len = {}, tag_bytes = {} }} # decode_failed:truncated_control",
                quote_str(&label),
                to_hex(label_bytes),
                prefix_len,
                new_len,
                to_hex(&entry[control_start..])
            );
        }
        let control_bytes = &entry[control_start..control_end];
        let tail = &entry[control_end..];

        // Render a single-byte control as a u8 literal for readability;
        // a multi-byte control as a hex blob. Either way we pass all
        // control bytes through to the decoder.
        let control_repr = if control_bytes.len() == 1 {
            format!("0x{:02X}", control_bytes[0])
        } else {
            to_hex(control_bytes)
        };

        if let Some(schema) = schema {
            if let Some(decoded) = decode_entry_tags(control_bytes, tail, schema) {
                // Render as `tag[N] = [a, b, c]` sorted by tag_id for stable diff.
                let mut decoded_sorted = decoded.clone();
                decoded_sorted.sort_by_key(|(id, _)| *id);
                let mut parts = Vec::new();
                parts.push(format!("label = {}", quote_str(&label)));
                parts.push(format!("label_bytes = {}", to_hex(label_bytes)));
                parts.push(format!("control = {}", control_repr));
                for (tag_id, values) in &decoded_sorted {
                    let vs: Vec<String> = values.iter().map(|v| v.to_string()).collect();
                    parts.push(format!("tag[{}] = [{}]", tag_id, vs.join(", ")));
                }
                return format!("{{ {} }}", parts.join(", "));
            }
        }

        format!(
            "{{ label = {}, label_bytes = {}, prefix_len = {}, new_len = {}, control = {}, tag_bytes = {} }}{}",
            quote_str(&label),
            to_hex(label_bytes),
            prefix_len,
            new_len,
            control_repr,
            to_hex(tail),
            if schema.is_some() {
                " # decode_failed"
            } else {
                " # no_schema"
            }
        )
    }
}

/// Decode INDX label bytes to a displayable string.
///
/// When an ORDT2 table is provided, each byte is treated as an index into
/// the codepoint list. When no ORDT2 is present, we try UTF-16BE (the
/// historical kindlegen encoding for non-dict sub-indexes). If both fail
/// we return `hex:0x...` so the diff still shows bytes.
fn decode_label(bytes: &[u8], ordt2: Option<&[u32]>) -> String {
    if let Some(table) = ordt2 {
        let mut out = String::with_capacity(bytes.len());
        let mut all_ok = true;
        for &b in bytes {
            match table.get(b as usize).copied() {
                Some(cp) => {
                    if let Some(c) = char::from_u32(cp) {
                        out.push(c);
                    } else {
                        all_ok = false;
                        break;
                    }
                }
                None => {
                    all_ok = false;
                    break;
                }
            }
        }
        if all_ok {
            return out;
        }
    }

    if bytes.len() % 2 == 0 && !bytes.is_empty() {
        let mut chars: Vec<u16> = Vec::with_capacity(bytes.len() / 2);
        for chunk in bytes.chunks_exact(2) {
            chars.push(u16::from_be_bytes([chunk[0], chunk[1]]));
        }
        if let Ok(s) = String::from_utf16(&chars) {
            if s.chars().all(|c| !c.is_control() || c == ' ') {
                return s;
            }
        }
    }

    format!("hex:{}", to_hex(bytes))
}

// ---------------------------------------------------------------------
// Top-level driver
// ---------------------------------------------------------------------

/// Parse a MOBI/AZW3 file at `path` and return a structural dump as a
/// String. The dump is designed so `diff -u` between two files surfaces
/// semantic differences without being drowned in offset cascades.
pub fn dump_mobi(path: &Path) -> io::Result<String> {
    let data = std::fs::read(path)?;
    let palmdb = parse_palmdb(&data)?;

    let mut out = String::new();
    dump_palmdb_header(&mut out, &data, &palmdb);

    // PalmDB record list: emit length + magic for every record so the
    // overall layout is visible even before we parse individual records.
    for i in 0..palmdb.num_records {
        let rec = palmdb.record(&data, i).unwrap_or(&[]);
        let _ = writeln!(&mut out, "record[{}].length = {}", i, rec.len());
        let _ = writeln!(&mut out, "record[{}].magic = {}", i, magic_or_hex(rec));
    }

    // MOBI section 0 (always present: KF7 or KF8-only).
    let rec0 = palmdb.record(&data, 0).unwrap_or(&[]);
    let kf7 = dump_mobi_section(&mut out, "section0", rec0, 0);

    // KF8 section if EXTH 121 boundary is set in the KF7 header.
    if let Some(kf7) = kf7.as_ref() {
        if let Some((_, payload)) = kf7.exth.iter().find(|(t, _)| *t == 121) {
            if payload.len() == 4 {
                let boundary = u32::from_be_bytes([
                    payload[0], payload[1], payload[2], payload[3],
                ]) as usize;
                if boundary < palmdb.num_records {
                    let _ = writeln!(&mut out, "kf8.boundary_record = {}", boundary);
                    if let Some(kf8_rec0) = palmdb.record(&data, boundary) {
                        dump_mobi_section(&mut out, "section_kf8", kf8_rec0, boundary);
                    }
                }
            }
        }
        let _ = writeln!(&mut out, "section0.file_version = {}", kf7.file_version);
        let _ = writeln!(
            &mut out,
            "section0.header_length_total = {}",
            kf7.header_length
        );
    }

    // INDX records. Walk every PalmDB record twice. First pass: pick
    // out primary (generation=0) INDX records and record their TAGX
    // schema + ORDT2 table keyed by PalmDB record index. Second pass:
    // walk in order, passing each data record the most-recently-seen
    // primary's schema and ORDT2 so entry tag bytes can be decoded.
    let mut indx_records: Vec<usize> = Vec::new();
    for i in 0..palmdb.num_records {
        if let Some(rec) = palmdb.record(&data, i) {
            if rec.len() >= 4 && &rec[..4] == b"INDX" {
                indx_records.push(i);
            }
        }
    }

    let mut primary_info: Vec<(usize, Option<TagxSchema>, Option<Vec<u32>>)> = Vec::new();
    for &i in &indx_records {
        let rec = match palmdb.record(&data, i) {
            Some(r) => r,
            None => continue,
        };
        let header_length = read_u32_be(rec, 4).unwrap_or(0);
        let generation = read_u32_be(rec, 12).unwrap_or(0);
        if generation != 0 {
            continue;
        }
        let schema = parse_tagx(rec, header_length as usize).map(|(s, _)| s);
        let ordt_type = read_u32_be(rec, 164).unwrap_or(0);
        let ordt_entries_count = read_u32_be(rec, 168).unwrap_or(0);
        let ordt2_offset = read_u32_be(rec, 176).unwrap_or(0);
        let ordt2 = if ordt_type == 1 && ordt_entries_count > 0 {
            let start = ordt2_offset as usize + 4;
            let end = start + 2 * ordt_entries_count as usize;
            if end <= rec.len() {
                let mut cps = Vec::with_capacity(ordt_entries_count as usize);
                for k in 0..ordt_entries_count as usize {
                    cps.push(read_u16_be(rec, start + 2 * k).unwrap_or(0) as u32);
                }
                Some(cps)
            } else {
                None
            }
        } else {
            None
        };
        primary_info.push((i, schema, ordt2));
    }

    for &i in &indx_records {
        let rec = match palmdb.record(&data, i) {
            Some(r) => r,
            None => continue,
        };
        let generation = read_u32_be(rec, 12).unwrap_or(0);
        let (schema, ordt2) = if generation == 0 {
            (None, None)
        } else {
            // Find the nearest preceding primary.
            let mut best: Option<&(usize, Option<TagxSchema>, Option<Vec<u32>>)> = None;
            for pi in &primary_info {
                if pi.0 < i {
                    best = Some(pi);
                } else {
                    break;
                }
            }
            match best {
                Some((_, s, o)) => (s.as_ref(), o.as_deref()),
                None => (None, None),
            }
        };
        dump_indx_record(&mut out, i, rec, schema, ordt2);
    }

    Ok(out)
}

// ---------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    fn fixture(name: &str) -> PathBuf {
        let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        p.push("tests/fixtures/parity");
        p.push(name);
        p.push("kindlegen_reference.mobi");
        p
    }

    /// The dumper must succeed and emit PalmDB header lines for every
    /// parity fixture. We assert on structural shape, not byte-exact
    /// contents, so committed fixtures can evolve without breaking this
    /// test.
    #[test]
    fn dumps_simple_dict_fixture() {
        let path = fixture("simple_dict");
        if !path.exists() {
            eprintln!("skipping: fixture {} missing", path.display());
            return;
        }
        let dump = dump_mobi(&path).expect("dump should succeed");
        assert!(dump.contains("palmdb.type = \"BOOK\""));
        assert!(dump.contains("palmdb.creator = \"MOBI\""));
        assert!(dump.contains("mobi.identifier = \"MOBI\""));
        assert!(dump.contains("mobi.header_length"));
        assert!(
            dump.contains("exth["),
            "simple_dict should have at least one EXTH record"
        );
        assert!(
            dump.contains("indx["),
            "simple_dict must contain an INDX section"
        );
    }

    #[test]
    fn dumps_simple_book_fixture() {
        let path = fixture("simple_book");
        if !path.exists() {
            eprintln!("skipping: fixture {} missing", path.display());
            return;
        }
        let dump = dump_mobi(&path).expect("dump should succeed");
        assert!(dump.contains("palmdb.type = \"BOOK\""));
        assert!(dump.contains("mobi.identifier = \"MOBI\""));
        assert!(dump.contains("exth[100].name = \"creator\""));
        assert!(dump.contains("exth[524].name = \"language\""));
    }

    #[test]
    fn dumps_simple_comic_fixture() {
        let path = fixture("simple_comic");
        if !path.exists() {
            eprintln!("skipping: fixture {} missing", path.display());
            return;
        }
        let dump = dump_mobi(&path).expect("dump should succeed");
        assert!(dump.contains("palmdb.type = \"BOOK\""));
        assert!(dump.contains("exth[201].name = \"coveroffset\""));
    }

    #[test]
    fn inverted_vwi_decodes_single_and_multi_byte() {
        // Single byte: 0xFD = 1111_1101 ⇒ high bit set ⇒ stop. Value =
        // 0x7D = 125. Consumed 1.
        assert_eq!(read_vwi(&[0xFD]), Some((125, 1)));
        // Two bytes: 0x02 (high bit clear, more), 0xAF (high bit set, stop).
        // Value = (0x02 << 7) | 0x2F = 256 + 47 = 303. Consumed 2.
        assert_eq!(read_vwi(&[0x02, 0xAF]), Some((303, 2)));
        // Three bytes: 0x01 0x00 0x80 ⇒ (1 << 14) | (0 << 7) | 0 = 16384.
        assert_eq!(read_vwi(&[0x01, 0x00, 0x80]), Some((16384, 3)));
        // Unterminated (no high bit ever set): returns None.
        assert_eq!(read_vwi(&[0x01, 0x02, 0x03]), None);
    }

    #[test]
    fn data_entry_decodes_three_scalar_tags() {
        // Schema: tags 1, 2, 42 each with mask = 0x01 / 0x02 / 0x04 and
        // num_values = 1. Control byte 0x07 = all three present.
        let schema = TagxSchema {
            control_byte_count: 1,
            tags: vec![
                TagDef {
                    tag_id: 1,
                    num_values: 1,
                    mask: 0x01,
                    _end_flag: 0,
                },
                TagDef {
                    tag_id: 2,
                    num_values: 1,
                    mask: 0x02,
                    _end_flag: 0,
                },
                TagDef {
                    tag_id: 42,
                    num_values: 1,
                    mask: 0x04,
                    _end_flag: 0,
                },
            ],
        };
        // Entry: byte0=0x00 (prefix=0, len=0), control=0x07, values VWI
        // [125, 73, 1] = bytes 0xFD 0xC9 0x81 (all single-byte).
        let entry = vec![0x00, 0x07, 0xFD, 0xC9, 0x81];
        let s = decode_indx_entry(&entry, 1, None, Some(&schema));
        assert!(s.contains("tag[1] = [125]"), "got {}", s);
        assert!(s.contains("tag[2] = [73]"), "got {}", s);
        assert!(s.contains("tag[42] = [1]"), "got {}", s);
        assert!(!s.contains("decode_failed"), "got {}", s);
    }

    /// Tiny synthetic INDX test: build a minimal PalmDB + MOBI + INDX
    /// record with an ORDT2 table and a single routing entry, then
    /// confirm the dumper decodes the label via the ORDT2 codepoint
    /// list.
    #[test]
    fn ordt2_label_decodes_via_table() {
        // Two-codepoint ORDT2 table: [U+0041 'A', U+0042 'B']. A label
        // of bytes [0x00, 0x01] should decode to "AB".
        let ordt2_cps: Vec<u32> = vec![0x0041, 0x0042];
        let label_bytes = vec![0x00u8, 0x01u8];

        // Minimum-viable INDX record.
        let header_length: u32 = 192;
        let mut rec = vec![0u8; 192];
        rec[0..4].copy_from_slice(b"INDX");
        rec[4..8].copy_from_slice(&header_length.to_be_bytes());
        // generation = 0 (routing)
        rec[12..16].copy_from_slice(&0u32.to_be_bytes());
        // const16 = 2
        rec[16..20].copy_from_slice(&2u32.to_be_bytes());
        // idxt_count = 1
        rec[24..28].copy_from_slice(&1u32.to_be_bytes());
        // ordt_type = 1
        rec[164..168].copy_from_slice(&1u32.to_be_bytes());
        // ordt_entries_count = 2
        rec[168..172].copy_from_slice(&2u32.to_be_bytes());

        // Entry starts at offset 192: [len=2][0x00][0x01][count=0 u16]
        let entry_offset = header_length as usize;
        rec.push(label_bytes.len() as u8);
        rec.extend_from_slice(&label_bytes);
        rec.extend_from_slice(&0u16.to_be_bytes());

        // IDXT follows.
        let idxt_offset = rec.len();
        rec.extend_from_slice(b"IDXT");
        rec.extend_from_slice(&(entry_offset as u16).to_be_bytes());
        // Write idxt_offset into header.
        let idxt_off_u32 = idxt_offset as u32;
        rec[20..24].copy_from_slice(&idxt_off_u32.to_be_bytes());

        // Pad to 4-byte boundary.
        while rec.len() % 4 != 0 {
            rec.push(0);
        }

        // ORDT2 blob: 2 pad + "ORDT"+4 filler + "ORDT" + N*u16 cps.
        let ordt_start = rec.len();
        rec.push(0);
        rec.push(0);
        rec.extend_from_slice(b"ORDT");
        rec.extend_from_slice(&[0, 0, 0, 0]);
        rec.extend_from_slice(b"ORDT");
        for cp in &ordt2_cps {
            rec.extend_from_slice(&(*cp as u16).to_be_bytes());
        }
        let ordt2_abs = ordt_start + 10;
        rec[176..180].copy_from_slice(&(ordt2_abs as u32).to_be_bytes());

        // Dump.
        let mut out = String::new();
        dump_indx_record(&mut out, 7, &rec, None, None);

        assert!(
            out.contains("indx[7].ordt_type = 1"),
            "ordt_type should be 1, got:\n{}",
            out
        );
        assert!(
            out.contains("indx[7].ordt2[0] = U+0041"),
            "ORDT2 should decode U+0041 at index 0, got:\n{}",
            out
        );
        assert!(
            out.contains("indx[7].ordt2[1] = U+0042"),
            "ORDT2 should decode U+0042 at index 1, got:\n{}",
            out
        );
        assert!(
            out.contains("routing_label = \"AB\""),
            "label should decode via ORDT2 to AB, got:\n{}",
            out
        );
    }
}