use std::collections::HashSet;
/// Serializes a single EXTH record.
///
/// The record is laid out as:
/// * 4 bytes: `rec_type`, big-endian
/// * 4 bytes: total record length (the 8 header bytes plus `data.len()`), big-endian
/// * `data` verbatim
///
/// The length is narrowed to `u32` with `as`, so payloads that do not fit in
/// 32 bits would have their length field truncated.
pub fn exth_record(rec_type: u32, data: &[u8]) -> Vec<u8> {
    let total_len = data.len() + 8;
    let type_field = rec_type.to_be_bytes();
    let len_field = (total_len as u32).to_be_bytes();
    [&type_field[..], &len_field[..], data].concat()
}
/// Code-point ranges and the signature bit each one sets.
///
/// Every entry is `(range_start, range_end, usb_idx, bit)`: a code point inside
/// the inclusive range `range_start..=range_end` sets bit `bit` of word
/// `usb_idx` in the font signature. Ranges are listed in ascending order and do
/// not overlap. The trailing comments name the Unicode block(s) each range
/// covers.
// NOTE(review): the bit assignments look like the Windows/OpenType Unicode
// subset bitfield, but a few differ from that table — confirm before "fixing".
const USB_RANGES: &[(u32, u32, usize, u32)] = &[
    (0x0020, 0x007F, 0, 0),  // Basic Latin (printable)
    (0x0080, 0x00FF, 0, 1),  // Latin-1 Supplement
    (0x0100, 0x024F, 0, 2),  // Latin Extended-A / Extended-B
    (0x0250, 0x02AF, 0, 3),  // IPA Extensions
    (0x02B0, 0x02FF, 0, 5),  // Spacing Modifier Letters
    (0x0300, 0x036F, 0, 6),  // Combining Diacritical Marks
    (0x0370, 0x03FF, 0, 7),  // Greek and Coptic
    (0x0400, 0x04FF, 0, 9),  // Cyrillic
    (0x0530, 0x058F, 0, 10), // Armenian
    (0x0590, 0x05FF, 0, 11), // Hebrew
    (0x0600, 0x06FF, 0, 13), // Arabic
    (0x0E00, 0x0E7F, 0, 24), // Thai
    (0x10A0, 0x10FF, 0, 26), // Georgian
    (0x1100, 0x11FF, 0, 28), // Hangul Jamo
    (0x1E00, 0x1EFF, 0, 29), // Latin Extended Additional
    (0x1F00, 0x1FFF, 0, 30), // Greek Extended
    (0x2000, 0x206F, 0, 31), // General Punctuation
    (0x2070, 0x209F, 1, 0),  // Superscripts and Subscripts
    (0x20A0, 0x20CF, 1, 1),  // Currency Symbols
    (0x2100, 0x214F, 1, 3),  // Letterlike Symbols
    (0x2150, 0x218F, 1, 4),  // Number Forms
    (0x2190, 0x21FF, 1, 5),  // Arrows
    (0x2200, 0x22FF, 1, 6),  // Mathematical Operators
    (0x3000, 0x303F, 1, 20), // CJK Symbols and Punctuation
    (0x3040, 0x309F, 1, 21), // Hiragana
    (0x30A0, 0x30FF, 1, 22), // Katakana
    (0x3100, 0x312F, 1, 23), // Bopomofo
    (0x3130, 0x318F, 1, 24), // Hangul Compatibility Jamo
    (0x4E00, 0x9FFF, 1, 27), // CJK Unified Ideographs
    (0xAC00, 0xD7AF, 1, 28), // Hangul Syllables
    (0xFB00, 0xFB06, 1, 30), // Alphabetic Presentation Forms (Latin ligatures)
    (0xFB50, 0xFDFF, 1, 31), // Arabic Presentation Forms-A
    (0xFE70, 0xFEFF, 2, 0),  // Arabic Presentation Forms-B
];
/// Builds the payload of the EXTH 300 record for a dictionary from the set of
/// code points used in its headwords.
///
/// Output layout:
/// 1. four little-endian `u32` words with one bit per matching `USB_RANGES`
///    entry (bit 31 of the last word is always set);
/// 2. two little-endian `u32` words; the first gets `0x00002000` when bit 7 of
///    the first range word is set, the second stays zero;
/// 3. eight zero bytes;
/// 4. the four bytes `BE EC ED F4`, stably reordered by a key taken from the
///    MD5 digest of the encoded code-point list (item 5);
/// 5. every code point above `0x7F`, shifted by `0x0400`, truncated to `u16`,
///    sorted ascending and written big-endian.
///
/// The result does not depend on the iteration order of the `HashSet`: range
/// bits are OR-ed together and the code-point list is sorted.
// NOTE(review): items 1–2 look like the fsUsb/fsCsb words of a Windows
// FONTSIGNATURE; items 4–5 presumably reproduce another tool's output — confirm.
fn build_fontsignature(headword_chars: &HashSet<u32>) -> Vec<u8> {
    let mut range_words = [0u32; 4];
    let mut codepage_words = [0u32; 2];

    // Ranges never overlap, so the first hit is the only hit.
    for &cp in headword_chars {
        let hit = USB_RANGES
            .iter()
            .find(|&&(lo, hi, _, _)| lo <= cp && cp <= hi);
        if let Some(&(_, _, word, bit)) = hit {
            range_words[word] |= 1 << bit;
        }
    }
    range_words[3] |= 1 << 31;
    if range_words[0] & (1 << 7) != 0 {
        codepage_words[0] |= 0x00002000;
    }

    let mut signature = Vec::with_capacity(32);
    for word in range_words.iter().chain(codepage_words.iter()) {
        signature.extend_from_slice(&word.to_le_bytes());
    }
    signature.extend_from_slice(&[0u8; 8]);

    // Shifted, truncated and sorted non-ASCII code points.
    let mut shifted: Vec<u16> = headword_chars
        .iter()
        .copied()
        .filter(|&cp| cp > 0x7F)
        .map(|cp| (cp + 0x0400) as u16)
        .collect();
    shifted.sort_unstable();

    // The same big-endian encoding feeds the digest and is appended to the output.
    let encoded: Vec<u8> = shifted.iter().flat_map(|v| v.to_be_bytes()).collect();
    let digest = md5_hash(&encoded);

    // Stable sort: bytes whose digest keys tie keep their original relative order.
    let mut prefix = [0xBEu8, 0xEC, 0xED, 0xF4];
    prefix.sort_by_key(|&b| digest[usize::from(b) % digest.len()]);

    signature.extend_from_slice(&prefix);
    signature.extend_from_slice(&encoded);
    signature
}
/// Computes the MD5 digest (RFC 1321) of `data` and returns its 16 bytes.
///
/// The message is padded with `0x80`, then zeros up to 56 bytes modulo 64, then
/// the original length in bits as a little-endian `u64`. Each 64-byte block is
/// processed in 64 steps, and the four state words are written out
/// little-endian.
fn md5_hash(data: &[u8]) -> [u8; 16] {
    // Left-rotation amounts: one row per round of 16 steps, cycling every 4 steps.
    const SHIFTS: [[u32; 4]; 4] = [
        [7, 12, 17, 22],
        [5, 9, 14, 20],
        [4, 11, 16, 23],
        [6, 10, 15, 21],
    ];
    // Per-step additive constants.
    const K: [u32; 64] = [
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
    ];

    // Padding: 0x80, zero fill up to 56 (mod 64), then the bit length.
    let bit_len = (data.len() as u64) * 8;
    let mut padded = Vec::with_capacity(data.len() + 72);
    padded.extend_from_slice(data);
    padded.push(0x80);
    let zero_fill = (120 - padded.len() % 64) % 64;
    padded.resize(padded.len() + zero_fill, 0x00);
    padded.extend_from_slice(&bit_len.to_le_bytes());

    let mut state: [u32; 4] = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476];

    for block in padded.chunks(64) {
        // Decode the block into sixteen little-endian words.
        let mut words = [0u32; 16];
        for (slot, quad) in words.iter_mut().zip(block.chunks(4)) {
            *slot = u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]);
        }

        let [mut a, mut b, mut c, mut d] = state;
        for step in 0..64usize {
            let (mixed, word_idx) = if step < 16 {
                ((b & c) | (!b & d), step)
            } else if step < 32 {
                ((d & b) | (!d & c), (5 * step + 1) % 16)
            } else if step < 48 {
                (b ^ c ^ d, (3 * step + 5) % 16)
            } else {
                (c ^ (b | !d), (7 * step) % 16)
            };
            let rotated = a
                .wrapping_add(mixed)
                .wrapping_add(K[step])
                .wrapping_add(words[word_idx])
                .rotate_left(SHIFTS[step / 16][step % 4]);
            // Rotate the working registers: (a, b, c, d) <- (d, b + rotated, b, c).
            a = d;
            d = c;
            c = b;
            b = b.wrapping_add(rotated);
        }

        for (acc, delta) in state.iter_mut().zip([a, b, c, d].iter()) {
            *acc = acc.wrapping_add(*delta);
        }
    }

    let mut digest = [0u8; 16];
    for (out, word) in digest.chunks_mut(4).zip(state.iter()) {
        out.copy_from_slice(&word.to_le_bytes());
    }
    digest
}
/// Fixed-layout options read by `build_book_exth`.
///
/// Passing `Some(&FixedLayoutMeta)` to `build_book_exth` suppresses the
/// EXTH 503 title record regardless of `is_fixed_layout`.
pub struct FixedLayoutMeta {
/// When `true`, `build_book_exth` emits the fixed-layout records
/// (EXTH 122, 123, 124, 126, 127, 128, 300 and 527); when `false` none of
/// them are written.
pub is_fixed_layout: bool,
/// Value written to EXTH 126; `build_book_exth` falls back to
/// `"1072x1448"` when this is `None`.
pub original_resolution: Option<String>,
/// Value written to EXTH 527, defaulting to `"ltr"` when `None`.
/// `Some("rtl")` also switches EXTH 525 from `horizontal-lr` to
/// `horizontal-rl`, even when `is_fixed_layout` is `false`.
pub page_progression_direction: Option<String>,
}
pub fn build_book_exth(
title: &str,
author: &str,
date: &str,
language: &str,
cover_offset: Option<u32>,
thumb_offset: Option<u32>,
kf8_cover_uri: Option<&str>,
fixed_layout: Option<&FixedLayoutMeta>,
kf8_boundary_record: Option<u32>,
hd_geometry: Option<&str>,
creator_tag: bool,
doc_type: Option<&str>,
description: Option<&str>,
subject: Option<&str>,
series: Option<&str>,
series_index: Option<&str>,
) -> Vec<u8> {
let mut records: Vec<Vec<u8>> = Vec::new();
if !date.is_empty() {
records.push(exth_record(106, date.as_bytes()));
}
let author_out = if author.is_empty() { crate::DEFAULT_AUTHOR } else { author };
records.push(exth_record(100, author_out.as_bytes()));
if fixed_layout.is_none() && !title.is_empty() {
records.push(exth_record(503, title.as_bytes()));
}
if let Some(desc) = description {
if !desc.is_empty() {
records.push(exth_record(103, desc.as_bytes()));
}
}
if let Some(subj) = subject {
if !subj.is_empty() {
records.push(exth_record(105, subj.as_bytes()));
}
}
let title_bytes = if title.is_empty() {
b"Book".to_vec()
} else {
title.as_bytes().to_vec()
};
let exth542_hash = md5_hash(&title_bytes);
records.push(exth_record(542, &exth542_hash[..4]));
if !language.is_empty() {
records.push(exth_record(524, language.as_bytes()));
}
let writing_mode = if fixed_layout
.map(|fl| fl.page_progression_direction.as_deref() == Some("rtl"))
.unwrap_or(false)
{
b"horizontal-rl" as &[u8]
} else {
b"horizontal-lr" as &[u8]
};
records.push(exth_record(525, writing_mode));
records.push(exth_record(131, &0u32.to_be_bytes()));
if creator_tag {
records.push(exth_record(204, &300u32.to_be_bytes())); records.push(exth_record(205, &0u32.to_be_bytes()));
records.push(exth_record(206, &2u32.to_be_bytes()));
let creator_str = format!("kindling-{}", env!("CARGO_PKG_VERSION"));
records.push(exth_record(535, creator_str.as_bytes()));
} else {
records.push(exth_record(204, &201u32.to_be_bytes())); records.push(exth_record(205, &2u32.to_be_bytes()));
records.push(exth_record(206, &9u32.to_be_bytes()));
records.push(exth_record(535, b"0730-890adc2"));
}
records.push(exth_record(207, &0u32.to_be_bytes()));
if let Some(uri) = kf8_cover_uri {
if !uri.is_empty() {
records.push(exth_record(129, uri.as_bytes()));
}
}
if let Some(offset) = cover_offset {
records.push(exth_record(201, &offset.to_be_bytes()));
let thumb = thumb_offset.unwrap_or(offset);
records.push(exth_record(202, &thumb.to_be_bytes()));
}
records.push(exth_record(203, &0u32.to_be_bytes()));
if let Some(fl) = fixed_layout {
if fl.is_fixed_layout {
records.push(exth_record(122, b"true"));
records.push(exth_record(123, b"comic"));
records.push(exth_record(124, b"none"));
let resolution = fl.original_resolution.as_deref().unwrap_or("1072x1448");
records.push(exth_record(126, resolution.as_bytes()));
records.push(exth_record(127, b"true"));
records.push(exth_record(128, b"true"));
let fontsig: [u8; 36] = [
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
];
records.push(exth_record(300, &fontsig));
let ppd = fl.page_progression_direction.as_deref().unwrap_or("ltr");
records.push(exth_record(527, ppd.as_bytes()));
}
}
if let Some(s) = series {
if !s.is_empty() {
records.push(exth_record(112, s.as_bytes()));
}
}
if let Some(si) = series_index {
if !si.is_empty() {
records.push(exth_record(113, si.as_bytes()));
}
}
match doc_type {
Some("EBOK") => {
records.push(exth_record(501, b"EBOK"));
}
_ => {
records.push(exth_record(501, b"PDOC"));
}
}
records.push(exth_record(547, b"InMemory"));
records.push(exth_record(125, &21u32.to_be_bytes()));
if let Some(boundary) = kf8_boundary_record {
records.push(exth_record(121, &boundary.to_be_bytes()));
}
if let Some(geometry) = hd_geometry {
records.push(exth_record(536, geometry.as_bytes()));
}
let record_data: Vec<u8> = records.iter().flat_map(|r| r.iter().copied()).collect();
let exth_length = 12 + record_data.len();
let padding = (4 - (exth_length % 4)) % 4;
let padded_length = exth_length + padding;
let mut exth = Vec::with_capacity(padded_length);
exth.extend_from_slice(b"EXTH");
exth.extend_from_slice(&(padded_length as u32).to_be_bytes());
exth.extend_from_slice(&(records.len() as u32).to_be_bytes());
exth.extend_from_slice(&record_data);
exth.extend_from_slice(&vec![0u8; padding]);
exth
}
/// Builds the complete EXTH block for a dictionary.
///
/// Records are emitted in a fixed order. Empty strings suppress the
/// corresponding optional record.
///
/// * `title` — EXTH 503 when non-empty. The first four bytes of the MD5 of the
///   title (or of `"Dictionary"` when empty) always go into EXTH 542.
/// * `author` — EXTH 100; `crate::DEFAULT_AUTHOR` is used when empty.
/// * `date` — EXTH 106. `language` — EXTH 524.
/// * `dict_in_language` / `dict_out_language` — EXTH 531 / 532.
/// * `headword_chars` — fed to `build_fontsignature` to produce EXTH 300.
/// * `creator_tag` — chooses between the `kindling-<version>` values and the
///   fixed `0730-890adc2` values for EXTH 204/205/206/535.
/// * `cover_offset` — written to both EXTH 201 and EXTH 202 when present.
///
/// Returns `"EXTH"`, the padded total length, the record count (both
/// big-endian `u32`), the records, and zero padding to a multiple of 4 bytes.
/// The stored length includes the padding.
pub fn build_exth(
    title: &str,
    author: &str,
    date: &str,
    language: &str,
    dict_in_language: &str,
    dict_out_language: &str,
    headword_chars: &HashSet<u32>,
    creator_tag: bool,
    cover_offset: Option<u32>,
) -> Vec<u8> {
    // Records are appended to one buffer and counted as they are written.
    let mut payload: Vec<u8> = Vec::new();
    let mut record_count: usize = 0;
    let mut emit = |tag: u32, value: &[u8]| {
        payload.extend_from_slice(&exth_record(tag, value));
        record_count += 1;
    };

    if !date.is_empty() {
        emit(106, date.as_bytes());
    }

    let author_out = if !author.is_empty() {
        author
    } else {
        crate::DEFAULT_AUTHOR
    };
    emit(100, author_out.as_bytes());

    if !title.is_empty() {
        emit(503, title.as_bytes());
    }

    let hash_input: &[u8] = if title.is_empty() {
        &b"Dictionary"[..]
    } else {
        title.as_bytes()
    };
    let title_digest = md5_hash(hash_input);
    emit(542, &title_digest[..4]);

    // Language records, in the order 531, 532, 524.
    let language_records = [
        (531u32, dict_in_language),
        (532, dict_out_language),
        (524, language),
    ];
    for &(tag, value) in language_records.iter() {
        if !value.is_empty() {
            emit(tag, value.as_bytes());
        }
    }

    emit(525, b"horizontal-lr");
    emit(131, &0u32.to_be_bytes());
    emit(300, &build_fontsignature(headword_chars));

    // NOTE(review): 204/205/206/535 presumably identify the producing software
    // and its version — confirm against the EXTH record documentation.
    let own_version;
    let (rec204, rec205, rec206, rec535): (u32, u32, u32, &[u8]) = if creator_tag {
        own_version = format!("kindling-{}", env!("CARGO_PKG_VERSION"));
        (300, 0, 2, own_version.as_bytes())
    } else {
        (201, 2, 9, &b"0730-890adc2"[..])
    };
    emit(204, &rec204.to_be_bytes());
    emit(205, &rec205.to_be_bytes());
    emit(206, &rec206.to_be_bytes());
    emit(535, rec535);
    emit(207, &0u32.to_be_bytes());

    // The cover offset also serves as the thumbnail offset.
    if let Some(cover) = cover_offset {
        let encoded = cover.to_be_bytes();
        emit(201, &encoded);
        emit(202, &encoded);
    }
    emit(203, &0u32.to_be_bytes());
    emit(547, b"InMemory");
    emit(125, &1u32.to_be_bytes());

    // 12-byte header + records, rounded up to the next multiple of four.
    let unpadded_length = 12 + payload.len();
    let padded_length = (unpadded_length + 3) / 4 * 4;

    let mut exth = Vec::with_capacity(padded_length);
    exth.extend_from_slice(b"EXTH");
    exth.extend_from_slice(&(padded_length as u32).to_be_bytes());
    exth.extend_from_slice(&(record_count as u32).to_be_bytes());
    exth.extend_from_slice(&payload);
    exth.resize(padded_length, 0u8);
    exth
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Reads a big-endian `u32` starting at `at`; panics when out of bounds.
    fn be_u32(bytes: &[u8], at: usize) -> u32 {
        u32::from_be_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]])
    }

    /// Splits an EXTH block into `(record type, payload)` pairs, in file order.
    fn parse_exth_records(exth: &[u8]) -> Vec<(u32, Vec<u8>)> {
        assert_eq!(&exth[0..4], b"EXTH");
        let _exth_len = be_u32(exth, 4) as usize;
        let rec_count = be_u32(exth, 8) as usize;

        let mut cursor = 12;
        (0..rec_count)
            .map(|_| {
                let rec_type = be_u32(exth, cursor);
                let rec_len = be_u32(exth, cursor + 4) as usize;
                let data = exth[cursor + 8..cursor + rec_len].to_vec();
                cursor += rec_len;
                (rec_type, data)
            })
            .collect()
    }

    /// Returns a copy of the payload of the first record of type `rec_type`.
    fn find_record(records: &[(u32, Vec<u8>)], rec_type: u32) -> Option<Vec<u8>> {
        for (found_type, payload) in records {
            if *found_type == rec_type {
                return Some(payload.clone());
            }
        }
        None
    }

    /// Calls `build_book_exth` with the arguments shared by every book test:
    /// date `2026-01-01`, language `en`, no cover, no fixed layout, no KF8
    /// extras and `creator_tag = false`. `meta` is
    /// `[description, subject, series, series_index]`.
    fn book_exth(
        title: &str,
        author: &str,
        doc_type: Option<&str>,
        meta: [Option<&str>; 4],
    ) -> Vec<u8> {
        let [description, subject, series, series_index] = meta;
        build_book_exth(
            title,
            author,
            "2026-01-01",
            "en",
            None,
            None,
            None,
            None,
            None,
            None,
            false,
            doc_type,
            description,
            subject,
            series,
            series_index,
        )
    }

    #[test]
    fn test_exth_doc_type_pdoc_default() {
        let exth = book_exth("Test Book", "Author", None, [None; 4]);
        let records = parse_exth_records(&exth);
        let rec501 = find_record(&records, 501).expect("EXTH 501 should exist");
        assert_eq!(rec501, b"PDOC", "Default doc_type should be PDOC");
        println!(" \u{2713} EXTH 501 default = PDOC");
    }

    #[test]
    fn test_exth_doc_type_pdoc_explicit() {
        let exth = book_exth("Test Book", "Author", Some("PDOC"), [None; 4]);
        let records = parse_exth_records(&exth);
        let rec501 = find_record(&records, 501).expect("EXTH 501 should exist");
        assert_eq!(rec501, b"PDOC");
        println!(" \u{2713} EXTH 501 explicit = PDOC");
    }

    #[test]
    fn test_exth_doc_type_ebok() {
        let exth = book_exth("Test Book", "Author", Some("EBOK"), [None; 4]);
        let records = parse_exth_records(&exth);
        let rec501 = find_record(&records, 501).expect("EXTH 501 should exist");
        assert_eq!(rec501, b"EBOK", "doc_type EBOK should produce EXTH 501 = EBOK");
        println!(" \u{2713} EXTH 501 = EBOK");
    }

    #[test]
    fn test_exth_series_metadata() {
        let exth = book_exth(
            "One Piece Vol 1",
            "Eiichiro Oda",
            None,
            [
                Some("Luffy begins his adventure"),
                Some("Manga, Adventure"),
                Some("One Piece"),
                Some("1"),
            ],
        );
        let records = parse_exth_records(&exth);

        let desc = find_record(&records, 103).expect("EXTH 103 (description) should exist");
        assert_eq!(std::str::from_utf8(&desc).unwrap(), "Luffy begins his adventure");

        let subj = find_record(&records, 105).expect("EXTH 105 (subject) should exist");
        assert_eq!(std::str::from_utf8(&subj).unwrap(), "Manga, Adventure");

        let series = find_record(&records, 112).expect("EXTH 112 (series) should exist");
        assert_eq!(std::str::from_utf8(&series).unwrap(), "One Piece");

        let si = find_record(&records, 113).expect("EXTH 113 (series_index) should exist");
        assert_eq!(std::str::from_utf8(&si).unwrap(), "1");

        println!(" \u{2713} Series metadata: 103/105/112/113 all present and correct");
    }

    #[test]
    fn test_exth_series_metadata_omitted_when_none() {
        let exth = book_exth("Standalone Book", "Author", None, [None; 4]);
        let records = parse_exth_records(&exth);
        assert!(find_record(&records, 103).is_none(), "EXTH 103 should be absent when None");
        assert!(find_record(&records, 105).is_none(), "EXTH 105 should be absent when None");
        assert!(find_record(&records, 112).is_none(), "EXTH 112 should be absent when None");
        assert!(find_record(&records, 113).is_none(), "EXTH 113 should be absent when None");
        println!(" \u{2713} EXTH 103/105/112/113 all absent when None");
    }

    #[test]
    fn test_exth_series_metadata_omitted_when_empty() {
        let exth = book_exth("Standalone Book", "Author", None, [Some(""); 4]);
        let records = parse_exth_records(&exth);
        assert!(find_record(&records, 103).is_none(), "Empty description should not produce EXTH 103");
        assert!(find_record(&records, 105).is_none(), "Empty subject should not produce EXTH 105");
        assert!(find_record(&records, 112).is_none(), "Empty series should not produce EXTH 112");
        assert!(find_record(&records, 113).is_none(), "Empty series_index should not produce EXTH 113");
        println!(" \u{2713} EXTH 103/105/112/113 all absent when empty string");
    }

    #[test]
    fn test_exth_header_structure() {
        let exth = book_exth(
            "Test",
            "Author",
            Some("EBOK"),
            [Some("A test book"), Some("Fiction"), Some("Test Series"), Some("3")],
        );
        assert_eq!(&exth[0..4], b"EXTH");
        let stated_len = be_u32(&exth, 4) as usize;
        assert_eq!(stated_len, exth.len(), "EXTH stated length must match actual length");
        assert_eq!(exth.len() % 4, 0, "EXTH length must be 4-byte aligned");
        println!(" \u{2713} EXTH header: magic ok, length={}, 4-byte aligned", stated_len);
    }

    #[test]
    fn test_exth_dict_unchanged() {
        // One ASCII and one Greek headword character.
        let chars: HashSet<u32> = [0x0041u32, 0x03B1].iter().copied().collect();
        let exth = build_exth(
            "Test Dict",
            "Author",
            "2026-01-01",
            "en",
            "el",
            "en",
            &chars,
            false,
            None,
        );
        assert_eq!(&exth[0..4], b"EXTH");
        let records = parse_exth_records(&exth);
        assert!(find_record(&records, 531).is_some(), "Dict should have EXTH 531");
        assert!(find_record(&records, 532).is_some(), "Dict should have EXTH 532");
        assert!(find_record(&records, 501).is_none(), "Dict should not have EXTH 501");
        assert!(find_record(&records, 112).is_none(), "Dict should not have EXTH 112");
        println!(" \u{2713} Dict EXTH: has 531/532, no 501/112");
    }
}