//! KF8 (Kindle Format 8) section builder: split text records with TBS
//! trailers, the FDST flow table, and the skeleton, fragment, and NCX
//! indexes with their CNCX string records.
use regex::Regex;
use crate::cncx::CncxBuilder;
use crate::palmdoc;
use crate::vwi::encode_vwi_inv;
/// Flow text is split into 4096-byte chunks, the standard record size for
/// PalmDoc-compressed MOBI text records.
const RECORD_SIZE: usize = 4096;
/// Fixed size of the INDX header emitted by this writer.
const INDX_HEADER_LENGTH: usize = 192;
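/// All KF8-specific records produced for a book: split text records, the
/// FDST flow table, the skeleton, fragment, and NCX indexes with their CNCX
/// string records, and a DATP record. `css_content` and `html_bytes` are
/// snapshots kept for downstream consumers and tests.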
pub struct Kf8Section {
pub text_records: Vec<Vec<u8>>,
pub text_length: usize,
pub fdst: Vec<u8>,
pub fragment_indx: Vec<Vec<u8>>,
#[allow(dead_code)]
pub cncx_records: Vec<Vec<u8>>,
pub skeleton_indx: Vec<Vec<u8>>,
pub ncx_indx: Vec<Vec<u8>>,
pub ncx_cncx_records: Vec<Vec<u8>>,
pub datp: Vec<u8>,
pub flow_count: usize,
#[allow(dead_code)]
pub css_content: Vec<u8>,
pub html_bytes: Vec<u8>,
}
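/// One skeleton (per-page HTML shell) in the combined flow: its label
/// (e.g. `SKEL0000000000`), byte range, and how many fragments it owns.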
#[derive(Debug, Clone)]
struct SkeletonEntry {
label: String,
start_pos: usize,
length: usize,
chunk_count: usize,
}
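/// One body fragment: where it is re-inserted into its skeleton
/// (`insert_pos`), the CNCX selector naming the target element, and its
/// file/sequence numbers and byte geometry.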
#[derive(Debug, Clone)]
struct FragmentEntry {
insert_pos: usize,
selector: String,
file_number: usize,
sequence_number: usize,
start_pos: usize,
length: usize,
}
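/// Builds the complete KF8 section from per-page XHTML and shared CSS. The
/// HTML parts and CSS are concatenated into one text flow, split into
/// 4096-byte records (PalmDoc-compressed unless `no_compress`), and the
/// FDST, skeleton/fragment/NCX indexes, and DATP records are derived from it.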
pub fn build_kf8_section(
html_parts: &[String],
css_content: &str,
href_to_recindex: &std::collections::HashMap<String, usize>,
spine_items: &[(String, String)],
no_compress: bool,
kindlegen_parity: bool,
title: &str,
) -> Kf8Section {
let (kf8_html, skeleton_entries, fragment_entries) =
build_kf8_html(html_parts, href_to_recindex, spine_items, kindlegen_parity);
let css_bytes = css_content.as_bytes();
let html_length = kf8_html.len();
let total_text_length = html_length + css_bytes.len();
let html_bytes_snapshot = kf8_html.clone();
let mut combined_text = kf8_html;
combined_text.extend_from_slice(css_bytes);
let num_skeletons = skeleton_entries.len();
let (text_records, text_length) = if no_compress {
split_text_uncompressed_kf8(&combined_text, num_skeletons)
} else {
compress_text_kf8(&combined_text, num_skeletons)
};
let fdst = build_fdst(html_length, total_text_length);
let skeleton_indx = build_skeleton_indx(&skeleton_entries);
let (fragment_indx, cncx_records) = build_fragment_indx_with_cncx(&fragment_entries);
let (ncx_indx, ncx_cncx_records) =
build_ncx_indx(title, &skeleton_entries, html_length);
let datp = build_datp();
let flow_count = 2; // flow 0: skeleton HTML, flow 1: CSS
Kf8Section {
text_records,
text_length,
fdst,
fragment_indx,
cncx_records,
skeleton_indx,
ncx_indx,
ncx_cncx_records,
datp,
flow_count,
css_content: css_bytes.to_vec(),
html_bytes: html_bytes_snapshot,
}
}
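/// Processes each page, splits it into skeleton and body fragment, and
/// concatenates everything into one flow while recording skeleton and
/// fragment geometry.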
fn build_kf8_html(
html_parts: &[String],
href_to_recindex: &std::collections::HashMap<String, usize>,
spine_items: &[(String, String)],
kindlegen_parity: bool,
) -> (Vec<u8>, Vec<SkeletonEntry>, Vec<FragmentEntry>) {
let path_to_recindex = build_image_path_lookup(href_to_recindex, spine_items);
let mut skeleton_entries: Vec<SkeletonEntry> = Vec::new();
let mut fragment_entries: Vec<FragmentEntry> = Vec::new();
let mut combined: Vec<u8> = Vec::new();
let mut global_seq: usize = 0;
// kindlegen-parity mode numbers aids per page, spaced by a large stride;
// otherwise a single global counter runs across all pages.
const AID_PAGE_STRIDE: u32 = 1_000_000;
let mut global_aid_counter: u32 = 0;
for (skel_idx, raw_part) in html_parts.iter().enumerate() {
let mut aid_counter: u32 = if kindlegen_parity {
(skel_idx as u32) * AID_PAGE_STRIDE
} else {
global_aid_counter
};
let processed = process_kf8_part(raw_part, &mut aid_counter, &path_to_recindex, kindlegen_parity);
if !kindlegen_parity {
global_aid_counter = aid_counter;
}
let split = split_skeleton_and_body(&processed);
let skel_bytes = split.skeleton.as_bytes();
let body_inner = split.body_inner.as_bytes();
let skel_start = combined.len();
combined.extend_from_slice(skel_bytes);
let skel_len = skel_bytes.len();
let insert_pos = skel_start + split.body_inner_offset;
combined.extend_from_slice(body_inner);
let frag_len = body_inner.len();
// With exactly one fragment per skeleton, the fragment-local offset is
// always zero.
let relative_start = 0usize;
fragment_entries.push(FragmentEntry {
insert_pos,
selector: format!("P-//*[@aid='{}']", split.body_aid),
file_number: skel_idx,
sequence_number: global_seq,
start_pos: relative_start,
length: frag_len,
});
global_seq += 1;
skeleton_entries.push(SkeletonEntry {
label: format!("SKEL{:010}", skel_idx),
start_pos: skel_start,
length: skel_len,
chunk_count: 1,
});
}
(combined, skeleton_entries, fragment_entries)
}
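/// Result of splitting a page: the skeleton (everything outside the body
/// content), the extracted body inner HTML, the offset in the skeleton
/// where it was removed, and the body tag's aid value.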
struct SkelSplit {
skeleton: String,
body_inner: String,
body_inner_offset: usize,
body_aid: String,
}
fn split_skeleton_and_body(html: &str) -> SkelSplit {
let bytes = html.as_bytes();
let (_, open_end, body_aid) = match find_body_open(bytes) {
Some(v) => v,
None => {
return SkelSplit {
skeleton: html.to_string(),
body_inner: String::new(),
body_inner_offset: html.len(),
body_aid: "0".to_string(),
};
}
};
let close_start = match find_last_close_body(bytes) {
Some(v) => v,
None => {
return SkelSplit {
skeleton: html.to_string(),
body_inner: String::new(),
body_inner_offset: html.len(),
body_aid,
};
}
};
if close_start <= open_end {
return SkelSplit {
skeleton: html.to_string(),
body_inner: String::new(),
body_inner_offset: open_end,
body_aid,
};
}
let head = &html[..open_end];
let inner = &html[open_end..close_start];
let tail = &html[close_start..];
let mut skeleton = String::with_capacity(head.len() + tail.len());
skeleton.push_str(head);
skeleton.push_str(tail);
SkelSplit {
skeleton,
body_inner: inner.to_string(),
body_inner_offset: head.len(),
body_aid,
}
}
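/// Locates the opening `<body ...>` tag, returning its start offset, the
/// offset just past the closing `>`, and the tag's aid value ("0" when
/// absent).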
fn find_body_open(bytes: &[u8]) -> Option<(usize, usize, String)> {
let haystack = bytes;
let needle = b"<body";
let mut i = 0;
// Strict `<` keeps the byte after "<body" in bounds.
while i + needle.len() < haystack.len() {
if &haystack[i..i + needle.len()] == needle {
let after = haystack[i + needle.len()];
if after == b' '
|| after == b'\t'
|| after == b'\n'
|| after == b'\r'
|| after == b'>'
|| after == b'/'
{
let mut j = i + needle.len();
while j < haystack.len() && haystack[j] != b'>' {
j += 1;
}
if j >= haystack.len() {
return None;
}
let tag_str = std::str::from_utf8(&haystack[i..=j]).ok()?;
let aid = extract_aid_value(tag_str).unwrap_or_else(|| "0".to_string());
return Some((i, j + 1, aid));
}
}
i += 1;
}
None
}
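/// Returns the offset of the last `</body>` in the document, if any.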
fn find_last_close_body(bytes: &[u8]) -> Option<usize> {
let needle = b"</body>";
bytes.windows(needle.len()).rposition(|w| w == needle)
}
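/// Extracts the value of an `aid="..."` attribute from one tag's text.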
fn extract_aid_value(tag_str: &str) -> Option<String> {
let re = Regex::new(r#"\baid\s*=\s*["']([^"']*)["']"#).unwrap();
re.captures(tag_str)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
}
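/// Rewrites a single page for KF8: matching image `src` attributes become
/// `kindle:embed` references (this writer assumes JPEG pages), and listed
/// tags without an `aid` attribute receive a fresh base-32 one. In
/// kindlegen-parity mode `img` tags are excluded from aid assignment.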
fn process_kf8_part(
html: &str,
aid_counter: &mut u32,
path_to_recindex: &std::collections::HashMap<String, usize>,
kindlegen_parity: bool,
) -> String {
let mut result = html.to_string();
let mime_suffix = "jpg";
let src_re = Regex::new(r#"(?i)\bsrc\s*=\s*"([^"]*)""#).unwrap();
result = src_re
.replace_all(&result, |caps: &regex::Captures| {
let src_path = caps.get(1).unwrap().as_str();
if let Some(&recindex) = path_to_recindex.get(src_path) {
format!(
"src=\"kindle:embed:{}?mime=image/{}\"",
encode_base32_4char(recindex),
mime_suffix
)
} else if let Some(fname) = src_path.rsplit('/').next() {
if let Some(&recindex) = path_to_recindex.get(fname) {
format!(
"src=\"kindle:embed:{}?mime=image/{}\"",
encode_base32_4char(recindex),
mime_suffix
)
} else {
caps.get(0).unwrap().as_str().to_string()
}
} else {
caps.get(0).unwrap().as_str().to_string()
}
})
.to_string();
let tag_re = if kindlegen_parity {
Regex::new(
r"(?i)<(p|div|h[1-6]|li|ul|ol|table|tr|td|th|section|article|aside|nav|header|footer|figure|figcaption|blockquote|span|a|em|strong|b|i|body)(\s[^>]*?)?(/?)>",
)
.unwrap()
} else {
Regex::new(
r"(?i)<(p|div|h[1-6]|li|ul|ol|table|tr|td|th|section|article|aside|nav|header|footer|figure|figcaption|blockquote|img|span|a|em|strong|b|i|body)(\s[^>]*?)?(/?)>",
)
.unwrap()
};
result = tag_re
.replace_all(&result, |caps: &regex::Captures| {
let tag = caps.get(1).unwrap().as_str();
let attrs = caps.get(2).map(|m| m.as_str()).unwrap_or("");
let self_close = caps.get(3).map(|m| m.as_str()).unwrap_or("");
if attrs.contains(" aid=") || attrs.contains("\taid=") {
return caps.get(0).unwrap().as_str().to_string();
}
let aid = encode_aid_base32(*aid_counter);
*aid_counter += 1;
format!("<{}{} aid=\"{}\"{}>", tag, attrs, aid, self_close)
})
.to_string();
result
}
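/// Minimal base-32 encoding (digits 0-9A-V) for aid values, e.g.
/// `encode_aid_base32(33)` yields "11".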
fn encode_aid_base32(value: u32) -> String {
if value == 0 {
return "0".to_string();
}
const CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
let mut out = Vec::new();
let mut v = value;
while v > 0 {
out.push(CHARS[(v & 0x1F) as usize]);
v >>= 5;
}
out.reverse();
String::from_utf8(out).unwrap()
}
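/// Fixed four-character base-32 encoding of an image record index, as used
/// in kindle:embed URLs, e.g. `encode_base32_4char(32)` yields "0010".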
fn encode_base32_4char(recindex: usize) -> String {
const CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
let mut result = [b'0'; 4];
let mut v = recindex;
for i in (0..4).rev() {
result[i] = CHARS[v % 32];
v /= 32;
}
String::from_utf8(result.to_vec()).unwrap()
}
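/// Maps every path an image `src` might plausibly use (full href, bare
/// filename, "../"-prefixed href) to the image's record index.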
fn build_image_path_lookup(
href_to_recindex: &std::collections::HashMap<String, usize>,
spine_items: &[(String, String)],
) -> std::collections::HashMap<String, usize> {
let mut path_to_recindex: std::collections::HashMap<String, usize> =
std::collections::HashMap::new();
for (href, &recindex) in href_to_recindex {
path_to_recindex.insert(href.clone(), recindex);
if let Some(fname) = href.rsplit('/').next() {
path_to_recindex.entry(fname.to_string()).or_insert(recindex);
}
}
for (_, spine_href) in spine_items {
// Pages in a nested spine directory may address images as "../href";
// register that spelling as well. The directory name itself is not
// needed for this simplified mapping.
if spine_href.contains('/') {
for (href, &recindex) in href_to_recindex {
let relative = format!("../{}", href);
path_to_recindex.entry(relative).or_insert(recindex);
}
}
}
path_to_recindex
}
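/// Appends the trailing byte sequence (TBS) this writer uses for KF8 text
/// records: record 0 gets type byte 0x82, an inverse-VWI value derived from
/// the skeleton count, and a size byte; later records just get 0x81.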
fn append_kf8_tbs(record: &mut Vec<u8>, record_index: usize, num_skeletons: usize) {
if record_index == 0 {
let value = if num_skeletons > 0 { (2 * num_skeletons - 1) as u32 } else { 0 };
let value_bytes = encode_vwi_inv(value);
// type byte + value bytes + the size byte itself
let tbs_size = 1 + value_bytes.len() + 1;
record.push(0x82);
record.extend_from_slice(&value_bytes);
// Size byte with the high (stop) bit set.
record.push((tbs_size as u8) | 0x80);
} else {
record.push(0x81);
}
}
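/// Splits the combined flow into RECORD_SIZE chunks, PalmDoc-compresses
/// each, and appends the zero multibyte-overlap byte plus the TBS trailer.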
fn compress_text_kf8(text_bytes: &[u8], num_skeletons: usize) -> (Vec<Vec<u8>>, usize) {
let total_length = text_bytes.len();
let chunk_size = RECORD_SIZE;
let records: Vec<Vec<u8>> = text_bytes
.chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| {
let mut compressed = palmdoc::compress(chunk);
// Zero multibyte-overlap byte, then the TBS trailer.
compressed.push(0x00);
append_kf8_tbs(&mut compressed, i, num_skeletons);
compressed
})
.collect();
(records, total_length)
}
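/// Uncompressed variant of `compress_text_kf8`: raw chunks plus the same
/// trailing bytes.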
fn split_text_uncompressed_kf8(text_bytes: &[u8], num_skeletons: usize) -> (Vec<Vec<u8>>, usize) {
let total_length = text_bytes.len();
let chunk_size = RECORD_SIZE;
let records: Vec<Vec<u8>> = text_bytes
.chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| {
let mut rec = chunk.to_vec();
// Zero multibyte-overlap byte, then the TBS trailer.
rec.push(0x00);
append_kf8_tbs(&mut rec, i, num_skeletons);
rec
})
.collect();
(records, total_length)
}
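/// Builds the FDST record: flow count followed by (start, end) offset
/// pairs. Flow 0 is the skeleton HTML; flow 1 is the CSS, or an empty flow
/// when no CSS is present.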
fn build_fdst(html_length: usize, total_length: usize) -> Vec<u8> {
let has_css = total_length > html_length;
let flow_count: usize = 2;
let record_size = 12 + flow_count * 8;
let mut fdst = Vec::with_capacity(record_size);
fdst.extend_from_slice(b"FDST");
fdst.extend_from_slice(&12u32.to_be_bytes());
fdst.extend_from_slice(&(flow_count as u32).to_be_bytes());
fdst.extend_from_slice(&0u32.to_be_bytes());
fdst.extend_from_slice(&(html_length as u32).to_be_bytes());
if has_css {
fdst.extend_from_slice(&(html_length as u32).to_be_bytes());
fdst.extend_from_slice(&(total_length as u32).to_be_bytes());
} else {
fdst.extend_from_slice(&(html_length as u32).to_be_bytes());
fdst.extend_from_slice(&(html_length as u32).to_be_bytes());
}
fdst
}
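/// One TAGX tag definition: tag number, values per entry, and the bit mask
/// it occupies in an entry's control byte.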
#[derive(Clone, Copy, Debug)]
struct TagMeta {
#[allow(dead_code)]
number: u8,
values_per_entry: u8,
mask: u8,
}
/// Flag byte that marks the terminating TAGX row.
const END_TAG: u8 = 1;
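/// Number of right shifts needed to align a value with a TAGX mask's
/// lowest set bit (mirroring the mask-to-shift table calibre uses).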
fn mask_shifts(mask: u8) -> u32 {
match mask {
1 => 0,
2 => 1,
3 => 0,
4 => 2,
8 => 3,
12 => 2,
16 => 4,
32 => 5,
48 => 4,
64 => 6,
128 => 7,
192 => 6,
_ => 0,
}
}
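/// Serializes the TAGX table: magic, total length, control byte count,
/// then one 4-byte row per tag and a terminating row.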
fn build_tagx(tag_defs: &[(u8, u8, u8)]) -> Vec<u8> {
let mut body = Vec::new();
for (num, vpe, mask) in tag_defs {
body.push(*num);
body.push(*vpe);
body.push(*mask);
body.push(0); // end-of-control-byte flag (0 for ordinary tags)
}
// Terminating row: tag 0, zero values, zero mask, END flag.
body.push(0);
body.push(0);
body.push(0);
body.push(END_TAG);
let total_length = 12 + body.len();
let control_byte_count: u32 = 1;
let mut out = Vec::with_capacity(total_length);
out.extend_from_slice(b"TAGX");
out.extend_from_slice(&(total_length as u32).to_be_bytes());
out.extend_from_slice(&control_byte_count.to_be_bytes());
out.extend_from_slice(&body);
out
}
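/// Computes an entry's control byte: each tag's entry count (values /
/// values_per_entry) is shifted into position and masked into the byte.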
fn control_byte_for(tag_defs: &[TagMeta], nvals_per_tag: &[usize]) -> u8 {
let mut ans: u32 = 0;
for (tag, &nvals) in tag_defs.iter().zip(nvals_per_tag.iter()) {
let nentries = (nvals as u32) / (tag.values_per_entry as u32);
let shifts = mask_shifts(tag.mask);
ans |= (tag.mask as u32) & (nentries << shifts);
}
ans as u8
}
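/// Serializes one index entry: length-prefixed label, control byte, then
/// every tag value as an inverse VWI.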
fn encode_indx_entry(
label: &[u8],
tag_defs: &[TagMeta],
values_by_tag: &[Vec<u32>],
) -> Vec<u8> {
assert_eq!(tag_defs.len(), values_by_tag.len());
let nvals_per_tag: Vec<usize> = values_by_tag.iter().map(|v| v.len()).collect();
let control = control_byte_for(tag_defs, &nvals_per_tag);
let mut out = Vec::with_capacity(1 + label.len() + 1 + 8 * tag_defs.len());
out.push(label.len() as u8);
out.extend_from_slice(label);
out.push(control);
for vals in values_by_tag {
for v in vals {
out.extend_from_slice(&encode_vwi_inv(*v));
}
}
out
}
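/// Builds an INDX data record: 192-byte header, packed entries padded to a
/// 4-byte boundary, then an IDXT block listing each entry's offset.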
fn build_indx_data_record(entries: &[Vec<u8>]) -> Vec<u8> {
let mut header = vec![0u8; INDX_HEADER_LENGTH];
header[0..4].copy_from_slice(b"INDX");
put32(&mut header, 4, INDX_HEADER_LENGTH as u32);
put32(&mut header, 12, 1);
let mut entries_data = Vec::new();
let mut offsets: Vec<u16> = Vec::with_capacity(entries.len());
for e in entries {
let off = INDX_HEADER_LENGTH + entries_data.len();
offsets.push(off as u16);
entries_data.extend_from_slice(e);
}
while (INDX_HEADER_LENGTH + entries_data.len()) % 4 != 0 {
entries_data.push(0);
}
let idxt_offset = INDX_HEADER_LENGTH + entries_data.len();
put32(&mut header, 20, idxt_offset as u32);
put32(&mut header, 24, entries.len() as u32);
for b in &mut header[28..36] {
*b = 0xFF;
}
let mut idxt = Vec::with_capacity(4 + 2 * offsets.len());
idxt.extend_from_slice(b"IDXT");
for o in &offsets {
idxt.extend_from_slice(&o.to_be_bytes());
}
while idxt.len() % 4 != 0 {
idxt.push(0);
}
let mut record = header;
record.extend_from_slice(&entries_data);
record.extend_from_slice(&idxt);
record
}
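/// Builds the primary INDX record: header, TAGX table, per-data-record
/// geometry entries (last label plus entry count), and an IDXT pointing at
/// the geometry entries. The TAGX offset is stored at byte 180, where the
/// tests and downstream reader expect it.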
fn build_indx_primary(
tagx: &[u8],
num_data_records: usize,
num_entries: usize,
num_cncx: usize,
geometry: &[(Vec<u8>, u32)],
) -> Vec<u8> {
let mut header = vec![0u8; INDX_HEADER_LENGTH];
header[0..4].copy_from_slice(b"INDX");
put32(&mut header, 4, INDX_HEADER_LENGTH as u32);
put32(&mut header, 16, 2);
put32(&mut header, 24, num_data_records as u32);
put32(&mut header, 28, 65001); // index encoding: UTF-8 code page
put32(&mut header, 32, 0xFFFFFFFF); // index language (unset)
put32(&mut header, 36, num_entries as u32); // total entry count
put32(&mut header, 52, num_cncx as u32); // number of CNCX records
put32(&mut header, 180, INDX_HEADER_LENGTH as u32); // TAGX offset
let mut tagx_block = tagx.to_vec();
while tagx_block.len() % 4 != 0 {
tagx_block.push(0);
}
let mut geom_block = Vec::new();
let mut geom_offsets: Vec<u16> = Vec::with_capacity(geometry.len());
let geom_base = INDX_HEADER_LENGTH + tagx_block.len();
for (label, count) in geometry {
geom_offsets.push((geom_base + geom_block.len()) as u16);
geom_block.push(label.len() as u8);
geom_block.extend_from_slice(label);
geom_block.extend_from_slice(&(*count as u16).to_be_bytes());
}
while geom_block.len() % 4 != 0 {
geom_block.push(0);
}
let idxt_offset = geom_base + geom_block.len();
put32(&mut header, 20, idxt_offset as u32);
let mut idxt = Vec::with_capacity(4 + 2 * geom_offsets.len());
idxt.extend_from_slice(b"IDXT");
for o in &geom_offsets {
idxt.extend_from_slice(&o.to_be_bytes());
}
while idxt.len() % 4 != 0 {
idxt.push(0);
}
let mut record = header;
record.extend_from_slice(&tagx_block);
record.extend_from_slice(&geom_block);
record.extend_from_slice(&idxt);
record
}
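/// Builds the two-record skeleton index (tag 1 = chunk count, tag 6 =
/// geometry). Values are written twice per entry so the control byte
/// matches calibre's (see control_byte_matches_calibre_skeleton).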
fn build_skeleton_indx(skels: &[SkeletonEntry]) -> Vec<Vec<u8>> {
if skels.is_empty() {
return minimal_indx();
}
let tag_defs = [
TagMeta { number: 1, values_per_entry: 1, mask: 3 },
TagMeta { number: 6, values_per_entry: 2, mask: 12 },
];
let tagx = build_tagx(&[(1, 1, 3), (6, 2, 12)]);
let mut entries: Vec<Vec<u8>> = Vec::with_capacity(skels.len());
for s in skels {
let chunk_count_vals = vec![s.chunk_count as u32, s.chunk_count as u32];
let geom_vals = vec![
s.start_pos as u32,
s.length as u32,
s.start_pos as u32,
s.length as u32,
];
let entry = encode_indx_entry(
s.label.as_bytes(),
&tag_defs,
&[chunk_count_vals, geom_vals],
);
entries.push(entry);
}
let data_record = build_indx_data_record(&entries);
let last_label = skels.last().unwrap().label.as_bytes().to_vec();
let primary = build_indx_primary(
&tagx,
1,
skels.len(),
0,
&[(last_label, skels.len() as u32)],
);
vec![primary, data_record]
}
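/// Builds the fragment (chunk) index plus its CNCX records. Labels are
/// ten-digit insert positions; tag 2 is the CNCX offset of the aid
/// selector, tag 3 the file number, tag 4 the sequence number, and tag 6
/// the (start, length) geometry.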
fn build_fragment_indx_with_cncx(
frags: &[FragmentEntry],
) -> (Vec<Vec<u8>>, Vec<Vec<u8>>) {
if frags.is_empty() {
return (minimal_indx(), Vec::new());
}
let tag_defs = [
TagMeta { number: 2, values_per_entry: 1, mask: 1 },
TagMeta { number: 3, values_per_entry: 1, mask: 2 },
TagMeta { number: 4, values_per_entry: 1, mask: 4 },
TagMeta { number: 6, values_per_entry: 2, mask: 8 },
];
let tagx = build_tagx(&[(2, 1, 1), (3, 1, 2), (4, 1, 4), (6, 2, 8)]);
let mut cncx = CncxBuilder::new();
let cncx_offsets: Vec<u32> = frags.iter().map(|f| cncx.add(&f.selector)).collect();
let mut entries: Vec<Vec<u8>> = Vec::with_capacity(frags.len());
for (f, cncx_off) in frags.iter().zip(cncx_offsets.iter()) {
let label = format!("{:010}", f.insert_pos);
let label_bytes = label.as_bytes();
let values: [Vec<u32>; 4] = [
vec![*cncx_off],
vec![f.file_number as u32],
vec![f.sequence_number as u32],
vec![f.start_pos as u32, f.length as u32],
];
let entry = encode_indx_entry(label_bytes, &tag_defs, &values);
entries.push(entry);
}
let cncx_records = cncx.into_records();
let num_cncx = cncx_records.len();
let data_record = build_indx_data_record(&entries);
let last_label = format!("{:010}", frags.last().unwrap().insert_pos)
.into_bytes();
let primary = build_indx_primary(
&tagx,
1,
frags.len(),
num_cncx,
&[(last_label, frags.len() as u32)],
);
(vec![primary, data_record], cncx_records)
}
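/// Builds a single-entry NCX index spanning the whole text, with the book
/// title stored in its own CNCX record as the entry label.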
fn build_ncx_indx(
title: &str,
skeleton_entries: &[SkeletonEntry],
text_length: usize,
) -> (Vec<Vec<u8>>, Vec<Vec<u8>>) {
let mut ncx_cncx = CncxBuilder::new();
let label_offset = ncx_cncx.add(title);
let tag_defs = [
TagMeta { number: 1, values_per_entry: 1, mask: 0x01 },
TagMeta { number: 2, values_per_entry: 1, mask: 0x02 },
TagMeta { number: 3, values_per_entry: 1, mask: 0x04 },
TagMeta { number: 4, values_per_entry: 1, mask: 0x08 },
TagMeta { number: 6, values_per_entry: 2, mask: 0x10 },
];
let tagx = build_tagx(&[(1, 1, 0x01), (2, 1, 0x02), (3, 1, 0x04), (4, 1, 0x08), (6, 2, 0x10)]);
let offset = if skeleton_entries.is_empty() { 0 } else { skeleton_entries[0].start_pos };
let length = if text_length > offset { text_length - offset } else { 0 };
let label = b"0";
let values: [Vec<u32>; 5] = [
vec![offset as u32],
vec![length as u32],
vec![label_offset],
vec![0],
vec![0, 0],
];
let entry = encode_indx_entry(label, &tag_defs, &values);
let data_record = build_indx_data_record(&[entry]);
let ncx_cncx_count = ncx_cncx.record_count();
let primary = build_indx_primary(&tagx, 1, 1, ncx_cncx_count, &[(label.to_vec(), 1u32)]);
(vec![primary, data_record], ncx_cncx.into_records())
}
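/// Emits a fixed 32-byte DATP record. Its internal layout is not publicly
/// documented; these constants reproduce a known-good record (see the
/// datp_is_32_bytes_with_content test).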
fn build_datp() -> Vec<u8> {
vec![
0x44, 0x41, 0x54, 0x50, 0x00, 0x00, 0x00, 0x0D, 0x01, 0x04, 0x00, 0x04,
0x02, 0x00, 0x00, 0x06,
0x19, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x01,
0x6D, 0x02, 0x46, 0x02,
0x66, 0x00, 0x00, 0x00,
]
}
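/// Minimal two-record index (primary + empty data record) used when there
/// is nothing to index.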
fn minimal_indx() -> Vec<Vec<u8>> {
let tagx = build_tagx(&[(1, 1, 1)]);
let data = build_indx_data_record(&[]);
let primary = build_indx_primary(&tagx, 1, 0, 0, &[]);
vec![primary, data]
}
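/// Writes `value` big-endian into `buf` at `offset`.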
fn put32(buf: &mut [u8], offset: usize, value: u32) {
buf[offset..offset + 4].copy_from_slice(&value.to_be_bytes());
}
#[cfg(test)]
mod tests {
use super::*;
fn make_comic_page(aid_body: &str, img_src: &str) -> String {
format!(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\"><head><title>Page</title></head><body aid=\"{}\"><div aid=\"1\"><img src=\"{}\"/></div></body></html>",
aid_body, img_src
)
}
#[test]
fn base32_aid_encoder() {
assert_eq!(encode_aid_base32(0), "0");
assert_eq!(encode_aid_base32(1), "1");
assert_eq!(encode_aid_base32(9), "9");
assert_eq!(encode_aid_base32(10), "A");
assert_eq!(encode_aid_base32(31), "V");
assert_eq!(encode_aid_base32(32), "10");
assert_eq!(encode_aid_base32(33), "11");
assert_eq!(encode_aid_base32(63), "1V");
assert_eq!(encode_aid_base32(1024), "100");
}
#[test]
fn base32_4char_image_recindex() {
assert_eq!(encode_base32_4char(0), "0000");
assert_eq!(encode_base32_4char(1), "0001");
assert_eq!(encode_base32_4char(32), "0010");
assert_eq!(encode_base32_4char(1024), "0100");
}
#[test]
fn find_body_open_and_close() {
let html = make_comic_page("0", "img1.jpg");
let (_, after_open, aid) = find_body_open(html.as_bytes()).expect("body open");
assert_eq!(aid, "0");
let head = &html[..after_open];
assert!(head.ends_with("<body aid=\"0\">"));
let close = find_last_close_body(html.as_bytes()).expect("body close");
assert_eq!(&html[close..close + 7], "</body>");
}
#[test]
fn split_skeleton_reconstructs_original() {
let html = make_comic_page("0", "img1.jpg");
let split = split_skeleton_and_body(&html);
assert!(split.skeleton.contains("<body aid=\"0\"></body>"));
assert_eq!(&split.skeleton[split.body_inner_offset..split.body_inner_offset + 7], "</body>");
assert!(split.body_inner.starts_with("<div aid=\"1\">"));
assert!(split.body_inner.contains("<img src=\"img1.jpg\"/>"));
assert!(!split.body_inner.contains("<body"));
let mut rebuilt = String::new();
rebuilt.push_str(&split.skeleton[..split.body_inner_offset]);
rebuilt.push_str(&split.body_inner);
rebuilt.push_str(&split.skeleton[split.body_inner_offset..]);
assert_eq!(rebuilt, html);
}
#[test]
fn global_sequence_number_monotonic_across_pages() {
let parts: Vec<String> = (0..5)
.map(|i| {
format!(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html><head><title>P</title></head><body><div><img src=\"page{}.jpg\"/></div></body></html>",
i
)
})
.collect();
let href_to_recindex = std::collections::HashMap::new();
let spine_items: Vec<(String, String)> = Vec::new();
let (combined, skels, frags) =
build_kf8_html(&parts, &href_to_recindex, &spine_items, false);
assert_eq!(skels.len(), 5);
assert_eq!(frags.len(), 5);
for (i, f) in frags.iter().enumerate() {
assert_eq!(f.sequence_number, i, "seq #{i}");
assert_eq!(f.file_number, i, "file_number #{i}");
}
for s in &skels {
assert_eq!(
&combined[s.start_pos..s.start_pos + s.length.min(6)],
&b"<?xml "[..6]
);
}
for (f, s) in frags.iter().zip(skels.iter()) {
assert!(f.insert_pos >= s.start_pos);
assert!(f.insert_pos <= s.start_pos + s.length);
}
}
#[test]
fn fragment_selector_uses_body_aid() {
let parts = vec![make_comic_page("0", "img.jpg")];
let href_to_recindex = std::collections::HashMap::new();
let spine_items: Vec<(String, String)> = Vec::new();
let (_, _, frags) = build_kf8_html(&parts, &href_to_recindex, &spine_items, false);
assert_eq!(frags.len(), 1);
assert_eq!(frags[0].selector, "P-//*[@aid='0']");
}
#[test]
fn process_kf8_part_adds_aid_to_body() {
let html = "<html><head></head><body><div><img src=\"a.jpg\"/></div></body></html>";
let mut counter = 0u32;
let lookup = std::collections::HashMap::new();
let out = process_kf8_part(html, &mut counter, &lookup, false);
assert!(out.contains("<body"));
assert!(out.contains("aid=\""));
}
#[test]
fn control_byte_matches_calibre_skeleton() {
let tag_defs = [
TagMeta { number: 1, values_per_entry: 1, mask: 3 },
TagMeta { number: 6, values_per_entry: 2, mask: 12 },
];
assert_eq!(control_byte_for(&tag_defs, &[2, 4]), 0x0A);
}
#[test]
fn control_byte_matches_calibre_chunk() {
let tag_defs = [
TagMeta { number: 2, values_per_entry: 1, mask: 1 },
TagMeta { number: 3, values_per_entry: 1, mask: 2 },
TagMeta { number: 4, values_per_entry: 1, mask: 4 },
TagMeta { number: 6, values_per_entry: 2, mask: 8 },
];
assert_eq!(control_byte_for(&tag_defs, &[1, 1, 1, 2]), 0x0F);
}
#[test]
fn skeleton_indx_records_have_correct_magic_and_length() {
let skels = vec![SkeletonEntry {
label: "SKEL0000000000".to_string(),
start_pos: 0,
length: 100,
chunk_count: 1,
}];
let recs = build_skeleton_indx(&skels);
assert_eq!(recs.len(), 2);
assert_eq!(&recs[0][0..4], b"INDX");
assert_eq!(&recs[1][0..4], b"INDX");
assert_eq!(u32::from_be_bytes(recs[0][4..8].try_into().unwrap()), 192);
assert_eq!(u32::from_be_bytes(recs[0][16..20].try_into().unwrap()), 2);
assert_eq!(u32::from_be_bytes(recs[0][28..32].try_into().unwrap()), 65001);
assert_eq!(u32::from_be_bytes(recs[0][180..184].try_into().unwrap()), 192);
assert_eq!(u32::from_be_bytes(recs[1][12..16].try_into().unwrap()), 1);
}
#[test]
fn fragment_indx_emits_cncx_records() {
let frags = vec![FragmentEntry {
insert_pos: 201,
selector: "P-//*[@aid='0']".to_string(),
file_number: 0,
sequence_number: 0,
start_pos: 0,
length: 42,
}];
let (recs, cncx) = build_fragment_indx_with_cncx(&frags);
assert_eq!(recs.len(), 2);
assert_eq!(cncx.len(), 1, "should emit one CNCX record");
assert!(
cncx[0].windows(b"P-//*[@aid='0']".len()).any(|w| w == b"P-//*[@aid='0']"),
"selector not found in CNCX record"
);
assert_eq!(u32::from_be_bytes(recs[0][52..56].try_into().unwrap()), 1);
}
#[test]
fn kf8_trailer_has_tbs() {
let text = b"<html><body>hello</body></html>";
let (records, _) = compress_text_kf8(text, 1);
assert_eq!(records.len(), 1);
let rec = &records[0];
let len = rec.len();
assert!(len >= 4, "record too short for trailing bytes");
assert_eq!(rec[len - 1], 0x83, "TBS size byte should be 0x83");
assert_eq!(rec[len - 2], 0x81, "TBS value byte should be 0x81 (inv VWI for 1)");
assert_eq!(rec[len - 3], 0x82, "TBS type byte should be 0x82");
assert_eq!(rec[len - 4], 0x00, "multibyte byte should be 0x00");
}
#[test]
fn build_kf8_section_records_are_well_formed() {
let parts = vec![
make_comic_page("0", "p1.jpg"),
make_comic_page("0", "p2.jpg"),
];
let css = "body{margin:0}";
let href_to_recindex = std::collections::HashMap::new();
let spine_items: Vec<(String, String)> = Vec::new();
let section = build_kf8_section(
&parts,
css,
&href_to_recindex,
&spine_items,
true, false, "Test Book",
);
assert!(!section.text_records.is_empty());
assert_eq!(&section.fdst[0..4], b"FDST");
assert_eq!(section.flow_count, 2);
assert_eq!(section.fragment_indx.len(), 2);
assert_eq!(section.skeleton_indx.len(), 2);
assert_eq!(section.ncx_indx.len(), 2);
assert_eq!(section.cncx_records.len(), 1, "one CNCX record for the deduped selector");
assert_eq!(
u32::from_be_bytes(section.skeleton_indx[0][36..40].try_into().unwrap()),
2
);
assert_eq!(
u32::from_be_bytes(section.fragment_indx[0][36..40].try_into().unwrap()),
2
);
assert_eq!(
u32::from_be_bytes(section.fragment_indx[0][52..56].try_into().unwrap()),
1
);
for (i, r) in section.text_records.iter().enumerate() {
let last = *r.last().unwrap();
if i == 0 {
assert_eq!(last, 0x83, "first text record TBS size byte should be 0x83");
} else {
assert_eq!(last, 0x81, "subsequent text record TBS size byte should be 0x81");
}
}
}
#[test]
fn cncx_uses_inverted_vwi() {
let mut b = crate::cncx::CncxBuilder::new();
b.add("P-//*[@aid='0']"); let recs = b.into_records();
assert_eq!(recs.len(), 1);
assert_eq!(recs[0][0], 0x8F,
"CNCX length prefix must be inverted VWI (0x8F for len 15), got 0x{:02X}",
recs[0][0]);
}
#[test]
fn ncx_indx_has_five_tags() {
let skels = vec![SkeletonEntry {
label: "SKEL0000000000".to_string(),
start_pos: 0,
length: 100,
chunk_count: 1,
}];
let (ncx_recs, ncx_cncx) = build_ncx_indx("Test", &skels, 500);
assert_eq!(ncx_recs.len(), 2, "NCX should have primary + data");
assert!(!ncx_cncx.is_empty(), "NCX should have CNCX for labels");
let primary = &ncx_recs[0];
let tagx_off = u32::from_be_bytes(primary[180..184].try_into().unwrap()) as usize;
assert_eq!(&primary[tagx_off..tagx_off+4], b"TAGX");
let tagx_len = u32::from_be_bytes(primary[tagx_off+4..tagx_off+8].try_into().unwrap()) as usize;
let mut tag_count = 0;
let mut pos = tagx_off + 12;
while pos < tagx_off + tagx_len {
if primary[pos + 3] == 1 {
break;
}
tag_count += 1;
pos += 4;
}
assert_eq!(tag_count, 5, "NCX TAGX must define 5 tags, got {}", tag_count);
let num_cncx = u32::from_be_bytes(primary[52..56].try_into().unwrap());
assert!(num_cncx >= 1, "NCX num_cncx must be >= 1, got {}", num_cncx);
let total = u32::from_be_bytes(primary[36..40].try_into().unwrap());
assert_eq!(total, 1, "NCX should have 1 entry for simple book");
}
#[test]
fn ncx_cncx_contains_title() {
let skels = vec![SkeletonEntry {
label: "SKEL0000000000".to_string(),
start_pos: 0,
length: 100,
chunk_count: 1,
}];
let (_, ncx_cncx) = build_ncx_indx("My Comic Title", &skels, 500);
assert_eq!(ncx_cncx.len(), 1);
let cncx = &ncx_cncx[0];
let title = b"My Comic Title";
assert_eq!(cncx[0], 0x8E,
"CNCX title length should be 0x8E (inv VWI for 14), got 0x{:02X}", cncx[0]);
assert_eq!(&cncx[1..1+title.len()], title);
}
#[test]
fn datp_is_32_bytes_with_content() {
let datp = build_datp();
assert_eq!(datp.len(), 32, "DATP must be 32 bytes (not 152-byte stub)");
assert_eq!(&datp[0..4], b"DATP");
let content = &datp[8..];
assert!(content.iter().any(|&b| b != 0),
"DATP content must not be all zeros");
}
#[test]
fn kf8_section_has_ncx_cncx_records() {
let parts = vec![make_comic_page("0", "p1.jpg")];
let section = build_kf8_section(
&parts, "", &std::collections::HashMap::new(),
&Vec::new(), true, false, "Test",
);
assert!(!section.ncx_cncx_records.is_empty(),
"KF8 section must include NCX CNCX records");
}
#[test]
fn kindle_embed_uses_image_jpg_not_jpeg() {
let mut href_to_recindex = std::collections::HashMap::new();
href_to_recindex.insert("p1.jpg".to_string(), 1usize);
href_to_recindex.insert("p2.jpg".to_string(), 2usize);
let parts = vec![
make_comic_page("0", "p1.jpg"),
make_comic_page("0", "p2.jpg"),
];
let spine_items: Vec<(String, String)> = Vec::new();
let section = build_kf8_section(
&parts,
"body{margin:0}",
&href_to_recindex,
&spine_items,
true, false, "Test Book",
);
let html = std::str::from_utf8(&section.html_bytes)
.expect("KF8 HTML should be valid UTF-8");
assert!(
html.contains("image/jpg"),
"KF8 HTML must contain 'image/jpg' kindle:embed references"
);
assert!(
!html.contains("image/jpeg"),
"KF8 HTML must NOT contain 'image/jpeg' - Kindle firmware rejects it"
);
}
}