use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use crate::import::ChapterId;
use crate::model::{GlobalNodeId, LandmarkType, NodeId, TocEntry};
use crate::style::StyleId;
use super::style_registry::StyleRegistry;
use super::symbols::KFX_SYMBOL_TABLE_SIZE;
use super::transforms::encode_base32;
pub struct SymbolTable {
local_symbols: Vec<String>,
symbol_map: HashMap<String, u64>,
next_id: u64,
}
impl SymbolTable {
pub const LOCAL_MIN_ID: u64 = KFX_SYMBOL_TABLE_SIZE as u64;
pub fn new() -> Self {
Self {
local_symbols: Vec::new(),
symbol_map: HashMap::new(),
next_id: Self::LOCAL_MIN_ID,
}
}
pub fn get_or_intern(&mut self, name: &str) -> u64 {
if let Some(id_str) = name.strip_prefix('$')
&& let Ok(id) = id_str.parse::<u64>()
{
return id;
}
if let Some(&id) = self.symbol_map.get(name) {
return id;
}
let id = self.next_id;
self.next_id += 1;
self.local_symbols.push(name.to_string());
self.symbol_map.insert(name.to_string(), id);
id
}
pub fn get(&self, name: &str) -> Option<u64> {
if let Some(id_str) = name.strip_prefix('$')
&& let Ok(id) = id_str.parse::<u64>()
{
return Some(id);
}
self.symbol_map.get(name).copied()
}
pub fn local_symbols(&self) -> &[String] {
&self.local_symbols
}
pub fn len(&self) -> usize {
self.local_symbols.len()
}
pub fn is_empty(&self) -> bool {
self.local_symbols.is_empty()
}
}
impl Default for SymbolTable {
fn default() -> Self {
Self::new()
}
}
/// Hands out consecutive fragment IDs.
pub struct IdGenerator {
    /// ID that the next call to [`IdGenerator::next_id`] will return.
    next_id: u64,
}

impl IdGenerator {
    /// First ID produced by a freshly created generator.
    pub const FRAGMENT_MIN_ID: u64 = 866;

    /// Creates a generator positioned at [`Self::FRAGMENT_MIN_ID`].
    pub fn new() -> Self {
        IdGenerator {
            next_id: Self::FRAGMENT_MIN_ID,
        }
    }

    /// Returns the current ID and advances the generator by one.
    pub fn next_id(&mut self) -> u64 {
        let issued = self.peek();
        self.next_id = issued + 1;
        issued
    }

    /// Returns the ID the next [`Self::next_id`] call will produce, without
    /// consuming it.
    pub fn peek(&self) -> u64 {
        self.next_id
    }
}

impl Default for IdGenerator {
    /// Same as [`IdGenerator::new`].
    fn default() -> Self {
        IdGenerator::new()
    }
}
/// Tracks resources by href: the symbol ID each one was registered under and,
/// separately, a short generated name (`e0`, `e1`, ...).
///
/// The two tables are filled independently: [`ResourceRegistry::register`]
/// populates the symbol-ID table, [`ResourceRegistry::get_or_create_name`]
/// populates the short-name table.
#[derive(Debug)]
pub struct ResourceRegistry {
    /// href -> symbol ID assigned by `register`.
    resources: HashMap<String, u64>,
    /// href -> short generated name assigned by `get_or_create_name`.
    resource_names: HashMap<String, String>,
    /// Counter used to build the next short name.
    next_resource_id: usize,
}

impl ResourceRegistry {
    /// Creates an empty registry.
    pub fn new() -> Self {
        Self {
            resources: HashMap::new(),
            resource_names: HashMap::new(),
            next_resource_id: 0,
        }
    }

    /// Registers `href` and returns its symbol ID.
    ///
    /// The first registration interns `resource:<href>` in `symbols`; later
    /// calls with the same href return the stored ID without touching
    /// `symbols`.
    pub fn register(&mut self, href: &str, symbols: &mut SymbolTable) -> u64 {
        if let Some(&id) = self.resources.get(href) {
            return id;
        }
        let symbol_name = format!("resource:{}", href);
        let id = symbols.get_or_intern(&symbol_name);
        self.resources.insert(href.to_string(), id);
        id
    }

    /// Returns the short name for `href`, generating one on first use.
    ///
    /// Names are `e` followed by an upper-case hexadecimal counter (`e0`,
    /// `e1`, ..., `eA`, ...). This does not register the href; it only assigns
    /// a name.
    pub fn get_or_create_name(&mut self, href: &str) -> String {
        if let Some(name) = self.resource_names.get(href) {
            return name.clone();
        }
        let name = format!("e{:X}", self.next_resource_id);
        self.next_resource_id += 1;
        self.resource_names.insert(href.to_string(), name.clone());
        name
    }

    /// Symbol ID of a registered href, if any.
    pub fn get(&self, href: &str) -> Option<u64> {
        self.resources.get(href).copied()
    }

    /// Short name previously generated for `href`, if any.
    pub fn get_name(&self, href: &str) -> Option<&str> {
        self.resource_names.get(href).map(String::as_str)
    }

    /// Iterates over `(href, symbol ID)` pairs of registered resources, in
    /// unspecified order.
    pub fn iter(&self) -> impl Iterator<Item = (&String, &u64)> {
        self.resources.iter()
    }

    /// Number of registered resources.
    ///
    /// Counts the same table that `register`, `get` and `iter` operate on, so
    /// `len()` always equals `iter().count()`. Short names handed out by
    /// `get_or_create_name` for hrefs that were never registered are not
    /// counted.
    pub fn len(&self) -> usize {
        self.resources.len()
    }

    /// `true` when no resource has been registered (i.e. `iter()` is empty).
    pub fn is_empty(&self) -> bool {
        self.resources.is_empty()
    }
}

impl Default for ResourceRegistry {
    /// Same as [`ResourceRegistry::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Collects text segments and keeps a running total of their byte length.
#[derive(Default)]
pub struct TextAccumulator {
    /// Segments in the order they were pushed.
    segments: Vec<String>,
    /// Sum of `str::len()` (bytes) over all stored segments.
    total_len: usize,
}

impl TextAccumulator {
    /// Creates an empty accumulator.
    pub fn new() -> Self {
        TextAccumulator {
            segments: Vec::new(),
            total_len: 0,
        }
    }

    /// Appends a copy of `text` and returns the index of the new segment.
    pub fn push(&mut self, text: &str) -> usize {
        self.total_len += text.len();
        self.segments.push(text.to_owned());
        self.segments.len() - 1
    }

    /// Total byte length of all stored segments (not the segment count).
    pub fn len(&self) -> usize {
        self.total_len
    }

    /// `true` when the total byte length is zero.
    ///
    /// This is also `true` if only empty strings have been pushed.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Stored segments in push order.
    pub fn segments(&self) -> &[String] {
        self.segments.as_slice()
    }

    /// Removes and returns every segment, resetting the length to zero.
    pub fn drain(&mut self) -> Vec<String> {
        let taken = std::mem::take(&mut self.segments);
        self.total_len = 0;
        taken
    }
}
/// A location expressed as a fragment ID plus an offset within it.
///
/// `ExportContext::record_position` stores one of these per
/// `(ChapterId, NodeId)`, built from the context's current fragment ID and
/// current text offset.
#[derive(Debug, Clone, Copy)]
pub struct Position {
/// ID of the fragment the position lies in.
pub fragment_id: u64,
/// Offset within that fragment.
pub offset: usize,
}
/// A resolved anchor: an anchor symbol together with the place it points at.
///
/// Produced by `AnchorRegistry::create_anchor` and
/// `AnchorRegistry::create_chapter_anchor`, and handed out by
/// `AnchorRegistry::drain_anchors`.
#[derive(Debug, Clone)]
pub struct AnchorPosition {
/// Anchor symbol name (generated as `a` + upper-case hex counter).
pub symbol: String,
/// Content fragment ID passed in when the anchor was created.
pub fragment_id: u64,
/// Section ID passed in when the anchor was created.
pub section_id: u64,
/// Offset within the fragment; chapter anchors always use 0.
pub offset: usize,
}
/// An anchor that points at an external URL rather than a place in the book.
///
/// Created by `AnchorRegistry::register_external` and handed out by
/// `AnchorRegistry::drain_external_anchors`.
#[derive(Debug, Clone)]
pub struct ExternalAnchor {
/// Anchor symbol name generated for the URL.
pub symbol: String,
/// The URL exactly as it was passed to `register_external`.
pub uri: String,
}
/// Registry of link targets and the anchor symbols generated for them.
///
/// Targets are registered first (node, chapter, external URL or plain href),
/// each receiving a symbol of the form `a` + upper-case hex counter. Node and
/// chapter targets are later *resolved* to a concrete position via
/// [`AnchorRegistry::create_anchor`] / [`AnchorRegistry::create_chapter_anchor`].
#[derive(Debug, Default)]
pub struct AnchorRegistry {
    /// Node target -> anchor symbol.
    node_to_symbol: HashMap<GlobalNodeId, String>,
    /// Chapter target -> anchor symbol.
    chapter_to_symbol: HashMap<ChapterId, String>,
    /// Every href (or URL) seen -> anchor symbol.
    href_to_symbol: HashMap<String, String>,
    /// Symbols that already have a resolved position.
    resolved_symbols: HashSet<String>,
    /// Resolved anchors awaiting `drain_anchors`.
    resolved: Vec<AnchorPosition>,
    /// External anchors awaiting `drain_external_anchors`.
    external_anchors: Vec<ExternalAnchor>,
    /// Counter used to build the next anchor symbol.
    next_anchor_id: usize,
    /// Node -> (fragment ID, offset), first recorded or anchor position.
    node_positions: HashMap<GlobalNodeId, (u64, usize)>,
    /// Chapter -> fragment ID, first recorded or anchor position.
    chapter_positions: HashMap<ChapterId, u64>,
}

impl AnchorRegistry {
    /// Creates an empty registry.
    pub fn new() -> Self {
        Default::default()
    }

    /// Generates the next unused anchor symbol and advances the counter.
    fn mint_symbol(&mut self) -> String {
        let ordinal = self.next_anchor_id;
        self.next_anchor_id += 1;
        format!("a{:X}", ordinal)
    }

    /// Points `href` at `symbol`, replacing any earlier binding for that href.
    fn bind_href(&mut self, href: &str, symbol: &str) {
        self.href_to_symbol
            .insert(href.to_string(), symbol.to_string());
    }

    /// Stores a resolved anchor unless `symbol` was resolved before.
    ///
    /// Returns `true` when the anchor was newly recorded.
    fn push_resolved(
        &mut self,
        symbol: &str,
        fragment_id: u64,
        section_id: u64,
        offset: usize,
    ) -> bool {
        // `HashSet::insert` reports `false` for a value that is already present.
        if !self.resolved_symbols.insert(symbol.to_string()) {
            return false;
        }
        self.resolved.push(AnchorPosition {
            symbol: symbol.to_string(),
            fragment_id,
            section_id,
            offset,
        });
        true
    }

    /// Registers a node as a link target reachable through `href`.
    ///
    /// Reuses the node's existing symbol if it has one; either way `href` is
    /// bound to the returned symbol.
    pub fn register_internal_target(&mut self, target: GlobalNodeId, href: &str) -> String {
        let symbol = match self.node_to_symbol.get(&target).cloned() {
            Some(existing) => existing,
            None => {
                let fresh = self.mint_symbol();
                self.node_to_symbol.insert(target, fresh.clone());
                fresh
            }
        };
        self.bind_href(href, &symbol);
        symbol
    }

    /// Registers a whole chapter as a link target reachable through `href`.
    ///
    /// Reuses the chapter's existing symbol if it has one; either way `href`
    /// is bound to the returned symbol.
    pub fn register_chapter_target(&mut self, chapter: ChapterId, href: &str) -> String {
        let symbol = match self.chapter_to_symbol.get(&chapter).cloned() {
            Some(existing) => existing,
            None => {
                let fresh = self.mint_symbol();
                self.chapter_to_symbol.insert(chapter, fresh.clone());
                fresh
            }
        };
        self.bind_href(href, &symbol);
        symbol
    }

    /// Registers an external URL and returns its symbol.
    ///
    /// If `url` is already bound to a symbol (by any registration method),
    /// that symbol is returned and no external anchor is added.
    pub fn register_external(&mut self, url: &str) -> String {
        if let Some(existing) = self.href_to_symbol.get(url) {
            return existing.clone();
        }
        let symbol = self.mint_symbol();
        self.bind_href(url, &symbol);
        self.external_anchors.push(ExternalAnchor {
            symbol: symbol.clone(),
            uri: url.to_string(),
        });
        symbol
    }

    /// Returns the symbol bound to `href`, creating one if necessary.
    ///
    /// Unknown hrefs starting with `http://` or `https://` are registered as
    /// external anchors; any other unknown href just gets a fresh symbol.
    pub fn get_or_create_href_symbol(&mut self, href: &str) -> String {
        if let Some(existing) = self.href_to_symbol.get(href) {
            return existing.clone();
        }
        let is_web_url = ["http://", "https://"]
            .iter()
            .any(|scheme| href.starts_with(*scheme));
        if is_web_url {
            return self.register_external(href);
        }
        let symbol = self.mint_symbol();
        self.bind_href(href, &symbol);
        symbol
    }

    /// Symbol of a registered node target.
    pub fn get_symbol(&self, target: GlobalNodeId) -> Option<&str> {
        self.node_to_symbol.get(&target).map(String::as_str)
    }

    /// Symbol of a registered chapter target.
    pub fn get_chapter_symbol(&self, chapter: ChapterId) -> Option<&str> {
        self.chapter_to_symbol.get(&chapter).map(String::as_str)
    }

    /// Symbol currently bound to `href`.
    pub fn get_href_symbol(&self, href: &str) -> Option<&str> {
        self.href_to_symbol.get(href).map(String::as_str)
    }

    /// Whether `target` was registered as a node target.
    pub fn is_internal_target(&self, target: GlobalNodeId) -> bool {
        self.node_to_symbol.get(&target).is_some()
    }

    /// Whether `chapter` was registered as a chapter target.
    pub fn is_chapter_target(&self, chapter: ChapterId) -> bool {
        self.chapter_to_symbol.get(&chapter).is_some()
    }

    /// Resolves a registered node target to a concrete position.
    ///
    /// Returns the anchor symbol the first time it is resolved. Returns `None`
    /// if `target` was never registered or its symbol is already resolved; in
    /// both cases nothing is modified.
    pub fn create_anchor(
        &mut self,
        target: GlobalNodeId,
        content_fragment_id: u64,
        section_id: u64,
        offset: usize,
    ) -> Option<String> {
        let symbol = self.node_to_symbol.get(&target)?.clone();
        if !self.push_resolved(&symbol, content_fragment_id, section_id, offset) {
            return None;
        }
        // Unlike `record_node_position`, this overwrites an earlier entry.
        self.node_positions
            .insert(target, (content_fragment_id, offset));
        Some(symbol)
    }

    /// Resolves a registered chapter target to the start (offset 0) of a fragment.
    ///
    /// Returns the anchor symbol the first time it is resolved. Returns `None`
    /// if `chapter` was never registered or its symbol is already resolved.
    pub fn create_chapter_anchor(
        &mut self,
        chapter: ChapterId,
        content_fragment_id: u64,
        section_id: u64,
    ) -> Option<String> {
        let symbol = self.chapter_to_symbol.get(&chapter)?.clone();
        if !self.push_resolved(&symbol, content_fragment_id, section_id, 0) {
            return None;
        }
        // Unlike `record_chapter_position`, this overwrites an earlier entry.
        self.chapter_positions.insert(chapter, content_fragment_id);
        Some(symbol)
    }

    /// Remembers a node's position; the first recorded value wins.
    pub fn record_node_position(&mut self, target: GlobalNodeId, fragment_id: u64, offset: usize) {
        let position = (fragment_id, offset);
        self.node_positions.entry(target).or_insert(position);
    }

    /// Remembers a chapter's fragment ID; the first recorded value wins.
    pub fn record_chapter_position(&mut self, chapter: ChapterId, fragment_id: u64) {
        self.chapter_positions.entry(chapter).or_insert(fragment_id);
    }

    /// Stored `(fragment ID, offset)` for a node, if any.
    pub fn get_node_position(&self, target: GlobalNodeId) -> Option<(u64, usize)> {
        self.node_positions.get(&target).copied()
    }

    /// Stored fragment ID for a chapter, if any.
    pub fn get_chapter_position(&self, chapter: ChapterId) -> Option<u64> {
        self.chapter_positions.get(&chapter).copied()
    }

    /// Takes all resolved anchors collected so far, leaving the list empty.
    ///
    /// The set of already-resolved symbols is kept, so a drained anchor is not
    /// created again.
    pub fn drain_anchors(&mut self) -> Vec<AnchorPosition> {
        std::mem::take(&mut self.resolved)
    }

    /// Takes all external anchors collected so far, leaving the list empty.
    pub fn drain_external_anchors(&mut self) -> Vec<ExternalAnchor> {
        std::mem::take(&mut self.external_anchors)
    }

    /// Number of node targets, chapter targets and undrained external anchors.
    pub fn len(&self) -> usize {
        [
            self.node_to_symbol.len(),
            self.chapter_to_symbol.len(),
            self.external_anchors.len(),
        ]
        .iter()
        .sum()
    }

    /// `true` when there are no node targets, chapter targets or undrained
    /// external anchors.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Shared mutable state threaded through an export: symbol interning, ID
/// allocation, registries, and the position bookkeeping recorded per chapter.
pub struct ExportContext {
/// Symbol table used by `intern` and by the registries that need symbol IDs.
pub symbols: SymbolTable,
/// Source of fragment IDs (`next_fragment_id`, `begin_chapter_survey`).
pub fragment_ids: IdGenerator,
/// Registry of resources keyed by href.
pub resource_registry: ResourceRegistry,
/// Symbol IDs of sections, in the order `register_section` was called.
pub section_ids: Vec<u64>,
/// Text collected for the current chapter; replaced by `begin_chapter`.
text_accumulator: TextAccumulator,
/// Symbol ID of the content name passed to the latest `begin_chapter` (0 initially).
pub current_content_name: u64,
/// Positions stored by `record_position`, keyed by chapter and node.
pub position_map: HashMap<(ChapterId, NodeId), Position>,
/// Fragment ID allocated for each chapter by `begin_chapter_survey`.
pub chapter_fragments: HashMap<ChapterId, u64>,
/// Chapter currently being surveyed or exported, if any.
current_chapter: Option<ChapterId>,
/// Fragment ID of the chapter most recently passed to `begin_chapter_survey`.
current_fragment_id: u64,
/// Running text offset; reset by `begin_chapter_survey`, grown by `advance_text_offset`.
current_text_offset: usize,
/// Fragment ID per chapter path, filled by `begin_chapter_survey`.
pub path_to_fragment: HashMap<String, u64>,
/// Symbol ID interned for the name `"s0"` when the context is created.
pub default_style_symbol: u64,
/// Style registry, constructed with `default_style_symbol`.
pub style_registry: StyleRegistry,
/// Registry of link targets and their anchor symbols.
pub anchor_registry: AnchorRegistry,
/// Landmark targets by type; `fix_landmark_content_ids` may rewrite their fragment IDs.
pub landmark_fragments: HashMap<LandmarkType, LandmarkTarget>,
/// Navigation container symbols; default-initialized here and not otherwise set in this file.
pub nav_container_symbols: NavContainerSymbols,
/// Headings appended by `record_heading` / `record_heading_with_id`, in call order.
pub heading_positions: Vec<HeadingPosition>,
/// Starts as `None`; not assigned anywhere in this file.
pub cover_fragment_id: Option<u64>,
/// Starts as `None`; not assigned anywhere in this file.
pub cover_content_id: Option<u64>,
/// Chapters that were registered chapter targets when they were surveyed.
chapters_needing_anchor: HashSet<ChapterId>,
/// Chapter whose anchor `resolve_pending_chapter_anchor` should create next, if any.
pending_chapter_anchor: Option<ChapterId>,
/// First content ID seen per chapter (first value wins).
pub first_content_ids: HashMap<ChapterId, u64>,
/// Content IDs appended by `record_content_id`, grouped by chapter.
pub content_ids_by_chapter: HashMap<ChapterId, Vec<u64>>,
/// Text length stored per content ID by `record_content_length`.
pub content_id_lengths: HashMap<u64, usize>,
/// Section name -> set of resource short names, filled by `record_section_image_ref`.
pub section_resource_deps: BTreeMap<String, BTreeSet<String>>,
}
/// Where a heading was encountered, as recorded by
/// `ExportContext::record_heading` and `ExportContext::record_heading_with_id`.
#[derive(Debug, Clone)]
pub struct HeadingPosition {
/// Heading level as supplied by the caller (presumably 1-6 for h1-h6; not checked here).
pub level: u8,
/// Fragment ID the heading belongs to.
pub fragment_id: u64,
/// Text offset of the heading; `record_heading_with_id` always stores 0.
pub offset: usize,
}
/// Destination of a landmark entry, stored in
/// `ExportContext::landmark_fragments` keyed by `LandmarkType`.
#[derive(Debug, Clone)]
pub struct LandmarkTarget {
/// Fragment ID of the target. `ExportContext::fix_landmark_content_ids`
/// replaces a chapter fragment ID here with that chapter's first content ID.
pub fragment_id: u64,
/// Offset within the target; not read or written elsewhere in this file.
pub offset: u64,
/// Label for the landmark; not read or written elsewhere in this file
/// (presumably the display text — confirm against the producer).
pub label: String,
}
/// One `u64` per navigation container kind.
///
/// `ExportContext::new` default-initializes this (all zeros); nothing else in
/// this file assigns the fields. NOTE(review): presumably these hold symbol
/// IDs filled in by the navigation builder — confirm against the caller.
#[derive(Debug, Clone, Default)]
pub struct NavContainerSymbols {
/// Value for the table-of-contents container.
pub toc: u64,
/// Value for the headings container.
pub headings: u64,
/// Value for the landmarks container.
pub landmarks: u64,
}
impl ExportContext {
pub fn new() -> Self {
let mut symbols = SymbolTable::new();
let default_style_symbol = symbols.get_or_intern("s0");
Self {
symbols,
fragment_ids: IdGenerator::new(),
resource_registry: ResourceRegistry::new(),
section_ids: Vec::new(),
text_accumulator: TextAccumulator::new(),
current_content_name: 0,
position_map: HashMap::new(),
chapter_fragments: HashMap::new(),
current_chapter: None,
current_fragment_id: 0,
current_text_offset: 0,
path_to_fragment: HashMap::new(),
default_style_symbol,
style_registry: StyleRegistry::new(default_style_symbol),
anchor_registry: AnchorRegistry::new(),
landmark_fragments: HashMap::new(),
nav_container_symbols: NavContainerSymbols::default(),
heading_positions: Vec::new(),
cover_fragment_id: None,
cover_content_id: None,
chapters_needing_anchor: HashSet::new(),
pending_chapter_anchor: None,
first_content_ids: HashMap::new(),
content_ids_by_chapter: HashMap::new(),
content_id_lengths: HashMap::new(),
section_resource_deps: BTreeMap::new(),
}
}
pub fn record_section_image_ref(&mut self, section_name: &str, short_name: &str) {
self.section_resource_deps
.entry(section_name.to_string())
.or_default()
.insert(short_name.to_string());
}
pub fn begin_chapter(&mut self, content_name: &str) -> u64 {
self.text_accumulator = TextAccumulator::new();
self.current_content_name = self.symbols.get_or_intern(content_name);
self.current_content_name
}
pub fn begin_chapter_export(&mut self, chapter_id: ChapterId) {
self.current_chapter = Some(chapter_id);
if self.chapters_needing_anchor.contains(&chapter_id) {
self.pending_chapter_anchor = Some(chapter_id);
} else {
self.pending_chapter_anchor = None;
}
}
pub fn intern(&mut self, s: &str) -> u64 {
self.symbols.get_or_intern(s)
}
pub fn append_text(&mut self, text: &str) -> (usize, usize) {
let offset = self.text_accumulator.len();
let index = self.text_accumulator.push(text);
(index, offset)
}
pub fn text_accumulator(&self) -> &TextAccumulator {
&self.text_accumulator
}
pub fn drain_text(&mut self) -> Vec<String> {
self.text_accumulator.drain()
}
pub fn next_fragment_id(&mut self) -> u64 {
self.fragment_ids.next_id()
}
pub fn register_section(&mut self, name: &str) -> u64 {
let id = self.intern(name);
self.section_ids.push(id);
id
}
pub fn register_ir_style(&mut self, ir_style: &crate::style::ComputedStyle) -> u64 {
let schema = crate::kfx::style_schema::StyleSchema::standard();
let mut builder = crate::kfx::style_registry::StyleBuilder::new(schema);
builder.ingest_ir_style(ir_style);
let kfx_style = builder.build();
self.style_registry.register(kfx_style, &mut self.symbols)
}
pub fn register_style_id(
&mut self,
style_id: StyleId,
style_pool: &crate::style::StylePool,
) -> u64 {
if style_id == StyleId::DEFAULT {
return self.default_style_symbol;
}
if let Some(ir_style) = style_pool.get(style_id) {
self.register_ir_style(ir_style)
} else {
self.default_style_symbol
}
}
pub fn begin_chapter_survey(&mut self, chapter_id: ChapterId, path: &str) -> u64 {
let fragment_id = self.fragment_ids.next_id();
self.chapter_fragments.insert(chapter_id, fragment_id);
self.path_to_fragment.insert(path.to_string(), fragment_id);
self.current_chapter = Some(chapter_id);
self.current_fragment_id = fragment_id;
self.current_text_offset = 0;
if self.anchor_registry.is_chapter_target(chapter_id) {
self.chapters_needing_anchor.insert(chapter_id);
}
fragment_id
}
pub fn end_chapter_survey(&mut self) {
self.current_chapter = None;
}
pub fn get_fragment_for_path(&self, path: &str) -> Option<u64> {
self.path_to_fragment.get(path).copied()
}
pub fn record_position(&mut self, node_id: NodeId) {
if let Some(chapter_id) = self.current_chapter {
self.position_map.insert(
(chapter_id, node_id),
Position {
fragment_id: self.current_fragment_id,
offset: self.current_text_offset,
},
);
}
}
pub fn record_heading(&mut self, level: u8) {
self.heading_positions.push(HeadingPosition {
level,
fragment_id: self.current_fragment_id,
offset: self.current_text_offset,
});
}
pub fn record_heading_with_id(&mut self, level: u8, fragment_id: u64) {
self.heading_positions.push(HeadingPosition {
level,
fragment_id,
offset: 0,
});
}
pub fn resolve_pending_chapter_anchor(&mut self, first_content_id: u64) {
if let Some(chapter_id) = self.current_chapter {
self.first_content_ids
.entry(chapter_id)
.or_insert(first_content_id);
self.anchor_registry
.record_chapter_position(chapter_id, first_content_id);
}
let section_id = self
.current_chapter
.and_then(|ch| self.chapter_fragments.get(&ch).copied())
.unwrap_or(first_content_id);
if let Some(chapter_id) = self.pending_chapter_anchor.take()
&& let Some(symbol) =
self.anchor_registry
.create_chapter_anchor(chapter_id, first_content_id, section_id)
{
self.symbols.get_or_intern(&symbol);
}
}
pub fn create_anchor_if_needed(&mut self, node_id: NodeId, content_id: u64, offset: usize) {
let Some(chapter_id) = self.current_chapter else {
return;
};
let gid = GlobalNodeId::new(chapter_id, node_id);
let section_id = self
.chapter_fragments
.get(&chapter_id)
.copied()
.unwrap_or(content_id);
self.anchor_registry
.record_node_position(gid, content_id, offset);
if let Some(symbol) = self
.anchor_registry
.create_anchor(gid, content_id, section_id, offset)
{
self.symbols.get_or_intern(&symbol);
}
}
pub fn record_content_id(&mut self, content_id: u64) {
if let Some(chapter_id) = self.current_chapter {
self.content_ids_by_chapter
.entry(chapter_id)
.or_default()
.push(content_id);
}
}
pub fn record_content_length(&mut self, content_id: u64, text_len: usize) {
self.content_id_lengths.insert(content_id, text_len);
}
pub fn advance_text_offset(&mut self, text_len: usize) {
self.current_text_offset += text_len;
}
pub fn current_fragment_id(&self) -> u64 {
self.current_fragment_id
}
pub fn current_text_offset(&self) -> usize {
self.current_text_offset
}
pub fn get_position(&self, chapter_id: ChapterId, node_id: NodeId) -> Option<Position> {
self.position_map.get(&(chapter_id, node_id)).copied()
}
pub fn get_chapter_fragment(&self, chapter_id: ChapterId) -> Option<u64> {
self.chapter_fragments.get(&chapter_id).copied()
}
pub fn max_eid(&self) -> u64 {
if self.fragment_ids.peek() > IdGenerator::FRAGMENT_MIN_ID {
self.fragment_ids.peek() - 1
} else {
0
}
}
pub fn format_kindle_pos(fragment_id: u64, offset: usize) -> String {
let fid_encoded = encode_base32(fragment_id as u32, 4);
let off_encoded = encode_base32(offset as u32, 10);
format!("kindle:pos:fid:{}:off:{}", fid_encoded, off_encoded)
}
pub fn register_toc_targets(&mut self, entries: &[TocEntry]) {
for entry in entries {
if !entry.children.is_empty() {
self.register_toc_targets(&entry.children);
}
}
}
pub fn fix_landmark_content_ids(&mut self) {
for target in self.landmark_fragments.values_mut() {
let mut found_chapter = None;
for (cid, &fid) in &self.chapter_fragments {
if fid == target.fragment_id {
found_chapter = Some(*cid);
break;
}
}
if let Some(chapter_id) = found_chapter
&& let Some(&content_id) = self.first_content_ids.get(&chapter_id)
{
target.fragment_id = content_id;
}
}
}
pub fn current_chapter(&self) -> Option<ChapterId> {
self.current_chapter
}
pub fn is_registered_target(&self, node_id: NodeId) -> bool {
let Some(chapter_id) = self.current_chapter else {
return false;
};
let gid = GlobalNodeId::new(chapter_id, node_id);
self.anchor_registry.is_internal_target(gid)
}
}
impl Default for ExportContext {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the symbol table, ID generator, registries and export context.
#[cfg(test)]
mod tests {
use super::*;
// `$<number>` names resolve straight to that number.
#[test]
fn test_symbol_table_shared_symbols() {
let mut symtab = SymbolTable::new();
assert_eq!(symtab.get_or_intern("$260"), 260);
assert_eq!(symtab.get_or_intern("$145"), 145);
}
// Local names get consecutive IDs from LOCAL_MIN_ID up and are deduplicated.
#[test]
fn test_symbol_table_local_symbols() {
let mut symtab = SymbolTable::new();
let id1 = symtab.get_or_intern("section-1");
let id2 = symtab.get_or_intern("section-2");
assert!(id1 >= SymbolTable::LOCAL_MIN_ID);
assert_eq!(id2, id1 + 1);
// Interning the same name again returns the original ID.
assert_eq!(symtab.get_or_intern("section-1"), id1);
}
// Fragment IDs start at 866 and increase by one per call.
#[test]
fn test_id_generator() {
let mut id_gen = IdGenerator::new();
assert_eq!(id_gen.next_id(), 866);
assert_eq!(id_gen.next_id(), 867);
assert_eq!(id_gen.next_id(), 868);
}
// Registering the same href twice yields the same ID; different hrefs differ.
#[test]
fn test_resource_registry() {
let mut symbols = SymbolTable::new();
let mut registry = ResourceRegistry::new();
let id1 = registry.register("images/cover.jpg", &mut symbols);
let id2 = registry.register("images/cover.jpg", &mut symbols);
let id3 = registry.register("images/other.jpg", &mut symbols);
assert_eq!(id1, id2);
assert_ne!(id1, id3);
}
// A node target gets the first symbol ("a0") and is reachable by node and href.
#[test]
fn test_anchor_registry_internal() {
let mut registry = AnchorRegistry::new();
let target = GlobalNodeId::new(ChapterId(1), NodeId(42));
let symbol = registry.register_internal_target(target, "chapter.xhtml#id42");
assert_eq!(symbol, "a0");
assert!(registry.is_internal_target(target));
assert_eq!(registry.get_symbol(target), Some("a0"));
assert_eq!(registry.get_href_symbol("chapter.xhtml#id42"), Some("a0"));
}
// A chapter target gets the first symbol and is reachable by chapter and href.
#[test]
fn test_anchor_registry_chapter() {
let mut registry = AnchorRegistry::new();
let chapter = ChapterId(5);
let symbol = registry.register_chapter_target(chapter, "chapter5.xhtml");
assert_eq!(symbol, "a0");
assert!(registry.is_chapter_target(chapter));
assert_eq!(registry.get_chapter_symbol(chapter), Some("a0"));
assert_eq!(registry.get_href_symbol("chapter5.xhtml"), Some("a0"));
}
// An external URL gets a symbol and shows up in the drained external anchors.
#[test]
fn test_anchor_registry_external() {
let mut registry = AnchorRegistry::new();
let url = "https://example.com/";
let symbol = registry.register_external(url);
assert_eq!(symbol, "a0");
assert_eq!(registry.get_href_symbol(url), Some("a0"));
let externals = registry.drain_external_anchors();
assert_eq!(externals.len(), 1);
assert_eq!(externals[0].uri, url);
}
// An anchor is created once per target; the second attempt returns None.
#[test]
fn test_anchor_registry_create_anchor() {
let mut registry = AnchorRegistry::new();
let target = GlobalNodeId::new(ChapterId(1), NodeId(42));
registry.register_internal_target(target, "chapter.xhtml#id42");
let symbol = registry.create_anchor(target, 100, 200, 50);
assert_eq!(symbol, Some("a0".to_string()));
let symbol2 = registry.create_anchor(target, 100, 200, 50);
assert_eq!(symbol2, None);
// The position stored is (content fragment ID, offset).
assert_eq!(registry.get_node_position(target), Some((100, 50)));
}
// The context deduplicates interned names and allocates fragment IDs from 866.
#[test]
fn test_export_context() {
let mut ctx = ExportContext::new();
let id1 = ctx.intern("section-1");
let id2 = ctx.intern("section-1");
assert_eq!(id1, id2);
let fid1 = ctx.next_fragment_id();
let fid2 = ctx.next_fragment_id();
assert_eq!(fid1, 866);
assert_eq!(fid2, 867);
}
}