use std::collections::BTreeMap;
use crate::node::NodeId;
use crate::tree::StyledTree;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChunkBoundary {
Section {
section_root: NodeId,
},
Group {
first: NodeId,
last: NodeId,
},
}
impl ChunkBoundary {
#[must_use]
pub fn child_ids<'a>(&self, root_children: &'a [NodeId]) -> &'a [NodeId] {
match self {
ChunkBoundary::Section { section_root } => {
let idx = root_children
.iter()
.position(|c| c == section_root)
.expect("section_root must be a child of the tree root");
&root_children[idx..=idx]
}
ChunkBoundary::Group { first, last } => {
let first_idx = root_children
.iter()
.position(|c| c == first)
.expect("group first must be a child of the tree root");
let last_idx = root_children
.iter()
.position(|c| c == last)
.expect("group last must be a child of the tree root");
&root_children[first_idx..=last_idx]
}
}
}
}
#[derive(Debug, Clone, Default)]
pub struct DocumentSpine {
pub cross_refs: BTreeMap<String, CrossRefEntry>,
pub chunk_page_counts: Vec<u32>,
pub object_index: Vec<ObjectIndexEntry>,
}
impl DocumentSpine {
pub fn add_chunk_pages(&mut self, count: u32) {
self.chunk_page_counts.push(count);
}
#[must_use]
pub fn total_pages(&self) -> u32 {
self.chunk_page_counts.iter().sum()
}
pub fn add_cross_ref(&mut self, element_id: String, chunk_index: usize, local_page: u32) {
self.cross_refs.insert(
element_id,
CrossRefEntry {
chunk_index,
local_page,
},
);
}
#[must_use]
pub fn resolve_element(&self, element_id: &str) -> Option<u32> {
self.cross_refs
.get(element_id)
.map(|entry| entry.absolute_page(&self.chunk_page_counts) + 1)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CrossRefEntry {
pub chunk_index: usize,
pub local_page: u32,
}
impl CrossRefEntry {
#[must_use]
pub fn absolute_page(&self, chunk_page_counts: &[u32]) -> u32 {
let pages_before: u32 = chunk_page_counts.iter().take(self.chunk_index).sum();
pages_before + self.local_page
}
}
#[derive(Debug, Clone)]
pub struct ObjectIndexEntry {
pub chunk_index: usize,
pub byte_offset: u64,
pub byte_length: u64,
}
#[must_use]
pub fn partition_tree(tree: &StyledTree) -> Vec<ChunkBoundary> {
let root = tree.root();
let children = tree.children(root);
if children.is_empty() {
return Vec::new();
}
let mut boundaries = Vec::new();
let mut group_start: Option<NodeId> = None;
for (i, &child_id) in children.iter().enumerate() {
let node = tree.node(child_id);
let keep_with_next = node.style.fragmentation.keep_with_next;
if keep_with_next && i + 1 < children.len() {
if group_start.is_none() {
group_start = Some(child_id);
}
} else if let Some(start) = group_start.take() {
boundaries.push(ChunkBoundary::Group {
first: start,
last: child_id,
});
} else {
boundaries.push(ChunkBoundary::Section {
section_root: child_id,
});
}
}
if let Some(start) = group_start {
let last = *children.last().expect("children is non-empty");
boundaries.push(ChunkBoundary::Group { first: start, last });
}
boundaries
}
#[cfg(test)]
mod tests {
use super::*;
use crate::node::{ContentVariant, TextContent};
use crate::style::ResolvedStyle;
use crate::tree::StyledTreeBuilder;
use crate::version::IrVersion;
#[test]
fn single_leaf_produces_no_chunks() {
let mut b = StyledTreeBuilder::new(IrVersion::new(1, 0));
b.add_node(
ContentVariant::Container,
ResolvedStyle::default(),
None,
None,
);
let tree = b.build().unwrap();
assert!(partition_tree(&tree).is_empty());
}
#[test]
fn three_sections_produce_three_chunks() {
let mut b = StyledTreeBuilder::new(IrVersion::new(1, 0));
let root = b.add_node(
ContentVariant::Container,
ResolvedStyle::default(),
None,
None,
);
for i in 0..3 {
b.add_child(
root,
ContentVariant::Text(TextContent::new(format!("Section {i}"))),
ResolvedStyle::default(),
None,
None,
);
}
let tree = b.build().unwrap();
let chunks = partition_tree(&tree);
assert_eq!(chunks.len(), 3);
for chunk in &chunks {
assert!(matches!(chunk, ChunkBoundary::Section { .. }));
}
}
#[test]
fn keep_with_next_creates_group() {
let mut b = StyledTreeBuilder::new(IrVersion::new(1, 0));
let root = b.add_node(
ContentVariant::Container,
ResolvedStyle::default(),
None,
None,
);
let mut style1 = ResolvedStyle::default();
style1.fragmentation.keep_with_next = true;
b.add_child(
root,
ContentVariant::Text(TextContent::new("Heading")),
style1,
None,
None,
);
b.add_child(
root,
ContentVariant::Text(TextContent::new("Body")),
ResolvedStyle::default(),
None,
None,
);
b.add_child(
root,
ContentVariant::Text(TextContent::new("Footer")),
ResolvedStyle::default(),
None,
None,
);
let tree = b.build().unwrap();
let chunks = partition_tree(&tree);
assert_eq!(chunks.len(), 2);
assert!(matches!(chunks[0], ChunkBoundary::Group { .. }));
assert!(matches!(chunks[1], ChunkBoundary::Section { .. }));
}
#[test]
fn cross_ref_absolute_page() {
let entry = CrossRefEntry {
chunk_index: 2,
local_page: 3,
};
let counts = vec![10, 5, 8];
assert_eq!(entry.absolute_page(&counts), 18);
}
#[test]
fn document_spine_default() {
let spine = DocumentSpine::default();
assert!(spine.cross_refs.is_empty());
assert!(spine.chunk_page_counts.is_empty());
assert!(spine.object_index.is_empty());
}
#[test]
fn partition_produces_valid_boundaries_for_sections() {
let mut b = StyledTreeBuilder::new(IrVersion::new(1, 0));
let root = b.add_node(
ContentVariant::Container,
ResolvedStyle::default(),
None,
None,
);
for i in 0..5 {
let mut style = ResolvedStyle::default();
if i == 1 {
style.fragmentation.keep_with_next = true;
}
b.add_child(
root,
ContentVariant::Text(TextContent::new(format!("Section {i}"))),
style,
None,
None,
);
}
let tree = b.build().unwrap();
let boundaries = partition_tree(&tree);
assert_eq!(boundaries.len(), 4);
assert!(matches!(boundaries[0], ChunkBoundary::Section { .. }));
assert!(matches!(boundaries[1], ChunkBoundary::Group { .. }));
assert!(matches!(boundaries[2], ChunkBoundary::Section { .. }));
assert!(matches!(boundaries[3], ChunkBoundary::Section { .. }));
let children = tree.children(tree.root());
let mut covered = vec![false; children.len()];
for boundary in &boundaries {
match boundary {
ChunkBoundary::Section { section_root } => {
let idx = children.iter().position(|c| c == section_root).unwrap();
assert!(!covered[idx], "node covered twice");
covered[idx] = true;
}
ChunkBoundary::Group { first, last } => {
let first_idx = children.iter().position(|c| c == first).unwrap();
let last_idx = children.iter().position(|c| c == last).unwrap();
for item in &mut covered[first_idx..=last_idx] {
assert!(!*item, "node covered twice");
*item = true;
}
}
}
}
assert!(covered.iter().all(|&c| c), "all children must be covered");
}
#[test]
fn document_spine_constructed_without_full_tree() {
let mut spine = DocumentSpine::default();
spine.chunk_page_counts.push(3);
spine.cross_refs.insert(
"intro".into(),
CrossRefEntry {
chunk_index: 0,
local_page: 0,
},
);
spine.object_index.push(ObjectIndexEntry {
chunk_index: 0,
byte_offset: 0,
byte_length: 1024,
});
spine.chunk_page_counts.push(5);
spine.cross_refs.insert(
"details".into(),
CrossRefEntry {
chunk_index: 1,
local_page: 2,
},
);
spine.object_index.push(ObjectIndexEntry {
chunk_index: 1,
byte_offset: 1024,
byte_length: 2048,
});
let intro_page = spine.cross_refs["intro"].absolute_page(&spine.chunk_page_counts);
assert_eq!(intro_page, 0);
let details_page = spine.cross_refs["details"].absolute_page(&spine.chunk_page_counts);
assert_eq!(details_page, 5);
assert_eq!(spine.object_index.len(), 2);
assert_eq!(
spine.chunk_page_counts.iter().sum::<u32>(),
8,
"total pages across all chunks"
);
}
}