use crate::ir::{IRChapter, Node, NodeId, Role};
use crate::kfx::container::get_field;
use crate::kfx::context::ExportContext;
use crate::kfx::ion::IonValue;
use crate::kfx::schema::{SemanticTarget, schema};
use crate::kfx::symbols::KfxSymbol;
use crate::kfx::tokens::{ContentRef, ElementStart, KfxToken, SpanStart, TokenStream};
use crate::kfx::transforms::ImportContext;
use std::collections::HashMap;
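/// Read-only state threaded through the import tokenizer: the document-local
/// symbol table plus, when available, the anchor map used when resolving link
/// targets.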
struct TokenizeContext<'a> {
doc_symbols: &'a [String],
anchors: Option<&'a HashMap<String, String>>,
}
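/// Shorthand for a builtin KFX symbol's numeric id as a `u64`, the form in
/// which symbol ids appear in Ion field lists throughout this module.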
macro_rules! sym {
($variant:ident) => {
KfxSymbol::$variant as u64
};
}
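/// Flattens a KFX storyline struct into a linear `TokenStream` of
/// `StartElement`/`EndElement` tokens (content references and style events
/// ride along on the element payloads). Returns an empty stream when the
/// value is not a struct or carries no `ContentList`. The `_styles` argument
/// exists for signature symmetry with `parse_storyline_to_ir`; style lookup
/// happens later, in `build_ir_from_tokens`.
///
/// A minimal usage sketch (the storyline value and document symbol table are
/// assumed to come from an already-parsed KFX container):
///
/// ```ignore
/// let tokens = tokenize_storyline(&storyline, &doc_symbols, None, None);
/// for token in &tokens {
///     // match on KfxToken::StartElement / EndElement ...
/// }
/// ```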
pub fn tokenize_storyline(
storyline: &IonValue,
doc_symbols: &[String],
anchors: Option<&HashMap<String, String>>,
_styles: Option<&HashMap<String, Vec<(u64, IonValue)>>>,
) -> TokenStream {
let mut stream = TokenStream::new();
let fields = match storyline.as_struct() {
Some(f) => f,
None => return stream,
};
let content_list = match get_field(fields, sym!(ContentList)) {
Some(list) => list,
None => return stream,
};
let ctx = TokenizeContext {
doc_symbols,
anchors,
};
tokenize_content_list(content_list, &ctx, &mut stream);
stream
}
fn tokenize_content_list(list: &IonValue, ctx: &TokenizeContext, stream: &mut TokenStream) {
let items = match list.as_list() {
Some(l) => l,
None => return,
};
for item in items {
tokenize_content_item(item, ctx, stream);
}
}
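/// Emits a `StartElement`/`EndElement` pair for one content item, recursing
/// into any nested `ContentList`. The element role comes from the schema's
/// KFX-type mapping, but an explicit `yj.semantics.type` annotation on the
/// item overrides it when the schema maps that semantic type to a role.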
fn tokenize_content_item(item: &IonValue, ctx: &TokenizeContext, stream: &mut TokenStream) {
let inner = item.unwrap_annotated();
let fields = match inner.as_struct() {
Some(f) => f,
None => return,
};
let kfx_type_id = get_field(fields, sym!(Type))
.and_then(|v| v.as_symbol())
.unwrap_or(sym!(Container)) as u32;
let mut role = schema().resolve_element_role(kfx_type_id, |symbol| {
get_field(fields, symbol as u64)
.and_then(|v| v.as_int().or_else(|| v.as_symbol().map(|s| s as i64)))
});
if let Some(semantic_type) = get_semantic_type_annotation(fields, ctx.doc_symbols)
&& let Some(mapped_role) = schema().role_for_semantic_type(&semantic_type)
{
role = mapped_role;
}
let id = get_field(fields, sym!(Id)).and_then(|v| v.as_int());
let semantics = extract_all_element_attrs(fields, kfx_type_id, ctx);
let content_ref = get_field(fields, sym!(Content))
.and_then(|v| v.as_struct())
.and_then(|content_fields| {
let name = get_field(content_fields, sym!(Name))
.and_then(|v| resolve_symbol_or_string(v, ctx.doc_symbols))?;
let index = get_field(content_fields, sym!(Index))
.and_then(|v| v.as_int())
.map(|n| n as usize)?;
Some(ContentRef { name, index })
});
let style_events = get_field(fields, sym!(StyleEvents))
.and_then(|v| v.as_list())
.map(|events| parse_style_events(events, ctx))
.unwrap_or_default();
let style_name =
get_field(fields, sym!(Style)).and_then(|v| resolve_symbol_or_string(v, ctx.doc_symbols));
stream.push(KfxToken::StartElement(ElementStart {
role,
id,
semantics,
content_ref,
style_events,
kfx_attrs: Vec::new(),
        style_symbol: None,
        style_name,
    }));
    if let Some(children) = get_field(fields, sym!(ContentList)) {
tokenize_content_list(children, ctx, stream);
}
stream.push(KfxToken::EndElement);
}
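/// Looks up the document-local `yj.semantics.type` annotation. Document
/// symbols are numbered after the builtin table, so the field id is
/// `KFX_SYMBOL_TABLE_SIZE` plus the symbol's index in `doc_symbols`.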
fn get_semantic_type_annotation(
fields: &[(u64, IonValue)],
doc_symbols: &[String],
) -> Option<String> {
let doc_idx = doc_symbols.iter().position(|s| s == "yj.semantics.type")?;
let field_id = crate::kfx::symbols::KFX_SYMBOL_TABLE_SIZE + doc_idx;
get_field(fields, field_id as u64).and_then(|v| resolve_symbol_or_string(v, doc_symbols))
}
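/// Applies every schema attribute rule for the element's KFX type, resolving
/// raw field values (symbol or string) and running them through the import
/// transforms. The results are flattened to strings keyed by their
/// `SemanticTarget` (src, href, alt, id, epub:type).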
fn extract_all_element_attrs(
fields: &[(u64, IonValue)],
kfx_type_id: u32,
ctx: &TokenizeContext,
) -> HashMap<SemanticTarget, String> {
let mut result = HashMap::new();
let import_ctx = ImportContext {
doc_symbols: ctx.doc_symbols,
chapter_id: None,
anchors: ctx.anchors,
};
for rule in schema().element_attr_rules(kfx_type_id) {
if let Some(raw_value) = get_field(fields, rule.kfx_field as u64)
.and_then(|v| resolve_symbol_or_string(v, ctx.doc_symbols))
{
let parsed = rule.transform.import(&raw_value, &import_ctx);
let final_value = match parsed {
crate::kfx::transforms::ParsedAttribute::String(s) => s,
crate::kfx::transforms::ParsedAttribute::Link(link) => link.to_href(),
crate::kfx::transforms::ParsedAttribute::Anchor(id) => id,
};
result.insert(rule.target, final_value);
}
}
result
}
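/// Converts a `style_events` Ion list into `SpanStart` records. Offsets and
/// lengths are counted in characters, matching how the exporter accumulates
/// text (see `IonBuilder::append_text`).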
fn parse_style_events(events: &[IonValue], ctx: &TokenizeContext) -> Vec<SpanStart> {
events
.iter()
.filter_map(|event| {
let fields = event.as_struct()?;
let offset = get_field(fields, sym!(Offset))
.and_then(|v| v.as_int())
.map(|n| n as usize)?;
let length = get_field(fields, sym!(Length))
.and_then(|v| v.as_int())
.map(|n| n as usize)?;
let has_field = |symbol: KfxSymbol| get_field(fields, symbol as u64).is_some();
let role = schema().resolve_span_role(has_field);
let semantics = extract_all_span_attrs(fields, has_field, ctx);
Some(SpanStart {
role,
semantics,
offset,
length,
                style_symbol: None,
                kfx_attrs: Vec::new(),
})
})
.collect()
}
fn extract_all_span_attrs<F>(
fields: &[(u64, IonValue)],
has_field: F,
ctx: &TokenizeContext,
) -> HashMap<SemanticTarget, String>
where
F: Fn(KfxSymbol) -> bool,
{
let mut result = HashMap::new();
let import_ctx = ImportContext {
doc_symbols: ctx.doc_symbols,
chapter_id: None,
anchors: ctx.anchors,
};
for rule in schema().span_attr_rules(&has_field) {
if let Some(raw_value) = get_field(fields, rule.kfx_field as u64)
.and_then(|v| resolve_symbol_or_string(v, ctx.doc_symbols))
{
let parsed = rule.transform.import(&raw_value, &import_ctx);
let final_value = match parsed {
crate::kfx::transforms::ParsedAttribute::String(s) => s,
crate::kfx::transforms::ParsedAttribute::Link(link) => link.to_href(),
crate::kfx::transforms::ParsedAttribute::Anchor(id) => id,
};
result.insert(rule.target, final_value);
}
}
result
}
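/// Replays a token stream into an `IRChapter` tree. `content_lookup` resolves
/// a content reference (name plus index) to the actual text; `styles`, when
/// provided, maps style names to KFX properties that get interned per node.
/// Standalone span tokens are ignored here because, on import, spans arrive
/// as `style_events` on their parent element.
///
/// A minimal sketch with a hand-built stream (mirrors the tests below):
///
/// ```ignore
/// let mut stream = TokenStream::new();
/// stream.start_element(Role::Paragraph);
/// stream.text("Hello");
/// stream.end_element();
/// let chapter = build_ir_from_tokens(&stream, None, |_, _| None);
/// ```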
pub fn build_ir_from_tokens<F>(
tokens: &TokenStream,
styles: Option<&HashMap<String, Vec<(u64, IonValue)>>>,
mut content_lookup: F,
) -> IRChapter
where
F: FnMut(&str, usize) -> Option<String>,
{
let mut chapter = IRChapter::new();
let mut stack: Vec<NodeId> = vec![chapter.root()];
for token in tokens {
match token {
KfxToken::StartElement(elem) => {
let parent = *stack.last().unwrap_or(&chapter.root());
let node = Node::new(elem.role);
let node_id = chapter.alloc_node(node);
chapter.append_child(parent, node_id);
if let Some(style_name) = &elem.style_name
&& let Some(styles_map) = styles
&& let Some(kfx_props) = styles_map.get(style_name)
{
let ir_style = kfx_style_to_ir(kfx_props);
let style_id = chapter.styles.intern(ir_style);
if let Some(node) = chapter.node_mut(node_id) {
node.style = style_id;
}
}
apply_semantics_to_node(&mut chapter, node_id, &elem.semantics);
if let Some(ref content_ref) = elem.content_ref
&& let Some(text) = content_lookup(&content_ref.name, content_ref.index)
{
if elem.style_events.is_empty() {
let range = chapter.append_text(&text);
let text_node = chapter.alloc_node(Node::text(range));
chapter.append_child(node_id, text_node);
} else {
build_text_with_spans(&mut chapter, node_id, &text, &elem.style_events);
}
}
stack.push(node_id);
}
KfxToken::EndElement => {
stack.pop();
}
KfxToken::Text(text) => {
let parent = *stack.last().unwrap_or(&chapter.root());
let range = chapter.append_text(text);
let text_node = chapter.alloc_node(Node::text(range));
chapter.append_child(parent, text_node);
}
            KfxToken::StartSpan(_) | KfxToken::EndSpan => {
                // No-op on import: spans reach the IR as `style_events` on
                // their parent `StartElement`, not as standalone tokens.
            }
}
}
chapter
}
fn apply_semantics_to_node(
chapter: &mut IRChapter,
node_id: NodeId,
semantics: &HashMap<SemanticTarget, String>,
) {
for (target, value) in semantics {
match target {
SemanticTarget::Src => chapter.semantics.set_src(node_id, value.clone()),
SemanticTarget::Href => {
chapter.semantics.set_href(node_id, value.clone());
}
SemanticTarget::Alt => chapter.semantics.set_alt(node_id, value.clone()),
SemanticTarget::Id => chapter.semantics.set_id(node_id, value.clone()),
SemanticTarget::EpubType => chapter.semantics.set_epub_type(node_id, value.clone()),
}
}
}
fn kfx_style_to_ir(props: &[(u64, IonValue)]) -> crate::ir::ComputedStyle {
use crate::kfx::style_schema::{StyleSchema, import_kfx_style};
let schema = StyleSchema::standard();
import_kfx_style(&schema, props)
}
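/// Splits `text` into alternating plain text nodes and span nodes under
/// `parent`. Spans are sorted by `(offset, length)`; when two spans start at
/// the same offset, the longer (outer) one is dropped so the survivors can be
/// emitted as a flat, non-nested sequence. Span offsets are character-based
/// and are converted to byte offsets before slicing.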
fn build_text_with_spans(chapter: &mut IRChapter, parent: NodeId, text: &str, spans: &[SpanStart]) {
let mut sorted_spans: Vec<_> = spans.iter().collect();
sorted_spans.sort_by_key(|s| (s.offset, s.length));
let filtered_spans: Vec<_> = sorted_spans
.iter()
.filter(|span| {
!sorted_spans.iter().any(|other| {
other.offset == span.offset
&& other.length < span.length
&& other.offset + other.length <= span.offset + span.length
})
})
.copied()
.collect();
let sorted_spans = filtered_spans;
let mut pos = 0;
for span in sorted_spans {
let span_start = char_to_byte_offset(text, span.offset);
let span_end = char_to_byte_offset(text, span.offset + span.length);
if span_start > pos {
let before = &text[pos..span_start];
if !before.is_empty() {
let range = chapter.append_text(before);
let text_node = chapter.alloc_node(Node::text(range));
chapter.append_child(parent, text_node);
}
}
if span_end > span_start {
let span_text = &text[span_start..span_end];
let span_node = chapter.alloc_node(Node::new(span.role));
chapter.append_child(parent, span_node);
apply_semantics_to_node(chapter, span_node, &span.semantics);
let range = chapter.append_text(span_text);
let text_node = chapter.alloc_node(Node::text(range));
chapter.append_child(span_node, text_node);
}
pos = span_end;
}
if pos < text.len() {
let after = &text[pos..];
if !after.is_empty() {
let range = chapter.append_text(after);
let text_node = chapter.alloc_node(Node::text(range));
chapter.append_child(parent, text_node);
}
}
}
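/// Maps a character offset into `text` to the corresponding byte offset,
/// clamping past-the-end offsets to `text.len()`. KFX style events count in
/// characters while Rust string slicing works in bytes, so every span
/// boundary goes through this conversion.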
fn char_to_byte_offset(text: &str, char_offset: usize) -> usize {
text.char_indices()
.nth(char_offset)
.map(|(byte_idx, _)| byte_idx)
.unwrap_or(text.len())
}
fn resolve_symbol(id: u64, doc_symbols: &[String]) -> Option<&str> {
crate::kfx::container::resolve_symbol(id, doc_symbols)
}
fn resolve_symbol_or_string(value: &IonValue, doc_symbols: &[String]) -> Option<String> {
match value {
IonValue::String(s) => Some(s.clone()),
IonValue::Symbol(id) => resolve_symbol(*id, doc_symbols).map(|s| s.to_string()),
_ => None,
}
}
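/// Convenience wrapper for the import direction: tokenize a storyline, then
/// build the IR chapter from the resulting stream in one call.
///
/// A sketch of the intended call site; the inputs are assumed to come from a
/// decoded KFX container, and `content_store` is a hypothetical stand-in for
/// whatever owns the decoded text blocks:
///
/// ```ignore
/// let chapter = parse_storyline_to_ir(
///     &storyline,
///     &doc_symbols,
///     Some(&anchors),
///     Some(&styles),
///     |name, index| content_store.lookup(name, index), // hypothetical lookup
/// );
/// ```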
pub fn parse_storyline_to_ir<F>(
storyline: &IonValue,
doc_symbols: &[String],
anchors: Option<&HashMap<String, String>>,
styles: Option<&HashMap<String, Vec<(u64, IonValue)>>>,
content_lookup: F,
) -> IRChapter
where
F: FnMut(&str, usize) -> Option<String>,
{
let tokens = tokenize_storyline(storyline, doc_symbols, anchors, styles);
build_ir_from_tokens(&tokens, styles, content_lookup)
}
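/// Export direction: walks the IR tree and emits a token stream. Inline roles
/// become span tokens, block roles become element tokens, and `Role::Break`
/// is flattened to a newline text token.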
pub fn ir_to_tokens(chapter: &IRChapter, ctx: &mut ExportContext) -> TokenStream {
let sch = schema();
let mut stream = TokenStream::new();
walk_node_for_export(chapter, chapter.root(), sch, ctx, &mut stream);
stream
}
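/// Recursive export walker. The root node is transparent (only its children
/// are emitted); text and break nodes become `Text` tokens; inline roles are
/// delegated to `emit_span_for_export`; everything else becomes a
/// `StartElement`/`EndElement` pair carrying its style symbol, exported KFX
/// attributes, and semantic values.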
fn walk_node_for_export(
chapter: &IRChapter,
node_id: NodeId,
sch: &crate::kfx::schema::KfxSchema,
ctx: &mut ExportContext,
stream: &mut TokenStream,
) {
let node = match chapter.node(node_id) {
Some(n) => n,
None => return,
};
if node.role == Role::Root {
for child in chapter.children(node_id) {
walk_node_for_export(chapter, child, sch, ctx, stream);
}
return;
}
if node.role == Role::Text {
if !node.text.is_empty() {
let text = chapter.text(node.text);
if !text.is_empty() {
stream.push(KfxToken::Text(text.to_string()));
}
}
return;
}
if node.role == Role::Break {
stream.push(KfxToken::Text("\n".to_string()));
return;
}
if sch.is_inline_role(node.role) {
emit_span_for_export(chapter, node_id, node, sch, ctx, stream);
return;
}
let mut elem = ElementStart::new(node.role);
let style_symbol = ctx.register_style_id(node.style, &chapter.styles);
elem.style_symbol = Some(style_symbol);
let export_ctx = crate::kfx::transforms::ExportContext {
spine_map: None,
resource_registry: Some(&ctx.resource_registry),
};
let kfx_attrs = sch.export_attributes(
node.role,
|target| match target {
SemanticTarget::Href => chapter.semantics.href(node_id).map(|s| s.to_string()),
SemanticTarget::Src => chapter.semantics.src(node_id).map(|s| s.to_string()),
SemanticTarget::Alt => chapter.semantics.alt(node_id).map(|s| s.to_string()),
SemanticTarget::Id => chapter.semantics.id(node_id).map(|s| s.to_string()),
SemanticTarget::EpubType => chapter.semantics.epub_type(node_id).map(|s| s.to_string()),
},
&export_ctx,
);
elem.kfx_attrs = kfx_attrs;
if let Some(href) = chapter.semantics.href(node_id) {
elem.set_semantic(SemanticTarget::Href, href.to_string());
}
if let Some(src) = chapter.semantics.src(node_id) {
elem.set_semantic(SemanticTarget::Src, src.to_string());
ctx.resource_registry.register(src, &mut ctx.symbols);
}
if let Some(alt) = chapter.semantics.alt(node_id) {
elem.set_semantic(SemanticTarget::Alt, alt.to_string());
}
if let Some(id) = chapter.semantics.id(node_id) {
elem.set_semantic(SemanticTarget::Id, id.to_string());
}
if let Some(epub_type) = chapter.semantics.epub_type(node_id) {
elem.set_semantic(SemanticTarget::EpubType, epub_type.to_string());
}
stream.push(KfxToken::StartElement(elem));
if !node.text.is_empty() {
let text = chapter.text(node.text);
if !text.is_empty() {
stream.push(KfxToken::Text(text.to_string()));
}
}
for child in chapter.children(node_id) {
walk_node_for_export(chapter, child, sch, ctx, stream);
}
stream.push(KfxToken::EndElement);
}
fn emit_span_for_export(
chapter: &IRChapter,
node_id: NodeId,
node: &crate::ir::Node,
sch: &crate::kfx::schema::KfxSchema,
ctx: &mut ExportContext,
stream: &mut TokenStream,
) {
let mut span = SpanStart::new(node.role, 0, 0);
let style_symbol = ctx.register_style_id(node.style, &chapter.styles);
span.style_symbol = Some(style_symbol);
let export_ctx = crate::kfx::transforms::ExportContext {
spine_map: None,
resource_registry: Some(&ctx.resource_registry),
};
let mut kfx_attrs = sch.export_span_attributes(
node.role,
|target| match target {
SemanticTarget::Href => chapter.semantics.href(node_id).map(|s| s.to_string()),
SemanticTarget::Src => chapter.semantics.src(node_id).map(|s| s.to_string()),
SemanticTarget::Alt => chapter.semantics.alt(node_id).map(|s| s.to_string()),
SemanticTarget::Id => chapter.semantics.id(node_id).map(|s| s.to_string()),
SemanticTarget::EpubType => chapter.semantics.epub_type(node_id).map(|s| s.to_string()),
},
&export_ctx,
);
    // Link targets must resolve to registered anchors in the exported
    // container, so raw values are rewritten through the anchor registry.
    for (field_id, value) in &mut kfx_attrs {
if *field_id == sym!(LinkTo) {
let anchor_symbol = ctx.anchor_registry.register_link_target(value);
*value = anchor_symbol;
}
}
span.kfx_attrs = kfx_attrs;
if let Some(href) = chapter.semantics.href(node_id) {
span.set_semantic(SemanticTarget::Href, href.to_string());
}
if let Some(id) = chapter.semantics.id(node_id) {
span.set_semantic(SemanticTarget::Id, id.to_string());
}
stream.push(KfxToken::StartSpan(span));
if !node.text.is_empty() {
let text = chapter.text(node.text);
if !text.is_empty() {
stream.push(KfxToken::Text(text.to_string()));
}
}
for child in chapter.children(node_id) {
walk_node_for_export(chapter, child, sch, ctx, stream);
}
stream.push(KfxToken::EndSpan);
}
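/// Serializes an exported token stream back into storyline Ion. A stack of
/// `IonBuilder`s mirrors element nesting (each element gets a fresh fragment
/// id and may register anchors), while a parallel span stack turns
/// `StartSpan`/`EndSpan` pairs into offset/length style events measured in
/// characters against the enclosing element's accumulated text.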
pub fn tokens_to_ion(tokens: &TokenStream, ctx: &mut ExportContext) -> IonValue {
let mut stack: Vec<IonBuilder> = vec![IonBuilder::new()];
let mut span_stack: Vec<(usize, SpanStart)> = Vec::new();
for token in tokens {
match token {
KfxToken::StartElement(elem) => {
let mut fields = Vec::new();
let container_id = ctx.fragment_ids.next_id();
fields.push((sym!(Id), IonValue::Int(container_id as i64)));
ctx.record_content_id(container_id);
ctx.resolve_pending_chapter_start_anchor(container_id);
if let Some(anchor_id) = elem.get_semantic(SemanticTarget::Id) {
ctx.create_anchor_if_needed(anchor_id, container_id, 0);
}
let style_sym = elem.style_symbol.unwrap_or(ctx.default_style_symbol);
fields.push((sym!(Style), IonValue::Symbol(style_sym)));
if let Some(kfx_type) = schema().kfx_type_for_role(elem.role) {
fields.push((sym!(Type), IonValue::Symbol(kfx_type as u64)));
}
if let Some(strategy) = schema().export_strategy(elem.role)
&& let Some(semantic_type) = strategy.semantic_type()
{
let field_id = ctx.symbols.get_or_intern("yj.semantics.type");
let value_id = ctx.symbols.get_or_intern(semantic_type);
fields.push((field_id, IonValue::Symbol(value_id)));
}
if let Role::Heading(level) = elem.role {
fields.push((sym!(YjSemanticsHeadingLevel), IonValue::Int(level as i64)));
ctx.record_heading_with_id(level, container_id);
}
if elem.role == Role::OrderedList {
fields.push((sym!(ListStyle), IonValue::Symbol(sym!(Numeric))));
}
let layout_hint = match elem.role {
Role::Heading(_) => Some(KfxSymbol::TreatAsTitle),
Role::Figure => Some(KfxSymbol::Figure),
Role::Caption => Some(KfxSymbol::Caption),
_ => {
if let Some(epub_type) = elem.get_semantic(SemanticTarget::EpubType) {
let has_title_type = epub_type.split_whitespace().any(|t| {
matches!(
t,
"title" | "fulltitle" | "subtitle" | "covertitle" | "halftitle"
)
});
let has_caption_type = epub_type
.split_whitespace()
.any(|t| matches!(t, "caption" | "figcaption"));
if has_title_type {
Some(KfxSymbol::TreatAsTitle)
} else if has_caption_type {
Some(KfxSymbol::Caption)
} else {
None
}
} else {
None
}
}
};
if let Some(hint) = layout_hint {
fields.push((
sym!(LayoutHints),
IonValue::List(vec![IonValue::Symbol(hint as u64)]),
));
}
if let Some(epub_type) = elem.get_semantic(SemanticTarget::EpubType) {
let types: Vec<&str> = epub_type.split_whitespace().collect();
let is_footnote = types.contains(&"footnote");
let is_endnote = types.contains(&"endnote") || types.contains(&"rearnote");
let is_sidenote = types.contains(&"sidebar") || types.contains(&"marginalia");
if is_endnote {
fields.push((
sym!(YjClassification),
IonValue::Symbol(KfxSymbol::YjEndnote as u64),
));
} else if is_sidenote {
fields.push((
sym!(YjClassification),
IonValue::Symbol(KfxSymbol::YjSidenote as u64),
));
} else if is_footnote {
fields.push((
sym!(YjClassification),
IonValue::Symbol(KfxSymbol::Footnote as u64),
));
}
}
for (field_id, value_str) in &elem.kfx_attrs {
                    // Heuristic: resource names, link targets, fragment refs
                    // ("#...") and path-like values are interned as symbols;
                    // everything else is written as a plain string.
                    let is_symbol_field = *field_id == sym!(ResourceName)
|| *field_id == sym!(LinkTo)
|| value_str.starts_with('#')
|| value_str.contains('/');
if is_symbol_field {
let sym_id = ctx.symbols.get_or_intern(value_str);
fields.push((*field_id, IonValue::Symbol(sym_id)));
} else {
fields.push((*field_id, IonValue::String(value_str.clone())));
}
}
stack.push(IonBuilder::with_fields(fields, container_id));
}
KfxToken::EndElement => {
if let Some(completed) = stack.pop()
&& let Some(parent) = stack.last_mut()
{
parent.add_child(completed.build(ctx));
}
}
KfxToken::Text(text) => {
if let Some(current) = stack.last_mut() {
current.append_text(text);
}
}
KfxToken::StartSpan(span) => {
let current_offset = stack.last().map(|b| b.text_len()).unwrap_or(0);
if let Some(anchor_id) = span.get_semantic(SemanticTarget::Id)
&& let Some(parent) = stack.last()
&& let Some(container_id) = parent.container_id
{
ctx.create_anchor_if_needed(anchor_id, container_id, current_offset);
}
span_stack.push((current_offset, span.clone()));
}
KfxToken::EndSpan => {
if let Some((start_offset, mut span_info)) = span_stack.pop() {
let current_offset = stack.last().map(|b| b.text_len()).unwrap_or(0);
let length = current_offset.saturating_sub(start_offset);
span_info.offset = start_offset;
span_info.length = length;
if let Some(current) = stack.last_mut() {
current.add_style_event(span_info, ctx);
}
}
}
}
}
if let Some(root) = stack.pop() {
root.build(ctx)
} else {
IonValue::List(vec![])
}
}
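/// Accumulates one element's state during export: Ion fields, child elements,
/// raw text (tracked both as bytes and as a character count), and style
/// events. `build` finalizes the element, registering accumulated text with
/// the export context and attaching `Content`, `StyleEvents`, and
/// `ContentList` fields as needed; the field-less root builder collapses to a
/// plain list of top-level elements.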
struct IonBuilder {
fields: Vec<(u64, IonValue)>,
children: Vec<IonValue>,
accumulated_text: String,
accumulated_char_count: usize,
style_events: Vec<IonValue>,
container_id: Option<u64>,
}
impl IonBuilder {
fn new() -> Self {
Self {
fields: Vec::new(),
children: Vec::new(),
accumulated_text: String::new(),
accumulated_char_count: 0,
style_events: Vec::new(),
container_id: None,
}
}
fn with_fields(fields: Vec<(u64, IonValue)>, container_id: u64) -> Self {
Self {
fields,
children: Vec::new(),
accumulated_text: String::new(),
accumulated_char_count: 0,
style_events: Vec::new(),
container_id: Some(container_id),
}
}
fn add_child(&mut self, child: IonValue) {
self.children.push(child);
}
fn append_text(&mut self, text: &str) -> usize {
let offset = self.accumulated_char_count;
self.accumulated_text.push_str(text);
self.accumulated_char_count += text.chars().count();
offset
}
fn text_len(&self) -> usize {
self.accumulated_char_count
}
fn add_style_event(&mut self, span: SpanStart, ctx: &mut ExportContext) {
let mut event_fields = Vec::new();
event_fields.push((sym!(Offset), IonValue::Int(span.offset as i64)));
event_fields.push((sym!(Length), IonValue::Int(span.length as i64)));
if let Some(style_sym) = span.style_symbol {
event_fields.push((sym!(Style), IonValue::Symbol(style_sym)));
} else {
event_fields.push((sym!(Style), IonValue::Symbol(ctx.default_style_symbol)));
}
for (field_id, value_str) in &span.kfx_attrs {
if *field_id == sym!(LinkTo) {
let sym_id = ctx.symbols.get_or_intern(value_str);
event_fields.push((*field_id, IonValue::Symbol(sym_id)));
} else if *field_id == sym!(YjDisplay) {
if let Ok(sym_id) = value_str.parse::<u64>() {
event_fields.push((*field_id, IonValue::Symbol(sym_id)));
}
} else {
event_fields.push((*field_id, IonValue::String(value_str.clone())));
}
}
self.style_events.push(IonValue::Struct(event_fields));
}
fn build(mut self, ctx: &mut ExportContext) -> IonValue {
if !self.fields.is_empty() {
if let Some(container_id) = self.container_id {
ctx.record_content_length(container_id, self.accumulated_text.len());
}
if !self.accumulated_text.is_empty() {
let (content_idx, _offset) = ctx.append_text(&self.accumulated_text);
let content_ref = IonValue::Struct(vec![
(sym!(Name), IonValue::Symbol(ctx.current_content_name)),
(sym!(Index), IonValue::Int(content_idx as i64)),
]);
self.fields.push((sym!(Content), content_ref));
}
if !self.style_events.is_empty() {
self.fields
.push((sym!(StyleEvents), IonValue::List(self.style_events)));
}
if !self.children.is_empty() {
self.fields
.push((sym!(ContentList), IonValue::List(self.children)));
}
IonValue::Struct(self.fields)
} else if !self.children.is_empty() {
IonValue::List(self.children)
} else {
IonValue::Null
}
}
}
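/// Full export pipeline for a chapter: IR -> tokens -> storyline Ion.
///
/// A minimal sketch (mirrors the round-trip tests below):
///
/// ```ignore
/// let mut ctx = ExportContext::new();
/// ctx.register_section("section_1");
/// let ion = build_storyline_ion(&chapter, &mut ctx);
/// ```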
pub fn build_storyline_ion(chapter: &IRChapter, ctx: &mut ExportContext) -> IonValue {
let tokens = ir_to_tokens(chapter, ctx);
tokens_to_ion(&tokens, ctx)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::Role;
#[test]
    fn test_build_ir_creates_proper_structure() {
let mut stream = TokenStream::new();
stream.start_element(Role::Paragraph);
stream.text("Hello");
stream.end_element();
let chapter = build_ir_from_tokens(&stream, None, |_, _| None);
        assert_eq!(chapter.node_count(), 3); // root + paragraph + text
    }
#[test]
fn test_build_ir_with_image() {
let mut stream = TokenStream::new();
let mut semantics = HashMap::new();
semantics.insert(SemanticTarget::Src, "cover.jpg".to_string());
stream.push(KfxToken::StartElement(ElementStart {
role: Role::Image,
id: Some(123),
semantics,
content_ref: None,
style_events: Vec::new(),
kfx_attrs: Vec::new(),
style_symbol: None,
style_name: None,
}));
stream.end_element();
let chapter = build_ir_from_tokens(&stream, None, |_, _| None);
let children: Vec<_> = chapter.children(chapter.root()).collect();
assert_eq!(children.len(), 1);
let image_node = chapter.node(children[0]).unwrap();
assert_eq!(image_node.role, Role::Image);
assert_eq!(chapter.semantics.src(children[0]), Some("cover.jpg"));
}
#[test]
fn test_build_ir_with_text_content() {
let mut stream = TokenStream::new();
stream.push(KfxToken::StartElement(ElementStart {
role: Role::Paragraph,
id: None,
semantics: HashMap::new(),
content_ref: Some(ContentRef {
name: "content_1".to_string(),
index: 0,
}),
style_events: Vec::new(),
kfx_attrs: Vec::new(),
style_symbol: None,
style_name: None,
}));
stream.end_element();
let chapter = build_ir_from_tokens(&stream, None, |name, idx| {
if name == "content_1" && idx == 0 {
Some("Hello, world!".to_string())
} else {
None
}
});
        assert_eq!(chapter.node_count(), 3); // root + paragraph + text
        let para_id = chapter.children(chapter.root()).next().unwrap();
let text_id = chapter.children(para_id).next().unwrap();
let text_node = chapter.node(text_id).unwrap();
assert_eq!(chapter.text(text_node.text), "Hello, world!");
}
#[test]
fn test_build_ir_with_heading() {
let mut stream = TokenStream::new();
stream.push(KfxToken::StartElement(ElementStart {
role: Role::Heading(2),
id: None,
semantics: HashMap::new(),
content_ref: Some(ContentRef {
name: "content_1".to_string(),
index: 0,
}),
style_events: Vec::new(),
kfx_attrs: Vec::new(),
style_symbol: None,
style_name: None,
}));
stream.end_element();
let chapter = build_ir_from_tokens(&stream, None, |_, _| Some("Chapter 1".to_string()));
let heading_id = chapter.children(chapter.root()).next().unwrap();
let heading = chapter.node(heading_id).unwrap();
assert_eq!(heading.role, Role::Heading(2));
}
#[test]
fn test_build_ir_with_link_span() {
let mut stream = TokenStream::new();
let mut span_semantics = HashMap::new();
span_semantics.insert(SemanticTarget::Href, "chapter2".to_string());
stream.push(KfxToken::StartElement(ElementStart {
role: Role::Paragraph,
id: None,
semantics: HashMap::new(),
content_ref: Some(ContentRef {
name: "content_1".to_string(),
index: 0,
}),
style_events: vec![SpanStart {
role: Role::Link,
semantics: span_semantics,
offset: 7,
length: 5,
style_symbol: None,
kfx_attrs: Vec::new(),
}],
kfx_attrs: Vec::new(),
style_symbol: None,
style_name: None,
}));
stream.end_element();
let chapter = build_ir_from_tokens(&stream, None, |_, _| Some("Hello, world!".to_string()));
let para_id = chapter.children(chapter.root()).next().unwrap();
let children: Vec<_> = chapter.children(para_id).collect();
assert_eq!(children.len(), 3);
let first = chapter.node(children[0]).unwrap();
assert_eq!(first.role, Role::Text);
assert_eq!(chapter.text(first.text), "Hello, ");
let link = chapter.node(children[1]).unwrap();
assert_eq!(link.role, Role::Link);
assert_eq!(chapter.semantics.href(children[1]), Some("chapter2"));
let last = chapter.node(children[2]).unwrap();
assert_eq!(last.role, Role::Text);
assert_eq!(chapter.text(last.text), "!");
}
#[test]
fn test_char_to_byte_offset() {
let text = "Hello ὑπόληψις world";
        assert_eq!(char_to_byte_offset(text, 0), 0);
        assert_eq!(char_to_byte_offset(text, 5), 5);
        assert_eq!(char_to_byte_offset(text, 6), 6); // "Hello " is pure ASCII
        assert_eq!(char_to_byte_offset(text, 7), 9); // ὑ occupies 3 bytes in UTF-8
        assert_eq!(char_to_byte_offset(text, 14), 23); // 8 Greek chars, 17 bytes
assert_eq!(char_to_byte_offset(text, 100), text.len());
}
#[test]
fn test_apply_semantics_generic() {
let mut chapter = IRChapter::new();
let node = Node::new(Role::Image);
let node_id = chapter.alloc_node(node);
let mut semantics = HashMap::new();
semantics.insert(SemanticTarget::Src, "image.jpg".to_string());
semantics.insert(SemanticTarget::Alt, "An image".to_string());
apply_semantics_to_node(&mut chapter, node_id, &semantics);
assert_eq!(chapter.semantics.src(node_id), Some("image.jpg"));
assert_eq!(chapter.semantics.alt(node_id), Some("An image"));
}
#[test]
fn test_ir_to_tokens_basic() {
let mut chapter = IRChapter::new();
let text_range = chapter.append_text("Hello");
let mut text_node = Node::new(Role::Text);
text_node.text = text_range;
let text_id = chapter.alloc_node(text_node);
chapter.append_child(chapter.root(), text_id);
let mut ctx = ExportContext::new();
let tokens = ir_to_tokens(&chapter, &mut ctx);
assert!(!tokens.is_empty());
}
#[test]
fn test_build_storyline_ion() {
let mut chapter = IRChapter::new();
let para = Node::new(Role::Paragraph);
let para_id = chapter.alloc_node(para);
chapter.append_child(chapter.root(), para_id);
let text_range = chapter.append_text("Test content");
let mut text_node = Node::new(Role::Text);
text_node.text = text_range;
let text_id = chapter.alloc_node(text_node);
chapter.append_child(para_id, text_id);
let mut ctx = ExportContext::new();
let ion = build_storyline_ion(&chapter, &mut ctx);
assert!(!matches!(ion, IonValue::Null));
}
#[test]
fn test_tokens_to_ion_empty() {
let tokens = TokenStream::new();
let mut ctx = ExportContext::new();
let ion = tokens_to_ion(&tokens, &mut ctx);
assert!(
matches!(ion, IonValue::List(_)) || matches!(ion, IonValue::Null),
"expected List or Null, got {:?}",
ion
);
}
#[test]
fn test_heading_level_export() {
use crate::kfx::symbols::KfxSymbol;
let mut chapter = IRChapter::new();
let h2 = Node::new(Role::Heading(2));
let h2_id = chapter.alloc_node(h2);
chapter.append_child(chapter.root(), h2_id);
let text_range = chapter.append_text("Chapter Title");
let mut text_node = Node::new(Role::Text);
text_node.text = text_range;
let text_id = chapter.alloc_node(text_node);
chapter.append_child(h2_id, text_id);
let mut ctx = ExportContext::new();
let ion = build_storyline_ion(&chapter, &mut ctx);
fn find_heading_level(ion: &IonValue) -> Option<i64> {
match ion {
IonValue::Struct(fields) => {
for (field_id, value) in fields {
if *field_id == KfxSymbol::YjSemanticsHeadingLevel as u64
&& let IonValue::Int(level) = value
{
return Some(*level);
}
}
for (field_id, value) in fields {
if *field_id == KfxSymbol::ContentList as u64
&& let Some(level) = find_heading_level(value)
{
return Some(level);
}
}
}
IonValue::List(items) => {
for item in items {
if let Some(level) = find_heading_level(item) {
return Some(level);
}
}
}
_ => {}
}
None
}
let heading_level = find_heading_level(&ion);
assert_eq!(
heading_level,
Some(2),
"Expected yj.semantics.heading_level = 2, got {:?}",
heading_level
);
}
#[test]
fn test_style_event_offsets_use_char_count() {
let mut builder = IonBuilder::new();
builder.append_text("Hello ");
assert_eq!(builder.text_len(), 6);
let greek_offset = builder.append_text("ὑπόληψις");
assert_eq!(greek_offset, 6, "Greek text should start at char offset 6");
assert_eq!(builder.text_len(), 14, "Total should be 14 chars (6 + 8)");
        assert_eq!(builder.accumulated_text.len(), 23); // byte length
        assert_eq!(builder.accumulated_char_count, 14); // char count
    }
#[test]
fn test_layout_hints_for_heading() {
let mut chapter = IRChapter::new();
let text_range = chapter.append_text("Chapter 1");
let mut text_node = Node::new(Role::Text);
text_node.text = text_range;
let text_id = chapter.alloc_node(text_node);
let heading = Node::new(Role::Heading(1));
let heading_id = chapter.alloc_node(heading);
chapter.append_child(heading_id, text_id);
chapter.append_child(chapter.root(), heading_id);
let mut ctx = crate::kfx::context::ExportContext::new();
ctx.register_section("test_section");
let ion = build_storyline_ion(&chapter, &mut ctx);
fn find_layout_hints(ion: &IonValue) -> Option<Vec<u64>> {
match ion {
IonValue::Struct(fields) => {
for (key, value) in fields {
if *key == sym!(LayoutHints)
&& let IonValue::List(items) = value
{
return Some(
items
.iter()
.filter_map(|v| {
if let IonValue::Symbol(s) = v {
Some(*s)
} else {
None
}
})
.collect(),
);
}
if let Some(hints) = find_layout_hints(value) {
return Some(hints);
}
}
None
}
IonValue::List(items) => {
for item in items {
if let Some(hints) = find_layout_hints(item) {
return Some(hints);
}
}
None
}
_ => None,
}
}
let hints = find_layout_hints(&ion);
assert!(hints.is_some(), "Heading should have layout_hints");
let hints = hints.unwrap();
assert!(
hints.contains(&(KfxSymbol::TreatAsTitle as u64)),
"Heading layout_hints should contain treat_as_title"
);
}
#[test]
fn test_layout_hints_for_figure() {
let mut chapter = IRChapter::new();
let figure = Node::new(Role::Figure);
let figure_id = chapter.alloc_node(figure);
chapter.append_child(chapter.root(), figure_id);
let mut ctx = crate::kfx::context::ExportContext::new();
ctx.register_section("test_section");
let ion = build_storyline_ion(&chapter, &mut ctx);
fn find_layout_hints(ion: &IonValue) -> Option<Vec<u64>> {
match ion {
IonValue::Struct(fields) => {
for (key, value) in fields {
if *key == sym!(LayoutHints)
&& let IonValue::List(items) = value
{
return Some(
items
.iter()
.filter_map(|v| {
if let IonValue::Symbol(s) = v {
Some(*s)
} else {
None
}
})
.collect(),
);
}
if let Some(hints) = find_layout_hints(value) {
return Some(hints);
}
}
None
}
IonValue::List(items) => {
for item in items {
if let Some(hints) = find_layout_hints(item) {
return Some(hints);
}
}
None
}
_ => None,
}
}
let hints = find_layout_hints(&ion);
assert!(hints.is_some(), "Figure should have layout_hints");
let hints = hints.unwrap();
assert!(
hints.contains(&(KfxSymbol::Figure as u64)),
"Figure layout_hints should contain figure"
);
}
#[test]
fn test_yj_classification_for_footnote_popup() {
let mut chapter = IRChapter::new();
let text_range = chapter.append_text("This is footnote content");
let mut text_node = Node::new(Role::Text);
text_node.text = text_range;
let text_id = chapter.alloc_node(text_node);
let endnote = Node::new(Role::ListItem);
let endnote_id = chapter.alloc_node(endnote);
chapter.append_child(endnote_id, text_id);
chapter.append_child(chapter.root(), endnote_id);
chapter
.semantics
.set_epub_type(endnote_id, "endnote footnote".to_string());
chapter.semantics.set_id(endnote_id, "note-1".to_string());
let mut ctx = crate::kfx::context::ExportContext::new();
ctx.register_section("test_section");
let ion = build_storyline_ion(&chapter, &mut ctx);
fn find_classification(ion: &IonValue) -> Option<u64> {
match ion {
IonValue::Struct(fields) => {
for (key, value) in fields {
if *key == sym!(YjClassification)
&& let IonValue::Symbol(sym) = value
{
return Some(*sym);
}
if let Some(found) = find_classification(value) {
return Some(found);
}
}
None
}
IonValue::List(items) => {
for item in items {
if let Some(found) = find_classification(item) {
return Some(found);
}
}
None
}
_ => None,
}
}
let classification = find_classification(&ion);
assert!(
classification.is_some(),
"Endnote element should have yj.classification attribute"
);
assert_eq!(
classification.unwrap(),
KfxSymbol::YjEndnote as u64,
"yj.classification should be yj.endnote ($619) for endnote elements"
);
}
}