use std::hash::{Hash, Hasher};
use anyhow::Result;
use crate::config::Config;
use crate::project::ProjectLayout;
use crate::prose::resolve_prose_language;
use crate::store::SYSTEM_TAG_CHARACTERS;
use crate::store::NodeKind;
use crate::store::hierarchy::Hierarchy;
use crate::store::node::Node;
use super::store::DialogueStore;
use super::{
AttributionConfidence, AttributionWindows, ChapterDialogueStats, DialogueFinding,
DialogueFindingKind, DialogueLexicon, TagVerbClass, attribute_spans, detect_spans,
dialogue_convention, lexicon_for_with,
};
/// Detection thresholds for one refresh run, copied out of the dialogue config so
/// they can be handed by value to the per-chapter pass.
#[derive(Clone, Copy)]
struct DialogueTuning {
/// A paragraph that contains dialogue and has fewer than this many words outside
/// its spans is treated as "dialogue only" (no action beat).
beat_min_words: u32,
/// Length of a run of consecutive dialogue-only paragraphs at which a
/// talking-head finding is emitted (once per run).
talking_head_threshold: u32,
/// Longest run of unattributed dialogue paragraphs that is tolerated without a
/// finding once two speakers have been established.
unattributed_run_threshold: u32,
/// How far a chapter's said-bookism density may exceed the book-wide baseline
/// before the chapter is flagged.
said_bookism_threshold: f32,
}
/// Lists the known character names for attribution.
///
/// Looks up the book node carrying the `SYSTEM_TAG_CHARACTERS` system tag and
/// returns the trimmed, non-empty titles of its direct children, sorted and with
/// duplicates removed. Returns an empty list when no such book exists.
pub(crate) fn character_names(h: &Hierarchy) -> Vec<String> {
    let characters_book = match h.iter().find(|node| {
        node.kind == NodeKind::Book && node.system_tag.as_deref() == Some(SYSTEM_TAG_CHARACTERS)
    }) {
        Some(found) => found,
        None => return Vec::new(),
    };

    let mut collected: Vec<String> = Vec::new();
    for child in h.children_of(Some(characters_book.id)).iter() {
        let title = child.title.trim();
        if !title.is_empty() {
            collected.push(title.to_string());
        }
    }
    collected.sort();
    collected.dedup();
    collected
}
/// Loads the plain-text body of every prose paragraph under `chapter_id`.
///
/// Walks the chapter's subtree and yields `(paragraph id as string, plain text)`
/// pairs in subtree order. Nodes are skipped when they are not paragraphs, when
/// their content type is `"jinja"`, when they have no backing file, or when the
/// file cannot be read (best effort: read errors are not reported).
fn chapter_paragraphs(
    layout: &ProjectLayout,
    h: &Hierarchy,
    chapter_id: uuid::Uuid,
) -> Vec<(String, String)> {
    h.collect_subtree(chapter_id)
        .into_iter()
        .filter_map(|id| {
            let node = h.get(id)?;
            let is_prose_paragraph = node.kind == NodeKind::Paragraph
                && node.content_type.as_deref() != Some("jinja");
            if !is_prose_paragraph {
                return None;
            }
            let rel = node.file.as_ref()?;
            let raw = std::fs::read_to_string(layout.root.join(rel)).ok()?;
            Some((id.to_string(), crate::audiobook::typst_to_plain(&raw)))
        })
        .collect()
}
/// Computes the change-detection hash for a chapter's paragraphs.
///
/// Only the text of each `(id, text)` pair is fed to the hasher, in slice order;
/// paragraph ids do not influence the result.
fn hash_paras(paras: &[(String, String)]) -> u64 {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    paras
        .iter()
        .for_each(|(_id, body)| body.hash(&mut hasher));
    hasher.finish()
}
/// Refreshes the stored dialogue analysis for every chapter of `book`.
///
/// Chapters are the direct `Chapter` children of `book`, numbered from 1 in the
/// order the hierarchy returns them. For each chapter the paragraph text is hashed;
/// when the stored hash matches and stats are present the chapter is skipped and its
/// stored tag counts are reused. Otherwise the chapter's stored rows are cleared and
/// the chapter is re-analysed. If anything was re-analysed, the book's fingerprints
/// are rebuilt.
///
/// After the per-chapter pass, each chapter's said-bookism density is compared
/// against the book-wide baseline (cached chapters included) and a `SaidBookism`
/// finding is added for every chapter exceeding it by more than the configured
/// threshold.
///
/// # Parameters
/// - `explicit_lang`: language override; falls back to the dialogue config's
///   language and then to the project language.
/// - `now`: timestamp string forwarded unchanged to the store.
///
/// # Returns
/// Findings from the chapters re-analysed in this call plus the book-level
/// said-bookism findings. Skipped (unchanged) chapters contribute no per-paragraph
/// findings.
///
/// # Errors
/// Propagates any error from the dialogue store or the fingerprint rebuild.
pub(crate) fn refresh_book(
    store: &DialogueStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    cfg: &Config,
    book: &Node,
    explicit_lang: Option<&str>,
    now: &str,
) -> Result<Vec<DialogueFinding>> {
    // Fixed attribution windows; only the name window is configurable.
    const VERB_WINDOW: u32 = 15;
    const BEAT_WINDOW: u32 = 30;

    let dc = &cfg.dialogue;
    let lang_override = explicit_lang.or(dc.language.as_deref());
    let (lang, _note) = resolve_prose_language(lang_override, &cfg.language);
    let convention = dialogue_convention(&lang);
    let names = character_names(h);
    let windows = AttributionWindows {
        name: dc.attribution_window,
        verb: VERB_WINDOW,
        beat: BEAT_WINDOW,
    };
    let lex = lexicon_for_with(&lang, &dc.extra_neutral_verbs, &dc.extra_said_bookisms);
    let tuning = DialogueTuning {
        beat_min_words: dc.beat_min_words,
        talking_head_threshold: dc.talking_head_threshold,
        unattributed_run_threshold: dc.unattributed_run_threshold,
        said_bookism_threshold: dc.said_bookism_threshold,
    };
    let chapters: Vec<&Node> = h
        .children_of(Some(book.id))
        .into_iter()
        .filter(|n| n.kind == NodeKind::Chapter)
        .collect();

    let mut findings = Vec::new();
    // (chapter ordinal, neutral tag count, said-bookism count) for every chapter.
    let mut tag_counts: Vec<(u32, u32, u32)> = Vec::new();
    let mut any_recomputed = false;

    for (idx, ch) in chapters.iter().enumerate() {
        // Chapter ordinals are 1-based; a book cannot realistically exceed u32 chapters.
        let ord = (idx + 1) as u32;
        let paras = chapter_paragraphs(layout, h, ch.id);
        let hash = hash_paras(&paras);

        if store.stored_chapter_hash(&book.slug, ord)? == Some(hash) {
            if let Some(s) = store.chapter_stats(&book.slug, ord)? {
                tag_counts.push((ord, s.neutral_tag_count, s.said_bookism_count));
                continue;
            }
            // The hash matches but no stats row exists (e.g. an earlier run failed
            // part-way). Skipping here would drop the chapter from the baseline and
            // never repair it, so fall through and recompute.
        }

        store.clear_chapter(&book.slug, ord)?;
        let (stats, mut chap_findings) = detect_chapter(
            store, &book.slug, ord, &paras, &names, &convention, lex, &lang, windows, tuning,
            now, hash,
        )?;
        tag_counts.push((ord, stats.neutral_tag_count, stats.said_bookism_count));
        findings.append(&mut chap_findings);
        any_recomputed = true;
    }

    if any_recomputed {
        super::fingerprint::rebuild_fingerprints(store, &book.slug, &lang, now)?;
    }

    // Book-wide baseline: share of said-bookisms among all classified tags.
    let total_neutral: u32 = tag_counts.iter().map(|c| c.1).sum();
    let total_bookism: u32 = tag_counts.iter().map(|c| c.2).sum();
    let total_tags = total_neutral + total_bookism;
    let baseline = if total_tags > 0 {
        total_bookism as f32 / total_tags as f32
    } else {
        0.0
    };

    for (ord, neutral, bookism) in &tag_counts {
        let chtags = neutral + bookism;
        if chtags == 0 {
            // No classified tags in this chapter: density is undefined, nothing to flag.
            continue;
        }
        let density = *bookism as f32 / chtags as f32;
        if density - baseline > tuning.said_bookism_threshold {
            findings.push(DialogueFinding {
                kind: DialogueFindingKind::SaidBookism,
                chapter_ord: *ord,
                para_id: None,
                detail: format!(
                    "said-bookism density {density:.2} (book baseline {baseline:.2}, Δ +{:.2} ⚠)",
                    density - baseline
                ),
            });
        }
    }
    Ok(findings)
}
/// Analyses one chapter's paragraphs, persists the result, and returns the
/// chapter stats together with the per-paragraph findings.
///
/// For every paragraph the dialogue spans are detected and attributed; each span is
/// written to `store` as it is processed. Three kinds of state are tracked across
/// paragraphs:
///
/// - the last attributed speaker (`prev_named`), which is passed to attribution for
///   the next paragraph;
/// - up to two "established" speaker names plus the current run of unattributed
///   dialogue paragraphs, used to decide whether unattributed speech is tolerated;
/// - the current run of dialogue-only paragraphs, used for talking-head detection.
///
/// The aggregated `ChapterDialogueStats` are upserted into `store` before returning.
///
/// # Errors
/// Propagates any error from the store; spans already written for this chapter
/// stay in the store in that case.
// Each argument is a distinct analysis input, so the lint is silenced rather than
// bundling them into an ad-hoc struct.
#[allow(clippy::too_many_arguments)]
fn detect_chapter(
    store: &DialogueStore,
    book_slug: &str,
    ord: u32,
    paras: &[(String, String)],
    names: &[String],
    convention: &super::DialogueConvention,
    lex: &DialogueLexicon,
    lang: &crate::prose::ProseLanguage,
    windows: AttributionWindows,
    tuning: DialogueTuning,
    now: &str,
    hash: u64,
) -> Result<(ChapterDialogueStats, Vec<DialogueFinding>)> {
    let mut findings = Vec::new();
    let (mut total_spans, mut zero, mut neutral, mut bookism) = (0u32, 0u32, 0u32, 0u32);
    let (mut dialogue_words, mut total_words) = (0u32, 0u32);
    // Speaker attributed in the most recent attributed paragraph.
    let mut prev_named: Option<String> = None;
    // At most two names; when a third distinct name appears the oldest is evicted.
    let mut established: Vec<String> = Vec::new();
    // Consecutive paragraphs with unattributed speech since the last attributed one.
    let mut zero_run = 0u32;
    // Talking-head tracking: current run length, its first paragraph, sequences found.
    let mut th_run = 0u32;
    let mut th_first_para: Option<String> = None;
    let mut th_sequences = 0u32;

    for (para_id, text) in paras {
        // Counted once and reused for both the chapter total and the beat check.
        let para_words = text.split_whitespace().count() as u32;
        total_words += para_words;

        let mut spans = detect_spans(para_id, text, *convention, lang);
        attribute_spans(&mut spans, text, names, prev_named.as_deref(), lex, lang, windows);

        let mut para_attributed: Option<String> = None;
        let mut zero_in_para = 0u32;
        let mut span_words = 0u32;
        for span in &spans {
            total_spans += 1;
            dialogue_words += span.word_count;
            span_words += span.word_count;
            match span.tag_verb_class {
                Some(TagVerbClass::Neutral) => neutral += 1,
                Some(TagVerbClass::SaidBookism) => bookism += 1,
                None => {}
            }
            if !span.has_attribution_signal {
                zero += 1;
                zero_in_para += 1;
            }
            if span.attribution_conf != AttributionConfidence::None {
                if let Some(n) = &span.attribution_name {
                    // The last attributed span in the paragraph wins.
                    para_attributed = Some(n.clone());
                }
            }
            store.upsert_span(book_slug, ord, span, now, hash)?;
        }

        if let Some(n) = &para_attributed {
            prev_named = Some(n.clone());
            if !established.contains(n) {
                established.push(n.clone());
                if established.len() > 2 {
                    established.remove(0);
                }
            }
            // An attributed paragraph ends any unattributed run.
            zero_run = 0;
        }

        if zero_in_para > 0 {
            if para_attributed.is_none() {
                zero_run += 1;
                // With two speakers established, a short unattributed run is tolerated.
                let in_established_run =
                    established.len() >= 2 && zero_run <= tuning.unattributed_run_threshold;
                if !in_established_run {
                    findings.push(DialogueFinding {
                        kind: DialogueFindingKind::ZeroAttribution,
                        chapter_ord: ord,
                        para_id: Some(para_id.clone()),
                        detail: "unattributed speech — no tag or character name within range"
                            .into(),
                    });
                }
            } else {
                findings.push(DialogueFinding {
                    kind: DialogueFindingKind::ZeroAttribution,
                    chapter_ord: ord,
                    para_id: Some(para_id.clone()),
                    detail: "unattributed speech in an otherwise-tagged paragraph".into(),
                });
            }
        }

        // A paragraph is "dialogue only" when it has spans and too few words outside
        // them to count as an action beat.
        let has_dialogue = !spans.is_empty();
        let non_span_words = para_words.saturating_sub(span_words);
        let dialogue_only = has_dialogue && non_span_words < tuning.beat_min_words;
        if dialogue_only {
            th_run += 1;
            if th_first_para.is_none() {
                th_first_para = Some(para_id.clone());
            }
            // `==` rather than `>=`: report each run once, when it reaches the threshold.
            if th_run == tuning.talking_head_threshold {
                th_sequences += 1;
                findings.push(DialogueFinding {
                    kind: DialogueFindingKind::TalkingHead,
                    chapter_ord: ord,
                    para_id: th_first_para.clone(),
                    detail: format!(
                        "talking-head sequence: {}+ paragraphs with no action beat",
                        tuning.talking_head_threshold
                    ),
                });
            }
        } else {
            th_run = 0;
            th_first_para = None;
        }
    }

    let chtags = neutral + bookism;
    let density = if chtags > 0 {
        bookism as f32 / chtags as f32
    } else {
        0.0
    };
    let ratio = if total_words > 0 {
        dialogue_words as f32 / total_words as f32
    } else {
        0.0
    };
    let stats = ChapterDialogueStats {
        chapter_ord: ord,
        total_spans,
        zero_attribution_count: zero,
        said_bookism_count: bookism,
        neutral_tag_count: neutral,
        said_bookism_density: density,
        dialogue_word_count: dialogue_words,
        total_word_count: total_words,
        dialogue_density_ratio: ratio,
        talking_head_sequences: th_sequences,
    };
    store.upsert_chapter_stats(book_slug, &stats, now, hash)?;
    Ok((stats, findings))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::node::NodeKind;

    /// Builds a node of `kind` by deserializing JSON; `slug` doubles as the title.
    fn mk(kind: NodeKind, slug: &str, parent: Option<uuid::Uuid>, file: Option<&str>) -> Node {
        let kind_name = format!("{kind:?}").to_lowercase();
        let value = serde_json::json!({
            "id": uuid::Uuid::new_v4(), "kind": kind_name,
            "title": slug, "slug": slug, "path": [], "parent_id": parent,
            "order": 0, "file": file, "modified_at": "2026-01-01T00:00:00Z",
        });
        serde_json::from_value(value).expect("node")
    }

    /// Creates a one-book, one-chapter project under `root` with one paragraph file
    /// per entry of `bodies`, plus a characters book whose children are `chars`.
    fn project(
        root: &std::path::Path,
        bodies: &[&str],
        chars: &[&str],
    ) -> (DialogueStore, ProjectLayout, Hierarchy, Node) {
        let layout = ProjectLayout::new(root);
        let book = mk(NodeKind::Book, "tale", None, None);
        let chapter = mk(NodeKind::Chapter, "ch1", Some(book.id), None);
        let chapter_id = chapter.id;

        // Node order matters to the fixture: book, chapter, paragraphs, characters.
        let mut nodes = vec![book.clone(), chapter];
        std::fs::create_dir_all(root.join("books/tale/ch1")).unwrap();
        for (i, body) in bodies.iter().enumerate() {
            let rel = format!("books/tale/ch1/{:02}-p{i}.typ", i + 1);
            std::fs::write(root.join(&rel), body).unwrap();
            let para = mk(NodeKind::Paragraph, &format!("p{i}"), Some(chapter_id), Some(&rel));
            nodes.push(para);
        }

        let characters_book: Node = serde_json::from_value(serde_json::json!({
            "id": uuid::Uuid::new_v4(), "kind": "book", "title": "Characters",
            "slug": "characters", "path": [], "parent_id": null, "order": 90,
            "file": null, "modified_at": "2026-01-01T00:00:00Z",
            "system_tag": "characters",
        }))
        .unwrap();
        for name in chars {
            nodes.push(mk(NodeKind::Paragraph, name, Some(characters_book.id), None));
        }
        nodes.push(characters_book);

        let hierarchy = Hierarchy::from_nodes_for_test(nodes);
        let store = DialogueStore::open(root).unwrap();
        (store, layout, hierarchy, book)
    }

    /// Default config with the project language forced to English.
    fn cfg_en() -> Config {
        let mut config = Config::default();
        config.language = "en".into();
        config
    }

    /// True when any finding in `findings` is of `kind`.
    fn has_kind(findings: &[DialogueFinding], kind: DialogueFindingKind) -> bool {
        findings.iter().any(|finding| finding.kind == kind)
    }

    #[test]
    fn zero_attribution_finding_for_untagged_line() {
        let dir = tempfile::tempdir().unwrap();
        let (st, layout, h, book) =
            project(dir.path(), &["\u{201C}Who goes there?\u{201D}"], &["Mara"]);
        let f = refresh_book(&st, &layout, &h, &cfg_en(), &book, None, "now").unwrap();
        assert!(has_kind(&f, DialogueFindingKind::ZeroAttribution), "{f:?}");

        let stats = st.chapter_stats("tale", 1).unwrap().unwrap();
        assert_eq!(stats.total_spans, 1);
        assert_eq!(stats.zero_attribution_count, 1);
    }

    #[test]
    fn named_tag_is_not_flagged_and_is_persisted_certain() {
        let dir = tempfile::tempdir().unwrap();
        let (st, layout, h, book) =
            project(dir.path(), &["\u{201C}Hello,\u{201D} said Mara."], &["Mara"]);
        let f = refresh_book(&st, &layout, &h, &cfg_en(), &book, None, "now").unwrap();
        assert!(!has_kind(&f, DialogueFindingKind::ZeroAttribution));

        let certain = st.certain_spans("tale").unwrap();
        assert_eq!(certain.len(), 1);
        assert_eq!(certain[0].1.attribution_name.as_deref(), Some("Mara"));
    }

    #[test]
    fn said_bookism_density_finding() {
        let dir = tempfile::tempdir().unwrap();
        let (st, layout, h, book) = project(
            dir.path(),
            &["\u{201C}No,\u{201D} Mara whispered. \u{201C}Stop,\u{201D} Mara hissed."],
            &["Mara"],
        );
        refresh_book(&st, &layout, &h, &cfg_en(), &book, None, "now").unwrap();

        let stats = st.chapter_stats("tale", 1).unwrap().unwrap();
        assert_eq!(stats.said_bookism_count, 2);
        assert!((stats.said_bookism_density - 1.0).abs() < 1e-3);
    }

    #[test]
    fn talking_head_sequence_finding() {
        let dir = tempfile::tempdir().unwrap();
        let bodies = [
            "\u{201C}One.\u{201D}",
            "\u{201C}Two.\u{201D}",
            "\u{201C}Three.\u{201D}",
            "\u{201C}Four.\u{201D}",
            "\u{201C}Five.\u{201D}",
            "\u{201C}Six.\u{201D}",
        ];
        let (st, layout, h, book) = project(dir.path(), &bodies, &[]);
        let f = refresh_book(&st, &layout, &h, &cfg_en(), &book, None, "now").unwrap();
        assert!(has_kind(&f, DialogueFindingKind::TalkingHead), "{f:?}");

        let stats = st.chapter_stats("tale", 1).unwrap().unwrap();
        assert_eq!(stats.talking_head_sequences, 1);
    }

    #[test]
    fn action_beat_clears_talking_head() {
        let dir = tempfile::tempdir().unwrap();
        let bodies = [
            "\u{201C}One.\u{201D}",
            "\u{201C}Two.\u{201D}",
            "She crossed the long cold room and opened the heavy door slowly.",
            "\u{201C}Three.\u{201D}",
            "\u{201C}Four.\u{201D}",
        ];
        let (st, layout, h, book) = project(dir.path(), &bodies, &[]);
        let f = refresh_book(&st, &layout, &h, &cfg_en(), &book, None, "now").unwrap();
        assert!(!has_kind(&f, DialogueFindingKind::TalkingHead));
    }

    #[test]
    fn lazy_skip_on_unchanged_hash() {
        let dir = tempfile::tempdir().unwrap();
        let (st, layout, h, book) =
            project(dir.path(), &["\u{201C}Hello,\u{201D} said Mara."], &["Mara"]);
        let cfg = cfg_en();
        refresh_book(&st, &layout, &h, &cfg, &book, None, "now").unwrap();
        let f2 = refresh_book(&st, &layout, &h, &cfg, &book, None, "later").unwrap();
        assert!(f2.is_empty(), "unchanged chapter should not re-emit: {f2:?}");
    }
}