use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use anyhow::Result;
use crate::config::Config;
use crate::project::ProjectLayout;
use crate::store::NodeKind;
use crate::store::hierarchy::Hierarchy;
use crate::store::node::Node;
use super::store::MythStore;
use super::{read_archetypes, read_motifs, read_symbols};
/// Returns the direct children of `book` whose kind is [`NodeKind::Chapter`],
/// in the order `Hierarchy::children_of` yields them.
fn chapters_of<'a>(h: &'a Hierarchy, book: &Node) -> Vec<&'a Node> {
    let mut chapters = Vec::new();
    for child in h.children_of(Some(book.id)) {
        if child.kind == NodeKind::Chapter {
            chapters.push(child);
        }
    }
    chapters
}
/// Gathers the plain-text prose of every paragraph in the subtree rooted at
/// `chapter_id`, paired with the node it came from.
///
/// Skipped without error:
/// - ids that `Hierarchy::get` cannot resolve,
/// - nodes that are not paragraphs,
/// - paragraphs whose content type is `"jinja"`,
/// - paragraphs with no backing file,
/// - files that cannot be read (best-effort: the read error is dropped).
///
/// File paths are resolved relative to `layout.root`, and the raw file text is
/// passed through `crate::audiobook::typst_to_plain` before being returned.
fn chapter_prose<'a>(layout: &ProjectLayout, h: &'a Hierarchy, chapter_id: uuid::Uuid) -> Vec<(&'a Node, String)> {
    h.collect_subtree(chapter_id)
        .into_iter()
        .filter_map(|id| h.get(id))
        .filter(|node| node.kind == NodeKind::Paragraph && node.content_type.as_deref() != Some("jinja"))
        .filter_map(|node| {
            let rel = node.file.as_ref()?;
            let raw = std::fs::read_to_string(layout.root.join(rel)).ok()?;
            Some((node, crate::audiobook::typst_to_plain(&raw)))
        })
        .collect()
}
/// Hashes `s` with the standard library's `DefaultHasher` and returns the
/// 64-bit digest.
///
/// NOTE(review): the std docs leave `DefaultHasher`'s algorithm unspecified
/// across Rust releases, so a digest that is persisted and compared later may
/// stop matching after a toolchain upgrade — confirm callers tolerate that.
fn hash_str(s: &str) -> u64 {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    Hash::hash(s, &mut hasher);
    Hasher::finish(&hasher)
}
/// Counts how often `token` occurs in `text_lc`.
///
/// `text_lc` is expected to be lowercased already; `token` is trimmed and
/// lowercased here. A blank token counts as 0.
///
/// Matching strategy:
/// - A purely alphanumeric token is matched as a whole word: the text is split
///   on every non-alphanumeric character and the pieces are compared for
///   equality, so `"raven"` does not count `"ravens"`.
/// - Any other token (containing spaces, hyphens, apostrophes, …) is counted as
///   non-overlapping substring occurrences. Such a token can never equal a
///   piece produced by the whole-word split, so routing it there would always
///   yield 0.
fn count_token(text_lc: &str, token: &str) -> u32 {
    let needle = token.trim().to_lowercase();
    if needle.is_empty() {
        return 0;
    }
    let hits = if needle.chars().all(char::is_alphanumeric) {
        text_lc
            .split(|c: char| !c.is_alphanumeric())
            .filter(|word| *word == needle)
            .count()
    } else {
        // `str::matches` yields leftmost, non-overlapping matches.
        text_lc.matches(needle.as_str()).count()
    };
    hits as u32
}
/// Re-reads the symbol, motif and archetype definitions from the project and
/// replaces the stored inventory for `book` (keyed by `book.slug`) with them.
///
/// # Errors
/// Propagates whatever `MythStore::replace_inventory` returns.
pub(crate) fn refresh_inventory(
    store: &MythStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
) -> Result<()> {
    // Tuple fields are evaluated left to right: symbols, motifs, archetypes.
    let (symbol_defs, motif_defs, archetype_defs) = (
        read_symbols(h, layout),
        read_motifs(h, layout),
        read_archetypes(h, layout),
    );
    store.replace_inventory(&book.slug, &symbol_defs, &motif_defs, &archetype_defs)
}
/// Recomputes per-chapter symbol densities for `book`.
///
/// For every chapter (1-based ordinal in `chapters_of` order) the paragraph
/// prose is joined with newlines, lowercased and hashed. Unless `force` is set,
/// a chapter whose stored hash equals the fresh one is skipped. Otherwise the
/// chapter's density rows are cleared and one row per symbol is written, where
/// the count is the sum of `count_token` over the symbol's vocabulary.
///
/// Returns the number of density rows written, or 0 when the book has no symbols.
///
/// NOTE(review): the skip hash covers the prose only. If a symbol's vocabulary
/// changes while the prose stays the same, unchanged chapters appear to be
/// skipped unless `force` is set — confirm that callers handle this.
///
/// # Errors
/// Propagates store errors from reading symbols, clearing, or upserting.
pub(crate) fn run_density_scan(
    store: &MythStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
    force: bool,
) -> Result<usize> {
    let symbols = store.symbols(&book.slug)?;
    if symbols.is_empty() {
        return Ok(0);
    }
    let now = chrono::Utc::now().to_rfc3339();
    let mut written = 0;
    for (idx, chapter) in chapters_of(h, book).iter().enumerate() {
        let ord = (idx + 1) as u32;

        // Newline-joined prose of the chapter, then lowercased as a whole.
        let paragraphs = chapter_prose(layout, h, chapter.id);
        let mut text = String::new();
        for (i, (_, body)) in paragraphs.iter().enumerate() {
            if i > 0 {
                text.push('\n');
            }
            text.push_str(body);
        }
        let lc = text.to_lowercase();
        let prose_hash = hash_str(&lc);

        let up_to_date = !force && chapter_unchanged(store, &book.slug, ord, prose_hash);
        if up_to_date {
            continue;
        }

        store.clear_density_chapter(&book.slug, ord)?;
        for symbol in &symbols {
            let total: u32 = symbol.vocabulary.iter().map(|word| count_token(&lc, word)).sum();
            store.upsert_density(&book.slug, &symbol.para_id, ord, total, prose_hash, &now)?;
            written += 1;
        }
    }
    Ok(written)
}
/// Returns `true` only when the store holds a hash for the given chapter and it
/// equals `prose_hash`.
///
/// A store error or a missing hash both yield `false`.
fn chapter_unchanged(store: &MythStore, book_slug: &str, chapter_ord: u32, prose_hash: u64) -> bool {
    matches!(
        store.density_chapter_hash(book_slug, chapter_ord),
        Ok(Some(stored)) if stored == prose_hash
    )
}
/// Rebuilds the `"explicit_tag"` motif occurrences for `book`.
///
/// Existing `"explicit_tag"` occurrences are cleared first. Then, for each
/// paragraph tagged `para:myth-motif`, the first tag that (trimmed and
/// lowercased) equals a known motif name is recorded as one occurrence in that
/// paragraph's chapter (1-based ordinal).
///
/// Returns the number of occurrences written. When the book has no motifs the
/// function returns 0 before clearing anything.
///
/// # Errors
/// Propagates store errors from reading motifs, clearing, or upserting.
pub(crate) fn collect_explicit_motifs(
    store: &MythStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
) -> Result<usize> {
    let motifs = store.motifs(&book.slug)?;
    if motifs.is_empty() {
        return Ok(0);
    }

    // Normalised motif name -> motif paragraph id; a later duplicate name wins.
    let mut by_name: HashMap<String, String> = HashMap::new();
    for motif in motifs.iter() {
        by_name.insert(motif.name.trim().to_lowercase(), motif.para_id.clone());
    }

    store.clear_motif_occurrences(&book.slug, "explicit_tag")?;
    let now = chrono::Utc::now().to_rfc3339();
    // `layout` is part of the signature but not needed here.
    let _ = layout;

    let mut recorded = 0;
    for (idx, chapter) in chapters_of(h, book).iter().enumerate() {
        let ord = (idx + 1) as u32;
        for id in h.collect_subtree(chapter.id) {
            let Some(para) = h.get(id) else { continue };
            let is_motif_para =
                para.kind == NodeKind::Paragraph && para.tags.iter().any(|t| t == "para:myth-motif");
            if !is_motif_para {
                continue;
            }
            let matched = para.tags.iter().find_map(|t| by_name.get(&t.trim().to_lowercase()));
            let Some(motif_para_id) = matched else { continue };
            store.upsert_motif_occurrence(&book.slug, motif_para_id, ord, &id.to_string(), "explicit_tag", &now)?;
            recorded += 1;
        }
    }
    Ok(recorded)
}
/// Runs the complete scan pipeline for `book`, in this order: inventory
/// refresh, density scan, explicit motif collection, deterministic checks,
/// heatmap rendering.
///
/// For the heatmap, each stored archetype contributes a presence row. The row
/// is labelled with the role code, plus ` (name)` when a character name is set.
/// It lists the chapters that mention that name, or nothing when the name is
/// blank.
///
/// Returns `(number of findings, heatmap)`.
///
/// # Errors
/// Propagates the first error from any pipeline stage.
pub(crate) fn run_full_scan(
    store: &MythStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
    buckets: usize,
    final_act_pct: u32,
    force: bool,
) -> Result<(usize, String)> {
    refresh_inventory(store, layout, h, book)?;
    run_density_scan(store, layout, h, book, force)?;
    collect_explicit_motifs(store, layout, h, book)?;
    let findings = super::checks::run_deterministic_checks(store, layout, h, book, final_act_pct)?;
    let total = chapter_count(h, book);

    let mut presence: Vec<(String, Vec<u32>)> = Vec::new();
    for archetype in store.archetypes(&book.slug)?.iter() {
        let name = archetype.character_name.trim();
        let row = if name.is_empty() {
            (archetype.role.as_code().to_string(), Vec::new())
        } else {
            (
                format!("{} ({name})", archetype.role.as_code()),
                character_mention_chapters(layout, h, book, name),
            )
        };
        presence.push(row);
    }

    let heatmap = super::heatmap::build_heatmap(store, &book.slug, &book.title, total, buckets, &presence)?;
    Ok((findings.len(), heatmap))
}
/// Number of chapter nodes directly under `book`, as counted by `chapters_of`.
pub(super) fn chapter_count(h: &Hierarchy, book: &Node) -> u32 {
    let chapters = chapters_of(h, book);
    chapters.len() as u32
}
/// Lists the 1-based ordinals of the chapters whose prose mentions `name`.
///
/// `name` is trimmed and lowercased; a blank name yields an empty list. Each
/// chapter's paragraph prose is lowercased and joined with newlines before
/// matching:
/// - a purely alphanumeric name must appear as a whole word (the text is split
///   on every non-alphanumeric character);
/// - any other name (containing spaces, hyphens, apostrophes, …) is matched as
///   a substring. Such a name can never equal a piece produced by the
///   whole-word split, so it would otherwise never be found.
pub(super) fn character_mention_chapters(
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
    name: &str,
) -> Vec<u32> {
    let nm = name.trim().to_lowercase();
    if nm.is_empty() {
        return Vec::new();
    }
    // Decided once: the name does not change between chapters.
    let whole_word = nm.chars().all(char::is_alphanumeric);
    let mut out = Vec::new();
    for (idx, ch) in chapters_of(h, book).iter().enumerate() {
        let ord = (idx + 1) as u32;
        let lc: String = chapter_prose(layout, h, ch.id)
            .iter()
            .map(|(_, t)| t.to_lowercase())
            .collect::<Vec<_>>()
            .join("\n");
        let hit = if whole_word {
            lc.split(|c: char| !c.is_alphanumeric()).any(|w| w == nm)
        } else {
            lc.contains(&nm)
        };
        if hit {
            out.push(ord);
        }
    }
    out
}
/// Collects up to `max` sentences that contain at least one of `needles`,
/// each paired with the 1-based ordinal of its chapter.
///
/// Needles are trimmed and lowercased; blank ones are dropped. Returns an empty
/// list when no needle remains or `max` is 0. Paragraph prose is split into
/// sentences with `crate::prose::segment::split_sentences` for `lang`, and each
/// sentence is lowercased before matching:
/// - a purely alphanumeric needle must appear as a whole word (the sentence is
///   split on every non-alphanumeric character);
/// - any other needle (containing spaces, hyphens, apostrophes, …) is matched
///   as a substring. Such a needle can never equal a piece produced by the
///   whole-word split, so it would otherwise never be found.
///
/// Matching sentences are returned trimmed, in chapter and paragraph order, and
/// scanning stops as soon as `max` excerpts have been gathered.
pub(super) fn excerpts_containing(
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
    lang: &crate::prose::ProseLanguage,
    needles: &[String],
    max: usize,
) -> Vec<(u32, String)> {
    // Normalise each needle once and remember which matching mode it needs.
    let lc_needles: Vec<(String, bool)> = needles
        .iter()
        .map(|n| n.trim().to_lowercase())
        .filter(|n| !n.is_empty())
        .map(|n| {
            let whole_word = n.chars().all(char::is_alphanumeric);
            (n, whole_word)
        })
        .collect();
    if lc_needles.is_empty() || max == 0 {
        return Vec::new();
    }
    let mut out = Vec::new();
    for (idx, ch) in chapters_of(h, book).iter().enumerate() {
        let ord = (idx + 1) as u32;
        for (_, text) in chapter_prose(layout, h, ch.id) {
            for sent in crate::prose::segment::split_sentences(&text, lang) {
                let s_lc = sent.to_lowercase();
                let hit = lc_needles.iter().any(|(needle, whole_word)| {
                    if *whole_word {
                        s_lc.split(|c: char| !c.is_alphanumeric()).any(|w| w == needle)
                    } else {
                        s_lc.contains(needle.as_str())
                    }
                });
                if hit {
                    out.push((ord, sent.trim().to_string()));
                    if out.len() >= max {
                        return out;
                    }
                }
            }
        }
    }
    out
}
/// Returns up to `max` paragraph texts recorded as occurrences of the motif
/// `motif_para_id`, each paired with the 1-based ordinal of its chapter.
///
/// The occurrence paragraph ids come from the store; a store error is treated
/// the same as "no occurrences" (best-effort, yields an empty list). A
/// paragraph is included when its node id, rendered as a string, is among those
/// ids. Results follow chapter and paragraph order.
///
/// Returns an empty list when `max` is 0, matching `excerpts_containing`.
/// Without that guard the limit check after the push would let one excerpt
/// through.
pub(super) fn motif_occurrence_excerpts(
    store: &MythStore,
    layout: &ProjectLayout,
    h: &Hierarchy,
    book: &Node,
    motif_para_id: &str,
    max: usize,
) -> Vec<(u32, String)> {
    let mut out = Vec::new();
    if max == 0 {
        return out;
    }
    let occ_ids: std::collections::HashSet<String> = store
        .motif_occurrence_para_ids(&book.slug, motif_para_id)
        .unwrap_or_default()
        .into_iter()
        .collect();
    if occ_ids.is_empty() {
        return out;
    }
    for (idx, ch) in chapters_of(h, book).iter().enumerate() {
        let ord = (idx + 1) as u32;
        for (node, text) in chapter_prose(layout, h, ch.id) {
            if occ_ids.contains(&node.id.to_string()) {
                out.push((ord, text));
                if out.len() >= max {
                    return out;
                }
            }
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single words are matched as whole words ("raven" does not count
    /// "ravens"), phrases are counted as substrings, and absent tokens give 0.
    #[test]
    fn count_token_whole_word_and_phrase() {
        let haystack =
            "the raven watched. ravens gathered. a white rose, a white rose bloomed.".to_lowercase();
        let cases = [("raven", 1), ("ravens", 1), ("white rose", 2), ("crow", 0)];
        for (token, expected) in cases {
            assert_eq!(count_token(&haystack, token), expected);
        }
    }
}