use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::config::parse_stemmer_language;
use rust_stemmers::Stemmer;
/// A single recorded statement about a character: which attribute, what
/// value, and the chapter the statement was recorded for.
///
/// Serialized with serde as part of [`ContinuityBible`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CharacterFact {
/// Character name as recorded. `detect_drift` compares it trimmed and
/// lowercased, so spelling case does not split groups there.
pub character: String,
/// Attribute name. `parse_extraction` stores it lowercased with spaces
/// replaced by underscores (e.g. `eye color` becomes `eye_color`).
pub attribute: String,
/// The attribute's value, stored verbatim.
pub value: String,
/// Label of the chapter this fact was recorded for.
pub chapter: String,
}
/// The collection of character facts persisted to the project's
/// `.inkhaven/continuity.json` sidecar file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContinuityBible {
/// Version string; defaults to empty when absent from the JSON.
/// NOTE(review): presumably the application version that wrote the file — confirm.
#[serde(default)]
pub version: String,
/// Language name; defaults to empty when absent from the JSON.
/// NOTE(review): `detect_drift` takes its language as a separate argument
/// and does not read this field.
#[serde(default)]
pub language: String,
/// All recorded facts. Unlike the other fields this one carries no
/// `#[serde(default)]`, so it must be present in the JSON when deserializing.
pub facts: Vec<CharacterFact>,
/// Defaults to 0 when absent from the JSON.
/// NOTE(review): presumably a hash of the manuscript used to detect stale
/// data — not established by this file; confirm against the caller.
#[serde(default)]
pub manuscript_fingerprint: u64,
}
impl ContinuityBible {
    /// Returns the sidecar location for a project:
    /// `<project_root>/.inkhaven/continuity.json`.
    pub fn sidecar_path(project_root: &Path) -> PathBuf {
        let mut sidecar = project_root.join(".inkhaven");
        sidecar.push("continuity.json");
        sidecar
    }

    /// Reads the bible from the project's sidecar file.
    ///
    /// A missing file is not an error: it yields `Self::default()`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the file exists but is not valid JSON for
    /// this type, and passes through any other I/O error from reading it.
    pub fn load(project_root: &Path) -> std::io::Result<Self> {
        let raw = match std::fs::read_to_string(Self::sidecar_path(project_root)) {
            Ok(raw) => raw,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                return Ok(Self::default());
            }
            Err(err) => return Err(err),
        };
        serde_json::from_str(&raw)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
    }

    /// Writes the bible as pretty-printed JSON to the project's sidecar
    /// file, creating the parent directory first if needed.
    ///
    /// # Errors
    ///
    /// Returns directory-creation errors, `InvalidData` if serialization
    /// fails, or whatever `crate::io_atomic::write` reports.
    pub fn save(&self, project_root: &Path) -> std::io::Result<()> {
        let target = Self::sidecar_path(project_root);
        if let Some(dir) = target.parent() {
            std::fs::create_dir_all(dir)?;
        }
        let json = serde_json::to_vec_pretty(self)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;
        // NOTE(review): presumably an atomic replace, going by the module name.
        crate::io_atomic::write(&target, &json)
    }

    /// Returns every distinct character name in the bible, sorted.
    ///
    /// Names are compared exactly (case-sensitive, no trimming).
    pub fn characters(&self) -> Vec<String> {
        // An ordered set sorts and de-duplicates in one step; `str` ordering
        // is the same byte-wise ordering `String` uses.
        let unique: std::collections::BTreeSet<&str> = self
            .facts
            .iter()
            .map(|fact| fact.character.as_str())
            .collect();
        unique.into_iter().map(str::to_owned).collect()
    }
}
/// One inconsistency reported by `detect_drift`: a character attribute that
/// has been recorded with more than one distinct value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Drift {
/// Character name, spelled as in the first matching fact.
pub character: String,
/// Attribute name, spelled as in the first matching fact.
pub attribute: String,
/// `(chapter, value)` pairs, one per distinct normalised value, in the
/// order they first appear among the facts.
pub conflicts: Vec<(String, String)>,
}
/// Reduces a value to a canonical form for comparison.
///
/// The value is split on whitespace; each word has non-alphanumeric
/// characters trimmed from both ends and is passed through
/// `crate::text::normalize_stem` with the given stemmer. Words that come
/// back empty are dropped and the rest are joined with single spaces.
fn normalise(value: &str, stemmer: &Option<Stemmer>) -> String {
    let mut tokens = Vec::new();
    for word in value.split_whitespace() {
        // Strip surrounding punctuation such as quotes, commas or brackets.
        let core = word.trim_matches(|c: char| !c.is_alphanumeric());
        let stem = crate::text::normalize_stem(core, stemmer);
        if !stem.is_empty() {
            tokens.push(stem);
        }
    }
    tokens.join(" ")
}
/// Finds character attributes that were recorded with conflicting values.
///
/// Facts are grouped by `(character, attribute)`, compared trimmed and
/// lowercased. Within a group, values are compared after [`normalise`], so
/// punctuation and (when a stemmer is available for `language`) inflection
/// differences do not count as conflicts. A group is reported only when it
/// holds at least two distinct normalised values.
///
/// # Arguments
///
/// * `bible` - the facts to inspect.
/// * `language` - handed to `parse_stemmer_language` to select a stemmer;
///   when that yields nothing, `normalise` runs without one.
///
/// # Returns
///
/// One [`Drift`] per inconsistent group, sorted by character then attribute.
/// Each drift uses the spelling of the first fact in its group, and its
/// `conflicts` list the first `(chapter, value)` seen for every distinct
/// normalised value, in fact order.
pub fn detect_drift(bible: &ContinuityBible, language: &str) -> Vec<Drift> {
    /// Per-`(character, attribute)` accumulator built while scanning facts.
    struct Group {
        /// Spelling taken from the first fact that opened the group.
        character: String,
        attribute: String,
        /// Normalised values already represented in `conflicts`.
        seen: std::collections::HashSet<String>,
        /// First `(chapter, value)` for each distinct normalised value.
        conflicts: Vec<(String, String)>,
    }

    let stemmer = parse_stemmer_language(language).map(Stemmer::create);

    // Single pass: the display spelling and the de-duplicated conflicts are
    // captured as each fact is visited, so nothing has to be rescanned later.
    let mut groups: BTreeMap<(String, String), Group> = BTreeMap::new();
    for fact in &bible.facts {
        let key = (
            fact.character.trim().to_lowercase(),
            fact.attribute.trim().to_lowercase(),
        );
        let group = groups.entry(key).or_insert_with(|| Group {
            character: fact.character.clone(),
            attribute: fact.attribute.clone(),
            seen: std::collections::HashSet::new(),
            conflicts: Vec::new(),
        });
        if group.seen.insert(normalise(&fact.value, &stemmer)) {
            group
                .conflicts
                .push((fact.chapter.clone(), fact.value.clone()));
        }
    }

    // `conflicts` has one entry per distinct normalised value, so fewer than
    // two entries means the attribute is consistent.
    let mut drifts: Vec<Drift> = groups
        .into_values()
        .filter(|group| group.conflicts.len() >= 2)
        .map(|group| Drift {
            character: group.character,
            attribute: group.attribute,
            conflicts: group.conflicts,
        })
        .collect();

    // The map iterates in lowercased-key order; the result is ordered by the
    // display spelling instead (stable sort, as before).
    drifts.sort_by(|a, b| {
        a.character
            .cmp(&b.character)
            .then_with(|| a.attribute.cmp(&b.attribute))
    });
    drifts
}
/// Parses pipe-delimited `character | attribute | value` lines into facts.
///
/// Each line is trimmed and stripped of leading list markers (`-`, `*`,
/// `•`). A line is skipped when it has fewer than three `|`-separated
/// fields, when any field is empty after trimming, or when it is a
/// `character | attribute | ...` header row (ASCII case-insensitive). Only
/// the first two pipes split, so the value may itself contain `|`.
///
/// The attribute is stored lowercased with spaces replaced by underscores;
/// every resulting fact is tagged with `chapter`.
pub fn parse_extraction(raw: &str, chapter: &str) -> Vec<CharacterFact> {
    raw.lines()
        .filter_map(|raw_line| {
            let entry = raw_line
                .trim()
                .trim_start_matches(['-', '*', '•'])
                .trim();

            // Running out of fields short-circuits via `?`, which also
            // covers blank lines.
            let mut fields = entry.splitn(3, '|').map(str::trim);
            let name = fields.next()?;
            let attr = fields.next()?;
            let val = fields.next()?;

            if name.is_empty() || attr.is_empty() || val.is_empty() {
                return None;
            }

            let is_header_row = name.eq_ignore_ascii_case("character")
                && attr.eq_ignore_ascii_case("attribute");
            if is_header_row {
                return None;
            }

            Some(CharacterFact {
                character: name.to_string(),
                attribute: attr.to_lowercase().replace(' ', "_"),
                value: val.to_string(),
                chapter: chapter.to_string(),
            })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor for a [`CharacterFact`].
    fn fact(c: &str, a: &str, v: &str, ch: &str) -> CharacterFact {
        CharacterFact {
            character: c.to_string(),
            attribute: a.to_string(),
            value: v.to_string(),
            chapter: ch.to_string(),
        }
    }

    /// Wraps `facts` in a bible whose remaining fields keep their defaults.
    fn bible_of(facts: Vec<CharacterFact>) -> ContinuityBible {
        ContinuityBible {
            facts,
            ..Default::default()
        }
    }

    // ---- detect_drift ----

    #[test]
    fn flags_changed_attribute_across_chapters() {
        let bible = bible_of(vec![
            fact("Helena", "eye_color", "green", "Ch1"),
            fact("Helena", "eye_color", "brown", "Ch9"),
        ]);
        let d = detect_drift(&bible, "english");
        assert_eq!(d.len(), 1);
        let drift = &d[0];
        assert_eq!(drift.character, "Helena");
        assert_eq!(drift.attribute, "eye_color");
        assert_eq!(drift.conflicts.len(), 2);
    }

    #[test]
    fn consistent_attribute_not_flagged() {
        let bible = bible_of(vec![
            fact("Helena", "eye_color", "green", "Ch1"),
            fact("Helena", "eye_color", "green", "Ch9"),
        ]);
        assert!(detect_drift(&bible, "english").is_empty());
    }

    #[test]
    fn inflected_value_not_flagged_via_stemmer() {
        let bible = bible_of(vec![
            fact("Helena", "eyes", "green eyes", "Ch1"),
            fact("Helena", "eyes", "green eye", "Ch9"),
        ]);
        assert!(
            detect_drift(&bible, "english").is_empty(),
            "stemmer should collapse eyes/eye",
        );
    }

    #[test]
    fn russian_inflected_value_not_flagged() {
        let bible = bible_of(vec![
            fact("Елена", "глаза", "зелёные", "Гл1"),
            fact("Елена", "глаза", "зелёными", "Гл9"),
        ]);
        assert!(
            detect_drift(&bible, "russian").is_empty(),
            "Russian stemmer + ё-fold should collapse the inflections",
        );
    }

    #[test]
    fn russian_genuine_change_flagged() {
        let bible = bible_of(vec![
            fact("Елена", "глаза", "зелёные", "Гл1"),
            fact("Елена", "глаза", "карие", "Гл9"),
        ]);
        let d = detect_drift(&bible, "russian");
        assert_eq!(d.len(), 1);
    }

    #[test]
    fn different_attributes_isolated() {
        let bible = bible_of(vec![
            fact("Helena", "eye_color", "green", "Ch1"),
            fact("Helena", "eye_color", "brown", "Ch9"),
            fact("Helena", "hometown", "Harbor", "Ch1"),
            fact("Helena", "hometown", "Harbor", "Ch9"),
        ]);
        let d = detect_drift(&bible, "english");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].attribute, "eye_color");
    }

    #[test]
    fn different_characters_isolated() {
        let bible = bible_of(vec![
            fact("Helena", "eye_color", "green", "Ch1"),
            fact("Marcus", "eye_color", "brown", "Ch1"),
        ]);
        assert!(detect_drift(&bible, "english").is_empty());
    }

    #[test]
    fn case_insensitive_grouping() {
        let bible = bible_of(vec![
            fact("helena", "Eye_Color", "green", "Ch1"),
            fact("Helena", "eye_color", "brown", "Ch9"),
        ]);
        assert_eq!(detect_drift(&bible, "english").len(), 1);
    }

    // ---- ContinuityBible persistence and helpers ----

    #[test]
    fn sidecar_round_trips() {
        let tmp = tempfile::tempdir().unwrap();
        let bible = ContinuityBible {
            version: "1.2.19".into(),
            language: "english".into(),
            facts: vec![fact("Helena", "eye_color", "green", "Ch1")],
            manuscript_fingerprint: 0,
        };
        bible.save(tmp.path()).unwrap();
        let loaded = ContinuityBible::load(tmp.path()).unwrap();
        assert_eq!(loaded.facts, bible.facts);
        assert_eq!(loaded.language, "english");
    }

    #[test]
    fn load_missing_sidecar_is_empty() {
        let tmp = tempfile::tempdir().unwrap();
        let b = ContinuityBible::load(tmp.path()).unwrap();
        assert!(b.facts.is_empty());
    }

    #[test]
    fn characters_sorted_and_deduped() {
        let bible = bible_of(vec![
            fact("Marcus", "a", "x", "Ch1"),
            fact("Helena", "a", "x", "Ch1"),
            fact("Helena", "b", "y", "Ch2"),
        ]);
        assert_eq!(bible.characters(), vec!["Helena", "Marcus"]);
    }

    // ---- parse_extraction ----

    #[test]
    fn parses_pipe_delimited_facts() {
        let raw = "Helena | eye color | green\n\
                   Helena | hometown | the Harbor\n\
                   Marcus | occupation | ledger-keeper";
        let f = parse_extraction(raw, "Chapter 1");
        assert_eq!(f.len(), 3);
        let first = &f[0];
        assert_eq!(first.character, "Helena");
        assert_eq!(first.attribute, "eye_color");
        assert_eq!(first.value, "green");
        assert_eq!(first.chapter, "Chapter 1");
    }

    #[test]
    fn extraction_skips_malformed_and_preamble() {
        let raw = "Here are the facts I found:\n\
                   \n\
                   - Helena | eye color | green\n\
                   this line has no pipes\n\
                   character | attribute | value\n\
                   Marcus | mood |\n\
                   * Marcus | weapon | a lacquered box";
        let f = parse_extraction(raw, "Ch1");
        assert_eq!(f.len(), 2);
        assert_eq!(f[0].character, "Helena");
        assert_eq!(f[1].character, "Marcus");
        assert_eq!(f[1].attribute, "weapon");
    }

    #[test]
    fn extraction_strips_list_markers() {
        let raw = "• Helena | hair | dark";
        let f = parse_extraction(raw, "Ch1");
        assert_eq!(f.len(), 1);
        assert_eq!(f[0].character, "Helena");
        assert_eq!(f[0].value, "dark");
    }
}