use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Kind of entity that a set of descriptions or a drift conflict refers to.
///
/// Serialized by serde as the lowercase variant name (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EntityKind {
/// Labelled `"character"`.
Character,
/// Labelled `"place"`.
Place,
/// Labelled `"artefact"`.
Artefact,
}
impl EntityKind {
pub fn label(self) -> &'static str {
match self {
Self::Character => "character",
Self::Place => "place",
Self::Artefact => "artefact",
}
}
}
/// One passage that describes an entity, as produced by `assemble_descriptions`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DescriptionSnippet {
/// Title of the chapter the passage comes from (copied from `Candidate::chapter_title`).
pub chapter: String,
/// Identifier of the source paragraph.
pub paragraph: Uuid,
/// Full text of the passage.
pub text: String,
}
/// All description snippets collected for a single entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityDescriptions {
/// Name of the entity.
pub entity: String,
/// What kind of entity this is.
pub kind: EntityKind,
/// Passages describing the entity.
pub snippets: Vec<DescriptionSnippet>,
}
/// A paragraph offered to `assemble_descriptions` as a possible description of an entity.
#[derive(Debug, Clone)]
pub struct Candidate {
/// Identifier of the paragraph; used to drop repeated candidates.
pub paragraph: Uuid,
/// Sort key that puts the selected candidates in chapter order.
pub chapter_order: usize,
/// Chapter title, copied into the resulting snippet's `chapter` field.
pub chapter_title: String,
/// Paragraph text; searched case-insensitively for the entity name.
pub text: String,
}
/// Selects the passages that mention `entity` and returns them as snippets in chapter order.
///
/// `candidates` are scanned in the order given. A candidate is selected when its text
/// contains the trimmed entity name (compared in lowercase) and its paragraph id has not
/// already been selected. Scanning stops once `max_snippets` candidates have been selected,
/// so the cap favours candidates that appear earlier in the input. The selected candidates
/// are then stably sorted by `chapter_order`.
///
/// Returns an empty vector when the trimmed entity name is empty or `max_snippets` is zero.
pub fn assemble_descriptions(
    entity: &str,
    candidates: &[Candidate],
    max_snippets: usize,
) -> Vec<DescriptionSnippet> {
    let needle = entity.trim().to_lowercase();
    if max_snippets == 0 || needle.is_empty() {
        return Vec::new();
    }

    let mut seen_paragraphs = std::collections::HashSet::new();
    let mut selected: Vec<&Candidate> = candidates
        .iter()
        // Mention check comes first so a non-matching candidate never claims a paragraph id.
        .filter(|cand| cand.text.to_lowercase().contains(&needle))
        .filter(|cand| seen_paragraphs.insert(cand.paragraph))
        .take(max_snippets)
        .collect();

    // Stable sort: candidates from the same chapter keep their input order.
    selected.sort_by_key(|cand| cand.chapter_order);

    selected
        .into_iter()
        .map(|cand| DescriptionSnippet {
            chapter: cand.chapter_title.clone(),
            paragraph: cand.paragraph,
            text: cand.text.clone(),
        })
        .collect()
}
/// A pair of passages about the same entity that contradict each other.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DriftConflict {
/// Name of the entity the conflict is about.
pub entity: String,
/// Kind of that entity.
pub kind: EntityKind,
/// Quote from the first passage of the pair.
pub a: String,
/// Quote from the second passage of the pair.
pub b: String,
/// Chapter of passage `a`.
pub chapter_a: String,
/// Chapter of passage `b`.
pub chapter_b: String,
/// Paragraph id of passage `b`, when known. Omitted from the serialized form when `None`
/// and defaults to `None` when absent on load.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub paragraph_b: Option<Uuid>,
/// Explanation of how the two passages disagree.
pub detail: String,
}
/// Report persisted as JSON at the path given by `DriftReport::sidecar_path`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DriftReport {
/// Version string; empty when missing from the file.
#[serde(default)]
pub version: String,
/// Hash stored with the report (see `DriftReport::compute_hash`); `0` when missing from the file.
#[serde(default)]
pub content_hash: u64,
/// Conflicts found. NOTE(review): this field has no `#[serde(default)]`, so a file without
/// a `conflicts` key fails to deserialize — confirm that is intended.
pub conflicts: Vec<DriftConflict>,
/// Entity descriptions stored with the report; empty when missing from the file.
#[serde(default)]
pub descriptions: Vec<EntityDescriptions>,
}
impl DriftReport {
    /// Path of the report sidecar: `<project_root>/.inkhaven/drift.json`.
    pub fn sidecar_path(project_root: &Path) -> PathBuf {
        project_root.join(".inkhaven").join("drift.json")
    }

    /// Loads the report stored under `project_root`.
    ///
    /// A missing sidecar file is not an error: it yields `Self::default()`.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidData` when the file is not valid report JSON, and passes
    /// through any other I/O error from reading the file.
    pub fn load(project_root: &Path) -> std::io::Result<Self> {
        let path = Self::sidecar_path(project_root);
        match std::fs::read_to_string(&path) {
            Ok(s) => serde_json::from_str(&s)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e)),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Self::default()),
            Err(e) => Err(e),
        }
    }

    /// Writes the report as pretty-printed JSON to the sidecar path, creating the parent
    /// directory first if needed.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidData` when serialization fails, and passes through I/O
    /// errors from creating the directory or from `crate::io_atomic::write`.
    pub fn save(&self, project_root: &Path) -> std::io::Result<()> {
        let path = Self::sidecar_path(project_root);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let body = serde_json::to_vec_pretty(self)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
        crate::io_atomic::write(&path, &body)
    }

    /// Computes a hash over every `(entity, chapter, text)` triple in `descs`.
    ///
    /// The triples are sorted before hashing, so the result does not depend on the order of
    /// entities or of snippets within an entity. Paragraph ids and entity kinds do not
    /// contribute.
    ///
    /// The value is meant to be stored in `content_hash` and compared later, so it must not
    /// change between builds. `std`'s `DefaultHasher` documents its algorithm as unspecified
    /// across Rust releases, so this uses a fixed 64-bit FNV-1a fold instead. Hashes stored
    /// by a build that used `DefaultHasher` will not match the values produced here.
    pub fn compute_hash(descs: &[EntityDescriptions]) -> u64 {
        const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
        const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
        // 0xff never occurs in UTF-8, so it marks row boundaries unambiguously.
        const ROW_END: u8 = 0xff;

        let mut rows: Vec<String> = Vec::new();
        for d in descs {
            for s in &d.snippets {
                rows.push(format!("{}\u{1}{}\u{1}{}", d.entity, s.chapter, s.text));
            }
        }
        // Equal rows are indistinguishable, so an unstable sort is sufficient.
        rows.sort_unstable();

        let mut hash = FNV_OFFSET_BASIS;
        for row in &rows {
            for byte in row.bytes().chain(std::iter::once(ROW_END)) {
                hash ^= u64::from(byte);
                hash = hash.wrapping_mul(FNV_PRIME);
            }
        }
        hash
    }
}
/// Parses `i | j | reason` lines out of `raw` into zero-based index pairs.
///
/// Each line is trimmed and stripped of leading bullet characters (`-`, `*`, `•`, spaces),
/// then split on the first two `|`; everything after the second `|` is the reason, so the
/// reason itself may contain `|`. Non-digit characters around each index (for example
/// `[1]`) are ignored.
///
/// A line is skipped when it has fewer than three fields, the reason is empty, an index
/// does not parse, an index lies outside `1..=n`, or both indices are equal.
///
/// Returns `(i - 1, j - 1, reason)` for every accepted line, in input order.
pub fn parse_drift_pairs(raw: &str, n: usize) -> Vec<(usize, usize, String)> {
    /// Extracts a number from a field after discarding surrounding non-digit characters.
    fn one_based_index(field: &str) -> Option<usize> {
        field
            .trim_matches(|ch: char| !ch.is_ascii_digit())
            .parse::<usize>()
            .ok()
    }

    let in_range = |k: usize| (1..=n).contains(&k);

    raw.lines()
        .filter_map(|row| {
            let row = row.trim().trim_start_matches(['-', '*', '•', ' ']).trim();
            let mut fields = row.splitn(3, '|').map(str::trim);
            let first = fields.next()?;
            let second = fields.next()?;
            let reason = fields.next()?;
            if reason.is_empty() {
                return None;
            }
            let i = one_based_index(first)?;
            let j = one_based_index(second)?;
            (in_range(i) && in_range(j) && i != j).then(|| (i - 1, j - 1, reason.to_string()))
        })
        .collect()
}
/// Turns index pairs into `DriftConflict` records for one entity.
///
/// Each pair indexes into `snippets`; pairs with an out-of-range index are skipped. Within a
/// pair, the snippet with the lower index becomes side `a` and the other becomes side `b`,
/// regardless of the order the indices were given in. Both texts are shortened with
/// `quote`, `paragraph_b` points at the side-`b` paragraph, and the pair's reason is trimmed
/// into `detail`.
pub fn resolve_conflicts(
    entity: &str,
    kind: EntityKind,
    snippets: &[DescriptionSnippet],
    pairs: &[(usize, usize, String)],
) -> Vec<DriftConflict> {
    pairs
        .iter()
        .filter_map(|(i, j, why)| {
            let first = snippets.get(*i)?;
            let second = snippets.get(*j)?;
            // Lower index goes first, so side `a` is the earlier snippet in the slice.
            let (earlier, later) = if *i > *j {
                (second, first)
            } else {
                (first, second)
            };
            Some(DriftConflict {
                entity: entity.to_string(),
                kind,
                a: quote(&earlier.text),
                b: quote(&later.text),
                chapter_a: earlier.chapter.clone(),
                chapter_b: later.chapter.clone(),
                paragraph_b: Some(later.paragraph),
                detail: why.trim().to_string(),
            })
        })
        .collect()
}
/// Collapses `text` onto one line and caps it at 120 characters.
///
/// Runs of whitespace become single spaces and leading/trailing whitespace is dropped.
/// When the collapsed text is longer than 120 characters, the first 120 are kept, trailing
/// whitespace is trimmed from them, and `…` is appended.
fn quote(text: &str) -> String {
    const MAX_CHARS: usize = 120;

    let words: Vec<&str> = text.split_whitespace().collect();
    let one_line = words.join(" ");

    // A character at index MAX_CHARS exists only when the text is over the limit; its byte
    // offset is where the kept prefix ends.
    match one_line.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}…", one_line[..cut].trim_end()),
        None => one_line,
    }
}
// Unit tests for snippet assembly, drift-pair parsing, conflict resolution, and the
// report's hashing and save/load round trip.
#[cfg(test)]
mod tests {
use super::*;
// Builds a candidate with a freshly generated paragraph id.
fn cand(order: usize, chapter: &str, text: &str) -> Candidate {
Candidate {
paragraph: Uuid::now_v7(),
chapter_order: order,
chapter_title: chapter.into(),
text: text.into(),
}
}
// Only paragraphs whose text contains the entity name survive.
#[test]
fn keeps_only_paragraphs_that_mention_the_entity() {
let cands = vec![
cand(2, "ch-2", "The Drunken Goose was cramped and smoky."),
cand(5, "ch-5", "The inn down the road smelled of woodsmoke."), cand(8, "ch-8", "By winter the Drunken Goose felt airy and bright."),
];
let out = assemble_descriptions("The Drunken Goose", &cands, 8);
assert_eq!(out.len(), 2, "the un-named inn paragraph is filtered out");
assert!(out[0].text.contains("cramped"));
assert!(out[1].text.contains("airy"));
}
// Two candidates sharing a paragraph id count once; the cap applies in input order and
// the result is then presented in chapter order.
#[test]
fn dedups_and_orders_by_chapter_then_caps_by_relevance() {
let p = Uuid::now_v7();
let dup_a = Candidate { paragraph: p, chapter_order: 9, chapter_title: "ch-9".into(), text: "Mara spoke softly.".into() };
let dup_b = Candidate { paragraph: p, chapter_order: 9, chapter_title: "ch-9".into(), text: "Mara spoke softly.".into() };
let cands = vec![
dup_a,
dup_b,
cand(1, "ch-1", "Mara, soft-spoken as ever."),
cand(4, "ch-4", "Mara's voice boomed across the hall."),
];
let out = assemble_descriptions("Mara", &cands, 2);
assert_eq!(out.len(), 2, "dup collapses, cap=2 honoured");
assert_eq!(out[0].chapter, "ch-1", "presented in chapter order");
assert_eq!(out[1].chapter, "ch-9");
}
// An empty entity name or a zero cap short-circuits to an empty result.
#[test]
fn empty_entity_or_zero_cap_returns_nothing() {
let cands = vec![cand(1, "ch-1", "anything")];
assert!(assemble_descriptions("", &cands, 8).is_empty());
assert!(assemble_descriptions("x", &cands, 0).is_empty());
}
// Header rows, self-references, out-of-range indices and pipe-less lines are all dropped;
// accepted pairs come back zero-based.
#[test]
fn parse_drift_pairs_reads_indices_and_skips_noise() {
let raw = "\
i | j | why\n\
- [1] | [2] | cramped vs airy\n\
2 | 4 | soft vs booming\n\
3 | 3 | self-reference (dropped)\n\
9 | 1 | out of range (dropped)\n\
none\n\
gibberish without a pipe\n";
let pairs = parse_drift_pairs(raw, 4);
assert_eq!(pairs, vec![(0, 1, "cramped vs airy".into()), (1, 3, "soft vs booming".into())]);
}
// A pair given as (later, earlier) is still reported with the lower-indexed snippet as `a`,
// and `paragraph_b` points at the other one.
#[test]
fn resolve_conflicts_orders_earlier_first_and_sets_jump() {
let s1 = DescriptionSnippet { chapter: "ch-2".into(), paragraph: Uuid::now_v7(), text: "cramped and smoky".into() };
let s2 = DescriptionSnippet { chapter: "ch-20".into(), paragraph: Uuid::now_v7(), text: "airy and bright".into() };
let snippets = vec![s1.clone(), s2.clone()];
let pairs = vec![(1, 0, "atmosphere flipped".to_string())];
let out = resolve_conflicts("The Drunken Goose", EntityKind::Place, &snippets, &pairs);
assert_eq!(out.len(), 1);
let c = &out[0];
assert_eq!(c.chapter_a, "ch-2", "earlier chapter is a");
assert_eq!(c.chapter_b, "ch-20");
assert_eq!(c.paragraph_b, Some(s2.paragraph), "jump targets the later, divergent passage");
assert_eq!(c.kind, EntityKind::Place);
assert!(c.a.contains("cramped") && c.b.contains("airy"));
}
// The hash must not depend on entity order, and a saved report must load back with the
// same conflicts and hash.
#[test]
fn report_hash_is_order_independent_and_round_trips() {
let mk = |ch: &str, t: &str| DescriptionSnippet { chapter: ch.into(), paragraph: Uuid::now_v7(), text: t.into() };
let a = EntityDescriptions { entity: "Mara".into(), kind: EntityKind::Character, snippets: vec![mk("ch-1", "soft"), mk("ch-4", "loud")] };
let b = EntityDescriptions { entity: "Goose".into(), kind: EntityKind::Place, snippets: vec![mk("ch-2", "smoky")] };
let h1 = DriftReport::compute_hash(&[a.clone(), b.clone()]);
let h2 = DriftReport::compute_hash(&[b, a]);
assert_eq!(h1, h2, "hash ignores entity/snippet order");
// Save into a temporary directory and read the sidecar back.
let dir = tempfile::tempdir().unwrap();
let report = DriftReport {
version: "x".into(),
content_hash: h1,
conflicts: vec![DriftConflict {
entity: "Mara".into(),
kind: EntityKind::Character,
a: "soft".into(),
b: "loud".into(),
chapter_a: "ch-1".into(),
chapter_b: "ch-4".into(),
paragraph_b: Some(Uuid::now_v7()),
detail: "voice flipped".into(),
}],
descriptions: Vec::new(),
};
report.save(dir.path()).unwrap();
let loaded = DriftReport::load(dir.path()).unwrap();
assert_eq!(loaded.conflicts, report.conflicts);
assert_eq!(loaded.content_hash, h1);
}
}