use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::config::Config;
use crate::error::{Error, Result};
use crate::project::ProjectLayout;
use crate::store::Store;
/// Category of problem that the project scanner can report.
///
/// Serialized by serde in kebab-case (for example `zero-byte-file`), which is
/// the same spelling `ScanClass::slug` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum ScanClass {
/// A `.typ` file exists on disk with length 0 and the store holds no
/// content for its node.
ZeroByteFile,
/// A node's file is missing on disk, the store holds no content for it,
/// and the relative path is non-empty and contains no `..`.
OrphanParagraphRow,
/// A node's file is missing with an empty or `..`-containing relative
/// path, or its metadata could not be read for a reason other than
/// "not found".
MissingReferencedFile,
/// A `.comments.json` sidecar exists, is not blank, and fails to parse as
/// JSON.
CorruptCommentsSidecar,
/// The disk file is missing or 0 bytes, but the store still holds
/// non-empty content for the node.
BdslibOnly,
/// A character name shows up in the early chapters but not in the final
/// ones.
DroppedCharacter,
/// A chapter's word count is far above or below the mean of the chapters
/// just before it.
PacingCollapse,
/// A thread has no waypoints, or none of its waypoint files was modified
/// recently.
StalledThread,
/// Prose contains a close misspelling of a multi-word canonical name.
NamingInconsistency,
}
impl ScanClass {
pub fn slug(&self) -> &'static str {
match self {
ScanClass::ZeroByteFile => "zero-byte-file",
ScanClass::OrphanParagraphRow => "orphan-paragraph-row",
ScanClass::MissingReferencedFile => "missing-referenced-file",
ScanClass::CorruptCommentsSidecar => "corrupt-comments-sidecar",
ScanClass::BdslibOnly => "bdslib-only",
ScanClass::DroppedCharacter => "dropped-character",
ScanClass::PacingCollapse => "pacing-collapse",
ScanClass::StalledThread => "stalled-thread",
ScanClass::NamingInconsistency => "naming-inconsistency",
}
}
pub fn from_slug(s: &str) -> Option<Self> {
Some(match s {
"zero-byte-file" => ScanClass::ZeroByteFile,
"orphan-paragraph-row" => ScanClass::OrphanParagraphRow,
"missing-referenced-file" => ScanClass::MissingReferencedFile,
"corrupt-comments-sidecar" => ScanClass::CorruptCommentsSidecar,
"bdslib-only" => ScanClass::BdslibOnly,
"dropped-character" => ScanClass::DroppedCharacter,
"pacing-collapse" => ScanClass::PacingCollapse,
"stalled-thread" => ScanClass::StalledThread,
"naming-inconsistency" => ScanClass::NamingInconsistency,
_ => return None,
})
}
pub const ALL: [ScanClass; 9] = [
ScanClass::ZeroByteFile,
ScanClass::OrphanParagraphRow,
ScanClass::MissingReferencedFile,
ScanClass::CorruptCommentsSidecar,
ScanClass::BdslibOnly,
ScanClass::DroppedCharacter,
ScanClass::PacingCollapse,
ScanClass::StalledThread,
ScanClass::NamingInconsistency,
];
}
/// How serious a finding is.
///
/// Serialized by serde in lowercase (`critical`, `warning`, `info`), matching
/// `ScanSeverity::slug`. `severity_at_or_above` ranks `Critical` above
/// `Warning` above `Info`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ScanSeverity {
/// Highest rank; used by the scanners for zero-byte files whose content is
/// also absent from the store.
Critical,
/// Middle rank; used for missing files and unparseable comment sidecars.
Warning,
/// Lowest rank; used for recoverable and author-judgment findings.
Info,
}
impl ScanSeverity {
pub fn slug(&self) -> &'static str {
match self {
ScanSeverity::Critical => "critical",
ScanSeverity::Warning => "warning",
ScanSeverity::Info => "info",
}
}
}
/// One problem reported by a scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanFinding {
/// Category of the problem.
pub class: ScanClass,
/// How serious the problem is.
pub severity: ScanSeverity,
/// Path of the affected file as built by the scanner (project root joined
/// with the node's file), or `None` for findings that are not tied to a
/// single file.
pub path: Option<String>,
/// Human-readable explanation of the finding.
pub detail: String,
}
/// Result of scanning one project: metadata plus the list of findings.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScanReport {
/// Version string; `ScanReport::new` fills it from `CARGO_PKG_VERSION`.
pub version: String,
/// Creation timestamp; `ScanReport::new` writes the current UTC time as
/// `%Y-%m-%dT%H:%M:%SZ`.
pub generated_at: String,
/// Display form of the project root the report was created for.
pub project_root: String,
/// Findings in the order the scanners produced them.
pub findings: Vec<ScanFinding>,
}
impl ScanReport {
    /// Creates an empty report for `project_root`, stamped with the crate
    /// version and the current UTC time.
    pub fn new(project_root: &Path) -> Self {
        let version = env!("CARGO_PKG_VERSION").to_string();
        let generated_at = chrono::Utc::now()
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        Self {
            version,
            generated_at,
            project_root: project_root.display().to_string(),
            findings: Vec::new(),
        }
    }

    /// Counts the findings whose severity ranks at least as high as
    /// `severity`.
    pub fn count_at_or_above(&self, severity: ScanSeverity) -> usize {
        let mut total = 0usize;
        for finding in &self.findings {
            if severity_at_or_above(finding.severity, severity) {
                total += 1;
            }
        }
        total
    }
}
/// Returns `true` when `have` ranks at least as high as `want`, with
/// `Critical` above `Warning` above `Info`.
fn severity_at_or_above(have: ScanSeverity, want: ScanSeverity) -> bool {
    /// Numeric rank of a severity; larger means more severe.
    fn weight(level: ScanSeverity) -> u8 {
        match level {
            ScanSeverity::Critical => 3,
            ScanSeverity::Warning => 2,
            ScanSeverity::Info => 1,
        }
    }

    weight(want) <= weight(have)
}
pub fn scan_project(
project: &Path,
selected: Option<ScanClass>,
) -> Result<ScanReport> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load(&layout.config_path())?;
let store = Store::open(layout.clone(), &cfg).map_err(|e| Error::Store(e.to_string()))?;
let hierarchy =
crate::store::hierarchy::Hierarchy::load(&store).map_err(|e| Error::Store(e.to_string()))?;
let mut report = ScanReport::new(&layout.root);
let run = |c: ScanClass| selected.map_or(true, |s| s == c);
if run(ScanClass::ZeroByteFile) || run(ScanClass::BdslibOnly) {
for finding in scan_zero_byte_files(&layout, &hierarchy, &store) {
if run(finding.class) {
report.findings.push(finding);
}
}
}
if run(ScanClass::OrphanParagraphRow)
|| run(ScanClass::MissingReferencedFile)
|| run(ScanClass::BdslibOnly)
{
for finding in scan_orphans_and_missing(&layout, &hierarchy, &store) {
if run(finding.class) {
report.findings.push(finding);
}
}
}
if run(ScanClass::CorruptCommentsSidecar) {
report.findings.extend(scan_corrupt_comments(&layout, &hierarchy));
}
if run(ScanClass::DroppedCharacter) {
report.findings.extend(scan_dropped_characters(&layout, &hierarchy));
}
if run(ScanClass::PacingCollapse) {
report.findings.extend(scan_pacing_collapse(&layout, &hierarchy));
}
if run(ScanClass::StalledThread) {
report.findings.extend(scan_stalled_threads(&layout, &hierarchy));
}
if run(ScanClass::NamingInconsistency) {
report.findings.extend(scan_naming_inconsistencies(&layout, &hierarchy));
}
Ok(report)
}
/// Length of the store's content for `id`, or `None` when the lookup fails,
/// nothing is stored, or the stored content is empty.
fn bdslib_content_len(store: &Store, id: uuid::Uuid) -> Option<usize> {
    store
        .get_content(id)
        .ok()
        .flatten()
        .filter(|bytes| !bytes.is_empty())
        .map(|bytes| bytes.len())
}
/// Reports every `.typ` node file that exists on disk with length 0.
///
/// When the store still holds content for the node the finding is
/// `BdslibOnly` / `Info`; otherwise it is `ZeroByteFile` / `Critical`.
/// Files whose metadata cannot be read are skipped here.
fn scan_zero_byte_files(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
    store: &Store,
) -> Vec<ScanFinding> {
    let mut out: Vec<ScanFinding> = Vec::new();
    for node in hierarchy.iter() {
        let rel = match node.file.as_ref() {
            Some(rel) => rel,
            None => continue,
        };
        if !rel.ends_with(".typ") {
            continue;
        }
        let abs = layout.root.join(rel);
        let empty_on_disk = matches!(std::fs::metadata(&abs), Ok(md) if md.len() == 0);
        if !empty_on_disk {
            continue;
        }
        let path = Some(abs.display().to_string());
        let finding = if let Some(n) = bdslib_content_len(store, node.id) {
            ScanFinding {
                class: ScanClass::BdslibOnly,
                severity: ScanSeverity::Info,
                path,
                detail: format!(
                    "paragraph `{}` has 0-byte disk file but bdslib holds {} bytes — re-save in the editor or autofix to rematerialize",
                    node.slug, n,
                ),
            }
        } else {
            ScanFinding {
                class: ScanClass::ZeroByteFile,
                severity: ScanSeverity::Critical,
                path,
                detail: format!(
                    "paragraph `{}` resolves to a 0-byte file AND bdslib has no content — prose lost",
                    node.slug,
                ),
            }
        };
        out.push(finding);
    }
    out
}
/// Reports nodes whose referenced file cannot be stat'ed.
///
/// * File not found, store has content: `BdslibOnly` / `Info`.
/// * File not found, store has no content: `MissingReferencedFile` when the
///   relative path is empty or contains `..`, else `OrphanParagraphRow`
///   (both `Warning`).
/// * Any other metadata error: `MissingReferencedFile` / `Warning`, with the
///   I/O error in the detail.
fn scan_orphans_and_missing(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
    store: &Store,
) -> Vec<ScanFinding> {
    let mut out: Vec<ScanFinding> = Vec::new();
    for node in hierarchy.iter() {
        let Some(rel) = node.file.as_ref() else { continue };
        let abs = layout.root.join(rel);
        let e = match std::fs::metadata(&abs) {
            Ok(_) => continue,
            Err(e) => e,
        };
        let path = Some(abs.display().to_string());

        if e.kind() != std::io::ErrorKind::NotFound {
            out.push(ScanFinding {
                class: ScanClass::MissingReferencedFile,
                severity: ScanSeverity::Warning,
                path,
                detail: format!(
                    "paragraph row `{}` -> {}: {e}",
                    node.slug,
                    abs.display(),
                ),
            });
            continue;
        }

        match bdslib_content_len(store, node.id) {
            Some(n) => out.push(ScanFinding {
                class: ScanClass::BdslibOnly,
                severity: ScanSeverity::Info,
                path,
                detail: format!(
                    "paragraph `{}` has no disk file but bdslib holds {} bytes — recoverable",
                    node.slug, n,
                ),
            }),
            None => {
                let suspicious = rel.is_empty() || rel.contains("..");
                let class = if suspicious {
                    ScanClass::MissingReferencedFile
                } else {
                    ScanClass::OrphanParagraphRow
                };
                out.push(ScanFinding {
                    class,
                    severity: ScanSeverity::Warning,
                    path,
                    detail: format!(
                        "paragraph row `{}` points at missing file {} and bdslib has no content either",
                        node.slug,
                        abs.display(),
                    ),
                });
            }
        }
    }
    out
}
/// Reports `.comments.json` sidecars of `.typ` node files that exist, are not
/// blank, and do not parse as JSON. Missing, unreadable, or blank sidecars
/// are skipped.
fn scan_corrupt_comments(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<ScanFinding> {
    let mut out: Vec<ScanFinding> = Vec::new();
    for node in hierarchy.iter() {
        let rel = match node.file.as_ref() {
            Some(rel) if rel.ends_with(".typ") => rel,
            _ => continue,
        };
        let sidecar = sidecar_path_for(&layout.root.join(rel));
        if !sidecar.exists() {
            continue;
        }
        let raw = match std::fs::read_to_string(&sidecar) {
            Ok(raw) => raw,
            Err(_) => continue,
        };
        if raw.trim().is_empty() {
            continue;
        }
        let parses = serde_json::from_str::<serde_json::Value>(&raw).is_ok();
        if parses {
            continue;
        }
        out.push(ScanFinding {
            class: ScanClass::CorruptCommentsSidecar,
            severity: ScanSeverity::Warning,
            path: Some(sidecar.display().to_string()),
            detail: format!(
                "comments sidecar for `{}` doesn't parse as JSON",
                node.slug,
            ),
        });
    }
    out
}
/// Path of the comments sidecar for `typ_path`: the same path with
/// `.comments.json` appended to its full name (the extension is kept).
fn sidecar_path_for(typ_path: &Path) -> PathBuf {
    let mut raw = typ_path.to_path_buf().into_os_string();
    raw.push(".comments.json");
    PathBuf::from(raw)
}
pub fn apply_fix(
project: &Path,
finding: &ScanFinding,
) -> Result<String> {
let layout = ProjectLayout::new(project);
layout.require_initialized()?;
let cfg = Config::load(&layout.config_path())?;
let store = Store::open(layout.clone(), &cfg).map_err(|e| Error::Store(e.to_string()))?;
let hierarchy =
crate::store::hierarchy::Hierarchy::load(&store).map_err(|e| Error::Store(e.to_string()))?;
match finding.class {
ScanClass::ZeroByteFile
| ScanClass::OrphanParagraphRow
| ScanClass::MissingReferencedFile => {
let abs = finding
.path
.as_deref()
.ok_or_else(|| Error::Store("finding has no path".into()))?;
let abs_path = std::path::PathBuf::from(abs);
let rel = abs_path
.strip_prefix(&layout.root)
.map_err(|e| Error::Store(format!("path {} not under project root: {e}", abs)))?
.to_string_lossy()
.into_owned();
let mut to_delete: Vec<uuid::Uuid> = Vec::new();
for node in hierarchy.iter() {
if node.file.as_deref() == Some(rel.as_str()) {
to_delete.push(node.id);
}
}
if to_delete.is_empty() {
return Err(Error::Store(format!(
"no DB row matches {rel} — was the project mutated between scan and fix?"
)));
}
store
.delete_subtree(std::path::Path::new(&rel), &to_delete)
.map_err(|e| Error::Store(format!("delete row {rel}: {e}")))?;
Ok(format!(
"deleted {} DB row(s) + file {} ({})",
to_delete.len(),
rel,
finding.class.slug()
))
}
ScanClass::CorruptCommentsSidecar => {
let abs = finding
.path
.as_deref()
.ok_or_else(|| Error::Store("finding has no path".into()))?;
let stamp = chrono::Utc::now().format("%Y%m%dT%H%M%S").to_string();
let dest = format!("{abs}.corrupt-{stamp}.bak");
std::fs::rename(abs, &dest).map_err(Error::Io)?;
Ok(format!(
"moved corrupt sidecar {} → {}",
abs, dest
))
}
ScanClass::BdslibOnly => {
let abs = finding
.path
.as_deref()
.ok_or_else(|| Error::Store("finding has no path".into()))?;
let abs_path = std::path::PathBuf::from(abs);
let rel = abs_path
.strip_prefix(&layout.root)
.map_err(|e| Error::Store(format!("path {} not under project root: {e}", abs)))?
.to_string_lossy()
.into_owned();
let mut found_id: Option<uuid::Uuid> = None;
for node in hierarchy.iter() {
if node.file.as_deref() == Some(rel.as_str()) {
found_id = Some(node.id);
break;
}
}
let id = found_id.ok_or_else(|| {
Error::Store(format!(
"no DB row matches {rel} — was the project mutated between scan and fix?"
))
})?;
let bytes = store
.get_content(id)
.map_err(|e| Error::Store(format!("bdslib read for {rel}: {e}")))?
.ok_or_else(|| {
Error::Store(format!("bdslib has no content for {rel} — refusing to write empty file"))
})?;
if bytes.is_empty() {
return Err(Error::Store(format!(
"bdslib has 0-byte content for {rel} — refusing to write empty file"
)));
}
if let Some(parent) = abs_path.parent() {
std::fs::create_dir_all(parent).map_err(Error::Io)?;
}
if let Ok(md) = std::fs::metadata(&abs_path) {
if md.len() > 0 {
return Err(Error::Store(format!(
"disk file {abs} grew non-empty between scan and fix — refusing to overwrite"
)));
}
}
crate::io_atomic::write(&abs_path, &bytes).map_err(Error::Io)?;
Ok(format!(
"rematerialized {} ({} bytes) from bdslib",
rel,
bytes.len()
))
}
ScanClass::DroppedCharacter
| ScanClass::PacingCollapse
| ScanClass::StalledThread
| ScanClass::NamingInconsistency => Err(Error::Store(format!(
"no autofix for class `{}` — this is an author-judgment finding (review the prose / outline / threads)",
finding.class.slug(),
))),
}
}
/// Appends one `timestamp|OK-or-ERR|class|detail` line describing a fix
/// attempt to `.inkhaven/doctor.log` under `project`.
///
/// Logging is best-effort: failures to create the directory, open the file,
/// or write the line are ignored.
pub fn log_fix(project: &Path, finding: &ScanFinding, outcome: &Result<String>) {
    use std::io::Write;

    let path = project.join(".inkhaven").join("doctor.log");
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ");
    let kind = if outcome.is_ok() { "OK" } else { "ERR" };
    let detail = match outcome {
        Ok(message) => message.clone(),
        Err(error) => error.to_string(),
    };
    // Newlines are flattened so every entry stays on one line.
    let line = format!(
        "{now}|{kind}|{}|{}\n",
        finding.class.slug(),
        detail.replace('\n', " "),
    );
    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&path);
    if let Ok(mut file) = opened {
        let _ = file.write_all(line.as_bytes());
    }
}
/// Prints `report` to stdout in a human-readable layout: a header, then one
/// numbered entry per finding followed by its detail line.
pub fn print_human(report: &ScanReport) {
    println!("Project scan");
    println!(
        " generated_at : {}\n project_root : {}",
        report.generated_at, report.project_root,
    );
    let total = report.findings.len();
    if total == 0 {
        println!(" findings : none — project is clean");
        return;
    }
    println!(" findings : {}", total);
    println!();
    let mut number = 0usize;
    for finding in &report.findings {
        number += 1;
        let path = match finding.path.as_deref() {
            Some(path) => path,
            None => "-",
        };
        println!(
            " [{n}] {sev:>8} · {class:<26} · {path}",
            n = number,
            sev = finding.severity.slug(),
            class = finding.class.slug(),
        );
        println!(" {}", finding.detail);
    }
}
/// Age in days beyond which `scan_stalled_threads` treats a thread's newest
/// waypoint file as stale.
const DORMANT_DAYS: u64 = 30;
/// Leading fraction of the chapters in which a character must first appear
/// for `scan_dropped_characters` to consider reporting it.
const DROPPED_CHARACTER_INTRO_FRACTION: f64 = 0.30;
/// Trailing fraction of the chapters from which a character must be absent
/// to be reported as dropped.
const DROPPED_CHARACTER_OUTRO_FRACTION: f64 = 0.30;
/// A chapter whose word count exceeds this multiple of the trailing mean is
/// reported as notably longer.
const PACING_HIGH_RATIO: f64 = 3.0;
/// A chapter whose word count falls below this multiple of the trailing mean
/// is reported as notably shorter.
const PACING_LOW_RATIO: f64 = 0.30;
/// Number of preceding chapters averaged to form the trailing mean.
const PACING_TRAILING_WINDOW: usize = 5;
/// Reports characters that are introduced early and then disappear.
///
/// Character names are the non-blank titles of paragraph nodes under the
/// characters system book. A name is reported when its first case-insensitive
/// substring match falls in the leading `DROPPED_CHARACTER_INTRO_FRACTION` of
/// the chapters and its last match falls before the trailing
/// `DROPPED_CHARACTER_OUTRO_FRACTION`. Nothing is reported for projects with
/// fewer than five chapters or without a characters book.
fn scan_dropped_characters(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<ScanFinding> {
    use crate::store::{NodeKind, SYSTEM_TAG_CHARACTERS};

    let chars_root = hierarchy.iter().find(|n| {
        n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(SYSTEM_TAG_CHARACTERS)
    });
    let Some(chars_root) = chars_root else {
        return Vec::new();
    };

    let mut character_names: Vec<String> = Vec::new();
    for id in hierarchy.collect_subtree(chars_root.id) {
        let Some(entry) = hierarchy.get(id) else { continue };
        if entry.kind == NodeKind::Paragraph && !entry.title.trim().is_empty() {
            character_names.push(entry.title.clone());
        }
    }
    if character_names.is_empty() {
        return Vec::new();
    }

    let chapter_ordinals = collect_user_book_chapter_ordinals(hierarchy);
    let total_chapters = chapter_ordinals.len();
    if total_chapters < 5 {
        return Vec::new();
    }
    let intro_cap = (total_chapters as f64 * DROPPED_CHARACTER_INTRO_FRACTION) as usize;
    let outro_len = (total_chapters as f64 * DROPPED_CHARACTER_OUTRO_FRACTION) as usize;
    let outro_start = total_chapters.saturating_sub(outro_len);

    // Lowercased chapter bodies, indexed by chapter ordinal; read once and
    // reused for every character.
    let bodies: Vec<String> = chapter_ordinals
        .iter()
        .map(|&chapter| read_chapter_prose(layout, hierarchy, chapter).to_lowercase())
        .collect();

    let mut findings: Vec<ScanFinding> = Vec::new();
    for name in &character_names {
        let needle = name.to_lowercase();
        let Some(first) = bodies.iter().position(|body| body.contains(&needle)) else {
            continue;
        };
        let last = bodies
            .iter()
            .rposition(|body| body.contains(&needle))
            .unwrap_or(first);
        if first >= intro_cap || last >= outro_start {
            continue;
        }
        findings.push(ScanFinding {
            class: ScanClass::DroppedCharacter,
            severity: ScanSeverity::Info,
            path: None,
            detail: format!(
                "character `{name}` first appears in chapter {} (of {}) but is absent from the last {:.0}% (last seen chapter {})",
                first + 1,
                total_chapters,
                DROPPED_CHARACTER_OUTRO_FRACTION * 100.0,
                last + 1,
            ),
        });
    }
    findings
}
/// Counts the words of every user chapter and hands the counts to
/// `classify_pacing`. Returns nothing when there are not more chapters than
/// the trailing window holds.
fn scan_pacing_collapse(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<ScanFinding> {
    let chapter_ordinals = collect_user_book_chapter_ordinals(hierarchy);
    if chapter_ordinals.len() <= PACING_TRAILING_WINDOW {
        return Vec::new();
    }
    let mut counts: Vec<i64> = Vec::with_capacity(chapter_ordinals.len());
    for &chapter in &chapter_ordinals {
        let body = read_chapter_prose(layout, hierarchy, chapter);
        counts.push(crate::progress::count_words(&body));
    }
    classify_pacing(&counts, hierarchy, &chapter_ordinals)
}
/// Flags chapters whose word count deviates strongly from the mean of the
/// `PACING_TRAILING_WINDOW` chapters immediately before them.
///
/// `counts[i]` is the word count of the chapter `chapter_ids[i]`. A chapter is
/// reported when its ratio to the trailing mean is above `PACING_HIGH_RATIO`
/// or below `PACING_LOW_RATIO`; windows with a non-positive mean are skipped.
/// The chapter title comes from `hierarchy`, falling back to
/// `chapter <n>` when the id is unknown.
pub(crate) fn classify_pacing(
    counts: &[i64],
    hierarchy: &crate::store::hierarchy::Hierarchy,
    chapter_ids: &[uuid::Uuid],
) -> Vec<ScanFinding> {
    let mut findings: Vec<ScanFinding> = Vec::new();
    // Each span is the trailing window followed by the chapter under test.
    for (offset, span) in counts.windows(PACING_TRAILING_WINDOW + 1).enumerate() {
        let i = offset + PACING_TRAILING_WINDOW;
        let (window, current) = span.split_at(PACING_TRAILING_WINDOW);
        let count = current[0];
        let mean: f64 = window.iter().sum::<i64>() as f64 / window.len() as f64;
        if mean <= 0.0 {
            continue;
        }
        let ratio = count as f64 / mean;
        let descriptor = if ratio > PACING_HIGH_RATIO {
            "notably longer"
        } else if ratio < PACING_LOW_RATIO {
            "notably shorter"
        } else {
            continue;
        };
        let title = match chapter_ids.get(i).and_then(|id| hierarchy.get(*id)) {
            Some(node) => node.title.clone(),
            None => format!("chapter {}", i + 1),
        };
        findings.push(ScanFinding {
            class: ScanClass::PacingCollapse,
            severity: ScanSeverity::Info,
            path: None,
            detail: format!(
                "chapter `{title}` ({count} words) is {descriptor} than the trailing {} chapters (mean {:.0}, ratio {:.2}×)",
                PACING_TRAILING_WINDOW, mean, ratio,
            ),
        });
    }
    findings
}
/// Reports threads (chapter nodes under the threads system book) that have no
/// waypoints, or whose newest waypoint file was last modified more than
/// `DORMANT_DAYS` days ago.
///
/// Waypoints are the paragraph children of a thread. A thread that has
/// waypoints but no readable modification time on any of them is also
/// reported as stalled.
fn scan_stalled_threads(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<ScanFinding> {
    use crate::store::{NodeKind, SYSTEM_TAG_THREADS};

    let threads_root = hierarchy.iter().find(|n| {
        n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(SYSTEM_TAG_THREADS)
    });
    let Some(threads_root) = threads_root else {
        return Vec::new();
    };
    let threshold = std::time::SystemTime::now()
        - std::time::Duration::from_secs(DORMANT_DAYS * 86400);

    let mut findings: Vec<ScanFinding> = Vec::new();
    for thread in hierarchy.children_of(Some(threads_root.id)) {
        if thread.kind != NodeKind::Chapter {
            continue;
        }

        let mut waypoint_count = 0usize;
        let mut newest: Option<std::time::SystemTime> = None;
        for waypoint in hierarchy.children_of(Some(thread.id)) {
            if waypoint.kind != NodeKind::Paragraph {
                continue;
            }
            waypoint_count += 1;
            let Some(rel) = waypoint.file.as_ref() else { continue };
            let modified = std::fs::metadata(layout.root.join(rel)).and_then(|md| md.modified());
            if let Ok(mtime) = modified {
                // `None` orders below every `Some`, so this keeps the latest time.
                newest = newest.max(Some(mtime));
            }
        }

        if waypoint_count == 0 {
            findings.push(ScanFinding {
                class: ScanClass::StalledThread,
                severity: ScanSeverity::Info,
                path: None,
                detail: format!(
                    "thread `{}` has no waypoints yet",
                    thread.title,
                ),
            });
            continue;
        }

        let stalled = match newest {
            Some(latest) => latest < threshold,
            None => true,
        };
        if stalled {
            findings.push(ScanFinding {
                class: ScanClass::StalledThread,
                severity: ScanSeverity::Info,
                path: None,
                detail: format!(
                    "thread `{}` newest waypoint is > {} days old ({} waypoints total)",
                    thread.title, DORMANT_DAYS, waypoint_count,
                ),
            });
        }
    }
    findings
}
/// Gathers the multi-word canonical names from the characters, places and
/// artefacts books, concatenates the prose of all user chapters, and passes
/// both to `classify_naming_inconsistencies`.
fn scan_naming_inconsistencies(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<ScanFinding> {
    use crate::store::{
        SYSTEM_TAG_ARTEFACTS, SYSTEM_TAG_CHARACTERS, SYSTEM_TAG_PLACES,
    };

    let tags = [SYSTEM_TAG_CHARACTERS, SYSTEM_TAG_PLACES, SYSTEM_TAG_ARTEFACTS];
    let canonical_names = collect_multi_word_canonical_names(hierarchy, &tags);
    if canonical_names.is_empty() {
        return Vec::new();
    }

    let mut prose = String::new();
    for chapter in collect_user_book_chapter_ordinals(hierarchy) {
        let body = read_chapter_prose(layout, hierarchy, chapter);
        prose.push_str(&body);
        prose.push('\n');
    }
    classify_naming_inconsistencies(&canonical_names, &prose)
}
/// Lowercases `text` one character at a time and records, for every byte of
/// the folded output, the byte offset in `text` of the character it came
/// from. The final entry maps the end of the folded string to `text.len()`.
///
/// Lowercasing can change a character's encoded length, so offsets found in
/// the folded string are only valid in `text` after going through this table.
fn fold_case_with_offsets(text: &str) -> (String, Vec<usize>) {
    let mut folded = String::with_capacity(text.len());
    let mut origin: Vec<usize> = Vec::with_capacity(text.len() + 1);
    for (offset, ch) in text.char_indices() {
        folded.extend(ch.to_lowercase());
        origin.resize(folded.len(), offset);
    }
    origin.push(text.len());
    (folded, origin)
}

/// Finds near-miss spellings of multi-word canonical names in `prose`.
///
/// For every canonical name with at least two words, each case-insensitive
/// occurrence of its first word (not preceded by an alphanumeric character or
/// `_`) is followed by as many prose words as the canonical name has after
/// its first. Trailing punctuation is stripped from that candidate. It is
/// reported when it differs from the canonical tail by a Levenshtein distance
/// of at most half the longer of the two (compared in lowercase). Each
/// variant is reported once per canonical name.
///
/// Occurrences are located in a case-folded copy of `prose` and mapped back
/// to the original text, so prose whose lowercase form has a different byte
/// length is sliced at the correct position.
pub(crate) fn classify_naming_inconsistencies(
    canonical_names: &[String],
    prose: &str,
) -> Vec<ScanFinding> {
    let mut findings: Vec<ScanFinding> = Vec::new();
    // Folded once up front: it does not depend on the canonical name.
    let (prose_lc, origin) = fold_case_with_offsets(prose);

    for canonical in canonical_names {
        let parts: Vec<&str> = canonical.split_whitespace().collect();
        if parts.len() < 2 {
            continue;
        }
        let head = parts[0];
        let need_words = parts.len() - 1;
        let canonical_tail = parts[1..].join(" ");
        let canonical_tail_lc = canonical_tail.to_lowercase();
        let canonical_tail_len = canonical_tail.chars().count();
        // Folded the same per-character way as the prose so the two agree.
        let head_lc: String = head.chars().flat_map(char::to_lowercase).collect();
        let mut seen_variants: std::collections::HashSet<String> =
            std::collections::HashSet::new();

        let mut search_start = 0usize;
        while let Some(pos) = prose_lc[search_start..].find(&head_lc) {
            let abs_pos = search_start + pos;
            let prev_char = prose_lc[..abs_pos].chars().next_back().unwrap_or(' ');
            search_start = abs_pos + head_lc.len();
            if prev_char.is_alphanumeric() || prev_char == '_' {
                continue;
            }

            // Translate the end of the match back into the original prose.
            let rest_start = origin[search_start];
            if search_start > 0 && origin[search_start - 1] == rest_start {
                // The match stopped part-way through the lowercase expansion
                // of a single original character, so it is not a real
                // occurrence of the head word.
                continue;
            }
            let rest = &prose[rest_start..];

            let candidate: String = rest
                .split_whitespace()
                .take(need_words)
                .collect::<Vec<&str>>()
                .join(" ");
            let candidate_clean = candidate.trim_end_matches(|c: char| !c.is_alphanumeric());
            if candidate_clean.is_empty() {
                continue;
            }
            let candidate_lc = candidate_clean.to_lowercase();
            if candidate_lc == canonical_tail_lc {
                // Same name apart from letter case: not an inconsistency.
                continue;
            }

            let dist = levenshtein(&candidate_lc, &canonical_tail_lc);
            // `candidate_clean` is non-empty, so `max_len` is at least 1.
            let max_len = candidate_clean.chars().count().max(canonical_tail_len);
            let ratio = dist as f64 / max_len as f64;
            if ratio > 0.5 {
                continue;
            }

            let full = format!("{head} {candidate_clean}");
            if !seen_variants.insert(full.to_lowercase()) {
                continue;
            }
            findings.push(ScanFinding {
                class: ScanClass::NamingInconsistency,
                severity: ScanSeverity::Info,
                path: None,
                detail: format!(
                    "near-miss `{full}` in prose vs. canonical `{canonical}` (edit distance {dist})",
                ),
            });
        }
    }
    findings
}
/// Collects the trimmed titles of paragraph nodes under each system book in
/// `system_tags`, keeping only titles with at least two whitespace-separated
/// words. Tags without a matching book are skipped.
fn collect_multi_word_canonical_names(
    hierarchy: &crate::store::hierarchy::Hierarchy,
    system_tags: &[&str],
) -> Vec<String> {
    use crate::store::NodeKind;

    let mut out: Vec<String> = Vec::new();
    for &tag in system_tags {
        let book = hierarchy
            .iter()
            .find(|n| n.kind == NodeKind::Book && n.system_tag.as_deref() == Some(tag));
        let Some(book) = book else { continue };
        let titles = hierarchy
            .collect_subtree(book.id)
            .into_iter()
            .filter_map(|id| hierarchy.get(id))
            .filter(|n| n.kind == NodeKind::Paragraph)
            .map(|n| n.title.trim())
            // A second word exists exactly when the word count is >= 2.
            .filter(|title| title.split_whitespace().nth(1).is_some())
            .map(|title| title.to_string());
        out.extend(titles);
    }
    out
}
/// Levenshtein edit distance between `a` and `b`, counted in `char`s:
/// the minimum number of single-character insertions, deletions and
/// substitutions that turn one string into the other.
pub(crate) fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // Single rolling row: `row[j]` holds the distance between the processed
    // prefix of `source` and the first `j` characters of `target`.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (i, &from) in source.iter().enumerate() {
        // `diagonal` is the previous row's value one column to the left.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &to) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(from != to);
            let deletion = above + 1;
            let insertion = row[j] + 1;
            row[j + 1] = deletion.min(insertion).min(substitution);
            diagonal = above;
        }
    }
    row[target.len()]
}
/// Ids of all chapter nodes that do not sit under a system-tagged book, in
/// hierarchy iteration order.
fn collect_user_book_chapter_ordinals(
    hierarchy: &crate::store::hierarchy::Hierarchy,
) -> Vec<uuid::Uuid> {
    use crate::store::NodeKind;

    let mut chapters = Vec::new();
    for node in hierarchy.iter() {
        let is_user_chapter = node.kind == NodeKind::Chapter
            && !hierarchy
                .ancestors(node)
                .iter()
                .any(|a| a.kind == NodeKind::Book && a.system_tag.is_some());
        if is_user_chapter {
            chapters.push(node.id);
        }
    }
    chapters
}
/// Concatenates the file contents of every paragraph node in the subtree of
/// `chapter_id`, each followed by a newline. Paragraphs without a file, or
/// whose file cannot be read as text, contribute nothing.
fn read_chapter_prose(
    layout: &ProjectLayout,
    hierarchy: &crate::store::hierarchy::Hierarchy,
    chapter_id: uuid::Uuid,
) -> String {
    use crate::store::NodeKind;

    let paragraphs = hierarchy
        .collect_subtree(chapter_id)
        .into_iter()
        .filter_map(|id| hierarchy.get(id))
        .filter(|p| p.kind == NodeKind::Paragraph);

    let mut body = String::new();
    for paragraph in paragraphs {
        if let Some(rel) = paragraph.file.as_ref() {
            if let Ok(text) = std::fs::read_to_string(layout.root.join(rel)) {
                body.push_str(&text);
                body.push('\n');
            }
        }
    }
    body
}
/// Unit tests for the pure parts of the scanner: slug round-trips, severity
/// ranking, report serialization, pacing classification, edit distance, and
/// near-miss name detection.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn class_slugs_distinct_and_roundtrip() {
        let mut seen = std::collections::HashSet::new();
        for c in ScanClass::ALL {
            assert!(seen.insert(c.slug()));
            assert_eq!(ScanClass::from_slug(c.slug()), Some(c));
        }
        assert_eq!(ScanClass::from_slug("nonsense"), None);
    }

    #[test]
    fn severity_ordering_critical_warning_info() {
        assert!(super::severity_at_or_above(
            ScanSeverity::Critical,
            ScanSeverity::Warning
        ));
        assert!(super::severity_at_or_above(
            ScanSeverity::Warning,
            ScanSeverity::Info
        ));
        assert!(!super::severity_at_or_above(
            ScanSeverity::Info,
            ScanSeverity::Warning
        ));
    }

    #[test]
    fn count_at_or_above_warning() {
        let mut r = ScanReport::new(std::path::Path::new("/tmp/x"));
        r.findings.push(ScanFinding {
            class: ScanClass::ZeroByteFile,
            severity: ScanSeverity::Critical,
            path: None,
            detail: String::new(),
        });
        r.findings.push(ScanFinding {
            class: ScanClass::CorruptCommentsSidecar,
            severity: ScanSeverity::Warning,
            path: None,
            detail: String::new(),
        });
        r.findings.push(ScanFinding {
            class: ScanClass::OrphanParagraphRow,
            severity: ScanSeverity::Info,
            path: None,
            detail: String::new(),
        });
        assert_eq!(r.count_at_or_above(ScanSeverity::Warning), 2);
        assert_eq!(r.count_at_or_above(ScanSeverity::Critical), 1);
        assert_eq!(r.count_at_or_above(ScanSeverity::Info), 3);
    }

    #[test]
    fn sidecar_path_appends_comments_json() {
        let p = std::path::Path::new("/tmp/x/foo.typ");
        let s = sidecar_path_for(p);
        assert_eq!(s.to_string_lossy(), "/tmp/x/foo.typ.comments.json");
    }

    #[test]
    fn report_serialises_roundtrip() {
        let mut r = ScanReport::new(std::path::Path::new("/tmp/x"));
        r.findings.push(ScanFinding {
            class: ScanClass::ZeroByteFile,
            severity: ScanSeverity::Critical,
            path: Some("/tmp/x/foo.typ".into()),
            detail: "prose lost".into(),
        });
        let json = serde_json::to_string(&r).unwrap();
        let parsed: ScanReport = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.findings.len(), 1);
        assert_eq!(parsed.findings[0].class, ScanClass::ZeroByteFile);
        assert_eq!(parsed.findings[0].path.as_deref(), Some("/tmp/x/foo.typ"));
    }

    // The two tests below cover all four author-judgment classes, including
    // `NamingInconsistency`.
    #[test]
    fn new_class_slugs_match_kebab_case_pattern() {
        for class in [
            ScanClass::DroppedCharacter,
            ScanClass::PacingCollapse,
            ScanClass::StalledThread,
            ScanClass::NamingInconsistency,
        ] {
            let slug = class.slug();
            assert_eq!(
                ScanClass::from_slug(slug),
                Some(class),
                "slug `{slug}` should roundtrip"
            );
            assert!(slug.contains('-'), "slug `{slug}` should be kebab-case");
        }
    }

    #[test]
    fn new_classes_are_in_all_const() {
        for class in [
            ScanClass::DroppedCharacter,
            ScanClass::PacingCollapse,
            ScanClass::StalledThread,
            ScanClass::NamingInconsistency,
        ] {
            assert!(
                ScanClass::ALL.contains(&class),
                "{class:?} should be in ScanClass::ALL"
            );
        }
    }

    #[test]
    fn pacing_below_window_size_emits_nothing() {
        let counts: Vec<i64> = vec![5000, 5000, 5000, 5000, 5000];
        let ids: Vec<uuid::Uuid> = (0..counts.len())
            .map(|_| uuid::Uuid::new_v4())
            .collect();
        let hierarchy = empty_hierarchy_for_tests();
        let findings = classify_pacing(&counts, &hierarchy, &ids);
        assert!(findings.is_empty());
    }

    #[test]
    fn pacing_uniform_chapters_emit_nothing() {
        let counts: Vec<i64> = vec![5000; 12];
        let ids: Vec<uuid::Uuid> = (0..counts.len())
            .map(|_| uuid::Uuid::new_v4())
            .collect();
        let hierarchy = empty_hierarchy_for_tests();
        let findings = classify_pacing(&counts, &hierarchy, &ids);
        assert!(findings.is_empty());
    }

    #[test]
    fn pacing_long_outlier_flagged() {
        let counts: Vec<i64> = vec![5000, 5000, 5000, 5000, 5000, 20000];
        let ids: Vec<uuid::Uuid> = (0..counts.len())
            .map(|_| uuid::Uuid::new_v4())
            .collect();
        let hierarchy = empty_hierarchy_for_tests();
        let findings = classify_pacing(&counts, &hierarchy, &ids);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].class, ScanClass::PacingCollapse);
        assert_eq!(findings[0].severity, ScanSeverity::Info);
        assert!(findings[0].detail.contains("notably longer"));
        assert!(findings[0].detail.contains("4.00×"));
    }

    #[test]
    fn pacing_short_outlier_flagged() {
        let counts: Vec<i64> = vec![5000, 5000, 5000, 5000, 5000, 1000];
        let ids: Vec<uuid::Uuid> = (0..counts.len())
            .map(|_| uuid::Uuid::new_v4())
            .collect();
        let hierarchy = empty_hierarchy_for_tests();
        let findings = classify_pacing(&counts, &hierarchy, &ids);
        assert_eq!(findings.len(), 1);
        assert!(findings[0].detail.contains("notably shorter"));
        assert!(findings[0].detail.contains("0.20×"));
    }

    #[test]
    fn pacing_moderate_variation_passes() {
        let counts: Vec<i64> = vec![5000, 5000, 5000, 5000, 5000, 8000];
        let ids: Vec<uuid::Uuid> = (0..counts.len())
            .map(|_| uuid::Uuid::new_v4())
            .collect();
        let hierarchy = empty_hierarchy_for_tests();
        let findings = classify_pacing(&counts, &hierarchy, &ids);
        assert!(findings.is_empty());
    }

    /// Hierarchy with no nodes, so pacing findings fall back to
    /// `chapter <n>` titles.
    fn empty_hierarchy_for_tests() -> crate::store::hierarchy::Hierarchy {
        crate::store::hierarchy::Hierarchy::default()
    }

    #[test]
    fn levenshtein_zero_for_identical() {
        assert_eq!(super::levenshtein("Aerin", "Aerin"), 0);
        assert_eq!(super::levenshtein("", ""), 0);
    }

    #[test]
    fn levenshtein_one_for_single_edit() {
        assert_eq!(super::levenshtein("cat", "bat"), 1);
        assert_eq!(super::levenshtein("cat", "cats"), 1);
        assert_eq!(super::levenshtein("cats", "cat"), 1);
    }

    #[test]
    fn levenshtein_handles_multi_char_distance() {
        assert_eq!(super::levenshtein("Stormbringer", "Stormbreaker"), 3);
    }

    #[test]
    fn naming_flags_near_miss() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "In the morning, Aerin Stormbreaker rode west.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].class, ScanClass::NamingInconsistency);
        assert_eq!(findings[0].severity, ScanSeverity::Info);
        assert!(findings[0].detail.contains("Aerin Stormbreaker"));
        assert!(findings[0].detail.contains("Aerin Stormbringer"));
    }

    #[test]
    fn naming_no_finding_when_canonical_present() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "Aerin Stormbringer rode west. Later, Aerin Stormbringer drew her sword.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert!(findings.is_empty());
    }

    #[test]
    fn naming_dedupes_repeated_variants() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose =
            "Aerin Stormbreaker rode west. Then Aerin Stormbreaker turned back.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert_eq!(findings.len(), 1);
    }

    #[test]
    fn naming_skips_single_word_canonicals() {
        let canonical = vec!["Aerin".to_string()];
        let prose = "Aerinn rode west.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert!(findings.is_empty());
    }

    #[test]
    fn naming_skips_wholly_different_continuations() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "Aerin and Borin rode west.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert!(findings.is_empty());
    }

    #[test]
    fn naming_respects_word_boundary_on_head() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "The aerinet was lowered into the sea Stormbringer waited.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert!(findings.is_empty());
    }

    #[test]
    fn naming_strips_trailing_punctuation() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "She called: Aerin Stormbreaker, where are you?";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert_eq!(findings.len(), 1);
        assert!(
            findings[0].detail.contains("Aerin Stormbreaker"),
            "got: {}",
            findings[0].detail
        );
    }

    #[test]
    fn naming_case_insensitive_match_against_canonical() {
        let canonical = vec!["Aerin Stormbringer".to_string()];
        let prose = "aerin stormbringer rode west.";
        let findings = super::classify_naming_inconsistencies(&canonical, prose);
        assert!(findings.is_empty());
    }
}