use std::collections::BTreeMap;
use std::fs::{self, File};
use std::io::{Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Datelike, FixedOffset, NaiveDateTime, TimeZone, Utc};
use crate::store::Store;
/// `chrono` format string used both to render and to parse header timestamps:
/// minute precision, no UTC offset.
const TS_FORMAT: &str = "%Y-%m-%d %H:%M";
/// Front matter and title passed to `compose_active` when a log or archive file
/// is written for the first time.
const LOG_FRONTMATTER: &str = "---\ntype: log\n---\n\n# Curator log\n";
/// Number of bytes `reverse_collect` reads per backwards step.
const REVERSE_BLOCK: usize = 8 * 1024;
/// Category of a log entry, written as a lowercase token in the header line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogKind {
    Ingest,
    Create,
    Update,
    Delete,
    Rename,
    Link,
    Validate,
    IndexRebuild,
    Contradiction,
    /// Any token that is not one of the built-in kinds, kept verbatim.
    Custom(String),
}

impl LogKind {
    /// Returns the token written to the log for this kind.
    ///
    /// For [`LogKind::Custom`] this is the stored string, unchanged.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Custom(token) => token.as_str(),
            Self::Ingest => "ingest",
            Self::Create => "create",
            Self::Update => "update",
            Self::Delete => "delete",
            Self::Rename => "rename",
            Self::Link => "link",
            Self::Validate => "validate",
            Self::IndexRebuild => "index-rebuild",
            Self::Contradiction => "contradiction",
        }
    }

    /// Maps a header token to its kind.
    ///
    /// Matching is exact and case-sensitive; anything unknown becomes
    /// [`LogKind::Custom`] holding the token as given.
    pub fn parse(token: &str) -> LogKind {
        match token {
            "ingest" => Self::Ingest,
            "create" => Self::Create,
            "update" => Self::Update,
            "delete" => Self::Delete,
            "rename" => Self::Rename,
            "link" => Self::Link,
            "validate" => Self::Validate,
            "index-rebuild" => Self::IndexRebuild,
            "contradiction" => Self::Contradiction,
            unknown => Self::Custom(unknown.to_owned()),
        }
    }

    /// `true` for every built-in kind, `false` for [`LogKind::Custom`].
    pub fn is_recognized(&self) -> bool {
        !matches!(self, Self::Custom(_))
    }
}
/// One log entry: a header line (`timestamp`, `kind`, optional `object`) plus a
/// free-form note body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogEntry {
/// Entry time. Rendered with `TS_FORMAT`, which carries no offset.
pub timestamp: DateTime<FixedOffset>,
/// Operation kind shown after the timestamp in the header line.
pub kind: LogKind,
/// Optional subject, written after ` | ` in the header line.
pub object: Option<String>,
/// Body text placed under the header; may span several lines.
pub note: String,
}
impl LogEntry {
fn render(&self) -> String {
let ts = self.timestamp.format(TS_FORMAT);
let mut out = String::new();
match &self.object {
Some(obj) => {
out.push_str(&format!("## [{}] {} | {}\n", ts, self.kind.as_str(), obj));
}
None => {
out.push_str(&format!("## [{}] {}\n", ts, self.kind.as_str()));
}
}
let note = self.note.trim_end_matches(['\n', '\r']);
if !note.is_empty() {
for (i, line) in note.split('\n').enumerate() {
if i > 0 {
out.push('\n');
}
out.push_str(&escape_note_line(line));
}
out.push('\n');
}
out.push('\n');
out
}
fn year_month(&self) -> (i32, u32) {
(self.timestamp.year(), self.timestamp.month())
}
}
/// Unit type whose associated functions implement the log operations
/// (`append`, `tail`, `since`, `last_validate_at`, `parse_header`).
#[derive(Debug, Clone)]
pub struct Log;
impl Log {
/// Appends `entry` to the active log (`log.md` under the store root), rotating
/// entries from earlier months into per-month archive files first.
///
/// Three cases:
/// * the active log does not exist: it is created with `LOG_FRONTMATTER` and the entry;
/// * it exists and holds entries whose `(year, month)` is earlier than the new
///   entry's: those are appended to their archive files, and the active log is
///   rewritten with its original preamble, the remaining entries and the new entry;
/// * otherwise: the entry is appended to the existing content.
///
/// # Errors
/// Propagates failures from reading the active log, creating directories,
/// writing the rotation marker, `append_to_archive` and `crate::fsx::write_atomic`.
/// Failures to remove the rotation marker are ignored.
pub fn append(store: &Store, entry: &LogEntry) -> crate::Result<()> {
let active = active_log_path(store);
// Guard is held until this function returns. Acquisition is best-effort:
// on Unix it may hold no file if the lock could not be taken.
let _lock = AppendLock::acquire(&active);
let current_ym = entry.year_month();
if active.exists() {
let content = fs::read_to_string(&active)?;
let (header, entries) = parse_active(&content);
// Entries from months before the new entry's month, grouped per month
// (BTreeMap keeps the months in ascending order).
let mut by_month: BTreeMap<(i32, u32), Vec<LogEntry>> = BTreeMap::new();
// Entries that stay in the active log.
let mut keep: Vec<LogEntry> = Vec::new();
for e in entries {
if e.year_month() < current_ym {
by_month.entry(e.year_month()).or_default().push(e);
} else {
keep.push(e);
}
}
let marker = rotation_marker_path(store);
// A marker already on disk means an earlier rotation did not reach the
// point where it removes the marker.
let recovering = marker.exists();
if !by_month.is_empty() {
let dir = archive_dir(store);
fs::create_dir_all(&dir)?;
// Write the marker before any archive is touched.
if !recovering {
fs::write(&marker, b"")?;
}
for ((y, m), month_entries) in &by_month {
let path = archive_path(store, *y, *m);
// When recovering, `append_to_archive` skips entries the archive already holds.
append_to_archive(&path, month_entries, recovering)?;
}
let mut body = String::new();
for e in &keep {
body.push_str(&e.render());
}
body.push_str(&entry.render());
let full = compose_active(&header, &body);
// The active log is rewritten only after every archive write succeeded.
crate::fsx::write_atomic(&active, full.as_bytes())?;
// Best-effort cleanup; a leftover marker only makes the next rotation deduplicate.
let _ = fs::remove_file(&marker);
return Ok(());
}
// Nothing to rotate: a marker left behind is stale, drop it (best-effort).
if recovering {
let _ = fs::remove_file(&marker);
}
// Plain append: existing content is kept as-is, with a newline added if missing.
let mut full = content;
if !full.ends_with('\n') {
full.push('\n');
}
full.push_str(&entry.render());
crate::fsx::write_atomic(&active, full.as_bytes())?;
Ok(())
} else {
// First entry ever: create the parent directory and a fresh log file.
if let Some(parent) = active.parent() {
fs::create_dir_all(parent)?;
}
let body = entry.render();
let full = compose_active(LOG_FRONTMATTER, &body);
crate::fsx::write_atomic(&active, full.as_bytes())?;
Ok(())
}
}
/// Returns up to `n` entries with the newest timestamps, oldest first.
///
/// The active log is read completely; archives are then visited from the
/// newest month down. Once `n` entries are held and an archive's month is
/// older than the oldest retained entry's month, the walk stops. Archive
/// entries that also occur in the active log are ignored.
///
/// # Errors
/// Propagates I/O errors from listing or reading the log files.
pub fn tail(store: &Store, n: usize) -> crate::Result<Vec<LogEntry>> {
    if n == 0 {
        return Ok(Vec::new());
    }

    let mut newest = NewestWindow::new(n);
    let mut in_active: std::collections::HashSet<EntryKey> = std::collections::HashSet::new();

    let active_file = active_log_path(store);
    if active_file.exists() {
        reverse_collect(&active_file, |candidate| {
            in_active.insert(entry_key(&candidate));
            newest.consider(candidate);
            false
        })?;
    }

    for archive_file in list_archives_desc(store)? {
        if newest.is_full() {
            let oldest_kept = newest.min_year_month();
            let archive_month = archive_year_month(&archive_file);
            if let (Some(kept), Some(month)) = (oldest_kept, archive_month) {
                if month < kept {
                    // Nothing in this or any older archive can displace a kept entry.
                    break;
                }
            }
        }
        reverse_collect(&archive_file, |candidate| {
            let duplicate = in_active.contains(&entry_key(&candidate));
            if !duplicate {
                newest.consider(candidate);
            }
            false
        })?;
    }

    Ok(newest.into_sorted())
}
/// Returns every entry whose timestamp is strictly later than `time`.
///
/// Entries are gathered newest-file-first and back-to-front within each file,
/// then reversed, so the result follows on-disk order from the oldest visited
/// archive to the active log. Archives for months before the cutoff's UTC
/// month are not opened. Archive entries also present in the active log are
/// reported once.
///
/// # Errors
/// Propagates I/O errors from listing or reading the log files.
pub fn since(store: &Store, time: DateTime<FixedOffset>) -> crate::Result<Vec<LogEntry>> {
    let mut newer: Vec<LogEntry> = Vec::new();
    let mut from_active: std::collections::HashSet<EntryKey> = std::collections::HashSet::new();

    let active_file = active_log_path(store);
    if active_file.exists() {
        reverse_collect(&active_file, |candidate| {
            if candidate.timestamp > time {
                from_active.insert(entry_key(&candidate));
                newer.push(candidate);
            }
            false
        })?;
    }

    let cutoff_month = {
        let utc = time.with_timezone(&Utc);
        (utc.year(), utc.month())
    };

    for archive_file in list_archives_desc(store)? {
        let entirely_older = match archive_year_month(&archive_file) {
            Some(month) => month < cutoff_month,
            None => false,
        };
        if entirely_older {
            break;
        }
        reverse_collect(&archive_file, |candidate| {
            let wanted =
                candidate.timestamp > time && !from_active.contains(&entry_key(&candidate));
            if wanted {
                newer.push(candidate);
            }
            false
        })?;
    }

    newer.reverse();
    Ok(newer)
}
pub fn last_validate_at(store: &Store) -> crate::Result<Option<DateTime<FixedOffset>>> {
let mut found: Option<DateTime<FixedOffset>> = None;
let active = active_log_path(store);
if active.exists() {
reverse_collect(&active, |e| {
if e.kind == LogKind::Validate {
found = Some(e.timestamp);
true
} else {
false
}
})?;
}
if found.is_none() {
for archive in list_archives_desc(store)? {
reverse_collect(&archive, |e| {
if e.kind == LogKind::Validate {
found = Some(e.timestamp);
true
} else {
false
}
})?;
if found.is_some() {
break;
}
}
}
Ok(found)
}
/// Parses an entry header of the form `## [YYYY-MM-DD HH:MM] kind` or
/// `## [YYYY-MM-DD HH:MM] kind | object`.
///
/// Trailing `\n` / `\r` are ignored. Returns `None` when the `## [` prefix or
/// the closing `]` is missing, the timestamp does not parse, or the kind is
/// empty. An empty object after `|` yields `None` for the object.
pub fn parse_header(line: &str) -> Option<(DateTime<FixedOffset>, LogKind, Option<String>)> {
    let body = line.trim_end_matches(['\n', '\r']).strip_prefix("## [")?;
    let (stamp_text, remainder) = body.split_once(']')?;
    let timestamp = parse_timestamp(stamp_text)?;

    let remainder = remainder.trim();
    // Only the first `|` separates kind from object; later ones stay in the object.
    let (kind_text, object_text) = match remainder.split_once('|') {
        Some((kind, object)) => (kind.trim(), object.trim()),
        None => (remainder, ""),
    };
    if kind_text.is_empty() {
        return None;
    }

    let object = (!object_text.is_empty()).then(|| object_text.to_string());
    Some((timestamp, LogKind::parse(kind_text), object))
}
}
/// Bounded collection that retains the `cap` entries with the latest timestamps
/// out of everything passed to `consider`.
struct NewestWindow {
/// Maximum number of entries retained.
cap: usize,
/// Retained entries. `WindowItem`'s `Ord` is reversed on timestamp, so `peek`
/// returns the entry with the earliest timestamp.
heap: std::collections::BinaryHeap<WindowItem>,
/// Sequence number handed to the next entry passed to `consider`.
next_arrival: u64,
}
impl NewestWindow {
    /// Upper bound on the up-front heap allocation.
    ///
    /// `cap` comes straight from the caller of `Log::tail`, who may pass a very
    /// large value to mean "everything". Reserving that many slots eagerly could
    /// exhaust memory or panic on capacity overflow, so the heap starts small and
    /// grows on demand.
    const PREALLOC_LIMIT: usize = 1024;

    /// Creates an empty window that retains at most `cap` entries.
    fn new(cap: usize) -> Self {
        NewestWindow {
            cap,
            heap: std::collections::BinaryHeap::with_capacity(cap.min(Self::PREALLOC_LIMIT)),
            next_arrival: 0,
        }
    }

    /// Offers `entry` to the window.
    ///
    /// While the window is not full the entry is always kept. Once full, the
    /// entry replaces the current oldest one only if its timestamp is strictly
    /// later; on a tie the earlier arrival stays. A window with `cap == 0` keeps
    /// nothing.
    fn consider(&mut self, entry: LogEntry) {
        let arrival = self.next_arrival;
        self.next_arrival += 1;
        if self.heap.len() < self.cap {
            self.heap.push(WindowItem { entry, arrival });
            return;
        }
        // `peek_mut` is `None` only for a zero-capacity window; nothing to replace then.
        if let Some(mut oldest) = self.heap.peek_mut() {
            if entry.timestamp > oldest.entry.timestamp {
                // Overwriting the root re-sifts the heap when `oldest` is dropped.
                *oldest = WindowItem { entry, arrival };
            }
        }
    }

    /// `true` once `cap` entries are held.
    fn is_full(&self) -> bool {
        self.heap.len() >= self.cap
    }

    /// `(year, month)` of the oldest retained entry, or `None` when empty.
    fn min_year_month(&self) -> Option<(i32, u32)> {
        self.heap
            .peek()
            .map(|item| (item.entry.timestamp.year(), item.entry.timestamp.month()))
    }

    /// Consumes the window and returns its entries in ascending timestamp order.
    ///
    /// Entries with equal timestamps are ordered by descending arrival number,
    /// i.e. the one offered last comes first.
    fn into_sorted(self) -> Vec<LogEntry> {
        let mut items: Vec<WindowItem> = self.heap.into_vec();
        items.sort_by(|a, b| {
            a.entry
                .timestamp
                .cmp(&b.entry.timestamp)
                .then(b.arrival.cmp(&a.arrival))
        });
        items.into_iter().map(|item| item.entry).collect()
    }
}
/// Heap element of `NewestWindow`: an entry plus the order in which it was offered.
struct WindowItem {
    entry: LogEntry,
    /// Sequence number assigned by `NewestWindow::consider`.
    arrival: u64,
}

impl PartialEq for WindowItem {
    /// Equality looks only at the two fields that take part in ordering.
    fn eq(&self, other: &Self) -> bool {
        let same_time = self.entry.timestamp == other.entry.timestamp;
        same_time && self.arrival == other.arrival
    }
}

impl Eq for WindowItem {}

impl Ord for WindowItem {
    /// Reversed on timestamp so the max-heap's root is the earliest entry;
    /// among equal timestamps the larger arrival number ranks higher.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match other.entry.timestamp.cmp(&self.entry.timestamp) {
            std::cmp::Ordering::Equal => self.arrival.cmp(&other.arrival),
            unequal => unequal,
        }
    }
}

impl PartialOrd for WindowItem {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Guard returned by `AppendLock::acquire`. On Unix it owns the open lock file
/// and unlocks it in `Drop`; on other targets it has no fields.
struct AppendLock {
// `None` when the lock file could not be opened or locked.
#[cfg(unix)]
file: Option<File>,
}
impl AppendLock {
    /// Takes the append lock that belongs to the log file `active`.
    ///
    /// Locking is best-effort: on Unix a failure to open or lock the sidecar
    /// file yields a guard that holds nothing, and on other targets the guard
    /// is always empty. The caller proceeds either way.
    fn acquire(active: &Path) -> AppendLock {
        #[cfg(unix)]
        {
            let file = Self::open_and_lock(active);
            AppendLock { file }
        }
        #[cfg(not(unix))]
        {
            let _ = active;
            AppendLock {}
        }
    }

    /// Opens (creating if needed) the lock file next to `active` and takes an
    /// exclusive `flock` on it, blocking until it is granted.
    ///
    /// Returns `None` when the file cannot be opened or `flock` fails for a
    /// reason other than being interrupted by a signal.
    #[cfg(unix)]
    fn open_and_lock(active: &Path) -> Option<File> {
        use std::os::unix::io::AsRawFd;
        if let Some(parent) = active.parent() {
            // A failure here surfaces as an open error just below.
            let _ = fs::create_dir_all(parent);
        }
        let lock_path = lock_path_for(active);
        let file = std::fs::OpenOptions::new()
            .create(true)
            .truncate(false)
            .write(true)
            .open(&lock_path)
            .ok()?;
        loop {
            // SAFETY: `file` is open for the whole call, so its descriptor is
            // valid; `flock` only reads the two integer arguments.
            let rc = unsafe { flock(file.as_raw_fd(), LOCK_EX) };
            if rc == 0 {
                return Some(file);
            }
            // A blocking `flock` can be interrupted by a signal (EINTR). Giving up
            // there would silently run the append without the lock, so retry.
            if std::io::Error::last_os_error().kind() != std::io::ErrorKind::Interrupted {
                return None;
            }
        }
    }
}
#[cfg(unix)]
impl Drop for AppendLock {
    /// Unlocks the lock file if one is held; the result of `flock` is ignored.
    fn drop(&mut self) {
        use std::os::unix::io::AsRawFd;
        let fd = match self.file.as_ref() {
            Some(held) => held.as_raw_fd(),
            None => return,
        };
        // SAFETY: `fd` belongs to `self.file`, which is still open at this point.
        let _ = unsafe { flock(fd, LOCK_UN) };
    }
}
// Foreign declaration of the C `flock` function, called directly by `AppendLock`.
#[cfg(unix)]
extern "C" {
fn flock(fd: std::os::raw::c_int, operation: std::os::raw::c_int) -> std::os::raw::c_int;
}
// Operation code passed to `flock` to request an exclusive lock.
// NOTE(review): 2 is LOCK_EX on Linux and the BSDs/macOS — confirm for any other Unix target.
#[cfg(unix)]
const LOCK_EX: std::os::raw::c_int = 2;
// Operation code passed to `flock` to release the lock.
// NOTE(review): 8 is LOCK_UN on Linux and the BSDs/macOS — confirm for any other Unix target.
#[cfg(unix)]
const LOCK_UN: std::os::raw::c_int = 8;
/// Path of the lock file for `active`: the same directory, file name with
/// `.lock` appended. Falls back to the name `log.md` when `active` has no
/// file-name component.
#[cfg(unix)]
fn lock_path_for(active: &Path) -> PathBuf {
    let mut lock_name = match active.file_name() {
        Some(base) => base.to_os_string(),
        None => std::ffi::OsString::from("log.md"),
    };
    lock_name.push(".lock");

    if let Some(dir) = active.parent() {
        dir.join(lock_name)
    } else {
        PathBuf::from(lock_name)
    }
}
/// Location of the active log: `log.md` directly under the store root.
fn active_log_path(store: &Store) -> PathBuf {
    let root = &store.root;
    root.join("log.md")
}

/// Directory holding the monthly archives: `log/` under the store root.
fn archive_dir(store: &Store) -> PathBuf {
    let root = &store.root;
    root.join("log")
}

/// Archive file for one month, named `YYYY-MM.md` inside the archive directory.
fn archive_path(store: &Store, year: i32, month: u32) -> PathBuf {
    let file_name = format!("{:04}-{:02}.md", year, month);
    archive_dir(store).join(file_name)
}

/// Marker file (`.rotating` inside the archive directory) that exists while a
/// rotation is in progress.
fn rotation_marker_path(store: &Store) -> PathBuf {
    let dir = archive_dir(store);
    dir.join(".rotating")
}
/// Parses a `TS_FORMAT` timestamp (surrounding whitespace ignored) and attaches
/// a zero offset to it. Returns `None` when the text does not match the format.
fn parse_timestamp(s: &str) -> Option<DateTime<FixedOffset>> {
    let wall_clock = NaiveDateTime::parse_from_str(s.trim(), TS_FORMAT).ok()?;
    FixedOffset::east_opt(0).and_then(|zero| zero.from_local_datetime(&wall_clock).single())
}
/// Splits a log file into its preamble (everything before the first valid
/// entry header) and the parsed entries that follow. With no header at all,
/// the whole content is the preamble and the entry list is empty.
fn parse_active(content: &str) -> (String, Vec<LogEntry>) {
    if let Some(first_header) = find_first_header(content) {
        let (preamble, body) = content.split_at(first_header);
        return (preamble.to_string(), parse_entries(body));
    }
    (content.to_string(), Vec::new())
}
/// Byte offset of the first line in `content` that is a valid entry header,
/// or `None` if there is none.
fn find_first_header(content: &str) -> Option<usize> {
    // Bytes consumed by the lines already inspected.
    let mut consumed = 0usize;
    content.split_inclusive('\n').find_map(|raw_line| {
        let line_start = consumed;
        consumed += raw_line.len();
        let text = raw_line.trim_end_matches(['\r', '\n']);
        let is_header = text.starts_with("## [") && Log::parse_header(text).is_some();
        is_header.then(|| line_start)
    })
}
/// `true` when `line`, with all leading backslashes removed, parses as an
/// entry header — i.e. the line needs escaping or is an escaped one.
fn is_header_ambiguous(line: &str) -> bool {
    let without_escapes = line.trim_start_matches('\\');
    let parsed = Log::parse_header(without_escapes);
    parsed.is_some()
}
/// Prefixes one backslash to a note line that would otherwise be read back as
/// an entry header; every other line is returned borrowed and unchanged.
fn escape_note_line(line: &str) -> std::borrow::Cow<'_, str> {
    if !is_header_ambiguous(line) {
        return std::borrow::Cow::Borrowed(line);
    }
    let mut escaped = String::with_capacity(line.len() + 1);
    escaped.push('\\');
    escaped.push_str(line);
    std::borrow::Cow::Owned(escaped)
}
/// Reverses `escape_note_line`: drops one leading backslash from a line that
/// is header-like once its backslashes are removed. Other lines pass through.
fn unescape_note_line(line: &str) -> std::borrow::Cow<'_, str> {
    let visible = match line.strip_prefix('\\') {
        Some(rest) if is_header_ambiguous(line) => rest,
        _ => line,
    };
    std::borrow::Cow::Borrowed(visible)
}
/// Parses consecutive log entries out of `text`.
///
/// Every valid header line opens a new entry; the lines up to the next header
/// form its note (unescaped, with leading and trailing newlines stripped).
/// Lines that appear before the first header are ignored.
fn parse_entries(text: &str) -> Vec<LogEntry> {
    type Header = (DateTime<FixedOffset>, LogKind, Option<String>);

    /// Builds the finished entry from its header and collected note lines.
    fn finish(header: Header, note_lines: &[&str]) -> LogEntry {
        let (timestamp, kind, object) = header;
        let joined = note_lines
            .iter()
            .map(|line| unescape_note_line(line))
            .collect::<Vec<_>>()
            .join("\n");
        LogEntry {
            timestamp,
            kind,
            object,
            note: joined.trim_matches(['\n', '\r']).to_string(),
        }
    }

    let mut parsed: Vec<LogEntry> = Vec::new();
    let mut open: Option<Header> = None;
    let mut note_lines: Vec<&str> = Vec::new();

    for line in text.lines() {
        let header = if line.starts_with("## [") {
            Log::parse_header(line)
        } else {
            None
        };
        match header {
            Some(next) => {
                // A new header closes the entry that was being collected.
                if let Some(done) = open.replace(next) {
                    parsed.push(finish(done, &note_lines));
                }
                note_lines.clear();
            }
            None => {
                if open.is_some() {
                    note_lines.push(line);
                }
            }
        }
    }

    if let Some(done) = open {
        parsed.push(finish(done, &note_lines));
    }
    parsed
}
/// Joins a file preamble and an entry body.
///
/// A non-empty `header` is padded with newlines so that it ends with exactly
/// one blank line before `body`; an empty `header` contributes nothing.
fn compose_active(header: &str, body: &str) -> String {
    if header.is_empty() {
        return body.to_string();
    }
    let padding = if header.ends_with("\n\n") {
        ""
    } else if header.ends_with('\n') {
        "\n"
    } else {
        "\n\n"
    };
    [header, padding, body].concat()
}
fn append_to_archive(path: &Path, entries: &[LogEntry], recovering: bool) -> crate::Result<()> {
if path.exists() {
let existing = fs::read_to_string(path)?;
let mut body = String::new();
if recovering {
let (_header, existing_entries) = parse_active(&existing);
let mut remaining: std::collections::HashMap<EntryKey, usize> =
std::collections::HashMap::new();
for e in &existing_entries {
*remaining.entry(entry_key(e)).or_insert(0) += 1;
}
for e in entries {
match remaining.get_mut(&entry_key(e)) {
Some(count) if *count > 0 => *count -= 1,
_ => body.push_str(&e.render()),
}
}
} else {
for e in entries {
body.push_str(&e.render());
}
}
if body.is_empty() {
return Ok(());
}
let mut full = existing;
if !full.ends_with('\n') {
full.push('\n');
}
full.push_str(&body);
crate::fsx::write_atomic(path, full.as_bytes())?;
} else {
let mut body = String::new();
for e in entries {
body.push_str(&e.render());
}
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
let full = compose_active(LOG_FRONTMATTER, &body);
crate::fsx::write_atomic(path, full.as_bytes())?;
}
Ok(())
}
type EntryKey = (DateTime<FixedOffset>, String, Option<String>, String);
fn entry_key(e: &LogEntry) -> EntryKey {
(
e.timestamp,
e.kind.as_str().to_string(),
e.object.clone(),
e.note.clone(),
)
}
/// Lists the archive files (`NNNN-NN.md` regular files in the archive
/// directory), sorted by file stem in descending order. Returns an empty list
/// when the directory does not exist.
///
/// # Errors
/// Propagates errors from reading the directory.
fn list_archives_desc(store: &Store) -> crate::Result<Vec<PathBuf>> {
    let dir = archive_dir(store);
    if !dir.is_dir() {
        return Ok(Vec::new());
    }

    let mut found: Vec<(String, PathBuf)> = Vec::new();
    for dir_entry in fs::read_dir(&dir)? {
        let candidate = dir_entry?.path();
        if !candidate.is_file() {
            continue;
        }
        // Non-UTF-8 names and names of any other shape are skipped.
        let stem = candidate
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(|name| name.strip_suffix(".md"))
            .filter(|stem| is_year_month(stem))
            .map(|stem| stem.to_string());
        if let Some(stem) = stem {
            found.push((stem, candidate));
        }
    }

    found.sort_by(|left, right| right.0.cmp(&left.0));
    Ok(found.into_iter().map(|(_, path)| path).collect())
}
/// Extracts `(year, month)` from an archive path whose file name is
/// `YYYY-MM.md`. Returns `None` for any other name or a month outside 1..=12.
fn archive_year_month(path: &Path) -> Option<(i32, u32)> {
    let file_name = path.file_name()?.to_str()?;
    let stem = file_name
        .strip_suffix(".md")
        .filter(|stem| is_year_month(stem))?;

    // `is_year_month` guarantees seven ASCII bytes, so these slices are in range.
    let year = stem[..4].parse::<i32>().ok()?;
    let month = stem[5..7].parse::<u32>().ok()?;
    (1..=12).contains(&month).then(|| (year, month))
}
/// `true` when `s` is exactly four ASCII digits, a `-`, and two ASCII digits.
/// The month value itself is not range-checked here.
fn is_year_month(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
/// Reads the log file at `path` from its end towards its start and passes each
/// parsed entry to `take`, physically last entry first.
///
/// The file is read in `REVERSE_BLOCK`-sized steps. `take` returning `true`
/// stops the scan. Header positions whose text does not parse as an entry are
/// skipped. An empty file yields no entries.
///
/// # Errors
/// Propagates I/O errors from opening, seeking and reading the file.
fn reverse_collect<F>(path: &Path, mut take: F) -> crate::Result<()>
where
F: FnMut(LogEntry) -> bool,
{
let mut file = File::open(path)?;
let len = file.metadata()?.len();
if len == 0 {
return Ok(());
}
// Holds the bytes of the file from offset `start` to the end.
let mut buf: Vec<u8> = Vec::new();
// File offset of `buf[0]`; moves towards 0 with every block read.
let mut start = len;
// Absolute offsets of headers already handed to `take` (or found unparseable).
let mut emitted_abs: std::collections::HashSet<u64> = std::collections::HashSet::new();
// Absolute offsets of every header found so far, in ascending order.
let mut headers: Vec<u64> = Vec::new();
let mut stop = false;
let mut first = true;
while start > 0 && !stop {
let block = std::cmp::min(REVERSE_BLOCK as u64, start);
let new_start = start - block;
file.seek(SeekFrom::Start(new_start))?;
let mut chunk = vec![0u8; block as usize];
file.read_exact(&mut chunk)?;
// Prepend the freshly read block to what has been read so far.
chunk.extend_from_slice(&buf);
buf = chunk;
start = new_start;
let base_is_file_start = start == 0;
// After the first round, also rescan the first byte of the previous buffer:
// a candidate there could not be checked against its preceding byte before.
let scan_hi = if first { block } else { block + 1 } as usize;
let mut new_headers = header_offsets_range(&buf, start, 0, scan_hi, base_is_file_start);
first = false;
if !new_headers.is_empty() {
// Newly found headers lie before all known ones; keep the list ascending.
new_headers.extend_from_slice(&headers);
headers = new_headers;
}
// Walk the headers from the end of the file backwards.
for i in (0..headers.len()).rev() {
let abs = headers[i];
if emitted_abs.contains(&abs) {
continue;
}
let is_oldest_in_buf = i == 0;
// The earliest header currently in the buffer is held back until the
// start of the file has been read.
// NOTE(review): presumably a safety margin against acting on a partially
// seen region — confirm the intent.
if is_oldest_in_buf && start > 0 {
continue;
}
let entry_text = entry_text_at(&buf, start, abs, &headers, i);
if let Some(entry) = parse_single_entry(&entry_text) {
emitted_abs.insert(abs);
if take(entry) {
stop = true;
break;
}
} else {
// Unparseable text: mark as handled so it is not retried.
emitted_abs.insert(abs);
}
}
}
// Final pass once the whole file is buffered: hand over anything not yet emitted.
if !stop && start == 0 {
for i in (0..headers.len()).rev() {
let abs = headers[i];
if emitted_abs.contains(&abs) {
continue;
}
let entry_text = entry_text_at(&buf, start, abs, &headers, i);
if let Some(entry) = parse_single_entry(&entry_text) {
emitted_abs.insert(abs);
if take(entry) {
break;
}
} else {
emitted_abs.insert(abs);
}
}
}
Ok(())
}
/// Test-only convenience: scans all of `buf` for headers, treating `buf[0]`
/// as a line start exactly when `base` is 0.
#[cfg(test)]
fn header_offsets(buf: &[u8], base: u64) -> Vec<u64> {
    let whole_buffer = buf.len();
    let at_file_start = base == 0;
    header_offsets_range(buf, base, 0, whole_buffer, at_file_start)
}
/// Finds entry headers that start at buffer positions `scan_lo..scan_hi`.
///
/// A position counts when the bytes there are `## [`, it sits at a line start
/// (preceded by `\n`, or at index 0 with `base_is_file_start` set), and the
/// line parses as a header. Returned offsets are absolute: `base` + position.
fn header_offsets_range(
    buf: &[u8],
    base: u64,
    scan_lo: usize,
    scan_hi: usize,
    base_is_file_start: bool,
) -> Vec<u64> {
    const PAT: &[u8] = b"## [";

    // Stop before `scan_hi` and before the pattern would run off the buffer.
    let last_fit = buf.len().saturating_sub(PAT.len() - 1);
    let limit = scan_hi.min(buf.len()).min(last_fit);

    let mut found = Vec::new();
    let mut pos = scan_lo;
    while pos < limit {
        let starts_line = match pos.checked_sub(1) {
            Some(prev) => buf[prev] == b'\n',
            None => base_is_file_start,
        };
        let is_header =
            buf[pos..].starts_with(PAT) && starts_line && is_valid_header_line(buf, pos);
        if is_header {
            found.push(base + pos as u64);
            pos += PAT.len();
        } else {
            pos += 1;
        }
    }
    found
}
/// `true` when the line starting at `buf[i]` (up to the next `\n` or the end
/// of the buffer) parses as an entry header. Invalid UTF-8 is decoded lossily.
fn is_valid_header_line(buf: &[u8], i: usize) -> bool {
    let rest = &buf[i..];
    let line_bytes = rest.split(|&byte| byte == b'\n').next().unwrap_or(rest);
    let line = String::from_utf8_lossy(line_bytes);
    Log::parse_header(&line).is_some()
}
/// Text of the entry whose header is `headers[idx]` (absolute offset
/// `header_abs`): from that header up to the next header, or to the end of
/// `buf` for the last one. `base` is the absolute offset of `buf[0]`.
/// Invalid UTF-8 is decoded lossily.
fn entry_text_at(buf: &[u8], base: u64, header_abs: u64, headers: &[u64], idx: usize) -> String {
    let begin = (header_abs - base) as usize;
    let end = headers
        .get(idx + 1)
        .map_or(buf.len(), |next| (next - base) as usize);
    String::from_utf8_lossy(&buf[begin..end]).into_owned()
}
/// Parses `text` and returns its first entry, or `None` if it contains none.
fn parse_single_entry(text: &str) -> Option<LogEntry> {
    let mut entries = parse_entries(text);
    // Keep only the first element, then take it out.
    entries.truncate(1);
    entries.pop()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Config;
use std::fs;
use tempfile::TempDir;
fn temp_store() -> (TempDir, Store) {
let dir = tempfile::tempdir().expect("tempdir");
fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
let store = Store {
root: dir.path().to_path_buf(),
config: Config::default(),
};
(dir, store)
}
#[test]
fn archive_year_month_rejects_out_of_range_months() {
use std::path::Path;
assert_eq!(
archive_year_month(Path::new("log/2026-05.md")),
Some((2026, 5))
);
assert_eq!(
archive_year_month(Path::new("log/2026-01.md")),
Some((2026, 1))
);
assert_eq!(
archive_year_month(Path::new("log/2026-12.md")),
Some((2026, 12))
);
for bad in ["log/2026-00.md", "log/2026-13.md", "log/2026-99.md"] {
assert_eq!(
archive_year_month(Path::new(bad)),
None,
"{bad} has an out-of-range month and must not parse as an archive"
);
}
}
fn ts(y: i32, mo: u32, d: u32, h: u32, mi: u32) -> DateTime<FixedOffset> {
let naive = chrono::NaiveDate::from_ymd_opt(y, mo, d)
.unwrap()
.and_hms_opt(h, mi, 0)
.unwrap();
FixedOffset::east_opt(0)
.unwrap()
.from_local_datetime(&naive)
.single()
.unwrap()
}
#[allow(clippy::too_many_arguments)] fn entry(
y: i32,
mo: u32,
d: u32,
h: u32,
mi: u32,
kind: LogKind,
object: Option<&str>,
note: &str,
) -> LogEntry {
LogEntry {
timestamp: ts(y, mo, d, h, mi),
kind,
object: object.map(|s| s.to_string()),
note: note.to_string(),
}
}
#[test]
fn parse_header_with_object() {
let (t, k, o) =
Log::parse_header("## [2026-05-27 10:00] ingest | sources/emails/x.eml").unwrap();
assert_eq!(t, ts(2026, 5, 27, 10, 0));
assert_eq!(k, LogKind::Ingest);
assert_eq!(o.as_deref(), Some("sources/emails/x.eml"));
}
#[test]
fn parse_header_without_object_is_none_object() {
let (t, k, o) = Log::parse_header("## [2026-05-27 10:20] validate").unwrap();
assert_eq!(t, ts(2026, 5, 27, 10, 20));
assert_eq!(k, LogKind::Validate);
assert_eq!(o, None);
}
#[test]
fn parse_header_custom_kind_roundtrips_token() {
let (_, k, o) = Log::parse_header("## [2026-05-27 10:00] proposal | records/x").unwrap();
assert_eq!(k, LogKind::Custom("proposal".to_string()));
assert!(!k.is_recognized());
assert_eq!(o.as_deref(), Some("records/x"));
}
#[test]
fn parse_header_index_rebuild_hyphenated_kind() {
let (_, k, _) = Log::parse_header("## [2026-05-27 10:00] index-rebuild").unwrap();
assert_eq!(k, LogKind::IndexRebuild);
assert_eq!(k.as_str(), "index-rebuild");
}
#[test]
fn parse_header_rejects_non_headers() {
assert!(Log::parse_header("Not a header").is_none());
assert!(Log::parse_header("# Curator log").is_none());
assert!(Log::parse_header("## [garbage] ingest | x").is_none());
assert!(Log::parse_header("## [2026-05-27 10:00]").is_none()); assert!(Log::parse_header("## [2026-13-40 99:99] ingest | x").is_none());
}
#[test]
fn kind_as_str_parse_roundtrip_for_all_recognized() {
for k in [
LogKind::Ingest,
LogKind::Create,
LogKind::Update,
LogKind::Delete,
LogKind::Rename,
LogKind::Link,
LogKind::Validate,
LogKind::IndexRebuild,
LogKind::Contradiction,
] {
assert_eq!(LogKind::parse(k.as_str()), k);
assert!(k.is_recognized());
}
}
#[test]
fn append_creates_log_with_frontmatter_and_entry() {
let (_d, store) = temp_store();
let e = entry(
2026,
5,
27,
10,
0,
LogKind::Ingest,
Some("sources/emails/x.eml"),
"Email received.",
);
Log::append(&store, &e).unwrap();
let content = fs::read_to_string(store.root.join("log.md")).unwrap();
assert!(
content.starts_with("---\ntype: log\n---\n"),
"missing log frontmatter; got:\n{content}"
);
assert!(content.contains("## [2026-05-27 10:00] ingest | sources/emails/x.eml"));
assert!(content.contains("Email received."));
assert!(!store.root.join("log").exists());
}
#[test]
fn append_tail_since_roundtrip() {
let (_d, store) = temp_store();
let e1 = entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "first");
let e2 = entry(2026, 5, 27, 10, 5, LogKind::Create, Some("b"), "second");
let e3 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "third");
Log::append(&store, &e1).unwrap();
Log::append(&store, &e2).unwrap();
Log::append(&store, &e3).unwrap();
let tail = Log::tail(&store, 2).unwrap();
assert_eq!(tail.len(), 2);
assert_eq!(tail[0], e2);
assert_eq!(tail[1], e3);
let all = Log::tail(&store, 99).unwrap();
assert_eq!(all, vec![e1.clone(), e2.clone(), e3.clone()]);
let since = Log::since(&store, ts(2026, 5, 27, 10, 5)).unwrap();
assert_eq!(since, vec![e3.clone()]);
let since_all = Log::since(&store, ts(2026, 5, 27, 9, 0)).unwrap();
assert_eq!(since_all, vec![e1, e2, e3]);
}
#[test]
fn tail_zero_is_empty() {
let (_d, store) = temp_store();
Log::append(
&store,
&entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "x"),
)
.unwrap();
assert!(Log::tail(&store, 0).unwrap().is_empty());
}
#[test]
fn tail_and_since_on_missing_log_are_empty() {
let (_d, store) = temp_store();
assert!(Log::tail(&store, 5).unwrap().is_empty());
assert!(Log::since(&store, ts(2000, 1, 1, 0, 0)).unwrap().is_empty());
assert!(Log::last_validate_at(&store).unwrap().is_none());
}
#[test]
fn since_exact_timestamp_is_exclusive() {
let (_d, store) = temp_store();
let e = entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "PASS");
Log::append(&store, &e).unwrap();
assert!(Log::since(&store, ts(2026, 5, 27, 10, 0))
.unwrap()
.is_empty());
}
fn write_raw_log(store: &Store, entries: &[LogEntry]) {
let mut content = String::from(LOG_FRONTMATTER);
content.push('\n');
for e in entries {
content.push_str(&e.render());
}
fs::write(store.root.join("log.md"), content).expect("write raw log.md");
}
#[test]
fn since_returns_newer_entries_even_when_disk_order_is_non_monotonic() {
let (_d, store) = temp_store();
let e_1010 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "newest");
let e_1005 = entry(2026, 5, 27, 10, 5, LogKind::Create, Some("b"), "middle");
let e_1000 = entry(
2026,
5,
27,
10,
0,
LogKind::Update,
Some("a"),
"backdated fix",
);
write_raw_log(&store, &[e_1010, e_1005, e_1000]);
let got = Log::since(&store, ts(2026, 5, 27, 10, 2)).unwrap();
let stamps: std::collections::BTreeSet<_> = got.iter().map(|e| e.timestamp).collect();
assert_eq!(
stamps,
[ts(2026, 5, 27, 10, 5), ts(2026, 5, 27, 10, 10)]
.into_iter()
.collect(),
"since(10:02) must include both 10:05 and 10:10 despite the backdated \
10:00 entry sitting physically last, and exclude 10:00; got {got:?}"
);
let all = Log::since(&store, ts(2026, 5, 27, 9, 0)).unwrap();
let all_stamps: std::collections::BTreeSet<_> = all.iter().map(|e| e.timestamp).collect();
assert_eq!(
all_stamps,
[
ts(2026, 5, 27, 10, 0),
ts(2026, 5, 27, 10, 5),
ts(2026, 5, 27, 10, 10),
]
.into_iter()
.collect()
);
}
#[test]
fn since_crosses_archive_when_newer_entry_is_out_of_order_inside_it() {
let (_d, store) = temp_store();
let may = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
write_raw_log(&store, &[may]);
let apr_late = entry(
2026,
4,
20,
9,
0,
LogKind::Create,
Some("apr-b"),
"apr-late",
);
let apr_early = entry(
2026,
4,
5,
9,
0,
LogKind::Ingest,
Some("apr-a"),
"apr-early",
);
let dir = store.root.join("log");
fs::create_dir_all(&dir).unwrap();
let mut arch = String::from(LOG_FRONTMATTER);
arch.push('\n');
arch.push_str(&apr_late.render());
arch.push_str(&apr_early.render());
fs::write(dir.join("2026-04.md"), arch).unwrap();
let got = Log::since(&store, ts(2026, 4, 15, 0, 0)).unwrap();
let stamps: std::collections::BTreeSet<_> = got.iter().map(|e| e.timestamp).collect();
assert_eq!(
stamps,
[ts(2026, 4, 20, 9, 0), ts(2026, 5, 2, 8, 0)]
.into_iter()
.collect(),
"since(mid-April) must include the out-of-order later April entry \
and the May entry, and exclude the earlier April entry; got {got:?}"
);
}
#[test]
fn multiline_note_is_preserved() {
let (_d, store) = temp_store();
let e = entry(
2026,
5,
27,
10,
0,
LogKind::Create,
Some("records/x"),
"Line one.\nLine two.\nLine three.",
);
Log::append(&store, &e).unwrap();
let got = Log::tail(&store, 1).unwrap();
assert_eq!(got[0].note, "Line one.\nLine two.\nLine three.");
}
#[test]
fn empty_note_roundtrips_as_empty() {
let (_d, store) = temp_store();
let e = entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "");
Log::append(&store, &e).unwrap();
let got = Log::tail(&store, 1).unwrap();
assert_eq!(got[0], e);
assert_eq!(got[0].note, "");
}
#[test]
fn last_validate_at_finds_most_recent_validate() {
let (_d, store) = temp_store();
Log::append(
&store,
&entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "first pass"),
)
.unwrap();
Log::append(
&store,
&entry(2026, 5, 27, 10, 5, LogKind::Create, Some("a"), "made a"),
)
.unwrap();
Log::append(
&store,
&entry(2026, 5, 27, 10, 10, LogKind::Validate, None, "second pass"),
)
.unwrap();
Log::append(
&store,
&entry(2026, 5, 27, 10, 15, LogKind::Update, Some("a"), "edit a"),
)
.unwrap();
let last = Log::last_validate_at(&store).unwrap();
assert_eq!(last, Some(ts(2026, 5, 27, 10, 10)));
}
#[test]
fn last_validate_at_none_when_no_validate() {
let (_d, store) = temp_store();
Log::append(
&store,
&entry(2026, 5, 27, 10, 0, LogKind::Create, Some("a"), "x"),
)
.unwrap();
assert_eq!(Log::last_validate_at(&store).unwrap(), None);
}
#[test]
fn rotation_rolls_prior_months_into_archives() {
let (_d, store) = temp_store();
let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr one");
let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr two");
Log::append(&store, &a1).unwrap();
Log::append(&store, &a2).unwrap();
assert!(!store.root.join("log").exists());
let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may one");
Log::append(&store, &m1).unwrap();
let arch_path = store.root.join("log").join("2026-04.md");
assert!(arch_path.exists(), "expected April archive to be created");
let arch = fs::read_to_string(&arch_path).unwrap();
assert!(arch.starts_with("---\ntype: log\n---\n"));
assert!(arch.contains("## [2026-04-10 09:00] ingest | apr-a"));
assert!(arch.contains("## [2026-04-20 09:00] create | apr-b"));
assert!(arch.contains("apr one"));
assert!(arch.contains("apr two"));
let active = fs::read_to_string(store.root.join("log.md")).unwrap();
assert!(active.contains("## [2026-05-02 08:00] update | may-a"));
assert!(
!active.contains("apr-a") && !active.contains("apr-b"),
"April entries must be gone from the active file; got:\n{active}"
);
let all = Log::tail(&store, 99).unwrap();
assert_eq!(all, vec![a1, a2, m1]);
}
#[test]
fn rotation_groups_distinct_prior_months_into_separate_archives() {
let (_d, store) = temp_store();
let mar = entry(2026, 3, 5, 9, 0, LogKind::Ingest, Some("mar"), "march");
let apr = entry(2026, 4, 5, 9, 0, LogKind::Create, Some("apr"), "april");
Log::append(&store, &mar).unwrap();
Log::append(&store, &apr).unwrap();
assert!(store.root.join("log").join("2026-03.md").exists());
let may = entry(2026, 5, 5, 9, 0, LogKind::Update, Some("may"), "may");
Log::append(&store, &may).unwrap();
assert!(store.root.join("log").join("2026-03.md").exists());
assert!(store.root.join("log").join("2026-04.md").exists());
let mar_arch = fs::read_to_string(store.root.join("log").join("2026-03.md")).unwrap();
let apr_arch = fs::read_to_string(store.root.join("log").join("2026-04.md")).unwrap();
assert!(mar_arch.contains("mar") && !mar_arch.contains("apr"));
assert!(apr_arch.contains("apr") && !apr_arch.contains("mar"));
let active = fs::read_to_string(store.root.join("log.md")).unwrap();
assert!(active.contains("may") && !active.contains("mar") && !active.contains("apr"));
let all = Log::tail(&store, 99).unwrap();
assert_eq!(all, vec![mar, apr, may]);
}
#[test]
fn tail_crosses_into_archive_when_n_spans_month_boundary() {
let (_d, store) = temp_store();
let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr1");
let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr2");
let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
let m2 = entry(2026, 5, 3, 8, 0, LogKind::Update, Some("may-b"), "may2");
for e in [&a1, &a2, &m1, &m2] {
Log::append(&store, e).unwrap();
}
let tail3 = Log::tail(&store, 3).unwrap();
assert_eq!(tail3, vec![a2.clone(), m1.clone(), m2.clone()]);
let tail2 = Log::tail(&store, 2).unwrap();
assert_eq!(tail2, vec![m1, m2]);
}
#[test]
fn since_crosses_into_archive_and_early_stops() {
let (_d, store) = temp_store();
let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr1");
let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr2");
let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
for e in [&a1, &a2, &m1] {
Log::append(&store, e).unwrap();
}
let got = Log::since(&store, ts(2026, 4, 15, 0, 0)).unwrap();
assert_eq!(got, vec![a2, m1]);
}
#[test]
fn last_validate_at_crosses_into_archive() {
let (_d, store) = temp_store();
Log::append(
&store,
&entry(2026, 4, 10, 9, 0, LogKind::Validate, None, "apr validate"),
)
.unwrap();
Log::append(
&store,
&entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may work"),
)
.unwrap();
let last = Log::last_validate_at(&store).unwrap();
assert_eq!(last, Some(ts(2026, 4, 10, 9, 0)));
}
/// Appends 400 multi-line entries within a single month so the active log
/// spans more than two `REVERSE_BLOCK`s, then checks that `tail` returns the
/// right entries for small, medium and over-sized requests.
#[test]
fn reverse_read_correct_on_large_single_month_log() {
    let (_d, store) = temp_store();
    let n = 400usize;
    let mut expected: Vec<LogEntry> = Vec::new();
    for i in 0..n {
        // Three minutes apart, so all 400 timestamps are distinct and stay in June.
        let total_min = (i as u32) * 3;
        let day = 1 + total_min / (24 * 60);
        let hour = (total_min / 60) % 24;
        let min = total_min % 60;
        let note = format!(
            "entry number {i}\nbody line A for {i}\nbody line B for {i} with padding {}",
            "x".repeat(40)
        );
        let e = entry(
            2026,
            6,
            day,
            hour,
            min,
            LogKind::Update,
            Some(&format!("records/item-{i:04}")),
            // Was the mangled token `¬e`, which does not compile.
            &note,
        );
        Log::append(&store, &e).unwrap();
        expected.push(e);
    }
    let size = fs::metadata(store.root.join("log.md")).unwrap().len();
    assert!(
        size > (REVERSE_BLOCK as u64) * 2,
        "test log not large enough ({size} bytes) to exercise multi-block reverse-read"
    );
    let tail5 = Log::tail(&store, 5).unwrap();
    assert_eq!(tail5, expected[n - 5..].to_vec());
    let tail50 = Log::tail(&store, 50).unwrap();
    assert_eq!(tail50, expected[n - 50..].to_vec());
    let all = Log::tail(&store, n + 10).unwrap();
    assert_eq!(all.len(), n);
    assert_eq!(all, expected);
}
/// Test helper: renders `entries` in the given physical order, wraps them with
/// `compose_active` under the standard front matter, and overwrites `log.md`.
///
/// Panics if the file cannot be written.
fn write_log_physical(store: &Store, entries: &[LogEntry]) {
    let rendered: String = entries.iter().map(|e| e.render()).collect();
    let document = compose_active(LOG_FRONTMATTER, &rendered);
    let target = store.root.join("log.md");
    fs::write(target, document).expect("write log.md");
}
/// The file is written newest-first on disk; `Log::tail` must still select by
/// timestamp and return results in chronological order.
#[test]
fn tail_returns_newest_by_timestamp_on_demonstrated_out_of_order_log() {
    let (_d, store) = temp_store();
    let newest = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "ten-ten");
    let middle = entry(2026, 5, 27, 10, 5, LogKind::Create, Some("b"), "ten-oh-five");
    let oldest = entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "ten-oh-oh");

    // Physical order: 10:10, 10:05, 10:00.
    let physical = [newest.clone(), middle.clone(), oldest.clone()];
    write_log_physical(&store, &physical);

    let newest_two = Log::tail(&store, 2).unwrap();
    assert_eq!(
        newest_two,
        vec![middle.clone(), newest.clone()],
        "tail(2) must be the two NEWEST by timestamp (chronological), \
         not the last two physical entries"
    );
    assert!(newest_two.contains(&newest), "newest (10:10) must be included");
    assert!(!newest_two.contains(&oldest), "oldest (10:00) must be excluded");

    let newest_one = Log::tail(&store, 1).unwrap();
    assert_eq!(newest_one, vec![newest.clone()]);
    let everything = Log::tail(&store, 99).unwrap();
    assert_eq!(everything, vec![oldest, middle, newest]);
}
/// Physical order is 10:55, 10:10, 10:50, 10:00. The newest entry is first on
/// disk, so `Log::tail` must not stop scanning once it has seen `n` entries.
#[test]
fn tail_no_early_stop_when_newer_entry_sits_before_an_older_one() {
    let (_d, store) = temp_store();
    let at_55 = entry(2026, 5, 27, 10, 55, LogKind::Update, Some("x55"), "55");
    let at_10 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("x10"), "10");
    let at_50 = entry(2026, 5, 27, 10, 50, LogKind::Update, Some("x50"), "50");
    let at_00 = entry(2026, 5, 27, 10, 0, LogKind::Update, Some("x00"), "00");

    let physical = [at_55.clone(), at_10.clone(), at_50.clone(), at_00.clone()];
    write_log_physical(&store, &physical);

    let top_two = Log::tail(&store, 2).unwrap();
    assert_eq!(top_two, vec![at_50.clone(), at_55.clone()]);

    let top_three = Log::tail(&store, 3).unwrap();
    assert_eq!(top_three, vec![at_10.clone(), at_50.clone(), at_55.clone()]);
}
/// Two entries share the 10:00 timestamp; the one written later in the file
/// must rank as the more recent of the pair.
#[test]
fn tail_orders_equal_timestamps_by_physical_recency() {
    let (_d, store) = temp_store();
    let before = entry(2026, 5, 27, 9, 59, LogKind::Create, Some("early"), "before");
    let first_tie = entry(2026, 5, 27, 10, 0, LogKind::Update, Some("tie-a"), "first 10:00");
    let second_tie = entry(2026, 5, 27, 10, 0, LogKind::Update, Some("tie-b"), "second 10:00");

    let physical = [before.clone(), first_tie.clone(), second_tie.clone()];
    write_log_physical(&store, &physical);

    let top_two = Log::tail(&store, 2).unwrap();
    assert_eq!(
        top_two,
        vec![first_tie.clone(), second_tie.clone()],
        "both 10:00 entries kept, physically-later one (tie_b) last; 09:59 dropped"
    );

    let top_one = Log::tail(&store, 1).unwrap();
    assert_eq!(top_one, vec![second_tie]);
}
/// After May has rotated to `log/2026-05.md`, a backdated May entry is
/// appended next to the June entry. `Log::tail` must interleave it by
/// timestamp with the archived May entries.
#[test]
fn tail_finds_newest_across_a_backdated_entry_spanning_the_month_boundary() {
    let (_d, store) = temp_store();
    let may_first = entry(2026, 5, 10, 9, 0, LogKind::Ingest, Some("may-1"), "may one");
    let may_second = entry(2026, 5, 20, 9, 0, LogKind::Create, Some("may-2"), "may two");
    let june_first = entry(2026, 6, 2, 8, 0, LogKind::Update, Some("jun-1"), "jun one");
    Log::append(&store, &may_first).unwrap();
    Log::append(&store, &may_second).unwrap();
    Log::append(&store, &june_first).unwrap();

    // Appending the June entry is expected to have produced the May archive.
    let may_archive = store.root.join("log").join("2026-05.md");
    assert!(may_archive.exists());

    // A May-dated correction written after the rotation.
    let may_fix = entry(2026, 5, 25, 9, 0, LogKind::Update, Some("may-2"), "may correction");
    Log::append(&store, &may_fix).unwrap();

    let active = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(
        active.contains("jun-1") && active.contains("may correction"),
        "backdated May entry should be in the active file alongside June; got:\n{active}"
    );

    assert_eq!(Log::tail(&store, 1).unwrap(), vec![june_first.clone()]);

    let top_two = Log::tail(&store, 2).unwrap();
    assert_eq!(top_two, vec![may_fix.clone(), june_first.clone()]);

    let top_three = Log::tail(&store, 3).unwrap();
    assert_eq!(
        top_three,
        vec![may_second.clone(), may_fix.clone(), june_first.clone()]
    );

    let everything = Log::tail(&store, 99).unwrap();
    assert_eq!(everything, vec![may_first, may_second, may_fix, june_first]);
}
/// A `## [` sequence that does not form a valid header must stay inside the
/// surrounding entry's note instead of starting a new entry.
#[test]
fn parse_entries_skips_unparseable_header_folding_into_body() {
    // The literal's lines are kept at column 0 on purpose: any indentation
    // here would become part of the parsed text.
    let text = "\
## [2026-05-27 10:00] create | records/x
Body mentions a literal: ## [not a real header here]
More body.
## [2026-05-27 10:05] update | records/y
Second.
";
    let entries = parse_entries(text);
    assert_eq!(entries.len(), 2);

    let (first, second) = (&entries[0], &entries[1]);
    assert_eq!(first.kind, LogKind::Create);
    assert!(first.note.contains("## [not a real header here]"));
    assert!(first.note.contains("More body."));
    assert_eq!(second.kind, LogKind::Update);
    assert_eq!(second.note, "Second.");
}
/// A correction is a new entry appended at the end: the file contents from
/// before the second append must remain a byte-for-byte prefix afterwards.
#[test]
fn append_only_corrective_entry_goes_on_end_without_rewriting() {
    let (_d, store) = temp_store();
    let log_file = store.root.join("log.md");
    let subject = "records/northstar";

    let original = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Update,
        Some(subject),
        "Seat count 120 -> 175.",
    );
    Log::append(&store, &original).unwrap();
    let snapshot_one = fs::read_to_string(&log_file).unwrap();

    let correction = entry(
        2026,
        5,
        27,
        11,
        0,
        LogKind::Update,
        Some(subject),
        "Correction: seat count is 165, not 175.",
    );
    Log::append(&store, &correction).unwrap();
    let snapshot_two = fs::read_to_string(&log_file).unwrap();

    assert!(
        snapshot_two.starts_with(&snapshot_one),
        "appending must not rewrite earlier bytes"
    );
    assert!(snapshot_two.contains("Correction: seat count is 165, not 175."));

    let everything = Log::tail(&store, 99).unwrap();
    assert_eq!(everything, vec![original, correction]);
}
/// Eight threads, released together by a barrier, each append 25 entries.
/// Afterwards the front matter must be intact, every `## [` line on disk must
/// parse as a header, and every entry read back must survive render/parse.
#[test]
fn concurrent_appends_are_atomic_and_total() {
    use std::sync::{Arc, Barrier};
    use std::thread;

    let (_d, store) = temp_store();
    let seed = entry(2026, 7, 1, 0, 0, LogKind::Create, Some("seed"), "seed");
    Log::append(&store, &seed).unwrap();

    let threads = 8usize;
    let per = 25usize;
    let barrier = Arc::new(Barrier::new(threads));
    let store = Arc::new(store);

    // Collect eagerly so every worker is spawned before any join; the barrier
    // needs all of them running to release.
    let workers: Vec<_> = (0..threads)
        .map(|tnum| {
            let gate = Arc::clone(&barrier);
            let shared = Arc::clone(&store);
            thread::spawn(move || {
                gate.wait();
                for i in 0..per {
                    let object = format!("t{tnum}-i{i}");
                    let note = format!("thread {tnum} item {i}");
                    let e = entry(
                        2026,
                        7,
                        1,
                        (tnum % 24) as u32,
                        (i % 60) as u32,
                        LogKind::Update,
                        Some(&object),
                        &note,
                    );
                    Log::append(&shared, &e).unwrap();
                }
            })
        })
        .collect();
    for worker in workers {
        worker.join().unwrap();
    }

    let content = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(content.starts_with("---\ntype: log\n---\n"));

    // No header line may be torn or interleaved with another write.
    for line in content.lines().filter(|l| l.starts_with("## [")) {
        assert!(
            Log::parse_header(line).is_some(),
            "corrupt/torn header line on disk: {line:?}"
        );
    }
    assert!(content.contains("## [2026-07-01 00:00] create | seed"));

    let all = Log::tail(&store, 10_000).unwrap();
    assert!(!all.is_empty());
    for original in &all {
        let reparsed = parse_single_entry(&original.render()).unwrap();
        assert_eq!(&reparsed, original);
    }
}
/// Rendering an entry and parsing the result must give back an equal entry.
/// Covers: with object, without object, custom kind with a multi-line note,
/// and an empty note.
#[test]
fn render_then_parse_is_identity() {
    let with_object = entry(2026, 1, 2, 3, 4, LogKind::Ingest, Some("sources/a.eml"), "n");
    let without_object = entry(2026, 12, 31, 23, 59, LogKind::Validate, None, "PASS - 0 errors");
    let custom_multiline = entry(
        2026,
        6,
        15,
        12,
        30,
        LogKind::Custom("proposal".to_string()),
        Some("records/p"),
        "multi\nline\nnote",
    );
    let empty_note = entry(2026, 6, 15, 12, 30, LogKind::Contradiction, Some("obj"), "");

    let cases = vec![with_object, without_object, custom_multiline, empty_note];
    for e in cases {
        let rendered = e.render();
        let parsed = match parse_single_entry(&rendered) {
            Some(parsed) => parsed,
            None => panic!("failed to reparse rendered entry:\n{rendered}"),
        };
        assert_eq!(parsed, e, "round-trip mismatch for {e:?}");
    }
}
/// Counts the non-overlapping occurrences of `needle` in `haystack`.
fn count_occurrences(haystack: &str, needle: &str) -> usize {
    let hits = haystack.match_indices(needle);
    hits.count()
}
/// Writes the same April batch to the archive three times (the last two with
/// the boolean flag set to `true`). Each entry must end up in the archive
/// exactly once, and `Log::since` must see each once.
#[test]
fn regression_archive_reroll_is_idempotent_after_interrupted_rotation() {
    let (_d, store) = temp_store();
    let dir = archive_dir(&store);
    let arch = archive_path(&store, 2026, 4);
    let apr1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr one");
    let apr2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr two");
    let month = [apr1.clone(), apr2.clone()];

    fs::create_dir_all(&dir).unwrap();
    // First write, then two repeats of the identical batch.
    for reroll in [false, true, true] {
        append_to_archive(&arch, &month, reroll).unwrap();
    }

    let archived = fs::read_to_string(&arch).unwrap();
    let first_hits = count_occurrences(&archived, "## [2026-04-10 09:00] ingest | apr-a");
    assert_eq!(
        first_hits, 1,
        "re-rolled archive duplicated the first April entry; got:\n{archived}"
    );
    let second_hits = count_occurrences(&archived, "## [2026-04-20 09:00] create | apr-b");
    assert_eq!(
        second_hits, 1,
        "re-rolled archive duplicated the second April entry; got:\n{archived}"
    );

    let got = Log::since(&store, ts(2026, 4, 1, 0, 0)).unwrap();
    assert_eq!(
        got,
        vec![apr1, apr2],
        "since over the re-rolled archive must return each April entry once"
    );
}
/// After a real rotation, the active file is restored to its pre-rotation
/// contents and an empty rotation marker is written. The next append must not
/// copy the April entries into the archive a second time.
#[test]
fn regression_rotation_reroll_after_active_untrimmed_does_not_duplicate() {
    let (_d, store) = temp_store();
    let april_first = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr one");
    let april_second = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr two");
    Log::append(&store, &april_first).unwrap();
    Log::append(&store, &april_second).unwrap();

    // Snapshot the active file while it still holds only April.
    let active_path = active_log_path(&store);
    let april_only_active = fs::read_to_string(&active_path).unwrap();

    let may_first = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may one");
    Log::append(&store, &may_first).unwrap();
    let arch = archive_path(&store, 2026, 4);
    assert!(arch.exists(), "April should have rotated to its archive");

    // Put the active file back and leave a marker behind, then append again.
    fs::write(&active_path, &april_only_active).unwrap();
    fs::write(rotation_marker_path(&store), b"").unwrap();
    let may_second = entry(2026, 5, 3, 8, 0, LogKind::Update, Some("may-b"), "may two");
    Log::append(&store, &may_second).unwrap();

    let archived = fs::read_to_string(&arch).unwrap();
    let first_hits = count_occurrences(&archived, "## [2026-04-10 09:00] ingest | apr-a");
    assert_eq!(
        first_hits, 1,
        "retried rotation duplicated an April entry in the archive; got:\n{archived}"
    );
    let second_hits = count_occurrences(&archived, "## [2026-04-20 09:00] create | apr-b");
    assert_eq!(
        second_hits, 1,
        "retried rotation duplicated an April entry in the archive; got:\n{archived}"
    );
}
/// Two separately appended April entries with identical content must both
/// reach the April archive and both come back from `Log::since`.
#[test]
fn regression_distinct_same_minute_entries_both_survive_rotation() {
    let (_d, store) = temp_store();
    let first_copy = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("x"), "dup");
    let second_copy = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("x"), "dup");

    // April, then May, then the second April copy, then June.
    Log::append(&store, &first_copy).unwrap();
    let may_entry = entry(2026, 5, 2, 8, 0, LogKind::Ingest, Some("may"), "m");
    Log::append(&store, &may_entry).unwrap();
    Log::append(&store, &second_copy).unwrap();
    let june_entry = entry(2026, 6, 1, 8, 0, LogKind::Ingest, Some("jun"), "j");
    Log::append(&store, &june_entry).unwrap();

    let archived = fs::read_to_string(archive_path(&store, 2026, 4)).unwrap();
    let header_hits = count_occurrences(&archived, "## [2026-04-10 09:00] ingest | x");
    assert_eq!(
        header_hits, 2,
        "two distinct same-minute April appends must BOTH survive rotation; got:\n{archived}"
    );

    let got = Log::since(&store, ts(2026, 4, 1, 0, 0)).unwrap();
    let mut dups = 0usize;
    for e in &got {
        if e.object.as_deref() == Some("x") {
            dups += 1;
        }
    }
    assert_eq!(
        dups, 2,
        "since must return both distinct same-minute entries; got {got:#?}"
    );
}
/// Two identical entries appended to the active log must both be returned by
/// `Log::tail` and by `Log::since`.
#[test]
fn regression_tail_since_return_distinct_same_minute_active_entries() {
    let (_d, store) = temp_store();
    // Build a fresh but identical entry for each append.
    let same_minute = || entry(2026, 6, 10, 9, 0, LogKind::Ingest, Some("x"), "dup");
    Log::append(&store, &same_minute()).unwrap();
    Log::append(&store, &same_minute()).unwrap();

    let tail = Log::tail(&store, 20).unwrap();
    assert_eq!(
        tail.len(),
        2,
        "tail must return both same-minute active entries; got {tail:#?}"
    );

    let cutoff = ts(2026, 6, 1, 0, 0);
    let since = Log::since(&store, cutoff).unwrap();
    assert_eq!(
        since.len(),
        2,
        "since must return both same-minute active entries; got {since:#?}"
    );
}
/// The active file starts with a stray `## [` line that is not a valid header,
/// followed by one valid April entry. A June append must leave the stray line
/// somewhere on disk and still move the April entry to its archive.
#[test]
fn regression_rotation_preserves_lines_before_first_valid_header() {
    let (_d, store) = temp_store();
    let active = active_log_path(&store);
    let content = "---\ntype: log\n---\n\n## [orphan from a merge] stray text\n## [2026-04-10 09:00] ingest | x\nbody line\n";
    fs::write(&active, content).unwrap();

    let june_entry = entry(2026, 6, 1, 8, 0, LogKind::Ingest, Some("jun"), "j");
    Log::append(&store, &june_entry).unwrap();

    let active_after = fs::read_to_string(&active).unwrap();
    // A missing archive reads as empty here; the assertions below report it.
    let arch_after = fs::read_to_string(archive_path(&store, 2026, 4)).unwrap_or_default();

    let orphan = "orphan from a merge";
    let orphan_survived = active_after.contains(orphan) || arch_after.contains(orphan);
    assert!(
        orphan_survived,
        "the pre-first-valid-header line was erased by rotation;\nactive:\n{active_after}\narchive:\n{arch_after}"
    );
    assert!(
        arch_after.contains("## [2026-04-10 09:00] ingest | x"),
        "the valid April entry must still rotate to its archive; got:\n{arch_after}"
    );
}
/// A note whose middle line starts with `## [` but is not a valid header must
/// come back whole from both `Log::tail` and `Log::since`.
#[test]
fn regression_reverse_reader_preserves_note_line_starting_with_bracket_header() {
    let (_d, store) = temp_store();
    let multi = "First line.\n## [draft outline] more\nThird line.";
    let e = entry(2026, 5, 27, 10, 0, LogKind::Update, Some("records/x"), multi);
    write_raw_log(&store, std::slice::from_ref(&e));

    let got = Log::tail(&store, 1).unwrap();
    assert_eq!(got.len(), 1, "the single entry must be returned");
    let only = &got[0];
    assert_eq!(
        only.note, multi,
        "reverse reader truncated the note at the `## [` continuation line; \
         got {:?}",
        only.note
    );
    assert_eq!(*only, e, "the whole entry must round-trip through tail");

    let cutoff = ts(2026, 5, 27, 9, 0);
    let since = Log::since(&store, cutoff).unwrap();
    assert_eq!(since, vec![e]);
}
/// Test helper: builds a timestamp from local wall-clock fields in a fixed
/// zone `offset_hours` east of UTC, with seconds set to zero.
///
/// Panics if the date, the time or the offset is out of range.
fn ts_offset(
    y: i32,
    mo: u32,
    d: u32,
    h: u32,
    mi: u32,
    offset_hours: i32,
) -> DateTime<FixedOffset> {
    let date = chrono::NaiveDate::from_ymd_opt(y, mo, d).unwrap();
    let wall_clock = date.and_hms_opt(h, mi, 0).unwrap();
    let zone = FixedOffset::east_opt(offset_hours * 3600).unwrap();
    zone.from_local_datetime(&wall_clock).single().unwrap()
}
/// The cutoff is 2026-05-01 00:30 at +07:00, which is May locally but April
/// in UTC (both facts are asserted below). `Log::since` must therefore still
/// return the late-April archive entry along with the May entry.
#[test]
fn regression_since_prunes_archives_on_utc_month_not_local_offset_month() {
    let (_d, store) = temp_store();

    // Hand-build an April archive holding one entry from the 30th.
    let apr = entry(2026, 4, 30, 18, 0, LogKind::Update, Some("apr-late"), "april late");
    fs::create_dir_all(archive_dir(&store)).unwrap();
    let arch = [LOG_FRONTMATTER, "\n", apr.render().as_str()].concat();
    fs::write(archive_path(&store, 2026, 4), arch).unwrap();

    // The active log holds a single May entry.
    let may = entry(2026, 5, 5, 8, 0, LogKind::Update, Some("may-a"), "may one");
    write_raw_log(&store, std::slice::from_ref(&may));

    let cutoff = ts_offset(2026, 5, 1, 0, 30, 7);
    assert_eq!((cutoff.year(), cutoff.month()), (2026, 5));
    let cutoff_utc = cutoff.with_timezone(&Utc);
    assert_eq!((cutoff_utc.year(), cutoff_utc.month()), (2026, 4));

    let got = Log::since(&store, cutoff).unwrap();
    // Compared as a set of timestamps, so result order is not checked here.
    let stamps: std::collections::BTreeSet<_> = got.iter().map(|e| e.timestamp).collect();
    assert_eq!(
        stamps,
        [ts(2026, 4, 30, 18, 0), ts(2026, 5, 5, 8, 0)]
            .into_iter()
            .collect(),
        "since(non-UTC cutoff near a month boundary) must include the April \
         archive entry newer than the cutoff instant; got {got:?}"
    );
}
/// A note line that looks exactly like a valid entry header must not appear
/// as a column-0 `## [` line on disk, yet must read back verbatim.
#[test]
fn note_line_shaped_like_a_header_is_escaped_and_round_trips() {
    let (_d, store) = temp_store();
    let subject = "records/contacts/jane.md";
    let note = "quoting earlier entry:\n## [2020-01-01 00:00] delete | records/contacts/jane.md\nend of quote";
    let e = entry(2026, 6, 11, 4, 41, LogKind::Contradiction, Some(subject), note);
    Log::append(&store, &e).unwrap();

    // On disk, only the real header may start with `## [`.
    let raw = fs::read_to_string(store.root.join("log.md")).unwrap();
    let mut header_lines = 0usize;
    for l in raw.lines() {
        if l.starts_with("## [") {
            header_lines += 1;
        }
    }
    assert_eq!(
        header_lines, 1,
        "exactly one real entry header may sit at column 0; got:\n{raw}"
    );

    let got = Log::tail(&store, 10).unwrap();
    assert_eq!(got.len(), 1, "exactly one entry; got {got:?}");
    assert_eq!(got[0].note, note, "note must round-trip verbatim");
    assert_eq!(got[0], e);

    let since = Log::since(&store, ts(2026, 1, 1, 0, 0)).unwrap();
    assert_eq!(since, vec![e.clone()]);
}
/// After an entry whose note embeds a 2020-dated header-shaped line, a second
/// append must not create a 2020 archive and must leave the note intact.
#[test]
fn header_shaped_note_survives_a_later_rotation_uncorrupted() {
    let (_d, store) = temp_store();
    let note = "see\n## [2020-01-01 00:00] delete | records/x.md\nbelow";

    let with_embedded = entry(
        2026,
        6,
        11,
        4,
        41,
        LogKind::Contradiction,
        Some("records/x.md"),
        note,
    );
    Log::append(&store, &with_embedded).unwrap();

    let follow_up = entry(2026, 6, 11, 5, 0, LogKind::Update, Some("records/y.md"), "y");
    Log::append(&store, &follow_up).unwrap();

    let bogus_archive = store.root.join("log").join("2020-01.md");
    assert!(
        !bogus_archive.exists(),
        "a header-shaped note line must not fabricate a 2020 archive"
    );

    let got = Log::tail(&store, 10).unwrap();
    assert_eq!(got.len(), 2, "two real entries only; got {got:?}");

    // Locate the first entry by its object and compare its note.
    let recovered_note = got
        .iter()
        .find(|e| e.object.as_deref() == Some("records/x.md"))
        .map(|e| e.note.as_str());
    assert_eq!(
        recovered_note,
        Some(note),
        "the header-shaped note must survive the rotation pass intact"
    );
}
/// `escape_note_line` prefixes one backslash to a line shaped like a valid
/// header, and to such a line that already starts with a backslash;
/// `unescape_note_line` reverses it. Other lines pass through both unchanged.
#[test]
fn escape_unescape_note_line_round_trips_including_literal_backslash() {
    let valid_header = "## [2020-01-01 00:00] delete | x";

    // A bare header-shaped line gains exactly one leading backslash.
    let escaped = escape_note_line(valid_header).into_owned();
    assert_eq!(
        &*escape_note_line(valid_header),
        &format!("\\{valid_header}")
    );
    assert_eq!(&*unescape_note_line(&escaped), valid_header);

    // A line that already starts with a backslash gains one more, so the
    // original backslash is recoverable.
    let pre = format!("\\{valid_header}");
    let pre_escaped = escape_note_line(&pre).into_owned();
    assert_eq!(&*escape_note_line(&pre), &format!("\\{pre}"));
    assert_eq!(&*unescape_note_line(&pre_escaped), &pre);

    // Lines that are not header-shaped are left alone in both directions.
    let untouched = ["plain note", "## [not a header]", "\\not a header", ""];
    for plain in untouched {
        assert_eq!(&*escape_note_line(plain), plain);
        assert_eq!(&*unescape_note_line(plain), plain);
    }
}
/// Appends 600 entries whose note length cycles through 0..97 padding bytes,
/// so entry boundaries fall at varying offsets relative to `REVERSE_BLOCK`.
/// `Log::tail` must reconstruct every entry across those block edges.
#[test]
fn reverse_read_correct_with_header_straddling_a_block_boundary() {
    let (_d, store) = temp_store();
    let n = 600usize;
    let mut expected: Vec<LogEntry> = Vec::with_capacity(n);
    for i in 0..n {
        // Entries are two minutes apart, so timestamps strictly increase.
        let total_min = (i as u32) * 2;
        let day = 1 + total_min / (24 * 60);
        let hour = (total_min / 60) % 24;
        let min = total_min % 60;
        // The varying padding shifts where each header lands in the file.
        let note = format!("note {i} {}", "y".repeat(i % 97));
        let e = entry(
            2026,
            6,
            day,
            hour,
            min,
            LogKind::Update,
            Some(&format!("records/item-{i:05}")),
            &note,
        );
        Log::append(&store, &e).unwrap();
        expected.push(e);
    }

    // Guard: the file must span several reverse-read blocks.
    let size = fs::metadata(store.root.join("log.md")).unwrap().len();
    assert!(
        size > (REVERSE_BLOCK as u64) * 3,
        "test log not large enough ({size} bytes) to cross several blocks"
    );

    let all = Log::tail(&store, n + 10).unwrap();
    assert_eq!(all, expected, "every entry must reconstruct across blocks");
    assert_eq!(Log::tail(&store, 7).unwrap(), expected[n - 7..].to_vec());
}
/// `header_offsets_range` over a one-byte window at a header's offset must
/// report that header, and two adjacent windows split at the second header
/// must together report the same offsets as `header_offsets` on the whole
/// buffer, with nothing duplicated or lost.
#[test]
fn header_offsets_range_finds_boundary_straddling_marker_once() {
    let buf =
        b"## [2026-06-01 00:00] update | a\nnote a\n## [2026-06-01 00:01] update | b\nnote b\n";
    let full = header_offsets(buf, 0);
    assert_eq!(full.len(), 2, "both headers found over the whole buffer");
    let (first_off, second_off) = (full[0], full[1]);
    let second = second_off as usize;

    // One-byte windows positioned exactly on each header's first byte.
    let only_second = header_offsets_range(buf, 0, second, second + 1, false);
    assert_eq!(only_second, vec![second_off]);
    let only_first = header_offsets_range(buf, 0, 0, 1, true);
    assert_eq!(only_first, vec![first_off]);

    // Two windows that meet at the second header cover the buffer once.
    let left = header_offsets_range(buf, 0, 0, second, true);
    let right = header_offsets_range(buf, 0, second, buf.len(), false);
    let combined: Vec<_> = left.into_iter().chain(right).collect();
    assert_eq!(combined, full);
}
// Builds a log with one real entry whose note contains a header-shaped
// fragment in the MIDDLE of a line, and sizes the file so that the
// fragment's first `#` sits exactly `REVERSE_BLOCK` bytes before the end of
// the file. `Log::tail` must return only the one real entry, with its note
// still containing the fragment.
#[test]
fn reverse_read_does_not_fabricate_entry_from_midline_header_at_block_boundary() {
let (_d, store) = temp_store();
// The fragment starts with "see ", so its `##` is not at the start of a line.
let fragment = "see ## [2020-01-01 00:00] delete | records/x.md";
let hash_in_fragment = fragment.find("##").expect("fragment has `##`");
let header_line = "## [2026-06-14 10:00] update | records/real.md\n";
// head = front matter, blank line, real header, a "lead" note line, then the
// fragment with no trailing newline yet.
let mut head = String::from(LOG_FRONTMATTER);
head.push('\n');
head.push_str(header_line);
head.push_str("lead\n");
head.push_str(fragment);
// Byte offset of the fragment's first `#` within `head`, and the number of
// bytes from that `#` to the end of `head`.
let hash_off = head.len() - fragment.len() + hash_in_fragment;
let after_hash_in_head = head.len() - hash_off;
// Pad so that (bytes from `#` to end of head) + padding + the two trailing
// newlines pushed below add up to exactly REVERSE_BLOCK.
let tail_len = REVERSE_BLOCK
.checked_sub(after_hash_in_head + 2)
.expect("REVERSE_BLOCK comfortably exceeds the post-`#` head bytes");
let mut body = head;
// The `z` padding continues the fragment's own line; only then is the line
// terminated, followed by one more newline.
body.push_str(&"z".repeat(tail_len)); body.push('\n');
body.push('\n');
fs::write(store.root.join("log.md"), &body).unwrap();
assert!(
body.len() as u64 > REVERSE_BLOCK as u64,
"test log must span >1 block (len {})",
body.len()
);
// Sanity check on the arithmetic above: the `#` really is REVERSE_BLOCK
// bytes from the end of the file.
let real_hash_off = body.find("see ##").unwrap() + hash_in_fragment;
assert_eq!(
real_hash_off,
body.len() - REVERSE_BLOCK,
"fragment `#` must land on the first backward block's left edge to exercise the bug"
);
let got = Log::tail(&store, 10).unwrap();
assert_eq!(
got.len(),
1,
"exactly the one real entry; got {} (a fabricated entry means the boundary `#` was mis-read as a header): {got:#?}",
got.len()
);
// The single entry is the real one, and its note still holds both the lead
// line and the untouched fragment.
let only = &got[0];
assert_eq!(only.object.as_deref(), Some("records/real.md"));
assert_eq!(only.timestamp, ts(2026, 6, 14, 10, 0));
assert!(
only.note.contains("lead"),
"note keeps its lead; got {:?}",
only.note
);
assert!(
only.note.contains(fragment),
"note keeps the verbatim mid-line fragment (not truncated); got {:?}",
only.note
);
}
/// The same two April entries are written to both the active log and the
/// April archive; `Log::since` and `Log::tail` must each return them once.
#[test]
fn tail_and_since_dedup_entries_present_in_both_active_and_archive() {
    let (_d, store) = temp_store();
    let first = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr one");
    let second = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr two");

    // Copy one: the active log.
    write_raw_log(&store, &[first.clone(), second.clone()]);

    // Copy two: a hand-built April archive with the same rendered entries.
    fs::create_dir_all(archive_dir(&store)).unwrap();
    let arch = [
        LOG_FRONTMATTER,
        "\n",
        first.render().as_str(),
        second.render().as_str(),
    ]
    .concat();
    fs::write(archive_path(&store, 2026, 4), arch).unwrap();

    let since = Log::since(&store, ts(2026, 4, 1, 0, 0)).unwrap();
    assert_eq!(
        since,
        vec![first.clone(), second.clone()],
        "since must dedup the doubly-present entries; got {since:?}"
    );

    let tail = Log::tail(&store, 10).unwrap();
    assert_eq!(
        tail,
        vec![first, second],
        "tail must dedup the doubly-present entries; got {tail:?}"
    );
}
}