use std::collections::BTreeMap;
use std::fs::{self, File};
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Datelike, FixedOffset, NaiveDateTime, TimeZone};
use crate::store::Store;
/// Timestamp layout used inside entry headers (minute resolution, no UTC offset).
const TS_FORMAT: &str = "%Y-%m-%d %H:%M";
/// Front matter and title written at the top of newly created log and archive files.
const LOG_FRONTMATTER: &str = "---\ntype: log\n---\n\n# Curator log\n";
/// Number of bytes read per step when a log file is scanned from its end.
const REVERSE_BLOCK: usize = 8 * 1024;
/// Category of a log entry, written as the kind token of an entry header.
///
/// Tokens outside the built-in set are carried verbatim in [`LogKind::Custom`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogKind {
    Ingest,
    Create,
    Update,
    Delete,
    Rename,
    Link,
    Validate,
    IndexRebuild,
    Contradiction,
    /// Any token that is not one of the built-in kinds.
    Custom(String),
}

impl LogKind {
    /// Returns the token written to the log for this kind.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Custom(token) => token.as_str(),
            Self::Ingest => "ingest",
            Self::Create => "create",
            Self::Update => "update",
            Self::Delete => "delete",
            Self::Rename => "rename",
            Self::Link => "link",
            Self::Validate => "validate",
            Self::IndexRebuild => "index-rebuild",
            Self::Contradiction => "contradiction",
        }
    }

    /// Maps a header token to its kind; matching is exact and case-sensitive.
    ///
    /// Unknown tokens are never rejected: they become [`LogKind::Custom`].
    pub fn parse(token: &str) -> LogKind {
        match token {
            "ingest" => Self::Ingest,
            "create" => Self::Create,
            "update" => Self::Update,
            "delete" => Self::Delete,
            "rename" => Self::Rename,
            "link" => Self::Link,
            "validate" => Self::Validate,
            "index-rebuild" => Self::IndexRebuild,
            "contradiction" => Self::Contradiction,
            unknown => Self::Custom(unknown.to_string()),
        }
    }

    /// Reports whether this is one of the built-in kinds (anything but `Custom`).
    pub fn is_recognized(&self) -> bool {
        match self {
            Self::Custom(_) => false,
            _ => true,
        }
    }
}
/// One log record: a header (timestamp, kind, optional object) plus a free-text note.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogEntry {
    /// When the event happened; rendered with `TS_FORMAT`, i.e. without its offset.
    pub timestamp: DateTime<FixedOffset>,
    /// Category of the event.
    pub kind: LogKind,
    /// Optional subject of the event, written after `|` in the header.
    pub object: Option<String>,
    /// Body text written below the header; may span several lines.
    pub note: String,
}

impl LogEntry {
    /// Renders the entry as it is stored on disk: the `## [timestamp] kind | object`
    /// header line, the note (if non-empty after trimming trailing whitespace), and
    /// a terminating blank line.
    fn render(&self) -> String {
        let stamp = self.timestamp.format(TS_FORMAT);
        let kind = self.kind.as_str();
        let mut text = match self.object.as_deref() {
            Some(object) => format!("## [{}] {} | {}\n", stamp, kind, object),
            None => format!("## [{}] {}\n", stamp, kind),
        };

        let body = self.note.trim_end_matches(['\n', '\r', ' ', '\t']);
        if !body.is_empty() {
            text.push_str(body);
            text.push('\n');
        }

        // Blank separator line after every entry.
        text.push('\n');
        text
    }

    /// Returns the `(year, month)` of the entry's timestamp, used as the rotation key.
    fn year_month(&self) -> (i32, u32) {
        let when = &self.timestamp;
        (when.year(), when.month())
    }
}
/// Entry point for reading and writing a store's curator log.
///
/// The log consists of the active file `log.md` in the store root plus
/// per-month archive files `log/YYYY-MM.md`.
#[derive(Debug, Clone)]
pub struct Log;

impl Log {
    /// Appends `entry` to the active log, creating the file (with front matter)
    /// when it does not exist yet.
    ///
    /// Entries already in the active file whose `(year, month)` is earlier than
    /// the new entry's are first moved into their monthly archive files; the
    /// active file is then rewritten with the remaining entries plus `entry`.
    /// When nothing needs rotating, the existing content is kept byte-for-byte
    /// and the rendered entry is added at the end.
    ///
    /// # Errors
    /// Propagates any I/O error from reading or writing the log files.
    pub fn append(store: &Store, entry: &LogEntry) -> crate::Result<()> {
        let active = active_log_path(store);

        if !active.exists() {
            if let Some(parent) = active.parent() {
                fs::create_dir_all(parent)?;
            }
            let full = compose_active(LOG_FRONTMATTER, &entry.render());
            return write_atomic(&active, full.as_bytes());
        }

        let current_ym = entry.year_month();
        let content = fs::read_to_string(&active)?;
        let (header, entries) = parse_active(&content);

        // Split existing entries into "older month -> archive" and "stay active".
        let mut by_month: BTreeMap<(i32, u32), Vec<LogEntry>> = BTreeMap::new();
        let mut keep: Vec<LogEntry> = Vec::new();
        for e in entries {
            if e.year_month() < current_ym {
                by_month.entry(e.year_month()).or_default().push(e);
            } else {
                keep.push(e);
            }
        }

        if by_month.is_empty() {
            // No rotation: preserve the existing bytes and add the entry at the end.
            let mut full = content;
            if !full.ends_with('\n') {
                full.push('\n');
            }
            full.push_str(&entry.render());
            return write_atomic(&active, full.as_bytes());
        }

        // Archives are written before the active file is rewritten, so an
        // interruption in between can duplicate entries but not drop them.
        fs::create_dir_all(archive_dir(store))?;
        for ((y, m), month_entries) in &by_month {
            append_to_archive(&archive_path(store, *y, *m), month_entries)?;
        }

        let mut body: String = keep.iter().map(LogEntry::render).collect();
        body.push_str(&entry.render());
        let full = compose_active(&header, &body);
        write_atomic(&active, full.as_bytes())
    }

    /// Returns up to `n` entries with the newest timestamps, oldest first.
    ///
    /// The active file is scanned completely; archives are scanned newest month
    /// first and scanning stops once the window is full and an archive's month
    /// is older than the oldest entry currently kept. `n == 0` yields an empty
    /// vector without touching the disk.
    ///
    /// # Errors
    /// Propagates any I/O error from listing or reading the log files.
    pub fn tail(store: &Store, n: usize) -> crate::Result<Vec<LogEntry>> {
        if n == 0 {
            return Ok(Vec::new());
        }
        let mut window = NewestWindow::new(n);
        let active = active_log_path(store);
        if active.exists() {
            reverse_collect(&active, |e| {
                window.consider(e);
                false
            })?;
        }
        for archive in list_archives_desc(store)? {
            if let (true, Some(cutoff_ym), Some(arch_ym)) = (
                window.is_full(),
                window.min_year_month(),
                archive_year_month(&archive),
            ) {
                // Nothing in this (or any older) archive can displace a kept entry.
                if arch_ym < cutoff_ym {
                    break;
                }
            }
            reverse_collect(&archive, |e| {
                window.consider(e);
                false
            })?;
        }
        Ok(window.into_sorted())
    }

    /// Returns every entry whose timestamp is strictly after `time`.
    ///
    /// Entries come back in on-disk order: oldest relevant archive first, the
    /// active file last. Archives for months before the cut-off month are not
    /// read.
    ///
    /// # Errors
    /// Propagates any I/O error from listing or reading the log files.
    pub fn since(store: &Store, time: DateTime<FixedOffset>) -> crate::Result<Vec<LogEntry>> {
        let mut collected: Vec<LogEntry> = Vec::new();
        let active = active_log_path(store);
        if active.exists() {
            reverse_collect(&active, |e| {
                if e.timestamp > time {
                    collected.push(e);
                }
                false
            })?;
        }

        // Stored timestamps are parsed with offset 0 and archives are named
        // after that same clock, so the cut-off month must be taken from the
        // UTC view of `time`. Using the caller's local offset could skip an
        // archive that still holds entries newer than `time`.
        let cutoff = time.naive_utc();
        let cutoff_ym = (cutoff.year(), cutoff.month());
        for archive in list_archives_desc(store)? {
            if let Some(arch_ym) = archive_year_month(&archive) {
                if arch_ym < cutoff_ym {
                    break;
                }
            }
            reverse_collect(&archive, |e| {
                if e.timestamp > time {
                    collected.push(e);
                }
                false
            })?;
        }

        // Everything was gathered newest-first; flip it.
        collected.reverse();
        Ok(collected)
    }

    /// Returns the timestamp of the most recently written `validate` entry.
    ///
    /// "Most recent" means the first `validate` entry met when scanning the
    /// active file from its end and then the archives newest month first; the
    /// timestamps themselves are not compared. Returns `None` when no such
    /// entry exists.
    ///
    /// # Errors
    /// Propagates any I/O error from listing or reading the log files.
    pub fn last_validate_at(store: &Store) -> crate::Result<Option<DateTime<FixedOffset>>> {
        let mut found: Option<DateTime<FixedOffset>> = None;
        let active = active_log_path(store);
        if active.exists() {
            reverse_collect(&active, |e| {
                if e.kind == LogKind::Validate {
                    found = Some(e.timestamp);
                    true
                } else {
                    false
                }
            })?;
        }
        if found.is_none() {
            for archive in list_archives_desc(store)? {
                reverse_collect(&archive, |e| {
                    if e.kind == LogKind::Validate {
                        found = Some(e.timestamp);
                        true
                    } else {
                        false
                    }
                })?;
                if found.is_some() {
                    break;
                }
            }
        }
        Ok(found)
    }

    /// Parses one header line of the form `## [YYYY-MM-DD HH:MM] kind | object`.
    ///
    /// The `| object` part is optional; an empty object is reported as `None`.
    /// Returns `None` when the line lacks the `## [` prefix or the closing `]`,
    /// the timestamp does not parse, or the kind token is missing.
    pub fn parse_header(line: &str) -> Option<(DateTime<FixedOffset>, LogKind, Option<String>)> {
        let line = line.trim_end_matches(['\n', '\r']);
        let rest = line.strip_prefix("## [")?;
        let close = rest.find(']')?;
        let timestamp = parse_timestamp(&rest[..close])?;
        let after = rest[close + 1..].trim();
        if after.is_empty() {
            return None;
        }
        // Only the first `|` separates kind from object; later ones belong to the object.
        let (kind_str, object) = match after.split_once('|') {
            Some((k, o)) => {
                let obj = o.trim();
                let obj = if obj.is_empty() {
                    None
                } else {
                    Some(obj.to_string())
                };
                (k.trim(), obj)
            }
            None => (after, None),
        };
        if kind_str.is_empty() {
            return None;
        }
        Some((timestamp, LogKind::parse(kind_str), object))
    }
}
/// Bounded collection that keeps the `cap` entries with the newest timestamps
/// out of everything offered to it.
struct NewestWindow {
    /// Maximum number of entries retained.
    cap: usize,
    /// Heap whose root is the entry that would be evicted next (see `WindowItem`'s ordering).
    heap: std::collections::BinaryHeap<WindowItem>,
    /// Sequence number handed to the next offered entry.
    next_arrival: u64,
}

impl NewestWindow {
    /// Creates an empty window holding at most `cap` entries.
    fn new(cap: usize) -> Self {
        // Callers may pass a very large `cap` to mean "everything"; reserving
        // that much up front would over-allocate or panic with a capacity
        // overflow, so only a bounded amount is pre-allocated.
        const PREALLOC_LIMIT: usize = 1024;
        NewestWindow {
            cap,
            heap: std::collections::BinaryHeap::with_capacity(cap.min(PREALLOC_LIMIT)),
            next_arrival: 0,
        }
    }

    /// Offers `entry` to the window.
    ///
    /// While the window is not full the entry is always kept. Afterwards it
    /// replaces the current root only when its timestamp is strictly newer, so
    /// among equal timestamps the entries offered first win.
    fn consider(&mut self, entry: LogEntry) {
        let arrival = self.next_arrival;
        self.next_arrival += 1;
        if self.heap.len() < self.cap {
            self.heap.push(WindowItem { entry, arrival });
            return;
        }
        let displaces_root = match self.heap.peek() {
            Some(root) => entry.timestamp > root.entry.timestamp,
            // Only reachable with `cap == 0`: nothing is ever kept.
            None => false,
        };
        if displaces_root {
            self.heap.pop();
            self.heap.push(WindowItem { entry, arrival });
        }
    }

    /// Reports whether the window already holds `cap` entries.
    fn is_full(&self) -> bool {
        self.heap.len() >= self.cap
    }

    /// Returns the `(year, month)` of the root entry, or `None` when empty.
    fn min_year_month(&self) -> Option<(i32, u32)> {
        self.heap
            .peek()
            .map(|item| (item.entry.timestamp.year(), item.entry.timestamp.month()))
    }

    /// Consumes the window and returns its entries by ascending timestamp;
    /// entries with equal timestamps are ordered latest-offered first.
    fn into_sorted(self) -> Vec<LogEntry> {
        let mut items: Vec<WindowItem> = self.heap.into_vec();
        items.sort_by(|a, b| {
            a.entry
                .timestamp
                .cmp(&b.entry.timestamp)
                .then(b.arrival.cmp(&a.arrival))
        });
        items.into_iter().map(|i| i.entry).collect()
    }
}
/// Heap element of `NewestWindow`: an entry plus the order in which it was offered.
struct WindowItem {
    entry: LogEntry,
    arrival: u64,
}

impl Ord for WindowItem {
    /// Orders items so that an older timestamp compares as greater; among
    /// equal timestamps the larger `arrival` compares as greater.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        // Operands are swapped on purpose: the timestamp order is reversed.
        match other.entry.timestamp.cmp(&self.entry.timestamp) {
            Ordering::Equal => self.arrival.cmp(&other.arrival),
            unequal => unequal,
        }
    }
}

impl PartialOrd for WindowItem {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialEq for WindowItem {
    /// Equality looks only at the fields that take part in the ordering.
    fn eq(&self, other: &Self) -> bool {
        self.arrival == other.arrival && self.entry.timestamp == other.entry.timestamp
    }
}

impl Eq for WindowItem {}
/// Path of the active log file, `log.md` directly under the store root.
fn active_log_path(store: &Store) -> PathBuf {
    let mut path = store.root.clone();
    path.push("log.md");
    path
}
/// Directory holding the monthly archive files, `log/` under the store root.
fn archive_dir(store: &Store) -> PathBuf {
    let mut dir = store.root.clone();
    dir.push("log");
    dir
}
/// Path of the archive file for the given month: `<archive dir>/YYYY-MM.md`,
/// with the year zero-padded to four digits and the month to two.
fn archive_path(store: &Store, year: i32, month: u32) -> PathBuf {
    let file_name = format!("{:04}-{:02}.md", year, month);
    let mut path = archive_dir(store);
    path.push(file_name);
    path
}
/// Parses a `TS_FORMAT` timestamp (surrounding whitespace ignored) and
/// attaches a zero UTC offset to it. Returns `None` when the text does not
/// match the format.
fn parse_timestamp(s: &str) -> Option<DateTime<FixedOffset>> {
    let wall_clock = match NaiveDateTime::parse_from_str(s.trim(), TS_FORMAT) {
        Ok(value) => value,
        Err(_) => return None,
    };
    FixedOffset::east_opt(0).and_then(|zero| zero.from_local_datetime(&wall_clock).single())
}
/// Splits a log file's text into its preamble (everything before the first
/// `## [` line) and the entries parsed from the remainder. When there is no
/// such line, the whole text is the preamble and there are no entries.
fn parse_active(content: &str) -> (String, Vec<LogEntry>) {
    if let Some(split) = find_first_header(content) {
        let (preamble, body) = content.split_at(split);
        return (preamble.to_string(), parse_entries(body));
    }
    (content.to_string(), Vec::new())
}
/// Returns the byte offset of the first line that begins with `## [`, or
/// `None` when no line does.
fn find_first_header(content: &str) -> Option<usize> {
    if content.starts_with("## [") {
        Some(0)
    } else {
        // The match starts at the newline; the header begins one byte later.
        content.find("\n## [").map(|newline| newline + 1)
    }
}
/// Parses every entry in `text`.
///
/// A line starts a new entry only if it begins with `## [` and
/// `Log::parse_header` accepts it; any other line belongs to the note of the
/// entry currently open, and lines before the first valid header are dropped.
/// Notes have leading and trailing `\n` / `\r` characters removed.
fn parse_entries(text: &str) -> Vec<LogEntry> {
    type Header = (DateTime<FixedOffset>, LogKind, Option<String>);

    /// Builds the finished entry from its header and collected note lines.
    fn finish((timestamp, kind, object): Header, body: &[&str]) -> LogEntry {
        let note = body.join("\n").trim_matches(['\n', '\r']).to_string();
        LogEntry {
            timestamp,
            kind,
            object,
            note,
        }
    }

    let mut parsed: Vec<LogEntry> = Vec::new();
    // The entry being accumulated: its header and the note lines seen so far.
    let mut open: Option<(Header, Vec<&str>)> = None;

    for line in text.lines() {
        let header = if line.starts_with("## [") {
            Log::parse_header(line)
        } else {
            None
        };
        match header {
            Some(next) => {
                if let Some((previous, body)) = open.take() {
                    parsed.push(finish(previous, &body));
                }
                open = Some((next, Vec::new()));
            }
            None => {
                if let Some((_, body)) = open.as_mut() {
                    body.push(line);
                }
            }
        }
    }

    if let Some((last, body)) = open {
        parsed.push(finish(last, &body));
    }
    parsed
}
/// Joins a preamble and an entry body into one document.
///
/// A non-empty `header` is made to end with a blank line (i.e. `"\n\n"`)
/// before `body` is appended; an empty `header` contributes nothing.
fn compose_active(header: &str, body: &str) -> String {
    let mut document = String::with_capacity(header.len() + body.len() + 2);
    if !header.is_empty() {
        document.push_str(header);
        if !document.ends_with('\n') {
            document.push('\n');
        }
        if !document.ends_with("\n\n") {
            document.push('\n');
        }
    }
    document.push_str(body);
    document
}
fn append_to_archive(path: &Path, entries: &[LogEntry]) -> crate::Result<()> {
let mut body = String::new();
for e in entries {
body.push_str(&e.render());
}
if path.exists() {
let existing = fs::read_to_string(path)?;
let mut full = existing;
if !full.ends_with('\n') {
full.push('\n');
}
full.push_str(&body);
write_atomic(path, full.as_bytes())?;
} else {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
let full = compose_active(LOG_FRONTMATTER, &body);
write_atomic(path, full.as_bytes())?;
}
Ok(())
}
/// Replaces the file at `dest` with `bytes` by writing a temporary file in the
/// same directory, syncing it, and renaming it over `dest`.
///
/// The destination directory is created if missing. The temporary name
/// combines the destination file name, the process id and a per-process
/// counter, so concurrent writers in one process never share a temp file.
///
/// # Errors
/// Returns any I/O error from creating the directory, writing or syncing the
/// temporary file, or renaming it. The temporary file is removed (best effort)
/// on every failure after its creation was attempted.
fn write_atomic(dest: &Path, bytes: &[u8]) -> crate::Result<()> {
    static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let dir = dest.parent().unwrap_or_else(|| Path::new("."));
    fs::create_dir_all(dir)?;

    let pid = std::process::id();
    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let file_name = dest
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("log.md");
    let tmp = dir.join(format!(".{}.{}.{}.tmp", file_name, pid, seq));

    // The file handle is dropped (closed) at the end of the closure, before the rename.
    let staged = File::create(&tmp).and_then(|mut f| {
        f.write_all(bytes)?;
        f.sync_all()
    });

    match staged.and_then(|()| fs::rename(&tmp, dest)) {
        Ok(()) => Ok(()),
        Err(e) => {
            // Covers failed writes and syncs as well as a failed rename, so a
            // half-written temp file is never left behind.
            let _ = fs::remove_file(&tmp);
            Err(e.into())
        }
    }
}
/// Lists the archive files (`YYYY-MM.md` regular files in the archive
/// directory), newest month first. Returns an empty list when the directory
/// does not exist.
fn list_archives_desc(store: &Store) -> crate::Result<Vec<PathBuf>> {
    let dir = archive_dir(store);
    if !dir.is_dir() {
        return Ok(Vec::new());
    }

    let mut found: Vec<(String, PathBuf)> = Vec::new();
    for item in fs::read_dir(&dir)? {
        let path = item?.path();
        if !path.is_file() {
            continue;
        }
        // Keep only names of the form `YYYY-MM.md`; non-UTF-8 names are skipped.
        let stem = path
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(|name| name.strip_suffix(".md"))
            .filter(|stem| is_year_month(stem))
            .map(str::to_string);
        if let Some(stem) = stem {
            found.push((stem, path));
        }
    }

    // Zero-padded stems sort chronologically as plain strings.
    found.sort_by(|a, b| b.0.cmp(&a.0));
    Ok(found.into_iter().map(|(_, path)| path).collect())
}
/// Extracts `(year, month)` from an archive path whose file name is
/// `YYYY-MM.md`; returns `None` for any other name.
fn archive_year_month(path: &Path) -> Option<(i32, u32)> {
    let name = path.file_name()?.to_str()?;
    let stem = name.strip_suffix(".md").filter(|stem| is_year_month(stem))?;
    // The stem is known to be `dddd-dd`, so the only dash separates year and month.
    let (year, month) = stem.split_once('-')?;
    Some((year.parse().ok()?, month.parse().ok()?))
}
/// Reports whether `s` is exactly four ASCII digits, a dash, and two ASCII
/// digits (`YYYY-MM`). The numeric range of the month is not checked.
fn is_year_month(s: &str) -> bool {
    match s.as_bytes() {
        [y1, y2, y3, y4, b'-', m1, m2] => [y1, y2, y3, y4, m1, m2]
            .iter()
            .all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
fn reverse_collect<F>(path: &Path, mut take: F) -> crate::Result<()>
where
F: FnMut(LogEntry) -> bool,
{
let mut file = File::open(path)?;
let len = file.metadata()?.len();
if len == 0 {
return Ok(());
}
let mut buf: Vec<u8> = Vec::new();
let mut start = len;
let mut emitted_abs: Vec<u64> = Vec::new();
let mut stop = false;
while start > 0 && !stop {
let block = std::cmp::min(REVERSE_BLOCK as u64, start);
let new_start = start - block;
file.seek(SeekFrom::Start(new_start))?;
let mut chunk = vec![0u8; block as usize];
file.read_exact(&mut chunk)?;
chunk.extend_from_slice(&buf);
buf = chunk;
start = new_start;
let headers = header_offsets(&buf, start);
for i in (0..headers.len()).rev() {
let abs = headers[i];
if emitted_abs.contains(&abs) {
continue;
}
let is_oldest_in_buf = i == 0;
if is_oldest_in_buf && start > 0 {
continue;
}
let entry_text = entry_text_at(&buf, start, abs, &headers, i);
if let Some(entry) = parse_single_entry(&entry_text) {
emitted_abs.push(abs);
if take(entry) {
stop = true;
break;
}
} else {
emitted_abs.push(abs);
}
}
}
if !stop && start == 0 {
let headers = header_offsets(&buf, start);
for i in (0..headers.len()).rev() {
let abs = headers[i];
if emitted_abs.contains(&abs) {
continue;
}
let entry_text = entry_text_at(&buf, start, abs, &headers, i);
if let Some(entry) = parse_single_entry(&entry_text) {
emitted_abs.push(abs);
if take(entry) {
break;
}
} else {
emitted_abs.push(abs);
}
}
}
Ok(())
}
/// Returns `base` plus the index of every `## [` in `buf` that sits at index 0
/// or directly after a newline byte, in ascending order.
fn header_offsets(buf: &[u8], base: u64) -> Vec<u64> {
    const MARKER: &[u8] = b"## [";

    buf.windows(MARKER.len())
        .enumerate()
        .filter(|&(pos, window)| window == MARKER && (pos == 0 || buf[pos - 1] == b'\n'))
        .map(|(pos, _)| base + pos as u64)
        .collect()
}
/// Returns the text of the entry whose header is `headers[idx]`: the bytes of
/// `buf` (which begins at absolute offset `base`) from `header_abs` up to the
/// next header in `headers`, or to the end of `buf` for the last one. Invalid
/// UTF-8 is replaced lossily.
fn entry_text_at(buf: &[u8], base: u64, header_abs: u64, headers: &[u64], idx: usize) -> String {
    let from = (header_abs - base) as usize;
    let to = headers
        .get(idx + 1)
        .map_or(buf.len(), |&next| (next - base) as usize);
    String::from_utf8_lossy(&buf[from..to]).into_owned()
}
/// Parses `text` and returns its first entry, or `None` when it contains no
/// valid header.
fn parse_single_entry(text: &str) -> Option<LogEntry> {
    let mut entries = parse_entries(text);
    if entries.is_empty() {
        None
    } else {
        // `swap_remove(0)` hands back the first element without shifting the rest.
        Some(entries.swap_remove(0))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Config;
use std::fs;
use tempfile::TempDir;
/// Creates a store rooted in a fresh temporary directory that contains a
/// minimal `DB.md`. The `TempDir` is returned alongside the store; callers
/// keep it bound for the duration of the test.
fn temp_store() -> (TempDir, Store) {
    let dir = tempfile::tempdir().expect("tempdir");
    fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
    let store = Store {
        root: dir.path().to_path_buf(),
        config: Config::default(),
    };
    (dir, store)
}
/// Builds a zero-offset timestamp at the given date, hour and minute
/// (seconds are 0). Panics on an invalid date or time.
fn ts(y: i32, mo: u32, d: u32, h: u32, mi: u32) -> DateTime<FixedOffset> {
    let naive = chrono::NaiveDate::from_ymd_opt(y, mo, d)
        .unwrap()
        .and_hms_opt(h, mi, 0)
        .unwrap();
    FixedOffset::east_opt(0)
        .unwrap()
        .from_local_datetime(&naive)
        .single()
        .unwrap()
}
/// Test shorthand for building a `LogEntry` from plain values; the timestamp
/// comes from `ts`.
// The lint is silenced because spelling out all date parts keeps call sites flat.
#[allow(clippy::too_many_arguments)] fn entry(
    y: i32,
    mo: u32,
    d: u32,
    h: u32,
    mi: u32,
    kind: LogKind,
    object: Option<&str>,
    note: &str,
) -> LogEntry {
    LogEntry {
        timestamp: ts(y, mo, d, h, mi),
        kind,
        object: object.map(|s| s.to_string()),
        note: note.to_string(),
    }
}
// A full header yields the timestamp, the kind and the object after `|`.
#[test]
fn parse_header_with_object() {
    let (t, k, o) =
        Log::parse_header("## [2026-05-27 10:00] ingest | sources/emails/x.eml").unwrap();
    assert_eq!(t, ts(2026, 5, 27, 10, 0));
    assert_eq!(k, LogKind::Ingest);
    assert_eq!(o.as_deref(), Some("sources/emails/x.eml"));
}
// A header without a `| object` part parses with `None` as its object.
#[test]
fn parse_header_without_object_is_none_object() {
    let (t, k, o) = Log::parse_header("## [2026-05-27 10:20] validate").unwrap();
    assert_eq!(t, ts(2026, 5, 27, 10, 20));
    assert_eq!(k, LogKind::Validate);
    assert_eq!(o, None);
}
// An unknown kind token is preserved as `LogKind::Custom` and is not "recognized".
#[test]
fn parse_header_custom_kind_roundtrips_token() {
    let (_, k, o) = Log::parse_header("## [2026-05-27 10:00] proposal | records/x").unwrap();
    assert_eq!(k, LogKind::Custom("proposal".to_string()));
    assert!(!k.is_recognized());
    assert_eq!(o.as_deref(), Some("records/x"));
}
// The hyphenated `index-rebuild` token maps to `IndexRebuild` and back.
#[test]
fn parse_header_index_rebuild_hyphenated_kind() {
    let (_, k, _) = Log::parse_header("## [2026-05-27 10:00] index-rebuild").unwrap();
    assert_eq!(k, LogKind::IndexRebuild);
    assert_eq!(k.as_str(), "index-rebuild");
}
// Lines without the `## [` prefix, with an unparseable or out-of-range
// timestamp, or with no kind token are all rejected.
#[test]
fn parse_header_rejects_non_headers() {
    assert!(Log::parse_header("Not a header").is_none());
    assert!(Log::parse_header("# Curator log").is_none());
    assert!(Log::parse_header("## [garbage] ingest | x").is_none());
    assert!(Log::parse_header("## [2026-05-27 10:00]").is_none()); assert!(Log::parse_header("## [2026-13-40 99:99] ingest | x").is_none());
}
// Every built-in kind survives `as_str` -> `parse` and reports itself as recognized.
#[test]
fn kind_as_str_parse_roundtrip_for_all_recognized() {
    for k in [
        LogKind::Ingest,
        LogKind::Create,
        LogKind::Update,
        LogKind::Delete,
        LogKind::Rename,
        LogKind::Link,
        LogKind::Validate,
        LogKind::IndexRebuild,
        LogKind::Contradiction,
    ] {
        assert_eq!(LogKind::parse(k.as_str()), k);
        assert!(k.is_recognized());
    }
}
// The first append creates `log.md` with the front matter and the entry, and
// does not create the archive directory.
#[test]
fn append_creates_log_with_frontmatter_and_entry() {
    let (_d, store) = temp_store();
    let e = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Ingest,
        Some("sources/emails/x.eml"),
        "Email received.",
    );
    Log::append(&store, &e).unwrap();
    let content = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(
        content.starts_with("---\ntype: log\n---\n"),
        "missing log frontmatter; got:\n{content}"
    );
    assert!(content.contains("## [2026-05-27 10:00] ingest | sources/emails/x.eml"));
    assert!(content.contains("Email received."));
    assert!(!store.root.join("log").exists());
}
// Three same-day appends are read back by `tail` (newest n, oldest first) and
// by `since` (strictly after the given time).
#[test]
fn append_tail_since_roundtrip() {
    let (_d, store) = temp_store();
    let e1 = entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "first");
    let e2 = entry(2026, 5, 27, 10, 5, LogKind::Create, Some("b"), "second");
    let e3 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "third");
    Log::append(&store, &e1).unwrap();
    Log::append(&store, &e2).unwrap();
    Log::append(&store, &e3).unwrap();
    let tail = Log::tail(&store, 2).unwrap();
    assert_eq!(tail.len(), 2);
    assert_eq!(tail[0], e2);
    assert_eq!(tail[1], e3);
    let all = Log::tail(&store, 99).unwrap();
    assert_eq!(all, vec![e1.clone(), e2.clone(), e3.clone()]);
    let since = Log::since(&store, ts(2026, 5, 27, 10, 5)).unwrap();
    assert_eq!(since, vec![e3.clone()]);
    let since_all = Log::since(&store, ts(2026, 5, 27, 9, 0)).unwrap();
    assert_eq!(since_all, vec![e1, e2, e3]);
}
// `tail(0)` returns nothing even when the log has entries.
#[test]
fn tail_zero_is_empty() {
    let (_d, store) = temp_store();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "x"),
    )
    .unwrap();
    assert!(Log::tail(&store, 0).unwrap().is_empty());
}
// With no log file at all, every read API succeeds with an empty result.
#[test]
fn tail_and_since_on_missing_log_are_empty() {
    let (_d, store) = temp_store();
    assert!(Log::tail(&store, 5).unwrap().is_empty());
    assert!(Log::since(&store, ts(2000, 1, 1, 0, 0)).unwrap().is_empty());
    assert!(Log::last_validate_at(&store).unwrap().is_none());
}
// An entry stamped exactly at the cut-off time is not returned by `since`.
#[test]
fn since_exact_timestamp_is_exclusive() {
    let (_d, store) = temp_store();
    let e = entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "PASS");
    Log::append(&store, &e).unwrap();
    assert!(Log::since(&store, ts(2026, 5, 27, 10, 0))
        .unwrap()
        .is_empty());
}
/// Writes `log.md` directly (bypassing `Log::append`) with the front matter,
/// one extra newline, and the entries in exactly the given physical order.
fn write_raw_log(store: &Store, entries: &[LogEntry]) {
    let mut content = String::from(LOG_FRONTMATTER);
    content.push('\n');
    for e in entries {
        content.push_str(&e.render());
    }
    fs::write(store.root.join("log.md"), content).expect("write raw log.md");
}
// `since` filters by timestamp, not by position: with entries stored newest
// first, the result set must still be exactly those after the cut-off.
// Results are compared as sets because their order follows disk order.
#[test]
fn since_returns_newer_entries_even_when_disk_order_is_non_monotonic() {
    let (_d, store) = temp_store();
    let e_1010 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "newest");
    let e_1005 = entry(2026, 5, 27, 10, 5, LogKind::Create, Some("b"), "middle");
    let e_1000 = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Update,
        Some("a"),
        "backdated fix",
    );
    write_raw_log(&store, &[e_1010, e_1005, e_1000]);
    let got = Log::since(&store, ts(2026, 5, 27, 10, 2)).unwrap();
    let stamps: std::collections::BTreeSet<_> = got.iter().map(|e| e.timestamp).collect();
    assert_eq!(
        stamps,
        [ts(2026, 5, 27, 10, 5), ts(2026, 5, 27, 10, 10)]
            .into_iter()
            .collect(),
        "since(10:02) must include both 10:05 and 10:10 despite the backdated \
10:00 entry sitting physically last, and exclude 10:00; got {got:?}"
    );
    let all = Log::since(&store, ts(2026, 5, 27, 9, 0)).unwrap();
    let all_stamps: std::collections::BTreeSet<_> = all.iter().map(|e| e.timestamp).collect();
    assert_eq!(
        all_stamps,
        [
            ts(2026, 5, 27, 10, 0),
            ts(2026, 5, 27, 10, 5),
            ts(2026, 5, 27, 10, 10),
        ]
        .into_iter()
        .collect()
    );
}
// Same timestamp-based filtering inside an archive: the April archive is
// written by hand with its later entry physically first, and `since` must
// still pick it up along with the May entry from the active file.
#[test]
fn since_crosses_archive_when_newer_entry_is_out_of_order_inside_it() {
    let (_d, store) = temp_store();
    let may = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
    write_raw_log(&store, &[may]);
    let apr_late = entry(
        2026,
        4,
        20,
        9,
        0,
        LogKind::Create,
        Some("apr-b"),
        "apr-late",
    );
    let apr_early = entry(
        2026,
        4,
        5,
        9,
        0,
        LogKind::Ingest,
        Some("apr-a"),
        "apr-early",
    );
    let dir = store.root.join("log");
    fs::create_dir_all(&dir).unwrap();
    let mut arch = String::from(LOG_FRONTMATTER);
    arch.push('\n');
    arch.push_str(&apr_late.render());
    arch.push_str(&apr_early.render());
    fs::write(dir.join("2026-04.md"), arch).unwrap();
    let got = Log::since(&store, ts(2026, 4, 15, 0, 0)).unwrap();
    let stamps: std::collections::BTreeSet<_> = got.iter().map(|e| e.timestamp).collect();
    assert_eq!(
        stamps,
        [ts(2026, 4, 20, 9, 0), ts(2026, 5, 2, 8, 0)]
            .into_iter()
            .collect(),
        "since(mid-April) must include the out-of-order later April entry \
and the May entry, and exclude the earlier April entry; got {got:?}"
    );
}
// A note with several lines is read back unchanged.
#[test]
fn multiline_note_is_preserved() {
    let (_d, store) = temp_store();
    let e = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Create,
        Some("records/x"),
        "Line one.\nLine two.\nLine three.",
    );
    Log::append(&store, &e).unwrap();
    let got = Log::tail(&store, 1).unwrap();
    assert_eq!(got[0].note, "Line one.\nLine two.\nLine three.");
}
// An entry with an empty note and no object round-trips to an equal entry.
#[test]
fn empty_note_roundtrips_as_empty() {
    let (_d, store) = temp_store();
    let e = entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "");
    Log::append(&store, &e).unwrap();
    let got = Log::tail(&store, 1).unwrap();
    assert_eq!(got[0], e);
    assert_eq!(got[0].note, "");
}
// With two validate entries interleaved with other kinds, the later one wins
// even though a non-validate entry follows it.
#[test]
fn last_validate_at_finds_most_recent_validate() {
    let (_d, store) = temp_store();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 0, LogKind::Validate, None, "first pass"),
    )
    .unwrap();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 5, LogKind::Create, Some("a"), "made a"),
    )
    .unwrap();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 10, LogKind::Validate, None, "second pass"),
    )
    .unwrap();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 15, LogKind::Update, Some("a"), "edit a"),
    )
    .unwrap();
    let last = Log::last_validate_at(&store).unwrap();
    assert_eq!(last, Some(ts(2026, 5, 27, 10, 10)));
}
// A log without any validate entry yields `None`.
#[test]
fn last_validate_at_none_when_no_validate() {
    let (_d, store) = temp_store();
    Log::append(
        &store,
        &entry(2026, 5, 27, 10, 0, LogKind::Create, Some("a"), "x"),
    )
    .unwrap();
    assert_eq!(Log::last_validate_at(&store).unwrap(), None);
}
// Appending the first May entry moves both April entries into
// `log/2026-04.md`, leaves only May in the active file, and `tail` still sees
// all three in order.
#[test]
fn rotation_rolls_prior_months_into_archives() {
    let (_d, store) = temp_store();
    let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr one");
    let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr two");
    Log::append(&store, &a1).unwrap();
    Log::append(&store, &a2).unwrap();
    // Same-month appends never rotate.
    assert!(!store.root.join("log").exists());
    let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may one");
    Log::append(&store, &m1).unwrap();
    let arch_path = store.root.join("log").join("2026-04.md");
    assert!(arch_path.exists(), "expected April archive to be created");
    let arch = fs::read_to_string(&arch_path).unwrap();
    assert!(arch.starts_with("---\ntype: log\n---\n"));
    assert!(arch.contains("## [2026-04-10 09:00] ingest | apr-a"));
    assert!(arch.contains("## [2026-04-20 09:00] create | apr-b"));
    assert!(arch.contains("apr one"));
    assert!(arch.contains("apr two"));
    let active = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(active.contains("## [2026-05-02 08:00] update | may-a"));
    assert!(
        !active.contains("apr-a") && !active.contains("apr-b"),
        "April entries must be gone from the active file; got:\n{active}"
    );
    let all = Log::tail(&store, 99).unwrap();
    assert_eq!(all, vec![a1, a2, m1]);
}
// Each earlier month gets its own archive file: March is rotated out when
// April arrives, April when May arrives, and the files do not mix months.
#[test]
fn rotation_groups_distinct_prior_months_into_separate_archives() {
    let (_d, store) = temp_store();
    let mar = entry(2026, 3, 5, 9, 0, LogKind::Ingest, Some("mar"), "march");
    let apr = entry(2026, 4, 5, 9, 0, LogKind::Create, Some("apr"), "april");
    Log::append(&store, &mar).unwrap();
    Log::append(&store, &apr).unwrap();
    assert!(store.root.join("log").join("2026-03.md").exists());
    let may = entry(2026, 5, 5, 9, 0, LogKind::Update, Some("may"), "may");
    Log::append(&store, &may).unwrap();
    assert!(store.root.join("log").join("2026-03.md").exists());
    assert!(store.root.join("log").join("2026-04.md").exists());
    let mar_arch = fs::read_to_string(store.root.join("log").join("2026-03.md")).unwrap();
    let apr_arch = fs::read_to_string(store.root.join("log").join("2026-04.md")).unwrap();
    assert!(mar_arch.contains("mar") && !mar_arch.contains("apr"));
    assert!(apr_arch.contains("apr") && !apr_arch.contains("mar"));
    let active = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(active.contains("may") && !active.contains("mar") && !active.contains("apr"));
    let all = Log::tail(&store, 99).unwrap();
    assert_eq!(all, vec![mar, apr, may]);
}
// `tail` continues into the archive when the active file has fewer than `n`
// entries, and stays within the active file when it has enough.
#[test]
fn tail_crosses_into_archive_when_n_spans_month_boundary() {
    let (_d, store) = temp_store();
    let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr1");
    let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr2");
    let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
    let m2 = entry(2026, 5, 3, 8, 0, LogKind::Update, Some("may-b"), "may2");
    for e in [&a1, &a2, &m1, &m2] {
        Log::append(&store, e).unwrap();
    }
    let tail3 = Log::tail(&store, 3).unwrap();
    assert_eq!(tail3, vec![a2.clone(), m1.clone(), m2.clone()]);
    let tail2 = Log::tail(&store, 2).unwrap();
    assert_eq!(tail2, vec![m1, m2]);
}
// A mid-April cut-off returns the later April entry from the archive plus the
// May entry from the active file, in chronological order.
#[test]
fn since_crosses_into_archive_and_early_stops() {
    let (_d, store) = temp_store();
    let a1 = entry(2026, 4, 10, 9, 0, LogKind::Ingest, Some("apr-a"), "apr1");
    let a2 = entry(2026, 4, 20, 9, 0, LogKind::Create, Some("apr-b"), "apr2");
    let m1 = entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may1");
    for e in [&a1, &a2, &m1] {
        Log::append(&store, e).unwrap();
    }
    let got = Log::since(&store, ts(2026, 4, 15, 0, 0)).unwrap();
    assert_eq!(got, vec![a2, m1]);
}
// When the only validate entry has been rotated into an archive, it is still found.
#[test]
fn last_validate_at_crosses_into_archive() {
    let (_d, store) = temp_store();
    Log::append(
        &store,
        &entry(2026, 4, 10, 9, 0, LogKind::Validate, None, "apr validate"),
    )
    .unwrap();
    Log::append(
        &store,
        &entry(2026, 5, 2, 8, 0, LogKind::Update, Some("may-a"), "may work"),
    )
    .unwrap();
    let last = Log::last_validate_at(&store).unwrap();
    assert_eq!(last, Some(ts(2026, 4, 10, 9, 0)));
}
// Builds a single-month log large enough to span several reverse-read blocks
// and checks that `tail` returns exactly the expected suffix for small,
// medium and over-sized `n`.
#[test]
fn reverse_read_correct_on_large_single_month_log() {
    let (_d, store) = temp_store();
    let n = 400usize;
    let mut expected: Vec<LogEntry> = Vec::new();
    for i in 0..n {
        // Entries are three minutes apart so every timestamp is distinct.
        let total_min = (i as u32) * 3;
        let day = 1 + total_min / (24 * 60);
        let hour = (total_min / 60) % 24;
        let min = total_min % 60;
        let note = format!(
            "entry number {i}\nbody line A for {i}\nbody line B for {i} with padding {}",
            "x".repeat(40)
        );
        let e = entry(
            2026,
            6,
            day,
            hour,
            min,
            LogKind::Update,
            Some(&format!("records/item-{i:04}")),
            // Fixed: this argument had been garbled into a non-compiling token.
            &note,
        );
        Log::append(&store, &e).unwrap();
        expected.push(e);
    }
    let size = fs::metadata(store.root.join("log.md")).unwrap().len();
    assert!(
        size > (REVERSE_BLOCK as u64) * 2,
        "test log not large enough ({size} bytes) to exercise multi-block reverse-read"
    );
    let tail5 = Log::tail(&store, 5).unwrap();
    assert_eq!(tail5, expected[n - 5..].to_vec());
    let tail50 = Log::tail(&store, 50).unwrap();
    assert_eq!(tail50, expected[n - 50..].to_vec());
    let all = Log::tail(&store, n + 10).unwrap();
    assert_eq!(all.len(), n);
    assert_eq!(all, expected);
}
/// Writes `log.md` directly with the entries in exactly the given physical
/// order, using `compose_active` to attach the front matter.
fn write_log_physical(store: &Store, entries: &[LogEntry]) {
    let mut body = String::new();
    for e in entries {
        body.push_str(&e.render());
    }
    let full = compose_active(LOG_FRONTMATTER, &body);
    fs::write(store.root.join("log.md"), full).expect("write log.md");
}
// With the file stored newest-first, `tail` must select by timestamp and
// return results in chronological order, not the physically last entries.
#[test]
fn tail_returns_newest_by_timestamp_on_demonstrated_out_of_order_log() {
    let (_d, store) = temp_store();
    let e_1010 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("c"), "ten-ten");
    let e_1005 = entry(
        2026,
        5,
        27,
        10,
        5,
        LogKind::Create,
        Some("b"),
        "ten-oh-five",
    );
    let e_1000 = entry(2026, 5, 27, 10, 0, LogKind::Ingest, Some("a"), "ten-oh-oh");
    write_log_physical(&store, &[e_1010.clone(), e_1005.clone(), e_1000.clone()]);
    let tail2 = Log::tail(&store, 2).unwrap();
    assert_eq!(
        tail2,
        vec![e_1005.clone(), e_1010.clone()],
        "tail(2) must be the two NEWEST by timestamp (chronological), \
not the last two physical entries"
    );
    assert!(tail2.contains(&e_1010), "newest (10:10) must be included");
    assert!(!tail2.contains(&e_1000), "oldest (10:00) must be excluded");
    assert_eq!(Log::tail(&store, 1).unwrap(), vec![e_1010.clone()]);
    assert_eq!(Log::tail(&store, 99).unwrap(), vec![e_1000, e_1005, e_1010]);
}
// The scan must not stop once `n` entries were seen: here the newest entry is
// physically first, so the whole file has to be considered.
#[test]
fn tail_no_early_stop_when_newer_entry_sits_before_an_older_one() {
    let (_d, store) = temp_store();
    let e55 = entry(2026, 5, 27, 10, 55, LogKind::Update, Some("x55"), "55");
    let e10 = entry(2026, 5, 27, 10, 10, LogKind::Update, Some("x10"), "10");
    let e50 = entry(2026, 5, 27, 10, 50, LogKind::Update, Some("x50"), "50");
    let e00 = entry(2026, 5, 27, 10, 0, LogKind::Update, Some("x00"), "00");
    write_log_physical(
        &store,
        &[e55.clone(), e10.clone(), e50.clone(), e00.clone()],
    );
    let tail2 = Log::tail(&store, 2).unwrap();
    assert_eq!(tail2, vec![e50.clone(), e55.clone()]);
    let tail3 = Log::tail(&store, 3).unwrap();
    assert_eq!(tail3, vec![e10.clone(), e50.clone(), e55.clone()]);
}
// Entries sharing a timestamp keep their physical order in the result, and
// when only one fits, the physically later one is kept.
#[test]
fn tail_orders_equal_timestamps_by_physical_recency() {
    let (_d, store) = temp_store();
    let early = entry(2026, 5, 27, 9, 59, LogKind::Create, Some("early"), "before");
    let tie_a = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Update,
        Some("tie-a"),
        "first 10:00",
    );
    let tie_b = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Update,
        Some("tie-b"),
        "second 10:00",
    );
    write_log_physical(&store, &[early.clone(), tie_a.clone(), tie_b.clone()]);
    let tail2 = Log::tail(&store, 2).unwrap();
    assert_eq!(
        tail2,
        vec![tie_a.clone(), tie_b.clone()],
        "both 10:00 entries kept, physically-later one (tie_b) last; 09:59 dropped"
    );
    assert_eq!(Log::tail(&store, 1).unwrap(), vec![tie_b]);
}
// After May has been rotated out, a backdated May entry is appended and ends
// up in the active file next to June. `tail` must merge the active file and
// the May archive by timestamp.
#[test]
fn tail_finds_newest_across_a_backdated_entry_spanning_the_month_boundary() {
    let (_d, store) = temp_store();
    let may1 = entry(2026, 5, 10, 9, 0, LogKind::Ingest, Some("may-1"), "may one");
    let may2 = entry(2026, 5, 20, 9, 0, LogKind::Create, Some("may-2"), "may two");
    let jun1 = entry(2026, 6, 2, 8, 0, LogKind::Update, Some("jun-1"), "jun one");
    Log::append(&store, &may1).unwrap();
    Log::append(&store, &may2).unwrap();
    Log::append(&store, &jun1).unwrap(); assert!(store.root.join("log").join("2026-05.md").exists());
    let may_corr = entry(
        2026,
        5,
        25,
        9,
        0,
        LogKind::Update,
        Some("may-2"),
        "may correction",
    );
    Log::append(&store, &may_corr).unwrap();
    let active = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(
        active.contains("jun-1") && active.contains("may correction"),
        "backdated May entry should be in the active file alongside June; got:\n{active}"
    );
    assert_eq!(Log::tail(&store, 1).unwrap(), vec![jun1.clone()]);
    let tail2 = Log::tail(&store, 2).unwrap();
    assert_eq!(tail2, vec![may_corr.clone(), jun1.clone()]);
    let tail3 = Log::tail(&store, 3).unwrap();
    assert_eq!(tail3, vec![may2.clone(), may_corr.clone(), jun1.clone()]);
    let all = Log::tail(&store, 99).unwrap();
    assert_eq!(all, vec![may1, may2, may_corr, jun1]);
}
// Text that looks like a header marker but is not a valid header stays in the
// note of the surrounding entry; only the two real headers start entries.
// The literal's lines must stay at column 0: leading spaces would become part
// of the parsed text.
#[test]
fn parse_entries_skips_unparseable_header_folding_into_body() {
    let text = "\
## [2026-05-27 10:00] create | records/x
Body mentions a literal: ## [not a real header here]
More body.
## [2026-05-27 10:05] update | records/y
Second.
";
    let entries = parse_entries(text);
    assert_eq!(entries.len(), 2);
    assert_eq!(entries[0].kind, LogKind::Create);
    assert!(entries[0].note.contains("## [not a real header here]"));
    assert!(entries[0].note.contains("More body."));
    assert_eq!(entries[1].kind, LogKind::Update);
    assert_eq!(entries[1].note, "Second.");
}
// A same-month append leaves all earlier bytes of the file untouched: the new
// content starts with the old content, and the correction follows it.
#[test]
fn append_only_corrective_entry_goes_on_end_without_rewriting() {
    let (_d, store) = temp_store();
    let original = entry(
        2026,
        5,
        27,
        10,
        0,
        LogKind::Update,
        Some("records/northstar"),
        "Seat count 120 -> 175.",
    );
    Log::append(&store, &original).unwrap();
    let after_first = fs::read_to_string(store.root.join("log.md")).unwrap();
    let correction = entry(
        2026,
        5,
        27,
        11,
        0,
        LogKind::Update,
        Some("records/northstar"),
        "Correction: seat count is 165, not 175.",
    );
    Log::append(&store, &correction).unwrap();
    let after_second = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(
        after_second.starts_with(&after_first),
        "appending must not rewrite earlier bytes"
    );
    assert!(after_second.contains("Correction: seat count is 165, not 175."));
    let all = Log::tail(&store, 99).unwrap();
    assert_eq!(all, vec![original, correction]);
}
// Eight threads append concurrently after a barrier. The test checks that the
// file on disk is never torn: front matter intact, every `## [` line parses,
// the seed entry is present, and every entry read back re-renders and
// re-parses to itself. It does not assert how many entries survive.
#[test]
fn concurrent_appends_are_atomic_and_total() {
    use std::sync::{Arc, Barrier};
    use std::thread;
    let (_d, store) = temp_store();
    Log::append(
        &store,
        &entry(2026, 7, 1, 0, 0, LogKind::Create, Some("seed"), "seed"),
    )
    .unwrap();
    let threads = 8usize;
    let per = 25usize;
    let barrier = Arc::new(Barrier::new(threads));
    let store = Arc::new(store);
    let mut handles = Vec::new();
    for tnum in 0..threads {
        let b = Arc::clone(&barrier);
        let s = Arc::clone(&store);
        handles.push(thread::spawn(move || {
            // Release all writers at once to maximize contention.
            b.wait();
            for i in 0..per {
                let e = entry(
                    2026,
                    7,
                    1,
                    (tnum % 24) as u32,
                    (i % 60) as u32,
                    LogKind::Update,
                    Some(&format!("t{tnum}-i{i}")),
                    &format!("thread {tnum} item {i}"),
                );
                Log::append(&s, &e).unwrap();
            }
        }));
    }
    for h in handles {
        h.join().unwrap();
    }
    let content = fs::read_to_string(store.root.join("log.md")).unwrap();
    assert!(content.starts_with("---\ntype: log\n---\n"));
    for line in content.lines() {
        if line.starts_with("## [") {
            assert!(
                Log::parse_header(line).is_some(),
                "corrupt/torn header line on disk: {line:?}"
            );
        }
    }
    assert!(content.contains("## [2026-07-01 00:00] create | seed"));
    let all = Log::tail(&store, 10_000).unwrap();
    assert!(!all.is_empty());
    for e in &all {
        let rendered = e.render();
        let reparsed = parse_single_entry(&rendered).unwrap();
        assert_eq!(&reparsed, e);
    }
}
// `render` followed by `parse_single_entry` returns an equal entry for a mix
// of kinds, with and without object, and with single-line, multi-line and
// empty notes.
#[test]
fn render_then_parse_is_identity() {
    let cases = vec![
        entry(
            2026,
            1,
            2,
            3,
            4,
            LogKind::Ingest,
            Some("sources/a.eml"),
            "n",
        ),
        entry(
            2026,
            12,
            31,
            23,
            59,
            LogKind::Validate,
            None,
            "PASS - 0 errors",
        ),
        entry(
            2026,
            6,
            15,
            12,
            30,
            LogKind::Custom("proposal".to_string()),
            Some("records/p"),
            "multi\nline\nnote",
        ),
        entry(2026, 6, 15, 12, 30, LogKind::Contradiction, Some("obj"), ""),
    ];
    for e in cases {
        let rendered = e.render();
        let parsed = parse_single_entry(&rendered).unwrap_or_else(|| {
            panic!("failed to reparse rendered entry:\n{rendered}");
        });
        assert_eq!(parsed, e, "round-trip mismatch for {e:?}");
    }
}
}