use std::path::Path;
use thiserror::Error;
use mimir_core::bind::SymbolTable;
use mimir_core::canonical::{
decode_all, CanonicalRecord, DecodeError, EpiRecord, InfRecord, ProRecord, SemRecord,
};
use mimir_core::clock::ClockTime;
use mimir_core::confidence::Confidence;
use mimir_core::log::{CanonicalLog, LogBackend, LogError};
use mimir_core::pipeline::Pipeline;
use mimir_core::symbol::SymbolId;
use mimir_core::value::Value;
/// Failures that can occur while rendering a canonical record as text.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum RenderError {
/// A symbol id referenced by the record has no entry in the symbol table.
#[error("unknown symbol id {id:?} in {context}")]
UnknownSymbol {
/// The id that could not be resolved.
id: SymbolId,
/// Static label naming the record field being rendered (for example
/// `"sem.s"` or `"epi.participant"`).
context: &'static str,
},
/// The record is not one of the renderable memory kinds (`Sem`, `Epi`,
/// `Pro`, `Inf`).
#[error("record is not a write-surface memory")]
NotAMemory,
}
/// Renders canonical memory records as parenthesised, Lisp-style text,
/// resolving every symbol id to its canonical name through a borrowed
/// [`SymbolTable`].
pub struct LispRenderer<'a> {
// Symbol table used for every id -> canonical-name lookup.
table: &'a SymbolTable,
}
impl<'a> LispRenderer<'a> {
    /// Creates a renderer that resolves symbol names through `table`.
    #[must_use]
    pub fn new(table: &'a SymbolTable) -> Self {
        Self { table }
    }

    /// Renders a single memory record (`Sem`, `Epi`, `Pro` or `Inf`) as one
    /// parenthesised form.
    ///
    /// # Errors
    ///
    /// * [`RenderError::NotAMemory`] for any other record kind.
    /// * [`RenderError::UnknownSymbol`] when a referenced symbol id has no
    ///   entry in the table.
    pub fn render_memory(&self, record: &CanonicalRecord) -> Result<String, RenderError> {
        match record {
            CanonicalRecord::Sem(sem) => self.render_sem(sem),
            CanonicalRecord::Epi(epi) => self.render_epi(epi),
            CanonicalRecord::Pro(pro) => self.render_pro(pro),
            CanonicalRecord::Inf(inf) => self.render_inf(inf),
            _ => Err(RenderError::NotAMemory),
        }
    }

    /// Renders a `sem` form: subject, predicate, object, then the `:src`,
    /// `:c` and `:v` keyword fields.
    fn render_sem(&self, r: &SemRecord) -> Result<String, RenderError> {
        // Lookups run in field order so the first failing field is the one reported.
        let subject = self.name_of(r.s, "sem.s")?;
        let predicate = self.name_of(r.p, "sem.p")?;
        let object = self.render_value(&r.o, "sem.o")?;
        let source = self.name_of(r.source, "sem.source")?;
        let confidence = render_confidence(r.confidence);
        let valid_at = iso8601_from_millis(r.clocks.valid_at);
        Ok(format!(
            "(sem @{subject} @{predicate} {object} :src @{source} :c {confidence} :v {valid_at})"
        ))
    }

    /// Renders an `epi` form: event id, kind, participant list, location,
    /// then the `:at`, `:obs`, `:src` and `:c` keyword fields.
    fn render_epi(&self, r: &EpiRecord) -> Result<String, RenderError> {
        // Participants are resolved first, ahead of the scalar fields.
        let participants = self.render_symbol_list(r.participants.iter(), "epi.participant")?;
        let event_id = self.name_of(r.event_id, "epi.event_id")?;
        let kind = self.name_of(r.kind, "epi.kind")?;
        let location = self.name_of(r.location, "epi.location")?;
        let at_time = iso8601_from_millis(r.at_time);
        let observed_at = iso8601_from_millis(r.observed_at);
        let source = self.name_of(r.source, "epi.source")?;
        let confidence = render_confidence(r.confidence);
        Ok(format!(
            "(epi @{event_id} @{kind} {participants} @{location} :at {at_time} :obs {observed_at} :src @{source} :c {confidence})"
        ))
    }

    /// Renders a `pro` form: rule id, trigger, action, an optional `:pre`
    /// precondition, then the `:scp`, `:src` and `:c` keyword fields.
    fn render_pro(&self, r: &ProRecord) -> Result<String, RenderError> {
        let rule_id = self.name_of(r.rule_id, "pro.rule_id")?;
        let trigger = self.render_value(&r.trigger, "pro.trigger")?;
        let action = self.render_value(&r.action, "pro.action")?;
        // The `:pre` clause is emitted only when a precondition is present.
        let precondition = match &r.precondition {
            Some(pre) => format!(" :pre {}", self.render_value(pre, "pro.precondition")?),
            None => String::new(),
        };
        let scope = self.name_of(r.scope, "pro.scope")?;
        let source = self.name_of(r.source, "pro.source")?;
        let confidence = render_confidence(r.confidence);
        Ok(format!(
            "(pro @{rule_id} {trigger} {action}{precondition} :scp @{scope} :src @{source} :c {confidence})"
        ))
    }

    /// Renders an `inf` form: subject, predicate, object, the list of parent
    /// memories, the method, then the `:c` and `:v` keyword fields.
    fn render_inf(&self, r: &InfRecord) -> Result<String, RenderError> {
        // Parents are resolved first, ahead of the scalar fields.
        let parents = self.render_symbol_list(r.derived_from.iter(), "inf.derived_from")?;
        let subject = self.name_of(r.s, "inf.s")?;
        let predicate = self.name_of(r.p, "inf.p")?;
        let object = self.render_value(&r.o, "inf.o")?;
        let method = self.name_of(r.method, "inf.method")?;
        let confidence = render_confidence(r.confidence);
        let valid_at = iso8601_from_millis(r.clocks.valid_at);
        Ok(format!(
            "(inf @{subject} @{predicate} {object} {parents} @{method} :c {confidence} :v {valid_at})"
        ))
    }

    /// Renders a sequence of symbol ids as `(@a @b ...)`; an empty sequence
    /// renders as `()`. Stops at the first id that cannot be resolved.
    fn render_symbol_list<'i>(
        &self,
        ids: impl Iterator<Item = &'i SymbolId>,
        context: &'static str,
    ) -> Result<String, RenderError> {
        let names = ids
            .map(|id| self.name_of(*id, context).map(|name| format!("@{name}")))
            .collect::<Result<Vec<_>, _>>()?;
        Ok(format!("({})", names.join(" ")))
    }

    /// Renders a single value. Symbols become `@name`; `context` labels the
    /// field in any [`RenderError::UnknownSymbol`] produced.
    fn render_value(&self, value: &Value, context: &'static str) -> Result<String, RenderError> {
        let rendered = match value {
            Value::Symbol(id) => {
                let name = self.name_of(*id, context)?;
                format!("@{name}")
            }
            Value::Integer(n) => n.to_string(),
            Value::Float(f) => render_float(*f),
            Value::Boolean(flag) => flag.to_string(),
            Value::String(text) => render_string_literal(text),
            Value::Timestamp(at) => iso8601_from_millis(*at),
        };
        Ok(rendered)
    }

    /// Looks up the canonical name for `id`, or reports it as unknown
    /// together with the field label `context`.
    fn name_of(&self, id: SymbolId, context: &'static str) -> Result<String, RenderError> {
        match self.table.entry(id) {
            Some(entry) => Ok(entry.canonical_name.clone()),
            None => Err(RenderError::UnknownSymbol { id, context }),
        }
    }
}
/// Formats a confidence as a decimal with exactly five digits after the
/// point, using its `f32` representation.
fn render_confidence(c: Confidence) -> String {
    let value = c.as_f32();
    format!("{value:.5}")
}
/// Formats a float so that the text always looks like a float literal.
///
/// Non-finite inputs (NaN, +/- infinity) render as `nil`. Finite values use
/// the default `Display` text, with `.0` appended when that text contains
/// neither a decimal point nor an exponent marker.
fn render_float(f: f64) -> String {
    if f.is_nan() || f.is_infinite() {
        return String::from("nil");
    }
    let mut text = f.to_string();
    let looks_like_float = text.chars().any(|c| matches!(c, '.' | 'e' | 'E'));
    if !looks_like_float {
        text.push_str(".0");
    }
    text
}
/// Wraps `s` in double quotes, backslash-escaping `"`, `\`, newline,
/// carriage return and tab. Every other character is copied unchanged.
fn render_string_literal(s: &str) -> String {
    // +2 accounts for the surrounding quotes; escapes may grow it further.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        let escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match escape {
            Some(sequence) => quoted.push_str(sequence),
            None => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
/// Formats a [`ClockTime`] as `YYYY-MM-DDTHH:MM:SSZ`.
///
/// The clock's millisecond count is split into whole days (converted to a
/// calendar date, with day 0 being 1970-01-01) and the time within that
/// day. The sub-second part is dropped from the output.
#[must_use]
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
pub fn iso8601_from_millis(clock: ClockTime) -> String {
    const MS_PER_DAY: i64 = 86_400_000;

    let total_ms = clock.as_millis() as i64;
    let (year, month, day) = civil_from_days(total_ms.div_euclid(MS_PER_DAY));

    // `rem_euclid` keeps the time of day non-negative whatever the sign of the count.
    let secs_of_day = total_ms.rem_euclid(MS_PER_DAY) / 1_000;
    let hour = secs_of_day / 3_600;
    let minute = secs_of_day / 60 % 60;
    let second = secs_of_day % 60;

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
/// Converts a day count relative to 1970-01-01 into a `(year, month, day)`
/// triple in the proleptic Gregorian calendar.
///
/// Works on 400-year "eras" of 146 097 days with the year starting on
/// 1 March, so the leap day falls at the end of the internal year; the
/// result is shifted back to a January-based year at the end.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_sign_loss,
    clippy::similar_names
)]
fn civil_from_days(days: i64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;

    // Re-base so that day 0 is 0000-03-01.
    let shifted = days + 719_468;
    // Floor division keeps negative day counts in the correct era.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;

    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March ... 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };

    // January and February belong to the following calendar year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = march_based_year + i64::from(month <= 2);

    (year as i32, month as u32, day as u32)
}
/// Classification of whatever bytes remain after the last record that
/// `verify` managed to decode.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TailStatus {
/// Every byte of the log decoded into records; nothing is left over.
Clean,
/// Bytes remain, and decoding either stopped without an error or stopped
/// on a `Truncated` / `LengthMismatch` decode error.
OrphanTail {
/// Number of undecoded bytes at the end of the log.
bytes: u64,
},
/// Bytes remain and decoding stopped on any other kind of decode error.
Corrupt {
/// Number of undecoded bytes at the end of the log.
bytes: u64,
/// The decode error that stopped the scan.
first_decode_error: mimir_core::canonical::DecodeError,
},
}
impl TailStatus {
    /// Returns `true` only for [`TailStatus::Clean`].
    #[must_use]
    pub const fn is_clean(&self) -> bool {
        match self {
            Self::Clean => true,
            Self::OrphanTail { .. } | Self::Corrupt { .. } => false,
        }
    }

    /// Returns `true` only for [`TailStatus::Corrupt`].
    #[must_use]
    pub const fn is_corrupt(&self) -> bool {
        match self {
            Self::Corrupt { .. } => true,
            Self::Clean | Self::OrphanTail { .. } => false,
        }
    }

    /// Number of undecoded trailing bytes; zero when the tail is clean.
    #[must_use]
    pub const fn trailing_bytes(&self) -> u64 {
        match self {
            Self::OrphanTail { bytes } => *bytes,
            Self::Corrupt { bytes, .. } => *bytes,
            Self::Clean => 0,
        }
    }
}
/// Summary produced by `verify` after scanning a canonical log.
#[derive(Debug, PartialEq, Eq)]
pub struct VerifyReport {
/// Number of records that decoded successfully before the scan stopped.
pub records_decoded: usize,
/// Number of decoded `Checkpoint` records.
pub checkpoints: usize,
/// Number of decoded memory records (`Sem`, `Epi`, `Pro`, `Inf`).
pub memory_records: usize,
/// Number of decoded symbol events (`SymbolAlloc`, `SymbolAlias`,
/// `SymbolRename`, `SymbolRetire`).
pub symbol_events: usize,
/// Classification of the bytes left after the last decoded record.
pub tail: TailStatus,
/// Number of symbol references in decoded memory records that have no
/// entry in the symbol table replayed from the same log.
pub dangling_symbols: usize,
}
impl VerifyReport {
    /// Number of bytes left undecoded at the end of the log, as recorded in
    /// the `tail` field.
    #[must_use]
    pub const fn trailing_bytes(&self) -> u64 {
        let Self { tail, .. } = self;
        tail.trailing_bytes()
    }
}
/// Errors returned by the log-inspection entry points in this file.
#[derive(Debug, Error)]
pub enum VerifyError {
/// A log-layer error, converted from `LogError`.
#[error("verify I/O: {0}")]
Log(#[from] LogError),
/// Bytes that were expected to decode cleanly failed to decode.
#[error("committed canonical bytes failed to decode: {source}")]
CorruptCommittedLog {
/// The underlying decode error.
#[from]
source: mimir_core::canonical::DecodeError,
},
/// Replaying a symbol event was rejected.
#[error("symbol-replay conflict during load: {source}")]
SymbolReplay {
/// The underlying bind error.
#[from]
source: mimir_core::bind::BindError,
},
/// A `u64` log offset does not fit in `usize` on this target.
#[error("committed log offset {offset} exceeds usize on this target")]
CommittedEndOverflow {
/// The offset that could not be converted.
offset: u64,
},
}
/// Scans the canonical log at `log_path` and summarises its contents.
///
/// Records are decoded one at a time from the start of the file until every
/// byte is consumed or a record fails to decode. Each decoded record is
/// tallied and symbol events are replayed into a scratch symbol table, which
/// is then used to count dangling symbol references. Whatever follows the
/// last decoded record is classified as a [`TailStatus`].
///
/// # Errors
///
/// Fails only when opening or reading the log fails. A decode failure is not
/// an error here; it is reported through the `tail` field of the report.
pub fn verify(log_path: &Path) -> Result<VerifyReport, VerifyError> {
    let mut log = CanonicalLog::open(log_path)?;
    let bytes = log.read_all()?;

    let mut table = SymbolTable::new();
    let mut records_decoded = 0_usize;
    let mut checkpoints = 0_usize;
    let mut memory_records = 0_usize;
    let mut symbol_events = 0_usize;
    let mut cursor = 0_usize;

    // Walk the log; the loop yields the decode error that stopped it, if any.
    let stop_error = loop {
        if cursor >= bytes.len() {
            break None;
        }
        match mimir_core::canonical::decode_record(&bytes[cursor..]) {
            Ok((record, consumed)) => {
                cursor += consumed;
                records_decoded += 1;
                apply_for_verify(
                    &record,
                    &mut table,
                    &mut checkpoints,
                    &mut memory_records,
                    &mut symbol_events,
                );
            }
            Err(err) => break Some(err),
        }
    };

    // Only the prefix that decoded cleanly is inspected for dangling symbols.
    let dangling_symbols = count_dangling_symbols(&bytes[..cursor], &table);

    let trailing = bytes.len() as u64 - cursor as u64;
    let tail = match stop_error {
        None if trailing == 0 => TailStatus::Clean,
        // A short or length-mismatched final record is treated as an orphan tail.
        None | Some(DecodeError::Truncated { .. } | DecodeError::LengthMismatch { .. }) => {
            TailStatus::OrphanTail { bytes: trailing }
        }
        Some(err) => TailStatus::Corrupt {
            bytes: trailing,
            first_decode_error: err,
        },
    };

    Ok(VerifyReport {
        records_decoded,
        checkpoints,
        memory_records,
        symbol_events,
        tail,
        dangling_symbols,
    })
}
/// Tallies one decoded record for `verify` and, for symbol events, replays
/// it into `table`.
///
/// Exactly one of the three counters is incremented for symbol events,
/// checkpoints and memory records; any other record kind is ignored. The
/// result of each replay call is deliberately discarded.
fn apply_for_verify(
    record: &CanonicalRecord,
    table: &mut SymbolTable,
    checkpoints: &mut usize,
    memory_records: &mut usize,
    symbol_events: &mut usize,
) {
    // Pick the counter this record belongs to, replaying symbol events on the way.
    let counter = match record {
        CanonicalRecord::SymbolAlloc(event) => {
            let _ = table.replay_allocate(event.symbol_id, event.name.clone(), event.symbol_kind);
            Some(symbol_events)
        }
        CanonicalRecord::SymbolAlias(event) => {
            let _ = table.replay_alias(event.symbol_id, event.name.clone());
            Some(symbol_events)
        }
        CanonicalRecord::SymbolRename(event) => {
            let _ = table.replay_rename(event.symbol_id, event.name.clone());
            Some(symbol_events)
        }
        CanonicalRecord::SymbolRetire(event) => {
            let _ = table.replay_retire(event.symbol_id, event.name.clone());
            Some(symbol_events)
        }
        CanonicalRecord::Checkpoint(_) => Some(checkpoints),
        CanonicalRecord::Sem(_)
        | CanonicalRecord::Epi(_)
        | CanonicalRecord::Pro(_)
        | CanonicalRecord::Inf(_) => Some(memory_records),
        _ => None,
    };

    if let Some(count) = counter {
        *count += 1;
    }
}
fn count_dangling_symbols(bytes: &[u8], table: &SymbolTable) -> usize {
let Ok(records) = decode_all(bytes) else {
return 0;
};
let mut dangling = 0_usize;
for record in records {
match record {
CanonicalRecord::Sem(r) => {
for id in [r.s, r.p, r.source, r.memory_id] {
if table.entry(id).is_none() {
dangling += 1;
}
}
if let Value::Symbol(id) = r.o {
if table.entry(id).is_none() {
dangling += 1;
}
}
}
CanonicalRecord::Epi(r) => {
for id in [r.event_id, r.kind, r.location, r.source, r.memory_id] {
if table.entry(id).is_none() {
dangling += 1;
}
}
for p in &r.participants {
if table.entry(*p).is_none() {
dangling += 1;
}
}
}
CanonicalRecord::Pro(r) => {
for id in [r.rule_id, r.scope, r.source, r.memory_id] {
if table.entry(id).is_none() {
dangling += 1;
}
}
}
CanonicalRecord::Inf(r) => {
for id in [r.s, r.p, r.method, r.memory_id] {
if table.entry(id).is_none() {
dangling += 1;
}
}
for p in &r.derived_from {
if table.entry(*p).is_none() {
dangling += 1;
}
}
}
_ => {}
}
}
dangling
}
/// Rebuilds the symbol table from the checkpointed prefix of the log at
/// `log_path`.
///
/// Only bytes up to the offset reported by `last_checkpoint_end` are
/// decoded. Symbol events in that prefix are replayed through a fresh
/// [`Pipeline`]; every other record kind is skipped.
///
/// NOTE(review): the prefix is taken with a plain slice index, so an offset
/// larger than the bytes read would panic rather than return an error —
/// confirm the log layer rules that out.
///
/// # Errors
///
/// Fails when the log cannot be opened or read, when the checkpoint offset
/// does not fit in `usize`, when the prefix fails to decode, or when a
/// replayed symbol event is rejected.
pub fn load_table_from_log(log_path: &Path) -> Result<SymbolTable, VerifyError> {
    let mut log = CanonicalLog::open(log_path)?;
    let raw = log.read_all()?;

    let checkpoint_offset = log.last_checkpoint_end()?;
    let Ok(committed_len) = usize::try_from(checkpoint_offset) else {
        return Err(VerifyError::CommittedEndOverflow {
            offset: checkpoint_offset,
        });
    };

    let committed = decode_all(&raw[..committed_len])?;
    let mut pipeline = Pipeline::new();
    for record in committed {
        match record {
            CanonicalRecord::SymbolAlloc(event) => {
                pipeline.replay_allocate(event.symbol_id, event.name, event.symbol_kind)?;
            }
            CanonicalRecord::SymbolAlias(event) => {
                pipeline.replay_alias(event.symbol_id, event.name)?;
            }
            CanonicalRecord::SymbolRename(event) => {
                pipeline.replay_rename(event.symbol_id, event.name)?;
            }
            CanonicalRecord::SymbolRetire(event) => {
                pipeline.replay_retire(event.symbol_id, event.name)?;
            }
            // Only symbol events affect the table.
            _ => {}
        }
    }

    Ok(pipeline.table().clone())
}
// Unit tests for the pure formatting helpers in this file.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
use super::*;
// Millisecond count 0 must render as the Unix epoch.
#[test]
fn iso8601_renders_unix_epoch_zero() {
assert_eq!(
iso8601_from_millis(ClockTime::try_from_millis(0).expect("non-sentinel")),
"1970-01-01T00:00:00Z"
);
}
// Midnight at the start of the year 2000.
#[test]
fn iso8601_renders_y2k() {
assert_eq!(
iso8601_from_millis(ClockTime::try_from_millis(946_684_800_000).expect("non-sentinel")),
"2000-01-01T00:00:00Z"
);
}
// A mid-month date well past the epoch.
#[test]
fn iso8601_renders_known_timestamp() {
assert_eq!(
iso8601_from_millis(
ClockTime::try_from_millis(1_705_276_800_000).expect("non-sentinel")
),
"2024-01-15T00:00:00Z"
);
}
// Whole-number floats must still carry a fractional part.
#[test]
fn render_float_adds_fractional_for_integers() {
assert_eq!(render_float(3.0), "3.0");
assert_eq!(render_float(0.0), "0.0");
}
// Floats that already have a fractional part are left as they are.
#[test]
fn render_float_preserves_fractional() {
assert_eq!(render_float(0.5), "0.5");
}
// Quotes and newlines are backslash-escaped; plain text is only wrapped in quotes.
#[test]
fn render_string_literal_escapes_special_chars() {
assert_eq!(render_string_literal("hi"), r#""hi""#);
assert_eq!(render_string_literal("a\"b"), r#""a\"b""#);
assert_eq!(render_string_literal("x\nn"), r#""x\nn""#);
}
// The rendered confidence is a single decimal number; either prefix is
// accepted because the stored value may not be exactly 0.8.
#[test]
fn render_confidence_gives_stable_decimal() {
let c = Confidence::try_from_f32(0.8).unwrap();
let s = render_confidence(c);
assert!(s.starts_with("0.7999") || s.starts_with("0.8000"));
assert_eq!(s.chars().filter(|c| *c == '.').count(), 1);
}
}