1use std::path::Path;
21
22use thiserror::Error;
23
24use mimir_core::bind::SymbolTable;
25use mimir_core::canonical::{
26 decode_all, CanonicalRecord, DecodeError, EpiRecord, InfRecord, ProRecord, SemRecord,
27};
28use mimir_core::clock::ClockTime;
29use mimir_core::confidence::Confidence;
30use mimir_core::log::{CanonicalLog, LogBackend, LogError};
31use mimir_core::pipeline::Pipeline;
32use mimir_core::symbol::SymbolId;
33use mimir_core::value::Value;
34
/// Failures that can occur while rendering a canonical record as Lisp text.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum RenderError {
    /// A symbol id referenced by the record has no entry in the symbol table
    /// the renderer was constructed with.
    #[error("unknown symbol id {id:?} in {context}")]
    UnknownSymbol {
        /// The id that could not be resolved.
        id: SymbolId,
        /// Label of the record field that was being rendered (for example
        /// `"sem.s"` or `"epi.participant"`).
        context: &'static str,
    },

    /// The record handed to `LispRenderer::render_memory` is not one of the
    /// `Sem`, `Epi`, `Pro` or `Inf` variants.
    #[error("record is not a write-surface memory")]
    NotAMemory,
}
62
/// Renders memory records as Lisp-style text, resolving symbol ids to their
/// canonical names through a borrowed [`SymbolTable`].
pub struct LispRenderer<'a> {
    /// Table used to look up the canonical name of every symbol id rendered.
    table: &'a SymbolTable,
}
76
77impl<'a> LispRenderer<'a> {
78 #[must_use]
82 pub fn new(table: &'a SymbolTable) -> Self {
83 Self { table }
84 }
85
86 pub fn render_memory(&self, record: &CanonicalRecord) -> Result<String, RenderError> {
95 match record {
96 CanonicalRecord::Sem(r) => self.render_sem(r),
97 CanonicalRecord::Epi(r) => self.render_epi(r),
98 CanonicalRecord::Pro(r) => self.render_pro(r),
99 CanonicalRecord::Inf(r) => self.render_inf(r),
100 _ => Err(RenderError::NotAMemory),
101 }
102 }
103
104 fn render_sem(&self, r: &SemRecord) -> Result<String, RenderError> {
105 Ok(format!(
106 "(sem @{subject} @{predicate} {object} :src @{source} :c {confidence} :v {valid_at})",
107 subject = self.name_of(r.s, "sem.s")?,
108 predicate = self.name_of(r.p, "sem.p")?,
109 object = self.render_value(&r.o, "sem.o")?,
110 source = self.name_of(r.source, "sem.source")?,
111 confidence = render_confidence(r.confidence),
112 valid_at = iso8601_from_millis(r.clocks.valid_at),
113 ))
114 }
115
116 fn render_epi(&self, r: &EpiRecord) -> Result<String, RenderError> {
117 let mut participants = String::from("(");
118 for (i, p) in r.participants.iter().enumerate() {
119 if i > 0 {
120 participants.push(' ');
121 }
122 participants.push('@');
123 participants.push_str(&self.name_of(*p, "epi.participant")?);
124 }
125 participants.push(')');
126 Ok(format!(
127 "(epi @{event_id} @{kind} {participants} @{location} :at {at_time} :obs {observed_at} :src @{source} :c {confidence})",
128 event_id = self.name_of(r.event_id, "epi.event_id")?,
129 kind = self.name_of(r.kind, "epi.kind")?,
130 location = self.name_of(r.location, "epi.location")?,
131 at_time = iso8601_from_millis(r.at_time),
132 observed_at = iso8601_from_millis(r.observed_at),
133 source = self.name_of(r.source, "epi.source")?,
134 confidence = render_confidence(r.confidence),
135 ))
136 }
137
138 fn render_pro(&self, r: &ProRecord) -> Result<String, RenderError> {
139 let mut out = format!(
140 "(pro @{rule_id} {trigger} {action}",
141 rule_id = self.name_of(r.rule_id, "pro.rule_id")?,
142 trigger = self.render_value(&r.trigger, "pro.trigger")?,
143 action = self.render_value(&r.action, "pro.action")?,
144 );
145 if let Some(pre) = &r.precondition {
146 out.push_str(" :pre ");
147 out.push_str(&self.render_value(pre, "pro.precondition")?);
148 }
149 out.push_str(" :scp @");
150 out.push_str(&self.name_of(r.scope, "pro.scope")?);
151 out.push_str(" :src @");
152 out.push_str(&self.name_of(r.source, "pro.source")?);
153 out.push_str(" :c ");
154 out.push_str(&render_confidence(r.confidence));
155 out.push(')');
156 Ok(out)
157 }
158
159 fn render_inf(&self, r: &InfRecord) -> Result<String, RenderError> {
160 let mut parents = String::from("(");
161 for (i, p) in r.derived_from.iter().enumerate() {
162 if i > 0 {
163 parents.push(' ');
164 }
165 parents.push('@');
166 parents.push_str(&self.name_of(*p, "inf.derived_from")?);
167 }
168 parents.push(')');
169 Ok(format!(
170 "(inf @{subject} @{predicate} {object} {parents} @{method} :c {confidence} :v {valid_at})",
171 subject = self.name_of(r.s, "inf.s")?,
172 predicate = self.name_of(r.p, "inf.p")?,
173 object = self.render_value(&r.o, "inf.o")?,
174 method = self.name_of(r.method, "inf.method")?,
175 confidence = render_confidence(r.confidence),
176 valid_at = iso8601_from_millis(r.clocks.valid_at),
177 ))
178 }
179
180 fn render_value(&self, value: &Value, context: &'static str) -> Result<String, RenderError> {
181 Ok(match value {
182 Value::Symbol(id) => format!("@{}", self.name_of(*id, context)?),
183 Value::Integer(n) => n.to_string(),
184 Value::Float(f) => render_float(*f),
185 Value::Boolean(b) => if *b { "true" } else { "false" }.to_string(),
186 Value::String(s) => render_string_literal(s),
187 Value::Timestamp(t) => iso8601_from_millis(*t),
188 })
189 }
190
191 fn name_of(&self, id: SymbolId, context: &'static str) -> Result<String, RenderError> {
192 self.table
193 .entry(id)
194 .map(|e| e.canonical_name.clone())
195 .ok_or(RenderError::UnknownSymbol { id, context })
196 }
197}
198
199fn render_confidence(c: Confidence) -> String {
200 format!("{:.5}", c.as_f32())
203}
204
/// Formats a float so that it always reads as a float.
///
/// Non-finite values (NaN, ±infinity) render as `nil`. Otherwise the default
/// `Display` text is used, with `.0` appended when it contains neither a
/// decimal point nor an exponent marker.
fn render_float(f: f64) -> String {
    if f.is_nan() || f.is_infinite() {
        return String::from("nil");
    }
    let mut text = f.to_string();
    let already_float_like = text.chars().any(|ch| matches!(ch, '.' | 'e' | 'E'));
    if !already_float_like {
        text.push_str(".0");
    }
    text
}
223
/// Wraps `s` in double quotes, backslash-escaping `"`, `\`, newline,
/// carriage return and tab. Every other character is copied unchanged.
fn render_string_literal(s: &str) -> String {
    // +2 for the surrounding quotes; escapes may still grow the buffer.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        let escape = match ch {
            '"' => Some(r#"\""#),
            '\\' => Some(r"\\"),
            '\n' => Some(r"\n"),
            '\r' => Some(r"\r"),
            '\t' => Some(r"\t"),
            _ => None,
        };
        match escape {
            Some(sequence) => quoted.push_str(sequence),
            None => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
240
241#[must_use]
252#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
253pub fn iso8601_from_millis(clock: ClockTime) -> String {
254 let ms = clock.as_millis() as i64;
258 let days = ms.div_euclid(86_400_000);
259 let time_ms = ms.rem_euclid(86_400_000);
260 let (year, month, day) = civil_from_days(days);
261 let hour = time_ms / 3_600_000;
262 let minute = (time_ms % 3_600_000) / 60_000;
263 let second = (time_ms % 60_000) / 1_000;
264 format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
265}
266
/// Converts a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple, with `month` in `1..=12` and `day` in `1..=31`.
///
/// Follows the widely used days-to-civil algorithm published by Howard
/// Hinnant: days are grouped into 400-year eras of 146 097 days, and years are
/// treated as starting on 1 March so that the leap day falls at the end.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_sign_loss,
    clippy::similar_names
)]
fn civil_from_days(days: i64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days + EPOCH_SHIFT;
    // Bias negative values so the truncating division below rounds toward
    // negative infinity.
    let floor_numerator = if shifted < 0 {
        shifted - (DAYS_PER_ERA - 1)
    } else {
        shifted
    };
    let era = floor_numerator / DAYS_PER_ERA;
    let day_of_era = (shifted - era * DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    let march_based_year = year_of_era as i64 + era * 400;
    // January and February belong to the following calendar year.
    let year = march_based_year + i64::from(month <= 2);
    (year as i32, month as u32, day as u32)
}
289
/// Classification of the bytes that follow the last record `verify` managed
/// to decode.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TailStatus {
    /// Decoding consumed the whole log; nothing is left over.
    Clean,
    /// Bytes are left over, and decoding either stopped without an error or
    /// stopped with `DecodeError::Truncated` / `DecodeError::LengthMismatch`.
    OrphanTail {
        /// Number of bytes after the last decoded record.
        bytes: u64,
    },
    /// Decoding stopped with any other decode error.
    Corrupt {
        /// Number of bytes after the last decoded record.
        bytes: u64,
        /// The decode error that stopped the scan.
        first_decode_error: mimir_core::canonical::DecodeError,
    },
}
326
impl TailStatus {
    /// Returns `true` only for [`TailStatus::Clean`].
    #[must_use]
    pub const fn is_clean(&self) -> bool {
        matches!(self, Self::Clean)
    }

    /// Returns `true` only for [`TailStatus::Corrupt`].
    #[must_use]
    pub const fn is_corrupt(&self) -> bool {
        matches!(self, Self::Corrupt { .. })
    }

    /// Number of bytes after the last decoded record: `0` for `Clean`,
    /// otherwise the variant's `bytes` field.
    #[must_use]
    pub const fn trailing_bytes(&self) -> u64 {
        match self {
            Self::Clean => 0,
            Self::OrphanTail { bytes } | Self::Corrupt { bytes, .. } => *bytes,
        }
    }
}
350
/// Summary produced by [`verify`] for one log file.
#[derive(Debug, PartialEq, Eq)]
pub struct VerifyReport {
    /// Total number of records decoded before the scan stopped.
    pub records_decoded: usize,
    /// Number of decoded `Checkpoint` records.
    pub checkpoints: usize,
    /// Number of decoded `Sem`, `Epi`, `Pro` and `Inf` records.
    pub memory_records: usize,
    /// Number of decoded `SymbolAlloc`, `SymbolAlias`, `SymbolRename` and
    /// `SymbolRetire` records.
    pub symbol_events: usize,
    /// State of the bytes following the last decoded record.
    pub tail: TailStatus,
    /// Number of symbol references in the decoded memory records that have no
    /// entry in the symbol table rebuilt during the scan.
    pub dangling_symbols: usize,
}
370
impl VerifyReport {
    /// Shorthand for `self.tail.trailing_bytes()`: the number of bytes after
    /// the last decoded record.
    #[must_use]
    pub const fn trailing_bytes(&self) -> u64 {
        self.tail.trailing_bytes()
    }
}
379
/// Errors returned by [`verify`] and [`load_table_from_log`].
#[derive(Debug, Error)]
pub enum VerifyError {
    /// Opening or reading the log failed.
    #[error("verify I/O: {0}")]
    Log(#[from] LogError),

    /// Bytes before the last checkpoint end did not decode
    /// (raised by [`load_table_from_log`]).
    #[error("committed canonical bytes failed to decode: {source}")]
    CorruptCommittedLog {
        /// The underlying decode failure.
        #[from]
        source: mimir_core::canonical::DecodeError,
    },

    /// Replaying a symbol event into the pipeline was rejected
    /// (raised by [`load_table_from_log`]).
    #[error("symbol-replay conflict during load: {source}")]
    SymbolReplay {
        /// The underlying bind failure.
        #[from]
        source: mimir_core::bind::BindError,
    },

    /// The last checkpoint end offset does not fit in `usize`.
    #[error("committed log offset {offset} exceeds usize on this target")]
    CommittedEndOverflow {
        /// The offending offset as reported by the log.
        offset: u64,
    },
}
418
419pub fn verify(log_path: &Path) -> Result<VerifyReport, VerifyError> {
435 let mut log = CanonicalLog::open(log_path)?;
436 let bytes = log.read_all()?;
437 let total_len = bytes.len() as u64;
438
439 let mut pos: usize = 0;
442 let mut records_decoded = 0_usize;
443 let mut checkpoints = 0_usize;
444 let mut memory_records = 0_usize;
445 let mut symbol_events = 0_usize;
446
447 let mut table = SymbolTable::new();
450
451 let mut first_stop_error: Option<mimir_core::canonical::DecodeError> = None;
452 while pos < bytes.len() {
453 let remaining = &bytes[pos..];
454 match mimir_core::canonical::decode_record(remaining) {
455 Ok((record, consumed)) => {
456 pos += consumed;
457 records_decoded += 1;
458 apply_for_verify(
459 &record,
460 &mut table,
461 &mut checkpoints,
462 &mut memory_records,
463 &mut symbol_events,
464 );
465 }
466 Err(e) => {
467 first_stop_error = Some(e);
468 break;
469 }
470 }
471 }
472
473 let dangling_symbols = count_dangling_symbols(&bytes[..pos], &table);
477
478 let trailing = total_len - pos as u64;
479 let tail = match (first_stop_error, trailing) {
486 (None, 0) => TailStatus::Clean,
487 (None, bytes) => TailStatus::OrphanTail { bytes },
488 (Some(DecodeError::Truncated { .. } | DecodeError::LengthMismatch { .. }), bytes) => {
489 TailStatus::OrphanTail { bytes }
490 }
491 (Some(err), bytes) => TailStatus::Corrupt {
492 bytes,
493 first_decode_error: err,
494 },
495 };
496
497 Ok(VerifyReport {
498 records_decoded,
499 checkpoints,
500 memory_records,
501 symbol_events,
502 tail,
503 dangling_symbols,
504 })
505}
506
507fn apply_for_verify(
508 record: &CanonicalRecord,
509 table: &mut SymbolTable,
510 checkpoints: &mut usize,
511 memory_records: &mut usize,
512 symbol_events: &mut usize,
513) {
514 match record {
515 CanonicalRecord::SymbolAlloc(e) => {
516 *symbol_events += 1;
517 let _ = table.replay_allocate(e.symbol_id, e.name.clone(), e.symbol_kind);
521 }
522 CanonicalRecord::SymbolAlias(e) => {
523 *symbol_events += 1;
524 let _ = table.replay_alias(e.symbol_id, e.name.clone());
525 }
526 CanonicalRecord::SymbolRename(e) => {
527 *symbol_events += 1;
528 let _ = table.replay_rename(e.symbol_id, e.name.clone());
529 }
530 CanonicalRecord::SymbolRetire(e) => {
531 *symbol_events += 1;
532 let _ = table.replay_retire(e.symbol_id, e.name.clone());
533 }
534 CanonicalRecord::Checkpoint(_) => {
535 *checkpoints += 1;
536 }
537 CanonicalRecord::Sem(_)
538 | CanonicalRecord::Epi(_)
539 | CanonicalRecord::Pro(_)
540 | CanonicalRecord::Inf(_) => {
541 *memory_records += 1;
542 }
543 _ => {}
544 }
545}
546
547fn count_dangling_symbols(bytes: &[u8], table: &SymbolTable) -> usize {
548 let Ok(records) = decode_all(bytes) else {
549 return 0;
550 };
551 let mut dangling = 0_usize;
552 for record in records {
553 match record {
554 CanonicalRecord::Sem(r) => {
555 for id in [r.s, r.p, r.source, r.memory_id] {
556 if table.entry(id).is_none() {
557 dangling += 1;
558 }
559 }
560 if let Value::Symbol(id) = r.o {
561 if table.entry(id).is_none() {
562 dangling += 1;
563 }
564 }
565 }
566 CanonicalRecord::Epi(r) => {
567 for id in [r.event_id, r.kind, r.location, r.source, r.memory_id] {
568 if table.entry(id).is_none() {
569 dangling += 1;
570 }
571 }
572 for p in &r.participants {
573 if table.entry(*p).is_none() {
574 dangling += 1;
575 }
576 }
577 }
578 CanonicalRecord::Pro(r) => {
579 for id in [r.rule_id, r.scope, r.source, r.memory_id] {
580 if table.entry(id).is_none() {
581 dangling += 1;
582 }
583 }
584 }
585 CanonicalRecord::Inf(r) => {
586 for id in [r.s, r.p, r.method, r.memory_id] {
587 if table.entry(id).is_none() {
588 dangling += 1;
589 }
590 }
591 for p in &r.derived_from {
592 if table.entry(*p).is_none() {
593 dangling += 1;
594 }
595 }
596 }
597 _ => {}
598 }
599 }
600 dangling
601}
602
603pub fn load_table_from_log(log_path: &Path) -> Result<SymbolTable, VerifyError> {
616 let mut log = CanonicalLog::open(log_path)?;
617 let bytes = log.read_all()?;
618 let committed_end = log.last_checkpoint_end()?;
619 let committed_end =
621 usize::try_from(committed_end).map_err(|_| VerifyError::CommittedEndOverflow {
622 offset: committed_end,
623 })?;
624 let records = decode_all(&bytes[..committed_end])?;
625 let mut pipeline = Pipeline::new();
626 for record in records {
627 match record {
628 CanonicalRecord::SymbolAlloc(e) => {
629 pipeline.replay_allocate(e.symbol_id, e.name, e.symbol_kind)?;
630 }
631 CanonicalRecord::SymbolAlias(e) => {
632 pipeline.replay_alias(e.symbol_id, e.name)?;
633 }
634 CanonicalRecord::SymbolRename(e) => {
635 pipeline.replay_rename(e.symbol_id, e.name)?;
636 }
637 CanonicalRecord::SymbolRetire(e) => {
638 pipeline.replay_retire(e.symbol_id, e.name)?;
639 }
640 _ => {}
641 }
642 }
643 Ok(pipeline.table().clone())
644}
645
// Unit tests for the pure formatting helpers in this file.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    #[test]
    fn iso8601_renders_unix_epoch_zero() {
        let epoch = ClockTime::try_from_millis(0).expect("non-sentinel");
        assert_eq!(iso8601_from_millis(epoch), "1970-01-01T00:00:00Z");
    }

    #[test]
    fn iso8601_renders_y2k() {
        let y2k = ClockTime::try_from_millis(946_684_800_000).expect("non-sentinel");
        assert_eq!(iso8601_from_millis(y2k), "2000-01-01T00:00:00Z");
    }

    #[test]
    fn iso8601_renders_known_timestamp() {
        let known = ClockTime::try_from_millis(1_705_276_800_000).expect("non-sentinel");
        assert_eq!(iso8601_from_millis(known), "2024-01-15T00:00:00Z");
    }

    #[test]
    fn render_float_adds_fractional_for_integers() {
        for (input, expected) in [(3.0, "3.0"), (0.0, "0.0")] {
            assert_eq!(render_float(input), expected);
        }
    }

    #[test]
    fn render_float_preserves_fractional() {
        let rendered = render_float(0.5);
        assert_eq!(rendered, "0.5");
    }

    #[test]
    fn render_string_literal_escapes_special_chars() {
        let cases = [
            ("hi", r#""hi""#),
            ("a\"b", r#""a\"b""#),
            ("x\nn", r#""x\nn""#),
        ];
        for (input, expected) in cases {
            assert_eq!(render_string_literal(input), expected);
        }
    }

    #[test]
    fn render_confidence_gives_stable_decimal() {
        let confidence = Confidence::try_from_f32(0.8).unwrap();
        let rendered = render_confidence(confidence);
        // 0.8 is not exactly representable, so either rounding is accepted.
        let plausible = ["0.7999", "0.8000"]
            .iter()
            .any(|prefix| rendered.starts_with(prefix));
        assert!(plausible);
        assert_eq!(rendered.matches('.').count(), 1);
    }
}