mnem_bench/score/mod.rs
1//! Scoring engines per benchmark.
2//!
3//! Ships [`longmemeval`], [`locomo`], [`convomem`], [`membench`], and
4//! [`hybrid_v4`].
5
6pub mod convomem;
7pub mod hybrid_v4;
8pub mod locomo;
9pub mod longmemeval;
10pub mod membench;
11
12use serde::{Deserialize, Serialize};
13use std::collections::BTreeMap;
14
15/// Common shape every scorer writes to disk as `<bench>.json`.
16#[derive(Clone, Debug, Serialize, Deserialize)]
17pub struct ScoreReport {
18 /// Free-form harness id ("mnem-lme-session", "mnem-locomo", ...).
19 pub harness: String,
20 /// Adapter that ran (e.g. "mnem").
21 pub adapter: String,
22 /// Path to the dataset file consumed.
23 pub dataset: String,
24 /// Total questions scored.
25 pub n_questions: usize,
26 /// Wall-time seconds for the run.
27 pub runtime_seconds: f64,
28 /// Per-phase wall-time split.
29 pub timing: TimingBreakdown,
30 /// Headline metrics (`recall@5`, `recall@10`, ...).
31 pub overall: BTreeMap<String, f64>,
32 /// Optional per-category breakdown. Empty when the bench has
33 /// no category split.
34 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
35 pub by_category: BTreeMap<String, BTreeMap<String, f64>>,
36}
37
38/// Per-phase wall-time split.
39#[derive(Clone, Debug, Default, Serialize, Deserialize)]
40pub struct TimingBreakdown {
41 /// Seconds spent in adapter `ingest` calls.
42 pub ingest_s: f64,
43 /// Seconds spent in adapter `retrieve` calls.
44 pub retrieve_s: f64,
45 /// Seconds spent computing recall + writing rows.
46 pub score_s: f64,
47}
48
49/// One per-question row written to `<bench>.jsonl`.
50#[derive(Clone, Debug, Serialize, Deserialize)]
51pub struct PerQuestionRow {
52 /// Question id (or category-specific synthetic id).
53 pub qid: String,
54 /// Optional question type / category.
55 #[serde(default, skip_serializing_if = "Option::is_none")]
56 pub qtype: Option<String>,
57 /// Hit at top-5 (boolean as 0/1).
58 pub hit_at_5: u8,
59 /// Hit at top-10.
60 pub hit_at_10: u8,
61 /// Top-5 retrieved external ids, in rank order.
62 pub top5: Vec<String>,
63 /// Gold external ids the bench expected to see.
64 pub gold: Vec<String>,
65}