Skip to main content

mnem_bench/score/
mod.rs

//! Scoring engines per benchmark.
//!
//! Ships [`longmemeval`], [`locomo`], [`convomem`], [`membench`], and
//! [`hybrid_v4`].
5
6pub mod convomem;
7pub mod hybrid_v4;
8pub mod locomo;
9pub mod longmemeval;
10pub mod membench;
11
12use serde::{Deserialize, Serialize};
13use std::collections::BTreeMap;
14
15/// Common shape every scorer writes to disk as `<bench>.json`.
16#[derive(Clone, Debug, Serialize, Deserialize)]
17pub struct ScoreReport {
18    /// Free-form harness id ("mnem-lme-session", "mnem-locomo", ...).
19    pub harness: String,
20    /// Adapter that ran (e.g. "mnem").
21    pub adapter: String,
22    /// Path to the dataset file consumed.
23    pub dataset: String,
24    /// Total questions scored.
25    pub n_questions: usize,
26    /// Wall-time seconds for the run.
27    pub runtime_seconds: f64,
28    /// Per-phase wall-time split.
29    pub timing: TimingBreakdown,
30    /// Headline metrics (`recall@5`, `recall@10`, ...).
31    pub overall: BTreeMap<String, f64>,
32    /// Optional per-category breakdown. Empty when the bench has
33    /// no category split.
34    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
35    pub by_category: BTreeMap<String, BTreeMap<String, f64>>,
36}
37
38/// Per-phase wall-time split.
39#[derive(Clone, Debug, Default, Serialize, Deserialize)]
40pub struct TimingBreakdown {
41    /// Seconds spent in adapter `ingest` calls.
42    pub ingest_s: f64,
43    /// Seconds spent in adapter `retrieve` calls.
44    pub retrieve_s: f64,
45    /// Seconds spent computing recall + writing rows.
46    pub score_s: f64,
47}
48
49/// One per-question row written to `<bench>.jsonl`.
50#[derive(Clone, Debug, Serialize, Deserialize)]
51pub struct PerQuestionRow {
52    /// Question id (or category-specific synthetic id).
53    pub qid: String,
54    /// Optional question type / category.
55    #[serde(default, skip_serializing_if = "Option::is_none")]
56    pub qtype: Option<String>,
57    /// Hit at top-5 (boolean as 0/1).
58    pub hit_at_5: u8,
59    /// Hit at top-10.
60    pub hit_at_10: u8,
61    /// Top-5 retrieved external ids, in rank order.
62    pub top5: Vec<String>,
63    /// Gold external ids the bench expected to see.
64    pub gold: Vec<String>,
65}