Skip to main content

mur_common/skill/
stats.rs

1//! Per-skill runtime statistics. **Not** signed, **not** part of the
2//! publisher manifest. Lives at `<MUR_HOME>/skills/<name>/stats.json`
3//! and is rebuildable from the JSONL trace log via
4//! `mur skill reindex-stats`.
5//!
6//! ## Security
7//!
8//! `stats.json` is host-local mutable state and is **explicitly outside
9//! the DSSE signature scope** (see §2.2 Layer 1 of the skill ecosystem
10//! design). A skill's signature covers `skill.yaml` only. Stats can be
11//! deleted or rebuilt (`mur skill reindex-stats`) without affecting
12//! trust.
13
14use anyhow::{Context, Result};
15use chrono::{DateTime, Utc};
16use fd_lock::RwLock;
17use serde::{Deserialize, Serialize};
18use std::fs::OpenOptions;
19use std::path::{Path, PathBuf};
20use tempfile::NamedTempFile;
21
/// Schema version stamped into `SkillStats::schema_version` by
/// `SkillStats::new`.
pub const STATS_SCHEMA_VERSION: u32 = 1;
23
/// Lifecycle stage recorded for a skill in its stats sidecar.
///
/// Serialised in `snake_case` (for example `"emerging"`). `Draft` is the
/// `Default` variant, so it is the state a record built by
/// `SkillStats::new` starts in.
// NOTE(review): the variants look like they are listed in promotion order,
// but nothing in this file relies on the ordering — confirm before depending
// on it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Copy, Default)]
#[serde(rename_all = "snake_case")]
pub enum LifecycleState {
    #[default]
    Draft,
    Emerging,
    Stable,
    Canonical,
    Deprecated,
    Archived,
}
35
/// Sidecar stats for an installed skill. **Not part of the signed manifest.**
///
/// Schema evolution policy: additive only. New fields MUST be marked
/// `#[serde(default)]` so older `mur` builds reading newer files (and newer
/// builds reading older files) parse cleanly without migration. Do not
/// pre-reserve fields without a producer — empty defaults create semantic
/// ambiguity ("never set" vs "set to empty"). Add fields when their
/// callers exist.
///
/// Field order is the order in which the fields are serialised, so
/// reordering fields changes the on-disk layout of `stats.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkillStats {
    /// Format version of this record; `SkillStats::new` writes
    /// `STATS_SCHEMA_VERSION`.
    pub schema_version: u32,
    pub skill_name: String,
    /// Version string of the manifest these stats belong to; replaced by
    /// `reset_for_new_manifest()`.
    pub skill_version: String,
    /// SHA-256 of the manifest content at the time these stats were
    /// (re)initialised. A mismatch on load tells us the skill was
    /// reinstalled — detect it with `is_stale()` and handle it with
    /// `reset_for_new_manifest()`.
    pub manifest_digest: String,

    /// Preserved across `reset_for_new_manifest()`.
    pub lifecycle_state: LifecycleState,
    /// Set to the caller-supplied `now` by both `new()` and
    /// `reset_for_new_manifest()`.
    pub lifecycle_changed_at: DateTime<Utc>,
    /// Preserved across `reset_for_new_manifest()`.
    pub pinned: bool,
    /// Falls back to the empty string when the key is absent from the file.
    #[serde(default)]
    pub pinned_reason: String,

    // The three counters below are zeroed by `reset_for_new_manifest()`.
    pub usage_count: u64,
    pub success_count: u64,
    pub failure_count: u64,

    /// Cleared by `reset_for_new_manifest()`.
    pub last_used_at: Option<DateTime<Utc>>,
    /// Cleared by `reset_for_new_manifest()`.
    pub last_success_at: Option<DateTime<Utc>>,
    /// Preserved across `reset_for_new_manifest()`.
    pub first_successful_use_at: Option<DateTime<Utc>>,

    /// Confidence at the moment of the most recent successful use (or
    /// most recent promotion — see `lifecycle::on_promotion`). Decay is
    /// computed *from this anchor*, never incrementally — keeps the
    /// value numerically stable and idempotent on read.
    pub anchor_confidence: f64,

    /// Watermark for incremental reindex — the trace timestamp that
    /// these stats have already absorbed. `mur skill reindex-stats`
    /// resumes from here.
    pub rebuilt_from_trace_through: Option<DateTime<Utc>>,

    /// Count of inject-time `Resolution::Unresolved` outcomes for this skill.
    /// A spike here means the skill declares intents that no longer match the
    /// agent's MCP inventory — doctor's `intent-resolvable` check surfaces this.
    #[serde(default)]
    pub resolution_misses: u64,
}
85
86impl SkillStats {
87    pub fn new(
88        skill_name: &str,
89        skill_version: &str,
90        manifest_digest: &str,
91        now: DateTime<Utc>,
92    ) -> Self {
93        Self {
94            schema_version: STATS_SCHEMA_VERSION,
95            skill_name: skill_name.to_string(),
96            skill_version: skill_version.to_string(),
97            manifest_digest: manifest_digest.to_string(),
98            lifecycle_state: LifecycleState::default(),
99            lifecycle_changed_at: now,
100            pinned: false,
101            pinned_reason: String::new(),
102            usage_count: 0,
103            success_count: 0,
104            failure_count: 0,
105            last_used_at: None,
106            last_success_at: None,
107            first_successful_use_at: None,
108            anchor_confidence: 1.0,
109            rebuilt_from_trace_through: None,
110            resolution_misses: 0,
111        }
112    }
113
114    pub fn path(mur_home: &Path, skill_name: &str) -> PathBuf {
115        mur_home.join("skills").join(skill_name).join("stats.json")
116    }
117
118    /// Per-agent stats path: <MUR_HOME>/agents/<agent>/skills/<name>/stats.json
119    pub fn path_agent(mur_home: &Path, agent: &str, skill_name: &str) -> PathBuf {
120        mur_home
121            .join("agents")
122            .join(agent)
123            .join("skills")
124            .join(skill_name)
125            .join("stats.json")
126    }
127
128    /// Read the sidecar, or return `None` if absent. Lock-free — fine
129    /// for read-mostly callers (doctor, info, stats). Concurrent writers
130    /// going through `merge_in_place` will not corrupt the file because
131    /// they hold the exclusive lock during the write window.
132    pub fn load(path: &Path) -> Result<Option<Self>> {
133        match std::fs::read_to_string(path) {
134            Ok(s) => {
135                let stats: Self = serde_json::from_str(&s).context("deserialise stats.json")?;
136                Ok(Some(stats))
137            }
138            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
139            Err(e) => Err(e).context("read stats.json"),
140        }
141    }
142
143    /// Read-merge-write under an exclusive `fd-lock`. `merge_fn` is
144    /// called with the loaded value (or the supplied default if none
145    /// exists) and is responsible for applying the delta. The lock
146    /// window is microseconds — counter increments only.
147    pub fn merge_in_place(
148        path: &Path,
149        default: impl FnOnce() -> Self,
150        merge_fn: impl FnOnce(&mut Self) -> Result<()>,
151    ) -> Result<()> {
152        // Lock on a sidecar lockfile, not stats.json itself — POSIX
153        // flock(2) on the data file would race with rename. Same
154        // pattern as git/index.lock.
155        let lock_path = path.with_extension("lock");
156        let parent = path.parent().context("stats path has no parent")?;
157        std::fs::create_dir_all(parent).ok();
158
159        let mut lock_file = RwLock::new(
160            OpenOptions::new()
161                .create(true)
162                .truncate(true)
163                .write(true)
164                .read(true)
165                .open(&lock_path)
166                .context("open stats lockfile")?,
167        );
168        let _guard = lock_file.write().context("acquire stats lock")?;
169
170        let mut stats = Self::load(path)?.unwrap_or_else(default);
171        merge_fn(&mut stats)?;
172
173        let tmp = NamedTempFile::new_in(parent).context("create temp file for stats")?;
174        serde_json::to_writer_pretty(&tmp, &stats).context("serialise stats")?;
175        tmp.persist(path).context("persist stats")?;
176        Ok(())
177    }
178
179    /// Returns true if the loaded stats refer to a different manifest
180    /// digest than the one currently installed. Callers (the aggregator
181    /// and reindex) should `reset()` in that case rather than carry
182    /// counters across an upgrade.
183    ///
184    /// A version bump resets `usage_count` / `success_count` /
185    /// `failure_count` but **preserves** `pinned`,
186    /// `first_successful_use_at`, and `lifecycle_state` (a Canonical
187    /// skill bumping to 1.2.0 should not regress to Draft).
188    pub fn is_stale(&self, current_digest: &str) -> bool {
189        self.manifest_digest != current_digest
190    }
191
192    /// Reset counters for a manifest change, preserving pinned state
193    /// and first-success timestamp.
194    pub fn reset_for_new_manifest(
195        &mut self,
196        new_version: &str,
197        new_digest: &str,
198        now: DateTime<Utc>,
199    ) {
200        self.skill_version = new_version.to_string();
201        self.manifest_digest = new_digest.to_string();
202        self.usage_count = 0;
203        self.success_count = 0;
204        self.failure_count = 0;
205        self.last_used_at = None;
206        self.last_success_at = None;
207        self.anchor_confidence = 1.0;
208        self.rebuilt_from_trace_through = None;
209        self.lifecycle_changed_at = now;
210        // Preserve: pinned, pinned_reason, first_successful_use_at, lifecycle_state
211    }
212}
213
214#[cfg(test)]
215mod tests {
216    use super::*;
217    use std::thread;
218
219    fn temp_stats_path() -> (tempfile::TempDir, PathBuf) {
220        let dir = tempfile::tempdir().unwrap();
221        let path = dir.path().join("test_skill").join("stats.json");
222        let parent = path.parent().unwrap();
223        std::fs::create_dir_all(parent).unwrap();
224        (dir, path)
225    }
226
227    fn dummy_stats(name: &str) -> SkillStats {
228        SkillStats::new(name, "1.0.0", "abc123", Utc::now())
229    }
230
231    #[test]
232    fn load_returns_none_for_missing_path() {
233        let (_dir, path) = temp_stats_path();
234        let result = SkillStats::load(&path).unwrap();
235        assert!(result.is_none());
236    }
237
238    #[test]
239    fn load_returns_stats_for_valid_file() {
240        let (_dir, path) = temp_stats_path();
241        let stats = dummy_stats("test-skill");
242        std::fs::write(&path, serde_json::to_string_pretty(&stats).unwrap()).unwrap();
243        let loaded = SkillStats::load(&path).unwrap().unwrap();
244        assert_eq!(loaded.skill_name, "test-skill");
245        assert_eq!(loaded.usage_count, 0);
246    }
247
248    #[test]
249    fn merge_in_place_counter_increment() {
250        let (_dir, path) = temp_stats_path();
251        let skill_name = "merge-test".to_string();
252        let default = || dummy_stats(&skill_name);
253
254        // First merge: increment usage
255        SkillStats::merge_in_place(&path, default, |s| {
256            s.usage_count += 1;
257            Ok(())
258        })
259        .unwrap();
260
261        let loaded = SkillStats::load(&path).unwrap().unwrap();
262        assert_eq!(loaded.usage_count, 1);
263
264        // Second merge: increment again
265        SkillStats::merge_in_place(
266            &path,
267            || panic!("default should not be called"),
268            |s| {
269                s.usage_count += 2;
270                Ok(())
271            },
272        )
273        .unwrap();
274
275        let loaded = SkillStats::load(&path).unwrap().unwrap();
276        assert_eq!(loaded.usage_count, 3);
277    }
278
279    #[test]
280    fn concurrent_merge_both_increments_commit() {
281        let (_dir, path) = temp_stats_path();
282        let skill_name = "concurrent-test".to_string();
283        let path = std::path::PathBuf::from(path); // decouple from tempdir lifetime
284        let path2 = path.clone();
285
286        // Init the file
287        SkillStats::merge_in_place(&path, || dummy_stats(&skill_name), |_| Ok(())).unwrap();
288
289        let t1 = thread::spawn(move || {
290            SkillStats::merge_in_place(
291                &path,
292                || panic!("default should not be called"),
293                |s| {
294                    s.usage_count += 1;
295                    Ok(())
296                },
297            )
298            .unwrap();
299        });
300        let t2 = thread::spawn(move || {
301            SkillStats::merge_in_place(
302                &path2,
303                || panic!("default should not be called"),
304                |s| {
305                    s.usage_count += 2;
306                    Ok(())
307                },
308            )
309            .unwrap();
310        });
311
312        t1.join().unwrap();
313        t2.join().unwrap();
314
315        let loaded = SkillStats::load(&_dir.path().join("test_skill").join("stats.json"))
316            .unwrap()
317            .unwrap();
318        // Both increments should have committed (commutative counters)
319        assert_eq!(loaded.usage_count, 3);
320    }
321
322    #[test]
323    fn is_stale_detects_digest_mismatch() {
324        let stats = dummy_stats("test");
325        assert!(!stats.is_stale("abc123"));
326        assert!(stats.is_stale("different"));
327    }
328
329    #[test]
330    fn schema_version_1_deserialises_fixture() {
331        let fixture = r#"{
332            "schema_version": 1,
333            "skill_name": "research-patterns",
334            "skill_version": "2.3.0",
335            "manifest_digest": "abcdef",
336            "lifecycle_state": "emerging",
337            "lifecycle_changed_at": "2026-05-25T00:00:00Z",
338            "pinned": false,
339            "pinned_reason": "",
340            "usage_count": 42,
341            "success_count": 38,
342            "failure_count": 4,
343            "last_used_at": "2026-05-25T12:00:00Z",
344            "last_success_at": "2026-05-25T11:00:00Z",
345            "first_successful_use_at": "2026-05-01T00:00:00Z",
346            "anchor_confidence": 0.95,
347            "rebuilt_from_trace_through": "2026-05-25T10:00:00Z"
348        }"#;
349        let stats: SkillStats = serde_json::from_str(fixture).unwrap();
350        assert_eq!(stats.schema_version, 1);
351        assert_eq!(stats.lifecycle_state, LifecycleState::Emerging);
352        assert_eq!(stats.usage_count, 42);
353        assert_eq!(stats.anchor_confidence, 0.95);
354        assert!(stats.last_used_at.is_some());
355    }
356
357    #[test]
358    fn reset_for_new_manifest_preserves_pinned_and_state() {
359        let mut stats = SkillStats {
360            pinned: true,
361            pinned_reason: "critical".into(),
362            lifecycle_state: LifecycleState::Canonical,
363            first_successful_use_at: Some(Utc::now()),
364            usage_count: 100,
365            success_count: 95,
366            failure_count: 5,
367            ..dummy_stats("test")
368        };
369        stats.reset_for_new_manifest("2.0.0", "newdigest", Utc::now());
370        assert_eq!(stats.skill_version, "2.0.0");
371        assert_eq!(stats.usage_count, 0);
372        assert!(stats.pinned);
373        assert_eq!(stats.lifecycle_state, LifecycleState::Canonical);
374        assert!(stats.first_successful_use_at.is_some());
375    }
376}