Skip to main content

code_analyze_mcp/
metrics.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Metrics collection and daily-rotating JSONL emission.
4//!
5//! Provides a channel-based pipeline: callers emit [`MetricEvent`] values via [`MetricsSender`],
6//! and [`MetricsWriter`] drains the channel and appends events to a daily-rotated JSONL file
7//! under the XDG data directory (`~/.local/share/code-analyze-mcp/metrics-YYYY-MM-DD.jsonl`).
8//! Files older than 30 days are deleted on startup.
9
10use serde::{Deserialize, Serialize};
11use std::path::{Path, PathBuf};
12use std::time::{SystemTime, UNIX_EPOCH};
13use tokio::io::AsyncWriteExt;
14use tokio::sync::mpsc;
15
/// A single metric event emitted by a tool invocation.
///
/// Serialized as one JSON object per line (JSONL). `session_id`, `seq`, and
/// `cache_hit` carry `#[serde(default)]` so records written by older versions
/// (which lacked those fields) still deserialize; only `cache_hit` is omitted
/// from the output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricEvent {
    /// Event timestamp in milliseconds since the Unix epoch (see [`unix_ms`]).
    pub ts: u64,
    /// Tool name. NOTE(review): `&'static str` means deserialization only
    /// succeeds when the input JSON outlives `'static` and the string has no
    /// escapes — the tests rely on `'static` literals for this; confirm
    /// whether runtime re-parsing of metrics files is ever needed.
    pub tool: &'static str,
    /// Duration of the tool invocation, in milliseconds.
    pub duration_ms: u64,
    /// Number of characters in the tool's output.
    pub output_chars: usize,
    /// Component count of the tool's path parameter
    /// (presumably computed via [`path_component_count`] — verify at callers).
    pub param_path_depth: usize,
    /// The caller-supplied `max_depth` parameter, if any.
    pub max_depth: Option<u32>,
    /// Outcome tag; tests use "ok" and "error".
    pub result: &'static str,
    /// Error category when the invocation failed; `None` on success.
    pub error_type: Option<String>,
    /// Session identifier; absent in records from older versions.
    #[serde(default)]
    pub session_id: Option<String>,
    /// Per-session sequence number; absent in records from older versions.
    #[serde(default)]
    pub seq: Option<u32>,
    /// Whether the result was served from cache; omitted from JSON when `None`.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_hit: Option<bool>,
}
35
/// Sender half of the metrics channel; cloned and passed to tools for event emission.
///
/// Wraps an unbounded `tokio` sender, so emitting an event never blocks the caller.
#[derive(Clone)]
pub struct MetricsSender(pub tokio::sync::mpsc::UnboundedSender<MetricEvent>);
39
40impl MetricsSender {
41    pub fn send(&self, event: MetricEvent) {
42        let _ = self.0.send(event);
43    }
44}
45
/// Receiver half of the metrics channel; drains events and writes them to daily-rotated JSONL files.
pub struct MetricsWriter {
    // Stream of pending events produced by cloned `MetricsSender`s.
    rx: tokio::sync::mpsc::UnboundedReceiver<MetricEvent>,
    // Directory holding the `metrics-YYYY-MM-DD.jsonl` files.
    base_dir: PathBuf,
}
51
52impl MetricsWriter {
53    pub fn new(
54        rx: tokio::sync::mpsc::UnboundedReceiver<MetricEvent>,
55        base_dir: Option<PathBuf>,
56    ) -> Self {
57        let dir = base_dir.unwrap_or_else(xdg_metrics_dir);
58        Self { rx, base_dir: dir }
59    }
60
61    pub async fn run(mut self) {
62        cleanup_old_files(&self.base_dir).await;
63        let mut current_date = current_date_str();
64        let mut current_file: Option<PathBuf> = None;
65
66        loop {
67            let mut batch = Vec::new();
68            if let Some(event) = self.rx.recv().await {
69                batch.push(event);
70                for _ in 0..99 {
71                    match self.rx.try_recv() {
72                        Ok(e) => batch.push(e),
73                        Err(
74                            mpsc::error::TryRecvError::Empty
75                            | mpsc::error::TryRecvError::Disconnected,
76                        ) => break,
77                    }
78                }
79            } else {
80                break;
81            }
82
83            let new_date = current_date_str();
84            if new_date != current_date {
85                current_date = new_date;
86                current_file = None;
87            }
88
89            if current_file.is_none() {
90                current_file = Some(
91                    self.base_dir
92                        .join(format!("metrics-{}.jsonl", current_date)),
93                );
94            }
95
96            let path = current_file.as_ref().unwrap();
97
98            // Create directory once per batch
99            if let Some(parent) = path.parent()
100                && !parent.as_os_str().is_empty()
101            {
102                tokio::fs::create_dir_all(parent).await.ok();
103            }
104
105            // Open file once per batch
106            let file = tokio::fs::OpenOptions::new()
107                .create(true)
108                .append(true)
109                .open(path)
110                .await;
111
112            if let Ok(mut file) = file {
113                for event in batch {
114                    if let Ok(mut json) = serde_json::to_string(&event) {
115                        json.push('\n');
116                        let _ = file.write_all(json.as_bytes()).await;
117                    }
118                }
119                let _ = file.flush().await;
120            }
121        }
122    }
123}
124
/// Returns the current UNIX timestamp in milliseconds.
///
/// Never panics: a clock set before the epoch yields 0, and a millisecond
/// count exceeding `u64` (far future) saturates to `u64::MAX`.
#[must_use]
pub fn unix_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    u64::try_from(since_epoch.as_millis()).unwrap_or(u64::MAX)
}
135
/// Counts the number of path segments in a file path.
///
/// Based on [`Path::components`], so the count reflects normalized components:
/// a leading `/` contributes one root component, and redundant separators or
/// interior `.` segments are collapsed by the normalization it performs.
#[must_use]
pub fn path_component_count(path: &str) -> usize {
    Path::new(path).components().fold(0, |n, _| n + 1)
}
141
/// Resolves the metrics directory per the XDG Base Directory convention:
/// `$XDG_DATA_HOME/code-analyze-mcp` when set and non-empty, else
/// `$HOME/.local/share/code-analyze-mcp`, else the current directory.
fn xdg_metrics_dir() -> PathBuf {
    match std::env::var("XDG_DATA_HOME") {
        Ok(data_home) if !data_home.is_empty() => {
            PathBuf::from(data_home).join("code-analyze-mcp")
        }
        // Unset, empty, or non-unicode: fall back to the HOME-based default.
        _ => match std::env::var("HOME") {
            Ok(home) => [home.as_str(), ".local", "share", "code-analyze-mcp"]
                .iter()
                .collect::<PathBuf>(),
            Err(_) => PathBuf::from("."),
        },
    }
}
158
/// Deletes `metrics-YYYY-MM-DD.jsonl` files older than 30 days from `base_dir`.
///
/// Best-effort: an unreadable directory aborts silently, malformed filenames
/// are skipped, and failed deletions are ignored.
async fn cleanup_old_files(base_dir: &Path) {
    // Today, expressed as whole days since the Unix epoch.
    let now_days = u32::try_from(unix_ms() / 86_400_000).unwrap_or(u32::MAX);

    let Ok(mut entries) = tokio::fs::read_dir(base_dir).await else {
        return;
    };

    loop {
        match entries.next_entry().await {
            Ok(Some(entry)) => {
                let path = entry.path();
                let file_name = match path.file_name() {
                    Some(n) => n.to_string_lossy().into_owned(),
                    None => continue,
                };

                // Expected format: metrics-YYYY-MM-DD.jsonl
                if !file_name.starts_with("metrics-")
                    || std::path::Path::new(&*file_name)
                        .extension()
                        .is_none_or(|e| !e.eq_ignore_ascii_case("jsonl"))
                {
                    continue;
                }
                // Byte slicing is safe here: the prefix check fixes the first
                // 8 bytes as ASCII, and the extension check fixes the last 6
                // bytes as an ASCII ".jsonl" variant, so both indices land on
                // char boundaries.
                let date_part = &file_name[8..file_name.len() - 6];
                if date_part.len() != 10
                    || date_part.as_bytes().get(4) != Some(&b'-')
                    || date_part.as_bytes().get(7) != Some(&b'-')
                {
                    continue;
                }
                let Ok(year) = date_part[0..4].parse::<u32>() else {
                    continue;
                };
                let Ok(month) = date_part[5..7].parse::<u32>() else {
                    continue;
                };
                let Ok(day) = date_part[8..10].parse::<u32>() else {
                    continue;
                };
                // NOTE(review): `year` is not range-checked — a crafted
                // "metrics-0000-01-01.jsonl" reaches date_to_days_since_epoch
                // with year 0, which underflows there in debug builds unless
                // that function saturates; consider requiring `year >= 1970`.
                if month == 0 || month > 12 || day == 0 || day > 31 {
                    continue;
                }

                let file_days = date_to_days_since_epoch(year, month, day);
                // Strictly more than 30 days old → delete; deletion errors ignored.
                if now_days > file_days && (now_days - file_days) > 30 {
                    let _ = tokio::fs::remove_file(&path).await;
                }
            }
            Ok(None) => break,
            Err(e) => {
                // NOTE(review): warns and retries; assumes next_entry errors
                // are transient — a persistently failing entry would spin
                // here. Confirm against tokio's ReadDir error semantics.
                tracing::warn!("error reading metrics directory entry: {e}");
            }
        }
    }
}
215
/// Converts a proleptic-Gregorian calendar date to whole days since 1970-01-01.
///
/// Implements Howard Hinnant's `days_from_civil` algorithm. Dates before the
/// Unix epoch saturate to 0 instead of underflowing. Callers are expected to
/// pass a plausible date (`1 <= m <= 12`, `1 <= d <= 31`), as validated in
/// `cleanup_old_files`.
fn date_to_days_since_epoch(y: u32, m: u32, d: u32) -> u32 {
    // Shift year so March is month 0 (leap days land at year end).
    // saturating_sub: for the pathological input year 0 with m <= 2, the
    // previous `y - 1` underflowed u32 and panicked in debug builds.
    let (y, m) = if m <= 2 {
        (y.saturating_sub(1), m + 9)
    } else {
        (y, m - 3)
    };
    let era = y / 400;
    let yoe = y - era * 400; // year-of-era [0, 399]
    let doy = (153 * m + 2) / 5 + d - 1; // day-of-year, March-based
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day-of-era
    // Compute the proleptic Gregorian day number, then subtract the Unix epoch
    // offset (days from 0000-03-01 to 1970-01-01 = 719_468). The subtraction
    // must wrap the full expression; applying .saturating_sub to `doe` alone
    // would underflow for recent dates where doe < 719_468.
    (era * 146_097 + doe).saturating_sub(719_468)
}
228
/// Returns the current UTC date as a string in YYYY-MM-DD format.
#[must_use]
pub fn current_date_str() -> String {
    // Whole days since the Unix epoch (saturating far in the future).
    let days = u32::try_from(unix_ms() / 86_400_000).unwrap_or(u32::MAX);
    // Inverse of `date_to_days_since_epoch`: Hinnant's civil_from_days
    // algorithm, on a shifted calendar where March is month 0.
    let z = days + 719_468;
    let era = z / 146_097; // 400-year eras since 0000-03-01
    let doe = z - era * 146_097; // day-of-era [0, 146_096]
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365; // year-of-era [0, 399]
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year, March-based
    let mp = (5 * doy + 2) / 153; // month index, March = 0
    let d = doy - (153 * mp + 2) / 5 + 1;
    let m = if mp < 10 { mp + 3 } else { mp - 9 };
    // January and February belong to the next civil year in the shifted calendar.
    let y = if m <= 2 { y + 1 } else { y };
    format!("{y:04}-{m:02}-{d:02}")
}
245
246#[cfg(test)]
247mod tests {
248    use super::*;
249
250    // --- date math tests ---
251
252    #[test]
253    fn test_date_to_days_since_epoch_known_dates() {
254        // Unix epoch: 1970-01-01 = day 0
255        assert_eq!(date_to_days_since_epoch(1970, 1, 1), 0);
256        // 2020-01-01: known value 18262 (50 years, accounting for leap years)
257        assert_eq!(date_to_days_since_epoch(2020, 1, 1), 18_262);
258        // Leap day 2000-02-29: 2000-02-29 is day 11_016
259        assert_eq!(date_to_days_since_epoch(2000, 2, 29), 11_016);
260    }
261
262    #[test]
263    fn test_current_date_str_format() {
264        let s = current_date_str();
265        assert_eq!(s.len(), 10, "date string must be 10 chars: {s}");
266        assert_eq!(s.as_bytes()[4], b'-', "char at index 4 must be '-': {s}");
267        assert_eq!(s.as_bytes()[7], b'-', "char at index 7 must be '-': {s}");
268        // Sanity: year must parse and be in reasonable range
269        let year: u32 = s[0..4].parse().expect("year must be numeric");
270        assert!(year >= 2020 && year <= 2100, "unexpected year {year}");
271    }
272
    // End-to-end: three events sent before shutdown end up as three JSONL
    // lines in a single daily file.
    #[tokio::test]
    async fn test_metrics_writer_batching() {
        use tempfile::TempDir;

        let dir = TempDir::new().unwrap();
        let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<MetricEvent>();
        let writer = MetricsWriter::new(rx, Some(dir.path().to_path_buf()));

        // Minimal valid event; only the count of written lines matters here.
        let make_event = || MetricEvent {
            ts: unix_ms(),
            tool: "analyze_directory",
            duration_ms: 1,
            output_chars: 10,
            param_path_depth: 1,
            max_depth: None,
            result: "ok",
            error_type: None,
            session_id: None,
            seq: None,
            cache_hit: None,
        };

        tx.send(make_event()).unwrap();
        tx.send(make_event()).unwrap();
        tx.send(make_event()).unwrap();
        // Drop sender so run() exits after draining
        drop(tx);

        writer.run().await;

        // Exactly 1 .jsonl file must exist with exactly 3 lines
        let entries: Vec<_> = std::fs::read_dir(dir.path())
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.path()
                    .extension()
                    .and_then(|x| x.to_str())
                    .map(|x| x.eq_ignore_ascii_case("jsonl"))
                    .unwrap_or(false)
            })
            .collect();
        assert_eq!(entries.len(), 1, "expected exactly 1 .jsonl file");
        let content = std::fs::read_to_string(entries[0].path()).unwrap();
        let lines: Vec<&str> = content.lines().collect();
        assert_eq!(lines.len(), 3, "expected exactly 3 lines; got: {content}");
    }
320
    // cleanup_old_files must delete files dated > 30 days ago and keep
    // files dated today; 1970-01-01 is the oldest parseable date.
    #[tokio::test]
    async fn test_cleanup_old_files_deletes_old_keeps_recent() {
        use tempfile::TempDir;

        // Create a file with a date far in the past (1970-01-01) which should be deleted,
        // and a file with today's date which should be kept.
        let dir = TempDir::new().unwrap();
        let old_file = dir.path().join("metrics-1970-01-01.jsonl");
        let today = current_date_str();
        let recent_file = dir.path().join(format!("metrics-{}.jsonl", today));
        std::fs::write(&old_file, "old\n").unwrap();
        std::fs::write(&recent_file, "recent\n").unwrap();

        cleanup_old_files(dir.path()).await;

        assert!(!old_file.exists(), "old file should have been deleted");
        assert!(recent_file.exists(), "today's file should have been kept");
    }
339
340    #[test]
341    fn test_metric_event_serialization() {
342        let event = MetricEvent {
343            ts: 1_700_000_000_000,
344            tool: "analyze_directory",
345            duration_ms: 42,
346            output_chars: 100,
347            param_path_depth: 3,
348            max_depth: Some(2),
349            result: "ok",
350            error_type: None,
351            session_id: None,
352            seq: None,
353            cache_hit: None,
354        };
355        let json = serde_json::to_string(&event).unwrap();
356        assert!(json.contains("analyze_directory"));
357        assert!(json.contains(r#""result":"ok""#));
358        assert!(json.contains(r#""output_chars":100"#));
359    }
360
361    #[test]
362    fn test_metric_event_serialization_error() {
363        let event = MetricEvent {
364            ts: 1_700_000_000_000,
365            tool: "analyze_directory",
366            duration_ms: 5,
367            output_chars: 0,
368            param_path_depth: 3,
369            max_depth: Some(3),
370            result: "error",
371            error_type: Some("invalid_params".to_string()),
372            session_id: None,
373            seq: None,
374            cache_hit: None,
375        };
376        let json = serde_json::to_string(&event).unwrap();
377        assert!(json.contains(r#""result":"error""#));
378        assert!(json.contains(r#""error_type":"invalid_params""#));
379        assert!(json.contains(r#""output_chars":0"#));
380        assert!(json.contains(r#""max_depth":3"#));
381    }
382
    // Pins the exact serialized form: cache_hit (None) is skipped entirely,
    // while session_id/seq/error_type serialize even when null/Some.
    #[test]
    fn test_metric_event_new_fields_round_trip() {
        let event = MetricEvent {
            ts: 1_700_000_000_000,
            tool: "analyze_file",
            duration_ms: 100,
            output_chars: 500,
            param_path_depth: 2,
            max_depth: Some(3),
            result: "ok",
            error_type: None,
            session_id: Some("1742468880123-42".to_string()),
            seq: Some(5),
            cache_hit: None,
        };
        let serialized = serde_json::to_string(&event).unwrap();
        // Byte-exact expectation: field order follows struct declaration order.
        let json_str = r#"{"ts":1700000000000,"tool":"analyze_file","duration_ms":100,"output_chars":500,"param_path_depth":2,"max_depth":3,"result":"ok","error_type":null,"session_id":"1742468880123-42","seq":5}"#;
        assert_eq!(serialized, json_str);
        let parsed: MetricEvent = serde_json::from_str(json_str).unwrap();
        assert_eq!(parsed.session_id, Some("1742468880123-42".to_string()));
        assert_eq!(parsed.seq, Some(5));
    }
405
406    #[test]
407    fn test_metric_event_backward_compat_parse() {
408        let old_jsonl = r#"{"ts":1700000000000,"tool":"analyze_directory","duration_ms":42,"output_chars":100,"param_path_depth":3,"max_depth":2,"result":"ok","error_type":null}"#;
409        let parsed: MetricEvent = serde_json::from_str(old_jsonl).unwrap();
410        assert_eq!(parsed.tool, "analyze_directory");
411        assert_eq!(parsed.session_id, None);
412        assert_eq!(parsed.seq, None);
413    }
414
415    #[test]
416    fn test_session_id_format() {
417        let event = MetricEvent {
418            ts: 1_700_000_000_000,
419            tool: "analyze_symbol",
420            duration_ms: 20,
421            output_chars: 50,
422            param_path_depth: 1,
423            max_depth: None,
424            result: "ok",
425            error_type: None,
426            session_id: Some("1742468880123-0".to_string()),
427            seq: Some(0),
428            cache_hit: None,
429        };
430        let sid = event.session_id.unwrap();
431        assert!(sid.contains('-'), "session_id should contain a dash");
432        let parts: Vec<&str> = sid.split('-').collect();
433        assert_eq!(parts.len(), 2, "session_id should have exactly 2 parts");
434        assert!(parts[0].len() == 13, "millis part should be 13 digits");
435    }
436
437    // --- cache_hit field tests ---
438
    // cache_hit must be invisible on the wire when None (skip_serializing_if)
    // and must default to None when absent from old records (serde default).
    #[test]
    fn test_metric_event_cache_hit_backward_compat() {
        // Arrange: event with cache_hit: None (old-style, field absent)
        let event = MetricEvent {
            ts: 1_700_000_000_000,
            tool: "analyze_directory",
            duration_ms: 1,
            output_chars: 10,
            param_path_depth: 1,
            max_depth: None,
            result: "ok",
            error_type: None,
            session_id: None,
            seq: None,
            cache_hit: None,
        };

        // Act: serialize
        let json = serde_json::to_string(&event).unwrap();

        // Assert: cache_hit key must NOT appear (skip_serializing_if = "Option::is_none")
        assert!(
            !json.contains("cache_hit"),
            "cache_hit: None must not appear in JSON; got: {json}"
        );

        // Edge case: old JSON without cache_hit must deserialize without error
        let old_json = r#"{"ts":1700000000000,"tool":"analyze_directory","duration_ms":1,"output_chars":10,"param_path_depth":1,"max_depth":null,"result":"ok","error_type":null}"#;
        let parsed: MetricEvent = serde_json::from_str(old_json).unwrap();
        assert_eq!(
            parsed.cache_hit, None,
            "missing cache_hit must default to None"
        );
    }
473}