Skip to main content

aptu_coder/
metrics.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Metrics collection and daily-rotating JSONL emission.
4//!
5//! Provides a channel-based pipeline: callers emit [`MetricEvent`] values via [`MetricsSender`],
6//! and [`MetricsWriter`] drains the channel and appends events to a daily-rotated JSONL file
7//! under the XDG data directory (`~/.local/share/aptu-coder/metrics-YYYY-MM-DD.jsonl`).
8//! Files older than 30 days are deleted on startup.
9
10use aptu_coder_core::lang::language_for_extension;
11use serde::{Deserialize, Serialize};
12use std::path::{Path, PathBuf};
13use std::time::{SystemTime, UNIX_EPOCH};
14use tokio::io::AsyncWriteExt;
15use tokio::sync::mpsc;
16
/// A single metric event emitted by a tool invocation.
///
/// Events are serialized one per line into the daily JSONL file, so the field order and the
/// serde attributes below define the on-disk format. Fields marked with `skip_serializing_if`
/// are omitted from the JSON when unset; the remaining `Option` fields serialize as `null`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct MetricEvent {
    /// Event timestamp. The tests in this file populate it from [`unix_ms`], i.e. milliseconds
    /// since the Unix epoch.
    pub ts: u64,
    /// Name of the tool that produced the event; the writer aggregates its shutdown summary
    /// per distinct value.
    pub tool: &'static str,
    /// Duration of the invocation in milliseconds; summed per tool for the shutdown summary.
    pub duration_ms: u64,
    /// Size of the tool output in characters; summed per tool for the shutdown summary.
    pub output_chars: usize,
    /// Depth of the path parameter.
    /// NOTE(review): presumably computed with [`path_component_count`] — confirm against callers.
    pub param_path_depth: usize,
    /// Optional depth limit associated with the call.
    /// NOTE(review): semantics are defined by the emitting tool, which is not visible here.
    pub max_depth: Option<u32>,
    /// Outcome label; the tests in this file use `"ok"` and `"error"`.
    pub result: &'static str,
    /// Error category for failed calls (e.g. `"invalid_params"` in the tests).
    /// Serialized as `null` when `None`.
    pub error_type: Option<String>,
    /// Finer-grained error category (e.g. `"not_found"`, `"ambiguous"` in the tests).
    /// Omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error_subtype: Option<String>,
    /// Session identifier; the first non-`None` value seen by the writer is reported in the
    /// shutdown export summary.
    #[serde(default)]
    pub session_id: Option<String>,
    /// Optional sequence number.
    /// NOTE(review): presumably the per-session call index — confirm against emitters.
    #[serde(default)]
    pub seq: Option<u32>,
    /// Cache-hit flag; omitted from the JSON when `None`.
    /// NOTE(review): which cache this refers to is decided by the emitter — confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_hit: Option<bool>,
    /// Label of the cache tier involved, if any; omitted from the JSON when `None`.
    /// NOTE(review): the set of valid labels is not visible in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_tier: Option<&'static str>,
    /// Set to Some(true) when an L2 disk cache write fails (dir, tempfile, write, or rename).
    /// Drives the cache_write_failures_total OTEL counter.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_write_failure: Option<bool>,
    /// Exit code associated with the call, if any; omitted from the JSON when `None`.
    /// NOTE(review): presumably the child-process exit status for `exec_command` — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub exit_code: Option<i32>,
    /// Timeout flag; omitted from the JSON when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub timed_out: bool,
    /// Output-truncation flag; omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_truncated: Option<bool>,
    /// True when `output_chars > 30_000`; fires for the top ~0.33% of exec_command calls
    /// (p99.7 of 27,981 observed calls). Early-warning signal for responses approaching
    /// the per-stream byte-cap threshold.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub chars_threshold_breach: bool,
    /// File extension of the analyzed path, lowercased. `Some("rs")` for known extensions,
    /// `Some("other")` for unrecognized extensions, `None` when the path has no extension.
    /// Only populated for `analyze_file` and `analyze_module`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub file_ext: Option<&'static str>,
    /// Name of the filter rule that matched and transformed exec_command output.
    /// `None` when no filter fired or for non-`exec_command` tools.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub filter_applied: Option<String>,
}
65
66/// Sender half of the metrics channel; cloned and passed to tools for event emission.
67#[derive(Clone)]
68pub struct MetricsSender(pub tokio::sync::mpsc::UnboundedSender<MetricEvent>);
69
70impl MetricsSender {
71    pub fn send(&self, event: MetricEvent) {
72        let _ = self.0.send(event);
73    }
74}
75
76/// Receiver half of the metrics channel; drains events and writes them to daily-rotated JSONL files.
77pub struct MetricsWriter {
78    rx: tokio::sync::mpsc::UnboundedReceiver<MetricEvent>,
79    base_dir: PathBuf,
80    dir_created: bool,
81}
82
/// Running totals for one tool, accumulated over the lifetime of the writer.
#[derive(Debug, Default)]
struct ToolMetrics {
    /// Number of events seen for the tool.
    count: u64,
    /// Sum of `duration_ms` over those events.
    duration_ms: u64,
    /// Sum of `output_chars` over those events.
    output_chars: u64,
}
90
91impl MetricsWriter {
92    pub fn new(
93        rx: tokio::sync::mpsc::UnboundedReceiver<MetricEvent>,
94        base_dir: Option<PathBuf>,
95    ) -> Self {
96        let dir = base_dir.unwrap_or_else(xdg_metrics_dir);
97        Self {
98            rx,
99            base_dir: dir,
100            dir_created: false,
101        }
102    }
103
104    /// Accumulate a metric event into tool_counts and export_session_id.
105    fn accumulate_event(
106        tool_counts: &mut std::collections::HashMap<&'static str, ToolMetrics>,
107        export_session_id: &mut Option<String>,
108        event: &MetricEvent,
109    ) {
110        let entry = tool_counts.entry(event.tool).or_default();
111        entry.count += 1;
112        entry.duration_ms += event.duration_ms;
113        // output_chars is capped at 50 KB per stream (stdout + stderr each), so usize -> u64 is lossless.
114        entry.output_chars += event.output_chars as u64;
115        if export_session_id.is_none() {
116            *export_session_id = event.session_id.clone();
117        }
118    }
119
120    /// Write accumulated batch to file. Fire-and-forget semantics: errors are logged but not propagated.
121    async fn flush_batch(file: &mut tokio::fs::File, batch: Vec<MetricEvent>) {
122        for event in batch {
123            // Record to OTel metrics if available
124            record_otel_metrics(&event);
125
126            // Always write to JSONL as fallback
127            if let Ok(mut json) = serde_json::to_string(&event) {
128                json.push('\n');
129                let _ = file.write_all(json.as_bytes()).await;
130            }
131        }
132        let _ = file.flush().await;
133    }
134
135    /// Check for date transition and rotate metrics file if needed.
136    /// Returns the current file path and updates state if rotation occurred.
137    fn rotate_metrics_file(
138        base_dir: &std::path::Path,
139        current_date: &mut String,
140        current_file: &mut Option<PathBuf>,
141        dir_created: &mut bool,
142    ) -> PathBuf {
143        let new_date = current_date_str();
144        if new_date != *current_date {
145            *current_date = new_date;
146            *current_file = None;
147            *dir_created = false;
148        }
149
150        if current_file.is_none() {
151            *current_file = Some(base_dir.join(format!("metrics-{}.jsonl", current_date)));
152        }
153
154        current_file
155            .as_ref()
156            .expect("current_file is guaranteed Some after check above")
157            .clone()
158    }
159
160    /// Receive and accumulate a batch of events from the channel.
161    async fn receive_batch(
162        rx: &mut tokio::sync::mpsc::UnboundedReceiver<MetricEvent>,
163        tool_counts: &mut std::collections::HashMap<&'static str, ToolMetrics>,
164        export_session_id: &mut Option<String>,
165    ) -> Option<Vec<MetricEvent>> {
166        let mut batch = Vec::new();
167        if let Some(event) = rx.recv().await {
168            Self::accumulate_event(tool_counts, export_session_id, &event);
169            batch.push(event);
170            for _ in 0..99 {
171                match rx.try_recv() {
172                    Ok(e) => {
173                        Self::accumulate_event(tool_counts, export_session_id, &e);
174                        batch.push(e);
175                    }
176                    Err(
177                        mpsc::error::TryRecvError::Empty | mpsc::error::TryRecvError::Disconnected,
178                    ) => break,
179                }
180            }
181            Some(batch)
182        } else {
183            None
184        }
185    }
186
187    /// Ensure metrics directory exists for the given path.
188    async fn ensure_metrics_dir(path: &std::path::Path, dir_created: &mut bool) {
189        if !*dir_created
190            && let Some(parent) = path.parent()
191            && !parent.as_os_str().is_empty()
192        {
193            match tokio::fs::create_dir_all(parent).await {
194                Ok(()) => {
195                    *dir_created = true;
196                }
197                Err(e) => {
198                    tracing::warn!(
199                        error = %e,
200                        path = %parent.display(),
201                        "metrics: failed to create directory; will retry next batch"
202                    );
203                }
204            }
205        }
206    }
207
208    pub async fn run(mut self) {
209        cleanup_old_files(&self.base_dir).await;
210        let mut current_date = current_date_str();
211        let mut current_file: Option<PathBuf> = None;
212
213        // Accumulate per-tool metrics for export on shutdown (issue #773)
214        let mut tool_counts: std::collections::HashMap<&'static str, ToolMetrics> =
215            std::collections::HashMap::new();
216        let mut export_session_id: Option<String> = None;
217
218        loop {
219            let Some(batch) =
220                Self::receive_batch(&mut self.rx, &mut tool_counts, &mut export_session_id).await
221            else {
222                break;
223            };
224
225            let path = Self::rotate_metrics_file(
226                &self.base_dir,
227                &mut current_date,
228                &mut current_file,
229                &mut self.dir_created,
230            );
231
232            Self::ensure_metrics_dir(&path, &mut self.dir_created).await;
233
234            // Open file once per batch
235            let file = tokio::fs::OpenOptions::new()
236                .create(true)
237                .append(true)
238                .open(&path)
239                .await;
240
241            if let Ok(mut file) = file {
242                Self::flush_batch(&mut file, batch).await;
243            }
244        }
245
246        // Export metrics summary on shutdown (issue #773)
247        if let Ok(export_path) = std::env::var("APTU_CODER_METRICS_EXPORT_FILE") {
248            if !std::path::Path::new(&export_path).is_absolute() {
249                tracing::warn!(
250                    path = %export_path,
251                    "metrics: APTU_CODER_METRICS_EXPORT_FILE must be an absolute path; skipping export"
252                );
253            } else {
254                let mut tool_calls = Vec::new();
255                let mut total_duration_ms = 0u64;
256                let mut total_output_chars_sum = 0u64;
257                // Sort by tool name for deterministic JSON output
258                let mut sorted_tools: Vec<_> = tool_counts.iter().collect();
259                sorted_tools.sort_by_key(|&(name, _)| name);
260                for (tool_name, metrics) in sorted_tools {
261                    tool_calls.push(serde_json::json!({
262                        "tool": tool_name,
263                        "call_count": metrics.count,
264                        "total_duration_ms": metrics.duration_ms,
265                        "total_output_chars": metrics.output_chars
266                    }));
267                    total_duration_ms += metrics.duration_ms;
268                    total_output_chars_sum += metrics.output_chars;
269                }
270                let summary = serde_json::json!({
271                    "session_id": export_session_id.unwrap_or_default(),
272                    "tool_calls": tool_calls,
273                    "total_duration_ms": total_duration_ms,
274                    "total_output_chars": total_output_chars_sum
275                });
276                if let Ok(json_str) = serde_json::to_string(&summary)
277                    && let Err(e) = tokio::fs::write(&export_path, json_str).await
278                {
279                    tracing::warn!(
280                        error = %e,
281                        path = %export_path,
282                        "metrics: failed to write export file"
283                    );
284                }
285            }
286        }
287    }
288}
289
/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// Yields `0` if the clock reports a time before the epoch and `u64::MAX` if the millisecond
/// count does not fit in a `u64`.
#[must_use]
pub fn unix_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX),
        // Clock set before 1970: treated as zero elapsed time.
        Err(_) => 0,
    }
}
300
/// Number of components in `path` as split by [`Path::components`].
///
/// A leading root counts as one component, and an empty string has none.
#[must_use]
pub fn path_component_count(path: &str) -> usize {
    let components = Path::new(path).components();
    components.count()
}
306
307/// Returns the lowercased file extension of `path` as a `&'static str`.
308///
309/// - Returns `Some(ext)` for extensions recognized by [`language_for_extension`] (e.g. `"rs"`).
310/// - Returns `Some("other")` for paths that have an extension but it is not in the known set.
311/// - Returns `None` for paths with no extension or an empty extension.
312#[must_use]
313pub fn path_file_ext(path: &str) -> Option<&'static str> {
314    let ext_os = Path::new(path).extension()?;
315    let ext_str = ext_os.to_str()?;
316    if ext_str.is_empty() {
317        return None;
318    }
319    // language_for_extension does case-insensitive lookup; if found, return the
320    // canonical (lowercased) extension key from EXTENSION_MAP via supported_extensions().
321    if language_for_extension(ext_str).is_some() {
322        aptu_coder_core::lang::supported_extensions()
323            .into_iter()
324            .find(|e| e.eq_ignore_ascii_case(ext_str))
325    } else {
326        Some("other")
327    }
328}
329
/// Resolves the default directory for metrics files.
///
/// Resolution order:
/// 1. `$XDG_DATA_HOME/aptu-coder` when `XDG_DATA_HOME` holds an absolute path. Empty or
///    relative values are ignored, as the XDG Base Directory specification requires.
/// 2. `$HOME/.local/share/aptu-coder` when `HOME` is set and non-empty.
/// 3. The current directory (`.`) as a last resort.
///
/// Values are read as OS strings, so paths that are not valid UTF-8 are accepted.
fn xdg_metrics_dir() -> PathBuf {
    let xdg_data_home = std::env::var_os("XDG_DATA_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this also rejects `XDG_DATA_HOME=""`.
        .filter(|dir| dir.is_absolute());
    if let Some(dir) = xdg_data_home {
        return dir.join("aptu-coder");
    }

    match std::env::var_os("HOME").filter(|home| !home.is_empty()) {
        Some(home) => PathBuf::from(home)
            .join(".local")
            .join("share")
            .join("aptu-coder"),
        None => PathBuf::from("."),
    }
}
346
347async fn cleanup_old_files(base_dir: &Path) {
348    let now_days = u32::try_from(unix_ms() / 86_400_000).unwrap_or(u32::MAX);
349
350    let Ok(mut entries) = tokio::fs::read_dir(base_dir).await else {
351        return;
352    };
353
354    loop {
355        match entries.next_entry().await {
356            Ok(Some(entry)) => {
357                let path = entry.path();
358                let file_name = match path.file_name() {
359                    Some(n) => n.to_string_lossy().into_owned(),
360                    None => continue,
361                };
362
363                // Expected format: metrics-YYYY-MM-DD.jsonl
364                if !file_name.starts_with("metrics-")
365                    || std::path::Path::new(&*file_name)
366                        .extension()
367                        .is_none_or(|e| !e.eq_ignore_ascii_case("jsonl"))
368                {
369                    continue;
370                }
371                let date_part = &file_name[8..file_name.len() - 6];
372                if date_part.len() != 10
373                    || date_part.as_bytes().get(4) != Some(&b'-')
374                    || date_part.as_bytes().get(7) != Some(&b'-')
375                {
376                    continue;
377                }
378                let Ok(year) = date_part[0..4].parse::<u32>() else {
379                    continue;
380                };
381                let Ok(month) = date_part[5..7].parse::<u32>() else {
382                    continue;
383                };
384                let Ok(day) = date_part[8..10].parse::<u32>() else {
385                    continue;
386                };
387                if month == 0 || month > 12 || day == 0 || day > 31 {
388                    continue;
389                }
390
391                let file_days = date_to_days_since_epoch(year, month, day);
392                if now_days > file_days && (now_days - file_days) > 30 {
393                    let _ = tokio::fs::remove_file(&path).await;
394                }
395            }
396            Ok(None) => break,
397            Err(e) => {
398                tracing::warn!("error reading metrics directory entry: {e}");
399            }
400        }
401    }
402}
403
/// Converts a proleptic Gregorian calendar date to whole days since 1970-01-01.
///
/// `m` is the 1-based month and `d` the 1-based day of month. Dates before the epoch yield
/// `0`, and results that do not fit in a `u32` saturate at `u32::MAX`. The function never
/// panics, even for out-of-range inputs such as year 0 or day 0.
fn date_to_days_since_epoch(y: u32, m: u32, d: u32) -> u32 {
    // Signed 64-bit arithmetic: the year shift below can reach -1 and the intermediate day
    // counts for large years exceed the u32 range.
    let (year, month, day) = (i64::from(y), i64::from(m), i64::from(d));

    // Shift the year so it starts in March (month index 0); January and February then belong
    // to the previous shifted year, which puts the leap day at the very end of a year.
    let (year, month_index) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };

    let era = year.div_euclid(400);
    let year_of_era = year.rem_euclid(400);
    let day_of_year = (153 * month_index + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 719_468 is the number of days from 0000-03-01 to 1970-01-01.
    let days = era * 146_097 + day_of_era - 719_468;
    u32::try_from(days.max(0)).unwrap_or(u32::MAX)
}
416
417/// Returns the current UTC date as a string in YYYY-MM-DD format.
418#[must_use]
419pub fn current_date_str() -> String {
420    let days = u32::try_from(unix_ms() / 86_400_000).unwrap_or(u32::MAX);
421    let z = days + 719_468;
422    let era = z / 146_097;
423    let doe = z - era * 146_097;
424    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
425    let y = yoe + era * 400;
426    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
427    let mp = (5 * doy + 2) / 153;
428    let d = doy - (153 * mp + 2) / 5 + 1;
429    let m = if mp < 10 { mp + 3 } else { mp - 9 };
430    let y = if m <= 2 { y + 1 } else { y };
431    format!("{y:04}-{m:02}-{d:02}")
432}
433
434/// Migrate legacy metrics directory from `code-analyze-mcp` to `aptu-coder`.
435///
436/// - If the old directory exists and the new one does not, rename it and log info.
437/// - If both exist, log a warning and do nothing.
438/// - If neither exists, do nothing.
439///
440/// Returns `Ok(())` on success, propagating any I/O errors.
441pub fn migrate_legacy_metrics_dir() -> std::io::Result<()> {
442    let home =
443        std::env::var("HOME").map_err(|e| std::io::Error::new(std::io::ErrorKind::NotFound, e))?;
444    migrate_legacy_metrics_dir_impl(&home)
445}
446
447#[allow(dead_code)]
448fn migrate_legacy_metrics_dir_impl(home: &str) -> std::io::Result<()> {
449    let old_dir = PathBuf::from(home).join(".local/share/code-analyze-mcp");
450    let new_dir = PathBuf::from(home).join(".local/share/aptu-coder");
451
452    let old_exists = old_dir.is_dir();
453    let new_exists = new_dir.is_dir();
454
455    if old_exists && !new_exists {
456        std::fs::rename(&old_dir, &new_dir)?;
457        tracing::info!(
458            "Migrated legacy metrics directory from {:?} to {:?}",
459            old_dir,
460            new_dir
461        );
462    } else if old_exists && new_exists {
463        tracing::warn!("Both legacy and new metrics directories exist; not migrating");
464    }
465    // If old does not exist, nothing to do.
466    Ok(())
467}
468
469#[cfg(test)]
470mod tests {
471    use super::*;
472    use std::fs;
473    use std::sync::{Mutex, OnceLock};
474    use tempfile::TempDir;
475
476    /// Serializes tests that mutate `APTU_CODER_METRICS_EXPORT_FILE` to prevent parallel
477    /// pollution. Recovers from poison caused by panicking tests.
478    fn metrics_export_lock() -> std::sync::MutexGuard<'static, ()> {
479        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
480        let m = LOCK.get_or_init(|| Mutex::new(()));
481        m.lock().unwrap_or_else(|e| e.into_inner())
482    }
483
484    #[test]
485    fn test_migrate_legacy_only_old_exists() {
486        // Arrange
487        let tmp_home = TempDir::new().unwrap();
488        let home_str = tmp_home.path().to_str().unwrap();
489        let old_path = tmp_home.path().join(".local/share/code-analyze-mcp");
490        let new_path = tmp_home.path().join(".local/share/aptu-coder");
491        fs::create_dir_all(&old_path).unwrap();
492        assert!(!new_path.exists());
493
494        // Act
495        let result = migrate_legacy_metrics_dir_impl(home_str);
496
497        // Assert
498        assert!(result.is_ok());
499        assert!(!old_path.exists(), "old dir should be moved");
500        assert!(new_path.is_dir(), "new dir should exist");
501    }
502
503    #[test]
504    fn test_migrate_legacy_both_exist() {
505        // Arrange
506        let tmp_home = TempDir::new().unwrap();
507        let home_str = tmp_home.path().to_str().unwrap();
508        let old_path = tmp_home.path().join(".local/share/code-analyze-mcp");
509        let new_path = tmp_home.path().join(".local/share/aptu-coder");
510        fs::create_dir_all(&old_path).unwrap();
511        fs::create_dir_all(&new_path).unwrap();
512
513        // Act
514        let result = migrate_legacy_metrics_dir_impl(home_str);
515
516        // Assert
517        assert!(result.is_ok());
518        assert!(old_path.is_dir(), "old dir should remain");
519        assert!(new_path.is_dir(), "new dir should remain");
520    }
521
522    #[test]
523    fn test_migrate_legacy_neither_exists() {
524        // Arrange
525        let tmp_home = TempDir::new().unwrap();
526        let home_str = tmp_home.path().to_str().unwrap();
527        let old_path = tmp_home.path().join(".local/share/code-analyze-mcp");
528        let new_path = tmp_home.path().join(".local/share/aptu-coder");
529
530        // Act
531        let result = migrate_legacy_metrics_dir_impl(home_str);
532
533        // Assert
534        assert!(result.is_ok());
535        assert!(!old_path.exists(), "old dir should not exist");
536        assert!(!new_path.exists(), "new dir should not exist");
537    }
538
539    #[test]
540    fn test_date_to_days_since_epoch_known_dates() {
541        assert_eq!(date_to_days_since_epoch(1970, 1, 1), 0);
542        assert_eq!(date_to_days_since_epoch(2020, 1, 1), 18_262);
543        assert_eq!(date_to_days_since_epoch(2000, 2, 29), 11_016);
544    }
545
546    #[test]
547    fn test_current_date_str_format() {
548        let s = current_date_str();
549        assert_eq!(s.len(), 10);
550        assert_eq!(s.as_bytes()[4], b'-');
551        assert_eq!(s.as_bytes()[7], b'-');
552        let year: u32 = s[0..4].parse().expect("year must be numeric");
553        assert!(year >= 2020 && year <= 2100);
554    }
555
556    #[tokio::test]
557    async fn test_metrics_writer_batching() {
558        let dir = TempDir::new().unwrap();
559        let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<MetricEvent>();
560        let writer = MetricsWriter::new(rx, Some(dir.path().to_path_buf()));
561        let make_event = || MetricEvent {
562            ts: unix_ms(),
563            tool: "analyze_directory",
564            duration_ms: 1,
565            output_chars: 10,
566            param_path_depth: 1,
567            max_depth: None,
568            result: "ok",
569            error_type: None,
570            error_subtype: None,
571            session_id: None,
572            seq: None,
573            cache_hit: None,
574            cache_write_failure: None,
575            exit_code: None,
576            timed_out: false,
577            cache_tier: None,
578            output_truncated: None,
579            chars_threshold_breach: false,
580            file_ext: None,
581            filter_applied: None,
582        };
583        tx.send(make_event()).unwrap();
584        tx.send(make_event()).unwrap();
585        tx.send(make_event()).unwrap();
586        drop(tx);
587        writer.run().await;
588        let entries: Vec<_> = std::fs::read_dir(dir.path())
589            .unwrap()
590            .filter_map(|e| e.ok())
591            .filter(|e| {
592                e.path()
593                    .extension()
594                    .and_then(|x| x.to_str())
595                    .map(|x| x.eq_ignore_ascii_case("jsonl"))
596                    .unwrap_or(false)
597            })
598            .collect();
599        assert_eq!(entries.len(), 1);
600        let content = std::fs::read_to_string(entries[0].path()).unwrap();
601        let lines: Vec<&str> = content.lines().collect();
602        assert_eq!(lines.len(), 3);
603    }
604
605    #[tokio::test]
606    async fn test_cleanup_old_files_deletes_old_keeps_recent() {
607        let dir = TempDir::new().unwrap();
608        let old_file = dir.path().join("metrics-1970-01-01.jsonl");
609        let today = current_date_str();
610        let recent_file = dir.path().join(format!("metrics-{}.jsonl", today));
611        std::fs::write(&old_file, "old\n").unwrap();
612        std::fs::write(&recent_file, "recent\n").unwrap();
613        cleanup_old_files(dir.path()).await;
614        assert!(!old_file.exists());
615        assert!(recent_file.exists());
616    }
617
618    #[test]
619    fn test_metric_event_serialization() {
620        let event = MetricEvent {
621            ts: 1_700_000_000_000,
622            tool: "analyze_directory",
623            duration_ms: 42,
624            output_chars: 100,
625            param_path_depth: 3,
626            max_depth: Some(2),
627            result: "ok",
628            error_type: None,
629            error_subtype: None,
630            session_id: None,
631            seq: None,
632            cache_hit: None,
633            cache_write_failure: None,
634            exit_code: None,
635            timed_out: false,
636            cache_tier: None,
637            output_truncated: None,
638            chars_threshold_breach: false,
639            file_ext: None,
640            filter_applied: None,
641        };
642        let json = serde_json::to_string(&event).unwrap();
643        assert!(json.contains("analyze_directory"));
644        assert!(json.contains(r#""result":"ok""#));
645        assert!(json.contains(r#""output_chars":100"#));
646        // Verify error_subtype is omitted when None (backward compat)
647        assert!(!json.contains("error_subtype"));
648    }
649
650    #[test]
651    fn test_metric_event_serialization_error() {
652        let event = MetricEvent {
653            ts: 1_700_000_000_000,
654            tool: "analyze_directory",
655            duration_ms: 5,
656            output_chars: 0,
657            param_path_depth: 3,
658            max_depth: Some(3),
659            result: "error",
660            error_type: Some("invalid_params".to_string()),
661            error_subtype: None,
662            session_id: None,
663            seq: None,
664            cache_hit: None,
665            cache_write_failure: None,
666            exit_code: None,
667            timed_out: false,
668            cache_tier: None,
669            output_truncated: None,
670            chars_threshold_breach: false,
671            file_ext: None,
672            filter_applied: None,
673        };
674        let json = serde_json::to_string(&event).unwrap();
675        assert!(json.contains(r#""result":"error""#));
676        assert!(json.contains(r#""error_type":"invalid_params""#));
677        assert!(json.contains(r#""output_chars":0"#));
678        // Verify error_subtype is omitted when None (backward compat)
679        assert!(!json.contains("error_subtype"));
680    }
681
682    #[test]
683    fn test_metric_event_error_subtype_some_serializes() {
684        let event = MetricEvent {
685            ts: 1_700_000_000_000,
686            tool: "edit_replace",
687            duration_ms: 10,
688            output_chars: 0,
689            param_path_depth: 2,
690            max_depth: None,
691            result: "error",
692            error_type: Some("invalid_params".to_string()),
693            error_subtype: Some("not_found".to_string()),
694            session_id: None,
695            seq: None,
696            cache_hit: None,
697            cache_write_failure: None,
698            exit_code: None,
699            timed_out: false,
700            cache_tier: None,
701            output_truncated: None,
702            chars_threshold_breach: false,
703            file_ext: None,
704            filter_applied: None,
705        };
706        let json = serde_json::to_string(&event).unwrap();
707        assert!(json.contains(r#""error_subtype":"not_found""#));
708    }
709
710    #[test]
711    fn test_metric_event_error_subtype_ambiguous() {
712        let event = MetricEvent {
713            ts: 1_700_000_000_000,
714            tool: "edit_replace",
715            duration_ms: 10,
716            output_chars: 0,
717            param_path_depth: 2,
718            max_depth: None,
719            result: "error",
720            error_type: Some("invalid_params".to_string()),
721            error_subtype: Some("ambiguous".to_string()),
722            session_id: None,
723            seq: None,
724            cache_hit: None,
725            cache_write_failure: None,
726            exit_code: None,
727            timed_out: false,
728            cache_tier: None,
729            output_truncated: None,
730            chars_threshold_breach: false,
731            file_ext: None,
732            filter_applied: None,
733        };
734        let json = serde_json::to_string(&event).unwrap();
735        assert!(json.contains(r#""error_subtype":"ambiguous""#));
736    }
737
738    #[test]
739    fn test_metric_event_new_fields_round_trip() {
740        let event = MetricEvent {
741            ts: 1_700_000_000_000,
742            tool: "analyze_file",
743            duration_ms: 100,
744            output_chars: 500,
745            param_path_depth: 2,
746            max_depth: Some(3),
747            result: "ok",
748            error_type: None,
749            error_subtype: None,
750            session_id: Some("1742468880123-42".to_string()),
751            seq: Some(5),
752            cache_hit: None,
753            cache_write_failure: None,
754            exit_code: None,
755            timed_out: false,
756            cache_tier: None,
757            output_truncated: None,
758            chars_threshold_breach: false,
759            file_ext: None,
760            filter_applied: None,
761        };
762        let serialized = serde_json::to_string(&event).unwrap();
763        let json_str = r#"{"ts":1700000000000,"tool":"analyze_file","duration_ms":100,"output_chars":500,"param_path_depth":2,"max_depth":3,"result":"ok","error_type":null,"session_id":"1742468880123-42","seq":5}"#;
764        assert_eq!(serialized, json_str);
765    }
766
767    #[test]
768    fn test_path_file_ext_known() {
769        // Arrange / Act / Assert: known extension returns the lowercased extension key
770        assert_eq!(path_file_ext("src/main.rs"), Some("rs"));
771    }
772
773    #[test]
774    fn test_path_file_ext_unknown() {
775        // Arrange / Act / Assert: unrecognized extension returns Some("other")
776        assert_eq!(path_file_ext("file.xyz"), Some("other"));
777    }
778
779    #[test]
780    fn test_path_file_ext_no_ext() {
781        // Arrange / Act / Assert: path with no extension returns None
782        assert_eq!(path_file_ext("Makefile"), None);
783    }
784
785    #[test]
786    fn test_path_file_ext_case_insensitive() {
787        // Arrange / Act / Assert: uppercase extension is normalized to lowercase key
788        assert_eq!(path_file_ext("src/main.RS"), Some("rs"));
789    }
790
791    #[test]
792    fn test_path_file_ext_multi_dot() {
793        // Arrange / Act / Assert: multi-dot filename uses the last extension
794        assert_eq!(path_file_ext("file.test.rs"), Some("rs"));
795    }
796
797    #[tokio::test]
798    async fn test_metrics_export_file_created() {
799        let _guard = metrics_export_lock();
800        // Arrange: create temp dir and set export env var
801        let dir = TempDir::new().unwrap();
802        let export_file = dir.path().join("metrics_export.json");
803        let export_path_str = export_file.to_string_lossy().to_string();
804
805        unsafe {
806            std::env::set_var("APTU_CODER_METRICS_EXPORT_FILE", &export_path_str);
807        }
808
809        // Create metrics writer and send events
810        let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<MetricEvent>();
811        let writer = MetricsWriter::new(rx, Some(dir.path().to_path_buf()));
812
813        // Act: send a few events with session_id
814        tx.send(MetricEvent {
815            ts: unix_ms(),
816            tool: "analyze_directory",
817            duration_ms: 100,
818            output_chars: 50,
819            param_path_depth: 1,
820            max_depth: None,
821            result: "ok",
822            error_type: None,
823            error_subtype: None,
824            session_id: Some("test-session-123".to_string()),
825            seq: None,
826            cache_hit: None,
827            cache_write_failure: None,
828            exit_code: None,
829            timed_out: false,
830            cache_tier: None,
831            output_truncated: None,
832            chars_threshold_breach: false,
833            file_ext: None,
834            filter_applied: None,
835        })
836        .unwrap();
837        tx.send(MetricEvent {
838            ts: unix_ms(),
839            tool: "analyze_file",
840            duration_ms: 50,
841            output_chars: 100,
842            param_path_depth: 2,
843            max_depth: Some(3),
844            result: "ok",
845            error_type: None,
846            error_subtype: None,
847            session_id: Some("test-session-123".to_string()),
848            seq: None,
849            cache_hit: None,
850            cache_write_failure: None,
851            exit_code: None,
852            timed_out: false,
853            cache_tier: None,
854            output_truncated: None,
855            chars_threshold_breach: false,
856            file_ext: None,
857            filter_applied: None,
858        })
859        .unwrap();
860        drop(tx);
861        writer.run().await;
862
863        // Assert: export file should exist with correct JSON structure
864        assert!(
865            export_file.exists(),
866            "export file should be created at {:?}",
867            export_file
868        );
869        let content = std::fs::read_to_string(&export_file).unwrap();
870        let json: serde_json::Value = serde_json::from_str(&content).unwrap();
871
872        assert_eq!(
873            json["session_id"], "test-session-123",
874            "export should contain correct session_id"
875        );
876        assert!(
877            json["tool_calls"].is_array(),
878            "export should contain tool_calls array"
879        );
880        let tool_calls = json["tool_calls"].as_array().unwrap();
881        assert_eq!(tool_calls.len(), 2, "should have 2 tool calls");
882        assert!(
883            json["total_duration_ms"].is_number(),
884            "export should contain total_duration_ms"
885        );
886        assert_eq!(
887            json["total_duration_ms"], 150,
888            "total_duration_ms should be sum of all durations"
889        );
890        assert_eq!(
891            json["tool_calls"][0]["total_output_chars"], 50,
892            "first tool call should have total_output_chars=50"
893        );
894        assert_eq!(
895            json["tool_calls"][1]["total_output_chars"], 100,
896            "second tool call should have total_output_chars=100"
897        );
898        assert_eq!(
899            json["total_output_chars"], 150,
900            "total_output_chars should be sum of all output_chars"
901        );
902
903        // Cleanup
904        unsafe {
905            std::env::remove_var("APTU_CODER_METRICS_EXPORT_FILE");
906        }
907    }
908
909    #[tokio::test]
910    async fn test_metrics_export_env_var_unset() {
911        let _guard = metrics_export_lock();
912        // Arrange: ensure env var is not set
913        unsafe {
914            std::env::remove_var("APTU_CODER_METRICS_EXPORT_FILE");
915        }
916        let dir = TempDir::new().unwrap();
917        // Use a unique marker to ensure we don't pick up files from other tests
918        let marker = "metrics_export_unset_test";
919
920        // Create metrics writer and send events
921        let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<MetricEvent>();
922        let writer = MetricsWriter::new(rx, Some(dir.path().to_path_buf()));
923
924        // Act: send events and run writer
925        tx.send(MetricEvent {
926            ts: unix_ms(),
927            tool: "analyze_directory",
928            duration_ms: 100,
929            output_chars: 50,
930            param_path_depth: 1,
931            max_depth: None,
932            result: "ok",
933            error_type: None,
934            error_subtype: None,
935            session_id: Some("test-session-456".to_string()),
936            seq: None,
937            cache_hit: None,
938            cache_write_failure: None,
939            exit_code: None,
940            timed_out: false,
941            cache_tier: None,
942            output_truncated: None,
943            chars_threshold_breach: false,
944            file_ext: None,
945            filter_applied: None,
946        })
947        .unwrap();
948        drop(tx);
949        writer.run().await;
950
951        // Assert: no export file should be created
952        let entries: Vec<_> = std::fs::read_dir(dir.path())
953            .unwrap()
954            .filter_map(|e| e.ok())
955            .filter(|e| {
956                e.path()
957                    .file_name()
958                    .and_then(|n| n.to_str())
959                    .map(|n| n.contains(marker))
960                    .unwrap_or(false)
961            })
962            .collect();
963        assert_eq!(
964            entries.len(),
965            0,
966            "no export file should be created when env var is unset"
967        );
968    }
969
970    #[tokio::test]
971    async fn test_metrics_export_relative_path_rejected() {
972        let _guard = metrics_export_lock();
973        // Arrange: set export env var to a relative path
974        let relative_path = "relative/path/metrics.json";
975        unsafe {
976            std::env::set_var("APTU_CODER_METRICS_EXPORT_FILE", relative_path);
977        }
978
979        let dir = TempDir::new().unwrap();
980        // Use a unique marker to ensure we don't pick up files from other tests
981        let marker = "metrics_export_relative_test";
982
983        // Create metrics writer and send events
984        let (tx, rx) = tokio::sync::mpsc::unbounded_channel::<MetricEvent>();
985        let writer = MetricsWriter::new(rx, Some(dir.path().to_path_buf()));
986
987        // Act: send events and run writer
988        tx.send(MetricEvent {
989            ts: unix_ms(),
990            tool: "analyze_directory",
991            duration_ms: 100,
992            output_chars: 50,
993            param_path_depth: 1,
994            max_depth: None,
995            result: "ok",
996            error_type: None,
997            error_subtype: None,
998            session_id: Some(marker.to_string()),
999            seq: None,
1000            cache_hit: None,
1001            cache_write_failure: None,
1002            exit_code: None,
1003            timed_out: false,
1004            cache_tier: None,
1005            output_truncated: None,
1006            chars_threshold_breach: false,
1007            file_ext: None,
1008            filter_applied: None,
1009        })
1010        .unwrap();
1011        drop(tx);
1012        writer.run().await;
1013
1014        // Assert: no export file should be created for relative path
1015        let entries: Vec<_> = std::fs::read_dir(dir.path())
1016            .unwrap()
1017            .filter_map(|e| e.ok())
1018            .filter(|e| {
1019                e.path()
1020                    .file_name()
1021                    .and_then(|n| n.to_str())
1022                    .map(|n| n.contains("metrics.json"))
1023                    .unwrap_or(false)
1024            })
1025            .collect();
1026        assert_eq!(
1027            entries.len(),
1028            0,
1029            "no export file should be created for relative path"
1030        );
1031
1032        // Cleanup
1033        unsafe {
1034            std::env::remove_var("APTU_CODER_METRICS_EXPORT_FILE");
1035        }
1036    }
1037}
1038
1039/// Record a metric event to OTel metrics if the global meter provider is available.
1040///
1041/// Records:
1042/// - Histogram: mcp.server.operation.duration (in milliseconds)
1043/// - Counter: mcp.server.tool.calls (incremented by 1)
1044///
1045/// Labels: gen_ai.tool.name, error.type (or "none" if no error)
1046///
1047/// Instruments are initialized once via OnceLock to avoid rebuilding them on every call.
1048fn record_otel_metrics(event: &MetricEvent) {
1049    use opentelemetry::metrics::{Counter, Histogram};
1050    use opentelemetry::{KeyValue, global};
1051    use std::sync::OnceLock;
1052
1053    static DURATION_HISTOGRAM: OnceLock<Histogram<f64>> = OnceLock::new();
1054    static CALL_COUNTER: OnceLock<Counter<u64>> = OnceLock::new();
1055    static CACHE_HITS_COUNTER: OnceLock<Counter<u64>> = OnceLock::new();
1056    static CACHE_WRITE_FAILURES_COUNTER: OnceLock<Counter<u64>> = OnceLock::new();
1057
1058    let histogram = DURATION_HISTOGRAM.get_or_init(|| {
1059        global::meter("aptu-coder")
1060            .f64_histogram("mcp.server.operation.duration")
1061            .with_unit("s")
1062            .with_boundaries(vec![
1063                0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0,
1064            ])
1065            .build()
1066    });
1067
1068    let counter = CALL_COUNTER.get_or_init(|| {
1069        global::meter("aptu-coder")
1070            .u64_counter("mcp.server.tool.calls")
1071            .build()
1072    });
1073
1074    let cache_hits_counter = CACHE_HITS_COUNTER.get_or_init(|| {
1075        global::meter("aptu-coder")
1076            .u64_counter("mcp.server.tool.cache_hits_total")
1077            .with_description("Number of tool responses served from cache (l1_memory or l2_disk)")
1078            .build()
1079    });
1080
1081    let cache_write_failures_counter = CACHE_WRITE_FAILURES_COUNTER.get_or_init(|| {
1082        global::meter("aptu-coder")
1083            .u64_counter("mcp.server.tool.cache_write_failures_total")
1084            .with_description(
1085                "Number of L2 disk cache write failures (dir, tempfile, write, rename)",
1086            )
1087            .build()
1088    });
1089
1090    let error_type = event.error_type.as_deref().unwrap_or("success");
1091    let attributes = [
1092        KeyValue::new("gen_ai.tool.name", event.tool.to_string()),
1093        KeyValue::new("error.type", error_type.to_string()),
1094        KeyValue::new("mcp.method.name", "tools/call"),
1095        KeyValue::new("mcp.protocol.version", "2025-11-25"),
1096        KeyValue::new("network.transport", "pipe"),
1097    ];
1098
1099    histogram.record(event.duration_ms as f64 / 1000.0, &attributes);
1100    counter.add(1, &attributes);
1101
1102    if event.cache_hit == Some(true) {
1103        let tier = event.cache_tier.unwrap_or("unknown");
1104        cache_hits_counter.add(
1105            1,
1106            &[
1107                KeyValue::new("gen_ai.tool.name", event.tool.to_string()),
1108                KeyValue::new("cache_tier", tier.to_string()),
1109            ],
1110        );
1111    }
1112
1113    if event.cache_write_failure == Some(true) {
1114        cache_write_failures_counter.add(
1115            1,
1116            &[KeyValue::new("gen_ai.tool.name", event.tool.to_string())],
1117        );
1118    }
1119}