zeph_core/
metrics.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::VecDeque;
5
6use tokio::sync::watch;
7use zeph_common::SecurityEventCategory;
8
9pub use zeph_llm::{ClassifierMetricsSnapshot, TaskMetricsSnapshot};
10pub use zeph_memory::{CategoryScore, ProbeCategory, ProbeVerdict};
11
12/// A single security event record for TUI display.
13#[derive(Debug, Clone)]
14pub struct SecurityEvent {
15    /// Unix timestamp (seconds since epoch).
16    pub timestamp: u64,
17    pub category: SecurityEventCategory,
18    /// Source that triggered the event (e.g., `web_scrape`, `mcp_response`).
19    pub source: String,
20    /// Short description, capped at 128 chars.
21    pub detail: String,
22}
23
24impl SecurityEvent {
25    #[must_use]
26    pub fn new(
27        category: SecurityEventCategory,
28        source: impl Into<String>,
29        detail: impl Into<String>,
30    ) -> Self {
31        // IMP-1: cap source at 64 chars and strip ASCII control chars.
32        let source: String = source
33            .into()
34            .chars()
35            .filter(|c| !c.is_ascii_control())
36            .take(64)
37            .collect();
38        // CR-1: UTF-8 safe truncation using floor_char_boundary (stable since Rust 1.82).
39        let detail = detail.into();
40        let detail = if detail.len() > 128 {
41            let end = detail.floor_char_boundary(127);
42            format!("{}…", &detail[..end])
43        } else {
44            detail
45        };
46        Self {
47            timestamp: std::time::SystemTime::now()
48                .duration_since(std::time::UNIX_EPOCH)
49                .unwrap_or_default()
50                .as_secs(),
51            category,
52            source,
53            detail,
54        }
55    }
56}
57
/// Ring buffer capacity for security events.
///
/// This is the cap referred to by the documentation of `MetricsSnapshot::security_events`
/// ("cap 100, FIFO eviction"). The constant only names the limit; eviction itself is
/// performed by whichever code pushes events into that buffer.
pub const SECURITY_EVENT_CAP: usize = 100;
60
/// Lightweight snapshot of a single task row for TUI display.
///
/// Captured from the task graph on each metrics tick; kept minimal on purpose.
///
/// Rows are built by the `From<&zeph_orchestration::TaskGraph>` conversion for
/// `TaskGraphSnapshot` in this file; the field notes below describe what that
/// conversion stores.
#[derive(Debug, Clone)]
pub struct TaskSnapshotRow {
    /// Task identifier, obtained from the task's id via `as_u32()`.
    pub id: u32,
    /// Task title with control characters and ANSI CSI sequences stripped.
    pub title: String,
    /// Stringified `TaskStatus` (e.g. `"pending"`, `"running"`, `"completed"`).
    pub status: String,
    /// Name of the assigned agent (control characters stripped), or `None` when unassigned.
    pub agent: Option<String>,
    /// Duration reported by the task result, in milliseconds; `0` when there is no result.
    pub duration_ms: u64,
    /// Error text taken from the result output when the task failed and the output is
    /// non-empty; `None` otherwise. Control characters are stripped, and text longer than
    /// 80 bytes is cut to at most 79 bytes followed by `…`.
    pub error: Option<String>,
}
75
76/// Lightweight snapshot of a `TaskGraph` for TUI display.
77#[derive(Debug, Clone, Default)]
78pub struct TaskGraphSnapshot {
79    pub graph_id: String,
80    pub goal: String,
81    /// Stringified `GraphStatus` (e.g. `"created"`, `"running"`, `"completed"`).
82    pub status: String,
83    pub tasks: Vec<TaskSnapshotRow>,
84    pub completed_at: Option<std::time::Instant>,
85}
86
87impl TaskGraphSnapshot {
88    /// Returns `true` if this snapshot represents a terminal plan that finished
89    /// more than 30 seconds ago and should no longer be shown in the TUI.
90    #[must_use]
91    pub fn is_stale(&self) -> bool {
92        self.completed_at
93            .is_some_and(|t| t.elapsed().as_secs() > 30)
94    }
95}
96
/// Counters for the task orchestration subsystem.
///
/// Always present in [`MetricsSnapshot`]; zero-valued when orchestration is inactive.
/// The derived `Default` initialises every counter to `0`.
#[derive(Debug, Clone, Default)]
pub struct OrchestrationMetrics {
    // NOTE(review): the precise increment points live in the orchestrator, not in this
    // file; the field names are the only contract visible here.
    pub plans_total: u64,
    pub tasks_total: u64,
    pub tasks_completed: u64,
    pub tasks_failed: u64,
    pub tasks_skipped: u64,
}
108
/// Connection status of a single MCP server for TUI display.
///
/// The enum is `#[non_exhaustive]`: matches written outside this crate need a wildcard arm
/// so that new variants can be added without a breaking change.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum McpServerConnectionStatus {
    /// The server is connected.
    Connected,
    /// The server is not connected; `McpServerStatus::error` carries the reason.
    Failed,
}
116
/// Per-server MCP status snapshot for TUI display.
///
/// One value describes one configured server; `MetricsSnapshot::mcp_servers` holds the list.
#[derive(Debug, Clone)]
pub struct McpServerStatus {
    /// Identifier of the server this entry describes.
    pub id: String,
    /// Whether the server is currently connected or failed.
    pub status: McpServerConnectionStatus,
    /// Number of tools provided by this server (0 when failed).
    pub tool_count: usize,
    /// Human-readable failure reason. Empty when connected.
    pub error: String,
}
127
/// Bayesian confidence data for a single skill, used by TUI confidence bar.
#[derive(Debug, Clone, Default)]
pub struct SkillConfidence {
    /// Name of the skill this entry belongs to.
    pub name: String,
    /// Posterior value rendered by the confidence bar.
    // NOTE(review): presumably a probability in [0.0, 1.0] — confirm against the producer.
    pub posterior: f64,
    /// Use count associated with the skill.
    // NOTE(review): presumably the number of observations behind `posterior` — confirm.
    pub total_uses: u32,
}
135
/// Snapshot of a single sub-agent's runtime status.
///
/// All enum-like values are carried as strings so the snapshot stays free of the
/// originating types.
#[derive(Debug, Clone, Default)]
pub struct SubAgentMetrics {
    /// Identifier of the sub-agent.
    pub id: String,
    /// Display name of the sub-agent.
    pub name: String,
    /// Stringified `TaskState`: `"working"`, `"completed"`, `"failed"`, `"canceled"`, etc.
    pub state: String,
    /// Turns consumed so far.
    pub turns_used: u32,
    /// Turn limit for this sub-agent.
    pub max_turns: u32,
    /// Whether the sub-agent runs in the background.
    pub background: bool,
    /// Elapsed run time in seconds.
    pub elapsed_secs: u64,
    /// Stringified `PermissionMode`: `"default"`, `"accept_edits"`, `"dont_ask"`,
    /// `"bypass_permissions"`, `"plan"`. Empty string when mode is `Default`.
    // NOTE(review): the doc above lists `"default"` as a value but also says the string is
    // empty for `Default` — confirm which of the two the producer actually emits.
    pub permission_mode: String,
    /// Path to the directory containing this agent's JSONL transcript file.
    /// `None` when transcript writing is disabled for this agent.
    pub transcript_dir: Option<String>,
}
154
/// Per-turn latency breakdown for the four agent hot-path phases.
///
/// Populated with `Instant`-based measurements at each phase boundary.
/// All values are in milliseconds. The derived `Default` yields all zeros.
#[derive(Debug, Clone, Default)]
pub struct TurnTimings {
    /// Time spent preparing the context, in milliseconds.
    pub prepare_context_ms: u64,
    /// Time spent in the LLM chat call, in milliseconds.
    pub llm_chat_ms: u64,
    /// Time spent executing tools, in milliseconds.
    pub tool_exec_ms: u64,
    /// Time spent persisting the message, in milliseconds.
    pub persist_message_ms: u64,
}
166
/// Live snapshot of agent metrics broadcast via a [`tokio::sync::watch`] channel.
///
/// Fields are updated at different rates: some once at startup (static), others every turn
/// (dynamic). For fields that are known at agent startup and do not change during the session,
/// use [`StaticMetricsInit`] and `AgentBuilder::with_static_metrics` instead of
/// adding a raw `send_modify` call in the runner.
///
/// The struct derives `Default`, so a fresh snapshot has every counter at zero, every
/// collection and string empty, every flag `false`, and every `Option` set to `None`.
#[derive(Debug, Clone, Default)]
#[allow(clippy::struct_excessive_bools)] // independent boolean flags; bitflags or enum would obscure semantics without reducing complexity
pub struct MetricsSnapshot {
    // --- Token usage and API activity ---
    pub prompt_tokens: u64,
    pub completion_tokens: u64,
    pub total_tokens: u64,
    /// Reasoning tokens from the last turn (`OpenAI` o-series only).
    ///
    /// This is a **subset** of `completion_tokens` and must not be added to cost separately.
    pub reasoning_tokens: u64,
    pub context_tokens: u64,
    pub api_calls: u64,
    // --- Skills and MCP ---
    pub active_skills: Vec<String>,
    pub total_skills: usize,
    /// Total configured MCP servers (connected + failed).
    pub mcp_server_count: usize,
    pub mcp_tool_count: usize,
    /// Number of successfully connected MCP servers.
    pub mcp_connected_count: usize,
    /// Per-server connection status list.
    pub mcp_servers: Vec<McpServerStatus>,
    pub active_mcp_tools: Vec<String>,
    // --- Storage, provider, and session ---
    pub sqlite_message_count: u64,
    pub sqlite_conversation_id: Option<zeph_memory::ConversationId>,
    pub qdrant_available: bool,
    pub vector_backend: String,
    pub embeddings_generated: u64,
    pub last_llm_latency_ms: u64,
    pub uptime_seconds: u64,
    pub provider_name: String,
    pub model_name: String,
    // --- Summarization, compaction, and compression ---
    pub summaries_count: u64,
    pub context_compactions: u64,
    /// Number of times the agent entered the Hard compaction tier, including cooldown-skipped
    /// turns. Not equal to the actual LLM summarization count — reflects pressure, not action.
    pub compaction_hard_count: u64,
    /// User-message turns elapsed after each hard compaction event.
    /// Entry i = turns between hard compaction i and hard compaction i+1 (or session end).
    /// Empty when no hard compaction occurred during the session.
    pub compaction_turns_after_hard: Vec<u64>,
    pub compression_events: u64,
    pub compression_tokens_saved: u64,
    /// Tool results compressed by Acon (#4021) this session.
    pub acon_results_compressed: u64,
    /// Tokens saved by Acon tool-result compression (#4021) this session.
    pub acon_tokens_saved: u64,
    pub tool_output_prunes: u64,
    /// Compaction probe outcomes (#1609).
    pub compaction_probe_passes: u64,
    /// Compaction probe soft failures (summary borderline — compaction proceeded with warning).
    pub compaction_probe_soft_failures: u64,
    /// Compaction probe hard failures (compaction blocked due to lossy summary).
    pub compaction_probe_failures: u64,
    /// Compaction probe errors (LLM/timeout — non-blocking, compaction proceeded).
    pub compaction_probe_errors: u64,
    /// Last compaction probe verdict. `None` before the first probe completes.
    pub last_probe_verdict: Option<zeph_memory::ProbeVerdict>,
    /// Last compaction probe score in [0.0, 1.0]. `None` before the first probe
    /// completes or after an Error verdict (errors produce no score).
    pub last_probe_score: Option<f32>,
    /// Per-category scores from the last completed probe.
    pub last_probe_category_scores: Option<Vec<zeph_memory::CategoryScore>>,
    /// Configured pass threshold for the compaction probe. Used by TUI for category color-coding.
    pub compaction_probe_threshold: f32,
    /// Configured hard-fail threshold for the compaction probe.
    pub compaction_probe_hard_fail_threshold: f32,
    // --- Caching and cost ---
    pub cache_read_tokens: u64,
    pub cache_creation_tokens: u64,
    pub cost_spent_cents: f64,
    /// Cost per successful task in cents. `None` until at least one task completes.
    pub cost_cps_cents: Option<f64>,
    /// Number of successful tasks recorded today.
    pub cost_successful_tasks: u64,
    /// Per-provider cost breakdown, sorted by cost descending.
    pub provider_cost_breakdown: Vec<(String, crate::cost::ProviderUsage)>,
    // --- Output filtering ---
    pub filter_raw_tokens: u64,
    pub filter_saved_tokens: u64,
    pub filter_applications: u64,
    pub filter_total_commands: u64,
    pub filter_filtered_commands: u64,
    pub filter_confidence_full: u64,
    pub filter_confidence_partial: u64,
    pub filter_confidence_fallback: u64,
    pub cancellations: u64,
    pub server_compaction_events: u64,
    // --- Security counters ---
    pub sanitizer_runs: u64,
    pub sanitizer_injection_flags: u64,
    /// Injection pattern hits on `ToolResult` (local) sources — likely false positives.
    ///
    /// Counts regex hits that fired on content from `shell`, `read_file`, `search_code`, etc.
    /// These sources are user-owned and not adversarial; a non-zero value indicates a pattern
    /// that needs tightening or a source reclassification.
    pub sanitizer_injection_fp_local: u64,
    pub sanitizer_truncations: u64,
    pub quarantine_invocations: u64,
    pub quarantine_failures: u64,
    /// ML classifier hard-blocked tool outputs (`enforcement_mode=block` only).
    pub classifier_tool_blocks: u64,
    /// ML classifier suspicious tool outputs (both enforcement modes).
    pub classifier_tool_suspicious: u64,
    /// `TurnCausalAnalyzer` flags: behavioral deviation detected at tool-return boundary.
    pub causal_ipi_flags: u64,
    /// VIGIL pre-sanitizer flags: tool outputs matched injection patterns (any action).
    pub vigil_flags_total: u64,
    /// VIGIL pre-sanitizer blocks: tool outputs replaced with sentinel (`strict_mode=true`).
    pub vigil_blocks_total: u64,
    pub exfiltration_images_blocked: u64,
    pub exfiltration_tool_urls_flagged: u64,
    pub exfiltration_memory_guards: u64,
    pub pii_scrub_count: u64,
    /// Number of times the PII NER classifier timed out; input fell back to regex-only.
    pub pii_ner_timeouts: u64,
    /// Number of times the PII NER circuit breaker tripped (disabled NER for the session).
    pub pii_ner_circuit_breaker_trips: u64,
    pub memory_validation_failures: u64,
    pub rate_limit_trips: u64,
    pub pre_execution_blocks: u64,
    pub pre_execution_warnings: u64,
    /// `true` when a guardrail filter is active for this session.
    pub guardrail_enabled: bool,
    /// `true` when guardrail is in warn-only mode (action = warn).
    pub guardrail_warn_mode: bool,
    // --- Sub-agents, skills, scheduling, routing ---
    pub sub_agents: Vec<SubAgentMetrics>,
    pub skill_confidence: Vec<SkillConfidence>,
    /// Scheduled task summaries: `[name, kind, mode, next_run]`.
    pub scheduled_tasks: Vec<[String; 4]>,
    /// Thompson Sampling distribution snapshots: `(provider, alpha, beta)`.
    pub router_thompson_stats: Vec<(String, f64, f64)>,
    /// Ring buffer of recent security events (cap 100, FIFO eviction).
    ///
    /// The cap is named by [`SECURITY_EVENT_CAP`]; the `VecDeque` itself does not enforce it.
    pub security_events: VecDeque<SecurityEvent>,
    // --- Orchestration and knowledge graph ---
    pub orchestration: OrchestrationMetrics,
    /// Live snapshot of the currently active task graph. `None` when no plan is active.
    pub orchestration_graph: Option<TaskGraphSnapshot>,
    pub graph_community_detection_failures: u64,
    pub graph_entities_total: u64,
    pub graph_edges_total: u64,
    pub graph_communities_total: u64,
    pub graph_extraction_count: u64,
    pub graph_extraction_failures: u64,
    /// `true` when `config.llm.cloud.enable_extended_context = true`.
    /// Never set for other providers to avoid false positives.
    pub extended_context: bool,
    /// Latest compression-guidelines version (0 = no guidelines yet).
    pub guidelines_version: u32,
    /// ISO 8601 timestamp of the latest guidelines update (empty if none).
    pub guidelines_updated_at: String,
    pub tool_cache_hits: u64,
    pub tool_cache_misses: u64,
    pub tool_cache_entries: usize,
    /// Number of semantic-tier facts in memory (0 when tier promotion disabled).
    pub semantic_fact_count: u64,
    // --- Configuration-derived fields (see `StaticMetricsInit`) ---
    /// STT model name (e.g. `"whisper-1"`). `None` when STT is not configured.
    pub stt_model: Option<String>,
    /// Model used for context compaction/summarization. `None` when no summary provider is set.
    pub compaction_model: Option<String>,
    /// Temperature of the active provider when using Candle. `None` for API providers.
    pub provider_temperature: Option<f32>,
    /// Top-p of the active provider when using Candle. `None` for API providers.
    pub provider_top_p: Option<f32>,
    /// Embedding model name (e.g. `"nomic-embed-text"`). Empty when embeddings are disabled.
    pub embedding_model: String,
    /// Token budget for context window. `None` when not configured.
    pub token_budget: Option<u64>,
    /// Token threshold that triggers soft compaction. `None` when not configured.
    pub compaction_threshold: Option<u32>,
    /// Vault backend identifier: `"age"`, `"env"`, or `"none"`.
    pub vault_backend: String,
    /// Active I/O channel name: `"cli"`, `"telegram"`, `"tui"`, `"discord"`, `"slack"`.
    pub active_channel: String,
    // --- Background supervisor ---
    /// Background supervisor: inflight tasks across all classes.
    pub bg_inflight: u64,
    /// Background supervisor: total tasks dropped due to concurrency limit (all classes).
    pub bg_dropped: u64,
    /// Background supervisor: total tasks completed (all classes).
    pub bg_completed: u64,
    /// Background supervisor: inflight enrichment tasks.
    pub bg_enrichment_inflight: u64,
    /// Background supervisor: inflight telemetry tasks.
    pub bg_telemetry_inflight: u64,
    /// In-flight background shell runs. Empty when none are running or no `ShellExecutor` is wired.
    pub shell_background_runs: Vec<ShellBackgroundRunRow>,
    // --- Feature flags ---
    /// Whether self-learning (skill evolution) is enabled.
    pub self_learning_enabled: bool,
    /// Whether the semantic response cache is enabled.
    pub semantic_cache_enabled: bool,
    /// Whether semantic response caching is enabled (alias for `semantic_cache_enabled`).
    pub cache_enabled: bool,
    /// Whether assistant messages are auto-saved to memory.
    pub autosave_enabled: bool,
    // --- Latency ---
    /// Classifier p50/p95 latency metrics per task (injection, pii, feedback).
    pub classifier: ClassifierMetricsSnapshot,
    /// Latency breakdown for the most recently completed agent turn.
    pub last_turn_timings: TurnTimings,
    /// Rolling average of per-phase latency over the last 10 turns.
    pub avg_turn_timings: TurnTimings,
    /// Maximum per-phase latency observed within the rolling window (tail-latency visibility).
    ///
    /// M3: exposes `max_in_window` alongside the rolling average for operational monitoring.
    pub max_turn_timings: TurnTimings,
    /// Number of turns included in `avg_turn_timings` and `max_turn_timings` (capped at 10).
    pub timing_sample_count: u64,
    // --- Egress ---
    /// Total egress (outbound HTTP) requests attempted this session.
    pub egress_requests_total: u64,
    /// Egress events dropped due to bounded channel backpressure.
    pub egress_dropped_total: u64,
    /// Egress requests blocked by scheme/domain/SSRF policy.
    pub egress_blocked_total: u64,
    // --- Context window and last compaction ---
    /// Runtime-resolved context window limit (tokens).
    ///
    /// Populated from `resolve_context_budget` after provider pool construction and refreshed on
    /// every `/provider` switch. `0` means unknown (pre-init or provider has no declared window);
    /// the TUI gauge renders `"—"` in this case to avoid divide-by-zero.
    pub context_max_tokens: u64,
    /// Token count at the time the most recent compaction was triggered. `0` = never compacted.
    pub compaction_last_before: u64,
    /// Token count after the most recent compaction completed. `0` = never compacted.
    pub compaction_last_after: u64,
    /// Unix epoch milliseconds when the most recent compaction occurred. `0` = never compacted.
    pub compaction_last_at_ms: u64,
    /// Active long-horizon goal for TUI display. `None` when no goal is active.
    pub active_goal: Option<crate::goal::GoalSnapshot>,
    // --- Cocoon sidecar ---
    /// Cocoon sidecar connection state. `None` when Cocoon is not configured.
    /// `Some(true)` = proxy connected, `Some(false)` = unreachable or disconnected.
    pub cocoon_connected: Option<bool>,
    /// Worker count reported by the Cocoon sidecar. `0` when not connected or not configured.
    pub cocoon_worker_count: u32,
    /// Number of models available through the Cocoon sidecar.
    pub cocoon_model_count: usize,
    /// TON wallet balance in TON units. `None` when unknown or Cocoon not configured.
    pub cocoon_ton_balance: Option<f64>,
}
404
/// Snapshot of a single in-flight background shell run for TUI display.
///
/// Populated from [`zeph_tools::BackgroundRunSnapshot`] during the per-turn metrics update in
/// `reap_background_tasks_and_update_metrics`. The `run_id` is truncated to 8 hex chars for
/// compact TUI rendering.
///
/// This type is plain data: the truncations described on the fields are applied by the code
/// that fills it in, not by anything defined here. It additionally derives
/// `serde::Serialize`.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct ShellBackgroundRunRow {
    /// First 8 hex characters of the run's UUID, sufficient for unique identification in TUI.
    pub run_id: String,
    /// Original command, truncated to 80 characters.
    pub command: String,
    /// Wall-clock elapsed seconds since spawn.
    pub elapsed_secs: u64,
}
419
/// Configuration-derived fields of [`MetricsSnapshot`] that are known at agent startup and do
/// not change during the session.
///
/// Pass this struct to `AgentBuilder::with_static_metrics` immediately after
/// `AgentBuilder::with_metrics` to initialize all static fields in one place rather than
/// through scattered `send_modify` calls in the runner.
///
/// Every field except `model_name_override` has a same-named counterpart in
/// [`MetricsSnapshot`]. The derived `Default` leaves strings empty, flags `false`, and
/// options `None`, which is what the struct-update syntax in the example relies on.
///
/// # Examples
///
/// ```no_run
/// use zeph_core::metrics::StaticMetricsInit;
///
/// let init = StaticMetricsInit {
///     active_channel: "cli".to_owned(),
///     ..StaticMetricsInit::default()
/// };
/// ```
#[derive(Debug, Default)]
pub struct StaticMetricsInit {
    /// STT model name (e.g. `"whisper-1"`). `None` when STT is not configured.
    pub stt_model: Option<String>,
    /// Model used for context compaction/summarization. `None` when no summary provider is set.
    pub compaction_model: Option<String>,
    /// Whether the semantic response cache is enabled.
    ///
    /// This value is also written to [`MetricsSnapshot::cache_enabled`] which is an alias for the
    /// same concept.
    pub semantic_cache_enabled: bool,
    /// Embedding model name (e.g. `"nomic-embed-text"`). Empty when embeddings are disabled.
    pub embedding_model: String,
    /// Whether self-learning (skill evolution) is enabled.
    pub self_learning_enabled: bool,
    /// Active I/O channel name: `"cli"`, `"telegram"`, `"tui"`, `"discord"`, `"slack"`.
    pub active_channel: String,
    /// Token budget for context window. `None` when not configured.
    pub token_budget: Option<u64>,
    /// Token threshold that triggers soft compaction. `None` when not configured.
    pub compaction_threshold: Option<u32>,
    /// Vault backend identifier: `"age"`, `"env"`, or `"none"`.
    pub vault_backend: String,
    /// Whether assistant messages are auto-saved to memory.
    pub autosave_enabled: bool,
    /// Override for the active model name. When `Some`, replaces the model name set by the
    /// builder from `runtime.model_name` (which may be a placeholder) with the effective model
    /// resolved from the LLM provider configuration.
    pub model_name_override: Option<String>,
}
467
/// Strip control characters and ANSI CSI escape sequences from a string for safe TUI display.
///
/// Tab, LF, and CR are preserved. Every other character for which [`char::is_control`] is
/// true is dropped — that is the C0 range (`0x00–0x1F`), DEL (`0x7F`), and the C1 range
/// (`0x80–0x9F`). This prevents escape-sequence injection from LLM planner output into the
/// TUI.
///
/// An `ESC [` introducer is removed together with the parameter/intermediate bytes
/// (`0x20–0x3F`) that follow it and at most one final byte (`0x40–0x7E`). A malformed or
/// unterminated sequence therefore loses only the bytes that can belong to it; ordinary
/// text after it (newlines, non-ASCII characters) is kept. A lone `ESC` not followed by
/// `[` is dropped on its own.
fn strip_ctrl(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\x1b' => {
                // ESC is never emitted. If it introduces a CSI sequence, consume that too.
                if chars.next_if_eq(&'[').is_some() {
                    // Parameter and intermediate bytes.
                    while chars
                        .next_if(|p| ('\x20'..='\x3f').contains(p))
                        .is_some()
                    {}
                    // Optional final byte; anything else is left for normal processing.
                    let _ = chars.next_if(|f| ('\x40'..='\x7e').contains(f));
                }
            }
            // Whitespace controls that are safe to render.
            '\t' | '\n' | '\r' => out.push(c),
            // Every other control character is dropped.
            _ if c.is_control() => {}
            _ => out.push(c),
        }
    }
    out
}
496
497/// Convert a live `TaskGraph` into a lightweight snapshot for TUI display.
498impl From<&zeph_orchestration::TaskGraph> for TaskGraphSnapshot {
499    fn from(graph: &zeph_orchestration::TaskGraph) -> Self {
500        let tasks = graph
501            .tasks
502            .iter()
503            .map(|t| {
504                let error = t
505                    .result
506                    .as_ref()
507                    .filter(|_| t.status == zeph_orchestration::TaskStatus::Failed)
508                    .and_then(|r| {
509                        if r.output.is_empty() {
510                            None
511                        } else {
512                            // Strip control chars, then truncate at 80 chars (SEC-P6-01).
513                            let s = strip_ctrl(&r.output);
514                            if s.len() > 80 {
515                                let end = s.floor_char_boundary(79);
516                                Some(format!("{}…", &s[..end]))
517                            } else {
518                                Some(s)
519                            }
520                        }
521                    });
522                let duration_ms = t.result.as_ref().map_or(0, |r| r.duration_ms);
523                TaskSnapshotRow {
524                    id: t.id.as_u32(),
525                    title: strip_ctrl(&t.title),
526                    status: t.status.to_string(),
527                    agent: t.assigned_agent.as_deref().map(strip_ctrl),
528                    duration_ms,
529                    error,
530                }
531            })
532            .collect();
533        Self {
534            graph_id: graph.id.to_string(),
535            goal: strip_ctrl(&graph.goal),
536            status: graph.status.to_string(),
537            tasks,
538            completed_at: None,
539        }
540    }
541}
542
543pub struct MetricsCollector {
544    tx: watch::Sender<MetricsSnapshot>,
545}
546
547impl MetricsCollector {
548    #[must_use]
549    pub fn new() -> (Self, watch::Receiver<MetricsSnapshot>) {
550        let (tx, rx) = watch::channel(MetricsSnapshot::default());
551        (Self { tx }, rx)
552    }
553
554    pub fn update(&self, f: impl FnOnce(&mut MetricsSnapshot)) {
555        self.tx.send_modify(f);
556    }
557
558    /// Publish the runtime-resolved context window limit.
559    ///
560    /// Call after the provider pool is constructed (builder) and on every successful
561    /// `/provider` switch so the TUI context gauge reflects the active provider's window.
562    ///
563    /// # Examples
564    ///
565    /// ```rust
566    /// use zeph_core::metrics::MetricsCollector;
567    ///
568    /// let (collector, rx) = MetricsCollector::new();
569    /// collector.set_context_max_tokens(128_000);
570    /// assert_eq!(rx.borrow().context_max_tokens, 128_000);
571    /// ```
572    pub fn set_context_max_tokens(&self, max_tokens: u64) {
573        self.tx.send_modify(|m| m.context_max_tokens = max_tokens);
574    }
575
576    /// Record the outcome of the most recent compaction event.
577    ///
578    /// Sets all three `compaction_last_*` fields atomically. `at_ms` is the Unix epoch in
579    /// milliseconds — obtain via `SystemTime::UNIX_EPOCH.elapsed().unwrap_or_default().as_millis()`.
580    ///
581    /// # Examples
582    ///
583    /// ```rust
584    /// use zeph_core::metrics::MetricsCollector;
585    ///
586    /// let (collector, rx) = MetricsCollector::new();
587    /// collector.record_compaction(50_000, 12_000, 1_700_000_000_000);
588    /// let snap = rx.borrow();
589    /// assert_eq!(snap.compaction_last_before, 50_000);
590    /// assert_eq!(snap.compaction_last_after, 12_000);
591    /// assert_eq!(snap.compaction_last_at_ms, 1_700_000_000_000);
592    /// ```
593    pub fn record_compaction(&self, before: u64, after: u64, at_ms: u64) {
594        self.tx.send_modify(|m| {
595            m.compaction_last_before = before;
596            m.compaction_last_after = after;
597            m.compaction_last_at_ms = at_ms;
598        });
599    }
600
601    /// Returns a clone of the underlying [`watch::Sender`].
602    ///
603    /// Use this to pass the sender to code that requires a raw
604    /// `watch::Sender<MetricsSnapshot>` while the [`MetricsCollector`] is
605    /// also shared (e.g., passed to a `MetricsBridge` layer).
606    #[must_use]
607    pub fn sender(&self) -> watch::Sender<MetricsSnapshot> {
608        self.tx.clone()
609    }
610}
611
612// ---------------------------------------------------------------------------
613// HistogramRecorder
614// ---------------------------------------------------------------------------
615
/// Per-event histogram recording contract for the agent loop.
///
/// Implementors record individual latency observations into Prometheus histograms
/// (or any other backend). The trait is object-safe: the agent stores an
/// `Option<Arc<dyn HistogramRecorder>>` and calls these methods at each measurement
/// point. When `None`, recording is a no-op with zero overhead.
///
/// Object safety follows from the shape of the methods: each takes `&self`, has no generic
/// parameters, and returns nothing.
///
/// # Contract for implementors
///
/// - All methods must be non-blocking; they must not call async code or acquire
///   mutexes that may block.
/// - Implementations must be `Send + Sync` — the agent loop runs on the tokio
///   thread pool and the recorder may be called from multiple tasks.
///
/// # Examples
///
/// ```rust
/// use std::sync::Arc;
/// use std::time::Duration;
/// use zeph_core::metrics::HistogramRecorder;
///
/// struct NoOpRecorder;
///
/// impl HistogramRecorder for NoOpRecorder {
///     fn observe_llm_latency(&self, _: Duration) {}
///     fn observe_turn_duration(&self, _: Duration) {}
///     fn observe_tool_execution(&self, _: Duration) {}
///     fn observe_bg_task(&self, _: &str, _: Duration) {}
/// }
///
/// let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
/// recorder.observe_llm_latency(Duration::from_millis(500));
/// ```
pub trait HistogramRecorder: Send + Sync {
    /// Record a single LLM API call latency observation.
    fn observe_llm_latency(&self, duration: std::time::Duration);

    /// Record a full agent turn duration observation (context prep + LLM + tools + persist).
    fn observe_turn_duration(&self, duration: std::time::Duration);

    /// Record a single tool execution latency observation.
    fn observe_tool_execution(&self, duration: std::time::Duration);

    /// Record a background task completion latency.
    ///
    /// `class_label` is `"enrichment"` or `"telemetry"` (from `TaskClass::name()`).
    fn observe_bg_task(&self, class_label: &str, duration: std::time::Duration);
}
664
665#[cfg(test)]
666mod tests {
667    #![allow(clippy::field_reassign_with_default)]
668
669    use super::*;
670
671    #[test]
672    fn default_metrics_snapshot() {
673        let m = MetricsSnapshot::default();
674        assert_eq!(m.total_tokens, 0);
675        assert_eq!(m.api_calls, 0);
676        assert!(m.active_skills.is_empty());
677        assert!(m.active_mcp_tools.is_empty());
678        assert_eq!(m.mcp_tool_count, 0);
679        assert_eq!(m.mcp_server_count, 0);
680        assert!(m.provider_name.is_empty());
681        assert_eq!(m.summaries_count, 0);
682        // Phase 2 fields
683        assert!(m.stt_model.is_none());
684        assert!(m.compaction_model.is_none());
685        assert!(m.provider_temperature.is_none());
686        assert!(m.provider_top_p.is_none());
687        assert!(m.active_channel.is_empty());
688        assert!(m.embedding_model.is_empty());
689        assert!(m.token_budget.is_none());
690        assert!(!m.self_learning_enabled);
691        assert!(!m.semantic_cache_enabled);
692    }
693
694    #[test]
695    fn metrics_collector_update_phase2_fields() {
696        let (collector, rx) = MetricsCollector::new();
697        collector.update(|m| {
698            m.stt_model = Some("whisper-1".into());
699            m.compaction_model = Some("haiku".into());
700            m.provider_temperature = Some(0.7);
701            m.provider_top_p = Some(0.95);
702            m.active_channel = "tui".into();
703            m.embedding_model = "nomic-embed-text".into();
704            m.token_budget = Some(200_000);
705            m.self_learning_enabled = true;
706            m.semantic_cache_enabled = true;
707        });
708        let s = rx.borrow();
709        assert_eq!(s.stt_model.as_deref(), Some("whisper-1"));
710        assert_eq!(s.compaction_model.as_deref(), Some("haiku"));
711        assert_eq!(s.provider_temperature, Some(0.7));
712        assert_eq!(s.provider_top_p, Some(0.95));
713        assert_eq!(s.active_channel, "tui");
714        assert_eq!(s.embedding_model, "nomic-embed-text");
715        assert_eq!(s.token_budget, Some(200_000));
716        assert!(s.self_learning_enabled);
717        assert!(s.semantic_cache_enabled);
718    }
719
720    #[test]
721    fn metrics_collector_update() {
722        let (collector, rx) = MetricsCollector::new();
723        collector.update(|m| {
724            m.api_calls = 5;
725            m.total_tokens = 1000;
726        });
727        let snapshot = rx.borrow().clone();
728        assert_eq!(snapshot.api_calls, 5);
729        assert_eq!(snapshot.total_tokens, 1000);
730    }
731
732    #[test]
733    fn metrics_collector_multiple_updates() {
734        let (collector, rx) = MetricsCollector::new();
735        collector.update(|m| m.api_calls = 1);
736        collector.update(|m| m.api_calls += 1);
737        assert_eq!(rx.borrow().api_calls, 2);
738    }
739
740    #[test]
741    fn metrics_snapshot_clone() {
742        let mut m = MetricsSnapshot::default();
743        m.provider_name = "ollama".into();
744        let cloned = m.clone();
745        assert_eq!(cloned.provider_name, "ollama");
746    }
747
748    #[test]
749    fn filter_metrics_tracking() {
750        let (collector, rx) = MetricsCollector::new();
751        collector.update(|m| {
752            m.filter_raw_tokens += 250;
753            m.filter_saved_tokens += 200;
754            m.filter_applications += 1;
755        });
756        collector.update(|m| {
757            m.filter_raw_tokens += 100;
758            m.filter_saved_tokens += 80;
759            m.filter_applications += 1;
760        });
761        let s = rx.borrow();
762        assert_eq!(s.filter_raw_tokens, 350);
763        assert_eq!(s.filter_saved_tokens, 280);
764        assert_eq!(s.filter_applications, 2);
765    }
766
767    #[test]
768    fn filter_confidence_and_command_metrics() {
769        let (collector, rx) = MetricsCollector::new();
770        collector.update(|m| {
771            m.filter_total_commands += 1;
772            m.filter_filtered_commands += 1;
773            m.filter_confidence_full += 1;
774        });
775        collector.update(|m| {
776            m.filter_total_commands += 1;
777            m.filter_confidence_partial += 1;
778        });
779        let s = rx.borrow();
780        assert_eq!(s.filter_total_commands, 2);
781        assert_eq!(s.filter_filtered_commands, 1);
782        assert_eq!(s.filter_confidence_full, 1);
783        assert_eq!(s.filter_confidence_partial, 1);
784        assert_eq!(s.filter_confidence_fallback, 0);
785    }
786
787    #[test]
788    fn summaries_count_tracks_summarizations() {
789        let (collector, rx) = MetricsCollector::new();
790        collector.update(|m| m.summaries_count += 1);
791        collector.update(|m| m.summaries_count += 1);
792        assert_eq!(rx.borrow().summaries_count, 2);
793    }
794
795    #[test]
796    fn cancellations_counter_increments() {
797        let (collector, rx) = MetricsCollector::new();
798        assert_eq!(rx.borrow().cancellations, 0);
799        collector.update(|m| m.cancellations += 1);
800        collector.update(|m| m.cancellations += 1);
801        assert_eq!(rx.borrow().cancellations, 2);
802    }
803
804    #[test]
805    fn security_event_detail_exact_128_not_truncated() {
806        let s = "a".repeat(128);
807        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s.clone());
808        assert_eq!(ev.detail, s, "128-char string must not be truncated");
809    }
810
811    #[test]
812    fn security_event_detail_129_is_truncated() {
813        let s = "a".repeat(129);
814        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
815        assert!(
816            ev.detail.ends_with('…'),
817            "129-char string must end with ellipsis"
818        );
819        assert!(
820            ev.detail.len() <= 130,
821            "truncated detail must be at most 130 bytes"
822        );
823    }
824
825    #[test]
826    fn security_event_detail_multibyte_utf8_no_panic() {
827        // Each '中' is 3 bytes. 43 chars = 129 bytes — triggers truncation at a multi-byte boundary.
828        let s = "中".repeat(43);
829        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
830        assert!(ev.detail.ends_with('…'));
831    }
832
833    #[test]
834    fn security_event_source_capped_at_64_chars() {
835        let long_source = "x".repeat(200);
836        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, long_source, "detail");
837        assert_eq!(ev.source.len(), 64);
838    }
839
840    #[test]
841    fn security_event_source_strips_control_chars() {
842        let source = "tool\x00name\x1b[31m";
843        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, source, "detail");
844        assert!(!ev.source.contains('\x00'));
845        assert!(!ev.source.contains('\x1b'));
846    }
847
848    #[test]
849    fn security_event_category_as_str() {
850        assert_eq!(SecurityEventCategory::InjectionFlag.as_str(), "injection");
851        assert_eq!(SecurityEventCategory::ExfiltrationBlock.as_str(), "exfil");
852        assert_eq!(SecurityEventCategory::Quarantine.as_str(), "quarantine");
853        assert_eq!(SecurityEventCategory::Truncation.as_str(), "truncation");
854        assert_eq!(
855            SecurityEventCategory::CrossBoundaryMcpToAcp.as_str(),
856            "cross_boundary_mcp_to_acp"
857        );
858    }
859
860    #[test]
861    fn ring_buffer_respects_cap_via_update() {
862        let (collector, rx) = MetricsCollector::new();
863        for i in 0..110u64 {
864            let event = SecurityEvent::new(
865                SecurityEventCategory::InjectionFlag,
866                "src",
867                format!("event {i}"),
868            );
869            collector.update(|m| {
870                if m.security_events.len() >= SECURITY_EVENT_CAP {
871                    m.security_events.pop_front();
872                }
873                m.security_events.push_back(event);
874            });
875        }
876        let snap = rx.borrow();
877        assert_eq!(snap.security_events.len(), SECURITY_EVENT_CAP);
878        // FIFO: earliest events evicted, last one present
879        assert!(snap.security_events.back().unwrap().detail.contains("109"));
880    }
881
882    #[test]
883    fn security_events_empty_by_default() {
884        let m = MetricsSnapshot::default();
885        assert!(m.security_events.is_empty());
886    }
887
888    #[test]
889    fn orchestration_metrics_default_zero() {
890        let m = OrchestrationMetrics::default();
891        assert_eq!(m.plans_total, 0);
892        assert_eq!(m.tasks_total, 0);
893        assert_eq!(m.tasks_completed, 0);
894        assert_eq!(m.tasks_failed, 0);
895        assert_eq!(m.tasks_skipped, 0);
896    }
897
898    #[test]
899    fn metrics_snapshot_includes_orchestration_default_zero() {
900        let m = MetricsSnapshot::default();
901        assert_eq!(m.orchestration.plans_total, 0);
902        assert_eq!(m.orchestration.tasks_total, 0);
903        assert_eq!(m.orchestration.tasks_completed, 0);
904    }
905
906    #[test]
907    fn orchestration_metrics_update_via_collector() {
908        let (collector, rx) = MetricsCollector::new();
909        collector.update(|m| {
910            m.orchestration.plans_total += 1;
911            m.orchestration.tasks_total += 5;
912            m.orchestration.tasks_completed += 3;
913            m.orchestration.tasks_failed += 1;
914            m.orchestration.tasks_skipped += 1;
915        });
916        let s = rx.borrow();
917        assert_eq!(s.orchestration.plans_total, 1);
918        assert_eq!(s.orchestration.tasks_total, 5);
919        assert_eq!(s.orchestration.tasks_completed, 3);
920        assert_eq!(s.orchestration.tasks_failed, 1);
921        assert_eq!(s.orchestration.tasks_skipped, 1);
922    }
923
924    #[test]
925    fn strip_ctrl_removes_escape_sequences() {
926        let input = "hello\x1b[31mworld\x00end";
927        let result = strip_ctrl(input);
928        assert_eq!(result, "helloworldend");
929    }
930
931    #[test]
932    fn strip_ctrl_allows_tab_lf_cr() {
933        let input = "a\tb\nc\rd";
934        let result = strip_ctrl(input);
935        assert_eq!(result, "a\tb\nc\rd");
936    }
937
938    #[test]
939    fn task_graph_snapshot_is_stale_after_30s() {
940        let mut snap = TaskGraphSnapshot::default();
941        // Not stale if no completed_at.
942        assert!(!snap.is_stale());
943        // Not stale if just completed.
944        snap.completed_at = Some(std::time::Instant::now());
945        assert!(!snap.is_stale());
946        // Stale if completed more than 30s ago.
947        snap.completed_at = Some(
948            std::time::Instant::now()
949                .checked_sub(std::time::Duration::from_secs(31))
950                .unwrap(),
951        );
952        assert!(snap.is_stale());
953    }
954
955    // T1: From<&TaskGraph> correctly maps fields including duration_ms and error truncation.
956    #[test]
957    fn task_graph_snapshot_from_task_graph_maps_fields() {
958        use zeph_orchestration::{GraphStatus, TaskGraph, TaskNode, TaskResult, TaskStatus};
959
960        let mut graph = TaskGraph::new("My goal");
961        let mut task = TaskNode::new(0, "Do work", "description");
962        task.status = TaskStatus::Failed;
963        task.assigned_agent = Some("agent-1".into());
964        task.result = Some(TaskResult {
965            output: "error occurred here".into(),
966            artifacts: vec![],
967            duration_ms: 1234,
968            agent_id: None,
969            agent_def: None,
970        });
971        graph.tasks.push(task);
972        graph.status = GraphStatus::Failed;
973
974        let snap = TaskGraphSnapshot::from(&graph);
975        assert_eq!(snap.goal, "My goal");
976        assert_eq!(snap.status, "failed");
977        assert_eq!(snap.tasks.len(), 1);
978        let row = &snap.tasks[0];
979        assert_eq!(row.title, "Do work");
980        assert_eq!(row.status, "failed");
981        assert_eq!(row.agent.as_deref(), Some("agent-1"));
982        assert_eq!(row.duration_ms, 1234);
983        assert!(row.error.as_deref().unwrap().contains("error occurred"));
984    }
985
986    // T2: From impl compiles with orchestration feature active.
987    #[test]
988    fn task_graph_snapshot_from_compiles_with_feature() {
989        use zeph_orchestration::TaskGraph;
990        let graph = TaskGraph::new("feature flag test");
991        let snap = TaskGraphSnapshot::from(&graph);
992        assert_eq!(snap.goal, "feature flag test");
993        assert!(snap.tasks.is_empty());
994        assert!(!snap.is_stale());
995    }
996
997    // T1-extra: long error is truncated with ellipsis.
998    #[test]
999    fn task_graph_snapshot_error_truncated_at_80_chars() {
1000        use zeph_orchestration::{TaskGraph, TaskNode, TaskResult, TaskStatus};
1001
1002        let mut graph = TaskGraph::new("goal");
1003        let mut task = TaskNode::new(0, "t", "d");
1004        task.status = TaskStatus::Failed;
1005        task.result = Some(TaskResult {
1006            output: "e".repeat(100),
1007            artifacts: vec![],
1008            duration_ms: 0,
1009            agent_id: None,
1010            agent_def: None,
1011        });
1012        graph.tasks.push(task);
1013
1014        let snap = TaskGraphSnapshot::from(&graph);
1015        let err = snap.tasks[0].error.as_ref().unwrap();
1016        assert!(err.ends_with('…'), "truncated error must end with ellipsis");
1017        assert!(
1018            err.len() <= 83,
1019            "truncated error must not exceed 80 chars + ellipsis"
1020        );
1021    }
1022
1023    // SEC-P6-01: control chars in task title are stripped.
1024    #[test]
1025    fn task_graph_snapshot_strips_control_chars_from_title() {
1026        use zeph_orchestration::{TaskGraph, TaskNode};
1027
1028        let mut graph = TaskGraph::new("goal\x1b[31m");
1029        let task = TaskNode::new(0, "title\x00injected", "d");
1030        graph.tasks.push(task);
1031
1032        let snap = TaskGraphSnapshot::from(&graph);
1033        assert!(!snap.goal.contains('\x1b'), "goal must not contain escape");
1034        assert!(
1035            !snap.tasks[0].title.contains('\x00'),
1036            "title must not contain null byte"
1037        );
1038    }
1039
1040    #[test]
1041    fn graph_metrics_default_zero() {
1042        let m = MetricsSnapshot::default();
1043        assert_eq!(m.graph_entities_total, 0);
1044        assert_eq!(m.graph_edges_total, 0);
1045        assert_eq!(m.graph_communities_total, 0);
1046        assert_eq!(m.graph_extraction_count, 0);
1047        assert_eq!(m.graph_extraction_failures, 0);
1048    }
1049
1050    #[test]
1051    fn graph_metrics_update_via_collector() {
1052        let (collector, rx) = MetricsCollector::new();
1053        collector.update(|m| {
1054            m.graph_entities_total = 5;
1055            m.graph_edges_total = 10;
1056            m.graph_communities_total = 2;
1057            m.graph_extraction_count = 7;
1058            m.graph_extraction_failures = 1;
1059        });
1060        let snapshot = rx.borrow().clone();
1061        assert_eq!(snapshot.graph_entities_total, 5);
1062        assert_eq!(snapshot.graph_edges_total, 10);
1063        assert_eq!(snapshot.graph_communities_total, 2);
1064        assert_eq!(snapshot.graph_extraction_count, 7);
1065        assert_eq!(snapshot.graph_extraction_failures, 1);
1066    }
1067
1068    #[test]
1069    fn histogram_recorder_trait_is_object_safe() {
1070        use std::sync::Arc;
1071        use std::time::Duration;
1072
1073        struct NoOpRecorder;
1074        impl HistogramRecorder for NoOpRecorder {
1075            fn observe_llm_latency(&self, _: Duration) {}
1076            fn observe_turn_duration(&self, _: Duration) {}
1077            fn observe_tool_execution(&self, _: Duration) {}
1078            fn observe_bg_task(&self, _: &str, _: Duration) {}
1079        }
1080
1081        // Verify the trait can be used as a trait object (object-safe).
1082        let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
1083        recorder.observe_llm_latency(Duration::from_millis(500));
1084        recorder.observe_turn_duration(Duration::from_secs(3));
1085        recorder.observe_tool_execution(Duration::from_millis(100));
1086    }
1087}
zeph_core/metrics.rs

zeph_core/
metrics.rs