zeph_core/
metrics.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::VecDeque;
5
6use tokio::sync::watch;
7
8pub use zeph_llm::{ClassifierMetricsSnapshot, TaskMetricsSnapshot};
9pub use zeph_memory::{CategoryScore, ProbeCategory, ProbeVerdict};
10
/// Classification of a security event shown in the TUI.
///
/// Every variant maps to a fixed, lowercase label through
/// [`SecurityEventCategory::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecurityEventCategory {
    /// Labelled `"injection"`.
    InjectionFlag,
    /// ML classifier hard-blocked tool output (`enforcement_mode=block` only).
    /// Labelled `"injection_blocked"`.
    InjectionBlocked,
    /// Labelled `"exfil"`.
    ExfiltrationBlock,
    /// Labelled `"quarantine"`.
    Quarantine,
    /// Labelled `"truncation"`.
    Truncation,
    /// Labelled `"rate_limit"`.
    RateLimit,
    /// Labelled `"memory_validation"`.
    MemoryValidation,
    /// Labelled `"pre_exec_block"`.
    PreExecutionBlock,
    /// Labelled `"pre_exec_warn"`.
    PreExecutionWarn,
    /// Labelled `"response_verify"`.
    ResponseVerification,
    /// `TurnCausalAnalyzer` flagged behavioral deviation at tool-return boundary.
    /// Labelled `"causal_ipi"`.
    CausalIpiFlag,
    /// MCP tool result crossing into an ACP-serving session boundary.
    /// Labelled `"cross_boundary_mcp_to_acp"`.
    CrossBoundaryMcpToAcp,
    /// VIGIL pre-sanitizer gate flagged a tool output.
    /// Labelled `"vigil"`.
    VigilFlag,
}

impl SecurityEventCategory {
    /// Returns the static label for this category.
    ///
    /// The match is deliberately exhaustive (no wildcard arm) so that adding a
    /// variant without a label is a compile error.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            // Injection detection.
            Self::InjectionFlag => "injection",
            Self::InjectionBlocked => "injection_blocked",
            Self::CausalIpiFlag => "causal_ipi",
            Self::VigilFlag => "vigil",

            // Output containment.
            Self::ExfiltrationBlock => "exfil",
            Self::Quarantine => "quarantine",
            Self::Truncation => "truncation",
            Self::CrossBoundaryMcpToAcp => "cross_boundary_mcp_to_acp",

            // Execution and validation gates.
            Self::RateLimit => "rate_limit",
            Self::MemoryValidation => "memory_validation",
            Self::PreExecutionBlock => "pre_exec_block",
            Self::PreExecutionWarn => "pre_exec_warn",
            Self::ResponseVerification => "response_verify",
        }
    }
}
53
54/// A single security event record for TUI display.
55#[derive(Debug, Clone)]
56pub struct SecurityEvent {
57    /// Unix timestamp (seconds since epoch).
58    pub timestamp: u64,
59    pub category: SecurityEventCategory,
60    /// Source that triggered the event (e.g., `web_scrape`, `mcp_response`).
61    pub source: String,
62    /// Short description, capped at 128 chars.
63    pub detail: String,
64}
65
66impl SecurityEvent {
67    #[must_use]
68    pub fn new(
69        category: SecurityEventCategory,
70        source: impl Into<String>,
71        detail: impl Into<String>,
72    ) -> Self {
73        // IMP-1: cap source at 64 chars and strip ASCII control chars.
74        let source: String = source
75            .into()
76            .chars()
77            .filter(|c| !c.is_ascii_control())
78            .take(64)
79            .collect();
80        // CR-1: UTF-8 safe truncation using floor_char_boundary (stable since Rust 1.82).
81        let detail = detail.into();
82        let detail = if detail.len() > 128 {
83            let end = detail.floor_char_boundary(127);
84            format!("{}…", &detail[..end])
85        } else {
86            detail
87        };
88        Self {
89            timestamp: std::time::SystemTime::now()
90                .duration_since(std::time::UNIX_EPOCH)
91                .unwrap_or_default()
92                .as_secs(),
93            category,
94            source,
95            detail,
96        }
97    }
98}
99
/// Ring buffer capacity for security events.
///
/// This is the bound documented on `MetricsSnapshot::security_events`. The constant
/// only names the limit; eviction is performed by whichever code pushes events
/// (not in this file).
pub const SECURITY_EVENT_CAP: usize = 100;
102
/// Lightweight snapshot of a single task row for TUI display.
///
/// Captured from the task graph on each metrics tick; kept minimal on purpose.
#[derive(Debug, Clone)]
pub struct TaskSnapshotRow {
    pub id: u32,
    pub title: String,
    /// Stringified `TaskStatus` (e.g. `"pending"`, `"running"`, `"completed"`).
    pub status: String,
    pub agent: Option<String>,
    pub duration_ms: u64,
    /// Truncated error message (first 80 chars) when the task failed.
    pub error: Option<String>,
}

/// Lightweight snapshot of a `TaskGraph` for TUI display.
#[derive(Debug, Clone, Default)]
pub struct TaskGraphSnapshot {
    pub graph_id: String,
    pub goal: String,
    /// Stringified `GraphStatus` (e.g. `"created"`, `"running"`, `"completed"`).
    pub status: String,
    pub tasks: Vec<TaskSnapshotRow>,
    /// Instant at which the plan reached a terminal state; `None` while it has not.
    /// Read by [`TaskGraphSnapshot::is_stale`].
    pub completed_at: Option<std::time::Instant>,
}

impl TaskGraphSnapshot {
    /// Returns `true` if this snapshot represents a terminal plan that finished
    /// more than 30 seconds ago and should no longer be shown in the TUI.
    ///
    /// Returns `false` when `completed_at` is `None`.
    #[must_use]
    pub fn is_stale(&self) -> bool {
        const STALE_AFTER: std::time::Duration = std::time::Duration::from_secs(30);
        // Compare full-precision durations. Truncating to whole seconds first
        // (`as_secs() > 30`) would only report staleness from 31 s onwards.
        self.completed_at
            .is_some_and(|finished| finished.elapsed() > STALE_AFTER)
    }
}
138
/// Counters for the task orchestration subsystem.
///
/// Always present in [`MetricsSnapshot`]; zero-valued when orchestration is inactive.
/// The derived `Default` produces all-zero counters.
#[derive(Debug, Clone, Default)]
pub struct OrchestrationMetrics {
    /// Plan counter. NOTE(review): presumably plans created this session — confirm at the update site.
    pub plans_total: u64,
    /// Task counter across plans. NOTE(review): exact counting point is not visible in this file.
    pub tasks_total: u64,
    /// Tasks that finished in a completed state (presumably; verify against the orchestrator).
    pub tasks_completed: u64,
    /// Tasks that finished in a failed state (presumably; verify against the orchestrator).
    pub tasks_failed: u64,
    /// Tasks that were skipped (presumably; verify against the orchestrator).
    pub tasks_skipped: u64,
}
150
/// Connection status of a single MCP server for TUI display.
///
/// Carried by [`McpServerStatus::status`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum McpServerConnectionStatus {
    /// The server is connected; [`McpServerStatus::error`] is documented as empty in this state.
    Connected,
    /// The connection failed; [`McpServerStatus::tool_count`] is documented as `0` in this state.
    Failed,
}
157
/// Per-server MCP status snapshot for TUI display.
///
/// One entry per configured server is stored in `MetricsSnapshot::mcp_servers`.
#[derive(Debug, Clone)]
pub struct McpServerStatus {
    /// Server identifier. NOTE(review): presumably the configured server id — confirm with the producer.
    pub id: String,
    /// Whether the server is connected or failed.
    pub status: McpServerConnectionStatus,
    /// Number of tools provided by this server (0 when failed).
    pub tool_count: usize,
    /// Human-readable failure reason. Empty when connected.
    pub error: String,
}
168
/// Bayesian confidence data for a single skill, used by TUI confidence bar.
///
/// The derived `Default` yields an empty name, `0.0` posterior and zero uses.
#[derive(Debug, Clone, Default)]
pub struct SkillConfidence {
    /// Skill name.
    pub name: String,
    /// Posterior confidence value. NOTE(review): presumably a probability in `[0.0, 1.0]` — confirm with the producer.
    pub posterior: f64,
    /// Use counter for this skill. NOTE(review): counting window is not visible in this file.
    pub total_uses: u32,
}
176
/// Snapshot of a single sub-agent's runtime status.
///
/// Stored per sub-agent in `MetricsSnapshot::sub_agents`.
#[derive(Debug, Clone, Default)]
pub struct SubAgentMetrics {
    /// Sub-agent identifier.
    pub id: String,
    /// Sub-agent display name.
    pub name: String,
    /// Stringified `TaskState`: "working", "completed", "failed", "canceled", etc.
    pub state: String,
    /// Turns consumed so far (presumably bounded by `max_turns`; verify against the runner).
    pub turns_used: u32,
    /// Turn limit for this sub-agent.
    pub max_turns: u32,
    /// Background flag. NOTE(review): presumably `true` when the agent runs in the background — confirm.
    pub background: bool,
    /// Elapsed run time in seconds.
    pub elapsed_secs: u64,
    /// Stringified `PermissionMode`: `"default"`, `"accept_edits"`, `"dont_ask"`,
    /// `"bypass_permissions"`, `"plan"`. Empty string when mode is `Default`.
    ///
    /// NOTE(review): the list above includes `"default"` while the last sentence says the
    /// default mode is rendered as an empty string — confirm which one the producer emits.
    pub permission_mode: String,
    /// Path to the directory containing this agent's JSONL transcript file.
    /// `None` when transcript writing is disabled for this agent.
    pub transcript_dir: Option<String>,
}
195
/// Per-turn latency breakdown for the four agent hot-path phases.
///
/// Populated with `Instant`-based measurements at each phase boundary.
/// All values are in milliseconds. The derived `Default` is all zeros.
#[derive(Debug, Clone, Default)]
pub struct TurnTimings {
    /// Context-preparation phase, in milliseconds.
    pub prepare_context_ms: u64,
    /// LLM chat phase, in milliseconds.
    pub llm_chat_ms: u64,
    /// Tool-execution phase, in milliseconds.
    pub tool_exec_ms: u64,
    /// Message-persistence phase, in milliseconds.
    pub persist_message_ms: u64,
}
207
/// Live snapshot of agent metrics broadcast via a [`tokio::sync::watch`] channel.
///
/// Fields are updated at different rates: some once at startup (static), others every turn
/// (dynamic). For fields that are known at agent startup and do not change during the session,
/// use [`StaticMetricsInit`] and `AgentBuilder::with_static_metrics` instead of
/// adding a raw `send_modify` call in the runner.
///
/// The derived `Default` is the initial value published by [`MetricsCollector::new`]:
/// counters are zero, strings and collections are empty, and optional fields are `None`.
#[derive(Debug, Clone, Default)]
// The struct carries many independent boolean feature/status flags by design.
#[allow(clippy::struct_excessive_bools)]
pub struct MetricsSnapshot {
    pub prompt_tokens: u64,
    pub completion_tokens: u64,
    pub total_tokens: u64,
    pub context_tokens: u64,
    pub api_calls: u64,
    pub active_skills: Vec<String>,
    pub total_skills: usize,
    /// Total configured MCP servers (connected + failed).
    pub mcp_server_count: usize,
    pub mcp_tool_count: usize,
    /// Number of successfully connected MCP servers.
    pub mcp_connected_count: usize,
    /// Per-server connection status list.
    pub mcp_servers: Vec<McpServerStatus>,
    pub active_mcp_tools: Vec<String>,
    pub sqlite_message_count: u64,
    pub sqlite_conversation_id: Option<zeph_memory::ConversationId>,
    pub qdrant_available: bool,
    pub vector_backend: String,
    pub embeddings_generated: u64,
    pub last_llm_latency_ms: u64,
    pub uptime_seconds: u64,
    pub provider_name: String,
    pub model_name: String,
    pub summaries_count: u64,
    pub context_compactions: u64,
    /// Number of times the agent entered the Hard compaction tier, including cooldown-skipped
    /// turns. Not equal to the actual LLM summarization count — reflects pressure, not action.
    pub compaction_hard_count: u64,
    /// User-message turns elapsed after each hard compaction event.
    /// Entry i = turns between hard compaction i and hard compaction i+1 (or session end).
    /// Empty when no hard compaction occurred during the session.
    pub compaction_turns_after_hard: Vec<u64>,
    pub compression_events: u64,
    pub compression_tokens_saved: u64,
    pub tool_output_prunes: u64,
    /// Compaction probe outcomes (#1609).
    pub compaction_probe_passes: u64,
    /// Compaction probe soft failures (summary borderline — compaction proceeded with warning).
    pub compaction_probe_soft_failures: u64,
    /// Compaction probe hard failures (compaction blocked due to lossy summary).
    pub compaction_probe_failures: u64,
    /// Compaction probe errors (LLM/timeout — non-blocking, compaction proceeded).
    pub compaction_probe_errors: u64,
    /// Last compaction probe verdict. `None` before the first probe completes.
    pub last_probe_verdict: Option<zeph_memory::ProbeVerdict>,
    /// Last compaction probe score in [0.0, 1.0]. `None` before the first probe
    /// completes or after an Error verdict (errors produce no score).
    pub last_probe_score: Option<f32>,
    /// Per-category scores from the last completed probe.
    pub last_probe_category_scores: Option<Vec<zeph_memory::CategoryScore>>,
    /// Configured pass threshold for the compaction probe. Used by TUI for category color-coding.
    pub compaction_probe_threshold: f32,
    /// Configured hard-fail threshold for the compaction probe.
    pub compaction_probe_hard_fail_threshold: f32,
    pub cache_read_tokens: u64,
    pub cache_creation_tokens: u64,
    pub cost_spent_cents: f64,
    /// Cost per successful task in cents. `None` until at least one task completes.
    pub cost_cps_cents: Option<f64>,
    /// Number of successful tasks recorded today.
    pub cost_successful_tasks: u64,
    /// Per-provider cost breakdown, sorted by cost descending.
    pub provider_cost_breakdown: Vec<(String, crate::cost::ProviderUsage)>,
    pub filter_raw_tokens: u64,
    pub filter_saved_tokens: u64,
    pub filter_applications: u64,
    pub filter_total_commands: u64,
    pub filter_filtered_commands: u64,
    pub filter_confidence_full: u64,
    pub filter_confidence_partial: u64,
    pub filter_confidence_fallback: u64,
    pub cancellations: u64,
    pub server_compaction_events: u64,
    pub sanitizer_runs: u64,
    pub sanitizer_injection_flags: u64,
    /// Injection pattern hits on `ToolResult` (local) sources — likely false positives.
    ///
    /// Counts regex hits that fired on content from `shell`, `read_file`, `search_code`, etc.
    /// These sources are user-owned and not adversarial; a non-zero value indicates a pattern
    /// that needs tightening or a source reclassification.
    pub sanitizer_injection_fp_local: u64,
    pub sanitizer_truncations: u64,
    pub quarantine_invocations: u64,
    pub quarantine_failures: u64,
    /// ML classifier hard-blocked tool outputs (`enforcement_mode=block` only).
    pub classifier_tool_blocks: u64,
    /// ML classifier suspicious tool outputs (both enforcement modes).
    pub classifier_tool_suspicious: u64,
    /// `TurnCausalAnalyzer` flags: behavioral deviation detected at tool-return boundary.
    pub causal_ipi_flags: u64,
    /// VIGIL pre-sanitizer flags: tool outputs matched injection patterns (any action).
    pub vigil_flags_total: u64,
    /// VIGIL pre-sanitizer blocks: tool outputs replaced with sentinel (`strict_mode=true`).
    pub vigil_blocks_total: u64,
    pub exfiltration_images_blocked: u64,
    pub exfiltration_tool_urls_flagged: u64,
    pub exfiltration_memory_guards: u64,
    pub pii_scrub_count: u64,
    /// Number of times the PII NER classifier timed out; input fell back to regex-only.
    pub pii_ner_timeouts: u64,
    /// Number of times the PII NER circuit breaker tripped (disabled NER for the session).
    pub pii_ner_circuit_breaker_trips: u64,
    pub memory_validation_failures: u64,
    pub rate_limit_trips: u64,
    pub pre_execution_blocks: u64,
    pub pre_execution_warnings: u64,
    /// `true` when a guardrail filter is active for this session.
    pub guardrail_enabled: bool,
    /// `true` when guardrail is in warn-only mode (action = warn).
    pub guardrail_warn_mode: bool,
    pub sub_agents: Vec<SubAgentMetrics>,
    pub skill_confidence: Vec<SkillConfidence>,
    /// Scheduled task summaries: `[name, kind, mode, next_run]`.
    pub scheduled_tasks: Vec<[String; 4]>,
    /// Thompson Sampling distribution snapshots: `(provider, alpha, beta)`.
    pub router_thompson_stats: Vec<(String, f64, f64)>,
    /// Ring buffer of recent security events (cap [`SECURITY_EVENT_CAP`] = 100, FIFO eviction).
    pub security_events: VecDeque<SecurityEvent>,
    pub orchestration: OrchestrationMetrics,
    /// Live snapshot of the currently active task graph. `None` when no plan is active.
    pub orchestration_graph: Option<TaskGraphSnapshot>,
    pub graph_community_detection_failures: u64,
    pub graph_entities_total: u64,
    pub graph_edges_total: u64,
    pub graph_communities_total: u64,
    pub graph_extraction_count: u64,
    pub graph_extraction_failures: u64,
    /// `true` when `config.llm.cloud.enable_extended_context = true`.
    /// Never set for other providers to avoid false positives.
    pub extended_context: bool,
    /// Latest compression-guidelines version (0 = no guidelines yet).
    pub guidelines_version: u32,
    /// ISO 8601 timestamp of the latest guidelines update (empty if none).
    pub guidelines_updated_at: String,
    pub tool_cache_hits: u64,
    pub tool_cache_misses: u64,
    pub tool_cache_entries: usize,
    /// Number of semantic-tier facts in memory (0 when tier promotion disabled).
    pub semantic_fact_count: u64,
    /// STT model name (e.g. `"whisper-1"`). `None` when STT is not configured.
    pub stt_model: Option<String>,
    /// Model used for context compaction/summarization. `None` when no summary provider is set.
    pub compaction_model: Option<String>,
    /// Temperature of the active provider when using Candle. `None` for API providers.
    pub provider_temperature: Option<f32>,
    /// Top-p of the active provider when using Candle. `None` for API providers.
    pub provider_top_p: Option<f32>,
    /// Embedding model name (e.g. `"nomic-embed-text"`). Empty when embeddings are disabled.
    pub embedding_model: String,
    /// Token budget for context window. `None` when not configured.
    pub token_budget: Option<u64>,
    /// Token threshold that triggers soft compaction. `None` when not configured.
    pub compaction_threshold: Option<u32>,
    /// Vault backend identifier: `"age"`, `"env"`, or `"none"`.
    pub vault_backend: String,
    /// Active I/O channel name: `"cli"`, `"telegram"`, `"tui"`, `"discord"`, `"slack"`.
    pub active_channel: String,
    /// Background supervisor: inflight tasks across all classes.
    pub bg_inflight: u64,
    /// Background supervisor: total tasks dropped due to concurrency limit (all classes).
    pub bg_dropped: u64,
    /// Background supervisor: total tasks completed (all classes).
    pub bg_completed: u64,
    /// Background supervisor: inflight enrichment tasks.
    pub bg_enrichment_inflight: u64,
    /// Background supervisor: inflight telemetry tasks.
    pub bg_telemetry_inflight: u64,
    /// Whether self-learning (skill evolution) is enabled.
    pub self_learning_enabled: bool,
    /// Whether the semantic response cache is enabled.
    pub semantic_cache_enabled: bool,
    /// Whether semantic response caching is enabled (alias for `semantic_cache_enabled`).
    pub cache_enabled: bool,
    /// Whether assistant messages are auto-saved to memory.
    pub autosave_enabled: bool,
    /// Classifier p50/p95 latency metrics per task (injection, pii, feedback).
    pub classifier: ClassifierMetricsSnapshot,
    /// Latency breakdown for the most recently completed agent turn.
    pub last_turn_timings: TurnTimings,
    /// Rolling average of per-phase latency over the last 10 turns.
    pub avg_turn_timings: TurnTimings,
    /// Maximum per-phase latency observed within the rolling window (tail-latency visibility).
    ///
    /// M3: exposes `max_in_window` alongside the rolling average for operational monitoring.
    pub max_turn_timings: TurnTimings,
    /// Number of turns included in `avg_turn_timings` and `max_turn_timings` (capped at 10).
    pub timing_sample_count: u64,
    /// Total egress (outbound HTTP) requests attempted this session.
    pub egress_requests_total: u64,
    /// Egress events dropped due to bounded channel backpressure.
    pub egress_dropped_total: u64,
    /// Egress requests blocked by scheme/domain/SSRF policy.
    pub egress_blocked_total: u64,
    /// Runtime-resolved context window limit (tokens).
    ///
    /// Populated from `resolve_context_budget` after provider pool construction and refreshed on
    /// every `/provider` switch. `0` means unknown (pre-init or provider has no declared window);
    /// the TUI gauge renders `"—"` in this case to avoid divide-by-zero.
    pub context_max_tokens: u64,
    /// Token count at the time the most recent compaction was triggered. `0` = never compacted.
    pub compaction_last_before: u64,
    /// Token count after the most recent compaction completed. `0` = never compacted.
    pub compaction_last_after: u64,
    /// Unix epoch milliseconds when the most recent compaction occurred. `0` = never compacted.
    pub compaction_last_at_ms: u64,
}
424
/// Configuration-derived fields of [`MetricsSnapshot`] that are known at agent startup and do
/// not change during the session.
///
/// Pass this struct to `AgentBuilder::with_static_metrics` immediately after
/// `AgentBuilder::with_metrics` to initialize all static fields in one place rather than
/// through scattered `send_modify` calls in the runner.
///
/// Every field has a `Default`, so callers can set only what they know and fill the rest
/// with struct-update syntax, as the example shows.
///
/// # Examples
///
/// ```no_run
/// use zeph_core::metrics::StaticMetricsInit;
///
/// let init = StaticMetricsInit {
///     active_channel: "cli".to_owned(),
///     ..StaticMetricsInit::default()
/// };
/// ```
#[derive(Debug, Default)]
pub struct StaticMetricsInit {
    /// STT model name (e.g. `"whisper-1"`). `None` when STT is not configured.
    pub stt_model: Option<String>,
    /// Model used for context compaction/summarization. `None` when no summary provider is set.
    pub compaction_model: Option<String>,
    /// Whether the semantic response cache is enabled.
    ///
    /// This value is also written to [`MetricsSnapshot::cache_enabled`] which is an alias for the
    /// same concept.
    pub semantic_cache_enabled: bool,
    /// Embedding model name (e.g. `"nomic-embed-text"`). Empty when embeddings are disabled.
    pub embedding_model: String,
    /// Whether self-learning (skill evolution) is enabled.
    pub self_learning_enabled: bool,
    /// Active I/O channel name: `"cli"`, `"telegram"`, `"tui"`, `"discord"`, `"slack"`.
    pub active_channel: String,
    /// Token budget for context window. `None` when not configured.
    pub token_budget: Option<u64>,
    /// Token threshold that triggers soft compaction. `None` when not configured.
    pub compaction_threshold: Option<u32>,
    /// Vault backend identifier: `"age"`, `"env"`, or `"none"`.
    pub vault_backend: String,
    /// Whether assistant messages are auto-saved to memory.
    pub autosave_enabled: bool,
    /// Override for the active model name. When `Some`, replaces the model name set by the
    /// builder from `runtime.model_name` (which may be a placeholder) with the effective model
    /// resolved from the LLM provider configuration. When `None` (the default), no override
    /// is requested.
    pub model_name_override: Option<String>,
}
472
/// Strip control characters and ANSI escape sequences from a string for safe TUI display.
///
/// Tab, LF, and CR are kept. Every other character for which [`char::is_control`] is true
/// is removed — the `0x00–0x1F` range, DEL (`0x7F`) and the C1 range (`0x80–0x9F`).
/// `ESC` is always removed; when it introduces a CSI sequence (`ESC [`), the sequence's
/// parameter/intermediate bytes (`0x20–0x3F`) and its final byte (`0x40–0x7E`) are removed
/// too. This prevents escape-sequence injection from LLM planner output into the TUI.
///
/// A malformed or unterminated CSI sequence only loses the bytes that can legally belong
/// to it; the text that follows is preserved instead of being swallowed up to the next
/// ASCII letter.
fn strip_ctrl(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\x1b' => {
                // ESC itself is never written. If it opens a CSI sequence, consume
                // exactly the sequence: ESC [ <params/intermediates> <final byte>.
                if chars.next_if_eq(&'[').is_some() {
                    while chars
                        .next_if(|p| ('\x20'..='\x3f').contains(p))
                        .is_some()
                    {}
                    // The final byte is optional here so that a truncated sequence
                    // does not eat the character that follows it.
                    let _ = chars.next_if(|f| ('\x40'..='\x7e').contains(f));
                }
            }
            // Whitespace controls that are safe to render.
            '\t' | '\n' | '\r' => out.push(c),
            // Drop all remaining control characters.
            _ if c.is_control() => {}
            _ => out.push(c),
        }
    }
    out
}
501
502/// Convert a live `TaskGraph` into a lightweight snapshot for TUI display.
503impl From<&zeph_orchestration::TaskGraph> for TaskGraphSnapshot {
504    fn from(graph: &zeph_orchestration::TaskGraph) -> Self {
505        let tasks = graph
506            .tasks
507            .iter()
508            .map(|t| {
509                let error = t
510                    .result
511                    .as_ref()
512                    .filter(|_| t.status == zeph_orchestration::TaskStatus::Failed)
513                    .and_then(|r| {
514                        if r.output.is_empty() {
515                            None
516                        } else {
517                            // Strip control chars, then truncate at 80 chars (SEC-P6-01).
518                            let s = strip_ctrl(&r.output);
519                            if s.len() > 80 {
520                                let end = s.floor_char_boundary(79);
521                                Some(format!("{}…", &s[..end]))
522                            } else {
523                                Some(s)
524                            }
525                        }
526                    });
527                let duration_ms = t.result.as_ref().map_or(0, |r| r.duration_ms);
528                TaskSnapshotRow {
529                    id: t.id.as_u32(),
530                    title: strip_ctrl(&t.title),
531                    status: t.status.to_string(),
532                    agent: t.assigned_agent.as_deref().map(strip_ctrl),
533                    duration_ms,
534                    error,
535                }
536            })
537            .collect();
538        Self {
539            graph_id: graph.id.to_string(),
540            goal: strip_ctrl(&graph.goal),
541            status: graph.status.to_string(),
542            tasks,
543            completed_at: None,
544        }
545    }
546}
547
548pub struct MetricsCollector {
549    tx: watch::Sender<MetricsSnapshot>,
550}
551
552impl MetricsCollector {
553    #[must_use]
554    pub fn new() -> (Self, watch::Receiver<MetricsSnapshot>) {
555        let (tx, rx) = watch::channel(MetricsSnapshot::default());
556        (Self { tx }, rx)
557    }
558
559    pub fn update(&self, f: impl FnOnce(&mut MetricsSnapshot)) {
560        self.tx.send_modify(f);
561    }
562
563    /// Publish the runtime-resolved context window limit.
564    ///
565    /// Call after the provider pool is constructed (builder) and on every successful
566    /// `/provider` switch so the TUI context gauge reflects the active provider's window.
567    ///
568    /// # Examples
569    ///
570    /// ```rust
571    /// use zeph_core::metrics::MetricsCollector;
572    ///
573    /// let (collector, rx) = MetricsCollector::new();
574    /// collector.set_context_max_tokens(128_000);
575    /// assert_eq!(rx.borrow().context_max_tokens, 128_000);
576    /// ```
577    pub fn set_context_max_tokens(&self, max_tokens: u64) {
578        self.tx.send_modify(|m| m.context_max_tokens = max_tokens);
579    }
580
581    /// Record the outcome of the most recent compaction event.
582    ///
583    /// Sets all three `compaction_last_*` fields atomically. `at_ms` is the Unix epoch in
584    /// milliseconds — obtain via `SystemTime::UNIX_EPOCH.elapsed().unwrap_or_default().as_millis()`.
585    ///
586    /// # Examples
587    ///
588    /// ```rust
589    /// use zeph_core::metrics::MetricsCollector;
590    ///
591    /// let (collector, rx) = MetricsCollector::new();
592    /// collector.record_compaction(50_000, 12_000, 1_700_000_000_000);
593    /// let snap = rx.borrow();
594    /// assert_eq!(snap.compaction_last_before, 50_000);
595    /// assert_eq!(snap.compaction_last_after, 12_000);
596    /// assert_eq!(snap.compaction_last_at_ms, 1_700_000_000_000);
597    /// ```
598    pub fn record_compaction(&self, before: u64, after: u64, at_ms: u64) {
599        self.tx.send_modify(|m| {
600            m.compaction_last_before = before;
601            m.compaction_last_after = after;
602            m.compaction_last_at_ms = at_ms;
603        });
604    }
605
606    /// Returns a clone of the underlying [`watch::Sender`].
607    ///
608    /// Use this to pass the sender to code that requires a raw
609    /// `watch::Sender<MetricsSnapshot>` while the [`MetricsCollector`] is
610    /// also shared (e.g., passed to a `MetricsBridge` layer).
611    #[must_use]
612    pub fn sender(&self) -> watch::Sender<MetricsSnapshot> {
613        self.tx.clone()
614    }
615}
616
617// ---------------------------------------------------------------------------
618// HistogramRecorder
619// ---------------------------------------------------------------------------
620
/// Per-event histogram recording contract for the agent loop.
///
/// Implementors record individual latency observations into Prometheus histograms
/// (or any other backend). The trait is object-safe: the agent stores an
/// `Option<Arc<dyn HistogramRecorder>>` and calls these methods at each measurement
/// point. When `None`, recording is a no-op with zero overhead.
///
/// # Contract for implementors
///
/// - All methods must be non-blocking; they must not call async code or acquire
///   mutexes that may block.
/// - Implementations must be `Send + Sync` — the agent loop runs on the tokio
///   thread pool and the recorder may be called from multiple tasks.
/// - All methods take `&self` and return nothing, so an implementation that needs
///   mutable state has to use interior mutability and cannot report failures to
///   the caller.
///
/// # Examples
///
/// ```rust
/// use std::sync::Arc;
/// use std::time::Duration;
/// use zeph_core::metrics::HistogramRecorder;
///
/// struct NoOpRecorder;
///
/// impl HistogramRecorder for NoOpRecorder {
///     fn observe_llm_latency(&self, _: Duration) {}
///     fn observe_turn_duration(&self, _: Duration) {}
///     fn observe_tool_execution(&self, _: Duration) {}
///     fn observe_bg_task(&self, _: &str, _: Duration) {}
/// }
///
/// let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
/// recorder.observe_llm_latency(Duration::from_millis(500));
/// ```
pub trait HistogramRecorder: Send + Sync {
    /// Record a single LLM API call latency observation.
    ///
    /// `duration` is the measured latency of that one call.
    fn observe_llm_latency(&self, duration: std::time::Duration);

    /// Record a full agent turn duration observation (context prep + LLM + tools + persist).
    ///
    /// `duration` covers the whole turn, not a single phase.
    fn observe_turn_duration(&self, duration: std::time::Duration);

    /// Record a single tool execution latency observation.
    ///
    /// `duration` is the measured latency of that one tool execution.
    fn observe_tool_execution(&self, duration: std::time::Duration);

    /// Record a background task completion latency.
    ///
    /// `class_label` is `"enrichment"` or `"telemetry"` (from `TaskClass::name()`).
    /// `duration` is the measured latency for the completed task.
    fn observe_bg_task(&self, class_label: &str, duration: std::time::Duration);
}
669
670#[cfg(test)]
671mod tests {
672    #![allow(clippy::field_reassign_with_default)]
673
674    use super::*;
675
676    #[test]
677    fn default_metrics_snapshot() {
678        let m = MetricsSnapshot::default();
679        assert_eq!(m.total_tokens, 0);
680        assert_eq!(m.api_calls, 0);
681        assert!(m.active_skills.is_empty());
682        assert!(m.active_mcp_tools.is_empty());
683        assert_eq!(m.mcp_tool_count, 0);
684        assert_eq!(m.mcp_server_count, 0);
685        assert!(m.provider_name.is_empty());
686        assert_eq!(m.summaries_count, 0);
687        // Phase 2 fields
688        assert!(m.stt_model.is_none());
689        assert!(m.compaction_model.is_none());
690        assert!(m.provider_temperature.is_none());
691        assert!(m.provider_top_p.is_none());
692        assert!(m.active_channel.is_empty());
693        assert!(m.embedding_model.is_empty());
694        assert!(m.token_budget.is_none());
695        assert!(!m.self_learning_enabled);
696        assert!(!m.semantic_cache_enabled);
697    }
698
699    #[test]
700    fn metrics_collector_update_phase2_fields() {
701        let (collector, rx) = MetricsCollector::new();
702        collector.update(|m| {
703            m.stt_model = Some("whisper-1".into());
704            m.compaction_model = Some("haiku".into());
705            m.provider_temperature = Some(0.7);
706            m.provider_top_p = Some(0.95);
707            m.active_channel = "tui".into();
708            m.embedding_model = "nomic-embed-text".into();
709            m.token_budget = Some(200_000);
710            m.self_learning_enabled = true;
711            m.semantic_cache_enabled = true;
712        });
713        let s = rx.borrow();
714        assert_eq!(s.stt_model.as_deref(), Some("whisper-1"));
715        assert_eq!(s.compaction_model.as_deref(), Some("haiku"));
716        assert_eq!(s.provider_temperature, Some(0.7));
717        assert_eq!(s.provider_top_p, Some(0.95));
718        assert_eq!(s.active_channel, "tui");
719        assert_eq!(s.embedding_model, "nomic-embed-text");
720        assert_eq!(s.token_budget, Some(200_000));
721        assert!(s.self_learning_enabled);
722        assert!(s.semantic_cache_enabled);
723    }
724
725    #[test]
726    fn metrics_collector_update() {
727        let (collector, rx) = MetricsCollector::new();
728        collector.update(|m| {
729            m.api_calls = 5;
730            m.total_tokens = 1000;
731        });
732        let snapshot = rx.borrow().clone();
733        assert_eq!(snapshot.api_calls, 5);
734        assert_eq!(snapshot.total_tokens, 1000);
735    }
736
737    #[test]
738    fn metrics_collector_multiple_updates() {
739        let (collector, rx) = MetricsCollector::new();
740        collector.update(|m| m.api_calls = 1);
741        collector.update(|m| m.api_calls += 1);
742        assert_eq!(rx.borrow().api_calls, 2);
743    }
744
745    #[test]
746    fn metrics_snapshot_clone() {
747        let mut m = MetricsSnapshot::default();
748        m.provider_name = "ollama".into();
749        let cloned = m.clone();
750        assert_eq!(cloned.provider_name, "ollama");
751    }
752
753    #[test]
754    fn filter_metrics_tracking() {
755        let (collector, rx) = MetricsCollector::new();
756        collector.update(|m| {
757            m.filter_raw_tokens += 250;
758            m.filter_saved_tokens += 200;
759            m.filter_applications += 1;
760        });
761        collector.update(|m| {
762            m.filter_raw_tokens += 100;
763            m.filter_saved_tokens += 80;
764            m.filter_applications += 1;
765        });
766        let s = rx.borrow();
767        assert_eq!(s.filter_raw_tokens, 350);
768        assert_eq!(s.filter_saved_tokens, 280);
769        assert_eq!(s.filter_applications, 2);
770    }
771
772    #[test]
773    fn filter_confidence_and_command_metrics() {
774        let (collector, rx) = MetricsCollector::new();
775        collector.update(|m| {
776            m.filter_total_commands += 1;
777            m.filter_filtered_commands += 1;
778            m.filter_confidence_full += 1;
779        });
780        collector.update(|m| {
781            m.filter_total_commands += 1;
782            m.filter_confidence_partial += 1;
783        });
784        let s = rx.borrow();
785        assert_eq!(s.filter_total_commands, 2);
786        assert_eq!(s.filter_filtered_commands, 1);
787        assert_eq!(s.filter_confidence_full, 1);
788        assert_eq!(s.filter_confidence_partial, 1);
789        assert_eq!(s.filter_confidence_fallback, 0);
790    }
791
792    #[test]
793    fn summaries_count_tracks_summarizations() {
794        let (collector, rx) = MetricsCollector::new();
795        collector.update(|m| m.summaries_count += 1);
796        collector.update(|m| m.summaries_count += 1);
797        assert_eq!(rx.borrow().summaries_count, 2);
798    }
799
800    #[test]
801    fn cancellations_counter_increments() {
802        let (collector, rx) = MetricsCollector::new();
803        assert_eq!(rx.borrow().cancellations, 0);
804        collector.update(|m| m.cancellations += 1);
805        collector.update(|m| m.cancellations += 1);
806        assert_eq!(rx.borrow().cancellations, 2);
807    }
808
809    #[test]
810    fn security_event_detail_exact_128_not_truncated() {
811        let s = "a".repeat(128);
812        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s.clone());
813        assert_eq!(ev.detail, s, "128-char string must not be truncated");
814    }
815
816    #[test]
817    fn security_event_detail_129_is_truncated() {
818        let s = "a".repeat(129);
819        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
820        assert!(
821            ev.detail.ends_with('…'),
822            "129-char string must end with ellipsis"
823        );
824        assert!(
825            ev.detail.len() <= 130,
826            "truncated detail must be at most 130 bytes"
827        );
828    }
829
830    #[test]
831    fn security_event_detail_multibyte_utf8_no_panic() {
832        // Each '中' is 3 bytes. 43 chars = 129 bytes — triggers truncation at a multi-byte boundary.
833        let s = "中".repeat(43);
834        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
835        assert!(ev.detail.ends_with('…'));
836    }
837
838    #[test]
839    fn security_event_source_capped_at_64_chars() {
840        let long_source = "x".repeat(200);
841        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, long_source, "detail");
842        assert_eq!(ev.source.len(), 64);
843    }
844
845    #[test]
846    fn security_event_source_strips_control_chars() {
847        let source = "tool\x00name\x1b[31m";
848        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, source, "detail");
849        assert!(!ev.source.contains('\x00'));
850        assert!(!ev.source.contains('\x1b'));
851    }
852
853    #[test]
854    fn security_event_category_as_str() {
855        assert_eq!(SecurityEventCategory::InjectionFlag.as_str(), "injection");
856        assert_eq!(SecurityEventCategory::ExfiltrationBlock.as_str(), "exfil");
857        assert_eq!(SecurityEventCategory::Quarantine.as_str(), "quarantine");
858        assert_eq!(SecurityEventCategory::Truncation.as_str(), "truncation");
859        assert_eq!(
860            SecurityEventCategory::CrossBoundaryMcpToAcp.as_str(),
861            "cross_boundary_mcp_to_acp"
862        );
863    }
864
865    #[test]
866    fn ring_buffer_respects_cap_via_update() {
867        let (collector, rx) = MetricsCollector::new();
868        for i in 0..110u64 {
869            let event = SecurityEvent::new(
870                SecurityEventCategory::InjectionFlag,
871                "src",
872                format!("event {i}"),
873            );
874            collector.update(|m| {
875                if m.security_events.len() >= SECURITY_EVENT_CAP {
876                    m.security_events.pop_front();
877                }
878                m.security_events.push_back(event);
879            });
880        }
881        let snap = rx.borrow();
882        assert_eq!(snap.security_events.len(), SECURITY_EVENT_CAP);
883        // FIFO: earliest events evicted, last one present
884        assert!(snap.security_events.back().unwrap().detail.contains("109"));
885    }
886
887    #[test]
888    fn security_events_empty_by_default() {
889        let m = MetricsSnapshot::default();
890        assert!(m.security_events.is_empty());
891    }
892
893    #[test]
894    fn orchestration_metrics_default_zero() {
895        let m = OrchestrationMetrics::default();
896        assert_eq!(m.plans_total, 0);
897        assert_eq!(m.tasks_total, 0);
898        assert_eq!(m.tasks_completed, 0);
899        assert_eq!(m.tasks_failed, 0);
900        assert_eq!(m.tasks_skipped, 0);
901    }
902
903    #[test]
904    fn metrics_snapshot_includes_orchestration_default_zero() {
905        let m = MetricsSnapshot::default();
906        assert_eq!(m.orchestration.plans_total, 0);
907        assert_eq!(m.orchestration.tasks_total, 0);
908        assert_eq!(m.orchestration.tasks_completed, 0);
909    }
910
911    #[test]
912    fn orchestration_metrics_update_via_collector() {
913        let (collector, rx) = MetricsCollector::new();
914        collector.update(|m| {
915            m.orchestration.plans_total += 1;
916            m.orchestration.tasks_total += 5;
917            m.orchestration.tasks_completed += 3;
918            m.orchestration.tasks_failed += 1;
919            m.orchestration.tasks_skipped += 1;
920        });
921        let s = rx.borrow();
922        assert_eq!(s.orchestration.plans_total, 1);
923        assert_eq!(s.orchestration.tasks_total, 5);
924        assert_eq!(s.orchestration.tasks_completed, 3);
925        assert_eq!(s.orchestration.tasks_failed, 1);
926        assert_eq!(s.orchestration.tasks_skipped, 1);
927    }
928
929    #[test]
930    fn strip_ctrl_removes_escape_sequences() {
931        let input = "hello\x1b[31mworld\x00end";
932        let result = strip_ctrl(input);
933        assert_eq!(result, "helloworldend");
934    }
935
936    #[test]
937    fn strip_ctrl_allows_tab_lf_cr() {
938        let input = "a\tb\nc\rd";
939        let result = strip_ctrl(input);
940        assert_eq!(result, "a\tb\nc\rd");
941    }
942
943    #[test]
944    fn task_graph_snapshot_is_stale_after_30s() {
945        let mut snap = TaskGraphSnapshot::default();
946        // Not stale if no completed_at.
947        assert!(!snap.is_stale());
948        // Not stale if just completed.
949        snap.completed_at = Some(std::time::Instant::now());
950        assert!(!snap.is_stale());
951        // Stale if completed more than 30s ago.
952        snap.completed_at = Some(
953            std::time::Instant::now()
954                .checked_sub(std::time::Duration::from_secs(31))
955                .unwrap(),
956        );
957        assert!(snap.is_stale());
958    }
959
960    // T1: From<&TaskGraph> correctly maps fields including duration_ms and error truncation.
961    #[test]
962    fn task_graph_snapshot_from_task_graph_maps_fields() {
963        use zeph_orchestration::{GraphStatus, TaskGraph, TaskNode, TaskResult, TaskStatus};
964
965        let mut graph = TaskGraph::new("My goal");
966        let mut task = TaskNode::new(0, "Do work", "description");
967        task.status = TaskStatus::Failed;
968        task.assigned_agent = Some("agent-1".into());
969        task.result = Some(TaskResult {
970            output: "error occurred here".into(),
971            artifacts: vec![],
972            duration_ms: 1234,
973            agent_id: None,
974            agent_def: None,
975        });
976        graph.tasks.push(task);
977        graph.status = GraphStatus::Failed;
978
979        let snap = TaskGraphSnapshot::from(&graph);
980        assert_eq!(snap.goal, "My goal");
981        assert_eq!(snap.status, "failed");
982        assert_eq!(snap.tasks.len(), 1);
983        let row = &snap.tasks[0];
984        assert_eq!(row.title, "Do work");
985        assert_eq!(row.status, "failed");
986        assert_eq!(row.agent.as_deref(), Some("agent-1"));
987        assert_eq!(row.duration_ms, 1234);
988        assert!(row.error.as_deref().unwrap().contains("error occurred"));
989    }
990
991    // T2: From impl compiles with orchestration feature active.
992    #[test]
993    fn task_graph_snapshot_from_compiles_with_feature() {
994        use zeph_orchestration::TaskGraph;
995        let graph = TaskGraph::new("feature flag test");
996        let snap = TaskGraphSnapshot::from(&graph);
997        assert_eq!(snap.goal, "feature flag test");
998        assert!(snap.tasks.is_empty());
999        assert!(!snap.is_stale());
1000    }
1001
1002    // T1-extra: long error is truncated with ellipsis.
1003    #[test]
1004    fn task_graph_snapshot_error_truncated_at_80_chars() {
1005        use zeph_orchestration::{TaskGraph, TaskNode, TaskResult, TaskStatus};
1006
1007        let mut graph = TaskGraph::new("goal");
1008        let mut task = TaskNode::new(0, "t", "d");
1009        task.status = TaskStatus::Failed;
1010        task.result = Some(TaskResult {
1011            output: "e".repeat(100),
1012            artifacts: vec![],
1013            duration_ms: 0,
1014            agent_id: None,
1015            agent_def: None,
1016        });
1017        graph.tasks.push(task);
1018
1019        let snap = TaskGraphSnapshot::from(&graph);
1020        let err = snap.tasks[0].error.as_ref().unwrap();
1021        assert!(err.ends_with('…'), "truncated error must end with ellipsis");
1022        assert!(
1023            err.len() <= 83,
1024            "truncated error must not exceed 80 chars + ellipsis"
1025        );
1026    }
1027
1028    // SEC-P6-01: control chars in task title are stripped.
1029    #[test]
1030    fn task_graph_snapshot_strips_control_chars_from_title() {
1031        use zeph_orchestration::{TaskGraph, TaskNode};
1032
1033        let mut graph = TaskGraph::new("goal\x1b[31m");
1034        let task = TaskNode::new(0, "title\x00injected", "d");
1035        graph.tasks.push(task);
1036
1037        let snap = TaskGraphSnapshot::from(&graph);
1038        assert!(!snap.goal.contains('\x1b'), "goal must not contain escape");
1039        assert!(
1040            !snap.tasks[0].title.contains('\x00'),
1041            "title must not contain null byte"
1042        );
1043    }
1044
1045    #[test]
1046    fn graph_metrics_default_zero() {
1047        let m = MetricsSnapshot::default();
1048        assert_eq!(m.graph_entities_total, 0);
1049        assert_eq!(m.graph_edges_total, 0);
1050        assert_eq!(m.graph_communities_total, 0);
1051        assert_eq!(m.graph_extraction_count, 0);
1052        assert_eq!(m.graph_extraction_failures, 0);
1053    }
1054
1055    #[test]
1056    fn graph_metrics_update_via_collector() {
1057        let (collector, rx) = MetricsCollector::new();
1058        collector.update(|m| {
1059            m.graph_entities_total = 5;
1060            m.graph_edges_total = 10;
1061            m.graph_communities_total = 2;
1062            m.graph_extraction_count = 7;
1063            m.graph_extraction_failures = 1;
1064        });
1065        let snapshot = rx.borrow().clone();
1066        assert_eq!(snapshot.graph_entities_total, 5);
1067        assert_eq!(snapshot.graph_edges_total, 10);
1068        assert_eq!(snapshot.graph_communities_total, 2);
1069        assert_eq!(snapshot.graph_extraction_count, 7);
1070        assert_eq!(snapshot.graph_extraction_failures, 1);
1071    }
1072
1073    #[test]
1074    fn histogram_recorder_trait_is_object_safe() {
1075        use std::sync::Arc;
1076        use std::time::Duration;
1077
1078        struct NoOpRecorder;
1079        impl HistogramRecorder for NoOpRecorder {
1080            fn observe_llm_latency(&self, _: Duration) {}
1081            fn observe_turn_duration(&self, _: Duration) {}
1082            fn observe_tool_execution(&self, _: Duration) {}
1083            fn observe_bg_task(&self, _: &str, _: Duration) {}
1084        }
1085
1086        // Verify the trait can be used as a trait object (object-safe).
1087        let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
1088        recorder.observe_llm_latency(Duration::from_millis(500));
1089        recorder.observe_turn_duration(Duration::from_secs(3));
1090        recorder.observe_tool_execution(Duration::from_millis(100));
1091    }
1092}
zeph_core/metrics.rs

zeph_core/
metrics.rs