zeph_core/
metrics.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::collections::VecDeque;
5
6use tokio::sync::watch;
7
8pub use zeph_llm::{ClassifierMetricsSnapshot, TaskMetricsSnapshot};
9pub use zeph_memory::{CategoryScore, ProbeCategory, ProbeVerdict};
10
/// Category of a security event for TUI display.
///
/// Each variant maps to a short lowercase label through
/// [`SecurityEventCategory::as_str`]. The type is `Copy`, so it is passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecurityEventCategory {
    InjectionFlag,
    /// ML classifier hard-blocked tool output (`enforcement_mode=block` only).
    InjectionBlocked,
    ExfiltrationBlock,
    Quarantine,
    Truncation,
    RateLimit,
    MemoryValidation,
    PreExecutionBlock,
    PreExecutionWarn,
    ResponseVerification,
    /// `TurnCausalAnalyzer` flagged behavioral deviation at tool-return boundary.
    CausalIpiFlag,
    /// MCP tool result crossing into an ACP-serving session boundary.
    CrossBoundaryMcpToAcp,
}
30
31impl SecurityEventCategory {
32    #[must_use]
33    pub fn as_str(self) -> &'static str {
34        match self {
35            Self::InjectionFlag => "injection",
36            Self::InjectionBlocked => "injection_blocked",
37            Self::ExfiltrationBlock => "exfil",
38            Self::Quarantine => "quarantine",
39            Self::Truncation => "truncation",
40            Self::RateLimit => "rate_limit",
41            Self::MemoryValidation => "memory_validation",
42            Self::PreExecutionBlock => "pre_exec_block",
43            Self::PreExecutionWarn => "pre_exec_warn",
44            Self::ResponseVerification => "response_verify",
45            Self::CausalIpiFlag => "causal_ipi",
46            Self::CrossBoundaryMcpToAcp => "cross_boundary_mcp_to_acp",
47        }
48    }
49}
50
/// A single security event record for TUI display.
///
/// Build instances with [`SecurityEvent::new`], which stamps the time and bounds
/// the `source` and `detail` strings. The fields are public, so values assigned
/// directly bypass those bounds.
#[derive(Debug, Clone)]
pub struct SecurityEvent {
    /// Unix timestamp (seconds since epoch), captured when the event is constructed.
    pub timestamp: u64,
    pub category: SecurityEventCategory,
    /// Source that triggered the event (e.g., `web_scrape`, `mcp_response`).
    ///
    /// `new` drops ASCII control characters and keeps at most 64 `char`s.
    pub source: String,
    /// Short description.
    ///
    /// `new` leaves inputs of up to 128 bytes untouched; longer inputs are cut at a
    /// UTF-8 character boundary at or below byte 127 and suffixed with `…`.
    pub detail: String,
}
62
63impl SecurityEvent {
64    #[must_use]
65    pub fn new(
66        category: SecurityEventCategory,
67        source: impl Into<String>,
68        detail: impl Into<String>,
69    ) -> Self {
70        // IMP-1: cap source at 64 chars and strip ASCII control chars.
71        let source: String = source
72            .into()
73            .chars()
74            .filter(|c| !c.is_ascii_control())
75            .take(64)
76            .collect();
77        // CR-1: UTF-8 safe truncation using floor_char_boundary (stable since Rust 1.82).
78        let detail = detail.into();
79        let detail = if detail.len() > 128 {
80            let end = detail.floor_char_boundary(127);
81            format!("{}…", &detail[..end])
82        } else {
83            detail
84        };
85        Self {
86            timestamp: std::time::SystemTime::now()
87                .duration_since(std::time::UNIX_EPOCH)
88                .unwrap_or_default()
89                .as_secs(),
90            category,
91            source,
92            detail,
93        }
94    }
95}
96
/// Ring buffer capacity for security events.
///
/// NOTE(review): nothing in this module enforces the cap on
/// `MetricsSnapshot::security_events`; writers appear to be responsible for evicting the
/// oldest entry (`pop_front`) before pushing once this length is reached — confirm at
/// the call sites.
pub const SECURITY_EVENT_CAP: usize = 100;
99
/// Lightweight snapshot of a single task row for TUI display.
///
/// Captured from the task graph on each metrics tick; kept minimal on purpose.
/// Rows are produced by the `From<&zeph_orchestration::TaskGraph>` conversion on
/// [`TaskGraphSnapshot`], which strips control characters from the text fields.
#[derive(Debug, Clone)]
pub struct TaskSnapshotRow {
    /// Numeric task id (the conversion takes it from `task.id.as_u32()`).
    pub id: u32,
    /// Task title with control characters removed.
    pub title: String,
    /// Stringified `TaskStatus` (e.g. `"pending"`, `"running"`, `"completed"`).
    pub status: String,
    /// Assigned agent, with control characters removed. `None` when no agent is assigned.
    pub agent: Option<String>,
    /// Duration reported by the task result; `0` when the task has no result yet.
    pub duration_ms: u64,
    /// Truncated error message when the task failed.
    ///
    /// The conversion keeps outputs of up to 80 bytes as-is and cuts longer ones at a
    /// character boundary at or below byte 79, appending `…`. `None` for tasks that did
    /// not fail or whose output is empty.
    pub error: Option<String>,
}
114
/// Lightweight snapshot of a `TaskGraph` for TUI display.
///
/// `Default` yields an empty snapshot with no tasks and no completion time.
#[derive(Debug, Clone, Default)]
pub struct TaskGraphSnapshot {
    /// Stringified graph id.
    pub graph_id: String,
    /// Plan goal text; the `From<&TaskGraph>` conversion strips control characters.
    pub goal: String,
    /// Stringified `GraphStatus` (e.g. `"created"`, `"running"`, `"completed"`).
    pub status: String,
    /// One row per task in the graph.
    pub tasks: Vec<TaskSnapshotRow>,
    /// Moment the plan reached a terminal state; read by [`TaskGraphSnapshot::is_stale`].
    ///
    /// The `From<&TaskGraph>` conversion always leaves this as `None`, so whoever
    /// publishes the snapshot must set it.
    pub completed_at: Option<std::time::Instant>,
}
125
126impl TaskGraphSnapshot {
127    /// Returns `true` if this snapshot represents a terminal plan that finished
128    /// more than 30 seconds ago and should no longer be shown in the TUI.
129    #[must_use]
130    pub fn is_stale(&self) -> bool {
131        self.completed_at
132            .is_some_and(|t| t.elapsed().as_secs() > 30)
133    }
134}
135
/// Counters for the task orchestration subsystem.
///
/// Always present in [`MetricsSnapshot`]; zero-valued when orchestration is inactive.
/// All counters start at `0` through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct OrchestrationMetrics {
    /// Count of plans.
    pub plans_total: u64,
    /// Count of tasks, regardless of outcome.
    pub tasks_total: u64,
    /// Count of tasks that completed.
    pub tasks_completed: u64,
    /// Count of tasks that failed.
    pub tasks_failed: u64,
    /// Count of tasks that were skipped.
    pub tasks_skipped: u64,
}
147
/// Connection status of a single MCP server for TUI display.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum McpServerConnectionStatus {
    /// The server is connected.
    Connected,
    /// The connection failed; the reason is carried by `McpServerStatus::error`.
    Failed,
}
154
/// Per-server MCP status snapshot for TUI display.
#[derive(Debug, Clone)]
pub struct McpServerStatus {
    /// Identifier of the server. NOTE(review): presumably the configured server id — confirm.
    pub id: String,
    /// Whether the server is connected or failed.
    pub status: McpServerConnectionStatus,
    /// Number of tools provided by this server (0 when failed).
    pub tool_count: usize,
    /// Human-readable failure reason. Empty when connected.
    pub error: String,
}
165
/// Bayesian confidence data for a single skill, used by TUI confidence bar.
#[derive(Debug, Clone, Default)]
pub struct SkillConfidence {
    /// Skill name.
    pub name: String,
    /// Posterior confidence value.
    /// NOTE(review): presumably a probability in `[0.0, 1.0]` — confirm with the producer.
    pub posterior: f64,
    /// Number of recorded uses of the skill.
    pub total_uses: u32,
}
173
/// Snapshot of a single sub-agent's runtime status.
#[derive(Debug, Clone, Default)]
pub struct SubAgentMetrics {
    /// Sub-agent identifier.
    pub id: String,
    /// Sub-agent display name.
    pub name: String,
    /// Stringified `TaskState`: "working", "completed", "failed", "canceled", etc.
    pub state: String,
    /// Turns consumed so far.
    pub turns_used: u32,
    /// Turn limit for this sub-agent.
    pub max_turns: u32,
    /// Whether the sub-agent is flagged as a background agent.
    pub background: bool,
    /// Elapsed run time in seconds.
    pub elapsed_secs: u64,
    /// Stringified `PermissionMode`: `"default"`, `"accept_edits"`, `"dont_ask"`,
    /// `"bypass_permissions"`, `"plan"`. Empty string when mode is `Default`.
    ///
    /// NOTE(review): the two sentences above disagree about the `Default` mode
    /// (`"default"` vs. empty string) — confirm which one the producer emits.
    pub permission_mode: String,
    /// Path to the directory containing this agent's JSONL transcript file.
    /// `None` when transcript writing is disabled for this agent.
    pub transcript_dir: Option<String>,
}
192
/// Per-turn latency breakdown for the four agent hot-path phases.
///
/// Populated with `Instant`-based measurements at each phase boundary.
/// All values are in milliseconds and default to `0`.
#[derive(Debug, Clone, Default)]
pub struct TurnTimings {
    /// Time spent in the context-preparation phase.
    pub prepare_context_ms: u64,
    /// Time spent in the LLM chat phase.
    pub llm_chat_ms: u64,
    /// Time spent in the tool-execution phase.
    pub tool_exec_ms: u64,
    /// Time spent in the message-persistence phase.
    pub persist_message_ms: u64,
}
204
/// Aggregate runtime metrics published through [`MetricsCollector`].
///
/// The snapshot is the value type of the `tokio::sync::watch` channel owned by the
/// collector: writers mutate it in place via [`MetricsCollector::update`] and readers
/// observe it through the `watch::Receiver` returned by [`MetricsCollector::new`].
/// Every field starts at its `Default` value (zero, empty, `false`, or `None`).
#[derive(Debug, Clone, Default)]
// The snapshot carries several independent boolean feature flags, so the
// "too many bools" lint does not indicate a modelling problem here.
#[allow(clippy::struct_excessive_bools)]
pub struct MetricsSnapshot {
    pub prompt_tokens: u64,
    pub completion_tokens: u64,
    pub total_tokens: u64,
    pub context_tokens: u64,
    pub api_calls: u64,
    pub active_skills: Vec<String>,
    pub total_skills: usize,
    /// Total configured MCP servers (connected + failed).
    pub mcp_server_count: usize,
    pub mcp_tool_count: usize,
    /// Number of successfully connected MCP servers.
    pub mcp_connected_count: usize,
    /// Per-server connection status list.
    pub mcp_servers: Vec<McpServerStatus>,
    pub active_mcp_tools: Vec<String>,
    pub sqlite_message_count: u64,
    pub sqlite_conversation_id: Option<zeph_memory::ConversationId>,
    pub qdrant_available: bool,
    pub vector_backend: String,
    pub embeddings_generated: u64,
    pub last_llm_latency_ms: u64,
    pub uptime_seconds: u64,
    pub provider_name: String,
    pub model_name: String,
    pub summaries_count: u64,
    pub context_compactions: u64,
    /// Number of times the agent entered the Hard compaction tier, including cooldown-skipped
    /// turns. Not equal to the actual LLM summarization count — reflects pressure, not action.
    pub compaction_hard_count: u64,
    /// User-message turns elapsed after each hard compaction event.
    /// Entry i = turns between hard compaction i and hard compaction i+1 (or session end).
    /// Empty when no hard compaction occurred during the session.
    pub compaction_turns_after_hard: Vec<u64>,
    pub compression_events: u64,
    pub compression_tokens_saved: u64,
    pub tool_output_prunes: u64,
    /// Compaction probe outcomes (#1609).
    pub compaction_probe_passes: u64,
    /// Compaction probe soft failures (summary borderline — compaction proceeded with warning).
    pub compaction_probe_soft_failures: u64,
    /// Compaction probe hard failures (compaction blocked due to lossy summary).
    pub compaction_probe_failures: u64,
    /// Compaction probe errors (LLM/timeout — non-blocking, compaction proceeded).
    pub compaction_probe_errors: u64,
    /// Last compaction probe verdict. `None` before the first probe completes.
    pub last_probe_verdict: Option<zeph_memory::ProbeVerdict>,
    /// Last compaction probe score in [0.0, 1.0]. `None` before the first probe
    /// completes or after an Error verdict (errors produce no score).
    pub last_probe_score: Option<f32>,
    /// Per-category scores from the last completed probe.
    pub last_probe_category_scores: Option<Vec<zeph_memory::CategoryScore>>,
    /// Configured pass threshold for the compaction probe. Used by TUI for category color-coding.
    pub compaction_probe_threshold: f32,
    /// Configured hard-fail threshold for the compaction probe.
    pub compaction_probe_hard_fail_threshold: f32,
    pub cache_read_tokens: u64,
    pub cache_creation_tokens: u64,
    pub cost_spent_cents: f64,
    /// Per-provider cost breakdown, sorted by cost descending.
    pub provider_cost_breakdown: Vec<(String, crate::cost::ProviderUsage)>,
    pub filter_raw_tokens: u64,
    pub filter_saved_tokens: u64,
    pub filter_applications: u64,
    pub filter_total_commands: u64,
    pub filter_filtered_commands: u64,
    pub filter_confidence_full: u64,
    pub filter_confidence_partial: u64,
    pub filter_confidence_fallback: u64,
    pub cancellations: u64,
    pub server_compaction_events: u64,
    pub sanitizer_runs: u64,
    pub sanitizer_injection_flags: u64,
    /// Injection pattern hits on `ToolResult` (local) sources — likely false positives.
    ///
    /// Counts regex hits that fired on content from `shell`, `read_file`, `search_code`, etc.
    /// These sources are user-owned and not adversarial; a non-zero value indicates a pattern
    /// that needs tightening or a source reclassification.
    pub sanitizer_injection_fp_local: u64,
    pub sanitizer_truncations: u64,
    pub quarantine_invocations: u64,
    pub quarantine_failures: u64,
    /// ML classifier hard-blocked tool outputs (`enforcement_mode=block` only).
    pub classifier_tool_blocks: u64,
    /// ML classifier suspicious tool outputs (both enforcement modes).
    pub classifier_tool_suspicious: u64,
    /// `TurnCausalAnalyzer` flags: behavioral deviation detected at tool-return boundary.
    pub causal_ipi_flags: u64,
    pub exfiltration_images_blocked: u64,
    pub exfiltration_tool_urls_flagged: u64,
    pub exfiltration_memory_guards: u64,
    pub pii_scrub_count: u64,
    /// Number of times the PII NER classifier timed out; input fell back to regex-only.
    pub pii_ner_timeouts: u64,
    /// Number of times the PII NER circuit breaker tripped (disabled NER for the session).
    pub pii_ner_circuit_breaker_trips: u64,
    pub memory_validation_failures: u64,
    pub rate_limit_trips: u64,
    pub pre_execution_blocks: u64,
    pub pre_execution_warnings: u64,
    /// `true` when a guardrail filter is active for this session.
    pub guardrail_enabled: bool,
    /// `true` when guardrail is in warn-only mode (action = warn).
    pub guardrail_warn_mode: bool,
    pub sub_agents: Vec<SubAgentMetrics>,
    pub skill_confidence: Vec<SkillConfidence>,
    /// Scheduled task summaries: `[name, kind, mode, next_run]`.
    pub scheduled_tasks: Vec<[String; 4]>,
    /// Thompson Sampling distribution snapshots: `(provider, alpha, beta)`.
    pub router_thompson_stats: Vec<(String, f64, f64)>,
    /// Ring buffer of recent security events (cap 100, FIFO eviction).
    ///
    /// The cap value is [`SECURITY_EVENT_CAP`]; the `VecDeque` itself is unbounded.
    pub security_events: VecDeque<SecurityEvent>,
    pub orchestration: OrchestrationMetrics,
    /// Live snapshot of the currently active task graph. `None` when no plan is active.
    pub orchestration_graph: Option<TaskGraphSnapshot>,
    pub graph_community_detection_failures: u64,
    pub graph_entities_total: u64,
    pub graph_edges_total: u64,
    pub graph_communities_total: u64,
    pub graph_extraction_count: u64,
    pub graph_extraction_failures: u64,
    /// `true` when `config.llm.cloud.enable_extended_context = true`.
    /// Never set for other providers to avoid false positives.
    pub extended_context: bool,
    /// Latest compression-guidelines version (0 = no guidelines yet).
    pub guidelines_version: u32,
    /// ISO 8601 timestamp of the latest guidelines update (empty if none).
    pub guidelines_updated_at: String,
    pub tool_cache_hits: u64,
    pub tool_cache_misses: u64,
    pub tool_cache_entries: usize,
    /// Number of semantic-tier facts in memory (0 when tier promotion disabled).
    pub semantic_fact_count: u64,
    /// STT model name (e.g. "whisper-1"). `None` when STT is not configured.
    pub stt_model: Option<String>,
    /// Model used for context compaction/summarization. `None` when no summary provider is set.
    pub compaction_model: Option<String>,
    /// Temperature of the active provider when using Candle. `None` for API providers.
    pub provider_temperature: Option<f32>,
    /// Top-p of the active provider when using Candle. `None` for API providers.
    pub provider_top_p: Option<f32>,
    /// Embedding model name (e.g. `"nomic-embed-text"`). Empty when embeddings are disabled.
    pub embedding_model: String,
    /// Token budget for context window. `None` when not configured.
    pub token_budget: Option<u64>,
    /// Token threshold that triggers soft compaction. `None` when not configured.
    pub compaction_threshold: Option<u32>,
    /// Vault backend identifier: "age", "env", or "none".
    pub vault_backend: String,
    /// Active I/O channel name: `"cli"`, `"telegram"`, `"tui"`, `"discord"`, `"slack"`.
    pub active_channel: String,
    /// Background supervisor: inflight tasks across all classes.
    pub bg_inflight: u64,
    /// Background supervisor: total tasks dropped due to concurrency limit (all classes).
    pub bg_dropped: u64,
    /// Background supervisor: total tasks completed (all classes).
    pub bg_completed: u64,
    /// Background supervisor: inflight enrichment tasks.
    pub bg_enrichment_inflight: u64,
    /// Background supervisor: inflight telemetry tasks.
    pub bg_telemetry_inflight: u64,
    /// Whether self-learning (skill evolution) is enabled.
    pub self_learning_enabled: bool,
    /// Whether the semantic response cache is enabled.
    pub semantic_cache_enabled: bool,
    /// Whether semantic response caching is enabled (alias for `semantic_cache_enabled`).
    ///
    /// NOTE(review): this is a separate field, not a true alias — nothing in this
    /// module keeps the two flags in sync.
    pub cache_enabled: bool,
    /// Whether assistant messages are auto-saved to memory.
    pub autosave_enabled: bool,
    /// Classifier p50/p95 latency metrics per task (injection, pii, feedback).
    pub classifier: ClassifierMetricsSnapshot,
    /// Latency breakdown for the most recently completed agent turn.
    pub last_turn_timings: TurnTimings,
    /// Rolling average of per-phase latency over the last 10 turns.
    pub avg_turn_timings: TurnTimings,
    /// Maximum per-phase latency observed within the rolling window (tail-latency visibility).
    ///
    /// M3: exposes `max_in_window` alongside the rolling average for operational monitoring.
    pub max_turn_timings: TurnTimings,
    /// Number of turns included in `avg_turn_timings` and `max_turn_timings` (capped at 10).
    pub timing_sample_count: u64,
}
389
/// Strip control characters and ANSI CSI escape sequences from a string for safe TUI display.
///
/// Tab, LF, and CR are kept. Every other character for which [`char::is_control`] is
/// true is removed — the C0 range `0x00–0x1F`, DEL (`0x7F`), and the C1 range
/// `0x80–0x9F`. This prevents escape-sequence injection from LLM planner output into
/// the TUI.
///
/// A CSI sequence is `ESC [`, followed by any number of parameter/intermediate bytes
/// (`0x20–0x3F`), followed by one final byte (`0x40–0x7E`). Only characters that fit
/// this grammar are consumed, so a malformed or unterminated `ESC [` never swallows
/// the ordinary text that follows it. `ESC` itself is always dropped, which leaves the
/// remainder of any other escape form as inert printable text.
fn strip_ctrl(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\x1b' => {
                if chars.next_if_eq(&'[').is_some() {
                    // Parameter and intermediate bytes.
                    while chars.next_if(|p| ('\x20'..='\x3f').contains(p)).is_some() {}
                    // Optional final byte; absent when the sequence is malformed or
                    // the input ends early.
                    let _ = chars.next_if(|f| ('\x40'..='\x7e').contains(f));
                }
                // ESC and whatever was consumed above are dropped.
            }
            '\t' | '\n' | '\r' => out.push(c),
            _ if c.is_control() => {
                // Drop every other control character.
            }
            _ => out.push(c),
        }
    }
    out
}
418
419/// Convert a live `TaskGraph` into a lightweight snapshot for TUI display.
420impl From<&zeph_orchestration::TaskGraph> for TaskGraphSnapshot {
421    fn from(graph: &zeph_orchestration::TaskGraph) -> Self {
422        let tasks = graph
423            .tasks
424            .iter()
425            .map(|t| {
426                let error = t
427                    .result
428                    .as_ref()
429                    .filter(|_| t.status == zeph_orchestration::TaskStatus::Failed)
430                    .and_then(|r| {
431                        if r.output.is_empty() {
432                            None
433                        } else {
434                            // Strip control chars, then truncate at 80 chars (SEC-P6-01).
435                            let s = strip_ctrl(&r.output);
436                            if s.len() > 80 {
437                                let end = s.floor_char_boundary(79);
438                                Some(format!("{}…", &s[..end]))
439                            } else {
440                                Some(s)
441                            }
442                        }
443                    });
444                let duration_ms = t.result.as_ref().map_or(0, |r| r.duration_ms);
445                TaskSnapshotRow {
446                    id: t.id.as_u32(),
447                    title: strip_ctrl(&t.title),
448                    status: t.status.to_string(),
449                    agent: t.assigned_agent.as_deref().map(strip_ctrl),
450                    duration_ms,
451                    error,
452                }
453            })
454            .collect();
455        Self {
456            graph_id: graph.id.to_string(),
457            goal: strip_ctrl(&graph.goal),
458            status: graph.status.to_string(),
459            tasks,
460            completed_at: None,
461        }
462    }
463}
464
/// Write-side handle for publishing [`MetricsSnapshot`] updates.
///
/// Wraps the sending half of a `tokio::sync::watch` channel; the matching receiver is
/// handed out by [`MetricsCollector::new`].
pub struct MetricsCollector {
    /// Sending half of the watch channel that holds the current snapshot.
    tx: watch::Sender<MetricsSnapshot>,
}
468
impl MetricsCollector {
    /// Creates a collector together with the receiver that observes its snapshots.
    ///
    /// The channel is seeded with `MetricsSnapshot::default()`.
    #[must_use]
    pub fn new() -> (Self, watch::Receiver<MetricsSnapshot>) {
        let (tx, rx) = watch::channel(MetricsSnapshot::default());
        (Self { tx }, rx)
    }

    /// Mutates the current snapshot in place by running `f` on it.
    ///
    /// Delegates to `watch::Sender::send_modify`, so `f` receives a mutable reference
    /// to the value stored in the channel rather than a copy.
    pub fn update(&self, f: impl FnOnce(&mut MetricsSnapshot)) {
        self.tx.send_modify(f);
    }

    /// Returns a clone of the underlying [`watch::Sender`].
    ///
    /// Use this to pass the sender to code that requires a raw
    /// `watch::Sender<MetricsSnapshot>` while the [`MetricsCollector`] is
    /// also shared (e.g., passed to a `MetricsBridge` layer).
    #[must_use]
    pub fn sender(&self) -> watch::Sender<MetricsSnapshot> {
        self.tx.clone()
    }
}
490
491// ---------------------------------------------------------------------------
492// HistogramRecorder
493// ---------------------------------------------------------------------------
494
/// Per-event histogram recording contract for the agent loop.
///
/// Implementors record individual latency observations into Prometheus histograms
/// (or any other backend). The trait is object-safe: the agent stores an
/// `Option<Arc<dyn HistogramRecorder>>` and calls these methods at each measurement
/// point. When `None`, recording is a no-op with zero overhead.
///
/// # Contract for implementors
///
/// - All methods must be non-blocking; they must not call async code or acquire
///   mutexes that may block.
/// - Implementations must be `Send + Sync` — the agent loop runs on the tokio
///   thread pool and the recorder may be called from multiple tasks.
/// - Every method takes `&self` and returns nothing, so any state an implementation
///   mutates needs interior mutability, and failures cannot be reported to the caller.
///
/// # Examples
///
/// ```rust
/// use std::sync::Arc;
/// use std::time::Duration;
/// use zeph_core::metrics::HistogramRecorder;
///
/// struct NoOpRecorder;
///
/// impl HistogramRecorder for NoOpRecorder {
///     fn observe_llm_latency(&self, _: Duration) {}
///     fn observe_turn_duration(&self, _: Duration) {}
///     fn observe_tool_execution(&self, _: Duration) {}
///     fn observe_bg_task(&self, _: &str, _: Duration) {}
/// }
///
/// let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
/// recorder.observe_llm_latency(Duration::from_millis(500));
/// ```
pub trait HistogramRecorder: Send + Sync {
    /// Record a single LLM API call latency observation.
    ///
    /// `duration` is the measured latency of that one call.
    fn observe_llm_latency(&self, duration: std::time::Duration);

    /// Record a full agent turn duration observation (context prep + LLM + tools + persist).
    ///
    /// `duration` covers the whole turn, not an individual phase.
    fn observe_turn_duration(&self, duration: std::time::Duration);

    /// Record a single tool execution latency observation.
    ///
    /// `duration` is the measured latency of that one tool execution.
    fn observe_tool_execution(&self, duration: std::time::Duration);

    /// Record a background task completion latency.
    ///
    /// `class_label` is `"enrichment"` or `"telemetry"` (from `TaskClass::name()`).
    /// `duration` is the latency of the completed background task.
    fn observe_bg_task(&self, class_label: &str, duration: std::time::Duration);
}
543
544#[cfg(test)]
545mod tests {
546    #![allow(clippy::field_reassign_with_default)]
547
548    use super::*;
549
550    #[test]
551    fn default_metrics_snapshot() {
552        let m = MetricsSnapshot::default();
553        assert_eq!(m.total_tokens, 0);
554        assert_eq!(m.api_calls, 0);
555        assert!(m.active_skills.is_empty());
556        assert!(m.active_mcp_tools.is_empty());
557        assert_eq!(m.mcp_tool_count, 0);
558        assert_eq!(m.mcp_server_count, 0);
559        assert!(m.provider_name.is_empty());
560        assert_eq!(m.summaries_count, 0);
561        // Phase 2 fields
562        assert!(m.stt_model.is_none());
563        assert!(m.compaction_model.is_none());
564        assert!(m.provider_temperature.is_none());
565        assert!(m.provider_top_p.is_none());
566        assert!(m.active_channel.is_empty());
567        assert!(m.embedding_model.is_empty());
568        assert!(m.token_budget.is_none());
569        assert!(!m.self_learning_enabled);
570        assert!(!m.semantic_cache_enabled);
571    }
572
573    #[test]
574    fn metrics_collector_update_phase2_fields() {
575        let (collector, rx) = MetricsCollector::new();
576        collector.update(|m| {
577            m.stt_model = Some("whisper-1".into());
578            m.compaction_model = Some("haiku".into());
579            m.provider_temperature = Some(0.7);
580            m.provider_top_p = Some(0.95);
581            m.active_channel = "tui".into();
582            m.embedding_model = "nomic-embed-text".into();
583            m.token_budget = Some(200_000);
584            m.self_learning_enabled = true;
585            m.semantic_cache_enabled = true;
586        });
587        let s = rx.borrow();
588        assert_eq!(s.stt_model.as_deref(), Some("whisper-1"));
589        assert_eq!(s.compaction_model.as_deref(), Some("haiku"));
590        assert_eq!(s.provider_temperature, Some(0.7));
591        assert_eq!(s.provider_top_p, Some(0.95));
592        assert_eq!(s.active_channel, "tui");
593        assert_eq!(s.embedding_model, "nomic-embed-text");
594        assert_eq!(s.token_budget, Some(200_000));
595        assert!(s.self_learning_enabled);
596        assert!(s.semantic_cache_enabled);
597    }
598
599    #[test]
600    fn metrics_collector_update() {
601        let (collector, rx) = MetricsCollector::new();
602        collector.update(|m| {
603            m.api_calls = 5;
604            m.total_tokens = 1000;
605        });
606        let snapshot = rx.borrow().clone();
607        assert_eq!(snapshot.api_calls, 5);
608        assert_eq!(snapshot.total_tokens, 1000);
609    }
610
611    #[test]
612    fn metrics_collector_multiple_updates() {
613        let (collector, rx) = MetricsCollector::new();
614        collector.update(|m| m.api_calls = 1);
615        collector.update(|m| m.api_calls += 1);
616        assert_eq!(rx.borrow().api_calls, 2);
617    }
618
619    #[test]
620    fn metrics_snapshot_clone() {
621        let mut m = MetricsSnapshot::default();
622        m.provider_name = "ollama".into();
623        let cloned = m.clone();
624        assert_eq!(cloned.provider_name, "ollama");
625    }
626
627    #[test]
628    fn filter_metrics_tracking() {
629        let (collector, rx) = MetricsCollector::new();
630        collector.update(|m| {
631            m.filter_raw_tokens += 250;
632            m.filter_saved_tokens += 200;
633            m.filter_applications += 1;
634        });
635        collector.update(|m| {
636            m.filter_raw_tokens += 100;
637            m.filter_saved_tokens += 80;
638            m.filter_applications += 1;
639        });
640        let s = rx.borrow();
641        assert_eq!(s.filter_raw_tokens, 350);
642        assert_eq!(s.filter_saved_tokens, 280);
643        assert_eq!(s.filter_applications, 2);
644    }
645
646    #[test]
647    fn filter_confidence_and_command_metrics() {
648        let (collector, rx) = MetricsCollector::new();
649        collector.update(|m| {
650            m.filter_total_commands += 1;
651            m.filter_filtered_commands += 1;
652            m.filter_confidence_full += 1;
653        });
654        collector.update(|m| {
655            m.filter_total_commands += 1;
656            m.filter_confidence_partial += 1;
657        });
658        let s = rx.borrow();
659        assert_eq!(s.filter_total_commands, 2);
660        assert_eq!(s.filter_filtered_commands, 1);
661        assert_eq!(s.filter_confidence_full, 1);
662        assert_eq!(s.filter_confidence_partial, 1);
663        assert_eq!(s.filter_confidence_fallback, 0);
664    }
665
666    #[test]
667    fn summaries_count_tracks_summarizations() {
668        let (collector, rx) = MetricsCollector::new();
669        collector.update(|m| m.summaries_count += 1);
670        collector.update(|m| m.summaries_count += 1);
671        assert_eq!(rx.borrow().summaries_count, 2);
672    }
673
674    #[test]
675    fn cancellations_counter_increments() {
676        let (collector, rx) = MetricsCollector::new();
677        assert_eq!(rx.borrow().cancellations, 0);
678        collector.update(|m| m.cancellations += 1);
679        collector.update(|m| m.cancellations += 1);
680        assert_eq!(rx.borrow().cancellations, 2);
681    }
682
683    #[test]
684    fn security_event_detail_exact_128_not_truncated() {
685        let s = "a".repeat(128);
686        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s.clone());
687        assert_eq!(ev.detail, s, "128-char string must not be truncated");
688    }
689
690    #[test]
691    fn security_event_detail_129_is_truncated() {
692        let s = "a".repeat(129);
693        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
694        assert!(
695            ev.detail.ends_with('…'),
696            "129-char string must end with ellipsis"
697        );
698        assert!(
699            ev.detail.len() <= 130,
700            "truncated detail must be at most 130 bytes"
701        );
702    }
703
704    #[test]
705    fn security_event_detail_multibyte_utf8_no_panic() {
706        // Each '中' is 3 bytes. 43 chars = 129 bytes — triggers truncation at a multi-byte boundary.
707        let s = "中".repeat(43);
708        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, "src", s);
709        assert!(ev.detail.ends_with('…'));
710    }
711
712    #[test]
713    fn security_event_source_capped_at_64_chars() {
714        let long_source = "x".repeat(200);
715        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, long_source, "detail");
716        assert_eq!(ev.source.len(), 64);
717    }
718
719    #[test]
720    fn security_event_source_strips_control_chars() {
721        let source = "tool\x00name\x1b[31m";
722        let ev = SecurityEvent::new(SecurityEventCategory::InjectionFlag, source, "detail");
723        assert!(!ev.source.contains('\x00'));
724        assert!(!ev.source.contains('\x1b'));
725    }
726
727    #[test]
728    fn security_event_category_as_str() {
729        assert_eq!(SecurityEventCategory::InjectionFlag.as_str(), "injection");
730        assert_eq!(SecurityEventCategory::ExfiltrationBlock.as_str(), "exfil");
731        assert_eq!(SecurityEventCategory::Quarantine.as_str(), "quarantine");
732        assert_eq!(SecurityEventCategory::Truncation.as_str(), "truncation");
733        assert_eq!(
734            SecurityEventCategory::CrossBoundaryMcpToAcp.as_str(),
735            "cross_boundary_mcp_to_acp"
736        );
737    }
738
739    #[test]
740    fn ring_buffer_respects_cap_via_update() {
741        let (collector, rx) = MetricsCollector::new();
742        for i in 0..110u64 {
743            let event = SecurityEvent::new(
744                SecurityEventCategory::InjectionFlag,
745                "src",
746                format!("event {i}"),
747            );
748            collector.update(|m| {
749                if m.security_events.len() >= SECURITY_EVENT_CAP {
750                    m.security_events.pop_front();
751                }
752                m.security_events.push_back(event);
753            });
754        }
755        let snap = rx.borrow();
756        assert_eq!(snap.security_events.len(), SECURITY_EVENT_CAP);
757        // FIFO: earliest events evicted, last one present
758        assert!(snap.security_events.back().unwrap().detail.contains("109"));
759    }
760
761    #[test]
762    fn security_events_empty_by_default() {
763        let m = MetricsSnapshot::default();
764        assert!(m.security_events.is_empty());
765    }
766
767    #[test]
768    fn orchestration_metrics_default_zero() {
769        let m = OrchestrationMetrics::default();
770        assert_eq!(m.plans_total, 0);
771        assert_eq!(m.tasks_total, 0);
772        assert_eq!(m.tasks_completed, 0);
773        assert_eq!(m.tasks_failed, 0);
774        assert_eq!(m.tasks_skipped, 0);
775    }
776
777    #[test]
778    fn metrics_snapshot_includes_orchestration_default_zero() {
779        let m = MetricsSnapshot::default();
780        assert_eq!(m.orchestration.plans_total, 0);
781        assert_eq!(m.orchestration.tasks_total, 0);
782        assert_eq!(m.orchestration.tasks_completed, 0);
783    }
784
785    #[test]
786    fn orchestration_metrics_update_via_collector() {
787        let (collector, rx) = MetricsCollector::new();
788        collector.update(|m| {
789            m.orchestration.plans_total += 1;
790            m.orchestration.tasks_total += 5;
791            m.orchestration.tasks_completed += 3;
792            m.orchestration.tasks_failed += 1;
793            m.orchestration.tasks_skipped += 1;
794        });
795        let s = rx.borrow();
796        assert_eq!(s.orchestration.plans_total, 1);
797        assert_eq!(s.orchestration.tasks_total, 5);
798        assert_eq!(s.orchestration.tasks_completed, 3);
799        assert_eq!(s.orchestration.tasks_failed, 1);
800        assert_eq!(s.orchestration.tasks_skipped, 1);
801    }
802
803    #[test]
804    fn strip_ctrl_removes_escape_sequences() {
805        let input = "hello\x1b[31mworld\x00end";
806        let result = strip_ctrl(input);
807        assert_eq!(result, "helloworldend");
808    }
809
810    #[test]
811    fn strip_ctrl_allows_tab_lf_cr() {
812        let input = "a\tb\nc\rd";
813        let result = strip_ctrl(input);
814        assert_eq!(result, "a\tb\nc\rd");
815    }
816
817    #[test]
818    fn task_graph_snapshot_is_stale_after_30s() {
819        let mut snap = TaskGraphSnapshot::default();
820        // Not stale if no completed_at.
821        assert!(!snap.is_stale());
822        // Not stale if just completed.
823        snap.completed_at = Some(std::time::Instant::now());
824        assert!(!snap.is_stale());
825        // Stale if completed more than 30s ago.
826        snap.completed_at = Some(
827            std::time::Instant::now()
828                .checked_sub(std::time::Duration::from_secs(31))
829                .unwrap(),
830        );
831        assert!(snap.is_stale());
832    }
833
834    // T1: From<&TaskGraph> correctly maps fields including duration_ms and error truncation.
835    #[test]
836    fn task_graph_snapshot_from_task_graph_maps_fields() {
837        use zeph_orchestration::{GraphStatus, TaskGraph, TaskNode, TaskResult, TaskStatus};
838
839        let mut graph = TaskGraph::new("My goal");
840        let mut task = TaskNode::new(0, "Do work", "description");
841        task.status = TaskStatus::Failed;
842        task.assigned_agent = Some("agent-1".into());
843        task.result = Some(TaskResult {
844            output: "error occurred here".into(),
845            artifacts: vec![],
846            duration_ms: 1234,
847            agent_id: None,
848            agent_def: None,
849        });
850        graph.tasks.push(task);
851        graph.status = GraphStatus::Failed;
852
853        let snap = TaskGraphSnapshot::from(&graph);
854        assert_eq!(snap.goal, "My goal");
855        assert_eq!(snap.status, "failed");
856        assert_eq!(snap.tasks.len(), 1);
857        let row = &snap.tasks[0];
858        assert_eq!(row.title, "Do work");
859        assert_eq!(row.status, "failed");
860        assert_eq!(row.agent.as_deref(), Some("agent-1"));
861        assert_eq!(row.duration_ms, 1234);
862        assert!(row.error.as_deref().unwrap().contains("error occurred"));
863    }
864
865    // T2: From impl compiles with orchestration feature active.
866    #[test]
867    fn task_graph_snapshot_from_compiles_with_feature() {
868        use zeph_orchestration::TaskGraph;
869        let graph = TaskGraph::new("feature flag test");
870        let snap = TaskGraphSnapshot::from(&graph);
871        assert_eq!(snap.goal, "feature flag test");
872        assert!(snap.tasks.is_empty());
873        assert!(!snap.is_stale());
874    }
875
876    // T1-extra: long error is truncated with ellipsis.
877    #[test]
878    fn task_graph_snapshot_error_truncated_at_80_chars() {
879        use zeph_orchestration::{TaskGraph, TaskNode, TaskResult, TaskStatus};
880
881        let mut graph = TaskGraph::new("goal");
882        let mut task = TaskNode::new(0, "t", "d");
883        task.status = TaskStatus::Failed;
884        task.result = Some(TaskResult {
885            output: "e".repeat(100),
886            artifacts: vec![],
887            duration_ms: 0,
888            agent_id: None,
889            agent_def: None,
890        });
891        graph.tasks.push(task);
892
893        let snap = TaskGraphSnapshot::from(&graph);
894        let err = snap.tasks[0].error.as_ref().unwrap();
895        assert!(err.ends_with('…'), "truncated error must end with ellipsis");
896        assert!(
897            err.len() <= 83,
898            "truncated error must not exceed 80 chars + ellipsis"
899        );
900    }
901
902    // SEC-P6-01: control chars in task title are stripped.
903    #[test]
904    fn task_graph_snapshot_strips_control_chars_from_title() {
905        use zeph_orchestration::{TaskGraph, TaskNode};
906
907        let mut graph = TaskGraph::new("goal\x1b[31m");
908        let task = TaskNode::new(0, "title\x00injected", "d");
909        graph.tasks.push(task);
910
911        let snap = TaskGraphSnapshot::from(&graph);
912        assert!(!snap.goal.contains('\x1b'), "goal must not contain escape");
913        assert!(
914            !snap.tasks[0].title.contains('\x00'),
915            "title must not contain null byte"
916        );
917    }
918
919    #[test]
920    fn graph_metrics_default_zero() {
921        let m = MetricsSnapshot::default();
922        assert_eq!(m.graph_entities_total, 0);
923        assert_eq!(m.graph_edges_total, 0);
924        assert_eq!(m.graph_communities_total, 0);
925        assert_eq!(m.graph_extraction_count, 0);
926        assert_eq!(m.graph_extraction_failures, 0);
927    }
928
929    #[test]
930    fn graph_metrics_update_via_collector() {
931        let (collector, rx) = MetricsCollector::new();
932        collector.update(|m| {
933            m.graph_entities_total = 5;
934            m.graph_edges_total = 10;
935            m.graph_communities_total = 2;
936            m.graph_extraction_count = 7;
937            m.graph_extraction_failures = 1;
938        });
939        let snapshot = rx.borrow().clone();
940        assert_eq!(snapshot.graph_entities_total, 5);
941        assert_eq!(snapshot.graph_edges_total, 10);
942        assert_eq!(snapshot.graph_communities_total, 2);
943        assert_eq!(snapshot.graph_extraction_count, 7);
944        assert_eq!(snapshot.graph_extraction_failures, 1);
945    }
946
947    #[test]
948    fn histogram_recorder_trait_is_object_safe() {
949        use std::sync::Arc;
950        use std::time::Duration;
951
952        struct NoOpRecorder;
953        impl HistogramRecorder for NoOpRecorder {
954            fn observe_llm_latency(&self, _: Duration) {}
955            fn observe_turn_duration(&self, _: Duration) {}
956            fn observe_tool_execution(&self, _: Duration) {}
957            fn observe_bg_task(&self, _: &str, _: Duration) {}
958        }
959
960        // Verify the trait can be used as a trait object (object-safe).
961        let recorder: Arc<dyn HistogramRecorder> = Arc::new(NoOpRecorder);
962        recorder.observe_llm_latency(Duration::from_millis(500));
963        recorder.observe_turn_duration(Duration::from_millis(3000));
964        recorder.observe_tool_execution(Duration::from_millis(100));
965    }
966}
zeph_core/metrics.rs

zeph_core/
metrics.rs