aardvark_core/
host.rs

1//! Host-facing helpers for consuming sandbox diagnostics.
2
3use crate::outcome::{
4    Diagnostics, FilesystemViolation, NetworkDeniedHost, NetworkHostContact, ResetMode,
5    ResetSummary,
6};
7use crate::persistent::PoolStats;
8
9/// Aggregated telemetry derived from [`Diagnostics`] for host integrations.
10#[derive(Clone, Debug, Default)]
11pub struct SandboxTelemetry {
12    /// CPU milliseconds consumed by the guest (thread time).
13    pub cpu_ms_used: Option<u64>,
14    /// Milliseconds spent waiting for an isolate when coming from a pool.
15    pub queue_wait_ms: Option<u64>,
16    /// Milliseconds spent preparing the runtime before invocation.
17    pub prepare_ms: Option<u64>,
18    /// Milliseconds spent cleaning up after invocation.
19    pub cleanup_ms: Option<u64>,
20    /// Filesystem activity summary (bytes written + violations).
21    pub filesystem: FilesystemTelemetry,
22    /// Network allow/deny observations.
23    pub network: NetworkTelemetry,
24    /// Reset behaviour captured before the call.
25    pub reset: Option<ResetTelemetry>,
26    /// RSS and heap usage snapshots collected around the call.
27    pub memory: MemoryTelemetry,
28}
29
30/// Filesystem usage and violation details.
31#[derive(Clone, Debug, Default)]
32pub struct FilesystemTelemetry {
33    /// Bytes written to the writable sandbox during the invocation.
34    pub bytes_written: Option<u64>,
35    /// Policy violations observed while enforcing filesystem rules.
36    pub violations: Vec<FilesystemViolation>,
37}
38
39/// Network allow/deny observations captured during execution.
40#[derive(Clone, Debug, Default)]
41pub struct NetworkTelemetry {
42    /// Hosts successfully contacted under the allowlist.
43    pub allowed: Vec<NetworkHostContact>,
44    /// Hosts that were blocked by policy enforcement.
45    pub blocked: Vec<NetworkDeniedHost>,
46}
47
/// Memory usage snapshots taken during execution; all values are in KiB.
#[derive(Debug, Clone, Default)]
pub struct MemoryTelemetry {
    /// Python heap usage as reported by the guest during the call.
    pub py_heap_kib: Option<u64>,
    /// Host process RSS sampled before the invocation.
    pub rss_kib_before: Option<u64>,
    /// Host process RSS sampled after the invocation.
    pub rss_kib_after: Option<u64>,
}
58
59/// Reset data captured prior to invocation.
60#[derive(Clone, Debug)]
61pub struct ResetTelemetry {
62    /// Reset mode applied before invocation (in-place vs full recreate).
63    pub mode: ResetMode,
64    /// Duration of the reset in milliseconds.
65    pub duration_ms: u64,
66    /// Engine generation identifier after the reset.
67    pub engine_generation: u64,
68}
69
/// Pool-wide telemetry snapshot built from [`PoolStats`].
#[derive(Debug, Clone, Default)]
pub struct PoolTelemetry {
    /// Count of all isolates the pool manages.
    pub total_isolates: usize,
    /// Isolates that are idle and ready to accept work.
    pub idle_isolates: usize,
    /// Isolates that are currently executing handlers.
    pub busy_isolates: usize,
    /// Calls sitting in the pool queue.
    pub waiting_calls: usize,
    /// Invocations served in total since startup.
    pub invocations: u64,
    /// Mean queue wait in milliseconds over a rolling window.
    pub average_queue_wait_ms: f64,
    /// Median (p50) queue wait in milliseconds, when computed.
    pub queue_wait_p50_ms: Option<f64>,
    /// p95 queue wait in milliseconds, when computed.
    pub queue_wait_p95_ms: Option<f64>,
    /// How many times isolates were quarantined overall.
    pub quarantine_events: u64,
    /// Quarantines triggered by heap guard rails.
    pub quarantine_heap_hits: u64,
    /// Quarantines triggered by RSS guard rails.
    pub quarantine_rss_hits: u64,
    /// Deliberate pool scale-down occurrences.
    pub scaledown_events: u64,
}
98
99impl From<&Diagnostics> for SandboxTelemetry {
100    fn from(value: &Diagnostics) -> Self {
101        Self {
102            cpu_ms_used: value.cpu_ms_used,
103            queue_wait_ms: value.queue_wait_ms,
104            prepare_ms: value.prepare_ms,
105            cleanup_ms: value.cleanup_ms,
106            filesystem: FilesystemTelemetry {
107                bytes_written: value.filesystem_bytes_written,
108                violations: value.filesystem_violations.clone(),
109            },
110            network: NetworkTelemetry {
111                allowed: value.network_hosts_contacted.clone(),
112                blocked: value.network_hosts_blocked.clone(),
113            },
114            reset: value.reset.as_ref().map(ResetTelemetry::from),
115            memory: MemoryTelemetry {
116                py_heap_kib: value.py_heap_kib,
117                rss_kib_before: value.rss_kib_before,
118                rss_kib_after: value.rss_kib_after,
119            },
120        }
121    }
122}
123
124impl From<&ResetSummary> for ResetTelemetry {
125    fn from(summary: &ResetSummary) -> Self {
126        Self {
127            mode: summary.mode.clone(),
128            duration_ms: summary.duration_ms,
129            engine_generation: summary.engine_generation,
130        }
131    }
132}
133
134impl SandboxTelemetry {
135    /// Returns `true` when any sandbox policy blocked the invocation.
136    pub fn has_policy_violations(&self) -> bool {
137        !self.network.blocked.is_empty() || !self.filesystem.violations.is_empty()
138    }
139}
140
141impl From<&PoolStats> for PoolTelemetry {
142    fn from(stats: &PoolStats) -> Self {
143        Self {
144            total_isolates: stats.total,
145            idle_isolates: stats.idle,
146            busy_isolates: stats.busy,
147            waiting_calls: stats.waiting,
148            invocations: stats.invocations,
149            average_queue_wait_ms: stats.average_queue_wait_ms,
150            queue_wait_p50_ms: stats.queue_wait_p50_ms,
151            queue_wait_p95_ms: stats.queue_wait_p95_ms,
152            quarantine_events: stats.quarantine_events,
153            quarantine_heap_hits: stats.quarantine_heap_hits,
154            quarantine_rss_hits: stats.quarantine_rss_hits,
155            scaledown_events: stats.scaledown_events,
156        }
157    }
158}