Skip to main content

zeph_core/
notifications.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Best-effort per-turn completion notifier.
5//!
6//! Fires after each agent turn completes via two independent channels:
7//! - **macOS native** — `osascript` banner via stdin (no argument-injection risk)
8//! - **ntfy webhook** — JSON POST to an ntfy-compatible endpoint
9//!
10//! All notifications are fire-and-forget: failures are logged at `warn` level and
11//! never propagated to the caller. Secrets are redacted before any payload leaves
12//! the process.
13//!
14//! # Gating
15//!
16//! [`Notifier::should_fire`] applies all gate conditions in order:
17//! 1. Master `enabled` switch must be `true`
18//! 2. `llm_requests == 0` on a successful turn → skip (slash commands, cache-only, security-blocked turns); zero-LLM error turns still fire
19//! 3. `only_on_error && !is_error` → skip
20//! 4. Duration gate (`min_turn_duration_ms`) applies only to successful turns;
21//!    error turns always fire regardless of duration
22//!
23//! # Examples
24//!
25//! ```no_run
26//! use zeph_core::notifications::{Notifier, TurnSummary, TurnExitStatus};
27//! use zeph_config::NotificationsConfig;
28//!
29//! let cfg = NotificationsConfig {
30//!     enabled: true,
31//!     macos_native: true,
32//!     ..Default::default()
33//! };
34//! let notifier = Notifier::new(cfg);
35//! let summary = TurnSummary {
36//!     duration_ms: 5000,
37//!     preview: "Done. Files updated.".to_owned(),
38//!     tool_calls: 2,
39//!     llm_requests: 1,
40//!     exit_status: TurnExitStatus::Success,
41//! };
42//! // Fire and forget — errors are logged, never propagated.
43//! notifier.fire(&summary);
44//! ```
45
46use std::time::Duration;
47
48use serde::Serialize;
49use tracing::warn;
50use zeph_config::NotificationsConfig;
51
52use crate::redact::scrub_content;
53
54// ── Public types ─────────────────────────────────────────────────────────────
55
/// Outcome of a finished agent turn, as seen by the notification layer.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must keep a
/// wildcard arm when matching on it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum TurnExitStatus {
    /// The turn finished without an error.
    Success,
    /// The turn finished with an error (tool failure, LLM error, etc.).
    Error,
}
65
66/// Lightweight summary of a completed agent turn used as notification input.
67///
68/// Built by the agent loop after `channel.flush_chunks()` and passed to
69/// [`Notifier::fire`]. Contains only what is needed for gate decisions and
70/// notification body assembly — no LLM payloads or raw tool outputs.
71#[derive(Debug, Clone)]
72pub struct TurnSummary {
73    /// Total wall-clock duration of the turn in milliseconds.
74    pub duration_ms: u64,
75    /// First ≤ 160 chars of the assistant response, already redacted by the caller.
76    pub preview: String,
77    /// Number of tool calls dispatched this turn.
78    pub tool_calls: u32,
79    /// Number of completed LLM round-trips this turn.
80    /// Zero for slash commands, cache-only turns, and security-blocked inputs.
81    pub llm_requests: u32,
82    /// Whether the turn ended with an error.
83    pub exit_status: TurnExitStatus,
84}
85
86/// Per-turn completion notifier.
87///
88/// Holds a shared [`reqwest::Client`] and the resolved config. Construct once at
89/// agent startup via [`Notifier::new`] and call [`Notifier::fire`] after each turn.
90///
91/// All I/O is spawned onto the Tokio runtime via `tokio::spawn`; `fire` returns
92/// immediately without blocking the agent loop.
93///
94/// Cloning is cheap — `reqwest::Client` is an `Arc`-backed handle.
95#[derive(Clone)]
96pub struct Notifier {
97    cfg: NotificationsConfig,
98    http: reqwest::Client,
99}
100
101impl Notifier {
102    /// Create a notifier from a [`NotificationsConfig`].
103    ///
104    /// Constructs a shared HTTP client with a 5-second connect timeout. The client
105    /// is reused across all webhook calls for the agent session.
106    ///
107    /// If `webhook_url` is set but fails URL validation (unparseable or non-HTTP(S)
108    /// scheme), it is cleared to `None` and a warning is logged. This prevents SSRF
109    /// via malformed URLs (e.g. `file://`, `ftp://`).
110    #[must_use]
111    pub fn new(cfg: NotificationsConfig) -> Self {
112        let http = reqwest::Client::builder()
113            .connect_timeout(Duration::from_secs(5))
114            .timeout(Duration::from_secs(5))
115            .build()
116            .unwrap_or_default();
117        let mut cfg = cfg;
118        if cfg
119            .webhook_url
120            .as_deref()
121            .is_some_and(|url| !validate_webhook_url(url, cfg.webhook_allow_insecure))
122        {
123            cfg.webhook_url = None;
124        }
125        Self { cfg, http }
126    }
127
128    /// Evaluate all gate conditions and return `true` when the notification should fire.
129    ///
130    /// Gates applied in order (all must pass):
131    /// 1. `enabled` is `true`
132    /// 2. `summary.llm_requests > 0` (zero-LLM turns are never notified)
133    /// 3. If `only_on_error`: turn must have errored
134    /// 4. For successful turns: `duration_ms >= min_turn_duration_ms`
135    ///    (error turns bypass the duration gate)
136    #[must_use]
137    pub fn should_fire(&self, summary: &TurnSummary) -> bool {
138        if !self.cfg.enabled {
139            return false;
140        }
141        // Gate S6: never notify for zero-LLM turns (slash commands, cache hits, etc.)
142        // Exception M8 from critic: allow zero-LLM errors through so setup failures surface.
143        if summary.llm_requests == 0 && summary.exit_status == TurnExitStatus::Success {
144            return false;
145        }
146        match summary.exit_status {
147            // Gate S4: errors always fire, bypassing the duration gate.
148            TurnExitStatus::Error => true,
149            TurnExitStatus::Success => {
150                if self.cfg.only_on_error {
151                    return false;
152                }
153                // Duration gate applies only to successful turns.
154                summary.duration_ms >= self.cfg.min_turn_duration_ms
155            }
156        }
157    }
158
159    /// Fire all enabled notification channels for this turn summary.
160    ///
161    /// Returns immediately — all I/O is spawned as a background task. Failures
162    /// are logged at `warn` level and never propagated. The spawned task has an
163    /// internal 5-second per-channel timeout.
164    pub fn fire(&self, summary: &TurnSummary) {
165        let cfg = self.cfg.clone();
166        let http = self.http.clone();
167        let summary = summary.clone();
168
169        tokio::spawn(async move {
170            fire_all_channels(&cfg, &http, &summary).await;
171        });
172    }
173
174    /// Fire a test notification with a fixed message.
175    ///
176    /// Used by the `zeph notify test` CLI subcommand. Returns an error if all
177    /// channels are disabled or if every channel failed.
178    ///
179    /// # Errors
180    ///
181    /// - `NotifyTestError::AllDisabled` — no channel is enabled
182    /// - `NotifyTestError::MacOsFailed` — macOS notification failed (macOS only)
183    /// - `NotifyTestError::WebhookFailed` — webhook POST failed
184    pub async fn fire_test(&self) -> Result<(), NotifyTestError> {
185        if !self.cfg.enabled {
186            return Err(NotifyTestError::MasterSwitchDisabled);
187        }
188
189        let macos_enabled = self.cfg.macos_native;
190        let webhook_enabled = self.cfg.webhook_url.is_some() && self.cfg.webhook_topic.is_some();
191
192        if !macos_enabled && !webhook_enabled {
193            return Err(NotifyTestError::AllDisabled);
194        }
195
196        let summary = TurnSummary {
197            duration_ms: 0,
198            preview: "Zeph is working".to_owned(),
199            tool_calls: 0,
200            llm_requests: 1,
201            exit_status: TurnExitStatus::Success,
202        };
203
204        #[cfg(target_os = "macos")]
205        if macos_enabled {
206            fire_macos_native(&self.cfg.title, "Zeph is working")
207                .await
208                .map_err(|e| NotifyTestError::MacOsFailed(e.to_string()))?;
209        }
210
211        if let (Some(url), Some(topic)) = (&self.cfg.webhook_url, &self.cfg.webhook_topic) {
212            fire_webhook(&self.http, url, &self.cfg.title, topic, &summary)
213                .await
214                .map_err(|e| NotifyTestError::WebhookFailed(e.to_string()))?;
215        }
216
217        Ok(())
218    }
219}
220
221#[non_exhaustive]
222/// Error returned by [`Notifier::fire_test`].
223#[derive(Debug, thiserror::Error)]
224pub enum NotifyTestError {
225    /// The master `notifications.enabled` switch is `false`.
226    #[error("notifications are disabled (set notifications.enabled = true to enable)")]
227    MasterSwitchDisabled,
228    /// No channels are enabled in the current configuration.
229    #[error("all notification channels are disabled")]
230    AllDisabled,
231    /// macOS notification failed.
232    #[error("macOS notification failed: {0}")]
233    MacOsFailed(String),
234    /// Webhook POST failed.
235    #[error("webhook notification failed: {0}")]
236    WebhookFailed(String),
237}
238
239// ── Internal helpers ──────────────────────────────────────────────────────────
240
241/// Fire all enabled channels for `summary`. Called from a spawned task.
242async fn fire_all_channels(
243    cfg: &NotificationsConfig,
244    http: &reqwest::Client,
245    summary: &TurnSummary,
246) {
247    let title = &cfg.title;
248
249    #[cfg(target_os = "macos")]
250    {
251        let message = build_notification_message(summary);
252        if cfg.macos_native
253            && let Err(e) = fire_macos_native(title, &message).await
254        {
255            warn!(error = %e, "macOS notification failed");
256        }
257    }
258
259    if let (Some(url), Some(topic)) = (&cfg.webhook_url, &cfg.webhook_topic)
260        && let Err(e) = fire_webhook(http, url, title, topic, summary).await
261    {
262        warn!(error = %e, "webhook notification failed");
263    }
264}
265
266/// Build the notification body from a turn summary, applying secret redaction.
267fn build_notification_message(summary: &TurnSummary) -> String {
268    let status = if summary.exit_status == TurnExitStatus::Error {
269        "Error"
270    } else {
271        "Done"
272    };
273
274    // Apply scrub_content to redact any secrets that may be in the preview.
275    let safe_preview = scrub_content(&summary.preview);
276
277    if safe_preview.is_empty() {
278        format!("{status} — {dur}ms", dur = summary.duration_ms)
279    } else {
280        format!(
281            "{status} — {dur}ms\n{preview}",
282            dur = summary.duration_ms,
283            preview = safe_preview,
284        )
285    }
286}
287
/// Sanitize a string for safe inclusion inside an `AppleScript` `"..."` literal.
///
/// Steps applied in order (order is important):
/// 1. Replace every Unicode control character (this includes `\n`, `\r` and
///    `\t`) as well as U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR
///    with a single space — banners are single-line only.
/// 2. Truncate to `max` chars (not bytes), appending `…` when text was cut.
/// 3. Remove every `\` and `"`. Inside an `AppleScript` literal `"` terminates
///    the string and `\` introduces an escape sequence, so dropping both keeps
///    the payload from closing the literal or escaping its closing quote.
///
/// Stripping happens after truncation, so the result can be shorter than
/// `max` chars even when the input was cut. A truncated result holds at most
/// `max + 1` chars (the extra one is the `…`).
///
/// # Examples
///
/// ```
/// # use zeph_core::notifications::sanitize_applescript_payload;
/// let s = sanitize_applescript_payload("Hello\nWorld\"", 200);
/// assert_eq!(s, "Hello World");
/// ```
#[must_use]
pub fn sanitize_applescript_payload(s: &str, max: usize) -> String {
    // Step 1: flatten anything that would break a single-line string literal.
    // U+2028 / U+2029 are not in the Unicode Cc category, hence the explicit check.
    let mut out: String = s
        .chars()
        .map(|c| {
            if c.is_control() || matches!(c, '\u{2028}' | '\u{2029}') {
                ' '
            } else {
                c
            }
        })
        .collect();

    // Step 2: the byte offset of char number `max` exists only when the text is
    // longer than `max` chars, so a single lookup both detects and locates the cut.
    let cut = out.char_indices().nth(max).map(|(offset, _)| offset);
    if let Some(offset) = cut {
        out.truncate(offset);
        out.push('…');
    }

    // Step 3: drop the two characters that are special inside the literal.
    out.retain(|c| !matches!(c, '\\' | '"'));
    out
}
338
339/// Validate a webhook URL for safe use as a notification endpoint.
340///
341/// Returns `true` when the URL is acceptable. Logs a warning and returns `false`
342/// when the URL is unparseable or uses a non-HTTP(S) scheme. Accepts `http://`
343/// only when `allow_insecure` is `true` (opt-in for local testing).
344fn validate_webhook_url(url: &str, allow_insecure: bool) -> bool {
345    match url.parse::<reqwest::Url>() {
346        Ok(parsed) => {
347            if parsed.scheme() == "https" {
348                return true;
349            }
350            if allow_insecure && parsed.scheme() == "http" {
351                warn!(
352                    "webhook_url uses insecure HTTP scheme; set webhook_allow_insecure=false for production"
353                );
354                return true;
355            }
356            warn!(
357                scheme = parsed.scheme(),
358                "webhook_url has non-HTTP(S) scheme — channel disabled"
359            );
360            false
361        }
362        Err(e) => {
363            warn!(error = %e, "webhook_url is not a valid URL — channel disabled");
364            false
365        }
366    }
367}
368
/// Fire a macOS Notification Center banner via osascript (stdin-fed to avoid arg injection).
///
/// `title` and `message` are sanitized with [`sanitize_applescript_payload`]
/// (120 and 240 chars respectively) before being embedded in the script.
///
/// # Errors
///
/// Returns an error when `osascript` cannot be spawned or the script cannot be
/// written to its stdin. The exit status of `osascript` is ignored, and a
/// timeout while waiting for it is not treated as an error (best-effort).
#[cfg(target_os = "macos")]
async fn fire_macos_native(
    title: &str,
    message: &str,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    use tokio::io::AsyncWriteExt as _;
    use tokio::process::Command;

    let safe_title = sanitize_applescript_payload(title, 120);
    let safe_message = sanitize_applescript_payload(message, 240);

    let script = format!(r#"display notification "{safe_message}" with title "{safe_title}""#);

    let mut child = Command::new("osascript")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        // If the wait below times out the child handle is dropped; make sure the
        // process is killed then instead of lingering in the background.
        .kill_on_drop(true)
        .spawn()?;

    if let Some(mut stdin) = child.stdin.take() {
        stdin.write_all(script.as_bytes()).await?;
        // Closing stdin signals end-of-script to osascript.
        stdin.shutdown().await?;
    }

    // Wait up to 5s for osascript to complete; ignore exit status (best-effort).
    let _ = tokio::time::timeout(Duration::from_secs(5), child.wait()).await;

    Ok(())
}
399
400/// ntfy-compatible JSON webhook body.
401///
402/// Matches the [ntfy publish-as-JSON](https://docs.ntfy.sh/publish/#publish-as-json) schema.
403#[derive(Serialize)]
404struct NtfyWebhookBody<'a> {
405    topic: &'a str,
406    title: &'a str,
407    message: &'a str,
408    tags: Vec<&'a str>,
409    /// Priority 1–5. Default 3; error turns use 4.
410    priority: u8,
411}
412
413/// POST a notification to an ntfy-compatible JSON endpoint.
414async fn fire_webhook(
415    client: &reqwest::Client,
416    url: &str,
417    title: &str,
418    topic: &str,
419    summary: &TurnSummary,
420) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
421    let message = build_notification_message(summary);
422    let (tags, priority) = if summary.exit_status == TurnExitStatus::Error {
423        (vec!["zeph", "error"], 4u8)
424    } else {
425        (vec!["zeph", "turn-complete"], 3u8)
426    };
427
428    let body = NtfyWebhookBody {
429        topic,
430        title,
431        message: &message,
432        tags,
433        priority,
434    };
435
436    // Timeout is already set on the client (5s), but wrap for clarity.
437    tokio::time::timeout(Duration::from_secs(5), client.post(url).json(&body).send())
438        .await??
439        .error_for_status()?;
440
441    Ok(())
442}
443
444// ── Tests ─────────────────────────────────────────────────────────────────────
445
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::NotificationsConfig;

    /// Build a notifier from `cfg` through the public constructor.
    fn make_notifier(cfg: NotificationsConfig) -> Notifier {
        Notifier::new(cfg)
    }

    /// Successful turn with the given duration and LLM round-trip count.
    fn success_summary(duration_ms: u64, llm_requests: u32) -> TurnSummary {
        TurnSummary {
            duration_ms,
            preview: "All done.".to_owned(),
            tool_calls: 0,
            llm_requests,
            exit_status: TurnExitStatus::Success,
        }
    }

    /// Failed turn with the given duration and LLM round-trip count.
    fn error_summary(duration_ms: u64, llm_requests: u32) -> TurnSummary {
        TurnSummary {
            duration_ms,
            preview: "Error occurred.".to_owned(),
            tool_calls: 0,
            llm_requests,
            exit_status: TurnExitStatus::Error,
        }
    }

    // ── should_fire gate tests ────────────────────────────────────────────────

    #[test]
    fn should_fire_disabled_master_switch() {
        let n = make_notifier(NotificationsConfig {
            enabled: false,
            ..Default::default()
        });
        assert!(!n.should_fire(&success_summary(5000, 1)));
    }

    #[test]
    fn should_fire_disabled_master_switch_blocks_errors_too() {
        let n = make_notifier(NotificationsConfig {
            enabled: false,
            ..Default::default()
        });
        assert!(!n.should_fire(&error_summary(5000, 1)));
    }

    #[test]
    fn should_fire_zero_llm_success_skipped() {
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            ..Default::default()
        });
        // Zero-LLM successful turns (slash commands, cache hits) are never notified.
        assert!(!n.should_fire(&success_summary(0, 0)));
    }

    #[test]
    fn should_fire_zero_llm_error_fires() {
        // Critic M8: zero-LLM errors (setup failures) should still fire.
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            ..Default::default()
        });
        assert!(n.should_fire(&error_summary(0, 0)));
    }

    #[test]
    fn should_fire_only_on_error_skips_success() {
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            only_on_error: true,
            ..Default::default()
        });
        assert!(!n.should_fire(&success_summary(5000, 1)));
    }

    #[test]
    fn should_fire_only_on_error_fires_on_error() {
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            only_on_error: true,
            ..Default::default()
        });
        assert!(n.should_fire(&error_summary(100, 1)));
    }

    #[test]
    fn should_fire_duration_gate_success_below_threshold() {
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            min_turn_duration_ms: 3000,
            ..Default::default()
        });
        assert!(!n.should_fire(&success_summary(2999, 1)));
    }

    #[test]
    fn should_fire_duration_gate_success_at_threshold() {
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            min_turn_duration_ms: 3000,
            ..Default::default()
        });
        assert!(n.should_fire(&success_summary(3000, 1)));
    }

    #[test]
    fn should_fire_error_bypasses_duration_gate() {
        // Gate S4: errors always fire even when below min_turn_duration_ms.
        let n = make_notifier(NotificationsConfig {
            enabled: true,
            min_turn_duration_ms: 3000,
            ..Default::default()
        });
        assert!(n.should_fire(&error_summary(100, 1)));
    }

    // ── sanitize_applescript_payload tests ────────────────────────────────────

    #[test]
    fn sanitize_control_chars_replaced_with_space() {
        let result = sanitize_applescript_payload("Hello\nWorld", 200);
        // Newline becomes space, no quotes broken
        assert!(!result.contains('\n'));
        assert!(result.contains("Hello World"));
    }

    #[test]
    fn sanitize_quotes_stripped() {
        // Double quotes would terminate the AppleScript literal, so they are stripped.
        let result = sanitize_applescript_payload(r#"say "hi""#, 200);
        assert!(!result.contains('"'));
        assert_eq!(result, "say hi");
    }

    #[test]
    fn sanitize_backslash_stripped() {
        // Backslashes are stripped so the payload cannot form escape sequences.
        let result = sanitize_applescript_payload(r"C:\Users\foo", 200);
        assert_eq!(result, "C:Usersfoo");
    }

    #[test]
    fn sanitize_truncation_appends_ellipsis() {
        let long = "a".repeat(300);
        let result = sanitize_applescript_payload(&long, 200);
        assert!(result.ends_with('…'));
        // Char count should be max + 1 for the ellipsis.
        assert_eq!(result.chars().count(), 201);
    }

    #[test]
    fn sanitize_truncation_counts_chars_not_bytes() {
        // Multi-byte characters must not be split or miscounted.
        let result = sanitize_applescript_payload("ééé", 2);
        assert_eq!(result, "éé…");
    }

    #[test]
    fn sanitize_truncates_before_stripping() {
        // The cut is taken on the unstripped text, so quotes count towards `max`.
        let result = sanitize_applescript_payload("a\"b\"c", 3);
        assert_eq!(result, "ab…");
    }

    #[test]
    fn sanitize_no_truncation_when_short() {
        let result = sanitize_applescript_payload("short", 200);
        assert_eq!(result, "short");
    }

    #[test]
    fn sanitize_injection_attempt() {
        // Classic AppleScript injection via closing the string and calling display dialog.
        let payload = r#""; display dialog "gotcha"; ""#;
        let result = sanitize_applescript_payload(payload, 200);
        // Every `"` must be removed so the payload cannot terminate the outer string.
        assert!(!result.contains('"'));
        assert_eq!(result, "; display dialog gotcha; ");
    }

    #[test]
    fn sanitize_injection_attempt_with_backslash() {
        // A trailing backslash must not survive to escape the closing quote.
        let result = sanitize_applescript_payload(r#"oops\"#, 200);
        assert!(!result.contains('\\'));
        assert_eq!(result, "oops");
    }

    #[test]
    fn sanitize_applescript_payload_empty() {
        assert_eq!(sanitize_applescript_payload("", 200), "");
    }

    #[test]
    fn sanitize_tab_replaced() {
        let result = sanitize_applescript_payload("a\tb", 200);
        assert_eq!(result, "a b");
    }

    #[test]
    fn sanitize_line_separators() {
        let s = "hello\u{2028}world\u{2029}end";
        let result = sanitize_applescript_payload(s, 200);
        assert!(!result.contains('\u{2028}'));
        assert!(!result.contains('\u{2029}'));
        assert_eq!(result, "hello world end");
    }

    // ── validate_webhook_url tests ────────────────────────────────────────────

    #[test]
    fn validate_accepts_https() {
        assert!(validate_webhook_url("https://ntfy.sh", false));
        assert!(validate_webhook_url("https://ntfy.sh", true));
    }

    #[test]
    fn validate_http_requires_opt_in() {
        assert!(!validate_webhook_url("http://localhost:8080", false));
        assert!(validate_webhook_url("http://localhost:8080", true));
    }

    #[test]
    fn validate_rejects_other_schemes() {
        assert!(!validate_webhook_url("file:///etc/passwd", true));
        assert!(!validate_webhook_url("ftp://example.com/topic", true));
    }

    #[test]
    fn validate_rejects_unparseable() {
        assert!(!validate_webhook_url("not a url", true));
        assert!(!validate_webhook_url("", true));
    }

    // ── build_notification_message tests ──────────────────────────────────────

    #[test]
    fn notification_message_success() {
        let summary = success_summary(1234, 1);
        let msg = build_notification_message(&summary);
        assert!(msg.starts_with("Done"));
        assert!(msg.contains("1234ms"));
    }

    #[test]
    fn notification_message_error() {
        let summary = error_summary(500, 1);
        let msg = build_notification_message(&summary);
        assert!(msg.starts_with("Error"));
    }

    #[test]
    fn notification_message_redacts_secrets() {
        let summary = TurnSummary {
            duration_ms: 100,
            preview: "Done. Key: sk-abc123xyz".to_owned(),
            tool_calls: 0,
            llm_requests: 1,
            exit_status: TurnExitStatus::Success,
        };
        let msg = build_notification_message(&summary);
        assert!(!msg.contains("sk-abc123xyz"), "secret must be redacted");
        assert!(
            msg.contains("[REDACTED]"),
            "should contain redaction marker"
        );
    }
}