Skip to main content

zeph_core/
notifications.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Best-effort per-turn completion notifier.
5//!
6//! Fires after each agent turn completes via two independent channels:
7//! - **macOS native** — `osascript` banner via stdin (no argument-injection risk)
8//! - **ntfy webhook** — JSON POST to an ntfy-compatible endpoint
9//!
10//! All notifications are fire-and-forget: failures are logged at `warn` level and
11//! never propagated to the caller. Secrets are redacted before any payload leaves
12//! the process.
13//!
//! # Gating
//!
//! [`Notifier::should_fire`] applies all gate conditions in order:
//! 1. Master `enabled` switch must be `true`
//! 2. `llm_requests == 0` on a successful turn → skip (slash commands, cache-only,
//!    security-blocked turns); zero-LLM error turns still fire so setup failures surface
//! 3. `only_on_error && !is_error` → skip
//! 4. Duration gate (`min_turn_duration_ms`) applies only to successful turns;
//!    error turns always fire regardless of duration
23//! # Examples
24//!
25//! ```no_run
26//! use zeph_core::notifications::{Notifier, TurnSummary, TurnExitStatus};
27//! use zeph_config::NotificationsConfig;
28//!
29//! let cfg = NotificationsConfig {
30//!     enabled: true,
31//!     macos_native: true,
32//!     ..Default::default()
33//! };
34//! let notifier = Notifier::new(cfg);
35//! let summary = TurnSummary {
36//!     duration_ms: 5000,
37//!     preview: "Done. Files updated.".to_owned(),
38//!     tool_calls: 2,
39//!     llm_requests: 1,
40//!     exit_status: TurnExitStatus::Success,
41//! };
42//! // Fire and forget — errors are logged, never propagated.
43//! notifier.fire(&summary);
44//! ```
45
46use std::time::Duration;
47
48use serde::Serialize;
49use tracing::warn;
50use zeph_config::NotificationsConfig;
51
52use crate::redact::scrub_content;
53
54// ── Public types ─────────────────────────────────────────────────────────────
55
/// Outcome of a finished agent turn.
///
/// Consulted by [`Notifier::should_fire`] for gating and by the message/webhook
/// builders to pick the status label, tags and priority.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TurnExitStatus {
    /// The turn ran to completion without an error.
    Success,
    /// The turn ended in an error (tool failure, LLM error, etc.).
    Error,
}
64
65/// Lightweight summary of a completed agent turn used as notification input.
66///
67/// Built by the agent loop after `channel.flush_chunks()` and passed to
68/// [`Notifier::fire`]. Contains only what is needed for gate decisions and
69/// notification body assembly — no LLM payloads or raw tool outputs.
70#[derive(Debug, Clone)]
71pub struct TurnSummary {
72    /// Total wall-clock duration of the turn in milliseconds.
73    pub duration_ms: u64,
74    /// First ≤ 160 chars of the assistant response, already redacted by the caller.
75    pub preview: String,
76    /// Number of tool calls dispatched this turn.
77    pub tool_calls: u32,
78    /// Number of completed LLM round-trips this turn.
79    /// Zero for slash commands, cache-only turns, and security-blocked inputs.
80    pub llm_requests: u32,
81    /// Whether the turn ended with an error.
82    pub exit_status: TurnExitStatus,
83}
84
85/// Per-turn completion notifier.
86///
87/// Holds a shared [`reqwest::Client`] and the resolved config. Construct once at
88/// agent startup via [`Notifier::new`] and call [`Notifier::fire`] after each turn.
89///
90/// All I/O is spawned onto the Tokio runtime via `tokio::spawn`; `fire` returns
91/// immediately without blocking the agent loop.
92///
93/// Cloning is cheap — `reqwest::Client` is an `Arc`-backed handle.
94#[derive(Clone)]
95pub struct Notifier {
96    cfg: NotificationsConfig,
97    http: reqwest::Client,
98}
99
100impl Notifier {
101    /// Create a notifier from a [`NotificationsConfig`].
102    ///
103    /// Constructs a shared HTTP client with a 5-second connect timeout. The client
104    /// is reused across all webhook calls for the agent session.
105    ///
106    /// If `webhook_url` is set but fails URL validation (unparseable or non-HTTP(S)
107    /// scheme), it is cleared to `None` and a warning is logged. This prevents SSRF
108    /// via malformed URLs (e.g. `file://`, `ftp://`).
109    #[must_use]
110    pub fn new(cfg: NotificationsConfig) -> Self {
111        let http = reqwest::Client::builder()
112            .connect_timeout(Duration::from_secs(5))
113            .timeout(Duration::from_secs(5))
114            .build()
115            .unwrap_or_default();
116        let mut cfg = cfg;
117        if cfg
118            .webhook_url
119            .as_deref()
120            .is_some_and(|url| !validate_webhook_url(url, cfg.webhook_allow_insecure))
121        {
122            cfg.webhook_url = None;
123        }
124        Self { cfg, http }
125    }
126
127    /// Evaluate all gate conditions and return `true` when the notification should fire.
128    ///
129    /// Gates applied in order (all must pass):
130    /// 1. `enabled` is `true`
131    /// 2. `summary.llm_requests > 0` (zero-LLM turns are never notified)
132    /// 3. If `only_on_error`: turn must have errored
133    /// 4. For successful turns: `duration_ms >= min_turn_duration_ms`
134    ///    (error turns bypass the duration gate)
135    #[must_use]
136    pub fn should_fire(&self, summary: &TurnSummary) -> bool {
137        if !self.cfg.enabled {
138            return false;
139        }
140        // Gate S6: never notify for zero-LLM turns (slash commands, cache hits, etc.)
141        // Exception M8 from critic: allow zero-LLM errors through so setup failures surface.
142        if summary.llm_requests == 0 && summary.exit_status == TurnExitStatus::Success {
143            return false;
144        }
145        match summary.exit_status {
146            // Gate S4: errors always fire, bypassing the duration gate.
147            TurnExitStatus::Error => true,
148            TurnExitStatus::Success => {
149                if self.cfg.only_on_error {
150                    return false;
151                }
152                // Duration gate applies only to successful turns.
153                summary.duration_ms >= self.cfg.min_turn_duration_ms
154            }
155        }
156    }
157
158    /// Fire all enabled notification channels for this turn summary.
159    ///
160    /// Returns immediately — all I/O is spawned as a background task. Failures
161    /// are logged at `warn` level and never propagated. The spawned task has an
162    /// internal 5-second per-channel timeout.
163    pub fn fire(&self, summary: &TurnSummary) {
164        let cfg = self.cfg.clone();
165        let http = self.http.clone();
166        let summary = summary.clone();
167
168        tokio::spawn(async move {
169            fire_all_channels(&cfg, &http, &summary).await;
170        });
171    }
172
173    /// Fire a test notification with a fixed message.
174    ///
175    /// Used by the `zeph notify test` CLI subcommand. Returns an error if all
176    /// channels are disabled or if every channel failed.
177    ///
178    /// # Errors
179    ///
180    /// - `NotifyTestError::AllDisabled` — no channel is enabled
181    /// - `NotifyTestError::MacOsFailed` — macOS notification failed (macOS only)
182    /// - `NotifyTestError::WebhookFailed` — webhook POST failed
183    pub async fn fire_test(&self) -> Result<(), NotifyTestError> {
184        if !self.cfg.enabled {
185            return Err(NotifyTestError::MasterSwitchDisabled);
186        }
187
188        let macos_enabled = self.cfg.macos_native;
189        let webhook_enabled = self.cfg.webhook_url.is_some() && self.cfg.webhook_topic.is_some();
190
191        if !macos_enabled && !webhook_enabled {
192            return Err(NotifyTestError::AllDisabled);
193        }
194
195        let summary = TurnSummary {
196            duration_ms: 0,
197            preview: "Zeph is working".to_owned(),
198            tool_calls: 0,
199            llm_requests: 1,
200            exit_status: TurnExitStatus::Success,
201        };
202
203        #[cfg(target_os = "macos")]
204        if macos_enabled {
205            fire_macos_native(&self.cfg.title, "Zeph is working")
206                .await
207                .map_err(|e| NotifyTestError::MacOsFailed(e.to_string()))?;
208        }
209
210        if let (Some(url), Some(topic)) = (&self.cfg.webhook_url, &self.cfg.webhook_topic) {
211            fire_webhook(&self.http, url, &self.cfg.title, topic, &summary)
212                .await
213                .map_err(|e| NotifyTestError::WebhookFailed(e.to_string()))?;
214        }
215
216        Ok(())
217    }
218}
219
220/// Error returned by [`Notifier::fire_test`].
221#[derive(Debug, thiserror::Error)]
222pub enum NotifyTestError {
223    /// The master `notifications.enabled` switch is `false`.
224    #[error("notifications are disabled (set notifications.enabled = true to enable)")]
225    MasterSwitchDisabled,
226    /// No channels are enabled in the current configuration.
227    #[error("all notification channels are disabled")]
228    AllDisabled,
229    /// macOS notification failed.
230    #[error("macOS notification failed: {0}")]
231    MacOsFailed(String),
232    /// Webhook POST failed.
233    #[error("webhook notification failed: {0}")]
234    WebhookFailed(String),
235}
236
237// ── Internal helpers ──────────────────────────────────────────────────────────
238
239/// Fire all enabled channels for `summary`. Called from a spawned task.
240async fn fire_all_channels(
241    cfg: &NotificationsConfig,
242    http: &reqwest::Client,
243    summary: &TurnSummary,
244) {
245    let title = &cfg.title;
246
247    #[cfg(target_os = "macos")]
248    {
249        let message = build_notification_message(summary);
250        if cfg.macos_native
251            && let Err(e) = fire_macos_native(title, &message).await
252        {
253            warn!(error = %e, "macOS notification failed");
254        }
255    }
256
257    if let (Some(url), Some(topic)) = (&cfg.webhook_url, &cfg.webhook_topic)
258        && let Err(e) = fire_webhook(http, url, title, topic, summary).await
259    {
260        warn!(error = %e, "webhook notification failed");
261    }
262}
263
264/// Build the notification body from a turn summary, applying secret redaction.
265fn build_notification_message(summary: &TurnSummary) -> String {
266    let status = if summary.exit_status == TurnExitStatus::Error {
267        "Error"
268    } else {
269        "Done"
270    };
271
272    // Apply scrub_content to redact any secrets that may be in the preview.
273    let safe_preview = scrub_content(&summary.preview);
274
275    if safe_preview.is_empty() {
276        format!("{status} — {dur}ms", dur = summary.duration_ms)
277    } else {
278        format!(
279            "{status} — {dur}ms\n{preview}",
280            dur = summary.duration_ms,
281            preview = safe_preview,
282        )
283    }
284}
285
/// Make a string safe to embed inside an `AppleScript` `"..."` literal.
///
/// The transformation runs in three passes, and the order matters:
/// 1. Every control character (including `\n`, `\r` and tab) as well as U+2028
///    LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR becomes a single space, so the
///    result is always one line.
/// 2. The text is cut to at most `max` chars (chars, not bytes); when something was
///    cut, `…` is appended, so the result can be `max + 1` chars long.
/// 3. `\` and `"` are removed. `AppleScript` offers no backslash escaping inside
///    string literals, so dropping these characters is the only way to prevent the
///    payload from terminating the literal.
///
/// Because stripping happens after truncation, the result may end up shorter than `max`.
///
/// # Examples
///
/// ```
/// # use zeph_core::notifications::sanitize_applescript_payload;
/// let s = sanitize_applescript_payload("Hello\nWorld\"", 200);
/// assert_eq!(s, "Hello World");
/// ```
#[must_use]
pub fn sanitize_applescript_payload(s: &str, max: usize) -> String {
    // Pass 1: flatten anything that could break a single-line string literal.
    // U+2028/U+2029 are not in the Unicode Cc category, hence the explicit match.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        let breaks_literal = ch.is_control() || matches!(ch, '\u{2028}' | '\u{2029}');
        out.push(if breaks_literal { ' ' } else { ch });
    }

    // Pass 2: a char at position `max` exists only when the text is longer than `max`.
    if let Some((cut, _)) = out.char_indices().nth(max) {
        out.truncate(cut);
        out.push('…');
    }

    // Pass 3: drop the characters that cannot be escaped in AppleScript.
    out.retain(|ch| ch != '\\' && ch != '"');
    out
}
336
337/// Validate a webhook URL for safe use as a notification endpoint.
338///
339/// Returns `true` when the URL is acceptable. Logs a warning and returns `false`
340/// when the URL is unparseable or uses a non-HTTP(S) scheme. Accepts `http://`
341/// only when `allow_insecure` is `true` (opt-in for local testing).
342fn validate_webhook_url(url: &str, allow_insecure: bool) -> bool {
343    match url.parse::<reqwest::Url>() {
344        Ok(parsed) => {
345            if parsed.scheme() == "https" {
346                return true;
347            }
348            if allow_insecure && parsed.scheme() == "http" {
349                warn!(
350                    "webhook_url uses insecure HTTP scheme; set webhook_allow_insecure=false for production"
351                );
352                return true;
353            }
354            warn!(
355                scheme = parsed.scheme(),
356                "webhook_url has non-HTTP(S) scheme — channel disabled"
357            );
358            false
359        }
360        Err(e) => {
361            warn!(error = %e, "webhook_url is not a valid URL — channel disabled");
362            false
363        }
364    }
365}
366
/// Fire a macOS Notification Center banner via `osascript`.
///
/// The title (cut to 120 chars) and message (cut to 240 chars) are passed through
/// [`sanitize_applescript_payload`] and embedded in a `display notification` script.
/// The script is fed to `osascript` on stdin rather than as a command-line argument,
/// which avoids argument injection.
///
/// Waits up to 5 seconds for `osascript` to exit. The exit status and a timeout are
/// both ignored (best-effort), but a child that is still running when the wait ends
/// is killed instead of being left behind.
///
/// # Errors
///
/// Returns an error when `osascript` cannot be spawned or when writing the script to
/// its stdin fails.
#[cfg(target_os = "macos")]
async fn fire_macos_native(
    title: &str,
    message: &str,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    use tokio::io::AsyncWriteExt as _;
    use tokio::process::Command;

    let safe_title = sanitize_applescript_payload(title, 120);
    let safe_message = sanitize_applescript_payload(message, 240);

    let script = format!(r#"display notification "{safe_message}" with title "{safe_title}""#);

    let mut child = Command::new("osascript")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        // Without this, a hung `osascript` would outlive the timeout below, because
        // dropping a tokio `Child` does not kill the process by default.
        .kill_on_drop(true)
        .spawn()?;

    if let Some(mut stdin) = child.stdin.take() {
        stdin.write_all(script.as_bytes()).await?;
        // Close stdin so osascript sees EOF and starts executing the script.
        stdin.shutdown().await?;
    }

    // Wait up to 5s for osascript to complete; ignore exit status (best-effort).
    let _ = tokio::time::timeout(Duration::from_secs(5), child.wait()).await;

    Ok(())
}
397
398/// ntfy-compatible JSON webhook body.
399///
400/// Matches the [ntfy publish-as-JSON](https://docs.ntfy.sh/publish/#publish-as-json) schema.
401#[derive(Serialize)]
402struct NtfyWebhookBody<'a> {
403    topic: &'a str,
404    title: &'a str,
405    message: &'a str,
406    tags: Vec<&'a str>,
407    /// Priority 1–5. Default 3; error turns use 4.
408    priority: u8,
409}
410
411/// POST a notification to an ntfy-compatible JSON endpoint.
412async fn fire_webhook(
413    client: &reqwest::Client,
414    url: &str,
415    title: &str,
416    topic: &str,
417    summary: &TurnSummary,
418) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
419    let message = build_notification_message(summary);
420    let (tags, priority) = if summary.exit_status == TurnExitStatus::Error {
421        (vec!["zeph", "error"], 4u8)
422    } else {
423        (vec!["zeph", "turn-complete"], 3u8)
424    };
425
426    let body = NtfyWebhookBody {
427        topic,
428        title,
429        message: &message,
430        tags,
431        priority,
432    };
433
434    // Timeout is already set on the client (5s), but wrap for clarity.
435    tokio::time::timeout(Duration::from_secs(5), client.post(url).json(&body).send())
436        .await??
437        .error_for_status()?;
438
439    Ok(())
440}
441
442// ── Tests ─────────────────────────────────────────────────────────────────────
443
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_config::NotificationsConfig;

    /// Single construction path for the notifier under test.
    fn make_notifier(cfg: NotificationsConfig) -> Notifier {
        Notifier::new(cfg)
    }

    /// Default configuration with only the master switch turned on.
    fn enabled_cfg() -> NotificationsConfig {
        NotificationsConfig {
            enabled: true,
            ..Default::default()
        }
    }

    /// Turn summary without tool calls; the remaining fields come from the caller.
    fn summary_with(
        exit_status: TurnExitStatus,
        preview: &str,
        duration_ms: u64,
        llm_requests: u32,
    ) -> TurnSummary {
        TurnSummary {
            duration_ms,
            preview: preview.to_owned(),
            tool_calls: 0,
            llm_requests,
            exit_status,
        }
    }

    fn success_summary(duration_ms: u64, llm_requests: u32) -> TurnSummary {
        summary_with(TurnExitStatus::Success, "All done.", duration_ms, llm_requests)
    }

    fn error_summary(duration_ms: u64, llm_requests: u32) -> TurnSummary {
        summary_with(TurnExitStatus::Error, "Error occurred.", duration_ms, llm_requests)
    }

    // ── should_fire gate tests ────────────────────────────────────────────────

    #[test]
    fn should_fire_disabled_master_switch() {
        let notifier = make_notifier(NotificationsConfig {
            enabled: false,
            ..Default::default()
        });
        let turn = success_summary(5000, 1);
        assert!(!notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_zero_llm_success_skipped() {
        // Successful turns without any LLM round-trip (slash commands, cache hits)
        // must stay silent.
        let notifier = make_notifier(enabled_cfg());
        let turn = success_summary(0, 0);
        assert!(!notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_zero_llm_error_fires() {
        // Critic M8: an error without any LLM round-trip (setup failure) still notifies.
        let notifier = make_notifier(enabled_cfg());
        let turn = error_summary(0, 0);
        assert!(notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_only_on_error_skips_success() {
        let notifier = make_notifier(NotificationsConfig {
            only_on_error: true,
            ..enabled_cfg()
        });
        let turn = success_summary(5000, 1);
        assert!(!notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_only_on_error_fires_on_error() {
        let notifier = make_notifier(NotificationsConfig {
            only_on_error: true,
            ..enabled_cfg()
        });
        let turn = error_summary(100, 1);
        assert!(notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_duration_gate_success_below_threshold() {
        let notifier = make_notifier(NotificationsConfig {
            min_turn_duration_ms: 3000,
            ..enabled_cfg()
        });
        let turn = success_summary(2999, 1);
        assert!(!notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_duration_gate_success_at_threshold() {
        let notifier = make_notifier(NotificationsConfig {
            min_turn_duration_ms: 3000,
            ..enabled_cfg()
        });
        let turn = success_summary(3000, 1);
        assert!(notifier.should_fire(&turn));
    }

    #[test]
    fn should_fire_error_bypasses_duration_gate() {
        // Gate S4: an error fires even when the turn was shorter than min_turn_duration_ms.
        let notifier = make_notifier(NotificationsConfig {
            min_turn_duration_ms: 3000,
            ..enabled_cfg()
        });
        let turn = error_summary(100, 1);
        assert!(notifier.should_fire(&turn));
    }

    // ── sanitize_applescript_payload tests ────────────────────────────────────

    #[test]
    fn sanitize_control_chars_replaced_with_space() {
        let cleaned = sanitize_applescript_payload("Hello\nWorld", 200);
        // The newline turns into a space, keeping the literal on one line.
        assert!(!cleaned.contains('\n'));
        assert!(cleaned.contains("Hello World"));
    }

    #[test]
    fn sanitize_quotes_stripped() {
        // No backslash escaping exists in AppleScript, so double quotes are removed.
        let cleaned = sanitize_applescript_payload(r#"say "hi""#, 200);
        assert!(!cleaned.contains('"'));
        assert_eq!(cleaned, "say hi");
    }

    #[test]
    fn sanitize_backslash_stripped() {
        // No backslash escaping exists in AppleScript, so backslashes are removed.
        let cleaned = sanitize_applescript_payload(r"C:\Users\foo", 200);
        assert_eq!(cleaned, "C:Usersfoo");
    }

    #[test]
    fn sanitize_truncation_appends_ellipsis() {
        let input = "a".repeat(300);
        let cleaned = sanitize_applescript_payload(&input, 200);
        assert!(cleaned.ends_with('…'));
        // `max` chars are kept and the ellipsis adds one more.
        assert_eq!(cleaned.chars().count(), 201);
    }

    #[test]
    fn sanitize_no_truncation_when_short() {
        let cleaned = sanitize_applescript_payload("short", 200);
        assert_eq!(cleaned, "short");
    }

    #[test]
    fn sanitize_injection_attempt() {
        // Classic AppleScript injection: close the string, then call display dialog.
        let attack = r#""; display dialog "gotcha"; ""#;
        let cleaned = sanitize_applescript_payload(attack, 200);
        // Every `"` must be stripped so the payload cannot terminate the outer string.
        assert!(!cleaned.contains('"'));
    }

    #[test]
    fn sanitize_applescript_payload_empty() {
        assert_eq!(sanitize_applescript_payload("", 200), "");
    }

    #[test]
    fn sanitize_tab_replaced() {
        let cleaned = sanitize_applescript_payload("a\tb", 200);
        assert_eq!(cleaned, "a b");
    }

    #[test]
    fn sanitize_line_separators() {
        let input = "hello\u{2028}world\u{2029}end";
        let cleaned = sanitize_applescript_payload(input, 200);
        assert!(!cleaned.contains('\u{2028}'));
        assert!(!cleaned.contains('\u{2029}'));
        assert_eq!(cleaned, "hello world end");
    }

    // ── build_notification_message tests ──────────────────────────────────────

    #[test]
    fn notification_message_success() {
        let body = build_notification_message(&success_summary(1234, 1));
        assert!(body.starts_with("Done"));
        assert!(body.contains("1234ms"));
    }

    #[test]
    fn notification_message_error() {
        let body = build_notification_message(&error_summary(500, 1));
        assert!(body.starts_with("Error"));
    }

    #[test]
    fn notification_message_redacts_secrets() {
        let turn = summary_with(TurnExitStatus::Success, "Done. Key: sk-abc123xyz", 100, 1);
        let body = build_notification_message(&turn);
        assert!(!body.contains("sk-abc123xyz"), "secret must be redacted");
        assert!(
            body.contains("[REDACTED]"),
            "should contain redaction marker"
        );
    }
}