Skip to main content

everruns_core/
error_reporter.rs

// Vendor-neutral error reporting hook for embedding.
//
// Decision: Stay vendor-neutral — no Sentry, Datadog, or Rollbar types leak
//   into OSS. Wrappers map `ErrorReport` onto whatever backend they use.
// Decision: Separate the "report this error" contract (ErrorReporter) from
//   the "where did it happen" context (ErrorScope). The scope is structured
//   so wrappers can attach tags/breadcrumbs without string-parsing.
// Decision: Async trait so reporters can perform IO (HTTP, channel send) if
//   they want to. Implementations should be best-effort and swallow errors —
//   a failing reporter must never propagate into the request/task path.
11
12use async_trait::async_trait;
13use std::collections::BTreeMap;
14use std::sync::Arc;
15
/// Severity of the reported error.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Warning < Error < Fatal`. This allows threshold checks such as
/// `severity >= ErrorSeverity::Error` without a hand-written `match`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ErrorSeverity {
    /// Unexpected but non-fatal condition.
    Warning,
    /// Handled error that did not crash the process.
    Error,
    /// Unhandled panic or fatal condition.
    Fatal,
}
26
/// Structured request/task scope surrounding the error.
///
/// Every identifier is optional because the surrounding context differs
/// between HTTP requests, worker tasks, and background jobs. Wrappers map
/// these onto vendor-specific tag/context APIs (e.g. Sentry scope tags).
///
/// `Default` yields a scope with every identifier unset and no extra
/// metadata. Scopes compare equal when all identifiers and all `extra`
/// entries are equal.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ErrorScope {
    pub user_id: Option<String>,
    pub org_id: Option<String>,
    pub session_id: Option<String>,
    pub request_id: Option<String>,
    pub route: Option<String>,
    pub component: Option<String>,
    pub task_id: Option<String>,
    pub workflow_id: Option<String>,
    /// Extra key/value metadata (provider id, feature flag, etc.).
    pub extra: BTreeMap<String, String>,
}
45
46impl ErrorScope {
47    pub fn new() -> Self {
48        Self::default()
49    }
50
51    pub fn with_user(mut self, user_id: impl Into<String>) -> Self {
52        self.user_id = Some(user_id.into());
53        self
54    }
55
56    pub fn with_org(mut self, org_id: impl Into<String>) -> Self {
57        self.org_id = Some(org_id.into());
58        self
59    }
60
61    pub fn with_session(mut self, session_id: impl Into<String>) -> Self {
62        self.session_id = Some(session_id.into());
63        self
64    }
65
66    pub fn with_request(mut self, request_id: impl Into<String>) -> Self {
67        self.request_id = Some(request_id.into());
68        self
69    }
70
71    pub fn with_route(mut self, route: impl Into<String>) -> Self {
72        self.route = Some(route.into());
73        self
74    }
75
76    pub fn with_component(mut self, component: impl Into<String>) -> Self {
77        self.component = Some(component.into());
78        self
79    }
80
81    pub fn with_task(mut self, task_id: impl Into<String>) -> Self {
82        self.task_id = Some(task_id.into());
83        self
84    }
85
86    pub fn with_workflow(mut self, workflow_id: impl Into<String>) -> Self {
87        self.workflow_id = Some(workflow_id.into());
88        self
89    }
90
91    pub fn with_extra(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
92        self.extra.insert(key.into(), value.into());
93        self
94    }
95}
96
97/// A single error report passed to the embedder-provided reporter.
98#[derive(Debug, Clone)]
99pub struct ErrorReport {
100    pub severity: ErrorSeverity,
101    /// Short machine-stable identifier (e.g. `"worker.task.failed"`,
102    /// `"server.request.panic"`). Vendor-neutral.
103    pub kind: String,
104    /// Human-readable error message.
105    pub message: String,
106    pub scope: ErrorScope,
107}
108
109impl ErrorReport {
110    pub fn new(
111        severity: ErrorSeverity,
112        kind: impl Into<String>,
113        message: impl Into<String>,
114    ) -> Self {
115        Self {
116            severity,
117            kind: kind.into(),
118            message: message.into(),
119            scope: ErrorScope::default(),
120        }
121    }
122
123    pub fn error(kind: impl Into<String>, message: impl Into<String>) -> Self {
124        Self::new(ErrorSeverity::Error, kind, message)
125    }
126
127    pub fn warning(kind: impl Into<String>, message: impl Into<String>) -> Self {
128        Self::new(ErrorSeverity::Warning, kind, message)
129    }
130
131    pub fn fatal(kind: impl Into<String>, message: impl Into<String>) -> Self {
132        Self::new(ErrorSeverity::Fatal, kind, message)
133    }
134
135    pub fn with_scope(mut self, scope: ErrorScope) -> Self {
136        self.scope = scope;
137        self
138    }
139}
140
141/// Vendor-neutral error reporting hook.
142///
143/// Implementations are supplied by embedders (SaaS wrappers) via
144/// `ServerAppBuilder::error_reporter` / `WorkerAppBuilder::error_reporter`.
145/// OSS never imports vendor-specific SDKs (Sentry, Datadog, etc.).
146///
147/// Implementations must be best-effort: a slow or failing reporter must
148/// never propagate into the request or task execution path. Spawn background
149/// work for heavy reporting if needed.
150#[async_trait]
151pub trait ErrorReporter: Send + Sync {
152    /// Deliver a report to the embedder-owned backend.
153    async fn report(&self, report: ErrorReport);
154
155    /// Human-readable name for logging and diagnostics.
156    fn name(&self) -> &'static str {
157        "ErrorReporter"
158    }
159}
160
161/// Convenience type for handing a reporter to consumers.
162pub type SharedErrorReporter = Arc<dyn ErrorReporter>;
163
164/// No-op reporter. Default when no embedder reporter is installed.
165#[derive(Debug, Clone, Default)]
166pub struct NoopErrorReporter;
167
168#[async_trait]
169impl ErrorReporter for NoopErrorReporter {
170    async fn report(&self, _report: ErrorReport) {}
171
172    fn name(&self) -> &'static str {
173        "NoopErrorReporter"
174    }
175}
176
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Test double that stores every report it receives, in arrival order.
    #[derive(Default)]
    struct CaptureReporter {
        reports: Mutex<Vec<ErrorReport>>,
    }

    #[async_trait]
    impl ErrorReporter for CaptureReporter {
        async fn report(&self, report: ErrorReport) {
            let mut captured = self.reports.lock().unwrap();
            captured.push(report);
        }
    }

    /// A report sent through the trait arrives with its kind and scope intact.
    #[tokio::test]
    async fn captures_report_with_scope() {
        let reporter = CaptureReporter::default();
        let report = ErrorReport::error("server.request", "boom").with_scope(
            ErrorScope::new()
                .with_user("user_1")
                .with_org("org_1")
                .with_request("req_1")
                .with_extra("provider", "openai"),
        );
        reporter.report(report).await;

        let captured = reporter.reports.lock().unwrap();
        assert_eq!(captured.len(), 1);

        let first = &captured[0];
        assert_eq!(first.kind, "server.request");
        assert_eq!(first.scope.user_id.as_deref(), Some("user_1"));
        assert_eq!(
            first.scope.extra.get("provider").map(String::as_str),
            Some("openai")
        );
    }

    /// The no-op reporter accepts a report and completes without panicking.
    #[tokio::test]
    async fn noop_reporter_does_not_panic() {
        let report = ErrorReport::fatal("panic", "ignored");
        NoopErrorReporter.report(report).await;
    }
}