Skip to main content

ati/core/
logging.rs

1//! Structured logging initialization for ATI.
2//!
3//! - **Proxy mode**: JSON to stderr (Docker/container friendly, machine-parseable)
4//! - **CLI mode**: Compact human-readable to stderr
5//!
6//! Sentry integration is behind the `sentry` cargo feature (off by default).
7
8use tracing_subscriber::layer::SubscriberExt;
9use tracing_subscriber::util::SubscriberInitExt;
10use tracing_subscriber::{fmt, EnvFilter};
11
/// Selects the log output format that [`init`] installs.
///
/// The enum is a plain, field-less selector, so it derives the common value
/// traits: callers can debug-print it, copy it, and compare it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogMode {
    /// CLI commands — compact human-readable stderr.
    Cli,
    /// Proxy server — structured JSON to stderr.
    Proxy,
}
19
/// Opaque guard type for builds without the `sentry` cargo feature.
///
/// It is the unit type `()`: there is no Sentry client to keep alive.
#[cfg(not(feature = "sentry"))]
pub type SentryGuard = ();
/// Opaque guard type for builds with the `sentry` cargo feature.
///
/// It is `sentry::ClientInitGuard`, which must be held for the lifetime of
/// the program.
#[cfg(feature = "sentry")]
pub type SentryGuard = sentry::ClientInitGuard;
27
28/// Initialize the tracing subscriber and (optionally) Sentry.
29///
30/// Call once at program startup, before any `tracing` macros fire.
31/// The returned guard (if `Some`) must be held until program exit so
32/// that pending Sentry events are flushed on drop.
33pub fn init(mode: LogMode, verbose: bool) -> Option<SentryGuard> {
34    let filter = match std::env::var("RUST_LOG") {
35        Ok(val) if !val.is_empty() => EnvFilter::from_default_env(),
36        _ if verbose => EnvFilter::new("debug"),
37        _ => EnvFilter::new("info"),
38    };
39
40    // Init Sentry first (before subscriber) so sentry-tracing layer can be wired in.
41    let sentry_guard = init_sentry();
42
43    // Build the layered subscriber.
44    // The sentry-tracing layer (when enabled) bridges tracing events to Sentry:
45    //   error! → Sentry issue, warn!/info! → breadcrumbs.
46    let registry = tracing_subscriber::registry().with(filter);
47
48    #[cfg(feature = "sentry")]
49    let registry = registry.with(sentry_guard.as_ref().map(|_| sentry_tracing::layer()));
50
51    match mode {
52        LogMode::Proxy => {
53            registry
54                .with(
55                    fmt::layer()
56                        .json()
57                        .flatten_event(true)
58                        .with_writer(std::io::stderr)
59                        .with_target(true)
60                        .with_current_span(false),
61                )
62                .init();
63        }
64        LogMode::Cli => {
65            registry
66                .with(
67                    fmt::layer()
68                        .compact()
69                        .with_writer(std::io::stderr)
70                        .with_target(false),
71                )
72                .init();
73        }
74    }
75
76    // Warn after subscriber is initialized so the message actually appears.
77    #[cfg(not(feature = "sentry"))]
78    if std::env::var("SENTRY_DSN").is_ok() || std::env::var("GREP_SENTRY_DSN").is_ok() {
79        tracing::warn!(
80            "SENTRY_DSN is set but this binary was compiled without the sentry feature — ignoring. \
81             Build with: cargo build --features sentry"
82        );
83    }
84
85    sentry_guard
86}
87
88/// Flush the Sentry transport queue before a non-returning exit
89/// (e.g. `process::exit`, which bypasses destructors). No-op when the
90/// `sentry` feature is disabled.
91#[cfg(feature = "sentry")]
92pub fn shutdown(guard: Option<SentryGuard>) {
93    drop(guard);
94}
95
96#[cfg(not(feature = "sentry"))]
97#[inline]
98pub fn shutdown(_guard: Option<SentryGuard>) {}
99
/// Sentry `before_send` hook, invoked for every event the SDK is about to
/// transport.
///
/// Its single responsibility today: discard events whose
/// `upstream_error_class` tag is `quota` or `rate_limited`.
///
/// Rationale: 402 (out of credit) and 429 (rate limited) responses are
/// billing/throttling outcomes rather than code bugs. They were reported to
/// flood the issue list by the thousands and burn Sentry quota. The
/// `report_upstream_error` helper tags every classified event with
/// `upstream_error_class`, and this hook keys off that tag.
///
/// The helper still emits `tracing::warn!` for those classes, so the
/// breadcrumb buffer keeps the context for the *next* real error; only the
/// standalone Sentry event is suppressed.
///
/// Events that carry no such tag (panics, unrelated `tracing::error!` calls
/// from other code paths) and events with any other class are returned
/// unchanged.
#[cfg(feature = "sentry")]
fn before_send(
    event: sentry::protocol::Event<'static>,
) -> Option<sentry::protocol::Event<'static>> {
    let is_noise = matches!(
        event.tags.get("upstream_error_class").map(String::as_str),
        Some("quota" | "rate_limited")
    );

    if is_noise {
        None
    } else {
        Some(event)
    }
}
130
131/// Initialize Sentry if a DSN is configured. Returns `None` when Sentry is
132/// disabled (no DSN, or feature not compiled in).
133fn init_sentry() -> Option<SentryGuard> {
134    #[cfg(feature = "sentry")]
135    {
136        let dsn = std::env::var("GREP_SENTRY_DSN")
137            .or_else(|_| std::env::var("SENTRY_DSN"))
138            .ok()?;
139
140        let environment =
141            std::env::var("ENVIRONMENT_TIER").unwrap_or_else(|_| "development".into());
142
143        // Only send to Sentry in production/staging/demo — skip in development
144        match environment.as_str() {
145            "production" | "staging" | "demo" => {}
146            _ => {
147                tracing::debug!(environment = %environment, "sentry disabled for this environment");
148                return None;
149            }
150        }
151
152        let service = std::env::var("SERVICE_NAME").unwrap_or_else(|_| "ati-proxy".into());
153
154        let sample_rate = match environment.as_str() {
155            "production" => 0.25,
156            "staging" => 0.5,
157            _ => 1.0,
158        };
159
160        let sentry_debug = std::env::var("ATI_SENTRY_DEBUG")
161            .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
162            .unwrap_or(false);
163
164        let guard = sentry::init((
165            dsn,
166            sentry::ClientOptions {
167                release: Some(env!("CARGO_PKG_VERSION").into()),
168                environment: Some(environment.into()),
169                server_name: Some(service.into()),
170                traces_sample_rate: sample_rate,
171                attach_stacktrace: true,
172                send_default_pii: false,
173                // Bumped from the SDK default (30) so that a slow-burn
174                // failure — where the eventual error is preceded by a
175                // long tail of tool_call info breadcrumbs — still shows
176                // the breadcrumbs that matter. The breadcrumb buffer is
177                // per-scope so there's no global memory concern.
178                max_breadcrumbs: 100,
179                // Sentry's Logs product — when enabled, structured
180                // tracing fields ride along on every event so we can
181                // search/filter in Sentry's log explorer too, not just
182                // in the issue grouping.
183                enable_logs: true,
184                debug: sentry_debug,
185                before_send: Some(std::sync::Arc::new(before_send)),
186                ..Default::default()
187            },
188        ));
189
190        if guard.is_enabled() {
191            Some(guard)
192        } else {
193            None
194        }
195    }
196
197    #[cfg(not(feature = "sentry"))]
198    {
199        None
200    }
201}
202
#[cfg(all(test, feature = "sentry"))]
mod tests {
    use super::*;
    use sentry::protocol::Event;

    /// Builds a default event tagged with the given `upstream_error_class`.
    fn event_with_class(class: &str) -> Event<'static> {
        let mut tagged = Event::default();
        tagged
            .tags
            .insert("upstream_error_class".to_owned(), class.to_owned());
        tagged
    }

    /// Reports whether `before_send` lets an event of the given class through.
    fn survives(class: &str) -> bool {
        before_send(event_with_class(class)).is_some()
    }

    #[test]
    fn before_send_drops_quota_class_events() {
        // 402 path: `report_upstream_error` classifies it as `quota` and
        // emits at Warning. These were reported to spam the issue list and
        // burn Sentry quota, so the hook must swallow them.
        assert!(!survives("quota"));
    }

    #[test]
    fn before_send_drops_rate_limited_class_events() {
        // 429 path: same noise problem. Breadcrumbs still capture the
        // context for the next real error; only the standalone event is
        // stopped.
        assert!(!survives("rate_limited"));
    }

    #[test]
    fn before_send_passes_bad_input_through() {
        // 400 / 422 is a real bug we want to see.
        assert!(survives("bad_input"));
    }

    #[test]
    fn before_send_passes_server_error_through() {
        assert!(survives("server_error"));
    }

    #[test]
    fn before_send_passes_auth_error_through() {
        assert!(survives("auth_error"));
    }

    #[test]
    fn before_send_passes_transport_error_through() {
        assert!(survives("transport_error"));
    }

    #[test]
    fn before_send_passes_events_without_class_tag() {
        // Panics and generic tracing::error! from elsewhere carry no class
        // tag and must not be dropped.
        let untagged = Event::default();
        assert!(before_send(untagged).is_some());
    }
}