ati/core/logging.rs
1//! Structured logging initialization for ATI.
2//!
3//! - **Proxy mode**: JSON to stderr (Docker/container friendly, machine-parseable)
4//! - **CLI mode**: Compact human-readable to stderr
5//!
6//! Sentry integration is behind the `sentry` cargo feature (off by default).
7
8use tracing_subscriber::layer::SubscriberExt;
9use tracing_subscriber::util::SubscriberInitExt;
10use tracing_subscriber::{fmt, EnvFilter};
11
/// Controls the log output format.
///
/// The variants carry no data, so the type is `Copy` and can be compared
/// and debug-printed freely by callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogMode {
    /// CLI commands — compact human-readable stderr.
    Cli,
    /// Proxy server — structured JSON to stderr.
    Proxy,
}
19
/// Opaque guard type whose concrete definition depends on the `sentry`
/// cargo feature.
///
/// Without the feature there is nothing to keep alive, so the guard is `()`.
#[cfg(not(feature = "sentry"))]
pub type SentryGuard = ();
/// Opaque guard type whose concrete definition depends on the `sentry`
/// cargo feature.
///
/// With the feature enabled this is `sentry::ClientInitGuard`, which must be
/// held for the lifetime of the program.
#[cfg(feature = "sentry")]
pub type SentryGuard = sentry::ClientInitGuard;
27
28/// Initialize the tracing subscriber and (optionally) Sentry.
29///
30/// Call once at program startup, before any `tracing` macros fire.
31/// The returned guard (if `Some`) must be held until program exit so
32/// that pending Sentry events are flushed on drop.
33pub fn init(mode: LogMode, verbose: bool) -> Option<SentryGuard> {
34 let filter = match std::env::var("RUST_LOG") {
35 Ok(val) if !val.is_empty() => EnvFilter::from_default_env(),
36 _ if verbose => EnvFilter::new("debug"),
37 _ => EnvFilter::new("info"),
38 };
39
40 // Init Sentry first (before subscriber) so sentry-tracing layer can be wired in.
41 let sentry_guard = init_sentry();
42
43 // Build the layered subscriber.
44 // The sentry-tracing layer (when enabled) bridges tracing events to Sentry:
45 // error! → Sentry issue, warn!/info! → breadcrumbs.
46 let registry = tracing_subscriber::registry().with(filter);
47
48 #[cfg(feature = "sentry")]
49 let registry = registry.with(sentry_guard.as_ref().map(|_| sentry_tracing::layer()));
50
51 match mode {
52 LogMode::Proxy => {
53 registry
54 .with(
55 fmt::layer()
56 .json()
57 .flatten_event(true)
58 .with_writer(std::io::stderr)
59 .with_target(true)
60 .with_current_span(false),
61 )
62 .init();
63 }
64 LogMode::Cli => {
65 registry
66 .with(
67 fmt::layer()
68 .compact()
69 .with_writer(std::io::stderr)
70 .with_target(false),
71 )
72 .init();
73 }
74 }
75
76 // Warn after subscriber is initialized so the message actually appears.
77 #[cfg(not(feature = "sentry"))]
78 if std::env::var("SENTRY_DSN").is_ok() || std::env::var("GREP_SENTRY_DSN").is_ok() {
79 tracing::warn!(
80 "SENTRY_DSN is set but this binary was compiled without the sentry feature — ignoring. \
81 Build with: cargo build --features sentry"
82 );
83 }
84
85 sentry_guard
86}
87
88/// Flush the Sentry transport queue before a non-returning exit
89/// (e.g. `process::exit`, which bypasses destructors). No-op when the
90/// `sentry` feature is disabled.
91#[cfg(feature = "sentry")]
92pub fn shutdown(guard: Option<SentryGuard>) {
93 drop(guard);
94}
95
96#[cfg(not(feature = "sentry"))]
97#[inline]
98pub fn shutdown(_guard: Option<SentryGuard>) {}
99
/// Sentry `before_send` hook, invoked for every event the SDK is about to
/// transport.
///
/// Its single responsibility today: discard events whose
/// `upstream_error_class` tag is `quota` or `rate_limited`.
///
/// Rationale: 402 (out of credit) and 429 (rate limited) responses are
/// billing/throttling outcomes rather than code bugs, and they were reported
/// to flood the issue list and burn Sentry quota. The
/// `report_upstream_error` helper tags every classified event with
/// `upstream_error_class`, which is the tag inspected here.
///
/// That helper still emits `tracing::warn!` for these classes, so the
/// breadcrumb buffer keeps the context for the *next* real error; only the
/// standalone event is suppressed.
///
/// Events that carry no such tag (panics, unrelated `tracing::error!` calls)
/// and events with any other class are returned unchanged.
#[cfg(feature = "sentry")]
fn before_send(
    event: sentry::protocol::Event<'static>,
) -> Option<sentry::protocol::Event<'static>> {
    let is_throttling_noise = matches!(
        event.tags.get("upstream_error_class").map(String::as_str),
        Some("quota") | Some("rate_limited")
    );

    // `None` tells the SDK to drop the event; `Some` lets it ship.
    (!is_throttling_noise).then_some(event)
}
130
131/// Initialize Sentry if a DSN is configured. Returns `None` when Sentry is
132/// disabled (no DSN, or feature not compiled in).
133fn init_sentry() -> Option<SentryGuard> {
134 #[cfg(feature = "sentry")]
135 {
136 let dsn = std::env::var("GREP_SENTRY_DSN")
137 .or_else(|_| std::env::var("SENTRY_DSN"))
138 .ok()?;
139
140 let environment =
141 std::env::var("ENVIRONMENT_TIER").unwrap_or_else(|_| "development".into());
142
143 // Only send to Sentry in production/staging/demo — skip in development
144 match environment.as_str() {
145 "production" | "staging" | "demo" => {}
146 _ => {
147 tracing::debug!(environment = %environment, "sentry disabled for this environment");
148 return None;
149 }
150 }
151
152 let service = std::env::var("SERVICE_NAME").unwrap_or_else(|_| "ati-proxy".into());
153
154 let sample_rate = match environment.as_str() {
155 "production" => 0.25,
156 "staging" => 0.5,
157 _ => 1.0,
158 };
159
160 let sentry_debug = std::env::var("ATI_SENTRY_DEBUG")
161 .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
162 .unwrap_or(false);
163
164 let guard = sentry::init((
165 dsn,
166 sentry::ClientOptions {
167 release: Some(env!("CARGO_PKG_VERSION").into()),
168 environment: Some(environment.into()),
169 server_name: Some(service.into()),
170 traces_sample_rate: sample_rate,
171 attach_stacktrace: true,
172 send_default_pii: false,
173 // Bumped from the SDK default (30) so that a slow-burn
174 // failure — where the eventual error is preceded by a
175 // long tail of tool_call info breadcrumbs — still shows
176 // the breadcrumbs that matter. The breadcrumb buffer is
177 // per-scope so there's no global memory concern.
178 max_breadcrumbs: 100,
179 // Sentry's Logs product — when enabled, structured
180 // tracing fields ride along on every event so we can
181 // search/filter in Sentry's log explorer too, not just
182 // in the issue grouping.
183 enable_logs: true,
184 debug: sentry_debug,
185 before_send: Some(std::sync::Arc::new(before_send)),
186 ..Default::default()
187 },
188 ));
189
190 if guard.is_enabled() {
191 Some(guard)
192 } else {
193 None
194 }
195 }
196
197 #[cfg(not(feature = "sentry"))]
198 {
199 None
200 }
201}
202
#[cfg(all(test, feature = "sentry"))]
mod tests {
    use super::*;
    use sentry::protocol::Event;

    /// Build an otherwise-default event tagged with the given
    /// `upstream_error_class`.
    fn event_with_class(class: &str) -> Event<'static> {
        let mut tagged = Event::default();
        tagged
            .tags
            .insert("upstream_error_class".to_owned(), class.to_owned());
        tagged
    }

    /// Whether `before_send` lets an event tagged with `class` through.
    fn ships(class: &str) -> bool {
        before_send(event_with_class(class)).is_some()
    }

    #[test]
    fn before_send_drops_quota_class_events() {
        // 402 path: classified as `quota` by `report_upstream_error` and
        // emitted at Warning. These were reported to spam the issue list
        // and burn Sentry quota, hence the filter.
        assert!(!ships("quota"));
    }

    #[test]
    fn before_send_drops_rate_limited_class_events() {
        // 429 path: same noise problem. Breadcrumbs still capture the
        // context for the next real error; only the standalone event is
        // stopped.
        assert!(!ships("rate_limited"));
    }

    #[test]
    fn before_send_passes_bad_input_through() {
        // 400 / 422 points at a real bug we want to see.
        assert!(ships("bad_input"));
    }

    #[test]
    fn before_send_passes_server_error_through() {
        assert!(ships("server_error"));
    }

    #[test]
    fn before_send_passes_auth_error_through() {
        assert!(ships("auth_error"));
    }

    #[test]
    fn before_send_passes_transport_error_through() {
        assert!(ships("transport_error"));
    }

    #[test]
    fn before_send_passes_events_without_class_tag() {
        // Panics and generic tracing::error! calls from elsewhere carry no
        // class tag and must never be dropped.
        let untagged = Event::default();
        assert!(before_send(untagged).is_some());
    }
}