cellos_core/observability.rs
1//! Operator-facing tracing-subscriber primitives.
2//!
3//! # HIGH-B5 — redact bearer/Authorization in tracing output
4//!
5//! `reqwest` (and the surrounding `hyper` / `h2` / `rustls` stack) emit
6//! verbose, byte-level diagnostics at TRACE. Those diagnostics include the
7//! raw outbound request — and that means `Authorization: Bearer <secret>`,
8//! `Cookie:`, `Proxy-Authorization:` and friends land in any log stream
9//! whose `RUST_LOG` pulls those targets in at `trace`.
10//!
11//! `reqwest` does not provide a redaction hook because the offending log
12//! calls happen *inside* the crate. The right fix is to attach a filter to
13//! the `fmt` layer at the workspace's tracing-init sites so the dangerous
14//! events never reach a writer.
15//!
16//! This module ships that filter. Binaries that initialize the global
17//! subscriber compose it onto their existing `fmt` layer:
18//!
19//! ```ignore
20//! use tracing_subscriber::layer::SubscriberExt;
21//! use tracing_subscriber::util::SubscriberInitExt;
22//!
23//! let fmt_layer = tracing_subscriber::fmt::layer()
24//! .with_filter(cellos_core::observability::redacted_filter());
25//!
26//! tracing_subscriber::registry()
27//! .with(tracing_subscriber::EnvFilter::from_default_env())
28//! .with(fmt_layer)
29//! .init();
30//! ```
31//!
32//! # What the filter drops
33//!
34//! 1. **HTTP-stack TRACE events.** Any event whose `target` starts with one
35//! of [`HTTP_STACK_TARGETS`] at `Level::TRACE` is suppressed unconditionally.
36//! reqwest's TRACE log lines are not structured — the bearer token is
37//! embedded in a `Debug`-formatted `HeaderMap`, so field-name redaction
38//! cannot reach it. Suppression is the only reliable mitigation.
39//!
40//! 2. **Sensitive field names anywhere.** Any event whose recorded fields
41//! include a name in [`SENSITIVE_HEADER_NAMES`] (case-insensitive) is
42//! suppressed regardless of target or level. Catches our own code paths
43//! that might accidentally `info!(authorization = %h, ...)`.
44//!
45//! Operators still see `reqwest=debug` (status codes, request URLs without
46//! headers) and every workspace-emitted event at INFO/DEBUG. The escape
47//! hatch — `RUST_LOG=reqwest=trace` in production — is closed.
48//!
49//! Approach C from the wave-1 audit. ADR-0018 (FIX-B4 — credential
50//! redaction posture) cross-references this module.
51
52use tracing::field::{Field, Visit};
53#[cfg(feature = "tracing-layer")]
54use tracing::{Event, Metadata, Subscriber};
55#[cfg(feature = "tracing-layer")]
56use tracing_subscriber::layer::{Context, Filter};
57
58/// Targets whose TRACE-level events are suppressed wholesale.
59///
60/// Prefix-matched against `Metadata::target()`. Each entry covers the crate
61/// root **and** its child modules (e.g. `"reqwest"` matches
62/// `reqwest::connect`, `reqwest::async_impl::client`, …).
63///
64/// Ordered roughly by likelihood; the linear scan is cheap.
65pub const HTTP_STACK_TARGETS: &[&str] = &[
66 "reqwest",
67 "hyper",
68 "hyper_util",
69 "hyper_rustls",
70 "h2",
71 "rustls",
72 "tokio_rustls",
73 // `tower-http` and `tower` can emit request/response spans whose fields
74 // include header maps. Trace-level only — DEBUG and above stay visible.
75 "tower_http",
76 "tower",
77];
78
79/// Header / field names whose presence in *any* event causes the event to be
80/// dropped. Case-insensitive match on the field name reported by
81/// [`tracing::field::Field::name`].
82///
83/// Includes both the canonical HTTP header forms (`authorization`,
84/// `cookie`, …) and the structured-logging conventions our own code uses
85/// (`bearer_token`, `api_key`, …). When in doubt, add — false positives
86/// only cost a missing log line.
87pub const SENSITIVE_HEADER_NAMES: &[&str] = &[
88 // Standard HTTP request/response credential headers.
89 "authorization",
90 "proxy-authorization",
91 "cookie",
92 "set-cookie",
93 // Vendor-specific bearer carriers (AWS SigV4, Vault, GCP, generic API keys).
94 "x-amz-security-token",
95 "x-vault-token",
96 "x-api-key",
97 "x-auth-token",
98 "x-csrf-token",
99 // Variant spellings we might see when code records header values into
100 // events using snake_case field names rather than HTTP-header spelling.
101 "authorization_header",
102 "bearer_token",
103 "access_token",
104 "refresh_token",
105 "id_token",
106 "api_key",
107 "session_token",
108 "secret",
109 "secret_value",
110];
111
112/// Build the [`Filter`] that drops events matching the HIGH-B5 policy.
113///
114/// Wire onto a fmt layer with [`tracing_subscriber::Layer::with_filter`]:
115///
116/// ```ignore
117/// let fmt_layer = tracing_subscriber::fmt::layer()
118/// .with_filter(cellos_core::observability::redacted_filter());
119/// ```
120///
121/// Combine with an existing [`tracing_subscriber::EnvFilter`] by stacking
122/// the filters in a [`tracing_subscriber::filter::Filtered`] chain — or
123/// simpler, attach the env filter to the registry and this filter to the
124/// fmt layer (see the module-level example).
125///
126/// The returned filter is stateless and `Copy`, so it can be cloned
127/// freely if multiple fmt layers each need their own copy.
128#[cfg(feature = "tracing-layer")]
129#[must_use]
130pub fn redacted_filter() -> RedactedFilter {
131 RedactedFilter::new()
132}
133
134/// A [`Filter`] that suppresses TRACE-level HTTP-stack events and any event
135/// carrying a sensitive header field name.
136///
137/// Created via [`redacted_filter`]. Public so downstream code can hold the
138/// concrete type when needed (otherwise prefer the constructor).
139#[cfg(feature = "tracing-layer")]
140#[derive(Debug, Default, Clone, Copy)]
141pub struct RedactedFilter {
142 _private: (),
143}
144
145#[cfg(feature = "tracing-layer")]
146impl RedactedFilter {
147 /// Construct a fresh filter. Stateless — every instance behaves
148 /// identically. Provided as a method primarily so `RedactedFilter::new()`
149 /// reads naturally at call sites.
150 #[must_use]
151 pub fn new() -> Self {
152 Self { _private: () }
153 }
154}
155
156#[cfg(feature = "tracing-layer")]
157impl<S> Filter<S> for RedactedFilter
158where
159 S: Subscriber,
160{
161 /// Cheap, metadata-only gate. Drops HTTP-stack TRACE without needing to
162 /// inspect fields.
163 fn enabled(&self, metadata: &Metadata<'_>, _: &Context<'_, S>) -> bool {
164 if is_http_stack_trace(metadata) {
165 return false;
166 }
167 true
168 }
169
170 /// Field-level gate. Runs only for events `enabled` already approved —
171 /// inspects the field names recorded on this specific event and drops
172 /// the event if any name matches a sensitive header.
173 fn event_enabled(&self, event: &Event<'_>, _: &Context<'_, S>) -> bool {
174 if is_http_stack_trace(event.metadata()) {
175 return false;
176 }
177 let mut visitor = SensitiveFieldVisitor::default();
178 event.record(&mut visitor);
179 !visitor.found_sensitive
180 }
181}
182
183/// Internal: does this event metadata point at the HTTP stack at TRACE?
184#[cfg(feature = "tracing-layer")]
185fn is_http_stack_trace(metadata: &Metadata<'_>) -> bool {
186 if *metadata.level() != tracing::Level::TRACE {
187 return false;
188 }
189 let target = metadata.target();
190 HTTP_STACK_TARGETS.iter().any(|prefix| {
191 target == *prefix
192 || target
193 .strip_prefix(*prefix)
194 .is_some_and(|rest| rest.starts_with("::"))
195 })
196}
197
198/// Tracing `Visit` impl that sets `found_sensitive` if any recorded field
199/// name appears in [`SENSITIVE_HEADER_NAMES`] (case-insensitive).
200///
201/// Public so integration tests in downstream crates can reuse it without
202/// re-implementing the lookup. Field *values* are not inspected — once the
203/// name matches, the event is dropped wholesale; we never read the secret.
204#[derive(Debug, Default)]
205pub struct SensitiveFieldVisitor {
206 /// Set to `true` the first time a sensitive field name is observed.
207 /// Sticky — subsequent non-sensitive fields cannot reset it.
208 pub found_sensitive: bool,
209}
210
211impl Visit for SensitiveFieldVisitor {
212 fn record_debug(&mut self, field: &Field, _value: &dyn std::fmt::Debug) {
213 self.check(field);
214 }
215
216 fn record_str(&mut self, field: &Field, _value: &str) {
217 self.check(field);
218 }
219
220 fn record_i64(&mut self, field: &Field, _value: i64) {
221 self.check(field);
222 }
223
224 fn record_u64(&mut self, field: &Field, _value: u64) {
225 self.check(field);
226 }
227
228 fn record_bool(&mut self, field: &Field, _value: bool) {
229 self.check(field);
230 }
231
232 fn record_f64(&mut self, field: &Field, _value: f64) {
233 self.check(field);
234 }
235
236 fn record_error(&mut self, field: &Field, _value: &(dyn std::error::Error + 'static)) {
237 self.check(field);
238 }
239}
240
241impl SensitiveFieldVisitor {
242 fn check(&mut self, field: &Field) {
243 if self.found_sensitive {
244 return;
245 }
246 if is_sensitive_field_name(field.name()) {
247 self.found_sensitive = true;
248 }
249 }
250}
251
252/// Case-insensitive membership check against [`SENSITIVE_HEADER_NAMES`].
253///
254/// Free function so downstream code can use the same canonical list when
255/// rendering its own logs (e.g. a custom `Display` for an outbound request
256/// that elides matching headers before formatting).
257#[must_use]
258pub fn is_sensitive_field_name(name: &str) -> bool {
259 SENSITIVE_HEADER_NAMES
260 .iter()
261 .any(|candidate| candidate.eq_ignore_ascii_case(name))
262}
263
264#[cfg(all(test, feature = "tracing-layer"))]
265mod tests {
266 use super::*;
267 use std::sync::{Arc, Mutex};
268 use tracing::Level;
269 use tracing_subscriber::fmt::MakeWriter;
270 use tracing_subscriber::layer::SubscriberExt;
271 use tracing_subscriber::Layer;
272
273 /// `MakeWriter` that buffers everything into a shared `Vec<u8>`. Used by
274 /// the integration-shape tests below so we can assert on what the
275 /// subscriber actually emitted.
276 #[derive(Clone, Default)]
277 struct CapturingWriter {
278 buf: Arc<Mutex<Vec<u8>>>,
279 }
280
281 impl CapturingWriter {
282 fn snapshot(&self) -> String {
283 let guard = self.buf.lock().unwrap();
284 String::from_utf8_lossy(&guard).into_owned()
285 }
286 }
287
288 impl std::io::Write for CapturingWriter {
289 fn write(&mut self, b: &[u8]) -> std::io::Result<usize> {
290 self.buf.lock().unwrap().extend_from_slice(b);
291 Ok(b.len())
292 }
293 fn flush(&mut self) -> std::io::Result<()> {
294 Ok(())
295 }
296 }
297
298 impl<'a> MakeWriter<'a> for CapturingWriter {
299 type Writer = Self;
300 fn make_writer(&'a self) -> Self::Writer {
301 self.clone()
302 }
303 }
304
305 #[test]
306 fn sensitive_field_visitor_flags_authorization() {
307 // Build a synthetic event by emitting via a private subscriber and
308 // capturing the visit. Easier: drive the lookup directly.
309 assert!(is_sensitive_field_name("authorization"));
310 assert!(is_sensitive_field_name("AUTHORIZATION"));
311 assert!(is_sensitive_field_name("Authorization"));
312 assert!(is_sensitive_field_name("cookie"));
313 assert!(is_sensitive_field_name("x-vault-token"));
314 assert!(is_sensitive_field_name("X-Vault-Token"));
315 assert!(is_sensitive_field_name("bearer_token"));
316 }
317
318 #[test]
319 fn sensitive_field_visitor_lets_normal_fields_through() {
320 assert!(!is_sensitive_field_name("status"));
321 assert!(!is_sensitive_field_name("url"));
322 assert!(!is_sensitive_field_name("method"));
323 assert!(!is_sensitive_field_name("duration_ms"));
324 // Substring matches must NOT trigger — only exact (case-insensitive)
325 // name equality. The threat is a field literally named
326 // "authorization", not a field named "authorization_method".
327 // (If it WAS named "authorization_method" with a Bearer value, that's
328 // already a separate bug; we don't try to read field values.)
329 assert!(!is_sensitive_field_name("authorize_route"));
330 assert!(!is_sensitive_field_name("cookies_accepted"));
331 }
332
333 #[test]
334 fn http_stack_target_match_is_prefix_aware() {
335 // Direct prefix-match assertion — avoids the gymnastics of
336 // synthesizing a `tracing::Metadata` (which requires a static
337 // callsite) just to drive `is_http_stack_trace` from the outside.
338 // The function is private; the prefix logic is what we're testing.
339 for tgt in [
340 "reqwest",
341 "reqwest::connect",
342 "reqwest::async_impl::client",
343 "hyper",
344 "hyper::client::conn",
345 "hyper_util::client::legacy::pool",
346 "h2::proto::streams::send",
347 "rustls::client::tls13",
348 ] {
349 assert!(
350 HTTP_STACK_TARGETS.iter().any(|p| tgt == *p
351 || tgt.strip_prefix(*p).is_some_and(|r| r.starts_with("::"))),
352 "expected target {tgt} to match HTTP_STACK_TARGETS"
353 );
354 }
355 // Non-matches.
356 for tgt in [
357 "reqwesting", // not a real crate; guards against bare prefix match
358 "cellos_supervisor", // workspace code
359 "tokio", // runtime — operator may want trace
360 "tracing", // tracing internals
361 ] {
362 assert!(
363 !HTTP_STACK_TARGETS.iter().any(|p| tgt == *p
364 || tgt.strip_prefix(*p).is_some_and(|r| r.starts_with("::"))),
365 "did NOT expect target {tgt} to match HTTP_STACK_TARGETS"
366 );
367 }
368 }
369
370 /// End-to-end: drive a real subscriber with the redacted filter, emit a
371 /// synthetic "reqwest-shaped" event carrying a bearer-looking string,
372 /// and assert that the captured output contains neither the field name
373 /// nor the secret.
374 ///
375 /// Uses obviously-fake values per gitleaks contract.
376 #[test]
377 fn integration_drops_event_with_authorization_field() {
378 let writer = CapturingWriter::default();
379 let fmt_layer = tracing_subscriber::fmt::layer()
380 .with_writer(writer.clone())
381 .with_target(true)
382 .with_filter(RedactedFilter::new());
383
384 let subscriber = tracing_subscriber::registry().with(fmt_layer);
385
386 tracing::subscriber::with_default(subscriber, || {
387 // Emitted via tracing::info!: workspace code accidentally
388 // recording a header value into a structured event.
389 tracing::info!(
390 authorization = "Bearer test-bearer-xyzzy-not-real",
391 "outbound request"
392 );
393 // Also test a target NOT in the HTTP stack list — the field
394 // name alone should suffice to suppress the event.
395 tracing::info!(
396 target: "cellos_export_http::client",
397 authorization = "Bearer test-bearer-xyzzy-not-real",
398 "outbound request"
399 );
400 });
401
402 let captured = writer.snapshot();
403 assert!(
404 !captured.contains("xyzzy"),
405 "captured tracing output leaked the bearer token: {captured}"
406 );
407 assert!(
408 !captured.contains("Bearer"),
409 "captured tracing output leaked the Bearer prefix: {captured}"
410 );
411 // The whole event should be gone — including the message.
412 assert!(
413 !captured.contains("outbound request"),
414 "expected event to be suppressed entirely; got: {captured}"
415 );
416 }
417
418 #[test]
419 fn integration_passes_non_sensitive_event_through() {
420 let writer = CapturingWriter::default();
421 let fmt_layer = tracing_subscriber::fmt::layer()
422 .with_writer(writer.clone())
423 .with_target(true)
424 .with_ansi(false) // strip color codes — assertions are substring-based
425 .with_filter(RedactedFilter::new());
426
427 let subscriber = tracing_subscriber::registry().with(fmt_layer);
428
429 tracing::subscriber::with_default(subscriber, || {
430 tracing::info!(
431 target: "cellos_export_http::client",
432 status = 200_u64,
433 url = "https://example.invalid/health",
434 duration_ms = 42_u64,
435 "request completed"
436 );
437 });
438
439 let captured = writer.snapshot();
440 assert!(
441 captured.contains("request completed"),
442 "non-sensitive event was suppressed unexpectedly: {captured}"
443 );
444 assert!(
445 captured.contains("status=200"),
446 "non-sensitive structured field missing: {captured}"
447 );
448 }
449
450 #[test]
451 fn integration_drops_reqwest_trace_target() {
452 let writer = CapturingWriter::default();
453 let fmt_layer = tracing_subscriber::fmt::layer()
454 .with_writer(writer.clone())
455 .with_target(true)
456 .with_filter(RedactedFilter::new());
457
458 // EnvFilter at TRACE so the runtime would normally let everything
459 // through; the redacted filter is the line of defense.
460 let env_filter = tracing_subscriber::EnvFilter::new("trace");
461 let subscriber = tracing_subscriber::registry()
462 .with(env_filter)
463 .with(fmt_layer);
464
465 tracing::subscriber::with_default(subscriber, || {
466 // Simulate reqwest's TRACE log: arbitrary message, no
467 // structured fields. The threat is the message body
468 // containing the header dump.
469 tracing::event!(
470 target: "reqwest::async_impl::client",
471 Level::TRACE,
472 "request headers: Authorization=Bearer test-bearer-xyzzy-not-real"
473 );
474 // Hyper TRACE event with header value embedded in message.
475 tracing::event!(
476 target: "hyper::proto::h1::role",
477 Level::TRACE,
478 "writing header: Authorization: Bearer test-bearer-xyzzy-not-real"
479 );
480 // h2 TRACE event.
481 tracing::event!(
482 target: "h2::proto::streams::send",
483 Level::TRACE,
484 "sending frame: Authorization=Bearer test-bearer-xyzzy-not-real"
485 );
486 // And one that should NOT be dropped: reqwest at DEBUG.
487 tracing::event!(
488 target: "reqwest::async_impl::client",
489 Level::DEBUG,
490 "request completed"
491 );
492 });
493
494 let captured = writer.snapshot();
495 assert!(
496 !captured.contains("xyzzy"),
497 "reqwest TRACE event leaked secret: {captured}"
498 );
499 assert!(
500 !captured.contains("Bearer"),
501 "reqwest TRACE event leaked Bearer prefix: {captured}"
502 );
503 assert!(
504 captured.contains("request completed"),
505 "expected reqwest DEBUG event to survive the filter: {captured}"
506 );
507 }
508
509 #[test]
510 fn every_sensitive_name_round_trips_through_the_lookup() {
511 // Sweep across the full constant. Catches typos and (importantly)
512 // catches any new entry that someone adds in uppercase / mixed case
513 // — the constant must stay lowercase for the case-insensitive
514 // compare to remain canonical.
515 for &name in SENSITIVE_HEADER_NAMES {
516 assert!(
517 is_sensitive_field_name(name),
518 "SENSITIVE_HEADER_NAMES entry {name:?} failed self-lookup"
519 );
520 // Round-trip uppercase to prove case-insensitivity.
521 let upper = name.to_ascii_uppercase();
522 assert!(
523 is_sensitive_field_name(&upper),
524 "SENSITIVE_HEADER_NAMES entry {name:?} failed uppercase lookup"
525 );
526 // Canonical form is lowercase ASCII — anchor it.
527 assert_eq!(
528 name,
529 name.to_ascii_lowercase(),
530 "SENSITIVE_HEADER_NAMES entry {name:?} is not lowercase"
531 );
532 }
533 }
534}