Skip to main content

tailtriage_core/
config.rs

1use serde::{Deserialize, Serialize};
2use std::path::Path;
3use std::sync::Arc;
4
5use crate::{LocalJsonSink, RunSink};
6
/// Capture mode used during a run.
///
/// The mode selects which core-owned default retention limits
/// [`CaptureMode::core_defaults`] returns. Because of
/// `rename_all = "snake_case"`, the variants are serialized as `light` and
/// `investigation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CaptureMode {
    /// Lower-runtime-cost mode for core-only capture categories.
    ///
    /// Core-owned defaults in this mode are:
    ///
    /// - `max_requests = 100_000`
    /// - `max_stages = 200_000`
    /// - `max_queues = 200_000`
    /// - `max_inflight_snapshots = 200_000`
    /// - `max_runtime_snapshots = 100_000`
    Light,
    /// Higher-retention mode for incident investigation.
    ///
    /// Core-owned defaults in this mode are (each three times the `Light`
    /// value):
    ///
    /// - `max_requests = 300_000`
    /// - `max_stages = 600_000`
    /// - `max_queues = 600_000`
    /// - `max_inflight_snapshots = 600_000`
    /// - `max_runtime_snapshots = 300_000`
    Investigation,
}
32
33impl CaptureMode {
34    /// Returns core-owned default capture limits for this mode.
35    ///
36    /// These mode defaults only affect retention limits in `tailtriage-core`.
37    /// They do not change event types or request lifecycle semantics, and they
38    /// do not auto-start Tokio runtime sampling.
39    #[must_use]
40    pub const fn core_defaults(self) -> CaptureLimits {
41        match self {
42            Self::Light => CaptureLimits {
43                max_requests: 100_000,
44                max_stages: 200_000,
45                max_queues: 200_000,
46                max_inflight_snapshots: 200_000,
47                // Runtime snapshot defaults are carried in core artifacts for schema
48                // consistency and are used by integration crates as needed.
49                max_runtime_snapshots: 100_000,
50            },
51            Self::Investigation => CaptureLimits {
52                max_requests: 300_000,
53                max_stages: 600_000,
54                max_queues: 600_000,
55                max_inflight_snapshots: 600_000,
56                max_runtime_snapshots: 300_000,
57            },
58        }
59    }
60}
61
/// Limits that bound in-memory capture growth for one run.
///
/// Limits apply to retained in-memory data while capture is active. When a
/// section reaches its cap, additional entries are dropped and truncation
/// counters are updated.
///
/// There is one cap per capture category. The `Default` implementation
/// yields the [`CaptureMode::Light`] defaults.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct CaptureLimits {
    /// Maximum number of request events retained in-memory for the run.
    pub max_requests: usize,
    /// Maximum number of stage events retained in-memory for the run.
    pub max_stages: usize,
    /// Maximum number of queue events retained in-memory for the run.
    pub max_queues: usize,
    /// Maximum number of in-flight snapshots retained in-memory for the run.
    pub max_inflight_snapshots: usize,
    /// Maximum number of runtime snapshots retained in-memory for the run.
    pub max_runtime_snapshots: usize,
}
80
81impl Default for CaptureLimits {
82    fn default() -> Self {
83        CaptureMode::Light.core_defaults()
84    }
85}
86
/// Field-level capture limit overrides applied on top of mode defaults.
///
/// This additive API preserves [`TailtriageBuilder::capture_limits`] as a
/// full-override path.
///
/// A field set to `Some` replaces the corresponding base limit when the
/// override is applied; a field left as `None` keeps the base value. The
/// derived `Default` leaves every field as `None`, so it overrides nothing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct CaptureLimitsOverride {
    /// Optional override for [`CaptureLimits::max_requests`].
    pub max_requests: Option<usize>,
    /// Optional override for [`CaptureLimits::max_stages`].
    pub max_stages: Option<usize>,
    /// Optional override for [`CaptureLimits::max_queues`].
    pub max_queues: Option<usize>,
    /// Optional override for [`CaptureLimits::max_inflight_snapshots`].
    pub max_inflight_snapshots: Option<usize>,
    /// Optional override for [`CaptureLimits::max_runtime_snapshots`].
    pub max_runtime_snapshots: Option<usize>,
}
104
105impl CaptureLimitsOverride {
106    /// Applies this override to an existing limit set and returns the result.
107    #[must_use]
108    pub const fn apply(self, base: CaptureLimits) -> CaptureLimits {
109        CaptureLimits {
110            max_requests: match self.max_requests {
111                Some(value) => value,
112                None => base.max_requests,
113            },
114            max_stages: match self.max_stages {
115                Some(value) => value,
116                None => base.max_stages,
117            },
118            max_queues: match self.max_queues {
119                Some(value) => value,
120                None => base.max_queues,
121            },
122            max_inflight_snapshots: match self.max_inflight_snapshots {
123                Some(value) => value,
124                None => base.max_inflight_snapshots,
125            },
126            max_runtime_snapshots: match self.max_runtime_snapshots {
127                Some(value) => value,
128                None => base.max_runtime_snapshots,
129            },
130        }
131    }
132
133    const fn merge(self, newer: Self) -> Self {
134        Self {
135            max_requests: match newer.max_requests {
136                Some(value) => Some(value),
137                None => self.max_requests,
138            },
139            max_stages: match newer.max_stages {
140                Some(value) => Some(value),
141                None => self.max_stages,
142            },
143            max_queues: match newer.max_queues {
144                Some(value) => Some(value),
145                None => self.max_queues,
146            },
147            max_inflight_snapshots: match newer.max_inflight_snapshots {
148                Some(value) => Some(value),
149                None => self.max_inflight_snapshots,
150            },
151            max_runtime_snapshots: match newer.max_runtime_snapshots {
152                Some(value) => Some(value),
153                None => self.max_runtime_snapshots,
154            },
155        }
156    }
157}
158
/// Stable, resolved core configuration used by one capture run.
///
/// Within this module it is assembled by `Config::from_builder` from the
/// builder's mode, its resolved capture limits, and its strict-lifecycle flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct EffectiveCoreConfig {
    /// Selected capture mode.
    pub mode: CaptureMode,
    /// Effective resolved retention limits used for this run.
    pub capture_limits: CaptureLimits,
    /// Effective strict lifecycle behavior for this run.
    pub strict_lifecycle: bool,
}
169
/// Crate-internal configuration snapshot taken from a [`TailtriageBuilder`].
///
/// `mode` and `strict_lifecycle` are also stored inside `effective_core`;
/// `Config::from_builder` fills both copies from the same builder fields.
#[derive(Clone)]
pub(crate) struct Config {
    /// Service name copied from the builder.
    pub service_name: String,
    /// Optional service version copied from the builder.
    pub service_version: Option<String>,
    /// Optional explicit run identifier copied from the builder.
    pub run_id: Option<String>,
    /// Capture mode selected on the builder.
    pub mode: CaptureMode,
    /// Shared handle to the run sink; cloning the config clones the `Arc`,
    /// not the sink itself.
    pub sink: Arc<dyn RunSink + Send + Sync>,
    /// Resolved mode, retention limits, and strict-lifecycle flag for the run.
    pub effective_core: EffectiveCoreConfig,
    /// Strict lifecycle flag copied from the builder.
    pub strict_lifecycle: bool,
}
180
181impl Config {
182    pub(crate) fn from_builder(builder: &TailtriageBuilder) -> Self {
183        let mode_defaults = builder.mode.core_defaults();
184        let effective_limits = match builder.capture_limits {
185            Some(full_override) => full_override,
186            None => builder.capture_limits_override.apply(mode_defaults),
187        };
188        let effective_core = EffectiveCoreConfig {
189            mode: builder.mode,
190            capture_limits: effective_limits,
191            strict_lifecycle: builder.strict_lifecycle,
192        };
193
194        Self {
195            service_name: builder.service_name.clone(),
196            service_version: builder.service_version.clone(),
197            run_id: builder.run_id.clone(),
198            mode: builder.mode,
199            sink: Arc::clone(&builder.sink),
200            effective_core,
201            strict_lifecycle: builder.strict_lifecycle,
202        }
203    }
204}
205
/// Errors emitted while building one tailtriage capture instance.
///
/// This is the error type returned by [`TailtriageBuilder::build`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BuildError {
    /// Service name was empty.
    ///
    /// Displayed as `service_name cannot be empty`.
    EmptyServiceName,
}
212
213impl std::fmt::Display for BuildError {
214    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
215        match self {
216            Self::EmptyServiceName => write!(f, "service_name cannot be empty"),
217        }
218    }
219}
220
// Empty impl: no `Error` methods are overridden, so `BuildError` relies on
// the trait's provided defaults.
impl std::error::Error for BuildError {}
222
/// Builder for constructing a [`crate::Tailtriage`] run.
///
/// Setter methods consume the builder and return it, so calls can be chained
/// before finishing with [`TailtriageBuilder::build`].
#[derive(Clone)]
pub struct TailtriageBuilder {
    /// Service name supplied when the builder is created.
    pub(crate) service_name: String,
    /// Optional service version; `None` until `service_version(...)` is called.
    pub(crate) service_version: Option<String>,
    /// Optional explicit run identifier; `None` until `run_id(...)` is called.
    pub(crate) run_id: Option<String>,
    /// Selected capture mode; starts as [`CaptureMode::Light`].
    pub(crate) mode: CaptureMode,
    /// Run sink; starts as a `LocalJsonSink` created for `tailtriage-run.json`.
    pub(crate) sink: Arc<dyn RunSink + Send + Sync>,
    /// Full limit override; when `Some`, it is used instead of the mode
    /// defaults and `capture_limits_override`.
    pub(crate) capture_limits: Option<CaptureLimits>,
    /// Accumulated field-level overrides applied on top of the mode defaults.
    pub(crate) capture_limits_override: CaptureLimitsOverride,
    /// Whether strict lifecycle validation is enabled; starts as `false`.
    pub(crate) strict_lifecycle: bool,
}
235
236impl TailtriageBuilder {
237    pub(crate) fn new(service_name: impl Into<String>) -> Self {
238        Self {
239            service_name: service_name.into(),
240            service_version: None,
241            run_id: None,
242            mode: CaptureMode::Light,
243            sink: Arc::new(LocalJsonSink::new("tailtriage-run.json")),
244            capture_limits: None,
245            capture_limits_override: CaptureLimitsOverride::default(),
246            strict_lifecycle: false,
247        }
248    }
249
250    /// Sets capture mode to [`CaptureMode::Light`].
251    ///
252    /// Light mode is the default and favors lower runtime cost in core-only capture categories with enough signal for first-pass triage.
253    #[must_use]
254    pub fn light(mut self) -> Self {
255        self.mode = CaptureMode::Light;
256        self
257    }
258
259    /// Sets capture mode to [`CaptureMode::Investigation`].
260    ///
261    /// Use this mode when you need more detailed evidence during an incident.
262    #[must_use]
263    pub fn investigation(mut self) -> Self {
264        self.mode = CaptureMode::Investigation;
265        self
266    }
267
268    /// Writes run output to a local JSON file sink at `output_path`.
269    ///
270    /// The default output path is `tailtriage-run.json`.
271    #[must_use]
272    pub fn output(mut self, output_path: impl AsRef<Path>) -> Self {
273        self.sink = Arc::new(LocalJsonSink::new(output_path));
274        self
275    }
276
277    /// Uses a custom run sink implementation.
278    #[must_use]
279    pub fn sink<S>(mut self, sink: S) -> Self
280    where
281        S: RunSink + Send + Sync + 'static,
282    {
283        self.sink = Arc::new(sink);
284        self
285    }
286
287    /// Sets an optional service version recorded in run metadata.
288    #[must_use]
289    pub fn service_version(mut self, service_version: impl Into<String>) -> Self {
290        self.service_version = Some(service_version.into());
291        self
292    }
293
294    /// Sets an explicit run identifier for metadata.
295    ///
296    /// If not set, `tailtriage` generates a run ID automatically.
297    #[must_use]
298    pub fn run_id(mut self, run_id: impl Into<String>) -> Self {
299        self.run_id = Some(run_id.into());
300        self
301    }
302
303    /// Overrides default capture limits for bounded in-memory collection.
304    #[must_use]
305    pub fn capture_limits(mut self, limits: CaptureLimits) -> Self {
306        self.capture_limits = Some(limits);
307        self
308    }
309
310    /// Applies field-level capture limit overrides on top of mode defaults.
311    ///
312    /// This additive override path does not change full-override behavior from
313    /// [`Self::capture_limits`]. If both are provided, `capture_limits(...)`
314    /// remains authoritative.
315    #[must_use]
316    pub fn capture_limits_override(mut self, overrides: CaptureLimitsOverride) -> Self {
317        self.capture_limits_override = self.capture_limits_override.merge(overrides);
318        self
319    }
320
321    /// Enables strict lifecycle validation on shutdown.
322    ///
323    /// When enabled, [`crate::Tailtriage::shutdown`] returns an error if unfinished
324    /// requests remain pending.
325    #[must_use]
326    pub fn strict_lifecycle(mut self, strict_lifecycle: bool) -> Self {
327        self.strict_lifecycle = strict_lifecycle;
328        self
329    }
330
331    /// Builds one [`crate::Tailtriage`] collector for the configured service.
332    ///
333    /// # Errors
334    ///
335    /// Returns [`BuildError::EmptyServiceName`] when the configured service name is blank.
336    pub fn build(self) -> Result<crate::Tailtriage, BuildError> {
337        crate::Tailtriage::from_config(Config::from_builder(&self))
338    }
339}
340
/// Optional request start settings used by [`crate::Tailtriage::begin_request_with`].
///
/// When `request_id` is not provided, a request ID is generated automatically.
///
/// `RequestOptions` configures start metadata only. It does not change request
/// completion semantics: each request must still be finished exactly once.
///
/// The derived `Default` leaves both fields as `None`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct RequestOptions {
    /// Optional caller-provided request ID used for request correlation.
    pub request_id: Option<String>,
    /// Optional semantic request kind (for example `http` or `job`).
    pub kind: Option<String>,
}
354
355impl RequestOptions {
356    /// Creates default request options with autogenerated request IDs.
357    #[must_use]
358    pub fn new() -> Self {
359        Self::default()
360    }
361
362    /// Sets an explicit request ID for the next request context.
363    #[must_use]
364    pub fn request_id(mut self, request_id: impl Into<String>) -> Self {
365        self.request_id = Some(request_id.into());
366        self
367    }
368
369    /// Sets an optional semantic kind recorded on completion.
370    #[must_use]
371    pub fn kind(mut self, kind: impl Into<String>) -> Self {
372        self.kind = Some(kind.into());
373        self
374    }
375}