Skip to main content

tokio_process_tools/output_stream/line/
options.rs

1//! Configuration for the line parser: maximum line length and how to handle overflow.
2
3use crate::output_stream::num_bytes::NumBytes;
4use typed_builder::TypedBuilder;
5
/// Default maximum line length used by [`LineParsingOptions::default`]:
/// 16 KiB (16 × 1024 bytes).
pub const DEFAULT_MAX_LINE_LENGTH: NumBytes = NumBytes(16 * 1024);
8
/// Strategy applied by the line parser once a line grows past the configured maximum length.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum LineOverflowBehavior {
    /// Truncate the overlong line: everything that arrives after the line was considered too
    /// long is thrown away, up to the next newline character, which then begins a fresh line.
    ///
    /// The discarding state survives chunk boundaries, so once the limit has been reached,
    /// bytes keep being ignored until an actual newline shows up.
    ///
    /// This is the default variant.
    #[default]
    DropAdditionalData,

    /// Split the overlong line: the current line is emitted as soon as it reaches the maximum
    /// allowed length, and whatever data follows immediately becomes the content of the next
    /// line.
    ///
    /// In effect this only inserts intermediate line breaks so that no emitted line exceeds the
    /// length limit.
    ///
    /// No data is dropped with this behavior.
    EmitAdditionalAsNewLines,
}
29
/// Configuration options for parsing lines from a stream.
///
/// [`LineParsingOptions::default`] yields a [`DEFAULT_MAX_LINE_LENGTH`] line limit,
/// [`LineOverflowBehavior::DropAdditionalData`] and no buffer compaction.
// NOTE(review): the "Defaults to …" remarks on the fields describe the `Default` impl. No
// `#[builder(default …)]` attributes are declared on the fields, so presumably the derived
// builder requires every field to be set explicitly — confirm against typed_builder's docs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, TypedBuilder)]
pub struct LineParsingOptions {
    /// Maximum length of a single line in bytes.
    /// When reached, further data won't be appended to the current line.
    /// The line will be emitted in its current state.
    ///
    /// **Must be greater than zero** for any line-consuming visitor (`inspect_lines`,
    /// `collect_lines`, `wait_for_line`, `collect_lines_into_write`, `collect_lines_into_vec`).
    /// Constructing such a consumer with `max_line_length = 0` panics. If you want effectively
    /// unbounded line parsing — i.e. accept arbitrarily long lines from a trusted source —
    /// pass [`NumBytes::MAX`] explicitly instead of zero. Remember that a malicious or
    /// misbehaving stream that writes endless data without a line break would otherwise hold
    /// memory until the process runs out: the explicit `MAX` makes that decision visible at
    /// the call site.
    ///
    /// Defaults to [`DEFAULT_MAX_LINE_LENGTH`] in [`LineParsingOptions::default`].
    pub max_line_length: NumBytes,

    /// What should happen when a line is too long?
    ///
    /// When lossy buffering drops chunks before they reach the parser, line-based consumers
    /// conservatively discard any partial line and resynchronize at the next newline instead of
    /// joining bytes across the gap.
    ///
    /// Defaults to `LineOverflowBehavior::DropAdditionalData` in
    /// [`LineParsingOptions::default`].
    pub overflow_behavior: LineOverflowBehavior,

    /// Optional cap on each parser's long-term-retained buffer capacity.
    ///
    /// The parser keeps two `BytesMut` buffers (the in-progress line and the most-recently emitted
    /// line): each retains capacity for the parser's lifetime, growing to fit the largest line it
    /// has ever held. For most workloads this is fine. The worst case is roughly
    /// `2 × `[`max_line_length`](Self::max_line_length) memory used per parser.
    ///
    /// Set this to `Some(n)` when a stream has mostly small lines but occasional large outliers
    /// (especially under [`NumBytes::MAX`] / "trusted unbounded" line parsing) and you want the
    /// buffers to release their allocations after each outlier instead of staying at outlier-size
    /// forever. At the start of each [`LineParser::next_line`](crate::LineParser::next_line) call,
    /// any buffer whose capacity exceeds `n` is dropped and replaced with an empty buffer. The next
    /// line re-grows it from zero.
    ///
    /// `None` (the default) preserves the "no compaction" behavior. Buffers stay at their largest
    /// observed size. A sensible enabled value could be `1.5 × typical_line_size`. Setting it to
    /// close to (or below) typical line sizes will trigger reallocation almost every line and slow
    /// the parser unnecessarily. When your `max_line_length` is already small, you may ignore this
    /// setting if maximum memory consumption is not an issue on your system.
    ///
    /// Compaction reduces the parser's *steady-state* memory after outliers; it does not change
    /// the peak. Peak memory is bounded by [`max_line_length`](Self::max_line_length) regardless
    /// of this setting. Compaction is also best-effort: a partially-buffered line that has not
    /// yet finished may briefly retain over-threshold capacity until the line completes.
    ///
    /// Defaults to `None` in [`LineParsingOptions::default`].
    pub buffer_compaction_threshold: Option<NumBytes>,
}
86
87impl Default for LineParsingOptions {
88    fn default() -> Self {
89        Self {
90            max_line_length: DEFAULT_MAX_LINE_LENGTH,
91            overflow_behavior: LineOverflowBehavior::default(),
92            buffer_compaction_threshold: None,
93        }
94    }
95}
96
97/// Asserts that `options.max_line_length` is non-zero. Funneled through one location so the
98/// invariant is checked once per visitor at construction time, not on every chunk.
99pub(crate) fn assert_max_line_length_non_zero(options: &LineParsingOptions) {
100    assert!(
101        options.max_line_length.bytes() > 0,
102        "LineParsingOptions::max_line_length must be greater than zero. \
103         If you want effectively unbounded line parsing, pass `NumBytes::MAX` (or another \
104         large explicit value) — zero is never a valid configuration for line-consuming \
105         visitors."
106    );
107}