Skip to main content

tokio_process_tools/output_stream/line/
options.rs

1//! Configuration for the line parser: maximum line length and how to handle overflow.
2
3use crate::output_stream::num_bytes::NumBytes;
4use typed_builder::TypedBuilder;
5
6/// Default maximum line length used by [`LineParsingOptions::default`]. 16 kilobytes.
7pub const DEFAULT_MAX_LINE_LENGTH: NumBytes = NumBytes(16 * 1024);
8
/// Strategy applied once a line grows past the configured maximum length.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum LineOverflowBehavior {
    /// Discard everything that arrives after the line hit the limit, up to the next newline
    /// character. That newline starts a fresh line.
    ///
    /// The "discarding" state is carried across chunk boundaries: after the limit is reached,
    /// bytes keep being ignored until a real newline is observed.
    #[default]
    DropAdditionalData,

    /// Emit the line as soon as it reaches the maximum allowed length and treat whatever
    /// follows as the beginning of the next line.
    ///
    /// In effect, this only inserts intermediate line breaks so that no emitted line exceeds
    /// the length limit.
    ///
    /// This behavior never drops data.
    EmitAdditionalAsNewLines,
}
29
30/// Configuration options for parsing lines from a stream.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, TypedBuilder)]
32pub struct LineParsingOptions {
33    /// Maximum length of a single line in bytes.
34    /// When reached, further data won't be appended to the current line.
35    /// The line will be emitted in its current state.
36    ///
37    /// **Must be greater than zero** for any line-consuming visitor built via
38    /// [`ParseLines`](crate::ParseLines) (and the backends' `wait_for_line` helpers).
39    /// Constructing such a consumer with `max_line_length = 0` panics. If you want effectively
40    /// unbounded line parsing, i.e. accept arbitrarily long lines from a trusted source, pass
41    /// [`NumBytes::MAX`] explicitly instead of zero. Remember that a malicious or misbehaving
42    /// stream that writes endless data without a line break would otherwise hold memory until
43    /// the process runs out: the explicit `MAX` makes that decision visible at the call site.
44    ///
45    /// Defaults to [`DEFAULT_MAX_LINE_LENGTH`].
46    pub max_line_length: NumBytes,
47
48    /// What should happen when a line is too long?
49    ///
50    /// When lossy buffering drops chunks before they reach the parser, line-based consumers
51    /// conservatively discard any partial line and resynchronizes at the next newline instead of
52    /// joining bytes across the gap.
53    ///
54    /// Defaults to `LineOverflowBehavior::DropAdditionalData`.
55    pub overflow_behavior: LineOverflowBehavior,
56
57    /// Optional cap on each parser's long-term-retained buffer capacity.
58    ///
59    /// The parser keeps two `BytesMut` buffers (the in-progress line and the most-recently emitted
60    /// line): each retains capacity for the parser's lifetime, growing to fit the largest line they
61    /// have ever held. For most workloads this is fine. The worst case is roughly
62    /// `2 × `[`max_line_length`](Self::max_line_length) memory used per parser.
63    ///
64    /// Set this to `Some(n)` when a stream has mostly small lines but occasional large outliers
65    /// (especially under [`NumBytes::MAX`] / "trusted unbounded" line parsing) and you want the
66    /// buffers to release their allocations after each outlier instead of staying at outlier-size
67    /// forever. At the start of each [`LineParser::next_line`](crate::LineParser::next_line) call,
68    /// any buffer whose capacity exceeds `n` is dropped and replaced with an empty buffer. The next
69    /// line re-grows it from zero.
70    ///
71    /// `None` (the default) preserves the "no compaction" behavior. Buffers stay at their largest
72    /// observed size. A sensible enabled value could be `1.5 × typical_line_size`. Setting it to
73    /// close to (or below) typical line sizes will trigger reallocation almost every line and slow
74    /// the parser unnecessarily. When your `max_line_length` is already small, you may ignore this
75    /// setting if max consumption is not an issue on your system.
76    ///
77    /// Compaction reduces the parser's *steady-state* memory after outliers; it does not change
78    /// the peak. Peak per-parser memory is roughly `2 ×
79    /// `[`max_line_length`](Self::max_line_length) regardless of this setting (the in-progress
80    /// line and the most-recently emitted line each retain capacity). Compaction is also
81    /// best-effort: a partially-buffered line that has not yet finished may briefly retain
82    /// over-threshold capacity until the line completes.
83    ///
84    /// Defaults to `None`.
85    pub buffer_compaction_threshold: Option<NumBytes>,
86}
87
88impl Default for LineParsingOptions {
89    fn default() -> Self {
90        Self {
91            max_line_length: DEFAULT_MAX_LINE_LENGTH,
92            overflow_behavior: LineOverflowBehavior::default(),
93            buffer_compaction_threshold: None,
94        }
95    }
96}
97
98/// Asserts that `options.max_line_length` is non-zero. Funneled through one location so the
99/// invariant is checked once per visitor at construction time, not on every chunk.
100pub(crate) fn assert_max_line_length_non_zero(options: &LineParsingOptions) {
101    assert!(
102        options.max_line_length.bytes() > 0,
103        "LineParsingOptions::max_line_length must be greater than zero. If you want effectively \
104        unbounded line parsing, pass `NumBytes::MAX` (or another large explicit value). Zero is \
105        never a valid configuration for line-consuming visitors."
106    );
107}