tokio_process_tools/output_stream/line/options.rs
1//! Configuration for the line parser: maximum line length and how to handle overflow.
2
3use crate::output_stream::num_bytes::NumBytes;
4use typed_builder::TypedBuilder;
5
6/// Default maximum line length used by [`LineParsingOptions::default`]. 16 kilobytes.
7pub const DEFAULT_MAX_LINE_LENGTH: NumBytes = NumBytes(16 * 1024);
8
/// Strategy applied when a line grows past the configured maximum length.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LineOverflowBehavior {
    /// Discard everything received after the current line was deemed too long, up to the next
    /// newline character. That newline then begins a fresh line.
    ///
    /// The discarding state survives chunk boundaries: once the limit has been hit, incoming
    /// bytes keep being ignored until an actual newline shows up.
    ///
    /// This is the default variant.
    #[default]
    DropAdditionalData,

    /// Emit the current line as soon as it reaches the maximum allowed length and treat
    /// whatever arrives afterwards as the content of the next line.
    ///
    /// In effect, this merely inserts intermediate line breaks so that no emitted line exceeds
    /// the length limit.
    ///
    /// Nothing is dropped in this mode.
    EmitAdditionalAsNewLines,
}
29
30/// Configuration options for parsing lines from a stream.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, TypedBuilder)]
32pub struct LineParsingOptions {
33 /// Maximum length of a single line in bytes.
34 /// When reached, further data won't be appended to the current line.
35 /// The line will be emitted in its current state.
36 ///
37 /// **Must be greater than zero** for any line-consuming visitor built via
38 /// [`ParseLines`](crate::ParseLines) (and the backends' `wait_for_line` helpers).
39 /// Constructing such a consumer with `max_line_length = 0` panics. If you want effectively
40 /// unbounded line parsing, i.e. accept arbitrarily long lines from a trusted source, pass
41 /// [`NumBytes::MAX`] explicitly instead of zero. Remember that a malicious or misbehaving
42 /// stream that writes endless data without a line break would otherwise hold memory until
43 /// the process runs out: the explicit `MAX` makes that decision visible at the call site.
44 ///
45 /// Defaults to [`DEFAULT_MAX_LINE_LENGTH`].
46 pub max_line_length: NumBytes,
47
48 /// What should happen when a line is too long?
49 ///
50 /// When lossy buffering drops chunks before they reach the parser, line-based consumers
51 /// conservatively discard any partial line and resynchronizes at the next newline instead of
52 /// joining bytes across the gap.
53 ///
54 /// Defaults to `LineOverflowBehavior::DropAdditionalData`.
55 pub overflow_behavior: LineOverflowBehavior,
56
57 /// Optional cap on each parser's long-term-retained buffer capacity.
58 ///
59 /// The parser keeps two `BytesMut` buffers (the in-progress line and the most-recently emitted
60 /// line): each retains capacity for the parser's lifetime, growing to fit the largest line they
61 /// have ever held. For most workloads this is fine. The worst case is roughly
62 /// `2 × `[`max_line_length`](Self::max_line_length) memory used per parser.
63 ///
64 /// Set this to `Some(n)` when a stream has mostly small lines but occasional large outliers
65 /// (especially under [`NumBytes::MAX`] / "trusted unbounded" line parsing) and you want the
66 /// buffers to release their allocations after each outlier instead of staying at outlier-size
67 /// forever. At the start of each [`LineParser::next_line`](crate::LineParser::next_line) call,
68 /// any buffer whose capacity exceeds `n` is dropped and replaced with an empty buffer. The next
69 /// line re-grows it from zero.
70 ///
71 /// `None` (the default) preserves the "no compaction" behavior. Buffers stay at their largest
72 /// observed size. A sensible enabled value could be `1.5 × typical_line_size`. Setting it to
73 /// close to (or below) typical line sizes will trigger reallocation almost every line and slow
74 /// the parser unnecessarily. When your `max_line_length` is already small, you may ignore this
75 /// setting if max consumption is not an issue on your system.
76 ///
77 /// Compaction reduces the parser's *steady-state* memory after outliers; it does not change
78 /// the peak. Peak per-parser memory is roughly `2 ×
79 /// `[`max_line_length`](Self::max_line_length) regardless of this setting (the in-progress
80 /// line and the most-recently emitted line each retain capacity). Compaction is also
81 /// best-effort: a partially-buffered line that has not yet finished may briefly retain
82 /// over-threshold capacity until the line completes.
83 ///
84 /// Defaults to `None`.
85 pub buffer_compaction_threshold: Option<NumBytes>,
86}
87
88impl Default for LineParsingOptions {
89 fn default() -> Self {
90 Self {
91 max_line_length: DEFAULT_MAX_LINE_LENGTH,
92 overflow_behavior: LineOverflowBehavior::default(),
93 buffer_compaction_threshold: None,
94 }
95 }
96}
97
98/// Asserts that `options.max_line_length` is non-zero. Funneled through one location so the
99/// invariant is checked once per visitor at construction time, not on every chunk.
100pub(crate) fn assert_max_line_length_non_zero(options: &LineParsingOptions) {
101 assert!(
102 options.max_line_length.bytes() > 0,
103 "LineParsingOptions::max_line_length must be greater than zero. If you want effectively \
104 unbounded line parsing, pass `NumBytes::MAX` (or another large explicit value). Zero is \
105 never a valid configuration for line-consuming visitors."
106 );
107}