tokio_process_tools/output_stream/line/options.rs
1//! Configuration for the line parser: maximum line length and how to handle overflow.
2
3use crate::output_stream::num_bytes::NumBytes;
4use typed_builder::TypedBuilder;
5
6/// Default maximum line length used by [`LineParsingOptions::default`]. 16 kilobytes.
7pub const DEFAULT_MAX_LINE_LENGTH: NumBytes = NumBytes(16 * 1024);
8
/// Strategy applied when a line grows past the configured maximum length.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineOverflowBehavior {
    /// Truncate the line: once it has been judged too long, every further byte is thrown away
    /// until the next newline character shows up, and that newline starts a fresh line.
    ///
    /// The "discarding" state survives chunk boundaries. After the limit has been hit, bytes
    /// from later chunks keep being ignored until an actual newline is seen.
    #[default]
    DropAdditionalData,

    /// Split the line: as soon as the maximum allowed length is reached the current line is
    /// emitted, and whatever data follows is immediately treated as the start of the next line.
    ///
    /// In effect this inserts intermediate line breaks so that no emitted line is longer than
    /// the limit.
    ///
    /// Nothing is dropped in this mode.
    EmitAdditionalAsNewLines,
}
29
30/// Configuration options for parsing lines from a stream.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, TypedBuilder)]
32pub struct LineParsingOptions {
33 /// Maximum length of a single line in bytes.
34 /// When reached, further data won't be appended to the current line.
35 /// The line will be emitted in its current state.
36 ///
37 /// **Must be greater than zero** for any line-consuming visitor (`inspect_lines`,
38 /// `collect_lines`, `wait_for_line`, `collect_lines_into_write`, `collect_lines_into_vec`).
39 /// Constructing such a consumer with `max_line_length = 0` panics. If you want effectively
40 /// unbounded line parsing — i.e. accept arbitrarily long lines from a trusted source —
41 /// pass [`NumBytes::MAX`] explicitly instead of zero. Remember that a malicious or
42 /// misbehaving stream that writes endless data without a line break would otherwise hold
43 /// memory until the process runs out: the explicit `MAX` makes that decision visible at
44 /// the call site.
45 ///
46 /// Defaults to [`DEFAULT_MAX_LINE_LENGTH`].
47 pub max_line_length: NumBytes,
48
49 /// What should happen when a line is too long?
50 ///
51 /// When lossy buffering drops chunks before they reach the parser, line-based consumers
52 /// conservatively discard any partial line and resynchronizes at the next newline instead of
53 /// joining bytes across the gap.
54 ///
55 /// Defaults to `LineOverflowBehavior::DropAdditionalData`.
56 pub overflow_behavior: LineOverflowBehavior,
57
58 /// Optional cap on each parser's long-term-retained buffer capacity.
59 ///
60 /// The parser keeps two `BytesMut` buffers (the in-progress line and the most-recently emitted
61 /// line): each retains capacity for the parser's lifetime, growing to fit the largest line they
62 /// have ever held. For most workloads this is fine. The worst case is roughly
63 /// `2 × `[`max_line_length`](Self::max_line_length) memory used per parser.
64 ///
65 /// Set this to `Some(n)` when a stream has mostly small lines but occasional large outliers
66 /// (especially under [`NumBytes::MAX`] / "trusted unbounded" line parsing) and you want the
67 /// buffers to release their allocations after each outlier instead of staying at outlier-size
68 /// forever. At the start of each [`LineParser::next_line`](crate::LineParser::next_line) call,
69 /// any buffer whose capacity exceeds `n` is dropped and replaced with an empty buffer. The next
70 /// line re-grows it from zero.
71 ///
72 /// `None` (the default) preserves the "no compaction" behavior. Buffers stay at their largest
73 /// observed size. A sensible enabled value could be `1.5 × typical_line_size`. Setting it to
74 /// close to (or below) typical line sizes will trigger reallocation almost every line and slow
75 /// the parser unnecessarily. When your `max_line_length` is already small, you may ignore this
76 /// setting if max consumption is not an issue on your system.
77 ///
78 /// Compaction reduces the parser's *steady-state* memory after outliers; it does not change
79 /// the peak. Peak memory is bounded by [`max_line_length`](Self::max_line_length) regardless
80 /// of this setting. Compaction is also best-effort: a partially-buffered line that has not
81 /// yet finished may briefly retain over-threshold capacity until the line completes.
82 ///
83 /// Defaults to `None`.
84 pub buffer_compaction_threshold: Option<NumBytes>,
85}
86
87impl Default for LineParsingOptions {
88 fn default() -> Self {
89 Self {
90 max_line_length: DEFAULT_MAX_LINE_LENGTH,
91 overflow_behavior: LineOverflowBehavior::default(),
92 buffer_compaction_threshold: None,
93 }
94 }
95}
96
97/// Asserts that `options.max_line_length` is non-zero. Funneled through one location so the
98/// invariant is checked once per visitor at construction time, not on every chunk.
99pub(crate) fn assert_max_line_length_non_zero(options: &LineParsingOptions) {
100 assert!(
101 options.max_line_length.bytes() > 0,
102 "LineParsingOptions::max_line_length must be greater than zero. \
103 If you want effectively unbounded line parsing, pass `NumBytes::MAX` (or another \
104 large explicit value) — zero is never a valid configuration for line-consuming \
105 visitors."
106 );
107}