line_wrap/
lib.rs

1//! Efficiently insert line endings.
2//!
3//! If you have a buffer full of data and want to insert any sort of regularly-spaced separator,
4//! this will do it with a minimum of data copying. Commonly, this is to insert `\n` (see `lf()`) or `\r\n` (`crlf()`), but
5//! any byte sequence can be used.
6//!
7//! 1. Pick a line ending. For single byte separators, see `ByteLineEnding`, or for two bytes, `TwoByteLineEnding`. For
8//! arbitrary byte slices, use `SliceLineEnding`.
9//! 2. Call `line_wrap`.
10//! 3. Your data has been rearranged in place with the specified line ending inserted.
11//!
12//! # Examples
13//!
14//! ```
15//! use line_wrap::*;
16//! // suppose we have 80 bytes of data in a buffer and we want to wrap as per MIME.
17//! // Buffer is large enough to hold line endings.
18//! let mut data = vec![0; 82];
19//!
20//! assert_eq!(2, line_wrap(&mut data, 80, 76, &crlf()));
21//!
22//! // first line of zeroes
23//! let mut expected_data = vec![0; 76];
24//! // line ending
25//! expected_data.extend_from_slice(b"\r\n");
26//! // next line
27//! expected_data.extend_from_slice(&[0, 0, 0, 0]);
28//! assert_eq!(expected_data, data);
29//! ```
30//!
31//! # Performance
32//!
33//! On an i7 6850k:
34//!
35//! - 10 byte input, 1 byte line length takes ~60ns (~160MiB/s)
36//! - 100 byte input, 10 byte lines takes ~60ns (~1.6GiB/s)
37//!     - Startup costs dominate at these small lengths
38//! - 1,000 byte input, 100 byte lines takes ~65ns (~15GiB/s)
39//! - 10,000 byte input, 100 byte lines takes ~550ns (~17GiB/s)
40//! - In general, `SliceLineEncoding` is about 75% the speed of the fixed-length impls.
41//!
42//! Naturally, try `cargo +nightly bench` on your hardware to get more representative data.
43
44#![no_std]
45#![deny(unsafe_code, missing_docs)]
46
47use core::num::NonZeroUsize;
48
49/// Unix-style line ending.
50pub fn lf() -> ByteLineEnding {
51    ByteLineEnding::new(b'\n')
52}
53
54/// Windows-style line ending.
55pub fn crlf() -> TwoByteLineEnding {
56    TwoByteLineEnding::new(b'\r', b'\n')
57}
58
59/// Writes line endings.
60///
61/// The trait allows specialization for the common single and double byte cases, netting nice
62/// throughput improvements over simply using a slice for everything.
63pub trait LineEnding {
64    /// Write the line ending into the slice, which starts at the point where the ending should be written and is len() in length
65    fn write_ending(&self, slice: &mut [u8]);
66    /// The length of this particular line ending (must be constant)
67    fn len(&self) -> NonZeroUsize;
68}
69
70/// A single byte line ending.
71///
72/// See `lf()`.
73///
74/// # Examples
75///
76/// ```
77/// use line_wrap::*;
78///
79/// let ending = ByteLineEnding::new(b'\n');
80///
81/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255];
82///
83/// assert_eq!(2, line_wrap(&mut data[..], 6, 2, &ending));
84///
85/// assert_eq!(vec![1, 2, b'\n', 3, 4, b'\n', 5, 6], data);
86/// ```
87pub struct ByteLineEnding {
88    byte: u8,
89}
90
91impl ByteLineEnding {
92    /// Construct a new single byte line ending
93    pub fn new(byte: u8) -> ByteLineEnding {
94        ByteLineEnding { byte }
95    }
96}
97
98impl LineEnding for ByteLineEnding {
99    #[inline]
100    fn write_ending(&self, slice: &mut [u8]) {
101        slice[0] = self.byte;
102    }
103
104    #[inline]
105    fn len(&self) -> NonZeroUsize {
106        NonZeroUsize::new(1_usize).unwrap()
107    }
108}
109
110/// A double byte line ending.
111///
112/// See `crlf()`.
113///
114/// # Examples
115///
116/// ```
117/// use line_wrap::*;
118///
119/// let ending = TwoByteLineEnding::new(b'\r', b'\n');
120///
121/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255];
122///
123/// assert_eq!(4, line_wrap(&mut data[..], 6, 2, &ending));
124///
125/// assert_eq!(vec![1, 2, b'\r', b'\n', 3, 4, b'\r', b'\n', 5, 6], data);
126/// ```
127pub struct TwoByteLineEnding {
128    byte0: u8,
129    byte1: u8,
130}
131
132impl TwoByteLineEnding {
133    /// Construct a new two byte line ending
134    pub fn new(byte0: u8, byte1: u8) -> TwoByteLineEnding {
135        TwoByteLineEnding { byte0, byte1 }
136    }
137}
138
139impl LineEnding for TwoByteLineEnding {
140    #[inline]
141    fn write_ending(&self, slice: &mut [u8]) {
142        slice[0] = self.byte0;
143        slice[1] = self.byte1;
144    }
145
146    #[inline]
147    fn len(&self) -> NonZeroUsize {
148        NonZeroUsize::new(2_usize).unwrap()
149    }
150}
151
152/// A byte slice line ending.
153///
154/// Gives up some throughput compared to the specialized single/double byte impls, but works with
155/// any length.
156///
157/// # Examples
158///
159/// ```
160/// use line_wrap::*;
161///
162/// let ending = SliceLineEnding::new(b"xyz").expect("not empty");
163///
164/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255];
165///
166/// assert_eq!(6, line_wrap(&mut data[..], 6, 2, &ending));
167///
168/// assert_eq!(vec![1, 2, b'x', b'y', b'z', 3, 4, b'x', b'y', b'z', 5, 6], data);
169/// ```
170pub struct SliceLineEnding<'a> {
171    slice: &'a [u8],
172}
173
174impl<'a> SliceLineEnding<'a> {
175    /// Create a line ending from the slice, returning None if the slice is empty.
176    pub fn new(slice: &[u8]) -> Option<SliceLineEnding> {
177        if slice.is_empty() {
178            None
179        } else {
180            Some(SliceLineEnding { slice })
181        }
182    }
183}
184
185impl<'a> LineEnding for SliceLineEnding<'a> {
186    #[inline]
187    fn write_ending(&self, slice: &mut [u8]) {
188        slice.copy_from_slice(self.slice);
189    }
190
191    #[inline]
192    fn len(&self) -> NonZeroUsize {
193        NonZeroUsize::new(self.slice.len()).expect("Length already checked")
194    }
195}
196
197/// Insert line endings into the input.
198///
199/// Endings are inserted after each complete line, except the last line, even if the last line takes
200/// up the full line width.
201///
202/// - `buf` must be large enough to handle the increased size after endings are inserted. In other
203/// words, `buf.len() >= input_len / line_len * line_ending.len()`.
204/// - `input_len` is the length of the unwrapped  in `buf`.
205/// - `line_len` is the desired line width without line ending characters.
206///
207/// Returns the number of line ending bytes added.
208///
209/// # Panics
210///
211/// - When `buf` is too small to contain the original input and its new line endings
212pub fn line_wrap<L: LineEnding>(
213    buf: &mut [u8],
214    input_len: usize,
215    line_len: usize,
216    line_ending: &L,
217) -> usize {
218    if input_len <= line_len {
219        return 0;
220    }
221
222    let line_ending_len = line_ending.len();
223    let line_wrap_params = line_wrap_parameters(input_len, line_len, line_ending_len);
224
225    // ptr.offset() is undefined if it wraps, and there is no checked_offset(). However, because
226    // we perform this check up front to make sure we have enough capacity, we know that none of
227    // the subsequent pointer operations (assuming they implement the desired behavior of course!)
228    // will overflow.
229    assert!(
230        buf.len() >= line_wrap_params.total_len,
231        "Buffer must be able to hold encoded data after line wrapping"
232    );
233
234    // Move the last line, either partial or full, by itself as it does not have a line ending
235    // afterwards
236    let last_line_start = input_len
237        .checked_sub(line_wrap_params.last_line_len)
238        .expect("Last line start index underflow");
239    // last line starts immediately after all the wrapped full lines
240    let new_line_start = line_wrap_params.total_full_wrapped_lines_len;
241
242    buf.copy_within(
243        last_line_start..(last_line_start + line_wrap_params.last_line_len),
244        new_line_start,
245    );
246
247    let mut total_line_ending_bytes = 0;
248
249    // initialize so that the initial decrement will set them correctly
250    let mut old_line_start = last_line_start;
251    let mut new_line_start = line_wrap_params.total_full_wrapped_lines_len;
252
253    // handle the full lines
254    for _ in 0..line_wrap_params.lines_with_endings {
255        // the index after the end of the line ending we're about to write is the start of the next
256        // line
257        let end_of_line_ending = new_line_start;
258        let start_of_line_ending = end_of_line_ending
259            .checked_sub(line_ending_len.get())
260            .expect("Line ending start index underflow");
261
262        // doesn't underflow because it's decremented `line_wrap_params.lines_with_endings` times
263        old_line_start = old_line_start
264            .checked_sub(line_len)
265            .expect("Old line start index underflow");
266        new_line_start = new_line_start
267            .checked_sub(line_wrap_params.line_with_ending_len)
268            .expect("New line start index underflow");
269
270        buf.copy_within(old_line_start..old_line_start + line_len, new_line_start);
271
272        line_ending.write_ending(&mut buf[start_of_line_ending..end_of_line_ending]);
273        total_line_ending_bytes += line_ending_len.get();
274    }
275
276    assert_eq!(
277        line_wrap_params.total_line_endings_len,
278        total_line_ending_bytes
279    );
280
281    total_line_ending_bytes
282}
283
284#[derive(Debug, PartialEq)]
285struct LineWrapParameters {
286    line_with_ending_len: usize,
287    // number of lines that need an ending
288    lines_with_endings: usize,
289    // length of last line (which never needs an ending)
290    last_line_len: usize,
291    // length of lines that need an ending (which are always full lines), with their endings
292    total_full_wrapped_lines_len: usize,
293    // length of all lines, including endings for the ones that need them
294    total_len: usize,
295    // length of the line endings only
296    total_line_endings_len: usize,
297}
298
299/// Calculations about how many lines we'll get for a given line length, line ending, etc.
300/// This assumes that the last line will not get an ending, even if it is the full line length.
301// Inlining improves short input single-byte by 25%.
302#[inline]
303fn line_wrap_parameters(
304    input_len: usize,
305    line_len: usize,
306    line_ending_len: NonZeroUsize,
307) -> LineWrapParameters {
308    let line_with_ending_len = line_len
309        .checked_add(line_ending_len.get())
310        .expect("Line length with ending exceeds usize");
311
312    if input_len <= line_len {
313        // no wrapping needed
314        return LineWrapParameters {
315            line_with_ending_len,
316            lines_with_endings: 0,
317            last_line_len: input_len,
318            total_full_wrapped_lines_len: 0,
319            total_len: input_len,
320            total_line_endings_len: 0,
321        };
322    };
323
324    // lines_with_endings > 0, last_line_len > 0
325    let (lines_with_endings, last_line_len) = if input_len % line_len > 0 {
326        // Every full line has an ending since there is a partial line at the end
327        (input_len / line_len, input_len % line_len)
328    } else {
329        // Every line is a full line, but no trailing ending.
330        // Subtraction will not underflow since we know input_len > line_len.
331        (input_len / line_len - 1, line_len)
332    };
333
334    // Should we expose exceeding usize via Result to be kind to 16-bit users? Or is that
335    // always going to be a panic anyway in practice?
336
337    // length of just the full lines with line endings
338    let total_full_wrapped_lines_len = lines_with_endings
339        .checked_mul(line_with_ending_len)
340        .expect("Full lines with endings length exceeds usize");
341    // all lines with appropriate endings, including the last line
342    let total_len = total_full_wrapped_lines_len
343        .checked_add(last_line_len)
344        .expect("All lines with endings length exceeds usize");
345    let total_line_endings_len = lines_with_endings
346        .checked_mul(line_ending_len.get())
347        .expect("Total line endings length exceeds usize");
348
349    LineWrapParameters {
350        line_with_ending_len,
351        lines_with_endings,
352        last_line_len,
353        total_full_wrapped_lines_len,
354        total_len,
355        total_line_endings_len,
356    }
357}
358
359#[cfg(test)]
360mod tests;