coreutils_rs/fold/
core.rs

1use std::io::Write;
2
3/// Fold (wrap) lines to a given width.
4///
5/// Modes:
6/// - `bytes` mode (-b): count bytes, break at byte boundaries
7/// - default mode: count columns (tab = advance to next tab stop, backspace = decrement)
8///
9/// If `spaces` (-s): break at the last space within the width instead of mid-word.
10pub fn fold_bytes(
11    data: &[u8],
12    width: usize,
13    count_bytes: bool,
14    break_at_spaces: bool,
15    out: &mut impl Write,
16) -> std::io::Result<()> {
17    if data.is_empty() {
18        return Ok(());
19    }
20
21    if width == 0 {
22        return fold_width_zero(data, out);
23    }
24
25    // Fast path: byte mode, use SIMD-accelerated scanning
26    if count_bytes {
27        if break_at_spaces {
28            return fold_byte_fast_spaces(data, width, out);
29        } else {
30            return fold_byte_fast(data, width, out);
31        }
32    }
33
34    let mut output = Vec::with_capacity(data.len() + data.len() / width);
35    fold_column_mode(data, width, break_at_spaces, &mut output);
36    out.write_all(&output)
37}
38
39/// Width 0: GNU fold behavior — each byte becomes a newline.
40fn fold_width_zero(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
41    let output = vec![b'\n'; data.len()];
42    out.write_all(&output)
43}
44
45/// Fast fold by byte count without -s flag.
46/// Uses memchr to find newlines, bulk-copies runs, inserts breaks at exact positions.
47fn fold_byte_fast(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
48    // Each line can have at most one extra newline inserted
49    let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
50    let mut pos: usize = 0;
51
52    while pos < data.len() {
53        // Find the next newline within the remaining data
54        let remaining = &data[pos..];
55
56        match memchr::memchr(b'\n', remaining) {
57            Some(nl_offset) => {
58                // Process the segment up to (and including) the newline
59                let segment = &data[pos..pos + nl_offset + 1];
60                fold_segment_bytes(&mut output, segment, width);
61                pos += nl_offset + 1;
62            }
63            None => {
64                // No more newlines: process the rest
65                fold_segment_bytes(&mut output, &data[pos..], width);
66                break;
67            }
68        }
69    }
70
71    out.write_all(&output)
72}
73
74/// Fast fold by byte count with -s (break at spaces).
75/// Uses memchr to find newlines and memrchr to find last space in each chunk.
76fn fold_byte_fast_spaces(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
77    let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
78    let mut pos: usize = 0;
79
80    while pos < data.len() {
81        let remaining = &data[pos..];
82
83        match memchr::memchr(b'\n', remaining) {
84            Some(nl_offset) => {
85                let segment = &data[pos..pos + nl_offset + 1];
86                fold_segment_bytes_spaces(&mut output, segment, width);
87                pos += nl_offset + 1;
88            }
89            None => {
90                fold_segment_bytes_spaces(&mut output, &data[pos..], width);
91                break;
92            }
93        }
94    }
95
96    out.write_all(&output)
97}
98
99/// Fold a single line segment by bytes with -s (break at spaces).
100///
101/// # Invariant
102/// `segment` must contain at most one `\n`, and only as its final byte.
103#[inline]
104fn fold_segment_bytes_spaces(output: &mut Vec<u8>, segment: &[u8], width: usize) {
105    debug_assert!(
106        !segment[..segment.len().saturating_sub(1)].contains(&b'\n'),
107        "fold_segment_bytes_spaces: invariant violated — internal newline in segment"
108    );
109    let mut start = 0;
110    while start + width < segment.len() {
111        // SAFETY: loop guard ensures start + width < segment.len()
112        if segment[start + width] == b'\n' {
113            output.extend_from_slice(&segment[start..start + width + 1]);
114            return;
115        }
116        let chunk = &segment[start..start + width];
117        // In byte mode, tab is 1 byte; break after it just like a space.
118        // Column mode uses memrchr(b' ') only — tabs are handled via is_ascii_simple fallback.
119        match memchr::memrchr2(b' ', b'\t', chunk) {
120            Some(sp_offset) => {
121                let break_at = start + sp_offset + 1;
122                output.extend_from_slice(&segment[start..break_at]);
123                output.push(b'\n');
124                start = break_at;
125            }
126            None => {
127                output.extend_from_slice(&segment[start..start + width]);
128                output.push(b'\n');
129                start += width;
130            }
131        }
132    }
133    if start < segment.len() {
134        output.extend_from_slice(&segment[start..]);
135    }
136}
137
138/// Fold a single line segment (no internal newlines except possibly trailing) by bytes.
139#[inline]
140fn fold_segment_bytes(output: &mut Vec<u8>, segment: &[u8], width: usize) {
141    debug_assert!(
142        !segment[..segment.len().saturating_sub(1)].contains(&b'\n'),
143        "fold_segment_bytes: invariant violated — internal newline in segment"
144    );
145    let mut start = 0;
146    while start + width < segment.len() {
147        // SAFETY: loop guard ensures start + width < segment.len()
148        if segment[start + width] == b'\n' {
149            output.extend_from_slice(&segment[start..start + width + 1]);
150            return;
151        }
152        output.extend_from_slice(&segment[start..start + width]);
153        output.push(b'\n');
154        start += width;
155    }
156    // Remaining bytes
157    if start < segment.len() {
158        output.extend_from_slice(&segment[start..]);
159    }
160}
161
162/// Fold by column count (default mode, handles tabs, backspaces, and UTF-8).
163fn fold_column_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
164    // For -s mode, use the lazy-checked path that avoids scanning entire lines
165    // with is_ascii_simple upfront, instead checking each chunk during fold.
166    if break_at_spaces {
167        return fold_column_mode_spaces(data, width, output);
168    }
169
170    let mut pos = 0;
171
172    while pos < data.len() {
173        // Find the next newline using SIMD
174        let remaining = &data[pos..];
175        let line_end = memchr::memchr(b'\n', remaining).map(|p| pos + p);
176        let line_data = match line_end {
177            Some(nl) => &data[pos..nl],
178            None => &data[pos..],
179        };
180
181        // Fast path: pure ASCII, no tabs/backspaces — column == byte count
182        if is_ascii_simple(line_data) {
183            if line_data.len() <= width {
184                // Short line: no wrapping needed
185                output.extend_from_slice(line_data);
186            } else {
187                fold_segment_bytes(output, line_data, width);
188            }
189        } else {
190            // Slow path: process character by character for this line
191            fold_one_line_column(line_data, width, false, output);
192        }
193
194        if let Some(nl) = line_end {
195            output.push(b'\n');
196            pos = nl + 1;
197        } else {
198            break;
199        }
200    }
201}
202
203/// Fold column mode with -s (break at spaces).
204/// Avoids scanning entire lines with is_ascii_simple upfront.
205/// Instead, checks each chunk lazily during fold and falls back to slow path
206/// when non-simple bytes (tabs, backspaces, CR) are encountered.
207fn fold_column_mode_spaces(data: &[u8], width: usize, output: &mut Vec<u8>) {
208    let mut pos = 0;
209
210    while pos < data.len() {
211        let remaining = &data[pos..];
212        let line_end = memchr::memchr(b'\n', remaining).map(|p| pos + p);
213        let line_data = match line_end {
214            Some(nl) => &data[pos..nl],
215            None => &data[pos..],
216        };
217
218        if line_data.len() <= width {
219            if is_ascii_simple(line_data) {
220                // Short ASCII-simple line: byte length == display width, no wrapping needed
221                output.extend_from_slice(line_data);
222            } else {
223                // Short but contains tabs/control chars: display width may exceed byte length
224                fold_one_line_column(line_data, width, true, output);
225            }
226        } else {
227            fold_line_spaces_checked(line_data, width, output);
228        }
229
230        if let Some(nl) = line_end {
231            output.push(b'\n');
232            pos = nl + 1;
233        } else {
234            break;
235        }
236    }
237}
238
239/// Fold a line with -s, checking each chunk for non-simple bytes.
240/// For ASCII-simple chunks (the common case), uses memrchr for fast space search.
241/// Falls back to the full column-mode handler when non-simple bytes are found.
242///
243/// Note: worst case O(n·width/8) SWAR word-ops when spaces cluster at chunk offset 0
244/// (start advances 1 byte per iteration, each paying O(width/8) for is_ascii_simple).
245/// Typical ASCII prose converges to O(n/width) iterations.
246fn fold_line_spaces_checked(line: &[u8], width: usize, output: &mut Vec<u8>) {
247    let mut start = 0;
248    while start + width < line.len() {
249        let chunk = &line[start..start + width];
250        // Lazy ASCII check: only examine this chunk, not the whole line.
251        // Uses SWAR word-at-a-time processing for speed.
252        // NOTE: this scans `chunk` twice (is_ascii_simple + memrchr below).
253        // A fused memrchr2(b' ',b'\t',chunk) approach could reduce this to
254        // one pass, but benchmarks show the SWAR check is cheap enough that
255        // the two-pass cost is negligible for the common ASCII-only case.
256        if !is_ascii_simple(chunk) {
257            // Non-simple byte found: fall back to slow path for the rest.
258            // col=0 invariant: either start=0 (beginning of this input
259            // line, outer loop consumed previous \n) or a prior
260            // space/hard-break emitted b'\n'.
261            fold_one_line_column(&line[start..], width, true, output);
262            return;
263        }
264        // is_ascii_simple guarantees no tabs in this chunk; search for spaces only.
265        match memchr::memrchr(b' ', chunk) {
266            Some(sp_offset) => {
267                let break_at = start + sp_offset + 1;
268                output.extend_from_slice(&line[start..break_at]);
269                output.push(b'\n');
270                start = break_at;
271            }
272            None => {
273                output.extend_from_slice(&line[start..start + width]);
274                output.push(b'\n');
275                start += width;
276            }
277        }
278    }
279    if start < line.len() {
280        let tail = &line[start..];
281        if is_ascii_simple(tail) {
282            output.extend_from_slice(tail);
283        } else {
284            // col=0 invariant: either start=0 (beginning of this input
285            // line, outer loop consumed previous \n) or a prior
286            // space/hard-break emitted b'\n'.
287            fold_one_line_column(tail, width, true, output);
288        }
289    }
290}
291
292/// Check if data is pure ASCII with no tabs, backspaces, CR, or control chars.
293/// Uses SWAR (SIMD Within A Register) to process 8 bytes at a time.
294#[inline]
295fn is_ascii_simple(data: &[u8]) -> bool {
296    let mut i = 0;
297    // Process 8 bytes at a time using u64 word operations
298    while i + 8 <= data.len() {
299        let word = u64::from_ne_bytes(data[i..i + 8].try_into().unwrap());
300        if !word_is_ascii_simple(word) {
301            return false;
302        }
303        i += 8;
304    }
305    // Handle remaining bytes
306    for &b in &data[i..] {
307        if b < 0x20 || b > 0x7E {
308            return false;
309        }
310    }
311    true
312}
313
314/// Check if all 8 bytes in a u64 word are in the ASCII printable range [0x20, 0x7E].
315/// Uses SWAR bit tricks to check all bytes in parallel.
316#[inline(always)]
317fn word_is_ascii_simple(word: u64) -> bool {
318    // Check 1: no byte has high bit set (all < 0x80)
319    if word & 0x8080808080808080 != 0 {
320        return false;
321    }
322    // Check 2: all bytes >= 0x20
323    // Since all bytes < 0x80 (check 1), adding 0x60 cannot carry between bytes.
324    // byte + 0x60: [0x00..0x1F] -> [0x60..0x7F] (high bit clear = bad)
325    //              [0x20..=0x7F] -> [0x80..0xDF] (high bit set = good)
326    // Note: 0x7F (DEL) passes here; check 3 rejects it.
327    let added = word.wrapping_add(0x6060606060606060);
328    if added & 0x8080808080808080 != 0x8080808080808080 {
329        return false;
330    }
331    // Check 3: no byte == 0x7F (DEL)
332    // XOR with 0x7F turns 0x7F bytes into 0x00; we then detect zero bytes via
333    // the standard (x - 0x01) & !x & 0x80 trick.
334    // When no 0x7F is present, all xored bytes are in [0x01..0x5F] — none
335    // underflow on -0x01 — so no inter-byte borrow occurs and has_zero == 0.
336    // When a 0x7F IS present, the zero byte flags correctly; adjacent bytes
337    // may show false positives in has_zero but the overall != 0 is still correct.
338    let xored = word ^ 0x7F7F7F7F7F7F7F7F;
339    let has_zero = xored.wrapping_sub(0x0101010101010101) & !xored & 0x8080808080808080;
340    has_zero == 0
341}
342
343/// Get the column width and byte length of a byte at `data[pos]`.
344/// Returns (column_width, byte_length) — always (1, 1) for non-special bytes.
345///
346/// GNU fold's multibyte path is guarded by:
347///   `#if HAVE_MBRTOC32 && (! defined __GLIBC__ || defined __UCLIBC__)`
348/// On glibc (every mainstream Linux distro), that condition is false, so
349/// fold counts bytes — one column per byte, same as -b mode.
350/// Tab, backspace, and CR are handled by the caller.
351#[inline]
352fn char_info(data: &[u8], pos: usize) -> (usize, usize) {
353    let b = data[pos];
354    if b < 0x80 {
355        // ASCII: tab/backspace handled by caller; control chars have 0 width
356        if b < 0x20 || b == 0x7f {
357            (0, 1)
358        } else {
359            (1, 1)
360        }
361    } else {
362        // High byte: count as 1 column, 1 byte (GNU glibc compat)
363        (1, 1)
364    }
365}
366
367/// Process a single line (no newlines) in column mode, writing to output.
368///
369/// Uses a scan-and-flush approach: tracks break points in the INPUT data,
370/// then writes complete segments. Avoids copy_within for -s mode.
371fn fold_one_line_column(line: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
372    let mut col: usize = 0;
373    // For -s mode: track last space in input, not output
374    let mut last_space_in: Option<usize> = None; // byte index in `line` AFTER the space
375    let mut col_at_space: usize = 0;
376    // CR/backspace change col non-linearly, invalidating `col - col_at_space`.
377    // When set, we must use recalc_column() to replay from the space marker.
378    let mut needs_recalc = false;
379    let mut seg_start: usize = 0; // start of current unflushed segment in `line`
380    let mut i = 0;
381
382    while i < line.len() {
383        let byte = line[i];
384
385        // Handle tab specially
386        if byte == b'\t' {
387            let tab_width = ((col / 8) + 1) * 8 - col;
388
389            if col > 0 && col + tab_width > width {
390                // Need to break before this tab (skip when col==0: can't break before first char)
391                if break_at_spaces {
392                    if let Some(sp_after) = last_space_in {
393                        // Flush up to and including the space, then newline
394                        output.extend_from_slice(&line[seg_start..sp_after]);
395                        output.push(b'\n');
396                        seg_start = sp_after;
397                        col = if needs_recalc {
398                            recalc_column(&line[sp_after..i])
399                        } else {
400                            col - col_at_space
401                        };
402                        last_space_in = None;
403                        needs_recalc = false;
404                        // Re-evaluate this tab with the new col — it may
405                        // still exceed width after the space break.
406                        continue;
407                    } else {
408                        output.extend_from_slice(&line[seg_start..i]);
409                        output.push(b'\n');
410                        seg_start = i;
411                        col = 0;
412                    }
413                } else {
414                    output.extend_from_slice(&line[seg_start..i]);
415                    output.push(b'\n');
416                    seg_start = i;
417                    col = 0;
418                }
419            }
420
421            if break_at_spaces {
422                last_space_in = Some(i + 1);
423                col_at_space = col + ((col / 8) + 1) * 8 - col;
424                needs_recalc = false;
425            }
426            col += ((col / 8) + 1) * 8 - col;
427            i += 1;
428            continue;
429        }
430
431        // Handle carriage return: resets column to 0 (GNU adjust_column compat).
432        // Invalidates `col - col_at_space` but keeps the space marker —
433        // GNU fold still breaks at the last space even after CR.
434        if byte == b'\r' {
435            col = 0;
436            if last_space_in.is_some() {
437                needs_recalc = true;
438            }
439            i += 1;
440            continue;
441        }
442
443        // Handle backspace: decrements column non-linearly.
444        // Invalidates `col - col_at_space` but keeps the space marker.
445        if byte == b'\x08' {
446            if col > 0 {
447                col -= 1;
448            }
449            if last_space_in.is_some() {
450                needs_recalc = true;
451            }
452            i += 1;
453            continue;
454        }
455
456        // Get character info (display width + byte length)
457        let (cw, byte_len) = char_info(line, i);
458
459        // Check if adding this character would exceed width
460        if col + cw > width && cw > 0 {
461            if break_at_spaces {
462                if let Some(sp_after) = last_space_in {
463                    output.extend_from_slice(&line[seg_start..sp_after]);
464                    output.push(b'\n');
465                    seg_start = sp_after;
466                    col = if needs_recalc {
467                        recalc_column(&line[sp_after..i])
468                    } else {
469                        col - col_at_space
470                    };
471                    last_space_in = None;
472                    needs_recalc = false;
473                    // Re-evaluate this character with the new col — it may
474                    // still exceed width after the space break.
475                    continue;
476                } else {
477                    output.extend_from_slice(&line[seg_start..i]);
478                    output.push(b'\n');
479                    seg_start = i;
480                    col = 0;
481                }
482            } else {
483                output.extend_from_slice(&line[seg_start..i]);
484                output.push(b'\n');
485                seg_start = i;
486                col = 0;
487            }
488        }
489
490        if break_at_spaces && byte == b' ' {
491            last_space_in = Some(i + 1);
492            col_at_space = col + cw;
493            needs_recalc = false;
494        }
495
496        col += cw;
497        i += byte_len;
498    }
499
500    // Flush remaining segment
501    if seg_start < line.len() {
502        output.extend_from_slice(&line[seg_start..]);
503    }
504}
505
506/// Recalculate column position by replaying a segment (handles tabs, CR, backspace).
507/// Used when non-linear column operations (CR, backspace) invalidate the fast
508/// `col - col_at_space` delta formula.
509fn recalc_column(data: &[u8]) -> usize {
510    let mut col = 0;
511    let mut i = 0;
512    while i < data.len() {
513        let b = data[i];
514        if b == b'\r' {
515            col = 0;
516            i += 1;
517        } else if b == b'\t' {
518            col = ((col / 8) + 1) * 8;
519            i += 1;
520        } else if b == b'\x08' {
521            if col > 0 {
522                col -= 1;
523            }
524            i += 1;
525        } else if b < 0x80 {
526            if b >= 0x20 && b != 0x7f {
527                col += 1;
528            }
529            i += 1;
530        } else {
531            let (cw, byte_len) = char_info(data, i);
532            col += cw;
533            i += byte_len;
534        }
535    }
536    col
537}
coreutils_rs/fold/core.rs

coreutils_rs/fold/
core.rs