coreutils_rs/fold/core.rs
1use std::io::Write;
2
3/// Fold (wrap) lines to a given width.
4///
5/// Modes:
6/// - `bytes` mode (-b): count bytes, break at byte boundaries
7/// - default mode: count columns (tab = advance to next tab stop, backspace = decrement)
8///
9/// If `spaces` (-s): break at the last space within the width instead of mid-word.
10pub fn fold_bytes(
11 data: &[u8],
12 width: usize,
13 count_bytes: bool,
14 break_at_spaces: bool,
15 out: &mut impl Write,
16) -> std::io::Result<()> {
17 if data.is_empty() {
18 return Ok(());
19 }
20
21 if width == 0 {
22 return fold_width_zero(data, out);
23 }
24
25 // Fast path: byte mode, use SIMD-accelerated scanning
26 if count_bytes {
27 if break_at_spaces {
28 return fold_byte_fast_spaces(data, width, out);
29 } else {
30 return fold_byte_fast(data, width, out);
31 }
32 }
33
34 let mut output = Vec::with_capacity(data.len() + data.len() / width);
35 fold_column_mode(data, width, break_at_spaces, &mut output);
36 out.write_all(&output)
37}
38
39/// Width 0: GNU fold behavior — each byte becomes a newline.
40fn fold_width_zero(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
41 let output = vec![b'\n'; data.len()];
42 out.write_all(&output)
43}
44
45/// Fast fold by byte count without -s flag.
46/// Uses memchr to find newlines, bulk-copies runs, inserts breaks at exact positions.
47fn fold_byte_fast(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
48 // Each line can have at most one extra newline inserted
49 let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
50 let mut pos: usize = 0;
51
52 while pos < data.len() {
53 // Find the next newline within the remaining data
54 let remaining = &data[pos..];
55
56 match memchr::memchr(b'\n', remaining) {
57 Some(nl_offset) => {
58 // Process the segment up to (and including) the newline
59 let segment = &data[pos..pos + nl_offset + 1];
60 fold_segment_bytes(&mut output, segment, width);
61 pos += nl_offset + 1;
62 }
63 None => {
64 // No more newlines: process the rest
65 fold_segment_bytes(&mut output, &data[pos..], width);
66 break;
67 }
68 }
69 }
70
71 out.write_all(&output)
72}
73
74/// Fast fold by byte count with -s (break at spaces).
75/// Uses memchr to find newlines and memrchr to find last space in each chunk.
76fn fold_byte_fast_spaces(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
77 let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
78 let mut pos: usize = 0;
79
80 while pos < data.len() {
81 let remaining = &data[pos..];
82
83 match memchr::memchr(b'\n', remaining) {
84 Some(nl_offset) => {
85 let segment = &data[pos..pos + nl_offset + 1];
86 fold_segment_bytes_spaces(&mut output, segment, width);
87 pos += nl_offset + 1;
88 }
89 None => {
90 fold_segment_bytes_spaces(&mut output, &data[pos..], width);
91 break;
92 }
93 }
94 }
95
96 out.write_all(&output)
97}
98
99/// Fold a single line segment by bytes with -s (break at spaces).
100///
101/// # Invariant
102/// `segment` must contain at most one `\n`, and only as its final byte.
103#[inline]
104fn fold_segment_bytes_spaces(output: &mut Vec<u8>, segment: &[u8], width: usize) {
105 debug_assert!(
106 !segment[..segment.len().saturating_sub(1)].contains(&b'\n'),
107 "fold_segment_bytes_spaces: invariant violated — internal newline in segment"
108 );
109 let mut start = 0;
110 while start + width < segment.len() {
111 // SAFETY: loop guard ensures start + width < segment.len()
112 if segment[start + width] == b'\n' {
113 output.extend_from_slice(&segment[start..start + width + 1]);
114 return;
115 }
116 let chunk = &segment[start..start + width];
117 // In byte mode, tab is 1 byte; break after it just like a space.
118 // Column mode uses memrchr(b' ') only — tabs are handled via is_ascii_simple fallback.
119 match memchr::memrchr2(b' ', b'\t', chunk) {
120 Some(sp_offset) => {
121 let break_at = start + sp_offset + 1;
122 output.extend_from_slice(&segment[start..break_at]);
123 output.push(b'\n');
124 start = break_at;
125 }
126 None => {
127 output.extend_from_slice(&segment[start..start + width]);
128 output.push(b'\n');
129 start += width;
130 }
131 }
132 }
133 if start < segment.len() {
134 output.extend_from_slice(&segment[start..]);
135 }
136}
137
138/// Fold a single line segment (no internal newlines except possibly trailing) by bytes.
139#[inline]
140fn fold_segment_bytes(output: &mut Vec<u8>, segment: &[u8], width: usize) {
141 debug_assert!(
142 !segment[..segment.len().saturating_sub(1)].contains(&b'\n'),
143 "fold_segment_bytes: invariant violated — internal newline in segment"
144 );
145 let mut start = 0;
146 while start + width < segment.len() {
147 // SAFETY: loop guard ensures start + width < segment.len()
148 if segment[start + width] == b'\n' {
149 output.extend_from_slice(&segment[start..start + width + 1]);
150 return;
151 }
152 output.extend_from_slice(&segment[start..start + width]);
153 output.push(b'\n');
154 start += width;
155 }
156 // Remaining bytes
157 if start < segment.len() {
158 output.extend_from_slice(&segment[start..]);
159 }
160}
161
162/// Fold by column count (default mode, handles tabs, backspaces, and UTF-8).
163fn fold_column_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
164 // For -s mode, use the lazy-checked path that avoids scanning entire lines
165 // with is_ascii_simple upfront, instead checking each chunk during fold.
166 if break_at_spaces {
167 return fold_column_mode_spaces(data, width, output);
168 }
169
170 let mut pos = 0;
171
172 while pos < data.len() {
173 // Find the next newline using SIMD
174 let remaining = &data[pos..];
175 let line_end = memchr::memchr(b'\n', remaining).map(|p| pos + p);
176 let line_data = match line_end {
177 Some(nl) => &data[pos..nl],
178 None => &data[pos..],
179 };
180
181 // Fast path: pure ASCII, no tabs/backspaces — column == byte count
182 if is_ascii_simple(line_data) {
183 if line_data.len() <= width {
184 // Short line: no wrapping needed
185 output.extend_from_slice(line_data);
186 } else {
187 fold_segment_bytes(output, line_data, width);
188 }
189 } else {
190 // Slow path: process character by character for this line
191 fold_one_line_column(line_data, width, false, output);
192 }
193
194 if let Some(nl) = line_end {
195 output.push(b'\n');
196 pos = nl + 1;
197 } else {
198 break;
199 }
200 }
201}
202
203/// Fold column mode with -s (break at spaces).
204/// Avoids scanning entire lines with is_ascii_simple upfront.
205/// Instead, checks each chunk lazily during fold and falls back to slow path
206/// when non-simple bytes (tabs, backspaces, CR) are encountered.
207fn fold_column_mode_spaces(data: &[u8], width: usize, output: &mut Vec<u8>) {
208 let mut pos = 0;
209
210 while pos < data.len() {
211 let remaining = &data[pos..];
212 let line_end = memchr::memchr(b'\n', remaining).map(|p| pos + p);
213 let line_data = match line_end {
214 Some(nl) => &data[pos..nl],
215 None => &data[pos..],
216 };
217
218 if line_data.len() <= width {
219 if is_ascii_simple(line_data) {
220 // Short ASCII-simple line: byte length == display width, no wrapping needed
221 output.extend_from_slice(line_data);
222 } else {
223 // Short but contains tabs/control chars: display width may exceed byte length
224 fold_one_line_column(line_data, width, true, output);
225 }
226 } else {
227 fold_line_spaces_checked(line_data, width, output);
228 }
229
230 if let Some(nl) = line_end {
231 output.push(b'\n');
232 pos = nl + 1;
233 } else {
234 break;
235 }
236 }
237}
238
239/// Fold a line with -s, checking each chunk for non-simple bytes.
240/// For ASCII-simple chunks (the common case), uses memrchr for fast space search.
241/// Falls back to the full column-mode handler when non-simple bytes are found.
242///
243/// Note: worst case O(n·width/8) SWAR word-ops when spaces cluster at chunk offset 0
244/// (start advances 1 byte per iteration, each paying O(width/8) for is_ascii_simple).
245/// Typical ASCII prose converges to O(n/width) iterations.
246fn fold_line_spaces_checked(line: &[u8], width: usize, output: &mut Vec<u8>) {
247 let mut start = 0;
248 while start + width < line.len() {
249 let chunk = &line[start..start + width];
250 // Lazy ASCII check: only examine this chunk, not the whole line.
251 // Uses SWAR word-at-a-time processing for speed.
252 // NOTE: this scans `chunk` twice (is_ascii_simple + memrchr below).
253 // A fused memrchr2(b' ',b'\t',chunk) approach could reduce this to
254 // one pass, but benchmarks show the SWAR check is cheap enough that
255 // the two-pass cost is negligible for the common ASCII-only case.
256 if !is_ascii_simple(chunk) {
257 // Non-simple byte found: fall back to slow path for the rest.
258 // col=0 invariant: either start=0 (beginning of this input
259 // line, outer loop consumed previous \n) or a prior
260 // space/hard-break emitted b'\n'.
261 fold_one_line_column(&line[start..], width, true, output);
262 return;
263 }
264 // is_ascii_simple guarantees no tabs in this chunk; search for spaces only.
265 match memchr::memrchr(b' ', chunk) {
266 Some(sp_offset) => {
267 let break_at = start + sp_offset + 1;
268 output.extend_from_slice(&line[start..break_at]);
269 output.push(b'\n');
270 start = break_at;
271 }
272 None => {
273 output.extend_from_slice(&line[start..start + width]);
274 output.push(b'\n');
275 start += width;
276 }
277 }
278 }
279 if start < line.len() {
280 let tail = &line[start..];
281 if is_ascii_simple(tail) {
282 output.extend_from_slice(tail);
283 } else {
284 // col=0 invariant: either start=0 (beginning of this input
285 // line, outer loop consumed previous \n) or a prior
286 // space/hard-break emitted b'\n'.
287 fold_one_line_column(tail, width, true, output);
288 }
289 }
290}
291
292/// Check if data is pure ASCII with no tabs, backspaces, CR, or control chars.
293/// Uses SWAR (SIMD Within A Register) to process 8 bytes at a time.
294#[inline]
295fn is_ascii_simple(data: &[u8]) -> bool {
296 let mut i = 0;
297 // Process 8 bytes at a time using u64 word operations
298 while i + 8 <= data.len() {
299 let word = u64::from_ne_bytes(data[i..i + 8].try_into().unwrap());
300 if !word_is_ascii_simple(word) {
301 return false;
302 }
303 i += 8;
304 }
305 // Handle remaining bytes
306 for &b in &data[i..] {
307 if b < 0x20 || b > 0x7E {
308 return false;
309 }
310 }
311 true
312}
313
314/// Check if all 8 bytes in a u64 word are in the ASCII printable range [0x20, 0x7E].
315/// Uses SWAR bit tricks to check all bytes in parallel.
316#[inline(always)]
317fn word_is_ascii_simple(word: u64) -> bool {
318 // Check 1: no byte has high bit set (all < 0x80)
319 if word & 0x8080808080808080 != 0 {
320 return false;
321 }
322 // Check 2: all bytes >= 0x20
323 // Since all bytes < 0x80 (check 1), adding 0x60 cannot carry between bytes.
324 // byte + 0x60: [0x00..0x1F] -> [0x60..0x7F] (high bit clear = bad)
325 // [0x20..=0x7F] -> [0x80..0xDF] (high bit set = good)
326 // Note: 0x7F (DEL) passes here; check 3 rejects it.
327 let added = word.wrapping_add(0x6060606060606060);
328 if added & 0x8080808080808080 != 0x8080808080808080 {
329 return false;
330 }
331 // Check 3: no byte == 0x7F (DEL)
332 // XOR with 0x7F turns 0x7F bytes into 0x00; we then detect zero bytes via
333 // the standard (x - 0x01) & !x & 0x80 trick.
334 // When no 0x7F is present, all xored bytes are in [0x01..0x5F] — none
335 // underflow on -0x01 — so no inter-byte borrow occurs and has_zero == 0.
336 // When a 0x7F IS present, the zero byte flags correctly; adjacent bytes
337 // may show false positives in has_zero but the overall != 0 is still correct.
338 let xored = word ^ 0x7F7F7F7F7F7F7F7F;
339 let has_zero = xored.wrapping_sub(0x0101010101010101) & !xored & 0x8080808080808080;
340 has_zero == 0
341}
342
343/// Get the column width and byte length of a byte at `data[pos]`.
344/// Returns (column_width, byte_length) — always (1, 1) for non-special bytes.
345///
346/// GNU fold's multibyte path is guarded by:
347/// `#if HAVE_MBRTOC32 && (! defined __GLIBC__ || defined __UCLIBC__)`
348/// On glibc (every mainstream Linux distro), that condition is false, so
349/// fold counts bytes — one column per byte, same as -b mode.
350/// Tab, backspace, and CR are handled by the caller.
351#[inline]
352fn char_info(data: &[u8], pos: usize) -> (usize, usize) {
353 let b = data[pos];
354 if b < 0x80 {
355 // ASCII: tab/backspace handled by caller; control chars have 0 width
356 if b < 0x20 || b == 0x7f {
357 (0, 1)
358 } else {
359 (1, 1)
360 }
361 } else {
362 // High byte: count as 1 column, 1 byte (GNU glibc compat)
363 (1, 1)
364 }
365}
366
367/// Process a single line (no newlines) in column mode, writing to output.
368///
369/// Uses a scan-and-flush approach: tracks break points in the INPUT data,
370/// then writes complete segments. Avoids copy_within for -s mode.
371fn fold_one_line_column(line: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
372 let mut col: usize = 0;
373 // For -s mode: track last space in input, not output
374 let mut last_space_in: Option<usize> = None; // byte index in `line` AFTER the space
375 let mut col_at_space: usize = 0;
376 // CR/backspace change col non-linearly, invalidating `col - col_at_space`.
377 // When set, we must use recalc_column() to replay from the space marker.
378 let mut needs_recalc = false;
379 let mut seg_start: usize = 0; // start of current unflushed segment in `line`
380 let mut i = 0;
381
382 while i < line.len() {
383 let byte = line[i];
384
385 // Handle tab specially
386 if byte == b'\t' {
387 let tab_width = ((col / 8) + 1) * 8 - col;
388
389 if col > 0 && col + tab_width > width {
390 // Need to break before this tab (skip when col==0: can't break before first char)
391 if break_at_spaces {
392 if let Some(sp_after) = last_space_in {
393 // Flush up to and including the space, then newline
394 output.extend_from_slice(&line[seg_start..sp_after]);
395 output.push(b'\n');
396 seg_start = sp_after;
397 col = if needs_recalc {
398 recalc_column(&line[sp_after..i])
399 } else {
400 col - col_at_space
401 };
402 last_space_in = None;
403 needs_recalc = false;
404 // Re-evaluate this tab with the new col — it may
405 // still exceed width after the space break.
406 continue;
407 } else {
408 output.extend_from_slice(&line[seg_start..i]);
409 output.push(b'\n');
410 seg_start = i;
411 col = 0;
412 }
413 } else {
414 output.extend_from_slice(&line[seg_start..i]);
415 output.push(b'\n');
416 seg_start = i;
417 col = 0;
418 }
419 }
420
421 if break_at_spaces {
422 last_space_in = Some(i + 1);
423 col_at_space = col + ((col / 8) + 1) * 8 - col;
424 needs_recalc = false;
425 }
426 col += ((col / 8) + 1) * 8 - col;
427 i += 1;
428 continue;
429 }
430
431 // Handle carriage return: resets column to 0 (GNU adjust_column compat).
432 // Invalidates `col - col_at_space` but keeps the space marker —
433 // GNU fold still breaks at the last space even after CR.
434 if byte == b'\r' {
435 col = 0;
436 if last_space_in.is_some() {
437 needs_recalc = true;
438 }
439 i += 1;
440 continue;
441 }
442
443 // Handle backspace: decrements column non-linearly.
444 // Invalidates `col - col_at_space` but keeps the space marker.
445 if byte == b'\x08' {
446 if col > 0 {
447 col -= 1;
448 }
449 if last_space_in.is_some() {
450 needs_recalc = true;
451 }
452 i += 1;
453 continue;
454 }
455
456 // Get character info (display width + byte length)
457 let (cw, byte_len) = char_info(line, i);
458
459 // Check if adding this character would exceed width
460 if col + cw > width && cw > 0 {
461 if break_at_spaces {
462 if let Some(sp_after) = last_space_in {
463 output.extend_from_slice(&line[seg_start..sp_after]);
464 output.push(b'\n');
465 seg_start = sp_after;
466 col = if needs_recalc {
467 recalc_column(&line[sp_after..i])
468 } else {
469 col - col_at_space
470 };
471 last_space_in = None;
472 needs_recalc = false;
473 // Re-evaluate this character with the new col — it may
474 // still exceed width after the space break.
475 continue;
476 } else {
477 output.extend_from_slice(&line[seg_start..i]);
478 output.push(b'\n');
479 seg_start = i;
480 col = 0;
481 }
482 } else {
483 output.extend_from_slice(&line[seg_start..i]);
484 output.push(b'\n');
485 seg_start = i;
486 col = 0;
487 }
488 }
489
490 if break_at_spaces && byte == b' ' {
491 last_space_in = Some(i + 1);
492 col_at_space = col + cw;
493 needs_recalc = false;
494 }
495
496 col += cw;
497 i += byte_len;
498 }
499
500 // Flush remaining segment
501 if seg_start < line.len() {
502 output.extend_from_slice(&line[seg_start..]);
503 }
504}
505
506/// Recalculate column position by replaying a segment (handles tabs, CR, backspace).
507/// Used when non-linear column operations (CR, backspace) invalidate the fast
508/// `col - col_at_space` delta formula.
509fn recalc_column(data: &[u8]) -> usize {
510 let mut col = 0;
511 let mut i = 0;
512 while i < data.len() {
513 let b = data[i];
514 if b == b'\r' {
515 col = 0;
516 i += 1;
517 } else if b == b'\t' {
518 col = ((col / 8) + 1) * 8;
519 i += 1;
520 } else if b == b'\x08' {
521 if col > 0 {
522 col -= 1;
523 }
524 i += 1;
525 } else if b < 0x80 {
526 if b >= 0x20 && b != 0x7f {
527 col += 1;
528 }
529 i += 1;
530 } else {
531 let (cw, byte_len) = char_info(data, i);
532 col += cw;
533 i += byte_len;
534 }
535 }
536 col
537}