Skip to main content

coreutils_rs/fold/
core.rs

1use std::io::Write;
2
3/// Fold (wrap) lines to a given width.
4///
5/// Modes:
6/// - `bytes` mode (-b): count bytes, break at byte boundaries
7/// - default mode: count columns (tab = advance to next tab stop, backspace = decrement)
8///
9/// If `spaces` (-s): break at the last space within the width instead of mid-word.
10pub fn fold_bytes(
11    data: &[u8],
12    width: usize,
13    count_bytes: bool,
14    break_at_spaces: bool,
15    out: &mut impl Write,
16) -> std::io::Result<()> {
17    if data.is_empty() {
18        return Ok(());
19    }
20
21    if width == 0 {
22        return fold_width_zero(data, out);
23    }
24
25    // Fast path: byte mode without -s, use SIMD-accelerated scanning
26    if count_bytes && !break_at_spaces {
27        return fold_byte_fast(data, width, out);
28    }
29
30    let mut output = Vec::with_capacity(data.len() + data.len() / width);
31
32    if count_bytes {
33        fold_byte_mode(data, width, break_at_spaces, &mut output);
34    } else {
35        fold_column_mode(data, width, break_at_spaces, &mut output);
36    }
37
38    out.write_all(&output)
39}
40
/// Handle `width == 0`: emit exactly one newline for every input byte.
///
/// The content of `data` is ignored; only its length matters.
/// NOTE(review): the original comment attributes this to GNU fold behavior — confirm.
///
/// # Errors
///
/// Returns any I/O error reported while writing to `out`.
fn fold_width_zero(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
    let newlines: Vec<u8> = std::iter::repeat(b'\n').take(data.len()).collect();
    out.write_all(&newlines)
}
46
47/// Fast fold by byte count without -s flag.
48/// Uses memchr to find newlines, bulk-copies runs, inserts breaks at exact positions.
49fn fold_byte_fast(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
50    // Each line can have at most one extra newline inserted
51    let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
52    let mut pos: usize = 0;
53
54    while pos < data.len() {
55        // Find the next newline within the remaining data
56        let remaining = &data[pos..];
57
58        match memchr::memchr(b'\n', remaining) {
59            Some(nl_offset) => {
60                // Process the segment up to (and including) the newline
61                let segment = &data[pos..pos + nl_offset + 1];
62                fold_segment_bytes(&mut output, segment, width);
63                pos += nl_offset + 1;
64            }
65            None => {
66                // No more newlines: process the rest
67                fold_segment_bytes(&mut output, &data[pos..], width);
68                break;
69            }
70        }
71    }
72
73    out.write_all(&output)
74}
75
/// Fold one line by byte count, appending the result to `output`.
///
/// `segment` is expected to contain no newline except possibly as its last
/// byte. A newline is inserted after every `width` bytes, except when the
/// byte that would start the next chunk is the line's own newline — then the
/// line already fits and is copied through unchanged.
///
/// `width` must be non-zero.
#[inline]
fn fold_segment_bytes(output: &mut Vec<u8>, segment: &[u8], width: usize) {
    let mut cursor = 0;

    loop {
        let boundary = cursor + width;
        match segment.get(boundary) {
            // Fewer than `width + 1` bytes remain: nothing more to split.
            None => break,
            // Exactly `width` bytes followed by the newline: emit both and stop.
            Some(&b'\n') => {
                output.extend_from_slice(&segment[cursor..=boundary]);
                return;
            }
            // A full chunk with more content behind it: emit it and break the line.
            Some(_) => {
                output.extend_from_slice(&segment[cursor..boundary]);
                output.push(b'\n');
                cursor = boundary;
            }
        }
    }

    // Trailing partial chunk (may be empty).
    output.extend_from_slice(&segment[cursor..]);
}
95
96/// Fold by byte count with -s (break at spaces).
97fn fold_byte_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
98    let mut col: usize = 0;
99    let mut last_space_out_pos: Option<usize> = None;
100
101    for &byte in data {
102        if byte == b'\n' {
103            output.push(b'\n');
104            col = 0;
105            last_space_out_pos = None;
106            continue;
107        }
108
109        if col >= width {
110            if break_at_spaces {
111                if let Some(sp_pos) = last_space_out_pos {
112                    let after_space = output[sp_pos + 1..].to_vec();
113                    output.truncate(sp_pos + 1);
114                    output.push(b'\n');
115                    output.extend_from_slice(&after_space);
116                    col = after_space.len();
117                    last_space_out_pos = None;
118                } else {
119                    output.push(b'\n');
120                    col = 0;
121                }
122            } else {
123                output.push(b'\n');
124                col = 0;
125            }
126        }
127
128        if break_at_spaces && (byte == b' ' || byte == b'\t') {
129            last_space_out_pos = Some(output.len());
130        }
131
132        output.push(byte);
133        col += 1;
134    }
135}
136
/// Fold by display column (default mode), appending the result to `output`.
///
/// Column rules:
/// * tab advances to the next multiple of 8;
/// * backspace moves one column back (never below 0) and never causes a break;
/// * other control bytes (`< 0x20` and `0x7f`) occupy no columns;
/// * every other byte occupies one column.
///
/// When a byte would push the line past `width`:
/// * with `break_at_spaces` and a space or tab on the current line, the line
///   is broken right after the last such blank and the bytes following it
///   start the next line;
/// * otherwise the line is broken immediately before the byte.
///
/// After every break the byte is measured again from the new column, because
/// a tab's width depends on where it starts and the carried-over tail may
/// itself leave no room. A byte that is wider than `width` even at the start
/// of an empty line (a tab with `width < 8`) is emitted on that line as-is
/// rather than preceded by an empty line.
///
/// `width` is expected to be non-zero; `fold_bytes` handles `width == 0` itself.
fn fold_column_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
    // Columns `byte` occupies when written at column `col` (backspace is handled by the caller).
    fn advance(col: usize, byte: u8) -> usize {
        match byte {
            b'\t' => 8 - col % 8,
            0x00..=0x1f | 0x7f => 0,
            _ => 1,
        }
    }

    let mut col: usize = 0;
    // Index in `output` where the current output line begins.
    let mut line_start: usize = output.len();
    // Index in `output` of the last blank on the current line (only tracked with -s).
    let mut last_space_out_pos: Option<usize> = None;

    for &byte in data {
        if byte == b'\n' {
            output.push(b'\n');
            col = 0;
            line_start = output.len();
            last_space_out_pos = None;
            continue;
        }

        if byte == b'\x08' {
            output.push(byte);
            col = col.saturating_sub(1);
            continue;
        }

        // Break the line as often as needed until `byte` fits. Each pass either
        // exits, consumes the remembered blank, or leaves the line empty, so
        // the loop runs at most three times.
        loop {
            let char_width = advance(col, byte);
            if char_width == 0 || col + char_width <= width {
                break;
            }

            if let Some(sp_pos) = last_space_out_pos.take() {
                // Break after the blank; the tail behind it becomes the new line.
                output.insert(sp_pos + 1, b'\n');
                line_start = sp_pos + 2;
                col = recalc_column(&output[line_start..]);
                continue;
            }

            if output.len() == line_start {
                // Nothing on this line yet: breaking again cannot make room.
                break;
            }

            output.push(b'\n');
            line_start = output.len();
            col = 0;
        }

        if break_at_spaces && matches!(byte, b' ' | b'\t') {
            last_space_out_pos = Some(output.len());
        }

        output.push(byte);
        // Measure from the final column, not the one seen before any break.
        col += advance(col, byte);
    }
}

/// Recalculate the column reached after writing `data` starting at column 0.
///
/// Uses the same rules as `fold_column_mode`: tab jumps to the next multiple
/// of 8, backspace steps back one column (not below 0), control bytes
/// (`< 0x20`, `0x7f`) do not move, and every other byte advances one column.
fn recalc_column(data: &[u8]) -> usize {
    data.iter().fold(0usize, |col, &b| match b {
        b'\t' => (col / 8 + 1) * 8,
        b'\x08' => col.saturating_sub(1),
        0x20..=0x7e | 0x80..=0xff => col + 1,
        _ => col,
    })
}