coreutils_rs/fold/
core.rs1use std::io::Write;
2
3pub fn fold_bytes(
11 data: &[u8],
12 width: usize,
13 count_bytes: bool,
14 break_at_spaces: bool,
15 out: &mut impl Write,
16) -> std::io::Result<()> {
17 if data.is_empty() {
18 return Ok(());
19 }
20
21 if width == 0 {
22 return fold_width_zero(data, out);
23 }
24
25 if count_bytes && !break_at_spaces {
27 return fold_byte_fast(data, width, out);
28 }
29
30 let mut output = Vec::with_capacity(data.len() + data.len() / width);
31
32 if count_bytes {
33 fold_byte_mode(data, width, break_at_spaces, &mut output);
34 } else {
35 fold_column_mode(data, width, break_at_spaces, &mut output);
36 }
37
38 out.write_all(&output)
39}
40
41fn fold_width_zero(data: &[u8], out: &mut impl Write) -> std::io::Result<()> {
43 let output = vec![b'\n'; data.len()];
44 out.write_all(&output)
45}
46
47fn fold_byte_fast(data: &[u8], width: usize, out: &mut impl Write) -> std::io::Result<()> {
50 let mut output = Vec::with_capacity(data.len() + data.len() / width + 1);
52 let mut pos: usize = 0;
53
54 while pos < data.len() {
55 let remaining = &data[pos..];
57
58 match memchr::memchr(b'\n', remaining) {
59 Some(nl_offset) => {
60 let segment = &data[pos..pos + nl_offset + 1];
62 fold_segment_bytes(&mut output, segment, width);
63 pos += nl_offset + 1;
64 }
65 None => {
66 fold_segment_bytes(&mut output, &data[pos..], width);
68 break;
69 }
70 }
71 }
72
73 out.write_all(&output)
74}
75
76#[inline]
78fn fold_segment_bytes(output: &mut Vec<u8>, segment: &[u8], width: usize) {
79 let mut start = 0;
80 while start + width < segment.len() {
81 if segment[start + width] == b'\n' {
83 output.extend_from_slice(&segment[start..start + width + 1]);
84 return;
85 }
86 output.extend_from_slice(&segment[start..start + width]);
87 output.push(b'\n');
88 start += width;
89 }
90 if start < segment.len() {
92 output.extend_from_slice(&segment[start..]);
93 }
94}
95
96fn fold_byte_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
99 let mut col: usize = 0;
100 let mut last_space_out_pos: Option<usize> = None;
101
102 for &byte in data {
103 if byte == b'\n' {
104 output.push(b'\n');
105 col = 0;
106 last_space_out_pos = None;
107 continue;
108 }
109
110 if col >= width {
111 if break_at_spaces {
112 if let Some(sp_pos) = last_space_out_pos {
113 let tail_start = sp_pos + 1;
115 let tail_end = output.len();
116 let after_len = tail_end - tail_start;
117 output.push(0); output.copy_within(tail_start..tail_end, tail_start + 1);
119 output[tail_start] = b'\n';
120 col = after_len;
121 last_space_out_pos = None;
122 } else {
123 output.push(b'\n');
124 col = 0;
125 }
126 } else {
127 output.push(b'\n');
128 col = 0;
129 }
130 }
131
132 if break_at_spaces && (byte == b' ' || byte == b'\t') {
133 last_space_out_pos = Some(output.len());
134 }
135
136 output.push(byte);
137 col += 1;
138 }
139}
140
141fn fold_column_mode(data: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
143 let mut pos = 0;
144
145 while pos < data.len() {
146 let remaining = &data[pos..];
148 let line_end = memchr::memchr(b'\n', remaining).map(|p| pos + p);
149 let line_data = match line_end {
150 Some(nl) => &data[pos..nl],
151 None => &data[pos..],
152 };
153
154 if is_ascii_simple(line_data) {
156 if line_data.len() <= width {
157 output.extend_from_slice(line_data);
159 } else if break_at_spaces {
160 fold_ascii_line_spaces(line_data, width, output);
161 } else {
162 fold_segment_bytes(output, line_data, width);
163 }
164 if let Some(nl) = line_end {
165 output.push(b'\n');
166 pos = nl + 1;
167 } else {
168 break;
169 }
170 continue;
171 }
172
173 fold_one_line_column(line_data, width, break_at_spaces, output);
175 if let Some(nl) = line_end {
176 output.push(b'\n');
177 pos = nl + 1;
178 } else {
179 break;
180 }
181 }
182}
183
184fn fold_ascii_line_spaces(line: &[u8], width: usize, output: &mut Vec<u8>) {
187 let mut start = 0;
188 while start + width < line.len() {
189 let chunk = &line[start..start + width];
191 match memchr::memrchr(b' ', chunk) {
192 Some(sp_offset) => {
193 let break_at = start + sp_offset + 1;
195 output.extend_from_slice(&line[start..break_at]);
196 output.push(b'\n');
197 start = break_at;
198 }
199 None => {
200 output.extend_from_slice(&line[start..start + width]);
202 output.push(b'\n');
203 start += width;
204 }
205 }
206 }
207 if start < line.len() {
209 output.extend_from_slice(&line[start..]);
210 }
211}
212
213#[inline]
215fn is_ascii_simple(data: &[u8]) -> bool {
216 data.iter().all(|&b| b >= 0x20 && b <= 0x7E)
218}
219
220#[inline]
229fn char_info(data: &[u8], pos: usize) -> (usize, usize) {
230 let b = data[pos];
231 if b < 0x80 {
232 if b < 0x20 || b == 0x7f {
234 (0, 1)
235 } else {
236 (1, 1)
237 }
238 } else {
239 (1, 1)
241 }
242}
243
244fn fold_one_line_column(line: &[u8], width: usize, break_at_spaces: bool, output: &mut Vec<u8>) {
249 let mut col: usize = 0;
250 let mut last_space_in: Option<usize> = None; let mut col_at_space: usize = 0;
253 let mut needs_recalc = false;
256 let mut seg_start: usize = 0; let mut i = 0;
258
259 while i < line.len() {
260 let byte = line[i];
261
262 if byte == b'\t' {
264 let tab_width = ((col / 8) + 1) * 8 - col;
265
266 if col + tab_width > width && tab_width > 0 {
267 if break_at_spaces {
269 if let Some(sp_after) = last_space_in {
270 output.extend_from_slice(&line[seg_start..sp_after]);
272 output.push(b'\n');
273 seg_start = sp_after;
274 col = if needs_recalc {
275 recalc_column(&line[sp_after..i])
276 } else {
277 col - col_at_space
278 };
279 last_space_in = None;
280 needs_recalc = false;
281 continue;
284 } else {
285 output.extend_from_slice(&line[seg_start..i]);
286 output.push(b'\n');
287 seg_start = i;
288 col = 0;
289 }
290 } else {
291 output.extend_from_slice(&line[seg_start..i]);
292 output.push(b'\n');
293 seg_start = i;
294 col = 0;
295 }
296 }
297
298 if break_at_spaces {
299 last_space_in = Some(i + 1);
300 col_at_space = col + ((col / 8) + 1) * 8 - col;
301 needs_recalc = false;
302 }
303 col += ((col / 8) + 1) * 8 - col;
304 i += 1;
305 continue;
306 }
307
308 if byte == b'\r' {
312 col = 0;
313 if last_space_in.is_some() {
314 needs_recalc = true;
315 }
316 i += 1;
317 continue;
318 }
319
320 if byte == b'\x08' {
323 if col > 0 {
324 col -= 1;
325 }
326 if last_space_in.is_some() {
327 needs_recalc = true;
328 }
329 i += 1;
330 continue;
331 }
332
333 let (cw, byte_len) = char_info(line, i);
335
336 if col + cw > width && cw > 0 {
338 if break_at_spaces {
339 if let Some(sp_after) = last_space_in {
340 output.extend_from_slice(&line[seg_start..sp_after]);
341 output.push(b'\n');
342 seg_start = sp_after;
343 col = if needs_recalc {
344 recalc_column(&line[sp_after..i])
345 } else {
346 col - col_at_space
347 };
348 last_space_in = None;
349 needs_recalc = false;
350 continue;
353 } else {
354 output.extend_from_slice(&line[seg_start..i]);
355 output.push(b'\n');
356 seg_start = i;
357 col = 0;
358 }
359 } else {
360 output.extend_from_slice(&line[seg_start..i]);
361 output.push(b'\n');
362 seg_start = i;
363 col = 0;
364 }
365 }
366
367 if break_at_spaces && byte == b' ' {
368 last_space_in = Some(i + 1);
369 col_at_space = col + cw;
370 needs_recalc = false;
371 }
372
373 col += cw;
374 i += byte_len;
375 }
376
377 if seg_start < line.len() {
379 output.extend_from_slice(&line[seg_start..]);
380 }
381}
382
383fn recalc_column(data: &[u8]) -> usize {
387 let mut col = 0;
388 let mut i = 0;
389 while i < data.len() {
390 let b = data[i];
391 if b == b'\r' {
392 col = 0;
393 i += 1;
394 } else if b == b'\t' {
395 col = ((col / 8) + 1) * 8;
396 i += 1;
397 } else if b == b'\x08' {
398 if col > 0 {
399 col -= 1;
400 }
401 i += 1;
402 } else if b < 0x80 {
403 if b >= 0x20 && b != 0x7f {
404 col += 1;
405 }
406 i += 1;
407 } else {
408 let (cw, byte_len) = char_info(data, i);
409 col += cw;
410 i += byte_len;
411 }
412 }
413 col
414}