coreutils_rs/expand/
core.rs1use std::io::Write;
2
3#[derive(Clone, Debug)]
5pub enum TabStops {
6 Regular(usize),
8 List(Vec<usize>),
10}
11
12impl TabStops {
13 #[inline]
15 fn spaces_to_next(&self, column: usize) -> usize {
16 match self {
17 TabStops::Regular(n) => {
18 if *n == 0 {
19 return 0;
20 }
21 *n - (column % *n)
22 }
23 TabStops::List(stops) => {
24 match stops.binary_search(&(column + 1)) {
26 Ok(idx) => stops[idx] - column,
27 Err(idx) => {
28 if idx < stops.len() {
29 stops[idx] - column
30 } else {
31 1
33 }
34 }
35 }
36 }
37 }
38 }
39
40 #[inline]
42 fn next_tab_stop(&self, column: usize) -> usize {
43 column + self.spaces_to_next(column)
44 }
45}
46
47pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
49 let spec = spec.trim();
50 if spec.is_empty() {
51 return Ok(TabStops::Regular(8));
52 }
53
54 if let Ok(n) = spec.parse::<usize>() {
56 if n == 0 {
57 return Err("tab size cannot be 0".to_string());
58 }
59 return Ok(TabStops::Regular(n));
60 }
61
62 let mut stops: Vec<usize> = Vec::new();
64 for part in spec.split([',', ' ']) {
65 let part = part.trim();
66 if part.is_empty() {
67 continue;
68 }
69 if let Some(rest) = part.strip_prefix('/') {
71 let n: usize = rest
72 .parse()
73 .map_err(|_| format!("'{}' is not a valid number", part))?;
74 if n == 0 {
75 return Err("tab size cannot be 0".to_string());
76 }
77 let last = stops.last().copied().unwrap_or(0);
78 let mut pos = last + n;
79 while pos < 10000 {
80 stops.push(pos);
81 pos += n;
82 }
83 continue;
84 }
85 match part.parse::<usize>() {
86 Ok(n) => {
87 if !stops.is_empty() && n <= *stops.last().unwrap() {
88 return Err("tab sizes must be ascending".to_string());
89 }
90 stops.push(n);
91 }
92 Err(_) => return Err(format!("'{}' is not a valid number", part)),
93 }
94 }
95
96 if stops.is_empty() {
97 return Err("tab specification is empty".to_string());
98 }
99
100 if stops.len() == 1 {
101 return Ok(TabStops::Regular(stops[0]));
102 }
103
104 Ok(TabStops::List(stops))
105}
106
107const SPACES: [u8; 256] = [b' '; 256];
110
111#[inline]
113fn push_spaces(output: &mut Vec<u8>, n: usize) {
114 let mut remaining = n;
115 while remaining > 0 {
116 let chunk = remaining.min(SPACES.len());
117 output.extend_from_slice(&SPACES[..chunk]);
118 remaining -= chunk;
119 }
120}
121
122pub fn expand_bytes(
125 data: &[u8],
126 tabs: &TabStops,
127 initial_only: bool,
128 out: &mut impl Write,
129) -> std::io::Result<()> {
130 if data.is_empty() {
131 return Ok(());
132 }
133
134 if memchr::memchr(b'\t', data).is_none() {
136 return out.write_all(data);
137 }
138
139 if let TabStops::Regular(tab_size) = tabs {
141 if !initial_only && memchr::memchr(b'\x08', data).is_none() {
142 return expand_regular_fast(data, *tab_size, out);
143 }
144 }
145
146 expand_generic(data, tabs, initial_only, out)
148}
149
150fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
153 let mut column: usize = 0;
154 let mut pos: usize = 0;
155
156 while pos < data.len() {
157 match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
158 Some(offset) => {
159 if offset > 0 {
161 out.write_all(&data[pos..pos + offset])?;
162 column += offset;
163 }
164 let byte = data[pos + offset];
165 pos += offset + 1;
166
167 if byte == b'\n' {
168 out.write_all(b"\n")?;
169 column = 0;
170 } else {
171 let spaces = tab_size - (column % tab_size);
173 out.write_all(&SPACES[..spaces])?;
174 column += spaces;
175 }
176 }
177 None => {
178 out.write_all(&data[pos..])?;
179 break;
180 }
181 }
182 }
183
184 Ok(())
185}
186
187fn expand_generic(
189 data: &[u8],
190 tabs: &TabStops,
191 initial_only: bool,
192 out: &mut impl Write,
193) -> std::io::Result<()> {
194 let mut output = Vec::with_capacity(data.len() + data.len() / 8);
195 let mut column: usize = 0;
196 let mut in_initial = true;
197
198 for &byte in data {
199 match byte {
200 b'\t' => {
201 if initial_only && !in_initial {
202 output.push(b'\t');
203 column = tabs.next_tab_stop(column);
204 } else {
205 let spaces = tabs.spaces_to_next(column);
206 push_spaces(&mut output, spaces);
207 column += spaces;
208 }
209 }
210 b'\n' => {
211 output.push(b'\n');
212 column = 0;
213 in_initial = true;
214 }
215 b'\x08' => {
216 output.push(b'\x08');
217 if column > 0 {
218 column -= 1;
219 }
220 }
221 _ => {
222 if initial_only && in_initial && byte != b' ' {
223 in_initial = false;
224 }
225 output.push(byte);
226 column += 1;
227 }
228 }
229 }
230
231 out.write_all(&output)
232}
233
234pub fn unexpand_bytes(
237 data: &[u8],
238 tabs: &TabStops,
239 all: bool,
240 out: &mut impl Write,
241) -> std::io::Result<()> {
242 if data.is_empty() {
243 return Ok(());
244 }
245
246 if memchr::memchr2(b' ', b'\t', data).is_none() {
248 return out.write_all(data);
249 }
250
251 if let TabStops::Regular(tab_size) = tabs {
253 if memchr::memchr(b'\x08', data).is_none() {
254 return unexpand_regular_fast(data, *tab_size, all, out);
255 }
256 }
257
258 unexpand_generic(data, tabs, all, out)
260}
261
262#[inline]
266fn emit_blanks(
267 out: &mut impl Write,
268 start_col: usize,
269 count: usize,
270 tab_size: usize,
271) -> std::io::Result<()> {
272 if count == 0 {
273 return Ok(());
274 }
275 let end_col = start_col + count;
276 let mut col = start_col;
277
278 loop {
280 let next_tab = col + (tab_size - col % tab_size);
281 if next_tab > end_col {
282 break;
283 }
284 let blanks_consumed = next_tab - col;
285 if blanks_consumed >= 2 || next_tab < end_col {
286 out.write_all(b"\t")?;
288 col = next_tab;
289 } else {
290 break;
292 }
293 }
294
295 let remaining = end_col - col;
297 if remaining > 0 {
298 let mut r = remaining;
299 while r > 0 {
300 let chunk = r.min(SPACES.len());
301 out.write_all(&SPACES[..chunk])?;
302 r -= chunk;
303 }
304 }
305 Ok(())
306}
307
308fn unexpand_regular_fast(
311 data: &[u8],
312 tab_size: usize,
313 all: bool,
314 out: &mut impl Write,
315) -> std::io::Result<()> {
316 let mut column: usize = 0;
317 let mut pos: usize = 0;
318 let mut in_initial = true;
319
320 while pos < data.len() {
321 if in_initial || all {
322 if data[pos] == b' ' || data[pos] == b'\t' {
324 let blank_start_col = column;
326 while pos < data.len() && (data[pos] == b' ' || data[pos] == b'\t') {
327 if data[pos] == b'\t' {
328 column += tab_size - column % tab_size;
329 } else {
330 column += 1;
331 }
332 pos += 1;
333 }
334 emit_blanks(out, blank_start_col, column - blank_start_col, tab_size)?;
336 continue;
337 }
338 if data[pos] == b'\n' {
339 out.write_all(b"\n")?;
340 column = 0;
341 in_initial = true;
342 pos += 1;
343 continue;
344 }
345 in_initial = false;
347 }
348
349 if !all {
351 match memchr::memchr(b'\n', &data[pos..]) {
353 Some(offset) => {
354 out.write_all(&data[pos..pos + offset + 1])?;
355 column = 0;
356 in_initial = true;
357 pos += offset + 1;
358 }
359 None => {
360 out.write_all(&data[pos..])?;
361 return Ok(());
362 }
363 }
364 } else {
365 match memchr::memchr3(b' ', b'\t', b'\n', &data[pos..]) {
367 Some(offset) => {
368 if offset > 0 {
369 out.write_all(&data[pos..pos + offset])?;
370 column += offset;
371 }
372 pos += offset;
373 }
374 None => {
375 out.write_all(&data[pos..])?;
376 return Ok(());
377 }
378 }
379 }
380 }
381
382 Ok(())
383}
384
385fn unexpand_generic(
387 data: &[u8],
388 tabs: &TabStops,
389 all: bool,
390 out: &mut impl Write,
391) -> std::io::Result<()> {
392 let tab_size = match tabs {
393 TabStops::Regular(n) => *n,
394 TabStops::List(_) => 0, };
396 let mut column: usize = 0;
397 let mut space_start_col: Option<usize> = None;
398 let mut in_initial = true;
399
400 for &byte in data {
401 match byte {
402 b' ' => {
403 if !all && !in_initial {
404 out.write_all(b" ")?;
405 column += 1;
406 } else {
407 if space_start_col.is_none() {
408 space_start_col = Some(column);
409 }
410 column += 1;
411 }
413 }
414 b'\t' => {
415 if !all && !in_initial {
416 if let Some(start_col) = space_start_col.take() {
418 let n = column - start_col;
419 out.write_all(&SPACES[..n.min(SPACES.len())])?;
420 }
421 out.write_all(b"\t")?;
422 column = tabs.next_tab_stop(column);
423 } else {
424 if space_start_col.is_none() {
425 space_start_col = Some(column);
426 }
427 column = tabs.next_tab_stop(column);
428 }
429 }
430 _ => {
431 if let Some(start_col) = space_start_col.take() {
433 let count = column - start_col;
434 if tab_size > 0 {
435 emit_blanks(out, start_col, count, tab_size)?;
436 } else {
437 emit_blanks_tablist(out, start_col, count, tabs)?;
439 }
440 }
441
442 if byte == b'\n' {
443 out.write_all(b"\n")?;
444 column = 0;
445 in_initial = true;
446 } else if byte == b'\x08' {
447 out.write_all(b"\x08")?;
448 if column > 0 {
449 column -= 1;
450 }
451 } else {
452 if in_initial {
453 in_initial = false;
454 }
455 out.write_all(&[byte])?;
456 column += 1;
457 }
458 }
459 }
460 }
461
462 if let Some(start_col) = space_start_col {
463 let count = column - start_col;
464 if tab_size > 0 {
465 emit_blanks(out, start_col, count, tab_size)?;
466 } else {
467 emit_blanks_tablist(out, start_col, count, tabs)?;
468 }
469 }
470
471 Ok(())
472}
473
474fn emit_blanks_tablist(
477 out: &mut impl Write,
478 start_col: usize,
479 count: usize,
480 tabs: &TabStops,
481) -> std::io::Result<()> {
482 if count == 0 {
483 return Ok(());
484 }
485 let end_col = start_col + count;
486 let mut col = start_col;
487
488 let last_stop = match tabs {
490 TabStops::List(stops) => stops.last().copied().unwrap_or(0),
491 TabStops::Regular(_) => usize::MAX,
492 };
493
494 while col < last_stop {
495 let next_tab = tabs.next_tab_stop(col);
496 if next_tab > end_col || next_tab > last_stop {
497 break;
498 }
499 let blanks_consumed = next_tab - col;
500 if blanks_consumed >= 2 || next_tab < end_col {
501 out.write_all(b"\t")?;
502 col = next_tab;
503 } else {
504 break;
505 }
506 }
507
508 let remaining = end_col - col;
509 if remaining > 0 {
510 let mut r = remaining;
511 while r > 0 {
512 let chunk = r.min(SPACES.len());
513 out.write_all(&SPACES[..chunk])?;
514 r -= chunk;
515 }
516 }
517 Ok(())
518}