1use std::io::Write;
2
/// Tab stop configuration used by the expand and unexpand routines.
///
/// `PartialEq`/`Eq` are derived so parsed configurations can be compared
/// directly (for example in tests or when de-duplicating options).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TabStops {
    /// Evenly spaced stops: one every `n` columns.
    Regular(usize),
    /// Explicit stop columns, expected in strictly ascending order.
    List(Vec<usize>),
}
11
12impl TabStops {
13 #[inline]
15 fn spaces_to_next(&self, column: usize) -> usize {
16 match self {
17 TabStops::Regular(n) => {
18 if *n == 0 {
19 return 0;
20 }
21 *n - (column % *n)
22 }
23 TabStops::List(stops) => {
24 match stops.binary_search(&(column + 1)) {
26 Ok(idx) => stops[idx] - column,
27 Err(idx) => {
28 if idx < stops.len() {
29 stops[idx] - column
30 } else {
31 1
33 }
34 }
35 }
36 }
37 }
38 }
39
40 #[inline]
42 fn next_tab_stop(&self, column: usize) -> usize {
43 column + self.spaces_to_next(column)
44 }
45}
46
47pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
49 let spec = spec.trim();
50 if spec.is_empty() {
51 return Ok(TabStops::Regular(8));
52 }
53
54 if let Ok(n) = spec.parse::<usize>() {
56 if n == 0 {
57 return Err("tab size cannot be 0".to_string());
58 }
59 return Ok(TabStops::Regular(n));
60 }
61
62 let mut stops: Vec<usize> = Vec::new();
64 for part in spec.split([',', ' ']) {
65 let part = part.trim();
66 if part.is_empty() {
67 continue;
68 }
69 if let Some(rest) = part.strip_prefix('/') {
71 let n: usize = rest
72 .parse()
73 .map_err(|_| format!("'{}' is not a valid number", part))?;
74 if n == 0 {
75 return Err("tab size cannot be 0".to_string());
76 }
77 let last = stops.last().copied().unwrap_or(0);
78 let mut pos = last + n;
79 while pos < 10000 {
80 stops.push(pos);
81 pos += n;
82 }
83 continue;
84 }
85 match part.parse::<usize>() {
86 Ok(n) => {
87 if !stops.is_empty() && n <= *stops.last().unwrap() {
88 return Err("tab sizes must be ascending".to_string());
89 }
90 stops.push(n);
91 }
92 Err(_) => return Err(format!("'{}' is not a valid number", part)),
93 }
94 }
95
96 if stops.is_empty() {
97 return Err("tab specification is empty".to_string());
98 }
99
100 if stops.len() == 1 {
101 return Ok(TabStops::Regular(stops[0]));
102 }
103
104 Ok(TabStops::List(stops))
105}
106
/// Shared block of ASCII spaces that callers slice when they need to copy a
/// run of padding without allocating.
const SPACES: [u8; 4096] = [b' '; 4096];

/// Appends `n` ASCII space bytes to the end of `output`.
#[inline]
fn push_spaces(output: &mut Vec<u8>, n: usize) {
    let padded_len = output.len() + n;
    output.resize(padded_len, b' ');
}
121
122#[inline]
124fn write_spaces(out: &mut impl Write, n: usize) -> std::io::Result<()> {
125 let mut remaining = n;
126 while remaining > 0 {
127 let chunk = remaining.min(SPACES.len());
128 out.write_all(&SPACES[..chunk])?;
129 remaining -= chunk;
130 }
131 Ok(())
132}
133
134pub fn expand_bytes(
137 data: &[u8],
138 tabs: &TabStops,
139 initial_only: bool,
140 out: &mut impl Write,
141) -> std::io::Result<()> {
142 if data.is_empty() {
143 return Ok(());
144 }
145
146 if memchr::memchr(b'\t', data).is_none() {
148 return out.write_all(data);
149 }
150
151 if let TabStops::Regular(tab_size) = tabs {
153 if initial_only {
154 return expand_initial_fast(data, *tab_size, out);
157 } else if memchr::memchr(b'\x08', data).is_none() {
158 return expand_regular_fast(data, *tab_size, out);
159 }
160 }
161
162 let has_backspace = match tabs {
167 TabStops::Regular(_) => true,
168 TabStops::List(_) => memchr::memchr(b'\x08', data).is_some(),
169 };
170 expand_generic(data, tabs, initial_only, has_backspace, out)
171}
172
173fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
177 debug_assert!(tab_size > 0, "tab_size must be > 0");
178 const FLUSH_THRESHOLD: usize = 256 * 1024;
179 let cap = data.len().min(FLUSH_THRESHOLD) + data.len().min(FLUSH_THRESHOLD) / 8;
180 let mut output = Vec::with_capacity(cap);
181 let mut column: usize = 0;
182 let mut pos: usize = 0;
183
184 let is_pow2 = tab_size.is_power_of_two();
186 let mask = tab_size - 1; while pos < data.len() {
189 match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
190 Some(offset) => {
191 if offset > 0 {
193 output.extend_from_slice(&data[pos..pos + offset]);
194 column += offset;
195 }
196 let byte = data[pos + offset];
197 pos += offset + 1;
198
199 if byte == b'\n' {
200 output.push(b'\n');
201 column = 0;
202 } else {
203 let rem = if is_pow2 {
205 column & mask
206 } else {
207 column % tab_size
208 };
209 let spaces = tab_size - rem;
210 push_spaces(&mut output, spaces);
211 column += spaces;
212 }
213
214 if output.len() >= FLUSH_THRESHOLD {
216 out.write_all(&output)?;
217 output.clear();
218 }
219 }
220 None => {
221 output.extend_from_slice(&data[pos..]);
222 break;
223 }
224 }
225 }
226
227 if !output.is_empty() {
228 out.write_all(&output)?;
229 }
230 Ok(())
231}
232
233fn expand_initial_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
238 debug_assert!(tab_size > 0, "tab_size must be > 0");
239 let tabs = TabStops::Regular(tab_size);
240 let mut pos: usize = 0;
241
242 while pos < data.len() {
243 let line_end = memchr::memchr(b'\n', &data[pos..])
245 .map(|off| pos + off + 1)
246 .unwrap_or(data.len());
247
248 let line = &data[pos..line_end];
249 debug_assert!(!line.is_empty());
250
251 let first = line[0];
253 if first != b'\t' && first != b' ' {
254 out.write_all(line)?;
255 pos = line_end;
256 continue;
257 }
258
259 if memchr::memchr(b'\x08', line).is_some() {
261 expand_generic(line, &tabs, true, true, out)?;
262 pos = line_end;
263 continue;
264 }
265
266 let mut column: usize = 0;
268 let mut i = 0; while i < line.len() {
270 let byte = line[i];
271 if byte == b'\t' {
272 let spaces = tab_size - (column % tab_size);
273 write_spaces(out, spaces)?;
274 column += spaces;
275 i += 1;
276 } else if byte == b' ' {
277 let space_start = i;
279 while i < line.len() && line[i] == b' ' {
280 i += 1;
281 }
282 out.write_all(&line[space_start..i])?;
283 column += i - space_start;
284 } else {
285 break;
287 }
288 }
289
290 if i < line.len() {
292 out.write_all(&line[i..])?;
293 }
294
295 pos = line_end;
296 }
297
298 Ok(())
299}
300
301fn expand_generic(
305 data: &[u8],
306 tabs: &TabStops,
307 initial_only: bool,
308 has_backspace: bool,
309 out: &mut impl Write,
310) -> std::io::Result<()> {
311 const FLUSH_THRESHOLD: usize = 256 * 1024;
312 let cap = data.len().min(FLUSH_THRESHOLD) + data.len().min(FLUSH_THRESHOLD) / 8;
313 let mut output = Vec::with_capacity(cap);
314
315 if !initial_only && !has_backspace {
317 let mut column: usize = 0;
318 let mut pos: usize = 0;
319
320 while pos < data.len() {
321 match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
322 Some(offset) => {
323 if offset > 0 {
324 output.extend_from_slice(&data[pos..pos + offset]);
325 column += offset;
326 }
327 let byte = data[pos + offset];
328 pos += offset + 1;
329
330 if byte == b'\n' {
331 output.push(b'\n');
332 column = 0;
333 } else {
334 let spaces = tabs.spaces_to_next(column);
335 push_spaces(&mut output, spaces);
336 column += spaces;
337 }
338 if output.len() >= FLUSH_THRESHOLD {
339 out.write_all(&output)?;
340 output.clear();
341 }
342 }
343 None => {
344 output.extend_from_slice(&data[pos..]);
345 break;
346 }
347 }
348 }
349 } else {
350 let mut column: usize = 0;
352 let mut in_initial = true;
353
354 for &byte in data {
355 match byte {
356 b'\t' => {
357 if initial_only && !in_initial {
358 output.push(b'\t');
359 column = tabs.next_tab_stop(column);
360 } else {
361 let spaces = tabs.spaces_to_next(column);
362 push_spaces(&mut output, spaces);
363 column += spaces;
364 }
365 }
366 b'\n' => {
367 output.push(b'\n');
368 column = 0;
369 in_initial = true;
370 if output.len() >= FLUSH_THRESHOLD {
371 out.write_all(&output)?;
372 output.clear();
373 }
374 }
375 b'\x08' => {
376 output.push(b'\x08');
377 if column > 0 {
378 column -= 1;
379 }
380 }
381 _ => {
382 if initial_only && in_initial && byte != b' ' {
383 in_initial = false;
384 }
385 output.push(byte);
386 column += 1;
387 }
388 }
389 }
390 }
391
392 if !output.is_empty() {
393 out.write_all(&output)?;
394 }
395 Ok(())
396}
397
398pub fn unexpand_bytes(
401 data: &[u8],
402 tabs: &TabStops,
403 all: bool,
404 out: &mut impl Write,
405) -> std::io::Result<()> {
406 if data.is_empty() {
407 return Ok(());
408 }
409
410 if memchr::memchr2(b' ', b'\t', data).is_none() {
412 return out.write_all(data);
413 }
414
415 if let TabStops::Regular(tab_size) = tabs {
417 if memchr::memchr(b'\x08', data).is_none() {
418 return unexpand_regular_fast(data, *tab_size, all, out);
419 }
420 }
421
422 unexpand_generic(data, tabs, all, out)
424}
425
426#[inline]
430fn emit_blanks(
431 out: &mut impl Write,
432 start_col: usize,
433 count: usize,
434 tab_size: usize,
435) -> std::io::Result<()> {
436 if count == 0 {
437 return Ok(());
438 }
439 let end_col = start_col + count;
440 let mut col = start_col;
441
442 loop {
444 let next_tab = col + (tab_size - col % tab_size);
445 if next_tab > end_col {
446 break;
447 }
448 let blanks_consumed = next_tab - col;
449 if blanks_consumed >= 2 || next_tab < end_col {
450 out.write_all(b"\t")?;
452 col = next_tab;
453 } else {
454 break;
456 }
457 }
458
459 let remaining = end_col - col;
461 if remaining > 0 {
462 let mut r = remaining;
463 while r > 0 {
464 let chunk = r.min(SPACES.len());
465 out.write_all(&SPACES[..chunk])?;
466 r -= chunk;
467 }
468 }
469 Ok(())
470}
471
472fn unexpand_regular_fast(
475 data: &[u8],
476 tab_size: usize,
477 all: bool,
478 out: &mut impl Write,
479) -> std::io::Result<()> {
480 let mut column: usize = 0;
481 let mut pos: usize = 0;
482 let mut in_initial = true;
483
484 while pos < data.len() {
485 if in_initial || all {
486 if data[pos] == b' ' || data[pos] == b'\t' {
488 let blank_start_col = column;
490 while pos < data.len() && (data[pos] == b' ' || data[pos] == b'\t') {
491 if data[pos] == b'\t' {
492 column += tab_size - column % tab_size;
493 } else {
494 column += 1;
495 }
496 pos += 1;
497 }
498 emit_blanks(out, blank_start_col, column - blank_start_col, tab_size)?;
500 continue;
501 }
502 if data[pos] == b'\n' {
503 out.write_all(b"\n")?;
504 column = 0;
505 in_initial = true;
506 pos += 1;
507 continue;
508 }
509 in_initial = false;
511 }
512
513 if !all {
515 match memchr::memchr(b'\n', &data[pos..]) {
517 Some(offset) => {
518 out.write_all(&data[pos..pos + offset + 1])?;
519 column = 0;
520 in_initial = true;
521 pos += offset + 1;
522 }
523 None => {
524 out.write_all(&data[pos..])?;
525 return Ok(());
526 }
527 }
528 } else {
529 match memchr::memchr3(b' ', b'\t', b'\n', &data[pos..]) {
531 Some(offset) => {
532 if offset > 0 {
533 out.write_all(&data[pos..pos + offset])?;
534 column += offset;
535 }
536 pos += offset;
537 }
538 None => {
539 out.write_all(&data[pos..])?;
540 return Ok(());
541 }
542 }
543 }
544 }
545
546 Ok(())
547}
548
549fn unexpand_generic(
551 data: &[u8],
552 tabs: &TabStops,
553 all: bool,
554 out: &mut impl Write,
555) -> std::io::Result<()> {
556 let tab_size = match tabs {
557 TabStops::Regular(n) => *n,
558 TabStops::List(_) => 0, };
560 let mut column: usize = 0;
561 let mut space_start_col: Option<usize> = None;
562 let mut in_initial = true;
563
564 for &byte in data {
565 match byte {
566 b' ' => {
567 if !all && !in_initial {
568 out.write_all(b" ")?;
569 column += 1;
570 } else {
571 if space_start_col.is_none() {
572 space_start_col = Some(column);
573 }
574 column += 1;
575 }
577 }
578 b'\t' => {
579 if !all && !in_initial {
580 if let Some(start_col) = space_start_col.take() {
582 let n = column - start_col;
583 out.write_all(&SPACES[..n.min(SPACES.len())])?;
584 }
585 out.write_all(b"\t")?;
586 column = tabs.next_tab_stop(column);
587 } else {
588 if space_start_col.is_none() {
589 space_start_col = Some(column);
590 }
591 column = tabs.next_tab_stop(column);
592 }
593 }
594 _ => {
595 if let Some(start_col) = space_start_col.take() {
597 let count = column - start_col;
598 if tab_size > 0 {
599 emit_blanks(out, start_col, count, tab_size)?;
600 } else {
601 emit_blanks_tablist(out, start_col, count, tabs)?;
603 }
604 }
605
606 if byte == b'\n' {
607 out.write_all(b"\n")?;
608 column = 0;
609 in_initial = true;
610 } else if byte == b'\x08' {
611 out.write_all(b"\x08")?;
612 if column > 0 {
613 column -= 1;
614 }
615 } else {
616 if in_initial {
617 in_initial = false;
618 }
619 out.write_all(&[byte])?;
620 column += 1;
621 }
622 }
623 }
624 }
625
626 if let Some(start_col) = space_start_col {
627 let count = column - start_col;
628 if tab_size > 0 {
629 emit_blanks(out, start_col, count, tab_size)?;
630 } else {
631 emit_blanks_tablist(out, start_col, count, tabs)?;
632 }
633 }
634
635 Ok(())
636}
637
638fn emit_blanks_tablist(
641 out: &mut impl Write,
642 start_col: usize,
643 count: usize,
644 tabs: &TabStops,
645) -> std::io::Result<()> {
646 if count == 0 {
647 return Ok(());
648 }
649 let end_col = start_col + count;
650 let mut col = start_col;
651
652 let last_stop = match tabs {
654 TabStops::List(stops) => stops.last().copied().unwrap_or(0),
655 TabStops::Regular(_) => usize::MAX,
656 };
657
658 while col < last_stop {
659 let next_tab = tabs.next_tab_stop(col);
660 if next_tab > end_col || next_tab > last_stop {
661 break;
662 }
663 let blanks_consumed = next_tab - col;
664 if blanks_consumed >= 2 || next_tab < end_col {
665 out.write_all(b"\t")?;
666 col = next_tab;
667 } else {
668 break;
669 }
670 }
671
672 let remaining = end_col - col;
673 if remaining > 0 {
674 let mut r = remaining;
675 while r > 0 {
676 let chunk = r.min(SPACES.len());
677 out.write_all(&SPACES[..chunk])?;
678 r -= chunk;
679 }
680 }
681 Ok(())
682}