1use std::io::Write;
2
3#[derive(Clone, Debug)]
5pub enum TabStops {
6 Regular(usize),
8 List(Vec<usize>),
10}
11
12impl TabStops {
13 #[inline]
15 fn spaces_to_next(&self, column: usize) -> usize {
16 match self {
17 TabStops::Regular(n) => {
18 if *n == 0 {
19 return 0;
20 }
21 *n - (column % *n)
22 }
23 TabStops::List(stops) => {
24 match stops.binary_search(&(column + 1)) {
26 Ok(idx) => stops[idx] - column,
27 Err(idx) => {
28 if idx < stops.len() {
29 stops[idx] - column
30 } else {
31 1
33 }
34 }
35 }
36 }
37 }
38 }
39
40 #[inline]
42 fn next_tab_stop(&self, column: usize) -> usize {
43 column + self.spaces_to_next(column)
44 }
45}
46
47pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
49 let spec = spec.trim();
50 if spec.is_empty() {
51 return Ok(TabStops::Regular(8));
52 }
53
54 if let Ok(n) = spec.parse::<usize>() {
56 if n == 0 {
57 return Err("tab size cannot be 0".to_string());
58 }
59 return Ok(TabStops::Regular(n));
60 }
61
62 let mut stops: Vec<usize> = Vec::new();
64 for part in spec.split([',', ' ']) {
65 let part = part.trim();
66 if part.is_empty() {
67 continue;
68 }
69 if let Some(rest) = part.strip_prefix('/') {
71 let n: usize = rest
72 .parse()
73 .map_err(|_| format!("'{}' is not a valid number", part))?;
74 if n == 0 {
75 return Err("tab size cannot be 0".to_string());
76 }
77 let last = stops.last().copied().unwrap_or(0);
78 let mut pos = last + n;
79 while pos < 10000 {
80 stops.push(pos);
81 pos += n;
82 }
83 continue;
84 }
85 match part.parse::<usize>() {
86 Ok(n) => {
87 if !stops.is_empty() && n <= *stops.last().unwrap() {
88 return Err("tab sizes must be ascending".to_string());
89 }
90 stops.push(n);
91 }
92 Err(_) => return Err(format!("'{}' is not a valid number", part)),
93 }
94 }
95
96 if stops.is_empty() {
97 return Err("tab specification is empty".to_string());
98 }
99
100 if stops.len() == 1 {
101 return Ok(TabStops::Regular(stops[0]));
102 }
103
104 Ok(TabStops::List(stops))
105}
106
107const SPACES: [u8; 4096] = [b' '; 4096];
110
111#[inline]
113fn push_spaces(output: &mut Vec<u8>, n: usize) {
114 let mut remaining = n;
115 while remaining > 0 {
116 let chunk = remaining.min(SPACES.len());
117 output.extend_from_slice(&SPACES[..chunk]);
118 remaining -= chunk;
119 }
120}
121
122#[inline]
124fn write_spaces(out: &mut impl Write, n: usize) -> std::io::Result<()> {
125 let mut remaining = n;
126 while remaining > 0 {
127 let chunk = remaining.min(SPACES.len());
128 out.write_all(&SPACES[..chunk])?;
129 remaining -= chunk;
130 }
131 Ok(())
132}
133
134pub fn expand_bytes(
137 data: &[u8],
138 tabs: &TabStops,
139 initial_only: bool,
140 out: &mut impl Write,
141) -> std::io::Result<()> {
142 if data.is_empty() {
143 return Ok(());
144 }
145
146 if let TabStops::Regular(tab_size) = tabs {
149 if initial_only {
150 if memchr::memchr(b'\t', data).is_none() {
152 return out.write_all(data);
153 }
154 return expand_initial_fast(data, *tab_size, out);
155 }
156
157 if memchr::memchr(b'\t', data).is_none() {
159 return out.write_all(data);
160 }
161
162 if memchr::memchr(b'\x08', data).is_none() {
164 return expand_regular_fast(data, *tab_size, out);
165 }
166
167 return expand_generic(data, tabs, initial_only, true, out);
169 }
170
171 if memchr::memchr(b'\t', data).is_none() {
173 return out.write_all(data);
174 }
175 let has_backspace = memchr::memchr(b'\x08', data).is_some();
176 expand_generic(data, tabs, initial_only, has_backspace, out)
177}
178
179const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024; fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
187 debug_assert!(tab_size > 0, "tab_size must be > 0");
188
189 if data.len() >= PARALLEL_THRESHOLD {
190 expand_regular_parallel(data, tab_size, out)
191 } else {
192 expand_regular_single(data, tab_size, out)
193 }
194}
195
196fn expand_regular_single(
198 data: &[u8],
199 tab_size: usize,
200 out: &mut impl Write,
201) -> std::io::Result<()> {
202 let is_pow2 = tab_size.is_power_of_two();
203 let mask = tab_size.wrapping_sub(1);
204
205 const INPUT_CHUNK: usize = 256 * 1024;
206 let worst_output = INPUT_CHUNK * tab_size + tab_size;
207 let mut output: Vec<u8> = Vec::with_capacity(worst_output);
208
209 let mut column: usize = 0;
210 let mut data_pos: usize = 0;
211
212 while data_pos < data.len() {
213 let chunk_end = if data_pos + INPUT_CHUNK >= data.len() {
214 data.len()
215 } else {
216 let search_end = data_pos + INPUT_CHUNK;
217 match memchr::memrchr(b'\n', &data[data_pos..search_end]) {
218 Some(off) => data_pos + off + 1,
219 None => search_end,
220 }
221 };
222
223 let chunk = &data[data_pos..chunk_end];
224 output.clear();
225
226 let out_ptr = output.as_mut_ptr();
227 let src = chunk.as_ptr();
228 let mut wp: usize = 0;
229 let mut pos: usize = 0;
230
231 for hit in memchr::memchr2_iter(b'\t', b'\n', chunk) {
232 let seg_len = hit - pos;
233 if seg_len > 0 {
234 unsafe {
235 std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
236 }
237 wp += seg_len;
238 column += seg_len;
239 }
240
241 if unsafe { *src.add(hit) } == b'\n' {
242 unsafe {
243 *out_ptr.add(wp) = b'\n';
244 }
245 wp += 1;
246 column = 0;
247 } else {
248 let rem = if is_pow2 {
249 column & mask
250 } else {
251 column % tab_size
252 };
253 let spaces = tab_size - rem;
254 unsafe {
255 std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
256 }
257 wp += spaces;
258 column += spaces;
259 }
260
261 pos = hit + 1;
262 }
263
264 if pos < chunk.len() {
265 let tail = chunk.len() - pos;
266 unsafe {
267 std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), tail);
268 }
269 wp += tail;
270 column += tail;
271 }
272
273 unsafe {
274 output.set_len(wp);
275 }
276 if wp > 0 {
277 out.write_all(&output)?;
278 }
279
280 data_pos = chunk_end;
281 }
282
283 Ok(())
284}
285
286fn expand_chunk(chunk: &[u8], tab_size: usize, is_pow2: bool, mask: usize) -> Vec<u8> {
291 let worst_case = chunk.len() * tab_size;
295 let mut output: Vec<u8> = Vec::with_capacity(worst_case);
296
297 let out_ptr = output.as_mut_ptr();
298 let src = chunk.as_ptr();
299 let mut wp: usize = 0;
300 let mut column: usize = 0;
301 let mut pos: usize = 0;
302
303 for hit in memchr::memchr2_iter(b'\t', b'\n', chunk) {
306 let seg_len = hit - pos;
307 if seg_len > 0 {
308 unsafe {
309 std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
310 }
311 wp += seg_len;
312 column += seg_len;
313 }
314
315 if unsafe { *src.add(hit) } == b'\n' {
316 unsafe {
317 *out_ptr.add(wp) = b'\n';
318 }
319 wp += 1;
320 column = 0;
321 } else {
322 let rem = if is_pow2 {
323 column & mask
324 } else {
325 column % tab_size
326 };
327 let spaces = tab_size - rem;
328 unsafe {
329 std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
330 }
331 wp += spaces;
332 column += spaces;
333 }
334
335 pos = hit + 1;
336 }
337
338 if pos < chunk.len() {
340 let tail = chunk.len() - pos;
341 unsafe {
342 std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), tail);
343 }
344 wp += tail;
345 }
346
347 unsafe {
348 output.set_len(wp);
349 }
350 output
351}
352
353fn expand_regular_parallel(
357 data: &[u8],
358 tab_size: usize,
359 out: &mut impl Write,
360) -> std::io::Result<()> {
361 use rayon::prelude::*;
362
363 let is_pow2 = tab_size.is_power_of_two();
364 let mask = tab_size.wrapping_sub(1);
365
366 let num_chunks = rayon::current_num_threads().max(2);
369 let target_chunk_size = data.len() / num_chunks;
370 let mut chunks: Vec<&[u8]> = Vec::with_capacity(num_chunks + 1);
371 let mut pos: usize = 0;
372
373 for _ in 0..num_chunks - 1 {
374 if pos >= data.len() {
375 break;
376 }
377 let target_end = (pos + target_chunk_size).min(data.len());
378 let chunk_end = if target_end >= data.len() {
380 data.len()
381 } else {
382 match memchr::memchr(b'\n', &data[target_end..]) {
383 Some(off) => target_end + off + 1,
384 None => data.len(),
385 }
386 };
387 chunks.push(&data[pos..chunk_end]);
388 pos = chunk_end;
389 }
390 if pos < data.len() {
392 chunks.push(&data[pos..]);
393 }
394
395 let results: Vec<Vec<u8>> = chunks
397 .par_iter()
398 .map(|chunk| expand_chunk(chunk, tab_size, is_pow2, mask))
399 .collect();
400
401 for result in &results {
403 if !result.is_empty() {
404 out.write_all(result)?;
405 }
406 }
407
408 Ok(())
409}
410
411fn expand_initial_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
416 debug_assert!(tab_size > 0, "tab_size must be > 0");
417 let tabs = TabStops::Regular(tab_size);
418 let mut pos: usize = 0;
419
420 while pos < data.len() {
421 let line_end = memchr::memchr(b'\n', &data[pos..])
423 .map(|off| pos + off + 1)
424 .unwrap_or(data.len());
425
426 let line = &data[pos..line_end];
427 debug_assert!(!line.is_empty());
428
429 let first = line[0];
431 if first != b'\t' && first != b' ' {
432 out.write_all(line)?;
433 pos = line_end;
434 continue;
435 }
436
437 if memchr::memchr(b'\x08', line).is_some() {
439 expand_generic(line, &tabs, true, true, out)?;
440 pos = line_end;
441 continue;
442 }
443
444 let mut column: usize = 0;
446 let mut i = 0; while i < line.len() {
448 let byte = line[i];
449 if byte == b'\t' {
450 let spaces = tab_size - (column % tab_size);
451 write_spaces(out, spaces)?;
452 column += spaces;
453 i += 1;
454 } else if byte == b' ' {
455 let space_start = i;
457 while i < line.len() && line[i] == b' ' {
458 i += 1;
459 }
460 out.write_all(&line[space_start..i])?;
461 column += i - space_start;
462 } else {
463 break;
465 }
466 }
467
468 if i < line.len() {
470 out.write_all(&line[i..])?;
471 }
472
473 pos = line_end;
474 }
475
476 Ok(())
477}
478
479fn expand_generic(
483 data: &[u8],
484 tabs: &TabStops,
485 initial_only: bool,
486 has_backspace: bool,
487 out: &mut impl Write,
488) -> std::io::Result<()> {
489 const FLUSH_THRESHOLD: usize = 256 * 1024;
490 let cap = data.len().min(FLUSH_THRESHOLD) + data.len().min(FLUSH_THRESHOLD) / 8;
491 let mut output = Vec::with_capacity(cap);
492
493 if !initial_only && !has_backspace {
495 let mut column: usize = 0;
496 let mut pos: usize = 0;
497
498 while pos < data.len() {
499 match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
500 Some(offset) => {
501 if offset > 0 {
502 output.extend_from_slice(&data[pos..pos + offset]);
503 column += offset;
504 }
505 let byte = data[pos + offset];
506 pos += offset + 1;
507
508 if byte == b'\n' {
509 output.push(b'\n');
510 column = 0;
511 } else {
512 let spaces = tabs.spaces_to_next(column);
513 push_spaces(&mut output, spaces);
514 column += spaces;
515 }
516 if output.len() >= FLUSH_THRESHOLD {
517 out.write_all(&output)?;
518 output.clear();
519 }
520 }
521 None => {
522 output.extend_from_slice(&data[pos..]);
523 break;
524 }
525 }
526 }
527 } else {
528 let mut column: usize = 0;
530 let mut in_initial = true;
531
532 for &byte in data {
533 match byte {
534 b'\t' => {
535 if initial_only && !in_initial {
536 output.push(b'\t');
537 column = tabs.next_tab_stop(column);
538 } else {
539 let spaces = tabs.spaces_to_next(column);
540 push_spaces(&mut output, spaces);
541 column += spaces;
542 }
543 }
544 b'\n' => {
545 output.push(b'\n');
546 column = 0;
547 in_initial = true;
548 if output.len() >= FLUSH_THRESHOLD {
549 out.write_all(&output)?;
550 output.clear();
551 }
552 }
553 b'\x08' => {
554 output.push(b'\x08');
555 if column > 0 {
556 column -= 1;
557 }
558 }
559 _ => {
560 if initial_only && in_initial && byte != b' ' {
561 in_initial = false;
562 }
563 output.push(byte);
564 column += 1;
565 }
566 }
567 }
568 }
569
570 if !output.is_empty() {
571 out.write_all(&output)?;
572 }
573 Ok(())
574}
575
576pub fn unexpand_bytes(
579 data: &[u8],
580 tabs: &TabStops,
581 all: bool,
582 out: &mut impl Write,
583) -> std::io::Result<()> {
584 if data.is_empty() {
585 return Ok(());
586 }
587
588 if memchr::memchr2(b' ', b'\t', data).is_none() {
590 return out.write_all(data);
591 }
592
593 if let TabStops::Regular(tab_size) = tabs {
595 if memchr::memchr(b'\x08', data).is_none() {
596 return unexpand_regular_fast(data, *tab_size, all, out);
597 }
598 }
599
600 unexpand_generic(data, tabs, all, out)
602}
603
604#[inline]
608fn emit_blanks(
609 out: &mut impl Write,
610 start_col: usize,
611 count: usize,
612 tab_size: usize,
613) -> std::io::Result<()> {
614 if count == 0 {
615 return Ok(());
616 }
617 let end_col = start_col + count;
618 let mut col = start_col;
619
620 loop {
622 let next_tab = col + (tab_size - col % tab_size);
623 if next_tab > end_col {
624 break;
625 }
626 let blanks_consumed = next_tab - col;
627 if blanks_consumed >= 2 || next_tab < end_col {
628 out.write_all(b"\t")?;
630 col = next_tab;
631 } else {
632 break;
634 }
635 }
636
637 let remaining = end_col - col;
639 if remaining > 0 {
640 let mut r = remaining;
641 while r > 0 {
642 let chunk = r.min(SPACES.len());
643 out.write_all(&SPACES[..chunk])?;
644 r -= chunk;
645 }
646 }
647 Ok(())
648}
649
650fn unexpand_regular_fast(
653 data: &[u8],
654 tab_size: usize,
655 all: bool,
656 out: &mut impl Write,
657) -> std::io::Result<()> {
658 if all {
659 return unexpand_regular_fast_all(data, tab_size, out);
660 }
661
662 let mut column: usize = 0;
663 let mut pos: usize = 0;
664 let mut in_initial = true;
665
666 while pos < data.len() {
667 if in_initial {
668 if data[pos] == b' ' || data[pos] == b'\t' {
670 let blank_start_col = column;
672 while pos < data.len() && (data[pos] == b' ' || data[pos] == b'\t') {
673 if data[pos] == b'\t' {
674 column += tab_size - column % tab_size;
675 } else {
676 column += 1;
677 }
678 pos += 1;
679 }
680 emit_blanks(out, blank_start_col, column - blank_start_col, tab_size)?;
682 continue;
683 }
684 if data[pos] == b'\n' {
685 out.write_all(b"\n")?;
686 column = 0;
687 in_initial = true;
688 pos += 1;
689 continue;
690 }
691 }
693
694 match memchr::memchr(b'\n', &data[pos..]) {
696 Some(offset) => {
697 out.write_all(&data[pos..pos + offset + 1])?;
698 column = 0;
699 in_initial = true;
700 pos += offset + 1;
701 }
702 None => {
703 out.write_all(&data[pos..])?;
704 return Ok(());
705 }
706 }
707 }
708
709 Ok(())
710}
711
712fn unexpand_regular_fast_all(
716 data: &[u8],
717 tab_size: usize,
718 out: &mut impl Write,
719) -> std::io::Result<()> {
720 let mut output: Vec<u8> = Vec::with_capacity(data.len());
724 let mut pos: usize = 0;
725
726 for nl_pos in memchr::memchr_iter(b'\n', data) {
727 let line = &data[pos..nl_pos];
728
729 let needs_processing =
733 memchr::memchr(b'\t', line).is_some() || memchr::memmem::find(line, b" ").is_some();
734
735 if !needs_processing {
736 output.extend_from_slice(line);
738 } else {
739 unexpand_line_all(line, tab_size, &mut output);
741 }
742 output.push(b'\n');
743
744 if output.len() >= 256 * 1024 {
746 out.write_all(&output)?;
747 output.clear();
748 }
749 pos = nl_pos + 1;
750 }
751
752 if pos < data.len() {
754 let line = &data[pos..];
755 let needs_processing =
756 memchr::memchr(b'\t', line).is_some() || memchr::memmem::find(line, b" ").is_some();
757 if !needs_processing {
758 output.extend_from_slice(line);
759 } else {
760 unexpand_line_all(line, tab_size, &mut output);
761 }
762 }
763
764 if !output.is_empty() {
765 out.write_all(&output)?;
766 }
767 Ok(())
768}
769
770#[inline]
772fn unexpand_line_all(line: &[u8], tab_size: usize, output: &mut Vec<u8>) {
773 let mut column: usize = 0;
774 let mut pos: usize = 0;
775
776 while pos < line.len() {
777 if line[pos] == b' ' || line[pos] == b'\t' {
779 let blank_start_col = column;
781 while pos < line.len() && (line[pos] == b' ' || line[pos] == b'\t') {
782 if line[pos] == b'\t' {
783 column += tab_size - column % tab_size;
784 } else {
785 column += 1;
786 }
787 pos += 1;
788 }
789 emit_blanks(output, blank_start_col, column - blank_start_col, tab_size).unwrap();
791 continue;
792 }
793
794 match memchr::memchr2(b' ', b'\t', &line[pos..]) {
796 Some(offset) => {
797 output.extend_from_slice(&line[pos..pos + offset]);
798 column += offset;
799 pos += offset;
800 }
801 None => {
802 output.extend_from_slice(&line[pos..]);
803 break;
804 }
805 }
806 }
807}
808
809fn unexpand_generic(
811 data: &[u8],
812 tabs: &TabStops,
813 all: bool,
814 out: &mut impl Write,
815) -> std::io::Result<()> {
816 let tab_size = match tabs {
817 TabStops::Regular(n) => *n,
818 TabStops::List(_) => 0, };
820 let mut column: usize = 0;
821 let mut space_start_col: Option<usize> = None;
822 let mut in_initial = true;
823
824 for &byte in data {
825 match byte {
826 b' ' => {
827 if !all && !in_initial {
828 out.write_all(b" ")?;
829 column += 1;
830 } else {
831 if space_start_col.is_none() {
832 space_start_col = Some(column);
833 }
834 column += 1;
835 }
837 }
838 b'\t' => {
839 if !all && !in_initial {
840 if let Some(start_col) = space_start_col.take() {
842 let n = column - start_col;
843 out.write_all(&SPACES[..n.min(SPACES.len())])?;
844 }
845 out.write_all(b"\t")?;
846 column = tabs.next_tab_stop(column);
847 } else {
848 if space_start_col.is_none() {
849 space_start_col = Some(column);
850 }
851 column = tabs.next_tab_stop(column);
852 }
853 }
854 _ => {
855 if let Some(start_col) = space_start_col.take() {
857 let count = column - start_col;
858 if tab_size > 0 {
859 emit_blanks(out, start_col, count, tab_size)?;
860 } else {
861 emit_blanks_tablist(out, start_col, count, tabs)?;
863 }
864 }
865
866 if byte == b'\n' {
867 out.write_all(b"\n")?;
868 column = 0;
869 in_initial = true;
870 } else if byte == b'\x08' {
871 out.write_all(b"\x08")?;
872 if column > 0 {
873 column -= 1;
874 }
875 } else {
876 if in_initial {
877 in_initial = false;
878 }
879 out.write_all(&[byte])?;
880 column += 1;
881 }
882 }
883 }
884 }
885
886 if let Some(start_col) = space_start_col {
887 let count = column - start_col;
888 if tab_size > 0 {
889 emit_blanks(out, start_col, count, tab_size)?;
890 } else {
891 emit_blanks_tablist(out, start_col, count, tabs)?;
892 }
893 }
894
895 Ok(())
896}
897
898fn emit_blanks_tablist(
901 out: &mut impl Write,
902 start_col: usize,
903 count: usize,
904 tabs: &TabStops,
905) -> std::io::Result<()> {
906 if count == 0 {
907 return Ok(());
908 }
909 let end_col = start_col + count;
910 let mut col = start_col;
911
912 let last_stop = match tabs {
914 TabStops::List(stops) => stops.last().copied().unwrap_or(0),
915 TabStops::Regular(_) => usize::MAX,
916 };
917
918 while col < last_stop {
919 let next_tab = tabs.next_tab_stop(col);
920 if next_tab > end_col || next_tab > last_stop {
921 break;
922 }
923 let blanks_consumed = next_tab - col;
924 if blanks_consumed >= 2 || next_tab < end_col {
925 out.write_all(b"\t")?;
926 col = next_tab;
927 } else {
928 break;
929 }
930 }
931
932 let remaining = end_col - col;
933 if remaining > 0 {
934 let mut r = remaining;
935 while r > 0 {
936 let chunk = r.min(SPACES.len());
937 out.write_all(&SPACES[..chunk])?;
938 r -= chunk;
939 }
940 }
941 Ok(())
942}