Skip to main content

coreutils_rs/tr/
core.rs

1use std::io::{self, Read, Write};
2
/// Capacity in bytes (4 MiB) of the scratch buffer used by `translate` and as
/// the upper bound on chunk size in the slice-based (`*_mmap`) routines.
const BUF_SIZE: usize = 4 << 20;

/// Capacity in bytes (1 MiB) of the read buffer used by the streaming
/// translate-squeeze, delete and squeeze routines. Each `read()` result is
/// processed and written out before the next read is issued.
const STREAM_BUF: usize = 1 << 20;
10
/// Build the 256-entry byte translation table for `set1` -> `set2`.
///
/// Every byte starts out mapped to itself. The byte at position `i` of `set1`
/// is then mapped to `set2[i]`; once `set2` is exhausted, the remaining `set1`
/// bytes map to the final byte of `set2`. If `set2` is empty, `set1` bytes keep
/// mapping to themselves. When a byte occurs more than once in `set1`, its
/// last occurrence determines the mapping.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    // Start from the identity mapping.
    let mut table = [0u8; 256];
    for (slot, byte) in table.iter_mut().zip(0u8..=255) {
        *slot = byte;
    }

    let pad = set2.last();
    for (idx, &src) in set1.iter().enumerate() {
        let dst = set2.get(idx).or(pad).copied().unwrap_or(src);
        table[usize::from(src)] = dst;
    }
    table
}
25
/// Build a 256-bit membership bitmap (32 bytes) from `chars`.
///
/// Byte value `b` is recorded as bit `b & 7` of element `b >> 3`.
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    chars.iter().fold([0u8; 32], |mut bits, &byte| {
        bits[usize::from(byte >> 3)] |= 1u8 << (byte & 7);
        bits
    })
}
35
/// Return `true` if `ch` is present in the 256-bit membership bitmap `set`.
///
/// The bitmap layout matches `build_member_set`: byte value `ch` is stored as
/// bit `ch & 7` of element `ch >> 3`.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    // `ch >> 3` is at most 31, so the index is always inside the 32-element
    // array; plain indexing is sufficient and no `unsafe` is required.
    (set[usize::from(ch >> 3)] & (1 << (ch & 7))) != 0
}
40
/// Replace every byte of `data`, in place, with its entry in `table`.
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    data.iter_mut()
        .for_each(|byte| *byte = table[usize::from(*byte)]);
}
49
50// ============================================================================
51// Streaming functions (Read + Write)
52// ============================================================================
53
54pub fn translate(
55    set1: &[u8],
56    set2: &[u8],
57    reader: &mut impl Read,
58    writer: &mut impl Write,
59) -> io::Result<()> {
60    let table = build_translate_table(set1, set2);
61    let mut buf = vec![0u8; BUF_SIZE];
62    loop {
63        let n = match reader.read(&mut buf) {
64            Ok(0) => break,
65            Ok(n) => n,
66            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
67            Err(e) => return Err(e),
68        };
69        translate_inplace(&mut buf[..n], &table);
70        writer.write_all(&buf[..n])?;
71    }
72    Ok(())
73}
74
75pub fn translate_squeeze(
76    set1: &[u8],
77    set2: &[u8],
78    reader: &mut impl Read,
79    writer: &mut impl Write,
80) -> io::Result<()> {
81    let table = build_translate_table(set1, set2);
82    let squeeze_set = build_member_set(set2);
83
84    let mut buf = vec![0u8; STREAM_BUF];
85    let mut last_squeezed: u16 = 256;
86
87    loop {
88        let n = match reader.read(&mut buf) {
89            Ok(0) => break,
90            Ok(n) => n,
91            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
92            Err(e) => return Err(e),
93        };
94        // Phase 1: translate in-place
95        translate_inplace(&mut buf[..n], &table);
96        // Phase 2: squeeze in-place compaction (wp <= i always, safe)
97        let mut wp = 0;
98        unsafe {
99            let ptr = buf.as_mut_ptr();
100            for i in 0..n {
101                let b = *ptr.add(i);
102                if is_member(&squeeze_set, b) {
103                    if last_squeezed == b as u16 {
104                        continue;
105                    }
106                    last_squeezed = b as u16;
107                } else {
108                    last_squeezed = 256;
109                }
110                *ptr.add(wp) = b;
111                wp += 1;
112            }
113        }
114        writer.write_all(&buf[..wp])?;
115    }
116    Ok(())
117}
118
119pub fn delete(
120    delete_chars: &[u8],
121    reader: &mut impl Read,
122    writer: &mut impl Write,
123) -> io::Result<()> {
124    // Fast path: single character delete using SIMD memchr
125    if delete_chars.len() == 1 {
126        return delete_single_streaming(delete_chars[0], reader, writer);
127    }
128
129    // Fast paths: 2-3 char delete using SIMD memchr2/memchr3
130    if delete_chars.len() <= 3 {
131        return delete_multi_streaming(delete_chars, reader, writer);
132    }
133
134    let member = build_member_set(delete_chars);
135    // Single buffer with in-place compaction — eliminates outbuf allocation + memcpy
136    let mut buf = vec![0u8; STREAM_BUF];
137
138    loop {
139        let n = match reader.read(&mut buf) {
140            Ok(0) => break,
141            Ok(n) => n,
142            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
143            Err(e) => return Err(e),
144        };
145        let mut wp = 0;
146        unsafe {
147            let ptr = buf.as_mut_ptr();
148            let mut i = 0;
149            // 8-byte unrolled in-place compaction
150            while i + 8 <= n {
151                let b0 = *ptr.add(i);
152                let b1 = *ptr.add(i + 1);
153                let b2 = *ptr.add(i + 2);
154                let b3 = *ptr.add(i + 3);
155                let b4 = *ptr.add(i + 4);
156                let b5 = *ptr.add(i + 5);
157                let b6 = *ptr.add(i + 6);
158                let b7 = *ptr.add(i + 7);
159
160                if !is_member(&member, b0) {
161                    *ptr.add(wp) = b0;
162                    wp += 1;
163                }
164                if !is_member(&member, b1) {
165                    *ptr.add(wp) = b1;
166                    wp += 1;
167                }
168                if !is_member(&member, b2) {
169                    *ptr.add(wp) = b2;
170                    wp += 1;
171                }
172                if !is_member(&member, b3) {
173                    *ptr.add(wp) = b3;
174                    wp += 1;
175                }
176                if !is_member(&member, b4) {
177                    *ptr.add(wp) = b4;
178                    wp += 1;
179                }
180                if !is_member(&member, b5) {
181                    *ptr.add(wp) = b5;
182                    wp += 1;
183                }
184                if !is_member(&member, b6) {
185                    *ptr.add(wp) = b6;
186                    wp += 1;
187                }
188                if !is_member(&member, b7) {
189                    *ptr.add(wp) = b7;
190                    wp += 1;
191                }
192                i += 8;
193            }
194            while i < n {
195                let b = *ptr.add(i);
196                if !is_member(&member, b) {
197                    *ptr.add(wp) = b;
198                    wp += 1;
199                }
200                i += 1;
201            }
202        }
203        writer.write_all(&buf[..wp])?;
204    }
205    Ok(())
206}
207
208/// Single-character delete from a reader — in-place compaction with SIMD memchr.
209fn delete_single_streaming(
210    ch: u8,
211    reader: &mut impl Read,
212    writer: &mut impl Write,
213) -> io::Result<()> {
214    let mut buf = vec![0u8; STREAM_BUF];
215    loop {
216        let n = match reader.read(&mut buf) {
217            Ok(0) => break,
218            Ok(n) => n,
219            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
220            Err(e) => return Err(e),
221        };
222        let mut wp = 0;
223        let mut i = 0;
224        while i < n {
225            match memchr::memchr(ch, &buf[i..n]) {
226                Some(offset) => {
227                    if offset > 0 {
228                        if wp != i {
229                            unsafe {
230                                std::ptr::copy(
231                                    buf.as_ptr().add(i),
232                                    buf.as_mut_ptr().add(wp),
233                                    offset,
234                                );
235                            }
236                        }
237                        wp += offset;
238                    }
239                    i += offset + 1; // skip the deleted char
240                }
241                None => {
242                    let run_len = n - i;
243                    if run_len > 0 {
244                        if wp != i {
245                            unsafe {
246                                std::ptr::copy(
247                                    buf.as_ptr().add(i),
248                                    buf.as_mut_ptr().add(wp),
249                                    run_len,
250                                );
251                            }
252                        }
253                        wp += run_len;
254                    }
255                    break;
256                }
257            }
258        }
259        writer.write_all(&buf[..wp])?;
260    }
261    Ok(())
262}
263
264/// Multi-character delete (2-3 chars) — in-place compaction with SIMD memchr2/memchr3.
265fn delete_multi_streaming(
266    chars: &[u8],
267    reader: &mut impl Read,
268    writer: &mut impl Write,
269) -> io::Result<()> {
270    let mut buf = vec![0u8; STREAM_BUF];
271    loop {
272        let n = match reader.read(&mut buf) {
273            Ok(0) => break,
274            Ok(n) => n,
275            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
276            Err(e) => return Err(e),
277        };
278        let mut wp = 0;
279        let mut i = 0;
280        while i < n {
281            let found = if chars.len() == 2 {
282                memchr::memchr2(chars[0], chars[1], &buf[i..n])
283            } else {
284                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
285            };
286            match found {
287                Some(offset) => {
288                    if offset > 0 {
289                        if wp != i {
290                            unsafe {
291                                std::ptr::copy(
292                                    buf.as_ptr().add(i),
293                                    buf.as_mut_ptr().add(wp),
294                                    offset,
295                                );
296                            }
297                        }
298                        wp += offset;
299                    }
300                    i += offset + 1;
301                }
302                None => {
303                    let run_len = n - i;
304                    if run_len > 0 {
305                        if wp != i {
306                            unsafe {
307                                std::ptr::copy(
308                                    buf.as_ptr().add(i),
309                                    buf.as_mut_ptr().add(wp),
310                                    run_len,
311                                );
312                            }
313                        }
314                        wp += run_len;
315                    }
316                    break;
317                }
318            }
319        }
320        writer.write_all(&buf[..wp])?;
321    }
322    Ok(())
323}
324
325pub fn delete_squeeze(
326    delete_chars: &[u8],
327    squeeze_chars: &[u8],
328    reader: &mut impl Read,
329    writer: &mut impl Write,
330) -> io::Result<()> {
331    let delete_set = build_member_set(delete_chars);
332    let squeeze_set = build_member_set(squeeze_chars);
333    // Single buffer with in-place compaction
334    let mut buf = vec![0u8; STREAM_BUF];
335    let mut last_squeezed: u16 = 256;
336
337    loop {
338        let n = match reader.read(&mut buf) {
339            Ok(0) => break,
340            Ok(n) => n,
341            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
342            Err(e) => return Err(e),
343        };
344        let mut wp = 0;
345        unsafe {
346            let ptr = buf.as_mut_ptr();
347            for i in 0..n {
348                let b = *ptr.add(i);
349                if is_member(&delete_set, b) {
350                    continue;
351                }
352                if is_member(&squeeze_set, b) {
353                    if last_squeezed == b as u16 {
354                        continue;
355                    }
356                    last_squeezed = b as u16;
357                } else {
358                    last_squeezed = 256;
359                }
360                *ptr.add(wp) = b;
361                wp += 1;
362            }
363        }
364        writer.write_all(&buf[..wp])?;
365    }
366    Ok(())
367}
368
369pub fn squeeze(
370    squeeze_chars: &[u8],
371    reader: &mut impl Read,
372    writer: &mut impl Write,
373) -> io::Result<()> {
374    // Fast path: single squeeze char — bulk copy non-match runs
375    if squeeze_chars.len() == 1 {
376        return squeeze_single_stream(squeeze_chars[0], reader, writer);
377    }
378
379    let member = build_member_set(squeeze_chars);
380    // Single buffer with in-place compaction — eliminates outbuf allocation + memcpy
381    let mut buf = vec![0u8; STREAM_BUF];
382    let mut last_squeezed: u16 = 256;
383
384    loop {
385        let n = match reader.read(&mut buf) {
386            Ok(0) => break,
387            Ok(n) => n,
388            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
389            Err(e) => return Err(e),
390        };
391        let mut wp = 0;
392        unsafe {
393            let ptr = buf.as_mut_ptr();
394            for i in 0..n {
395                let b = *ptr.add(i);
396                if is_member(&member, b) {
397                    if last_squeezed == b as u16 {
398                        continue;
399                    }
400                    last_squeezed = b as u16;
401                } else {
402                    last_squeezed = 256;
403                }
404                *ptr.add(wp) = b;
405                wp += 1;
406            }
407        }
408        writer.write_all(&buf[..wp])?;
409    }
410    Ok(())
411}
412
413/// Squeeze a single character from a stream — in-place compaction with SIMD memchr.
414fn squeeze_single_stream(
415    ch: u8,
416    reader: &mut impl Read,
417    writer: &mut impl Write,
418) -> io::Result<()> {
419    let mut buf = vec![0u8; STREAM_BUF];
420    let mut was_squeeze_char = false;
421
422    loop {
423        let n = match reader.read(&mut buf) {
424            Ok(0) => break,
425            Ok(n) => n,
426            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
427            Err(e) => return Err(e),
428        };
429
430        let mut wp = 0;
431        let mut i = 0;
432
433        while i < n {
434            // Cross-chunk continuation: skip squeeze chars from previous chunk
435            if was_squeeze_char && buf[i] == ch {
436                i += 1;
437                while i < n && buf[i] == ch {
438                    i += 1;
439                }
440                if i >= n {
441                    break;
442                }
443            }
444
445            // Find next occurrence of squeeze char using SIMD memchr
446            match memchr::memchr(ch, &buf[i..n]) {
447                Some(offset) => {
448                    let run_len = offset;
449                    if run_len > 0 {
450                        if wp != i {
451                            unsafe {
452                                std::ptr::copy(
453                                    buf.as_ptr().add(i),
454                                    buf.as_mut_ptr().add(wp),
455                                    run_len,
456                                );
457                            }
458                        }
459                        wp += run_len;
460                    }
461                    i += run_len;
462
463                    // Emit one squeeze char, skip consecutive duplicates
464                    unsafe {
465                        *buf.as_mut_ptr().add(wp) = ch;
466                    }
467                    wp += 1;
468                    was_squeeze_char = true;
469                    i += 1;
470                    while i < n && buf[i] == ch {
471                        i += 1;
472                    }
473                }
474                None => {
475                    let run_len = n - i;
476                    if run_len > 0 {
477                        if wp != i {
478                            unsafe {
479                                std::ptr::copy(
480                                    buf.as_ptr().add(i),
481                                    buf.as_mut_ptr().add(wp),
482                                    run_len,
483                                );
484                            }
485                        }
486                        wp += run_len;
487                    }
488                    was_squeeze_char = false;
489                    break;
490                }
491            }
492        }
493
494        writer.write_all(&buf[..wp])?;
495    }
496    Ok(())
497}
498
499// ============================================================================
500// Mmap-based functions (zero-copy input from byte slice)
501// ============================================================================
502
503/// Translate bytes from an mmap'd byte slice.
504/// Simple chunked approach: translate into a 4MB buffer, write each chunk.
505pub fn translate_mmap(
506    set1: &[u8],
507    set2: &[u8],
508    data: &[u8],
509    writer: &mut impl Write,
510) -> io::Result<()> {
511    let table = build_translate_table(set1, set2);
512    let buf_size = data.len().min(BUF_SIZE);
513    let mut buf = vec![0u8; buf_size];
514    for chunk in data.chunks(buf_size) {
515        let out = &mut buf[..chunk.len()];
516        for (o, &b) in out.iter_mut().zip(chunk) {
517            *o = table[b as usize];
518        }
519        writer.write_all(out)?;
520    }
521    Ok(())
522}
523
524/// Translate + squeeze from mmap'd byte slice.
525pub fn translate_squeeze_mmap(
526    set1: &[u8],
527    set2: &[u8],
528    data: &[u8],
529    writer: &mut impl Write,
530) -> io::Result<()> {
531    let table = build_translate_table(set1, set2);
532    let squeeze_set = build_member_set(set2);
533    let buf_size = data.len().min(BUF_SIZE);
534    let mut buf = vec![0u8; buf_size];
535    let mut last_squeezed: u16 = 256;
536
537    for chunk in data.chunks(buf_size) {
538        // Translate chunk into buf
539        let out = &mut buf[..chunk.len()];
540        for (o, &b) in out.iter_mut().zip(chunk) {
541            *o = table[b as usize];
542        }
543        // Squeeze in-place compaction
544        let mut wp = 0;
545        unsafe {
546            let ptr = buf.as_mut_ptr();
547            for i in 0..chunk.len() {
548                let b = *ptr.add(i);
549                if is_member(&squeeze_set, b) {
550                    if last_squeezed == b as u16 {
551                        continue;
552                    }
553                    last_squeezed = b as u16;
554                } else {
555                    last_squeezed = 256;
556                }
557                *ptr.add(wp) = b;
558                wp += 1;
559            }
560        }
561        writer.write_all(&buf[..wp])?;
562    }
563    Ok(())
564}
565
566/// Delete from mmap'd byte slice.
567/// Uses SIMD memchr for single/multi char fast paths.
568pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
569    // Fast path: single character delete uses SIMD memchr
570    if delete_chars.len() == 1 {
571        return delete_single_char_mmap(delete_chars[0], data, writer);
572    }
573
574    // Fast path: 2-3 char delete uses SIMD memchr2/memchr3
575    if delete_chars.len() <= 3 {
576        return delete_multi_memchr_mmap(delete_chars, data, writer);
577    }
578
579    let member = build_member_set(delete_chars);
580    let buf_size = data.len().min(BUF_SIZE);
581    let mut outbuf = vec![0u8; buf_size];
582
583    for chunk in data.chunks(buf_size) {
584        let mut out_pos = 0;
585        let len = chunk.len();
586        let mut i = 0;
587
588        while i + 8 <= len {
589            unsafe {
590                let b0 = *chunk.get_unchecked(i);
591                let b1 = *chunk.get_unchecked(i + 1);
592                let b2 = *chunk.get_unchecked(i + 2);
593                let b3 = *chunk.get_unchecked(i + 3);
594                let b4 = *chunk.get_unchecked(i + 4);
595                let b5 = *chunk.get_unchecked(i + 5);
596                let b6 = *chunk.get_unchecked(i + 6);
597                let b7 = *chunk.get_unchecked(i + 7);
598
599                *outbuf.get_unchecked_mut(out_pos) = b0;
600                out_pos += !is_member(&member, b0) as usize;
601                *outbuf.get_unchecked_mut(out_pos) = b1;
602                out_pos += !is_member(&member, b1) as usize;
603                *outbuf.get_unchecked_mut(out_pos) = b2;
604                out_pos += !is_member(&member, b2) as usize;
605                *outbuf.get_unchecked_mut(out_pos) = b3;
606                out_pos += !is_member(&member, b3) as usize;
607                *outbuf.get_unchecked_mut(out_pos) = b4;
608                out_pos += !is_member(&member, b4) as usize;
609                *outbuf.get_unchecked_mut(out_pos) = b5;
610                out_pos += !is_member(&member, b5) as usize;
611                *outbuf.get_unchecked_mut(out_pos) = b6;
612                out_pos += !is_member(&member, b6) as usize;
613                *outbuf.get_unchecked_mut(out_pos) = b7;
614                out_pos += !is_member(&member, b7) as usize;
615            }
616            i += 8;
617        }
618
619        while i < len {
620            unsafe {
621                let b = *chunk.get_unchecked(i);
622                *outbuf.get_unchecked_mut(out_pos) = b;
623                out_pos += !is_member(&member, b) as usize;
624            }
625            i += 1;
626        }
627
628        writer.write_all(&outbuf[..out_pos])?;
629    }
630    Ok(())
631}
632
633/// Single-character delete from mmap using SIMD memchr + bulk copy between matches.
634fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
635    let buf_size = data.len().min(BUF_SIZE);
636    let mut outbuf = vec![0u8; buf_size];
637
638    for chunk in data.chunks(buf_size) {
639        let mut wp = 0;
640        let mut last = 0;
641        for pos in memchr::memchr_iter(ch, chunk) {
642            if pos > last {
643                let run = pos - last;
644                outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
645                wp += run;
646            }
647            last = pos + 1;
648        }
649        if last < chunk.len() {
650            let run = chunk.len() - last;
651            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
652            wp += run;
653        }
654        writer.write_all(&outbuf[..wp])?;
655    }
656    Ok(())
657}
658
659/// Multi-character delete (2-3 chars) using SIMD memchr2/memchr3 + bulk copy.
660fn delete_multi_memchr_mmap(chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
661    let c0 = chars[0];
662    let c1 = if chars.len() >= 2 { chars[1] } else { 0 };
663    let c2 = if chars.len() >= 3 { chars[2] } else { 0 };
664    let is_three = chars.len() >= 3;
665
666    let buf_size = data.len().min(BUF_SIZE);
667    let mut outbuf = vec![0u8; buf_size];
668
669    for chunk in data.chunks(buf_size) {
670        let mut wp = 0;
671        let mut last = 0;
672
673        let iter_fn = |chunk: &[u8]| -> Vec<usize> {
674            if is_three {
675                memchr::memchr3_iter(c0, c1, c2, chunk).collect()
676            } else {
677                memchr::memchr2_iter(c0, c1, chunk).collect()
678            }
679        };
680
681        for pos in iter_fn(chunk) {
682            if pos > last {
683                let run = pos - last;
684                outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
685                wp += run;
686            }
687            last = pos + 1;
688        }
689
690        if last < chunk.len() {
691            let run = chunk.len() - last;
692            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
693            wp += run;
694        }
695        writer.write_all(&outbuf[..wp])?;
696    }
697    Ok(())
698}
699
700/// Delete + squeeze from mmap'd byte slice.
701pub fn delete_squeeze_mmap(
702    delete_chars: &[u8],
703    squeeze_chars: &[u8],
704    data: &[u8],
705    writer: &mut impl Write,
706) -> io::Result<()> {
707    let delete_set = build_member_set(delete_chars);
708    let squeeze_set = build_member_set(squeeze_chars);
709    let buf_size = data.len().min(BUF_SIZE);
710    let mut outbuf = vec![0u8; buf_size];
711    let mut last_squeezed: u16 = 256;
712
713    for chunk in data.chunks(buf_size) {
714        let mut out_pos = 0;
715        for &b in chunk {
716            if is_member(&delete_set, b) {
717                continue;
718            }
719            if is_member(&squeeze_set, b) {
720                if last_squeezed == b as u16 {
721                    continue;
722                }
723                last_squeezed = b as u16;
724            } else {
725                last_squeezed = 256;
726            }
727            unsafe {
728                *outbuf.get_unchecked_mut(out_pos) = b;
729            }
730            out_pos += 1;
731        }
732        writer.write_all(&outbuf[..out_pos])?;
733    }
734    Ok(())
735}
736
737/// Squeeze from mmap'd byte slice.
738pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
739    // Fast path: single squeeze character — use SIMD memchr to find runs
740    if squeeze_chars.len() == 1 {
741        return squeeze_single_mmap(squeeze_chars[0], data, writer);
742    }
743
744    // Fast path: 2-3 squeeze chars — use memchr2/memchr3 for SIMD scanning
745    if squeeze_chars.len() == 2 {
746        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
747    }
748    if squeeze_chars.len() == 3 {
749        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
750    }
751
752    // General path: chunked output buffer with member check
753    let member = build_member_set(squeeze_chars);
754    let buf_size = data.len().min(BUF_SIZE);
755    let mut outbuf = vec![0u8; buf_size];
756    let mut last_squeezed: u16 = 256;
757
758    for chunk in data.chunks(buf_size) {
759        let len = chunk.len();
760        let mut wp = 0;
761        let mut i = 0;
762
763        unsafe {
764            let inp = chunk.as_ptr();
765            let outp = outbuf.as_mut_ptr();
766
767            while i < len {
768                let b = *inp.add(i);
769                if is_member(&member, b) {
770                    if last_squeezed != b as u16 {
771                        *outp.add(wp) = b;
772                        wp += 1;
773                        last_squeezed = b as u16;
774                    }
775                    i += 1;
776                    // Skip consecutive duplicates
777                    while i < len && *inp.add(i) == b {
778                        i += 1;
779                    }
780                } else {
781                    last_squeezed = 256;
782                    *outp.add(wp) = b;
783                    wp += 1;
784                    i += 1;
785                }
786            }
787        }
788        writer.write_all(&outbuf[..wp])?;
789    }
790    Ok(())
791}
792
793/// Squeeze with 2-3 char sets using SIMD memchr2/memchr3 for fast scanning.
794fn squeeze_multi_mmap<const N: usize>(
795    chars: &[u8],
796    data: &[u8],
797    writer: &mut impl Write,
798) -> io::Result<()> {
799    let buf_size = data.len().min(BUF_SIZE);
800    let mut outbuf = vec![0u8; buf_size];
801    let mut wp = 0;
802    let mut last_squeezed: u16 = 256;
803    let mut cursor = 0;
804
805    macro_rules! find_next {
806        ($data:expr) => {
807            if N == 2 {
808                memchr::memchr2(chars[0], chars[1], $data)
809            } else {
810                memchr::memchr3(chars[0], chars[1], chars[2], $data)
811            }
812        };
813    }
814
815    macro_rules! flush_and_copy {
816        ($src:expr, $len:expr) => {
817            if wp + $len > buf_size {
818                writer.write_all(&outbuf[..wp])?;
819                wp = 0;
820            }
821            if $len > buf_size {
822                writer.write_all($src)?;
823            } else {
824                outbuf[wp..wp + $len].copy_from_slice($src);
825                wp += $len;
826            }
827        };
828    }
829
830    while cursor < data.len() {
831        match find_next!(&data[cursor..]) {
832            Some(offset) => {
833                let pos = cursor + offset;
834                let b = data[pos];
835                // Copy non-member span to output buffer
836                if pos > cursor {
837                    let span = pos - cursor;
838                    flush_and_copy!(&data[cursor..pos], span);
839                    last_squeezed = 256;
840                }
841                if last_squeezed != b as u16 {
842                    if wp >= buf_size {
843                        writer.write_all(&outbuf[..wp])?;
844                        wp = 0;
845                    }
846                    outbuf[wp] = b;
847                    wp += 1;
848                    last_squeezed = b as u16;
849                }
850                // Skip consecutive duplicates of same byte
851                let mut skip = pos + 1;
852                while skip < data.len() && data[skip] == b {
853                    skip += 1;
854                }
855                cursor = skip;
856            }
857            None => {
858                let remaining = data.len() - cursor;
859                flush_and_copy!(&data[cursor..], remaining);
860                break;
861            }
862        }
863    }
864    if wp > 0 {
865        writer.write_all(&outbuf[..wp])?;
866    }
867    Ok(())
868}
869
870/// Squeeze a single repeated character from mmap'd data.
871/// Uses SIMD memchr for fast scanning + buffered output.
872fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
873    if data.is_empty() {
874        return Ok(());
875    }
876
877    // Fast path: no consecutive duplicates — zero-copy output
878    if memchr::memmem::find(data, &[ch, ch]).is_none() {
879        return writer.write_all(data);
880    }
881
882    let buf_size = data.len().min(BUF_SIZE);
883    let mut outbuf = vec![0u8; buf_size];
884    let len = data.len();
885    let mut wp = 0;
886    let mut cursor = 0;
887
888    while cursor < len {
889        match memchr::memchr(ch, &data[cursor..]) {
890            Some(offset) => {
891                let pos = cursor + offset;
892                let gap = pos - cursor;
893                if gap > 0 {
894                    if wp + gap > buf_size {
895                        writer.write_all(&outbuf[..wp])?;
896                        wp = 0;
897                    }
898                    if gap > buf_size {
899                        writer.write_all(&data[cursor..pos])?;
900                    } else {
901                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
902                        wp += gap;
903                    }
904                }
905                if wp >= buf_size {
906                    writer.write_all(&outbuf[..wp])?;
907                    wp = 0;
908                }
909                outbuf[wp] = ch;
910                wp += 1;
911                cursor = pos + 1;
912                while cursor < len && data[cursor] == ch {
913                    cursor += 1;
914                }
915            }
916            None => {
917                let remaining = len - cursor;
918                if remaining > 0 {
919                    if wp + remaining > buf_size {
920                        writer.write_all(&outbuf[..wp])?;
921                        wp = 0;
922                    }
923                    if remaining > buf_size {
924                        writer.write_all(&data[cursor..])?;
925                    } else {
926                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
927                        wp += remaining;
928                    }
929                }
930                break;
931            }
932        }
933    }
934
935    if wp > 0 {
936        writer.write_all(&outbuf[..wp])?;
937    }
938    Ok(())
939}