1use std::io::{self, Read, Write};
2
3use rayon::prelude::*;
4
/// Maximum number of `IoSlice`s handed to a single `write_vectored` call.
/// NOTE(review): presumably chosen to stay at or under the platform's
/// IOV_MAX — confirm against target platforms.
const MAX_IOV: usize = 1024;

/// Chunk size for the chunked (too-big-for-one-write) mmap output paths.
const BUF_SIZE: usize = 4 * 1024 * 1024;

/// Buffer size used by all streaming (reader -> writer) code paths.
const STREAM_BUF: usize = 8 * 1024 * 1024;

/// Inputs at least this large take the rayon-parallel mmap paths.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;
19#[inline]
22fn write_ioslices(writer: &mut impl Write, slices: &[std::io::IoSlice]) -> io::Result<()> {
23 if slices.is_empty() {
24 return Ok(());
25 }
26 for batch in slices.chunks(MAX_IOV) {
27 let total: usize = batch.iter().map(|s| s.len()).sum();
28 match writer.write_vectored(batch) {
29 Ok(n) if n >= total => continue,
30 Ok(mut written) => {
31 for slice in batch {
33 let slen = slice.len();
34 if written >= slen {
35 written -= slen;
36 continue;
37 }
38 if written > 0 {
39 writer.write_all(&slice[written..])?;
40 written = 0;
41 } else {
42 writer.write_all(slice)?;
43 }
44 }
45 }
46 Err(e) => return Err(e),
47 }
48 }
49 Ok(())
50}
51
/// Allocates a `Vec<u8>` of length `len` whose contents are uninitialized.
///
/// NOTE(review): reading any byte before writing it is undefined behavior,
/// even for `u8`. Every caller must fully overwrite the prefix it later
/// reads; the call sites in this file do write before reading, but nothing
/// enforces that contract at the type level.
#[inline]
#[allow(clippy::uninit_vec)]
fn alloc_uninit_vec(len: usize) -> Vec<u8> {
    let mut v = Vec::with_capacity(len);
    unsafe {
        // SAFETY: capacity is exactly `len`; the bytes are left
        // uninitialized and must be written by the caller before any read.
        v.set_len(len);
    }
    v
}
64
/// Builds the 256-entry byte-translation table for a `tr SET1 SET2` style
/// mapping.
///
/// Bytes absent from `set1` map to themselves. `set1[i]` maps to `set2[i]`
/// when `set2` is long enough; once `set2` is exhausted, the remaining
/// `set1` bytes all map to the last byte of `set2` (or stay unchanged when
/// `set2` is empty). Later duplicates in `set1` override earlier ones.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    for (i, &from) in set1.iter().enumerate() {
        let to = set2
            .get(i)
            .or_else(|| set2.last())
            .copied()
            .unwrap_or(from);
        table[from as usize] = to;
    }
    table
}
79
/// Builds a 256-bit membership bitmap for `chars`: bit `ch` (byte `ch >> 3`,
/// bit `ch & 7`, LSB-first) is set iff `ch` appears in `chars`.
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    chars.iter().fold([0u8; 32], |mut set, &ch| {
        set[usize::from(ch >> 3)] |= 1 << (ch & 7);
        set
    })
}
89
/// Tests whether byte `ch` is present in a bitmap built by
/// `build_member_set`.
///
/// Fix: the previous `get_unchecked` + `unsafe` was unnecessary — `ch >> 3`
/// is at most 31 for any `u8`, so the index is provably in bounds and the
/// compiler elides the bounds check on safe indexing.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    set[usize::from(ch >> 3)] & (1 << (ch & 7)) != 0
}
94
/// Rewrites every byte of `data` through the 256-entry lookup `table`.
///
/// Fix: removed the unnecessary `unsafe`/`get_unchecked` — a `u8` index
/// into a `[u8; 256]` is always in bounds, so safe indexing compiles
/// without a bounds check.
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    for b in data.iter_mut() {
        *b = table[usize::from(*b)];
    }
}
102
/// Translates `src` through `table` into the front of `dst`.
///
/// Requires `dst.len() >= src.len()`; this is checked only in debug builds,
/// so release-mode callers must uphold it (the call sites in this file all
/// pass `&mut dst[..n]`-shaped slices matching `src`).
#[inline(always)]
fn translate_to(src: &[u8], dst: &mut [u8], table: &[u8; 256]) {
    debug_assert!(dst.len() >= src.len());
    // SAFETY: every read is at `i < src.len()` and every write at
    // `i < src.len() <= dst.len()` (caller contract above); table indices
    // come from a u8 and are therefore always < 256.
    unsafe {
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let len = src.len();
        let mut i = 0;
        // Manually unrolled 8-at-a-time table-lookup loop.
        while i + 8 <= len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            *dp.add(i + 1) = *table.get_unchecked(*sp.add(i + 1) as usize);
            *dp.add(i + 2) = *table.get_unchecked(*sp.add(i + 2) as usize);
            *dp.add(i + 3) = *table.get_unchecked(*sp.add(i + 3) as usize);
            *dp.add(i + 4) = *table.get_unchecked(*sp.add(i + 4) as usize);
            *dp.add(i + 5) = *table.get_unchecked(*sp.add(i + 5) as usize);
            *dp.add(i + 6) = *table.get_unchecked(*sp.add(i + 6) as usize);
            *dp.add(i + 7) = *table.get_unchecked(*sp.add(i + 7) as usize);
            i += 8;
        }
        // Scalar tail for the remaining 0..=7 bytes.
        while i < len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            i += 1;
        }
    }
}
129
/// Detects whether `table` is the identity map except for one contiguous
/// byte range shifted by a single constant signed offset.
///
/// Returns `Some((lo, hi, offset))` when every non-identity entry lies in
/// the contiguous run `lo..=hi` and satisfies `table[b] == b + offset`
/// (exact signed difference, not mod-256). Returns `None` for the identity
/// table or any mapping that does not fit this shape. This enables the
/// SIMD "add offset within range" fast path.
#[inline]
fn detect_range_offset(table: &[u8; 256]) -> Option<(u8, u8, i8)> {
    // (start, end, signed diff) of the run seen so far.
    let mut run: Option<(u8, u8, i16)> = None;
    for (i, &mapped) in table.iter().enumerate() {
        if mapped == i as u8 {
            continue;
        }
        let diff = mapped as i16 - i as i16;
        run = match run {
            None => Some((i as u8, i as u8, diff)),
            // Extend the run only if contiguous with the same diff.
            Some((start, end, d)) if diff == d && i as u8 == end.wrapping_add(1) => {
                Some((start, i as u8, d))
            }
            Some(_) => return None,
        };
    }
    // The i8 cast wraps for |diff| > 127, which is harmless: all users apply
    // the offset with wrapping (mod-256) addition.
    run.map(|(start, end, d)| (start, end, d as i8))
}
164
/// Copies `src` into `dst`, adding `offset` (mod 256) to every byte in
/// `lo..=hi`, dispatching on runtime CPU features.
#[cfg(target_arch = "x86_64")]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was just verified at runtime.
        unsafe { translate_range_avx2(src, dst, lo, hi, offset) };
    } else {
        // SAFETY: SSE2 is part of the x86_64 baseline, always present.
        unsafe { translate_range_sse2(src, dst, lo, hi, offset) };
    }
}
176
/// AVX2 implementation of the range-offset translation: `dst[i] = src[i] +
/// offset` when `src[i]` is in `lo..=hi`, else `src[i]` unchanged.
///
/// Range test trick: x86 only has *signed* byte compares, so each byte is
/// biased by `0x80 - lo`, mapping `lo` to i8::MIN. A byte is then in
/// `[lo, hi]` iff its biased value is <= i8::MIN + (hi - lo), a single
/// signed compare. Assumes `lo <= hi` (guaranteed by `detect_range_offset`).
///
/// # Safety
/// Caller must ensure AVX2 is available and `dst.len() >= src.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = src.len();
        let mut i = 0;

        // 32 bytes per iteration.
        while i + 32 <= len {
            let input = _mm256_loadu_si256(src.as_ptr().add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            // gt = all-ones where the byte is OUTSIDE the range.
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            // Invert: mask = all-ones where the byte IS in the range.
            let mask = _mm256_cmpeq_epi8(gt, zero);
            // Select `offset` for members, 0 for non-members, then add.
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 32;
        }

        // One 16-byte SSE step if at least 16 bytes remain.
        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        // Scalar tail for the remaining 0..=15 bytes.
        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}
238
/// SSE2 implementation of the range-offset translation; same biased
/// signed-compare trick as `translate_range_avx2` but 16 bytes at a time.
/// Assumes `lo <= hi` (guaranteed by `detect_range_offset`).
///
/// # Safety
/// Caller must ensure SSE2 is available (always true on x86_64) and
/// `dst.len() >= src.len()`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        // Bias maps `lo` to i8::MIN so one signed compare tests membership.
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = src.len();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            // gt = outside range; mask = inside range (inverted via cmpeq 0).
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        // Scalar tail.
        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}
276
/// Portable scalar fallback for non-x86_64 targets: copies `src` into
/// `dst`, adding `offset` (mod 256) to every byte inside `lo..=hi`.
#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    let mut i = 0;
    while i < src.len() {
        let b = src[i];
        dst[i] = if (lo..=hi).contains(&b) {
            b.wrapping_add(offset as u8)
        } else {
            b
        };
        i += 1;
    }
}
288
/// In-place variant of `translate_range_simd`: adds `offset` (mod 256) to
/// every byte of `data` in `lo..=hi`, dispatching on runtime CPU features.
#[cfg(target_arch = "x86_64")]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was just verified at runtime.
        unsafe { translate_range_avx2_inplace(data, lo, hi, offset) };
    } else {
        // SAFETY: SSE2 is part of the x86_64 baseline, always present.
        unsafe { translate_range_sse2_inplace(data, lo, hi, offset) };
    }
}
304
/// In-place AVX2 range-offset translation; same biased signed-compare trick
/// as `translate_range_avx2`, reading and writing through one pointer.
/// Assumes `lo <= hi` (guaranteed by `detect_range_offset`).
///
/// # Safety
/// Caller must ensure AVX2 is available.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        // Bias maps `lo` to i8::MIN so one signed compare tests membership.
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        // 32 bytes per iteration.
        while i + 32 <= len {
            let input = _mm256_loadu_si256(ptr.add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let mask = _mm256_cmpeq_epi8(gt, zero);
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(ptr.add(i) as *mut _, result);
            i += 32;
        }

        // One 16-byte SSE step if at least 16 bytes remain.
        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        // Scalar tail.
        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}
359
/// In-place SSE2 range-offset translation; same biased signed-compare trick
/// as `translate_range_sse2`. Assumes `lo <= hi` (guaranteed by
/// `detect_range_offset`).
///
/// # Safety
/// Caller must ensure SSE2 is available (always true on x86_64).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            // gt = outside range; mask = inside range (inverted via cmpeq 0).
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        // Scalar tail.
        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}
398
/// Portable scalar fallback for non-x86_64 targets: in-place variant of
/// `translate_range_simd`.
#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    for slot in data.iter_mut() {
        if (lo..=hi).contains(&*slot) {
            *slot = slot.wrapping_add(offset as u8);
        }
    }
}
407
/// Streaming translate: reads from `reader`, maps every byte through the
/// `set1` -> `set2` table, and writes the result to `writer`.
///
/// When the table is a single contiguous range plus constant offset
/// (e.g. lowercase -> uppercase), delegates to the in-place SIMD stream
/// path; otherwise uses a table-lookup loop through separate src/dst
/// buffers.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
pub fn translate(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_range_stream(lo, hi, offset, reader, writer);
    }

    let mut src = vec![0u8; STREAM_BUF];
    // dst is uninitialized but translate_to overwrites dst[..n] before the
    // write below reads it.
    let mut dst = alloc_uninit_vec(STREAM_BUF);
    loop {
        let n = read_full(reader, &mut src)?;
        if n == 0 {
            break;
        }
        translate_to(&src[..n], &mut dst[..n], &table);
        writer.write_all(&dst[..n])?;
    }
    Ok(())
}
441
/// Streaming fast path for range+offset translations: reads into one
/// buffer, translates it in place with SIMD, and writes it back out.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
fn translate_range_stream(
    lo: u8,
    hi: u8,
    offset: i8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}
462
/// Reads from `reader` until `buf` is completely filled or EOF is reached,
/// retrying on `Interrupted` (EINTR). Returns the number of bytes read;
/// 0 means immediate EOF (or an empty `buf`).
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    loop {
        if filled == buf.len() {
            return Ok(filled);
        }
        match reader.read(&mut buf[filled..]) {
            Ok(0) => return Ok(filled),
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
}
477
/// Streaming translate + squeeze: translates each byte through the
/// `set1` -> `set2` table, then collapses runs of repeated bytes that are
/// members of `set2` down to a single occurrence.
///
/// `last_squeezed` holds the previous emitted squeeze-set byte, or the
/// sentinel 256 ("none"); it persists across buffer refills so runs that
/// straddle a buffer boundary are still squeezed.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    // Use the SIMD range fast path for the translate step when possible.
    let range_info = detect_range_offset(&table);

    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        if let Some((lo, hi, offset)) = range_info {
            translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        } else {
            translate_inplace(&mut buf[..n], &table);
        }
        // Squeeze by compacting in place: wp (write pos) trails i (read pos).
        let mut wp = 0;
        // SAFETY: all reads are at i < n <= buf.len(); all writes at
        // wp <= i, so both stay in bounds and never overrun unread data.
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
530
/// Streaming delete: removes every byte in `delete_chars` from the input.
///
/// 1-byte sets use memchr; 2-3 byte sets use memchr2/memchr3; larger sets
/// fall through to a bitset-membership compaction loop, manually unrolled
/// 8x.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
pub fn delete(
    delete_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_streaming(delete_chars[0], reader, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_streaming(delete_chars, reader, writer);
    }

    let member = build_member_set(delete_chars);
    let mut buf = vec![0u8; STREAM_BUF];

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        // Compact in place: wp (write pos) trails i (read pos), keeping
        // only non-members.
        let mut wp = 0;
        // SAFETY: reads at i < n <= buf.len(); writes at wp <= i, so no
        // write ever lands on a byte that has not been read yet.
        unsafe {
            let ptr = buf.as_mut_ptr();
            let mut i = 0;
            while i + 8 <= n {
                // Load 8 bytes up front, then test/emit each one.
                let b0 = *ptr.add(i);
                let b1 = *ptr.add(i + 1);
                let b2 = *ptr.add(i + 2);
                let b3 = *ptr.add(i + 3);
                let b4 = *ptr.add(i + 4);
                let b5 = *ptr.add(i + 5);
                let b6 = *ptr.add(i + 6);
                let b7 = *ptr.add(i + 7);

                if !is_member(&member, b0) {
                    *ptr.add(wp) = b0;
                    wp += 1;
                }
                if !is_member(&member, b1) {
                    *ptr.add(wp) = b1;
                    wp += 1;
                }
                if !is_member(&member, b2) {
                    *ptr.add(wp) = b2;
                    wp += 1;
                }
                if !is_member(&member, b3) {
                    *ptr.add(wp) = b3;
                    wp += 1;
                }
                if !is_member(&member, b4) {
                    *ptr.add(wp) = b4;
                    wp += 1;
                }
                if !is_member(&member, b5) {
                    *ptr.add(wp) = b5;
                    wp += 1;
                }
                if !is_member(&member, b6) {
                    *ptr.add(wp) = b6;
                    wp += 1;
                }
                if !is_member(&member, b7) {
                    *ptr.add(wp) = b7;
                    wp += 1;
                }
                i += 8;
            }
            // Scalar tail for the remaining 0..=7 bytes.
            while i < n {
                let b = *ptr.add(i);
                if !is_member(&member, b) {
                    *ptr.add(wp) = b;
                    wp += 1;
                }
                i += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
612
/// Streaming delete of a single byte value, using memchr to find
/// occurrences and compacting the surviving runs in place.
///
/// Invariant: wp <= i at all times, so the in-buffer copies move data left
/// over a region already consumed; `std::ptr::copy` (memmove) is used
/// because source and destination may still overlap.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Keep the run before the match, skip the matched byte.
                    if offset > 0 {
                        // wp == i means the run is already in place.
                        if wp != i {
                            // SAFETY: i + offset <= n and wp + offset <= n.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    // No more matches: keep everything to end of buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie within buf[..n].
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
667
/// Streaming delete of 2 or 3 byte values using memchr2/memchr3; same
/// in-place compaction scheme as `delete_single_streaming` (wp <= i, so
/// overlapping copies use `std::ptr::copy`).
///
/// Expects `chars.len()` to be 2 or 3 (enforced by the caller `delete`);
/// any other length takes the memchr2 branch with `chars[0..2]`.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    // Keep the run before the match, skip the matched byte.
                    if offset > 0 {
                        if wp != i {
                            // SAFETY: i + offset <= n and wp + offset <= n.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    // No more matches: keep everything to end of buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie within buf[..n].
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
727
/// Streaming combined delete + squeeze: drops bytes in `delete_chars`, then
/// collapses runs of repeated bytes in `squeeze_chars` to one occurrence.
///
/// `last_squeezed` uses the sentinel 256 for "no previous squeeze byte" and
/// persists across buffer refills so runs spanning a refill still squeeze.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
pub fn delete_squeeze(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        // In-place compaction: wp (write pos) trails i (read pos).
        let mut wp = 0;
        // SAFETY: reads at i < n <= buf.len(); writes at wp <= i.
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&delete_set, b) {
                    continue;
                }
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
768
/// Streaming squeeze: collapses runs of repeated bytes that are members of
/// `squeeze_chars` down to a single occurrence.
///
/// A single squeeze byte is delegated to a memchr-based fast path.
/// `last_squeezed` (sentinel 256 = none) persists across buffer refills so
/// runs spanning a refill still squeeze.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
pub fn squeeze(
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_stream(squeeze_chars[0], reader, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        // In-place compaction: wp (write pos) trails i (read pos).
        let mut wp = 0;
        // SAFETY: reads at i < n <= buf.len(); writes at wp <= i.
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&member, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
808
/// Streaming squeeze of a single byte value `ch`, using memchr to locate
/// occurrences and compacting surviving data in place (wp <= i invariant;
/// overlapping copies via `std::ptr::copy`).
///
/// `was_squeeze_char` records whether the last byte emitted in the previous
/// buffer was `ch`, so a run that straddles a buffer refill is still
/// collapsed to one byte.
///
/// # Errors
/// Propagates I/O errors from `reader` and `writer`.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let mut wp = 0;
        let mut i = 0;

        while i < n {
            // A run of `ch` continuing from the previous buffer: the first
            // occurrence was already emitted, so skip the whole run.
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Keep the non-ch run before the match.
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie within buf[..n]; wp <= i.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    // Emit exactly one `ch`, then skip the rest of the run.
                    // SAFETY: wp <= i < n, so the write is in bounds.
                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    // No more `ch`: keep everything to end of buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie within buf[..n]; wp <= i.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
890
/// mmap paths buffer the whole result and emit it with one `write_all` when
/// the input is at most this large; bigger inputs fall back to chunked
/// writes of `BUF_SIZE` to bound memory use.
const SINGLE_WRITE_LIMIT: usize = 16 * 1024 * 1024;
/// Translate over an in-memory (e.g. mmap'd) input slice.
///
/// Fast paths: an identity table writes `data` through untouched; a
/// range+offset table uses the SIMD path; everything else uses the lookup
/// table.
///
/// # Errors
/// Propagates I/O errors from `writer`.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    // Identity mapping: no translation needed, just copy through.
    let is_identity = table.iter().enumerate().all(|(i, &v)| v == i as u8);
    if is_identity {
        return writer.write_all(data);
    }

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_mmap_range(data, writer, lo, hi, offset);
    }

    translate_mmap_table(data, writer, &table)
}
930
/// Range+offset translation of an in-memory slice, choosing between
/// rayon-parallel, single-buffer, and chunked strategies by input size.
///
/// # Errors
/// Propagates I/O errors from `writer`.
fn translate_mmap_range(
    data: &[u8],
    writer: &mut impl Write,
    lo: u8,
    hi: u8,
    offset: i8,
) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        // buf is uninitialized; each par_chunks pair fully overwrites its
        // slice before the final write reads it. Using the same chunk_size
        // on both sides keeps src/dst chunks aligned 1:1.
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_range_simd(src_chunk, &mut dst_chunk[..src_chunk.len()], lo, hi, offset);
            });

        return writer.write_all(&buf);
    }

    // Small enough for one buffered write.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_range_simd(data, &mut buf, lo, hi, offset);
        return writer.write_all(&buf);
    }
    // Large but below the parallel threshold: translate and write in
    // BUF_SIZE chunks to bound memory use.
    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_range_simd(chunk, &mut buf[..chunk.len()], lo, hi, offset);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}
969
/// Table-lookup translation of an in-memory slice; same size-based strategy
/// selection as `translate_mmap_range` (parallel / single write / chunked).
///
/// # Errors
/// Propagates I/O errors from `writer`.
fn translate_mmap_table(data: &[u8], writer: &mut impl Write, table: &[u8; 256]) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        // buf is uninitialized; each par_chunks pair fully overwrites its
        // slice. Same chunk_size on both sides keeps chunks aligned 1:1.
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], table);
            });

        return writer.write_all(&buf);
    }

    // Small enough for one buffered write.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_to(data, &mut buf, table);
        return writer.write_all(&buf);
    }
    // Otherwise translate and write in BUF_SIZE chunks.
    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_to(chunk, &mut buf[..chunk.len()], table);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}
1000
/// Translate + squeeze over an in-memory slice.
///
/// The translate step can run in parallel (it is per-byte independent), but
/// the squeeze step is inherently sequential (each decision depends on the
/// previous emitted byte), so it always runs single-threaded over the
/// translated buffer. `last_squeezed` uses sentinel 256 for "none" and, in
/// the chunked path, persists across chunks.
///
/// # Errors
/// Propagates I/O errors from `writer`.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    if data.len() >= PARALLEL_THRESHOLD {
        // Parallel translate into an uninitialized buffer (fully written by
        // the par_chunks loops below), then sequential in-place squeeze.
        let mut translated = alloc_uninit_vec(data.len());
        let range_info = detect_range_offset(&table);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        if let Some((lo, hi, offset)) = range_info {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_range_simd(
                        src_chunk,
                        &mut dst_chunk[..src_chunk.len()],
                        lo,
                        hi,
                        offset,
                    );
                });
        } else {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], &table);
                });
        }

        // Sequential squeeze, compacting in place (wp <= i).
        let mut last_squeezed: u16 = 256;
        let len = translated.len();
        let mut wp = 0;
        // SAFETY: reads at i < len; writes at wp <= i.
        unsafe {
            let ptr = translated.as_mut_ptr();
            let mut i = 0;
            while i < len {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
                i += 1;
            }
        }
        return writer.write_all(&translated[..wp]);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        // Fused translate+squeeze in one pass, writing into an
        // uninitialized buffer that is trimmed to the emitted length.
        let mut buf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        // SAFETY: wp <= i < len == capacity, so all writes are in bounds;
        // the final set_len(wp) only exposes bytes that were written.
        unsafe {
            buf.set_len(data.len());
            let outp: *mut u8 = buf.as_mut_ptr();
            let inp = data.as_ptr();
            let len = data.len();
            let mut wp = 0;
            let mut i = 0;
            while i < len {
                let translated = *table.get_unchecked(*inp.add(i) as usize);
                if is_member(&squeeze_set, translated) {
                    if last_squeezed == translated as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = translated as u16;
                } else {
                    last_squeezed = 256;
                }
                *outp.add(wp) = translated;
                wp += 1;
                i += 1;
            }
            buf.set_len(wp);
        }
        return writer.write_all(&buf);
    }

    // Chunked path: translate each chunk into buf, squeeze it in place,
    // write it out. last_squeezed carries across chunk boundaries.
    let buf_size = data.len().min(BUF_SIZE);
    let mut buf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        translate_to(chunk, &mut buf[..chunk.len()], &table);
        let mut wp = 0;
        // SAFETY: reads at i < chunk.len() <= buf.len(); writes at wp <= i.
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1133
/// Delete over an in-memory slice: removes every byte in `delete_chars`.
///
/// 1-byte and 2-3-byte sets use memchr fast paths; larger sets use the
/// branchless bitset filter, parallelized for large inputs.
///
/// # Errors
/// Propagates I/O errors from `writer`.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_memchr_mmap(delete_chars, data, writer);
    }

    let member = build_member_set(delete_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        // Each parallel chunk filters into the output slot that starts at
        // the same offset as its input chunk; chunk_lens[k] is how many
        // bytes chunk k kept.
        let mut outbuf = alloc_uninit_vec(data.len());
        let chunk_lens: Vec<usize> = data
            .par_chunks(chunk_size)
            .zip(outbuf.par_chunks_mut(chunk_size))
            .map(|(src_chunk, dst_chunk)| delete_chunk_bitset_into(src_chunk, &member, dst_chunk))
            .collect();

        // Compact the per-chunk results: chunk k's kept bytes start at
        // k * chunk_size and move left to write_pos. write_pos <=
        // src_offset always, so the overlapping forward copy (memmove via
        // std::ptr::copy) is safe.
        let mut write_pos = 0;
        let mut src_offset = 0;
        for &clen in &chunk_lens {
            if clen > 0 && src_offset != write_pos {
                // SAFETY: src_offset + clen and write_pos + clen both lie
                // within outbuf (clen <= chunk len at src_offset).
                unsafe {
                    std::ptr::copy(
                        outbuf.as_ptr().add(src_offset),
                        outbuf.as_mut_ptr().add(write_pos),
                        clen,
                    );
                }
            }
            write_pos += clen;
            src_offset += chunk_size;
        }

        return writer.write_all(&outbuf[..write_pos]);
    }

    // Small enough for one buffered write.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = alloc_uninit_vec(data.len());
        let out_pos = delete_chunk_bitset_into(data, &member, &mut outbuf);
        return writer.write_all(&outbuf[..out_pos]);
    }

    // Otherwise filter and write in BUF_SIZE chunks.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = alloc_uninit_vec(buf_size);

    for chunk in data.chunks(buf_size) {
        let out_pos = delete_chunk_bitset_into(chunk, &member, &mut outbuf);
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1203
/// Copies `chunk` into `outbuf`, dropping bytes that are members of
/// `member`. Returns the number of bytes kept.
///
/// Branchless scheme: every byte is written unconditionally at `out_pos`,
/// and `out_pos` advances only for non-members, so a deleted byte is simply
/// overwritten by the next write. Requires `outbuf.len() >= chunk.len()`
/// (all call sites pass equal-or-larger buffers); `out_pos <= i < len`
/// keeps every write in bounds under that requirement.
#[inline]
fn delete_chunk_bitset_into(chunk: &[u8], member: &[u8; 32], outbuf: &mut [u8]) -> usize {
    let len = chunk.len();
    let mut out_pos = 0;
    let mut i = 0;

    // Manually unrolled 8-at-a-time filter loop.
    while i + 8 <= len {
        // SAFETY: i + 7 < len for the reads; out_pos <= i for the writes,
        // in bounds given the outbuf.len() >= chunk.len() requirement.
        unsafe {
            let b0 = *chunk.get_unchecked(i);
            let b1 = *chunk.get_unchecked(i + 1);
            let b2 = *chunk.get_unchecked(i + 2);
            let b3 = *chunk.get_unchecked(i + 3);
            let b4 = *chunk.get_unchecked(i + 4);
            let b5 = *chunk.get_unchecked(i + 5);
            let b6 = *chunk.get_unchecked(i + 6);
            let b7 = *chunk.get_unchecked(i + 7);

            *outbuf.get_unchecked_mut(out_pos) = b0;
            out_pos += !is_member(member, b0) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b1;
            out_pos += !is_member(member, b1) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b2;
            out_pos += !is_member(member, b2) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b3;
            out_pos += !is_member(member, b3) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b4;
            out_pos += !is_member(member, b4) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b5;
            out_pos += !is_member(member, b5) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b6;
            out_pos += !is_member(member, b6) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b7;
            out_pos += !is_member(member, b7) as usize;
        }
        i += 8;
    }

    // Scalar tail for the remaining 0..=7 bytes.
    while i < len {
        // SAFETY: i < len; out_pos <= i, same bound as above.
        unsafe {
            let b = *chunk.get_unchecked(i);
            *outbuf.get_unchecked_mut(out_pos) = b;
            out_pos += !is_member(member, b) as usize;
        }
        i += 1;
    }

    out_pos
}
1254
/// Delete of a single byte value over an in-memory slice.
///
/// Large inputs: rayon chunks each collect their survivors into a private
/// Vec, then all pieces are emitted with one vectored write. Small inputs:
/// one gathered buffer and a single write. Mid-sized inputs: chunked
/// copy-and-write through a fixed buffer.
///
/// # Errors
/// Propagates I/O errors from `writer`.
fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                // Keep the runs between occurrences of `ch`.
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                for pos in memchr::memchr_iter(ch, chunk) {
                    if pos > last {
                        out.extend_from_slice(&chunk[last..pos]);
                    }
                    last = pos + 1;
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        // Write all per-chunk results with vectored I/O, skipping empties.
        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        // Gather survivors into one buffer, single write.
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, data) {
            if pos > last {
                outbuf.extend_from_slice(&data[last..pos]);
            }
            last = pos + 1;
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    // Chunked path for very large inputs below the parallel threshold.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, chunk) {
            if pos > last {
                let run = pos - last;
                outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                wp += run;
            }
            last = pos + 1;
        }
        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1329
/// Delete of 2 or 3 byte values over an in-memory slice using
/// memchr2/memchr3 iterators; same size-based strategy as
/// `delete_single_char_mmap` (parallel + vectored write / single write /
/// chunked).
///
/// Expects `chars.len()` to be 2 or 3 (enforced by the caller
/// `delete_mmap`); a 2-byte set leaves `c2` unused at 0.
///
/// # Errors
/// Propagates I/O errors from `writer`.
fn delete_multi_memchr_mmap(chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    let c0 = chars[0];
    let c1 = if chars.len() >= 2 { chars[1] } else { 0 };
    let c2 = if chars.len() >= 3 { chars[2] } else { 0 };
    let is_three = chars.len() >= 3;

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                // Keep the runs between occurrences of the target bytes.
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                if is_three {
                    for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                } else {
                    for pos in memchr::memchr2_iter(c0, c1, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        // Write all per-chunk results with vectored I/O, skipping empties.
        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        // Gather survivors into one buffer, single write.
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    // Chunked path for very large inputs below the parallel threshold.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;

        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        }

        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1441
1442pub fn delete_squeeze_mmap(
1447 delete_chars: &[u8],
1448 squeeze_chars: &[u8],
1449 data: &[u8],
1450 writer: &mut impl Write,
1451) -> io::Result<()> {
1452 let delete_set = build_member_set(delete_chars);
1453 let squeeze_set = build_member_set(squeeze_chars);
1454
1455 if data.len() <= SINGLE_WRITE_LIMIT {
1457 let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
1458 let mut last_squeezed: u16 = 256;
1459 unsafe {
1460 outbuf.set_len(data.len());
1461 let outp: *mut u8 = outbuf.as_mut_ptr();
1462 let inp = data.as_ptr();
1463 let len = data.len();
1464 let mut out_pos = 0;
1465 let mut i = 0;
1466 while i < len {
1467 let b = *inp.add(i);
1468 if is_member(&delete_set, b) {
1469 i += 1;
1470 continue;
1471 }
1472 if is_member(&squeeze_set, b) {
1473 if last_squeezed == b as u16 {
1474 i += 1;
1475 continue;
1476 }
1477 last_squeezed = b as u16;
1478 } else {
1479 last_squeezed = 256;
1480 }
1481 *outp.add(out_pos) = b;
1482 out_pos += 1;
1483 i += 1;
1484 }
1485 outbuf.set_len(out_pos);
1486 }
1487 return writer.write_all(&outbuf);
1488 }
1489
1490 let buf_size = data.len().min(BUF_SIZE);
1492 let mut outbuf = vec![0u8; buf_size];
1493 let mut last_squeezed: u16 = 256;
1494
1495 for chunk in data.chunks(buf_size) {
1496 let mut out_pos = 0;
1497 for &b in chunk {
1498 if is_member(&delete_set, b) {
1499 continue;
1500 }
1501 if is_member(&squeeze_set, b) {
1502 if last_squeezed == b as u16 {
1503 continue;
1504 }
1505 last_squeezed = b as u16;
1506 } else {
1507 last_squeezed = 256;
1508 }
1509 unsafe {
1510 *outbuf.get_unchecked_mut(out_pos) = b;
1511 }
1512 out_pos += 1;
1513 }
1514 writer.write_all(&outbuf[..out_pos])?;
1515 }
1516 Ok(())
1517}
1518
/// Squeezes runs of any byte in `squeeze_chars` down to a single occurrence
/// and writes the result to `writer`.
///
/// Dispatches on set size: 1–3 bytes go to memchr-based specializations;
/// larger sets use a 256-bit membership table with a rayon-parallel path for
/// large inputs, a single-allocation path for small ones, and a fixed-buffer
/// streaming path in between.
pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_mmap(squeeze_chars[0], data, writer);
    }
    if squeeze_chars.len() == 2 {
        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
    }
    if squeeze_chars.len() == 3 {
        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
    }

    let member = build_member_set(squeeze_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        // Each chunk is squeezed independently; runs split across chunk
        // boundaries are repaired below before writing.
        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                // Last byte actually emitted before this chunk (skipping any
                // empty per-chunk results).
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        // The previous chunk ended mid-run of a squeezable
                        // byte: drop this chunk's leading repeat(s) of it.
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    // Small input: build the full result in one buffer, then a single write.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
        // 256 is an out-of-band sentinel: "no squeezable run is active".
        let mut last_squeezed: u16 = 256;
        let len = data.len();
        let mut wp = 0;
        let mut i = 0;

        // SAFETY: wp <= i <= len at all times, so every write through `outp`
        // lands inside the `len` bytes reserved above, and the final
        // set_len(wp) exposes only initialized bytes.
        unsafe {
            outbuf.set_len(data.len());
            let inp = data.as_ptr();
            let outp: *mut u8 = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    // Skip the remainder of the run.
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
            outbuf.set_len(wp);
        }
        return writer.write_all(&outbuf);
    }

    // Streaming path: one reusable BUF_SIZE scratch buffer per chunk;
    // `last_squeezed` carries run state across chunk boundaries.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        let len = chunk.len();
        let mut wp = 0;
        let mut i = 0;

        // SAFETY: wp <= i < len <= buf_size, so writes stay inside `outbuf`.
        unsafe {
            let inp = chunk.as_ptr();
            let outp = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1649
/// Squeezes one chunk: collapses each run of a byte that is in the `member`
/// bitset (256 bits packed into 32 bytes, bit set = squeezable) down to a
/// single occurrence, returning the squeezed copy. Bytes outside the set
/// pass through unchanged and reset the run state.
fn squeeze_chunk_bitset(chunk: &[u8], member: &[u8; 32]) -> Vec<u8> {
    let mut out = Vec::with_capacity(chunk.len());
    // Last squeezable byte emitted; 256 is an out-of-band "no run" sentinel.
    let mut prev: u16 = 256;
    let mut idx = 0;

    while idx < chunk.len() {
        let byte = chunk[idx];
        // Inline bitset membership test: bit (byte & 7) of word (byte >> 3).
        // byte >> 3 <= 31, so the index is always in bounds.
        let in_set = (member[(byte >> 3) as usize] & (1 << (byte & 7))) != 0;
        if in_set {
            if prev != byte as u16 {
                out.push(byte);
                prev = byte as u16;
            }
            // Consume the rest of the run without emitting anything.
            idx += 1;
            while idx < chunk.len() && chunk[idx] == byte {
                idx += 1;
            }
        } else {
            prev = 256;
            out.push(byte);
            idx += 1;
        }
    }
    out
}
1686
/// Squeezes runs of the N (2 or 3) bytes in `chars`, using memchr2/memchr3
/// to jump between occurrences instead of scanning byte-by-byte.
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    // Parallel path mirrors squeeze_mmap: squeeze chunks independently, then
    // repair runs split across chunk boundaries before the vectored write.
    if data.len() >= PARALLEL_THRESHOLD {
        let member = build_member_set(chars);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                // Last byte actually emitted before this chunk (skipping any
                // empty per-chunk results).
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        // Previous chunk ended mid-run: drop this chunk's
                        // leading repeat(s) of the same byte.
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut wp = 0;
    // 256 is an out-of-band sentinel: "no squeezable run is active".
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Branches on the const generic N; the untaken arm is never executed at
    // runtime (and for N == 2 the memchr3 arm's chars[2] is never evaluated).
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Copies `$len` bytes into the scratch buffer, flushing it first when it
    // would overflow; spans larger than the entire buffer bypass it and are
    // written straight through (the preceding flush keeps output ordered).
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > buf_size {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > buf_size {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                if pos > cursor {
                    // Literal gap before the hit; any intervening byte ends
                    // the current squeeze run.
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                // Emit the squeezable byte only if it does not continue the
                // previous run.
                if last_squeezed != b as u16 {
                    if wp >= buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                // Skip the rest of the run of `b`.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No more squeezable bytes: copy the tail and stop.
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    // Flush whatever is still buffered.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1793
/// Squeezes runs of a single byte `ch` in `data` down to one occurrence,
/// writing the result to `writer`.
fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    // Fast path: if `ch` never appears doubled, squeezing is a no-op and the
    // input can be forwarded untouched.
    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    // Parallel path: squeeze each chunk independently, then repair runs of
    // `ch` split across chunk boundaries before the vectored write.
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut cursor = 0;
                while cursor < chunk.len() {
                    match memchr::memchr(ch, &chunk[cursor..]) {
                        Some(offset) => {
                            let pos = cursor + offset;
                            // Literal gap before the hit.
                            if pos > cursor {
                                out.extend_from_slice(&chunk[cursor..pos]);
                            }
                            // Emit one `ch`, then skip the rest of the run.
                            out.push(ch);
                            cursor = pos + 1;
                            while cursor < chunk.len() && chunk[cursor] == ch {
                                cursor += 1;
                            }
                        }
                        None => {
                            out.extend_from_slice(&chunk[cursor..]);
                            break;
                        }
                    }
                }
                out
            })
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                // Last byte actually emitted before this chunk (skipping any
                // empty per-chunk results).
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if prev_last == ch {
                        // Previous chunk ended with `ch`: drop this chunk's
                        // leading `ch` (at most one survives per-chunk
                        // squeezing) so the boundary run stays collapsed.
                        let skip = result.iter().take_while(|&&b| b == ch).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    // Streaming path: jump between occurrences of `ch` with memchr, copying
    // literal gaps through a reusable scratch buffer. `wp` is the buffered
    // write position.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let len = data.len();
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let gap = pos - cursor;
                if gap > 0 {
                    // Flush the buffer if the gap would overflow it; gaps
                    // larger than the whole buffer bypass it entirely (the
                    // preceding flush keeps output ordered).
                    if wp + gap > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > buf_size {
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }
                // Emit a single `ch`, flushing first if the buffer is full.
                if wp >= buf_size {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;
                // Skip the rest of the run.
                cursor = pos + 1;
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                // No further occurrences: copy the tail and stop.
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > buf_size {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    // Flush whatever is still buffered.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}