use std::io::{self, Read, Write};

use rayon::prelude::*;

/// Maximum number of `IoSlice`s per `write_vectored` call (kernels commonly
/// cap vectored writes at an `IOV_MAX` of 1024).
const MAX_IOV: usize = 1024;

/// Chunk size for buffered passes over memory-mapped input.
const BUF_SIZE: usize = 4 * 1024 * 1024;

/// Buffer size for the streaming (non-mmap) paths.
const STREAM_BUF: usize = 1024 * 1024;

/// Inputs at or above this size are processed on rayon worker threads.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Writes all slices, batching them `MAX_IOV` at a time and finishing any
/// slice that a vectored write left incomplete with `write_all`.
#[inline]
fn write_ioslices(writer: &mut impl Write, slices: &[std::io::IoSlice]) -> io::Result<()> {
    if slices.is_empty() {
        return Ok(());
    }
    for batch in slices.chunks(MAX_IOV) {
        let total: usize = batch.iter().map(|s| s.len()).sum();
        match writer.write_vectored(batch) {
            Ok(n) if n >= total => continue,
            Ok(mut written) => {
                // Partial write: skip fully written slices, then finish the
                // first partial slice and everything after it.
                for slice in batch {
                    let slen = slice.len();
                    if written >= slen {
                        written -= slen;
                        continue;
                    }
                    if written > 0 {
                        writer.write_all(&slice[written..])?;
                        written = 0;
                    } else {
                        writer.write_all(slice)?;
                    }
                }
            }
            Err(e) => return Err(e),
        }
    }
    Ok(())
}

/// Allocates a `Vec<u8>` of `len` without zero-initialization; the callers
/// below always write a byte before reading it back.
#[inline]
#[allow(clippy::uninit_vec)]
fn alloc_uninit_vec(len: usize) -> Vec<u8> {
    let mut v = Vec::with_capacity(len);
    unsafe {
        v.set_len(len);
    }
    v
}

/// Builds the 256-entry translation table for `tr SET1 SET2`. When SET2 is
/// shorter than SET1, its last character is repeated (GNU tr semantics);
/// an empty SET2 leaves the extra SET1 characters unchanged.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    let last = set2.last().copied();
    for (i, &from) in set1.iter().enumerate() {
        table[from as usize] = if i < set2.len() {
            set2[i]
        } else {
            last.unwrap_or(from)
        };
    }
    table
}

/// Builds a 256-bit membership bitset (32 bytes, one bit per byte value).
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    let mut set = [0u8; 32];
    for &ch in chars {
        set[ch as usize >> 3] |= 1 << (ch & 7);
    }
    set
}

/// Tests membership; the index is always in bounds for a 32-byte set.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    unsafe { (*set.get_unchecked(ch as usize >> 3) & (1 << (ch & 7))) != 0 }
}

#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    for b in data.iter_mut() {
        *b = unsafe { *table.get_unchecked(*b as usize) };
    }
}

/// Table-driven translation from `src` into `dst`, manually unrolled 8x.
#[inline(always)]
fn translate_to(src: &[u8], dst: &mut [u8], table: &[u8; 256]) {
    debug_assert!(dst.len() >= src.len());
    unsafe {
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let len = src.len();
        let mut i = 0;
        while i + 8 <= len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            *dp.add(i + 1) = *table.get_unchecked(*sp.add(i + 1) as usize);
            *dp.add(i + 2) = *table.get_unchecked(*sp.add(i + 2) as usize);
            *dp.add(i + 3) = *table.get_unchecked(*sp.add(i + 3) as usize);
            *dp.add(i + 4) = *table.get_unchecked(*sp.add(i + 4) as usize);
            *dp.add(i + 5) = *table.get_unchecked(*sp.add(i + 5) as usize);
            *dp.add(i + 6) = *table.get_unchecked(*sp.add(i + 6) as usize);
            *dp.add(i + 7) = *table.get_unchecked(*sp.add(i + 7) as usize);
            i += 8;
        }
        while i < len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            i += 1;
        }
    }
}

/// Detects whether `table` translates exactly one contiguous byte range
/// `[lo, hi]` by a constant offset (e.g. a-z -> A-Z), which unlocks the SIMD
/// range kernels below. Returns `None` for any other mapping.
#[inline]
fn detect_range_offset(table: &[u8; 256]) -> Option<(u8, u8, i8)> {
    let mut lo: Option<u8> = None;
    let mut hi = 0u8;
    let mut offset = 0i16;

    for i in 0..256 {
        if table[i] != i as u8 {
            let diff = table[i] as i16 - i as i16;
            match lo {
                None => {
                    lo = Some(i as u8);
                    hi = i as u8;
                    offset = diff;
                }
                Some(_) => {
                    if diff != offset || i as u8 != hi.wrapping_add(1) {
                        return None;
                    }
                    hi = i as u8;
                }
            }
        }
    }

    // The i8 truncation of `offset` is harmless: every consumer applies it
    // with wrapping (mod-256) byte arithmetic.
    lo.map(|l| (l, hi, offset as i8))
}
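
// A minimal sanity-check sketch for the range detector; the expected tuples
// follow directly from the definitions above (e.g. a-z -> A-Z is a -32 shift).
#[cfg(test)]
mod range_offset_tests {
    use super::*;

    #[test]
    fn detects_lowercase_to_uppercase() {
        let table = build_translate_table(
            b"abcdefghijklmnopqrstuvwxyz",
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        );
        assert_eq!(detect_range_offset(&table), Some((b'a', b'z', -32)));
    }

    #[test]
    fn rejects_non_contiguous_mappings() {
        // 'a' and 'c' both move by +1 but 'b' does not: not one contiguous range.
        let table = build_translate_table(b"ac", b"bd");
        assert_eq!(detect_range_offset(&table), None);
    }

    #[test]
    fn identity_table_yields_none() {
        let table = build_translate_table(b"", b"");
        assert_eq!(detect_range_offset(&table), None);
    }
}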

/// Applies a constant-offset range translation using the widest SIMD
/// available on x86_64, dispatched at runtime.
#[cfg(target_arch = "x86_64")]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        unsafe { translate_range_avx2(src, dst, lo, hi, offset) };
    } else {
        unsafe { translate_range_sse2(src, dst, lo, hi, offset) };
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        // Bias [lo, hi] onto the bottom of the signed i8 domain so a single
        // signed compare classifies each byte; the wrap-around is exact for
        // any range width.
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = src.len();
        let mut i = 0;

        while i + 32 <= len {
            let input = _mm256_loadu_si256(src.as_ptr().add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            // gt is all-ones for bytes outside [lo, hi]; cmpeq-zero inverts it.
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let mask = _mm256_cmpeq_epi8(gt, zero);
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 32;
        }

        // One SSE2 step handles a 16..=31-byte tail.
        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        // Scalar tail for the last <16 bytes.
        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = src.len();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

/// Scalar fallback for non-x86_64 targets.
#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    for (i, &b) in src.iter().enumerate() {
        dst[i] = if b >= lo && b <= hi {
            b.wrapping_add(offset as u8)
        } else {
            b
        };
    }
}
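
// A minimal equivalence sketch: the range kernel must agree with the obvious
// scalar rule on every byte value, at a length that exercises the 32-, 16-,
// and 1-byte tails. Test-only; it assumes nothing beyond the functions above.
#[cfg(test)]
mod range_simd_tests {
    use super::*;

    #[test]
    fn simd_matches_scalar_reference() {
        let src: Vec<u8> = (0..=255u8).cycle().take(1000 + 17).collect();
        let mut dst = vec![0u8; src.len()];
        let (lo, hi, offset) = (b'a', b'z', -32i8);
        translate_range_simd(&src, &mut dst, lo, hi, offset);
        for (&s, &d) in src.iter().zip(&dst) {
            let expected = if s >= lo && s <= hi {
                s.wrapping_add(offset as u8)
            } else {
                s
            };
            assert_eq!(d, expected);
        }
    }
}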

/// In-place variant of the range translation, dispatched like above.
#[cfg(target_arch = "x86_64")]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        unsafe { translate_range_avx2_inplace(data, lo, hi, offset) };
    } else {
        unsafe { translate_range_sse2_inplace(data, lo, hi, offset) };
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        // Same bias trick as translate_range_avx2, writing back in place.
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        while i + 32 <= len {
            let input = _mm256_loadu_si256(ptr.add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let mask = _mm256_cmpeq_epi8(gt, zero);
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(ptr.add(i) as *mut _, result);
            i += 32;
        }

        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    for b in data.iter_mut() {
        if *b >= lo && *b <= hi {
            *b = b.wrapping_add(offset as u8);
        }
    }
}

/// Detects whether the delete set is exactly one contiguous byte range
/// `[lo, hi]`, which enables the SIMD compaction kernels below.
#[inline]
fn detect_delete_range(chars: &[u8]) -> Option<(u8, u8)> {
    if chars.is_empty() {
        return None;
    }
    // Count distinct characters so duplicates in `chars` can neither fake
    // contiguity nor hide it.
    let mut seen = [false; 256];
    let mut distinct = 0usize;
    let mut lo = chars[0];
    let mut hi = chars[0];
    for &c in chars {
        if !seen[c as usize] {
            seen[c as usize] = true;
            distinct += 1;
        }
        if c < lo {
            lo = c;
        }
        if c > hi {
            hi = c;
        }
    }
    // usize arithmetic avoids u8 overflow when the set spans 0..=255.
    if hi as usize - lo as usize + 1 == distinct {
        Some((lo, hi))
    } else {
        None
    }
}
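
// A minimal sketch of the edge cases the detector must get right: duplicate
// characters and the full 0..=255 span (which would overflow naive u8
// arithmetic).
#[cfg(test)]
mod delete_range_tests {
    use super::*;

    #[test]
    fn detects_contiguous_sets() {
        assert_eq!(detect_delete_range(b"abc"), Some((b'a', b'c')));
        assert_eq!(detect_delete_range(b"cab"), Some((b'a', b'c')));
    }

    #[test]
    fn duplicates_do_not_fake_contiguity() {
        // {a, c} with a repeated 'c' is still missing 'b'.
        assert_eq!(detect_delete_range(b"acc"), None);
    }

    #[test]
    fn full_byte_span_does_not_overflow() {
        let all: Vec<u8> = (0..=255u8).collect();
        assert_eq!(detect_delete_range(&all), Some((0, 255)));
    }
}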

/// Copies `src` into `dst`, dropping bytes in `[lo, hi]`; returns the number
/// of bytes written. Runtime-dispatched on x86_64.
#[cfg(target_arch = "x86_64")]
fn delete_range_chunk(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    if is_x86_feature_detected!("avx2") {
        unsafe { delete_range_avx2(src, dst, lo, hi) }
    } else {
        unsafe { delete_range_sse2(src, dst, lo, hi) }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn delete_range_avx2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let zero = _mm256_setzero_si256();

        let len = src.len();
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let mut ri = 0;
        let mut wp = 0;

        while ri + 32 <= len {
            let input = _mm256_loadu_si256(sp.add(ri) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let in_range = _mm256_cmpeq_epi8(gt, zero);
            // A set bit marks a byte to keep; scatter the survivors.
            let keep_mask = !(_mm256_movemask_epi8(in_range) as u32);

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1; // clear the lowest set bit
            }
            ri += 32;
        }

        if ri + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(sp.add(ri) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let in_range = _mm_cmpeq_epi8(gt, zero128);
            let keep_mask = !(_mm_movemask_epi8(in_range) as u32) & 0xFFFF;

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1;
            }
            ri += 16;
        }

        while ri < len {
            let b = *sp.add(ri);
            if b < lo || b > hi {
                *dp.add(wp) = b;
                wp += 1;
            }
            ri += 1;
        }

        wp
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn delete_range_sse2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let zero = _mm_setzero_si128();

        let len = src.len();
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let mut ri = 0;
        let mut wp = 0;

        while ri + 16 <= len {
            let input = _mm_loadu_si128(sp.add(ri) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let in_range = _mm_cmpeq_epi8(gt, zero);
            let keep_mask = !(_mm_movemask_epi8(in_range) as u32) & 0xFFFF;

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1;
            }
            ri += 16;
        }

        while ri < len {
            let b = *sp.add(ri);
            if b < lo || b > hi {
                *dp.add(wp) = b;
                wp += 1;
            }
            ri += 1;
        }

        wp
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn delete_range_chunk(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    let mut wp = 0;
    for &b in src {
        if b < lo || b > hi {
            dst[wp] = b;
            wp += 1;
        }
    }
    wp
}

/// Streaming delete for a contiguous range: read a block, compact, write.
fn delete_range_streaming(
    lo: u8,
    hi: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut src = vec![0u8; STREAM_BUF];
    let mut dst = alloc_uninit_vec(STREAM_BUF);
    loop {
        let n = read_full(reader, &mut src)?;
        if n == 0 {
            break;
        }
        let wp = delete_range_chunk(&src[..n], &mut dst, lo, hi);
        if wp > 0 {
            writer.write_all(&dst[..wp])?;
        }
    }
    Ok(())
}

/// `tr SET1 SET2` over a byte stream. Uses the SIMD range kernel when the
/// translation is a single constant-offset range, else the lookup table.
pub fn translate(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_range_stream(lo, hi, offset, reader, writer);
    }

    let mut src = vec![0u8; STREAM_BUF];
    let mut dst = alloc_uninit_vec(STREAM_BUF);
    loop {
        let n = read_full(reader, &mut src)?;
        if n == 0 {
            break;
        }
        translate_to(&src[..n], &mut dst[..n], &table);
        writer.write_all(&dst[..n])?;
    }
    Ok(())
}

fn translate_range_stream(
    lo: u8,
    hi: u8,
    offset: i8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}

/// Reads until `buf` is full or EOF, retrying on `Interrupted`.
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}

/// `tr -s SET1 SET2`: translate, then squeeze repeats of bytes in SET2.
/// `last_squeezed` uses 256 as an out-of-band sentinel so the squeeze state
/// survives across buffer refills.
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    let range_info = detect_range_offset(&table);

    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        if let Some((lo, hi, offset)) = range_info {
            translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        } else {
            translate_inplace(&mut buf[..n], &table);
        }
        // Squeeze in place by compacting the buffer.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// `tr -d SET`: picks a memchr, SIMD-range, or bitset strategy by set shape.
pub fn delete(
    delete_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_streaming(delete_chars[0], reader, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_streaming(delete_chars, reader, writer);
    }

    if let Some((lo, hi)) = detect_delete_range(delete_chars) {
        return delete_range_streaming(lo, hi, reader, writer);
    }

    let member = build_member_set(delete_chars);
    let mut buf = vec![0u8; STREAM_BUF];

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        // Compact in place, testing membership eight bytes per iteration.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            let mut i = 0;
            while i + 8 <= n {
                let b0 = *ptr.add(i);
                let b1 = *ptr.add(i + 1);
                let b2 = *ptr.add(i + 2);
                let b3 = *ptr.add(i + 3);
                let b4 = *ptr.add(i + 4);
                let b5 = *ptr.add(i + 5);
                let b6 = *ptr.add(i + 6);
                let b7 = *ptr.add(i + 7);

                if !is_member(&member, b0) {
                    *ptr.add(wp) = b0;
                    wp += 1;
                }
                if !is_member(&member, b1) {
                    *ptr.add(wp) = b1;
                    wp += 1;
                }
                if !is_member(&member, b2) {
                    *ptr.add(wp) = b2;
                    wp += 1;
                }
                if !is_member(&member, b3) {
                    *ptr.add(wp) = b3;
                    wp += 1;
                }
                if !is_member(&member, b4) {
                    *ptr.add(wp) = b4;
                    wp += 1;
                }
                if !is_member(&member, b5) {
                    *ptr.add(wp) = b5;
                    wp += 1;
                }
                if !is_member(&member, b6) {
                    *ptr.add(wp) = b6;
                    wp += 1;
                }
                if !is_member(&member, b7) {
                    *ptr.add(wp) = b7;
                    wp += 1;
                }
                i += 8;
            }
            while i < n {
                let b = *ptr.add(i);
                if !is_member(&member, b) {
                    *ptr.add(wp) = b;
                    wp += 1;
                }
                i += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// Streaming delete of a single byte value, driven by memchr: copy the gap
/// before each hit, then skip the hit itself.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// Streaming delete of two or three byte values via memchr2/memchr3.
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// `tr -d SET1 -s SET2`: drop bytes in the delete set, then squeeze repeats
/// of bytes in the squeeze set.
pub fn delete_squeeze(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&delete_set, b) {
                    continue;
                }
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
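
// A quick sketch of the combined -d/-s path: deletion happens first, so a
// run split only by deleted bytes still squeezes (matching GNU tr, which
// deletes and then squeezes the result).
#[cfg(test)]
mod delete_squeeze_tests {
    use super::*;

    #[test]
    fn deletes_then_squeezes() {
        let mut input: &[u8] = b"a--bb--c";
        let mut out = Vec::new();
        delete_squeeze(b"-", b"b", &mut input, &mut out).unwrap();
        assert_eq!(out, b"abc");
    }
}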

/// `tr -s SET`: squeeze repeats of bytes in the set.
pub fn squeeze(
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_stream(squeeze_chars[0], reader, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&member, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// Single-character squeeze driven by memchr. `was_squeeze_char` carries the
/// "last byte emitted was `ch`" state across buffer refills.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let mut wp = 0;
        let mut i = 0;

        while i < n {
            // A run continuing from the previous buffer was already emitted.
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Copy the gap, emit one `ch`, skip the rest of the run.
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// Inputs up to this size are transformed in one buffer and written with a
/// single `write_all`; larger inputs fall back to chunked writes.
const SINGLE_WRITE_LIMIT: usize = 16 * 1024 * 1024;

/// `tr SET1 SET2` over a memory-mapped input: identity check first, then the
/// SIMD range kernel if applicable, else the table kernel.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    let is_identity = table.iter().enumerate().all(|(i, &v)| v == i as u8);
    if is_identity {
        return writer.write_all(data);
    }

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_mmap_range(data, writer, lo, hi, offset);
    }

    translate_mmap_table(data, writer, &table)
}

fn translate_mmap_range(
    data: &[u8],
    writer: &mut impl Write,
    lo: u8,
    hi: u8,
    offset: i8,
) -> io::Result<()> {
    // Large inputs: translate chunk-parallel into one output buffer.
    if data.len() >= PARALLEL_THRESHOLD {
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_range_simd(src_chunk, &mut dst_chunk[..src_chunk.len()], lo, hi, offset);
            });

        return writer.write_all(&buf);
    }

    // Mid-size inputs: one pass, one write.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_range_simd(data, &mut buf, lo, hi, offset);
        return writer.write_all(&buf);
    }

    // Otherwise translate and write in BUF_SIZE chunks.
    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_range_simd(chunk, &mut buf[..chunk.len()], lo, hi, offset);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}

fn translate_mmap_table(data: &[u8], writer: &mut impl Write, table: &[u8; 256]) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], table);
            });

        return writer.write_all(&buf);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_to(data, &mut buf, table);
        return writer.write_all(&buf);
    }

    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_to(chunk, &mut buf[..chunk.len()], table);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}

/// `tr -s SET1 SET2` over a memory-mapped input: translate (in parallel when
/// large), then squeeze sequentially, since squeezing carries state.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    if data.len() >= PARALLEL_THRESHOLD {
        let mut translated = alloc_uninit_vec(data.len());
        let range_info = detect_range_offset(&table);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        if let Some((lo, hi, offset)) = range_info {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_range_simd(
                        src_chunk,
                        &mut dst_chunk[..src_chunk.len()],
                        lo,
                        hi,
                        offset,
                    );
                });
        } else {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], &table);
                });
        }

        // Sequential squeeze pass, compacting the translated buffer in place.
        let mut last_squeezed: u16 = 256;
        let len = translated.len();
        let mut wp = 0;
        unsafe {
            let ptr = translated.as_mut_ptr();
            let mut i = 0;
            while i < len {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
                i += 1;
            }
        }
        return writer.write_all(&translated[..wp]);
    }

    // Mid-size inputs: fused translate+squeeze in a single pass.
    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        unsafe {
            buf.set_len(data.len());
            let outp: *mut u8 = buf.as_mut_ptr();
            let inp = data.as_ptr();
            let len = data.len();
            let mut wp = 0;
            let mut i = 0;
            while i < len {
                let translated = *table.get_unchecked(*inp.add(i) as usize);
                if is_member(&squeeze_set, translated) {
                    if last_squeezed == translated as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = translated as u16;
                } else {
                    last_squeezed = 256;
                }
                *outp.add(wp) = translated;
                wp += 1;
                i += 1;
            }
            buf.set_len(wp);
        }
        return writer.write_all(&buf);
    }

    // Huge inputs: chunked translate+squeeze with carried squeeze state.
    let buf_size = data.len().min(BUF_SIZE);
    let mut buf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        translate_to(chunk, &mut buf[..chunk.len()], &table);
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

/// `tr -d SET` over a memory-mapped input, choosing a strategy by set shape.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_memchr_mmap(delete_chars, data, writer);
    }

    if let Some((lo, hi)) = detect_delete_range(delete_chars) {
        return delete_range_mmap(data, writer, lo, hi);
    }

    let member = build_member_set(delete_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        // Each worker compacts its chunk into the matching output slot...
        let mut outbuf = alloc_uninit_vec(data.len());
        let chunk_lens: Vec<usize> = data
            .par_chunks(chunk_size)
            .zip(outbuf.par_chunks_mut(chunk_size))
            .map(|(src_chunk, dst_chunk)| delete_chunk_bitset_into(src_chunk, &member, dst_chunk))
            .collect();

        // ...then the surviving runs are slid together left to right
        // (ptr::copy is memmove, so overlapping regions are fine).
        let mut write_pos = 0;
        let mut src_offset = 0;
        for &clen in &chunk_lens {
            if clen > 0 && src_offset != write_pos {
                unsafe {
                    std::ptr::copy(
                        outbuf.as_ptr().add(src_offset),
                        outbuf.as_mut_ptr().add(write_pos),
                        clen,
                    );
                }
            }
            write_pos += clen;
            src_offset += chunk_size;
        }

        return writer.write_all(&outbuf[..write_pos]);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = alloc_uninit_vec(data.len());
        let out_pos = delete_chunk_bitset_into(data, &member, &mut outbuf);
        return writer.write_all(&outbuf[..out_pos]);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = alloc_uninit_vec(buf_size);

    for chunk in data.chunks(buf_size) {
        let out_pos = delete_chunk_bitset_into(chunk, &member, &mut outbuf);
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}

fn delete_range_mmap(data: &[u8], writer: &mut impl Write, lo: u8, hi: u8) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = alloc_uninit_vec(chunk.len());
                let wp = delete_range_chunk(chunk, &mut out, lo, hi);
                unsafe { out.set_len(wp) };
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = alloc_uninit_vec(data.len());
        let wp = delete_range_chunk(data, &mut outbuf, lo, hi);
        return writer.write_all(&outbuf[..wp]);
    }

    let mut outbuf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        let wp = delete_range_chunk(chunk, &mut outbuf, lo, hi);
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

/// Branchless bitset compaction: each byte is stored unconditionally and the
/// write cursor advances only when the byte is not in the delete set.
#[inline]
fn delete_chunk_bitset_into(chunk: &[u8], member: &[u8; 32], outbuf: &mut [u8]) -> usize {
    let len = chunk.len();
    let mut out_pos = 0;
    let mut i = 0;

    while i + 8 <= len {
        unsafe {
            let b0 = *chunk.get_unchecked(i);
            let b1 = *chunk.get_unchecked(i + 1);
            let b2 = *chunk.get_unchecked(i + 2);
            let b3 = *chunk.get_unchecked(i + 3);
            let b4 = *chunk.get_unchecked(i + 4);
            let b5 = *chunk.get_unchecked(i + 5);
            let b6 = *chunk.get_unchecked(i + 6);
            let b7 = *chunk.get_unchecked(i + 7);

            *outbuf.get_unchecked_mut(out_pos) = b0;
            out_pos += !is_member(member, b0) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b1;
            out_pos += !is_member(member, b1) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b2;
            out_pos += !is_member(member, b2) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b3;
            out_pos += !is_member(member, b3) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b4;
            out_pos += !is_member(member, b4) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b5;
            out_pos += !is_member(member, b5) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b6;
            out_pos += !is_member(member, b6) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b7;
            out_pos += !is_member(member, b7) as usize;
        }
        i += 8;
    }

    while i < len {
        unsafe {
            let b = *chunk.get_unchecked(i);
            *outbuf.get_unchecked_mut(out_pos) = b;
            out_pos += !is_member(member, b) as usize;
        }
        i += 1;
    }

    out_pos
}

fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                for pos in memchr::memchr_iter(ch, chunk) {
                    if pos > last {
                        out.extend_from_slice(&chunk[last..pos]);
                    }
                    last = pos + 1;
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, data) {
            if pos > last {
                outbuf.extend_from_slice(&data[last..pos]);
            }
            last = pos + 1;
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, chunk) {
            if pos > last {
                let run = pos - last;
                outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                wp += run;
            }
            last = pos + 1;
        }
        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

fn delete_multi_memchr_mmap(chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    let c0 = chars[0];
    let c1 = if chars.len() >= 2 { chars[1] } else { 0 };
    let c2 = if chars.len() >= 3 { chars[2] } else { 0 };
    let is_three = chars.len() >= 3;

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                if is_three {
                    for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                } else {
                    for pos in memchr::memchr2_iter(c0, c1, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;

        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        }

        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

/// `tr -d SET1 -s SET2` over a memory-mapped input. Kept sequential because
/// the squeeze state crosses every chunk boundary.
pub fn delete_squeeze_mmap(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        unsafe {
            outbuf.set_len(data.len());
            let outp: *mut u8 = outbuf.as_mut_ptr();
            let inp = data.as_ptr();
            let len = data.len();
            let mut out_pos = 0;
            let mut i = 0;
            while i < len {
                let b = *inp.add(i);
                if is_member(&delete_set, b) {
                    i += 1;
                    continue;
                }
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *outp.add(out_pos) = b;
                out_pos += 1;
                i += 1;
            }
            outbuf.set_len(out_pos);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        let mut out_pos = 0;
        for &b in chunk {
            if is_member(&delete_set, b) {
                continue;
            }
            if is_member(&squeeze_set, b) {
                if last_squeezed == b as u16 {
                    continue;
                }
                last_squeezed = b as u16;
            } else {
                last_squeezed = 256;
            }
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = b;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}

/// `tr -s SET` over a memory-mapped input.
pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_mmap(squeeze_chars[0], data, writer);
    }
    if squeeze_chars.len() == 2 {
        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
    }
    if squeeze_chars.len() == 3 {
        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
    }

    let member = build_member_set(squeeze_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        // Stitch chunk boundaries: when the previous output ends with a
        // squeezable byte, drop the current output's leading repeat of it.
        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        let len = data.len();
        let mut wp = 0;
        let mut i = 0;

        unsafe {
            outbuf.set_len(data.len());
            let inp = data.as_ptr();
            let outp: *mut u8 = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
            outbuf.set_len(wp);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        let len = chunk.len();
        let mut wp = 0;
        let mut i = 0;

        unsafe {
            let inp = chunk.as_ptr();
            let outp = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

/// Squeezes one chunk using the membership bitset; used by the parallel paths.
fn squeeze_chunk_bitset(chunk: &[u8], member: &[u8; 32]) -> Vec<u8> {
    let len = chunk.len();
    let mut out = Vec::with_capacity(len);
    let mut last_squeezed: u16 = 256;
    let mut i = 0;

    unsafe {
        out.set_len(len);
        let inp = chunk.as_ptr();
        let outp: *mut u8 = out.as_mut_ptr();
        let mut wp = 0;

        while i < len {
            let b = *inp.add(i);
            if is_member(member, b) {
                if last_squeezed != b as u16 {
                    *outp.add(wp) = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                i += 1;
                while i < len && *inp.add(i) == b {
                    i += 1;
                }
            } else {
                last_squeezed = 256;
                *outp.add(wp) = b;
                wp += 1;
                i += 1;
            }
        }
        out.set_len(wp);
    }
    out
}

/// `tr -s SET` for 2- or 3-character sets, scanning with memchr2/memchr3.
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let member = build_member_set(chars);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        // Same boundary stitching as squeeze_mmap above.
        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut wp = 0;
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Copies `$len` bytes into `outbuf`, flushing first when they do not fit;
    // spans larger than the whole buffer are written through directly.
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > buf_size {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > buf_size {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                if last_squeezed != b as u16 {
                    if wp >= buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    // Fast path: no doubled occurrence means there is nothing to squeeze.
    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut cursor = 0;
                while cursor < chunk.len() {
                    match memchr::memchr(ch, &chunk[cursor..]) {
                        Some(offset) => {
                            let pos = cursor + offset;
                            if pos > cursor {
                                out.extend_from_slice(&chunk[cursor..pos]);
                            }
                            out.push(ch);
                            cursor = pos + 1;
                            while cursor < chunk.len() && chunk[cursor] == ch {
                                cursor += 1;
                            }
                        }
                        None => {
                            out.extend_from_slice(&chunk[cursor..]);
                            break;
                        }
                    }
                }
                out
            })
            .collect();

        // Stitch boundaries: drop a leading `ch` that continues a run from
        // the previous chunk's output.
        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if prev_last == ch {
                        let skip = result.iter().take_while(|&&b| b == ch).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let len = data.len();
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let gap = pos - cursor;
                if gap > 0 {
                    if wp + gap > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > buf_size {
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }
                if wp >= buf_size {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;
                cursor = pos + 1;
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > buf_size {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
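
// A minimal end-to-end sketch of the streaming entry points, driven through
// in-memory readers and writers. Expected outputs follow the GNU tr-style
// semantics described above; the tests assume only the public functions in
// this module.
#[cfg(test)]
mod smoke_tests {
    use super::*;

    #[test]
    fn translate_uppercases() {
        let mut input: &[u8] = b"hello, world";
        let mut out = Vec::new();
        translate(
            b"abcdefghijklmnopqrstuvwxyz",
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
            &mut input,
            &mut out,
        )
        .unwrap();
        assert_eq!(out, b"HELLO, WORLD");
    }

    #[test]
    fn delete_strips_vowels() {
        let mut input: &[u8] = b"hello world";
        let mut out = Vec::new();
        delete(b"aeiou", &mut input, &mut out).unwrap();
        assert_eq!(out, b"hll wrld");
    }

    #[test]
    fn squeeze_collapses_runs() {
        let mut input: &[u8] = b"a  b   c";
        let mut out = Vec::new();
        squeeze(b" ", &mut input, &mut out).unwrap();
        assert_eq!(out, b"a b c");
    }
}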