use std::io::{self, Read, Write};

use rayon::prelude::*;

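/// Maximum number of `IoSlice`s handed to one `write_vectored` call
/// (matches the usual `IOV_MAX` limit on Linux).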
const MAX_IOV: usize = 1024;

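/// Chunk size used when writing transformed in-memory (mmap) data (4 MiB).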
const BUF_SIZE: usize = 4 * 1024 * 1024;

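/// Read-buffer size for the streaming (non-mmap) paths (1 MiB).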
const STREAM_BUF: usize = 1024 * 1024;

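/// Larger read buffer for the streaming translate paths, which are cheap
/// per byte and so benefit from fewer, bigger reads (4 MiB).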
const TRANSLATE_STREAM_BUF: usize = 4 * 1024 * 1024;

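/// Inputs at least this large are split across rayon worker threads.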
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

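/// Writes a list of `IoSlice`s, batching at `MAX_IOV` per `write_vectored`
/// call and falling back to `write_all` for the tail of a short write.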
#[inline]
fn write_ioslices(writer: &mut impl Write, slices: &[std::io::IoSlice]) -> io::Result<()> {
    if slices.is_empty() {
        return Ok(());
    }
    for batch in slices.chunks(MAX_IOV) {
        let total: usize = batch.iter().map(|s| s.len()).sum();
        match writer.write_vectored(batch) {
            Ok(n) if n >= total => continue,
            Ok(mut written) => {
                for slice in batch {
                    let slen = slice.len();
                    if written >= slen {
                        written -= slen;
                        continue;
                    }
                    if written > 0 {
                        writer.write_all(&slice[written..])?;
                        written = 0;
                    } else {
                        writer.write_all(slice)?;
                    }
                }
            }
            Err(e) => return Err(e),
        }
    }
    Ok(())
}

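/// Allocates a `Vec<u8>` of length `len` without zero-initializing it.
/// Callers must overwrite every byte before reading it back.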
#[inline]
#[allow(clippy::uninit_vec)]
fn alloc_uninit_vec(len: usize) -> Vec<u8> {
    let mut v = Vec::with_capacity(len);
    unsafe {
        v.set_len(len);
    }
    v
}

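/// Builds the 256-entry byte translation table. When `set1` is longer than
/// `set2`, the extra bytes map to the last byte of `set2` (mirroring `tr`'s
/// padding rule); an empty `set2` leaves those entries as identity.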
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    let last = set2.last().copied();
    for (i, &from) in set1.iter().enumerate() {
        table[from as usize] = if i < set2.len() {
            set2[i]
        } else {
            last.unwrap_or(from)
        };
    }
    table
}

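/// Builds a 256-bit membership bitset: one bit per byte value.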
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    let mut set = [0u8; 32];
    for &ch in chars {
        set[ch as usize >> 3] |= 1 << (ch & 7);
    }
    set
}

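/// Tests membership in a bitset built by `build_member_set`. The index
/// `ch >> 3` is always below 32, so the unchecked access is sound.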
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    unsafe { (*set.get_unchecked(ch as usize >> 3) & (1 << (ch & 7))) != 0 }
}

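/// Applies the translation table in place, unrolled eight bytes per
/// iteration to keep the table lookups pipelined.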
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    let len = data.len();
    let ptr = data.as_mut_ptr();
    let mut i = 0;
    unsafe {
        while i + 8 <= len {
            *ptr.add(i) = *table.get_unchecked(*ptr.add(i) as usize);
            *ptr.add(i + 1) = *table.get_unchecked(*ptr.add(i + 1) as usize);
            *ptr.add(i + 2) = *table.get_unchecked(*ptr.add(i + 2) as usize);
            *ptr.add(i + 3) = *table.get_unchecked(*ptr.add(i + 3) as usize);
            *ptr.add(i + 4) = *table.get_unchecked(*ptr.add(i + 4) as usize);
            *ptr.add(i + 5) = *table.get_unchecked(*ptr.add(i + 5) as usize);
            *ptr.add(i + 6) = *table.get_unchecked(*ptr.add(i + 6) as usize);
            *ptr.add(i + 7) = *table.get_unchecked(*ptr.add(i + 7) as usize);
            i += 8;
        }
        while i < len {
            *ptr.add(i) = *table.get_unchecked(*ptr.add(i) as usize);
            i += 1;
        }
    }
}

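/// Translates `src` into `dst` through the table; `dst` must be at least as
/// long as `src`. Same eight-way unrolling as `translate_inplace`.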
#[inline(always)]
fn translate_to(src: &[u8], dst: &mut [u8], table: &[u8; 256]) {
    debug_assert!(dst.len() >= src.len());
    unsafe {
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let len = src.len();
        let mut i = 0;
        while i + 8 <= len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            *dp.add(i + 1) = *table.get_unchecked(*sp.add(i + 1) as usize);
            *dp.add(i + 2) = *table.get_unchecked(*sp.add(i + 2) as usize);
            *dp.add(i + 3) = *table.get_unchecked(*sp.add(i + 3) as usize);
            *dp.add(i + 4) = *table.get_unchecked(*sp.add(i + 4) as usize);
            *dp.add(i + 5) = *table.get_unchecked(*sp.add(i + 5) as usize);
            *dp.add(i + 6) = *table.get_unchecked(*sp.add(i + 6) as usize);
            *dp.add(i + 7) = *table.get_unchecked(*sp.add(i + 7) as usize);
            i += 8;
        }
        while i < len {
            *dp.add(i) = *table.get_unchecked(*sp.add(i) as usize);
            i += 1;
        }
    }
}

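/// Detects tables that add one constant offset to a single contiguous byte
/// range and are identity everywhere else (e.g. `a-z` -> `A-Z`). Returns
/// `(lo, hi, offset)` for that range, or `None` if the table doesn't fit
/// this shape. Such tables can skip the lookup entirely and use the SIMD
/// range kernels below; the truncating `i16` -> `i8` cast is harmless
/// because all later adds are mod 256 anyway.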
#[inline]
fn detect_range_offset(table: &[u8; 256]) -> Option<(u8, u8, i8)> {
    let mut lo: Option<u8> = None;
    let mut hi = 0u8;
    let mut offset = 0i16;

    for i in 0..256 {
        if table[i] != i as u8 {
            let diff = table[i] as i16 - i as i16;
            match lo {
                None => {
                    lo = Some(i as u8);
                    hi = i as u8;
                    offset = diff;
                }
                Some(_) => {
                    if diff != offset || i as u8 != hi.wrapping_add(1) {
                        return None;
                    }
                    hi = i as u8;
                }
            }
        }
    }

    lo.map(|l| (l, hi, offset as i8))
}

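/// Adds `offset` to every byte in `lo..=hi`, leaving other bytes untouched.
/// Dispatches to AVX2 when available, otherwise SSE2 (baseline on x86_64).
///
/// The range test uses the signed-compare trick: biasing bytes by
/// `0x80 - lo` maps `lo` to -128, so a byte is in range exactly when its
/// biased value is at most `-128 + (hi - lo)`, checked with one signed
/// compare per vector.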
#[cfg(target_arch = "x86_64")]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        unsafe { translate_range_avx2(src, dst, lo, hi, offset) };
    } else {
        unsafe { translate_range_sse2(src, dst, lo, hi, offset) };
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = src.len();
        let mut i = 0;

        while i + 32 <= len {
            let input = _mm256_loadu_si256(src.as_ptr().add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let mask = _mm256_cmpeq_epi8(gt, zero);
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 32;
        }

        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = src.len();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(src.as_ptr().add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(dst.as_mut_ptr().add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *src.get_unchecked(i);
            *dst.get_unchecked_mut(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd(src: &[u8], dst: &mut [u8], lo: u8, hi: u8, offset: i8) {
    for (i, &b) in src.iter().enumerate() {
        dst[i] = if b >= lo && b <= hi {
            b.wrapping_add(offset as u8)
        } else {
            b
        };
    }
}

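/// In-place variant of `translate_range_simd`, used by the streaming paths
/// where the read buffer can be mutated directly.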
#[cfg(target_arch = "x86_64")]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    if is_x86_feature_detected!("avx2") {
        unsafe { translate_range_avx2_inplace(data, lo, hi, offset) };
    } else {
        unsafe { translate_range_sse2_inplace(data, lo, hi, offset) };
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn translate_range_avx2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm256_set1_epi8(offset);
        let zero = _mm256_setzero_si256();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        while i + 32 <= len {
            let input = _mm256_loadu_si256(ptr.add(i) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let mask = _mm256_cmpeq_epi8(gt, zero);
            let offset_masked = _mm256_and_si256(mask, offset_v);
            let result = _mm256_add_epi8(input, offset_masked);
            _mm256_storeu_si256(ptr.add(i) as *mut _, result);
            i += 32;
        }

        if i + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let offset_v128 = _mm_set1_epi8(offset);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let mask = _mm_cmpeq_epi8(gt, zero128);
            let offset_masked = _mm_and_si128(mask, offset_v128);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn translate_range_sse2_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let offset_v = _mm_set1_epi8(offset);
        let zero = _mm_setzero_si128();

        let len = data.len();
        let ptr = data.as_mut_ptr();
        let mut i = 0;

        while i + 16 <= len {
            let input = _mm_loadu_si128(ptr.add(i) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let mask = _mm_cmpeq_epi8(gt, zero);
            let offset_masked = _mm_and_si128(mask, offset_v);
            let result = _mm_add_epi8(input, offset_masked);
            _mm_storeu_si128(ptr.add(i) as *mut _, result);
            i += 16;
        }

        while i < len {
            let b = *ptr.add(i);
            *ptr.add(i) = if b >= lo && b <= hi {
                b.wrapping_add(offset as u8)
            } else {
                b
            };
            i += 1;
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn translate_range_simd_inplace(data: &mut [u8], lo: u8, hi: u8, offset: i8) {
    for b in data.iter_mut() {
        if *b >= lo && *b <= hi {
            *b = b.wrapping_add(offset as u8);
        }
    }
}

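/// Returns `Some((lo, hi))` when the delete set covers exactly the
/// contiguous byte range `lo..=hi`, which unlocks the SIMD compaction
/// kernels. Duplicates and gaps both disqualify the fast path.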
#[inline]
fn detect_delete_range(chars: &[u8]) -> Option<(u8, u8)> {
    if chars.is_empty() {
        return None;
    }
    // A pure length-vs-span check would misclassify inputs with duplicate
    // bytes (e.g. [b'a', b'a', b'c'] spans 3 values but covers only 2), so
    // verify that every value in lo..=hi actually occurs.
    let mut seen = [false; 256];
    let mut lo = chars[0];
    let mut hi = chars[0];
    for &c in chars {
        seen[c as usize] = true;
        if c < lo {
            lo = c;
        }
        if c > hi {
            hi = c;
        }
    }
    if (lo..=hi).all(|c| seen[c as usize]) {
        Some((lo, hi))
    } else {
        None
    }
}

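/// Compacts `src` into `dst`, dropping every byte in `lo..=hi`; returns the
/// number of bytes kept. The SIMD kernels build a keep-mask via movemask and
/// scatter the surviving bytes by iterating set bits with `trailing_zeros`.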
#[cfg(target_arch = "x86_64")]
fn delete_range_chunk(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    if is_x86_feature_detected!("avx2") {
        unsafe { delete_range_avx2(src, dst, lo, hi) }
    } else {
        unsafe { delete_range_sse2(src, dst, lo, hi) }
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn delete_range_avx2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm256_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm256_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let zero = _mm256_setzero_si256();

        let len = src.len();
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let mut ri = 0;
        let mut wp = 0;

        while ri + 32 <= len {
            let input = _mm256_loadu_si256(sp.add(ri) as *const _);
            let biased = _mm256_add_epi8(input, bias_v);
            let gt = _mm256_cmpgt_epi8(biased, threshold_v);
            let in_range = _mm256_cmpeq_epi8(gt, zero);
            let keep_mask = !(_mm256_movemask_epi8(in_range) as u32);

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1;
            }
            ri += 32;
        }

        if ri + 16 <= len {
            let bias_v128 = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
            let threshold_v128 = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
            let zero128 = _mm_setzero_si128();

            let input = _mm_loadu_si128(sp.add(ri) as *const _);
            let biased = _mm_add_epi8(input, bias_v128);
            let gt = _mm_cmpgt_epi8(biased, threshold_v128);
            let in_range = _mm_cmpeq_epi8(gt, zero128);
            let keep_mask = !(_mm_movemask_epi8(in_range) as u32) & 0xFFFF;

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1;
            }
            ri += 16;
        }

        while ri < len {
            let b = *sp.add(ri);
            if b < lo || b > hi {
                *dp.add(wp) = b;
                wp += 1;
            }
            ri += 1;
        }

        wp
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn delete_range_sse2(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    use std::arch::x86_64::*;

    unsafe {
        let range = hi - lo;
        let bias_v = _mm_set1_epi8(0x80u8.wrapping_sub(lo) as i8);
        let threshold_v = _mm_set1_epi8(0x80u8.wrapping_add(range) as i8);
        let zero = _mm_setzero_si128();

        let len = src.len();
        let sp = src.as_ptr();
        let dp = dst.as_mut_ptr();
        let mut ri = 0;
        let mut wp = 0;

        while ri + 16 <= len {
            let input = _mm_loadu_si128(sp.add(ri) as *const _);
            let biased = _mm_add_epi8(input, bias_v);
            let gt = _mm_cmpgt_epi8(biased, threshold_v);
            let in_range = _mm_cmpeq_epi8(gt, zero);
            let keep_mask = !(_mm_movemask_epi8(in_range) as u32) & 0xFFFF;

            let mut mask = keep_mask;
            while mask != 0 {
                let bit = mask.trailing_zeros() as usize;
                *dp.add(wp) = *sp.add(ri + bit);
                wp += 1;
                mask &= mask - 1;
            }
            ri += 16;
        }

        while ri < len {
            let b = *sp.add(ri);
            if b < lo || b > hi {
                *dp.add(wp) = b;
                wp += 1;
            }
            ri += 1;
        }

        wp
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn delete_range_chunk(src: &[u8], dst: &mut [u8], lo: u8, hi: u8) -> usize {
    let mut wp = 0;
    for &b in src {
        if b < lo || b > hi {
            dst[wp] = b;
            wp += 1;
        }
    }
    wp
}

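/// Streaming delete for a contiguous byte range: read a chunk, compact it
/// into a scratch buffer, write the survivors.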
fn delete_range_streaming(
    lo: u8,
    hi: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut src = vec![0u8; STREAM_BUF];
    let mut dst = alloc_uninit_vec(STREAM_BUF);
    loop {
        let n = read_full(reader, &mut src)?;
        if n == 0 {
            break;
        }
        let wp = delete_range_chunk(&src[..n], &mut dst, lo, hi);
        if wp > 0 {
            writer.write_all(&dst[..wp])?;
        }
    }
    Ok(())
}

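/// Streaming translate. Fast paths: an identity table degenerates to a
/// plain copy, and a contiguous range+offset table uses the SIMD kernels;
/// everything else goes through the full 256-byte lookup table.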
pub fn translate(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    let is_identity = table.iter().enumerate().all(|(i, &v)| v == i as u8);
    if is_identity {
        return passthrough_stream(reader, writer);
    }

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_range_stream(lo, hi, offset, reader, writer);
    }

    let mut buf = vec![0u8; TRANSLATE_STREAM_BUF];
    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        translate_inplace(&mut buf[..n], &table);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}

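/// Streaming translate specialized for a contiguous range plus constant offset.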
fn translate_range_stream(
    lo: u8,
    hi: u8,
    offset: i8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; TRANSLATE_STREAM_BUF];
    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}

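/// Copies input to output unchanged (identity translation).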
fn passthrough_stream(reader: &mut impl Read, writer: &mut impl Write) -> io::Result<()> {
    let mut buf = vec![0u8; TRANSLATE_STREAM_BUF];
    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}

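/// Reads until `buf` is full or EOF, retrying on `Interrupted`; returns the
/// number of bytes read. Filling the whole buffer keeps downstream chunks
/// large and the number of writes low.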
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}

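/// Streaming translate + squeeze: translate each chunk, then collapse runs
/// of bytes that are members of `set2`. `last_squeezed` uses 256 as a
/// "none" sentinel and persists across chunk boundaries.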
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    let range_info = detect_range_offset(&table);

    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        if let Some((lo, hi, offset)) = range_info {
            translate_range_simd_inplace(&mut buf[..n], lo, hi, offset);
        } else {
            translate_inplace(&mut buf[..n], &table);
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

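/// Streaming delete. Sets of up to three bytes use memchr kernels,
/// contiguous ranges use the SIMD compaction path, and everything else
/// falls back to a bitset membership test unrolled eight bytes at a time.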
pub fn delete(
    delete_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_streaming(delete_chars[0], reader, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_streaming(delete_chars, reader, writer);
    }

    if let Some((lo, hi)) = detect_delete_range(delete_chars) {
        return delete_range_streaming(lo, hi, reader, writer);
    }

    let member = build_member_set(delete_chars);
    let mut buf = vec![0u8; STREAM_BUF];

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            let mut i = 0;
            while i + 8 <= n {
                let b0 = *ptr.add(i);
                let b1 = *ptr.add(i + 1);
                let b2 = *ptr.add(i + 2);
                let b3 = *ptr.add(i + 3);
                let b4 = *ptr.add(i + 4);
                let b5 = *ptr.add(i + 5);
                let b6 = *ptr.add(i + 6);
                let b7 = *ptr.add(i + 7);

                if !is_member(&member, b0) {
                    *ptr.add(wp) = b0;
                    wp += 1;
                }
                if !is_member(&member, b1) {
                    *ptr.add(wp) = b1;
                    wp += 1;
                }
                if !is_member(&member, b2) {
                    *ptr.add(wp) = b2;
                    wp += 1;
                }
                if !is_member(&member, b3) {
                    *ptr.add(wp) = b3;
                    wp += 1;
                }
                if !is_member(&member, b4) {
                    *ptr.add(wp) = b4;
                    wp += 1;
                }
                if !is_member(&member, b5) {
                    *ptr.add(wp) = b5;
                    wp += 1;
                }
                if !is_member(&member, b6) {
                    *ptr.add(wp) = b6;
                    wp += 1;
                }
                if !is_member(&member, b7) {
                    *ptr.add(wp) = b7;
                    wp += 1;
                }
                i += 8;
            }
            while i < n {
                let b = *ptr.add(i);
                if !is_member(&member, b) {
                    *ptr.add(wp) = b;
                    wp += 1;
                }
                i += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

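/// Streaming delete of a single byte value, driven by `memchr` so the
/// stretches between matches are handled as bulk copies.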
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

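/// Streaming delete of two or three byte values via `memchr2`/`memchr3`.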
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1;
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

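/// Streaming delete + squeeze: bytes in `delete_set` are dropped, then runs
/// of bytes in `squeeze_set` are collapsed to a single occurrence.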
pub fn delete_squeeze(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&delete_set, b) {
                    continue;
                }
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

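/// Streaming squeeze: collapse runs of member bytes to one occurrence.
/// Single-character sets get a dedicated memchr-driven path.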
pub fn squeeze(
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_stream(squeeze_chars[0], reader, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&member, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

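/// Streaming squeeze of one byte value. `was_squeeze_char` carries the
/// "last emitted byte was `ch`" state across chunk boundaries so runs that
/// straddle reads still collapse.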
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let mut wp = 0;
        let mut i = 0;

        while i < n {
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

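/// Inputs up to this size are transformed into one allocation and written
/// with a single `write_all`; larger inputs are processed in `BUF_SIZE`
/// chunks to bound memory use.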
const SINGLE_WRITE_LIMIT: usize = 16 * 1024 * 1024;

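/// Translate over fully in-memory (mmap'd) input. Chooses between identity
/// passthrough, the SIMD range kernel, and the full lookup table.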
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);

    let is_identity = table.iter().enumerate().all(|(i, &v)| v == i as u8);
    if is_identity {
        return writer.write_all(data);
    }

    if let Some((lo, hi, offset)) = detect_range_offset(&table) {
        return translate_mmap_range(data, writer, lo, hi, offset);
    }

    translate_mmap_table(data, writer, &table)
}

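/// Range+offset translate for in-memory data: rayon-parallel above
/// `PARALLEL_THRESHOLD`, one-shot below `SINGLE_WRITE_LIMIT`, chunked
/// otherwise.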
fn translate_mmap_range(
    data: &[u8],
    writer: &mut impl Write,
    lo: u8,
    hi: u8,
    offset: i8,
) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_range_simd(src_chunk, &mut dst_chunk[..src_chunk.len()], lo, hi, offset);
            });

        return writer.write_all(&buf);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_range_simd(data, &mut buf, lo, hi, offset);
        return writer.write_all(&buf);
    }

    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_range_simd(chunk, &mut buf[..chunk.len()], lo, hi, offset);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}

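/// Full-table translate for in-memory data, with the same three size tiers
/// as `translate_mmap_range`.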
fn translate_mmap_table(data: &[u8], writer: &mut impl Write, table: &[u8; 256]) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let mut buf = alloc_uninit_vec(data.len());
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        data.par_chunks(chunk_size)
            .zip(buf.par_chunks_mut(chunk_size))
            .for_each(|(src_chunk, dst_chunk)| {
                translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], table);
            });

        return writer.write_all(&buf);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf = alloc_uninit_vec(data.len());
        translate_to(data, &mut buf, table);
        return writer.write_all(&buf);
    }

    let mut buf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        translate_to(chunk, &mut buf[..chunk.len()], table);
        writer.write_all(&buf[..chunk.len()])?;
    }
    Ok(())
}

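/// Translate + squeeze over in-memory data. The parallel tier translates
/// chunks concurrently, then squeezes sequentially in place: the squeeze
/// state (the previous byte) is inherently serial across chunk borders.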
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);

    if data.len() >= PARALLEL_THRESHOLD {
        let mut translated = alloc_uninit_vec(data.len());
        let range_info = detect_range_offset(&table);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        if let Some((lo, hi, offset)) = range_info {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_range_simd(
                        src_chunk,
                        &mut dst_chunk[..src_chunk.len()],
                        lo,
                        hi,
                        offset,
                    );
                });
        } else {
            data.par_chunks(chunk_size)
                .zip(translated.par_chunks_mut(chunk_size))
                .for_each(|(src_chunk, dst_chunk)| {
                    translate_to(src_chunk, &mut dst_chunk[..src_chunk.len()], &table);
                });
        }

        let mut last_squeezed: u16 = 256;
        let len = translated.len();
        let mut wp = 0;
        unsafe {
            let ptr = translated.as_mut_ptr();
            let mut i = 0;
            while i < len {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
                i += 1;
            }
        }
        return writer.write_all(&translated[..wp]);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut buf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        unsafe {
            buf.set_len(data.len());
            let outp: *mut u8 = buf.as_mut_ptr();
            let inp = data.as_ptr();
            let len = data.len();
            let mut wp = 0;
            let mut i = 0;
            while i < len {
                let translated = *table.get_unchecked(*inp.add(i) as usize);
                if is_member(&squeeze_set, translated) {
                    if last_squeezed == translated as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = translated as u16;
                } else {
                    last_squeezed = 256;
                }
                *outp.add(wp) = translated;
                wp += 1;
                i += 1;
            }
            buf.set_len(wp);
        }
        return writer.write_all(&buf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut buf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        translate_to(chunk, &mut buf[..chunk.len()], &table);
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}

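/// Delete over in-memory data, mirroring the streaming `delete` dispatch:
/// memchr for up to three bytes, SIMD compaction for contiguous ranges,
/// bitset filtering otherwise. The parallel tier compacts each chunk in
/// place within a shared output buffer, then stitches the pieces together.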
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }
    if delete_chars.len() <= 3 {
        return delete_multi_memchr_mmap(delete_chars, data, writer);
    }

    if let Some((lo, hi)) = detect_delete_range(delete_chars) {
        return delete_range_mmap(data, writer, lo, hi);
    }

    let member = build_member_set(delete_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let mut outbuf = alloc_uninit_vec(data.len());
        let chunk_lens: Vec<usize> = data
            .par_chunks(chunk_size)
            .zip(outbuf.par_chunks_mut(chunk_size))
            .map(|(src_chunk, dst_chunk)| delete_chunk_bitset_into(src_chunk, &member, dst_chunk))
            .collect();

        let mut write_pos = 0;
        let mut src_offset = 0;
        for &clen in &chunk_lens {
            if clen > 0 && src_offset != write_pos {
                unsafe {
                    std::ptr::copy(
                        outbuf.as_ptr().add(src_offset),
                        outbuf.as_mut_ptr().add(write_pos),
                        clen,
                    );
                }
            }
            write_pos += clen;
            src_offset += chunk_size;
        }

        return writer.write_all(&outbuf[..write_pos]);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = alloc_uninit_vec(data.len());
        let out_pos = delete_chunk_bitset_into(data, &member, &mut outbuf);
        return writer.write_all(&outbuf[..out_pos]);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = alloc_uninit_vec(buf_size);

    for chunk in data.chunks(buf_size) {
        let out_pos = delete_chunk_bitset_into(chunk, &member, &mut outbuf);
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}

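/// Contiguous-range delete for in-memory data. Parallel chunks compact into
/// per-chunk vectors that are stitched together with vectored writes.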
fn delete_range_mmap(data: &[u8], writer: &mut impl Write, lo: u8, hi: u8) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = alloc_uninit_vec(chunk.len());
                let wp = delete_range_chunk(chunk, &mut out, lo, hi);
                unsafe { out.set_len(wp) };
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = alloc_uninit_vec(data.len());
        let wp = delete_range_chunk(data, &mut outbuf, lo, hi);
        return writer.write_all(&outbuf[..wp]);
    }

    let mut outbuf = alloc_uninit_vec(BUF_SIZE);
    for chunk in data.chunks(BUF_SIZE) {
        let wp = delete_range_chunk(chunk, &mut outbuf, lo, hi);
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

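/// Bitset-filtered compaction of one chunk into `outbuf`; returns the
/// number of bytes kept. The unrolled body is branchless: it always stores
/// the byte, then advances the write cursor only when the byte is kept.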
#[inline]
fn delete_chunk_bitset_into(chunk: &[u8], member: &[u8; 32], outbuf: &mut [u8]) -> usize {
    let len = chunk.len();
    let mut out_pos = 0;
    let mut i = 0;

    while i + 8 <= len {
        unsafe {
            let b0 = *chunk.get_unchecked(i);
            let b1 = *chunk.get_unchecked(i + 1);
            let b2 = *chunk.get_unchecked(i + 2);
            let b3 = *chunk.get_unchecked(i + 3);
            let b4 = *chunk.get_unchecked(i + 4);
            let b5 = *chunk.get_unchecked(i + 5);
            let b6 = *chunk.get_unchecked(i + 6);
            let b7 = *chunk.get_unchecked(i + 7);

            *outbuf.get_unchecked_mut(out_pos) = b0;
            out_pos += !is_member(member, b0) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b1;
            out_pos += !is_member(member, b1) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b2;
            out_pos += !is_member(member, b2) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b3;
            out_pos += !is_member(member, b3) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b4;
            out_pos += !is_member(member, b4) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b5;
            out_pos += !is_member(member, b5) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b6;
            out_pos += !is_member(member, b6) as usize;
            *outbuf.get_unchecked_mut(out_pos) = b7;
            out_pos += !is_member(member, b7) as usize;
        }
        i += 8;
    }

    while i < len {
        unsafe {
            let b = *chunk.get_unchecked(i);
            *outbuf.get_unchecked_mut(out_pos) = b;
            out_pos += !is_member(member, b) as usize;
        }
        i += 1;
    }

    out_pos
}

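/// Single-byte delete for in-memory data, copying the gaps between matches
/// reported by `memchr_iter`.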
fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                for pos in memchr::memchr_iter(ch, chunk) {
                    if pos > last {
                        out.extend_from_slice(&chunk[last..pos]);
                    }
                    last = pos + 1;
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, data) {
            if pos > last {
                outbuf.extend_from_slice(&data[last..pos]);
            }
            last = pos + 1;
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, chunk) {
            if pos > last {
                let run = pos - last;
                outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                wp += run;
            }
            last = pos + 1;
        }
        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

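/// Two/three-byte delete for in-memory data via `memchr2`/`memchr3` iterators.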
fn delete_multi_memchr_mmap(chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    let c0 = chars[0];
    let c1 = if chars.len() >= 2 { chars[1] } else { 0 };
    let c2 = if chars.len() >= 3 { chars[2] } else { 0 };
    let is_three = chars.len() >= 3;

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut last = 0;
                if is_three {
                    for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                } else {
                    for pos in memchr::memchr2_iter(c0, c1, chunk) {
                        if pos > last {
                            out.extend_from_slice(&chunk[last..pos]);
                        }
                        last = pos + 1;
                    }
                }
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();

        let slices: Vec<std::io::IoSlice> = results
            .iter()
            .filter(|r| !r.is_empty())
            .map(|r| std::io::IoSlice::new(r))
            .collect();
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf = Vec::with_capacity(data.len());
        let mut last = 0;
        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, data) {
                if pos > last {
                    outbuf.extend_from_slice(&data[last..pos]);
                }
                last = pos + 1;
            }
        }
        if last < data.len() {
            outbuf.extend_from_slice(&data[last..]);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;

        if is_three {
            for pos in memchr::memchr3_iter(c0, c1, c2, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        } else {
            for pos in memchr::memchr2_iter(c0, c1, chunk) {
                if pos > last {
                    let run = pos - last;
                    outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                    wp += run;
                }
                last = pos + 1;
            }
        }

        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

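/// Delete + squeeze over in-memory data. The squeeze state is serial, so
/// there is no parallel tier; sizes split between one-shot and chunked
/// output.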
pub fn delete_squeeze_mmap(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        unsafe {
            outbuf.set_len(data.len());
            let outp: *mut u8 = outbuf.as_mut_ptr();
            let inp = data.as_ptr();
            let len = data.len();
            let mut out_pos = 0;
            let mut i = 0;
            while i < len {
                let b = *inp.add(i);
                if is_member(&delete_set, b) {
                    i += 1;
                    continue;
                }
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        i += 1;
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *outp.add(out_pos) = b;
                out_pos += 1;
                i += 1;
            }
            outbuf.set_len(out_pos);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        let mut out_pos = 0;
        for &b in chunk {
            if is_member(&delete_set, b) {
                continue;
            }
            if is_member(&squeeze_set, b) {
                if last_squeezed == b as u16 {
                    continue;
                }
                last_squeezed = b as u16;
            } else {
                last_squeezed = 256;
            }
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = b;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}

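/// Squeeze over in-memory data: dedicated paths for one, two, and three
/// squeeze bytes, a bitset path for larger sets, and a parallel tier that
/// repairs chunk seams (a run split across two chunks) before writing.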
pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_mmap(squeeze_chars[0], data, writer);
    }
    if squeeze_chars.len() == 2 {
        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
    }
    if squeeze_chars.len() == 3 {
        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
    }

    let member = build_member_set(squeeze_chars);

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    if data.len() <= SINGLE_WRITE_LIMIT {
        let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
        let mut last_squeezed: u16 = 256;
        let len = data.len();
        let mut wp = 0;
        let mut i = 0;

        unsafe {
            outbuf.set_len(data.len());
            let inp = data.as_ptr();
            let outp: *mut u8 = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
            outbuf.set_len(wp);
        }
        return writer.write_all(&outbuf);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        let len = chunk.len();
        let mut wp = 0;
        let mut i = 0;

        unsafe {
            let inp = chunk.as_ptr();
            let outp = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

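/// Squeezes one chunk into a fresh vector using the membership bitset;
/// used by the parallel tiers.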
fn squeeze_chunk_bitset(chunk: &[u8], member: &[u8; 32]) -> Vec<u8> {
    let len = chunk.len();
    let mut out = Vec::with_capacity(len);
    let mut last_squeezed: u16 = 256;
    let mut i = 0;

    unsafe {
        out.set_len(len);
        let inp = chunk.as_ptr();
        let outp: *mut u8 = out.as_mut_ptr();
        let mut wp = 0;

        while i < len {
            let b = *inp.add(i);
            if is_member(member, b) {
                if last_squeezed != b as u16 {
                    *outp.add(wp) = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                i += 1;
                while i < len && *inp.add(i) == b {
                    i += 1;
                }
            } else {
                last_squeezed = 256;
                *outp.add(wp) = b;
                wp += 1;
                i += 1;
            }
        }
        out.set_len(wp);
    }
    out
}

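/// Squeeze for exactly two or three squeeze bytes (`N` selects the memchr
/// kernel). The sequential tier copies unsqueezed gaps in bulk and emits
/// one byte per run of a squeeze character.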
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    if data.len() >= PARALLEL_THRESHOLD {
        let member = build_member_set(chars);
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| squeeze_chunk_bitset(chunk, &member))
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if is_member(&member, prev_last) {
                        let skip = result.iter().take_while(|&&b| b == prev_last).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut wp = 0;
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > buf_size {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > buf_size {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                if last_squeezed != b as u16 {
                    if wp >= buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}

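/// Squeeze of a single byte value over in-memory data. If the data never
/// contains a doubled `ch`, it is passed through untouched.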
fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    if data.len() >= PARALLEL_THRESHOLD {
        let n_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() / n_threads).max(32 * 1024);

        let results: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                let mut cursor = 0;
                while cursor < chunk.len() {
                    match memchr::memchr(ch, &chunk[cursor..]) {
                        Some(offset) => {
                            let pos = cursor + offset;
                            if pos > cursor {
                                out.extend_from_slice(&chunk[cursor..pos]);
                            }
                            out.push(ch);
                            cursor = pos + 1;
                            while cursor < chunk.len() && chunk[cursor] == ch {
                                cursor += 1;
                            }
                        }
                        None => {
                            out.extend_from_slice(&chunk[cursor..]);
                            break;
                        }
                    }
                }
                out
            })
            .collect();

        let mut slices: Vec<std::io::IoSlice> = Vec::with_capacity(results.len());
        for (idx, result) in results.iter().enumerate() {
            if result.is_empty() {
                continue;
            }
            if idx > 0 {
                if let Some(&prev_last) = results[..idx].iter().rev().find_map(|r| r.last()) {
                    if prev_last == ch {
                        let skip = result.iter().take_while(|&&b| b == ch).count();
                        if skip < result.len() {
                            slices.push(std::io::IoSlice::new(&result[skip..]));
                        }
                        continue;
                    }
                }
            }
            slices.push(std::io::IoSlice::new(result));
        }
        return write_ioslices(writer, &slices);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let len = data.len();
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let gap = pos - cursor;
                if gap > 0 {
                    if wp + gap > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > buf_size {
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }
                if wp >= buf_size {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;
                cursor = pos + 1;
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > buf_size {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
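
// Minimal sanity checks added for illustration; a sketch, not the original
// test suite. Each test pins one dispatch tier on a small in-memory input:
// the range+offset and full-table translate paths, the memchr-driven
// delete/squeeze paths, and the contiguous-range delete path.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn translate_contiguous_range_uses_offset_path() {
        // a..=d shifted by +1 is recognized by `detect_range_offset`.
        let mut out = Vec::new();
        translate(b"abcd", b"bcde", &mut &b"ad-x"[..], &mut out).unwrap();
        assert_eq!(out, b"be-x");
    }

    #[test]
    fn translate_swap_uses_full_table() {
        // a<->b has no uniform offset, so the lookup-table path runs.
        let mut out = Vec::new();
        translate(b"ab", b"ba", &mut &b"abba"[..], &mut out).unwrap();
        assert_eq!(out, b"baab");
    }

    #[test]
    fn delete_and_squeeze_single_byte() {
        let mut out = Vec::new();
        delete(b"l", &mut &b"hello"[..], &mut out).unwrap();
        assert_eq!(out, b"heo");

        let mut out = Vec::new();
        squeeze(b"l", &mut &b"hello"[..], &mut out).unwrap();
        assert_eq!(out, b"helo");
    }

    #[test]
    fn delete_mmap_contiguous_range() {
        // b"abcd" forms a contiguous range, so `delete_range_mmap` handles it.
        let mut out = Vec::new();
        delete_mmap(b"abcd", b"xaybzcwd!", &mut out).unwrap();
        assert_eq!(out, b"xyzw!");
    }

    #[test]
    fn squeeze_mmap_single_byte() {
        let mut out = Vec::new();
        squeeze_mmap(b" ", b"a  b   c", &mut out).unwrap();
        assert_eq!(out, b"a b c");
    }
}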