1use rayon::prelude::*;
2use std::io::{self, Read, Write};
3
/// Chunk size (4 MiB) for the bulk/mmap translation paths.
const BUF_SIZE: usize = 4 * 1024 * 1024;

/// Buffer size (1 MiB) for the streaming read-loop paths.
const STREAM_BUF: usize = 1024 * 1024;

/// Inputs at or above this size (2 MiB) take the rayon parallel paths.
const PAR_THRESHOLD: usize = 2 * 1024 * 1024;
15
/// Build the 256-entry byte translation table for SET1 -> SET2.
///
/// Entries not mentioned in `set1` map to themselves. When `set2` is shorter
/// than `set1`, its final byte is reused for the excess positions; if `set2`
/// is empty, the excess entries stay identity. Later duplicates in `set1`
/// overwrite earlier ones.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    // Start from the identity mapping.
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    // Padding byte for positions past the end of set2 (GNU tr extension rule).
    let pad = set2.last().copied();
    for (idx, &src) in set1.iter().enumerate() {
        let dst = match set2.get(idx) {
            Some(&d) => d,
            None => pad.unwrap_or(src),
        };
        table[src as usize] = dst;
    }
    table
}
30
/// Build a 256-bit membership bitset (32 bytes, 8 bits each) from `chars`.
///
/// Bit layout matches `is_member`: byte `b` lives at `set[b >> 3]`,
/// bit `b & 7`.
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    let mut bits = [0u8; 32];
    for &byte in chars.iter() {
        let slot = (byte >> 3) as usize;
        bits[slot] |= 1u8 << (byte & 7);
    }
    bits
}
40
/// Test membership of `ch` in a bitset produced by `build_member_set`.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    // `ch >> 3` is at most 31, so safe indexing into the fixed [u8; 32]
    // compiles without a reachable bounds-check panic.
    set[(ch >> 3) as usize] & (1u8 << (ch & 7)) != 0
}
45
/// Classification of a translation table, used to pick the fastest kernel.
#[allow(dead_code)] enum TranslateKind {
    /// Every byte maps to itself; output equals input.
    Identity,
    /// All remapped bytes form one contiguous range `[lo, hi]` shifted by the
    /// same wrapping `delta` (e.g. ASCII case conversion a-z -> A-Z).
    RangeDelta { lo: u8, hi: u8, delta: u8 },
    /// Arbitrary mapping; needs the full 256-entry table lookup.
    General,
}
60
61fn analyze_table(table: &[u8; 256]) -> TranslateKind {
63 let mut first_delta: Option<u8> = None;
64 let mut lo: u8 = 0;
65 let mut hi: u8 = 0;
66 let mut count: u32 = 0;
67
68 for i in 0..256u16 {
69 let actual = table[i as usize];
70 if actual != i as u8 {
71 let delta = actual.wrapping_sub(i as u8);
72 count += 1;
73 match first_delta {
74 None => {
75 first_delta = Some(delta);
76 lo = i as u8;
77 hi = i as u8;
78 }
79 Some(d) if d == delta => {
80 hi = i as u8;
81 }
82 _ => return TranslateKind::General,
83 }
84 }
85 }
86
87 match (count, first_delta) {
88 (0, _) => TranslateKind::Identity,
89 (c, Some(delta)) if c == (hi as u32 - lo as u32 + 1) => {
90 TranslateKind::RangeDelta { lo, hi, delta }
91 }
92 _ => TranslateKind::General,
93 }
94}
95
/// AVX2 kernels for the translation fast paths.
///
/// Every function here is compiled with `target_feature(enable = "avx2")`,
/// so callers must confirm AVX2 support at runtime first (see `has_avx2`);
/// calling these on a CPU without AVX2 is undefined behavior.
#[cfg(target_arch = "x86_64")]
mod simd_tr {
    /// Out-of-place range translation: bytes in `[lo, hi]` get `delta` added
    /// (wrapping); all other bytes are copied unchanged from `data` to `out`.
    ///
    /// # Safety
    /// AVX2 must be available and `out` must be at least `data.len()` long.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta(data: &[u8], out: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 128 bytes (4 vectors) per iteration. The in-range
            // test is the unsigned compare (b - lo) <= (hi - lo), expressed
            // as min(d, range) == d because AVX2 lacks unsigned byte compares.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(inp.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(inp.add(i + 96) as *const __m256i);

                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                // Masked add: lanes outside the range add 0, inside add delta.
                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(outp.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(outp.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // Remaining full 32-byte vectors.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail (< 32 bytes), same unsigned-range trick.
            while i < len {
                let b = *inp.add(i);
                *outp.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }

    /// Out-of-place arbitrary-table translation via nibble decomposition:
    /// the 256-entry table is split into sixteen 16-byte LUTs selected by the
    /// high nibble and indexed by the low nibble with `pshufb`.
    ///
    /// # Safety
    /// AVX2 must be available and `out` must be at least `data.len()` long.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup(data: &[u8], out: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // Broadcast each 16-byte slice of the table into both 128-bit
            // lanes so pshufb behaves identically in each lane.
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0;

            // Main loop: 64 bytes per iteration; all 16 high-nibble buckets
            // are probed and OR-merged (each lane matches exactly one).
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // One unrolled step per high-nibble value: shuffle the LUT by
                // the low nibble, keep lanes whose high nibble matches.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Remaining full 32-byte vectors: same scheme, runtime loop.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail: plain table lookup.
            while i < len {
                *outp.add(i) = *table.get_unchecked(*inp.add(i) as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `general_lookup`; same nibble-LUT scheme, reading
    /// and writing `data` through one pointer.
    ///
    /// # Safety
    /// AVX2 must be available.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup_inplace(data: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0;

            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = *table.get_unchecked(b as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `range_delta`: same masked-add scheme reading and
    /// writing through one pointer.
    ///
    /// # Safety
    /// AVX2 must be available.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta_inplace(data: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0usize;

            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(ptr.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(ptr.add(i + 96) as *const __m256i);

                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(ptr.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(ptr.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }
}
449
/// Runtime AVX2 detection; gates every call into `simd_tr`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn has_avx2() -> bool {
    is_x86_feature_detected!("avx2")
}
459
/// Scalar fallback: translate `chunk` into `out` through the 256-entry table.
///
/// The first `chunk.len()` bytes of `out` are overwritten; `out` must be at
/// least that long.
#[inline(always)]
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    // Slicing `out` to the exact input length makes the zip length-matched,
    // which lets the compiler drop per-byte bounds checks in the loop.
    for (dst, &src) in out[..chunk.len()].iter_mut().zip(chunk.iter()) {
        *dst = table[src as usize];
    }
}
492
/// Scalar fallback: translate `data` in place through the 256-entry table.
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    // A u8 index into a [u8; 256] can never be out of bounds, so the safe
    // iterator form carries no reachable panic path.
    for byte in data.iter_mut() {
        *byte = table[*byte as usize];
    }
}
519
/// Translate `chunk` into `out`, choosing the fastest kernel for `kind`.
///
/// `_use_simd` gates the AVX2 paths; it must only be true when AVX2 was
/// detected at runtime. On non-x86_64 targets it is ignored and the scalar
/// table loop is used.
#[inline]
fn translate_chunk_dispatch(
    chunk: &[u8],
    out: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {
            // No byte is remapped: plain copy.
            out[..chunk.len()].copy_from_slice(chunk);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY (review): caller passes _use_simd = has_avx2().
                unsafe { simd_tr::range_delta(chunk, out, *lo, *hi, *delta) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_chunk(chunk, out, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY (review): caller passes _use_simd = has_avx2().
                unsafe { simd_tr::general_lookup(chunk, out, table) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_chunk(chunk, out, table);
        }
    }
}
564
/// In-place counterpart of `translate_chunk_dispatch`: translate `data`
/// choosing the fastest kernel for `kind`. Identity is a no-op.
///
/// `_use_simd` must only be true when AVX2 was detected at runtime.
#[inline]
fn translate_inplace_dispatch(
    data: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        // Identity table: nothing to do at all for the in-place form.
        TranslateKind::Identity => {}
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY (review): caller passes _use_simd = has_avx2().
                unsafe { simd_tr::range_delta_inplace(data, *lo, *hi, *delta) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_inplace(data, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY (review): caller passes _use_simd = has_avx2().
                unsafe { simd_tr::general_lookup_inplace(data, table) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_inplace(data, table);
        }
    }
}
602
/// Stream `reader` to `writer`, translating bytes per SET1 -> SET2.
///
/// Builds the table once, classifies it so every chunk takes the fastest
/// kernel, then processes BUF_SIZE chunks in place. Interrupted reads
/// (EINTR) are retried; other I/O errors are propagated.
pub fn translate(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; BUF_SIZE];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}
636
/// Stream `reader` to `writer`, translating per SET1 -> SET2 and then
/// squeezing runs of bytes that are members of SET2 down to one occurrence.
///
/// `last_squeezed` persists across read() calls so runs spanning a buffer
/// boundary are still squeezed; the sentinel 256 can never equal a byte
/// widened to u16, so the first squeezable byte is always emitted.
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeeze applies to post-translation bytes, hence the SET2 member set.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; STREAM_BUF];
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // Translate first, then squeeze by compacting in place; the write
        // cursor `wp` never overtakes the read cursor `i`.
        translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue; // repeat of the previous squeezable byte
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256; // reset run tracking
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
687
688pub fn delete(
689 delete_chars: &[u8],
690 reader: &mut impl Read,
691 writer: &mut impl Write,
692) -> io::Result<()> {
693 if delete_chars.len() == 1 {
695 return delete_single_streaming(delete_chars[0], reader, writer);
696 }
697
698 if delete_chars.len() <= 3 {
700 return delete_multi_streaming(delete_chars, reader, writer);
701 }
702
703 let member = build_member_set(delete_chars);
704 let mut buf = vec![0u8; STREAM_BUF];
706
707 loop {
708 let n = match reader.read(&mut buf) {
709 Ok(0) => break,
710 Ok(n) => n,
711 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
712 Err(e) => return Err(e),
713 };
714 let mut wp = 0;
715 unsafe {
716 let ptr = buf.as_mut_ptr();
717 let mut i = 0;
718 while i + 8 <= n {
720 let b0 = *ptr.add(i);
721 let b1 = *ptr.add(i + 1);
722 let b2 = *ptr.add(i + 2);
723 let b3 = *ptr.add(i + 3);
724 let b4 = *ptr.add(i + 4);
725 let b5 = *ptr.add(i + 5);
726 let b6 = *ptr.add(i + 6);
727 let b7 = *ptr.add(i + 7);
728
729 if !is_member(&member, b0) {
730 *ptr.add(wp) = b0;
731 wp += 1;
732 }
733 if !is_member(&member, b1) {
734 *ptr.add(wp) = b1;
735 wp += 1;
736 }
737 if !is_member(&member, b2) {
738 *ptr.add(wp) = b2;
739 wp += 1;
740 }
741 if !is_member(&member, b3) {
742 *ptr.add(wp) = b3;
743 wp += 1;
744 }
745 if !is_member(&member, b4) {
746 *ptr.add(wp) = b4;
747 wp += 1;
748 }
749 if !is_member(&member, b5) {
750 *ptr.add(wp) = b5;
751 wp += 1;
752 }
753 if !is_member(&member, b6) {
754 *ptr.add(wp) = b6;
755 wp += 1;
756 }
757 if !is_member(&member, b7) {
758 *ptr.add(wp) = b7;
759 wp += 1;
760 }
761 i += 8;
762 }
763 while i < n {
764 let b = *ptr.add(i);
765 if !is_member(&member, b) {
766 *ptr.add(wp) = b;
767 wp += 1;
768 }
769 i += 1;
770 }
771 }
772 writer.write_all(&buf[..wp])?;
773 }
774 Ok(())
775}
776
/// Streaming delete of a single byte value `ch`, using memchr to locate each
/// occurrence and shifting the intervening runs left in place.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // `wp` is the compacted length so far; `i` scans forward. While no
        // byte has been deleted yet, wp == i and the copy is skipped.
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            // Overlapping regions: ptr::copy is memmove.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    // Skip the matched (deleted) byte itself.
                    i += offset + 1;
                }
                None => {
                    // No more matches: keep the rest of the buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
833
/// Streaming delete of two or three byte values via memchr2/memchr3.
///
/// Precondition: `chars.len()` is exactly 2 or 3 — the memchr3 branch
/// indexes `chars[2]`, so any other length would panic. Callers (`delete`)
/// must enforce this.
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // Same in-place compaction scheme as delete_single_streaming: shift
        // each run between matches left to the write cursor.
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            // Overlapping regions: ptr::copy is memmove.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    // Skip the matched (deleted) byte.
                    i += offset + 1;
                }
                None => {
                    // Tail with no matches: keep it all.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
894
895pub fn delete_squeeze(
896 delete_chars: &[u8],
897 squeeze_chars: &[u8],
898 reader: &mut impl Read,
899 writer: &mut impl Write,
900) -> io::Result<()> {
901 let delete_set = build_member_set(delete_chars);
902 let squeeze_set = build_member_set(squeeze_chars);
903 let mut buf = vec![0u8; STREAM_BUF];
905 let mut last_squeezed: u16 = 256;
906
907 loop {
908 let n = match reader.read(&mut buf) {
909 Ok(0) => break,
910 Ok(n) => n,
911 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
912 Err(e) => return Err(e),
913 };
914 let mut wp = 0;
915 unsafe {
916 let ptr = buf.as_mut_ptr();
917 for i in 0..n {
918 let b = *ptr.add(i);
919 if is_member(&delete_set, b) {
920 continue;
921 }
922 if is_member(&squeeze_set, b) {
923 if last_squeezed == b as u16 {
924 continue;
925 }
926 last_squeezed = b as u16;
927 } else {
928 last_squeezed = 256;
929 }
930 *ptr.add(wp) = b;
931 wp += 1;
932 }
933 }
934 writer.write_all(&buf[..wp])?;
935 }
936 Ok(())
937}
938
939pub fn squeeze(
940 squeeze_chars: &[u8],
941 reader: &mut impl Read,
942 writer: &mut impl Write,
943) -> io::Result<()> {
944 if squeeze_chars.len() == 1 {
946 return squeeze_single_stream(squeeze_chars[0], reader, writer);
947 }
948
949 let member = build_member_set(squeeze_chars);
950 let mut buf = vec![0u8; STREAM_BUF];
952 let mut last_squeezed: u16 = 256;
953
954 loop {
955 let n = match reader.read(&mut buf) {
956 Ok(0) => break,
957 Ok(n) => n,
958 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
959 Err(e) => return Err(e),
960 };
961 let mut wp = 0;
962 unsafe {
963 let ptr = buf.as_mut_ptr();
964 for i in 0..n {
965 let b = *ptr.add(i);
966 if is_member(&member, b) {
967 if last_squeezed == b as u16 {
968 continue;
969 }
970 last_squeezed = b as u16;
971 } else {
972 last_squeezed = 256;
973 }
974 *ptr.add(wp) = b;
975 wp += 1;
976 }
977 }
978 writer.write_all(&buf[..wp])?;
979 }
980 Ok(())
981}
982
/// Streaming squeeze of a single byte value `ch`, memchr-driven.
///
/// Keeps every non-`ch` run verbatim, emits exactly one `ch` per run of
/// them, and carries `was_squeeze_char` across read() calls so a run split
/// over a buffer boundary still collapses to one byte.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    // True when the last byte emitted (possibly in a previous buffer) was
    // `ch`, meaning a leading run of `ch` must be swallowed entirely.
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let mut wp = 0;
        let mut i = 0;

        while i < n {
            // Continuation of a run whose first byte was already emitted:
            // skip the whole leading run without writing anything.
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    // Buffer ended inside the run; state stays true so the
                    // next buffer keeps swallowing.
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Copy the non-`ch` run preceding the match (overlapping
                    // regions are fine: ptr::copy is memmove). While nothing
                    // has been squeezed yet wp == i and the copy is skipped.
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    // Emit exactly one `ch` for the run, then skip the rest.
                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    // No further `ch`: keep the rest of the buffer verbatim.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1075
/// Translate an in-memory slice (e.g. an mmap'd file) per SET1 -> SET2 and
/// write the result to `writer`.
///
/// Identity tables short-circuit to a plain write. Inputs of PAR_THRESHOLD
/// bytes or more are translated in parallel with rayon; smaller inputs go
/// through a chunked serial loop reusing one BUF_SIZE output buffer.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    // Nothing remapped: the input already is the output.
    if matches!(kind, TranslateKind::Identity) {
        return writer.write_all(data);
    }

    if data.len() >= PAR_THRESHOLD {
        let mut out = vec![0u8; data.len()];
        let n = rayon::current_num_threads().max(1);
        let cs = (data.len() / n).max(BUF_SIZE);
        {
            // Using the same chunk size for input and output keeps the two
            // par_chunks iterators aligned pair-by-pair.
            data.par_chunks(cs)
                .zip(out.par_chunks_mut(cs))
                .for_each(|(inp, outp)| {
                    translate_chunk_dispatch(inp, &mut outp[..inp.len()], &table, &kind, use_simd);
                });
        }
        return writer.write_all(&out);
    }

    // Serial path: translate chunk-by-chunk into one reused buffer.
    let buf_size = data.len().min(BUF_SIZE);
    let mut out = vec![0u8; buf_size];
    for chunk in data.chunks(buf_size) {
        translate_chunk_dispatch(chunk, &mut out[..chunk.len()], &table, &kind, use_simd);
        writer.write_all(&out[..chunk.len()])?;
    }
    Ok(())
}
1126
/// Translate an in-memory slice per SET1 -> SET2, then squeeze runs of SET2
/// members, writing the result to `writer`.
///
/// Translation parallelizes for large inputs; the squeeze pass is always
/// serial because it carries run state (`last_squeezed`) byte to byte.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeeze applies to post-translation bytes, hence the SET2 member set.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    if data.len() >= PAR_THRESHOLD {
        // Stage 1 (parallel): translate everything into `buf`.
        let mut buf = vec![0u8; data.len()];
        let n = rayon::current_num_threads().max(1);
        let cs = (data.len() / n).max(BUF_SIZE);
        {
            // Same chunk size on both sides keeps the zipped chunks aligned.
            data.par_chunks(cs)
                .zip(buf.par_chunks_mut(cs))
                .for_each(|(inp, outp)| {
                    translate_chunk_dispatch(inp, &mut outp[..inp.len()], &table, &kind, use_simd);
                });
        }
        // Stage 2 (serial): squeeze into a reused output buffer; state in
        // `last_squeezed` carries across chunk boundaries.
        let mut last_squeezed: u16 = 256;
        let mut outbuf = vec![0u8; BUF_SIZE];
        for chunk in buf.chunks(BUF_SIZE) {
            let mut wp = 0;
            unsafe {
                let ptr = chunk.as_ptr();
                let outp = outbuf.as_mut_ptr();
                for i in 0..chunk.len() {
                    let b = *ptr.add(i);
                    if is_member(&squeeze_set, b) {
                        if last_squeezed == b as u16 {
                            continue; // repeat within a squeezable run
                        }
                        last_squeezed = b as u16;
                    } else {
                        last_squeezed = 256; // run broken
                    }
                    *outp.add(wp) = b;
                    wp += 1;
                }
            }
            writer.write_all(&outbuf[..wp])?;
        }
        return Ok(());
    }

    // Small input: translate chunk-by-chunk, then squeeze the same buffer in
    // place (wp never overtakes i).
    let buf_size = data.len().min(BUF_SIZE);
    let mut buf = vec![0u8; buf_size];
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(buf_size) {
        translate_chunk_dispatch(chunk, &mut buf[..chunk.len()], &table, &kind, use_simd);
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1211
/// Delete every byte in `delete_chars` from an in-memory slice, writing the
/// surviving bytes to `writer`.
///
/// Dispatches: one byte -> memchr path; two/three bytes -> memchr2/3 path;
/// otherwise a bitset filter, parallelized with rayon for large inputs.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }

    if delete_chars.len() == 2 {
        return delete_multi_memchr_mmap::<2>(delete_chars, data, writer);
    }

    if delete_chars.len() == 3 {
        return delete_multi_memchr_mmap::<3>(delete_chars, data, writer);
    }

    let member = build_member_set(delete_chars);

    if data.len() >= PAR_THRESHOLD {
        // Parallel path: each chunk filters into its own Vec; results are
        // then written back in input order.
        let n = rayon::current_num_threads().max(1);
        let cs = (data.len() / n).max(BUF_SIZE);
        let results: Vec<Vec<u8>> = data
            .par_chunks(cs)
            .map(|chunk| {
                let mut outbuf: Vec<u8> = Vec::with_capacity(chunk.len());
                let len = chunk.len();
                let mut i = 0;
                unsafe {
                    // NOTE(review): set_len before initialization — relies on
                    // every slot below the final out_pos being written first;
                    // spare_capacity_mut/MaybeUninit would express this more
                    // safely.
                    outbuf.set_len(chunk.len());
                    let mut out_pos = 0;
                    let outp: *mut u8 = outbuf.as_mut_ptr();
                    // Branchless compaction: always store the byte, then
                    // advance the cursor only if it is kept (bool as usize).
                    while i + 8 <= len {
                        let b0 = *chunk.get_unchecked(i);
                        let b1 = *chunk.get_unchecked(i + 1);
                        let b2 = *chunk.get_unchecked(i + 2);
                        let b3 = *chunk.get_unchecked(i + 3);
                        let b4 = *chunk.get_unchecked(i + 4);
                        let b5 = *chunk.get_unchecked(i + 5);
                        let b6 = *chunk.get_unchecked(i + 6);
                        let b7 = *chunk.get_unchecked(i + 7);
                        *outp.add(out_pos) = b0;
                        out_pos += !is_member(&member, b0) as usize;
                        *outp.add(out_pos) = b1;
                        out_pos += !is_member(&member, b1) as usize;
                        *outp.add(out_pos) = b2;
                        out_pos += !is_member(&member, b2) as usize;
                        *outp.add(out_pos) = b3;
                        out_pos += !is_member(&member, b3) as usize;
                        *outp.add(out_pos) = b4;
                        out_pos += !is_member(&member, b4) as usize;
                        *outp.add(out_pos) = b5;
                        out_pos += !is_member(&member, b5) as usize;
                        *outp.add(out_pos) = b6;
                        out_pos += !is_member(&member, b6) as usize;
                        *outp.add(out_pos) = b7;
                        out_pos += !is_member(&member, b7) as usize;
                        i += 8;
                    }
                    while i < len {
                        let b = *chunk.get_unchecked(i);
                        *outp.add(out_pos) = b;
                        out_pos += !is_member(&member, b) as usize;
                        i += 1;
                    }
                    // Shrink to the number of kept bytes.
                    outbuf.set_len(out_pos);
                }
                outbuf
            })
            .collect();
        for r in &results {
            if !r.is_empty() {
                writer.write_all(r)?;
            }
        }
        return Ok(());
    }

    // Serial path: same branchless scheme into one reused buffer.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut out_pos = 0;
        let len = chunk.len();
        let mut i = 0;

        while i + 8 <= len {
            unsafe {
                let b0 = *chunk.get_unchecked(i);
                let b1 = *chunk.get_unchecked(i + 1);
                let b2 = *chunk.get_unchecked(i + 2);
                let b3 = *chunk.get_unchecked(i + 3);
                let b4 = *chunk.get_unchecked(i + 4);
                let b5 = *chunk.get_unchecked(i + 5);
                let b6 = *chunk.get_unchecked(i + 6);
                let b7 = *chunk.get_unchecked(i + 7);

                *outbuf.get_unchecked_mut(out_pos) = b0;
                out_pos += !is_member(&member, b0) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b1;
                out_pos += !is_member(&member, b1) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b2;
                out_pos += !is_member(&member, b2) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b3;
                out_pos += !is_member(&member, b3) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b4;
                out_pos += !is_member(&member, b4) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b5;
                out_pos += !is_member(&member, b5) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b6;
                out_pos += !is_member(&member, b6) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b7;
                out_pos += !is_member(&member, b7) as usize;
            }
            i += 8;
        }

        while i < len {
            unsafe {
                let b = *chunk.get_unchecked(i);
                *outbuf.get_unchecked_mut(out_pos) = b;
                out_pos += !is_member(&member, b) as usize;
            }
            i += 1;
        }

        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1345
/// Delete exactly N (2 or 3) byte values from an in-memory slice using the
/// memchr2/memchr3 iterators, writing survivors to `writer`.
///
/// `N` is a const so the 2-vs-3 branch resolves at compile time. Callers
/// (`delete_mmap`) guarantee `chars.len() >= N`.
fn delete_multi_memchr_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let c0 = chars[0];
    let c1 = if N >= 2 { chars[1] } else { 0 };
    let c2 = if N >= 3 { chars[2] } else { 0 };

    if data.len() >= PAR_THRESHOLD {
        // Parallel path: each chunk copies its kept runs into its own Vec;
        // results are written back in input order.
        let n = rayon::current_num_threads().max(1);
        let cs = (data.len() / n).max(BUF_SIZE);
        let results: Vec<Vec<u8>> = data
            .par_chunks(cs)
            .map(|chunk| {
                let mut out = Vec::with_capacity(chunk.len());
                // `last` tracks the end of the previous match; the bytes
                // between matches are the runs to keep.
                let mut last = 0;
                macro_rules! process_iter {
                    ($iter:expr) => {
                        for pos in $iter {
                            if pos > last {
                                out.extend_from_slice(&chunk[last..pos]);
                            }
                            last = pos + 1;
                        }
                    };
                }
                if N == 2 {
                    process_iter!(memchr::memchr2_iter(c0, c1, chunk));
                } else {
                    process_iter!(memchr::memchr3_iter(c0, c1, c2, chunk));
                }
                // Trailing run after the final match.
                if last < chunk.len() {
                    out.extend_from_slice(&chunk[last..]);
                }
                out
            })
            .collect();
        for r in &results {
            if !r.is_empty() {
                writer.write_all(r)?;
            }
        }
        return Ok(());
    }

    // Serial path: copy kept runs into one reused buffer per chunk.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];

    for chunk in data.chunks(buf_size) {
        let mut wp = 0;
        let mut last = 0;

        macro_rules! process_iter {
            ($iter:expr) => {
                for pos in $iter {
                    if pos > last {
                        let run = pos - last;
                        outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                        wp += run;
                    }
                    last = pos + 1;
                }
            };
        }

        if N == 2 {
            process_iter!(memchr::memchr2_iter(c0, c1, chunk));
        } else {
            process_iter!(memchr::memchr3_iter(c0, c1, c2, chunk));
        }

        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1431
1432fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1435 if data.len() >= PAR_THRESHOLD {
1437 let n = rayon::current_num_threads().max(1);
1438 let cs = (data.len() / n).max(BUF_SIZE);
1439 let results: Vec<Vec<u8>> = data
1440 .par_chunks(cs)
1441 .map(|chunk| {
1442 let mut out = Vec::with_capacity(chunk.len());
1443 let mut last = 0;
1444 for pos in memchr::memchr_iter(ch, chunk) {
1445 if pos > last {
1446 out.extend_from_slice(&chunk[last..pos]);
1447 }
1448 last = pos + 1;
1449 }
1450 if last < chunk.len() {
1451 out.extend_from_slice(&chunk[last..]);
1452 }
1453 out
1454 })
1455 .collect();
1456 for r in &results {
1457 if !r.is_empty() {
1458 writer.write_all(r)?;
1459 }
1460 }
1461 return Ok(());
1462 }
1463
1464 let buf_size = data.len().min(BUF_SIZE);
1466 let mut outbuf = vec![0u8; buf_size];
1467
1468 for chunk in data.chunks(buf_size) {
1469 let mut wp = 0;
1470 let mut last = 0;
1471 for pos in memchr::memchr_iter(ch, chunk) {
1472 if pos > last {
1473 let run = pos - last;
1474 outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
1475 wp += run;
1476 }
1477 last = pos + 1;
1478 }
1479 if last < chunk.len() {
1480 let run = chunk.len() - last;
1481 outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
1482 wp += run;
1483 }
1484 writer.write_all(&outbuf[..wp])?;
1485 }
1486 Ok(())
1487}
1488
1489pub fn delete_squeeze_mmap(
1491 delete_chars: &[u8],
1492 squeeze_chars: &[u8],
1493 data: &[u8],
1494 writer: &mut impl Write,
1495) -> io::Result<()> {
1496 let delete_set = build_member_set(delete_chars);
1497 let squeeze_set = build_member_set(squeeze_chars);
1498 let buf_size = data.len().min(BUF_SIZE);
1499 let mut outbuf = vec![0u8; buf_size];
1500 let mut last_squeezed: u16 = 256;
1501
1502 for chunk in data.chunks(buf_size) {
1503 let mut out_pos = 0;
1504 for &b in chunk {
1505 if is_member(&delete_set, b) {
1506 continue;
1507 }
1508 if is_member(&squeeze_set, b) {
1509 if last_squeezed == b as u16 {
1510 continue;
1511 }
1512 last_squeezed = b as u16;
1513 } else {
1514 last_squeezed = 256;
1515 }
1516 unsafe {
1517 *outbuf.get_unchecked_mut(out_pos) = b;
1518 }
1519 out_pos += 1;
1520 }
1521 writer.write_all(&outbuf[..out_pos])?;
1522 }
1523 Ok(())
1524}
1525
1526pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1530 if squeeze_chars.len() == 1 {
1532 return squeeze_single_mmap(squeeze_chars[0], data, writer);
1533 }
1534
1535 if squeeze_chars.len() == 2 {
1537 return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
1538 }
1539 if squeeze_chars.len() == 3 {
1540 return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
1541 }
1542
1543 let member = build_member_set(squeeze_chars);
1545 let buf_size = data.len().min(BUF_SIZE);
1546 let mut outbuf = vec![0u8; buf_size];
1547 let mut last_squeezed: u16 = 256;
1548
1549 for chunk in data.chunks(buf_size) {
1550 let len = chunk.len();
1551 let mut wp = 0;
1552 let mut i = 0;
1553
1554 unsafe {
1555 let inp = chunk.as_ptr();
1556 let outp = outbuf.as_mut_ptr();
1557
1558 while i < len {
1559 let b = *inp.add(i);
1560 if is_member(&member, b) {
1561 if last_squeezed != b as u16 {
1562 *outp.add(wp) = b;
1563 wp += 1;
1564 last_squeezed = b as u16;
1565 }
1566 i += 1;
1567 while i < len && *inp.add(i) == b {
1569 i += 1;
1570 }
1571 } else {
1572 last_squeezed = 256;
1573 *outp.add(wp) = b;
1574 wp += 1;
1575 i += 1;
1576 }
1577 }
1578 }
1579 writer.write_all(&outbuf[..wp])?;
1580 }
1581 Ok(())
1582}
1583
/// Squeeze implementation specialized for `N` (2 or 3) squeeze bytes, using
/// memchr2/memchr3 to jump between candidate positions instead of testing
/// every byte.
///
/// Output is staged through `outbuf`: spans are flushed when they would not
/// fit, and a span larger than the whole buffer bypasses it with a direct
/// write. `last_squeezed` uses 256 as an out-of-range sentinel for "no run
/// currently open".
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    // Write position within `outbuf`.
    let mut wp = 0;
    // 256 = sentinel: no squeezable byte currently "open".
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Position of the next occurrence of any squeeze byte in $data.
    // N is a const generic, so the untaken branch compiles away.
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Appends $len bytes of $src to the staging buffer, flushing first when
    // they would not fit; a span larger than the whole buffer is written
    // directly to `writer` instead of being staged.
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > buf_size {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > buf_size {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                // Copy the non-member gap verbatim; it ends any open run.
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                // Emit the squeeze byte only if it does not continue a run
                // (a run can continue across the skip below or, with no gap,
                // across a previous iteration).
                if last_squeezed != b as u16 {
                    // Make room for the single byte if the buffer is full.
                    if wp >= buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                // Skip the remainder of the run of `b`.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No more squeeze bytes: copy the tail and stop.
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1661
/// Squeeze implementation specialized for a single squeeze byte `ch`:
/// collapses every run of consecutive `ch` bytes in `data` to one occurrence,
/// writing the result to `writer`.
fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    // Fast path: if no two adjacent `ch` bytes exist anywhere, there is
    // nothing to squeeze — forward the input untouched.
    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    if data.len() >= PAR_THRESHOLD {
        // Large inputs: build the entire squeezed output in one allocation
        // and issue a single write.
        // NOTE(review): despite the PAR_THRESHOLD name, this path is
        // single-threaded — the threshold only selects the one-big-buffer
        // strategy here.
        let mut outbuf = Vec::with_capacity(data.len());
        let len = data.len();
        let mut cursor = 0;
        while cursor < len {
            match memchr::memchr(ch, &data[cursor..]) {
                Some(offset) => {
                    let pos = cursor + offset;
                    // Copy the gap before the run verbatim.
                    if pos > cursor {
                        outbuf.extend_from_slice(&data[cursor..pos]);
                    }
                    // Emit one representative of the run...
                    outbuf.push(ch);
                    cursor = pos + 1;
                    // ...and skip the rest of it.
                    while cursor < len && data[cursor] == ch {
                        cursor += 1;
                    }
                }
                None => {
                    // Tail after the last occurrence of `ch`.
                    outbuf.extend_from_slice(&data[cursor..]);
                    break;
                }
            }
        }
        return writer.write_all(&outbuf);
    }

    // Smaller inputs: stage output through a fixed-size buffer, flushing as
    // it fills; a gap larger than the whole buffer is written directly.
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let len = data.len();
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let gap = pos - cursor;
                if gap > 0 {
                    // Flush first if the gap would overflow the buffer.
                    if wp + gap > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > buf_size {
                        // Oversized gap: bypass the staging buffer entirely.
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }
                // Make room for the single squeezed byte if needed.
                if wp >= buf_size {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;
                cursor = pos + 1;
                // Skip the remainder of the run of `ch`.
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                // Tail after the last run: copy (or write directly) and stop.
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > buf_size {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}