1use std::io::{self, Read, Write};
2
3use rayon::prelude::*;
4
/// Chunk size for mmap'd / bulk translation paths (1 MiB).
const BUF_SIZE: usize = 1024 * 1024;
/// Read-buffer size for streaming (pipe/stdin) paths (256 KiB).
const STREAM_BUF: usize = 256 * 1024;

/// Inputs at least this large (4 MiB) are translated in parallel via rayon.
const PARALLEL_TRANSLATE_THRESHOLD: usize = 4 * 1024 * 1024;
/// Build the 256-entry byte translation table for `tr SET1 SET2`.
///
/// Bytes not in `set1` map to themselves. Byte `set1[i]` maps to `set2[i]`;
/// when `set1` is longer than `set2`, the excess bytes map to the last byte
/// of `set2` (GNU tr extension behavior). An empty `set2` leaves those
/// entries as identity.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut mapping: [u8; 256] = std::array::from_fn(|b| b as u8);
    let fill = set2.last().copied();
    for (idx, &src) in set1.iter().enumerate() {
        mapping[src as usize] = match set2.get(idx) {
            Some(&dst) => dst,
            None => fill.unwrap_or(src),
        };
    }
    mapping
}
32
/// Build a 256-bit membership bitmap (32 bytes, LSB-first within each byte)
/// containing every byte present in `chars`.
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    let mut bits = [0u8; 32];
    chars.iter().for_each(|&c| {
        bits[(c >> 3) as usize] |= 1 << (c & 7);
    });
    bits
}
42
/// Test whether `ch` is present in a bitmap produced by `build_member_set`.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    // `ch >> 3` is at most 31, so indexing a [u8; 32] can never be out of
    // bounds; the compiler elides the bounds check, making the previous
    // `unsafe { get_unchecked }` pure risk with no benefit.
    set[(ch >> 3) as usize] & (1 << (ch & 7)) != 0
}
47
/// Classification of a translation table, used to pick a fast path.
#[allow(dead_code)]
enum TranslateKind {
    /// Every byte maps to itself; translation is a no-op.
    Identity,
    /// A single contiguous byte range `[lo, hi]` all shifted by the same
    /// wrapping `delta`; everything outside the range is identity.
    RangeDelta { lo: u8, hi: u8, delta: u8 },
    /// Arbitrary mapping; requires a full 256-entry table lookup.
    General,
}
62
63fn analyze_table(table: &[u8; 256]) -> TranslateKind {
65 let mut first_delta: Option<u8> = None;
66 let mut lo: u8 = 0;
67 let mut hi: u8 = 0;
68 let mut count: u32 = 0;
69
70 for i in 0..256u16 {
71 let actual = table[i as usize];
72 if actual != i as u8 {
73 let delta = actual.wrapping_sub(i as u8);
74 count += 1;
75 match first_delta {
76 None => {
77 first_delta = Some(delta);
78 lo = i as u8;
79 hi = i as u8;
80 }
81 Some(d) if d == delta => {
82 hi = i as u8;
83 }
84 _ => return TranslateKind::General,
85 }
86 }
87 }
88
89 match (count, first_delta) {
90 (0, _) => TranslateKind::Identity,
91 (c, Some(delta)) if c == (hi as u32 - lo as u32 + 1) => {
92 TranslateKind::RangeDelta { lo, hi, delta }
93 }
94 _ => TranslateKind::General,
95 }
96}
97
/// AVX2-accelerated translation kernels. Compiled only on x86_64; every
/// function is `#[target_feature(enable = "avx2")]`, so callers must first
/// verify AVX2 support at runtime (see `has_avx2`).
#[cfg(target_arch = "x86_64")]
mod simd_tr {
    /// Apply a RangeDelta translation from `data` into `out`: bytes in
    /// `[lo, hi]` get `delta` added (wrapping); all others copy through.
    ///
    /// # Safety
    /// Requires AVX2 at runtime and `out.len() >= data.len()`.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta(data: &[u8], out: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0usize;

            // Main loop, 4x unrolled: 128 bytes per iteration.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(inp.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(inp.add(i + 96) as *const __m256i);

                // Unsigned range test `b - lo <= hi - lo`, expressed as
                // min_epu8(d, range) == d (AVX2 has no unsigned byte compare).
                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                // Add delta only where the mask is all-ones (inside range).
                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(outp.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(outp.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // Single-vector (32-byte) tail loop.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail (< 32 bytes), same range trick in scalar form.
            while i < len {
                let b = *inp.add(i);
                *outp.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }

    /// Arbitrary 256-entry table lookup from `data` into `out` via nibble
    /// decomposition: the table is split into sixteen 16-byte rows (one per
    /// high nibble); each row is applied with `_mm256_shuffle_epi8` on the
    /// low nibble and selected by comparing the high nibble.
    ///
    /// # Safety
    /// Requires AVX2 at runtime and `out.len() >= data.len()`.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup(data: &[u8], out: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            let tp = table.as_ptr();
            // Broadcast each 16-byte table row into both 128-bit lanes,
            // because _mm256_shuffle_epi8 indexes within each lane.
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0;

            // Main loop, 2x unrolled: 64 bytes per iteration.
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // Accumulate each high-nibble bucket: shuffle the matching
                // LUT row by the low nibble, mask by high-nibble equality.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Single-vector tail: same nibble scheme, runtime loop over rows.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail: plain table lookup.
            while i < len {
                *outp.add(i) = *table.get_unchecked(*inp.add(i) as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `general_lookup`; identical nibble-LUT scheme,
    /// loading and storing through the same pointer.
    ///
    /// # Safety
    /// Requires AVX2 at runtime.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup_inplace(data: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0;

            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = *table.get_unchecked(b as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `range_delta`; same masked-add scheme, reading
    /// and writing through the same pointer.
    ///
    /// # Safety
    /// Requires AVX2 at runtime.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta_inplace(data: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0usize;

            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(ptr.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(ptr.add(i + 96) as *const __m256i);

                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(ptr.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(ptr.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }
}
451
/// Runtime check for AVX2 support; gates every fast path in `simd_tr`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn has_avx2() -> bool {
    is_x86_feature_detected!("avx2")
}
461
/// Scalar fallback: translate `chunk` into `out` through a 256-entry lookup
/// table, writing exactly `chunk.len()` bytes.
///
/// Callers always pass an `out` at least as long as `chunk`; `zip` bounds
/// the loop by the shorter slice and lets the compiler elide bounds checks.
#[inline(always)]
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    for (dst, &src) in out.iter_mut().zip(chunk.iter()) {
        *dst = table[src as usize];
    }
}
494
/// Scalar fallback: translate `data` in place through a 256-entry lookup
/// table. A byte index can never exceed 255, so safe indexing into the
/// table carries no bounds-check cost after optimization.
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    for byte in data.iter_mut() {
        *byte = table[*byte as usize];
    }
}
521
/// Translate `chunk` into `out`, selecting the fastest implementation for
/// the table classification `kind`.
///
/// `out` must be at least `chunk.len()` bytes. `_use_simd` gates the AVX2
/// paths and is ignored off x86_64 (hence the underscore).
#[inline]
fn translate_chunk_dispatch(
    chunk: &[u8],
    out: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {
            // No byte changes: straight copy.
            out[..chunk.len()].copy_from_slice(chunk);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY(review): `_use_simd` is only true when AVX2 was
                // detected, satisfying the target-feature requirement.
                unsafe { simd_tr::range_delta(chunk, out, *lo, *hi, *delta) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_chunk(chunk, out, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY(review): AVX2 availability checked by the caller.
                unsafe { simd_tr::general_lookup(chunk, out, table) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_chunk(chunk, out, table);
        }
    }
}
566
/// Translate `data` in place, selecting the fastest implementation for the
/// table classification `kind`. Mirrors `translate_chunk_dispatch` for the
/// streaming (single-buffer) paths.
#[inline]
fn translate_inplace_dispatch(
    data: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        // Identity mapping: nothing to do in place.
        TranslateKind::Identity => {}
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY(review): `_use_simd` implies AVX2 was detected.
                unsafe { simd_tr::range_delta_inplace(data, *lo, *hi, *delta) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_inplace(data, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY(review): AVX2 availability checked by the caller.
                unsafe { simd_tr::general_lookup_inplace(data, table) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_inplace(data, table);
        }
    }
}
604
605pub fn translate(
612 set1: &[u8],
613 set2: &[u8],
614 reader: &mut impl Read,
615 writer: &mut impl Write,
616) -> io::Result<()> {
617 let table = build_translate_table(set1, set2);
618 let kind = analyze_table(&table);
619 #[cfg(target_arch = "x86_64")]
620 let use_simd = has_avx2();
621 #[cfg(not(target_arch = "x86_64"))]
622 let use_simd = false;
623
624 let mut buf = vec![0u8; BUF_SIZE];
626 loop {
627 let n = match reader.read(&mut buf) {
628 Ok(0) => break,
629 Ok(n) => n,
630 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
631 Err(e) => return Err(e),
632 };
633 translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
634 writer.write_all(&buf[..n])?;
635 }
636 Ok(())
637}
638
/// Stream translate-then-squeeze (tr -s SET1 SET2): bytes are translated
/// first, then consecutive repeats of any byte in `set2` collapse to one.
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeezing applies to post-translation bytes, i.e. members of set2.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; STREAM_BUF];
    // Last emitted squeezable byte, or 256 meaning "none"; persists across
    // reads so runs spanning buffer boundaries still collapse.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
        // In-place compaction: the write cursor `wp` never passes the read
        // cursor `i`, so reads always see untouched input bytes.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
689
690pub fn delete(
691 delete_chars: &[u8],
692 reader: &mut impl Read,
693 writer: &mut impl Write,
694) -> io::Result<()> {
695 if delete_chars.len() == 1 {
697 return delete_single_streaming(delete_chars[0], reader, writer);
698 }
699
700 if delete_chars.len() <= 3 {
702 return delete_multi_streaming(delete_chars, reader, writer);
703 }
704
705 let member = build_member_set(delete_chars);
706 let mut buf = vec![0u8; STREAM_BUF];
708
709 loop {
710 let n = match reader.read(&mut buf) {
711 Ok(0) => break,
712 Ok(n) => n,
713 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
714 Err(e) => return Err(e),
715 };
716 let mut wp = 0;
717 unsafe {
718 let ptr = buf.as_mut_ptr();
719 let mut i = 0;
720 while i + 8 <= n {
722 let b0 = *ptr.add(i);
723 let b1 = *ptr.add(i + 1);
724 let b2 = *ptr.add(i + 2);
725 let b3 = *ptr.add(i + 3);
726 let b4 = *ptr.add(i + 4);
727 let b5 = *ptr.add(i + 5);
728 let b6 = *ptr.add(i + 6);
729 let b7 = *ptr.add(i + 7);
730
731 if !is_member(&member, b0) {
732 *ptr.add(wp) = b0;
733 wp += 1;
734 }
735 if !is_member(&member, b1) {
736 *ptr.add(wp) = b1;
737 wp += 1;
738 }
739 if !is_member(&member, b2) {
740 *ptr.add(wp) = b2;
741 wp += 1;
742 }
743 if !is_member(&member, b3) {
744 *ptr.add(wp) = b3;
745 wp += 1;
746 }
747 if !is_member(&member, b4) {
748 *ptr.add(wp) = b4;
749 wp += 1;
750 }
751 if !is_member(&member, b5) {
752 *ptr.add(wp) = b5;
753 wp += 1;
754 }
755 if !is_member(&member, b6) {
756 *ptr.add(wp) = b6;
757 wp += 1;
758 }
759 if !is_member(&member, b7) {
760 *ptr.add(wp) = b7;
761 wp += 1;
762 }
763 i += 8;
764 }
765 while i < n {
766 let b = *ptr.add(i);
767 if !is_member(&member, b) {
768 *ptr.add(wp) = b;
769 wp += 1;
770 }
771 i += 1;
772 }
773 }
774 writer.write_all(&buf[..wp])?;
775 }
776 Ok(())
777}
778
/// Streaming delete of a single byte `ch`, using memchr to skip over runs
/// of kept bytes and overlapping `ptr::copy` to compact in place.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // `wp` trails `i`; the kept span [i, i+offset) is shifted down to
        // `wp` only when they differ (before the first deletion wp == i and
        // the bytes are already in place).
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                // SAFETY(review): src/dst are in-bounds and
                                // ptr::copy permits overlap; wp <= i holds.
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    // Skip the matched `ch` itself.
                    i += offset + 1;
                }
                None => {
                    // No more matches: keep the rest of the buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
835
/// Streaming delete for exactly 2 or 3 bytes, using memchr2/memchr3 to
/// locate deletions and overlapping `ptr::copy` to compact in place.
///
/// Precondition: `chars.len()` is 2 or 3 (`chars[0..2]`/`chars[0..3]` are
/// indexed unconditionally).
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    // Shift the kept span down only once a gap has opened.
                    if offset > 0 {
                        if wp != i {
                            unsafe {
                                // SAFETY(review): in-bounds, wp <= i, and
                                // ptr::copy tolerates overlapping ranges.
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    // Skip the deleted byte.
                    i += offset + 1;
                }
                None => {
                    // Tail with no deletions: keep it all.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
896
897pub fn delete_squeeze(
898 delete_chars: &[u8],
899 squeeze_chars: &[u8],
900 reader: &mut impl Read,
901 writer: &mut impl Write,
902) -> io::Result<()> {
903 let delete_set = build_member_set(delete_chars);
904 let squeeze_set = build_member_set(squeeze_chars);
905 let mut buf = vec![0u8; STREAM_BUF];
907 let mut last_squeezed: u16 = 256;
908
909 loop {
910 let n = match reader.read(&mut buf) {
911 Ok(0) => break,
912 Ok(n) => n,
913 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
914 Err(e) => return Err(e),
915 };
916 let mut wp = 0;
917 unsafe {
918 let ptr = buf.as_mut_ptr();
919 for i in 0..n {
920 let b = *ptr.add(i);
921 if is_member(&delete_set, b) {
922 continue;
923 }
924 if is_member(&squeeze_set, b) {
925 if last_squeezed == b as u16 {
926 continue;
927 }
928 last_squeezed = b as u16;
929 } else {
930 last_squeezed = 256;
931 }
932 *ptr.add(wp) = b;
933 wp += 1;
934 }
935 }
936 writer.write_all(&buf[..wp])?;
937 }
938 Ok(())
939}
940
941pub fn squeeze(
942 squeeze_chars: &[u8],
943 reader: &mut impl Read,
944 writer: &mut impl Write,
945) -> io::Result<()> {
946 if squeeze_chars.len() == 1 {
948 return squeeze_single_stream(squeeze_chars[0], reader, writer);
949 }
950
951 let member = build_member_set(squeeze_chars);
952 let mut buf = vec![0u8; STREAM_BUF];
954 let mut last_squeezed: u16 = 256;
955
956 loop {
957 let n = match reader.read(&mut buf) {
958 Ok(0) => break,
959 Ok(n) => n,
960 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
961 Err(e) => return Err(e),
962 };
963 let mut wp = 0;
964 unsafe {
965 let ptr = buf.as_mut_ptr();
966 for i in 0..n {
967 let b = *ptr.add(i);
968 if is_member(&member, b) {
969 if last_squeezed == b as u16 {
970 continue;
971 }
972 last_squeezed = b as u16;
973 } else {
974 last_squeezed = 256;
975 }
976 *ptr.add(wp) = b;
977 wp += 1;
978 }
979 }
980 writer.write_all(&buf[..wp])?;
981 }
982 Ok(())
983}
984
/// Streaming squeeze of a single byte `ch`: memchr finds each occurrence,
/// one copy of `ch` is emitted, and the rest of the run is skipped.
/// `was_squeeze_char` carries run state across buffer boundaries.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    // True when the previous emitted byte (possibly in the prior buffer)
    // was `ch`, so a run continuing into this buffer must be skipped.
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // In-place compaction: `wp` (write) trails `i` (read).
        let mut wp = 0;
        let mut i = 0;

        while i < n {
            // A run of `ch` continuing from the previous buffer/iteration:
            // the first copy was already emitted, so drop the whole run.
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Keep the literal bytes before the run.
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                // SAFETY(review): in-bounds, wp <= i, and
                                // ptr::copy allows overlapping ranges.
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    // Emit exactly one copy of `ch` for the run.
                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    // Skip the remainder of the run within this buffer.
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    // No more `ch`: keep the tail verbatim.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1077
/// Translate an in-memory (typically mmap'd) `data` slice to `writer`.
///
/// Identity tables are written straight through; inputs at or above
/// `PARALLEL_TRANSLATE_THRESHOLD` are translated in parallel with rayon;
/// everything else goes through one or more BUF_SIZE chunks.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    if matches!(kind, TranslateKind::Identity) {
        return writer.write_all(data);
    }

    if data.len() >= PARALLEL_TRANSLATE_THRESHOLD {
        let mut out = vec![0u8; data.len()];
        // One chunk per rayon worker, rounded up to a BUF_SIZE multiple so
        // input and output chunk boundaries stay aligned for the zip below.
        let num_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() + num_threads - 1) / num_threads;
        let chunk_size = ((chunk_size + BUF_SIZE - 1) / BUF_SIZE) * BUF_SIZE;

        data.par_chunks(chunk_size)
            .zip(out.par_chunks_mut(chunk_size))
            .for_each(|(inp, outp)| {
                translate_chunk_dispatch(inp, &mut outp[..inp.len()], &table, &kind, use_simd);
            });

        writer.write_all(&out)?;
        return Ok(());
    }

    // Small input: translate in a single pass and one write.
    if data.len() <= BUF_SIZE {
        let mut out = vec![0u8; data.len()];
        translate_chunk_dispatch(data, &mut out, &table, &kind, use_simd);
        writer.write_all(&out)?;
        return Ok(());
    }

    // Mid-size input: reuse one BUF_SIZE staging buffer per chunk.
    let mut out = vec![0u8; BUF_SIZE];
    for chunk in data.chunks(BUF_SIZE) {
        translate_chunk_dispatch(chunk, &mut out[..chunk.len()], &table, &kind, use_simd);
        writer.write_all(&out[..chunk.len()])?;
    }
    Ok(())
}
1138
/// Translate-then-squeeze for an in-memory `data` slice (tr -s SET1 SET2):
/// translate each BUF_SIZE chunk, then collapse runs of `set2` bytes.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeezing applies to post-translation bytes, i.e. members of set2.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; BUF_SIZE];
    // Last emitted squeezable byte, or 256 ("none"); persists across chunks
    // so runs spanning chunk boundaries still collapse.
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(BUF_SIZE) {
        translate_chunk_dispatch(chunk, &mut buf[..chunk.len()], &table, &kind, use_simd);

        // Compact the translated bytes in place within the staging buffer;
        // `wp` never passes `i`.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue;
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1186
/// Delete bytes from an in-memory `data` slice (tr -d). Fast paths cover
/// 1-3 delete characters via memchr; the general path filters each chunk
/// through a bitmap with an 8x-unrolled loop into a staging buffer.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }

    if delete_chars.len() == 2 {
        return delete_multi_memchr_mmap::<2>(delete_chars, data, writer);
    }

    if delete_chars.len() == 3 {
        return delete_multi_memchr_mmap::<3>(delete_chars, data, writer);
    }

    let member = build_member_set(delete_chars);
    // Staging buffer: output of a chunk can never exceed the chunk length,
    // so BUF_SIZE is always enough.
    let mut outbuf = vec![0u8; BUF_SIZE];

    for chunk in data.chunks(BUF_SIZE) {
        let mut out_pos = 0;
        let len = chunk.len();
        let mut i = 0;

        // 8x unrolled filter loop.
        while i + 8 <= len {
            unsafe {
                let b0 = *chunk.get_unchecked(i);
                let b1 = *chunk.get_unchecked(i + 1);
                let b2 = *chunk.get_unchecked(i + 2);
                let b3 = *chunk.get_unchecked(i + 3);
                let b4 = *chunk.get_unchecked(i + 4);
                let b5 = *chunk.get_unchecked(i + 5);
                let b6 = *chunk.get_unchecked(i + 6);
                let b7 = *chunk.get_unchecked(i + 7);

                if !is_member(&member, b0) {
                    *outbuf.get_unchecked_mut(out_pos) = b0;
                    out_pos += 1;
                }
                if !is_member(&member, b1) {
                    *outbuf.get_unchecked_mut(out_pos) = b1;
                    out_pos += 1;
                }
                if !is_member(&member, b2) {
                    *outbuf.get_unchecked_mut(out_pos) = b2;
                    out_pos += 1;
                }
                if !is_member(&member, b3) {
                    *outbuf.get_unchecked_mut(out_pos) = b3;
                    out_pos += 1;
                }
                if !is_member(&member, b4) {
                    *outbuf.get_unchecked_mut(out_pos) = b4;
                    out_pos += 1;
                }
                if !is_member(&member, b5) {
                    *outbuf.get_unchecked_mut(out_pos) = b5;
                    out_pos += 1;
                }
                if !is_member(&member, b6) {
                    *outbuf.get_unchecked_mut(out_pos) = b6;
                    out_pos += 1;
                }
                if !is_member(&member, b7) {
                    *outbuf.get_unchecked_mut(out_pos) = b7;
                    out_pos += 1;
                }
            }
            i += 8;
        }

        // Scalar tail (< 8 bytes).
        while i < len {
            unsafe {
                let b = *chunk.get_unchecked(i);
                if !is_member(&member, b) {
                    *outbuf.get_unchecked_mut(out_pos) = b;
                    out_pos += 1;
                }
            }
            i += 1;
        }

        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1277
/// Delete exactly N (2 or 3) bytes from in-memory `data` using
/// memchr2/memchr3 iterators, copying the kept spans between matches into
/// a staging buffer chunk by chunk.
///
/// `N` selects the iterator at compile time; `chars` must hold at least
/// `N` bytes.
fn delete_multi_memchr_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut outbuf = vec![0u8; BUF_SIZE];

    for chunk in data.chunks(BUF_SIZE) {
        let mut wp = 0;
        // End (exclusive) of the last copied span within `chunk`.
        let mut last = 0;

        // Copy the kept bytes between consecutive match positions.
        macro_rules! process_iter {
            ($iter:expr) => {
                for pos in $iter {
                    if pos > last {
                        let run = pos - last;
                        outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                        wp += run;
                    }
                    last = pos + 1;
                }
            };
        }

        if N == 2 {
            process_iter!(memchr::memchr2_iter(chars[0], chars[1], chunk));
        } else {
            process_iter!(memchr::memchr3_iter(chars[0], chars[1], chars[2], chunk));
        }

        // Keep the tail after the final match.
        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1320
1321fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1325 let mut outbuf = vec![0u8; BUF_SIZE];
1326
1327 for chunk in data.chunks(BUF_SIZE) {
1328 let mut wp = 0;
1329 let mut last = 0;
1330 for pos in memchr::memchr_iter(ch, chunk) {
1331 if pos > last {
1332 let run = pos - last;
1333 outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
1334 wp += run;
1335 }
1336 last = pos + 1;
1337 }
1338 if last < chunk.len() {
1339 let run = chunk.len() - last;
1340 outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
1341 wp += run;
1342 }
1343 writer.write_all(&outbuf[..wp])?;
1344 }
1345 Ok(())
1346}
1347
1348pub fn delete_squeeze_mmap(
1350 delete_chars: &[u8],
1351 squeeze_chars: &[u8],
1352 data: &[u8],
1353 writer: &mut impl Write,
1354) -> io::Result<()> {
1355 let delete_set = build_member_set(delete_chars);
1356 let squeeze_set = build_member_set(squeeze_chars);
1357 let mut outbuf = vec![0u8; BUF_SIZE];
1358 let mut last_squeezed: u16 = 256;
1359
1360 for chunk in data.chunks(BUF_SIZE) {
1361 let mut out_pos = 0;
1362 for &b in chunk {
1363 if is_member(&delete_set, b) {
1364 continue;
1365 }
1366 if is_member(&squeeze_set, b) {
1367 if last_squeezed == b as u16 {
1368 continue;
1369 }
1370 last_squeezed = b as u16;
1371 } else {
1372 last_squeezed = 256;
1373 }
1374 unsafe {
1375 *outbuf.get_unchecked_mut(out_pos) = b;
1376 }
1377 out_pos += 1;
1378 }
1379 writer.write_all(&outbuf[..out_pos])?;
1380 }
1381 Ok(())
1382}
1383
1384pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1388 if squeeze_chars.len() == 1 {
1390 return squeeze_single_mmap(squeeze_chars[0], data, writer);
1391 }
1392
1393 if squeeze_chars.len() == 2 {
1395 return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
1396 }
1397 if squeeze_chars.len() == 3 {
1398 return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
1399 }
1400
1401 let member = build_member_set(squeeze_chars);
1403 let mut outbuf = vec![0u8; BUF_SIZE];
1404 let mut last_squeezed: u16 = 256;
1405
1406 for chunk in data.chunks(BUF_SIZE) {
1407 let len = chunk.len();
1408 let mut wp = 0;
1409 let mut i = 0;
1410
1411 unsafe {
1412 let inp = chunk.as_ptr();
1413 let outp = outbuf.as_mut_ptr();
1414
1415 while i < len {
1416 let b = *inp.add(i);
1417 if is_member(&member, b) {
1418 if last_squeezed != b as u16 {
1419 *outp.add(wp) = b;
1420 wp += 1;
1421 last_squeezed = b as u16;
1422 }
1423 i += 1;
1424 while i < len && *inp.add(i) == b {
1426 i += 1;
1427 }
1428 } else {
1429 last_squeezed = 256;
1430 *outp.add(wp) = b;
1431 wp += 1;
1432 i += 1;
1433 }
1434 }
1435 }
1436 writer.write_all(&outbuf[..wp])?;
1437 }
1438 Ok(())
1439}
1440
/// Squeeze runs of exactly N (2 or 3) distinct bytes in in-memory `data`,
/// locating squeeze bytes with memchr2/memchr3 and buffering output in a
/// BUF_SIZE staging buffer.
///
/// `N` selects the search function at compile time; `chars` must hold at
/// least `N` bytes.
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut outbuf = vec![0u8; BUF_SIZE];
    let mut wp = 0;
    // Last emitted squeezable byte, or 256 ("none").
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Find the next occurrence of any squeeze byte.
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Append `$src` ($len bytes) to the staging buffer, flushing first if
    // it would overflow; spans larger than the whole buffer are written
    // straight through instead of being staged.
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > BUF_SIZE {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > BUF_SIZE {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                // Copy the literal (non-squeeze) span preceding the match;
                // its last byte is not squeezable, so the run state resets.
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                // Emit one copy of `b` unless the run continues an already
                // emitted one.
                if last_squeezed != b as u16 {
                    if wp >= BUF_SIZE {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                // Skip the remainder of the run of `b`.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No further squeeze bytes: copy the tail and stop.
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1517
1518fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1522 let mut outbuf = vec![0u8; BUF_SIZE];
1523 let mut wp = 0;
1524 let mut cursor = 0;
1525
1526 while cursor < data.len() {
1527 match memchr::memchr(ch, &data[cursor..]) {
1528 Some(offset) => {
1529 let pos = cursor + offset;
1530 let run = offset + 1; if wp + run > BUF_SIZE {
1534 writer.write_all(&outbuf[..wp])?;
1535 wp = 0;
1536 }
1537
1538 outbuf[wp..wp + run].copy_from_slice(&data[cursor..pos + 1]);
1539 wp += run;
1540
1541 let mut skip = pos + 1;
1543 while skip < data.len() && data[skip] == ch {
1544 skip += 1;
1545 }
1546 cursor = skip;
1547 }
1548 None => {
1549 let remaining = data.len() - cursor;
1550 if wp + remaining > BUF_SIZE {
1551 writer.write_all(&outbuf[..wp])?;
1552 wp = 0;
1553 }
1554 if remaining > BUF_SIZE {
1555 writer.write_all(&data[cursor..])?;
1557 } else {
1558 outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
1559 wp += remaining;
1560 }
1561 break;
1562 }
1563 }
1564 }
1565 if wp > 0 {
1566 writer.write_all(&outbuf[..wp])?;
1567 }
1568 Ok(())
1569}