1use std::io::{self, Read, Write};
2
/// Chunk size for bulk (mmap-backed) processing: 1 MiB per write.
const BUF_SIZE: usize = 1024 * 1024;
/// Buffer size for streaming (pipe/stdin) reads: 256 KiB.
const STREAM_BUF: usize = 256 * 1024;
10
/// Build the 256-entry byte translation table mapping SET1 onto SET2.
///
/// Follows GNU `tr` semantics: when SET2 is shorter than SET1, its final
/// byte is repeated for the excess SET1 bytes; an empty SET2 leaves those
/// bytes untranslated. Later duplicates in SET1 override earlier entries.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    // Start from the identity mapping: every byte maps to itself.
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    // Padding byte used once SET2 runs out (None when SET2 is empty).
    let pad = set2.last().copied();
    for (idx, &src) in set1.iter().enumerate() {
        let dst = match set2.get(idx) {
            Some(&d) => d,
            None => pad.unwrap_or(src),
        };
        table[src as usize] = dst;
    }
    table
}
25
/// Build a 256-bit membership bitmap over the given bytes.
///
/// Bit `c` of the set lives at byte `c / 8`, bit position `c % 8`;
/// query with [`is_member`].
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    let mut bitmap = [0u8; 32];
    for &c in chars.iter() {
        let byte = (c >> 3) as usize;
        let bit = c & 7;
        bitmap[byte] |= 1u8 << bit;
    }
    bitmap
}
35
/// Test whether byte `ch` is present in a bitmap built by `build_member_set`.
///
/// The previous implementation used `get_unchecked` in an `unsafe` block,
/// but `ch as usize >> 3` is at most 31 for any `u8`, so indexing into the
/// `[u8; 32]` array is provably in bounds and the compiler elides the
/// bounds check; the safe form generates identical code.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    (set[ch as usize >> 3] & (1 << (ch & 7))) != 0
}
40
/// Classification of a translation table, used to select the fastest kernel.
#[allow(dead_code)] enum TranslateKind {
    /// Every byte maps to itself; translation is a no-op (memcpy at most).
    Identity,
    /// Exactly the contiguous byte range `lo..=hi` is remapped, all by the
    /// same wrapping offset `delta` (e.g. ASCII A-Z -> a-z is lo=b'A',
    /// hi=b'Z', delta=32). Amenable to a branch-free SIMD compare+add.
    RangeDelta { lo: u8, hi: u8, delta: u8 },
    /// Arbitrary mapping; needs a full 256-entry table lookup per byte.
    General,
}
55
56fn analyze_table(table: &[u8; 256]) -> TranslateKind {
58 let mut first_delta: Option<u8> = None;
59 let mut lo: u8 = 0;
60 let mut hi: u8 = 0;
61 let mut count: u32 = 0;
62
63 for i in 0..256u16 {
64 let actual = table[i as usize];
65 if actual != i as u8 {
66 let delta = actual.wrapping_sub(i as u8);
67 count += 1;
68 match first_delta {
69 None => {
70 first_delta = Some(delta);
71 lo = i as u8;
72 hi = i as u8;
73 }
74 Some(d) if d == delta => {
75 hi = i as u8;
76 }
77 _ => return TranslateKind::General,
78 }
79 }
80 }
81
82 match (count, first_delta) {
83 (0, _) => TranslateKind::Identity,
84 (c, Some(delta)) if c == (hi as u32 - lo as u32 + 1) => {
85 TranslateKind::RangeDelta { lo, hi, delta }
86 }
87 _ => TranslateKind::General,
88 }
89}
90
/// AVX2 kernels for byte translation.
///
/// All functions are `#[target_feature(enable = "avx2")]`: callers must
/// verify AVX2 support at runtime (see `has_avx2`) before invoking them.
#[cfg(target_arch = "x86_64")]
mod simd_tr {
    /// Out-of-place RangeDelta translation: bytes in `lo..=hi` get `delta`
    /// added (wrapping); all other bytes pass through unchanged.
    ///
    /// # Safety
    /// Requires AVX2, and `out` must be at least `data.len()` bytes.
    /// Also assumes `lo <= hi` (guaranteed by `analyze_table`).
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta(data: &[u8], out: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 4 vectors (128 bytes) per iteration to hide latency.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(inp.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(inp.add(i + 96) as *const __m256i);

                // d = b - lo; b is in [lo, hi] iff d <= hi - lo as unsigned,
                // tested branch-free as min(d, range) == d.
                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                // Add delta only where the in-range mask is all-ones.
                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(outp.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(outp.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // Single-vector tail (32-127 remaining bytes).
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail (< 32 remaining bytes), same range test.
            while i < len {
                let b = *inp.add(i);
                *outp.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }

    /// Out-of-place translation through an arbitrary 256-byte table.
    ///
    /// Technique: split each byte into nibbles. The table is loaded into 16
    /// broadcast LUT registers (one per high nibble); for each high-nibble
    /// value a compare mask selects which lanes take the `pshufb` result of
    /// that LUT indexed by the low nibble, and the 16 partial results are
    /// OR-merged.
    ///
    /// # Safety
    /// Requires AVX2, and `out` must be at least `data.len()` bytes.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup(data: &[u8], out: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // luts[h] holds table[h*16 .. h*16+16] broadcast to both lanes.
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0;

            // Main loop: two vectors (64 bytes) per iteration.
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                // srli_epi16 shifts 16-bit lanes; masking with 0x0F afterwards
                // discards the bits that crossed in from the neighboring byte.
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // Accumulate the contribution of one high-nibble bucket.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Single-vector tail: same nibble scheme with a runtime loop.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail: plain table lookup.
            while i < len {
                *outp.add(i) = *table.get_unchecked(*inp.add(i) as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of [`general_lookup`] (same nibble-LUT technique).
    ///
    /// # Safety
    /// Requires AVX2.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup_inplace(data: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0;

            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = *table.get_unchecked(b as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of [`range_delta`] (same compare-and-add technique).
    ///
    /// # Safety
    /// Requires AVX2; assumes `lo <= hi`.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta_inplace(data: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0usize;

            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(ptr.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(ptr.add(i + 96) as *const __m256i);

                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(ptr.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(ptr.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }
}
444
/// Runtime check for AVX2 support; gates every call into `simd_tr`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn has_avx2() -> bool {
    is_x86_feature_detected!("avx2")
}
454
/// Scalar fallback: translate `chunk` through `table` into `out`.
///
/// `out` must be at least `chunk.len()` bytes (checked in debug builds).
///
/// The previous implementation hand-unrolled an `unsafe`
/// `get_unchecked` loop; a zip iterator is bounds-check-free by
/// construction and lets LLVM unroll/vectorize on its own, so the
/// unsafe code bought nothing.
#[inline(always)]
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    debug_assert!(out.len() >= chunk.len());
    for (dst, &src) in out.iter_mut().zip(chunk.iter()) {
        // A u8 index into a [u8; 256] table can never be out of bounds.
        *dst = table[src as usize];
    }
}
487
/// Scalar fallback: translate `data` through `table` in place.
///
/// The previous implementation manually unrolled a raw-pointer loop;
/// safe `iter_mut` with a `u8` index into a `[u8; 256]` table has no
/// bounds checks to elide (the index is provably in range), so the
/// compiler emits equivalent code without any `unsafe`.
#[inline(always)]
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    for b in data.iter_mut() {
        *b = table[*b as usize];
    }
}
514
/// Translate `chunk` into `out` using the fastest kernel for `kind`.
///
/// `out` must be at least `chunk.len()` bytes. `_use_simd` is only
/// honored on x86_64 and must be true only when AVX2 was detected.
#[inline]
fn translate_chunk_dispatch(
    chunk: &[u8],
    out: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {
            // Nothing to translate: a straight copy suffices.
            out[..chunk.len()].copy_from_slice(chunk);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is true only when AVX2 is available,
                // and callers guarantee out.len() >= chunk.len().
                unsafe { simd_tr::range_delta(chunk, out, *lo, *hi, *delta) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_chunk(chunk, out, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: see above.
                unsafe { simd_tr::general_lookup(chunk, out, table) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_chunk(chunk, out, table);
        }
    }
}
559
/// Translate `data` in place using the fastest kernel for `kind`.
///
/// `_use_simd` is only honored on x86_64 and must be true only when
/// AVX2 was detected.
#[inline]
fn translate_inplace_dispatch(
    data: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        // Identity: nothing to do at all for the in-place case.
        TranslateKind::Identity => {}
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is true only when AVX2 is available.
                unsafe { simd_tr::range_delta_inplace(data, *lo, *hi, *delta) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_inplace(data, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: see above.
                unsafe { simd_tr::general_lookup_inplace(data, table) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_inplace(data, table);
        }
    }
}
597
/// Stream `reader` to `writer`, translating bytes per SET1 -> SET2
/// (`tr SET1 SET2`).
pub fn translate(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; BUF_SIZE];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // Translate the bytes we just read in place, then forward them.
        translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
        writer.write_all(&buf[..n])?;
    }
    Ok(())
}
631
/// Stream `reader` to `writer`, translating SET1 -> SET2 and then
/// squeezing repeats of SET2 members in the output (`tr -s SET1 SET2`).
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeezing applies to the *translated* output, over SET2's members.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; STREAM_BUF];
    // Last emitted squeezable byte; 256 is a sentinel for "none". Kept
    // across reads so runs spanning buffer boundaries still squeeze.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
        // Compact in place: wp trails i, so writes never clobber unread input.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue; // repeat of the previous squeezable byte
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
682
/// Stream `reader` to `writer`, deleting every byte in `delete_chars`
/// (`tr -d SET1`). Dispatches to memchr-based paths for 1-3 characters.
pub fn delete(
    delete_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_streaming(delete_chars[0], reader, writer);
    }

    if delete_chars.len() <= 3 {
        return delete_multi_streaming(delete_chars, reader, writer);
    }

    // 4+ characters: bitmap membership test with an unrolled compaction loop.
    let member = build_member_set(delete_chars);
    let mut buf = vec![0u8; STREAM_BUF];

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut wp = 0;
        // SAFETY: wp <= i at all times (bytes are only ever dropped), so
        // writes at wp never overwrite input that has not been read yet;
        // the 8 bytes of each group are loaded before any write.
        unsafe {
            let ptr = buf.as_mut_ptr();
            let mut i = 0;
            while i + 8 <= n {
                let b0 = *ptr.add(i);
                let b1 = *ptr.add(i + 1);
                let b2 = *ptr.add(i + 2);
                let b3 = *ptr.add(i + 3);
                let b4 = *ptr.add(i + 4);
                let b5 = *ptr.add(i + 5);
                let b6 = *ptr.add(i + 6);
                let b7 = *ptr.add(i + 7);

                if !is_member(&member, b0) {
                    *ptr.add(wp) = b0;
                    wp += 1;
                }
                if !is_member(&member, b1) {
                    *ptr.add(wp) = b1;
                    wp += 1;
                }
                if !is_member(&member, b2) {
                    *ptr.add(wp) = b2;
                    wp += 1;
                }
                if !is_member(&member, b3) {
                    *ptr.add(wp) = b3;
                    wp += 1;
                }
                if !is_member(&member, b4) {
                    *ptr.add(wp) = b4;
                    wp += 1;
                }
                if !is_member(&member, b5) {
                    *ptr.add(wp) = b5;
                    wp += 1;
                }
                if !is_member(&member, b6) {
                    *ptr.add(wp) = b6;
                    wp += 1;
                }
                if !is_member(&member, b7) {
                    *ptr.add(wp) = b7;
                    wp += 1;
                }
                i += 8;
            }
            // Remaining < 8 bytes.
            while i < n {
                let b = *ptr.add(i);
                if !is_member(&member, b) {
                    *ptr.add(wp) = b;
                    wp += 1;
                }
                i += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
771
/// Streaming delete of a single byte value, using memchr to locate
/// occurrences and shifting the kept spans left in place.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // wp = write position (compacted length so far), i = read position.
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    if offset > 0 {
                        // Shift the kept span left only if it moved
                        // (wp == i means nothing was deleted yet).
                        if wp != i {
                            // SAFETY: src (i..) and dst (wp..) are within
                            // buf[..n] and ptr::copy handles overlap.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1; } // skip the matched byte
                None => {
                    // No more matches: keep the rest of the buffer.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: as above; overlapping copy is fine.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
828
/// Streaming delete of 2 or 3 byte values via memchr2/memchr3.
///
/// `chars.len()` must be 2 or 3 (the caller `delete` guarantees this).
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // wp = write position, i = read position; kept spans shift left.
        let mut wp = 0;
        let mut i = 0;
        while i < n {
            let found = if chars.len() == 2 {
                memchr::memchr2(chars[0], chars[1], &buf[i..n])
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
            };
            match found {
                Some(offset) => {
                    if offset > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie in buf[..n];
                            // ptr::copy handles the overlap.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    offset,
                                );
                            }
                        }
                        wp += offset;
                    }
                    i += offset + 1; // skip the matched byte
                }
                None => {
                    // Tail with no matches: keep it all.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: as above.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    break;
                }
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
889
890pub fn delete_squeeze(
891 delete_chars: &[u8],
892 squeeze_chars: &[u8],
893 reader: &mut impl Read,
894 writer: &mut impl Write,
895) -> io::Result<()> {
896 let delete_set = build_member_set(delete_chars);
897 let squeeze_set = build_member_set(squeeze_chars);
898 let mut buf = vec![0u8; STREAM_BUF];
900 let mut last_squeezed: u16 = 256;
901
902 loop {
903 let n = match reader.read(&mut buf) {
904 Ok(0) => break,
905 Ok(n) => n,
906 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
907 Err(e) => return Err(e),
908 };
909 let mut wp = 0;
910 unsafe {
911 let ptr = buf.as_mut_ptr();
912 for i in 0..n {
913 let b = *ptr.add(i);
914 if is_member(&delete_set, b) {
915 continue;
916 }
917 if is_member(&squeeze_set, b) {
918 if last_squeezed == b as u16 {
919 continue;
920 }
921 last_squeezed = b as u16;
922 } else {
923 last_squeezed = 256;
924 }
925 *ptr.add(wp) = b;
926 wp += 1;
927 }
928 }
929 writer.write_all(&buf[..wp])?;
930 }
931 Ok(())
932}
933
/// Stream `reader` to `writer`, squeezing runs of bytes in
/// `squeeze_chars` down to one occurrence (`tr -s SET1`).
pub fn squeeze(
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_stream(squeeze_chars[0], reader, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut buf = vec![0u8; STREAM_BUF];
    // Last emitted squeezable byte; 256 is a sentinel for "none". Kept
    // across reads so runs spanning buffer boundaries still squeeze.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        // Compact in place: wp trails i, so writes never clobber unread input.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..n {
                let b = *ptr.add(i);
                if is_member(&member, b) {
                    if last_squeezed == b as u16 {
                        continue; // repeat of the previous squeezable byte
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
977
/// Streaming squeeze of a single byte value using memchr to jump between
/// occurrences. `was_squeeze_char` carries run state across reads so a
/// run of `ch` that spans a buffer boundary is still squeezed to one byte.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    // True iff the last byte emitted (possibly in a previous buffer) was `ch`.
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Retry reads interrupted by a signal.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        // wp = compacted output length, i = read position.
        let mut wp = 0;
        let mut i = 0;

        while i < n {
            // A run of `ch` continuing from already-emitted output: skip it
            // entirely (its first byte was emitted earlier).
            if was_squeeze_char && buf[i] == ch {
                i += 1;
                while i < n && buf[i] == ch {
                    i += 1;
                }
                if i >= n {
                    break;
                }
            }

            match memchr::memchr(ch, &buf[i..n]) {
                Some(offset) => {
                    // Copy the non-`ch` span preceding the match.
                    let run_len = offset;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: both ranges lie within buf[..n];
                            // ptr::copy handles overlap.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    i += run_len;

                    // Emit exactly one `ch` for the run that starts here.
                    // SAFETY: wp <= i < n, so the write is in bounds.
                    unsafe {
                        *buf.as_mut_ptr().add(wp) = ch;
                    }
                    wp += 1;
                    was_squeeze_char = true;
                    i += 1;
                    // Skip the rest of the run.
                    while i < n && buf[i] == ch {
                        i += 1;
                    }
                }
                None => {
                    // No further `ch`: keep the tail as-is.
                    let run_len = n - i;
                    if run_len > 0 {
                        if wp != i {
                            // SAFETY: as above.
                            unsafe {
                                std::ptr::copy(
                                    buf.as_ptr().add(i),
                                    buf.as_mut_ptr().add(wp),
                                    run_len,
                                );
                            }
                        }
                        wp += run_len;
                    }
                    was_squeeze_char = false;
                    break;
                }
            }
        }

        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1070
/// Translate an in-memory (e.g. mmap-backed) byte slice to `writer`,
/// SET1 -> SET2. Identity tables short-circuit to a single write.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    // Nothing to translate: emit the input unchanged in one call.
    if matches!(kind, TranslateKind::Identity) {
        return writer.write_all(data);
    }

    let mut out = vec![0u8; BUF_SIZE];
    for chunk in data.chunks(BUF_SIZE) {
        translate_chunk_dispatch(chunk, &mut out[..chunk.len()], &table, &kind, use_simd);
        writer.write_all(&out[..chunk.len()])?;
    }
    Ok(())
}
1107
/// Translate an in-memory byte slice SET1 -> SET2 and squeeze repeats of
/// SET2 members in the output, writing the result to `writer`.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    // Squeezing applies to the *translated* output, over SET2's members.
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut buf = vec![0u8; BUF_SIZE];
    // Last emitted squeezable byte; 256 = "none". Persists across chunks.
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(BUF_SIZE) {
        translate_chunk_dispatch(chunk, &mut buf[..chunk.len()], &table, &kind, use_simd);

        // Compact the translated chunk in place: wp trails i.
        let mut wp = 0;
        unsafe {
            let ptr = buf.as_mut_ptr();
            for i in 0..chunk.len() {
                let b = *ptr.add(i);
                if is_member(&squeeze_set, b) {
                    if last_squeezed == b as u16 {
                        continue; // repeat of the previous squeezable byte
                    }
                    last_squeezed = b as u16;
                } else {
                    last_squeezed = 256;
                }
                *ptr.add(wp) = b;
                wp += 1;
            }
        }
        writer.write_all(&buf[..wp])?;
    }
    Ok(())
}
1155
/// Delete every byte of `delete_chars` from an in-memory slice, writing
/// the survivors to `writer`. 1-3 characters use memchr fast paths;
/// otherwise a branchless bitmap-compaction loop is used.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }

    if delete_chars.len() == 2 {
        return delete_multi_memchr_mmap::<2>(delete_chars, data, writer);
    }

    if delete_chars.len() == 3 {
        return delete_multi_memchr_mmap::<3>(delete_chars, data, writer);
    }

    let member = build_member_set(delete_chars);
    let mut outbuf = vec![0u8; BUF_SIZE];

    for chunk in data.chunks(BUF_SIZE) {
        let mut out_pos = 0;
        let len = chunk.len();
        let mut i = 0;

        // Branchless compaction: write each byte unconditionally, then
        // advance the output cursor only if the byte is kept. The extra
        // write of a deleted byte is harmless; it gets overwritten.
        // SAFETY: out_pos <= i < len <= BUF_SIZE, so all accesses are in
        // bounds of chunk and outbuf.
        while i + 8 <= len {
            unsafe {
                let b0 = *chunk.get_unchecked(i);
                let b1 = *chunk.get_unchecked(i + 1);
                let b2 = *chunk.get_unchecked(i + 2);
                let b3 = *chunk.get_unchecked(i + 3);
                let b4 = *chunk.get_unchecked(i + 4);
                let b5 = *chunk.get_unchecked(i + 5);
                let b6 = *chunk.get_unchecked(i + 6);
                let b7 = *chunk.get_unchecked(i + 7);

                *outbuf.get_unchecked_mut(out_pos) = b0;
                out_pos += !is_member(&member, b0) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b1;
                out_pos += !is_member(&member, b1) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b2;
                out_pos += !is_member(&member, b2) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b3;
                out_pos += !is_member(&member, b3) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b4;
                out_pos += !is_member(&member, b4) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b5;
                out_pos += !is_member(&member, b5) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b6;
                out_pos += !is_member(&member, b6) as usize;
                *outbuf.get_unchecked_mut(out_pos) = b7;
                out_pos += !is_member(&member, b7) as usize;
            }
            i += 8;
        }

        // Remaining < 8 bytes.
        while i < len {
            unsafe {
                let b = *chunk.get_unchecked(i);
                *outbuf.get_unchecked_mut(out_pos) = b;
                out_pos += !is_member(&member, b) as usize;
            }
            i += 1;
        }

        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1229
/// Delete 2 or 3 byte values from an in-memory slice using the memchr2/
/// memchr3 iterators, copying the kept spans between matches.
///
/// `N` selects the memchr variant and must match `chars.len()` (2 or 3);
/// the caller `delete_mmap` guarantees this.
fn delete_multi_memchr_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut outbuf = vec![0u8; BUF_SIZE];

    for chunk in data.chunks(BUF_SIZE) {
        // wp = bytes staged in outbuf; last = end of the previous match + 1.
        let mut wp = 0;
        let mut last = 0;

        // Copy the span between consecutive matches for whichever
        // iterator is selected below.
        macro_rules! process_iter {
            ($iter:expr) => {
                for pos in $iter {
                    if pos > last {
                        let run = pos - last;
                        outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
                        wp += run;
                    }
                    last = pos + 1;
                }
            };
        }

        if N == 2 {
            process_iter!(memchr::memchr2_iter(chars[0], chars[1], chunk));
        } else {
            process_iter!(memchr::memchr3_iter(chars[0], chars[1], chars[2], chunk));
        }

        // Tail after the final match.
        if last < chunk.len() {
            let run = chunk.len() - last;
            outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
            wp += run;
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1272
1273fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1277 let mut outbuf = vec![0u8; BUF_SIZE];
1278
1279 for chunk in data.chunks(BUF_SIZE) {
1280 let mut wp = 0;
1281 let mut last = 0;
1282 for pos in memchr::memchr_iter(ch, chunk) {
1283 if pos > last {
1284 let run = pos - last;
1285 outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
1286 wp += run;
1287 }
1288 last = pos + 1;
1289 }
1290 if last < chunk.len() {
1291 let run = chunk.len() - last;
1292 outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
1293 wp += run;
1294 }
1295 writer.write_all(&outbuf[..wp])?;
1296 }
1297 Ok(())
1298}
1299
1300pub fn delete_squeeze_mmap(
1302 delete_chars: &[u8],
1303 squeeze_chars: &[u8],
1304 data: &[u8],
1305 writer: &mut impl Write,
1306) -> io::Result<()> {
1307 let delete_set = build_member_set(delete_chars);
1308 let squeeze_set = build_member_set(squeeze_chars);
1309 let mut outbuf = vec![0u8; BUF_SIZE];
1310 let mut last_squeezed: u16 = 256;
1311
1312 for chunk in data.chunks(BUF_SIZE) {
1313 let mut out_pos = 0;
1314 for &b in chunk {
1315 if is_member(&delete_set, b) {
1316 continue;
1317 }
1318 if is_member(&squeeze_set, b) {
1319 if last_squeezed == b as u16 {
1320 continue;
1321 }
1322 last_squeezed = b as u16;
1323 } else {
1324 last_squeezed = 256;
1325 }
1326 unsafe {
1327 *outbuf.get_unchecked_mut(out_pos) = b;
1328 }
1329 out_pos += 1;
1330 }
1331 writer.write_all(&outbuf[..out_pos])?;
1332 }
1333 Ok(())
1334}
1335
/// Squeeze runs of bytes in `squeeze_chars` from an in-memory slice,
/// writing the result to `writer`. 1-3 characters use memchr fast paths.
pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if squeeze_chars.len() == 1 {
        return squeeze_single_mmap(squeeze_chars[0], data, writer);
    }

    if squeeze_chars.len() == 2 {
        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
    }
    if squeeze_chars.len() == 3 {
        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut outbuf = vec![0u8; BUF_SIZE];
    // Last emitted squeezable byte; 256 = "none". Persists across chunks
    // so runs spanning chunk boundaries still squeeze.
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(BUF_SIZE) {
        let len = chunk.len();
        let mut wp = 0;
        let mut i = 0;

        // SAFETY: wp <= i < len <= BUF_SIZE, so reads from chunk and
        // writes into outbuf stay in bounds.
        unsafe {
            let inp = chunk.as_ptr();
            let outp = outbuf.as_mut_ptr();

            while i < len {
                let b = *inp.add(i);
                if is_member(&member, b) {
                    if last_squeezed != b as u16 {
                        *outp.add(wp) = b;
                        wp += 1;
                        last_squeezed = b as u16;
                    }
                    i += 1;
                    // Skip the rest of the run of b.
                    while i < len && *inp.add(i) == b {
                        i += 1;
                    }
                } else {
                    last_squeezed = 256;
                    *outp.add(wp) = b;
                    wp += 1;
                    i += 1;
                }
            }
        }
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1392
/// Squeeze 2 or 3 byte values from an in-memory slice using memchr2/
/// memchr3 to jump between occurrences, buffering output in `outbuf`.
///
/// `N` selects the memchr variant and must match `chars.len()` (2 or 3);
/// the caller `squeeze_mmap` guarantees this.
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut outbuf = vec![0u8; BUF_SIZE];
    let mut wp = 0;
    // Last emitted squeezable byte; 256 = "none".
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Locate the next occurrence of any squeeze character.
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Stage `$src` ($len bytes) into outbuf, flushing first if it would
    // overflow; spans larger than the whole buffer are written directly.
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > BUF_SIZE {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > BUF_SIZE {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                // Copy the non-squeeze span before the match; any such
                // byte breaks a pending squeeze run.
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                // Emit one copy of b unless it continues the previous run.
                if last_squeezed != b as u16 {
                    if wp >= BUF_SIZE {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                // Skip the remainder of the run of b.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No more squeeze characters: stage the tail and stop.
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1469
/// Squeeze runs of a single byte value from an in-memory slice.
///
/// Fast path: if no doubled `ch` exists anywhere, there is nothing to
/// squeeze and the data is written through unchanged.
fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    // No adjacent pair of `ch` means output == input.
    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    let mut outbuf = vec![0u8; BUF_SIZE];
    let len = data.len();
    // wp = bytes staged in outbuf; cursor = read position in data.
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;

                // Stage the non-`ch` span before the match, flushing if it
                // would overflow; oversized spans are written directly.
                let gap = pos - cursor;
                if gap > 0 {
                    if wp + gap > BUF_SIZE {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > BUF_SIZE {
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }

                // Emit exactly one `ch` for the run starting at pos.
                if wp >= BUF_SIZE {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;

                // Skip the remainder of the run.
                cursor = pos + 1;
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                // Tail with no more `ch`: stage it and stop.
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > BUF_SIZE {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > BUF_SIZE {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}