1use std::io::{self, Read, Write};
2
// BUF_SIZE: chunk size for the mmap'd-input paths; STREAM_BUF: read-buffer
// size for the streaming (reader-based) paths.
const BUF_SIZE: usize = 1024 * 1024; const STREAM_BUF: usize = 256 * 1024;
10
#[inline]
/// Build the 256-entry byte translation table mapping `set1[i]` -> `set2[i]`.
///
/// POSIX `tr` semantics: when `set2` is shorter than `set1`, its last
/// character is repeated for the excess; an empty `set2` leaves those bytes
/// mapped to themselves. Bytes not in `set1` map to themselves.
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    // Start from the identity mapping, then overlay the requested pairs.
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    let pad = set2.last().copied();
    for (idx, &src) in set1.iter().enumerate() {
        table[src as usize] = match set2.get(idx) {
            Some(&dst) => dst,
            // Past the end of set2: repeat its last char, or keep `src`
            // unchanged when set2 is empty.
            None => pad.unwrap_or(src),
        };
    }
    table
}
25
#[inline]
/// Build a 256-bit membership bitmap: bit `ch` is set iff `ch` is in `chars`.
/// Byte `ch` lives at `set[ch >> 3]`, bit `ch & 7`.
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    chars.iter().fold([0u8; 32], |mut bits, &ch| {
        bits[(ch >> 3) as usize] |= 1 << (ch & 7);
        bits
    })
}
35
#[inline(always)]
/// Test membership of `ch` in a bitmap built by `build_member_set`.
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    // `ch >> 3` is at most 31, so indexing the 32-byte array can never go out
    // of bounds and the compiler elides the check — the previous
    // `get_unchecked` unsafe block bought nothing.
    (set[(ch >> 3) as usize] & (1 << (ch & 7))) != 0
}
40
/// Classification of a translation table, used to pick the fastest
/// execution strategy for a chunk.
#[allow(dead_code)] enum TranslateKind {
    /// Every byte maps to itself: translation degenerates to a copy.
    Identity,
    /// Exactly the contiguous byte range `lo..=hi` is remapped, each byte by
    /// the same wrapping offset `delta` (e.g. ASCII case conversion).
    RangeDelta { lo: u8, hi: u8, delta: u8 },
    /// Arbitrary mapping: needs the full 256-entry table lookup.
    General,
}
55
56fn analyze_table(table: &[u8; 256]) -> TranslateKind {
58 let mut first_delta: Option<u8> = None;
59 let mut lo: u8 = 0;
60 let mut hi: u8 = 0;
61 let mut count: u32 = 0;
62
63 for i in 0..256u16 {
64 let actual = table[i as usize];
65 if actual != i as u8 {
66 let delta = actual.wrapping_sub(i as u8);
67 count += 1;
68 match first_delta {
69 None => {
70 first_delta = Some(delta);
71 lo = i as u8;
72 hi = i as u8;
73 }
74 Some(d) if d == delta => {
75 hi = i as u8;
76 }
77 _ => return TranslateKind::General,
78 }
79 }
80 }
81
82 match (count, first_delta) {
83 (0, _) => TranslateKind::Identity,
84 (c, Some(delta)) if c == (hi as u32 - lo as u32 + 1) => {
85 TranslateKind::RangeDelta { lo, hi, delta }
86 }
87 _ => TranslateKind::General,
88 }
89}
90
/// AVX2-accelerated translation kernels.
///
/// Every function here is compiled with `#[target_feature(enable = "avx2")]`;
/// callers must confirm AVX2 support at runtime (see `has_avx2`) before
/// calling, and dst/src length requirements are noted per function.
#[cfg(target_arch = "x86_64")]
mod simd_tr {
    /// Translate `data` into `out`: every byte in `lo..=hi` has `delta`
    /// added (wrapping); all other bytes are copied unchanged.
    ///
    /// # Safety
    /// Requires AVX2. `out` must be at least `data.len()` bytes long
    /// (stores go through `out.as_mut_ptr()` up to `data.len()`).
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta(data: &[u8], out: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 4 x 32-byte vectors per iteration.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(inp.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(inp.add(i + 96) as *const __m256i);

                // d = v - lo; v is in [lo, hi] iff d <= (hi - lo) as an
                // unsigned compare, expressed as min(d, range) == d.
                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                // Add delta only where the in-range mask is all-ones.
                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(outp.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(outp.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // One 32-byte vector at a time for the medium tail.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail: same unsigned range trick, one byte at a time.
            while i < len {
                let b = *inp.add(i);
                *outp.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }

    /// Translate `data` into `out` through an arbitrary 256-entry `table`
    /// using nibble-decomposed `pshufb` lookups: the table is split into 16
    /// rows of 16 bytes, each broadcast into a 256-bit LUT; for each input
    /// byte the high nibble selects the row (via a compare mask) and the low
    /// nibble selects the column (via the shuffle).
    ///
    /// # Safety
    /// Requires AVX2. `out` must be at least `data.len()` bytes long.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup(data: &[u8], out: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // Broadcast each 16-byte table row into both 128-bit lanes.
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0;

            // Main loop: 2 x 32-byte vectors per iteration.
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);

                // Split each byte into low and high nibbles.
                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // For one high-nibble value $h: shuffle its LUT by the low
                // nibbles, keep the result only where the high nibble
                // matches, and OR it into the accumulator.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Single-vector tail: same scheme with a runtime nibble loop.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail: plain table lookup.
            while i < len {
                *outp.add(i) = *table.get_unchecked(*inp.add(i) as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of [`general_lookup`]: translate `data` through
    /// `table` using the same nibble-LUT scheme.
    ///
    /// # Safety
    /// Requires AVX2.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup_inplace(data: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // Broadcast each 16-byte table row into both 128-bit lanes.
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0;

            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // Same per-high-nibble select/shuffle/merge as general_lookup.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail.
            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = *table.get_unchecked(b as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of [`range_delta`]: add `delta` (wrapping) to every
    /// byte of `data` in `lo..=hi`.
    ///
    /// # Safety
    /// Requires AVX2.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta_inplace(data: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 4 x 32-byte vectors per iteration.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(ptr.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(ptr.add(i + 96) as *const __m256i);

                // Unsigned in-range test: min(v - lo, range) == v - lo.
                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(ptr.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(ptr.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar tail.
            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }
}
444
/// Runtime check for AVX2 support, gating all `simd_tr` fast paths.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn has_avx2() -> bool {
    is_x86_feature_detected!("avx2")
}
454
#[inline(always)]
/// Scalar fallback: translate `chunk` into `out` through `table`,
/// byte by byte. Writes at most `min(chunk.len(), out.len())` bytes.
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    // A u8-derived index into a [u8; 256] is always in bounds, and `zip`
    // walks both slices without per-element bounds checks, so this safe loop
    // compiles as tightly as the previous hand-unrolled `get_unchecked`
    // version — which additionally had UB if `out` was shorter than `chunk`
    // (zip simply truncates instead).
    for (dst, &src) in out.iter_mut().zip(chunk) {
        *dst = table[src as usize];
    }
}
487
#[inline(always)]
/// Scalar fallback: translate `data` through `table` in place.
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    // Indexing a [u8; 256] with a u8 value can never be out of bounds, so
    // the compiler elides the bounds check; this safe iterator loop replaces
    // the previous raw-pointer, hand-unrolled version with no loss — the
    // unroll is something LLVM performs on its own here.
    for b in data.iter_mut() {
        *b = table[*b as usize];
    }
}
514
/// Translate `chunk` into `out`, picking the fastest kernel for `kind`.
///
/// `out` must hold at least `chunk.len()` bytes. `_use_simd` gates the AVX2
/// paths; the leading underscore keeps non-x86_64 builds warning-free where
/// the flag is unused.
#[inline]
fn translate_chunk_dispatch(
    chunk: &[u8],
    out: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {
            out[..chunk.len()].copy_from_slice(chunk);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected.
                unsafe { simd_tr::range_delta(chunk, out, *lo, *hi, *delta) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_chunk(chunk, out, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected.
                unsafe { simd_tr::general_lookup(chunk, out, table) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_chunk(chunk, out, table);
        }
    }
}
559
/// In-place counterpart of `translate_chunk_dispatch`: translate `data`
/// through `table`, picking the fastest kernel for `kind`. Identity is a
/// no-op. `_use_simd` gates the AVX2 paths on x86_64.
#[inline]
fn translate_inplace_dispatch(
    data: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {}
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected.
                unsafe { simd_tr::range_delta_inplace(data, *lo, *hi, *delta) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_inplace(data, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected.
                unsafe { simd_tr::general_lookup_inplace(data, table) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_inplace(data, table);
        }
    }
}
597
598pub fn translate(
605 set1: &[u8],
606 set2: &[u8],
607 reader: &mut impl Read,
608 writer: &mut impl Write,
609) -> io::Result<()> {
610 let table = build_translate_table(set1, set2);
611 let kind = analyze_table(&table);
612 #[cfg(target_arch = "x86_64")]
613 let use_simd = has_avx2();
614 #[cfg(not(target_arch = "x86_64"))]
615 let use_simd = false;
616
617 let mut buf = vec![0u8; BUF_SIZE];
619 loop {
620 let n = match reader.read(&mut buf) {
621 Ok(0) => break,
622 Ok(n) => n,
623 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
624 Err(e) => return Err(e),
625 };
626 translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
627 writer.write_all(&buf[..n])?;
628 }
629 Ok(())
630}
631
632pub fn translate_squeeze(
633 set1: &[u8],
634 set2: &[u8],
635 reader: &mut impl Read,
636 writer: &mut impl Write,
637) -> io::Result<()> {
638 let table = build_translate_table(set1, set2);
639 let squeeze_set = build_member_set(set2);
640 let kind = analyze_table(&table);
641 #[cfg(target_arch = "x86_64")]
642 let use_simd = has_avx2();
643 #[cfg(not(target_arch = "x86_64"))]
644 let use_simd = false;
645
646 let mut buf = vec![0u8; STREAM_BUF];
649 let mut last_squeezed: u16 = 256;
650
651 loop {
652 let n = match reader.read(&mut buf) {
653 Ok(0) => break,
654 Ok(n) => n,
655 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
656 Err(e) => return Err(e),
657 };
658 translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
660 let mut wp = 0;
662 unsafe {
663 let ptr = buf.as_mut_ptr();
664 for i in 0..n {
665 let b = *ptr.add(i);
666 if is_member(&squeeze_set, b) {
667 if last_squeezed == b as u16 {
668 continue;
669 }
670 last_squeezed = b as u16;
671 } else {
672 last_squeezed = 256;
673 }
674 *ptr.add(wp) = b;
675 wp += 1;
676 }
677 }
678 writer.write_all(&buf[..wp])?;
679 }
680 Ok(())
681}
682
683pub fn delete(
684 delete_chars: &[u8],
685 reader: &mut impl Read,
686 writer: &mut impl Write,
687) -> io::Result<()> {
688 if delete_chars.len() == 1 {
690 return delete_single_streaming(delete_chars[0], reader, writer);
691 }
692
693 if delete_chars.len() <= 3 {
695 return delete_multi_streaming(delete_chars, reader, writer);
696 }
697
698 let member = build_member_set(delete_chars);
699 let mut buf = vec![0u8; STREAM_BUF];
701
702 loop {
703 let n = match reader.read(&mut buf) {
704 Ok(0) => break,
705 Ok(n) => n,
706 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
707 Err(e) => return Err(e),
708 };
709 let mut wp = 0;
710 unsafe {
711 let ptr = buf.as_mut_ptr();
712 let mut i = 0;
713 while i + 8 <= n {
715 let b0 = *ptr.add(i);
716 let b1 = *ptr.add(i + 1);
717 let b2 = *ptr.add(i + 2);
718 let b3 = *ptr.add(i + 3);
719 let b4 = *ptr.add(i + 4);
720 let b5 = *ptr.add(i + 5);
721 let b6 = *ptr.add(i + 6);
722 let b7 = *ptr.add(i + 7);
723
724 if !is_member(&member, b0) {
725 *ptr.add(wp) = b0;
726 wp += 1;
727 }
728 if !is_member(&member, b1) {
729 *ptr.add(wp) = b1;
730 wp += 1;
731 }
732 if !is_member(&member, b2) {
733 *ptr.add(wp) = b2;
734 wp += 1;
735 }
736 if !is_member(&member, b3) {
737 *ptr.add(wp) = b3;
738 wp += 1;
739 }
740 if !is_member(&member, b4) {
741 *ptr.add(wp) = b4;
742 wp += 1;
743 }
744 if !is_member(&member, b5) {
745 *ptr.add(wp) = b5;
746 wp += 1;
747 }
748 if !is_member(&member, b6) {
749 *ptr.add(wp) = b6;
750 wp += 1;
751 }
752 if !is_member(&member, b7) {
753 *ptr.add(wp) = b7;
754 wp += 1;
755 }
756 i += 8;
757 }
758 while i < n {
759 let b = *ptr.add(i);
760 if !is_member(&member, b) {
761 *ptr.add(wp) = b;
762 wp += 1;
763 }
764 i += 1;
765 }
766 }
767 writer.write_all(&buf[..wp])?;
768 }
769 Ok(())
770}
771
772fn delete_single_streaming(
775 ch: u8,
776 reader: &mut impl Read,
777 writer: &mut impl Write,
778) -> io::Result<()> {
779 let mut buf = vec![0u8; STREAM_BUF];
780 loop {
781 let n = match reader.read(&mut buf) {
782 Ok(0) => break,
783 Ok(n) => n,
784 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
785 Err(e) => return Err(e),
786 };
787 let mut wp = 0;
788 let mut i = 0;
789 while i < n {
790 match memchr::memchr(ch, &buf[i..n]) {
791 Some(offset) => {
792 if offset > 0 {
793 if wp != i {
794 unsafe {
795 std::ptr::copy(
796 buf.as_ptr().add(i),
797 buf.as_mut_ptr().add(wp),
798 offset,
799 );
800 }
801 }
802 wp += offset;
803 }
804 i += offset + 1; }
806 None => {
807 let run_len = n - i;
808 if run_len > 0 {
809 if wp != i {
810 unsafe {
811 std::ptr::copy(
812 buf.as_ptr().add(i),
813 buf.as_mut_ptr().add(wp),
814 run_len,
815 );
816 }
817 }
818 wp += run_len;
819 }
820 break;
821 }
822 }
823 }
824 writer.write_all(&buf[..wp])?;
825 }
826 Ok(())
827}
828
829fn delete_multi_streaming(
831 chars: &[u8],
832 reader: &mut impl Read,
833 writer: &mut impl Write,
834) -> io::Result<()> {
835 let mut buf = vec![0u8; STREAM_BUF];
836 loop {
837 let n = match reader.read(&mut buf) {
838 Ok(0) => break,
839 Ok(n) => n,
840 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
841 Err(e) => return Err(e),
842 };
843 let mut wp = 0;
844 let mut i = 0;
845 while i < n {
846 let found = if chars.len() == 2 {
847 memchr::memchr2(chars[0], chars[1], &buf[i..n])
848 } else {
849 memchr::memchr3(chars[0], chars[1], chars[2], &buf[i..n])
850 };
851 match found {
852 Some(offset) => {
853 if offset > 0 {
854 if wp != i {
855 unsafe {
856 std::ptr::copy(
857 buf.as_ptr().add(i),
858 buf.as_mut_ptr().add(wp),
859 offset,
860 );
861 }
862 }
863 wp += offset;
864 }
865 i += offset + 1;
866 }
867 None => {
868 let run_len = n - i;
869 if run_len > 0 {
870 if wp != i {
871 unsafe {
872 std::ptr::copy(
873 buf.as_ptr().add(i),
874 buf.as_mut_ptr().add(wp),
875 run_len,
876 );
877 }
878 }
879 wp += run_len;
880 }
881 break;
882 }
883 }
884 }
885 writer.write_all(&buf[..wp])?;
886 }
887 Ok(())
888}
889
890pub fn delete_squeeze(
891 delete_chars: &[u8],
892 squeeze_chars: &[u8],
893 reader: &mut impl Read,
894 writer: &mut impl Write,
895) -> io::Result<()> {
896 let delete_set = build_member_set(delete_chars);
897 let squeeze_set = build_member_set(squeeze_chars);
898 let mut buf = vec![0u8; STREAM_BUF];
900 let mut last_squeezed: u16 = 256;
901
902 loop {
903 let n = match reader.read(&mut buf) {
904 Ok(0) => break,
905 Ok(n) => n,
906 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
907 Err(e) => return Err(e),
908 };
909 let mut wp = 0;
910 unsafe {
911 let ptr = buf.as_mut_ptr();
912 for i in 0..n {
913 let b = *ptr.add(i);
914 if is_member(&delete_set, b) {
915 continue;
916 }
917 if is_member(&squeeze_set, b) {
918 if last_squeezed == b as u16 {
919 continue;
920 }
921 last_squeezed = b as u16;
922 } else {
923 last_squeezed = 256;
924 }
925 *ptr.add(wp) = b;
926 wp += 1;
927 }
928 }
929 writer.write_all(&buf[..wp])?;
930 }
931 Ok(())
932}
933
934pub fn squeeze(
935 squeeze_chars: &[u8],
936 reader: &mut impl Read,
937 writer: &mut impl Write,
938) -> io::Result<()> {
939 if squeeze_chars.len() == 1 {
941 return squeeze_single_stream(squeeze_chars[0], reader, writer);
942 }
943
944 let member = build_member_set(squeeze_chars);
945 let mut buf = vec![0u8; STREAM_BUF];
947 let mut last_squeezed: u16 = 256;
948
949 loop {
950 let n = match reader.read(&mut buf) {
951 Ok(0) => break,
952 Ok(n) => n,
953 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
954 Err(e) => return Err(e),
955 };
956 let mut wp = 0;
957 unsafe {
958 let ptr = buf.as_mut_ptr();
959 for i in 0..n {
960 let b = *ptr.add(i);
961 if is_member(&member, b) {
962 if last_squeezed == b as u16 {
963 continue;
964 }
965 last_squeezed = b as u16;
966 } else {
967 last_squeezed = 256;
968 }
969 *ptr.add(wp) = b;
970 wp += 1;
971 }
972 }
973 writer.write_all(&buf[..wp])?;
974 }
975 Ok(())
976}
977
978fn squeeze_single_stream(
982 ch: u8,
983 reader: &mut impl Read,
984 writer: &mut impl Write,
985) -> io::Result<()> {
986 let mut buf = vec![0u8; STREAM_BUF];
987 let mut was_squeeze_char = false;
988
989 loop {
990 let n = match reader.read(&mut buf) {
991 Ok(0) => break,
992 Ok(n) => n,
993 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
994 Err(e) => return Err(e),
995 };
996
997 let mut wp = 0;
1000 let mut i = 0;
1001
1002 while i < n {
1003 if was_squeeze_char && buf[i] == ch {
1005 i += 1;
1006 while i < n && buf[i] == ch {
1007 i += 1;
1008 }
1009 if i >= n {
1011 break;
1012 }
1013 }
1014
1015 match memchr::memchr(ch, &buf[i..n]) {
1017 Some(offset) => {
1018 let run_len = offset;
1019 if run_len > 0 {
1021 if wp != i {
1022 unsafe {
1023 std::ptr::copy(
1024 buf.as_ptr().add(i),
1025 buf.as_mut_ptr().add(wp),
1026 run_len,
1027 );
1028 }
1029 }
1030 wp += run_len;
1031 }
1032 i += run_len;
1033
1034 unsafe {
1036 *buf.as_mut_ptr().add(wp) = ch;
1037 }
1038 wp += 1;
1039 was_squeeze_char = true;
1040 i += 1;
1041 while i < n && buf[i] == ch {
1042 i += 1;
1043 }
1044 }
1045 None => {
1046 let run_len = n - i;
1048 if run_len > 0 {
1049 if wp != i {
1050 unsafe {
1051 std::ptr::copy(
1052 buf.as_ptr().add(i),
1053 buf.as_mut_ptr().add(wp),
1054 run_len,
1055 );
1056 }
1057 }
1058 wp += run_len;
1059 }
1060 was_squeeze_char = false;
1061 break;
1062 }
1063 }
1064 }
1065
1066 writer.write_all(&buf[..wp])?;
1067 }
1068 Ok(())
1069}
1070
1071pub fn translate_mmap(
1081 set1: &[u8],
1082 set2: &[u8],
1083 data: &[u8],
1084 writer: &mut impl Write,
1085) -> io::Result<()> {
1086 let table = build_translate_table(set1, set2);
1087 let kind = analyze_table(&table);
1088 #[cfg(target_arch = "x86_64")]
1089 let use_simd = has_avx2();
1090 #[cfg(not(target_arch = "x86_64"))]
1091 let use_simd = false;
1092
1093 if matches!(kind, TranslateKind::Identity) {
1094 return writer.write_all(data);
1095 }
1096
1097 let buf_size = data.len().min(BUF_SIZE);
1100 let mut out = vec![0u8; buf_size];
1101 for chunk in data.chunks(buf_size) {
1102 translate_chunk_dispatch(chunk, &mut out[..chunk.len()], &table, &kind, use_simd);
1103 writer.write_all(&out[..chunk.len()])?;
1104 }
1105 Ok(())
1106}
1107
1108pub fn translate_squeeze_mmap(
1112 set1: &[u8],
1113 set2: &[u8],
1114 data: &[u8],
1115 writer: &mut impl Write,
1116) -> io::Result<()> {
1117 let table = build_translate_table(set1, set2);
1118 let squeeze_set = build_member_set(set2);
1119 let kind = analyze_table(&table);
1120 #[cfg(target_arch = "x86_64")]
1121 let use_simd = has_avx2();
1122 #[cfg(not(target_arch = "x86_64"))]
1123 let use_simd = false;
1124
1125 let buf_size = data.len().min(BUF_SIZE);
1127 let mut buf = vec![0u8; buf_size];
1128 let mut last_squeezed: u16 = 256;
1129
1130 for chunk in data.chunks(buf_size) {
1131 translate_chunk_dispatch(chunk, &mut buf[..chunk.len()], &table, &kind, use_simd);
1133
1134 let mut wp = 0;
1136 unsafe {
1137 let ptr = buf.as_mut_ptr();
1138 for i in 0..chunk.len() {
1139 let b = *ptr.add(i);
1140 if is_member(&squeeze_set, b) {
1141 if last_squeezed == b as u16 {
1142 continue;
1143 }
1144 last_squeezed = b as u16;
1145 } else {
1146 last_squeezed = 256;
1147 }
1148 *ptr.add(wp) = b;
1149 wp += 1;
1150 }
1151 }
1152 writer.write_all(&buf[..wp])?;
1153 }
1154 Ok(())
1155}
1156
1157pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1161 if delete_chars.len() == 1 {
1163 return delete_single_char_mmap(delete_chars[0], data, writer);
1164 }
1165
1166 if delete_chars.len() == 2 {
1168 return delete_multi_memchr_mmap::<2>(delete_chars, data, writer);
1169 }
1170
1171 if delete_chars.len() == 3 {
1173 return delete_multi_memchr_mmap::<3>(delete_chars, data, writer);
1174 }
1175
1176 let member = build_member_set(delete_chars);
1177 let buf_size = data.len().min(BUF_SIZE);
1178 let mut outbuf = vec![0u8; buf_size];
1179
1180 for chunk in data.chunks(buf_size) {
1181 let mut out_pos = 0;
1182 let len = chunk.len();
1183 let mut i = 0;
1184
1185 while i + 8 <= len {
1188 unsafe {
1189 let b0 = *chunk.get_unchecked(i);
1190 let b1 = *chunk.get_unchecked(i + 1);
1191 let b2 = *chunk.get_unchecked(i + 2);
1192 let b3 = *chunk.get_unchecked(i + 3);
1193 let b4 = *chunk.get_unchecked(i + 4);
1194 let b5 = *chunk.get_unchecked(i + 5);
1195 let b6 = *chunk.get_unchecked(i + 6);
1196 let b7 = *chunk.get_unchecked(i + 7);
1197
1198 *outbuf.get_unchecked_mut(out_pos) = b0;
1199 out_pos += !is_member(&member, b0) as usize;
1200 *outbuf.get_unchecked_mut(out_pos) = b1;
1201 out_pos += !is_member(&member, b1) as usize;
1202 *outbuf.get_unchecked_mut(out_pos) = b2;
1203 out_pos += !is_member(&member, b2) as usize;
1204 *outbuf.get_unchecked_mut(out_pos) = b3;
1205 out_pos += !is_member(&member, b3) as usize;
1206 *outbuf.get_unchecked_mut(out_pos) = b4;
1207 out_pos += !is_member(&member, b4) as usize;
1208 *outbuf.get_unchecked_mut(out_pos) = b5;
1209 out_pos += !is_member(&member, b5) as usize;
1210 *outbuf.get_unchecked_mut(out_pos) = b6;
1211 out_pos += !is_member(&member, b6) as usize;
1212 *outbuf.get_unchecked_mut(out_pos) = b7;
1213 out_pos += !is_member(&member, b7) as usize;
1214 }
1215 i += 8;
1216 }
1217
1218 while i < len {
1219 unsafe {
1220 let b = *chunk.get_unchecked(i);
1221 *outbuf.get_unchecked_mut(out_pos) = b;
1222 out_pos += !is_member(&member, b) as usize;
1223 }
1224 i += 1;
1225 }
1226
1227 writer.write_all(&outbuf[..out_pos])?;
1228 }
1229 Ok(())
1230}
1231
1232fn delete_multi_memchr_mmap<const N: usize>(
1236 chars: &[u8],
1237 data: &[u8],
1238 writer: &mut impl Write,
1239) -> io::Result<()> {
1240 let buf_size = data.len().min(BUF_SIZE);
1241 let mut outbuf = vec![0u8; buf_size];
1242
1243 for chunk in data.chunks(buf_size) {
1244 let mut wp = 0;
1245 let mut last = 0;
1246
1247 macro_rules! process_iter {
1248 ($iter:expr) => {
1249 for pos in $iter {
1250 if pos > last {
1251 let run = pos - last;
1252 outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
1253 wp += run;
1254 }
1255 last = pos + 1;
1256 }
1257 };
1258 }
1259
1260 if N == 2 {
1261 process_iter!(memchr::memchr2_iter(chars[0], chars[1], chunk));
1262 } else {
1263 process_iter!(memchr::memchr3_iter(chars[0], chars[1], chars[2], chunk));
1264 }
1265
1266 if last < chunk.len() {
1267 let run = chunk.len() - last;
1268 outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
1269 wp += run;
1270 }
1271 writer.write_all(&outbuf[..wp])?;
1272 }
1273 Ok(())
1274}
1275
1276fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1280 let buf_size = data.len().min(BUF_SIZE);
1281 let mut outbuf = vec![0u8; buf_size];
1282
1283 for chunk in data.chunks(buf_size) {
1284 let mut wp = 0;
1285 let mut last = 0;
1286 for pos in memchr::memchr_iter(ch, chunk) {
1287 if pos > last {
1288 let run = pos - last;
1289 outbuf[wp..wp + run].copy_from_slice(&chunk[last..pos]);
1290 wp += run;
1291 }
1292 last = pos + 1;
1293 }
1294 if last < chunk.len() {
1295 let run = chunk.len() - last;
1296 outbuf[wp..wp + run].copy_from_slice(&chunk[last..]);
1297 wp += run;
1298 }
1299 writer.write_all(&outbuf[..wp])?;
1300 }
1301 Ok(())
1302}
1303
1304pub fn delete_squeeze_mmap(
1306 delete_chars: &[u8],
1307 squeeze_chars: &[u8],
1308 data: &[u8],
1309 writer: &mut impl Write,
1310) -> io::Result<()> {
1311 let delete_set = build_member_set(delete_chars);
1312 let squeeze_set = build_member_set(squeeze_chars);
1313 let buf_size = data.len().min(BUF_SIZE);
1314 let mut outbuf = vec![0u8; buf_size];
1315 let mut last_squeezed: u16 = 256;
1316
1317 for chunk in data.chunks(buf_size) {
1318 let mut out_pos = 0;
1319 for &b in chunk {
1320 if is_member(&delete_set, b) {
1321 continue;
1322 }
1323 if is_member(&squeeze_set, b) {
1324 if last_squeezed == b as u16 {
1325 continue;
1326 }
1327 last_squeezed = b as u16;
1328 } else {
1329 last_squeezed = 256;
1330 }
1331 unsafe {
1332 *outbuf.get_unchecked_mut(out_pos) = b;
1333 }
1334 out_pos += 1;
1335 }
1336 writer.write_all(&outbuf[..out_pos])?;
1337 }
1338 Ok(())
1339}
1340
1341pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1345 if squeeze_chars.len() == 1 {
1347 return squeeze_single_mmap(squeeze_chars[0], data, writer);
1348 }
1349
1350 if squeeze_chars.len() == 2 {
1352 return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
1353 }
1354 if squeeze_chars.len() == 3 {
1355 return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
1356 }
1357
1358 let member = build_member_set(squeeze_chars);
1360 let buf_size = data.len().min(BUF_SIZE);
1361 let mut outbuf = vec![0u8; buf_size];
1362 let mut last_squeezed: u16 = 256;
1363
1364 for chunk in data.chunks(buf_size) {
1365 let len = chunk.len();
1366 let mut wp = 0;
1367 let mut i = 0;
1368
1369 unsafe {
1370 let inp = chunk.as_ptr();
1371 let outp = outbuf.as_mut_ptr();
1372
1373 while i < len {
1374 let b = *inp.add(i);
1375 if is_member(&member, b) {
1376 if last_squeezed != b as u16 {
1377 *outp.add(wp) = b;
1378 wp += 1;
1379 last_squeezed = b as u16;
1380 }
1381 i += 1;
1382 while i < len && *inp.add(i) == b {
1384 i += 1;
1385 }
1386 } else {
1387 last_squeezed = 256;
1388 *outp.add(wp) = b;
1389 wp += 1;
1390 i += 1;
1391 }
1392 }
1393 }
1394 writer.write_all(&outbuf[..wp])?;
1395 }
1396 Ok(())
1397}
1398
/// mmap-path squeeze for exactly `N` (2 or 3) characters, driven by
/// `memchr2`/`memchr3`: gaps between hits are copied verbatim, each run of a
/// hit byte is reduced to a single occurrence, and output is staged through
/// `outbuf` (flushed whenever it would overflow).
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let mut wp = 0;
    // Last emitted squeezable byte; 256 = "none".
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Locate the next occurrence of any squeeze char in $data.
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    // Append $len bytes of $src to outbuf, flushing first when they would
    // not fit; spans larger than the whole buffer are written directly.
    macro_rules! flush_and_copy {
        ($src:expr, $len:expr) => {
            if wp + $len > buf_size {
                writer.write_all(&outbuf[..wp])?;
                wp = 0;
            }
            if $len > buf_size {
                writer.write_all($src)?;
            } else {
                outbuf[wp..wp + $len].copy_from_slice($src);
                wp += $len;
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                // Copy the gap of non-squeeze bytes before the hit; any gap
                // breaks a pending run, so reset the squeeze state.
                if pos > cursor {
                    let span = pos - cursor;
                    flush_and_copy!(&data[cursor..pos], span);
                    last_squeezed = 256;
                }
                // Emit one copy of `b` unless it directly continues the
                // previous run.
                if last_squeezed != b as u16 {
                    if wp >= buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    outbuf[wp] = b;
                    wp += 1;
                    last_squeezed = b as u16;
                }
                // Skip the remainder of the run of `b`.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No more squeeze chars: copy the tail and stop scanning.
                let remaining = data.len() - cursor;
                flush_and_copy!(&data[cursor..], remaining);
                break;
            }
        }
    }
    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}
1476
/// mmap-path squeeze for a single character: if `data` contains no doubled
/// `ch` at all it is written straight through; otherwise gaps are copied and
/// each run of `ch` is reduced to one byte, staged through `outbuf`.
fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    // Fast path: nothing to squeeze unless `ch` appears at least twice in a
    // row somewhere.
    if memchr::memmem::find(data, &[ch, ch]).is_none() {
        return writer.write_all(data);
    }

    let buf_size = data.len().min(BUF_SIZE);
    let mut outbuf = vec![0u8; buf_size];
    let len = data.len();
    let mut wp = 0;
    let mut cursor = 0;

    while cursor < len {
        match memchr::memchr(ch, &data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;

                // Copy the gap before the run, flushing outbuf when full;
                // gaps larger than the whole buffer are written directly.
                let gap = pos - cursor;
                if gap > 0 {
                    if wp + gap > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if gap > buf_size {
                        writer.write_all(&data[cursor..pos])?;
                    } else {
                        outbuf[wp..wp + gap].copy_from_slice(&data[cursor..pos]);
                        wp += gap;
                    }
                }

                // Emit exactly one `ch` for the run.
                if wp >= buf_size {
                    writer.write_all(&outbuf[..wp])?;
                    wp = 0;
                }
                outbuf[wp] = ch;
                wp += 1;

                // Skip the remainder of the run.
                cursor = pos + 1;
                while cursor < len && data[cursor] == ch {
                    cursor += 1;
                }
            }
            None => {
                // No more occurrences: copy the tail and stop scanning.
                let remaining = len - cursor;
                if remaining > 0 {
                    if wp + remaining > buf_size {
                        writer.write_all(&outbuf[..wp])?;
                        wp = 0;
                    }
                    if remaining > buf_size {
                        writer.write_all(&data[cursor..])?;
                    } else {
                        outbuf[wp..wp + remaining].copy_from_slice(&data[cursor..]);
                        wp += remaining;
                    }
                }
                break;
            }
        }
    }

    // Flush whatever is still staged.
    if wp > 0 {
        writer.write_all(&outbuf[..wp])?;
    }
    Ok(())
}