1use std::io::{self, Read, Write};
2
3use rayon::prelude::*;
4
/// Chunk size for mmap-backed processing paths (1 MiB).
const BUF_SIZE: usize = 1024 * 1024;
/// Buffer size for the streaming (read-loop) paths (256 KiB).
const STREAM_BUF: usize = 256 * 1024;

/// Inputs at least this large (4 MiB) are translated in parallel via rayon.
const PARALLEL_TRANSLATE_THRESHOLD: usize = 4 * 1024 * 1024;
17
#[inline]
/// Build the 256-entry byte translation table for tr's SET1 -> SET2.
///
/// Starts from the identity mapping and overlays each `set1[i]` with the
/// corresponding `set2[i]`. Following GNU tr, a shorter `set2` is padded by
/// repeating its last byte; an empty `set2` leaves the byte unchanged.
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut table: [u8; 256] = std::array::from_fn(|i| i as u8);
    for (i, &src) in set1.iter().enumerate() {
        let dst = set2
            .get(i)
            .or_else(|| set2.last())
            .copied()
            .unwrap_or(src);
        table[src as usize] = dst;
    }
    table
}
32
#[inline]
/// Build a 256-bit membership bitset: byte `b` occupies bit `b % 8` of
/// `set[b / 8]`.
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    chars.iter().fold([0u8; 32], |mut set, &b| {
        set[(b >> 3) as usize] |= 1 << (b & 7);
        set
    })
}
42
#[inline(always)]
/// Test whether `ch` is present in a bitset built by `build_member_set`.
///
/// `ch >> 3` is at most 31, so indexing the 32-byte array can never panic;
/// LLVM proves the bound and elides the check, making the previous
/// `get_unchecked`-based `unsafe` block unnecessary.
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    set[(ch >> 3) as usize] & (1 << (ch & 7)) != 0
}
47
/// Classification of a translate table, used to pick the fastest kernel.
#[allow(dead_code)]
enum TranslateKind {
    /// Every byte maps to itself; output equals input.
    Identity,
    /// Exactly the contiguous range `lo..=hi` is remapped, every byte by the
    /// same wrapping offset `delta` (e.g. ASCII case conversion is +/-32).
    RangeDelta { lo: u8, hi: u8, delta: u8 },
    /// Arbitrary mapping; needs the full 256-entry table lookup.
    General,
}
62
/// Classify a translate table so the dispatcher can choose a fast kernel.
///
/// Returns `Identity` when nothing is remapped, `RangeDelta` when the
/// remapped bytes form one gap-free contiguous range all shifted by the
/// same wrapping offset, and `General` otherwise.
fn analyze_table(table: &[u8; 256]) -> TranslateKind {
    let mut first_delta: Option<u8> = None;
    let mut lo: u8 = 0;
    let mut hi: u8 = 0;
    let mut count: u32 = 0;

    for i in 0..256u16 {
        let actual = table[i as usize];
        if actual != i as u8 {
            // Wrapping arithmetic gives a single consistent u8 offset even
            // for downward shifts (e.g. 'a'->'A' is delta 224 mod 256).
            let delta = actual.wrapping_sub(i as u8);
            count += 1;
            match first_delta {
                None => {
                    first_delta = Some(delta);
                    lo = i as u8;
                    hi = i as u8;
                }
                Some(d) if d == delta => {
                    hi = i as u8;
                }
                // A second, different delta: not describable as one range.
                _ => return TranslateKind::General,
            }
        }
    }

    match (count, first_delta) {
        (0, _) => TranslateKind::Identity,
        // The count check rejects gaps: every byte in lo..=hi must have been
        // remapped, otherwise RangeDelta would wrongly shift the gap bytes.
        (c, Some(delta)) if c == (hi as u32 - lo as u32 + 1) => {
            TranslateKind::RangeDelta { lo, hi, delta }
        }
        _ => TranslateKind::General,
    }
}
97
#[cfg(target_arch = "x86_64")]
/// AVX2 kernels for byte translation.
///
/// Every function here is `#[target_feature(enable = "avx2")]`: callers must
/// first verify AVX2 support at runtime (see `has_avx2`) before calling, or
/// the behavior is undefined.
mod simd_tr {
    /// Translate `data` into `out`: bytes in the inclusive range `lo..=hi`
    /// get `delta` added (wrapping), all other bytes are copied unchanged.
    ///
    /// # Safety
    /// - AVX2 must be available on the running CPU.
    /// - `out` must be at least `data.len()` bytes long; stores are raw
    ///   unchecked pointer writes.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta(data: &[u8], out: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 128 bytes (4 vectors) per iteration.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(inp.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(inp.add(i + 96) as *const __m256i);

                // Unsigned in-range test: (b - lo) <= (hi - lo), emulated
                // via min_epu8/cmpeq because AVX2 lacks unsigned compares.
                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                // Add delta only where the mask is set (mask is 0xFF/0x00).
                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(outp.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(outp.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // Tail: one 32-byte vector at a time.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar remainder (< 32 bytes).
            while i < len {
                let b = *inp.add(i);
                *outp.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }

    /// Translate `data` into `out` through an arbitrary 256-entry table.
    ///
    /// The table is split into 16 LUT registers, each broadcasting 16
    /// consecutive table bytes; per input vector the high nibble selects the
    /// LUT and the low nibble drives `pshufb` within it.
    ///
    /// # Safety
    /// - AVX2 must be available on the running CPU.
    /// - `out` must be at least `data.len()` bytes long.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup(data: &[u8], out: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // Broadcast table[h*16 .. h*16+16] into both lanes of luts[h].
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let inp = data.as_ptr();
            let outp = out.as_mut_ptr();
            let mut i = 0;

            // Main loop: two vectors (64 bytes) per iteration.
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(inp.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // For nibble $h: select lanes whose high nibble equals $h
                // and OR in the pshufb lookup from that nibble's LUT.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(outp.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Tail: single vector, runtime loop over the 16 nibble LUTs.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(inp.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(outp.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar remainder.
            while i < len {
                *outp.add(i) = *table.get_unchecked(*inp.add(i) as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `general_lookup`; same nibble-LUT algorithm.
    ///
    /// # Safety
    /// AVX2 must be available on the running CPU.
    #[target_feature(enable = "avx2")]
    pub unsafe fn general_lookup_inplace(data: &mut [u8], table: &[u8; 256]) {
        unsafe {
            use std::arch::x86_64::*;

            // Broadcast table[h*16 .. h*16+16] into both lanes of luts[h].
            let tp = table.as_ptr();
            let mut luts = [_mm256_setzero_si256(); 16];
            let mut h = 0;
            while h < 16 {
                luts[h] =
                    _mm256_broadcastsi128_si256(_mm_loadu_si128(tp.add(h * 16) as *const __m128i));
                h += 1;
            }

            let lo_mask = _mm256_set1_epi8(0x0F);
            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0;

            // Main loop: two vectors (64 bytes) per iteration.
            while i + 64 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);

                let lo0 = _mm256_and_si256(v0, lo_mask);
                let lo1 = _mm256_and_si256(v1, lo_mask);
                let hi0 = _mm256_and_si256(_mm256_srli_epi16(v0, 4), lo_mask);
                let hi1 = _mm256_and_si256(_mm256_srli_epi16(v1, 4), lo_mask);

                let mut r0 = _mm256_setzero_si256();
                let mut r1 = _mm256_setzero_si256();

                // Same nibble-select-and-OR scheme as in general_lookup.
                macro_rules! do_nib {
                    ($h:literal) => {
                        let hv = _mm256_set1_epi8($h);
                        let lut = luts[$h as usize];
                        let m0 = _mm256_cmpeq_epi8(hi0, hv);
                        let m1 = _mm256_cmpeq_epi8(hi1, hv);
                        r0 = _mm256_or_si256(
                            r0,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo0), m0),
                        );
                        r1 = _mm256_or_si256(
                            r1,
                            _mm256_and_si256(_mm256_shuffle_epi8(lut, lo1), m1),
                        );
                    };
                }

                do_nib!(0);
                do_nib!(1);
                do_nib!(2);
                do_nib!(3);
                do_nib!(4);
                do_nib!(5);
                do_nib!(6);
                do_nib!(7);
                do_nib!(8);
                do_nib!(9);
                do_nib!(10);
                do_nib!(11);
                do_nib!(12);
                do_nib!(13);
                do_nib!(14);
                do_nib!(15);

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                i += 64;
            }

            // Tail: single vector, runtime loop over the 16 nibble LUTs.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let lo = _mm256_and_si256(v, lo_mask);
                let hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), lo_mask);

                let mut result = _mm256_setzero_si256();
                let mut hh = 0u8;
                while hh < 16 {
                    let hv = _mm256_set1_epi8(hh as i8);
                    let m = _mm256_cmpeq_epi8(hi, hv);
                    result = _mm256_or_si256(
                        result,
                        _mm256_and_si256(_mm256_shuffle_epi8(luts[hh as usize], lo), m),
                    );
                    hh += 1;
                }

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar remainder.
            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = *table.get_unchecked(b as usize);
                i += 1;
            }
        }
    }

    /// In-place variant of `range_delta`; same min/cmpeq range test.
    ///
    /// # Safety
    /// AVX2 must be available on the running CPU.
    #[target_feature(enable = "avx2")]
    pub unsafe fn range_delta_inplace(data: &mut [u8], lo: u8, hi: u8, delta: u8) {
        unsafe {
            use std::arch::x86_64::*;

            let lo_vec = _mm256_set1_epi8(lo as i8);
            let range_vec = _mm256_set1_epi8((hi - lo) as i8);
            let delta_vec = _mm256_set1_epi8(delta as i8);

            let len = data.len();
            let ptr = data.as_mut_ptr();
            let mut i = 0usize;

            // Main loop: 128 bytes (4 vectors) per iteration.
            while i + 128 <= len {
                let v0 = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let v1 = _mm256_loadu_si256(ptr.add(i + 32) as *const __m256i);
                let v2 = _mm256_loadu_si256(ptr.add(i + 64) as *const __m256i);
                let v3 = _mm256_loadu_si256(ptr.add(i + 96) as *const __m256i);

                let d0 = _mm256_sub_epi8(v0, lo_vec);
                let d1 = _mm256_sub_epi8(v1, lo_vec);
                let d2 = _mm256_sub_epi8(v2, lo_vec);
                let d3 = _mm256_sub_epi8(v3, lo_vec);

                // Unsigned (b - lo) <= (hi - lo) via min_epu8/cmpeq.
                let m0 = _mm256_cmpeq_epi8(_mm256_min_epu8(d0, range_vec), d0);
                let m1 = _mm256_cmpeq_epi8(_mm256_min_epu8(d1, range_vec), d1);
                let m2 = _mm256_cmpeq_epi8(_mm256_min_epu8(d2, range_vec), d2);
                let m3 = _mm256_cmpeq_epi8(_mm256_min_epu8(d3, range_vec), d3);

                let r0 = _mm256_add_epi8(v0, _mm256_and_si256(m0, delta_vec));
                let r1 = _mm256_add_epi8(v1, _mm256_and_si256(m1, delta_vec));
                let r2 = _mm256_add_epi8(v2, _mm256_and_si256(m2, delta_vec));
                let r3 = _mm256_add_epi8(v3, _mm256_and_si256(m3, delta_vec));

                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, r0);
                _mm256_storeu_si256(ptr.add(i + 32) as *mut __m256i, r1);
                _mm256_storeu_si256(ptr.add(i + 64) as *mut __m256i, r2);
                _mm256_storeu_si256(ptr.add(i + 96) as *mut __m256i, r3);
                i += 128;
            }

            // Tail: one 32-byte vector at a time.
            while i + 32 <= len {
                let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
                let diff = _mm256_sub_epi8(v, lo_vec);
                let mask = _mm256_cmpeq_epi8(_mm256_min_epu8(diff, range_vec), diff);
                let result = _mm256_add_epi8(v, _mm256_and_si256(mask, delta_vec));
                _mm256_storeu_si256(ptr.add(i) as *mut __m256i, result);
                i += 32;
            }

            // Scalar remainder.
            while i < len {
                let b = *ptr.add(i);
                *ptr.add(i) = if b.wrapping_sub(lo) <= (hi - lo) {
                    b.wrapping_add(delta)
                } else {
                    b
                };
                i += 1;
            }
        }
    }
}
451
#[cfg(target_arch = "x86_64")]
#[inline(always)]
/// Runtime check for AVX2 support; gates every call into `simd_tr`.
fn has_avx2() -> bool {
    is_x86_feature_detected!("avx2")
}
461
#[inline(always)]
/// Scalar fallback: translate `chunk` through `table` into `out`.
///
/// Writes `min(chunk.len(), out.len())` bytes; valid callers always pass an
/// `out` at least as long as `chunk`. A `u8` index can never exceed the
/// 256-entry table and `zip` bounds both slices once, so no `unsafe` is
/// needed — the loop compiles to the same branch-free code as the previous
/// hand-unrolled `get_unchecked` version, and the old version's UB when
/// `out` was shorter than `chunk` is gone.
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    for (dst, &src) in out.iter_mut().zip(chunk) {
        *dst = table[src as usize];
    }
}
494
#[inline(always)]
/// Scalar fallback: translate `data` through `table` in place.
///
/// A `u8` value always indexes the 256-entry table in bounds, so safe
/// iteration replaces the previous raw-pointer 8x unroll with identical
/// codegen (LLVM elides the bounds check and vectorizes the loop) and no
/// `unsafe`.
fn translate_inplace(data: &mut [u8], table: &[u8; 256]) {
    for b in data.iter_mut() {
        *b = table[*b as usize];
    }
}
521
#[inline]
/// Translate `chunk` into `out`, picking the fastest kernel for `kind`.
///
/// `_use_simd` gates the AVX2 paths (meaningful only on x86_64); otherwise
/// the scalar table lookup is used. `out` must be at least `chunk.len()`
/// bytes long.
fn translate_chunk_dispatch(
    chunk: &[u8],
    out: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        TranslateKind::Identity => {
            // Identity table: plain copy, no lookups.
            out[..chunk.len()].copy_from_slice(chunk);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected, and
                // callers guarantee out.len() >= chunk.len().
                unsafe { simd_tr::range_delta(chunk, out, *lo, *hi, *delta) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_chunk(chunk, out, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: same preconditions as above.
                unsafe { simd_tr::general_lookup(chunk, out, table) };
                return;
            }
            translate_chunk(chunk, out, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_chunk(chunk, out, table);
        }
    }
}
566
#[inline]
/// In-place counterpart of `translate_chunk_dispatch`: translate `data`
/// through `table`, selecting the kernel by `kind` and `_use_simd`.
fn translate_inplace_dispatch(
    data: &mut [u8],
    table: &[u8; 256],
    kind: &TranslateKind,
    _use_simd: bool,
) {
    match kind {
        // Identity table: nothing to do.
        TranslateKind::Identity => {}
        #[cfg(target_arch = "x86_64")]
        TranslateKind::RangeDelta { lo, hi, delta } => {
            if _use_simd {
                // SAFETY: _use_simd is only true when AVX2 was detected.
                unsafe { simd_tr::range_delta_inplace(data, *lo, *hi, *delta) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::RangeDelta { .. } => {
            translate_inplace(data, table);
        }
        #[cfg(target_arch = "x86_64")]
        TranslateKind::General => {
            if _use_simd {
                // SAFETY: same precondition as above.
                unsafe { simd_tr::general_lookup_inplace(data, table) };
                return;
            }
            translate_inplace(data, table);
        }
        #[cfg(not(target_arch = "x86_64"))]
        TranslateKind::General => {
            translate_inplace(data, table);
        }
    }
}
604
605pub fn translate(
612 set1: &[u8],
613 set2: &[u8],
614 reader: &mut impl Read,
615 writer: &mut impl Write,
616) -> io::Result<()> {
617 let table = build_translate_table(set1, set2);
618 let kind = analyze_table(&table);
619 #[cfg(target_arch = "x86_64")]
620 let use_simd = has_avx2();
621 #[cfg(not(target_arch = "x86_64"))]
622 let use_simd = false;
623
624 let mut buf = vec![0u8; STREAM_BUF];
625 loop {
626 let n = match reader.read(&mut buf) {
627 Ok(0) => break,
628 Ok(n) => n,
629 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
630 Err(e) => return Err(e),
631 };
632 translate_inplace_dispatch(&mut buf[..n], &table, &kind, use_simd);
633 writer.write_all(&buf[..n])?;
634 }
635 Ok(())
636}
637
/// Streaming tr SET1 -> SET2 translation combined with `-s`: after
/// translation, runs of bytes that are members of SET2 are squeezed to a
/// single occurrence.
pub fn translate_squeeze(
    set1: &[u8],
    set2: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);
    let mut outbuf = vec![0u8; STREAM_BUF];
    let mut inbuf = vec![0u8; STREAM_BUF];
    // Last squeezable byte emitted; 256 is a sentinel for "none" (hence
    // u16). Persists across reads so runs spanning chunk boundaries still
    // squeeze correctly.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut inbuf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut out_pos = 0;
        for &b in &inbuf[..n] {
            // SAFETY: any u8 indexes the 256-entry table in bounds.
            let translated = unsafe { *table.get_unchecked(b as usize) };
            if is_member(&squeeze_set, translated) {
                if last_squeezed == translated as u16 {
                    continue; // repeat of the previous squeezed byte: drop
                }
                last_squeezed = translated as u16;
            } else {
                last_squeezed = 256;
            }
            // SAFETY: out_pos < n <= STREAM_BUF == outbuf.len().
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = translated;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
677
/// Streaming tr `-d`: copy `reader` to `writer`, dropping every byte in
/// `delete_chars`. Dispatches to memchr-based fast paths for 1-3 chars,
/// otherwise filters through a bitset.
pub fn delete(
    delete_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    // Fast path: a single delete character uses memchr directly.
    if delete_chars.len() == 1 {
        return delete_single_streaming(delete_chars[0], reader, writer);
    }

    // Fast path: 2-3 characters use memchr2/memchr3.
    if delete_chars.len() <= 3 {
        return delete_multi_streaming(delete_chars, reader, writer);
    }

    // General path: bitset membership filter.
    let member = build_member_set(delete_chars);
    let mut outbuf = vec![0u8; STREAM_BUF];
    let mut inbuf = vec![0u8; STREAM_BUF];

    loop {
        let n = match reader.read(&mut inbuf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut out_pos = 0;
        for &b in &inbuf[..n] {
            if !is_member(&member, b) {
                // SAFETY: out_pos < n <= STREAM_BUF == outbuf.len().
                unsafe {
                    *outbuf.get_unchecked_mut(out_pos) = b;
                }
                out_pos += 1;
            }
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
717
/// Streaming delete of a single byte value: memchr finds each occurrence
/// and the spans between occurrences are written through unchanged.
fn delete_single_streaming(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let chunk = &buf[..n];
        // `last` tracks the start of the current kept span.
        let mut last = 0;
        for pos in memchr::memchr_iter(ch, chunk) {
            if pos > last {
                writer.write_all(&chunk[last..pos])?;
            }
            last = pos + 1; // skip the deleted byte
        }
        if last < n {
            writer.write_all(&chunk[last..n])?;
        }
    }
    Ok(())
}
746
/// Streaming delete of 2 or 3 byte values via memchr2/memchr3; spans
/// between hits are written through unchanged.
///
/// Precondition: `chars.len()` is 2 or 3 (enforced by `delete`'s dispatch).
fn delete_multi_streaming(
    chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_BUF];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let chunk = &buf[..n];
        // `last` tracks the start of the current kept span.
        let mut last = 0;
        if chars.len() == 2 {
            for pos in memchr::memchr2_iter(chars[0], chars[1], chunk) {
                if pos > last {
                    writer.write_all(&chunk[last..pos])?;
                }
                last = pos + 1; // skip the deleted byte
            }
        } else {
            for pos in memchr::memchr3_iter(chars[0], chars[1], chars[2], chunk) {
                if pos > last {
                    writer.write_all(&chunk[last..pos])?;
                }
                last = pos + 1; // skip the deleted byte
            }
        }
        if last < n {
            writer.write_all(&chunk[last..n])?;
        }
    }
    Ok(())
}
784
/// Streaming tr `-d -s`: drop bytes in `delete_chars`, then squeeze runs
/// of surviving bytes that are in `squeeze_chars` to a single occurrence.
pub fn delete_squeeze(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);
    let mut outbuf = vec![0u8; STREAM_BUF];
    let mut inbuf = vec![0u8; STREAM_BUF];
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    // Persists across reads so runs spanning chunks still squeeze.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut inbuf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut out_pos = 0;
        for &b in &inbuf[..n] {
            // Deleted bytes vanish without affecting squeeze state, so a
            // run split by deleted bytes still squeezes (matches GNU tr).
            if is_member(&delete_set, b) {
                continue;
            }
            if is_member(&squeeze_set, b) {
                if last_squeezed == b as u16 {
                    continue; // repeat of the previous squeezed byte: drop
                }
                last_squeezed = b as u16;
            } else {
                last_squeezed = 256;
            }
            // SAFETY: out_pos < n <= STREAM_BUF == outbuf.len().
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = b;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
826
/// Streaming tr `-s`: squeeze runs of bytes in `squeeze_chars` down to a
/// single occurrence. A single squeeze char gets a memchr-based fast path.
pub fn squeeze(
    squeeze_chars: &[u8],
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    // Fast path: one character squeezes via run-copying with memchr.
    if squeeze_chars.len() == 1 {
        return squeeze_single_stream(squeeze_chars[0], reader, writer);
    }

    let member = build_member_set(squeeze_chars);
    let mut outbuf = vec![0u8; STREAM_BUF];
    let mut inbuf = vec![0u8; STREAM_BUF];
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    // Persists across reads so runs spanning chunks still squeeze.
    let mut last_squeezed: u16 = 256;

    loop {
        let n = match reader.read(&mut inbuf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let mut out_pos = 0;
        for &b in &inbuf[..n] {
            if is_member(&member, b) {
                if last_squeezed == b as u16 {
                    continue; // repeat of the previous squeezed byte: drop
                }
                last_squeezed = b as u16;
            } else {
                last_squeezed = 256;
            }
            // SAFETY: out_pos < n <= STREAM_BUF == outbuf.len().
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = b;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
868
/// Streaming squeeze of a single byte value: the first byte of each run is
/// kept, later repeats are dropped, and non-matching spans are bulk-copied
/// using memchr to find the next run start.
fn squeeze_single_stream(
    ch: u8,
    reader: &mut impl Read,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut inbuf = vec![0u8; STREAM_BUF];
    let mut outbuf = vec![0u8; STREAM_BUF];
    // True while we are inside a run of `ch`; persists across reads so a
    // run split over two chunks is still squeezed to one byte.
    let mut was_squeeze_char = false;

    loop {
        let n = match reader.read(&mut inbuf) {
            Ok(0) => break, // EOF
            Ok(n) => n,
            // Interrupted by a signal: retry the read.
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        let chunk = &inbuf[..n];
        let mut out_pos = 0;
        let mut i = 0;

        while i < n {
            if chunk[i] == ch {
                // Emit only the first byte of a run.
                if !was_squeeze_char {
                    // SAFETY: out_pos <= i < n <= STREAM_BUF.
                    unsafe {
                        *outbuf.get_unchecked_mut(out_pos) = ch;
                    }
                    out_pos += 1;
                    was_squeeze_char = true;
                }
                i += 1;
            } else {
                was_squeeze_char = false;
                // Bulk-copy everything up to the next occurrence of ch.
                let remaining = &chunk[i..];
                let run_end = memchr::memchr(ch, remaining).unwrap_or(remaining.len());
                outbuf[out_pos..out_pos + run_end].copy_from_slice(&chunk[i..i + run_end]);
                out_pos += run_end;
                i += run_end;
            }
        }

        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
918
/// Translate an entire in-memory (mmap'd) buffer per tr's SET1 -> SET2 and
/// write the result to `writer`.
///
/// Large inputs (>= PARALLEL_TRANSLATE_THRESHOLD) are split across rayon
/// worker threads; smaller inputs are handled serially, chunked to bound
/// the scratch allocation.
pub fn translate_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    // Identity mapping: pass the input straight through.
    if matches!(kind, TranslateKind::Identity) {
        return writer.write_all(data);
    }

    if data.len() >= PARALLEL_TRANSLATE_THRESHOLD {
        let mut out = vec![0u8; data.len()];
        // One chunk per rayon thread, rounded up to a BUF_SIZE multiple so
        // chunk boundaries stay cache/SIMD friendly.
        let num_threads = rayon::current_num_threads().max(1);
        let chunk_size = (data.len() + num_threads - 1) / num_threads;
        let chunk_size = ((chunk_size + BUF_SIZE - 1) / BUF_SIZE) * BUF_SIZE;

        // Input and output are chunked identically, so each worker writes
        // its own disjoint output region.
        data.par_chunks(chunk_size)
            .zip(out.par_chunks_mut(chunk_size))
            .for_each(|(inp, outp)| {
                translate_chunk_dispatch(inp, &mut outp[..inp.len()], &table, &kind, use_simd);
            });

        writer.write_all(&out)?;
        return Ok(());
    }

    // Small input: translate in one shot into a same-sized buffer.
    if data.len() <= BUF_SIZE {
        let mut out = vec![0u8; data.len()];
        translate_chunk_dispatch(data, &mut out, &table, &kind, use_simd);
        writer.write_all(&out)?;
        return Ok(());
    }

    // Medium input: reuse one BUF_SIZE scratch buffer across chunks.
    let mut out = vec![0u8; BUF_SIZE];
    for chunk in data.chunks(BUF_SIZE) {
        translate_chunk_dispatch(chunk, &mut out[..chunk.len()], &table, &kind, use_simd);
        writer.write_all(&out[..chunk.len()])?;
    }
    Ok(())
}
979
/// mmap variant of `translate_squeeze`: translate `data` per SET1 -> SET2,
/// then squeeze runs of translated bytes that are members of SET2.
///
/// Serial by necessity: the squeeze state (`last_squeezed`) is carried
/// across chunk boundaries.
pub fn translate_squeeze_mmap(
    set1: &[u8],
    set2: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let table = build_translate_table(set1, set2);
    let squeeze_set = build_member_set(set2);
    let kind = analyze_table(&table);
    #[cfg(target_arch = "x86_64")]
    let use_simd = has_avx2();
    #[cfg(not(target_arch = "x86_64"))]
    let use_simd = false;

    let mut trans_buf = vec![0u8; BUF_SIZE];
    let mut outbuf = vec![0u8; BUF_SIZE];
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(BUF_SIZE) {
        // Phase 1: bulk-translate the chunk (possibly with SIMD).
        translate_chunk_dispatch(
            chunk,
            &mut trans_buf[..chunk.len()],
            &table,
            &kind,
            use_simd,
        );

        // Phase 2: squeeze the translated bytes into outbuf.
        let mut out_pos = 0;
        for i in 0..chunk.len() {
            // SAFETY: i < chunk.len() <= BUF_SIZE == trans_buf.len().
            let translated = unsafe { *trans_buf.get_unchecked(i) };
            if is_member(&squeeze_set, translated) {
                if last_squeezed == translated as u16 {
                    continue; // repeat of the previous squeezed byte: drop
                }
                last_squeezed = translated as u16;
            } else {
                last_squeezed = 256;
            }
            // SAFETY: out_pos <= i < BUF_SIZE == outbuf.len().
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = translated;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1033
/// mmap variant of tr `-d`: write `data` minus every byte in `delete_chars`.
/// 1-3 characters dispatch to memchr fast paths; otherwise an 8-way
/// unrolled bitset filter compacts each chunk into `outbuf`.
pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    // Fast path: a single delete character uses memchr directly.
    if delete_chars.len() == 1 {
        return delete_single_char_mmap(delete_chars[0], data, writer);
    }

    // Fast paths: 2 or 3 characters use memchr2/memchr3.
    if delete_chars.len() == 2 {
        return delete_multi_memchr_mmap::<2>(delete_chars, data, writer);
    }

    if delete_chars.len() == 3 {
        return delete_multi_memchr_mmap::<3>(delete_chars, data, writer);
    }

    // General path: bitset membership filter, unrolled 8x.
    let member = build_member_set(delete_chars);
    let mut outbuf = vec![0u8; BUF_SIZE];

    for chunk in data.chunks(BUF_SIZE) {
        let mut out_pos = 0;
        let len = chunk.len();
        let mut i = 0;

        // 8-way unrolled main loop; out_pos only advances for kept bytes,
        // so out_pos <= i < len <= BUF_SIZE keeps every write in bounds.
        while i + 8 <= len {
            unsafe {
                let b0 = *chunk.get_unchecked(i);
                let b1 = *chunk.get_unchecked(i + 1);
                let b2 = *chunk.get_unchecked(i + 2);
                let b3 = *chunk.get_unchecked(i + 3);
                let b4 = *chunk.get_unchecked(i + 4);
                let b5 = *chunk.get_unchecked(i + 5);
                let b6 = *chunk.get_unchecked(i + 6);
                let b7 = *chunk.get_unchecked(i + 7);

                if !is_member(&member, b0) {
                    *outbuf.get_unchecked_mut(out_pos) = b0;
                    out_pos += 1;
                }
                if !is_member(&member, b1) {
                    *outbuf.get_unchecked_mut(out_pos) = b1;
                    out_pos += 1;
                }
                if !is_member(&member, b2) {
                    *outbuf.get_unchecked_mut(out_pos) = b2;
                    out_pos += 1;
                }
                if !is_member(&member, b3) {
                    *outbuf.get_unchecked_mut(out_pos) = b3;
                    out_pos += 1;
                }
                if !is_member(&member, b4) {
                    *outbuf.get_unchecked_mut(out_pos) = b4;
                    out_pos += 1;
                }
                if !is_member(&member, b5) {
                    *outbuf.get_unchecked_mut(out_pos) = b5;
                    out_pos += 1;
                }
                if !is_member(&member, b6) {
                    *outbuf.get_unchecked_mut(out_pos) = b6;
                    out_pos += 1;
                }
                if !is_member(&member, b7) {
                    *outbuf.get_unchecked_mut(out_pos) = b7;
                    out_pos += 1;
                }
            }
            i += 8;
        }

        // Scalar remainder (< 8 bytes).
        while i < len {
            unsafe {
                let b = *chunk.get_unchecked(i);
                if !is_member(&member, b) {
                    *outbuf.get_unchecked_mut(out_pos) = b;
                    out_pos += 1;
                }
            }
            i += 1;
        }

        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1124
/// mmap delete of exactly 2 (N == 2) or 3 byte values via memchr2/memchr3;
/// the spans between hits are written through unchanged. N is a const
/// generic so the branch below is resolved at compile time.
fn delete_multi_memchr_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    // `last` tracks the start of the current kept span.
    let mut last = 0;
    if N == 2 {
        for pos in memchr::memchr2_iter(chars[0], chars[1], data) {
            if pos > last {
                writer.write_all(&data[last..pos])?;
            }
            last = pos + 1; // skip the deleted byte
        }
    } else {
        for pos in memchr::memchr3_iter(chars[0], chars[1], chars[2], data) {
            if pos > last {
                writer.write_all(&data[last..pos])?;
            }
            last = pos + 1; // skip the deleted byte
        }
    }
    if last < data.len() {
        writer.write_all(&data[last..])?;
    }
    Ok(())
}
1153
1154fn delete_single_char_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1157 let mut last = 0;
1158 for pos in memchr::memchr_iter(ch, data) {
1159 if pos > last {
1160 writer.write_all(&data[last..pos])?;
1161 }
1162 last = pos + 1;
1163 }
1164 if last < data.len() {
1165 writer.write_all(&data[last..])?;
1166 }
1167 Ok(())
1168}
1169
/// mmap variant of tr `-d -s`: drop bytes in `delete_chars`, then squeeze
/// runs of surviving bytes that are in `squeeze_chars`.
pub fn delete_squeeze_mmap(
    delete_chars: &[u8],
    squeeze_chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    let delete_set = build_member_set(delete_chars);
    let squeeze_set = build_member_set(squeeze_chars);
    let mut outbuf = vec![0u8; BUF_SIZE];
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    // Carried across chunks so runs spanning chunk boundaries squeeze.
    let mut last_squeezed: u16 = 256;

    for chunk in data.chunks(BUF_SIZE) {
        let mut out_pos = 0;
        for &b in chunk {
            // Deleted bytes vanish without touching the squeeze state, so a
            // run split by deleted bytes still squeezes (matches GNU tr).
            if is_member(&delete_set, b) {
                continue;
            }
            if is_member(&squeeze_set, b) {
                if last_squeezed == b as u16 {
                    continue; // repeat of the previous squeezed byte: drop
                }
                last_squeezed = b as u16;
            } else {
                last_squeezed = 256;
            }
            // SAFETY: out_pos < chunk.len() <= BUF_SIZE == outbuf.len().
            unsafe {
                *outbuf.get_unchecked_mut(out_pos) = b;
            }
            out_pos += 1;
        }
        writer.write_all(&outbuf[..out_pos])?;
    }
    Ok(())
}
1205
/// mmap variant of tr `-s`: squeeze runs of bytes in `squeeze_chars` down
/// to one occurrence. 1-3 characters dispatch to memchr fast paths.
pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
    // Fast path: one character.
    if squeeze_chars.len() == 1 {
        return squeeze_single_mmap(squeeze_chars[0], data, writer);
    }

    // Fast paths: 2 or 3 characters via memchr2/memchr3.
    if squeeze_chars.len() == 2 {
        return squeeze_multi_mmap::<2>(squeeze_chars, data, writer);
    }
    if squeeze_chars.len() == 3 {
        return squeeze_multi_mmap::<3>(squeeze_chars, data, writer);
    }

    // General path: bitset membership, writing maximal non-member spans
    // with single write_all calls.
    let member = build_member_set(squeeze_chars);
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    let mut last_squeezed: u16 = 256;
    // Start of the current non-member span awaiting flush.
    let mut span_start = 0;
    let len = data.len();
    let mut i = 0;

    while i < len {
        // SAFETY: i < len is guaranteed by the loop condition.
        let b = unsafe { *data.get_unchecked(i) };
        if is_member(&member, b) {
            // Flush the non-member span accumulated so far.
            if i > span_start {
                writer.write_all(&data[span_start..i])?;
            }
            // Emit the first byte of the run unless it continues the
            // previously emitted squeezable byte.
            if last_squeezed != b as u16 {
                writer.write_all(&[b])?;
                last_squeezed = b as u16;
            }
            i += 1;
            // Skip the remainder of the run of b.
            while i < len && data[i] == b {
                i += 1;
            }
            span_start = i;
        } else {
            last_squeezed = 256;
            i += 1;
        }
    }

    // Trailing non-member span.
    if span_start < len {
        writer.write_all(&data[span_start..])?;
    }
    Ok(())
}
1260
/// mmap squeeze of exactly 2 (N == 2) or 3 byte values: memchr2/memchr3
/// find the next run start, non-matching spans are bulk-written, and each
/// run is collapsed to its first byte. N is a const generic so the branch
/// in `find_next!` resolves at compile time.
fn squeeze_multi_mmap<const N: usize>(
    chars: &[u8],
    data: &[u8],
    writer: &mut impl Write,
) -> io::Result<()> {
    // Last squeezable byte emitted; 256 = sentinel for "none" (hence u16).
    let mut last_squeezed: u16 = 256;
    let mut cursor = 0;

    // Compile-time selection between memchr2 and memchr3.
    macro_rules! find_next {
        ($data:expr) => {
            if N == 2 {
                memchr::memchr2(chars[0], chars[1], $data)
            } else {
                memchr::memchr3(chars[0], chars[1], chars[2], $data)
            }
        };
    }

    while cursor < data.len() {
        match find_next!(&data[cursor..]) {
            Some(offset) => {
                let pos = cursor + offset;
                let b = data[pos];
                // Bulk-write the non-matching span; any byte in it resets
                // the squeeze state.
                if pos > cursor {
                    writer.write_all(&data[cursor..pos])?;
                    last_squeezed = 256;
                }
                // Emit the first byte of the run unless it continues the
                // previously emitted squeezable byte.
                if last_squeezed != b as u16 {
                    writer.write_all(&[b])?;
                    last_squeezed = b as u16;
                }
                // Skip the remainder of the run of b.
                let mut skip = pos + 1;
                while skip < data.len() && data[skip] == b {
                    skip += 1;
                }
                cursor = skip;
            }
            None => {
                // No more squeeze characters: write the rest verbatim.
                writer.write_all(&data[cursor..])?;
                break;
            }
        }
    }
    Ok(())
}
1310
1311fn squeeze_single_mmap(ch: u8, data: &[u8], writer: &mut impl Write) -> io::Result<()> {
1315 let mut cursor = 0;
1316
1317 while cursor < data.len() {
1318 match memchr::memchr(ch, &data[cursor..]) {
1320 Some(offset) => {
1321 let pos = cursor + offset;
1322 writer.write_all(&data[cursor..pos + 1])?;
1324 let mut skip = pos + 1;
1326 while skip < data.len() && data[skip] == ch {
1327 skip += 1;
1328 }
1329 cursor = skip;
1330 }
1331 None => {
1332 writer.write_all(&data[cursor..])?;
1334 break;
1335 }
1336 }
1337 }
1338 Ok(())
1339}