1use std::io::{self, Read, Write};
2
/// Length in bytes of every scratch buffer allocated in this file, and the
/// chunk length used when walking in-memory input.
const BUF_SIZE: usize = 4 * 1024 * 1024;

/// Builds a 256-entry byte lookup table that maps `set1` onto `set2`.
///
/// The table starts as the identity mapping. The byte at position `i` of
/// `set1` is then mapped to `set2[i]`; once `set2` runs out, the remaining
/// `set1` bytes map to the last byte of `set2`. If `set2` is empty, every
/// `set1` byte keeps mapping to itself. When a byte occurs more than once in
/// `set1`, the mapping from its last occurrence wins.
#[inline]
fn build_translate_table(set1: &[u8], set2: &[u8]) -> [u8; 256] {
    let mut mapping = [0u8; 256];
    for (slot, byte) in mapping.iter_mut().zip(0u8..=u8::MAX) {
        *slot = byte;
    }

    // Padding byte used when `set1` is longer than `set2`.
    let pad = set2.last().copied();
    for (idx, &src) in set1.iter().enumerate() {
        mapping[usize::from(src)] = set2.get(idx).copied().or(pad).unwrap_or(src);
    }
    mapping
}
19
/// Builds a 256-bit membership bitmap for the bytes in `chars`.
///
/// Byte `b` is recorded as bit `b & 7` of element `b >> 3`, so the 32-byte
/// array holds one bit for each possible byte value. Duplicate bytes in
/// `chars` are harmless.
#[inline]
fn build_member_set(chars: &[u8]) -> [u8; 32] {
    chars.iter().fold([0u8; 32], |mut bits, &byte| {
        bits[usize::from(byte >> 3)] |= 1u8 << (byte & 7);
        bits
    })
}
29
/// Reports whether byte `ch` is present in the bitmap `set`.
///
/// `set` uses the layout where byte `b` is bit `b & 7` of element `b >> 3`.
#[inline(always)]
fn is_member(set: &[u8; 32], ch: u8) -> bool {
    // `ch >> 3` is at most 31, so this index is always inside the 32-byte array.
    let cell = set[usize::from(ch >> 3)];
    let mask = 1u8 << (ch & 7);
    (cell & mask) != 0
}
34
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Returns the number of bytes stored at the front of `buf`; the count is
/// smaller than `buf.len()` only when the reader returned `Ok(0)` first.
///
/// # Errors
///
/// Reads that fail with `io::ErrorKind::Interrupted` are retried. Any other
/// read error is returned immediately, and bytes already read are not reported.
fn fill_buf(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        let got = match reader.read(&mut buf[total..]) {
            Ok(count) => count,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        if got == 0 {
            break;
        }
        total += got;
    }
    Ok(total)
}
48
/// Writes the `table`-translated form of every byte in `chunk` into the front
/// of `out`.
///
/// `out[i]` becomes `table[chunk[i]]` for each `i < chunk.len()`. Any bytes of
/// `out` beyond `chunk.len()` are left untouched.
///
/// # Panics
///
/// Panics if `out` is shorter than `chunk`. The length is checked once up
/// front, so a short output buffer can never cause an out-of-bounds write.
#[inline(always)]
fn translate_chunk(chunk: &[u8], out: &mut [u8], table: &[u8; 256]) {
    // One bounds check here replaces a per-byte check in the loop.
    let out = &mut out[..chunk.len()];
    for (dst, &src) in out.iter_mut().zip(chunk) {
        // A `u8` index into a 256-entry array is always in range.
        *dst = table[usize::from(src)];
    }
}
81
82pub fn translate(
87 set1: &[u8],
88 set2: &[u8],
89 reader: &mut impl Read,
90 writer: &mut impl Write,
91) -> io::Result<()> {
92 let table = build_translate_table(set1, set2);
93 let mut buf = vec![0u8; BUF_SIZE];
94 let mut out = vec![0u8; BUF_SIZE];
95 loop {
96 let n = fill_buf(reader, &mut buf)?;
97 if n == 0 {
98 break;
99 }
100 translate_chunk(&buf[..n], &mut out[..n], &table);
101 writer.write_all(&out[..n])?;
102 }
103 Ok(())
104}
105
106pub fn translate_squeeze(
107 set1: &[u8],
108 set2: &[u8],
109 reader: &mut impl Read,
110 writer: &mut impl Write,
111) -> io::Result<()> {
112 let table = build_translate_table(set1, set2);
113 let squeeze_set = build_member_set(set2);
114 let mut outbuf = vec![0u8; BUF_SIZE];
115 let mut inbuf = vec![0u8; BUF_SIZE];
116 let mut last_squeezed: u16 = 256;
117
118 loop {
119 let n = fill_buf(reader, &mut inbuf)?;
120 if n == 0 {
121 break;
122 }
123 let mut out_pos = 0;
124 for &b in &inbuf[..n] {
125 let translated = unsafe { *table.get_unchecked(b as usize) };
126 if is_member(&squeeze_set, translated) {
127 if last_squeezed == translated as u16 {
128 continue;
129 }
130 last_squeezed = translated as u16;
131 } else {
132 last_squeezed = 256;
133 }
134 unsafe {
135 *outbuf.get_unchecked_mut(out_pos) = translated;
136 }
137 out_pos += 1;
138 }
139 writer.write_all(&outbuf[..out_pos])?;
140 }
141 Ok(())
142}
143
144pub fn delete(
145 delete_chars: &[u8],
146 reader: &mut impl Read,
147 writer: &mut impl Write,
148) -> io::Result<()> {
149 let member = build_member_set(delete_chars);
150 let mut outbuf = vec![0u8; BUF_SIZE];
151 let mut inbuf = vec![0u8; BUF_SIZE];
152
153 loop {
154 let n = fill_buf(reader, &mut inbuf)?;
155 if n == 0 {
156 break;
157 }
158 let mut out_pos = 0;
159 for &b in &inbuf[..n] {
160 if !is_member(&member, b) {
161 unsafe {
162 *outbuf.get_unchecked_mut(out_pos) = b;
163 }
164 out_pos += 1;
165 }
166 }
167 writer.write_all(&outbuf[..out_pos])?;
168 }
169 Ok(())
170}
171
172pub fn delete_squeeze(
173 delete_chars: &[u8],
174 squeeze_chars: &[u8],
175 reader: &mut impl Read,
176 writer: &mut impl Write,
177) -> io::Result<()> {
178 let delete_set = build_member_set(delete_chars);
179 let squeeze_set = build_member_set(squeeze_chars);
180 let mut outbuf = vec![0u8; BUF_SIZE];
181 let mut inbuf = vec![0u8; BUF_SIZE];
182 let mut last_squeezed: u16 = 256;
183
184 loop {
185 let n = fill_buf(reader, &mut inbuf)?;
186 if n == 0 {
187 break;
188 }
189 let mut out_pos = 0;
190 for &b in &inbuf[..n] {
191 if is_member(&delete_set, b) {
192 continue;
193 }
194 if is_member(&squeeze_set, b) {
195 if last_squeezed == b as u16 {
196 continue;
197 }
198 last_squeezed = b as u16;
199 } else {
200 last_squeezed = 256;
201 }
202 unsafe {
203 *outbuf.get_unchecked_mut(out_pos) = b;
204 }
205 out_pos += 1;
206 }
207 writer.write_all(&outbuf[..out_pos])?;
208 }
209 Ok(())
210}
211
212pub fn squeeze(
213 squeeze_chars: &[u8],
214 reader: &mut impl Read,
215 writer: &mut impl Write,
216) -> io::Result<()> {
217 let member = build_member_set(squeeze_chars);
218 let mut outbuf = vec![0u8; BUF_SIZE];
219 let mut inbuf = vec![0u8; BUF_SIZE];
220 let mut last_squeezed: u16 = 256;
221
222 loop {
223 let n = fill_buf(reader, &mut inbuf)?;
224 if n == 0 {
225 break;
226 }
227 let mut out_pos = 0;
228 for &b in &inbuf[..n] {
229 if is_member(&member, b) {
230 if last_squeezed == b as u16 {
231 continue;
232 }
233 last_squeezed = b as u16;
234 } else {
235 last_squeezed = 256;
236 }
237 unsafe {
238 *outbuf.get_unchecked_mut(out_pos) = b;
239 }
240 out_pos += 1;
241 }
242 writer.write_all(&outbuf[..out_pos])?;
243 }
244 Ok(())
245}
246
247pub fn translate_mmap(
253 set1: &[u8],
254 set2: &[u8],
255 data: &[u8],
256 writer: &mut impl Write,
257) -> io::Result<()> {
258 let table = build_translate_table(set1, set2);
259 let mut out = vec![0u8; BUF_SIZE];
260 for chunk in data.chunks(BUF_SIZE) {
261 translate_chunk(chunk, &mut out[..chunk.len()], &table);
262 writer.write_all(&out[..chunk.len()])?;
263 }
264 Ok(())
265}
266
267pub fn translate_squeeze_mmap(
269 set1: &[u8],
270 set2: &[u8],
271 data: &[u8],
272 writer: &mut impl Write,
273) -> io::Result<()> {
274 let table = build_translate_table(set1, set2);
275 let squeeze_set = build_member_set(set2);
276 let mut outbuf = vec![0u8; BUF_SIZE];
277 let mut last_squeezed: u16 = 256;
278
279 for chunk in data.chunks(BUF_SIZE) {
280 let mut out_pos = 0;
281 for &b in chunk {
282 let translated = unsafe { *table.get_unchecked(b as usize) };
283 if is_member(&squeeze_set, translated) {
284 if last_squeezed == translated as u16 {
285 continue;
286 }
287 last_squeezed = translated as u16;
288 } else {
289 last_squeezed = 256;
290 }
291 unsafe {
292 *outbuf.get_unchecked_mut(out_pos) = translated;
293 }
294 out_pos += 1;
295 }
296 writer.write_all(&outbuf[..out_pos])?;
297 }
298 Ok(())
299}
300
301pub fn delete_mmap(delete_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
303 let member = build_member_set(delete_chars);
304 let mut outbuf = vec![0u8; BUF_SIZE];
305
306 for chunk in data.chunks(BUF_SIZE) {
307 let mut out_pos = 0;
308 for &b in chunk {
309 if !is_member(&member, b) {
310 unsafe {
311 *outbuf.get_unchecked_mut(out_pos) = b;
312 }
313 out_pos += 1;
314 }
315 }
316 writer.write_all(&outbuf[..out_pos])?;
317 }
318 Ok(())
319}
320
321pub fn delete_squeeze_mmap(
323 delete_chars: &[u8],
324 squeeze_chars: &[u8],
325 data: &[u8],
326 writer: &mut impl Write,
327) -> io::Result<()> {
328 let delete_set = build_member_set(delete_chars);
329 let squeeze_set = build_member_set(squeeze_chars);
330 let mut outbuf = vec![0u8; BUF_SIZE];
331 let mut last_squeezed: u16 = 256;
332
333 for chunk in data.chunks(BUF_SIZE) {
334 let mut out_pos = 0;
335 for &b in chunk {
336 if is_member(&delete_set, b) {
337 continue;
338 }
339 if is_member(&squeeze_set, b) {
340 if last_squeezed == b as u16 {
341 continue;
342 }
343 last_squeezed = b as u16;
344 } else {
345 last_squeezed = 256;
346 }
347 unsafe {
348 *outbuf.get_unchecked_mut(out_pos) = b;
349 }
350 out_pos += 1;
351 }
352 writer.write_all(&outbuf[..out_pos])?;
353 }
354 Ok(())
355}
356
357pub fn squeeze_mmap(squeeze_chars: &[u8], data: &[u8], writer: &mut impl Write) -> io::Result<()> {
359 let member = build_member_set(squeeze_chars);
360 let mut outbuf = vec![0u8; BUF_SIZE];
361 let mut last_squeezed: u16 = 256;
362
363 for chunk in data.chunks(BUF_SIZE) {
364 let mut out_pos = 0;
365 for &b in chunk {
366 if is_member(&member, b) {
367 if last_squeezed == b as u16 {
368 continue;
369 }
370 last_squeezed = b as u16;
371 } else {
372 last_squeezed = 256;
373 }
374 unsafe {
375 *outbuf.get_unchecked_mut(out_pos) = b;
376 }
377 out_pos += 1;
378 }
379 writer.write_all(&outbuf[..out_pos])?;
380 }
381 Ok(())
382}