1use std::io::{self, IoSlice, Write};
2
3const MAX_IOV: usize = 1024;
6
7const CHUNK: usize = 2 * 1024 * 1024;
12
13#[inline]
17fn flush_iov(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
18 if slices.is_empty() {
19 return Ok(());
20 }
21 let total: usize = slices.iter().map(|s| s.len()).sum();
23
24 let written = match out.write_vectored(slices) {
26 Ok(n) if n >= total => return Ok(()),
27 Ok(n) => n,
28 Err(e) => return Err(e),
29 };
30
31 let mut skip = written;
33 for slice in slices {
34 let slen = slice.len();
35 if skip >= slen {
36 skip -= slen;
37 continue;
38 }
39 if skip > 0 {
40 out.write_all(&slice[skip..])?;
41 skip = 0;
42 } else {
43 out.write_all(slice)?;
44 }
45 }
46 Ok(())
47}
48
49pub fn tac_bytes(data: &[u8], separator: u8, before: bool, out: &mut impl Write) -> io::Result<()> {
62 if data.is_empty() {
63 return Ok(());
64 }
65 if data.len() <= CONTIG_LIMIT {
68 if !before {
69 tac_bytes_contig_after(data, separator, out)
70 } else {
71 tac_bytes_contig_before(data, separator, out)
72 }
73 } else if !before {
74 tac_bytes_backward_after(data, separator, out)
75 } else {
76 tac_bytes_backward_before(data, separator, out)
77 }
78}
79
80const CONTIG_LIMIT: usize = 32 * 1024 * 1024;
83
84fn tac_bytes_contig_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
88 let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
90 let dst = outbuf.as_mut_ptr();
91 let mut wp = 0usize;
92
93 let mut global_end = data.len();
94 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
95 let mut chunk_start = data.len().saturating_sub(CHUNK);
96
97 loop {
98 let chunk_end = global_end.min(data.len());
99 if chunk_start >= chunk_end {
100 if chunk_start == 0 {
101 break;
102 }
103 chunk_start = chunk_start.saturating_sub(CHUNK);
104 continue;
105 }
106 let chunk = &data[chunk_start..chunk_end];
107
108 positions_buf.clear();
109 positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
110
111 for &pos in positions_buf.iter().rev() {
112 let rec_start = pos + 1;
113 let rec_len = global_end - rec_start;
114 if rec_len > 0 {
115 unsafe {
116 std::ptr::copy_nonoverlapping(
117 data.as_ptr().add(rec_start),
118 dst.add(wp),
119 rec_len,
120 );
121 }
122 wp += rec_len;
123 }
124 global_end = pos + 1;
125 }
126
127 if chunk_start == 0 {
128 break;
129 }
130 chunk_start = chunk_start.saturating_sub(CHUNK);
131 }
132
133 if global_end > 0 {
135 unsafe {
136 std::ptr::copy_nonoverlapping(data.as_ptr(), dst.add(wp), global_end);
137 }
138 wp += global_end;
139 }
140
141 unsafe { outbuf.set_len(wp) };
142 out.write_all(&outbuf)
143}
144
145fn tac_bytes_contig_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
147 let mut outbuf: Vec<u8> = Vec::with_capacity(data.len());
148 let dst = outbuf.as_mut_ptr();
149 let mut wp = 0usize;
150
151 let mut global_end = data.len();
152 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
153 let mut chunk_start = data.len().saturating_sub(CHUNK);
154
155 loop {
156 let chunk_end = global_end.min(data.len());
157 if chunk_start >= chunk_end {
158 if chunk_start == 0 {
159 break;
160 }
161 chunk_start = chunk_start.saturating_sub(CHUNK);
162 continue;
163 }
164 let chunk = &data[chunk_start..chunk_end];
165
166 positions_buf.clear();
167 positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
168
169 for &pos in positions_buf.iter().rev() {
170 let rec_len = global_end - pos;
171 if rec_len > 0 {
172 unsafe {
173 std::ptr::copy_nonoverlapping(data.as_ptr().add(pos), dst.add(wp), rec_len);
174 }
175 wp += rec_len;
176 }
177 global_end = pos;
178 }
179
180 if chunk_start == 0 {
181 break;
182 }
183 chunk_start = chunk_start.saturating_sub(CHUNK);
184 }
185
186 if global_end > 0 {
187 unsafe {
188 std::ptr::copy_nonoverlapping(data.as_ptr(), dst.add(wp), global_end);
189 }
190 wp += global_end;
191 }
192
193 unsafe { outbuf.set_len(wp) };
194 out.write_all(&outbuf)
195}
196
197fn tac_bytes_backward_after(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
210 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
211
212 let mut global_end = data.len();
214
215 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
219
220 let mut chunk_start = data.len().saturating_sub(CHUNK);
221
222 loop {
223 let chunk_end = global_end.min(data.len());
224 if chunk_start >= chunk_end {
225 if chunk_start == 0 {
226 break;
227 }
228 chunk_start = chunk_start.saturating_sub(CHUNK);
229 continue;
230 }
231 let chunk = &data[chunk_start..chunk_end];
232
233 positions_buf.clear();
235 positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
236
237 for &pos in positions_buf.iter().rev() {
239 if pos + 1 < global_end {
240 iov.push(IoSlice::new(&data[pos + 1..global_end]));
241 }
242 global_end = pos + 1;
243
244 if iov.len() >= MAX_IOV {
245 flush_iov(out, &iov)?;
246 iov.clear();
247 }
248 }
249
250 if chunk_start == 0 {
251 break;
252 }
253 chunk_start = chunk_start.saturating_sub(CHUNK);
254 }
255
256 if global_end > 0 {
258 iov.push(IoSlice::new(&data[0..global_end]));
259 }
260 flush_iov(out, &iov)?;
261 Ok(())
262}
263
264fn tac_bytes_backward_before(data: &[u8], sep: u8, out: &mut impl Write) -> io::Result<()> {
269 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
270
271 let mut global_end = data.len();
272 let mut chunk_start = data.len().saturating_sub(CHUNK);
273 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK);
274
275 loop {
276 let chunk_end = global_end.min(data.len());
277 if chunk_start >= chunk_end {
278 if chunk_start == 0 {
279 break;
280 }
281 chunk_start = chunk_start.saturating_sub(CHUNK);
282 continue;
283 }
284 let chunk = &data[chunk_start..chunk_end];
285
286 positions_buf.clear();
287 positions_buf.extend(memchr::memchr_iter(sep, chunk).map(|p| p + chunk_start));
288
289 for &pos in positions_buf.iter().rev() {
290 iov.push(IoSlice::new(&data[pos..global_end]));
291 global_end = pos;
292
293 if iov.len() >= MAX_IOV {
294 flush_iov(out, &iov)?;
295 iov.clear();
296 }
297 }
298
299 if chunk_start == 0 {
300 break;
301 }
302 chunk_start = chunk_start.saturating_sub(CHUNK);
303 }
304
305 if global_end > 0 {
306 iov.push(IoSlice::new(&data[0..global_end]));
307 }
308 flush_iov(out, &iov)?;
309 Ok(())
310}
311
312pub fn tac_string_separator(
317 data: &[u8],
318 separator: &[u8],
319 before: bool,
320 out: &mut impl Write,
321) -> io::Result<()> {
322 if data.is_empty() {
323 return Ok(());
324 }
325
326 if separator.len() == 1 {
327 return tac_bytes(data, separator[0], before, out);
328 }
329
330 let sep_len = separator.len();
331
332 if !before {
341 tac_string_after(data, separator, sep_len, out)
342 } else {
343 tac_string_before(data, separator, sep_len, out)
344 }
345}
346
347fn tac_string_after(
349 data: &[u8],
350 separator: &[u8],
351 sep_len: usize,
352 out: &mut impl Write,
353) -> io::Result<()> {
354 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
355 let mut global_end = data.len();
356 let mut chunk_start = data.len().saturating_sub(CHUNK);
357 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
358
359 loop {
360 let chunk_end = global_end.min(data.len());
361 let scan_start = chunk_start.saturating_sub(sep_len - 1);
362 if scan_start >= chunk_end {
363 if chunk_start == 0 {
364 break;
365 }
366 chunk_start = chunk_start.saturating_sub(CHUNK);
367 continue;
368 }
369 let scan_region = &data[scan_start..chunk_end];
370
371 positions_buf.clear();
372 positions_buf.extend(
373 memchr::memmem::find_iter(scan_region, separator)
374 .map(|p| p + scan_start)
375 .filter(|&p| p >= chunk_start || chunk_start == 0)
376 .filter(|&p| p + sep_len <= global_end),
377 );
378
379 for &pos in positions_buf.iter().rev() {
380 let rec_end_exclusive = pos + sep_len;
381 if rec_end_exclusive < global_end {
382 iov.push(IoSlice::new(&data[rec_end_exclusive..global_end]));
383 }
384 global_end = rec_end_exclusive;
385 if iov.len() >= MAX_IOV {
386 flush_iov(out, &iov)?;
387 iov.clear();
388 }
389 }
390
391 if chunk_start == 0 {
392 break;
393 }
394 chunk_start = chunk_start.saturating_sub(CHUNK);
395 }
396
397 if global_end > 0 {
398 iov.push(IoSlice::new(&data[0..global_end]));
399 }
400 flush_iov(out, &iov)?;
401 Ok(())
402}
403
404fn tac_string_before(
406 data: &[u8],
407 separator: &[u8],
408 sep_len: usize,
409 out: &mut impl Write,
410) -> io::Result<()> {
411 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
412 let mut global_end = data.len();
413 let mut chunk_start = data.len().saturating_sub(CHUNK);
414 let mut positions_buf: Vec<usize> = Vec::with_capacity(CHUNK / 4);
415
416 loop {
417 let chunk_end = global_end.min(data.len());
418 let scan_start = chunk_start.saturating_sub(sep_len - 1);
419 if scan_start >= chunk_end {
420 if chunk_start == 0 {
421 break;
422 }
423 chunk_start = chunk_start.saturating_sub(CHUNK);
424 continue;
425 }
426 let scan_region = &data[scan_start..chunk_end];
427
428 positions_buf.clear();
429 positions_buf.extend(
430 memchr::memmem::find_iter(scan_region, separator)
431 .map(|p| p + scan_start)
432 .filter(|&p| p >= chunk_start || chunk_start == 0)
433 .filter(|&p| p < global_end),
434 );
435
436 for &pos in positions_buf.iter().rev() {
437 iov.push(IoSlice::new(&data[pos..global_end]));
438 global_end = pos;
439 if iov.len() >= MAX_IOV {
440 flush_iov(out, &iov)?;
441 iov.clear();
442 }
443 }
444
445 if chunk_start == 0 {
446 break;
447 }
448 chunk_start = chunk_start.saturating_sub(CHUNK);
449 }
450
451 if global_end > 0 {
452 iov.push(IoSlice::new(&data[0..global_end]));
453 }
454 flush_iov(out, &iov)?;
455 Ok(())
456}
457
458fn find_regex_matches_backward(data: &[u8], re: ®ex::bytes::Regex) -> Vec<(usize, usize)> {
460 let mut matches = Vec::new();
461 let mut past_end = data.len();
462
463 while past_end > 0 {
464 let buf = &data[..past_end];
465 let mut found = false;
466
467 let mut pos = past_end;
468 while pos > 0 {
469 pos -= 1;
470 if let Some(m) = re.find_at(buf, pos) {
471 if m.start() == pos {
472 matches.push((m.start(), m.end()));
473 past_end = m.start();
474 found = true;
475 break;
476 }
477 }
478 }
479
480 if !found {
481 break;
482 }
483 }
484
485 matches.reverse();
486 matches
487}
488
489pub fn tac_regex_separator(
491 data: &[u8],
492 pattern: &str,
493 before: bool,
494 out: &mut impl Write,
495) -> io::Result<()> {
496 if data.is_empty() {
497 return Ok(());
498 }
499
500 let re = match regex::bytes::Regex::new(pattern) {
501 Ok(r) => r,
502 Err(e) => {
503 return Err(io::Error::new(
504 io::ErrorKind::InvalidInput,
505 format!("invalid regex '{}': {}", pattern, e),
506 ));
507 }
508 };
509
510 let matches = find_regex_matches_backward(data, &re);
511
512 if matches.is_empty() {
513 out.write_all(data)?;
514 return Ok(());
515 }
516
517 let mut iov: Vec<IoSlice> = Vec::with_capacity(matches.len().min(MAX_IOV) + 2);
518
519 if !before {
520 let last_end = matches.last().unwrap().1;
521
522 if last_end < data.len() {
523 iov.push(IoSlice::new(&data[last_end..]));
524 }
525
526 let mut i = matches.len();
527 while i > 0 {
528 i -= 1;
529 let rec_start = if i == 0 { 0 } else { matches[i - 1].1 };
530 iov.push(IoSlice::new(&data[rec_start..matches[i].1]));
531 if iov.len() >= MAX_IOV {
532 flush_iov(out, &iov)?;
533 iov.clear();
534 }
535 }
536 } else {
537 let mut i = matches.len();
538 while i > 0 {
539 i -= 1;
540 let start = matches[i].0;
541 let end = if i + 1 < matches.len() {
542 matches[i + 1].0
543 } else {
544 data.len()
545 };
546 iov.push(IoSlice::new(&data[start..end]));
547 if iov.len() >= MAX_IOV {
548 flush_iov(out, &iov)?;
549 iov.clear();
550 }
551 }
552
553 if matches[0].0 > 0 {
554 iov.push(IoSlice::new(&data[..matches[0].0]));
555 }
556 }
557
558 flush_iov(out, &iov)?;
559 Ok(())
560}