use std::io::Write;

3#[derive(Clone)]
5pub enum NumberingStyle {
6 All,
8 NonEmpty,
10 None,
12 Prefix(Vec<u8>),
14 Regex(regex::bytes::Regex),
16}
17
/// Justification and padding used when rendering a line number.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumberFormat {
    /// Left-justified; padded on the right with spaces.
    Ln,
    /// Right-justified; padded on the left with spaces.
    Rn,
    /// Right-justified; padded on the left with zeros.
    Rz,
}

29pub struct NlConfig {
31 pub body_style: NumberingStyle,
32 pub header_style: NumberingStyle,
33 pub footer_style: NumberingStyle,
34 pub section_delimiter: Vec<u8>,
35 pub line_increment: i64,
36 pub join_blank_lines: usize,
37 pub number_format: NumberFormat,
38 pub no_renumber: bool,
39 pub number_separator: Vec<u8>,
40 pub starting_line_number: i64,
41 pub number_width: usize,
42}
43
44impl Default for NlConfig {
45 fn default() -> Self {
46 Self {
47 body_style: NumberingStyle::NonEmpty,
48 header_style: NumberingStyle::None,
49 footer_style: NumberingStyle::None,
50 section_delimiter: vec![b'\\', b':'],
51 line_increment: 1,
52 join_blank_lines: 1,
53 number_format: NumberFormat::Rn,
54 no_renumber: false,
55 number_separator: vec![b'\t'],
56 starting_line_number: 1,
57 number_width: 6,
58 }
59 }
60}
61
62pub fn parse_numbering_style(s: &str) -> Result<NumberingStyle, String> {
64 match s {
65 "a" => Ok(NumberingStyle::All),
66 "t" => Ok(NumberingStyle::NonEmpty),
67 "n" => Ok(NumberingStyle::None),
68 _ if s.starts_with('p') => {
69 let pattern = &s[1..];
70 if let Some(rest) = pattern.strip_prefix('^') {
72 if !rest.is_empty() && !rest.bytes().any(|b| b"\\.*+?|()[]{}$".contains(&b)) {
73 return Ok(NumberingStyle::Prefix(rest.as_bytes().to_vec()));
74 }
75 }
76 match regex::bytes::Regex::new(pattern) {
77 Ok(re) => Ok(NumberingStyle::Regex(re)),
78 Err(e) => Err(format!("invalid regular expression: {}", e)),
79 }
80 }
81 _ => Err(format!("invalid numbering style: '{}'", s)),
82 }
83}
84
85pub fn parse_number_format(s: &str) -> Result<NumberFormat, String> {
87 match s {
88 "ln" => Ok(NumberFormat::Ln),
89 "rn" => Ok(NumberFormat::Rn),
90 "rz" => Ok(NumberFormat::Rz),
91 _ => Err(format!("invalid line numbering: '{}'", s)),
92 }
93}
94
/// Part of a logical page; each part has its own numbering style.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Section {
    /// Lines following a three-fold delimiter line.
    Header,
    /// Lines following a two-fold delimiter line; also the section in effect at the
    /// start of input.
    Body,
    /// Lines following a single delimiter line.
    Footer,
}

103#[inline]
105fn check_section_delimiter(line: &[u8], delim: &[u8]) -> Option<Section> {
106 if delim.is_empty() {
107 return None;
108 }
109 let dlen = delim.len();
110
111 if line.len() == dlen * 3 {
113 let mut is_header = true;
114 for i in 0..3 {
115 if &line[i * dlen..(i + 1) * dlen] != delim {
116 is_header = false;
117 break;
118 }
119 }
120 if is_header {
121 return Some(Section::Header);
122 }
123 }
124
125 if line.len() == dlen * 2 && &line[..dlen] == delim && &line[dlen..] == delim {
127 return Some(Section::Body);
128 }
129
130 if line.len() == dlen && line == delim {
132 return Some(Section::Footer);
133 }
134
135 None
136}
137
138#[inline]
140fn format_number(num: i64, format: NumberFormat, width: usize, buf: &mut Vec<u8>) {
141 let mut num_buf = itoa::Buffer::new();
142 let num_str = num_buf.format(num).as_bytes();
143 let pad = width.saturating_sub(num_str.len());
144 let total = pad + num_str.len();
145 buf.reserve(total);
146 unsafe {
147 let start = buf.len();
148 let dst = buf.as_mut_ptr().add(start);
149 match format {
150 NumberFormat::Ln => {
151 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst, num_str.len());
152 std::ptr::write_bytes(dst.add(num_str.len()), b' ', pad);
153 }
154 NumberFormat::Rn => {
155 std::ptr::write_bytes(dst, b' ', pad);
156 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
157 }
158 NumberFormat::Rz => {
159 if num < 0 {
160 *dst = b'-';
161 let abs = &num_str[1..];
162 let zpad = width.saturating_sub(abs.len() + 1);
163 std::ptr::write_bytes(dst.add(1), b'0', zpad);
164 std::ptr::copy_nonoverlapping(abs.as_ptr(), dst.add(1 + zpad), abs.len());
165 buf.set_len(start + 1 + zpad + abs.len());
166 return;
167 }
168 std::ptr::write_bytes(dst, b'0', pad);
169 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
170 }
171 }
172 buf.set_len(start + total);
173 }
174}
175
176#[inline]
178fn should_number(line: &[u8], style: &NumberingStyle) -> bool {
179 match style {
180 NumberingStyle::All => true,
181 NumberingStyle::NonEmpty => !line.is_empty(),
182 NumberingStyle::None => false,
183 NumberingStyle::Prefix(prefix) => line.starts_with(prefix),
184 NumberingStyle::Regex(re) => re.is_match(line),
185 }
186}
187
188pub fn nl_to_vec(data: &[u8], config: &NlConfig) -> Vec<u8> {
190 let mut line_number = config.starting_line_number;
191 nl_to_vec_with_state(data, config, &mut line_number)
192}
193
194#[inline]
196fn is_simple_number_all(config: &NlConfig) -> bool {
197 matches!(config.body_style, NumberingStyle::All)
198 && matches!(config.header_style, NumberingStyle::None)
199 && matches!(config.footer_style, NumberingStyle::None)
200 && config.join_blank_lines == 1
201 && config.line_increment == 1
202 && config.starting_line_number >= 0
203 && !config.no_renumber
204 && config.number_width + config.number_separator.len() <= 30
205}
206
207#[inline]
209fn is_simple_number_nonempty(config: &NlConfig) -> bool {
210 matches!(config.body_style, NumberingStyle::NonEmpty)
211 && matches!(config.header_style, NumberingStyle::None)
212 && matches!(config.footer_style, NumberingStyle::None)
213 && config.join_blank_lines == 1
214 && config.line_increment == 1
215 && config.starting_line_number >= 0
216 && !config.no_renumber
217 && config.number_width + config.number_separator.len() <= 30
218}
219
220#[inline]
222fn is_simple_number_pattern(config: &NlConfig) -> bool {
223 matches!(
224 config.body_style,
225 NumberingStyle::Prefix(_) | NumberingStyle::Regex(_)
226 ) && matches!(config.header_style, NumberingStyle::None)
227 && matches!(config.footer_style, NumberingStyle::None)
228 && config.join_blank_lines == 1
229 && config.line_increment == 1
230 && config.starting_line_number >= 0
231 && !config.no_renumber
232 && config.number_width + config.number_separator.len() <= 30
233}
234
235#[inline]
240fn data_has_section_delimiters(data: &[u8], config: &NlConfig) -> bool {
241 if config.section_delimiter.is_empty() {
242 return false;
243 }
244 let first_byte = config.section_delimiter[0];
245 memchr::memchr(first_byte, data).is_some()
246 && memchr::memmem::find(data, &config.section_delimiter).is_some()
247}
248
249#[inline(always)]
252unsafe fn write_numbered_line(
253 output: &mut Vec<u8>,
254 fmt: NumberFormat,
255 num_str: &str,
256 pad: usize,
257 sep: &[u8],
258 line_data: *const u8,
259 line_len: usize,
260) {
261 unsafe {
262 let prefix_len = pad + num_str.len() + sep.len();
263 let total_len = prefix_len + line_len + 1;
264 let start_pos = output.len();
265 let dst = output.as_mut_ptr().add(start_pos);
266
267 match fmt {
268 NumberFormat::Rn => {
269 std::ptr::write_bytes(dst, b' ', pad);
270 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
271 }
272 NumberFormat::Rz => {
273 std::ptr::write_bytes(dst, b'0', pad);
274 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
275 }
276 NumberFormat::Ln => {
277 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst, num_str.len());
278 std::ptr::write_bytes(dst.add(num_str.len()), b' ', pad);
279 }
280 }
281 std::ptr::copy_nonoverlapping(sep.as_ptr(), dst.add(pad + num_str.len()), sep.len());
282 std::ptr::copy_nonoverlapping(line_data, dst.add(prefix_len), line_len);
283 *dst.add(prefix_len + line_len) = b'\n';
284 output.set_len(start_pos + total_len);
285 }
286}
287
288fn nl_number_all_fast(data: &[u8], config: &NlConfig, line_number: &mut i64) -> Vec<u8> {
292 let alloc = (data.len() * 2 + 256).min(128 * 1024 * 1024);
293 let mut output: Vec<u8> = Vec::with_capacity(alloc);
294
295 let width = config.number_width;
296 let sep = &config.number_separator;
297 let fmt = config.number_format;
298 let mut num = *line_number;
299 let mut pos: usize = 0;
300 let mut num_buf = itoa::Buffer::new();
301
302 for nl_pos in memchr::memchr_iter(b'\n', data) {
303 let line_len = nl_pos - pos;
304 let needed = output.len() + line_len + width + sep.len() + 22;
306 if needed > output.capacity() {
307 output.reserve(needed - output.capacity() + 4 * 1024 * 1024);
308 }
309
310 let num_str = num_buf.format(num);
311 let pad = width.saturating_sub(num_str.len());
312
313 unsafe {
314 write_numbered_line(
315 &mut output,
316 fmt,
317 num_str,
318 pad,
319 sep,
320 data.as_ptr().add(pos),
321 line_len,
322 );
323 }
324
325 num += 1;
326 pos = nl_pos + 1;
327 }
328
329 if pos < data.len() {
331 let remaining = data.len() - pos;
332 let needed = output.len() + remaining + width + sep.len() + 22;
333 if needed > output.capacity() {
334 output.reserve(needed - output.capacity() + 1024);
335 }
336 let num_str = num_buf.format(num);
337 let pad = width.saturating_sub(num_str.len());
338
339 unsafe {
340 write_numbered_line(
341 &mut output,
342 fmt,
343 num_str,
344 pad,
345 sep,
346 data.as_ptr().add(pos),
347 remaining,
348 );
349 }
350 num += 1;
351 }
352
353 *line_number = num;
354 output
355}
356
357fn nl_number_nonempty_fast(data: &[u8], config: &NlConfig, line_number: &mut i64) -> Vec<u8> {
361 let alloc = (data.len() * 2 + 256).min(128 * 1024 * 1024);
362 let mut output: Vec<u8> = Vec::with_capacity(alloc);
363
364 let width = config.number_width;
365 let sep = &config.number_separator;
366 let fmt = config.number_format;
367 let mut num = *line_number;
368 let mut pos: usize = 0;
369 let mut num_buf = itoa::Buffer::new();
370 let blank_pad = width + sep.len();
371
372 for nl_pos in memchr::memchr_iter(b'\n', data) {
373 let line_len = nl_pos - pos;
374 let needed = output.len() + line_len + width + sep.len() + 22;
376 if needed > output.capacity() {
377 output.reserve(needed - output.capacity() + 4 * 1024 * 1024);
378 }
379
380 if line_len == 0 {
381 let start_pos = output.len();
382 unsafe {
383 let dst = output.as_mut_ptr().add(start_pos);
384 std::ptr::write_bytes(dst, b' ', blank_pad);
385 *dst.add(blank_pad) = b'\n';
386 output.set_len(start_pos + blank_pad + 1);
387 }
388 } else {
389 let num_str = num_buf.format(num);
390 let pad = width.saturating_sub(num_str.len());
391 unsafe {
392 write_numbered_line(
393 &mut output,
394 fmt,
395 num_str,
396 pad,
397 sep,
398 data.as_ptr().add(pos),
399 line_len,
400 );
401 }
402 num += 1;
403 }
404 pos = nl_pos + 1;
405 }
406
407 if pos < data.len() {
410 let remaining = data.len() - pos;
411 let needed = output.len() + remaining + width + sep.len() + 22;
412 if needed > output.capacity() {
413 output.reserve(needed - output.capacity() + 1024);
414 }
415 let num_str = num_buf.format(num);
416 let pad = width.saturating_sub(num_str.len());
417 unsafe {
418 write_numbered_line(
419 &mut output,
420 fmt,
421 num_str,
422 pad,
423 sep,
424 data.as_ptr().add(pos),
425 remaining,
426 );
427 }
428 num += 1;
429 }
430
431 *line_number = num;
432 output
433}
434#[cfg(unix)]
439fn nl_number_all_stream(
440 data: &[u8],
441 config: &NlConfig,
442 line_number: &mut i64,
443 fd: i32,
444) -> std::io::Result<()> {
445 const BUF_SIZE: usize = 2 * 1024 * 1024;
446
447 let width = config.number_width;
448 let sep = &config.number_separator;
449 let fmt = config.number_format;
450 let mut num = *line_number;
451 let mut pos: usize = 0;
452
453 let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 128 * 1024);
454 let mut buf_ptr = output.as_mut_ptr();
455 let mut write_pos: usize = 0;
456 let data_ptr = data.as_ptr();
457
458 let mut prefix_buf = [0u8; 64];
460 let mut prefix_len: usize;
461 let mut num_end: usize;
462
463 let mut num_buf = itoa::Buffer::new();
464
465 {
467 let num_str = num_buf.format(num);
468 let pad = width.saturating_sub(num_str.len());
469 let mut wp = 0;
470 match fmt {
471 NumberFormat::Rn => {
472 for _ in 0..pad {
473 prefix_buf[wp] = b' ';
474 wp += 1;
475 }
476 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
477 wp += num_str.len();
478 }
479 NumberFormat::Rz => {
480 for _ in 0..pad {
481 prefix_buf[wp] = b'0';
482 wp += 1;
483 }
484 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
485 wp += num_str.len();
486 }
487 NumberFormat::Ln => {
488 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
489 wp += num_str.len();
490 for _ in 0..pad {
491 prefix_buf[wp] = b' ';
492 wp += 1;
493 }
494 }
495 }
496 num_end = wp;
497 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
498 wp += sep.len();
499 prefix_len = wp;
500 }
501
502 for nl_pos in memchr::memchr_iter(b'\n', data) {
503 let line_len = nl_pos - pos;
504
505 let needed = line_len + prefix_len + 2;
506 if write_pos + needed > BUF_SIZE {
507 unsafe {
508 output.set_len(write_pos);
509 }
510 write_all_fd(fd, &output)?;
511 write_pos = 0;
512 if needed > output.capacity() {
513 unsafe {
514 output.set_len(0);
515 }
516 output.reserve(needed);
517 buf_ptr = output.as_mut_ptr();
518 }
519 }
520
521 unsafe {
522 let dst = buf_ptr.add(write_pos);
523 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
524 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), line_len);
525 *dst.add(prefix_len + line_len) = b'\n';
526 }
527 write_pos += prefix_len + line_len + 1;
528
529 num += 1;
530 pos = nl_pos + 1;
531
532 match fmt {
534 NumberFormat::Rn | NumberFormat::Rz => {
535 let mut idx = num_end - 1;
536 loop {
537 if prefix_buf[idx] < b'9' {
538 prefix_buf[idx] += 1;
539 break;
540 }
541 prefix_buf[idx] = b'0';
542 if idx == 0 {
543 let ns = num_buf.format(num);
544 let p = width.saturating_sub(ns.len());
545 let pc = if fmt == NumberFormat::Rz { b'0' } else { b' ' };
546 let mut wp = 0;
547 for _ in 0..p {
548 prefix_buf[wp] = pc;
549 wp += 1;
550 }
551 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
552 wp += ns.len();
553 num_end = wp;
554 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
555 prefix_len = wp + sep.len();
556 break;
557 }
558 idx -= 1;
559 let c = prefix_buf[idx];
560 if c == b' ' || c == b'0' {
561 prefix_buf[idx] = b'1';
562 break;
563 }
564 }
565 }
566 NumberFormat::Ln => {
567 let mut last_digit = 0;
568 for j in 0..num_end {
569 if prefix_buf[j].is_ascii_digit() {
570 last_digit = j;
571 } else {
572 break;
573 }
574 }
575 let mut idx = last_digit;
576 loop {
577 if prefix_buf[idx] < b'9' {
578 prefix_buf[idx] += 1;
579 break;
580 }
581 prefix_buf[idx] = b'0';
582 if idx == 0 {
583 let ns = num_buf.format(num);
584 let p = width.saturating_sub(ns.len());
585 let mut wp = 0;
586 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
587 wp += ns.len();
588 for _ in 0..p {
589 prefix_buf[wp] = b' ';
590 wp += 1;
591 }
592 num_end = wp;
593 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
594 prefix_len = wp + sep.len();
595 break;
596 }
597 idx -= 1;
598 }
599 }
600 }
601 }
602
603 if pos < data.len() {
605 let remaining = data.len() - pos;
606 let needed = prefix_len + remaining + 2;
607 if write_pos + needed > BUF_SIZE {
608 unsafe {
609 output.set_len(write_pos);
610 }
611 write_all_fd(fd, &output)?;
612 write_pos = 0;
613 if needed > output.capacity() {
614 unsafe {
615 output.set_len(0);
616 }
617 output.reserve(needed);
618 buf_ptr = output.as_mut_ptr();
619 }
620 }
621 unsafe {
622 let dst = buf_ptr.add(write_pos);
623 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
624 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), remaining);
625 *dst.add(prefix_len + remaining) = b'\n';
626 }
627 write_pos += prefix_len + remaining + 1;
628 num += 1;
629 }
630
631 if write_pos > 0 {
632 unsafe {
633 output.set_len(write_pos);
634 }
635 write_all_fd(fd, &output)?;
636 }
637
638 *line_number = num;
639 Ok(())
640}
641
642#[cfg(unix)]
645fn nl_number_nonempty_stream(
646 data: &[u8],
647 config: &NlConfig,
648 line_number: &mut i64,
649 fd: i32,
650) -> std::io::Result<()> {
651 const BUF_SIZE: usize = 2 * 1024 * 1024;
652
653 let width = config.number_width;
654 let sep = &config.number_separator;
655 let fmt = config.number_format;
656 let mut num = *line_number;
657 let mut pos: usize = 0;
658
659 let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 128 * 1024);
660 let mut buf_ptr = output.as_mut_ptr();
661 let mut write_pos: usize = 0;
662 let data_ptr = data.as_ptr();
663
664 let mut prefix_buf = [0u8; 64];
665 let mut prefix_len: usize;
666 let mut num_end: usize;
667 let mut num_buf = itoa::Buffer::new();
668
669 let blank_pad = width + sep.len();
671
672 {
674 let num_str = num_buf.format(num);
675 let pad = width.saturating_sub(num_str.len());
676 let mut wp = 0;
677 match fmt {
678 NumberFormat::Rn => {
679 for _ in 0..pad {
680 prefix_buf[wp] = b' ';
681 wp += 1;
682 }
683 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
684 wp += num_str.len();
685 }
686 NumberFormat::Rz => {
687 for _ in 0..pad {
688 prefix_buf[wp] = b'0';
689 wp += 1;
690 }
691 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
692 wp += num_str.len();
693 }
694 NumberFormat::Ln => {
695 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
696 wp += num_str.len();
697 for _ in 0..pad {
698 prefix_buf[wp] = b' ';
699 wp += 1;
700 }
701 }
702 }
703 num_end = wp;
704 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
705 wp += sep.len();
706 prefix_len = wp;
707 }
708
709 for nl_pos in memchr::memchr_iter(b'\n', data) {
710 let line_len = nl_pos - pos;
711
712 let needed = line_len + prefix_len + 2;
715 if write_pos + needed > BUF_SIZE {
716 unsafe {
717 output.set_len(write_pos);
718 }
719 write_all_fd(fd, &output)?;
720 write_pos = 0;
721 if needed > output.capacity() {
723 output.reserve(needed);
724 buf_ptr = output.as_mut_ptr();
725 }
726 }
727
728 if line_len == 0 {
729 unsafe {
732 let dst = buf_ptr.add(write_pos);
733 std::ptr::write_bytes(dst, b' ', blank_pad);
734 *dst.add(blank_pad) = b'\n';
735 }
736 write_pos += blank_pad + 1;
737 } else {
738 unsafe {
740 let dst = buf_ptr.add(write_pos);
741 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
742 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), line_len);
743 *dst.add(prefix_len + line_len) = b'\n';
744 }
745 write_pos += prefix_len + line_len + 1;
746
747 num += 1;
748
749 match fmt {
751 NumberFormat::Rn | NumberFormat::Rz => {
752 let mut idx = num_end - 1;
753 loop {
754 if prefix_buf[idx] < b'9' {
755 prefix_buf[idx] += 1;
756 break;
757 }
758 prefix_buf[idx] = b'0';
759 if idx == 0 {
760 let ns = num_buf.format(num);
761 let p = width.saturating_sub(ns.len());
762 let pc = if fmt == NumberFormat::Rz { b'0' } else { b' ' };
763 let mut wp = 0;
764 for _ in 0..p {
765 prefix_buf[wp] = pc;
766 wp += 1;
767 }
768 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
769 wp += ns.len();
770 num_end = wp;
771 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
772 prefix_len = wp + sep.len();
773 break;
774 }
775 idx -= 1;
776 let c = prefix_buf[idx];
777 if c == b' ' || c == b'0' {
778 prefix_buf[idx] = b'1';
779 break;
780 }
781 }
782 }
783 NumberFormat::Ln => {
784 let mut last_digit = 0;
785 for j in 0..num_end {
786 if prefix_buf[j].is_ascii_digit() {
787 last_digit = j;
788 } else {
789 break;
790 }
791 }
792 let mut idx = last_digit;
793 loop {
794 if prefix_buf[idx] < b'9' {
795 prefix_buf[idx] += 1;
796 break;
797 }
798 prefix_buf[idx] = b'0';
799 if idx == 0 {
800 let ns = num_buf.format(num);
801 let p = width.saturating_sub(ns.len());
802 let mut wp = 0;
803 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
804 wp += ns.len();
805 for _ in 0..p {
806 prefix_buf[wp] = b' ';
807 wp += 1;
808 }
809 num_end = wp;
810 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
811 prefix_len = wp + sep.len();
812 break;
813 }
814 idx -= 1;
815 }
816 }
817 }
818 }
819
820 pos = nl_pos + 1;
821 }
822
823 if pos < data.len() {
825 let remaining = data.len() - pos;
826 let needed = prefix_len + remaining + 2;
827 if write_pos + needed > BUF_SIZE {
828 unsafe {
829 output.set_len(write_pos);
830 }
831 write_all_fd(fd, &output)?;
832 write_pos = 0;
833 if needed > output.capacity() {
834 unsafe {
835 output.set_len(0);
836 }
837 output.reserve(needed);
838 buf_ptr = output.as_mut_ptr();
839 }
840 }
841 unsafe {
843 let dst = buf_ptr.add(write_pos);
844 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
845 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), remaining);
846 *dst.add(prefix_len + remaining) = b'\n';
847 }
848 write_pos += prefix_len + remaining + 1;
849 num += 1;
850 }
851
852 if write_pos > 0 {
853 unsafe {
854 output.set_len(write_pos);
855 }
856 write_all_fd(fd, &output)?;
857 }
858
859 *line_number = num;
860 Ok(())
861}
862
863#[cfg(unix)]
867fn nl_number_pattern_stream(
868 data: &[u8],
869 config: &NlConfig,
870 line_number: &mut i64,
871 fd: i32,
872) -> std::io::Result<()> {
873 const BUF_SIZE: usize = 2 * 1024 * 1024;
874
875 let width = config.number_width;
876 let sep = &config.number_separator;
877 let fmt = config.number_format;
878 let style = &config.body_style;
879 let mut num = *line_number;
880 let mut pos: usize = 0;
881
882 let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 128 * 1024);
883 let mut buf_ptr = output.as_mut_ptr();
884 let mut write_pos: usize = 0;
885 let data_ptr = data.as_ptr();
886
887 let mut prefix_buf = [0u8; 64];
888 let mut prefix_len: usize;
889 let mut num_end: usize;
890 let mut num_buf = itoa::Buffer::new();
891
892 let blank_pad = width + sep.len();
893
894 {
896 let num_str = num_buf.format(num);
897 let pad = width.saturating_sub(num_str.len());
898 let mut wp = 0;
899 match fmt {
900 NumberFormat::Rn => {
901 for _ in 0..pad {
902 prefix_buf[wp] = b' ';
903 wp += 1;
904 }
905 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
906 wp += num_str.len();
907 }
908 NumberFormat::Rz => {
909 for _ in 0..pad {
910 prefix_buf[wp] = b'0';
911 wp += 1;
912 }
913 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
914 wp += num_str.len();
915 }
916 NumberFormat::Ln => {
917 prefix_buf[wp..wp + num_str.len()].copy_from_slice(num_str.as_bytes());
918 wp += num_str.len();
919 for _ in 0..pad {
920 prefix_buf[wp] = b' ';
921 wp += 1;
922 }
923 }
924 }
925 num_end = wp;
926 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
927 wp += sep.len();
928 prefix_len = wp;
929 }
930
931 for nl_pos in memchr::memchr_iter(b'\n', data) {
932 let line_len = nl_pos - pos;
933 let needed = line_len + prefix_len + 2;
934 if write_pos + needed > BUF_SIZE {
935 unsafe {
936 output.set_len(write_pos);
937 }
938 write_all_fd(fd, &output)?;
939 write_pos = 0;
940 if needed > output.capacity() {
941 unsafe {
942 output.set_len(0);
943 }
944 output.reserve(needed);
945 buf_ptr = output.as_mut_ptr();
946 }
947 }
948
949 let line = &data[pos..nl_pos];
950 if should_number(line, style) {
951 unsafe {
953 let dst = buf_ptr.add(write_pos);
954 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
955 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), line_len);
956 *dst.add(prefix_len + line_len) = b'\n';
957 }
958 write_pos += prefix_len + line_len + 1;
959
960 num += 1;
961
962 match fmt {
964 NumberFormat::Rn | NumberFormat::Rz => {
965 let mut idx = num_end - 1;
966 loop {
967 if prefix_buf[idx] < b'9' {
968 prefix_buf[idx] += 1;
969 break;
970 }
971 prefix_buf[idx] = b'0';
972 if idx == 0 {
973 let ns = num_buf.format(num);
974 let p = width.saturating_sub(ns.len());
975 let pc = if fmt == NumberFormat::Rz { b'0' } else { b' ' };
976 let mut wp = 0;
977 for _ in 0..p {
978 prefix_buf[wp] = pc;
979 wp += 1;
980 }
981 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
982 wp += ns.len();
983 num_end = wp;
984 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
985 prefix_len = wp + sep.len();
986 break;
987 }
988 idx -= 1;
989 let c = prefix_buf[idx];
990 if c == b' ' || c == b'0' {
991 prefix_buf[idx] = b'1';
992 break;
993 }
994 }
995 }
996 NumberFormat::Ln => {
997 let mut last_digit = 0;
998 for j in 0..num_end {
999 if prefix_buf[j].is_ascii_digit() {
1000 last_digit = j;
1001 } else {
1002 break;
1003 }
1004 }
1005 let mut idx = last_digit;
1006 loop {
1007 if prefix_buf[idx] < b'9' {
1008 prefix_buf[idx] += 1;
1009 break;
1010 }
1011 prefix_buf[idx] = b'0';
1012 if idx == 0 {
1013 let ns = num_buf.format(num);
1014 let p = width.saturating_sub(ns.len());
1015 let mut wp = 0;
1016 prefix_buf[wp..wp + ns.len()].copy_from_slice(ns.as_bytes());
1017 wp += ns.len();
1018 for _ in 0..p {
1019 prefix_buf[wp] = b' ';
1020 wp += 1;
1021 }
1022 num_end = wp;
1023 prefix_buf[wp..wp + sep.len()].copy_from_slice(sep);
1024 prefix_len = wp + sep.len();
1025 break;
1026 }
1027 idx -= 1;
1028 }
1029 }
1030 }
1031 } else {
1032 unsafe {
1034 let dst = buf_ptr.add(write_pos);
1035 std::ptr::write_bytes(dst, b' ', blank_pad);
1036 if line_len > 0 {
1037 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(blank_pad), line_len);
1038 }
1039 *dst.add(blank_pad + line_len) = b'\n';
1040 }
1041 write_pos += blank_pad + line_len + 1;
1042 }
1043
1044 pos = nl_pos + 1;
1045 }
1046
1047 if pos < data.len() {
1049 let remaining = data.len() - pos;
1050 let needed = prefix_len + remaining + 2;
1051 if write_pos + needed > BUF_SIZE {
1052 unsafe {
1053 output.set_len(write_pos);
1054 }
1055 write_all_fd(fd, &output)?;
1056 write_pos = 0;
1057 if needed > output.capacity() {
1058 unsafe {
1059 output.set_len(0);
1060 }
1061 output.reserve(needed);
1062 buf_ptr = output.as_mut_ptr();
1063 }
1064 }
1065 let line = &data[pos..];
1066 if should_number(line, style) {
1067 unsafe {
1068 let dst = buf_ptr.add(write_pos);
1069 std::ptr::copy_nonoverlapping(prefix_buf.as_ptr(), dst, prefix_len);
1070 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(prefix_len), remaining);
1071 *dst.add(prefix_len + remaining) = b'\n';
1072 }
1073 write_pos += prefix_len + remaining + 1;
1074 num += 1;
1075 } else {
1076 unsafe {
1077 let dst = buf_ptr.add(write_pos);
1078 std::ptr::write_bytes(dst, b' ', blank_pad);
1079 if remaining > 0 {
1080 std::ptr::copy_nonoverlapping(data_ptr.add(pos), dst.add(blank_pad), remaining);
1081 }
1082 *dst.add(blank_pad + remaining) = b'\n';
1083 }
1084 write_pos += blank_pad + remaining + 1;
1085 }
1086 }
1087
1088 if write_pos > 0 {
1089 unsafe {
1090 output.set_len(write_pos);
1091 }
1092 write_all_fd(fd, &output)?;
1093 }
1094
1095 *line_number = num;
1096 Ok(())
1097}
1098
1099#[cfg(unix)]
1102fn nl_generic_stream(
1103 data: &[u8],
1104 config: &NlConfig,
1105 line_number: &mut i64,
1106 fd: i32,
1107) -> std::io::Result<()> {
1108 if data.is_empty() {
1109 return Ok(());
1110 }
1111
1112 const BUF_SIZE: usize = 2 * 1024 * 1024;
1113
1114 let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 128 * 1024);
1115 let mut current_section = Section::Body;
1116 let mut consecutive_blanks: usize = 0;
1117 let mut start = 0;
1118 let mut line_iter = memchr::memchr_iter(b'\n', data);
1119
1120 loop {
1121 let (line, has_newline) = match line_iter.next() {
1122 Some(pos) => (&data[start..pos], true),
1123 None => {
1124 if start < data.len() {
1125 (&data[start..], false)
1126 } else {
1127 break;
1128 }
1129 }
1130 };
1131
1132 if output.len() > BUF_SIZE {
1134 write_all_fd(fd, &output)?;
1135 output.clear();
1136 }
1137
1138 if let Some(section) = check_section_delimiter(line, &config.section_delimiter) {
1140 if !config.no_renumber {
1141 *line_number = config.starting_line_number;
1142 }
1143 current_section = section;
1144 consecutive_blanks = 0;
1145 output.push(b'\n');
1146 if has_newline {
1147 start += line.len() + 1;
1148 } else {
1149 break;
1150 }
1151 continue;
1152 }
1153
1154 let style = match current_section {
1155 Section::Header => &config.header_style,
1156 Section::Body => &config.body_style,
1157 Section::Footer => &config.footer_style,
1158 };
1159
1160 let is_blank = line.is_empty();
1161
1162 if is_blank {
1163 consecutive_blanks += 1;
1164 } else {
1165 consecutive_blanks = 0;
1166 }
1167
1168 let do_number = if is_blank && config.join_blank_lines > 1 {
1169 if should_number(line, style) {
1170 consecutive_blanks >= config.join_blank_lines
1171 } else {
1172 false
1173 }
1174 } else {
1175 should_number(line, style)
1176 };
1177
1178 if do_number {
1179 if is_blank && config.join_blank_lines > 1 {
1180 consecutive_blanks = 0;
1181 }
1182 format_number(
1183 *line_number,
1184 config.number_format,
1185 config.number_width,
1186 &mut output,
1187 );
1188 output.extend_from_slice(&config.number_separator);
1189 output.extend_from_slice(line);
1190 *line_number = line_number.wrapping_add(config.line_increment);
1191 } else {
1192 let total_pad = config.number_width + config.number_separator.len();
1193 output.resize(output.len() + total_pad, b' ');
1194 output.extend_from_slice(line);
1195 }
1196
1197 if has_newline {
1198 output.push(b'\n');
1199 start += line.len() + 1;
1200 } else {
1201 output.push(b'\n');
1202 break;
1203 }
1204 }
1205
1206 if !output.is_empty() {
1208 write_all_fd(fd, &output)?;
1209 }
1210
1211 Ok(())
1212}
1213
1214#[cfg(unix)]
1216#[inline]
1217fn write_all_fd(fd: i32, data: &[u8]) -> std::io::Result<()> {
1218 let mut written = 0;
1219 while written < data.len() {
1220 let ret = unsafe {
1221 libc::write(
1222 fd,
1223 data[written..].as_ptr() as *const libc::c_void,
1224 (data.len() - written) as _,
1225 )
1226 };
1227 if ret > 0 {
1228 written += ret as usize;
1229 } else if ret == 0 {
1230 return Err(std::io::Error::new(
1231 std::io::ErrorKind::WriteZero,
1232 "write returned 0",
1233 ));
1234 } else {
1235 let err = std::io::Error::last_os_error();
1236 if err.kind() == std::io::ErrorKind::Interrupted {
1237 continue;
1238 }
1239 return Err(err);
1240 }
1241 }
1242 Ok(())
1243}
1244
1245#[cfg(unix)]
1250pub fn nl_stream_with_state(
1251 data: &[u8],
1252 config: &NlConfig,
1253 line_number: &mut i64,
1254 fd: i32,
1255) -> std::io::Result<()> {
1256 if data.is_empty() {
1257 return Ok(());
1258 }
1259
1260 let is_all = is_simple_number_all(config);
1262 let is_nonempty = !is_all && is_simple_number_nonempty(config);
1263
1264 if is_all || is_nonempty {
1265 if !data_has_section_delimiters(data, config) {
1266 return if is_all {
1270 nl_number_all_stream(data, config, line_number, fd)
1271 } else {
1272 nl_number_nonempty_stream(data, config, line_number, fd)
1273 };
1274 }
1275 }
1276
1277 if is_simple_number_pattern(config) && !data_has_section_delimiters(data, config) {
1279 return nl_number_pattern_stream(data, config, line_number, fd);
1280 }
1281
1282 nl_generic_stream(data, config, line_number, fd)
1283}
1284
1285pub fn nl_to_vec_with_state(data: &[u8], config: &NlConfig, line_number: &mut i64) -> Vec<u8> {
1288 if data.is_empty() {
1289 return Vec::new();
1290 }
1291
1292 if !data_has_section_delimiters(data, config) {
1295 if is_simple_number_all(config) {
1296 return nl_number_all_fast(data, config, line_number);
1297 }
1298 if is_simple_number_nonempty(config) {
1299 return nl_number_nonempty_fast(data, config, line_number);
1300 }
1301 }
1302
1303 let alloc = (data.len() * 2 + 256).min(128 * 1024 * 1024);
1305 let mut output: Vec<u8> = Vec::with_capacity(alloc);
1306
1307 let mut current_section = Section::Body;
1308 let mut consecutive_blanks: usize = 0;
1309
1310 let mut start = 0;
1311 let mut line_iter = memchr::memchr_iter(b'\n', data);
1312
1313 loop {
1314 let (line, has_newline) = match line_iter.next() {
1315 Some(pos) => (&data[start..pos], true),
1316 None => {
1317 if start < data.len() {
1318 (&data[start..], false)
1319 } else {
1320 break;
1321 }
1322 }
1323 };
1324
1325 if let Some(section) = check_section_delimiter(line, &config.section_delimiter) {
1327 if !config.no_renumber {
1328 *line_number = config.starting_line_number;
1329 }
1330 current_section = section;
1331 consecutive_blanks = 0;
1332 output.push(b'\n');
1333 if has_newline {
1334 start += line.len() + 1;
1335 } else {
1336 break;
1337 }
1338 continue;
1339 }
1340
1341 let style = match current_section {
1342 Section::Header => &config.header_style,
1343 Section::Body => &config.body_style,
1344 Section::Footer => &config.footer_style,
1345 };
1346
1347 let is_blank = line.is_empty();
1348
1349 if is_blank {
1350 consecutive_blanks += 1;
1351 } else {
1352 consecutive_blanks = 0;
1353 }
1354
1355 let do_number = if is_blank && config.join_blank_lines > 1 {
1356 if should_number(line, style) {
1357 consecutive_blanks >= config.join_blank_lines
1358 } else {
1359 false
1360 }
1361 } else {
1362 should_number(line, style)
1363 };
1364
1365 if do_number {
1366 if is_blank && config.join_blank_lines > 1 {
1367 consecutive_blanks = 0;
1368 }
1369 format_number(
1370 *line_number,
1371 config.number_format,
1372 config.number_width,
1373 &mut output,
1374 );
1375 output.extend_from_slice(&config.number_separator);
1376 output.extend_from_slice(line);
1377 *line_number = line_number.wrapping_add(config.line_increment);
1378 } else {
1379 let total_pad = config.number_width + config.number_separator.len();
1381 output.resize(output.len() + total_pad, b' ');
1382 output.extend_from_slice(line);
1383 }
1384
1385 if has_newline {
1386 output.push(b'\n');
1387 start += line.len() + 1;
1388 } else {
1389 output.push(b'\n');
1392 break;
1393 }
1394 }
1395
1396 output
1397}
1398
1399pub fn nl(data: &[u8], config: &NlConfig, out: &mut impl Write) -> std::io::Result<()> {
1401 let output = nl_to_vec(data, config);
1402 out.write_all(&output)
1403}