1use std::io::Write;
2
3#[derive(Clone)]
5pub enum NumberingStyle {
6 All,
8 NonEmpty,
10 None,
12 Regex(regex::Regex),
14}
15
/// Layout of the number field written in front of a numbered line.
///
/// `Eq` is derived alongside `PartialEq`: the enum has no fields, so equality
/// is total and the type can be used where a full equivalence is required.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumberFormat {
    /// Left-justified number, padded on the right with spaces.
    Ln,
    /// Right-justified number, padded on the left with spaces.
    Rn,
    /// Right-justified number, padded on the left with zeros.
    Rz,
}
26
27pub struct NlConfig {
29 pub body_style: NumberingStyle,
30 pub header_style: NumberingStyle,
31 pub footer_style: NumberingStyle,
32 pub section_delimiter: Vec<u8>,
33 pub line_increment: i64,
34 pub join_blank_lines: usize,
35 pub number_format: NumberFormat,
36 pub no_renumber: bool,
37 pub number_separator: Vec<u8>,
38 pub starting_line_number: i64,
39 pub number_width: usize,
40}
41
42impl Default for NlConfig {
43 fn default() -> Self {
44 Self {
45 body_style: NumberingStyle::NonEmpty,
46 header_style: NumberingStyle::None,
47 footer_style: NumberingStyle::None,
48 section_delimiter: vec![b'\\', b':'],
49 line_increment: 1,
50 join_blank_lines: 1,
51 number_format: NumberFormat::Rn,
52 no_renumber: false,
53 number_separator: vec![b'\t'],
54 starting_line_number: 1,
55 number_width: 6,
56 }
57 }
58}
59
60pub fn parse_numbering_style(s: &str) -> Result<NumberingStyle, String> {
62 match s {
63 "a" => Ok(NumberingStyle::All),
64 "t" => Ok(NumberingStyle::NonEmpty),
65 "n" => Ok(NumberingStyle::None),
66 _ if s.starts_with('p') => {
67 let pattern = &s[1..];
68 match regex::Regex::new(pattern) {
69 Ok(re) => Ok(NumberingStyle::Regex(re)),
70 Err(e) => Err(format!("invalid regular expression: {}", e)),
71 }
72 }
73 _ => Err(format!("invalid numbering style: '{}'", s)),
74 }
75}
76
77pub fn parse_number_format(s: &str) -> Result<NumberFormat, String> {
79 match s {
80 "ln" => Ok(NumberFormat::Ln),
81 "rn" => Ok(NumberFormat::Rn),
82 "rz" => Ok(NumberFormat::Rz),
83 _ => Err(format!("invalid line numbering: '{}'", s)),
84 }
85}
86
/// Part of a logical page that a line belongs to; selects which numbering
/// style applies to it.
#[derive(Clone, Copy, PartialEq)]
enum Section {
    /// Lines following a header delimiter.
    Header,
    /// Lines following a body delimiter (also the section input starts in).
    Body,
    /// Lines following a footer delimiter.
    Footer,
}
94
95#[inline]
97fn check_section_delimiter(line: &[u8], delim: &[u8]) -> Option<Section> {
98 if delim.is_empty() {
99 return None;
100 }
101 let dlen = delim.len();
102
103 if line.len() == dlen * 3 {
105 let mut is_header = true;
106 for i in 0..3 {
107 if &line[i * dlen..(i + 1) * dlen] != delim {
108 is_header = false;
109 break;
110 }
111 }
112 if is_header {
113 return Some(Section::Header);
114 }
115 }
116
117 if line.len() == dlen * 2 && &line[..dlen] == delim && &line[dlen..] == delim {
119 return Some(Section::Body);
120 }
121
122 if line.len() == dlen && line == delim {
124 return Some(Section::Footer);
125 }
126
127 None
128}
129
130#[inline]
132fn format_number(num: i64, format: NumberFormat, width: usize, buf: &mut Vec<u8>) {
133 let mut num_buf = itoa::Buffer::new();
134 let num_str = num_buf.format(num);
135
136 match format {
137 NumberFormat::Ln => {
138 buf.extend_from_slice(num_str.as_bytes());
139 let pad = width.saturating_sub(num_str.len());
140 buf.resize(buf.len() + pad, b' ');
141 }
142 NumberFormat::Rn => {
143 let pad = width.saturating_sub(num_str.len());
144 buf.resize(buf.len() + pad, b' ');
145 buf.extend_from_slice(num_str.as_bytes());
146 }
147 NumberFormat::Rz => {
148 if num < 0 {
149 buf.push(b'-');
150 let abs_str = &num_str[1..];
151 let pad = width.saturating_sub(abs_str.len() + 1);
152 buf.resize(buf.len() + pad, b'0');
153 buf.extend_from_slice(abs_str.as_bytes());
154 } else {
155 let pad = width.saturating_sub(num_str.len());
156 buf.resize(buf.len() + pad, b'0');
157 buf.extend_from_slice(num_str.as_bytes());
158 }
159 }
160 }
161}
162
163#[inline]
165fn should_number(line: &[u8], style: &NumberingStyle) -> bool {
166 match style {
167 NumberingStyle::All => true,
168 NumberingStyle::NonEmpty => !line.is_empty(),
169 NumberingStyle::None => false,
170 NumberingStyle::Regex(re) => match std::str::from_utf8(line) {
171 Ok(s) => re.is_match(s),
172 Err(_) => false,
173 },
174 }
175}
176
177pub fn nl_to_vec(data: &[u8], config: &NlConfig) -> Vec<u8> {
179 let mut line_number = config.starting_line_number;
180 nl_to_vec_with_state(data, config, &mut line_number)
181}
182
183#[inline]
185fn is_simple_number_all(config: &NlConfig) -> bool {
186 matches!(config.body_style, NumberingStyle::All)
187 && matches!(config.header_style, NumberingStyle::None)
188 && matches!(config.footer_style, NumberingStyle::None)
189 && config.join_blank_lines == 1
190 && config.line_increment == 1
191 && !config.no_renumber
192}
193
194#[inline(always)]
197unsafe fn write_numbered_line(
198 output: &mut Vec<u8>,
199 fmt: NumberFormat,
200 num_str: &str,
201 pad: usize,
202 sep: &[u8],
203 line_data: *const u8,
204 line_len: usize,
205) {
206 unsafe {
207 let prefix_len = pad + num_str.len() + sep.len();
208 let total_len = prefix_len + line_len + 1;
209 let start_pos = output.len();
210 let dst = output.as_mut_ptr().add(start_pos);
211
212 match fmt {
213 NumberFormat::Rn => {
214 std::ptr::write_bytes(dst, b' ', pad);
215 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
216 }
217 NumberFormat::Rz => {
218 std::ptr::write_bytes(dst, b'0', pad);
219 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst.add(pad), num_str.len());
220 }
221 NumberFormat::Ln => {
222 std::ptr::copy_nonoverlapping(num_str.as_ptr(), dst, num_str.len());
223 std::ptr::write_bytes(dst.add(num_str.len()), b' ', pad);
224 }
225 }
226 std::ptr::copy_nonoverlapping(sep.as_ptr(), dst.add(pad + num_str.len()), sep.len());
227 std::ptr::copy_nonoverlapping(line_data, dst.add(prefix_len), line_len);
228 *dst.add(prefix_len + line_len) = b'\n';
229 output.set_len(start_pos + total_len);
230 }
231}
232
233fn nl_number_all_fast(data: &[u8], config: &NlConfig, line_number: &mut i64) -> Vec<u8> {
237 let alloc = (data.len() * 2 + 256).min(128 * 1024 * 1024);
238 let mut output: Vec<u8> = Vec::with_capacity(alloc);
239
240 let width = config.number_width;
241 let sep = &config.number_separator;
242 let fmt = config.number_format;
243 let mut num = *line_number;
244 let mut pos: usize = 0;
245 let mut num_buf = itoa::Buffer::new();
246
247 for nl_pos in memchr::memchr_iter(b'\n', data) {
248 let line_len = nl_pos - pos;
249 let needed = output.len() + line_len + width + sep.len() + 22;
250 if needed > output.capacity() {
251 output.reserve(needed - output.capacity() + 4 * 1024 * 1024);
252 }
253
254 let num_str = num_buf.format(num);
255 let pad = width.saturating_sub(num_str.len());
256
257 unsafe {
258 write_numbered_line(
259 &mut output,
260 fmt,
261 num_str,
262 pad,
263 sep,
264 data.as_ptr().add(pos),
265 line_len,
266 );
267 }
268
269 num += 1;
270 pos = nl_pos + 1;
271 }
272
273 if pos < data.len() {
275 let remaining = data.len() - pos;
276 let needed = output.len() + remaining + width + sep.len() + 22;
277 if needed > output.capacity() {
278 output.reserve(needed - output.capacity() + 1024);
279 }
280 let num_str = num_buf.format(num);
281 let pad = width.saturating_sub(num_str.len());
282
283 unsafe {
284 write_numbered_line(
285 &mut output,
286 fmt,
287 num_str,
288 pad,
289 sep,
290 data.as_ptr().add(pos),
291 remaining,
292 );
293 }
294 num += 1;
295 }
296
297 *line_number = num;
298 output
299}
300
301#[cfg(unix)]
306fn nl_number_all_stream(
307 data: &[u8],
308 config: &NlConfig,
309 line_number: &mut i64,
310 fd: i32,
311) -> std::io::Result<()> {
312 const BUF_SIZE: usize = 1024 * 1024; let width = config.number_width;
315 let sep = &config.number_separator;
316 let fmt = config.number_format;
317 let mut num = *line_number;
318 let mut pos: usize = 0;
319 let mut num_buf = itoa::Buffer::new();
320
321 let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 64 * 1024);
323
324 for nl_pos in memchr::memchr_iter(b'\n', data) {
325 let line_len = nl_pos - pos;
326
327 if output.len() + line_len + width + sep.len() + 22 > BUF_SIZE {
329 write_all_fd(fd, &output)?;
330 output.clear();
331 }
332
333 let needed = output.len() + line_len + width + sep.len() + 22;
335 if needed > output.capacity() {
336 output.reserve(needed - output.capacity());
337 }
338
339 let num_str = num_buf.format(num);
340 let pad = width.saturating_sub(num_str.len());
341
342 unsafe {
343 write_numbered_line(
344 &mut output,
345 fmt,
346 num_str,
347 pad,
348 sep,
349 data.as_ptr().add(pos),
350 line_len,
351 );
352 }
353
354 num += 1;
355 pos = nl_pos + 1;
356 }
357
358 if pos < data.len() {
360 let remaining = data.len() - pos;
361 let needed = output.len() + remaining + width + sep.len() + 22;
362 if needed > output.capacity() {
363 output.reserve(needed - output.capacity());
364 }
365 let num_str = num_buf.format(num);
366 let pad = width.saturating_sub(num_str.len());
367
368 unsafe {
369 write_numbered_line(
370 &mut output,
371 fmt,
372 num_str,
373 pad,
374 sep,
375 data.as_ptr().add(pos),
376 remaining,
377 );
378 }
379 num += 1;
380 }
381
382 if !output.is_empty() {
384 write_all_fd(fd, &output)?;
385 }
386
387 *line_number = num;
388 Ok(())
389}
390
391#[cfg(unix)]
394fn nl_generic_stream(
395 data: &[u8],
396 config: &NlConfig,
397 line_number: &mut i64,
398 fd: i32,
399) -> std::io::Result<()> {
400 if data.is_empty() {
401 return Ok(());
402 }
403
404 const BUF_SIZE: usize = 1024 * 1024; let mut output: Vec<u8> = Vec::with_capacity(BUF_SIZE + 64 * 1024);
407 let mut current_section = Section::Body;
408 let mut consecutive_blanks: usize = 0;
409 let mut start = 0;
410 let mut line_iter = memchr::memchr_iter(b'\n', data);
411
412 loop {
413 let (line, has_newline) = match line_iter.next() {
414 Some(pos) => (&data[start..pos], true),
415 None => {
416 if start < data.len() {
417 (&data[start..], false)
418 } else {
419 break;
420 }
421 }
422 };
423
424 if output.len() > BUF_SIZE {
426 write_all_fd(fd, &output)?;
427 output.clear();
428 }
429
430 if let Some(section) = check_section_delimiter(line, &config.section_delimiter) {
432 if !config.no_renumber {
433 *line_number = config.starting_line_number;
434 }
435 current_section = section;
436 consecutive_blanks = 0;
437 output.push(b'\n');
438 if has_newline {
439 start += line.len() + 1;
440 } else {
441 break;
442 }
443 continue;
444 }
445
446 let style = match current_section {
447 Section::Header => &config.header_style,
448 Section::Body => &config.body_style,
449 Section::Footer => &config.footer_style,
450 };
451
452 let is_blank = line.is_empty();
453
454 if is_blank {
455 consecutive_blanks += 1;
456 } else {
457 consecutive_blanks = 0;
458 }
459
460 let do_number = if is_blank && config.join_blank_lines > 1 {
461 if should_number(line, style) {
462 consecutive_blanks >= config.join_blank_lines
463 } else {
464 false
465 }
466 } else {
467 should_number(line, style)
468 };
469
470 if do_number {
471 if is_blank && config.join_blank_lines > 1 {
472 consecutive_blanks = 0;
473 }
474 format_number(
475 *line_number,
476 config.number_format,
477 config.number_width,
478 &mut output,
479 );
480 output.extend_from_slice(&config.number_separator);
481 output.extend_from_slice(line);
482 *line_number = line_number.wrapping_add(config.line_increment);
483 } else {
484 let total_pad = config.number_width + config.number_separator.len();
485 output.resize(output.len() + total_pad, b' ');
486 output.extend_from_slice(line);
487 }
488
489 if has_newline {
490 output.push(b'\n');
491 start += line.len() + 1;
492 } else {
493 output.push(b'\n');
494 break;
495 }
496 }
497
498 if !output.is_empty() {
500 write_all_fd(fd, &output)?;
501 }
502
503 Ok(())
504}
505
506#[cfg(unix)]
508#[inline]
509fn write_all_fd(fd: i32, data: &[u8]) -> std::io::Result<()> {
510 let mut written = 0;
511 while written < data.len() {
512 let ret = unsafe {
513 libc::write(
514 fd,
515 data[written..].as_ptr() as *const libc::c_void,
516 (data.len() - written) as _,
517 )
518 };
519 if ret > 0 {
520 written += ret as usize;
521 } else if ret == 0 {
522 return Err(std::io::Error::new(
523 std::io::ErrorKind::WriteZero,
524 "write returned 0",
525 ));
526 } else {
527 let err = std::io::Error::last_os_error();
528 if err.kind() == std::io::ErrorKind::Interrupted {
529 continue;
530 }
531 return Err(err);
532 }
533 }
534 Ok(())
535}
536
537#[cfg(unix)]
542pub fn nl_stream_with_state(
543 data: &[u8],
544 config: &NlConfig,
545 line_number: &mut i64,
546 fd: i32,
547) -> std::io::Result<()> {
548 if data.is_empty() {
549 return Ok(());
550 }
551
552 let has_section_delims = !config.section_delimiter.is_empty()
554 && memchr::memmem::find(data, &config.section_delimiter).is_some();
555 if is_simple_number_all(config) && !has_section_delims {
556 return nl_number_all_stream(data, config, line_number, fd);
557 }
558
559 nl_generic_stream(data, config, line_number, fd)
560}
561
562pub fn nl_to_vec_with_state(data: &[u8], config: &NlConfig, line_number: &mut i64) -> Vec<u8> {
565 if data.is_empty() {
566 return Vec::new();
567 }
568
569 let has_section_delims = !config.section_delimiter.is_empty()
572 && memchr::memmem::find(data, &config.section_delimiter).is_some();
573 if is_simple_number_all(config) && !has_section_delims {
574 return nl_number_all_fast(data, config, line_number);
575 }
576
577 let alloc = (data.len() * 2 + 256).min(128 * 1024 * 1024);
579 let mut output: Vec<u8> = Vec::with_capacity(alloc);
580
581 let mut current_section = Section::Body;
582 let mut consecutive_blanks: usize = 0;
583
584 let mut start = 0;
585 let mut line_iter = memchr::memchr_iter(b'\n', data);
586
587 loop {
588 let (line, has_newline) = match line_iter.next() {
589 Some(pos) => (&data[start..pos], true),
590 None => {
591 if start < data.len() {
592 (&data[start..], false)
593 } else {
594 break;
595 }
596 }
597 };
598
599 if let Some(section) = check_section_delimiter(line, &config.section_delimiter) {
601 if !config.no_renumber {
602 *line_number = config.starting_line_number;
603 }
604 current_section = section;
605 consecutive_blanks = 0;
606 output.push(b'\n');
607 if has_newline {
608 start += line.len() + 1;
609 } else {
610 break;
611 }
612 continue;
613 }
614
615 let style = match current_section {
616 Section::Header => &config.header_style,
617 Section::Body => &config.body_style,
618 Section::Footer => &config.footer_style,
619 };
620
621 let is_blank = line.is_empty();
622
623 if is_blank {
624 consecutive_blanks += 1;
625 } else {
626 consecutive_blanks = 0;
627 }
628
629 let do_number = if is_blank && config.join_blank_lines > 1 {
630 if should_number(line, style) {
631 consecutive_blanks >= config.join_blank_lines
632 } else {
633 false
634 }
635 } else {
636 should_number(line, style)
637 };
638
639 if do_number {
640 if is_blank && config.join_blank_lines > 1 {
641 consecutive_blanks = 0;
642 }
643 format_number(
644 *line_number,
645 config.number_format,
646 config.number_width,
647 &mut output,
648 );
649 output.extend_from_slice(&config.number_separator);
650 output.extend_from_slice(line);
651 *line_number = line_number.wrapping_add(config.line_increment);
652 } else {
653 let total_pad = config.number_width + config.number_separator.len();
655 output.resize(output.len() + total_pad, b' ');
656 output.extend_from_slice(line);
657 }
658
659 if has_newline {
660 output.push(b'\n');
661 start += line.len() + 1;
662 } else {
663 output.push(b'\n');
666 break;
667 }
668 }
669
670 output
671}
672
673pub fn nl(data: &[u8], config: &NlConfig, out: &mut impl Write) -> std::io::Result<()> {
675 let output = nl_to_vec(data, config);
676 out.write_all(&output)
677}