Skip to main content

coreutils_rs/nl/
core.rs

1use std::io::Write;
2
3/// Line numbering style.
4#[derive(Clone)]
5pub enum NumberingStyle {
6    /// Number all lines.
7    All,
8    /// Number only non-empty lines (default for body).
9    NonEmpty,
10    /// Don't number lines.
11    None,
12    /// Number lines matching a basic regular expression.
13    Regex(regex::Regex),
14}
15
/// Layout of a line number inside its fixed-width field.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumberFormat {
    /// `ln`: number first, padded on the right with spaces.
    Ln,
    /// `rn`: padded on the left with spaces (the default).
    Rn,
    /// `rz`: padded on the left with zeros.
    Rz,
}
26
27/// Configuration for the nl command.
28pub struct NlConfig {
29    pub body_style: NumberingStyle,
30    pub header_style: NumberingStyle,
31    pub footer_style: NumberingStyle,
32    pub section_delimiter: Vec<u8>,
33    pub line_increment: i64,
34    pub join_blank_lines: usize,
35    pub number_format: NumberFormat,
36    pub no_renumber: bool,
37    pub number_separator: Vec<u8>,
38    pub starting_line_number: i64,
39    pub number_width: usize,
40}
41
42impl Default for NlConfig {
43    fn default() -> Self {
44        Self {
45            body_style: NumberingStyle::NonEmpty,
46            header_style: NumberingStyle::None,
47            footer_style: NumberingStyle::None,
48            section_delimiter: vec![b'\\', b':'],
49            line_increment: 1,
50            join_blank_lines: 1,
51            number_format: NumberFormat::Rn,
52            no_renumber: false,
53            number_separator: vec![b'\t'],
54            starting_line_number: 1,
55            number_width: 6,
56        }
57    }
58}
59
60/// Parse a numbering style string.
61pub fn parse_numbering_style(s: &str) -> Result<NumberingStyle, String> {
62    match s {
63        "a" => Ok(NumberingStyle::All),
64        "t" => Ok(NumberingStyle::NonEmpty),
65        "n" => Ok(NumberingStyle::None),
66        _ if s.starts_with('p') => {
67            let pattern = &s[1..];
68            match regex::Regex::new(pattern) {
69                Ok(re) => Ok(NumberingStyle::Regex(re)),
70                Err(e) => Err(format!("invalid regular expression: {}", e)),
71            }
72        }
73        _ => Err(format!("invalid numbering style: '{}'", s)),
74    }
75}
76
77/// Parse a number format string.
78pub fn parse_number_format(s: &str) -> Result<NumberFormat, String> {
79    match s {
80        "ln" => Ok(NumberFormat::Ln),
81        "rn" => Ok(NumberFormat::Rn),
82        "rz" => Ok(NumberFormat::Rz),
83        _ => Err(format!("invalid line numbering: '{}'", s)),
84    }
85}
86
/// Part of a logical page that a line belongs to; each part has its own
/// numbering style in `NlConfig`.
#[derive(Copy, Clone, PartialEq)]
enum Section {
    /// Lines following a three-delimiter line.
    Header,
    /// Lines following a two-delimiter line (also the initial section).
    Body,
    /// Lines following a single-delimiter line.
    Footer,
}
94
95/// Check if a line is a section delimiter.
96#[inline]
97fn check_section_delimiter(line: &[u8], delim: &[u8]) -> Option<Section> {
98    if delim.is_empty() {
99        return None;
100    }
101    let dlen = delim.len();
102
103    // Check header (3x)
104    if line.len() == dlen * 3 {
105        let mut is_header = true;
106        for i in 0..3 {
107            if &line[i * dlen..(i + 1) * dlen] != delim {
108                is_header = false;
109                break;
110            }
111        }
112        if is_header {
113            return Some(Section::Header);
114        }
115    }
116
117    // Check body (2x)
118    if line.len() == dlen * 2 && &line[..dlen] == delim && &line[dlen..] == delim {
119        return Some(Section::Body);
120    }
121
122    // Check footer (1x)
123    if line.len() == dlen && line == delim {
124        return Some(Section::Footer);
125    }
126
127    None
128}
129
130/// Format a line number according to the format and width.
131#[inline]
132fn format_number(num: i64, format: NumberFormat, width: usize, buf: &mut Vec<u8>) {
133    let mut num_buf = itoa::Buffer::new();
134    let num_str = num_buf.format(num);
135
136    match format {
137        NumberFormat::Ln => {
138            buf.extend_from_slice(num_str.as_bytes());
139            let pad = width.saturating_sub(num_str.len());
140            buf.resize(buf.len() + pad, b' ');
141        }
142        NumberFormat::Rn => {
143            let pad = width.saturating_sub(num_str.len());
144            buf.resize(buf.len() + pad, b' ');
145            buf.extend_from_slice(num_str.as_bytes());
146        }
147        NumberFormat::Rz => {
148            if num < 0 {
149                buf.push(b'-');
150                let abs_str = &num_str[1..];
151                let pad = width.saturating_sub(abs_str.len() + 1);
152                buf.resize(buf.len() + pad, b'0');
153                buf.extend_from_slice(abs_str.as_bytes());
154            } else {
155                let pad = width.saturating_sub(num_str.len());
156                buf.resize(buf.len() + pad, b'0');
157                buf.extend_from_slice(num_str.as_bytes());
158            }
159        }
160    }
161}
162
163/// Check if a line should be numbered based on the style.
164#[inline]
165fn should_number(line: &[u8], style: &NumberingStyle) -> bool {
166    match style {
167        NumberingStyle::All => true,
168        NumberingStyle::NonEmpty => !line.is_empty(),
169        NumberingStyle::None => false,
170        NumberingStyle::Regex(re) => match std::str::from_utf8(line) {
171            Ok(s) => re.is_match(s),
172            Err(_) => false,
173        },
174    }
175}
176
177/// Build the nl output into a Vec.
178pub fn nl_to_vec(data: &[u8], config: &NlConfig) -> Vec<u8> {
179    if data.is_empty() {
180        return Vec::new();
181    }
182
183    let estimated_lines = memchr::memchr_iter(b'\n', data).count() + 1;
184    let prefix_size = config.number_width + config.number_separator.len() + 2;
185    let mut output = Vec::with_capacity(data.len() + estimated_lines * prefix_size);
186
187    let mut line_number = config.starting_line_number;
188    let mut current_section = Section::Body;
189    let mut consecutive_blanks: usize = 0;
190
191    let mut start = 0;
192    let mut line_iter = memchr::memchr_iter(b'\n', data);
193
194    loop {
195        let (line, has_newline) = match line_iter.next() {
196            Some(pos) => (&data[start..pos], true),
197            None => {
198                if start < data.len() {
199                    (&data[start..], false)
200                } else {
201                    break;
202                }
203            }
204        };
205
206        // Check for section delimiter
207        if let Some(section) = check_section_delimiter(line, &config.section_delimiter) {
208            if !config.no_renumber {
209                line_number = config.starting_line_number;
210            }
211            current_section = section;
212            consecutive_blanks = 0;
213            output.push(b'\n');
214            if has_newline {
215                start += line.len() + 1;
216            } else {
217                break;
218            }
219            continue;
220        }
221
222        let style = match current_section {
223            Section::Header => &config.header_style,
224            Section::Body => &config.body_style,
225            Section::Footer => &config.footer_style,
226        };
227
228        let is_blank = line.is_empty();
229
230        if is_blank {
231            consecutive_blanks += 1;
232        } else {
233            consecutive_blanks = 0;
234        }
235
236        let do_number = if is_blank && config.join_blank_lines > 1 {
237            if should_number(line, style) {
238                consecutive_blanks >= config.join_blank_lines
239            } else {
240                false
241            }
242        } else {
243            should_number(line, style)
244        };
245
246        if do_number {
247            if is_blank && config.join_blank_lines > 1 {
248                consecutive_blanks = 0;
249            }
250            format_number(
251                line_number,
252                config.number_format,
253                config.number_width,
254                &mut output,
255            );
256            output.extend_from_slice(&config.number_separator);
257            output.extend_from_slice(line);
258            line_number = line_number.wrapping_add(config.line_increment);
259        } else {
260            // Non-numbered lines: GNU nl outputs width + separator_len total spaces, then content
261            let total_pad = config.number_width + config.number_separator.len();
262            output.resize(output.len() + total_pad, b' ');
263            output.extend_from_slice(line);
264        }
265
266        if has_newline {
267            output.push(b'\n');
268            start += line.len() + 1;
269        } else {
270            // GNU nl always adds a trailing newline, even when the input lacks one
271            // (but has content on the last line). Empty input produces empty output.
272            output.push(b'\n');
273            break;
274        }
275    }
276
277    output
278}
279
280/// Number lines and write to the provided writer.
281pub fn nl(data: &[u8], config: &NlConfig, out: &mut impl Write) -> std::io::Result<()> {
282    let output = nl_to_vec(data, config);
283    out.write_all(&output)
284}