Skip to main content

nash_parse/
space.rs

1//! Whitespace and comment handling for Nash.
2//!
3//! Ported from Elm's `Parse/Space.hs`.
4//!
5//! Handles:
6//! - Spaces and newlines
7//! - Line comments (`--`)
8//! - Multi-line comments (`{- ... -}`) with nesting
9//! - Doc comments (`{-| ... -}`)
10//! - Indentation checking
11//! - Tab detection (tabs are not allowed)
12
13use crate::error::Space;
14use crate::{Col, Parser, Row};
15use nash_source::{Comment, Snippet};
16
/// Outcome reported by the whitespace/comment scanner.
///
/// `Good` means scanning stopped normally; the other two variants identify
/// the reason scanning had to stop early.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpaceStatus {
    /// Whitespace and comments were consumed without a problem.
    Good,
    /// A tab character was found; tabs are not allowed.
    HasTab,
    /// A multi-line comment was opened but the input ended before it closed.
    EndlessMultiComment,
}
27
28impl<'a> Parser<'a> {
29    /// Consume whitespace and comments.
30    ///
31    /// Returns an error if tabs or unclosed comments are found.
32    /// Mirrors Elm's `Space.chomp`.
33    pub fn chomp<E>(&mut self, to_error: impl FnOnce(Space, Row, Col) -> E) -> Result<(), E> {
34        let (status, new_row, new_col) = self.eat_spaces();
35
36        match status {
37            SpaceStatus::Good => Ok(()),
38            SpaceStatus::HasTab => Err(to_error(Space::HasTab, new_row, new_col)),
39            SpaceStatus::EndlessMultiComment => {
40                Err(to_error(Space::EndlessMultiComment, new_row, new_col))
41            }
42        }
43    }
44
45    /// Consume whitespace and check that we're indented past the current indent level.
46    ///
47    /// Mirrors Elm's `Space.chompAndCheckIndent`.
48    pub fn chomp_and_check_indent<E>(
49        &mut self,
50        to_space_error: impl FnOnce(Space, Row, Col) -> E,
51        to_indent_error: impl FnOnce(Row, Col) -> E,
52    ) -> Result<(), E> {
53        let (row_before, col_before) = self.position();
54        let (status, new_row, new_col) = self.eat_spaces();
55
56        match status {
57            SpaceStatus::Good => {
58                if new_col > self.indent && new_col > 1 {
59                    Ok(())
60                } else {
61                    Err(to_indent_error(row_before, col_before))
62                }
63            }
64            SpaceStatus::HasTab => Err(to_space_error(Space::HasTab, new_row, new_col)),
65            SpaceStatus::EndlessMultiComment => {
66                Err(to_space_error(Space::EndlessMultiComment, new_row, new_col))
67            }
68        }
69    }
70
71    /// Check that current column is greater than indent level.
72    ///
73    /// Called after a chomp to verify indentation.
74    /// Mirrors Elm's `Space.checkIndent`.
75    ///
76    /// Note: Uses `end_col` for the indent check (not current position),
77    /// matching Elm's behavior where the position is passed explicitly.
78    pub fn check_indent<E>(
79        &self,
80        end_row: Row,
81        end_col: Col,
82        to_error: impl FnOnce(Row, Col) -> E,
83    ) -> Result<(), E> {
84        if end_col > self.indent {
85            Ok(())
86        } else {
87            Err(to_error(end_row, end_col))
88        }
89    }
90
91    /// Check that current column equals indent level (for alignment).
92    ///
93    /// Mirrors Elm's `Space.checkAligned`.
94    pub fn check_aligned<E>(&self, to_error: impl FnOnce(u16, Row, Col) -> E) -> Result<(), E> {
95        if self.col == self.indent {
96            Ok(())
97        } else {
98            Err(to_error(self.indent, self.row, self.col))
99        }
100    }
101
102    /// Check that we're at column 1 (start of a fresh line).
103    ///
104    /// Mirrors Elm's `Space.checkFreshLine`.
105    pub fn check_fresh_line<E>(&self, to_error: impl FnOnce(Row, Col) -> E) -> Result<(), E> {
106        if self.col == 1 {
107            Ok(())
108        } else {
109            Err(to_error(self.row, self.col))
110        }
111    }
112
113    /// Parse a doc comment `{-| ... -}`.
114    ///
115    /// Returns the Comment containing a Snippet of the content.
116    /// Mirrors Elm's `Space.docComment`.
117    pub fn doc_comment<E>(
118        &mut self,
119        to_expectation: impl FnOnce(Row, Col) -> E,
120        to_space_error: impl FnOnce(Space, Row, Col) -> E,
121    ) -> Result<&'a Comment<'a>, E> {
122        // Check for {-| at current position
123        if self.peek() == Some(0x7B)
124            && self.peek_at(1) == Some(0x2D)
125            && self.peek_at(2) == Some(0x7C)
126        {
127            let start_row = self.row;
128            let start_col = self.col + 3; // Column after {-|
129
130            // Skip {-|
131            self.advance();
132            self.advance();
133            self.advance();
134
135            let content_start = self.pos;
136
137            // Use the existing multi-comment helper with nesting=1
138            let status = self.eat_multi_comment_help(1);
139
140            match status {
141                SpaceStatus::Good => {
142                    // Content ends 2 bytes before current position (before -})
143                    let content_end = self.pos - 2;
144                    let content = &self.src[content_start..content_end];
145
146                    let snippet = self.alloc(Snippet {
147                        data: content,
148                        off_row: start_row,
149                        off_col: start_col,
150                    });
151                    let comment = self.alloc(Comment(snippet));
152
153                    Ok(comment)
154                }
155                SpaceStatus::HasTab => Err(to_space_error(Space::HasTab, self.row, self.col)),
156                SpaceStatus::EndlessMultiComment => Err(to_space_error(
157                    Space::EndlessMultiComment,
158                    self.row,
159                    self.col,
160                )),
161            }
162        } else {
163            Err(to_expectation(self.row, self.col))
164        }
165    }
166
167    /// Core function to eat spaces, newlines, and comments.
168    ///
169    /// Updates parser position and returns status.
170    fn eat_spaces(&mut self) -> (SpaceStatus, Row, Col) {
171        loop {
172            match self.peek() {
173                // Space
174                Some(0x20) => {
175                    self.advance();
176                }
177
178                // Newline
179                Some(0x0A) => {
180                    self.advance();
181                }
182
183                // Carriage return (skip)
184                Some(0x0D) => {
185                    self.advance();
186                }
187
188                // Potential comment start: { or -
189                Some(0x7B) => {
190                    // Check for {-
191                    if self.peek_at(1) == Some(0x2D) {
192                        // Check for {-| (doc comment marker - don't consume, let caller handle)
193                        if self.peek_at(2) == Some(0x7C) {
194                            return (SpaceStatus::Good, self.row, self.col);
195                        }
196                        // Multi-line comment
197                        match self.eat_multi_comment() {
198                            SpaceStatus::Good => {
199                                // Continue eating spaces
200                            }
201                            status => return (status, self.row, self.col),
202                        }
203                    } else {
204                        return (SpaceStatus::Good, self.row, self.col);
205                    }
206                }
207
208                Some(0x2D) => {
209                    // Check for --
210                    if self.peek_at(1) == Some(0x2D) {
211                        self.eat_line_comment();
212                        // Continue eating spaces
213                    } else {
214                        return (SpaceStatus::Good, self.row, self.col);
215                    }
216                }
217
218                // Tab (not allowed)
219                Some(0x09) => {
220                    return (SpaceStatus::HasTab, self.row, self.col);
221                }
222
223                // Anything else (including EOF)
224                _ => {
225                    return (SpaceStatus::Good, self.row, self.col);
226                }
227            }
228        }
229    }
230
231    /// Eat a line comment (from -- to end of line).
232    fn eat_line_comment(&mut self) {
233        // Skip the --
234        self.advance();
235        self.advance();
236
237        loop {
238            match self.peek() {
239                Some(0x0A) => {
240                    // Newline ends the comment
241                    self.advance();
242                    return;
243                }
244                Some(_) => {
245                    self.advance();
246                }
247                None => {
248                    // EOF ends the comment
249                    return;
250                }
251            }
252        }
253    }
254
255    /// Eat a multi-line comment ({- ... -}).
256    ///
257    /// Supports nested comments.
258    fn eat_multi_comment(&mut self) -> SpaceStatus {
259        // Skip the {-
260        self.advance();
261        self.advance();
262
263        self.eat_multi_comment_help(1)
264    }
265
266    /// Helper for eating multi-line comments with nesting.
267    fn eat_multi_comment_help(&mut self, open_comments: u16) -> SpaceStatus {
268        loop {
269            match self.peek() {
270                // Newline
271                Some(0x0A) => {
272                    self.advance();
273                }
274
275                // Tab (not allowed even in comments)
276                Some(0x09) => {
277                    return SpaceStatus::HasTab;
278                }
279
280                // Potential close: -}
281                Some(0x2D) => {
282                    if self.peek_at(1) == Some(0x7D) {
283                        self.advance();
284                        self.advance();
285                        if open_comments == 1 {
286                            return SpaceStatus::Good;
287                        } else {
288                            return self.eat_multi_comment_help(open_comments - 1);
289                        }
290                    } else {
291                        self.advance();
292                    }
293                }
294
295                // Potential nested open: {-
296                Some(0x7B) => {
297                    if self.peek_at(1) == Some(0x2D) {
298                        self.advance();
299                        self.advance();
300                        return self.eat_multi_comment_help(open_comments + 1);
301                    } else {
302                        self.advance();
303                    }
304                }
305
306                // Any other character
307                Some(_) => {
308                    self.advance();
309                }
310
311                // EOF without closing
312                None => {
313                    return SpaceStatus::EndlessMultiComment;
314                }
315            }
316        }
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use bumpalo::Bump;

    /// Run `eat_spaces` over `input` with a fresh parser.
    ///
    /// Yields the status, the parser's byte offset afterwards, and the
    /// row/column that `eat_spaces` reported.
    fn parse_and_chomp(input: &str) -> (SpaceStatus, usize, Row, Col) {
        let arena = Bump::new();
        let text = arena.alloc_str(input);
        let mut parser = Parser::new(&arena, text.as_bytes());

        let (status, row, col) = parser.eat_spaces();
        (status, parser.pos, row, col)
    }

    #[test]
    fn empty() {
        let (status, pos, ..) = parse_and_chomp("");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 0);
    }

    #[test]
    fn spaces_only() {
        let (status, pos, ..) = parse_and_chomp("   ");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 3);
    }

    #[test]
    fn newlines() {
        let (status, pos, row, col) = parse_and_chomp("  \n  \n  ");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 8);
        assert_eq!(row, 3);
        assert_eq!(col, 3);
    }

    #[test]
    fn line_comment() {
        let (status, pos, ..) = parse_and_chomp("-- comment\nfoo");
        assert_eq!(status, SpaceStatus::Good);
        // The newline is part of the comment, so scanning stops on `f`.
        assert_eq!(pos, 11);
    }

    #[test]
    fn multi_comment() {
        let (status, pos, ..) = parse_and_chomp("{- comment -}foo");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 13);
    }

    #[test]
    fn nested_multi_comment() {
        let (status, pos, ..) = parse_and_chomp("{- outer {- inner -} outer -}foo");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 29);
    }

    #[test]
    fn tab_error() {
        let (status, ..) = parse_and_chomp("  \t  ");
        assert_eq!(status, SpaceStatus::HasTab);
    }

    #[test]
    fn endless_multi_comment() {
        let (status, ..) = parse_and_chomp("{- never closed");
        assert_eq!(status, SpaceStatus::EndlessMultiComment);
    }

    #[test]
    fn doc_comment_not_consumed() {
        // `{-|` opens a doc comment, which is left in place for the caller.
        let (status, pos, ..) = parse_and_chomp("{-| doc -}");
        assert_eq!(status, SpaceStatus::Good);
        assert_eq!(pos, 0);
    }

    #[test]
    fn stops_at_content() {
        let (status, pos, ..) = parse_and_chomp("  foo");
        assert_eq!(status, SpaceStatus::Good);
        // Scanning halts on the `f`.
        assert_eq!(pos, 2);
    }

    #[test]
    fn doc_comment_simple() {
        let arena = Bump::new();
        let text = arena.alloc_str("{-| hello -}");
        let mut parser = Parser::new(&arena, text.as_bytes());

        let comment = parser
            .doc_comment(|_, _| "expected", |_, _, _| "space error")
            .expect("a well-formed doc comment should parse");

        // The snippet holds everything between `{-|` and `-}`.
        let snippet = &comment.0;
        assert_eq!(snippet.data, b" hello ");
        assert_eq!(snippet.off_row, 1);
        // Column just past `{-|`.
        assert_eq!(snippet.off_col, 4);
    }

    #[test]
    fn doc_comment_multiline() {
        let arena = Bump::new();
        let text = arena.alloc_str("{-| line one\nline two -}");
        let mut parser = Parser::new(&arena, text.as_bytes());

        let comment = parser
            .doc_comment(|_, _| "expected", |_, _, _| "space error")
            .expect("a multi-line doc comment should parse");
        assert_eq!(comment.0.data, b" line one\nline two ");
    }

    #[test]
    fn doc_comment_not_doc() {
        let arena = Bump::new();
        let text = arena.alloc_str("{- not a doc comment -}");
        let mut parser = Parser::new(&arena, text.as_bytes());

        let outcome = parser.doc_comment(|_, _| "expected", |_, _, _| "space error");
        assert_eq!(outcome.err(), Some("expected"));
    }
}
437
438        let result = parser.doc_comment(|_, _| "expected", |_, _, _| "space error");
439        assert!(result.is_err());
440        assert_eq!(result.unwrap_err(), "expected");
441    }
442}