syntax_rs/
spec.rs

// TODO: Warn when Unicode combining characters are detected, as they may be misinterpreted by the compiler.
2
3use crate::parse::{Parse, ParseStream};
4use crate::Result;
5use unicode_xid::UnicodeXID;
6
/// The kind of line terminator that was recognised in the input.
///
/// The discriminants are stored as `u8` and follow declaration order
/// (`CRLF = 0`, `CR = 1`, `LF = 2`, `NEL = 3`).
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineBreak {
    /// Carriage return immediately followed by line feed (U+000D U+000A).
    CRLF,
    /// A lone carriage return (U+000D).
    CR,
    /// Line feed (U+000A).
    LF,
    /// Next line (U+0085).
    NEL,
}
14
15impl Parse for LineBreak {
16    fn parse(stream: &mut ParseStream) -> Result<Self> {
17        stream.try_parse(|stream| {
18            let cur = stream.cur();
19            Ok(match cur.advance().ok_or("Expected linebreak.")? {
20                '\u{000D}' => {
21                    if cur.consume('\u{000A}') {
22                        LineBreak::CRLF
23                    } else {
24                        LineBreak::CR
25                    }
26                }
27                '\u{000A}' => LineBreak::LF,
28                '\u{0085}' => LineBreak::NEL,
29                _ => return Err("Unrecognized linebreak. Expected CRLF, CR, LF or NEL."),
30            })
31        })
32    }
33}
34
/// Character-classification predicates.
///
/// This file implements the trait for `char` when the `char_spec` feature is
/// enabled, and the free functions [`is_xid_start`], [`is_xid_continue`] and
/// [`is_whitespace`] forward to these methods.
pub trait UnicodeSpec {
    /// Reports whether the value counts as an identifier-start character.
    ///
    /// The `char` implementation delegates to `unicode_xid`'s `is_xid_start`.
    fn is_xid_start(&self) -> bool;
    /// Reports whether the value counts as an identifier-continue character.
    ///
    /// The `char` implementation delegates to `unicode_xid`'s
    /// `is_xid_continue`.
    fn is_xid_continue(&self) -> bool;
    /// Reports whether the value counts as whitespace.
    ///
    /// The `char` implementation checks against a fixed list of code points.
    fn is_whitespace(&self) -> bool;
}
40
/// Default classification for `char`, available only with the `char_spec`
/// feature.
#[cfg(feature = "char_spec")]
impl UnicodeSpec for char {
    /// Delegates to `unicode_xid`'s `UnicodeXID::is_xid_start`.
    fn is_xid_start(&self) -> bool {
        // Called through the trait path so the `UnicodeXID` method is
        // selected rather than this trait's method of the same name.
        UnicodeXID::is_xid_start(*self)
    }

    /// Delegates to `unicode_xid`'s `UnicodeXID::is_xid_continue`.
    fn is_xid_continue(&self) -> bool {
        UnicodeXID::is_xid_continue(*self)
    }

    /// Returns `true` for a fixed set of whitespace code points.
    ///
    /// NOTE(review): the set appears to coincide with the Unicode
    /// `White_Space` property; confirm before relying on that.
    fn is_whitespace(&self) -> bool {
        matches!(
            *self,
            // U+0009..=U+000D: tab, line feed, vertical tab, form feed,
            // carriage return.
            '\u{0009}'..='\u{000D}'
                | '\u{0020}'
                | '\u{0085}'
                | '\u{00A0}'
                | '\u{1680}'
                // U+2000..=U+200A: contiguous run in the General Punctuation
                // block.
                | '\u{2000}'..='\u{200A}'
                | '\u{2028}'
                | '\u{2029}'
                | '\u{202F}'
                | '\u{205F}'
                | '\u{3000}'
        )
    }
}
82
// TODO: Add a `set_unicode_spec` function in compiler.rs.
84pub fn is_whitespace<T: UnicodeSpec>(val: T) -> bool {
85    val.is_whitespace()
86}
87
88pub fn is_xid_start<T: UnicodeSpec>(val: T) -> bool {
89    val.is_xid_start()
90}
91
92pub fn is_xid_continue<T: UnicodeSpec>(val: T) -> bool {
93    val.is_xid_continue()
94}
95
96pub fn parse_linebreak(stream: &mut ParseStream) -> Result<LineBreak> {
97    LineBreak::parse(stream)
98}