1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
use crate::parse::{Parse, ParseStream};
use crate::Result;
use unicode_xid::UnicodeXID;
/// The kind of line terminator recognised by the parser.
///
/// The enum is `#[repr(u8)]`, so each variant occupies a single byte and the
/// discriminants follow declaration order (`CRLF = 0` … `NEL = 3`).
///
/// The common value-type traits are derived so a parsed line break can be
/// debug-printed, compared, copied and used as a hash key by callers.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineBreak {
    /// Carriage return immediately followed by line feed (U+000D U+000A).
    CRLF,
    /// A lone carriage return (U+000D).
    CR,
    /// A lone line feed (U+000A).
    LF,
    /// Next line (U+0085).
    NEL,
}
impl Parse for LineBreak {
    /// Parses a single line terminator from `stream`.
    ///
    /// The work is done inside `ParseStream::try_parse`; presumably that
    /// helper restores the stream position when the closure fails (confirm
    /// against its definition).
    ///
    /// # Errors
    ///
    /// * `"Expected linebreak."` when the cursor cannot advance.
    /// * `"Unrecognized linebreak. Expected CRLF, CR, LF or NEL."` when the
    ///   next character is not one of U+000D, U+000A or U+0085.
    fn parse(stream: &mut ParseStream) -> Result<Self> {
        stream.try_parse(|inner| {
            let cursor = inner.cur();
            let first = cursor.advance().ok_or("Expected linebreak.")?;
            match first {
                '\u{000A}' => Ok(Self::LF),
                '\u{0085}' => Ok(Self::NEL),
                // A carriage return is CRLF only when a line feed follows
                // directly; `consume` is attempted exactly once, here.
                '\u{000D}' if cursor.consume('\u{000A}') => Ok(Self::CRLF),
                '\u{000D}' => Ok(Self::CR),
                _ => Err("Unrecognized linebreak. Expected CRLF, CR, LF or NEL."),
            }
        })
    }
}
/// Character-classification queries used by the lexer-level helpers
/// [`is_whitespace`], [`is_xid_start`] and [`is_xid_continue`].
///
/// Implementors decide what each category means for their own type.
pub trait UnicodeSpec {
    /// Returns `true` when the value counts as whitespace.
    fn is_whitespace(&self) -> bool;

    /// Returns `true` when the value is an `XID_Start` character, i.e. may
    /// begin an identifier.
    fn is_xid_start(&self) -> bool;

    /// Returns `true` when the value is an `XID_Continue` character, i.e. may
    /// appear after the first character of an identifier.
    fn is_xid_continue(&self) -> bool;
}
/// `char` implementation of [`UnicodeSpec`], compiled only when the
/// `char_spec` feature is enabled.
#[cfg(feature = "char_spec")]
impl UnicodeSpec for char {
    /// Delegates to `unicode_xid`'s `UnicodeXID::is_xid_start`.
    fn is_xid_start(&self) -> bool {
        // The trait path is spelled out because both `UnicodeSpec` and
        // `UnicodeXID` declare a method with this name.
        UnicodeXID::is_xid_start(*self)
    }

    /// Delegates to `unicode_xid`'s `UnicodeXID::is_xid_continue`.
    fn is_xid_continue(&self) -> bool {
        UnicodeXID::is_xid_continue(*self)
    }

    /// Returns `true` for a fixed set of whitespace code points.
    ///
    /// NOTE(review): the set appears to mirror the Unicode `White_Space`
    /// property — confirm against the Unicode data files if that is the
    /// intent.
    fn is_whitespace(&self) -> bool {
        match *self {
            // U+0009 tab through U+000D carriage return.
            '\u{0009}'..='\u{000D}' => true,
            // Space, next line, no-break space, ogham space mark.
            '\u{0020}' | '\u{0085}' | '\u{00A0}' | '\u{1680}' => true,
            // U+2000 through U+200A.
            '\u{2000}'..='\u{200A}' => true,
            // Line separator, paragraph separator.
            '\u{2028}' | '\u{2029}' => true,
            // Remaining individual space characters.
            '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
            _ => false,
        }
    }
}
/// Returns `true` when `val` is whitespace according to its
/// [`UnicodeSpec`] implementation.
///
/// Free-function form of [`UnicodeSpec::is_whitespace`], convenient where a
/// plain function is wanted instead of a method call.
pub fn is_whitespace<T>(val: T) -> bool
where
    T: UnicodeSpec,
{
    T::is_whitespace(&val)
}
/// Returns `true` when `val` may start an identifier according to its
/// [`UnicodeSpec`] implementation.
///
/// Free-function form of [`UnicodeSpec::is_xid_start`].
pub fn is_xid_start<T>(val: T) -> bool
where
    T: UnicodeSpec,
{
    T::is_xid_start(&val)
}
/// Returns `true` when `val` may continue an identifier according to its
/// [`UnicodeSpec`] implementation.
///
/// Free-function form of [`UnicodeSpec::is_xid_continue`].
pub fn is_xid_continue<T>(val: T) -> bool
where
    T: UnicodeSpec,
{
    T::is_xid_continue(&val)
}
pub fn parse_linebreak(stream: &mut ParseStream) -> Result<LineBreak> {
LineBreak::parse(stream)
}