Skip to main content

granit_parser/
char_traits.rs

//! Holds functions to determine whether a character belongs to a specific character set.
2
/// Returns `true` if `c` is the NUL character (`\0`).
#[inline]
#[must_use]
pub fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
9
/// Returns `true` if `c` is a line break, i.e. a line feed (`\n`) or a carriage return (`\r`).
#[inline]
#[must_use]
pub fn is_break(c: char) -> bool {
    matches!(c, '\r' | '\n')
}
16
/// Returns `true` if `c` is either a line break (`\n`, `\r`) or the NUL character (`\0`).
#[inline]
#[must_use]
pub fn is_breakz(c: char) -> bool {
    // Union of the line-break set and the NUL character.
    matches!(c, '\n' | '\r' | '\0')
}
23
/// Returns `true` if `c` is a blank, i.e. a space (` `) or a horizontal tab (`\t`).
#[inline]
#[must_use]
pub fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
30
/// Returns `true` if `c` is a blank, a line break or the NUL character.
///
/// The accepted characters are exactly: ` `, `\t`, `\n`, `\r` and `\0`.
#[inline]
#[must_use]
pub fn is_blank_or_breakz(c: char) -> bool {
    // Blanks first, then line breaks, then NUL.
    matches!(c, ' ' | '\t' | '\n' | '\r' | '\0')
}
39
/// Returns `true` if `c` is an ASCII decimal digit (`0` through `9`).
#[inline]
#[must_use]
pub fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
46
/// Returns `true` if `c` is an ASCII letter, an ASCII digit, an underscore (`_`) or a
/// hyphen (`-`).
///
/// This is used for scanning tag handles and similar constructs.
/// Note: because it accepts `_`, this set is slightly wider than YAML's `ns-word-char`.
/// Use [`is_word_char`] when strict `ns-word-char` compliance is required.
///
/// Matches: `[0-9a-zA-Z_-]`
#[inline]
#[must_use]
pub fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
}
59
/// Returns `true` if `c` is an ASCII hexadecimal digit: `0-9`, `a-f` or `A-F`.
#[inline]
#[must_use]
pub fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
66
/// Returns the numeric value (`0..=15`) of the hexadecimal digit `c`.
///
/// Both lowercase (`a-f`) and uppercase (`A-F`) letters are accepted.
///
/// # Panics
///
/// Panics if `c` is not an ASCII hexadecimal digit. Such input is treated as unreachable.
#[inline]
#[must_use]
pub fn as_hex(c: char) -> u32 {
    // `to_digit(16)` yields `Some` for exactly `0-9`, `a-f` and `A-F`.
    c.to_digit(16).unwrap_or_else(|| unreachable!())
}
78
/// Returns `true` if `c` is one of the YAML flow characters: `,`, `[`, `]`, `{` or `}`.
#[inline]
#[must_use]
pub fn is_flow(c: char) -> bool {
    const FLOW_INDICATORS: [char; 5] = [',', '[', ']', '{', '}'];
    FLOW_INDICATORS.contains(&c)
}
85
/// Returns `true` if `c` is the byte order mark character (`U+FEFF`).
#[inline]
#[must_use]
pub fn is_bom(c: char) -> bool {
    const BOM: char = '\u{FEFF}';
    c == BOM
}
92
93/// Check whether the character is a YAML non-breaking character.
94#[inline]
95#[must_use]
96pub fn is_yaml_non_break(c: char) -> bool {
97    is_printable(c) && !is_break(c) && !is_bom(c)
98}
99
/// Returns `true` if `c` belongs to the YAML printable character set (`c-printable`).
///
/// Accepted code points: tab, line feed, carriage return, `U+0020..=U+007E`, `U+0085`,
/// `U+00A0..=U+D7FF`, `U+E000..=U+FFFD` and `U+10000..=U+10FFFF`.
#[inline]
#[must_use]
fn is_printable(c: char) -> bool {
    matches!(
        c,
        '\t' | '\n'
            | '\r'
            | ' '..='~'
            | '\u{85}'
            | '\u{A0}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
116
117/// Check whether the character is NOT a YAML whitespace (` ` / `\t`).
118#[inline]
119#[must_use]
120pub fn is_yaml_non_space(c: char) -> bool {
121    is_yaml_non_break(c) && !is_blank(c)
122}
123
124/// Check whether the character is a valid YAML anchor name character.
125#[inline]
126#[must_use]
127pub fn is_anchor_char(c: char) -> bool {
128    is_yaml_non_space(c) && !is_flow(c) && !is_z(c)
129}
130
/// Returns `true` if `c` is a YAML word character (`ns-word-char`).
///
/// Per YAML 1.2 spec: `ns-word-char ::= ns-dec-digit | ns-ascii-letter | "-"`
///
/// Matches: `[0-9a-zA-Z-]`
#[inline]
#[must_use]
pub fn is_word_char(c: char) -> bool {
    // ASCII letters and digits plus the hyphen; unlike `is_alpha`, `_` is not accepted.
    c.is_ascii_alphanumeric() || c == '-'
}
141
142/// Check whether the character is a valid URI character.
143#[inline]
144#[must_use]
145pub fn is_uri_char(c: char) -> bool {
146    is_word_char(c) || "#;/?:@&=+$,_.!~*\'()[]%".contains(c)
147}
148
149/// Check whether the character is a valid tag character.
150#[inline]
151#[must_use]
152pub fn is_tag_char(c: char) -> bool {
153    is_uri_char(c) && !is_flow(c) && c != '!'
154}