solar_parse/lexer/cursor/
char_info.rs

1/// Returns `true` if the given character is considered a whitespace.
2#[inline]
3pub const fn is_whitespace(c: char) -> bool {
4    is_whitespace_byte(ch2u8(c))
5}
6/// Returns `true` if the given character is considered a whitespace.
7#[inline]
8pub const fn is_whitespace_byte(c: u8) -> bool {
9    classify(c) & WHITESPACE != 0
10}
11
12/// Returns `true` if the given character is valid at the start of a Solidity identifier.
13#[inline]
14pub const fn is_id_start(c: char) -> bool {
15    is_id_start_byte(ch2u8(c))
16}
17/// Returns `true` if the given character is valid at the start of a Solidity identifier.
18#[inline]
19pub const fn is_id_start_byte(c: u8) -> bool {
20    classify(c) & ID_START != 0
21}
22
23/// Returns `true` if the given character is valid in a Solidity identifier.
24#[inline]
25pub const fn is_id_continue(c: char) -> bool {
26    is_id_continue_byte(ch2u8(c))
27}
28/// Returns `true` if the given character is valid in a Solidity identifier.
29#[inline]
30pub const fn is_id_continue_byte(c: u8) -> bool {
31    classify(c) & ID_CONTINUE != 0
32}
33
34#[inline]
35pub(super) const fn is_decimal_digit(c: u8) -> bool {
36    // `is_ascii_digit` is cheap enough to not benefit from the lookup table.
37    // classify(c) & DECIMAL_DIGIT != 0
38    c.is_ascii_digit()
39}
40
41#[inline]
42pub(super) const fn is_hex_digit(c: u8) -> bool {
43    classify(c) & HEX_DIGIT != 0
44}
45
46/// Returns `true` if the given string is a valid Solidity identifier.
47///
48/// An identifier in Solidity has to start with a letter, a dollar-sign or an underscore and may
49/// additionally contain numbers after the first symbol.
50///
51/// Reference: <https://docs.soliditylang.org/en/latest/grammar.html#a4.SolidityLexer.Identifier>
52#[inline]
53pub const fn is_ident(s: &str) -> bool {
54    is_ident_bytes(s.as_bytes())
55}
56
57/// Returns `true` if the given byte slice is a valid Solidity identifier.
58///
59/// See [`is_ident`] for more details.
60pub const fn is_ident_bytes(s: &[u8]) -> bool {
61    let [first, ref rest @ ..] = *s else {
62        return false;
63    };
64
65    if !is_id_start_byte(first) {
66        return false;
67    }
68
69    let mut i = 0;
70    while i < rest.len() {
71        if !is_id_continue_byte(rest[i]) {
72            return false;
73        }
74        i += 1;
75    }
76
77    true
78}
79
/// Converts a `char` to the `u8` that is used to classify it.
///
/// ASCII characters map to their own byte value. Every non-ASCII character maps to `0xFF`,
/// a byte for which `classify_impl` sets no flags, so such characters are never reported as
/// whitespace or as identifier characters.
///
/// A plain `c as u32 as u8` cast must not be used here: it keeps only the low 8 bits of the code
/// point, so e.g. U+0161 would alias to `b'a'` (0x61) and U+0120 to `b' '` (0x20).
#[inline(always)]
const fn ch2u8(c: char) -> u8 {
    if c.is_ascii() {
        // Lossless: ASCII code points are in `0..=0x7F`.
        c as u8
    } else {
        // Any non-ASCII byte would do as the sentinel; 0xFF never occurs in valid UTF-8.
        0xFF
    }
}
85
86pub(super) const EOF: u8 = b'\0';
87
// Character-class bit flags. Each entry of the `INFO` table is a bitwise OR of these.

/// Set for space, `\t`, `\n` and `\r`.
const WHITESPACE: u8 = 0b0_0001;
/// Set for bytes that may start an identifier: ASCII letters, `_` and `$`.
const ID_START: u8 = 0b0_0010;
/// Set for bytes that may appear in an identifier: `ID_START` bytes plus ASCII digits.
const ID_CONTINUE: u8 = 0b0_0100;
/// Set for ASCII decimal digits.
const DECIMAL_DIGIT: u8 = 0b0_1000;
/// Set for ASCII hexadecimal digits.
const HEX_DIGIT: u8 = 0b1_0000;
93
94#[inline(always)]
95const fn classify(c: u8) -> u8 {
96    INFO[c as usize]
97}
98
99static INFO: [u8; 256] = {
100    let mut table = [0; 256];
101    let mut i = 0;
102    while i < 256 {
103        table[i] = classify_impl(i as u8);
104        i += 1;
105    }
106    table
107};
108
109const fn classify_impl(c: u8) -> u8 {
110    // https://github.com/argotorg/solidity/blob/965166317bbc2b02067eb87f222a2dce9d24e289/liblangutil/Common.h#L20-L46
111
112    let mut result = 0;
113    if matches!(c, b' ' | b'\t' | b'\n' | b'\r') {
114        result |= WHITESPACE;
115    }
116    if matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$') {
117        result |= ID_START;
118    }
119    if matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$' | b'0'..=b'9') {
120        result |= ID_CONTINUE;
121    }
122    if c.is_ascii_digit() {
123        result |= DECIMAL_DIGIT;
124    }
125    if c.is_ascii_hexdigit() {
126        result |= HEX_DIGIT;
127    }
128    result
129}