fea_rs/parse/lexer/
token_set.rs

//! A bitset of token kinds.
//!
//! This is taken directly from rust-analyzer.
4
5use super::Kind;
6
7/// A bit-set of `Kind`s
8#[derive(Clone, Copy)]
9pub struct TokenSet(u128);
10
11impl TokenSet {
12    pub(crate) const EMPTY: TokenSet = TokenSet(0);
13
14    pub(crate) const SEMI: TokenSet = TokenSet::new(&[Kind::Semi]);
15
16    pub(crate) const SEMI_RBRACE: TokenSet = TokenSet::new(&[Kind::Semi, Kind::RBrace]);
17
18    pub(crate) const TOP_LEVEL: TokenSet = TokenSet::new(&[
19        Kind::TableKw,
20        Kind::IncludeKw,
21        Kind::LookupKw,
22        Kind::LanguagesystemKw,
23        Kind::AnchorDefKw,
24        Kind::FeatureKw,
25        Kind::MarkClassKw,
26        Kind::AnonKw,
27        Kind::NamedGlyphClass,
28    ]);
29
30    /// Tokens that may be a tag.
31    pub(crate) const TAG_LIKE: TokenSet =
32        TokenSet::new(&[Kind::MarkKw, Kind::NameKw, Kind::FlagKw, Kind::Ident]);
33
34    /// Tokens that may be identifiers.
35    ///
36    /// This includes tokens that have special meaning only in certain contexts.
37    pub(crate) const IDENT_LIKE: TokenSet = TokenSet::new(&[
38        Kind::Ident,
39        Kind::HorizAxisBaseScriptListKw,
40        Kind::HorizAxisBaseTagListKw,
41        Kind::HorizAxisMinMaxKw,
42        Kind::VertAxisBaseScriptListKw,
43        Kind::VertAxisBaseTagListKw,
44        Kind::VertAxisMinMaxKw,
45        Kind::AttachKw,
46        Kind::GlyphClassDefKw,
47        Kind::LigatureCaretByDevKw,
48        Kind::LigatureCaretByIndexKw,
49        Kind::LigatureCaretByPosKw,
50        Kind::MarkAttachClassKw,
51        Kind::FontRevisionKw,
52        Kind::AscenderKw,
53        Kind::CaretOffsetKw,
54        Kind::DescenderKw,
55        Kind::LineGapKw,
56        Kind::CapHeightKw,
57        Kind::CodePageRangeKw,
58        Kind::PanoseKw,
59        Kind::TypoAscenderKw,
60        Kind::TypoDescenderKw,
61        Kind::TypoLineGapKw,
62        Kind::UnicodeRangeKw,
63        Kind::VendorKw,
64        Kind::WinAscentKw,
65        Kind::WinDescentKw,
66        Kind::XHeightKw,
67        Kind::SizemenunameKw,
68        Kind::VertTypoAscenderKw,
69        Kind::VertTypoDescenderKw,
70        Kind::VertTypoLineGapKw,
71        Kind::VertAdvanceYKw,
72        Kind::VertOriginYKw,
73        Kind::ElidedFallbackNameKw,
74        Kind::ElidedFallbackNameIDKw,
75        Kind::DesignAxisKw,
76        Kind::AxisValueKw,
77        Kind::FlagKw,
78        Kind::LocationKw,
79        Kind::ElidableAxisValueNameKw,
80        Kind::OlderSiblingFontAttributeKw,
81        Kind::FeatureNamesKw,
82        Kind::NameKw,
83    ]);
84
85    /// Top level items + semi
86    pub(crate) const TOP_SEMI: TokenSet = TokenSet::TOP_LEVEL.union(TokenSet::new(&[Kind::Semi]));
87
88    /// keywords that start a gsub or gpos rule
89    pub(crate) const RULES: TokenSet = TokenSet::new(&[
90        Kind::PosKw,
91        Kind::EnumKw,
92        Kind::IgnoreKw,
93        Kind::SubKw,
94        Kind::RsubKw,
95    ]);
96
97    /// top level items in a feature or lookup block
98    pub(crate) const STATEMENT: TokenSet = TokenSet::new(&[
99        Kind::NamedGlyphClass,
100        Kind::MarkClassKw,
101        Kind::ParametersKw,
102        Kind::SubtableKw,
103        Kind::LookupflagKw,
104        Kind::ScriptKw,
105        Kind::LanguageKw,
106        Kind::FeatureKw,      //aalt only
107        Kind::SizemenunameKw, // size only
108        Kind::FeatureNamesKw, //ss01 - ss20 only
109    ])
110    .union(TokenSet::RULES);
111
112    /// feature block only:
113    pub(crate) const FEATURE_STATEMENT: TokenSet =
114        TokenSet::new(&[Kind::CvParametersKw, Kind::LookupKw]).union(TokenSet::STATEMENT);
115
116    pub(crate) const TOP_AND_FEATURE: TokenSet = TokenSet::TOP_LEVEL.union(TokenSet::STATEMENT);
117
118    pub(crate) const NUM_TYPES: TokenSet = TokenSet::new(&[Kind::Number, Kind::Octal, Kind::Hex]);
119
120    pub(crate) const FLOAT_LIKE: TokenSet = TokenSet::new(&[Kind::Number, Kind::Float]);
121
122    /// Used in glyphsapp number values
123    pub(crate) const OPERATORS: TokenSet =
124        TokenSet::new(&[Kind::Hyphen, Kind::Slash, Kind::Plus, Kind::Asterisk]);
125
126    pub(crate) const fn new(kinds: &[Kind]) -> TokenSet {
127        let mut res = 0u128;
128        let mut i = 0;
129        while i < kinds.len() {
130            res |= mask(kinds[i]);
131            i += 1
132        }
133        TokenSet(res)
134    }
135
136    pub(crate) const fn union(self, other: TokenSet) -> TokenSet {
137        TokenSet(self.0 | other.0)
138    }
139
140    pub(crate) const fn add(self, token: Kind) -> TokenSet {
141        assert!((token as u16) < 128);
142        TokenSet(self.0 | mask(token))
143    }
144
145    pub(crate) const fn contains(&self, kind: Kind) -> bool {
146        self.0 & mask(kind) != 0
147    }
148}
149
150const fn mask(kind: Kind) -> u128 {
151    1u128 << (kind as usize)
152}
153
154impl From<Kind> for TokenSet {
155    fn from(src: Kind) -> TokenSet {
156        TokenSet::new(&[src])
157    }
158}
159
160impl std::fmt::Display for TokenSet {
161    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
162        if self.0.count_ones() == 0 {
163            return write!(f, "no tokens");
164        }
165
166        let mut first = true;
167        for kind in iter_tokens(*self) {
168            if !first {
169                write!(f, ", ")?;
170            }
171            first = false;
172            write!(f, "{kind}")?;
173        }
174        Ok(())
175    }
176}
177
178fn iter_tokens(set: TokenSet) -> impl Iterator<Item = Kind> {
179    let mut raw = set.0;
180    std::iter::from_fn(move || {
181        let idx = raw.trailing_zeros();
182        if idx == u128::BITS {
183            return None;
184        }
185        let raw_next = idx as u16;
186        // safety: Kind is repr(u16), and has more than 128 members, so this
187        // will at least generate a valid Kind (not UB)
188        let next: Kind = unsafe { std::mem::transmute(raw_next) };
189        raw ^= 1u128 << idx;
190        Some(next)
191    })
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Membership reflects exactly the kinds the set was built from.
    #[test]
    fn token_set_works_for_tokens() {
        let members = [Kind::Eof, Kind::Whitespace];
        let set = TokenSet::new(&members);

        assert!(members.iter().all(|kind| set.contains(*kind)));
        assert!(!set.contains(Kind::Eq));
    }

    /// Iteration yields every member once and nothing else.
    #[test]
    fn iter_tokens_smoke_test() {
        let members = [
            Kind::Ident,
            Kind::LAngle,
            Kind::Cid,
            Kind::OlderSiblingFontAttributeKw,
        ];
        let absent = [
            Kind::String,
            Kind::RAngle,
            Kind::RParen,
            Kind::NamedGlyphClass,
            Kind::TableKw,
            Kind::ElidableAxisValueNameKw,
        ];
        let set = TokenSet::new(&members);

        assert_eq!(iter_tokens(set).count(), 4);
        assert!(iter_tokens(set).all(|token| set.contains(token)));
        assert!(absent.iter().all(|token| !set.contains(*token)));
    }

    /// `Display` covers the empty, single-member and multi-member cases.
    #[test]
    fn display() {
        assert_eq!(TokenSet::EMPTY.to_string(), "no tokens");

        let solo: TokenSet = Kind::LParen.into();
        assert_eq!(solo.to_string(), "(");

        let multi = TokenSet::new(&[Kind::TableKw, Kind::Comma, Kind::Hex]);
        assert_eq!(multi.to_string(), "HEX, ,, TableKw");
    }
}