pub type Pos = u32;
pub type InsnId = u32;
pub type RuleId = u16;
pub type Tag = u16;
pub type FieldId = u16;
pub type SyntaxKind = u16;
pub trait IntoSyntaxKind {
fn into_syntax_kind(self) -> SyntaxKind;
}
impl IntoSyntaxKind for SyntaxKind {
#[inline]
fn into_syntax_kind(self) -> SyntaxKind {
self
}
}
pub trait FromSyntaxKind: Sized {
fn from_syntax_kind(k: SyntaxKind) -> Option<Self>;
}
impl FromSyntaxKind for SyntaxKind {
#[inline]
fn from_syntax_kind(k: SyntaxKind) -> Option<Self> {
Some(k)
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
pub start: Pos,
pub end: Pos,
}
impl Span {
#[must_use]
#[inline]
pub const fn new(start: Pos, end: Pos) -> Self {
Self { start, end }
}
#[must_use]
#[inline]
pub const fn len(self) -> usize {
(self.end - self.start) as usize
}
#[must_use]
#[inline]
pub const fn is_empty(self) -> bool {
self.start == self.end
}
#[must_use]
#[inline]
pub fn as_slice(self, input: &[u8]) -> &[u8] {
&input[self.start as usize..self.end as usize]
}
#[must_use]
#[inline]
pub const fn contains_offset(self, offset: Pos) -> bool {
offset >= self.start && offset < self.end
}
#[must_use]
#[inline]
pub const fn contains_span(self, other: Self) -> bool {
other.start >= self.start && other.end <= self.end
}
#[must_use]
#[inline]
pub fn merge(self, other: Self) -> Option<Self> {
if self.end >= other.start && other.end >= self.start {
Some(Self::new(
self.start.min(other.start),
self.end.max(other.end),
))
} else {
None
}
}
}
#[inline]
pub fn sort_spans(spans: &mut [Span]) {
spans.sort_by_key(|s| s.start);
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C, align(32))]
pub struct CharClass(pub [u64; 4]);
impl CharClass {
pub const EMPTY: Self = Self([0; 4]);
pub const ANY: Self = Self([u64::MAX; 4]);
#[must_use]
#[inline]
pub const fn from_byte(byte: u8) -> Self {
Self::EMPTY.with_byte(byte)
}
#[must_use]
#[inline]
pub const fn from_bytes(bytes: &[u8]) -> Self {
let mut cls = Self::EMPTY;
let mut i = 0;
while i < bytes.len() {
cls = cls.with_byte(bytes[i]);
i += 1;
}
cls
}
#[must_use]
#[inline]
pub const fn from_words(words: [u64; 4]) -> Self {
Self(words)
}
#[must_use]
#[inline]
pub const fn contains(self, byte: u8) -> bool {
let (word, bit) = (byte as usize >> 6, byte as usize & 63);
(self.0[word] >> bit) & 1 != 0
}
#[must_use]
#[inline]
pub const fn with_byte(mut self, byte: u8) -> Self {
let (word, bit) = ((byte >> 6) as usize, (byte & 63) as usize);
self.0[word] |= 1u64 << bit;
self
}
#[must_use]
#[inline]
pub const fn with_range(mut self, lo: u8, hi: u8) -> Self {
let mut b = lo;
loop {
let (word, bit) = ((b >> 6) as usize, (b & 63) as usize);
self.0[word] |= 1u64 << bit;
if b == hi {
break;
}
b += 1;
}
self
}
#[must_use]
#[inline]
pub const fn union(self, other: Self) -> Self {
Self([
self.0[0] | other.0[0],
self.0[1] | other.0[1],
self.0[2] | other.0[2],
self.0[3] | other.0[3],
])
}
#[must_use]
#[inline]
pub const fn complement(self) -> Self {
Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
}
#[must_use]
#[inline]
pub const fn from_chars(chars: &[u8]) -> Self {
Self::from_bytes(chars)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn span_len_and_slice() {
let s = Span::new(1, 4);
assert_eq!(s.len(), 3);
assert!(!s.is_empty());
let input = b"0123456789";
assert_eq!(s.as_slice(input), b"123");
}
#[test]
fn span_empty() {
let s = Span::new(2, 2);
assert_eq!(s.len(), 0);
assert!(s.is_empty());
}
#[test]
fn char_class_with_byte_and_contains() {
let c = CharClass::from_byte(b'a').union(CharClass::from_byte(b'z'));
assert!(c.contains(b'a'));
assert!(c.contains(b'z'));
assert!(!c.contains(b'm'));
}
#[test]
fn char_class_from_byte_and_from_bytes() {
assert!(CharClass::from_byte(b'x').contains(b'x'));
assert!(!CharClass::from_byte(b'x').contains(b'y'));
let m = CharClass::from_bytes(b"abc");
assert!(m.contains(b'a') && m.contains(b'b') && m.contains(b'c'));
assert!(!m.contains(b'd'));
assert_eq!(CharClass::from_bytes(b" \t\n\r"), classes::WHITESPACE);
assert_eq!(
CharClass::from_bytes("€".as_bytes()),
CharClass::from_byte(0xE2)
.union(CharClass::from_byte(0x82))
.union(CharClass::from_byte(0xAC))
);
}
#[test]
fn char_class_with_range() {
let c = CharClass::EMPTY.with_range(b'0', b'9');
for b in b'0'..=b'9' {
assert!(c.contains(b), "should contain {}", b as char);
}
assert!(!c.contains(b'/'));
assert!(!c.contains(b':'));
}
#[test]
fn char_class_union() {
let a = CharClass::from_byte(b'a');
let b = CharClass::from_byte(b'b');
let u = a.union(b);
assert!(u.contains(b'a'));
assert!(u.contains(b'b'));
}
}
pub mod classes {
use super::CharClass;
pub const DIGIT: CharClass = CharClass::EMPTY.with_range(b'0', b'9');
pub const LOWER: CharClass = CharClass::EMPTY.with_range(b'a', b'z');
pub const UPPER: CharClass = CharClass::EMPTY.with_range(b'A', b'Z');
pub const ALPHA: CharClass = LOWER.union(UPPER);
pub const ALNUM: CharClass = ALPHA.union(DIGIT);
pub const IDENT_START: CharClass = ALPHA.union(CharClass::from_byte(b'_'));
pub const IDENT_CONT: CharClass = ALNUM.union(CharClass::from_byte(b'_'));
pub const WHITESPACE: CharClass = CharClass::from_chars(b" \t\n\r");
pub const HEX_DIGIT: CharClass = DIGIT
.union(CharClass::EMPTY.with_range(b'a', b'f'))
.union(CharClass::EMPTY.with_range(b'A', b'F'));
pub const PRINTABLE: CharClass = CharClass::EMPTY.with_range(0x20, 0x7E);
pub const NOT_NEWLINE: CharClass = CharClass::EMPTY
.with_range(0, 9)
.union(CharClass::EMPTY.with_range(11, 255));
}
#[derive(Clone, Copy, Debug)]
pub enum CaptureEvent {
Open { tag: Tag, pos: Pos },
Close { tag: Tag, pos: Pos },
}
#[derive(Clone, Copy, Debug)]
pub enum TreeEvent {
NodeOpen {
kind: SyntaxKind,
field: Option<FieldId>,
pos: Pos,
},
NodeClose { pos: Pos },
Token {
kind: SyntaxKind,
start: Pos,
end: Pos,
is_trivia: bool,
},
}