oak_core/lexer/token.rs

use crate::Language;
pub use core::range::Range;
use std::sync::Arc;

#[cfg(feature = "serde")]
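/// Serde helpers for `Arc<[Token<K>]>`: serialize the tokens as a plain
/// slice and deserialize them by collecting a `Vec<Token<K>>` back into an
/// `Arc`. Used via `#[serde(with = "arc_slice_serde")]` below.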
mod arc_slice_serde {
    use super::*;
    use std::sync::Arc;

    pub fn serialize<K, S>(arc: &Arc<[Token<K>]>, serializer: S) -> Result<S::Ok, S::Error>
    where
        K: serde::Serialize,
        S: serde::Serializer,
    {
        serde::Serialize::serialize(arc.as_ref(), serializer)
    }

    pub fn deserialize<'de, K, D>(deserializer: D) -> Result<Arc<[Token<K>]>, D::Error>
    where
        K: serde::Deserialize<'de>,
        D: serde::Deserializer<'de>,
    {
        let vec = <Vec<Token<K>> as serde::Deserialize>::deserialize(deserializer)?;
        Ok(Arc::from_iter(vec))
    }
}

/// A collection of tokens with efficient reference counting.
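///
/// # Examples
///
/// A minimal sketch; `MyLang` is a placeholder for any type implementing
/// [`Language`] whose `TokenType` is `&'static str`.
///
/// ```ignore
/// #![feature(new_range_api)]
/// # use oak_core::lexer::{Token, Tokens};
/// # use core::range::Range;
/// let tokens: Tokens<MyLang> = vec![
///     Token { kind: "ident", span: Range { start: 0, end: 5 } },
/// ].into();
/// // `Tokens` derefs to `[Token<_>]`, so slice methods are available.
/// assert_eq!(tokens.len(), 1);
/// ```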
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(transparent, bound(serialize = "L::TokenType: serde::Serialize", deserialize = "L::TokenType: serde::Deserialize<'de>")))]
pub struct Tokens<L: Language>(#[cfg_attr(feature = "serde", serde(with = "arc_slice_serde"))] pub Arc<[Token<L::TokenType>]>);

impl<L: Language> Clone for Tokens<L> {
    fn clone(&self) -> Self {
        Self(self.0.clone())
    }
}

impl<L: Language> Default for Tokens<L> {
    fn default() -> Self {
        Self(Arc::from_iter(std::iter::empty()))
    }
}

impl<L: Language> core::ops::Deref for Tokens<L> {
    type Target = [Token<L::TokenType>];

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<L: Language> From<Arc<[Token<L::TokenType>]>> for Tokens<L> {
    fn from(arc: Arc<[Token<L::TokenType>]>) -> Self {
        Self(arc)
    }
}

impl<L: Language> From<Vec<Token<L::TokenType>>> for Tokens<L> {
    fn from(vec: Vec<Token<L::TokenType>>) -> Self {
        Self(Arc::from_iter(vec))
    }
}

/// Represents a single token in the source code.
///
/// Tokens are the fundamental units of lexical analysis, representing
/// categorized pieces of source text with their position information.
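///
/// # Examples
///
/// A minimal sketch of constructing a token and reading its fields.
///
/// ```ignore
/// #![feature(new_range_api)]
/// # use oak_core::lexer::Token;
/// # use core::range::Range;
/// let token = Token { kind: "number", span: Range { start: 3, end: 7 } };
/// // `Token<K>` is `Copy` whenever `K` is, so it can be passed by value freely.
/// let copy = token;
/// assert_eq!(copy.kind, "number");
/// assert_eq!(copy.span.start, 3);
/// ```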
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token<K> {
    /// The kind/category of this token (e.g., keyword, identifier, number)
    pub kind: K,
    /// The byte range in the source text that this token occupies
    #[cfg_attr(feature = "serde", serde(with = "crate::serde_range"))]
    pub span: Range<usize>,
}

impl<K> Token<K> {
    /// Returns the length of this token in bytes.
    ///
    /// # Returns
    ///
    /// The number of bytes between the start and end of the token's span
    ///
    /// # Examples
    ///
    /// ```ignore
    /// #![feature(new_range_api)]
    /// # use oak_core::lexer::Token;
    /// # use core::range::Range;
    /// let token = Token { kind: "ident", span: Range { start: 0, end: 5 } };
    /// assert_eq!(token.length(), 5);
    /// ```
    #[inline]
    pub fn length(&self) -> usize {
        self.span.end - self.span.start
    }
}

/// A stream of tokens with associated source text.
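///
/// # Examples
///
/// A minimal sketch pairing raw source text with the tokens a lexer produced
/// from it.
///
/// ```ignore
/// #![feature(new_range_api)]
/// # use oak_core::lexer::{Token, TokenStream};
/// # use core::range::Range;
/// # use std::sync::Arc;
/// let stream = TokenStream {
///     raw: String::from("hello"),
///     tokens: Arc::from_iter([Token { kind: "ident", span: Range { start: 0, end: 5 } }]),
/// };
/// assert_eq!(stream.tokens[0].length(), stream.raw.len());
/// ```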
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(bound(serialize = "K: serde::Serialize", deserialize = "K: serde::Deserialize<'de>")))]
pub struct TokenStream<K: Copy> {
    /// The raw source text.
    pub raw: String,
    /// The tokens extracted from the source text.
    #[cfg_attr(feature = "serde", serde(with = "arc_slice_serde"))]
    pub tokens: Arc<[Token<K>]>,
}