relex/
token.rs

1use std::fmt::Debug;
2
/// The contract every custom token-kind type has to satisfy.
///
/// An implementor supplies two sentinel kinds: one for text that nothing matched
/// and one for the end of the input. The equality-based predicates
/// [`TokenKind::is_unrecognized`] and [`TokenKind::is_eof`] are provided on top of them.
///
/// # Examples
///
/// ```rust
/// use relex::TokenKind;
///
/// #[derive(Debug, Clone, PartialEq)]
/// enum MyToken {
///   Whitespace,
///   ID,
///   Eof,
///   Unrecognized,
/// }
/// impl TokenKind for MyToken {
///   fn eof() -> Self { MyToken::Eof }
///   fn unrecognized() -> Self { MyToken::Unrecognized }
/// }
/// ```
pub trait TokenKind: Clone + PartialEq {
  /// Builds the kind that marks a token as "unrecognized"
  /// (that is, text left unmatched by every given regex).
  fn unrecognized() -> Self;

  /// Reports whether this kind compares equal to [`TokenKind::unrecognized`].
  fn is_unrecognized(&self) -> bool {
    *self == Self::unrecognized()
  }

  /// Builds the kind that marks a token as sitting at the end of the input.
  fn eof() -> Self;

  /// Reports whether this kind compares equal to [`TokenKind::eof`].
  fn is_eof(&self) -> bool {
    *self == Self::eof()
  }
}
35
36/// Represents a detected token
37#[derive(Debug, PartialEq)]
38pub struct Token<'a, K: TokenKind> {
39  pub kind: K,
40  pub start: usize,
41  pub end: usize,
42  pub text: &'a str,
43  pub skip: bool,
44  /// In some cases (viz. when an "unrecognized" token is returned),
45  /// whatever token comes next is cached here.
46  pub next: Option<Box<Token<'a, K>>>,
47  /// The tokens skipped over to get here.
48  pub skipped: Vec<Token<'a, K>>,
49  pub captures: Option<Vec<Option<(usize, usize)>>>,
50}
51impl<'a, K: TokenKind> Token<'a, K> {
52  pub fn new(source: &'a str, kind: K, start: usize, end: usize) -> Token<'a, K> {
53    Token {
54      kind,
55      start,
56      end,
57      text: &source[start..end],
58      skip: false,
59      skipped: vec![],
60      next: None,
61      captures: None,
62    }
63  }
64  pub fn from_text(kind: K, text: &'a str, start: usize) -> Token<'a, K> {
65    Token {
66      kind,
67      start,
68      end: start + text.len(),
69      text,
70      skip: false,
71      skipped: vec![],
72      next: None,
73      captures: None,
74    }
75  }
76
77  pub fn eof(source: &'a str) -> Token<'a, K> {
78    Token {
79      kind: K::eof(),
80      start: source.len(),
81      end: source.len(),
82      text: "",
83      skip: false,
84      next: None,
85      skipped: vec![],
86      captures: None,
87    }
88  }
89
90  pub fn skip(mut self, skip: bool) -> Self {
91    self.skip = skip;
92    self
93  }
94
95  pub fn skipped(mut self, skipped: Vec<Token<'a, K>>) -> Self {
96    self.skipped = skipped;
97    self
98  }
99
100  pub fn next(mut self, next: Option<Box<Token<'a, K>>>) -> Self {
101    self.next = next;
102    self
103  }
104
105  pub fn captures(mut self, captures: Option<Vec<Option<(usize, usize)>>>) -> Self {
106    self.captures = captures;
107    self
108  }
109
110  pub fn get_capture(&self, idx: usize) -> Option<TokenCapture<'a>> {
111    self
112      .captures
113      .as_ref()
114      .and_then(|captures| captures[idx])
115      .map(|(start, end)| TokenCapture {
116        text: &self.text[start..end],
117        start,
118        end,
119      })
120  }
121}
122
/// Describes one capture belonging to a token.
///
/// Both offsets are measured from the start of the _token_ text, not from the
/// start of the source string.
#[derive(Debug, PartialEq)]
pub struct TokenCapture<'a> {
  /// The captured text.
  pub text: &'a str,

  /// Offset within the token text where the capture begins.
  pub start: usize,

  /// Offset within the token text where the capture ends.
  pub end: usize,
}
135
/// Test-only convenience: lets plain string literals act as token kinds,
/// with `"EOF"` and `"UNRECOGNIZED"` serving as the two sentinel kinds.
#[cfg(test)]
impl TokenKind for &'static str {
  fn eof() -> Self { "EOF" }

  fn unrecognized() -> Self { "UNRECOGNIZED" }
}