//! php_parser_rs/lexer/stream.rs
1use crate::lexer::token::Token;
2use crate::lexer::token::TokenKind;
3
4use crate::parser::ast::comments::Comment;
5use crate::parser::ast::comments::CommentFormat;
6use crate::parser::ast::comments::CommentGroup;
7
/// Token stream.
///
/// Wraps a borrowed slice of lexer tokens and provides cursor-style
/// navigation that transparently skips comment tokens, collecting them for
/// later retrieval via `comments()`.
///
/// # Examples
///
/// NOTE(review): the example below is out of date with the current API —
/// `new()` takes `&[Token]` (not an owned `Vec`), the comment kinds are
/// unit variants (the comment text lives in `Token::value`, which the
/// example omits), and `comments()` returns a `CommentGroup`, not a
/// `Vec<Token>`. Marked `ignore` until it is rewritten; confirm against
/// the `Token` definition.
///
/// ```rust,ignore
/// use php_parser_rs::lexer::token::Token;
/// use php_parser_rs::lexer::token::TokenKind;
/// use php_parser_rs::lexer::stream::TokenStream;
///
/// let tokens = vec![
/// Token { kind: TokenKind::SingleLineComment("// some class".into()), span: (1, 1) },
/// Token { kind: TokenKind::Readonly, span: (2, 1) },
/// Token { kind: TokenKind::Class, span: (2, 10) },
/// Token { kind: TokenKind::Enum, span: (2, 16) },
/// Token { kind: TokenKind::LeftBrace, span: (2, 21) },
/// Token { kind: TokenKind::SingleLineComment("// empty body!".into()), span: (3, 1) },
/// Token { kind: TokenKind::RightBrace, span: (4, 1) },
/// Token { kind: TokenKind::Eof, span: (0, 0) },
/// ];
///
/// let mut stream = TokenStream::new(tokens);
///
/// assert!(matches!(stream.current().kind, TokenKind::Readonly));
/// assert!(matches!(stream.peek().kind, TokenKind::Class));
/// assert!(matches!(stream.lookahead(1).kind, TokenKind::Enum));
/// assert!(matches!(stream.lookahead(2).kind, TokenKind::LeftBrace));
/// assert!(matches!(stream.lookahead(3).kind, TokenKind::RightBrace));
/// assert!(matches!(stream.lookahead(4).kind, TokenKind::Eof));
/// assert!(matches!(stream.lookahead(5).kind, TokenKind::Eof));
///
/// stream.next();
///
/// assert!(matches!(stream.current().kind, TokenKind::Class));
///
/// stream.next();
/// stream.next();
/// stream.next();
///
/// assert!(matches!(stream.current().kind, TokenKind::RightBrace));
///
/// stream.next();
///
/// assert!(matches!(stream.current().kind, TokenKind::Eof));
/// assert!(stream.is_eof());
///
/// assert_eq!(stream.comments(), vec![
/// Token { kind: TokenKind::SingleLineComment("// some class".into()), span: (1, 1) },
/// Token { kind: TokenKind::SingleLineComment("// empty body!".into()), span: (3, 1) },
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenStream<'a> {
    /// All tokens produced by the lexer, borrowed for the stream's lifetime.
    tokens: &'a [Token],
    /// Cached `tokens.len()`.
    length: usize,
    /// Comment tokens skipped over so far; drained by `comments()`.
    comments: Vec<&'a Token>,
    /// Index into `tokens` of the current (non-comment) token.
    cursor: usize,
}
65
66/// Token stream.
67impl<'a> TokenStream<'a> {
68 pub fn new(tokens: &'a [Token]) -> TokenStream {
69 let length = tokens.len();
70
71 let mut stream = TokenStream {
72 tokens,
73 length,
74 comments: vec![],
75 cursor: 0,
76 };
77
78 stream.collect_comments();
79
80 stream
81 }
82
83 /// Move cursor to next token.
84 ///
85 /// Comments are collected.
86 pub fn next(&mut self) {
87 self.cursor += 1;
88 self.collect_comments();
89 }
90
91 /// Get current token.
92 pub const fn current(&self) -> &'a Token {
93 let position = if self.cursor >= self.length {
94 self.length - 1
95 } else {
96 self.cursor
97 };
98
99 &self.tokens[position]
100 }
101
102 /// Get previous token.
103 pub const fn previous(&self) -> &'a Token {
104 let position = if self.cursor == 0 { 0 } else { self.cursor - 1 };
105
106 &self.tokens[position]
107 }
108
109 /// Peek next token.
110 ///
111 /// All comments are skipped.
112 pub const fn peek(&self) -> &'a Token {
113 self.peek_nth(1)
114 }
115
116 /// Peek nth+1 token.
117 ///
118 /// All comments are skipped.
119 pub const fn lookahead(&self, n: usize) -> &'a Token {
120 self.peek_nth(n + 1)
121 }
122
123 /// Peek nth token.
124 ///
125 /// All comments are skipped.
126 #[inline(always)]
127 const fn peek_nth(&self, n: usize) -> &'a Token {
128 let mut cursor = self.cursor + 1;
129 let mut target = 1;
130 loop {
131 if cursor >= self.length {
132 return &self.tokens[self.length - 1];
133 }
134
135 let current = &self.tokens[cursor];
136
137 if matches!(
138 current.kind,
139 TokenKind::SingleLineComment
140 | TokenKind::MultiLineComment
141 | TokenKind::HashMarkComment
142 | TokenKind::DocumentComment
143 ) {
144 cursor += 1;
145 continue;
146 }
147
148 if target == n {
149 return current;
150 }
151
152 target += 1;
153 cursor += 1;
154 }
155 }
156
157 /// Check if current token is EOF.
158 pub fn is_eof(&self) -> bool {
159 if self.cursor >= self.length {
160 return true;
161 }
162
163 self.tokens[self.cursor].kind == TokenKind::Eof
164 }
165
166 /// Get all comments.
167 #[allow(dead_code)]
168 pub fn comments(&mut self) -> CommentGroup {
169 let mut comments = vec![];
170
171 std::mem::swap(&mut self.comments, &mut comments);
172
173 CommentGroup {
174 comments: comments
175 .iter()
176 .map(|token| match token {
177 Token {
178 kind: TokenKind::SingleLineComment,
179 span,
180 value,
181 } => Comment {
182 span: *span,
183 format: CommentFormat::SingleLine,
184 content: value.clone(),
185 },
186 Token {
187 kind: TokenKind::MultiLineComment,
188 span,
189 value,
190 } => Comment {
191 span: *span,
192 format: CommentFormat::MultiLine,
193 content: value.clone(),
194 },
195 Token {
196 kind: TokenKind::HashMarkComment,
197 span,
198 value,
199 } => Comment {
200 span: *span,
201 format: CommentFormat::HashMark,
202 content: value.clone(),
203 },
204 Token {
205 kind: TokenKind::DocumentComment,
206 span,
207 value,
208 } => Comment {
209 span: *span,
210 format: CommentFormat::Document,
211 content: value.clone(),
212 },
213 _ => unreachable!(),
214 })
215 .collect(),
216 }
217 }
218
219 fn collect_comments(&mut self) {
220 loop {
221 if self.cursor >= self.length {
222 break;
223 }
224
225 let current = &self.tokens[self.cursor];
226
227 if !matches!(
228 current.kind,
229 TokenKind::SingleLineComment
230 | TokenKind::MultiLineComment
231 | TokenKind::HashMarkComment
232 | TokenKind::DocumentComment
233 ) {
234 break;
235 }
236
237 self.comments.push(current);
238 self.cursor += 1;
239 }
240 }
241}
242
243impl<'a> Default for TokenStream<'a> {
244 fn default() -> Self {
245 Self::new(&[])
246 }
247}
248
249impl<'a> From<&'a Vec<Token>> for TokenStream<'a> {
250 fn from(tokens: &'a Vec<Token>) -> Self {
251 Self::new(tokens.as_slice())
252 }
253}