//! php_parser_rs/lexer/stream.rs

1use crate::lexer::token::Token;
2use crate::lexer::token::TokenKind;
3
4use crate::parser::ast::comments::Comment;
5use crate::parser::ast::comments::CommentFormat;
6use crate::parser::ast::comments::CommentGroup;
7
8/// Token stream.
9///
10/// # Examples
11///
12/// ```rust
13/// use php_parser_rs::lexer::token::Token;
14/// use php_parser_rs::lexer::token::TokenKind;
15/// use php_parser_rs::lexer::stream::TokenStream;
16///
17/// let tokens = vec![
18///     Token { kind: TokenKind::SingleLineComment("// some class".into()), span: (1, 1) },
19///     Token { kind: TokenKind::Readonly, span: (2, 1) },
20///     Token { kind: TokenKind::Class, span: (2, 10) },
21///     Token { kind: TokenKind::Enum, span: (2, 16) },
22///     Token { kind: TokenKind::LeftBrace, span: (2, 21) },
23///     Token { kind: TokenKind::SingleLineComment("// empty body!".into()), span: (3, 1) },
24///     Token { kind: TokenKind::RightBrace, span: (4, 1) },
25///     Token { kind: TokenKind::Eof, span: (0, 0) },
26/// ];
27///
28/// let mut stream = TokenStream::new(tokens);
29///
30/// assert!(matches!(stream.current().kind, TokenKind::Readonly));
31/// assert!(matches!(stream.peek().kind, TokenKind::Class));
32/// assert!(matches!(stream.lookahead(1).kind, TokenKind::Enum));
33/// assert!(matches!(stream.lookahead(2).kind, TokenKind::LeftBrace));
34/// assert!(matches!(stream.lookahead(3).kind, TokenKind::RightBrace));
35/// assert!(matches!(stream.lookahead(4).kind, TokenKind::Eof));
36/// assert!(matches!(stream.lookahead(5).kind, TokenKind::Eof));
37///
38/// stream.next();
39///
40/// assert!(matches!(stream.current().kind, TokenKind::Class));
41///
42/// stream.next();
43/// stream.next();
44/// stream.next();
45///
46/// assert!(matches!(stream.current().kind, TokenKind::RightBrace));
47///
48/// stream.next();
49///
50/// assert!(matches!(stream.current().kind, TokenKind::Eof));
51/// assert!(stream.is_eof());
52///
53/// assert_eq!(stream.comments(), vec![
54///     Token { kind: TokenKind::SingleLineComment("// some class".into()), span: (1, 1) },
55///     Token { kind: TokenKind::SingleLineComment("// empty body!".into()), span: (3, 1) },
56/// ]);
57/// ```
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct TokenStream<'a> {
60    tokens: &'a [Token],
61    length: usize,
62    comments: Vec<&'a Token>,
63    cursor: usize,
64}
65
66/// Token stream.
67impl<'a> TokenStream<'a> {
68    pub fn new(tokens: &'a [Token]) -> TokenStream {
69        let length = tokens.len();
70
71        let mut stream = TokenStream {
72            tokens,
73            length,
74            comments: vec![],
75            cursor: 0,
76        };
77
78        stream.collect_comments();
79
80        stream
81    }
82
83    /// Move cursor to next token.
84    ///
85    /// Comments are collected.
86    pub fn next(&mut self) {
87        self.cursor += 1;
88        self.collect_comments();
89    }
90
91    /// Get current token.
92    pub const fn current(&self) -> &'a Token {
93        let position = if self.cursor >= self.length {
94            self.length - 1
95        } else {
96            self.cursor
97        };
98
99        &self.tokens[position]
100    }
101
102    /// Get previous token.
103    pub const fn previous(&self) -> &'a Token {
104        let position = if self.cursor == 0 { 0 } else { self.cursor - 1 };
105
106        &self.tokens[position]
107    }
108
109    /// Peek next token.
110    ///
111    /// All comments are skipped.
112    pub const fn peek(&self) -> &'a Token {
113        self.peek_nth(1)
114    }
115
116    /// Peek nth+1 token.
117    ///
118    /// All comments are skipped.
119    pub const fn lookahead(&self, n: usize) -> &'a Token {
120        self.peek_nth(n + 1)
121    }
122
123    /// Peek nth token.
124    ///
125    /// All comments are skipped.
126    #[inline(always)]
127    const fn peek_nth(&self, n: usize) -> &'a Token {
128        let mut cursor = self.cursor + 1;
129        let mut target = 1;
130        loop {
131            if cursor >= self.length {
132                return &self.tokens[self.length - 1];
133            }
134
135            let current = &self.tokens[cursor];
136
137            if matches!(
138                current.kind,
139                TokenKind::SingleLineComment
140                    | TokenKind::MultiLineComment
141                    | TokenKind::HashMarkComment
142                    | TokenKind::DocumentComment
143            ) {
144                cursor += 1;
145                continue;
146            }
147
148            if target == n {
149                return current;
150            }
151
152            target += 1;
153            cursor += 1;
154        }
155    }
156
157    /// Check if current token is EOF.
158    pub fn is_eof(&self) -> bool {
159        if self.cursor >= self.length {
160            return true;
161        }
162
163        self.tokens[self.cursor].kind == TokenKind::Eof
164    }
165
166    /// Get all comments.
167    #[allow(dead_code)]
168    pub fn comments(&mut self) -> CommentGroup {
169        let mut comments = vec![];
170
171        std::mem::swap(&mut self.comments, &mut comments);
172
173        CommentGroup {
174            comments: comments
175                .iter()
176                .map(|token| match token {
177                    Token {
178                        kind: TokenKind::SingleLineComment,
179                        span,
180                        value,
181                    } => Comment {
182                        span: *span,
183                        format: CommentFormat::SingleLine,
184                        content: value.clone(),
185                    },
186                    Token {
187                        kind: TokenKind::MultiLineComment,
188                        span,
189                        value,
190                    } => Comment {
191                        span: *span,
192                        format: CommentFormat::MultiLine,
193                        content: value.clone(),
194                    },
195                    Token {
196                        kind: TokenKind::HashMarkComment,
197                        span,
198                        value,
199                    } => Comment {
200                        span: *span,
201                        format: CommentFormat::HashMark,
202                        content: value.clone(),
203                    },
204                    Token {
205                        kind: TokenKind::DocumentComment,
206                        span,
207                        value,
208                    } => Comment {
209                        span: *span,
210                        format: CommentFormat::Document,
211                        content: value.clone(),
212                    },
213                    _ => unreachable!(),
214                })
215                .collect(),
216        }
217    }
218
219    fn collect_comments(&mut self) {
220        loop {
221            if self.cursor >= self.length {
222                break;
223            }
224
225            let current = &self.tokens[self.cursor];
226
227            if !matches!(
228                current.kind,
229                TokenKind::SingleLineComment
230                    | TokenKind::MultiLineComment
231                    | TokenKind::HashMarkComment
232                    | TokenKind::DocumentComment
233            ) {
234                break;
235            }
236
237            self.comments.push(current);
238            self.cursor += 1;
239        }
240    }
241}
242
243impl<'a> Default for TokenStream<'a> {
244    fn default() -> Self {
245        Self::new(&[])
246    }
247}
248
249impl<'a> From<&'a Vec<Token>> for TokenStream<'a> {
250    fn from(tokens: &'a Vec<Token>) -> Self {
251        Self::new(tokens.as_slice())
252    }
253}