1use thiserror::Error;
5
6use crate::{
7 names::Identifier,
8 src_pos::SrcPos,
9 token::{SrcToken, TaggedToken},
10};
11
12pub type Toktr = TaggedToktr<()>;
14
15pub(crate) type SrcToktr = TaggedToktr<SrcPos>;
16
17#[derive(Clone, Debug, Eq, PartialEq)]
19pub enum TaggedToktr<T> {
20 Import(T),
23 As(T),
25 Class(T),
27 Colon(T),
29 Eq(T),
31 Comma(T),
33 Dot(T),
35 Newline(T),
37 Null(T),
39
40 Identifier(T, Identifier),
43
44 IntegerLiteral(T, u64),
47 Shl(T),
49 Mul(T),
51
52 BracketBlock(T, Box<NodeData<T>>),
55 ParenBlock(T, Box<NodeData<T>>),
57 IndentBlock(T, Box<NodeData<T>>),
59
60 DocString(T, String),
62}
63
64impl<T> TaggedToktr<T> {
65 pub fn tag(&self) -> &T {
67 match self {
68 Self::Import(t) => t,
69 Self::As(t) => t,
70 Self::Class(t) => t,
71 Self::Colon(t) => t,
72 Self::Eq(t) => t,
73 Self::Comma(t) => t,
74 Self::Dot(t) => t,
75 Self::Newline(t) => t,
76 Self::Null(t) => t,
77 Self::Identifier(t, _) => t,
78 Self::IntegerLiteral(t, _) => t,
79 Self::Shl(t) => t,
80 Self::Mul(t) => t,
81 Self::BracketBlock(t, _) => t,
82 Self::ParenBlock(t, _) => t,
83 Self::IndentBlock(t, _) => t,
84 Self::DocString(t, _) => t,
85 }
86 }
87
88 pub fn is_block(&self) -> bool {
90 matches!(
91 self,
92 Self::BracketBlock(_, _) | Self::ParenBlock(_, _) | Self::IndentBlock(_, _)
93 )
94 }
95
96 pub fn node_data(&self) -> Option<&NodeData<T>> {
98 match self {
99 Self::BracketBlock(_, data)
100 | Self::ParenBlock(_, data)
101 | Self::IndentBlock(_, data) => Some(data),
102 _ => None,
103 }
104 }
105}
106
107#[derive(Clone, Debug, Eq, PartialEq)]
109pub struct NodeData<T> {
110 children: Vec<TaggedToktr<T>>,
111}
112
113impl<T> NodeData<T> {
114 pub fn new(children: Vec<TaggedToktr<T>>) -> Self {
115 Self { children }
116 }
117
118 pub fn children(&self) -> &[TaggedToktr<T>] {
119 &self.children
120 }
121}
122
123#[derive(Debug, Error)]
124pub enum ToktrError {
125 #[error("expected {0:?} terminal but found {1:?} terminal")]
126 UnfinishedBlock(BlockType, BlockType),
127
128 #[error("end of sequence with unclosed block of type {0:?}")]
129 UnclosedBlock(BlockType),
130
131 #[error("not yet implemented")]
132 Unimplemented,
133}
134
135#[derive(Copy, Clone, Debug, Eq, PartialEq)]
137pub enum BlockType {
138 Root,
139 Bracket,
140 Paren,
141 Indent,
142}
143
144struct ToktrBuilder {
145 block_ty_stack: Vec<BlockType>,
146 block_sp_stack: Vec<SrcPos>,
147 contexts: Vec<Vec<SrcToktr>>,
148}
149
150impl ToktrBuilder {
151 pub(crate) fn new() -> Self {
152 Self {
153 block_ty_stack: vec![BlockType::Root],
154 block_sp_stack: vec![],
155 contexts: vec![Vec::new()],
156 }
157 }
158
159 fn top_block_mut(&mut self) -> &mut Vec<SrcToktr> {
160 self.contexts.last_mut().expect("toktr: top_block_mut")
161 }
162
163 fn top_ty(&self) -> &BlockType {
164 self.block_ty_stack.last().expect("toktr: top_ty")
165 }
166
167 pub(crate) fn push_token(&mut self, tt: SrcToktr) {
168 self.top_block_mut().push(tt);
169 }
170
171 pub(crate) fn push_block(&mut self, ty: BlockType, sp: SrcPos) {
172 self.contexts.push(Vec::new());
173 self.block_ty_stack.push(ty);
174 self.block_sp_stack.push(sp);
175 }
176
177 pub(crate) fn try_pop_block(&mut self, ty: BlockType) -> Result<SrcToktr, ToktrError> {
178 if ty == BlockType::Root {
180 panic!("toktr: tried to pop root");
181 }
182
183 if *self.top_ty() != ty {
184 return Err(ToktrError::UnfinishedBlock(*self.top_ty(), ty));
185 }
186
187 self.block_ty_stack.pop();
189 let sp = self.block_sp_stack.pop().unwrap();
190 let body_toks = self.contexts.pop().expect("toktr: try_finish_block");
191 let data = NodeData::new(body_toks);
192 let tt = match ty {
193 BlockType::Bracket => SrcToktr::BracketBlock(sp, Box::new(data)),
194 BlockType::Paren => SrcToktr::ParenBlock(sp, Box::new(data)),
195 BlockType::Indent => SrcToktr::IndentBlock(sp, Box::new(data)),
196 _ => unreachable!(),
197 };
198
199 Ok(tt)
200 }
201
202 pub(crate) fn finish(mut self) -> Result<Vec<SrcToktr>, ToktrError> {
203 if self.contexts.len() != 1 {
204 return Err(ToktrError::UnclosedBlock(*self.top_ty()));
205 }
206
207 let toks = self.contexts.pop().unwrap();
208 Ok(toks)
209 }
210}
211
212pub(crate) fn parse_tokens_to_toktrs(tokens: &[SrcToken]) -> Result<Vec<SrcToktr>, ToktrError> {
213 let mut i = 0;
214
215 let mut builder = ToktrBuilder::new();
216
217 while i < tokens.len() {
218 let cur = &tokens[i];
219
220 let tt = match cur {
221 TaggedToken::Null(sp) => TaggedToktr::Null(*sp),
222 TaggedToken::Class(sp) => TaggedToktr::Class(*sp),
223 TaggedToken::Import(sp) => TaggedToktr::Import(*sp),
224 TaggedToken::As(sp) => TaggedToktr::As(*sp),
225 TaggedToken::Colon(sp) => TaggedToktr::Colon(*sp),
226 TaggedToken::Eq(sp) => TaggedToktr::Eq(*sp),
227 TaggedToken::Comma(sp) => TaggedToktr::Comma(*sp),
228 TaggedToken::Dot(sp) => TaggedToktr::Dot(*sp),
229 TaggedToken::Newline(sp) => TaggedToktr::Newline(*sp),
230 TaggedToken::Identifier(sp, ident) => TaggedToktr::Identifier(*sp, ident.clone()),
231 TaggedToken::IntegerLiteral(sp, v) => TaggedToktr::IntegerLiteral(*sp, *v),
232 TaggedToken::Shl(sp) => TaggedToktr::Shl(*sp),
233 TaggedToken::Mul(sp) => TaggedToktr::Mul(*sp),
234 TaggedToken::DocString(sp, doc) => TaggedToktr::DocString(*sp, doc.clone()),
235
236 TaggedToken::OpenBracket(sp) => {
237 builder.push_block(BlockType::Bracket, *sp);
238 i += 1;
239 continue;
240 }
241
242 TaggedToken::CloseBracket(_) => builder.try_pop_block(BlockType::Bracket)?,
243
244 TaggedToken::OpenParen(sp) => {
245 builder.push_block(BlockType::Paren, *sp);
246 i += 1;
247 continue;
248 }
249 TaggedToken::CloseParen(_) => builder.try_pop_block(BlockType::Paren)?,
250
251 TaggedToken::Indent(sp) => {
252 builder.push_block(BlockType::Indent, *sp);
253 i += 1;
254 continue;
255 }
256 TaggedToken::Deindent(_) => builder.try_pop_block(BlockType::Indent)?,
257 };
258
259 builder.push_token(tt);
260
261 i += 1;
262 }
263
264 builder.finish()
265}
266
267#[cfg(test)]
268mod tests {
269 use crate::{
270 src_pos::SrcPos,
271 token::{parse_char_array_to_tokens, SrcToken},
272 Identifier,
273 };
274
275 use super::parse_tokens_to_toktrs;
276
277 #[test]
278 fn test_parse_simple() {
279 let tokens = vec![
280 SrcToken::Class(SrcPos::dummy()),
281 SrcToken::Colon(SrcPos::dummy()),
282 SrcToken::Eq(SrcPos::dummy()),
283 SrcToken::Comma(SrcPos::dummy()),
284 ];
285
286 let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
287
288 eprintln!("{tt:#?}");
289 }
290
291 #[test]
292 fn test_parse_tree() {
293 let foo_name = Identifier::try_from("Foo".to_owned()).expect("test: parse ident name");
294 let container_name =
295 Identifier::try_from("Container".to_owned()).expect("test: parse ident name");
296 let vector_name =
297 Identifier::try_from("Vector".to_owned()).expect("test: parse ident name");
298 let bar_name = Identifier::try_from("bar".to_owned()).expect("test: parse ident name");
299
300 let sp = SrcPos::dummy();
301
302 let tokens = vec![
303 SrcToken::Class(sp),
304 SrcToken::Identifier(sp, foo_name),
305 SrcToken::OpenParen(sp),
306 SrcToken::Identifier(sp, container_name),
307 SrcToken::CloseParen(sp),
308 SrcToken::Colon(sp),
309 SrcToken::Indent(sp),
310 SrcToken::Identifier(sp, bar_name),
311 SrcToken::Colon(sp),
312 SrcToken::Identifier(sp, vector_name),
313 SrcToken::Deindent(sp),
314 ];
315
316 let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
317
318 eprintln!("{tt:#?}");
319 }
320
321 #[test]
322 fn test_parse_nested_tree() {
323 let foo_name = Identifier::try_from("Foo".to_owned()).expect("test: parse ident");
324 let stable_container_name =
325 Identifier::try_from("StableContainer".to_owned()).expect("test: parse ident");
326 let vector_name = Identifier::try_from("Vector".to_owned()).expect("test: parse ident");
327 let bar_name = Identifier::try_from("bar".to_owned()).expect("test: parse ident");
328
329 let sp = SrcPos::dummy();
330
331 let tokens = vec![
332 SrcToken::Class(sp),
333 SrcToken::Identifier(sp, foo_name),
334 SrcToken::OpenParen(sp),
335 SrcToken::Identifier(sp, stable_container_name.clone()),
336 SrcToken::OpenBracket(sp),
337 SrcToken::IntegerLiteral(sp, 32),
338 SrcToken::CloseBracket(sp),
339 SrcToken::CloseParen(sp),
340 SrcToken::Colon(sp),
341 SrcToken::Indent(sp),
342 SrcToken::Identifier(sp, bar_name),
343 SrcToken::Colon(sp),
344 SrcToken::Identifier(sp, vector_name),
345 SrcToken::Deindent(sp),
346 ];
347
348 let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
349
350 eprintln!("{tt:#?}");
351 }
352
353 #[test]
354 fn test_full_parse_stable_container() {
355 let s = r"
356class Foo(StableContainer[16]):
357 x_coordinate: Optional[uint32]
358 y_coordinate: Optional[uint64]
359";
360
361 let arr = s.chars().collect::<Vec<_>>();
362
363 let toks = parse_char_array_to_tokens(&arr).expect("test: tokenize string");
364
365 eprintln!("tokens {toks:#?}");
366
367 let tt = parse_tokens_to_toktrs(&toks).expect("test: treeize tokens");
368
369 eprintln!("tree {tt:#?}");
370 }
371}