// customasm/syntax/walker.rs

1use crate::*;
2
3
4#[derive(Clone)]
5pub struct Walker<'src>
6{
7	src: &'src str,
8    file_handle: util::FileServerHandle,
9    span_offset: usize,
10
11    /// The current byte index into the `src` string.
12    cursor_index: usize,
13    /// The walker ignores characters from this byte index onward.
14    cursor_limit: usize,
15}
16
17
18impl<'src> Walker<'src>
19{
20	pub fn new(
21        src: &'src str,
22        src_file_handle: util::FileServerHandle,
23        src_byte_offset: usize)
24        -> Walker<'src>
25	{
26		let walker = Walker {
27            src,
28            file_handle: src_file_handle,
29            span_offset: src_byte_offset,
30
31            cursor_index: 0,
32            cursor_limit: src.len(),
33		};
34		
35		walker
36	}
37
38
39    pub fn slice(
40        &self,
41        start_byte_index: usize,
42        end_byte_index: usize)
43        -> Walker<'src>
44    {
45        let src = &self.src[start_byte_index..end_byte_index];
46
47		let walker = Walker {
48            src,
49            file_handle: self.file_handle,
50            span_offset: self.span_offset + start_byte_index,
51
52            cursor_index: 0,
53            cursor_limit: src.len(),
54		};
55		
56		walker
57    }
58	
59	
60	pub fn is_over(&self) -> bool
61	{
62		self.cursor_index >= self.cursor_limit
63	}
64
65
66    pub fn advance_to_token_end(
67        &mut self,
68        token: &syntax::Token)
69    {
70        self.cursor_index = self.get_index_at_span_end(token.span);
71    }
72	
73	
74	pub fn skip_ignorable(&mut self)
75	{
76        loop
77        {
78            if self.is_over()
79                { break; }
80            
81            let token = self.token_at(self.cursor_index);
82
83            if !token.kind.is_ignorable()
84                { break; }
85
86            self.advance_to_token_end(&token);
87        }
88	}
89
90
91    pub fn get_cursor_index(&self) -> usize
92    {
93        self.cursor_index
94    }
95
96
97    pub fn get_index_at_span_start(
98        &self,
99        span: diagn::Span)
100        -> usize
101    {
102        span.location().unwrap().0 - self.span_offset
103    }
104
105
106    pub fn get_index_at_span_end(
107        &self,
108        span: diagn::Span)
109        -> usize
110    {
111        span.location().unwrap().1 - self.span_offset
112    }
113
114
115    pub fn next_useful_index(&self) -> usize
116    {
117        let token = self.next_useful_token();
118        self.get_index_at_span_start(token.span)
119    }
120
121
122    pub fn get_span(
123        &self,
124        start_byte_index: usize,
125        end_byte_index: usize)
126        -> diagn::Span
127    {
128        let start =
129            self.span_offset + start_byte_index;
130        
131        let end =
132            self.span_offset + end_byte_index;
133        
134        diagn::Span::new(
135            self.file_handle,
136            start,
137            end)
138    }
139
140
141    pub fn get_cursor_span(&self) -> diagn::Span
142    {
143        self.get_span(self.cursor_index, self.cursor_index)
144    }
145
146
147    pub fn get_full_span(&self) -> diagn::Span
148    {
149        self.get_span(0, self.cursor_limit)
150    }
151
152
153    pub fn get_excerpt(
154        &self,
155        start_byte_index: usize,
156        end_byte_index: usize)
157        -> &'src str
158    {
159        &self.src[start_byte_index..end_byte_index]
160    }
161
162
163    pub fn get_full_excerpt(&self) -> &'src str
164    {
165        &self.src[0..self.cursor_limit]
166    }
167
168
169    pub fn get_span_excerpt(
170        &self,
171        span: diagn::Span)
172        -> &'src str
173    {
174        let start = self.get_index_at_span_start(span);
175        let end = self.get_index_at_span_end(span);
176        &self.src[start..end]
177    }
178
179
180    pub fn get_cursor_limit(
181        &self)
182        -> usize
183    {
184        self.cursor_limit
185    }
186
187
188    pub fn set_cursor_limit(
189        &mut self,
190        end: usize)
191    {
192        self.cursor_limit = end;
193    }
194
195
196    fn char_at(
197        &self,
198        byte_index: usize)
199        -> char
200    {
201        if byte_index >= self.cursor_limit
202        {
203            '\0'
204        }
205        else
206        {
207            self.src[byte_index..self.cursor_limit]
208                .chars()
209                .next()
210                .unwrap_or('\0')
211        }
212    }
213
214
215    pub fn next_char(
216        &self)
217        -> char
218    {
219        self.char_at(self.cursor_index)
220    }
221
222
223    fn token_at(
224        &self,
225        byte_index: usize)
226        -> syntax::Token
227    {
228        if byte_index >= self.cursor_limit
229        {
230            let span_index =
231                self.span_offset + self.cursor_limit;
232            
233            let span = diagn::Span::new(
234                self.file_handle,
235                span_index,
236                span_index);
237
238            return syntax::Token {
239                kind: syntax::TokenKind::LineBreak,
240                span,
241            };
242        }
243
244        let src_next = &self.src[byte_index..self.cursor_limit];
245        let (kind, length) = syntax::decide_next_token(src_next);
246
247        let end = byte_index + length;
248
249        let span = diagn::Span::new(
250            self.file_handle,
251            self.span_offset + byte_index,
252            self.span_offset + end);
253        
254        syntax::Token {
255            kind,
256            span,
257        }
258    }
259
260
261    pub fn next_token(
262        &self)
263        -> syntax::Token
264    {
265        self.token_at(self.cursor_index)
266    }
267
268
269    pub fn next_nth_token(
270        &self,
271        mut nth: usize)
272        -> syntax::Token
273    {
274        let mut byte_index = self.cursor_index;
275
276        loop
277        {
278            let token = self.token_at(byte_index);
279
280            if nth == 0
281                { return token; }
282
283            if byte_index >= self.cursor_limit
284                { return token; }
285
286            nth -= 1;
287            byte_index += token.span.length();
288        }
289    }
290
291
292    pub fn next_nth_useful_token(
293        &self,
294        mut nth: usize)
295        -> syntax::Token
296    {
297        let mut byte_index = self.cursor_index;
298
299        loop
300        {
301            let token = self.token_at(byte_index);
302
303            if byte_index >= self.cursor_limit
304                { return token; }
305
306            if !token.kind.is_ignorable()
307            {
308                if nth == 0
309                    { return token; }
310                
311                nth -= 1;
312            }
313
314            byte_index += token.span.length();
315        }
316    }
317
318
319    fn next_useful_token(
320        &self)
321        -> syntax::Token
322    {
323        self.next_nth_useful_token(0)
324    }
325	
326	
327	pub fn next_linebreak(
328        &self)
329        -> Option<syntax::Token>
330	{
331        let mut byte_index = self.cursor_index;
332
333        loop
334        {
335            let token = self.token_at(byte_index);
336
337            if token.kind == syntax::TokenKind::LineBreak
338                { return Some(token); }
339
340            if !token.kind.is_ignorable()
341                { return None; }
342
343            byte_index += token.span.length();
344        }
345	}
346
347	
348	pub fn next_useful_is(
349		&mut self,
350        nth: usize,
351		kind: syntax::TokenKind)
352		-> bool
353	{
354        let token = self.next_nth_useful_token(nth);
355        token.kind == kind
356	}
357
358	
359	pub fn maybe_expect(
360		&mut self,
361		kind: syntax::TokenKind)
362		-> Option<syntax::Token>
363	{
364        let token = self.next_useful_token();
365        if token.kind == kind
366		{
367            let token = token.clone();
368            self.advance_to_token_end(&token);
369			Some(token)
370		}
371		else
372        {
373            None
374        }
375	}
376	
377	
378	pub fn expect(
379		&mut self,
380		report: &mut diagn::Report,
381		kind: syntax::TokenKind)
382		-> Result<syntax::Token, ()>
383	{
384		match self.maybe_expect(kind)
385		{
386			Some(token) => Ok(token),
387			None =>
388			{
389				report.error_span(
390                    format!("expected {}", kind.printable()),
391                    self.get_cursor_span());
392                
393				Err(())
394			}
395		}
396	}
397
398	
399	pub fn maybe_expect_char(
400		&mut self,
401		wanted_char: char)
402		-> bool
403	{
404        let index = self.next_useful_index();
405
406        let c = self.char_at(index);
407
408        if c.eq_ignore_ascii_case(&wanted_char)
409		{
410            self.cursor_index =
411                index +
412                c.len_utf8();
413            
414			true
415		}
416		else
417        {
418            false
419        }
420	}
421	
422	
423	pub fn expect_linebreak(
424        &mut self,
425		report: &mut diagn::Report)
426        -> Result<(), ()>
427	{
428		match self.maybe_expect_linebreak()
429		{
430			Some(()) => Ok(()),
431			None =>
432			{
433				report.error_span(
434                    format!(
435                        "expected {}",
436                        syntax::TokenKind::LineBreak.printable()),
437                    self.get_cursor_span());
438                
439				Err(())
440			}
441		}
442	}
443	
444	
445	pub fn maybe_expect_linebreak(&mut self) -> Option<()>
446	{
447		if let Some(token) = self.next_linebreak()
448		{
449            self.advance_to_token_end(&token);
450			Some(())
451		}
452		else
453		{
454			None
455		}
456	}
457
458
459	pub fn advance_until_closing_brace(
460		&mut self)
461		-> Walker<'src>
462	{
463		let start = self.cursor_index;
464
465		let mut brace_nesting = 0;
466
467		while !self.is_over()
468		{
469            let c = self.next_char();
470
471            if c == '{'
472			{
473				brace_nesting += 1;
474			}
475			else if c == '}'
476			{
477                if brace_nesting == 0
478                    { break; }
479                
480				brace_nesting -= 1;
481			}
482
483            self.cursor_index += c.len_utf8();
484		}
485
486		let end = self.cursor_index;
487
488        self.slice(start, end)
489	}
490
491
492	pub fn advance_until_linebreak(
493		&mut self)
494		-> Walker<'src>
495	{
496		let start = self.cursor_index;
497		let mut end = self.cursor_index;
498
499		let mut brace_nesting = 0;
500
501		while !self.is_over()
502		{
503            let token = self.next_token();
504
505            if token.kind == syntax::TokenKind::LineBreak &&
506                brace_nesting == 0
507            {
508                break;
509            }
510            else if token.kind == syntax::TokenKind::BraceOpen
511			{
512				brace_nesting += 1;
513			}
514			else if token.kind == syntax::TokenKind::BraceClose
515			{
516                if brace_nesting == 0
517                    { break; }
518                
519				brace_nesting -= 1;
520			}
521
522            self.advance_to_token_end(&token);
523
524            if !token.kind.is_ignorable()
525            {
526                end = self.cursor_index;
527            }
528		}
529
530        self.slice(start, end)
531	}
532
533
534	pub fn find_lookahead_char_index(
535		&self,
536        wanted_char: char)
537		-> Option<usize>
538	{
539		let mut byte_index = self.cursor_index;
540
541        let mut seen_tokens = false;
542        let mut paren_nesting = 0;
543		let mut brace_nesting = 0;
544
545		while byte_index < self.cursor_limit
546		{
547            let c = self.char_at(byte_index);
548
549            if c.eq_ignore_ascii_case(&wanted_char) &&
550                seen_tokens &&
551                paren_nesting == 0 &&
552                brace_nesting == 0
553            {
554                return Some(byte_index);
555            }
556            else if c == '('
557			{
558				paren_nesting += 1;
559			}
560			else if c == ')'
561			{
562                if paren_nesting == 0
563                    { break; }
564                
565                paren_nesting -= 1;
566			}
567            else if c == '{'
568			{
569				brace_nesting += 1;
570			}
571			else if c == '}'
572			{
573                if brace_nesting == 0
574                    { break; }
575                
576				brace_nesting -= 1;
577			}
578
579
580            byte_index += c.len_utf8();
581
582            if !syntax::token::is_whitespace(c)
583            {
584                seen_tokens = true;
585            }
586		}
587
588        None
589	}
590}