ad_editor/syntax/
mod.rs

1//! Syntax highlighting support
2//!
3//! Producing the token stream for a given buffer is handled in a multi-step process in
4//! order to support caching of tokens per-line and not baking in an explicit rendered
5//! representation (e.g. ANSI terminal escape codes) to the output.
6//!   - The file as a whole is tokenized via tree-sitter using a user provided query
7//!   - Tokens are obtained per-line using a [LineIter] which may be efficiently started
8//!     at a non-zero line offset when needed
9//!   - The [TokenIter] type returned by [LineIter] yields [RangeToken]s containing the
10//!     tags provided by the user in their query
11//!   - [TK_DEFAULT] tokens are injected between those identified by the user's query so
12//!     that the full token stream from a [TokenIter] will always contain the complete
13//!     text of the raw buffer line
14//!   - [RangeToken]s are tagged byte offsets within the parent GapBuffer which may be used
15//!     to extract and render sub-regions of text. In order to implement horizontal scrolling
16//!     and clamping of text based on the available screen columns, a UI implementation will
17//!     need to make use of [unicode_width::UnicodeWidthChar] in order to determine whether
18//!     none, part or all of any given token should be rendered.
19use crate::{
20    Config,
21    buffer::{GapBuffer, Slice},
22    dot::Range,
23};
24use std::{
25    cmp::{Ord, Ordering, PartialOrd, max, min},
26    iter::Peekable,
27    slice,
28};
29
// Backend providing `re::ReState`, selected by `SyntaxState::try_new` when a filetype has a
// non-empty `re_syntax` config (presumably regex based -- confirm against the module itself).
pub mod re;
// Backend providing `ts::TsState`, the tree-sitter based state used for every other filetype.
pub mod ts;
32
/// Tag used for regions of a line that are not covered by any other token.
pub const TK_DEFAULT: &str = "default";
/// Tag used for the part of a line that is covered by the dot range given to a [LineIter].
pub const TK_DOT: &str = "dot";
/// Tag used for the part of a line covered by the load/exec range when its flag is `true`.
pub const TK_LOAD: &str = "load";
/// Tag used for the part of a line covered by the load/exec range when its flag is `false`.
pub const TK_EXEC: &str = "exec";
37
/// Buffer level state for parsing and highlighting visible lines.
///
/// This is a thin wrapper around one of the backends in [SyntaxStateInner] plus the details
/// of any edit that has been prepared but not yet applied.
#[derive(Debug)]
pub struct SyntaxState {
    /// `(start_byte, old_end_byte, new_end_byte)` of an edit recorded by one of the
    /// `prepare_*` methods and consumed by `apply_prepared_edit`. The `prepare_*` methods
    /// only set this when the backend is [SyntaxStateInner::Ts].
    pub(crate) pending_edit: Option<(usize, usize, usize)>,
    /// The backend specific state that all other methods dispatch to.
    pub(crate) inner: SyntaxStateInner,
}
44
/// The backend used by a [SyntaxState] to produce tokens.
#[derive(Debug)]
pub(crate) enum SyntaxStateInner {
    /// Tree-sitter backed state. This is the only variant that tracks edits and the only one
    /// able to pretty print a syntax tree.
    Ts(ts::TsState),
    /// State built from a filetype's `re_syntax` config.
    Re(re::ReState),
}
50
51impl SyntaxState {
52    pub fn try_new(lang: &str, gb: &GapBuffer, cfg: &Config) -> Result<Self, String> {
53        let lang_cfg = cfg
54            .filetypes
55            .get(lang)
56            .ok_or_else(|| format!("unknown language {lang:?}"))?;
57
58        let inner = if lang_cfg.re_syntax.is_empty() {
59            SyntaxStateInner::Ts(ts::TsState::try_new(
60                lang,
61                &cfg.tree_sitter.parser_dir,
62                &cfg.tree_sitter.syntax_query_dir,
63                gb,
64            )?)
65        } else {
66            SyntaxStateInner::Re(re::ReState::new(&lang_cfg.re_syntax)?)
67        };
68
69        Ok(Self {
70            pending_edit: None,
71            inner,
72        })
73    }
74
75    #[cfg(test)]
76    pub(crate) fn ts(inner: ts::TsState) -> Self {
77        Self {
78            pending_edit: None,
79            inner: SyntaxStateInner::Ts(inner),
80        }
81    }
82
83    pub fn prepare_insert_char(&mut self, idx: usize, ch: char, gb: &GapBuffer) {
84        if let SyntaxStateInner::Ts(ts) = &self.inner {
85            self.pending_edit = Some(ts.prepare_insert_char(idx, ch, gb));
86        }
87    }
88
89    pub fn prepare_insert_string(&mut self, idx: usize, s: &str, gb: &GapBuffer) {
90        if let SyntaxStateInner::Ts(ts) = &self.inner {
91            self.pending_edit = Some(ts.prepare_insert_string(idx, s, gb));
92        }
93    }
94
95    pub fn prepare_delete_char(&mut self, idx: usize, gb: &GapBuffer) {
96        if let SyntaxStateInner::Ts(ts) = &self.inner {
97            self.pending_edit = Some(ts.prepare_delete_char(idx, gb));
98        }
99    }
100
101    pub fn prepare_delete_range(&mut self, from: usize, to: usize, gb: &GapBuffer) {
102        if let SyntaxStateInner::Ts(ts) = &self.inner {
103            self.pending_edit = Some(ts.prepare_delete_range(from, to, gb));
104        }
105    }
106
107    /// Mirror an edit that has been made to the underlying GapBuffer to the syntax state in
108    /// order to keep syntax ranges in sync.
109    ///
110    /// # Panics
111    /// This method will panic if the edit was not previously prepared using one of the `prepare_*`
112    /// methods.
113    pub fn apply_prepared_edit(&mut self, gb: &GapBuffer) {
114        // Only TsState needs to care about tracking edits
115        if let SyntaxStateInner::Ts(ts) = &mut self.inner {
116            let (start_byte, old_end_byte, new_end_byte) = self
117                .pending_edit
118                .take()
119                .expect("edit should have been prepared");
120
121            ts.apply_prepared_edit(start_byte, old_end_byte, new_end_byte, gb);
122        }
123    }
124
125    /// Update internal state for the requested region to prepare for a call to
126    /// [Self::iter_tokenized_lines_from].
127    pub fn update(&mut self, gb: &GapBuffer, from: usize, n_rows: usize) {
128        match &mut self.inner {
129            SyntaxStateInner::Ts(s) => s.update(gb, from, n_rows),
130            SyntaxStateInner::Re(s) => s.update(gb, from, n_rows),
131        }
132    }
133
134    /// Yield per-line [RangeToken]s for use in a UI impl to render the contents of the associated
135    /// buffer.
136    #[inline]
137    pub fn iter_tokenized_lines_from<'a>(
138        &'a self,
139        line: usize,
140        gb: &'a GapBuffer,
141        dot_range: Range,
142        load_exec_range: Option<(bool, Range)>,
143    ) -> LineIter<'a> {
144        match &self.inner {
145            SyntaxStateInner::Ts(s) => {
146                s.iter_tokenized_lines_from(line, gb, dot_range, load_exec_range)
147            }
148            SyntaxStateInner::Re(s) => {
149                s.iter_tokenized_lines_from(line, gb, dot_range, load_exec_range)
150            }
151        }
152    }
153
154    /// Return a string representation of the current syntax tree if possible
155    pub fn pretty_print_tree(&self) -> Option<String> {
156        match &self.inner {
157            SyntaxStateInner::Ts(s) => Some(s.pretty_print_tree()),
158            SyntaxStateInner::Re(_) => None,
159        }
160    }
161}
162
/// Byte offsets within a Buffer
///
/// The derived ordering compares `from` first and then `to`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct ByteRange {
    /// Byte offset of the start of the range
    pub(crate) from: usize,
    /// Byte offset of the end of the range
    pub(crate) to: usize,
}
169
170impl ByteRange {
171    fn from_range(r: Range, gb: &GapBuffer) -> Self {
172        let Range { start, mut end, .. } = r;
173
174        // For cursor ranges we don't highlight and for "real" ranges we need to
175        // insert the end of the range _after_ the end index.
176        if end.idx != start.idx {
177            end.idx += 1;
178        }
179
180        Self {
181            from: gb.char_to_byte(start.idx),
182            to: gb.char_to_byte(end.idx),
183        }
184    }
185
186    #[inline]
187    fn intersects(&self, start_byte: usize, end_byte: usize) -> bool {
188        self.from <= end_byte && start_byte <= self.to
189    }
190
191    #[inline]
192    fn contains(&self, start_byte: usize, end_byte: usize) -> bool {
193        self.from <= start_byte && self.to >= end_byte
194    }
195
196    /// Convert this [ByteRange] into a [RangeToken] if it intersects with the provided
197    /// start and end point.
198    fn try_as_token<'a>(
199        &self,
200        ty: &'a str,
201        start_byte: usize,
202        end_byte: usize,
203    ) -> Option<RangeToken<'a>> {
204        if self.intersects(start_byte, end_byte) {
205            Some(RangeToken {
206                tag: ty,
207                r: ByteRange {
208                    from: max(self.from, start_byte),
209                    to: min(self.to, end_byte),
210                },
211            })
212        } else {
213            None
214        }
215    }
216}
217
/// A tagged [ByteRange] denoting which tree-sitter capture index from our scheme query
/// matched this range within the buffer. A cap_idx of [None] indicates that this is a
/// default range for the purposes of syntax highlighting
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SyntaxRange {
    /// Index into the capture names held by a [LineIter] / [TokenIter], or [None] for a
    /// range that should be tagged as [TK_DEFAULT].
    cap_idx: Option<usize>,
    /// The bytes of the buffer covered by this range.
    r: ByteRange,
}
226
/// A tagged region of a single line, expressed as byte offsets into the parent GapBuffer.
///
/// The tag is either one of the capture names supplied to the [LineIter] or one of the
/// `TK_*` constants defined in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RangeToken<'a> {
    /// The tag identifying how this region should be styled.
    pub(crate) tag: &'a str,
    /// The bytes of the buffer covered by this token.
    pub(crate) r: ByteRange,
}
232
233impl RangeToken<'_> {
234    pub fn tag(&self) -> &str {
235        self.tag
236    }
237
238    pub fn as_slice<'a>(&self, gb: &'a GapBuffer) -> Slice<'a> {
239        gb.slice_from_byte_offsets(self.r.from, self.r.to)
240    }
241
242    #[inline]
243    fn split(self, at: usize) -> (Self, Self) {
244        (
245            RangeToken {
246                tag: self.tag,
247                r: ByteRange {
248                    from: self.r.from,
249                    to: at,
250                },
251            },
252            RangeToken {
253                tag: self.tag,
254                r: ByteRange {
255                    from: at,
256                    to: self.r.to,
257                },
258            },
259        )
260    }
261}
262
263impl PartialOrd for SyntaxRange {
264    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
265        Some(self.cmp(other))
266    }
267}
268
269impl Ord for SyntaxRange {
270    fn cmp(&self, other: &Self) -> Ordering {
271        self.r.cmp(&other.r)
272    }
273}
274
/// Yield sub-iterators of tokens per-line in a file.
///
/// Any given SyntaxRange coming from the underlying Tokenizer may be
/// used by multiple [TokenIter]s coming from this iterator if the range
/// in question spans multiple lines
#[derive(Debug)]
pub struct LineIter<'a> {
    /// capture names to be used as the token types
    names: &'a [String],
    /// the underlying buffer being iterated
    gb: &'a GapBuffer,
    /// full set of syntax ranges for the input
    ranges: &'a [SyntaxRange],
    /// the next line to yield
    line: usize,
    /// total number of lines (cached from gb.len_lines())
    n_lines: usize,
    /// byte offsets of the dot range, clamped per-line and tagged as [TK_DOT]
    dot_range: ByteRange,
    /// byte offsets of the load / exec range (if there is one), clamped per-line and
    /// tagged as [TK_LOAD] when the flag is `true` and [TK_EXEC] otherwise
    load_exec_range: Option<(bool, ByteRange)>,
}
295
296impl<'a> LineIter<'a> {
297    pub(crate) fn new(
298        line: usize,
299        gb: &'a GapBuffer,
300        dot_range: Range,
301        load_exec_range: Option<(bool, Range)>,
302        names: &'a [String],
303        ranges: &'a [SyntaxRange],
304    ) -> LineIter<'a> {
305        let dot_range = ByteRange::from_range(dot_range, gb);
306        let load_exec_range =
307            load_exec_range.map(|(is_load, r)| (is_load, ByteRange::from_range(r, gb)));
308
309        LineIter {
310            names,
311            gb,
312            ranges,
313            line,
314            n_lines: gb.len_lines(),
315            dot_range,
316            load_exec_range,
317        }
318    }
319}
320
321impl<'a> Iterator for LineIter<'a> {
322    type Item = TokenIter<'a>;
323
324    fn next(&mut self) -> Option<Self::Item> {
325        if self.line == self.n_lines {
326            return None;
327        }
328
329        let start_byte = self.gb.line_to_byte(self.line);
330        let end_byte = self.gb.line_end_byte(self.line);
331
332        self.line += 1;
333
334        // Determine tokens required for the next line
335        let held: Option<RangeToken<'_>>;
336        let ranges: Peekable<slice::Iter<'_, SyntaxRange>>;
337
338        let dot_range = self.dot_range.try_as_token(TK_DOT, start_byte, end_byte);
339        let load_exec_range = self.load_exec_range.and_then(|(is_load, br)| {
340            let ty = if is_load { TK_LOAD } else { TK_EXEC };
341            br.try_as_token(ty, start_byte, end_byte)
342        });
343
344        loop {
345            match self.ranges.first() {
346                // Advance to the next range
347                Some(sr) if sr.r.to < start_byte => {
348                    self.ranges = &self.ranges[1..];
349                }
350
351                // End of known tokens so everything else is just TK_DEFAULT
352                None => {
353                    held = Some(RangeToken {
354                        tag: TK_DEFAULT,
355                        r: ByteRange {
356                            from: start_byte,
357                            to: end_byte,
358                        },
359                    });
360                    ranges = [].iter().peekable();
361                    break;
362                }
363
364                // The next range is beyond this line
365                Some(sr) if sr.r.from >= end_byte => {
366                    held = Some(RangeToken {
367                        tag: TK_DEFAULT,
368                        r: ByteRange {
369                            from: start_byte,
370                            to: end_byte,
371                        },
372                    });
373                    ranges = [].iter().peekable();
374                    break;
375                }
376
377                // The next range fully contains the line
378                Some(sr) if sr.r.contains(start_byte, end_byte) => {
379                    held = Some(RangeToken {
380                        tag: sr
381                            .cap_idx
382                            .map(|i| self.names[i].as_ref())
383                            .unwrap_or(TK_DEFAULT),
384                        r: ByteRange {
385                            from: start_byte,
386                            to: end_byte,
387                        },
388                    });
389                    ranges = [].iter().peekable();
390                    break;
391                }
392
393                // The next range starts at the beginning of the line or ends within the line
394                Some(sr) => {
395                    assert!(sr.r.from < end_byte);
396                    if sr.r.from > start_byte {
397                        held = Some(RangeToken {
398                            tag: TK_DEFAULT,
399                            r: ByteRange {
400                                from: start_byte,
401                                to: sr.r.from,
402                            },
403                        });
404                    } else {
405                        held = None;
406                    }
407                    ranges = self.ranges.iter().peekable();
408                    break;
409                }
410            }
411        }
412
413        Some(TokenIter {
414            start_byte,
415            end_byte,
416            names: self.names,
417            ranges,
418            held,
419            dot_held: None,
420            dot_range,
421            load_exec_range,
422        })
423    }
424}
425
/// Shorthand used to keep the [Held] variants readable.
type Rt<'a> = RangeToken<'a>;

/// Between one and five tokens, in the order in which they should be emitted.
///
/// A [TokenIter] starts from a single syntax token and splits it as the dot and load / exec
/// selections are applied on top of it, stashing the result until every token has been
/// yielded.
#[derive(Debug, PartialEq, Eq)]
enum Held<'a> {
    One(Rt<'a>),
    Two(Rt<'a>, Rt<'a>),
    Three(Rt<'a>, Rt<'a>, Rt<'a>),
    Four(Rt<'a>, Rt<'a>, Rt<'a>, Rt<'a>),
    Five(Rt<'a>, Rt<'a>, Rt<'a>, Rt<'a>, Rt<'a>),
}
436
437impl Held<'_> {
438    fn byte_from_to(&self) -> (usize, usize) {
439        match self {
440            Held::One(a) => (a.r.from, a.r.to),
441            Held::Two(a, b) => (a.r.from, b.r.to),
442            Held::Three(a, _, b) => (a.r.from, b.r.to),
443            Held::Four(a, _, _, b) => (a.r.from, b.r.to),
444            Held::Five(a, _, _, _, b) => (a.r.from, b.r.to),
445        }
446    }
447
448    fn split(self, at: usize) -> (Self, Self) {
449        use Held::*;
450
451        match self {
452            One(a) => {
453                let (l, r) = a.split(at);
454                (One(l), One(r))
455            }
456
457            Two(a, b) => {
458                if at == a.r.to {
459                    (One(a), One(b))
460                } else if a.r.contains(at, at) {
461                    let (l, r) = a.split(at);
462                    (One(l), Two(r, b))
463                } else {
464                    let (l, r) = b.split(at);
465                    (Two(a, l), One(r))
466                }
467            }
468
469            Three(a, b, c) => {
470                if at == a.r.to {
471                    (One(a), Two(b, c))
472                } else if at == b.r.to {
473                    (Two(a, b), One(c))
474                } else if a.r.contains(at, at) {
475                    let (l, r) = a.split(at);
476                    (One(l), Three(r, b, c))
477                } else if b.r.contains(at, at) {
478                    let (l, r) = b.split(at);
479                    (Two(a, l), Two(r, c))
480                } else {
481                    let (l, r) = c.split(at);
482                    (Three(a, b, l), One(r))
483                }
484            }
485
486            Four(_, _, _, _) => unreachable!("only called for 1-3"),
487            Five(_, _, _, _, _) => unreachable!("only called for 1-3"),
488        }
489    }
490
491    fn join(self, other: Self) -> Self {
492        use Held::*;
493
494        match (self, other) {
495            (One(a), One(b)) => Two(a, b),
496            (One(a), Two(b, c)) => Three(a, b, c),
497            (One(a), Three(b, c, d)) => Four(a, b, c, d),
498            (One(a), Four(b, c, d, e)) => Five(a, b, c, d, e),
499
500            (Two(a, b), One(c)) => Three(a, b, c),
501            (Two(a, b), Two(c, d)) => Four(a, b, c, d),
502            (Two(a, b), Three(c, d, e)) => Five(a, b, c, d, e),
503
504            (Three(a, b, c), One(d)) => Four(a, b, c, d),
505            (Three(a, b, c), Two(d, e)) => Five(a, b, c, d, e),
506
507            (Four(a, b, c, d), One(e)) => Five(a, b, c, d, e),
508
509            _ => unreachable!("only have a max of 5 held"),
510        }
511    }
512}
513
/// An iterator of tokens for a single line.
///
/// "default" ranges will be injected in-between the known syntax regions
/// so a consumer may treat the output of this iterator as a continuous,
/// non-overlapping set of sub-regions spanning a single line within a
/// given buffer.
#[derive(Debug)]
pub struct TokenIter<'a> {
    /// byte offset for the start of this line
    start_byte: usize,
    /// byte offset for the end of this line
    end_byte: usize,
    /// Capture names to be used as the token types
    names: &'a [String],
    /// The set of ranges applicable to this line
    ranges: Peekable<slice::Iter<'a, SyntaxRange>>,
    /// When yielding a dot range we may end up partially consuming
    /// the following range so we need to stash a Token for yielding
    /// on the next call to .next()
    ///
    /// This is also where the "default" token covering the gap after a
    /// syntax range is stored until it is emitted.
    held: Option<RangeToken<'a>>,
    /// Tokens produced by applying the dot and load / exec selections that
    /// are still waiting to be yielded. These are always emitted before
    /// anything new is pulled from `ranges`.
    dot_held: Option<Held<'a>>,
    /// The part of the dot range that falls on this line (if any). This is
    /// taken the first time it intersects a token being emitted.
    dot_range: Option<RangeToken<'a>>,
    /// The part of the load / exec range that falls on this line (if any).
    /// This is taken the first time it intersects a token being emitted.
    load_exec_range: Option<RangeToken<'a>>,
}
538
539impl<'a> TokenIter<'a> {
540    fn next_without_selections(&mut self) -> Option<RangeToken<'a>> {
541        let held = self.held.take();
542        if held.is_some() {
543            return held;
544        }
545
546        let next = self.ranges.next()?;
547
548        if next.r.from > self.end_byte {
549            // Next available token is after this line and any 'default' held token will
550            // have been emitted above before we hit this point, so we're done.
551            return None;
552        } else if next.r.to >= self.end_byte {
553            // Last token runs until at least the end of this line so we just need to truncate
554            // to the end of the line and ensure that the following call to .next() returns None.
555            self.ranges = [].iter().peekable();
556
557            return Some(RangeToken {
558                tag: next
559                    .cap_idx
560                    .map(|i| self.names[i].as_ref())
561                    .unwrap_or(TK_DEFAULT),
562                r: ByteRange {
563                    from: max(next.r.from, self.start_byte),
564                    to: self.end_byte,
565                },
566            });
567        }
568
569        match self.ranges.peek() {
570            Some(sr) if sr.r.from > self.end_byte => {
571                self.ranges = [].iter().peekable();
572
573                self.held = Some(RangeToken {
574                    tag: TK_DEFAULT,
575                    r: ByteRange {
576                        from: next.r.to,
577                        to: self.end_byte,
578                    },
579                });
580            }
581
582            Some(sr) if sr.r.from > next.r.to => {
583                self.held = Some(RangeToken {
584                    tag: TK_DEFAULT,
585                    r: ByteRange {
586                        from: next.r.to,
587                        to: sr.r.from,
588                    },
589                });
590            }
591
592            None if next.r.to < self.end_byte => {
593                self.held = Some(RangeToken {
594                    tag: TK_DEFAULT,
595                    r: ByteRange {
596                        from: next.r.to,
597                        to: self.end_byte,
598                    },
599                });
600            }
601
602            _ => (),
603        }
604
605        Some(RangeToken {
606            tag: next
607                .cap_idx
608                .map(|i| self.names[i].as_ref())
609                .unwrap_or(TK_DEFAULT),
610            r: ByteRange {
611                from: max(next.r.from, self.start_byte),
612                to: next.r.to,
613            },
614        })
615    }
616
617    fn update_held(&mut self, mut held: Held<'a>, rt: RangeToken<'a>) -> Held<'a> {
618        let (self_from, self_to) = held.byte_from_to();
619        let (from, to) = (rt.r.from, rt.r.to);
620
621        match (from.cmp(&self_from), to.cmp(&self_to)) {
622            (Ordering::Less, _) => unreachable!("only called when rt >= self"),
623
624            (Ordering::Equal, Ordering::Less) => {
625                // hold rt then remaining of held
626                let (_, r) = held.split(to);
627                held = Held::One(rt).join(r);
628            }
629
630            (Ordering::Greater, Ordering::Less) => {
631                // hold held up to rt, rt & held from rt
632                let (l, r) = held.split(from);
633                let (_, r) = r.split(to);
634                held = l.join(Held::One(rt)).join(r);
635            }
636
637            (Ordering::Equal, Ordering::Equal) => {
638                // replace held with rt
639                held = Held::One(rt);
640            }
641
642            (Ordering::Greater, Ordering::Equal) => {
643                // hold held to rt & rt
644                let (l, _) = held.split(from);
645                held = l.join(Held::One(rt));
646            }
647
648            (Ordering::Equal, Ordering::Greater) => {
649                // hold rt, consume to find other held tokens (if any)
650                held = self.find_end_of_selection(Held::One(rt), to);
651            }
652
653            (Ordering::Greater, Ordering::Greater) => {
654                // hold held to rt & rt, consume to find other held tokens (if any)
655                let (l, _) = held.split(from);
656                held = self.find_end_of_selection(l.join(Held::One(rt)), to);
657            }
658        }
659
660        held
661    }
662
663    fn find_end_of_selection(&mut self, mut held: Held<'a>, to: usize) -> Held<'a> {
664        loop {
665            let mut next = match self.next_without_selections() {
666                None => break,
667                Some(next) => next,
668            };
669            if next.r.to <= to {
670                continue; // token is entirely within rt
671            }
672            next.r.from = to;
673            held = held.join(Held::One(next));
674            break;
675        }
676
677        held
678    }
679
680    fn pop(&mut self) -> Option<RangeToken<'a>> {
681        match self.dot_held {
682            None => None,
683            Some(Held::One(a)) => {
684                self.dot_held = None;
685                Some(a)
686            }
687            Some(Held::Two(a, b)) => {
688                self.dot_held = Some(Held::One(b));
689                Some(a)
690            }
691            Some(Held::Three(a, b, c)) => {
692                self.dot_held = Some(Held::Two(b, c));
693                Some(a)
694            }
695            Some(Held::Four(a, b, c, d)) => {
696                self.dot_held = Some(Held::Three(b, c, d));
697                Some(a)
698            }
699            Some(Held::Five(a, b, c, d, e)) => {
700                self.dot_held = Some(Held::Four(b, c, d, e));
701                Some(a)
702            }
703        }
704    }
705}
706
707impl<'a> Iterator for TokenIter<'a> {
708    type Item = RangeToken<'a>;
709
710    fn next(&mut self) -> Option<Self::Item> {
711        // Emit pre-computed held tokens first
712        let next = self.pop();
713        if next.is_some() {
714            return next;
715        }
716
717        // Determine the next token we would emit in the absence of any user selections and then
718        // apply the selections in priority order:
719        //   - dot overwrites original syntax highlighting
720        //   - load/exec overwrite dot
721        #[inline]
722        fn intersects(opt: &Option<RangeToken<'_>>, from: usize, to: usize) -> bool {
723            opt.as_ref()
724                .map(|rt| rt.r.intersects(from, to))
725                .unwrap_or(false)
726        }
727
728        let next = self.next_without_selections()?;
729        let (from, to) = (next.r.from, next.r.to);
730        let mut held = Held::One(next);
731
732        if intersects(&self.dot_range, from, to) {
733            let r = self.dot_range.take().unwrap();
734            held = self.update_held(held, r);
735        }
736
737        let (from, to) = held.byte_from_to();
738        if intersects(&self.load_exec_range, from, to) {
739            let r = self.load_exec_range.take().unwrap();
740            held = self.update_held(held, r);
741        }
742
743        if let Held::One(rt) = held {
744            Some(rt) // held_dot is None so just return the token directly
745        } else {
746            self.dot_held = Some(held);
747            self.pop()
748        }
749    }
750}
751
#[cfg(test)]
mod tests {
    use super::*;
    use simple_test_case::test_case;

    // A syntax range using capture index 0 (named "string" in the test below)
    fn sr(from: usize, to: usize) -> SyntaxRange {
        SyntaxRange {
            cap_idx: Some(0),
            r: ByteRange { from, to },
        }
    }

    // A TK_DEFAULT token
    fn rt_def(from: usize, to: usize) -> RangeToken<'static> {
        RangeToken {
            tag: TK_DEFAULT,
            r: ByteRange { from, to },
        }
    }

    // A TK_DOT token
    fn rt_dot(from: usize, to: usize) -> RangeToken<'static> {
        RangeToken {
            tag: TK_DOT,
            r: ByteRange { from, to },
        }
    }

    // A TK_EXEC token
    fn rt_exe(from: usize, to: usize) -> RangeToken<'static> {
        RangeToken {
            tag: TK_EXEC,
            r: ByteRange { from, to },
        }
    }

    // A token tagged with the "string" capture name
    fn rt_str(from: usize, to: usize) -> RangeToken<'static> {
        RangeToken {
            tag: "string",
            r: ByteRange { from, to },
        }
    }

    // Each case provides, in order:
    //   - the tokens already held when the selection is applied
    //   - the token stashed in the iterator's `held` field (if any)
    //   - the selection being applied
    //   - the syntax ranges remaining in the iterator
    //   - the expected held tokens after the update
    //
    // range at start of single token
    #[test_case(
        Held::One(rt_str(0, 5)),
        None,
        rt_dot(0, 5),
        &[sr(10, 15)],
        Held::One(rt_dot(0, 5));
        "held one range matches held"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        None,
        rt_dot(0, 3),
        &[sr(10, 15)],
        Held::Two(rt_dot(0, 3), rt_str(3, 5));
        "held one range start to within held"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(0, 7),
        &[sr(10, 15), sr(20, 30)],
        Held::Two(rt_dot(0, 7), rt_def(7, 10));
        "held one range start to past held but before next token"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(0, 13),
        &[sr(10, 15), sr(20, 30)],
        Held::Two(rt_dot(0, 13), rt_str(13, 15));
        "held one range start to into next token"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(0, 16),
        &[sr(10, 15), sr(20, 30)],
        Held::Two(rt_dot(0, 16), rt_def(16, 20));
        "held one range start to past next token"
    )]
    // range within single token
    #[test_case(
        Held::One(rt_str(0, 5)),
        None,
        rt_dot(3, 5),
        &[sr(10, 15)],
        Held::Two(rt_str(0, 3), rt_dot(3, 5));
        "held one range from within to end of held"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        None,
        rt_dot(2, 4),
        &[sr(10, 15)],
        Held::Three(rt_str(0, 2), rt_dot(2, 4), rt_str(4, 5));
        "held one range with to within held"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(3, 7),
        &[sr(10, 15), sr(20, 30)],
        Held::Three(rt_str(0, 3), rt_dot(3, 7), rt_def(7, 10));
        "held one range within to past held but before next token"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(3, 13),
        &[sr(10, 15), sr(20, 30)],
        Held::Three(rt_str(0, 3), rt_dot(3, 13), rt_str(13, 15));
        "held one range within to into next token"
    )]
    #[test_case(
        Held::One(rt_str(0, 5)),
        Some(rt_def(5, 10)),
        rt_dot(3, 16),
        &[sr(10, 15), sr(20, 30)],
        Held::Three(rt_str(0, 3), rt_dot(3, 16), rt_def(16, 20));
        "held one range within to past next token"
    )]
    // held 2 tokens
    #[test_case(
        Held::Two(rt_str(0, 3), rt_dot(3, 5)),
        None,
        rt_exe(0, 5),
        &[sr(10, 15)],
        Held::One(rt_exe(0, 5));
        "held two range matches all held"
    )]
    #[test_case(
        Held::Two(rt_str(0, 3), rt_dot(3, 5)),
        None,
        rt_exe(2, 5),
        &[sr(10, 15)],
        Held::Two(rt_str(0, 2), rt_exe(2, 5));
        "held two range from within first to end of held"
    )]
    #[test_case(
        Held::Two(rt_str(0, 3), rt_dot(3, 5)),
        None,
        rt_exe(4, 5),
        &[sr(10, 15)],
        Held::Three(rt_str(0, 3), rt_dot(3, 4), rt_exe(4, 5));
        "held two range from within second to end of held"
    )]
    #[test_case(
        Held::Two(rt_str(0, 3), rt_dot(3, 5)),
        Some(rt_def(5, 10)),
        rt_exe(4, 8),
        &[sr(10, 15)],
        Held::Four(rt_str(0, 3), rt_dot(3, 4), rt_exe(4, 8), rt_def(8, 10));
        "held two range from within second past end of held"
    )]
    // held 3 tokens
    #[test_case(
        Held::Three(rt_str(0, 3), rt_dot(3, 5), rt_str(5, 8)),
        None,
        rt_exe(0, 8),
        &[sr(10, 15)],
        Held::One(rt_exe(0, 8));
        "held three range matches all held"
    )]
    #[test_case(
        Held::Three(rt_str(0, 3), rt_dot(3, 5), rt_str(5, 8)),
        None,
        rt_exe(2, 8),
        &[sr(10, 15)],
        Held::Two(rt_str(0, 2), rt_exe(2, 8));
        "held three range from within first to end of held"
    )]
    #[test_case(
        Held::Three(rt_str(0, 3), rt_dot(3, 5), rt_str(5, 8)),
        None,
        rt_exe(4, 8),
        &[sr(10, 15)],
        Held::Three(rt_str(0, 3), rt_dot(3, 4), rt_exe(4, 8));
        "held three range from within second to end of held"
    )]
    #[test_case(
        Held::Three(rt_str(0, 3), rt_dot(3, 6), rt_str(6, 9)),
        None,
        rt_exe(4, 5),
        &[sr(10, 15)],
        Held::Five(rt_str(0, 3), rt_dot(3, 4), rt_exe(4, 5), rt_dot(5, 6), rt_str(6, 9));
        "held three range from within second"
    )]
    #[test]
    fn update_held(
        initial: Held<'static>,
        held: Option<RangeToken<'static>>,
        r: RangeToken<'static>,
        ranges: &[SyntaxRange],
        expected: Held<'static>,
    ) {
        // The line under test always spans bytes 0..42 and has a single capture name
        let mut it = TokenIter {
            start_byte: 0,
            end_byte: 42,
            names: &["string".to_string()],
            ranges: ranges.iter().peekable(),
            held,
            dot_held: None,
            dot_range: None,
            load_exec_range: None,
        };

        let held = it.update_held(initial, r);

        assert_eq!(held, expected);
    }
}
963}