word_filter 0.8.1

A Word Filter for filtering text.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
//! Internal structs for the push-down automaton system.
//!
//! The [`WordFilter`] is internally a nondeterministic push-down automaton. The structs here
//! define the various parts of the system, including states, state types, transitions between
//! states, and stack manipulations. Additionally, instantaneous descriptions are defined to be
//! used during computation.
//!
//! Some of the structs here are made publicly visible to allow for access by code generation.
//! However, none of the structs here should be relied upon. They are not guaranteed by semver and
//! may change at any time.

#![doc(hidden)]

use alloc::{vec, vec::Vec};
use bitflags::bitflags;
use by_address::ByAddress;
use const_fn_assert::{cfn_assert, cfn_assert_eq};
use core::{
    ops::{Bound, RangeBounds},
    ptr,
};
use debug_unreachable::debug_unreachable;
use hashbrown::HashSet;

bitflags! {
    /// Bitflags that define attributes on a [`State`].
    ///
    /// These flags define boolean attributes on a `State`. Multiple flags may be set at the same
    /// time, with the exception of `WORD` and `EXCEPTION`, which are mutually exclusive.
    pub struct Flags: u8 {
        /// The state is a matching state, matching a word.
        ///
        /// If this flag is set, a word should be stored within the state as well.
        ///
        /// This flag cannot be set if `EXCEPTION` is set.
        const WORD = 0b0000_0001;
        /// The state is a matching state, matching an exception.
        ///
        /// This flag cannot be set if `WORD` is set.
        const EXCEPTION = 0b0000_0010;
        /// This is a separator state, existing within a separator subroutine.
        const SEPARATOR = 0b0000_0100;
        /// [`InstantaneousDescription`]s can return from this state.
        ///
        /// Returning pops a return state off of the stack and transitions to it.
        const RETURN = 0b0000_1000;
        /// This state can be repeated to.
        ///
        /// When transitioning out of a state with this flag, a repetition of the state may be
        /// pushed onto the stack.
        const INTO_REPETITION = 0b0001_0000;
        /// This state can process a repetition on the stack.
        const TAKE_REPETITION = 0b0010_0000;
        /// This state can enter a separator subroutine.
        const INTO_SEPARATOR = 0b0100_0000;

        /// This state is an accepting state.
        ///
        /// This is the same as saying the state is a matching state. A state should not be set
        /// with these flags, as it would set both the `WORD` and `EXCEPTION` bits, which is not
        /// valid for constructing a state. It is intended to be used as a mask for testing
        /// whether either bit is set.
        const ACCEPTING = Flags::WORD.bits() | Flags::EXCEPTION.bits();
    }
}

impl Default for Flags {
    fn default() -> Self {
        Self::empty()
    }
}

/// Attributes of a [`State`].
///
/// Contains binary flags and an optional string containing the `State`'s matched word.
///
/// Having these attributes stored together ensures that invariants can be upheld on the flags and
/// the associated word. The `WORD` flag is set if and only if the `word` field is not `None`, and
/// the `WORD` and `EXCEPTION` flags are never set at the same time. Both invariants are asserted
/// by [`Attributes::new`].
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct Attributes<'a> {
    /// Flags defining binary attributes.
    flags: Flags,
    /// A possible associated word with the state.
    ///
    /// This will be provided if and only if the `WORD` flag is present in `flags`.
    word: Option<&'a str>,
}

impl<'a> Attributes<'a> {
    /// Create a new `Attributes` struct containing the given `flags` and `word`.
    ///
    /// This associated function checks that the following invariants are upheld:
    /// - The `WORD` flag is set if and only if `word` is not `None`.
    /// - The `WORD` and `EXCEPTION` flags cannot both be set.
    ///
    /// If a strange error is encountered within this method, it is likely one of these invariants
    /// is not upheld. The strangeness of the error is a consequence of the difficulty of asserting
    /// values at compile-time.
    pub const fn new(flags: Flags, word: Option<&'a str>) -> Self {
        // If this errors with some weird error, it means that the invariant between the WORD flag
        // and the `word` attribute is not upheld.
        cfn_assert_eq!(flags.contains(Flags::WORD), word.is_some());

        // Same as above, but with regards to only one accepting flag. `contains(ACCEPTING)` is
        // only true when *both* the WORD and EXCEPTION bits are set.
        cfn_assert!(!flags.contains(Flags::ACCEPTING));

        Self { flags, word }
    }

    /// Merge another `Attributes` into `self`.
    ///
    /// The flags of `other` are added to the flags of `self`. If `other` carries a word, that word
    /// is adopted by `self`; otherwise the word already stored in `self` (if any) is kept, so that
    /// the `WORD` flag and the `word` field always stay in agreement.
    ///
    /// # Panics
    ///
    /// Panics if the merge would produce an invalid combination:
    /// - `self` is a word and `other` is an exception (or vice versa), or
    /// - both are words, but the words differ.
    #[inline]
    pub(crate) fn merge(&mut self, other: Attributes<'a>) {
        if self.flags.contains(Flags::WORD) {
            assert!(!other.flags.contains(Flags::EXCEPTION));
            assert!(!other.flags.contains(Flags::WORD) || other.word == self.word);
        }
        if self.flags.contains(Flags::EXCEPTION) {
            assert!(!other.flags.contains(Flags::WORD));
        }
        self.flags.insert(other.flags);
        // Only take `other`'s word when it actually has one. Overwriting unconditionally would
        // erase `self`'s word whenever `other` is not a word state, leaving the `WORD` flag set
        // without an accompanying word.
        if other.word.is_some() {
            self.word = other.word;
        }
    }

    /// Returns the `flags` field.
    #[inline]
    pub(crate) fn flags(&self) -> Flags {
        self.flags
    }

    /// Returns the `word` field.
    #[inline]
    pub(crate) fn word(&self) -> Option<&str> {
        self.word
    }

    /// Returns whether the `WORD` flag is set.
    #[inline]
    fn is_word(&self) -> bool {
        self.flags.contains(Flags::WORD)
    }

    /// Returns whether the `EXCEPTION` flag is set.
    #[inline]
    fn is_exception(&self) -> bool {
        self.flags.contains(Flags::EXCEPTION)
    }

    /// Returns whether the `SEPARATOR` flag is set.
    #[inline]
    fn is_separator(&self) -> bool {
        self.flags.contains(Flags::SEPARATOR)
    }

    /// Returns whether the `RETURN` flag is set.
    #[inline]
    fn is_return(&self) -> bool {
        self.flags.contains(Flags::RETURN)
    }

    /// Returns whether the `INTO_REPETITION` flag is set.
    #[inline]
    fn is_into_repetition(&self) -> bool {
        self.flags.contains(Flags::INTO_REPETITION)
    }

    /// Unsets the `INTO_REPETITION` flag, if set.
    #[inline]
    pub(crate) fn remove_into_repetition(&mut self) {
        self.flags.remove(Flags::INTO_REPETITION)
    }

    /// Returns whether the `TAKE_REPETITION` flag is set.
    #[inline]
    fn is_take_repetition(&self) -> bool {
        self.flags.contains(Flags::TAKE_REPETITION)
    }

    /// Unsets the `TAKE_REPETITION` flag, if set.
    #[inline]
    pub(crate) fn remove_take_repetition(&mut self) {
        self.flags.remove(Flags::TAKE_REPETITION)
    }

    /// Returns whether the `INTO_SEPARATOR` flag is set.
    #[inline]
    fn is_into_separator(&self) -> bool {
        self.flags.contains(Flags::INTO_SEPARATOR)
    }

    /// Unsets the `INTO_SEPARATOR` flag, if set.
    #[inline]
    pub(crate) fn remove_into_separator(&mut self) {
        self.flags.remove(Flags::INTO_SEPARATOR)
    }

    /// Returns whether one of the `WORD` or `EXCEPTION` flags is set.
    #[inline]
    fn accepting(&self) -> bool {
        self.flags.intersects(Flags::ACCEPTING)
    }
}

/// Stack-related enums.
mod stack {
    use super::State;

    /// A value on the stack.
    #[derive(Clone, Debug)]
    pub(super) enum Value<'a> {
        /// Indicates the absence of a value.
        ///
        /// This is used when the stack is empty.
        None,
        /// A return state.
        ///
        /// States stored here are returned to from states that have the `RETURN` flag set.
        Return(&'a State<'a>),
        /// A target state.
        ///
        /// States stored here must be hit before they are popped. These are pushed in repetition
        /// handling to ensure the same path is repeated.
        Target(&'a State<'a>),
        /// A repetition state.
        ///
        /// This is pushed when transitioning out of a state that has the `INTO_REPETITION` flag
        /// set, and stores that state. A later state with the `TAKE_REPETITION` flag set can pop
        /// this value and transition back to the stored state.
        Repetition(&'a State<'a>),
    }

    /// Defines a manipulation of the stack.
    #[derive(Debug)]
    pub(super) enum Manipulation<'a> {
        /// Pushes a value to the stack.
        Push(Value<'a>),
        /// Pops the top value of the stack.
        Pop,
    }
}

/// A transition between states.
#[derive(Debug)]
struct Transition<'a> {
    /// The state being transitioned to.
    state: &'a State<'a>,
    /// Manipulations to the stack that should occur if this transition is taken.
    ///
    /// These are applied in order.
    stack_manipulations: Vec<stack::Manipulation<'a>>,
    /// Whether this transition takes a repetition, i.e. pops a `Repetition` value off of the stack
    /// and jumps back to the state stored in it.
    took_repetition: bool,
}

/// A valid state within the push-down automaton.
///
/// This struct contains information about the state and the transitions that can be made from it.
///
/// `c_transitions` defines character transitions, while `aliases` and `graphemes` define
/// ε-transitions. Entering a separator is also an ε-transition, but the separator state is not
/// stored here; it is supplied by the caller when transitions are computed.
#[derive(Debug)]
pub struct State<'a> {
    /// The state's associated attributes.
    pub attributes: Attributes<'a>,
    /// Direct character transitions.
    ///
    /// Each character can only transition to one other state directly.
    pub c_transitions: fn(char) -> Option<&'a State<'a>>,
    /// Alias states and their accompanying return states.
    ///
    /// These are pairs of the form (alias_state, return_state). When computation traverses to
    /// `alias_state`, `return_state` should be pushed to the stack.
    pub aliases: &'a [(&'a State<'a>, &'a State<'a>)],
    /// Grapheme states.
    ///
    /// These are states that are traversed to from this state via ε-transitions. They are still
    /// direct paths from this state, but traverse down grapheme paths which must be handled
    /// differently from c_transitions.
    pub graphemes: &'a [&'a State<'a>],
}

impl<'a> State<'a> {
    /// Returns whether the state can be repeated to.
    #[inline]
    fn is_into_repetition(&self) -> bool {
        self.attributes.is_into_repetition()
    }

    /// Returns whether the state can process a repetition on the stack.
    #[inline]
    fn is_take_repetition(&self) -> bool {
        self.attributes.is_take_repetition()
    }

    /// Returns whether the state can enter a separator.
    #[inline]
    fn is_into_separator(&self) -> bool {
        self.attributes.is_into_separator()
    }

    /// Transition using the given input character `c` with the top-of-stack value `s`.
    ///
    /// To perform an ε-transition, a `None` value should be provided for the parameter `c`.
    ///
    /// `separator` is the entry state of the separator subroutine; it is the target of the
    /// ε-transition taken from states that have the `INTO_SEPARATOR` flag set.
    ///
    /// Returns every transition that can be taken from this state for the given input and
    /// top-of-stack value. The returned transitions only describe the stack manipulations; they
    /// are not applied here.
    #[inline]
    fn transitions(
        &'a self,
        c: Option<char>,
        s: stack::Value<'a>,
        separator: &'a State<'a>,
    ) -> Vec<Transition<'a>> {
        let mut result = Vec::new();

        match s {
            // A repetition is on top of the stack.
            stack::Value::Repetition(repetition_state) => {
                match c {
                    Some(c) => {
                        // A state that can take the repetition yields no character transitions
                        // here; the repetition is handled through the ε-transition below.
                        if !self.is_take_repetition() {
                            if let Some(state) = (self.c_transitions)(c) {
                                // The plain character transition.
                                result.push(Transition {
                                    state,
                                    stack_manipulations: vec![],
                                    took_repetition: false,
                                });
                                if self.is_into_repetition() {
                                    // Additionally offer the same transition with a repetition of
                                    // `self` pushed. This is skipped only when `state` is itself
                                    // an into-repetition state whose transition on the same
                                    // character leads to a take-repetition state.
                                    if let Some(future_same_c_state) = (state.c_transitions)(c) {
                                        if !state.is_into_repetition()
                                            || !future_same_c_state.is_take_repetition()
                                        {
                                            result.push(Transition {
                                                state,
                                                stack_manipulations: vec![
                                                    stack::Manipulation::Push(
                                                        stack::Value::Repetition(self),
                                                    ),
                                                ],
                                                took_repetition: false,
                                            });
                                        }
                                    } else {
                                        result.push(Transition {
                                            state,
                                            stack_manipulations: vec![stack::Manipulation::Push(
                                                stack::Value::Repetition(self),
                                            )],
                                            took_repetition: false,
                                        });
                                    }
                                }
                            }
                        }
                    }
                    None => {
                        // Enter the separator subroutine, remembering to return to `self`.
                        if self.is_into_separator() {
                            result.push(Transition {
                                state: separator,
                                stack_manipulations: vec![stack::Manipulation::Push(
                                    stack::Value::Return(self),
                                )],
                                took_repetition: false,
                            });
                        }
                        if self.is_take_repetition() {
                            // Take the repetition.
                            //
                            // The repetition is replaced on the stack by `self` as a target, and
                            // computation jumps back to the repeated state.
                            result.push(Transition {
                                state: repetition_state,
                                stack_manipulations: vec![
                                    stack::Manipulation::Pop,
                                    stack::Manipulation::Push(stack::Value::Target(self)),
                                ],
                                took_repetition: true,
                            })
                        } else {
                            // Alias transitions, each with and (if allowed) without a repetition
                            // of `self` pushed beneath the return state.
                            for alias in self.aliases {
                                result.push(Transition {
                                    state: alias.0,
                                    stack_manipulations: vec![stack::Manipulation::Push(
                                        stack::Value::Return(alias.1),
                                    )],
                                    took_repetition: false,
                                });
                                if self.is_into_repetition() {
                                    result.push(Transition {
                                        state: alias.0,
                                        stack_manipulations: vec![
                                            stack::Manipulation::Push(stack::Value::Repetition(
                                                self,
                                            )),
                                            stack::Manipulation::Push(stack::Value::Return(
                                                alias.1,
                                            )),
                                        ],
                                        took_repetition: false,
                                    })
                                }
                            }
                            // Grapheme transitions leave the stack untouched.
                            for grapheme in self.graphemes {
                                result.push(Transition {
                                    state: grapheme,
                                    stack_manipulations: vec![],
                                    took_repetition: false,
                                });
                            }
                        }
                    }
                }
            }
            // A target is on top of the stack.
            stack::Value::Target(target_state) => match c {
                Some(c) => {
                    if let Some(state) = (self.c_transitions)(c) {
                        if state.is_take_repetition() {
                            // A take-repetition state may only be entered if it is the exact
                            // target (compared by address); entering it pops the target.
                            if ptr::eq(state, target_state) {
                                result.push(Transition {
                                    state,
                                    stack_manipulations: vec![stack::Manipulation::Pop],
                                    took_repetition: false,
                                });
                                if self.is_into_repetition() {
                                    // Same repetition-push rule as in the other branches, except
                                    // that the target is popped first.
                                    if let Some(future_same_c_state) = (state.c_transitions)(c) {
                                        if !state.is_into_repetition()
                                            || !future_same_c_state.is_take_repetition()
                                        {
                                            result.push(Transition {
                                                state,
                                                stack_manipulations: vec![
                                                    stack::Manipulation::Pop,
                                                    stack::Manipulation::Push(
                                                        stack::Value::Repetition(self),
                                                    ),
                                                ],
                                                took_repetition: false,
                                            });
                                        }
                                    } else {
                                        result.push(Transition {
                                            state,
                                            stack_manipulations: vec![
                                                stack::Manipulation::Pop,
                                                stack::Manipulation::Push(
                                                    stack::Value::Repetition(self),
                                                ),
                                            ],
                                            took_repetition: false,
                                        });
                                    }
                                }
                            }
                        } else {
                            // The next state does not take repetitions, so the target stays on
                            // the stack.
                            result.push(Transition {
                                state,
                                stack_manipulations: vec![],
                                took_repetition: false,
                            });
                            if self.is_into_repetition() {
                                result.push(Transition {
                                    state,
                                    stack_manipulations: vec![stack::Manipulation::Push(
                                        stack::Value::Repetition(self),
                                    )],
                                    took_repetition: false,
                                });
                            }
                        }
                    }
                }
                None => {
                    // Only aliases whose return state is the target (compared by address) may be
                    // entered. The target is popped and replaced by the alias's return state.
                    for alias in self.aliases {
                        if ptr::eq(alias.1, target_state) {
                            result.push(Transition {
                                state: alias.0,
                                stack_manipulations: vec![
                                    stack::Manipulation::Pop,
                                    stack::Manipulation::Push(stack::Value::Return(alias.1)),
                                ],
                                took_repetition: false,
                            });
                            if self.is_into_repetition() {
                                result.push(Transition {
                                    state: alias.0,
                                    stack_manipulations: vec![
                                        stack::Manipulation::Pop,
                                        stack::Manipulation::Push(stack::Value::Repetition(self)),
                                        stack::Manipulation::Push(stack::Value::Return(alias.1)),
                                    ],
                                    took_repetition: false,
                                });
                            }
                        }
                    }
                }
            },
            // The stack is empty (`None`) or has a return state on top.
            _ => match c {
                Some(c) => {
                    if let Some(state) = (self.c_transitions)(c) {
                        // The plain character transition.
                        result.push(Transition {
                            state,
                            stack_manipulations: vec![],
                            took_repetition: false,
                        });
                        if self.is_into_repetition() {
                            // Additionally offer the transition with a repetition of `self`
                            // pushed, unless `state` is an into-repetition state whose transition
                            // on the same character leads to a take-repetition state.
                            if let Some(future_same_c_state) = (state.c_transitions)(c) {
                                if !state.is_into_repetition()
                                    || !future_same_c_state.is_take_repetition()
                                {
                                    result.push(Transition {
                                        state,
                                        stack_manipulations: vec![stack::Manipulation::Push(
                                            stack::Value::Repetition(self),
                                        )],
                                        took_repetition: false,
                                    });
                                }
                            } else {
                                result.push(Transition {
                                    state,
                                    stack_manipulations: vec![stack::Manipulation::Push(
                                        stack::Value::Repetition(self),
                                    )],
                                    took_repetition: false,
                                });
                            }
                        }
                    }
                }
                None => {
                    // Enter the separator subroutine, remembering to return to `self`.
                    if self.is_into_separator() {
                        result.push(Transition {
                            state: separator,
                            stack_manipulations: vec![stack::Manipulation::Push(
                                stack::Value::Return(self),
                            )],
                            took_repetition: false,
                        });
                    }
                    // Alias transitions push the alias's return state, optionally on top of a
                    // repetition of `self`.
                    for alias in self.aliases {
                        result.push(Transition {
                            state: alias.0,
                            stack_manipulations: vec![stack::Manipulation::Push(
                                stack::Value::Return(alias.1),
                            )],
                            took_repetition: false,
                        });
                        if self.is_into_repetition() {
                            result.push(Transition {
                                state: alias.0,
                                stack_manipulations: vec![
                                    stack::Manipulation::Push(stack::Value::Repetition(self)),
                                    stack::Manipulation::Push(stack::Value::Return(alias.1)),
                                ],
                                took_repetition: false,
                            });
                        }
                    }
                    // Grapheme transitions leave the stack untouched.
                    for grapheme in self.graphemes {
                        result.push(Transition {
                            state: grapheme,
                            stack_manipulations: vec![],
                            took_repetition: false,
                        });
                    }

                    // Return to the state on top of the stack, if this state allows returning and
                    // a return state is actually present.
                    if self.attributes.is_return() {
                        if let stack::Value::Return(state) = s {
                            result.push(Transition {
                                state,
                                stack_manipulations: vec![stack::Manipulation::Pop],
                                took_repetition: false,
                            });
                        }
                    }
                }
            },
        }

        result
    }
}

/// An instantaneous description (ID) of a specific instant in computation.
///
/// An instantaneous description stores the current state, the current stack, and the range of
/// characters which have been traversed.
///
/// During computation, multiple IDs will exist at the same time, representing the different paths
/// that are being traversed simultaneously due to the nondeterministic nature of the push-down
/// automaton.
#[derive(Clone, Debug)]
pub(crate) struct InstantaneousDescription<'a> {
    /// The current state.
    pub state: &'a State<'a>,
    /// The current stack.
    stack: Vec<stack::Value<'a>>,
    /// The index within the input where this computation started.
    start: usize,
    /// The current end index, marking the range of input that has been computed.
    end: usize,
    /// Whether the computation is within a separator grapheme.
    ///
    /// A separator grapheme is a grapheme that starts on a state with the `SEPARATOR` flag set.
    /// This value is refreshed in `step` whenever a new grapheme begins.
    separator_grapheme: bool,
    /// Whether a repetition has been taken since the last character was stepped over.
    ///
    /// This is set when a transition that takes a repetition is followed, and is cleared again at
    /// every `step`.
    took_repetition: bool,
}

impl<'a> InstantaneousDescription<'a> {
    /// Creates a new Instantaneous Description, starting from `state` with computation beginning
    /// at index `start` in the input.
    pub(crate) fn new(state: &'a State<'a>, start: usize) -> Self {
        Self {
            state,
            stack: Vec::new(),
            start,
            end: start,
            separator_grapheme: false,
            took_repetition: false,
        }
    }

    /// Return whether the instantaneous description is an accepting state.
    ///
    /// An Instantaneous Description is accepting if it has an accepting state (Word or Exception),
    /// if the stack is empty, and if the computation is not currently within a separator grapheme.
    #[inline]
    pub(crate) fn is_accepting(&self) -> bool {
        self.state.attributes.accepting() && self.stack.is_empty() && !self.separator_grapheme
    }

    /// Return whether the state is a word.
    #[inline]
    pub(crate) fn is_word(&self) -> bool {
        self.state.attributes.is_word()
    }

    /// Unwrap the word that is contained in the state's type.
    ///
    /// This is undefined behavior if the state's type is not Word. This must be checked prior to
    /// calling.
    #[inline]
    pub(crate) unsafe fn unwrap_word_unchecked(self) -> &'a str {
        match self.state.attributes.word {
            Some(word) => word,
            None => debug_unreachable!(),
        }
    }

    /// Return the start index.
    #[inline]
    pub(crate) fn start(&self) -> usize {
        self.start
    }

    /// Return the end index.
    #[inline]
    pub(crate) fn end(&self) -> usize {
        self.end
    }

    /// Internal transition method, with visited context.
    ///
    /// This allows transitions to keep track of which states have already been visited to prevent
    /// getting stuck in infinite loops.
    fn transition_with_visited(
        &self,
        c: Option<char>,
        separator: &'a State<'a>,
        visited: &mut HashSet<ByAddress<&State<'a>>>,
    ) -> impl Iterator<Item = InstantaneousDescription<'a>> {
        let mut new_ids = Vec::new();
        for transition in self
            .state
            .transitions(
                c,
                self.stack.last().unwrap_or(&stack::Value::None).clone(),
                separator,
            )
            .iter()
        {
            if !visited.contains(&ByAddress(transition.state))
                || transition.state.attributes.is_return()
            {
                let mut new_id = self.clone();
                new_id.state = transition.state;
                for manipulation in &transition.stack_manipulations {
                    match manipulation {
                        stack::Manipulation::Push(value) => new_id.stack.push(value.clone()),
                        stack::Manipulation::Pop => {
                            new_id.stack.pop();
                        }
                    }
                }
                if transition.took_repetition {
                    new_id.took_repetition = true;
                }
                // ε-transitions.
                visited.insert(ByAddress(transition.state));
                new_ids.extend(new_id.transition_with_visited(None, separator, visited));
                visited.remove(&ByAddress(transition.state));

                new_ids.push(new_id);
            }
        }
        new_ids.into_iter()
    }

    /// Transition using the input character `c`.
    ///
    /// If an ε-transition is desired, `None` should be provided for `c`.
    #[inline]
    pub(crate) fn transition(
        &self,
        c: Option<char>,
        separator: &'a State<'a>,
    ) -> impl Iterator<Item = InstantaneousDescription<'a>> {
        self.transition_with_visited(c, separator, &mut HashSet::new())
    }

    /// Step along the input `c`.
    pub(crate) fn step(
        mut self,
        c: char,
        separator: &'a State<'a>,
        new_grapheme: bool,
    ) -> impl Iterator<Item = InstantaneousDescription<'a>> {
        self.end += c.len_utf8();
        if new_grapheme {
            self.separator_grapheme = if self.separator_grapheme && self.took_repetition {
                true
            } else {
                self.state.attributes.is_separator()
            };
        }
        self.took_repetition = false;
        self.transition(Some(c), separator)
    }
}

/// `RangeBounds` implementation for an Instantaneous Description.
///
/// The bounds describe the span of input consumed by the ID. Having them available lets IDs be
/// stored in a nested containment list, which requires `RangeBounds` on its elements.
impl RangeBounds<usize> for InstantaneousDescription<'_> {
    /// The lower bound: the start index, always inclusive.
    #[inline]
    fn start_bound(&self) -> Bound<&usize> {
        Bound::Included(&self.start)
    }

    /// The upper bound: the end index.
    ///
    /// It is exclusive for every state except Exception states, for which it is inclusive. This
    /// is to ensure that Exceptions take precedence over Words.
    #[inline]
    fn end_bound(&self) -> Bound<&usize> {
        if !self.state.attributes.is_exception() {
            return Bound::Excluded(&self.end);
        }
        Bound::Included(&self.end)
    }
}