Skip to main content

parol/analysis/
k_tuple.rs

1use parol_runtime::TerminalIndex;
2
3use crate::analysis::compiled_terminal::{EPS, INVALID};
4use crate::{CompiledTerminal, MAX_K};
5use std::fmt::{Debug, Display, Error, Formatter};
6use std::hash::{Hash, Hasher};
7
8const EOI: TerminalIndex = 0;
9const NEW_LINE: TerminalIndex = 1;
10const WHITESPACE: TerminalIndex = 2;
11const LINE_COMMENT: TerminalIndex = 3;
12const BLOCK_COMMENT: TerminalIndex = 4;
13
14/// Common functions needed for terminal handling
15pub trait TerminalMappings<T> {
16    /// Create an epsilon representation
17    fn eps() -> T;
18    /// Create an end-of-input representation
19    fn end() -> T;
20    /// Check for epsilon
21    fn is_eps(&self) -> bool;
22    /// Check for end-of-input
23    fn is_end(&self) -> bool;
24    /// Check for invalid (i.e. unassigned) terminal
25    fn is_inv(&self) -> bool;
26}
27
28/// When storing MAX_K terminals in 128 bits, the maximum number of bits used per terminal is 12.
29const MAX_BITS: u8 = (std::mem::size_of::<u128>() * 8) as u8 / MAX_K as u8;
30
31/// A collection of terminals
32///
33/// The terminals are stored in a 128 bit integer where each terminal is stored in a fixed number of
34/// bits. The number of bits is determined by the number of terminals to store.
35/// The maximum number of terminals when storing MAX_K terminals in 128 bits is:
36/// 128 / MAX_K = 128 / 10 = 12.8 => 12 bits
37/// The maximum number of terminals that can be stored is 2^12 = 4096.
38/// The maximum value of the bit count is therefore 12 and can safely be stored in four bits.
39/// We store a mask to more easily extract the terminals from the 128 bits unsigned integer.
40/// The mask to extract single terminals from the 128 bit unsigned integer is calculated as
41/// 2^bits - 1 that is equivalent to the expression !(!0u128 << bits) at runtime.
42///
43/// Since we use only 120 bits to store the terminals, we have 8 bits left. We use the 8 bits to
44/// store the index of the next insertion as well as the bit count used to calculate the mask.
45/// Therefore we split the highest 8 bits of the 128 bits unsigned integer as follows:
46/// - The higher 4 bits are used to store the number of bits used per terminal
47/// - The lower 4 bits are used to store the index of the next insertion
48#[derive(Clone, Copy, Default, Hash, Eq, PartialEq)]
49pub struct Terminals {
50    t: u128,
51}
52
53impl Terminals {
54    /// Creates a new item
55    /// ```
56    /// use parol::analysis::k_tuple::Terminals;
57    /// use parol::analysis::compiled_terminal::CompiledTerminal;
58    /// let t = Terminals::new(1);
59    /// assert!(t.is_empty());
60    /// assert_eq!(0, t.len(), "len");
61    /// assert_eq!(0, t.k_len(5), "k_len");
62    /// assert_eq!(None, t.get(0));
63    /// assert_eq!(None, t.get(9));
64    /// ```
65    pub fn new(max_terminal_index: usize) -> Self {
66        // max_terminal_index + 1: we also need to store EPS
67        let bits = (max_terminal_index + 1).ilog2() as u8 + 1;
68        if bits > MAX_BITS {
69            panic!(
70                "The number of bits required to store {} terminals is {} which is greater than the maximum of {}",
71                max_terminal_index + 1,
72                bits,
73                MAX_BITS
74            );
75        }
76        let mut this = Self { t: 0 };
77        this.set_bits(bits);
78        this
79    }
80
81    /// Creates a new item with epsilon semantic
82    /// ```
83    /// use parol::analysis::k_tuple::{Terminals, TerminalMappings};
84    /// use parol::analysis::compiled_terminal::CompiledTerminal;
85    /// let t = Terminals::eps(1);
86    /// assert!(!t.is_empty());
87    /// assert_eq!(1, t.len(), "len");
88    /// assert_eq!(1, t.k_len(5), "k_len");
89    /// assert_eq!(Some(CompiledTerminal::eps()), t.get(0));
90    /// assert_eq!(None, t.get(1));
91    /// assert_eq!(None, t.get(9));
92    /// ```
93    pub fn eps(max_terminal_index: usize) -> Terminals {
94        let mut t = Self::new(max_terminal_index);
95        t.set(0, CompiledTerminal(EPS));
96        t.set_next_index(1);
97        t
98    }
99
100    /// Creates a new item with end (EOI) semantic
101    /// Such a terminal can't be extended, i.e. you can't append more terminals
102    /// ```
103    /// use parol::analysis::k_tuple::{Terminals, TerminalMappings};
104    /// use parol::analysis::compiled_terminal::CompiledTerminal;
105    /// let t = Terminals::end(1);
106    /// assert!(!t.is_empty());
107    /// assert_eq!(1, t.len());
108    /// assert_eq!(1, t.k_len(5));
109    /// assert_eq!(Some(CompiledTerminal::end()), t.get(0));
110    /// assert_eq!(None, t.get(1));
111    /// assert_eq!(None, t.get(9));
112    /// ```
113    pub fn end(max_terminal_index: usize) -> Terminals {
114        let mut t = Self::new(max_terminal_index);
115        // t.t = 0; // EOI as u128 & t.mask;
116        t.set_next_index(1);
117        t
118    }
119
120    ///
121    /// Creates a new object with maximum k length from another object
122    ///
123    #[must_use]
124    pub fn of(k: usize, other: Self) -> Self {
125        let bits = other.bits();
126        let mask = other.mask();
127        let i = other.k_len(k) as u8;
128        let mut copy_mask = 0u128;
129        (0..i).for_each(|_| {
130            copy_mask <<= bits as usize;
131            copy_mask |= mask;
132        });
133        let t = other.t & copy_mask;
134        let mut t = Self { t };
135        t.set_bits(bits);
136        t.set_next_index(i);
137        t
138    }
139
140    /// Returns the length of the collection
141    #[inline]
142    pub fn len(&self) -> usize {
143        self.next_index() as usize
144    }
145    /// Checks if the collection is empty
146    #[inline]
147    pub fn is_empty(&self) -> bool {
148        self.next_index() == 0
149    }
150
151    /// Returns the index of the next insertion
152    /// The highest 8 bits of t are used to store the index of the next insertion in it's lowest 4
153    /// bits.
154    #[inline]
155    pub fn next_index(&self) -> u8 {
156        ((self.t & 0x0F00_0000_0000_0000_0000_0000_0000_0000) >> 120) as u8
157    }
158
159    /// Sets the index of the next insertion
160    #[inline]
161    fn set_next_index(&mut self, i: u8) {
162        self.t &= 0xF0FF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF;
163        self.t |= (i as u128) << 120;
164    }
165
166    /// Increments the index of the next insertion
167    #[inline]
168    pub fn inc_index(&mut self) {
169        let i = self.next_index() + 1;
170        self.t &= 0xF0FF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF;
171        self.t |= (i as u128) << 120;
172    }
173
174    /// Returns the bits used per terminal
175    /// The highest 8 bits of t are used to store the number of bits used per terminal in it's highest
176    /// 4 bits.
177    /// ```
178    /// use parol::analysis::k_tuple::Terminals;
179    /// let t = Terminals::eps(1);
180    /// assert_eq!(2, t.bits());
181    /// ```
182    #[inline]
183    pub fn bits(&self) -> u8 {
184        ((self.t & 0xF000_0000_0000_0000_0000_0000_0000_0000) >> 124) as u8
185    }
186
187    /// Sets the number of bits used per terminal
188    #[inline]
189    pub fn set_bits(&mut self, bits: u8) {
190        self.t &= 0x0FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF;
191        self.t |= (bits as u128) << 124;
192        debug_assert_ne!(self.bits(), 0, "Bits must not be 0");
193    }
194
195    /// Returns the mask used to extract the terminal at position i
196    /// The mask is calculated as 2^bits - 1 that is equivalent to the expression !(!0u128 << bits).
197    #[inline]
198    pub fn mask(&self) -> u128 {
199        !(!0u128 << self.bits())
200    }
201
202    #[must_use]
203    fn last(&self) -> Option<CompiledTerminal> {
204        if self.is_empty() {
205            None
206        } else {
207            self.get(self.next_index() as usize - 1)
208        }
209    }
210
211    /// Checks if the collection is k-complete, i.e. no terminals can be added
212    /// ```
213    /// use parol::analysis::k_tuple::Terminals;
214    /// let t = Terminals::end(1);
215    /// assert!(t.is_k_complete(5));
216    /// ```
217    pub fn is_k_complete(&self, k: usize) -> bool {
218        !self.is_eps() && (self.len() >= k || self.last().is_some_and(|t| t.is_end()))
219    }
220
221    /// Returns the k-length, i.e. the number of symbols that contributes to lookahead sizes
222    #[must_use]
223    pub fn k_len(&self, k: usize) -> usize {
224        std::cmp::min(self.len(), k)
225    }
226
227    /// Clears the collection
228    pub fn clear(&mut self) {
229        let bits = self.bits();
230        self.t = 0;
231        self.set_bits(bits);
232        debug_assert_ne!(self.bits(), 0, "Bits must not be 0");
233    }
234
235    /// Concatenates two collections with respect to the rules of k-concatenation
236    /// ```
237    /// use parol::analysis::k_tuple::{Terminals, TerminalMappings};
238    /// use parol::analysis::compiled_terminal::CompiledTerminal;
239    /// let t1 = Terminals::eps(1);
240    /// let t2 = Terminals::end(1);
241    /// let t = t1.k_concat(&t2, 5);
242    /// assert!(t.is_k_complete(5));
243    /// assert_eq!(1, t.len());
244    /// assert_eq!(Some(CompiledTerminal::end()), t.get(0));
245    /// let t = t2.k_concat(&t1, 5);
246    /// assert!(t.is_k_complete(5));
247    /// assert_eq!(1, t.len());
248    /// assert_eq!(Some(CompiledTerminal::end()), t.get(0));
249    /// let mut t1 = Terminals::new(6);
250    /// t1.extend([1, 2, 3].iter().cloned());
251    /// let mut t2 = Terminals::new(6);
252    /// t2.extend([4, 5, 6].iter().cloned());
253    /// let t = t1.k_concat(&t2, 5);
254    /// assert!(t.is_k_complete(5));
255    /// assert_eq!(5, t.len());
256    /// assert_eq!(Some(CompiledTerminal(1)), t.get(0));
257    /// assert_eq!(Some(CompiledTerminal(2)), t.get(1));
258    /// assert_eq!(Some(CompiledTerminal(3)), t.get(2));
259    /// assert_eq!(Some(CompiledTerminal(4)), t.get(3));
260    /// assert_eq!(Some(CompiledTerminal(5)), t.get(4));
261    /// assert_eq!(None, t.get(5));
262    /// ```
263    pub fn k_concat(mut self, other: &Self, k: usize) -> Self {
264        debug_assert!(
265            other.bits() == self.bits(),
266            "Bits must be the same, self:({self:?}) != other:({other:?})"
267        );
268        debug_assert_ne!(self.bits(), 0, "Bits must not be 0");
269
270        if other.is_eps() || other.is_empty() {
271            // w + ε = w
272            return self;
273        }
274
275        if self.is_eps() {
276            // ε + w = w
277            // Remove possible epsilon terminal
278            self.clear();
279        }
280
281        if self.is_k_complete(k) {
282            // k: w would be the same as k: (w + x)
283            return self;
284        }
285
286        let my_k_len = self.k_len(k);
287        let other_len = other.k_len(k);
288        let to_take = std::cmp::min(k - my_k_len, other_len);
289        if to_take == 0 {
290            // We can't take any more terminals
291            debug_assert!(false, "to_take == 0, self:({self:?}), other:({other:?})");
292            return self;
293        };
294
295        let bits = self.bits();
296
297        // Mask out the other value with a length of to_take
298        // Shift the other value to the left by the length of my_k_len
299        let value =
300            (other.t & !(!0u128 << (to_take * bits as usize))) << (my_k_len * bits as usize);
301        // Add the other value to self
302        self.t |= value;
303        self.set_next_index((my_k_len + to_take) as u8);
304        self.set_bits(bits);
305        self
306    }
307
308    /// Adds a new terminal to self if max size is not reached yet and if last is not EOI
309    pub fn push(&mut self, t: CompiledTerminal) -> Result<(), String> {
310        if self.next_index() >= MAX_K as u8 {
311            return Err("Maximum number of terminals reached".to_owned());
312        }
313        if matches!(self.last(), Some(CompiledTerminal(EOI))) {
314            return Ok(());
315        }
316        debug_assert_ne!(t.0, INVALID, "Invalid terminal");
317        self.set(self.next_index().into(), t);
318        self.inc_index();
319        Ok(())
320    }
321
322    /// Checks if self is an Epsilon
323    #[inline]
324    pub fn is_eps(&self) -> bool {
325        if self.next_index() != 1 {
326            return false;
327        }
328        let mask = self.mask();
329        (self.t & mask) == mask
330    }
331
332    /// Creates an iterator over the terminals
333    pub fn iter(&self) -> TermIt {
334        TermIt::new(*self)
335    }
336
337    /// Returns the terminal at position i
338    pub fn get(&self, i: usize) -> Option<CompiledTerminal> {
339        if i < self.next_index() as usize {
340            let mut terminal_index = (self.t >> (i * self.bits() as usize)) & self.mask();
341            if terminal_index == self.mask() {
342                // Epsilon is defined as 0xFFFF and stored as a value identical to self.mask, i.e. all
343                // bits set to 1. We need to convert it back to 0xFFFF.
344                terminal_index = EPS as u128;
345            }
346            Some(CompiledTerminal(terminal_index as TerminalIndex))
347        } else {
348            None
349        }
350    }
351
352    /// Sets the terminal at position i
353    pub fn set(&mut self, i: usize, t: CompiledTerminal) {
354        let terminal_mask = self.mask();
355        debug_assert!(
356            t.0 <= terminal_mask as TerminalIndex || t.0 == EPS as TerminalIndex,
357            "Terminal index {} out of range",
358            t.0
359        );
360        debug_assert_ne!(t.0, INVALID, "Invalid terminal");
361        let bits = self.bits() as usize;
362        let v = (t.0 as u128 & terminal_mask) << (i * bits);
363        let mask = !(terminal_mask << (i * bits));
364        self.t &= mask;
365        self.t |= v;
366    }
367}
368
369impl Ord for Terminals {
370    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
371        match self.next_index().cmp(&other.next_index()) {
372            std::cmp::Ordering::Less => std::cmp::Ordering::Less,
373            std::cmp::Ordering::Equal => {
374                <&Self as Into<u128>>::into(self).cmp(&<&Self as Into<u128>>::into(other))
375            }
376            std::cmp::Ordering::Greater => std::cmp::Ordering::Greater,
377        }
378    }
379}
380
381impl PartialOrd for Terminals {
382    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
383        Some(self.cmp(other))
384    }
385}
386
387impl Display for Terminals {
388    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
389        write!(
390            f,
391            "[{}(i{})]",
392            (0..self.next_index())
393                .map(|i| format!("{}", self.get(i as usize).unwrap()))
394                .collect::<Vec<String>>()
395                .join(", "),
396            self.next_index(),
397        )
398    }
399}
400
401// Used for comparison in implementation of Ord
402impl From<&Terminals> for u128 {
403    fn from(t: &Terminals) -> Self {
404        // Mask out the unused bits although it should not be necessary
405        t.t & !(!0u128 << (t.next_index() * t.bits()) as usize)
406    }
407}
408
409impl Extend<CompiledTerminal> for Terminals {
410    fn extend<I: IntoIterator<Item = CompiledTerminal>>(&mut self, iter: I) {
411        for t in iter {
412            let _ = self.push(t);
413        }
414    }
415}
416
417impl Extend<TerminalIndex> for Terminals {
418    fn extend<I: IntoIterator<Item = TerminalIndex>>(&mut self, iter: I) {
419        for t in iter {
420            let _ = self.push(CompiledTerminal(t));
421        }
422    }
423}
424
425impl Debug for Terminals {
426    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
427        write!(
428            f,
429            "0b{:b}, i:{}, bits:0x{:x}, mask:0x{:x}",
430            self.t,
431            self.next_index(),
432            self.bits(),
433            self.mask()
434        )
435    }
436}
437
438/// Iterator for Terminals
439/// It returns the terminal indices
440#[derive(Debug)]
441pub struct TermIt {
442    /// A copy of the Terminals object.
443    /// During iteration, the member t is shifted to the right by bits and the terminal is extracted
444    /// by masking the lowest bits.
445    t: Terminals,
446    /// The current index
447    i: usize,
448    /// The number of bits used per terminal
449    bits: usize,
450    /// The mask to extract the terminal
451    mask: u128,
452    /// The number of terminals in the collection
453    len: usize,
454}
455
456impl TermIt {
457    fn new(t: Terminals) -> Self {
458        Self {
459            t,
460            i: 0,
461            bits: t.bits() as usize,
462            mask: t.mask(),
463            len: t.next_index() as usize,
464        }
465    }
466}
467
468impl Iterator for TermIt {
469    type Item = TerminalIndex;
470
471    fn next(&mut self) -> Option<Self::Item> {
472        if self.i < self.len {
473            let t = self.t.t & self.mask;
474            // Prepare for the next iteration
475            self.t.t >>= self.bits;
476            self.i += 1;
477
478            if t == self.mask {
479                // Epsilon is defined as 0xFFFF and stored as a value identical to self.mask, i.e.
480                // all bits set to 1. We need to convert it back to 0xFFFF.
481                Some(EPS)
482            } else {
483                Some(t as TerminalIndex)
484            }
485        } else {
486            None
487        }
488    }
489}
490
491/// Terminal string with support for k-completeness
492#[derive(Clone, Copy, Hash, Eq, PartialEq, Ord, PartialOrd)]
493pub enum TerminalString {
494    /// Incomplete sequence
495    Incomplete(Terminals),
496    /// k-complete sequence
497    Complete(Terminals),
498}
499
500impl TerminalString {
501    /// Returns the length of the sequence
502    pub fn len(&self) -> usize {
503        self.inner().len()
504    }
505    /// Checks if the sequence is empty
506    pub fn is_empty(&self) -> bool {
507        self.inner().is_empty()
508    }
509
510    /// Checks if the sequence is k-complete
511    pub fn is_k_complete(&self) -> bool {
512        match self {
513            Self::Incomplete(_) => false,
514            Self::Complete(_) => true,
515        }
516    }
517
518    /// Checks if the inner sequence is k-complete
519    pub fn is_complete(&self, k: usize) -> bool {
520        self.inner().is_k_complete(k)
521    }
522
523    /// Change the state to k-complete
524    pub fn make_complete(self) -> Self {
525        if let Self::Incomplete(e) = self {
526            Self::Complete(e)
527        } else {
528            self
529        }
530    }
531
532    /// Revoke the k-complete state
533    pub fn make_incomplete(self) -> Self {
534        if let Self::Complete(e) = self {
535            Self::Incomplete(e)
536        } else {
537            self
538        }
539    }
540
541    /// Clear the sequences
542    pub fn clear(self) -> Self {
543        let mut inner = match self {
544            Self::Incomplete(t) | Self::Complete(t) => t,
545        };
546        inner.clear();
547
548        Self::Incomplete(inner)
549    }
550
551    /// Return the inner sequences
552    pub fn inner(&self) -> &Terminals {
553        match self {
554            Self::Incomplete(v) => v,
555            Self::Complete(v) => v,
556        }
557    }
558
559    /// Checks if self is an Epsilon
560    pub fn is_eps(&self) -> bool {
561        match self {
562            Self::Incomplete(v) => v.is_eps(),
563            Self::Complete(_) => false,
564        }
565    }
566
567    /// Push a new terminal
568    pub fn push(&mut self, t: CompiledTerminal, k: usize) -> Result<(), String> {
569        match self {
570            Self::Incomplete(v) => {
571                v.push(t)?;
572                if v.is_k_complete(k) {
573                    *self = Self::Complete(*v);
574                }
575            }
576            Self::Complete(_) => {}
577        }
578        Ok(())
579    }
580
581    /// Concat self with another sequence while consuming self
582    pub fn k_concat(self, other: &Self, k: usize) -> Self {
583        match self {
584            Self::Incomplete(v) => {
585                let terminals = v.k_concat(other.inner(), k);
586                if terminals.is_k_complete(k) {
587                    TerminalString::Complete(terminals)
588                } else {
589                    TerminalString::Incomplete(terminals)
590                }
591            }
592            Self::Complete(_) => self,
593        }
594    }
595}
596
597impl Display for TerminalString {
598    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
599        match self {
600            Self::Incomplete(v) => write!(f, "Incomplete({v})"),
601            Self::Complete(v) => write!(f, "Complete  ({v})"),
602        }
603    }
604}
605
606impl Debug for TerminalString {
607    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
608        match self {
609            Self::Incomplete(v) => write!(f, "Incomplete({v:?})"),
610            Self::Complete(v) => write!(f, "Complete  ({v:?})"),
611        }
612    }
613}
614
615/// A builder for KTuple
616#[derive(Clone, Default)]
617pub struct KTupleBuilder<'a> {
618    k: Option<usize>,
619    max_terminal_index: Option<usize>,
620    k_tuple: Option<&'a KTuple>,
621    terminal_string: Option<&'a [TerminalIndex]>,
622}
623
624impl<'a> KTupleBuilder<'a> {
625    /// Creates a new builder
626    pub fn new() -> Self {
627        Self::default()
628    }
629
630    /// Sets the lookahead size
631    pub fn k(mut self, k: usize) -> Self {
632        self.k = Some(k);
633        self
634    }
635
636    /// Sets the maximum terminal index
637    pub fn max_terminal_index(mut self, max_terminal_index: usize) -> Self {
638        self.max_terminal_index = Some(max_terminal_index);
639        self
640    }
641
642    /// Sets the k-tuple to be used during construction
643    pub fn k_tuple(mut self, k_tuple: &'a KTuple) -> Self {
644        self.k_tuple = Some(k_tuple);
645        self
646    }
647
648    /// Sets the terminal string to be used during construction
649    pub fn terminal_string(mut self, terminal_string: &'a [TerminalIndex]) -> Self {
650        self.terminal_string = Some(terminal_string);
651        self
652    }
653
654    /// Builds a new KTuple
655    pub fn build(self) -> Result<KTuple, String> {
656        if self.k.is_none() {
657            return Err("k is not set".to_owned());
658        }
659        if self.max_terminal_index.is_none() {
660            return Err("max_terminal_index is not set".to_owned());
661        }
662        let k = self.k.unwrap();
663        let max_terminal_index = self.max_terminal_index.unwrap_or(0);
664        if let Some(k_tuple) = self.k_tuple {
665            let mut terminals = Terminals::new(max_terminal_index);
666            for t in k_tuple.terminals.inner().iter().take(k) {
667                terminals.push(CompiledTerminal(t))?;
668            }
669            let terminals = if terminals.is_k_complete(k) {
670                TerminalString::Complete(terminals)
671            } else {
672                TerminalString::Incomplete(terminals)
673            };
674            Ok(KTuple {
675                terminals,
676                k: std::cmp::min(k, MAX_K),
677            })
678        } else if let Some(terminal_string) = self.terminal_string {
679            let mut terminals = Terminals::new(max_terminal_index);
680            for t in terminal_string.iter().take(k) {
681                terminals.push(CompiledTerminal(*t))?;
682            }
683            let terminals = if terminals.is_k_complete(k) {
684                TerminalString::Complete(terminals)
685            } else {
686                TerminalString::Incomplete(terminals)
687            };
688            Ok(KTuple {
689                terminals,
690                k: std::cmp::min(k, MAX_K),
691            })
692        } else {
693            Err("k_tuple or terminal_string must be set".to_owned())
694        }
695    }
696
697    ///
698    /// Creates a new ε object
699    ///
700    pub fn eps(self) -> Result<KTuple, String> {
701        if self.k.is_none() {
702            return Err("k is not set".to_owned());
703        }
704        if self.max_terminal_index.is_none() {
705            return Err("max_terminal_index is not set".to_owned());
706        }
707        let k = self.k.unwrap();
708        let terminals =
709            TerminalString::Incomplete(Terminals::eps(self.max_terminal_index.unwrap()));
710        Ok(KTuple {
711            terminals,
712            k: std::cmp::min(k, MAX_K),
713        })
714    }
715    ///
716    /// Creates a new End object
717    ///
718    pub fn end(self) -> Result<KTuple, String> {
719        if self.k.is_none() {
720            return Err("k is not set".to_owned());
721        }
722        if self.max_terminal_index.is_none() {
723            return Err("max_terminal_index is not set".to_owned());
724        }
725        let k = self.k.unwrap();
726        let terminals = TerminalString::Complete(Terminals::end(self.max_terminal_index.unwrap()));
727        Ok(KTuple {
728            terminals,
729            k: std::cmp::min(k, MAX_K),
730        })
731    }
732}
733
734// ---------------------------------------------------
735// Part of the Public API
736// *Changes will affect crate's version according to semver*
737// ---------------------------------------------------
738///
739/// Terminal symbol string type
740///
741#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
742pub struct KTuple {
743    /// The sequence of terminals
744    terminals: TerminalString,
745    /// The lookahead size
746    k: usize,
747}
748
749impl KTuple {
750    /// Used for debugging only
751    pub fn with_terminal_indices(self, terms: &[TerminalIndex]) -> Self {
752        let k = self.k;
753        let mut terminals = match self.terminals {
754            TerminalString::Incomplete(s) => s,
755            TerminalString::Complete(s) => s,
756        };
757
758        terms.iter().take(k).enumerate().for_each(|(i, t)| {
759            terminals.set(i, CompiledTerminal(*t));
760            terminals.inc_index();
761        });
762
763        let terminals = if terminals.is_k_complete(k) {
764            TerminalString::Complete(terminals)
765        } else {
766            TerminalString::Incomplete(terminals)
767        };
768
769        Self { terminals, k }
770    }
771
772    ///
773    /// Creates a new object from a slice of CompiledTerminals
774    ///
775    pub fn from_slice(others: &[CompiledTerminal], k: usize, max_terminal_index: usize) -> Self {
776        let mut terminals = Terminals::new(max_terminal_index);
777        terminals.extend(others.iter().take(k).cloned());
778        let terminals = if terminals.is_k_complete(k) {
779            TerminalString::Complete(terminals)
780        } else {
781            TerminalString::Incomplete(terminals)
782        };
783        Self { terminals, k }
784    }
785
786    ///
787    /// Creates a new object from a vector of terminal symbols
788    ///
789    pub fn of(t: Terminals, k: usize) -> Self {
790        let terminals = Terminals::of(k, t);
791
792        let terminals = if terminals.is_k_complete(k) {
793            TerminalString::Complete(terminals)
794        } else {
795            TerminalString::Incomplete(terminals)
796        };
797        Self { terminals, k }
798    }
799
800    /// Adds a new terminal to self while consuming self
801    pub fn push(&mut self, t: CompiledTerminal) -> Result<(), String> {
802        self.terminals.push(t, self.k)
803    }
804
805    /// Checks if self is an Epsilon
806    pub fn is_eps(&self) -> bool {
807        self.terminals.is_eps()
808    }
809    /// Returns the length of the sequence
810    pub fn len(&self) -> usize {
811        self.terminals.len()
812    }
813    /// Checks if the sequence is empty
814    pub fn is_empty(&self) -> bool {
815        self.terminals.is_empty()
816    }
817    /// Returns the k-length of the sequence
818    pub fn k_len(&self, k: usize) -> usize {
819        self.terminals.inner().k_len(k)
820    }
821    /// Checks if the sequence is k-complete
822    pub fn is_k_complete(&self) -> bool {
823        self.terminals.is_k_complete()
824    }
825
826    /// Concat self with another sequence while consuming self
827    pub fn k_concat(self, other: &Self, k: usize) -> Self {
828        let terminals = self.terminals.k_concat(&other.terminals, k);
829        let k = terminals.inner().k_len(k);
830        Self { terminals, k }
831    }
832
833    /// Sets the lookahead size
834    pub fn set_k(mut self, k: usize) -> Self {
835        if self.terminals.is_complete(k) {
836            self.terminals = self.terminals.make_complete();
837        } else {
838            self.terminals = self.terminals.make_incomplete();
839        }
840        self.k = k;
841        self
842    }
843
844    /// Conversion to string with the help of the terminals slice
845    pub fn to_string(&self, terminals: &[String]) -> String {
846        format!(
847            "[{}]",
848            self.terminals
849                .inner()
850                .iter()
851                .map(|t| match t {
852                    EOI => "$".to_owned(),
853                    NEW_LINE => "NewLine".to_owned(),
854                    WHITESPACE => "WhiteSpace".to_owned(),
855                    LINE_COMMENT => "LineComment".to_owned(),
856                    BLOCK_COMMENT => "BlockComment".to_owned(),
857                    EPS => "\u{03B5}".to_owned(),
858                    _ => terminals[t as usize].to_string(),
859                })
860                .collect::<Vec<String>>()
861                .join(", ")
862        )
863    }
864
865    /// Returns the k value
866    #[inline]
867    pub fn k(&self) -> usize {
868        self.k
869    }
870
871    /// Returns the terminals
872    #[inline]
873    pub fn terminals(&self) -> &Terminals {
874        self.terminals.inner()
875    }
876}
877
878impl Debug for KTuple {
879    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
880        write!(
881            f,
882            "[{:?}(i{})](k{})",
883            self.terminals,
884            self.terminals.inner().next_index(),
885            self.k
886        )
887    }
888}
889
890impl Display for KTuple {
891    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
892        write!(
893            f,
894            "[{}(i{})](k{})",
895            self.terminals,
896            self.terminals.inner().next_index(),
897            self.k
898        )
899    }
900}
901
902impl Hash for KTuple {
903    fn hash<H: Hasher>(&self, state: &mut H) {
904        let self_inner = self.terminals.inner();
905        self_inner.t.hash(state)
906    }
907}
908
909impl Extend<CompiledTerminal> for KTuple {
910    fn extend<I: IntoIterator<Item = CompiledTerminal>>(&mut self, iter: I) {
911        if !self.terminals.is_k_complete() {
912            for t in iter.into_iter().take(self.k - self.len()) {
913                let _ = self.push(t);
914            }
915        }
916    }
917}
918
919impl Extend<TerminalIndex> for KTuple {
920    fn extend<I: IntoIterator<Item = TerminalIndex>>(&mut self, iter: I) {
921        if !self.terminals.is_k_complete() {
922            for t in iter.into_iter().take(self.k - self.len()) {
923                let _ = self.push(CompiledTerminal(t));
924            }
925        }
926    }
927}
928
929#[cfg(test)]
930mod test {
931    use parol_runtime::TerminalIndex;
932
933    use super::{TerminalString, Terminals};
934    use crate::{
935        CompiledTerminal, KTuple, MAX_K,
936        analysis::k_tuple::{EOI, KTupleBuilder},
937    };
938
939    fn term(terminals: &[TerminalIndex], k: usize, max_terminal_index: usize) -> Terminals {
940        debug_assert!(k <= MAX_K);
941        let mut t = Terminals::new(max_terminal_index);
942        t.extend(terminals.iter().map(|t| CompiledTerminal(*t)));
943        t
944    }
945
946    #[test]
947    fn test_terminals_bits() {
948        let terminals = Terminals::new(6);
949        assert_eq!(3, terminals.bits());
950    }
951
952    #[test]
953    fn test_terminals_set_bits() {
954        let mut terminals = Terminals::new(6);
955        terminals.set_bits(0b1010);
956        assert_eq!(0b1010, terminals.bits());
957        terminals.set_bits(0b1100);
958        assert_eq!(0b1100, terminals.bits());
959    }
960
961    #[test]
962    fn test_terminals_mask() {
963        let terminals = Terminals::new(6);
964        assert_eq!(terminals.mask(), 0b111);
965    }
966
967    #[test]
968    fn test_terminals_next_index() {
969        let mut terminals = Terminals::new(6);
970        assert_eq!(0, terminals.next_index());
971        terminals.set_next_index(3);
972        assert_eq!(3, terminals.next_index());
973    }
974
975    #[test]
976    fn test_terminals_set_next_index() {
977        let mut terminals = Terminals::new(6);
978        assert_eq!(0, terminals.next_index());
979        terminals.set_next_index(3);
980        assert_eq!(3, terminals.next_index());
981        terminals.set_next_index(5);
982        assert_eq!(5, terminals.next_index());
983    }
984
985    #[test]
986    fn test_terminals_inc_index() {
987        let mut terminals = Terminals::new(6);
988        assert_eq!(0, terminals.next_index());
989        terminals.inc_index();
990        assert_eq!(1, terminals.next_index());
991        terminals.inc_index();
992        assert_eq!(2, terminals.next_index());
993    }
994
995    #[test]
996    fn check_terminals_k_concat() {
997        // let t1 = Terminals::eps(1);
998        // let t2 = Terminals::end(1);
999        // let t = t1.k_concat(&t2, 5);
1000        // assert!(t.is_k_complete(5));
1001        // assert_eq!(1, t.len());
1002        // assert_eq!(Some(CompiledTerminal(EOI)), t.get(0));
1003        // let t = t2.k_concat(&t1, 5);
1004        // assert!(t.is_k_complete(5));
1005        // assert_eq!(1, t.len());
1006        // assert_eq!(Some(CompiledTerminal(EOI)), t.get(0));
1007        let mut t1 = Terminals::new(6);
1008        t1.extend([1, 2, 3].iter().cloned());
1009        let mut t2 = Terminals::new(6);
1010        t2.extend([4, 5, 6].iter().cloned());
1011        let t = t1.k_concat(&t2, 5);
1012        assert!(t.is_k_complete(5));
1013        assert_eq!(5, t.len());
1014        assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1015        assert_eq!(Some(CompiledTerminal(2)), t.get(1));
1016        assert_eq!(Some(CompiledTerminal(3)), t.get(2));
1017        assert_eq!(Some(CompiledTerminal(4)), t.get(3));
1018        assert_eq!(Some(CompiledTerminal(5)), t.get(4));
1019        assert_eq!(None, t.get(5));
1020    }
1021
1022    #[test]
1023    fn check_with_terminal_indices() {
1024        {
1025            let k_tuple = KTupleBuilder::new()
1026                .k(1)
1027                .max_terminal_index(1)
1028                .terminal_string(&[1])
1029                .build()
1030                .unwrap();
1031            let t = term(&[1], 1, 1);
1032            let expected = KTuple {
1033                terminals: TerminalString::Complete(t),
1034                k: 1,
1035            };
1036            assert_eq!(None, t.get(1));
1037            assert_eq!(None, t.get(MAX_K - 1));
1038            assert_eq!(expected, k_tuple, "[1]");
1039        }
1040        {
1041            let k_tuple = KTupleBuilder::new()
1042                .k(MAX_K)
1043                .max_terminal_index(10)
1044                .terminal_string(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
1045                .build()
1046                .unwrap();
1047            let t = term(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], MAX_K, 10);
1048            let expected = KTuple {
1049                terminals: TerminalString::Complete(t),
1050                k: MAX_K,
1051            };
1052            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1053            assert_eq!(Some(CompiledTerminal(10)), t.get(MAX_K - 1));
1054            assert_eq!(expected, k_tuple, "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]");
1055        }
1056        {
1057            let k_tuple = KTupleBuilder::new()
1058                .k(5)
1059                .max_terminal_index(10)
1060                .terminal_string(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
1061                .build()
1062                .unwrap();
1063            let t = term(&[1, 2, 3, 4, 5], 5, 10);
1064            let expected = KTuple {
1065                terminals: TerminalString::Complete(t),
1066                k: 5,
1067            };
1068            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1069            assert_eq!(Some(CompiledTerminal(5)), t.get(4));
1070            assert_eq!(None, t.get(5));
1071            assert_eq!(expected, k_tuple, "[1, 2, 3, 4, 5]");
1072        }
1073    }
1074
1075    #[test]
1076    fn check_from_slice() {
1077        {
1078            let k_tuple = KTuple::from_slice(&[CompiledTerminal(1)], 1, 1);
1079            let t = term(&[1], 1, 1);
1080            let expected = KTuple {
1081                terminals: TerminalString::Complete(t),
1082                k: 1,
1083            };
1084            assert_eq!(None, t.get(1));
1085            assert_eq!(None, t.get(MAX_K - 1));
1086            assert_eq!(expected, k_tuple, "[1]");
1087        }
1088        {
1089            let k_tuple = KTuple::from_slice(
1090                &[
1091                    CompiledTerminal(1),
1092                    CompiledTerminal(2),
1093                    CompiledTerminal(3),
1094                    CompiledTerminal(4),
1095                    CompiledTerminal(5),
1096                    CompiledTerminal(6),
1097                    CompiledTerminal(7),
1098                    CompiledTerminal(8),
1099                    CompiledTerminal(9),
1100                    CompiledTerminal(10),
1101                ],
1102                MAX_K,
1103                10,
1104            );
1105            let t = term(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], MAX_K, 10);
1106            let expected = KTuple {
1107                terminals: TerminalString::Complete(t),
1108                k: MAX_K,
1109            };
1110            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1111            assert_eq!(Some(CompiledTerminal(10)), t.get(MAX_K - 1));
1112            assert_eq!(expected, k_tuple);
1113        }
1114        {
1115            let k_tuple = KTuple::from_slice(
1116                &[
1117                    CompiledTerminal(1),
1118                    CompiledTerminal(2),
1119                    CompiledTerminal(3),
1120                    CompiledTerminal(4),
1121                    CompiledTerminal(5),
1122                    CompiledTerminal(6),
1123                    CompiledTerminal(7),
1124                    CompiledTerminal(8),
1125                    CompiledTerminal(9),
1126                    CompiledTerminal(10),
1127                ],
1128                5,
1129                10,
1130            );
1131            let t = term(&[1, 2, 3, 4, 5], 5, 10);
1132            let expected = KTuple {
1133                terminals: TerminalString::Complete(t),
1134                k: 5,
1135            };
1136            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1137            assert_eq!(Some(CompiledTerminal(5)), t.get(4));
1138            assert_eq!(None, t.get(5));
1139            assert_eq!(expected, k_tuple, "[1, 2, 3, 4, 5]");
1140        }
1141    }
1142
1143    #[test]
1144    fn check_k_tuple_of() {
1145        {
1146            let k = 1;
1147            let mut t = Terminals::new(1);
1148            t.extend([1]);
1149            let k_tuple = KTuple::of(t, k);
1150            let mut t2 = Terminals::new(1);
1151            t2.extend([1]);
1152            let expected = KTuple {
1153                terminals: TerminalString::Complete(t2),
1154                k,
1155            };
1156            assert_eq!(None, t.get(1));
1157            assert_eq!(None, t.get(MAX_K - 1));
1158            assert_eq!(expected, k_tuple, "[1]");
1159        }
1160        {
1161            let k = MAX_K;
1162            let mut t = Terminals::new(11);
1163            t.extend([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1164            let k_tuple = KTuple::of(t, k);
1165            assert_eq!(MAX_K, k_tuple.len());
1166            let mut t2 = Terminals::new(11);
1167            t2.extend([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1168            let expected = KTuple {
1169                terminals: TerminalString::Complete(t2),
1170                k,
1171            };
1172            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1173            assert_eq!(Some(CompiledTerminal(10)), t.get(MAX_K - 1));
1174            assert_eq!(expected, k_tuple, "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]");
1175        }
1176        {
1177            let k = 5;
1178            let mut t = Terminals::new(11);
1179            t.extend([1, 2, 3, 4, 5]);
1180
1181            let k_tuple = KTuple::of(t, k);
1182            let mut t2 = Terminals::new(11);
1183            t2.extend([1, 2, 3, 4, 5]);
1184            let expected = KTuple {
1185                terminals: TerminalString::Complete(t2),
1186                k,
1187            };
1188            assert_eq!(Some(CompiledTerminal(1)), t.get(0));
1189            assert_eq!(Some(CompiledTerminal(5)), t.get(4));
1190            assert_eq!(None, t.get(5));
1191            assert_eq!(expected, k_tuple, "[1, 2, 3, 4, 5]");
1192        }
1193    }
1194
1195    #[test]
1196    fn check_k_concat() {
1197        {
1198            let tuple1 = KTupleBuilder::new()
1199                .k(1)
1200                .max_terminal_index(1)
1201                .eps()
1202                .unwrap();
1203            let tuple2 = KTupleBuilder::new()
1204                .k(1)
1205                .max_terminal_index(1)
1206                .eps()
1207                .unwrap();
1208            let result = tuple1.k_concat(&tuple2, 1);
1209            let expected = KTupleBuilder::new()
1210                .k(1)
1211                .max_terminal_index(1)
1212                .eps()
1213                .unwrap();
1214            assert_eq!(expected, result, "1: [ε] + [ε] = [ε]");
1215        }
1216        {
1217            let tuple1 = KTupleBuilder::new()
1218                .k(1)
1219                .max_terminal_index(1)
1220                .terminal_string(&[1])
1221                .build()
1222                .unwrap();
1223            let tuple2 = KTupleBuilder::new()
1224                .k(1)
1225                .max_terminal_index(1)
1226                .eps()
1227                .unwrap();
1228            let result = tuple1.k_concat(&tuple2, 1);
1229            let expected = KTupleBuilder::new()
1230                .k(1)
1231                .max_terminal_index(1)
1232                .terminal_string(&[1])
1233                .build()
1234                .unwrap();
1235            assert_eq!(expected, result, "1: [a] + [ε] = [a]");
1236        }
1237        {
1238            let tuple1 = KTupleBuilder::new()
1239                .k(1)
1240                .max_terminal_index(1)
1241                .eps()
1242                .unwrap();
1243            let tuple2 = KTupleBuilder::new()
1244                .k(1)
1245                .max_terminal_index(1)
1246                .terminal_string(&[1])
1247                .build()
1248                .unwrap();
1249            let result = tuple1.k_concat(&tuple2, 1);
1250            let expected = KTupleBuilder::new()
1251                .k(1)
1252                .max_terminal_index(1)
1253                .terminal_string(&[1])
1254                .build()
1255                .unwrap();
1256            assert_eq!(expected, result, "1: [ε] + [a] = [a]");
1257        }
1258        {
1259            let tuple1 = KTupleBuilder::new()
1260                .k(2)
1261                .max_terminal_index(2)
1262                .terminal_string(&[1])
1263                .build()
1264                .unwrap();
1265            let tuple2 = KTupleBuilder::new()
1266                .k(2)
1267                .max_terminal_index(2)
1268                .terminal_string(&[2])
1269                .build()
1270                .unwrap();
1271            let result = tuple1.k_concat(&tuple2, 2);
1272            let expected = KTupleBuilder::new()
1273                .k(2)
1274                .max_terminal_index(1)
1275                .terminal_string(&[1, 2])
1276                .build()
1277                .unwrap();
1278            assert_eq!(expected, result, "2: [a] + [b] = [ab]");
1279        }
1280    }
1281
1282    #[test]
1283    fn check_term() {
1284        {
1285            let terminals = Terminals::new(4);
1286            assert_eq!(0, terminals.k_len(0));
1287            assert_eq!(0, terminals.k_len(1));
1288            assert_eq!(0, terminals.k_len(2));
1289
1290            assert!(terminals.is_k_complete(0));
1291            assert!(!terminals.is_k_complete(1));
1292            assert!(!terminals.is_k_complete(2));
1293            assert!(!terminals.is_k_complete(3));
1294        }
1295        {
1296            let terminals = term(&[1], 1, 4);
1297            assert_eq!(0, terminals.k_len(0));
1298            assert_eq!(1, terminals.k_len(1));
1299            assert_eq!(1, terminals.k_len(2));
1300
1301            assert!(terminals.is_k_complete(0));
1302            assert!(terminals.is_k_complete(1));
1303            assert!(!terminals.is_k_complete(2));
1304            assert!(!terminals.is_k_complete(3));
1305        }
1306        {
1307            let terminals = term(&[1, 2], 2, 4);
1308            assert_eq!(0, terminals.k_len(0));
1309            assert_eq!(1, terminals.k_len(1));
1310            assert_eq!(2, terminals.k_len(2));
1311            assert_eq!(2, terminals.k_len(3));
1312
1313            assert!(terminals.is_k_complete(0));
1314            assert!(terminals.is_k_complete(1));
1315            assert!(terminals.is_k_complete(2));
1316            assert!(!terminals.is_k_complete(3));
1317        }
1318        {
1319            let terminals = term(&[1, EOI], 2, 4);
1320            assert_eq!(0, terminals.k_len(0));
1321            assert_eq!(1, terminals.k_len(1));
1322            assert_eq!(2, terminals.k_len(2));
1323            assert_eq!(2, terminals.k_len(3));
1324
1325            assert!(terminals.is_k_complete(0));
1326            assert!(terminals.is_k_complete(1));
1327            assert!(terminals.is_k_complete(2));
1328            assert!(terminals.is_k_complete(3));
1329        }
1330        {
1331            let terminals = term(
1332                &[
1333                    1, EOI, 1, // This constellation is actually illegal!
1334                ],
1335                3,
1336                1,
1337            );
1338            assert_eq!(0, terminals.k_len(0));
1339            assert_eq!(1, terminals.k_len(1));
1340            assert_eq!(2, terminals.k_len(2));
1341            assert_eq!(2, terminals.k_len(3));
1342
1343            assert!(terminals.is_k_complete(0));
1344            assert!(terminals.is_k_complete(1));
1345            assert!(terminals.is_k_complete(2));
1346            assert!(terminals.is_k_complete(3));
1347
1348            let terminals2 = term(&[3], 1, 1);
1349            let result = terminals.k_concat(&terminals2, 3);
1350            let expected = term(&[1, EOI, 1], 3, 1);
1351            assert_eq!(expected, result);
1352        }
1353    }
1354
1355    #[test]
1356    fn test_iteration_of_terminals() {
1357        let terminals = term(&[1, 2, 3, 4, 5], 5, 5);
1358        let mut iter = terminals.iter();
1359        assert_eq!(Some(1), iter.next());
1360        assert_eq!(Some(2), iter.next());
1361        assert_eq!(Some(3), iter.next());
1362        assert_eq!(Some(4), iter.next());
1363        assert_eq!(Some(5), iter.next());
1364        assert_eq!(None, iter.next());
1365    }
1366}