// antlr4_runtime/char_stream.rs

use crate::int_stream::{EOF, IntStream, UNKNOWN_SOURCE_NAME};
2
/// An inclusive range of character indices, used to request text from a
/// [`CharStream`].
///
/// The interval covers `start` through `stop`, both ends included, and is
/// considered empty whenever `start` is greater than `stop`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct TextInterval {
    /// Index of the first element covered by the interval.
    pub start: usize,
    /// Index of the last element covered by the interval (inclusive).
    pub stop: usize,
}

impl TextInterval {
    /// Creates an interval spanning `start` through `stop`.
    ///
    /// The bounds are stored as given; passing `start > stop` produces an
    /// interval for which [`TextInterval::is_empty`] returns `true`.
    #[must_use]
    pub const fn new(start: usize, stop: usize) -> Self {
        Self { start, stop }
    }

    /// Returns an interval that covers nothing.
    ///
    /// Both bounds are unsigned, so emptiness is encoded as `start = 1`,
    /// `stop = 0` — the smallest pair with `start > stop`.
    #[must_use]
    pub const fn empty() -> Self {
        Self::new(1, 0)
    }

    /// Returns `true` when the interval covers no indices, i.e. when `start`
    /// is greater than `stop`.
    #[must_use]
    pub const fn is_empty(self) -> bool {
        self.start > self.stop
    }
}
22
23pub trait CharStream: IntStream {
24    fn text(&self, interval: TextInterval) -> String;
25}
26
27#[derive(Clone, Debug)]
28pub struct InputStream {
29    data: Vec<char>,
30    cursor: usize,
31    source_name: String,
32}
33
34impl InputStream {
35    /// Creates a character stream from UTF-8 text using ANTLR's unknown source
36    /// name placeholder.
37    pub fn new(input: impl AsRef<str>) -> Self {
38        Self::with_source_name(input, UNKNOWN_SOURCE_NAME)
39    }
40
41    /// Creates a character stream with an explicit source name for tokens and
42    /// diagnostics.
43    pub fn with_source_name(input: impl AsRef<str>, source_name: impl Into<String>) -> Self {
44        Self {
45            data: input.as_ref().chars().collect(),
46            cursor: 0,
47            source_name: source_name.into(),
48        }
49    }
50
51    /// Returns true when the cursor has reached or passed the end of input.
52    pub const fn is_eof(&self) -> bool {
53        self.cursor >= self.data.len()
54    }
55}
56
57impl IntStream for InputStream {
58    fn consume(&mut self) {
59        if !self.is_eof() {
60            self.cursor += 1;
61        }
62    }
63
64    fn la(&mut self, offset: isize) -> i32 {
65        if offset == 0 {
66            return 0;
67        }
68
69        let absolute = if offset > 0 {
70            self.cursor.checked_add((offset - 1).cast_unsigned())
71        } else {
72            offset
73                .checked_neg()
74                .and_then(|distance| usize::try_from(distance).ok())
75                .and_then(|distance| self.cursor.checked_sub(distance))
76        };
77
78        absolute
79            .and_then(|index| self.data.get(index).copied())
80            .map_or(EOF, |ch| ch as i32)
81    }
82
83    fn index(&self) -> usize {
84        self.cursor
85    }
86
87    fn seek(&mut self, index: usize) {
88        self.cursor = index.min(self.data.len());
89    }
90
91    fn size(&self) -> usize {
92        self.data.len()
93    }
94
95    fn source_name(&self) -> &str {
96        &self.source_name
97    }
98}
99
100impl CharStream for InputStream {
101    /// Returns text for an inclusive interval of Unicode scalar indices.
102    fn text(&self, interval: TextInterval) -> String {
103        if interval.is_empty() || self.data.is_empty() {
104            return String::new();
105        }
106
107        let start = interval.start.min(self.data.len());
108        let stop = interval.stop.min(self.data.len().saturating_sub(1));
109        if start > stop {
110            return String::new();
111        }
112
113        self.data[start..=stop].iter().collect()
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Indices count Unicode scalar values: the two-byte `β` occupies a single
    /// position for both lookahead and text extraction.
    #[test]
    fn lookahead_and_text_are_codepoint_indexed() {
        let mut input = InputStream::with_source_name("aβ\n", "sample");
        assert_eq!(input.source_name(), "sample");
        assert_eq!(input.size(), 3);
        assert_eq!(input.la(1), 'a' as i32);
        assert_eq!(input.la(2), 'β' as i32);
        assert_eq!(input.text(TextInterval::new(0, 1)), "aβ");
        input.consume();
        assert_eq!(input.index(), 1);
        assert_eq!(input.la(-1), 'a' as i32);
        // The magnitude of `isize::MIN` cannot be represented; it must not panic.
        assert_eq!(input.la(isize::MIN), EOF);
        // Seeking past the end clamps to the end of input.
        input.seek(99);
        assert_eq!(input.la(1), EOF);
    }

    /// Out-of-range and empty intervals never panic and never read past the
    /// stored characters.
    #[test]
    fn text_clamps_interval_to_available_input() {
        let input = InputStream::new("aβ\n");
        assert_eq!(input.text(TextInterval::empty()), "");
        assert_eq!(input.text(TextInterval::new(1, 99)), "β\n");
        assert_eq!(input.text(TextInterval::new(3, 5)), "");
        assert_eq!(InputStream::new("").text(TextInterval::new(0, 0)), "");
    }

    /// Consuming at end of input leaves the cursor where it is.
    #[test]
    fn consume_stops_at_end_of_input() {
        let mut input = InputStream::new("a");
        assert_eq!(input.la(0), 0);
        assert!(!input.is_eof());
        input.consume();
        assert!(input.is_eof());
        input.consume();
        assert_eq!(input.index(), 1);
        assert_eq!(input.la(1), EOF);
        assert_eq!(input.la(-1), 'a' as i32);
    }
}