// Copyright (c) 2016-2021 Fabian Schuiki

//! Lexer utilities.

use crate::errors::DiagResult;
use std::cmp::max;
use std::collections::VecDeque;
use std::io::Read;

/// A trait that can supply a peekable stream of characters.
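///
/// The intended protocol: `peek(n)` looks ahead without advancing, `consume(n)`
/// advances the cursor, `clear()` marks the start of the next token, and
/// `to_string()` yields everything consumed since the last `clear()`. A sketch
/// of a scanner loop built on this protocol (the helper is illustrative and
/// not part of this module):
///
/// ```ignore
/// fn scan_ident<R: Reader>(rd: &mut R) -> String {
///     rd.clear(); // the token starts here
///     while let Some(c) = rd.peek(0) {
///         if c.is_alphanumeric() || c == '_' {
///             rd.consume(1);
///         } else {
///             break;
///         }
///     }
///     rd.to_string() // everything consumed since clear()
/// }
/// ```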
pub trait Reader {
    /// Look at the character `offset` positions ahead of the current one,
    /// without consuming anything. Returns `None` if the offset lies beyond
    /// the end of the input.
    fn peek(&mut self, offset: usize) -> Option<char>;
    /// Advance the current position by `amount` characters.
    fn consume(&mut self, amount: usize);
    /// Discard the consumed characters, starting a fresh accumulation at the
    /// current position.
    fn clear(&mut self);
    /// Return the characters consumed since the last call to `clear()`.
    fn to_string(&self) -> String;
}

/// A trait that can supply a stream of tokens.
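///
/// Implementors produce one token per call to `next_token`, reporting errors
/// through `DiagResult`. A typical driver loop, with a hypothetical `Token`
/// type whose `Eof` variant marks the end of the stream:
///
/// ```ignore
/// loop {
///     let token = lexer.next_token()?;
///     if token == Token::Eof {
///         break;
///     }
///     // ... process `token` ...
/// }
/// ```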
pub trait Lexer<T> {
    /// Produce the next token in the stream, or a diagnostic if lexing fails.
    fn next_token<'td>(&mut self) -> DiagResult<'td, T>;
}

/// A `Reader` that accumulates bytes from any `std::io::Read` source, growing
/// an internal buffer as the lexer peeks ahead.
pub struct AccumulatingReader {
    rd: Box<dyn Read>,
    /// The buffered bytes.
    buf: Vec<u8>,
    /// Index of the first byte consumed since the last `clear()`.
    base: usize,
    /// Index of the current position.
    pos: usize,
    /// Index one past the last valid byte in `buf`.
    tail: usize,
}

impl AccumulatingReader {
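    /// Create a new reader that accumulates the bytes read from `rd`.
    ///
    /// A usage sketch (the file name is illustrative):
    ///
    /// ```ignore
    /// let file = std::fs::File::open("input.txt").unwrap();
    /// let mut rd = AccumulatingReader::new(Box::new(file));
    /// let first = rd.peek(0); // `None` if the file is empty
    /// ```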
    pub fn new(rd: Box<dyn Read>) -> AccumulatingReader {
        AccumulatingReader {
            rd,
            buf: Vec::new(),
            base: 0,
            pos: 0,
            tail: 0,
        }
    }

    /// Grow and fill the internal buffer such that at least `min_len` bytes
    /// are present, or the end of the file has been reached. This function may
    /// shift the buffer contents around, in which case previous buffer indices
    /// become invalid. Recalculate all indices derived from `base`, `pos`, or
    /// `tail` after a call to this function.
    pub fn refill(&mut self, mut min_len: usize) {
        // Move the buffer contents to the beginning to make more space at the
        // end.
        if self.base > 0 {
            self.buf.copy_within(self.base..self.tail, 0);
            self.pos -= self.base;
            self.tail -= self.base;
            min_len = min_len.saturating_sub(self.base);
            self.base = 0;
        }
        assert!(self.base == 0);

        // Increase the buffer size to make sure the requested min_len can be
        // accommodated. Grow at least geometrically, but never to less than
        // min_len itself, since doubling alone may not suffice for a large
        // lookahead.
        let cur_len = self.buf.len();
        if min_len > cur_len {
            let new_len = max(max(cur_len * 2, 32), min_len);
            self.buf.resize(new_len, 0);
        }
        assert!(self.buf.len() >= min_len);

        // Keep reading data until at least min_len bytes are in the buffer.
        // Note that this will usually fill in the entire buffer.
        while min_len > self.tail {
            let nread = self.rd.read(&mut self.buf[self.tail..]).unwrap();
            if nread == 0 {
                break;
            }
            self.tail += nread;
        }
    }

    /// Return a slice of the consumed bytes.
    pub fn slice(&self) -> &[u8] {
        &self.buf[self.base..self.pos]
    }

    /// Return a slice of all buffered bytes since the last call to `clear()`,
    /// including bytes that have not been consumed yet.
    pub fn rem_slice(&self) -> &[u8] {
        &self.buf[self.base..]
    }
}

impl Reader for AccumulatingReader {
    /// Return the value of the byte that is `off` bytes away from the current
    /// position in the input file. If `off` lies beyond the end of the file,
    /// `None` is returned.
    fn peek(&mut self, off: usize) -> Option<char> {
        // If the requested offset lies outside the chunk of the file that is
        // currently in memory, refill the buffer first.
        let idx = self.pos + off;
        if idx >= self.tail {
            self.refill(idx + 1)
        }

        // The previous call to refill may move things around in the buffer, so
        // the index needs to be recalculated. If it lies beyond what we were
        // able to read from the file, as indicated by `self.tail`, return
        // `None` to indicate the end of the file.
        let idx = self.pos + off;
        if idx < self.tail {
            // Note: this treats each byte as a Latin-1 character; multi-byte
            // UTF-8 sequences are not decoded (see the TODO in `to_string()`).
            Some(self.buf[idx] as char)
        } else {
            None
        }
    }

    /// Consume the next `amt` bytes of the input. All consumed bytes since the
    /// last `clear()` can be accessed via `slice()` or `to_string()`.
    fn consume(&mut self, amt: usize) {
        self.pos += amt
    }

    /// Clear the consumed bytes.
    fn clear(&mut self) {
        self.base = self.pos
    }

    /// Convert the consumed bytes to a String.
    fn to_string(&self) -> String {
        // TODO: Don't just unwrap, but handle the case where the input file is
        // not valid UTF8.
        String::from_utf8(self.slice().to_vec()).unwrap()
    }
}

/// A `Reader` over the characters of an in-memory string.
pub struct StringReader<'tdata> {
    iter: Box<dyn Iterator<Item = char> + 'tdata>,
    buf: Vec<char>,
    base: usize,
    pos: usize,
    tail: usize,
}

impl<'tdata> StringReader<'tdata> {
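    /// Create a new reader over the characters of `data`.
    ///
    /// A usage sketch of the `Reader` protocol:
    ///
    /// ```ignore
    /// let mut rd = StringReader::new("foo bar");
    /// assert_eq!(rd.peek(0), Some('f'));
    /// rd.consume(3); // consume "foo"
    /// assert_eq!(rd.to_string(), "foo");
    /// rd.clear(); // start the next token after "foo"
    /// ```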
    pub fn new(data: &'tdata str) -> StringReader<'tdata> {
        StringReader {
            iter: Box::new(data.chars()),
            buf: Vec::new(),
            base: 0,
            pos: 0,
            tail: 0,
        }
    }

    fn refill(&mut self, mut min_len: usize) {
        // Move the buffer contents to the beginning to make more space at the
        // end.
        if self.base > 0 {
            self.buf.copy_within(self.base..self.tail, 0);
            self.pos -= self.base;
            self.tail -= self.base;
            min_len = min_len.saturating_sub(self.base);
            self.base = 0;
        }
        assert!(self.base == 0);

        // Increase the buffer size to make sure the requested min_len can be
        // accommodated. Grow at least geometrically, but never to less than
        // min_len itself, since doubling alone may not suffice for a large
        // lookahead.
        let cur_len = self.buf.len();
        if min_len > cur_len {
            let new_len = max(max(cur_len * 2, 32), min_len);
            self.buf.resize(new_len, '\0');
        }
        assert!(self.buf.len() >= min_len);

        // Keep reading characters until at least min_len of them are in the
        // buffer. Note that this will usually fill in the entire buffer.
        while min_len > self.tail {
            match self.iter.next() {
                Some(c) => {
                    self.buf[self.tail] = c;
                    self.tail += 1;
                }
                None => break,
            }
        }
    }
}

impl<'tdata> Reader for StringReader<'tdata> {
    fn peek(&mut self, offset: usize) -> Option<char> {
        // If the requested offset lies outside the chunk of characters that has
        // been parsed, refill the buffer first.
        let idx = self.pos + offset;
        if idx >= self.tail {
            self.refill(idx + 1)
        }

        // The previous call to refill may move things around in the buffer, so
        // the index needs to be recalculated. If it lies beyond what we were
        // able to parse from the string, as indicated by `self.tail`, return
        // `None` to indicate the end of the file.
        let idx = self.pos + offset;
        if idx < self.tail {
            Some(self.buf[idx])
        } else {
            None
        }
    }

    fn consume(&mut self, amount: usize) {
        self.pos += amount;
    }

    fn clear(&mut self) {
        self.base = self.pos;
    }

    fn to_string(&self) -> String {
        self.buf[self.base..self.pos].iter().collect()
    }
}

/// A lexer that chains the tokens of multiple other lexers, one after another.
/// Lexers can be pushed onto an internal stack and are then queried for tokens
/// until their respective Eof has been reached. At that point, the lexer is
/// popped off the stack and the next lexer's tokens are produced. An Eof is
/// returned once all lexers have been drained.
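///
/// For example, an `include`-style construct can be handled by pushing a lexer
/// for the included file: its tokens are produced until its Eof, after which
/// the outer lexer resumes. A sketch with a hypothetical `Token` type:
///
/// ```ignore
/// let mut stack = StackedLexer::new(Token::Eof);
/// stack.push(Box::new(outer_lexer));
/// // Upon encountering an include directive:
/// stack.push(Box::new(included_lexer));
/// // next_token() now drains included_lexer first, then outer_lexer.
/// ```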
pub struct StackedLexer<T> {
    stack: Vec<Box<dyn Lexer<T>>>,
    eof: T,
}

impl<T> StackedLexer<T> {
    /// Create a new stacked lexer which produces `eof` once all lexers have
    /// been drained.
    pub fn new(eof: T) -> StackedLexer<T> {
        StackedLexer {
            stack: Vec::new(),
            eof,
        }
    }

    /// Push a lexer onto the stack; its tokens are produced next.
    pub fn push(&mut self, lexer: Box<dyn Lexer<T>>) {
        self.stack.push(lexer);
    }
}

impl<T: Clone + PartialEq> Lexer<T> for StackedLexer<T> {
    fn next_token<'td>(&mut self) -> DiagResult<'td, T> {
        loop {
            let token = if let Some(lexer) = self.stack.last_mut() {
                lexer.next_token()?
            } else {
                return Ok(self.eof.clone());
            };
            if token == self.eof {
                self.stack.pop();
                continue;
            } else {
                return Ok(token);
            }
        }
    }
}

/// A buffered lexer that allows tokens to be peeked at before they are actually
/// consumed.
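///
/// This enables LL(k)-style parsing: inspect upcoming tokens with `peek(n)`
/// and commit to them with `pop()`. A sketch with a hypothetical `Token` type:
///
/// ```ignore
/// let mut lex = BufferedLexer::new(Box::new(inner), Token::Eof);
/// if *lex.peek(0)? == Token::Ident && *lex.peek(1)? == Token::Colon {
///     let label = lex.pop()?; // the identifier
///     lex.pop()?;             // the colon
/// }
/// ```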
pub struct BufferedLexer<T> {
    inner: Box<dyn Lexer<T>>,
    eof: T,
    buffer: VecDeque<T>,
    done: bool,
}

impl<T: Clone + PartialEq> BufferedLexer<T> {
    /// Create a new buffered lexer.
    pub fn new(inner: Box<dyn Lexer<T>>, eof: T) -> BufferedLexer<T> {
        BufferedLexer {
            inner,
            eof,
            buffer: VecDeque::new(),
            done: false,
        }
    }

    /// Peek at a token not yet consumed. This function merely returns a
    /// reference to said token. Use `pop()` to advance the lexer.
    pub fn peek<'td>(&mut self, offset: usize) -> DiagResult<'td, &T> {
        // Fill the buffer up to the offset that should be peeked at. If an Eof is
        // encountered beforehand, set the `done` flag and abort.
        while !self.done && self.buffer.len() <= offset {
            let token = self.inner.next_token()?;
            if token == self.eof {
                self.done = true;
            }
            self.buffer.push_back(token);
        }

        // Return the token at the requested offset, or the Eof token if the
        // offset lies beyond the end of the stream.
        Ok(self.buffer.get(offset).unwrap_or(&self.eof))
    }

    /// Insert a token in front of the stream such that it becomes the next
    /// token to be returned from `peek(0)` or `pop()`.
    pub fn push(&mut self, token: T) {
        self.buffer.push_front(token);
    }

    /// Consume and return the current token. This is the same token that would
    /// be returned by `peek(0)`.
    pub fn pop<'td>(&mut self) -> DiagResult<'td, T> {
        if self.buffer.is_empty() && !self.done {
            self.inner.next_token()
        } else {
            Ok(self
                .buffer
                .pop_front()
                .expect("pop() called on drained BufferedLexer"))
        }
    }

    /// Get a reference to the wrapped lexer.
    pub fn inner(&self) -> &dyn Lexer<T> {
        &*self.inner
    }

    /// Get a mutable reference to the wrapped lexer.
    pub fn inner_mut(&mut self) -> &mut dyn Lexer<T> {
        &mut *self.inner
    }
}
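
// A small smoke test exercising the peek/consume/clear protocol on
// `StringReader`. This test is illustrative; it relies only on the APIs
// defined above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn string_reader_peek_consume_clear() {
        let mut rd = StringReader::new("let x = 42;");
        // Peek ahead without advancing.
        assert_eq!(rd.peek(0), Some('l'));
        assert_eq!(rd.peek(2), Some('t'));
        // Consume "let" and extract it.
        rd.consume(3);
        assert_eq!(rd.to_string(), "let");
        // Clear and continue with the next token.
        rd.clear();
        assert_eq!(rd.peek(0), Some(' '));
        // Peeking past the end of the input yields `None`.
        assert_eq!(rd.peek(100), None);
    }
}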