use std::sync::Arc;
/// A cursor over a newline-normalized copy of some input text.
///
/// The stream yields one Unicode scalar value at a time (`consume`), can step back over the most
/// recently consumed code point (`reconsume`), and supports a stack of checkpoints
/// (`mark` / `rewind`) for backtracking.
pub(crate) struct InputStream {
    /// The normalized input. Held in an `Arc` so `get_input` can share it without copying.
    input: Arc<String>,
    /// Byte offset into `input` of the next code point to consume.
    pos: usize,
    /// Set once `consume` has been called with nothing left to read.
    eof: bool,
    /// UTF-8 length of the most recently consumed code point, or `0` when there is nothing that
    /// `reconsume` could step back over.
    current_codepoint_size: u8,
    /// Saved cursor states, most recent last.
    checkpoints: Vec<Checkpoint>,
}

/// Snapshot of the cursor state taken by `InputStream::mark` and restored by
/// `InputStream::rewind`.
struct Checkpoint {
    current_codepoint_size: u8,
    byte_offset: usize,
    eof: bool,
}

impl InputStream {
    /// Creates a stream positioned at the start of `input`.
    ///
    /// Every `\r\n` pair and every remaining lone `\r` is replaced by a single `\n`, so all
    /// offsets reported by the stream refer to the normalized text, not to `input` itself.
    pub(crate) fn new(input: &str) -> InputStream {
        // CRLF pairs are collapsed first so that they turn into one newline rather than two.
        let normalized = input.replace("\r\n", "\n").replace('\r', "\n");
        InputStream {
            input: Arc::new(normalized),
            pos: 0,
            eof: false,
            current_codepoint_size: 0,
            checkpoints: Vec::new(),
        }
    }

    /// Returns the byte offset (into the normalized input) of the next code point to consume.
    pub(crate) fn byte_offset(&self) -> usize {
        self.pos
    }

    /// Returns the next `n` code points without consuming them.
    ///
    /// If fewer than `n` code points remain, an empty string is returned instead of a shorter
    /// prefix.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `n` is `0`.
    pub(crate) fn lookahead(&self, n: usize) -> &str {
        debug_assert!(n > 0, "lookahead must be greater than 0");
        // Without debug assertions a zero-length lookahead yields the empty string.
        let Some(last) = n.checked_sub(1) else {
            return "";
        };
        let rest = self.remaining();
        // The n-th code point exists exactly when at least `n` code points remain; its end is
        // the end of the slice to return.
        match rest.char_indices().nth(last) {
            Some((start, ch)) => &rest[..start + ch.len_utf8()],
            None => "",
        }
    }

    /// Consumes up to `step` code points, discarding them.
    pub(crate) fn advance(&mut self, step: usize) {
        for _ in 0..step {
            let _ = self.consume();
        }
    }

    /// Returns the next code point without consuming it, or `None` if nothing remains.
    pub(crate) fn peek(&self) -> Option<char> {
        self.remaining().chars().next()
    }

    /// Steps back over the most recently consumed code point so that the next call to
    /// `consume` returns it again.
    ///
    /// This is a no-op when the cursor is at the very start of the input, and also once the
    /// end-of-input flag has been set by `consume`: in that state the next `consume` keeps
    /// returning `None`.
    ///
    /// # Panics
    ///
    /// Panics if called twice in a row in the middle of the input, since only the size of the
    /// last consumed code point is remembered.
    pub(crate) fn reconsume(&mut self) {
        if self.pos == 0 || self.eof {
            return;
        }
        assert!(
            self.current_codepoint_size != 0,
            "can't reconsume twice in a row"
        );
        self.pos -= usize::from(self.current_codepoint_size);
        // Cleared so that a second consecutive call is detected by the assertion above.
        self.current_codepoint_size = 0;
    }

    /// Consumes and returns the next code point.
    ///
    /// Returns `None` and sets the end-of-input flag when nothing remains.
    pub(crate) fn consume(&mut self) -> Option<char> {
        let Some(next) = self.peek() else {
            self.eof = true;
            self.current_codepoint_size = 0;
            return None;
        };
        let size = next.len_utf8();
        self.pos += size;
        // `char::len_utf8` is at most 4, so the narrowing cast cannot truncate.
        self.current_codepoint_size = size as u8;
        Some(next)
    }

    /// Pushes the current cursor state onto the checkpoint stack.
    pub(crate) fn mark(&mut self) {
        self.checkpoints.push(Checkpoint {
            byte_offset: self.pos,
            current_codepoint_size: self.current_codepoint_size,
            eof: self.eof,
        });
    }

    /// Pops the most recent checkpoint and restores the cursor state saved in it.
    ///
    /// # Panics
    ///
    /// Panics if there is no checkpoint to restore.
    pub(crate) fn rewind(&mut self) {
        let Some(checkpoint) = self.checkpoints.pop() else {
            panic!("can't rewind input stream without available checkpoints")
        };
        self.eof = checkpoint.eof;
        self.pos = checkpoint.byte_offset;
        self.current_codepoint_size = checkpoint.current_codepoint_size;
    }

    /// Returns a shared handle to the normalized input (a reference-count bump, not a copy).
    pub(crate) fn get_input(&self) -> Arc<String> {
        Arc::clone(&self.input)
    }

    /// The part of the input that has not been consumed yet.
    fn remaining(&self) -> &str {
        &self.input[self.pos..]
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `InputStream`: consuming, reconsuming, lookahead and checkpoints.

    use super::*;

    /// An empty stream keeps yielding `None`, and reconsuming does not change that.
    #[test]
    fn consuming_an_empty_stream() {
        let mut stream = InputStream::new("");
        assert_eq!(stream.consume(), None);
        assert_eq!(stream.consume(), None);
        stream.reconsume();
        assert_eq!(stream.consume(), None);
    }

    /// Reconsuming makes the last consumed code point available again.
    #[test]
    fn reconsuming() {
        let mut stream = InputStream::new("abc");
        assert_eq!(stream.consume(), Some('a'));
        assert_eq!(stream.consume(), Some('b'));
        assert_eq!(stream.consume(), Some('c'));
        stream.reconsume();
        assert_eq!(stream.consume(), Some('c'));
        assert_eq!(stream.consume(), None);
    }

    /// At the very start of the input, reconsuming is a no-op, however often it is called.
    #[test]
    fn reconsuming_before_anything_has_been_consumed() {
        let mut stream = InputStream::new("abc");
        stream.reconsume();
        stream.reconsume();
        assert_eq!(stream.consume(), Some('a'));
    }

    /// The first reconsume moves back to the start, where the second one is a no-op.
    #[test]
    fn can_reconsume_twice_after_exactly_one_call_to_consume() {
        let mut stream = InputStream::new("abc");
        assert_eq!(stream.consume(), Some('a'));
        stream.reconsume();
        stream.reconsume();
        assert_eq!(stream.consume(), Some('a'));
        assert_eq!(stream.consume(), Some('b'));
    }

    /// Once the end of the input has been hit, reconsuming is a no-op and never panics.
    #[test]
    fn reconsuming_after_end_of_stream_has_been_reached() {
        let mut s = InputStream::new("a");
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), None);
        s.reconsume();
        s.reconsume();
        s.reconsume();
        assert_eq!(s.consume(), None);
    }

    /// Only one code point of history is kept, so a second reconsume mid-input must panic.
    #[test]
    #[should_panic = "can't reconsume twice in a row"]
    fn reconsuming_twice_in_row_in_the_middle_of_the_input() {
        let mut s = InputStream::new("ab");
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), Some('b'));
        s.reconsume();
        s.reconsume();
    }

    /// Despite the name, the "none" here is the empty string: `lookahead(n)` returns `""`
    /// whenever fewer than `n` code points remain. Multi-byte code points are used to make
    /// sure the count is in code points rather than bytes.
    #[test]
    fn lookahead_returns_none_when_there_is_not_enough_bytes() {
        let mut s = InputStream::new("🙈🙉🙊");
        assert_eq!(s.lookahead(3), "🙈🙉🙊");
        s.consume().unwrap();
        assert_eq!(s.lookahead(3), "");
        assert_eq!(s.lookahead(2), "🙉🙊");
        s.consume().unwrap();
        assert_eq!(s.lookahead(2), "");
        assert_eq!(s.lookahead(1), "🙊");
        s.consume().unwrap();
        assert_eq!(s.lookahead(1), "");
    }

    /// A checkpoint taken at the start restores the whole stream, including the EOF state.
    #[test]
    fn rewinding_to_the_start_of_the_stream() {
        let mut s = InputStream::new("abc");
        s.mark();
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), Some('b'));
        assert_eq!(s.consume(), Some('c'));
        assert_eq!(s.consume(), None);
        s.rewind();
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), Some('b'));
        assert_eq!(s.consume(), Some('c'));
        assert_eq!(s.consume(), None);
    }

    /// A checkpoint taken right after a reconsume restores that exact position.
    #[test]
    fn rewinding_after_reconsuming() {
        let mut s = InputStream::new("abc");
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), Some('b'));
        s.reconsume();
        s.mark();
        assert_eq!(s.consume(), Some('b'));
        assert_eq!(s.consume(), Some('c'));
        assert_eq!(s.consume(), None);
        s.rewind();
        assert_eq!(s.consume(), Some('b'));
        assert_eq!(s.consume(), Some('c'));
        assert_eq!(s.consume(), None);
    }

    /// `\r\n` pairs and lone `\r` characters each come out as a single `\n`.
    #[test]
    fn normalizes_carriage_returns_to_line_feeds() {
        let mut s = InputStream::new("a\r\nb\rc");
        assert_eq!(s.consume(), Some('a'));
        assert_eq!(s.consume(), Some('\n'));
        assert_eq!(s.consume(), Some('b'));
        assert_eq!(s.consume(), Some('\n'));
        assert_eq!(s.consume(), Some('c'));
        assert_eq!(s.consume(), None);
    }

    /// Rewinding with an empty checkpoint stack is a programming error and panics.
    #[test]
    #[should_panic = "can't rewind input stream without available checkpoints"]
    fn rewinding_without_a_checkpoint() {
        let mut s = InputStream::new("abc");
        s.rewind();
    }
}