#![no_std]
#![doc(html_root_url = "https://docs.rs/uwl/*")]
#![deny(rust_2018_idioms)]
#![cfg_attr(test, feature(test))]
/// A cursor over a UTF-8 string that can be walked either byte-by-byte or
/// character-by-character.
///
/// Every slice handed out borrows from the source string (lifetime `'a`),
/// not from the stream itself, so results may outlive the stream.
///
/// The byte-oriented methods ([`Stream::next`], [`Stream::set`],
/// [`Stream::increment`], ...) can leave the cursor in the middle of a
/// multi-byte character. Methods that return `&str` or `char` panic in that
/// state, because a string slice cannot start or end inside a character.
#[derive(Debug, Default, Clone, Copy)]
pub struct Stream<'a> {
    /// Bytes of the source string; only ever populated from a `&str`.
    src: &'a [u8],
    /// Byte index of the cursor into `src`. May exceed `src.len()`.
    offset: usize,
}

impl<'a> Stream<'a> {
    /// Creates a stream positioned at the start of `src`.
    #[inline]
    pub fn new(src: &'a str) -> Self {
        Self {
            src: src.as_bytes(),
            offset: 0,
        }
    }

    /// Returns the current cursor position as a byte index into the source.
    #[inline]
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Returns the whole source string, regardless of the cursor position.
    #[inline]
    pub fn source(&self) -> &'a str {
        // SAFETY: `src` is private and is only ever set from `str::as_bytes`
        // in `new` (or is the empty slice via `Default`), and it is never
        // mutated, so it always holds valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(self.src) }
    }

    /// Returns the length in bytes of the whole source (not of the remainder).
    #[inline]
    pub fn len(&self) -> usize {
        self.src.len()
    }

    /// Returns `true` when the cursor is at or past the end of the source.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.offset >= self.len()
    }

    /// Returns the part of the source from the cursor to the end.
    ///
    /// Yields `""` when the cursor is at or past the end, matching
    /// [`Stream::is_empty`].
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn rest(&self) -> &'a str {
        // Clamp so that an out-of-range cursor (reachable through `set` and
        // `increment`) reads as "nothing left" instead of panicking.
        let start = self.offset.min(self.len());
        &self.source()[start..]
    }

    /// Returns the byte `amount` bytes ahead of the cursor without advancing,
    /// or `None` if that position is outside the source.
    #[inline]
    pub fn peek(&self, amount: usize) -> Option<u8> {
        // An index that overflows `usize` is certainly out of bounds.
        let index = self.offset.checked_add(amount)?;
        self.src.get(index).copied()
    }

    /// Returns the character `amount` characters ahead of the cursor without
    /// advancing, or `None` if there are not that many characters left.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn peek_char(&self, amount: usize) -> Option<char> {
        self.rest().chars().nth(amount)
    }

    /// Returns the byte under the cursor without advancing.
    #[inline]
    pub fn current(&self) -> Option<u8> {
        self.peek(0)
    }

    /// Returns the character under the cursor without advancing.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn current_char(&self) -> Option<char> {
        self.peek_char(0)
    }

    /// Returns the byte under the cursor and advances past it.
    ///
    /// Returns `None`, leaving the cursor untouched, at the end of the source.
    #[inline]
    pub fn next(&mut self) -> Option<u8> {
        let byte = self.current()?;
        self.offset += 1;
        Some(byte)
    }

    /// Returns the character under the cursor and advances past all of its
    /// bytes.
    ///
    /// Returns `None`, leaving the cursor untouched, at the end of the source.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn next_char(&mut self) -> Option<char> {
        let ch = self.current_char()?;
        self.offset += ch.len_utf8();
        Some(ch)
    }

    /// Returns the longest run of bytes starting at the cursor for which `f`
    /// returns `true`, without advancing.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character, or if `f`
    /// stops accepting in the middle of one (the run would not be a valid
    /// string slice).
    #[inline]
    pub fn peek_while(&self, mut f: impl FnMut(u8) -> bool) -> &'a str {
        let rest = self.rest();
        let end = rest.bytes().take_while(|&b| f(b)).count();
        &rest[..end]
    }

    /// Returns the longest run of characters starting at the cursor for which
    /// `f` returns `true`, without advancing.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn peek_while_char(&self, mut f: impl FnMut(char) -> bool) -> &'a str {
        let rest = self.rest();
        // Byte index of the first rejected character, or the end of the
        // remainder if every character is accepted.
        let end = rest
            .char_indices()
            .find(|&(_, c)| !f(c))
            .map_or(rest.len(), |(index, _)| index);
        &rest[..end]
    }

    /// Like [`Stream::peek_while`] with the predicate negated: returns the
    /// bytes up to (not including) the first one for which `f` returns `true`.
    #[inline]
    pub fn peek_until(&self, mut f: impl FnMut(u8) -> bool) -> &'a str {
        self.peek_while(|c| !f(c))
    }

    /// Like [`Stream::peek_while_char`] with the predicate negated: returns
    /// the characters up to (not including) the first one for which `f`
    /// returns `true`.
    #[inline]
    pub fn peek_until_char(&self, mut f: impl FnMut(char) -> bool) -> &'a str {
        self.peek_while_char(|c| !f(c))
    }

    /// Same as [`Stream::peek_while`], but also advances past the returned
    /// run.
    #[inline]
    pub fn take_while(&mut self, f: impl FnMut(u8) -> bool) -> &'a str {
        let s = self.peek_while(f);
        self.offset += s.len();
        s
    }

    /// Same as [`Stream::peek_while_char`], but also advances past the
    /// returned run.
    #[inline]
    pub fn take_while_char(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
        let s = self.peek_while_char(f);
        self.offset += s.len();
        s
    }

    /// Same as [`Stream::peek_until`], but also advances past the returned
    /// run.
    #[inline]
    pub fn take_until(&mut self, mut f: impl FnMut(u8) -> bool) -> &'a str {
        self.take_while(|c| !f(c))
    }

    /// Same as [`Stream::peek_until_char`], but also advances past the
    /// returned run.
    #[inline]
    pub fn take_until_char(&mut self, mut f: impl FnMut(char) -> bool) -> &'a str {
        self.take_while_char(|c| !f(c))
    }

    /// Returns up to `amount` bytes starting at the cursor, without
    /// advancing. Returns the whole remainder if fewer bytes are left.
    ///
    /// # Panics
    ///
    /// Panics if the cursor, or the position `amount` bytes after it, lies
    /// inside a multi-byte character.
    #[inline]
    pub fn peek_for(&self, amount: usize) -> &'a str {
        let rest = self.rest();
        if amount >= rest.len() {
            rest
        } else {
            &rest[..amount]
        }
    }

    /// Returns up to `amount` characters starting at the cursor, without
    /// advancing. Returns the whole remainder if fewer characters are left.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn peek_for_char(&self, amount: usize) -> &'a str {
        let rest = self.rest();
        // The byte index of the character *after* the first `amount`
        // characters is exactly where the returned slice must end.
        match rest.char_indices().nth(amount) {
            Some((end, _)) => &rest[..end],
            None => rest,
        }
    }

    /// Same as [`Stream::peek_for`], but also advances past the returned
    /// bytes.
    #[inline]
    pub fn advance(&mut self, amount: usize) -> &'a str {
        let s = self.peek_for(amount);
        self.offset += s.len();
        s
    }

    /// Same as [`Stream::peek_for_char`], but also advances past the returned
    /// characters.
    #[inline]
    pub fn advance_char(&mut self, amount: usize) -> &'a str {
        let s = self.peek_for_char(amount);
        self.offset += s.len();
        s
    }

    /// Advances past `m` and returns `true` if the remainder starts with `m`;
    /// otherwise leaves the cursor untouched and returns `false`.
    ///
    /// # Panics
    ///
    /// Panics if the cursor lies inside a multi-byte character.
    #[inline]
    pub fn eat(&mut self, m: &str) -> bool {
        let matched = self.rest().starts_with(m);
        if matched {
            self.offset += m.len();
        }
        matched
    }

    /// Moves the cursor to the absolute byte index `pos`.
    ///
    /// No validation is performed: `pos` may be past the end of the source
    /// (the stream then reads as empty) or inside a multi-byte character
    /// (string-returning methods then panic).
    #[inline]
    pub fn set(&mut self, pos: usize) {
        self.offset = pos;
    }

    /// Moves the cursor forward by `amount` bytes, saturating at
    /// `usize::MAX` instead of overflowing.
    ///
    /// As with [`Stream::set`], the resulting position is not validated.
    #[inline]
    pub fn increment(&mut self, amount: usize) {
        self.offset = self.offset.saturating_add(amount);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    extern crate alloc;
    extern crate test;

    use alloc::vec::Vec;

    /// Draining the stream one byte at a time must reproduce the source
    /// exactly, including every byte of the multi-byte characters.
    #[test]
    fn all_chars() {
        const STRING: &str = "hello a b c ! ?👀👁!!!";

        let mut s = Stream::new(STRING);
        let mut v = Vec::with_capacity(STRING.len());
        while let Some(c) = s.next() {
            v.push(c);
        }

        // Bytes 0..15 are the ASCII prefix, one byte per character.
        for (index, &expected) in b"hello a b c ! ?".iter().enumerate() {
            assert_eq!(v[index], expected);
        }

        // Each emoji occupies four bytes.
        assert_eq!(&v[15..19], "👀".as_bytes());
        assert_eq!(&v[19..23], "👁".as_bytes());

        // Trailing exclamation marks.
        for index in 23..26 {
            assert_eq!(v[index], b'!');
        }

        assert_eq!(v.len(), 26);
        assert_eq!(v.get(26), None);
        assert_eq!(core::str::from_utf8(&v), Ok(STRING));
    }

    /// Benchmarks tokenizing a small source with a toy lexer built on
    /// `Stream`, asserting every produced token along the way.
    #[bench]
    fn lang(b: &mut test::Bencher) {
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        enum TokenKind {
            Illegal,
            Identifier,
            Number,
            Paren,
            CParen,
        }

        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        struct Token<'a> {
            kind: TokenKind,
            lit: &'a str,
        }

        impl<'a> Token<'a> {
            fn new(kind: TokenKind, lit: &'a str) -> Self {
                Self { kind, lit }
            }
        }

        /// Bytes allowed as the first byte of an identifier.
        fn is_ident_start(b: u8) -> bool {
            b == b'_' || b.is_ascii_alphabetic()
        }

        /// Bytes allowed after the first byte of an identifier.
        fn is_ident_continue(b: u8) -> bool {
            is_ident_start(b) || b.is_ascii_digit()
        }

        /// Produces the next token, or `None` once only whitespace (or
        /// nothing) is left in the stream.
        fn lit<'a>(s: &mut Stream<'a>) -> Option<Token<'a>> {
            // Leading whitespace never forms a token; skip it up front.
            s.take_while(|b| b.is_ascii_whitespace());
            let first = s.current()?;

            let token = if first.is_ascii_digit() {
                Token::new(TokenKind::Number, s.take_while(|b| b.is_ascii_digit()))
            } else if is_ident_start(first) {
                Token::new(TokenKind::Identifier, s.take_while(is_ident_continue))
            } else {
                // Anything else is a single-byte token.
                let kind = match first {
                    b'(' => TokenKind::Paren,
                    b')' => TokenKind::CParen,
                    _ => TokenKind::Illegal,
                };
                let text = &s.rest()[..1];
                s.next();
                Token::new(kind, text)
            };

            Some(token)
        }

        b.iter(|| {
            const SRC: &str = "(abc foo bar) ()() 1 2 3 4 5 6 7 8 9";

            // Tokens expected from `SRC`, in order.
            let expected = [
                (TokenKind::Paren, "("),
                (TokenKind::Identifier, "abc"),
                (TokenKind::Identifier, "foo"),
                (TokenKind::Identifier, "bar"),
                (TokenKind::CParen, ")"),
                (TokenKind::Paren, "("),
                (TokenKind::CParen, ")"),
                (TokenKind::Paren, "("),
                (TokenKind::CParen, ")"),
                (TokenKind::Number, "1"),
                (TokenKind::Number, "2"),
                (TokenKind::Number, "3"),
                (TokenKind::Number, "4"),
                (TokenKind::Number, "5"),
                (TokenKind::Number, "6"),
                (TokenKind::Number, "7"),
                (TokenKind::Number, "8"),
                (TokenKind::Number, "9"),
            ];

            let mut stream = Stream::new(SRC);
            for &(kind, text) in expected.iter() {
                assert_eq!(lit(&mut stream), Some(Token::new(kind, text)));
            }
            assert_eq!(lit(&mut stream), None);
        });
    }
}