use std::{collections::HashMap, str::CharIndices};
/// A half-open byte range `start..end` that identifies a slice of some string.
///
/// The range does not hold a reference to the text it describes; hand that
/// text to [`Substr::extract_str`] to obtain the actual slice.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct Substr {
    /// Byte offset at which the slice begins (inclusive).
    pub start: usize,
    /// Byte offset at which the slice stops (exclusive).
    pub end: usize,
}

impl Substr {
    /// Returns the part of `relative_to` that this range covers.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid string range for `relative_to`:
    /// out of bounds, `start > end`, or not on UTF-8 character boundaries.
    pub fn extract_str<'a>(&'_ self, relative_to: &'a str) -> &'a str {
        let Self { start, end } = *self;
        &relative_to[start..end]
    }
}
/// Deduplicates [`Substr`] ranges by the text they cover.
///
/// Entries are keyed by the string slice itself, so two ranges that cover
/// equal text (possibly at different positions) resolve to one canonical
/// range: the first one that was interned for that text.
#[derive(Debug, Default)]
pub struct Interner<'a>(HashMap<&'a str, Substr>);

impl<'a> Interner<'a> {
    /// Creates an empty interner.
    pub fn new() -> Self {
        Self::default()
    }

    /// Interns the text that `substr` covers within `str`.
    ///
    /// Returns `substr` itself if that text has not been interned before;
    /// otherwise returns the range that was stored the first time equal text
    /// was interned. The stored range is returned unchanged, so its offsets
    /// refer to whichever string was passed on that first call.
    ///
    /// # Panics
    ///
    /// Panics if `substr` is not a valid string range for `str`
    /// (see [`Substr::extract_str`]).
    pub fn intern(&mut self, str: &'a str, substr: Substr) -> Substr {
        // Reuse the shared slicing helper rather than duplicating the range logic.
        let text = substr.extract_str(str);
        *self.0.entry(text).or_insert(substr)
    }
}
/// Iterator over the whitespace-separated words of a string that yields each
/// word as a [`Substr`] byte range rather than as a borrowed slice.
#[derive(Clone, Debug)]
pub struct WhitespaceSplitIterator<'a> {
    /// Cursor over the not-yet-consumed characters and their byte offsets.
    underlying: CharIndices<'a>,
}

impl<'a> WhitespaceSplitIterator<'a> {
    /// Creates an iterator positioned at the start of `s`.
    ///
    /// The ranges it yields are byte offsets into `s`.
    pub fn new(s: &'a str) -> Self {
        let underlying = s.char_indices();
        Self { underlying }
    }
}
impl Iterator for WhitespaceSplitIterator<'_> {
    type Item = Substr;

    /// Yields the byte range of the next whitespace-delimited word, or `None`
    /// once only whitespace (or nothing) is left in the input.
    fn next(&mut self) -> Option<Self::Item> {
        // Skip any leading whitespace; the first non-whitespace character
        // opens the word. Running out of input here means there is no word.
        let (start, _) = self.underlying.find(|&(_, ch)| !ch.is_whitespace())?;

        // The word extends to the next whitespace character (which is
        // consumed). If the input ends first, the cursor's offset — the
        // length of the string at that point — closes the word.
        let end = match self.underlying.find(|&(_, ch)| ch.is_whitespace()) {
            Some((boundary, _)) => boundary,
            None => self.underlying.offset(),
        };

        Some(Substr { start, end })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `WhitespaceSplitIterator` produces exactly the words that
    /// `str::split_whitespace` produces for `s`.
    fn compare_same(s: &str) {
        let substrs = WhitespaceSplitIterator::new(s)
            .map(|ss| ss.extract_str(s))
            .collect::<Vec<_>>();
        let std_split = s.split_whitespace().collect::<Vec<_>>();
        assert_eq!(substrs, std_split);
    }

    #[test]
    fn splits_simple_whitespace() {
        compare_same("hello there world");
    }

    #[test]
    fn multiple_interior_whitespace() {
        compare_same("hello\t\t\tthere world");
    }

    #[test]
    fn leading_whitespace() {
        compare_same(" hello there world");
    }

    #[test]
    fn trailing_whitespace() {
        // Previously this reused the leading-whitespace input, so the
        // trailing case was never actually exercised.
        compare_same("hello there world ");
    }

    #[test]
    fn leading_and_trailing_whitespace() {
        compare_same(" \t hello there world \n");
    }

    #[test]
    fn empty_input() {
        compare_same("");
    }

    #[test]
    fn whitespace_only() {
        compare_same(" \t\n ");
    }

    #[test]
    fn multibyte_characters() {
        // Multi-byte letters plus a multi-byte whitespace separator (U+3000)
        // check that the yielded ranges are byte offsets on char boundaries.
        compare_same("héllo\u{3000}wörld ñ");
    }

    #[test]
    fn interner_returns_first_range_for_repeated_words() {
        let text = "ab cd ab";
        let mut interner = Interner::new();
        let interned = WhitespaceSplitIterator::new(text)
            .map(|ss| interner.intern(text, ss))
            .collect::<Vec<_>>();
        assert_eq!(
            interned,
            vec![
                Substr { start: 0, end: 2 },
                Substr { start: 3, end: 5 },
                Substr { start: 0, end: 2 },
            ]
        );
    }
}