use std::fmt;
/// Result type shared by the parsers in this file.
///
/// On success it carries the parsed value together with the remaining, not
/// yet consumed input. On failure it carries a string slice; the parsers
/// visible here return the input they could not parse.
type ParseResult<'a, T> = std::result::Result<(T, &'a str), &'a str>;
pub fn word(input: &str) -> ParseResult<&str> {
let end_pos = input
.char_indices()
.find_map(|(i, c)| is_whitespace(c).then_some(i))
.unwrap_or(input.len());
let next_pos = input[end_pos..]
.char_indices()
.find_map(|(i, c)| (!is_whitespace(c)).then_some(end_pos + i))
.unwrap_or(input.len());
debug_assert!(next_pos >= end_pos);
if end_pos > 0 {
Ok((&input[0..end_pos], &input[next_pos..]))
} else {
Err(input)
}
}
/// Collects consecutive words from the front of `input`.
///
/// Calls [`word`] repeatedly until it fails, then returns every word that
/// was read together with the input that was left when parsing stopped.
pub fn words(input: &str) -> (Vec<&str>, &str) {
    let mut found = Vec::new();
    let mut remaining = input;
    loop {
        match word(remaining) {
            Ok((token, tail)) => {
                found.push(token);
                remaining = tail;
            }
            // No further word could be read: hand back what is left.
            Err(_) => return (found, remaining),
        }
    }
}
/// Splits `input` at its first newline.
///
/// Returns the text before the first `'\n'` and the text after it; the
/// newline itself is dropped. When `input` contains no newline, the whole
/// input is the line and the remainder is empty. This function always
/// returns `Ok`.
pub fn line(input: &str) -> ParseResult<&str> {
    let (head, tail) = input.split_once('\n').unwrap_or((input, ""));
    Ok((head, tail))
}
/// Reads the indentation of the first line of `input` that has content.
///
/// Lines consisting only of whitespace and terminated by a newline are
/// skipped. On success returns the indentation found and the remaining
/// input, which starts at the first non-whitespace character. If the input
/// runs out before any content is seen, the result is the default
/// indentation and an empty remainder.
///
/// # Errors
///
/// Propagates the error from `line_indent`, which fails when a line's
/// leading whitespace cannot be expressed as a single [`Indent`] (for
/// example a space followed by a tab).
pub fn indent(input: &str) -> ParseResult<Indent> {
    let mut remaining = input;
    loop {
        let (found, rest) = line_indent(remaining)?;
        if let Some(prefix) = found {
            return Ok((prefix, rest));
        }
        // A blank line was consumed, so the input must have shrunk.
        debug_assert!(rest.len() < remaining.len());
        remaining = rest;
    }
}
/// Reads the leading whitespace of a single line.
///
/// Returns:
/// * `Some(indent)` and the input from the first non-whitespace character
///   onwards when the line has content,
/// * `None` and the input after the newline when the line is blank,
/// * `Some(Indent::default())` and `""` when the input ends before any
///   content or newline is seen.
///
/// # Errors
///
/// Returns `Err(input)` when [`Indent::increment`] rejects a character,
/// i.e. the whitespace run does not fit a single indentation style.
fn line_indent(input: &str) -> ParseResult<Option<Indent>> {
    let mut depth = Indent::default();
    for (offset, ch) in input.char_indices() {
        match ch {
            // Content reached: everything before it was the indentation.
            _ if !is_whitespace(ch) => {
                return Ok((Some(depth), &input[offset..]));
            }
            // Blank line: report no indentation and skip past the newline.
            '\n' => return Ok((None, &input[offset + 1..])),
            _ => match depth.increment(ch) {
                Some(deeper) => depth = deeper,
                None => return Err(input),
            },
        }
    }
    // Input ended inside the whitespace run; treat it as unindented.
    Ok((Some(Indent::default()), ""))
}
/// Indentation prefix of a line: a run of spaces or a run of tabs.
///
/// The default value is [`Indent::Undetermined`].
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub enum Indent {
    /// No indentation character has been seen, so the style is not yet known.
    #[default]
    Undetermined,
    /// Indentation made of the given number of spaces.
    Spaces(usize),
    /// Indentation made of the given number of tabs.
    Tabs(usize),
}
impl Indent {
pub fn increment(self, c: char) -> Option<Indent> {
use Indent::*;
match self {
Undetermined if c == ' ' => Some(Spaces(1)),
Undetermined if c == '\t' => Some(Tabs(1)),
Spaces(n) if c == ' ' => Some(Spaces(n + 1)),
Tabs(n) if c == '\t' => Some(Tabs(n + 1)),
_ => None,
}
}
pub fn as_char(self) -> char {
match self {
Indent::Undetermined => '\0',
Indent::Spaces(_) => ' ',
Indent::Tabs(_) => '\t',
}
}
pub fn len(self) -> usize {
match self {
Indent::Undetermined => 0,
Indent::Spaces(n) | Indent::Tabs(n) => n,
}
}
pub fn compatible_with(self, other: Indent) -> bool {
!matches!(
(self, other),
(Indent::Tabs(_), Indent::Spaces(_))
| (Indent::Spaces(_), Indent::Tabs(_))
)
}
}
impl std::ops::Add for Indent {
type Output = Indent;
fn add(self, rhs: Self) -> Self::Output {
match (self, rhs) {
(Indent::Undetermined, Indent::Undetermined) => {
Indent::Undetermined
}
(Indent::Undetermined, Indent::Spaces(a)) => Indent::Spaces(a),
(Indent::Spaces(a), Indent::Undetermined) => Indent::Spaces(a),
(Indent::Undetermined, Indent::Tabs(a)) => Indent::Tabs(a),
(Indent::Tabs(a), Indent::Undetermined) => Indent::Tabs(a),
(Indent::Tabs(a), Indent::Tabs(b)) => Indent::Tabs(a + b),
(Indent::Spaces(a), Indent::Spaces(b)) => Indent::Spaces(a + b),
_ => panic!("Invalid indent addition"),
}
}
}
impl std::ops::Sub for Indent {
type Output = Indent;
fn sub(self, rhs: Self) -> Self::Output {
match (self, rhs) {
(Indent::Undetermined, Indent::Undetermined) => {
Indent::Undetermined
}
(Indent::Spaces(a), Indent::Undetermined) => Indent::Spaces(a),
(Indent::Tabs(a), Indent::Undetermined) => Indent::Tabs(a),
(Indent::Tabs(a), Indent::Tabs(b)) if b <= a => Indent::Tabs(a - b),
(Indent::Spaces(a), Indent::Spaces(b)) if b <= a => {
Indent::Spaces(a - b)
}
_ => panic!("Invalid indent subtraction"),
}
}
}
impl fmt::Display for Indent {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Indent::*;
let (c, n) = match self {
Undetermined => ('\0', 0),
Spaces(n) | Tabs(n) => (self.as_char(), *n),
};
for _ in 0..n {
write!(f, "{}", c)?;
}
Ok(())
}
}
/// Returns `true` for the characters this parser treats as whitespace:
/// space, tab and newline.
///
/// This is narrower than [`char::is_whitespace`]; other Unicode whitespace
/// such as U+00A0 is not included.
pub fn is_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n')
}
#[cfg(test)]
mod tests {
    use crate as idm;
    use crate::de::parse::{self, Indent};

    // Only space, tab and newline count as whitespace for the parser, unlike
    // `char::is_whitespace`, which also accepts e.g. a non-breaking space.
    #[test]
    fn test_whitespace() {
        assert!(idm::is_whitespace(' '));
        assert!(idm::is_whitespace('\t'));
        assert!('\u{00a0}'.is_whitespace());
        assert!(!idm::is_whitespace('\u{00a0}'));
    }

    // Inputs that expect `Indent::Spaces(2)` must carry exactly two leading
    // spaces on the content line.
    #[test]
    fn test_indent_parse() {
        assert_eq!(parse::indent(""), Ok((Indent::default(), "")));
        assert_eq!(parse::indent("  xyzzy"), Ok((Indent::Spaces(2), "xyzzy")));
        assert_eq!(parse::indent("\txyzzy"), Ok((Indent::Tabs(1), "xyzzy")));
        // Mixing spaces and tabs within one indentation is rejected.
        assert!(parse::indent(" \txyzzy").is_err());
        assert!(parse::indent("\t xyzzy").is_err());
        // Blank lines before the content line are skipped.
        assert_eq!(
            parse::indent("\n  xyzzy"),
            Ok((Indent::Spaces(2), "xyzzy"))
        );
        assert_eq!(
            parse::indent(" \n  xyzzy"),
            Ok((Indent::Spaces(2), "xyzzy"))
        );
        // Input that ends in whitespace only yields the default indentation.
        assert_eq!(parse::indent(" \n "), Ok((Indent::default(), "")));
    }

    #[test]
    fn test_word() {
        // No word can be read from empty or whitespace-leading input.
        assert!(parse::word("").is_err());
        assert!(parse::word(" ").is_err());
        assert!(parse::word(" \n ").is_err());
        // The whitespace following a word is consumed along with it.
        assert_eq!(parse::word("a"), Ok(("a", "")));
        assert_eq!(parse::word("a "), Ok(("a", "")));
        assert_eq!(parse::word("a b "), Ok(("a", "b ")));
        assert_eq!(parse::word("a\nb"), Ok(("a", "b")));
    }
}