use serde::{Deserialize, Serialize};
use enum_as_inner::EnumAsInner;
use schemars::JsonSchema;
/// An ordered list of [`Token`]s, wrapped in a newtype.
///
/// The inner vector is public, so callers may read or build it directly.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct Tokens(pub Vec<Token>);
// A single token: what kind it is, plus the range it covers.
//
// `Debug` is not derived; it is implemented by hand further down in this file
// to print the token as `start..end: kind`.
//
// NOTE: plain `//` comments are used on purpose. `///` docs on a type deriving
// `JsonSchema` are copied into the generated schema as descriptions, which
// would change the schema output.
#[derive(Clone, PartialEq, Serialize, Deserialize, Eq, JsonSchema)]
pub struct Token {
// The sort of token this is, including any payload it carries.
pub kind: TokenKind,
// Half-open `start..end` range covered by the token; presumably offsets into
// the source text (TODO confirm the unit against the lexer).
pub span: std::ops::Range<usize>,
}
// The kinds of token, each carrying whatever payload that kind needs.
//
// Spellings quoted in the comments below are the ones produced by the
// `Display` impl for this type. `Eq` and `Hash` are implemented by hand
// elsewhere in this file rather than derived.
//
// NOTE: plain `//` comments are used on purpose. `///` docs on a type deriving
// `JsonSchema` are copied into the generated schema as descriptions, which
// would change the schema output.
#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, JsonSchema)]
pub enum TokenKind {
// A line break; displayed as the words "new line".
NewLine,
// An identifier, holding its name. An empty name is displayed as
// "an identifier".
Ident(String),
// A keyword, holding its text.
Keyword(String),
// A literal value. Only when the `serde_yaml` feature is enabled, it is
// (de)serialized through `serde_yaml::with::singleton_map` and its schema is
// taken from `Literal`.
#[cfg_attr(
feature = "serde_yaml",
serde(with = "serde_yaml::with::singleton_map"),
schemars(with = "Literal")
)]
Literal(Literal),
// A parameter; displayed as `$` followed by the stored text.
Param(String),
// The `..` range operator. For each side, a `false` flag is displayed as a
// space on that side of the `..`, a `true` flag as nothing.
Range {
bind_left: bool,
bind_right: bool,
},
// An interpolated string: a prefix character and the string body; displayed as
// the prefix followed by the body in double quotes.
Interpolation(char, String),
// A single-character control token; displayed as that character.
Control(char),
// Fixed operators. In declaration order they are displayed as:
// `->`, `=>`, `==`, `!=`, `>=`, `<=`, `~=`, `&&`, `||`, `??`, `//`, `**`, `@{`.
ArrowThin, ArrowFat, Eq, Ne, Gte, Lte, RegexSearch, And, Or, Coalesce, DivInt, Pow, Annotate,
// A comment, holding the text after the leading `#`.
Comment(String),
// A doc comment, holding the text after the leading `#!`.
DocComment(String),
// A line continuation; displayed as a line break followed by `\ `, then each
// contained token in turn (the `Display` impl calls them `comments`).
LineWrap(Vec<TokenKind>),
// Marker displayed as "start of input".
Start,
}
// A literal value carried by `TokenKind::Literal`.
//
// The `Display` impl for this type defines the textual form quoted below.
// Only `PartialEq` is derived (not `Eq`) because of the `f64` in `Float`.
//
// NOTE: plain `//` comments are used on purpose. `///` docs on a type deriving
// `JsonSchema` are copied into the generated schema as descriptions, which
// would change the schema output.
#[derive(
Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr, JsonSchema,
)]
pub enum Literal {
// The null value; displayed as `null`.
Null,
// A signed 64-bit integer.
Integer(i64),
// A 64-bit float; displayed with Rust's default float formatting.
Float(f64),
// A boolean; displayed as `true` or `false`.
Boolean(bool),
// A string, holding its unescaped contents; displayed escaped and quoted.
String(String),
// A raw string; displayed as `r` followed by the quoted contents, with no
// escaping applied.
RawString(String),
// Date, time and timestamp values are kept as text and displayed as `@`
// followed by that text.
Date(String),
Time(String),
Timestamp(String),
// An integer amount with a unit; displayed as the number directly followed by
// the unit.
ValueAndUnit(ValueAndUnit),
}
impl TokenKind {
pub fn range(bind_left: bool, bind_right: bool) -> Self {
TokenKind::Range {
bind_left,
bind_right,
}
}
}
// An integer amount paired with the text of a unit. `Literal`'s `Display` impl
// prints it as the number directly followed by the unit.
//
// NOTE: plain `//` comments are used on purpose. `///` docs on a type deriving
// `JsonSchema` are copied into the generated schema as descriptions, which
// would change the schema output.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub struct ValueAndUnit {
// `n` is the amount; `unit` is the unit's text.
pub n: i64, pub unit: String, }
impl std::fmt::Display for Literal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Literal::Null => write!(f, "null")?,
Literal::Integer(i) => write!(f, "{i}")?,
Literal::Float(i) => write!(f, "{i}")?,
Literal::String(s) => {
write!(f, "{}", quote_string(escape_all_except_quotes(s).as_str()))?;
}
Literal::RawString(s) => {
write!(f, "r{}", quote_string(s))?;
}
Literal::Boolean(b) => {
f.write_str(if *b { "true" } else { "false" })?;
}
Literal::Date(inner) | Literal::Time(inner) | Literal::Timestamp(inner) => {
write!(f, "@{inner}")?;
}
Literal::ValueAndUnit(i) => {
write!(f, "{}{}", i.n, i.unit)?;
}
}
Ok(())
}
}
/// Wraps `s` in quote delimiters chosen so the content needs no escaping.
///
/// * No `"` in `s`: wrap in double quotes.
/// * `"` but no `'` in `s`: wrap in single quotes.
/// * Both present: wrap in an odd-length run of one quote character, longer
///   than any run of that character inside `s`. Single quotes are chosen when
///   `s` starts or ends with `"`, double quotes otherwise.
///
/// NOTE(review): in the last case, if `s` also starts or ends with the chosen
/// quote character, that character sits directly against the delimiter run.
/// Whether the consumer of this output copes with that is not visible here.
fn quote_string(s: &str) -> String {
    match (s.contains('"'), s.contains('\'')) {
        (false, _) => format!("\"{s}\""),
        (true, false) => format!("'{s}'"),
        (true, true) => {
            // Avoid a delimiter that would touch a `"` at either edge of the content.
            let touches_double = s.starts_with('"') || s.ends_with('"');
            let quote = if touches_double { '\'' } else { '"' };

            // Length of the longest unbroken run of `quote` inside the content.
            let (longest_run, _) = s.chars().fold((0usize, 0usize), |(longest, current), ch| {
                let current = if ch == quote { current + 1 } else { 0 };
                (longest.max(current), current)
            });

            // Smallest odd number strictly greater than the longest run.
            let delim_len = longest_run + 1 + longest_run % 2;
            let delim = String::from(quote).repeat(delim_len);
            format!("{delim}{s}{delim}")
        }
    }
}
/// Escapes every character of `s` with [`char::escape_default`], except the
/// two quote characters `"` and `'`, which are copied through untouched.
///
/// Quotes are left alone because the caller chooses delimiters that make
/// escaping them unnecessary.
fn escape_all_except_quotes(s: &str) -> String {
    s.chars().fold(String::new(), |mut escaped, ch| {
        match ch {
            '"' | '\'' => escaped.push(ch),
            other => escaped.extend(other.escape_default()),
        }
        escaped
    })
}
// `Hash` is written by hand because it cannot be derived: `Literal::Float`
// holds an `f64`, which does not implement `Hash`. Only the variant is hashed,
// never the payload, so equal tokens always hash alike, while different tokens
// of the same variant collide.
#[allow(clippy::derived_hash_with_manual_eq)]
impl std::hash::Hash for TokenKind {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let variant = core::mem::discriminant(self);
        std::hash::Hash::hash(&variant, state);
    }
}
// Marker impl asserted by hand: `Eq` cannot be derived for `TokenKind` because
// `Literal` is only `PartialEq` (its `Float` variant holds an `f64`). As a
// consequence, a float literal holding NaN still compares unequal to itself.
impl std::cmp::Eq for TokenKind {}
impl std::fmt::Display for TokenKind {
    /// Writes the token's spelling, or a short description for tokens that
    /// have no printable spelling (`NewLine`, `Start`, an empty `Ident`).
    ///
    /// Comments and doc comments are written with a trailing line break.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Tokens that are described rather than spelled.
            TokenKind::NewLine => f.write_str("new line"),
            TokenKind::Start => f.write_str("start of input"),
            TokenKind::Ident(name) if name.is_empty() => f.write_str("an identifier"),

            // Tokens that carry their own text.
            TokenKind::Ident(name) => f.write_str(name),
            TokenKind::Keyword(word) => write!(f, "keyword {word}"),
            TokenKind::Literal(literal) => write!(f, "{literal}"),
            TokenKind::Control(symbol) => write!(f, "{symbol}"),
            TokenKind::Param(id) => write!(f, "${id}"),
            TokenKind::Interpolation(prefix, body) => write!(f, "{prefix}\"{body}\""),
            TokenKind::Comment(text) => writeln!(f, "#{text}"),
            TokenKind::DocComment(text) => writeln!(f, "#!{text}"),

            // Fixed multi-character operators.
            TokenKind::ArrowThin => f.write_str("->"),
            TokenKind::ArrowFat => f.write_str("=>"),
            TokenKind::Eq => f.write_str("=="),
            TokenKind::Ne => f.write_str("!="),
            TokenKind::Gte => f.write_str(">="),
            TokenKind::Lte => f.write_str("<="),
            TokenKind::RegexSearch => f.write_str("~="),
            TokenKind::And => f.write_str("&&"),
            TokenKind::Or => f.write_str("||"),
            TokenKind::Coalesce => f.write_str("??"),
            TokenKind::DivInt => f.write_str("//"),
            TokenKind::Pow => f.write_str("**"),
            TokenKind::Annotate => f.write_str("@{"),

            // A side that is not bound is shown as a space next to the `..`.
            TokenKind::Range {
                bind_left,
                bind_right,
            } => {
                let left_pad = if *bind_left { "" } else { " " };
                let right_pad = if *bind_right { "" } else { " " };
                write!(f, "'{left_pad}..{right_pad}'")
            }

            // Line break, continuation marker, then every contained token.
            TokenKind::LineWrap(comments) => {
                f.write_str("\n\\ ")?;
                comments
                    .iter()
                    .try_for_each(|comment| write!(f, "{comment}"))
            }
        }
    }
}
impl std::fmt::Debug for Token {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}..{}: {:?}", self.span.start, self.span.end, self.kind)
}
}
#[cfg(test)]
mod test {
    use insta::assert_snapshot;

    use super::*;

    /// Delimiter selection for `Literal::String`, from plain double quotes up
    /// to multi-quote delimiters when both quote characters appear.
    #[test]
    fn test_string_quoting() {
        fn make_str(s: &str) -> Literal {
            Literal::String(s.to_string())
        }

        assert_snapshot!(
            make_str("hello").to_string(),
            @r#""hello""#
        );

        assert_snapshot!(
            make_str(r#"he's nice"#).to_string(),
            @r#""he's nice""#
        );

        assert_snapshot!(
            make_str(r#"he said "what up""#).to_string(),
            @r#"'he said "what up"'"#
        );

        assert_snapshot!(
            make_str(r#"he said "what's up""#).to_string(),
            @r#"'''he said "what's up"'''"#
        );

        assert_snapshot!(
            make_str(r#" single' three double""" four double"""" "#).to_string(),
            @r#"""""" single' three double""" four double"""" """"""#
        );

        assert_snapshot!(
            make_str(r#""Starts with a double quote and ' contains a single quote"#).to_string(),
            @r#"'''"Starts with a double quote and ' contains a single quote'''"#
        );
    }

    /// Escaping for `Literal::String`: backslashes are doubled and a real line
    /// break is rendered as the `\n` escape.
    #[test]
    fn test_string_escapes() {
        assert_snapshot!(
            Literal::String(r#"hello\nworld"#.to_string()).to_string(),
            @r#""hello\\nworld""#
        );

        assert_snapshot!(
            Literal::String(r#"hello\tworld"#.to_string()).to_string(),
            @r#""hello\\tworld""#
        );

        // The line break is written as an explicit escape in a normal string
        // literal, so the input does not depend on how this file is indented
        // and always matches the snapshot.
        assert_snapshot!(
            Literal::String("hello\nworld".to_string()).to_string(),
            @r#""hello\nworld""#
        );
    }

    /// `Literal::RawString` is quoted the same way, with an `r` prefix.
    #[test]
    fn test_raw_string_quoting() {
        fn make_str(s: &str) -> Literal {
            Literal::RawString(s.to_string())
        }

        assert_snapshot!(
            make_str("hello").to_string(),
            @r#"r"hello""#
        );
    }
}