use logos::{Lexer, Logos};
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Escape {
#[token(r#"\b"#)]
Backspace,
#[token(r#"\t"#)]
Tab,
#[regex(r#"(\\\s*\n)|(\\\s*\r\n)"#)]
Newline,
#[token(r#"\n"#)]
LineFeed,
#[token(r#"\f"#)]
FormFeed,
#[token(r#"\r"#)]
CarriageReturn,
#[token(r#"\""#)]
Quote,
#[token(r#"\\"#)]
Backslash,
#[regex(r#"\\u[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#)]
Unicode,
#[regex(r#"\\U[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#)]
UnicodeLarge,
#[regex(r#"\\."#)]
Unknown,
#[error]
UnEscaped,
}
use Escape::*;
pub fn escape(s: &str) -> String {
let mut escaped = String::with_capacity(s.len());
for c in s.chars() {
match c {
'\u{0008}' => escaped.push_str(r#"\b"#),
'\u{0009}' => escaped.push_str(r#"\t"#),
'\u{000A}' => escaped.push_str(r#"\n"#),
'\u{000C}' => escaped.push_str(r#"\f"#),
'\u{000D}' => escaped.push_str(r#"\r"#),
'\u{0022}' => escaped.push_str(r#"\""#),
'\u{005C}' => escaped.push_str(r#"\\"#),
_ => {
escaped.push(c);
}
}
}
escaped
}
pub fn unescape(s: &str) -> Result<String, usize> {
let mut new_s = String::with_capacity(s.len());
let mut lexer: Lexer<Escape> = Lexer::new(s);
while let Some(t) = lexer.next() {
match t {
Backspace => new_s += "\u{0008}",
Tab => new_s += "\u{0009}",
LineFeed => new_s += "\u{000A}",
FormFeed => new_s += "\u{000C}",
CarriageReturn => new_s += "\u{000D}",
Quote => new_s += "\u{0022}",
Backslash => new_s += "\u{005C}",
Newline => {}
Unicode => {
new_s += &std::char::from_u32(
u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?,
)
.ok_or(lexer.span().start)?
.to_string();
}
UnicodeLarge => {
new_s += &std::char::from_u32(
u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?,
)
.ok_or(lexer.span().start)?
.to_string();
}
Unknown => return Err(lexer.span().end),
UnEscaped => {
new_s += lexer.slice();
}
}
}
Ok(new_s + lexer.remainder())
}
pub fn check_escape(s: &str) -> Result<(), Vec<usize>> {
let mut lexer: Lexer<Escape> = Lexer::new(s);
let mut invalid = Vec::new();
while let Some(t) = lexer.next() {
match t {
Backspace => {}
Tab => {}
LineFeed => {}
FormFeed => {}
CarriageReturn => {}
Quote => {}
Backslash => {}
Newline => {}
Unicode => {
let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) {
Ok(v) => v,
Err(_) => {
invalid.push(lexer.span().start);
continue;
}
};
match std::char::from_u32(char_val) {
None => {
invalid.push(lexer.span().start);
}
Some(_) => {}
};
}
UnicodeLarge => {
let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) {
Ok(v) => v,
Err(_) => {
invalid.push(lexer.span().start);
continue;
}
};
match std::char::from_u32(char_val) {
None => {
invalid.push(lexer.span().start);
}
Some(_) => {}
};
}
Unknown => invalid.push(lexer.span().start),
UnEscaped => {}
}
}
if invalid.is_empty() {
Ok(())
} else {
Err(invalid)
}
}