/// Decision produced by [`unescape_inline_char`] for the character it was
/// given (presumably the character that follows a backslash in inline text).
///
/// The common traits are derived so the value can be printed with `{:?}`,
/// compared in assertions, and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeAction {
    /// The inspected character is non-alphanumeric; carries that character.
    Escape(char),
    /// The inspected character is alphanumeric, or there was no character.
    Literal,
}
/// Classifies `next` for inline unescaping.
///
/// Returns [`EscapeAction::Escape`] carrying the character when `next` holds a
/// character that is not alphanumeric (per [`char::is_alphanumeric`]).
/// Returns [`EscapeAction::Literal`] when the character is alphanumeric or
/// when `next` is `None`.
pub fn unescape_inline_char(next: Option<char>) -> EscapeAction {
    next.filter(|candidate| !candidate.is_alphanumeric())
        .map_or(EscapeAction::Literal, EscapeAction::Escape)
}
/// Removes inline escape backslashes from `text`.
///
/// * A backslash followed by a non-alphanumeric character (per
///   [`char::is_alphanumeric`]) is dropped and the following character is
///   kept, so `\*` becomes `*` and `\\` becomes `\`.
/// * A backslash followed by an alphanumeric character is kept verbatim, so
///   `C:\Users` is returned unchanged.
/// * A backslash at the very end of the input is kept verbatim.
///
/// Every other character is copied through unchanged.
pub fn unescape_inline(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // One character of lookahead is all the rule needs, so walk the string
    // lazily instead of first collecting every char into a `Vec<char>`.
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        match chars.peek().copied() {
            Some(next) if !next.is_alphanumeric() => {
                // Escaped character: emit it and consume it, dropping the backslash.
                result.push(next);
                chars.next();
            }
            // Alphanumeric follower or end of input: the backslash is literal.
            // The follower (if any) is emitted by the next loop iteration.
            _ => result.push('\\'),
        }
    }
    result
}
/// Returns `text` with a backslash inserted before every character for which
/// `is_inline_special` returns `true`; all other characters are copied as-is.
pub fn escape_inline(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, current| {
            if is_inline_special(current) {
                escaped.push('\\');
            }
            escaped.push(current);
            escaped
        })
}
/// Reports whether `ch` is one of the seven characters
/// `\`, `*`, `_`, `` ` ``, `#`, `[` or `]`.
fn is_inline_special(ch: char) -> bool {
    const INLINE_SPECIALS: [char; 7] = ['\\', '*', '_', '`', '#', '[', ']'];
    INLINE_SPECIALS.contains(&ch)
}
/// Reports whether `prev` is a `Token::Text` whose content ends in an odd
/// number of backslashes.
///
/// Returns `false` when `prev` is `None` or any other token variant.
fn is_quote_escaped_by_prev_token(prev: Option<&crate::lex::token::Token>) -> bool {
    use crate::lex::token::Token;
    if let Some(Token::Text(text)) = prev {
        // An even-length run of trailing backslashes pairs up with itself,
        // so only an odd-length run counts.
        let trailing_backslashes = text
            .bytes()
            .rev()
            .take_while(|byte| *byte == b'\\')
            .count();
        trailing_backslashes % 2 != 0
    } else {
        false
    }
}
/// Returns the indices of every `Token::LexMarker` in `tokens` that is not
/// inside a quoted span.
///
/// A `Token::Quote` toggles the "inside quotes" state, unless
/// `is_quote_escaped_by_prev_token` reports that the token immediately before
/// it escapes the quote. Indices are returned in ascending order.
pub fn find_structural_lex_markers(tokens: &[crate::lex::token::Token]) -> Vec<usize> {
    use crate::lex::token::Token;
    let mut inside_quotes = false;
    // Token seen on the previous iteration; `None` for the first token.
    let mut previous: Option<&Token> = None;
    let mut found = Vec::new();
    for (index, current) in tokens.iter().enumerate() {
        if matches!(current, Token::Quote) {
            if !is_quote_escaped_by_prev_token(previous) {
                inside_quotes = !inside_quotes;
            }
        } else if !inside_quotes && matches!(current, Token::LexMarker) {
            found.push(index);
        }
        previous = Some(current);
    }
    found
}
/// Variant of `find_structural_lex_markers` for slices of `(Token, R)` pairs.
///
/// Only the first element of each pair is inspected; the `R` payload is
/// ignored. Returns the indices of every `Token::LexMarker` that is not inside
/// a quoted span, where a `Token::Quote` toggles the quoted state unless
/// `is_quote_escaped_by_prev_token` reports that the preceding token escapes it.
pub fn find_structural_lex_marker_pairs<R>(tokens: &[(crate::lex::token::Token, R)]) -> Vec<usize> {
    use crate::lex::token::Token;
    let mut inside_quotes = false;
    // Token seen on the previous iteration; `None` for the first pair.
    let mut previous: Option<&Token> = None;
    let mut found = Vec::new();
    for (index, (current, _)) in tokens.iter().enumerate() {
        if matches!(current, Token::Quote) {
            if !is_quote_escaped_by_prev_token(previous) {
                inside_quotes = !inside_quotes;
            }
        } else if !inside_quotes && matches!(current, Token::LexMarker) {
            found.push(index);
        }
        previous = Some(current);
    }
    found
}
/// Reports whether the byte at index `pos` of `source` is preceded by an odd
/// number of consecutive backslash bytes.
///
/// Only the bytes before `pos` are examined; the byte at `pos` itself is never
/// read. Returns `false` when `pos` is `0`.
///
/// # Panics
///
/// Panics if `pos` is greater than `source.len()`.
pub fn is_quote_escaped(source: &[u8], pos: usize) -> bool {
    let preceding_backslashes = source[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 != 0
}
/// Converts the raw text of a quoted value into its content.
///
/// If `raw` both starts and ends with `"` (and is at least two bytes long),
/// that surrounding pair is removed; otherwise `raw` is processed as-is.
/// Within the remaining text, `\"` becomes `"` and `\\` becomes `\`. A
/// backslash followed by any other character, or a trailing backslash, is
/// kept verbatim (`\n` stays as the two characters `\` and `n`).
pub fn unescape_quoted(raw: &str) -> String {
    // `strip_suffix` runs on what is left after the opening quote, so a lone
    // `"` is not treated as an empty quoted string and falls back to `raw`.
    let inner = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(raw);
    let mut result = String::with_capacity(inner.len());
    // One character of lookahead suffices; no intermediate `Vec<char>` needed.
    let mut chars = inner.chars().peekable();
    while let Some(ch) = chars.next() {
        match chars.peek().copied() {
            Some(next) if ch == '\\' && (next == '"' || next == '\\') => {
                // Recognised escape: emit the escaped character, drop the backslash.
                result.push(next);
                chars.next();
            }
            _ => result.push(ch),
        }
    }
    result
}
/// Returns `text` with a backslash inserted before every `"` and every `\`.
///
/// The surrounding quotes are not added; all other characters are copied
/// through unchanged.
pub fn escape_quoted(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for current in text.chars() {
        match current {
            '"' | '\\' => {
                escaped.push('\\');
                escaped.push(current);
            }
            plain => escaped.push(plain),
        }
    }
    escaped
}
// Unit tests for the inline and quoted escape helpers defined in this file.
//
// Reading note: the inputs are Rust string literals, so `"\\"` in the test
// source is a single backslash in the text handed to the function under test.
#[cfg(test)]
mod tests {
use super::*;
// ---- unescape_inline ----
#[test]
fn unescape_plain_text_unchanged() {
assert_eq!(unescape_inline("hello world"), "hello world");
}
#[test]
fn unescape_empty_string() {
assert_eq!(unescape_inline(""), "");
}
#[test]
fn unescape_asterisk() {
assert_eq!(unescape_inline("\\*literal\\*"), "*literal*");
}
#[test]
fn unescape_underscore() {
assert_eq!(unescape_inline("\\_not emphasis\\_"), "_not emphasis_");
}
#[test]
fn unescape_backtick() {
assert_eq!(unescape_inline("\\`not code\\`"), "`not code`");
}
#[test]
fn unescape_hash() {
assert_eq!(unescape_inline("\\#not math\\#"), "#not math#");
}
#[test]
fn unescape_brackets() {
assert_eq!(unescape_inline("\\[not a ref\\]"), "[not a ref]");
}
// A backslash followed by an alphanumeric character is kept as-is.
#[test]
fn unescape_backslash_before_alphanumeric_preserved() {
assert_eq!(unescape_inline("C:\\Users\\name"), "C:\\Users\\name");
}
// A doubled backslash collapses to one, because a backslash is itself
// non-alphanumeric.
#[test]
fn unescape_double_backslash() {
assert_eq!(unescape_inline("C:\\\\Users\\\\name"), "C:\\Users\\name");
}
// A backslash with nothing after it is kept.
#[test]
fn unescape_trailing_backslash() {
assert_eq!(unescape_inline("text\\"), "text\\");
}
#[test]
fn unescape_backslash_before_space() {
assert_eq!(unescape_inline("hello\\ world"), "hello world");
}
#[test]
fn unescape_backslash_before_punctuation() {
assert_eq!(unescape_inline("\\!\\?\\,\\."), "!?,.");
}
// Four backslashes in the text form two escaped pairs, giving two backslashes.
#[test]
fn unescape_multiple_consecutive_backslashes() {
assert_eq!(unescape_inline("\\\\\\\\"), "\\\\");
}
// Three backslashes then `*`: an escaped backslash followed by an escaped `*`.
#[test]
fn unescape_triple_backslash_then_star() {
assert_eq!(unescape_inline("\\\\\\*"), "\\*");
}
#[test]
fn unescape_mixed_escaped_and_plain() {
assert_eq!(
unescape_inline("plain \\*escaped\\* plain"),
"plain *escaped* plain"
);
}
#[test]
fn unescape_backslash_before_digit_preserved() {
assert_eq!(unescape_inline("item\\1"), "item\\1");
}
// `is_alphanumeric` is Unicode-aware, so a non-ASCII letter also keeps its
// backslash.
#[test]
fn unescape_backslash_before_unicode_letter_preserved() {
assert_eq!(unescape_inline("path\\ñ"), "path\\ñ");
}
// A non-ASCII symbol is not alphanumeric, so its backslash is removed.
#[test]
fn unescape_backslash_before_non_ascii_symbol() {
assert_eq!(unescape_inline("\\→"), "→");
}
// ---- escape_inline ----
#[test]
fn escape_plain_text_unchanged() {
assert_eq!(escape_inline("hello world"), "hello world");
}
#[test]
fn escape_empty_string() {
assert_eq!(escape_inline(""), "");
}
#[test]
fn escape_special_chars() {
assert_eq!(escape_inline("*bold*"), "\\*bold\\*");
assert_eq!(escape_inline("_emph_"), "\\_emph\\_");
assert_eq!(escape_inline("`code`"), "\\`code\\`");
assert_eq!(escape_inline("#math#"), "\\#math\\#");
assert_eq!(escape_inline("[ref]"), "\\[ref\\]");
}
#[test]
fn escape_backslash() {
assert_eq!(escape_inline("C:\\Users"), "C:\\\\Users");
}
// ---- escape_inline followed by unescape_inline must return the input ----
#[test]
fn roundtrip_plain_text() {
let original = "hello world";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_special_chars() {
let original = "*bold* and _emph_ and `code` and #math# and [ref]";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_backslashes() {
let original = "C:\\Users\\name";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_mixed() {
let original = "path\\file *bold* and \\more";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
// ---- unescape_quoted ----
#[test]
fn unescape_quoted_simple() {
assert_eq!(unescape_quoted("\"Hello World\""), "Hello World");
}
#[test]
fn unescape_quoted_with_escaped_quote() {
assert_eq!(unescape_quoted("\"say \\\"hello\\\"\""), "say \"hello\"");
}
#[test]
fn unescape_quoted_with_escaped_backslash() {
assert_eq!(unescape_quoted("\"path\\\\to\""), "path\\to");
}
// The escaped backslash sits directly before the closing quote; the closing
// quote is still stripped and one backslash remains.
#[test]
fn unescape_quoted_escaped_backslash_before_quote() {
assert_eq!(unescape_quoted("\"end\\\\\""), "end\\");
}
// Only `\"` and `\\` are escapes; `\n` stays as a backslash followed by `n`.
#[test]
fn unescape_quoted_other_backslash_literal() {
assert_eq!(unescape_quoted("\"hello\\nworld\""), "hello\\nworld");
}
#[test]
fn unescape_quoted_empty() {
assert_eq!(unescape_quoted("\"\""), "");
}
// Input without surrounding quotes is processed as-is.
#[test]
fn unescape_quoted_no_quotes() {
assert_eq!(unescape_quoted("simple"), "simple");
}
// ---- escape_quoted ----
#[test]
fn escape_quoted_simple() {
assert_eq!(escape_quoted("Hello World"), "Hello World");
}
#[test]
fn escape_quoted_with_quote() {
assert_eq!(escape_quoted("say \"hello\""), "say \\\"hello\\\"");
}
#[test]
fn escape_quoted_with_backslash() {
assert_eq!(escape_quoted("path\\to"), "path\\\\to");
}
#[test]
fn escape_quoted_empty() {
assert_eq!(escape_quoted(""), "");
}
// ---- escape_quoted, wrapped in quotes, then unescape_quoted must return the input ----
#[test]
fn roundtrip_quoted_simple() {
let original = "Hello World";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_quotes() {
let original = "say \"hello\" and \"bye\"";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_backslashes() {
let original = "C:\\Users\\name";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_both() {
let original = "path\\to \"file\"";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
// ---- is_quote_escaped ----
// In each case `pos` is the byte index of the quote; only the bytes before it
// are counted.
#[test]
fn is_quote_escaped_no_backslash() {
assert!(!is_quote_escaped(b"hello\"", 5));
}
#[test]
fn is_quote_escaped_single_backslash() {
assert!(is_quote_escaped(b"hello\\\"", 6));
}
// Two backslashes: an even run, so the quote is not escaped.
#[test]
fn is_quote_escaped_double_backslash() {
assert!(!is_quote_escaped(b"hello\\\\\"", 7));
}
// Three backslashes: an odd run, so the quote is escaped.
#[test]
fn is_quote_escaped_triple_backslash() {
assert!(is_quote_escaped(b"hello\\\\\\\"", 8));
}
// Nothing precedes position 0, so the quote cannot be escaped.
#[test]
fn is_quote_escaped_at_start() {
assert!(!is_quote_escaped(b"\"", 0));
}
// ---- find_structural_lex_markers ----
// Expected values are indices into the `tokens` vector.
#[test]
fn structural_markers_no_quotes() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker,
Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::LexMarker,
];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 4]);
}
// The marker at index 7 lies between the quotes at indices 6 and 10, so it is
// not reported; the markers at 0 and 12 are.
#[test]
fn structural_markers_with_quoted_marker() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::LexMarker, Token::Whitespace(1),
Token::Text("value".into()),
Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
}
// The only other marker (index 5) lies between the quotes at indices 4 and 7.
#[test]
fn structural_markers_data_line_with_quoted_marker() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Equals,
Token::Quote,
Token::LexMarker, Token::Text("value".into()),
Token::Quote,
];
assert_eq!(find_structural_lex_markers(&tokens), vec![0]);
}
// The quote at index 8 follows a Text token ending in a single backslash, so
// it does not close the span opened at index 6; the quote at index 10 does.
#[test]
fn structural_markers_escaped_quote_does_not_toggle() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::Text("value with \\".into()), Token::Quote, Token::Text(" inside".into()), Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
}
// The Text token at index 7 ends in two backslashes (an even run), so the
// quote at index 8 closes the span and the marker at index 10 is reported.
#[test]
fn structural_markers_double_backslash_before_quote_not_escaped() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::Text("val\\\\".into()), Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 10]);
}
// ---- is_quote_escaped_by_prev_token ----
// Covers: no previous token, a non-Text token, and Text ending in 0, 1, 2 and
// 3 backslashes (only the odd counts escape the quote).
#[test]
fn is_quote_escaped_by_prev_token_tests() {
use crate::lex::token::Token;
assert!(!is_quote_escaped_by_prev_token(None));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Whitespace(1))));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello".into()
))));
assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\".into()
))));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\\\".into()
))));
assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\\\\\".into()
))));
}
}