use std::borrow::Cow;
/// Result of classifying the character that follows a backslash in inline text.
///
/// Produced by [`unescape_inline_char`]. The type is a small plain value, so it
/// derives the common traits that let callers print, copy and compare it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeAction {
    /// The following character is present and non-alphanumeric; the carried
    /// `char` is that character.
    Escape(char),
    /// The following character is alphanumeric or absent.
    Literal,
}
/// Classifies the character that follows a backslash in inline text.
///
/// Returns [`EscapeAction::Escape`] carrying `next` when it is present and not
/// alphanumeric according to [`char::is_alphanumeric`] (so Unicode letters and
/// digits count as alphanumeric). Returns [`EscapeAction::Literal`] when `next`
/// is alphanumeric or when there is no following character.
pub fn unescape_inline_char(next: Option<char>) -> EscapeAction {
    next.filter(|candidate| !candidate.is_alphanumeric())
        .map_or(EscapeAction::Literal, EscapeAction::Escape)
}
/// Removes inline backslash escapes from `text`.
///
/// A backslash followed by a non-alphanumeric character (per
/// [`char::is_alphanumeric`]) is dropped and the following character is kept.
/// A backslash followed by an alphanumeric character, or a backslash at the
/// very end of the input, is kept as a literal backslash.
///
/// The input is walked with a peekable iterator, so no intermediate
/// `Vec<char>` is allocated.
pub fn unescape_inline(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        // Consume the next character only when it is escapable; otherwise the
        // backslash itself is emitted and the next character is handled on the
        // following iteration.
        let emitted = chars
            .next_if(|following| !following.is_alphanumeric())
            .unwrap_or('\\');
        out.push(emitted);
    }
    out
}
/// Escapes `text` for use as inline content.
///
/// Every character for which `is_inline_special` returns `true` is prefixed
/// with a backslash; all other characters are copied unchanged.
pub fn escape_inline(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    text.chars().for_each(|ch| {
        if is_inline_special(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    });
    escaped
}
/// Returns `true` when `ch` is one of the characters `escape_inline` must
/// prefix with a backslash: `\`, `*`, `_`, `` ` ``, `#`, `[` or `]`.
fn is_inline_special(ch: char) -> bool {
    const SPECIALS: [char; 7] = ['\\', '*', '_', '`', '#', '[', ']'];
    SPECIALS.contains(&ch)
}
/// Decides whether a quote token is escaped by the token directly before it.
///
/// Only a preceding `Token::Text` can escape a quote: the quote is escaped when
/// that text ends in an odd number of consecutive backslashes. Any other
/// preceding token, or no preceding token, means the quote is not escaped.
fn is_quote_escaped_by_prev_token(prev: Option<&crate::lex::token::Token>) -> bool {
    use crate::lex::token::Token;
    let Some(Token::Text(text)) = prev else {
        return false;
    };
    let backslash_run = text.bytes().rev().take_while(|byte| *byte == b'\\').count();
    backslash_run % 2 != 0
}
/// Returns the indices of every `Token::LexMarker` that lies outside quotes.
///
/// The quoted state is toggled by each `Token::Quote` that is not escaped
/// according to `is_quote_escaped_by_prev_token` (checked against the token
/// immediately before the quote). Indices are reported in ascending order.
pub fn find_structural_lex_markers(tokens: &[crate::lex::token::Token]) -> Vec<usize> {
    use crate::lex::token::Token;
    let mut found = Vec::new();
    let mut inside_quotes = false;
    // Token seen on the previous iteration; `None` for the first token.
    let mut previous: Option<&Token> = None;
    for (idx, current) in tokens.iter().enumerate() {
        match current {
            Token::Quote if !is_quote_escaped_by_prev_token(previous) => {
                inside_quotes = !inside_quotes;
            }
            Token::LexMarker if !inside_quotes => found.push(idx),
            _ => {}
        }
        previous = Some(current);
    }
    found
}
/// Same scan as `find_structural_lex_markers`, for slices of `(Token, R)` pairs.
///
/// Only the token half of each pair is inspected; the second element is
/// ignored. Returns the indices of every `Token::LexMarker` found while not
/// inside quotes, where an unescaped `Token::Quote` toggles the quoted state.
pub fn find_structural_lex_marker_pairs<R>(
    tokens: &[(crate::lex::token::Token, R)],
) -> Vec<usize> {
    use crate::lex::token::Token;
    let mut found = Vec::new();
    let mut inside_quotes = false;
    // Token half of the pair seen on the previous iteration.
    let mut previous: Option<&Token> = None;
    for (idx, (current, _)) in tokens.iter().enumerate() {
        match current {
            Token::Quote if !is_quote_escaped_by_prev_token(previous) => {
                inside_quotes = !inside_quotes;
            }
            Token::LexMarker if !inside_quotes => found.push(idx),
            _ => {}
        }
        previous = Some(current);
    }
    found
}
/// Counts the consecutive backslash bytes that end immediately before `pos`.
///
/// Panics if `pos` is greater than `bytes.len()`.
fn trailing_backslashes_before(bytes: &[u8], pos: usize) -> usize {
    bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count()
}
/// Reports whether the byte at `pos` is structural.
///
/// The position is structural when all of the following hold:
/// * `pos` is inside `bytes`;
/// * it is not preceded by an odd number of consecutive backslashes;
/// * when `literal_delim` is given, an even number of unescaped delimiter
///   bytes occur before `pos` (i.e. `pos` is not inside a literal region).
pub fn is_structural_at(bytes: &[u8], pos: usize, literal_delim: Option<u8>) -> bool {
    // The bounds check must come first: the backslash scan indexes up to `pos`.
    if pos >= bytes.len() || trailing_backslashes_before(bytes, pos) % 2 != 0 {
        return false;
    }
    let Some(delim) = literal_delim else {
        return true;
    };
    let unescaped_delims = (0..pos)
        .filter(|&at| bytes[at] == delim && trailing_backslashes_before(bytes, at) % 2 == 0)
        .count();
    // An odd count means a literal region is still open at `pos`.
    unescaped_delims % 2 == 0
}
/// Splits `s` at every occurrence of `sep` that is not backslash-escaped.
///
/// A separator preceded by an odd number of consecutive backslashes is treated
/// as escaped: it stays inside its segment and the backslash directly in front
/// of it is removed. All other backslashes are left untouched. Segments that
/// need no such removal borrow from `s`; the others are owned copies.
///
/// At least one segment is always returned; an empty `s` yields one empty
/// segment.
pub fn split_respecting_escape(s: &str, sep: char) -> Vec<Cow<'_, str>> {
split_inner(s, sep, None)
}
/// Splits `s` at unescaped `sep` characters, leaving literal regions intact.
///
/// Each unescaped `literal_delim` toggles a literal region. Separators inside
/// a literal region never split, and `\sep` sequences inside one are not
/// unescaped. The delimiter characters themselves stay in the returned
/// segments. A region that is opened but never closed extends to the end of
/// `s`. Outside literal regions the behaviour matches
/// `split_respecting_escape`.
pub fn split_respecting_escape_and_literals(
s: &str,
sep: char,
literal_delim: char,
) -> Vec<Cow<'_, str>> {
split_inner(s, sep, Some(literal_delim))
}
/// Escape-aware split that also reports where each segment came from.
///
/// Splitting follows the same rules as `split_respecting_escape` (and, when
/// `literal_delim` is `Some`, `split_respecting_escape_and_literals`). Every
/// segment is paired with the byte range of its raw text within `s`, i.e. the
/// span before any escaping backslash was removed.
pub fn split_respecting_escape_with_ranges<'a>(
s: &'a str,
sep: char,
literal_delim: Option<char>,
) -> Vec<(Cow<'a, str>, std::ops::Range<usize>)> {
split_with_ranges_inner(s, sep, literal_delim)
}
/// Returns the byte offset of the first `needle` in `s` that is not escaped.
///
/// An occurrence preceded by an odd number of consecutive backslashes is
/// skipped. Returns `None` when no unescaped occurrence exists.
pub fn find_respecting_escape(s: &str, needle: char) -> Option<usize> {
find_inner(s, needle, None)
}
/// Returns the byte offset of the first unescaped `needle` outside literals.
///
/// Each unescaped `literal_delim` toggles a literal region; occurrences of
/// `needle` inside such a region are ignored, as are occurrences preceded by
/// an odd number of consecutive backslashes. Returns `None` when nothing
/// matches.
pub fn find_respecting_escape_and_literals(
s: &str,
needle: char,
literal_delim: char,
) -> Option<usize> {
find_inner(s, needle, Some(literal_delim))
}
/// Shared implementation behind the escape-aware split functions.
///
/// An empty input yields a single empty borrowed segment. Otherwise the work
/// is dispatched to the byte-oriented splitter when both `sep` and the
/// optional `literal_delim` are ASCII, and to the char-oriented splitter when
/// either is not.
fn split_inner(s: &str, sep: char, literal_delim: Option<char>) -> Vec<Cow<'_, str>> {
    if s.is_empty() {
        return vec![Cow::Borrowed("")];
    }
    let byte_path_ok = sep.is_ascii() && literal_delim.is_none_or(|delim| delim.is_ascii());
    if !byte_path_ok {
        return split_inner_chars(s, sep, literal_delim);
    }
    // Both casts are lossless here: the characters were just checked to be ASCII.
    let delim_byte = literal_delim.map(|delim| delim as u8);
    split_inner_ascii(s, s.as_bytes(), sep as u8, delim_byte)
}
/// Byte-oriented splitter used when `sep` and `literal_delim` are ASCII.
///
/// `bytes` is the byte view of `s`. A delimiter or separator byte only counts
/// when it is preceded by an even number of consecutive backslashes. Unescaped
/// delimiters toggle the literal state; unescaped separators outside a literal
/// region end the current segment. Each segment is produced by
/// `extract_segment`, which removes escaping backslashes where needed.
fn split_inner_ascii<'a>(
    s: &'a str,
    bytes: &[u8],
    sep: u8,
    literal_delim: Option<u8>,
) -> Vec<Cow<'a, str>> {
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut inside_literal = false;
    for (at, &byte) in bytes.iter().enumerate() {
        // Evaluated lazily so the backward scan only runs for candidate bytes.
        let unescaped = || trailing_backslashes_before(bytes, at) % 2 == 0;
        if literal_delim == Some(byte) && unescaped() {
            inside_literal = !inside_literal;
        } else if !inside_literal && byte == sep && unescaped() {
            pieces.push(extract_segment(s, piece_start, at, sep, literal_delim));
            piece_start = at + 1;
        }
    }
    // Whatever follows the last separator (possibly nothing) is the final segment.
    pieces.push(extract_segment(
        s,
        piece_start,
        bytes.len(),
        sep,
        literal_delim,
    ));
    pieces
}
/// Char-oriented splitter used when `sep` or `literal_delim` is not ASCII.
///
/// Keeps a running count of the consecutive backslashes seen just before the
/// current character; an odd count means the character is escaped. Unescaped
/// delimiters toggle the literal state, and unescaped separators outside a
/// literal region end the current segment. Each segment is produced by
/// `extract_segment_char`.
fn split_inner_chars<'a>(s: &'a str, sep: char, literal_delim: Option<char>) -> Vec<Cow<'a, str>> {
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut inside_literal = false;
    let mut backslash_run = 0usize;
    for (offset, ch) in s.char_indices() {
        let escaped = backslash_run % 2 == 1;
        if !escaped && literal_delim == Some(ch) {
            inside_literal = !inside_literal;
            backslash_run = 0;
        } else if !escaped && !inside_literal && ch == sep {
            pieces.push(extract_segment_char(s, piece_start, offset, sep, literal_delim));
            // Skip the full encoded width of the separator.
            piece_start = offset + ch.len_utf8();
            backslash_run = 0;
        } else if ch == '\\' {
            backslash_run += 1;
        } else {
            backslash_run = 0;
        }
    }
    pieces.push(extract_segment_char(
        s,
        piece_start,
        s.len(),
        sep,
        literal_delim,
    ));
    pieces
}
/// Produces the segment `s[start..end]` for the ASCII split path.
///
/// Borrows the slice when `needs_strip_ascii` finds nothing to remove;
/// otherwise returns an owned copy built by `strip_escapes_ascii`.
fn extract_segment<'a>(
    s: &'a str,
    start: usize,
    end: usize,
    sep: u8,
    literal_delim: Option<u8>,
) -> Cow<'a, str> {
    let piece = &s[start..end];
    let raw = piece.as_bytes();
    if needs_strip_ascii(raw, sep, literal_delim) {
        Cow::Owned(strip_escapes_ascii(raw, sep, literal_delim))
    } else {
        Cow::Borrowed(piece)
    }
}
/// Produces the segment `s[start..end]` for the char-oriented split path.
///
/// Borrows the slice when `needs_strip_char` finds nothing to remove;
/// otherwise returns an owned copy built by `strip_escapes_char`.
fn extract_segment_char<'a>(
    s: &'a str,
    start: usize,
    end: usize,
    sep: char,
    literal_delim: Option<char>,
) -> Cow<'a, str> {
    let piece = &s[start..end];
    if needs_strip_char(piece, sep, literal_delim) {
        Cow::Owned(strip_escapes_char(piece, sep, literal_delim))
    } else {
        Cow::Borrowed(piece)
    }
}
/// Reports whether `bytes` contains a backslash directly followed by `sep`
/// outside every literal region.
///
/// Literal regions are toggled by each `literal_delim` byte that is preceded
/// by an even number of consecutive backslashes.
fn needs_strip_ascii(bytes: &[u8], sep: u8, literal_delim: Option<u8>) -> bool {
    let mut inside_literal = false;
    for (at, &byte) in bytes.iter().enumerate() {
        if literal_delim == Some(byte) && trailing_backslashes_before(bytes, at) % 2 == 0 {
            inside_literal = !inside_literal;
        } else if !inside_literal && byte == b'\\' && bytes.get(at + 1) == Some(&sep) {
            return true;
        }
    }
    false
}
/// Copies `bytes`, dropping the backslash from every `\sep` pair that lies
/// outside a literal region.
///
/// Literal regions are toggled by each `literal_delim` byte that is preceded
/// by an even number of consecutive backslashes; delimiter bytes are copied to
/// the output. Everything else is copied unchanged.
///
/// # Panics
///
/// Panics if the resulting bytes are not valid UTF-8.
fn strip_escapes_ascii(bytes: &[u8], sep: u8, literal_delim: Option<u8>) -> String {
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut inside_literal = false;
    // Set after emitting an escaped separator so its original byte is skipped.
    let mut skip_next = false;
    for (at, &byte) in bytes.iter().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }
        if literal_delim == Some(byte) && trailing_backslashes_before(bytes, at) % 2 == 0 {
            inside_literal = !inside_literal;
            out.push(byte);
        } else if !inside_literal && byte == b'\\' && bytes.get(at + 1) == Some(&sep) {
            out.push(sep);
            skip_next = true;
        } else {
            out.push(byte);
        }
    }
    String::from_utf8(out).expect("byte-level manipulations preserve UTF-8 validity")
}
/// Reports whether `slice` contains a backslash directly followed by `sep`
/// outside every literal region.
///
/// Literal regions are toggled by each `literal_delim` character that is not
/// escaped, i.e. not preceded by an odd number of consecutive backslashes.
///
/// This is the check that lets callers keep a borrowed segment, so it scans
/// with a peekable iterator rather than allocating a `Vec<char>`.
fn needs_strip_char(slice: &str, sep: char, literal_delim: Option<char>) -> bool {
    let mut inside_literal = false;
    let mut backslash_run = 0usize;
    let mut chars = slice.chars().peekable();
    while let Some(ch) = chars.next() {
        let escaped = backslash_run % 2 == 1;
        if !escaped && literal_delim == Some(ch) {
            inside_literal = !inside_literal;
            backslash_run = 0;
            continue;
        }
        if ch != '\\' {
            backslash_run = 0;
            continue;
        }
        if !inside_literal && chars.peek() == Some(&sep) {
            return true;
        }
        backslash_run += 1;
    }
    false
}
/// Copies `slice`, dropping the backslash from every `\sep` pair that lies
/// outside a literal region.
///
/// Literal regions are toggled by each unescaped `literal_delim` character;
/// delimiter characters are copied to the output. Everything else, including
/// backslashes that do not directly precede `sep`, is copied unchanged.
///
/// The input is walked with a peekable iterator, so no intermediate
/// `Vec<char>` is allocated.
fn strip_escapes_char(slice: &str, sep: char, literal_delim: Option<char>) -> String {
    let mut out = String::with_capacity(slice.len());
    let mut inside_literal = false;
    let mut backslash_run = 0usize;
    let mut chars = slice.chars().peekable();
    while let Some(ch) = chars.next() {
        let escaped = backslash_run % 2 == 1;
        if !escaped && literal_delim == Some(ch) {
            inside_literal = !inside_literal;
            out.push(ch);
            backslash_run = 0;
            continue;
        }
        // `next_if_eq` consumes the separator only when it directly follows
        // this backslash, which is exactly the pair that must be collapsed.
        if ch == '\\' && !inside_literal && chars.next_if_eq(&sep).is_some() {
            out.push(sep);
            backslash_run = 0;
            continue;
        }
        out.push(ch);
        backslash_run = if ch == '\\' { backslash_run + 1 } else { 0 };
    }
    out
}
/// Escape-aware split that pairs each segment with its byte range in `s`.
///
/// Follows the same rules as `split_inner`: an empty input yields one empty
/// segment with range `0..0`; the byte-oriented path is used when `sep` and
/// the optional `literal_delim` are ASCII, the char-oriented path otherwise.
/// The range of a segment covers its raw text in `s`, before any escaping
/// backslash is removed.
fn split_with_ranges_inner<'a>(
    s: &'a str,
    sep: char,
    literal_delim: Option<char>,
) -> Vec<(Cow<'a, str>, std::ops::Range<usize>)> {
    if s.is_empty() {
        return vec![(Cow::Borrowed(""), 0..0)];
    }
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut inside_literal = false;

    if sep.is_ascii() && literal_delim.is_none_or(|delim| delim.is_ascii()) {
        // Byte path: both casts are lossless because the chars are ASCII.
        let bytes = s.as_bytes();
        let sep_byte = sep as u8;
        let delim_byte = literal_delim.map(|delim| delim as u8);
        for (at, &byte) in bytes.iter().enumerate() {
            let unescaped = || trailing_backslashes_before(bytes, at) % 2 == 0;
            if delim_byte == Some(byte) && unescaped() {
                inside_literal = !inside_literal;
            } else if !inside_literal && byte == sep_byte && unescaped() {
                let piece = extract_segment(s, piece_start, at, sep_byte, delim_byte);
                pieces.push((piece, piece_start..at));
                piece_start = at + 1;
            }
        }
        let tail = extract_segment(s, piece_start, bytes.len(), sep_byte, delim_byte);
        pieces.push((tail, piece_start..bytes.len()));
    } else {
        // Char path: track the run of backslashes just before each character.
        let mut backslash_run = 0usize;
        for (offset, ch) in s.char_indices() {
            let escaped = backslash_run % 2 == 1;
            if !escaped && literal_delim == Some(ch) {
                inside_literal = !inside_literal;
                backslash_run = 0;
            } else if !escaped && !inside_literal && ch == sep {
                let piece = extract_segment_char(s, piece_start, offset, sep, literal_delim);
                pieces.push((piece, piece_start..offset));
                piece_start = offset + ch.len_utf8();
                backslash_run = 0;
            } else if ch == '\\' {
                backslash_run += 1;
            } else {
                backslash_run = 0;
            }
        }
        let tail = extract_segment_char(s, piece_start, s.len(), sep, literal_delim);
        pieces.push((tail, piece_start..s.len()));
    }
    pieces
}
fn find_inner(s: &str, needle: char, literal_delim: Option<char>) -> Option<usize> {
let bytes = s.as_bytes();
let mut in_literal = false;
for (i, ch) in s.char_indices() {
if let Some(delim) = literal_delim {
if ch == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
in_literal = !in_literal;
continue;
}
}
if !in_literal && ch == needle && trailing_backslashes_before(bytes, i) % 2 == 0 {
return Some(i);
}
}
None
}
/// Reports whether the byte at `pos` in `source` is backslash-escaped.
///
/// Returns `true` when an odd number of consecutive backslash bytes end
/// immediately before `pos`. A position at the start of the input is never
/// escaped.
///
/// # Panics
///
/// Panics if `pos` is greater than `source.len()`.
pub fn is_quote_escaped(source: &[u8], pos: usize) -> bool {
    let preceding_backslashes = source[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    preceding_backslashes % 2 != 0
}
/// Removes the surrounding double quotes from `raw` and undoes its escapes.
///
/// When `raw` both starts and ends with `"` (and is at least two bytes long)
/// that outer pair is removed; otherwise `raw` is processed as is. Inside the
/// remaining text `\"` becomes `"` and `\\` becomes `\`. A backslash followed
/// by any other character, or a trailing backslash, is kept unchanged.
///
/// Uses `strip_prefix`/`strip_suffix` instead of manual byte slicing and a
/// peekable iterator instead of an intermediate `Vec<char>`.
pub fn unescape_quoted(raw: &str) -> String {
    // A lone `"` has a prefix but no suffix left to strip, so it falls back to
    // `raw`, matching the "at least two bytes" requirement.
    let inner = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(raw);
    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars().peekable();
    while let Some(ch) = chars.next() {
        let unescaped = if ch == '\\' {
            chars.next_if(|following| matches!(following, '"' | '\\'))
        } else {
            None
        };
        out.push(unescaped.unwrap_or(ch));
    }
    out
}
/// Escapes `text` for placement inside a double-quoted string.
///
/// Every backslash and every double quote is prefixed with a backslash; all
/// other characters are copied unchanged. The surrounding quotes are not
/// added.
pub fn escape_quoted(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            if matches!(ch, '\\' | '"') {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
// Unit tests for the escape/unescape helpers and the escape-aware split/find
// functions defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// ---- unescape_inline ----------------------------------------------------
#[test]
fn unescape_plain_text_unchanged() {
assert_eq!(unescape_inline("hello world"), "hello world");
}
#[test]
fn unescape_empty_string() {
assert_eq!(unescape_inline(""), "");
}
#[test]
fn unescape_asterisk() {
assert_eq!(unescape_inline("\\*literal\\*"), "*literal*");
}
#[test]
fn unescape_underscore() {
assert_eq!(unescape_inline("\\_not emphasis\\_"), "_not emphasis_");
}
#[test]
fn unescape_backtick() {
assert_eq!(unescape_inline("\\`not code\\`"), "`not code`");
}
#[test]
fn unescape_hash() {
assert_eq!(unescape_inline("\\#not math\\#"), "#not math#");
}
#[test]
fn unescape_brackets() {
assert_eq!(unescape_inline("\\[not a ref\\]"), "[not a ref]");
}
// A backslash before a letter or digit is not an escape and is kept.
#[test]
fn unescape_backslash_before_alphanumeric_preserved() {
assert_eq!(unescape_inline("C:\\Users\\name"), "C:\\Users\\name");
}
#[test]
fn unescape_double_backslash() {
assert_eq!(unescape_inline("C:\\\\Users\\\\name"), "C:\\Users\\name");
}
#[test]
fn unescape_trailing_backslash() {
assert_eq!(unescape_inline("text\\"), "text\\");
}
#[test]
fn unescape_backslash_before_space() {
assert_eq!(unescape_inline("hello\\ world"), "hello world");
}
#[test]
fn unescape_backslash_before_punctuation() {
assert_eq!(unescape_inline("\\!\\?\\,\\."), "!?,.");
}
// Four backslashes are two escaped backslashes.
#[test]
fn unescape_multiple_consecutive_backslashes() {
assert_eq!(unescape_inline("\\\\\\\\"), "\\\\");
}
// Three backslashes then `*`: an escaped backslash followed by an escaped star.
#[test]
fn unescape_triple_backslash_then_star() {
assert_eq!(unescape_inline("\\\\\\*"), "\\*");
}
#[test]
fn unescape_mixed_escaped_and_plain() {
assert_eq!(
unescape_inline("plain \\*escaped\\* plain"),
"plain *escaped* plain"
);
}
#[test]
fn unescape_backslash_before_digit_preserved() {
assert_eq!(unescape_inline("item\\1"), "item\\1");
}
// Alphanumeric detection is Unicode-aware, so `ñ` counts as a letter.
#[test]
fn unescape_backslash_before_unicode_letter_preserved() {
assert_eq!(unescape_inline("path\\ñ"), "path\\ñ");
}
#[test]
fn unescape_backslash_before_non_ascii_symbol() {
assert_eq!(unescape_inline("\\→"), "→");
}
// ---- escape_inline ------------------------------------------------------
#[test]
fn escape_plain_text_unchanged() {
assert_eq!(escape_inline("hello world"), "hello world");
}
#[test]
fn escape_empty_string() {
assert_eq!(escape_inline(""), "");
}
#[test]
fn escape_special_chars() {
assert_eq!(escape_inline("*bold*"), "\\*bold\\*");
assert_eq!(escape_inline("_emph_"), "\\_emph\\_");
assert_eq!(escape_inline("`code`"), "\\`code\\`");
assert_eq!(escape_inline("#math#"), "\\#math\\#");
assert_eq!(escape_inline("[ref]"), "\\[ref\\]");
}
#[test]
fn escape_backslash() {
assert_eq!(escape_inline("C:\\Users"), "C:\\\\Users");
}
// ---- escape_inline / unescape_inline round trips ------------------------
#[test]
fn roundtrip_plain_text() {
let original = "hello world";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_special_chars() {
let original = "*bold* and _emph_ and `code` and #math# and [ref]";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_backslashes() {
let original = "C:\\Users\\name";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
#[test]
fn roundtrip_mixed() {
let original = "path\\file *bold* and \\more";
assert_eq!(unescape_inline(&escape_inline(original)), original);
}
// ---- unescape_quoted ----------------------------------------------------
#[test]
fn unescape_quoted_simple() {
assert_eq!(unescape_quoted("\"Hello World\""), "Hello World");
}
#[test]
fn unescape_quoted_with_escaped_quote() {
assert_eq!(unescape_quoted("\"say \\\"hello\\\"\""), "say \"hello\"");
}
#[test]
fn unescape_quoted_with_escaped_backslash() {
assert_eq!(unescape_quoted("\"path\\\\to\""), "path\\to");
}
#[test]
fn unescape_quoted_escaped_backslash_before_quote() {
assert_eq!(unescape_quoted("\"end\\\\\""), "end\\");
}
// Only `\"` and `\\` are escapes inside quotes; `\n` stays as two characters.
#[test]
fn unescape_quoted_other_backslash_literal() {
assert_eq!(unescape_quoted("\"hello\\nworld\""), "hello\\nworld");
}
#[test]
fn unescape_quoted_empty() {
assert_eq!(unescape_quoted("\"\""), "");
}
#[test]
fn unescape_quoted_no_quotes() {
assert_eq!(unescape_quoted("simple"), "simple");
}
// ---- escape_quoted ------------------------------------------------------
#[test]
fn escape_quoted_simple() {
assert_eq!(escape_quoted("Hello World"), "Hello World");
}
#[test]
fn escape_quoted_with_quote() {
assert_eq!(escape_quoted("say \"hello\""), "say \\\"hello\\\"");
}
#[test]
fn escape_quoted_with_backslash() {
assert_eq!(escape_quoted("path\\to"), "path\\\\to");
}
#[test]
fn escape_quoted_empty() {
assert_eq!(escape_quoted(""), "");
}
// ---- escape_quoted / unescape_quoted round trips ------------------------
#[test]
fn roundtrip_quoted_simple() {
let original = "Hello World";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_quotes() {
let original = "say \"hello\" and \"bye\"";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_backslashes() {
let original = "C:\\Users\\name";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
#[test]
fn roundtrip_quoted_with_both() {
let original = "path\\to \"file\"";
let escaped = format!("\"{}\"", escape_quoted(original));
assert_eq!(unescape_quoted(&escaped), original);
}
// ---- is_quote_escaped ---------------------------------------------------
// The position argument is the byte index of the quote being checked.
#[test]
fn is_quote_escaped_no_backslash() {
assert!(!is_quote_escaped(b"hello\"", 5));
}
#[test]
fn is_quote_escaped_single_backslash() {
assert!(is_quote_escaped(b"hello\\\"", 6));
}
#[test]
fn is_quote_escaped_double_backslash() {
assert!(!is_quote_escaped(b"hello\\\\\"", 7));
}
#[test]
fn is_quote_escaped_triple_backslash() {
assert!(is_quote_escaped(b"hello\\\\\\\"", 8));
}
#[test]
fn is_quote_escaped_at_start() {
assert!(!is_quote_escaped(b"\"", 0));
}
// ---- find_structural_lex_markers ----------------------------------------
#[test]
fn structural_markers_no_quotes() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker,
Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::LexMarker,
];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 4]);
}
// The marker at index 7 sits between the quotes at indices 6 and 10, so it
// is not reported; the markers at 0 and 12 are.
#[test]
fn structural_markers_with_quoted_marker() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::LexMarker, Token::Whitespace(1),
Token::Text("value".into()),
Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
}
// No closing marker here: the only other marker (index 5) is inside quotes.
#[test]
fn structural_markers_data_line_with_quoted_marker() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Equals,
Token::Quote,
Token::LexMarker, Token::Text("value".into()),
Token::Quote,
];
assert_eq!(find_structural_lex_markers(&tokens), vec![0]);
}
// The quote at index 8 follows text ending in one backslash, so it is
// escaped and the quoted region runs from index 6 to index 10.
#[test]
fn structural_markers_escaped_quote_does_not_toggle() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::Text("value with \\".into()), Token::Quote, Token::Text(" inside".into()), Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
}
// Two trailing backslashes form an escaped backslash, so the quote at
// index 8 is a real closing quote.
#[test]
fn structural_markers_double_backslash_before_quote_not_escaped() {
use crate::lex::token::Token;
let tokens = vec![
Token::LexMarker, Token::Whitespace(1),
Token::Text("note".into()),
Token::Whitespace(1),
Token::Text("foo".into()),
Token::Equals,
Token::Quote, Token::Text("val\\\\".into()), Token::Quote, Token::Whitespace(1),
Token::LexMarker, ];
assert_eq!(find_structural_lex_markers(&tokens), vec![0, 10]);
}
// ---- split_respecting_escape --------------------------------------------
// Helper: turns the returned `Cow` segments into owned strings for comparison.
fn collect(segments: Vec<Cow<'_, str>>) -> Vec<String> {
segments.into_iter().map(|s| s.into_owned()).collect()
}
#[test]
fn split_no_separator() {
assert_eq!(
collect(split_respecting_escape("hello", '|')),
vec!["hello"]
);
}
#[test]
fn split_empty_input() {
assert_eq!(collect(split_respecting_escape("", '|')), vec![""]);
}
#[test]
fn split_simple() {
assert_eq!(
collect(split_respecting_escape("a|b|c", '|')),
vec!["a", "b", "c"]
);
}
#[test]
fn split_trailing_empty() {
assert_eq!(
collect(split_respecting_escape("a|b|", '|')),
vec!["a", "b", ""]
);
}
#[test]
fn split_leading_empty() {
assert_eq!(
collect(split_respecting_escape("|a|b", '|')),
vec!["", "a", "b"]
);
}
#[test]
fn split_only_separators() {
assert_eq!(
collect(split_respecting_escape("|||", '|')),
vec!["", "", "", ""]
);
}
#[test]
fn split_escaped_separator() {
assert_eq!(
collect(split_respecting_escape("a\\|b|c", '|')),
vec!["a|b", "c"]
);
}
// An even run of backslashes does not escape the separator, and the
// backslashes themselves are left as written.
#[test]
fn split_double_backslash_then_sep_splits() {
assert_eq!(
collect(split_respecting_escape("a\\\\|b", '|')),
vec!["a\\\\", "b"]
);
}
// An odd run escapes the separator; only the backslash directly before it
// is removed.
#[test]
fn split_triple_backslash_then_sep_is_escaped() {
assert_eq!(
collect(split_respecting_escape("a\\\\\\|b", '|')),
vec!["a\\\\|b"]
);
}
#[test]
fn split_multiple_escapes_in_one_segment() {
assert_eq!(
collect(split_respecting_escape("\\|a\\|b\\|", '|')),
vec!["|a|b|"]
);
}
#[test]
fn split_trailing_backslash_no_sep() {
assert_eq!(
collect(split_respecting_escape("abc\\", '|')),
vec!["abc\\"]
);
}
#[test]
fn split_preserves_unrelated_backslashes() {
assert_eq!(
collect(split_respecting_escape("a\\n|b", '|')),
vec!["a\\n", "b"]
);
}
#[test]
fn split_different_separator() {
assert_eq!(
collect(split_respecting_escape("a,b\\,c,d", ',')),
vec!["a", "b,c", "d"]
);
}
// Segments that need no unescaping must borrow from the input.
#[test]
fn split_borrowed_when_no_strip() {
let segments = split_respecting_escape("a|b|c", '|');
for seg in &segments {
assert!(
matches!(seg, Cow::Borrowed(_)),
"expected Borrowed, got {seg:?}"
);
}
}
#[test]
fn split_owned_when_strip_happens() {
let segments = split_respecting_escape("a\\|b|c", '|');
assert!(matches!(segments[0], Cow::Owned(_)));
assert!(matches!(segments[1], Cow::Borrowed(_)));
}
#[test]
fn split_unicode_content() {
assert_eq!(
collect(split_respecting_escape("α|β|γ", '|')),
vec!["α", "β", "γ"]
);
}
#[test]
fn split_unicode_with_escape() {
assert_eq!(
collect(split_respecting_escape("α\\|β|γ", '|')),
vec!["α|β", "γ"]
);
}
// A non-ASCII separator exercises the char-oriented split path.
#[test]
fn split_non_ascii_separator() {
assert_eq!(
collect(split_respecting_escape("a→b→c", '→')),
vec!["a", "b", "c"]
);
}
#[test]
fn split_non_ascii_separator_with_escape() {
assert_eq!(
collect(split_respecting_escape("a\\→b→c", '→')),
vec!["a→b", "c"]
);
}
// ---- split_respecting_escape_and_literals -------------------------------
#[test]
fn split_literal_region_protects_separator() {
assert_eq!(
collect(split_respecting_escape_and_literals("a|`b|c`|d", '|', '`')),
vec!["a", "`b|c`", "d"]
);
}
#[test]
fn split_literal_region_multiple_pipes() {
assert_eq!(
collect(split_respecting_escape_and_literals(
"a|`x|y|z`|b",
'|',
'`'
)),
vec!["a", "`x|y|z`", "b"]
);
}
#[test]
fn split_escape_outside_literal_still_works() {
assert_eq!(
collect(split_respecting_escape_and_literals(
"a\\|b|`c|d`|e",
'|',
'`'
)),
vec!["a|b", "`c|d`", "e"]
);
}
// An unclosed literal region extends to the end of the input.
#[test]
fn split_unbalanced_literal_delim() {
assert_eq!(
collect(split_respecting_escape_and_literals("a|`b|c", '|', '`')),
vec!["a", "`b|c"]
);
}
#[test]
fn split_escaped_literal_delim_does_not_open_region() {
assert_eq!(
collect(split_respecting_escape_and_literals("a|\\`b|c", '|', '`')),
vec!["a", "\\`b", "c"]
);
}
// The non-ASCII literal delimiter forces the char-oriented path.
#[test]
fn split_escaped_literal_delim_before_escaped_sep_non_ascii() {
let segments = split_respecting_escape_and_literals("a\\α\\|b", '|', 'α');
assert_eq!(
segments.len(),
1,
"escaped pipe must not split; got segments={segments:?}"
);
assert_eq!(
segments[0].as_ref(),
"a\\α|b",
"escaped pipe must be stripped; escaped alpha must not open a literal region"
);
}
#[test]
fn split_empty_cells_between_literal_regions() {
assert_eq!(
collect(split_respecting_escape_and_literals("`a`|`b`", '|', '`')),
vec!["`a`", "`b`"]
);
}
// ---- find_respecting_escape / find_respecting_escape_and_literals -------
#[test]
fn find_first_unescaped() {
assert_eq!(find_respecting_escape("a|b|c", '|'), Some(1));
}
#[test]
fn find_skips_escaped() {
assert_eq!(find_respecting_escape("a\\|b|c", '|'), Some(4));
}
#[test]
fn find_none_when_only_escaped() {
assert_eq!(find_respecting_escape("a\\|b\\|c", '|'), None);
}
#[test]
fn find_respects_literal_region() {
assert_eq!(
find_respecting_escape_and_literals("`a|b`|c", '|', '`'),
Some(5)
);
}
#[test]
fn find_empty() {
assert_eq!(find_respecting_escape("", '|'), None);
}
// ---- is_structural_at ---------------------------------------------------
#[test]
fn structural_at_unescaped() {
assert!(is_structural_at(b"a|b", 1, None));
}
#[test]
fn structural_at_escaped() {
assert!(!is_structural_at(b"a\\|b", 2, None));
}
#[test]
fn structural_at_double_escape() {
assert!(is_structural_at(b"a\\\\|b", 3, None));
}
#[test]
fn structural_at_inside_literal() {
assert!(!is_structural_at(b"`a|b`", 2, Some(b'`')));
}
#[test]
fn structural_at_outside_literal() {
assert!(is_structural_at(b"`a`|b", 3, Some(b'`')));
}
// A position at or past the end of the input is never structural.
#[test]
fn structural_at_out_of_bounds() {
assert!(!is_structural_at(b"abc", 3, None));
assert!(!is_structural_at(b"", 0, None));
}
// ---- is_quote_escaped_by_prev_token -------------------------------------
// Only a preceding Text token ending in an odd run of backslashes escapes.
#[test]
fn is_quote_escaped_by_prev_token_tests() {
use crate::lex::token::Token;
assert!(!is_quote_escaped_by_prev_token(None));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Whitespace(1))));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello".into()
))));
assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\".into()
))));
assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\\\".into()
))));
assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
"hello\\\\\\".into()
))));
}
}