1use std::borrow::Cow;
25
/// Outcome of inspecting the character that follows a backslash in inline text.
///
/// Returned by [`unescape_inline_char`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeAction {
    /// The backslash escapes the carried (non-alphanumeric) character.
    Escape(char),
    /// The backslash is not an escape: the following character is
    /// alphanumeric, or there is no following character.
    Literal,
}
33
34pub fn unescape_inline_char(next: Option<char>) -> EscapeAction {
39 match next {
40 Some(ch) if !ch.is_alphanumeric() => EscapeAction::Escape(ch),
41 _ => EscapeAction::Literal,
42 }
43}
44
/// Removes inline backslash escapes from `text`.
///
/// * A backslash followed by a non-alphanumeric character yields that
///   character alone (so `\\` becomes a single backslash).
/// * A backslash followed by an alphanumeric character is kept verbatim, as is
///   the character after it.
/// * A backslash at the very end of the input is kept verbatim.
pub fn unescape_inline(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut rest = text.chars().peekable();

    while let Some(current) = rest.next() {
        if current != '\\' {
            out.push(current);
            continue;
        }
        match rest.peek().copied() {
            // Escaped character: emit it and consume it so it is not
            // re-examined (matters when it is itself a backslash).
            Some(following) if !following.is_alphanumeric() => {
                out.push(following);
                rest.next();
            }
            // Alphanumeric follower or end of input: the backslash is literal.
            // The follower (if any) is emitted by the next iteration.
            _ => out.push('\\'),
        }
    }

    out
}
82
83pub fn escape_inline(text: &str) -> String {
90 let mut result = String::with_capacity(text.len());
91
92 for ch in text.chars() {
93 if is_inline_special(ch) {
94 result.push('\\');
95 }
96 result.push(ch);
97 }
98
99 result
100}
101
/// Returns `true` for the characters that `escape_inline` must backslash-escape:
/// backslash, `*`, `_`, backtick, `#`, `[` and `]`.
fn is_inline_special(ch: char) -> bool {
    const INLINE_SPECIALS: [char; 7] = ['\\', '*', '_', '`', '#', '[', ']'];
    INLINE_SPECIALS.contains(&ch)
}
106
107fn is_quote_escaped_by_prev_token(prev: Option<&crate::lex::token::Token>) -> bool {
112 use crate::lex::token::Token;
113 match prev {
114 Some(Token::Text(s)) => {
115 let trailing = s.bytes().rev().take_while(|&b| b == b'\\').count();
116 trailing % 2 == 1
117 }
118 _ => false,
119 }
120}
121
122pub fn find_structural_lex_markers(tokens: &[crate::lex::token::Token]) -> Vec<usize> {
130 use crate::lex::token::Token;
131 let mut markers = Vec::new();
132 let mut in_quotes = false;
133 for (i, token) in tokens.iter().enumerate() {
134 match token {
135 Token::Quote => {
136 if !is_quote_escaped_by_prev_token(if i > 0 { Some(&tokens[i - 1]) } else { None })
137 {
138 in_quotes = !in_quotes;
139 }
140 }
141 Token::LexMarker if !in_quotes => markers.push(i),
142 _ => {}
143 }
144 }
145 markers
146}
147
148pub fn find_structural_lex_marker_pairs<R>(tokens: &[(crate::lex::token::Token, R)]) -> Vec<usize> {
153 use crate::lex::token::Token;
154 let mut markers = Vec::new();
155 let mut in_quotes = false;
156 for (i, (token, _)) in tokens.iter().enumerate() {
157 match token {
158 Token::Quote => {
159 let prev = if i > 0 { Some(&tokens[i - 1].0) } else { None };
160 if !is_quote_escaped_by_prev_token(prev) {
161 in_quotes = !in_quotes;
162 }
163 }
164 Token::LexMarker if !in_quotes => markers.push(i),
165 _ => {}
166 }
167 }
168 markers
169}
170
/// Counts the consecutive backslash bytes that end immediately before `pos`.
///
/// # Panics
///
/// Panics if `pos > bytes.len()`.
fn trailing_backslashes_before(bytes: &[u8], pos: usize) -> usize {
    bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count()
}
185
186pub fn is_structural_at(bytes: &[u8], pos: usize, literal_delim: Option<u8>) -> bool {
193 if pos >= bytes.len() {
194 return false;
195 }
196 if trailing_backslashes_before(bytes, pos) % 2 == 1 {
198 return false;
199 }
200 if let Some(delim) = literal_delim {
202 let mut in_literal = false;
203 let mut i = 0;
204 while i < pos {
205 if bytes[i] == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
206 in_literal = !in_literal;
207 }
208 i += 1;
209 }
210 if in_literal {
211 return false;
212 }
213 }
214 true
215}
216
/// Splits `s` at every unescaped occurrence of `sep`.
///
/// A separator preceded by an odd number of backslashes is escaped and does
/// not split. In the returned segments a backslash directly followed by `sep`
/// is replaced by `sep` alone; segments needing no such replacement are
/// borrowed from `s`. An empty `s` yields a single empty segment.
///
/// Thin wrapper over `split_inner` with no literal delimiter.
pub fn split_respecting_escape(s: &str, sep: char) -> Vec<Cow<'_, str>> {
    split_inner(s, sep, None)
}
231
/// Like [`split_respecting_escape`], but separators inside a literal region
/// are also left alone.
///
/// A literal region is opened and closed by unescaped occurrences of
/// `literal_delim`. The delimiter characters stay in the returned segments,
/// and `\sep` sequences inside a literal region are not stripped.
///
/// Thin wrapper over `split_inner` with `Some(literal_delim)`.
pub fn split_respecting_escape_and_literals(
    s: &str,
    sep: char,
    literal_delim: char,
) -> Vec<Cow<'_, str>> {
    split_inner(s, sep, Some(literal_delim))
}
242
/// Escape-aware (and optionally literal-aware) split that also reports where
/// each segment came from.
///
/// Each returned item pairs the segment text (with `\sep` escapes stripped
/// outside literal regions) with the byte range of the raw, unstripped segment
/// within `s`. The range excludes the separators themselves. An empty `s`
/// yields one empty segment with range `0..0`.
///
/// Thin wrapper over `split_with_ranges_inner`.
pub fn split_respecting_escape_with_ranges<'a>(
    s: &'a str,
    sep: char,
    literal_delim: Option<char>,
) -> Vec<(Cow<'a, str>, std::ops::Range<usize>)> {
    split_with_ranges_inner(s, sep, literal_delim)
}
254
/// Returns the byte offset of the first `needle` in `s` that is not preceded
/// by an odd number of backslashes, or `None` if there is no such occurrence.
///
/// Thin wrapper over `find_inner` with no literal delimiter.
pub fn find_respecting_escape(s: &str, needle: char) -> Option<usize> {
    find_inner(s, needle, None)
}
263
/// Like [`find_respecting_escape`], but also skips occurrences of `needle`
/// inside literal regions delimited by unescaped `literal_delim` characters.
///
/// Returns the byte offset of the first qualifying `needle`, or `None`.
///
/// Thin wrapper over `find_inner` with `Some(literal_delim)`.
pub fn find_respecting_escape_and_literals(
    s: &str,
    needle: char,
    literal_delim: char,
) -> Option<usize> {
    find_inner(s, needle, Some(literal_delim))
}
272
273fn split_inner(s: &str, sep: char, literal_delim: Option<char>) -> Vec<Cow<'_, str>> {
274 if s.is_empty() {
275 return vec![Cow::Borrowed("")];
276 }
277 let bytes = s.as_bytes();
278 let sep_is_ascii = sep.is_ascii();
279 let literal_is_ascii = literal_delim.is_none_or(|c| c.is_ascii());
280 if sep_is_ascii && literal_is_ascii {
283 split_inner_ascii(s, bytes, sep as u8, literal_delim.map(|c| c as u8))
284 } else {
285 split_inner_chars(s, sep, literal_delim)
286 }
287}
288
289fn split_inner_ascii<'a>(
290 s: &'a str,
291 bytes: &[u8],
292 sep: u8,
293 literal_delim: Option<u8>,
294) -> Vec<Cow<'a, str>> {
295 let mut segments = Vec::new();
296 let mut seg_start = 0usize;
297 let mut in_literal = false;
298 let mut i = 0usize;
299 while i < bytes.len() {
300 let b = bytes[i];
301 if let Some(delim) = literal_delim {
302 if b == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
303 in_literal = !in_literal;
304 i += 1;
305 continue;
306 }
307 }
308 if !in_literal && b == sep && trailing_backslashes_before(bytes, i) % 2 == 0 {
309 segments.push(extract_segment(s, seg_start, i, sep, literal_delim));
310 seg_start = i + 1;
311 }
312 i += 1;
313 }
314 segments.push(extract_segment(
315 s,
316 seg_start,
317 bytes.len(),
318 sep,
319 literal_delim,
320 ));
321 segments
322}
323
324fn split_inner_chars<'a>(s: &'a str, sep: char, literal_delim: Option<char>) -> Vec<Cow<'a, str>> {
325 let mut segments = Vec::new();
326 let mut seg_start = 0usize;
327 let mut in_literal = false;
328 let mut prev_backslashes = 0usize;
329 for (i, ch) in s.char_indices() {
330 let is_escaped = prev_backslashes % 2 == 1;
331 if let Some(delim) = literal_delim {
332 if ch == delim && !is_escaped {
333 in_literal = !in_literal;
334 prev_backslashes = 0;
335 continue;
336 }
337 }
338 if !in_literal && ch == sep && !is_escaped {
339 segments.push(extract_segment_char(s, seg_start, i, sep, literal_delim));
340 seg_start = i + ch.len_utf8();
341 prev_backslashes = 0;
342 continue;
343 }
344 if ch == '\\' {
345 prev_backslashes += 1;
346 } else {
347 prev_backslashes = 0;
348 }
349 }
350 segments.push(extract_segment_char(
351 s,
352 seg_start,
353 s.len(),
354 sep,
355 literal_delim,
356 ));
357 segments
358}
359
360fn extract_segment<'a>(
363 s: &'a str,
364 start: usize,
365 end: usize,
366 sep: u8,
367 literal_delim: Option<u8>,
368) -> Cow<'a, str> {
369 let slice = &s[start..end];
370 if !needs_strip_ascii(slice.as_bytes(), sep, literal_delim) {
372 return Cow::Borrowed(slice);
373 }
374 Cow::Owned(strip_escapes_ascii(slice.as_bytes(), sep, literal_delim))
375}
376
377fn extract_segment_char<'a>(
378 s: &'a str,
379 start: usize,
380 end: usize,
381 sep: char,
382 literal_delim: Option<char>,
383) -> Cow<'a, str> {
384 let slice = &s[start..end];
385 if !needs_strip_char(slice, sep, literal_delim) {
386 return Cow::Borrowed(slice);
387 }
388 Cow::Owned(strip_escapes_char(slice, sep, literal_delim))
389}
390
391fn needs_strip_ascii(bytes: &[u8], sep: u8, literal_delim: Option<u8>) -> bool {
392 let mut in_literal = false;
393 let mut i = 0;
394 while i < bytes.len() {
395 let b = bytes[i];
396 if let Some(delim) = literal_delim {
397 if b == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
398 in_literal = !in_literal;
399 i += 1;
400 continue;
401 }
402 }
403 if !in_literal && b == b'\\' && i + 1 < bytes.len() && bytes[i + 1] == sep {
404 return true;
405 }
406 i += 1;
407 }
408 false
409}
410
411fn strip_escapes_ascii(bytes: &[u8], sep: u8, literal_delim: Option<u8>) -> String {
412 let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
413 let mut in_literal = false;
414 let mut i = 0;
415 while i < bytes.len() {
416 let b = bytes[i];
417 if let Some(delim) = literal_delim {
418 if b == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
419 in_literal = !in_literal;
420 out.push(b);
421 i += 1;
422 continue;
423 }
424 }
425 if !in_literal && b == b'\\' && i + 1 < bytes.len() && bytes[i + 1] == sep {
426 out.push(sep);
427 i += 2;
428 continue;
429 }
430 out.push(b);
431 i += 1;
432 }
433 String::from_utf8(out).expect("byte-level manipulations preserve UTF-8 validity")
437}
438
/// Reports whether `slice` contains a backslash directly followed by `sep`
/// outside any literal region, i.e. whether `strip_escapes_char` would change
/// the segment.
///
/// A `literal_delim` character toggles the literal state unless it is escaped
/// (preceded by an odd-length run of backslashes). The scan starts outside any
/// literal region.
fn needs_strip_char(slice: &str, sep: char, literal_delim: Option<char>) -> bool {
    let mut upcoming = slice.chars().peekable();
    let mut inside_literal = false;
    // True when the backslash run ending just before the next char is odd.
    let mut pending_escape = false;
    while let Some(ch) = upcoming.next() {
        let escaped = pending_escape;
        pending_escape = false;
        if !escaped && literal_delim == Some(ch) {
            inside_literal = !inside_literal;
            continue;
        }
        if ch == '\\' {
            if !inside_literal && upcoming.peek() == Some(&sep) {
                return true;
            }
            pending_escape = !escaped;
        }
    }
    false
}
463
/// Returns a copy of `slice` in which every backslash directly followed by
/// `sep`, outside literal regions, is dropped (leaving just `sep`).
///
/// A `literal_delim` character toggles the literal state unless it is escaped
/// (preceded by an odd-length run of backslashes). Delimiters and everything
/// inside a literal region are copied through unchanged.
fn strip_escapes_char(slice: &str, sep: char, literal_delim: Option<char>) -> String {
    let mut upcoming = slice.chars().peekable();
    let mut stripped = String::with_capacity(slice.len());
    let mut inside_literal = false;
    // True when the backslash run ending just before the next char is odd.
    let mut pending_escape = false;
    while let Some(ch) = upcoming.next() {
        let escaped = pending_escape;
        pending_escape = false;
        if !escaped && literal_delim == Some(ch) {
            inside_literal = !inside_literal;
            stripped.push(ch);
            continue;
        }
        if ch == '\\' {
            if !inside_literal && upcoming.peek() == Some(&sep) {
                // Consume the separator too, emitting it without its backslash.
                upcoming.next();
                stripped.push(sep);
                continue;
            }
            pending_escape = !escaped;
        }
        stripped.push(ch);
    }
    stripped
}
498
499fn split_with_ranges_inner<'a>(
500 s: &'a str,
501 sep: char,
502 literal_delim: Option<char>,
503) -> Vec<(Cow<'a, str>, std::ops::Range<usize>)> {
504 if s.is_empty() {
505 return vec![(Cow::Borrowed(""), 0..0)];
506 }
507 let bytes = s.as_bytes();
508 let sep_is_ascii = sep.is_ascii();
509 let literal_is_ascii = literal_delim.is_none_or(|c| c.is_ascii());
510 if sep_is_ascii && literal_is_ascii {
511 let mut segments = Vec::new();
512 let mut seg_start = 0usize;
513 let mut in_literal = false;
514 let mut i = 0usize;
515 let sep_byte = sep as u8;
516 let literal_byte = literal_delim.map(|c| c as u8);
517 while i < bytes.len() {
518 let b = bytes[i];
519 if let Some(delim) = literal_byte {
520 if b == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
521 in_literal = !in_literal;
522 i += 1;
523 continue;
524 }
525 }
526 if !in_literal && b == sep_byte && trailing_backslashes_before(bytes, i) % 2 == 0 {
527 let seg = extract_segment(s, seg_start, i, sep_byte, literal_byte);
528 segments.push((seg, seg_start..i));
529 seg_start = i + 1;
530 }
531 i += 1;
532 }
533 let seg = extract_segment(s, seg_start, bytes.len(), sep_byte, literal_byte);
534 segments.push((seg, seg_start..bytes.len()));
535 segments
536 } else {
537 let mut segments = Vec::new();
538 let mut seg_start = 0usize;
539 let mut in_literal = false;
540 let mut prev_backslashes = 0usize;
541 for (i, ch) in s.char_indices() {
542 let is_escaped = prev_backslashes % 2 == 1;
543 if let Some(delim) = literal_delim {
544 if ch == delim && !is_escaped {
545 in_literal = !in_literal;
546 prev_backslashes = 0;
547 continue;
548 }
549 }
550 if !in_literal && ch == sep && !is_escaped {
551 let seg = extract_segment_char(s, seg_start, i, sep, literal_delim);
552 segments.push((seg, seg_start..i));
553 seg_start = i + ch.len_utf8();
554 prev_backslashes = 0;
555 continue;
556 }
557 if ch == '\\' {
558 prev_backslashes += 1;
559 } else {
560 prev_backslashes = 0;
561 }
562 }
563 let seg = extract_segment_char(s, seg_start, s.len(), sep, literal_delim);
564 segments.push((seg, seg_start..s.len()));
565 segments
566 }
567}
568
569fn find_inner(s: &str, needle: char, literal_delim: Option<char>) -> Option<usize> {
570 let bytes = s.as_bytes();
571 let mut in_literal = false;
572 for (i, ch) in s.char_indices() {
573 if let Some(delim) = literal_delim {
574 if ch == delim && trailing_backslashes_before(bytes, i) % 2 == 0 {
575 in_literal = !in_literal;
576 continue;
577 }
578 }
579 if !in_literal && ch == needle && trailing_backslashes_before(bytes, i) % 2 == 0 {
580 return Some(i);
581 }
582 }
583 None
584}
585
/// Reports whether the byte at `pos` in `source` is escaped, i.e. preceded by
/// an odd number of consecutive backslashes.
///
/// Only the bytes before `pos` are inspected, so `pos == source.len()` is
/// accepted. A `pos` beyond the end of `source` returns `false` instead of
/// panicking, matching the out-of-range behaviour of [`is_structural_at`].
pub fn is_quote_escaped(source: &[u8], pos: usize) -> bool {
    // `get` rejects `pos > source.len()` without indexing out of bounds.
    let Some(prefix) = source.get(..pos) else {
        return false;
    };
    let backslash_run = prefix
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();
    backslash_run % 2 == 1
}
601
/// Converts the raw text of a quoted value into its unescaped content.
///
/// If `raw` both starts and ends with `"` (and is at least two bytes long),
/// that outer pair of quotes is removed first; otherwise `raw` is used as is.
/// Within the remaining text, `\"` becomes `"` and `\\` becomes `\`. A
/// backslash followed by any other character, or at the end, is kept verbatim.
pub fn unescape_quoted(raw: &str) -> String {
    // Both strips must succeed, otherwise fall back to the untouched input.
    let inner = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(raw);

    let mut unescaped = String::with_capacity(inner.len());
    let mut rest = inner.chars().peekable();

    while let Some(ch) = rest.next() {
        if ch == '\\' {
            // Consume the follower only when it is one of the two escapable
            // characters; anything else leaves the backslash literal.
            if let Some(target) = rest.next_if(|next| matches!(*next, '"' | '\\')) {
                unescaped.push(target);
                continue;
            }
        }
        unescaped.push(ch);
    }

    unescaped
}
636
/// Escapes `text` for placement between double quotes by prefixing every
/// backslash and every `"` with a backslash. The surrounding quotes are not
/// added.
pub fn escape_quoted(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\\' | '"' => escaped.extend(['\\', ch]),
            _ => escaped.push(ch),
        }
    }
    escaped
}
650
// Unit tests for the escape, unescape, split and find helpers in this file.
// Note that Rust string literals double every backslash: "\\*" below is the
// two-character text `\*`.
#[cfg(test)]
mod tests {
    use super::*;

    // ----- unescape_inline -----

    #[test]
    fn unescape_plain_text_unchanged() {
        assert_eq!(unescape_inline("hello world"), "hello world");
    }

    #[test]
    fn unescape_empty_string() {
        assert_eq!(unescape_inline(""), "");
    }

    #[test]
    fn unescape_asterisk() {
        assert_eq!(unescape_inline("\\*literal\\*"), "*literal*");
    }

    #[test]
    fn unescape_underscore() {
        assert_eq!(unescape_inline("\\_not emphasis\\_"), "_not emphasis_");
    }

    #[test]
    fn unescape_backtick() {
        assert_eq!(unescape_inline("\\`not code\\`"), "`not code`");
    }

    #[test]
    fn unescape_hash() {
        assert_eq!(unescape_inline("\\#not math\\#"), "#not math#");
    }

    #[test]
    fn unescape_brackets() {
        assert_eq!(unescape_inline("\\[not a ref\\]"), "[not a ref]");
    }

    // A backslash before a letter is not an escape, so the text is unchanged.
    #[test]
    fn unescape_backslash_before_alphanumeric_preserved() {
        assert_eq!(unescape_inline("C:\\Users\\name"), "C:\\Users\\name");
    }

    // `\\` collapses to a single backslash.
    #[test]
    fn unescape_double_backslash() {
        assert_eq!(unescape_inline("C:\\\\Users\\\\name"), "C:\\Users\\name");
    }

    #[test]
    fn unescape_trailing_backslash() {
        assert_eq!(unescape_inline("text\\"), "text\\");
    }

    #[test]
    fn unescape_backslash_before_space() {
        assert_eq!(unescape_inline("hello\\ world"), "hello world");
    }

    #[test]
    fn unescape_backslash_before_punctuation() {
        assert_eq!(unescape_inline("\\!\\?\\,\\."), "!?,.");
    }

    #[test]
    fn unescape_multiple_consecutive_backslashes() {
        // Four backslashes are two escaped backslashes, giving two.
        assert_eq!(unescape_inline("\\\\\\\\"), "\\\\");
    }

    #[test]
    fn unescape_triple_backslash_then_star() {
        // `\\` gives `\`, then `\*` gives `*`.
        assert_eq!(unescape_inline("\\\\\\*"), "\\*");
    }

    #[test]
    fn unescape_mixed_escaped_and_plain() {
        assert_eq!(
            unescape_inline("plain \\*escaped\\* plain"),
            "plain *escaped* plain"
        );
    }

    #[test]
    fn unescape_backslash_before_digit_preserved() {
        assert_eq!(unescape_inline("item\\1"), "item\\1");
    }

    // Alphanumeric detection is Unicode-aware: `ñ` counts as a letter.
    #[test]
    fn unescape_backslash_before_unicode_letter_preserved() {
        assert_eq!(unescape_inline("path\\ñ"), "path\\ñ");
    }

    #[test]
    fn unescape_backslash_before_non_ascii_symbol() {
        // `→` is not alphanumeric, so the backslash escapes it.
        assert_eq!(unescape_inline("\\→"), "→");
    }

    // ----- escape_inline -----

    #[test]
    fn escape_plain_text_unchanged() {
        assert_eq!(escape_inline("hello world"), "hello world");
    }

    #[test]
    fn escape_empty_string() {
        assert_eq!(escape_inline(""), "");
    }

    #[test]
    fn escape_special_chars() {
        assert_eq!(escape_inline("*bold*"), "\\*bold\\*");
        assert_eq!(escape_inline("_emph_"), "\\_emph\\_");
        assert_eq!(escape_inline("`code`"), "\\`code\\`");
        assert_eq!(escape_inline("#math#"), "\\#math\\#");
        assert_eq!(escape_inline("[ref]"), "\\[ref\\]");
    }

    #[test]
    fn escape_backslash() {
        assert_eq!(escape_inline("C:\\Users"), "C:\\\\Users");
    }

    // ----- escape_inline / unescape_inline round-trips -----

    #[test]
    fn roundtrip_plain_text() {
        let original = "hello world";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_special_chars() {
        let original = "*bold* and _emph_ and `code` and #math# and [ref]";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_backslashes() {
        let original = "C:\\Users\\name";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_mixed() {
        let original = "path\\file *bold* and \\more";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    // ----- unescape_quoted -----

    #[test]
    fn unescape_quoted_simple() {
        assert_eq!(unescape_quoted("\"Hello World\""), "Hello World");
    }

    #[test]
    fn unescape_quoted_with_escaped_quote() {
        assert_eq!(unescape_quoted("\"say \\\"hello\\\"\""), "say \"hello\"");
    }

    #[test]
    fn unescape_quoted_with_escaped_backslash() {
        assert_eq!(unescape_quoted("\"path\\\\to\""), "path\\to");
    }

    #[test]
    fn unescape_quoted_escaped_backslash_before_quote() {
        // Raw text is `"end\\"`: an escaped backslash, then the closing quote.
        assert_eq!(unescape_quoted("\"end\\\\\""), "end\\");
    }

    #[test]
    fn unescape_quoted_other_backslash_literal() {
        // `\n` is not a recognised escape, so both characters are kept.
        assert_eq!(unescape_quoted("\"hello\\nworld\""), "hello\\nworld");
    }

    #[test]
    fn unescape_quoted_empty() {
        assert_eq!(unescape_quoted("\"\""), "");
    }

    #[test]
    fn unescape_quoted_no_quotes() {
        // Input without surrounding quotes is processed as is.
        assert_eq!(unescape_quoted("simple"), "simple");
    }

    // ----- escape_quoted -----

    #[test]
    fn escape_quoted_simple() {
        assert_eq!(escape_quoted("Hello World"), "Hello World");
    }

    #[test]
    fn escape_quoted_with_quote() {
        assert_eq!(escape_quoted("say \"hello\""), "say \\\"hello\\\"");
    }

    #[test]
    fn escape_quoted_with_backslash() {
        assert_eq!(escape_quoted("path\\to"), "path\\\\to");
    }

    #[test]
    fn escape_quoted_empty() {
        assert_eq!(escape_quoted(""), "");
    }

    // ----- escape_quoted / unescape_quoted round-trips -----
    // `escape_quoted` does not add the surrounding quotes, so each test wraps
    // the escaped text itself before unescaping.

    #[test]
    fn roundtrip_quoted_simple() {
        let original = "Hello World";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_quotes() {
        let original = "say \"hello\" and \"bye\"";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_backslashes() {
        let original = "C:\\Users\\name";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_both() {
        let original = "path\\to \"file\"";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    // ----- is_quote_escaped -----
    // The second argument is the byte index of the quote under test.

    #[test]
    fn is_quote_escaped_no_backslash() {
        assert!(!is_quote_escaped(b"hello\"", 5));
    }

    #[test]
    fn is_quote_escaped_single_backslash() {
        assert!(is_quote_escaped(b"hello\\\"", 6));
    }

    #[test]
    fn is_quote_escaped_double_backslash() {
        assert!(!is_quote_escaped(b"hello\\\\\"", 7));
    }

    #[test]
    fn is_quote_escaped_triple_backslash() {
        assert!(is_quote_escaped(b"hello\\\\\\\"", 8));
    }

    #[test]
    fn is_quote_escaped_at_start() {
        assert!(!is_quote_escaped(b"\"", 0));
    }

    // ----- find_structural_lex_markers -----

    #[test]
    fn structural_markers_no_quotes() {
        use crate::lex::token::Token;
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::LexMarker,
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 4]);
    }

    #[test]
    fn structural_markers_with_quoted_marker() {
        use crate::lex::token::Token;
        // Indices: markers at 0, 7 and 12; quotes at 6 and 10. The marker at 7
        // sits between the quotes and must be skipped.
        let tokens = vec![
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote, Token::LexMarker, Token::Whitespace(1),
            Token::Text("value".into()),
            Token::Quote, Token::Whitespace(1),
            Token::LexMarker, ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
    }

    #[test]
    fn structural_markers_data_line_with_quoted_marker() {
        use crate::lex::token::Token;
        // No marker follows the closing quote, so the only structural marker
        // is the opening one at index 0.
        let tokens = vec![
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Equals,
            Token::Quote,
            Token::LexMarker, Token::Text("value".into()),
            Token::Quote,
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0]);
    }

    #[test]
    fn structural_markers_escaped_quote_does_not_toggle() {
        use crate::lex::token::Token;
        // Quotes at 6, 8 and 10. The text at 7 ends in a single backslash, so
        // the quote at 8 is escaped and the region stays open until 10.
        let tokens = vec![
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote, Token::Text("value with \\".into()), Token::Quote, Token::Text(" inside".into()), Token::Quote, Token::Whitespace(1),
            Token::LexMarker, ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
    }

    #[test]
    fn structural_markers_double_backslash_before_quote_not_escaped() {
        use crate::lex::token::Token;
        // The text at 7 ends in two backslashes (an escaped backslash), so the
        // quote at 8 is a real closing quote and the marker at 10 is structural.
        let tokens = vec![
            Token::LexMarker, Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote, Token::Text("val\\\\".into()), Token::Quote, Token::Whitespace(1),
            Token::LexMarker, ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 10]);
    }

    // Converts split results to owned strings so they can be compared with
    // `vec!["..."]` literals regardless of Borrowed/Owned.
    fn collect(segments: Vec<Cow<'_, str>>) -> Vec<String> {
        segments.into_iter().map(|s| s.into_owned()).collect()
    }

    // ----- split_respecting_escape -----

    #[test]
    fn split_no_separator() {
        assert_eq!(
            collect(split_respecting_escape("hello", '|')),
            vec!["hello"]
        );
    }

    #[test]
    fn split_empty_input() {
        assert_eq!(collect(split_respecting_escape("", '|')), vec![""]);
    }

    #[test]
    fn split_simple() {
        assert_eq!(
            collect(split_respecting_escape("a|b|c", '|')),
            vec!["a", "b", "c"]
        );
    }

    #[test]
    fn split_trailing_empty() {
        assert_eq!(
            collect(split_respecting_escape("a|b|", '|')),
            vec!["a", "b", ""]
        );
    }

    #[test]
    fn split_leading_empty() {
        assert_eq!(
            collect(split_respecting_escape("|a|b", '|')),
            vec!["", "a", "b"]
        );
    }

    #[test]
    fn split_only_separators() {
        assert_eq!(
            collect(split_respecting_escape("|||", '|')),
            vec!["", "", "", ""]
        );
    }

    #[test]
    fn split_escaped_separator() {
        assert_eq!(
            collect(split_respecting_escape("a\\|b|c", '|')),
            vec!["a|b", "c"]
        );
    }

    #[test]
    fn split_double_backslash_then_sep_splits() {
        assert_eq!(
            // Two backslashes before `|`: the separator is not escaped. Only
            // `\sep` is stripped, so both backslashes stay in the segment.
            collect(split_respecting_escape("a\\\\|b", '|')),
            vec!["a\\\\", "b"]
        );
    }

    #[test]
    fn split_triple_backslash_then_sep_is_escaped() {
        assert_eq!(
            // Three backslashes before `|`: the separator is escaped and only
            // the backslash directly before it is dropped.
            collect(split_respecting_escape("a\\\\\\|b", '|')),
            vec!["a\\\\|b"]
        );
    }

    #[test]
    fn split_multiple_escapes_in_one_segment() {
        assert_eq!(
            collect(split_respecting_escape("\\|a\\|b\\|", '|')),
            vec!["|a|b|"]
        );
    }

    #[test]
    fn split_trailing_backslash_no_sep() {
        assert_eq!(
            collect(split_respecting_escape("abc\\", '|')),
            vec!["abc\\"]
        );
    }

    #[test]
    fn split_preserves_unrelated_backslashes() {
        assert_eq!(
            // A backslash not followed by the separator is left untouched.
            collect(split_respecting_escape("a\\n|b", '|')),
            vec!["a\\n", "b"]
        );
    }

    #[test]
    fn split_different_separator() {
        assert_eq!(
            collect(split_respecting_escape("a,b\\,c,d", ',')),
            vec!["a", "b,c", "d"]
        );
    }

    #[test]
    fn split_borrowed_when_no_strip() {
        let segments = split_respecting_escape("a|b|c", '|');
        // With no escapes to strip, no segment should allocate.
        for seg in &segments {
            assert!(
                matches!(seg, Cow::Borrowed(_)),
                "expected Borrowed, got {seg:?}"
            );
        }
    }

    #[test]
    fn split_owned_when_strip_happens() {
        let segments = split_respecting_escape("a\\|b|c", '|');
        assert!(matches!(segments[0], Cow::Owned(_)));
        assert!(matches!(segments[1], Cow::Borrowed(_)));
    }

    // Multi-byte content with an ASCII separator.
    #[test]
    fn split_unicode_content() {
        assert_eq!(
            collect(split_respecting_escape("α|β|γ", '|')),
            vec!["α", "β", "γ"]
        );
    }

    #[test]
    fn split_unicode_with_escape() {
        assert_eq!(
            collect(split_respecting_escape("α\\|β|γ", '|')),
            vec!["α|β", "γ"]
        );
    }

    // A non-ASCII separator forces the char-oriented code path.
    #[test]
    fn split_non_ascii_separator() {
        assert_eq!(
            collect(split_respecting_escape("a→b→c", '→')),
            vec!["a", "b", "c"]
        );
    }

    #[test]
    fn split_non_ascii_separator_with_escape() {
        assert_eq!(
            collect(split_respecting_escape("a\\→b→c", '→')),
            vec!["a→b", "c"]
        );
    }

    // ----- split_respecting_escape_and_literals -----

    #[test]
    fn split_literal_region_protects_separator() {
        assert_eq!(
            collect(split_respecting_escape_and_literals("a|`b|c`|d", '|', '`')),
            vec!["a", "`b|c`", "d"]
        );
    }

    #[test]
    fn split_literal_region_multiple_pipes() {
        assert_eq!(
            collect(split_respecting_escape_and_literals(
                "a|`x|y|z`|b",
                '|',
                '`'
            )),
            vec!["a", "`x|y|z`", "b"]
        );
    }

    #[test]
    fn split_escape_outside_literal_still_works() {
        assert_eq!(
            collect(split_respecting_escape_and_literals(
                "a\\|b|`c|d`|e",
                '|',
                '`'
            )),
            vec!["a|b", "`c|d`", "e"]
        );
    }

    #[test]
    fn split_unbalanced_literal_delim() {
        assert_eq!(
            // An unclosed literal region runs to the end of the input.
            collect(split_respecting_escape_and_literals("a|`b|c", '|', '`')),
            vec!["a", "`b|c"]
        );
    }

    #[test]
    fn split_escaped_literal_delim_does_not_open_region() {
        assert_eq!(
            // The escaped backtick opens no region, so the later `|` splits.
            // The backslash stays because only `\sep` is stripped.
            collect(split_respecting_escape_and_literals("a|\\`b|c", '|', '`')),
            vec!["a", "\\`b", "c"]
        );
    }

    #[test]
    fn split_escaped_literal_delim_before_escaped_sep_non_ascii() {
        // The non-ASCII delimiter forces the char-oriented path. The escaped
        // `α` must not open a literal region, otherwise the following `\|`
        // would be left unstripped.
        let segments = split_respecting_escape_and_literals("a\\α\\|b", '|', 'α');
        assert_eq!(
            segments.len(),
            1,
            "escaped pipe must not split; got segments={segments:?}"
        );
        assert_eq!(
            segments[0].as_ref(),
            "a\\α|b",
            "escaped pipe must be stripped; escaped alpha must not open a literal region"
        );
    }

    #[test]
    fn split_empty_cells_between_literal_regions() {
        assert_eq!(
            collect(split_respecting_escape_and_literals("`a`|`b`", '|', '`')),
            vec!["`a`", "`b`"]
        );
    }

    // ----- find_respecting_escape / find_respecting_escape_and_literals -----
    // Results are byte offsets into the input.

    #[test]
    fn find_first_unescaped() {
        assert_eq!(find_respecting_escape("a|b|c", '|'), Some(1));
    }

    #[test]
    fn find_skips_escaped() {
        assert_eq!(find_respecting_escape("a\\|b|c", '|'), Some(4));
    }

    #[test]
    fn find_none_when_only_escaped() {
        assert_eq!(find_respecting_escape("a\\|b\\|c", '|'), None);
    }

    #[test]
    fn find_respects_literal_region() {
        assert_eq!(
            find_respecting_escape_and_literals("`a|b`|c", '|', '`'),
            Some(5)
        );
    }

    #[test]
    fn find_empty() {
        assert_eq!(find_respecting_escape("", '|'), None);
    }

    // ----- is_structural_at -----

    #[test]
    fn structural_at_unescaped() {
        assert!(is_structural_at(b"a|b", 1, None));
    }

    #[test]
    fn structural_at_escaped() {
        assert!(!is_structural_at(b"a\\|b", 2, None));
    }

    #[test]
    fn structural_at_double_escape() {
        // Two backslashes form an escaped backslash, so `|` is structural.
        assert!(is_structural_at(b"a\\\\|b", 3, None));
    }

    #[test]
    fn structural_at_inside_literal() {
        // Index 2 lies between the two backticks.
        assert!(!is_structural_at(b"`a|b`", 2, Some(b'`')));
    }

    #[test]
    fn structural_at_outside_literal() {
        assert!(is_structural_at(b"`a`|b", 3, Some(b'`')));
    }

    #[test]
    fn structural_at_out_of_bounds() {
        assert!(!is_structural_at(b"abc", 3, None));
        assert!(!is_structural_at(b"", 0, None));
    }

    // ----- is_quote_escaped_by_prev_token -----

    #[test]
    fn is_quote_escaped_by_prev_token_tests() {
        use crate::lex::token::Token;
        // No previous token.
        assert!(!is_quote_escaped_by_prev_token(None));
        // Previous token is not text.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Whitespace(1))));
        // Text with no trailing backslash.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello".into()
        ))));
        // One trailing backslash: escaped.
        assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\".into()
        ))));
        // Two trailing backslashes: not escaped.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\\\".into()
        ))));
        // Three trailing backslashes: escaped.
        assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\\\\\".into()
        ))));
    }
}