/// Classification of the character that follows a backslash in inline text.
///
/// Returned by [`unescape_inline_char`].
pub enum EscapeAction {
    /// The character after the backslash is not alphanumeric; the variant
    /// carries that character.
    Escape(char),
    /// The character after the backslash is alphanumeric, or there is no
    /// following character at all.
    Literal,
}
23
24pub fn unescape_inline_char(next: Option<char>) -> EscapeAction {
29 match next {
30 Some(ch) if !ch.is_alphanumeric() => EscapeAction::Escape(ch),
31 _ => EscapeAction::Literal,
32 }
33}
34
/// Removes backslash escapes from inline text.
///
/// Rules, applied left to right:
/// - a backslash followed by a non-alphanumeric character (including another
///   backslash) is dropped and the following character is emitted;
/// - a backslash followed by an alphanumeric character (per
///   [`char::is_alphanumeric`]) is kept as a literal backslash;
/// - a trailing backslash with nothing after it is kept as-is.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn unescape_inline(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // One character of lookahead is all the grammar needs, so a peekable
    // iterator replaces the intermediate `Vec<char>`.
    let mut chars = text.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.peek().copied() {
            Some(next) if !next.is_alphanumeric() => {
                // Escape sequence: emit the escaped character and consume it
                // so it is not interpreted again (matters for `\\`).
                result.push(next);
                chars.next();
            }
            // Alphanumeric follower or end of input: the backslash is literal.
            // The follower (if any) is left in the iterator and handled by
            // the next loop iteration.
            _ => result.push('\\'),
        }
    }

    result
}
72
73pub fn escape_inline(text: &str) -> String {
80 let mut result = String::with_capacity(text.len());
81
82 for ch in text.chars() {
83 if is_inline_special(ch) {
84 result.push('\\');
85 }
86 result.push(ch);
87 }
88
89 result
90}
91
/// Returns `true` for the characters that `escape_inline` prefixes with a
/// backslash: `\`, `*`, `_`, `` ` ``, `#`, `[` and `]`.
fn is_inline_special(ch: char) -> bool {
    const SPECIAL: [char; 7] = ['\\', '*', '_', '`', '#', '[', ']'];
    SPECIAL.contains(&ch)
}
96
97fn is_quote_escaped_by_prev_token(prev: Option<&crate::lex::token::Token>) -> bool {
102 use crate::lex::token::Token;
103 match prev {
104 Some(Token::Text(s)) => {
105 let trailing = s.bytes().rev().take_while(|&b| b == b'\\').count();
106 trailing % 2 == 1
107 }
108 _ => false,
109 }
110}
111
112pub fn find_structural_lex_markers(tokens: &[crate::lex::token::Token]) -> Vec<usize> {
120 use crate::lex::token::Token;
121 let mut markers = Vec::new();
122 let mut in_quotes = false;
123 for (i, token) in tokens.iter().enumerate() {
124 match token {
125 Token::Quote => {
126 if !is_quote_escaped_by_prev_token(if i > 0 { Some(&tokens[i - 1]) } else { None })
127 {
128 in_quotes = !in_quotes;
129 }
130 }
131 Token::LexMarker if !in_quotes => markers.push(i),
132 _ => {}
133 }
134 }
135 markers
136}
137
138pub fn find_structural_lex_marker_pairs<R>(tokens: &[(crate::lex::token::Token, R)]) -> Vec<usize> {
143 use crate::lex::token::Token;
144 let mut markers = Vec::new();
145 let mut in_quotes = false;
146 for (i, (token, _)) in tokens.iter().enumerate() {
147 match token {
148 Token::Quote => {
149 let prev = if i > 0 { Some(&tokens[i - 1].0) } else { None };
150 if !is_quote_escaped_by_prev_token(prev) {
151 in_quotes = !in_quotes;
152 }
153 }
154 Token::LexMarker if !in_quotes => markers.push(i),
155 _ => {}
156 }
157 }
158 markers
159}
160
/// Reports whether the byte at `pos` in `source` is preceded by an odd number
/// of consecutive backslashes.
///
/// Only the bytes before `pos` are examined; the byte at `pos` itself is
/// never read. Returns `false` when `pos` is 0.
///
/// # Panics
///
/// Panics if `pos` is greater than `source.len()`.
pub fn is_quote_escaped(source: &[u8], pos: usize) -> bool {
    let preceding_backslashes = source[..pos]
        .iter()
        .rev()
        .take_while(|&&byte| byte == b'\\')
        .count();

    // An even run is made of self-escaping pairs; an odd run leaves one
    // backslash over to escape whatever sits at `pos`.
    preceding_backslashes % 2 == 1
}
176
/// Strips the surrounding double quotes from `raw` (when present) and resolves
/// the escapes `\"` and `\\` inside it.
///
/// - If `raw` both starts and ends with `"` and is at least two bytes long,
///   the outer pair is removed; otherwise `raw` is processed as-is.
/// - `\"` becomes `"` and `\\` becomes `\`.
/// - A backslash followed by any other character, or a trailing backslash,
///   is kept literally (for example `\n` stays as backslash + `n`).
///
/// Returns a newly allocated `String`.
pub fn unescape_quoted(raw: &str) -> String {
    // Both quotes must be present; a lone `"` fails `strip_suffix` after the
    // prefix is removed, so it falls back to the untouched input.
    let inner = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(raw);

    let mut result = String::with_capacity(inner.len());
    // One character of lookahead suffices, so no intermediate `Vec<char>`.
    let mut chars = inner.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '\\' {
            if let Some(&next) = chars.peek() {
                if next == '"' || next == '\\' {
                    // Recognized escape: emit the escaped character and
                    // consume it so it is not examined again.
                    result.push(next);
                    chars.next();
                    continue;
                }
            }
        }
        result.push(ch);
    }

    result
}
211
/// Escapes text for use inside a double-quoted value by prefixing every
/// backslash and double quote with a backslash.
///
/// The surrounding quotes are not added. Returns a newly allocated `String`.
pub fn escape_quoted(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());

    for symbol in text.chars() {
        match symbol {
            '\\' | '"' => {
                escaped.push('\\');
                escaped.push(symbol);
            }
            _ => escaped.push(symbol),
        }
    }

    escaped
}
225
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::token::Token;

    // --- unescape_inline_char ---

    #[test]
    fn unescape_inline_char_non_alphanumeric_is_escape() {
        assert!(matches!(
            unescape_inline_char(Some('*')),
            EscapeAction::Escape('*')
        ));
        assert!(matches!(
            unescape_inline_char(Some('\\')),
            EscapeAction::Escape('\\')
        ));
        assert!(matches!(
            unescape_inline_char(Some(' ')),
            EscapeAction::Escape(' ')
        ));
        assert!(matches!(
            unescape_inline_char(Some('→')),
            EscapeAction::Escape('→')
        ));
    }

    #[test]
    fn unescape_inline_char_alphanumeric_is_literal() {
        assert!(matches!(
            unescape_inline_char(Some('a')),
            EscapeAction::Literal
        ));
        assert!(matches!(
            unescape_inline_char(Some('1')),
            EscapeAction::Literal
        ));
        assert!(matches!(
            unescape_inline_char(Some('ñ')),
            EscapeAction::Literal
        ));
    }

    #[test]
    fn unescape_inline_char_end_of_input_is_literal() {
        assert!(matches!(unescape_inline_char(None), EscapeAction::Literal));
    }

    // --- unescape_inline ---

    #[test]
    fn unescape_plain_text_unchanged() {
        assert_eq!(unescape_inline("hello world"), "hello world");
    }

    #[test]
    fn unescape_empty_string() {
        assert_eq!(unescape_inline(""), "");
    }

    #[test]
    fn unescape_asterisk() {
        assert_eq!(unescape_inline("\\*literal\\*"), "*literal*");
    }

    #[test]
    fn unescape_underscore() {
        assert_eq!(unescape_inline("\\_not emphasis\\_"), "_not emphasis_");
    }

    #[test]
    fn unescape_backtick() {
        assert_eq!(unescape_inline("\\`not code\\`"), "`not code`");
    }

    #[test]
    fn unescape_hash() {
        assert_eq!(unescape_inline("\\#not math\\#"), "#not math#");
    }

    #[test]
    fn unescape_brackets() {
        assert_eq!(unescape_inline("\\[not a ref\\]"), "[not a ref]");
    }

    #[test]
    fn unescape_backslash_before_alphanumeric_preserved() {
        assert_eq!(unescape_inline("C:\\Users\\name"), "C:\\Users\\name");
    }

    #[test]
    fn unescape_double_backslash() {
        assert_eq!(unescape_inline("C:\\\\Users\\\\name"), "C:\\Users\\name");
    }

    #[test]
    fn unescape_trailing_backslash() {
        assert_eq!(unescape_inline("text\\"), "text\\");
    }

    #[test]
    fn unescape_backslash_before_space() {
        assert_eq!(unescape_inline("hello\\ world"), "hello world");
    }

    #[test]
    fn unescape_backslash_before_punctuation() {
        assert_eq!(unescape_inline("\\!\\?\\,\\."), "!?,.");
    }

    #[test]
    fn unescape_multiple_consecutive_backslashes() {
        // Four backslashes in, two out: each pair collapses to one.
        assert_eq!(unescape_inline("\\\\\\\\"), "\\\\");
    }

    #[test]
    fn unescape_triple_backslash_then_star() {
        // `\\` collapses to `\`, then `\*` collapses to `*`.
        assert_eq!(unescape_inline("\\\\\\*"), "\\*");
    }

    #[test]
    fn unescape_mixed_escaped_and_plain() {
        assert_eq!(
            unescape_inline("plain \\*escaped\\* plain"),
            "plain *escaped* plain"
        );
    }

    #[test]
    fn unescape_backslash_before_digit_preserved() {
        assert_eq!(unescape_inline("item\\1"), "item\\1");
    }

    #[test]
    fn unescape_backslash_before_unicode_letter_preserved() {
        assert_eq!(unescape_inline("path\\ñ"), "path\\ñ");
    }

    #[test]
    fn unescape_backslash_before_non_ascii_symbol() {
        // `→` is not alphanumeric, so the backslash escapes it.
        assert_eq!(unescape_inline("\\→"), "→");
    }

    // --- escape_inline ---

    #[test]
    fn escape_plain_text_unchanged() {
        assert_eq!(escape_inline("hello world"), "hello world");
    }

    #[test]
    fn escape_empty_string() {
        assert_eq!(escape_inline(""), "");
    }

    #[test]
    fn escape_special_chars() {
        assert_eq!(escape_inline("*bold*"), "\\*bold\\*");
        assert_eq!(escape_inline("_emph_"), "\\_emph\\_");
        assert_eq!(escape_inline("`code`"), "\\`code\\`");
        assert_eq!(escape_inline("#math#"), "\\#math\\#");
        assert_eq!(escape_inline("[ref]"), "\\[ref\\]");
    }

    #[test]
    fn escape_backslash() {
        assert_eq!(escape_inline("C:\\Users"), "C:\\\\Users");
    }

    // --- escape_inline / unescape_inline round trips ---

    #[test]
    fn roundtrip_plain_text() {
        let original = "hello world";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_special_chars() {
        let original = "*bold* and _emph_ and `code` and #math# and [ref]";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_backslashes() {
        let original = "C:\\Users\\name";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    #[test]
    fn roundtrip_mixed() {
        let original = "path\\file *bold* and \\more";
        assert_eq!(unescape_inline(&escape_inline(original)), original);
    }

    // --- unescape_quoted ---

    #[test]
    fn unescape_quoted_simple() {
        assert_eq!(unescape_quoted("\"Hello World\""), "Hello World");
    }

    #[test]
    fn unescape_quoted_with_escaped_quote() {
        assert_eq!(unescape_quoted("\"say \\\"hello\\\"\""), "say \"hello\"");
    }

    #[test]
    fn unescape_quoted_with_escaped_backslash() {
        assert_eq!(unescape_quoted("\"path\\\\to\""), "path\\to");
    }

    #[test]
    fn unescape_quoted_escaped_backslash_before_quote() {
        // The `\\` before the closing quote is an escaped backslash, not an
        // escape of the quote itself.
        assert_eq!(unescape_quoted("\"end\\\\\""), "end\\");
    }

    #[test]
    fn unescape_quoted_other_backslash_literal() {
        // Only `\"` and `\\` are escapes; `\n` stays backslash + `n`.
        assert_eq!(unescape_quoted("\"hello\\nworld\""), "hello\\nworld");
    }

    #[test]
    fn unescape_quoted_empty() {
        assert_eq!(unescape_quoted("\"\""), "");
    }

    #[test]
    fn unescape_quoted_no_quotes() {
        assert_eq!(unescape_quoted("simple"), "simple");
    }

    // --- escape_quoted ---

    #[test]
    fn escape_quoted_simple() {
        assert_eq!(escape_quoted("Hello World"), "Hello World");
    }

    #[test]
    fn escape_quoted_with_quote() {
        assert_eq!(escape_quoted("say \"hello\""), "say \\\"hello\\\"");
    }

    #[test]
    fn escape_quoted_with_backslash() {
        assert_eq!(escape_quoted("path\\to"), "path\\\\to");
    }

    #[test]
    fn escape_quoted_empty() {
        assert_eq!(escape_quoted(""), "");
    }

    // --- escape_quoted / unescape_quoted round trips ---

    #[test]
    fn roundtrip_quoted_simple() {
        let original = "Hello World";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_quotes() {
        let original = "say \"hello\" and \"bye\"";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_backslashes() {
        let original = "C:\\Users\\name";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    #[test]
    fn roundtrip_quoted_with_both() {
        let original = "path\\to \"file\"";
        let escaped = format!("\"{}\"", escape_quoted(original));
        assert_eq!(unescape_quoted(&escaped), original);
    }

    // --- is_quote_escaped ---

    #[test]
    fn is_quote_escaped_no_backslash() {
        assert!(!is_quote_escaped(b"hello\"", 5));
    }

    #[test]
    fn is_quote_escaped_single_backslash() {
        assert!(is_quote_escaped(b"hello\\\"", 6));
    }

    #[test]
    fn is_quote_escaped_double_backslash() {
        assert!(!is_quote_escaped(b"hello\\\\\"", 7));
    }

    #[test]
    fn is_quote_escaped_triple_backslash() {
        assert!(is_quote_escaped(b"hello\\\\\\\"", 8));
    }

    #[test]
    fn is_quote_escaped_at_start() {
        assert!(!is_quote_escaped(b"\"", 0));
    }

    // --- find_structural_lex_markers ---

    #[test]
    fn structural_markers_no_quotes() {
        let tokens = vec![
            Token::LexMarker,
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::LexMarker,
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 4]);
    }

    #[test]
    fn structural_markers_with_quoted_marker() {
        let tokens = vec![
            Token::LexMarker, // 0: structural
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote,     // 6: opens the quoted value
            Token::LexMarker, // 7: inside quotes, not structural
            Token::Whitespace(1),
            Token::Text("value".into()),
            Token::Quote, // 10: closes the quoted value
            Token::Whitespace(1),
            Token::LexMarker, // 12: structural
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
    }

    #[test]
    fn structural_markers_data_line_with_quoted_marker() {
        let tokens = vec![
            Token::LexMarker, // 0: structural
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Equals,
            Token::Quote,
            Token::LexMarker, // 5: inside quotes, not structural
            Token::Text("value".into()),
            Token::Quote,
        ];
        // Only the leading marker is outside quotes.
        assert_eq!(find_structural_lex_markers(&tokens), vec![0]);
    }

    #[test]
    fn structural_markers_escaped_quote_does_not_toggle() {
        let tokens = vec![
            Token::LexMarker, // 0: structural
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote,                       // 6: opens the quoted value
            Token::Text("value with \\".into()), // 7: ends in one backslash
            Token::Quote,                       // 8: escaped, does not toggle
            Token::Text(" inside".into()),
            Token::Quote, // 10: closes the quoted value
            Token::Whitespace(1),
            Token::LexMarker, // 12: structural
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 12]);
    }

    #[test]
    fn structural_markers_double_backslash_before_quote_not_escaped() {
        let tokens = vec![
            Token::LexMarker, // 0: structural
            Token::Whitespace(1),
            Token::Text("note".into()),
            Token::Whitespace(1),
            Token::Text("foo".into()),
            Token::Equals,
            Token::Quote,                 // 6: opens the quoted value
            Token::Text("val\\\\".into()), // 7: ends in two backslashes
            Token::Quote,                 // 8: not escaped, closes the value
            Token::Whitespace(1),
            Token::LexMarker, // 10: structural
        ];
        assert_eq!(find_structural_lex_markers(&tokens), vec![0, 10]);
    }

    // --- find_structural_lex_marker_pairs ---

    #[test]
    fn structural_marker_pairs_no_quotes() {
        let tokens = vec![
            (Token::LexMarker, ()),
            (Token::Whitespace(1), ()),
            (Token::Text("note".into()), ()),
            (Token::Whitespace(1), ()),
            (Token::LexMarker, ()),
        ];
        assert_eq!(find_structural_lex_marker_pairs(&tokens), vec![0, 4]);
    }

    #[test]
    fn structural_marker_pairs_with_quoted_marker() {
        let tokens = vec![
            (Token::LexMarker, ()), // 0: structural
            (Token::Whitespace(1), ()),
            (Token::Text("note".into()), ()),
            (Token::Equals, ()),
            (Token::Quote, ()),     // 4: opens the quoted value
            (Token::LexMarker, ()), // 5: inside quotes, not structural
            (Token::Text("value".into()), ()),
            (Token::Quote, ()), // 7: closes the quoted value
            (Token::Whitespace(1), ()),
            (Token::LexMarker, ()), // 9: structural
        ];
        assert_eq!(find_structural_lex_marker_pairs(&tokens), vec![0, 9]);
    }

    #[test]
    fn structural_marker_pairs_escaped_quote_does_not_toggle() {
        let tokens = vec![
            (Token::LexMarker, ()),            // 0: structural
            (Token::Quote, ()),                // 1: opens the quoted value
            (Token::Text("a\\".into()), ()),   // 2: ends in one backslash
            (Token::Quote, ()),                // 3: escaped, does not toggle
            (Token::LexMarker, ()),            // 4: still inside quotes
            (Token::Quote, ()),                // 5: closes the quoted value
            (Token::LexMarker, ()),            // 6: structural
        ];
        assert_eq!(find_structural_lex_marker_pairs(&tokens), vec![0, 6]);
    }

    // --- is_quote_escaped_by_prev_token ---

    #[test]
    fn is_quote_escaped_by_prev_token_tests() {
        // No previous token.
        assert!(!is_quote_escaped_by_prev_token(None));
        // Previous token is not text.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Whitespace(1))));
        // Text without a trailing backslash.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello".into()
        ))));
        // One trailing backslash: escaped.
        assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\".into()
        ))));
        // Two trailing backslashes: they escape each other, quote is free.
        assert!(!is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\\\".into()
        ))));
        // Three trailing backslashes: escaped again.
        assert!(is_quote_escaped_by_prev_token(Some(&Token::Text(
            "hello\\\\\\".into()
        ))));
    }
}