1use crate::num::NonZeroChar;
8use std::ffi::CStr;
9use std::num::NonZeroU8;
10use std::ops::Range;
11use std::str::Chars;
12
/// Errors and warnings that can be reported while checking or unescaping the
/// contents of string, char, byte and C string literals.
///
/// The two `*Warning` variants are non-fatal; see [`EscapeError::is_fatal`].
// NOTE(review): the derive list was truncated to a dangling `)]` in this copy
// of the file; `Debug, PartialEq, Eq` is the restored set — confirm.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
    /// Exactly one character was expected, but the input was empty.
    ZeroChars,
    /// Exactly one character was expected, but more input followed it.
    MoreThanOneChar,

    /// A backslash with nothing after it.
    LoneSlash,
    /// A backslash followed by a character that does not start a known escape.
    InvalidEscape,
    /// A raw `'\r'` in a non-raw literal.
    BareCarriageReturn,
    /// A raw `'\r'` in a raw literal.
    BareCarriageReturnInRawString,
    /// A character that may only appear in escaped form was written unescaped
    /// (newline, tab or `'` in a single-unit literal; `"` in a string literal).
    EscapeOnlyChar,

    /// A `\x` escape with fewer than two characters after it.
    TooShortHexEscape,
    /// A `\x` escape containing a non-hexadecimal character.
    InvalidCharInHexEscape,
    /// A `\x` escape whose value is not ASCII, in a literal that requires ASCII.
    OutOfRangeHexEscape,

    /// `\u` not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A non-hexadecimal character inside `\u{...}`.
    InvalidCharInUnicodeEscape,
    /// `\u{}` with no digits.
    EmptyUnicodeEscape,
    /// `\u{...` that reaches the end of input without a closing `}`.
    UnclosedUnicodeEscape,
    /// `\u{_...}`: the first character after `{` is an underscore.
    LeadingUnderscoreUnicodeEscape,
    /// More than six hexadecimal digits inside `\u{...}`.
    OverlongUnicodeEscape,
    /// A `\u{...}` value within the `char` range that is still not a valid
    /// `char` (e.g. a surrogate code point).
    LoneSurrogateUnicodeEscape,
    /// A `\u{...}` value greater than `char::MAX`.
    OutOfRangeUnicodeEscape,

    /// A `\u{...}` escape in a byte or byte string literal.
    UnicodeEscapeInByte,
    /// A non-ASCII character in a byte, byte string or raw byte string literal.
    NonAsciiCharInByte,

    /// A NUL character or escape in a C string literal.
    NulInCStr,

    /// Warning: after a backslash-newline, the first character that was not
    /// skipped is itself whitespace.
    UnskippedWhitespaceWarning,

    /// Warning: a backslash-newline skipped over more than one line.
    MultipleSkippedLinesWarning,
}

impl EscapeError {
    /// Returns `true` for real errors and `false` for the two warning variants
    /// (`UnskippedWhitespaceWarning` and `MultipleSkippedLinesWarning`).
    pub fn is_fatal(&self) -> bool {
        !matches!(
            self,
            EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning
        )
    }
}
83
84pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
91 str::check_raw(src, callback);
92}
93
94pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
101 <[u8]>::check_raw(src, callback);
102}
103
104pub fn check_raw_c_str(
111 src: &str,
112 callback: impl FnMut(Range<usize>, Result<NonZeroChar, EscapeError>),
113) {
114 CStr::check_raw(src, callback);
115}
116
117trait CheckRaw {
119 type RawUnit;
121
122 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError>;
124
125 fn check_raw(
131 src: &str,
132 mut callback: impl FnMut(Range<usize>, Result<Self::RawUnit, EscapeError>),
133 ) {
134 let mut chars = src.chars();
135 while let Some(c) = chars.next() {
136 let start = src.len() - chars.as_str().len() - c.len_utf8();
137 let res = match c {
138 '\r' => Err(EscapeError::BareCarriageReturnInRawString),
139 _ => Self::char2raw_unit(c),
140 };
141 let end = src.len() - chars.as_str().len();
142 callback(start..end, res);
143 }
144
145 }
157}
158
159impl CheckRaw for str {
160 type RawUnit = char;
161
162 #[inline]
163 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
164 Ok(c)
165 }
166}
167
168impl CheckRaw for [u8] {
169 type RawUnit = u8;
170
171 #[inline]
172 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
173 char2byte(c)
174 }
175}
176
177#[inline]
179fn char2byte(c: char) -> Result<u8, EscapeError> {
180 if c.is_ascii() {
182 Ok(c as u8)
183 } else {
184 Err(EscapeError::NonAsciiCharInByte)
185 }
186}
187
188impl CheckRaw for CStr {
189 type RawUnit = NonZeroChar;
190
191 #[inline]
192 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
193 NonZeroChar::new(c).ok_or(EscapeError::NulInCStr)
194 }
195}
196
197#[inline]
202pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
203 str::unescape_single(&mut src.chars())
204}
205
206#[inline]
211pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
212 <[u8]>::unescape_single(&mut src.chars())
213}
214
215pub fn unescape_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
221 str::unescape(src, callback)
222}
223
224pub fn unescape_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
230 <[u8]>::unescape(src, callback)
231}
232
233pub fn unescape_c_str(
239 src: &str,
240 callback: impl FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
241) {
242 CStr::unescape(src, callback)
243}
244
245)]
250pub enum MixedUnit {
251 Char(NonZeroChar),
258
259 HighByte(NonZeroU8),
265}
266
267impl From<NonZeroChar> for MixedUnit {
268 #[inline]
269 fn from(c: NonZeroChar) -> Self {
270 MixedUnit::Char(c)
271 }
272}
273
274impl From<NonZeroU8> for MixedUnit {
275 #[inline]
276 fn from(byte: NonZeroU8) -> Self {
277 if byte.get().is_ascii() {
278 MixedUnit::Char(NonZeroChar::new(byte.get() as char).unwrap())
279 } else {
280 MixedUnit::HighByte(byte)
281 }
282 }
283}
284
285impl TryFrom<char> for MixedUnit {
286 type Error = EscapeError;
287
288 #[inline]
289 fn try_from(c: char) -> Result<Self, EscapeError> {
290 NonZeroChar::new(c)
291 .map(MixedUnit::Char)
292 .ok_or(EscapeError::NulInCStr)
293 }
294}
295
296impl TryFrom<u8> for MixedUnit {
297 type Error = EscapeError;
298
299 #[inline]
300 fn try_from(byte: u8) -> Result<Self, EscapeError> {
301 NonZeroU8::new(byte)
302 .map(From::from)
303 .ok_or(EscapeError::NulInCStr)
304 }
305}
306
307trait Unescape {
309 type Unit;
311
312 const ZERO_RESULT: Result<Self::Unit, EscapeError>;
314
315 fn nonzero_byte2unit(b: NonZeroU8) -> Self::Unit;
317
318 fn char2unit(c: char) -> Result<Self::Unit, EscapeError>;
320
321 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError>;
323
324 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError>;
326
327 fn unescape_single(chars: &mut Chars<'_>) -> Result<Self::Unit, EscapeError> {
329 let res = match chars.next().ok_or(EscapeError::ZeroChars)? {
330 '\\' => Self::unescape_1(chars),
331 '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
332 '\r' => Err(EscapeError::BareCarriageReturn),
333 c => Self::char2unit(c),
334 }?;
335 if chars.next().is_some() {
336 return Err(EscapeError::MoreThanOneChar);
337 }
338 Ok(res)
339 }
340
341 fn unescape_1(chars: &mut Chars<'_>) -> Result<Self::Unit, EscapeError> {
343 let c = chars.next().ok_or(EscapeError::LoneSlash)?;
345 if c == '0' {
346 Self::ZERO_RESULT
347 } else {
348 simple_escape(c)
349 .map(|b| Self::nonzero_byte2unit(b))
350 .or_else(|c| match c {
351 'x' => Self::hex2unit(hex_escape(chars)?),
352 'u' => Self::unicode2unit({
353 let value = unicode_escape(chars)?;
354 if value > char::MAX as u32 {
355 Err(EscapeError::OutOfRangeUnicodeEscape)
356 } else {
357 char::from_u32(value).ok_or(EscapeError::LoneSurrogateUnicodeEscape)
358 }
359 }),
360 _ => Err(EscapeError::InvalidEscape),
361 })
362 }
363 }
364
365 fn unescape(
371 src: &str,
372 mut callback: impl FnMut(Range<usize>, Result<Self::Unit, EscapeError>),
373 ) {
374 let mut chars = src.chars();
375 while let Some(c) = chars.next() {
376 let start = src.len() - chars.as_str().len() - c.len_utf8();
377 let res = match c {
378 '\\' => {
379 if let Some(b'\n') = chars.as_str().as_bytes().first() {
380 let _ = chars.next();
381 let callback_err = |range, err| callback(range, Err(err));
384 skip_ascii_whitespace(&mut chars, start, callback_err);
385 continue;
386 } else {
387 Self::unescape_1(&mut chars)
388 }
389 }
390 '"' => Err(EscapeError::EscapeOnlyChar),
391 '\r' => Err(EscapeError::BareCarriageReturn),
392 c => Self::char2unit(c),
393 };
394 let end = src.len() - chars.as_str().len();
395 callback(start..end, res);
396 }
397 }
398}
399
/// Maps the character following a backslash to the byte of a simple escape.
///
/// Recognises `"`, `n`, `r`, `t`, `\` and `'`. Any other character is handed
/// back unchanged in `Err` so the caller can try the remaining escape forms.
#[inline]
fn simple_escape(c: char) -> Result<NonZeroU8, char> {
    let byte = match c {
        '"' => b'"',
        'n' => b'\n',
        'r' => b'\r',
        't' => b'\t',
        '\\' => b'\\',
        '\'' => b'\'',
        other => return Err(other),
    };
    // None of the bytes above is zero, so this cannot fail.
    Ok(NonZeroU8::new(byte).unwrap())
}
417
418#[inline] fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
423 let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
424 let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
425
426 let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
427 let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
428
429 Ok((hi * 16 + lo) as u8)
430}
431
432#[inline] fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
438 if chars.next() != Some('{') {
439 return Err(EscapeError::NoBraceInUnicodeEscape);
440 }
441
442 let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
444 '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
445 '}' => return Err(EscapeError::EmptyUnicodeEscape),
446 c => c
447 .to_digit(16)
448 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
449 };
450
451 let mut n_digits = 1;
454 loop {
455 match chars.next() {
456 None => return Err(EscapeError::UnclosedUnicodeEscape),
457 Some('_') => continue,
458 Some('}') => {
459 return if n_digits > 6 {
462 Err(EscapeError::OverlongUnicodeEscape)
463 } else {
464 Ok(value)
465 };
466 }
467 Some(c) => {
468 let digit: u32 = c
469 .to_digit(16)
470 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
471 n_digits += 1;
472 if n_digits > 6 {
473 continue;
475 }
476 value = value * 16 + digit;
477 }
478 };
479 }
480}
481
482#[inline] fn skip_ascii_whitespace(
489 chars: &mut Chars<'_>,
490 start: usize,
491 mut callback: impl FnMut(Range<usize>, EscapeError),
492) {
493 let rest = chars.as_str();
494 let first_non_space = rest
495 .bytes()
496 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
497 .unwrap_or(rest.len());
498 let (space, rest) = rest.split_at(first_non_space);
499 let end = start + 2 + first_non_space;
501 if space.contains('\n') {
502 callback(start..end, EscapeError::MultipleSkippedLinesWarning);
503 }
504 *chars = rest.chars();
505 if let Some(c) = chars.clone().next() {
506 if c.is_whitespace() {
507 callback(
509 start..end + c.len_utf8(),
510 EscapeError::UnskippedWhitespaceWarning,
511 );
512 }
513 }
514}
515
516impl Unescape for str {
517 type Unit = char;
518
519 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Ok('\0');
520
521 #[inline]
522 fn nonzero_byte2unit(b: NonZeroU8) -> Self::Unit {
523 b.get().into()
524 }
525
526 #[inline]
527 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
528 Ok(c)
529 }
530
531 #[inline]
532 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError> {
533 if b.is_ascii() {
534 Ok(b as char)
535 } else {
536 Err(EscapeError::OutOfRangeHexEscape)
537 }
538 }
539
540 #[inline]
541 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
542 r
543 }
544}
545
546impl Unescape for [u8] {
547 type Unit = u8;
548
549 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Ok(b'\0');
550
551 #[inline]
552 fn nonzero_byte2unit(b: NonZeroU8) -> Self::Unit {
553 b.get()
554 }
555
556 #[inline]
557 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
558 char2byte(c)
559 }
560
561 #[inline]
562 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError> {
563 Ok(b)
564 }
565
566 #[inline]
567 fn unicode2unit(_r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
568 Err(EscapeError::UnicodeEscapeInByte)
569 }
570}
571
572impl Unescape for CStr {
573 type Unit = MixedUnit;
574
575 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Err(EscapeError::NulInCStr);
576
577 #[inline]
578 fn nonzero_byte2unit(b: NonZeroU8) -> Self::Unit {
579 b.into()
580 }
581
582 #[inline]
583 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
584 c.try_into()
585 }
586
587 #[inline]
588 fn hex2unit(byte: u8) -> Result<Self::Unit, EscapeError> {
589 byte.try_into()
590 }
591
592 #[inline]
593 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
594 Self::char2unit(r?)
595 }
596}
597
/// The kind of literal being checked.
// NOTE(review): the derive list was truncated to a dangling `)]` in this copy
// of the file; the set below is the restored one — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Char literal.
    Char,

    /// Byte literal.
    Byte,

    /// String literal.
    Str,
    /// Raw string literal.
    RawStr,

    /// Byte string literal.
    ByteStr,
    /// Raw byte string literal.
    RawByteStr,

    /// C string literal.
    CStr,
    /// Raw C string literal.
    RawCStr,
}

impl Mode {
    /// Returns `true` for every string-like mode and `false` for `Char` and
    /// `Byte`.
    pub fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str
            | Mode::RawStr
            | Mode::ByteStr
            | Mode::RawByteStr
            | Mode::CStr
            | Mode::RawCStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    /// Returns the literal prefix for this mode, ignoring rawness: `""` for
    /// char and string modes, `"b"` for byte modes, `"c"` for C string modes.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Mode::Char | Mode::Str | Mode::RawStr => "",
            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b",
            Mode::CStr | Mode::RawCStr => "c",
        }
    }
}
644
645pub fn check_for_errors(
653 src: &str,
654 mode: Mode,
655 mut error_callback: impl FnMut(Range<usize>, EscapeError),
656) {
657 match mode {
658 Mode::Char => {
659 let mut chars = src.chars();
660 if let Err(e) = str::unescape_single(&mut chars) {
661 error_callback(0..(src.len() - chars.as_str().len()), e);
662 }
663 }
664 Mode::Byte => {
665 let mut chars = src.chars();
666 if let Err(e) = <[u8]>::unescape_single(&mut chars) {
667 error_callback(0..(src.len() - chars.as_str().len()), e);
668 }
669 }
670 Mode::Str => unescape_str(src, |range, res| {
671 if let Err(e) = res {
672 error_callback(range, e);
673 }
674 }),
675 Mode::ByteStr => unescape_byte_str(src, |range, res| {
676 if let Err(e) = res {
677 error_callback(range, e);
678 }
679 }),
680 Mode::CStr => unescape_c_str(src, |range, res| {
681 if let Err(e) = res {
682 error_callback(range, e);
683 }
684 }),
685 Mode::RawStr => check_raw_str(src, |range, res| {
686 if let Err(e) = res {
687 error_callback(range, e);
688 }
689 }),
690 Mode::RawByteStr => check_raw_byte_str(src, |range, res| {
691 if let Err(e) = res {
692 error_callback(range, e);
693 }
694 }),
695 Mode::RawCStr => check_raw_c_str(src, |range, res| {
696 if let Err(e) = res {
697 error_callback(range, e);
698 }
699 }),
700 }
701}