1#![no_std]
5
6use core::ffi::CStr;
7use core::num::NonZero;
8use core::ops::Range;
9use core::str::Chars;
10
/// Errors and warnings reported while validating or unescaping literal contents.
///
/// Every variant except the two `…Warning` ones counts as fatal, see
/// [`EscapeError::is_fatal`]. The type is a plain fieldless enum, so it is
/// `Copy` and can be stored or reported more than once without rebuilding it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeError {
    /// A single-unit literal (char or byte) contained no characters.
    ZeroChars,
    /// A single-unit literal (char or byte) contained more than one unit.
    MoreThanOneChar,

    /// A backslash was the last character of the input.
    LoneSlash,
    /// The character following a backslash does not start a known escape.
    InvalidEscape,
    /// A literal `\r` character appeared in a non-raw literal.
    BareCarriageReturn,
    /// A literal `\r` character appeared in a raw literal.
    BareCarriageReturnInRawString,
    /// A character that must be written as an escape appeared unescaped.
    EscapeOnlyChar,

    /// The input ended before both digits of a `\x` escape were read.
    TooShortHexEscape,
    /// A character of a `\x` escape was not a hexadecimal digit.
    InvalidCharInHexEscape,
    /// A `\x` escape produced a non-ASCII value where only ASCII is accepted.
    OutOfRangeHexEscape,

    /// `\u` was not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A character inside `\u{…}` was neither a hexadecimal digit nor `_`.
    InvalidCharInUnicodeEscape,
    /// `\u{}` contained nothing between the braces.
    EmptyUnicodeEscape,
    /// The input ended before the closing `}` of a `\u{…}` escape.
    UnclosedUnicodeEscape,
    /// The first character inside `\u{…}` was `_`.
    LeadingUnderscoreUnicodeEscape,
    /// A `\u{…}` escape contained more than six hexadecimal digits.
    OverlongUnicodeEscape,
    /// A `\u{…}` escape named a value within range that is not a valid
    /// `char` (a surrogate).
    LoneSurrogateUnicodeEscape,
    /// A `\u{…}` escape named a value above `char::MAX`.
    OutOfRangeUnicodeEscape,

    /// A `\u{…}` escape was used in a byte or byte-string literal.
    UnicodeEscapeInByte,
    /// A non-ASCII character appeared in a byte or byte-string literal.
    NonAsciiCharInByte,

    /// A NUL appeared in a C-string literal, either literally or through an
    /// escape.
    NulInCStr,

    /// Warning: after a backslash-newline continuation, the first character
    /// that was not skipped is still whitespace.
    UnskippedWhitespaceWarning,

    /// Warning: a backslash-newline continuation skipped over additional
    /// newlines.
    MultipleSkippedLinesWarning,
}

impl EscapeError {
    /// Returns `true` for real errors and `false` for the two warning variants.
    pub fn is_fatal(&self) -> bool {
        !matches!(
            self,
            EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning
        )
    }
}
81
82pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
89 str::check_raw(src, callback);
90}
91
92pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
99 <[u8]>::check_raw(src, callback);
100}
101
102pub fn check_raw_c_str(
109 src: &str,
110 callback: impl FnMut(Range<usize>, Result<NonZero<char>, EscapeError>),
111) {
112 CStr::check_raw(src, callback);
113}
114
115trait CheckRaw {
117 type RawUnit;
119
120 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError>;
122
123 fn check_raw(
129 src: &str,
130 mut callback: impl FnMut(Range<usize>, Result<Self::RawUnit, EscapeError>),
131 ) {
132 let mut chars = src.chars();
133 while let Some(c) = chars.next() {
134 let start = src.len() - chars.as_str().len() - c.len_utf8();
135 let res = match c {
136 '\r' => Err(EscapeError::BareCarriageReturnInRawString),
137 _ => Self::char2raw_unit(c),
138 };
139 let end = src.len() - chars.as_str().len();
140 callback(start..end, res);
141 }
142
143 }
155}
156
157impl CheckRaw for str {
158 type RawUnit = char;
159
160 #[inline]
161 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
162 Ok(c)
163 }
164}
165
166impl CheckRaw for [u8] {
167 type RawUnit = u8;
168
169 #[inline]
170 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
171 char2byte(c)
172 }
173}
174
175#[inline]
177fn char2byte(c: char) -> Result<u8, EscapeError> {
178 if c.is_ascii() {
180 Ok(c as u8)
181 } else {
182 Err(EscapeError::NonAsciiCharInByte)
183 }
184}
185
186impl CheckRaw for CStr {
187 type RawUnit = NonZero<char>;
188
189 #[inline]
190 fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
191 NonZero::new(c).ok_or(EscapeError::NulInCStr)
192 }
193}
194
195#[inline]
200pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
201 str::unescape_single(&mut src.chars())
202}
203
204#[inline]
209pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
210 <[u8]>::unescape_single(&mut src.chars())
211}
212
213pub fn unescape_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
219 str::unescape(src, callback)
220}
221
222pub fn unescape_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
228 <[u8]>::unescape(src, callback)
229}
230
231pub fn unescape_c_str(
237 src: &str,
238 callback: impl FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
239) {
240 CStr::unescape(src, callback)
241}
242
243#[derive(Copy, Clone, Debug, PartialEq, Eq)]
248pub enum MixedUnit {
249 Char(NonZero<char>),
256
257 HighByte(NonZero<u8>),
263}
264
265impl From<NonZero<char>> for MixedUnit {
266 #[inline]
267 fn from(c: NonZero<char>) -> Self {
268 MixedUnit::Char(c)
269 }
270}
271
272impl From<NonZero<u8>> for MixedUnit {
273 #[inline]
274 fn from(byte: NonZero<u8>) -> Self {
275 if byte.get().is_ascii() {
276 MixedUnit::Char(NonZero::new(byte.get() as char).unwrap())
277 } else {
278 MixedUnit::HighByte(byte)
279 }
280 }
281}
282
283impl TryFrom<char> for MixedUnit {
284 type Error = EscapeError;
285
286 #[inline]
287 fn try_from(c: char) -> Result<Self, EscapeError> {
288 NonZero::new(c)
289 .map(MixedUnit::Char)
290 .ok_or(EscapeError::NulInCStr)
291 }
292}
293
294impl TryFrom<u8> for MixedUnit {
295 type Error = EscapeError;
296
297 #[inline]
298 fn try_from(byte: u8) -> Result<Self, EscapeError> {
299 NonZero::new(byte)
300 .map(From::from)
301 .ok_or(EscapeError::NulInCStr)
302 }
303}
304
305trait Unescape {
307 type Unit;
309
310 const ZERO_RESULT: Result<Self::Unit, EscapeError>;
312
313 fn nonzero_byte2unit(b: NonZero<u8>) -> Self::Unit;
315
316 fn char2unit(c: char) -> Result<Self::Unit, EscapeError>;
318
319 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError>;
321
322 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError>;
324
325 fn unescape_single(chars: &mut Chars<'_>) -> Result<Self::Unit, EscapeError> {
327 let res = match chars.next().ok_or(EscapeError::ZeroChars)? {
328 '\\' => Self::unescape_1(chars),
329 '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
330 '\r' => Err(EscapeError::BareCarriageReturn),
331 c => Self::char2unit(c),
332 }?;
333 if chars.next().is_some() {
334 return Err(EscapeError::MoreThanOneChar);
335 }
336 Ok(res)
337 }
338
339 fn unescape_1(chars: &mut Chars<'_>) -> Result<Self::Unit, EscapeError> {
341 let c = chars.next().ok_or(EscapeError::LoneSlash)?;
343 if c == '0' {
344 Self::ZERO_RESULT
345 } else {
346 simple_escape(c)
347 .map(|b| Self::nonzero_byte2unit(b))
348 .or_else(|c| match c {
349 'x' => Self::hex2unit(hex_escape(chars)?),
350 'u' => Self::unicode2unit({
351 let value = unicode_escape(chars)?;
352 if value > char::MAX as u32 {
353 Err(EscapeError::OutOfRangeUnicodeEscape)
354 } else {
355 char::from_u32(value).ok_or(EscapeError::LoneSurrogateUnicodeEscape)
356 }
357 }),
358 _ => Err(EscapeError::InvalidEscape),
359 })
360 }
361 }
362
363 fn unescape(
369 src: &str,
370 mut callback: impl FnMut(Range<usize>, Result<Self::Unit, EscapeError>),
371 ) {
372 let mut chars = src.chars();
373 while let Some(c) = chars.next() {
374 let start = src.len() - chars.as_str().len() - c.len_utf8();
375 let res = match c {
376 '\\' => {
377 if let Some(b'\n') = chars.as_str().as_bytes().first() {
378 let _ = chars.next();
379 let callback_err = |range, err| callback(range, Err(err));
382 skip_ascii_whitespace(&mut chars, start, callback_err);
383 continue;
384 } else {
385 Self::unescape_1(&mut chars)
386 }
387 }
388 '"' => Err(EscapeError::EscapeOnlyChar),
389 '\r' => Err(EscapeError::BareCarriageReturn),
390 c => Self::char2unit(c),
391 };
392 let end = src.len() - chars.as_str().len();
393 callback(start..end, res);
394 }
395 }
396}
397
/// Interprets the selector character of a single-character escape.
///
/// `c` is the character that followed the backslash. Returns the byte the
/// escape stands for, or hands `c` back as `Err` when it is not one of
/// `"`, `n`, `r`, `t`, `\` or `'`. The NUL escape `\0` is not handled here.
#[inline]
fn simple_escape(c: char) -> Result<NonZero<u8>, char> {
    let byte = match c {
        '"' => b'"',
        'n' => b'\n',
        'r' => b'\r',
        't' => b'\t',
        '\\' => b'\\',
        '\'' => b'\'',
        unknown => return Err(unknown),
    };
    // None of the bytes above is zero, so this conversion always succeeds.
    NonZero::new(byte).ok_or(c)
}
415
416#[inline] fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
421 let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
422 let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
423
424 let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
425 let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
426
427 Ok((hi * 16 + lo) as u8)
428}
429
430#[inline] fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
436 if chars.next() != Some('{') {
437 return Err(EscapeError::NoBraceInUnicodeEscape);
438 }
439
440 let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
442 '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
443 '}' => return Err(EscapeError::EmptyUnicodeEscape),
444 c => c
445 .to_digit(16)
446 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
447 };
448
449 let mut n_digits = 1;
452 loop {
453 match chars.next() {
454 None => return Err(EscapeError::UnclosedUnicodeEscape),
455 Some('_') => continue,
456 Some('}') => {
457 return if n_digits > 6 {
460 Err(EscapeError::OverlongUnicodeEscape)
461 } else {
462 Ok(value)
463 };
464 }
465 Some(c) => {
466 let digit: u32 = c
467 .to_digit(16)
468 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
469 n_digits += 1;
470 if n_digits > 6 {
471 continue;
473 }
474 value = value * 16 + digit;
475 }
476 };
477 }
478}
479
480#[inline] fn skip_ascii_whitespace(
487 chars: &mut Chars<'_>,
488 start: usize,
489 mut callback: impl FnMut(Range<usize>, EscapeError),
490) {
491 let rest = chars.as_str();
492 let first_non_space = rest
493 .bytes()
494 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
495 .unwrap_or(rest.len());
496 let (space, rest) = rest.split_at(first_non_space);
497 let end = start + 2 + first_non_space;
499 if space.contains('\n') {
500 callback(start..end, EscapeError::MultipleSkippedLinesWarning);
501 }
502 *chars = rest.chars();
503 if let Some(c) = chars.clone().next() {
504 if c.is_whitespace() {
505 callback(
507 start..end + c.len_utf8(),
508 EscapeError::UnskippedWhitespaceWarning,
509 );
510 }
511 }
512}
513
514impl Unescape for str {
515 type Unit = char;
516
517 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Ok('\0');
518
519 #[inline]
520 fn nonzero_byte2unit(b: NonZero<u8>) -> Self::Unit {
521 b.get().into()
522 }
523
524 #[inline]
525 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
526 Ok(c)
527 }
528
529 #[inline]
530 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError> {
531 if b.is_ascii() {
532 Ok(b as char)
533 } else {
534 Err(EscapeError::OutOfRangeHexEscape)
535 }
536 }
537
538 #[inline]
539 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
540 r
541 }
542}
543
544impl Unescape for [u8] {
545 type Unit = u8;
546
547 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Ok(b'\0');
548
549 #[inline]
550 fn nonzero_byte2unit(b: NonZero<u8>) -> Self::Unit {
551 b.get()
552 }
553
554 #[inline]
555 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
556 char2byte(c)
557 }
558
559 #[inline]
560 fn hex2unit(b: u8) -> Result<Self::Unit, EscapeError> {
561 Ok(b)
562 }
563
564 #[inline]
565 fn unicode2unit(_r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
566 Err(EscapeError::UnicodeEscapeInByte)
567 }
568}
569
570impl Unescape for CStr {
571 type Unit = MixedUnit;
572
573 const ZERO_RESULT: Result<Self::Unit, EscapeError> = Err(EscapeError::NulInCStr);
574
575 #[inline]
576 fn nonzero_byte2unit(b: NonZero<u8>) -> Self::Unit {
577 b.into()
578 }
579
580 #[inline]
581 fn char2unit(c: char) -> Result<Self::Unit, EscapeError> {
582 c.try_into()
583 }
584
585 #[inline]
586 fn hex2unit(byte: u8) -> Result<Self::Unit, EscapeError> {
587 byte.try_into()
588 }
589
590 #[inline]
591 fn unicode2unit(r: Result<char, EscapeError>) -> Result<Self::Unit, EscapeError> {
592 Self::char2unit(r?)
593 }
594}
595
/// The kind of literal being processed.
///
/// Equality on this fieldless enum is total, so it derives `Eq` alongside
/// `PartialEq`, like the other enums in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Char literal.
    Char,

    /// Byte literal.
    Byte,

    /// String literal.
    Str,
    /// Raw string literal.
    RawStr,

    /// Byte-string literal.
    ByteStr,
    /// Raw byte-string literal.
    RawByteStr,

    /// C-string literal.
    CStr,
    /// Raw C-string literal.
    RawCStr,
}

impl Mode {
    /// Returns `true` for every string-like mode and `false` for
    /// [`Mode::Char`] and [`Mode::Byte`].
    pub fn in_double_quotes(self) -> bool {
        // Kept exhaustive on purpose so that a new variant forces a decision here.
        match self {
            Mode::Char | Mode::Byte => false,
            Mode::Str
            | Mode::RawStr
            | Mode::ByteStr
            | Mode::RawByteStr
            | Mode::CStr
            | Mode::RawCStr => true,
        }
    }

    /// Returns the literal prefix without any raw marker: `""`, `"b"` or `"c"`.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Mode::Char | Mode::Str | Mode::RawStr => "",
            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b",
            Mode::CStr | Mode::RawCStr => "c",
        }
    }
}
642
643pub fn check_for_errors(
651 src: &str,
652 mode: Mode,
653 mut error_callback: impl FnMut(Range<usize>, EscapeError),
654) {
655 match mode {
656 Mode::Char => {
657 let mut chars = src.chars();
658 if let Err(e) = str::unescape_single(&mut chars) {
659 error_callback(0..(src.len() - chars.as_str().len()), e);
660 }
661 }
662 Mode::Byte => {
663 let mut chars = src.chars();
664 if let Err(e) = <[u8]>::unescape_single(&mut chars) {
665 error_callback(0..(src.len() - chars.as_str().len()), e);
666 }
667 }
668 Mode::Str => unescape_str(src, |range, res| {
669 if let Err(e) = res {
670 error_callback(range, e);
671 }
672 }),
673 Mode::ByteStr => unescape_byte_str(src, |range, res| {
674 if let Err(e) = res {
675 error_callback(range, e);
676 }
677 }),
678 Mode::CStr => unescape_c_str(src, |range, res| {
679 if let Err(e) = res {
680 error_callback(range, e);
681 }
682 }),
683 Mode::RawStr => check_raw_str(src, |range, res| {
684 if let Err(e) = res {
685 error_callback(range, e);
686 }
687 }),
688 Mode::RawByteStr => check_raw_byte_str(src, |range, res| {
689 if let Err(e) = res {
690 error_callback(range, e);
691 }
692 }),
693 Mode::RawCStr => check_raw_c_str(src, |range, res| {
694 if let Err(e) = res {
695 error_callback(range, e);
696 }
697 }),
698 }
699}