1use std::ops::Deref;
2use std::{char, cmp, io, str, u32};
3
4use super::error::{Error, ErrorCode, Result};
5use super::iter::LineColIterator;
6
/// Source of input bytes for the parser.
///
/// The trait is sealed: it has `private::Sealed` as a supertrait, and that
/// trait lives in a private module of this file.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next input byte, or `Ok(None)` once the input
    /// is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next input byte without consuming it, or `Ok(None)` once
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Drops the byte most recently returned by `peek`.
    ///
    /// NOTE(review): the implementations in this file do not verify that a
    /// byte is actually pending, so callers presumably only invoke this
    /// directly after a successful `peek` -- confirm.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Line/column position corresponding to the input consumed so far.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Line/column position associated with the byte `peek` would return.
    ///
    /// The slice-based readers report the position one byte past
    /// `position()` (clamped to the end of input); `IoRead` reports the same
    /// value as `position()`.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Number of input bytes consumed so far (a merely peeked byte does not
    /// count).
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Reads string content with R6RS escapes up to and including the closing
    /// `"`.
    ///
    /// `scratch` is used as the buffer for content that cannot be borrowed
    /// from the input; the returned `Reference` is `Borrowed` when it points
    /// into the input and `Copied` when it points into `scratch`.
    #[doc(hidden)]
    fn parse_r6rs_str<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, str>>;

    /// Reads string content with Emacs Lisp escapes up to and including the
    /// closing `"`.
    ///
    /// The result is `ElispStr::Unibyte` (raw bytes) or `ElispStr::Multibyte`
    /// (text), each either borrowed from the input or copied into `scratch`.
    #[doc(hidden)]
    fn parse_elisp_str<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<ElispStrReference<'de, 's>>;

    /// Reads a symbol: all bytes up to (not including) the next whitespace
    /// byte, bracket, parenthesis, `;` or the end of input.
    ///
    /// A symbol consisting of a lone `.` is rejected with `InvalidSymbol`.
    #[doc(hidden)]
    fn parse_symbol<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Reads an R6RS character constant; the default implementation forwards
    /// to the free function `parse_r6rs_char`.
    #[doc(hidden)]
    fn parse_r6rs_char(&mut self, scratch: &mut Vec<u8>) -> Result<char> {
        parse_r6rs_char(self, scratch)
    }

    /// Reads an Emacs Lisp character constant; the default implementation
    /// forwards to the free function `parse_elisp_char`.
    ///
    /// NOTE(review): unlike its siblings this method is not marked
    /// `#[doc(hidden)]` -- confirm whether that is intentional.
    fn parse_elisp_char(&mut self, scratch: &mut Vec<u8>) -> Result<char> {
        parse_elisp_char(self, scratch)
    }
}
87
/// A location in the input, expressed as a line and a column.
///
/// Positions order by line first and column second.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Position {
    line: usize,
    column: usize,
}

impl Position {
    /// Builds a position from its line and column components.
    pub(crate) fn new(line: usize, column: usize) -> Self {
        Self { line, column }
    }

    /// Returns the line component.
    pub fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Returns the column component.
    pub fn column(&self) -> usize {
        let Self { column, .. } = *self;
        column
    }
}
112
/// A reference to parsed data that records where the data lives.
///
/// `Borrowed` carries a reference with lifetime `'b`, `Copied` one with
/// lifetime `'c`; the readers in this file use the former for data pointing
/// into the input and the latter for data assembled in a scratch buffer.
pub enum Reference<'b, 'c, T: ?Sized + 'static> {
    Borrowed(&'b T),
    Copied(&'c T),
}

impl<'b, 'c, T: ?Sized + 'static> Deref for Reference<'b, 'c, T> {
    type Target = T;

    /// Yields the referenced value regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match self {
            Reference::Borrowed(from_input) => *from_input,
            Reference::Copied(from_scratch) => *from_scratch,
        }
    }
}
128
/// A string read with Emacs Lisp syntax, in one of two representations.
///
/// The readers in this file produce `Unibyte` only when the string contained
/// an escape classified as unibyte and neither a multibyte escape nor a
/// non-ASCII input byte; every other string is `Multibyte`.
#[derive(Debug)]
pub enum ElispStr<U, M> {
    /// Unibyte representation.
    Unibyte(U),
    /// Multibyte representation.
    Multibyte(M),
}

/// `ElispStr` as produced by the readers: unibyte strings are byte slices,
/// multibyte strings are `str`, and each is either borrowed (`'b`) or copied
/// (`'c`).
pub type ElispStrReference<'b, 'c> = ElispStr<Reference<'b, 'c, [u8]>, Reference<'b, 'c, str>>;
136
/// Classification of a single Emacs Lisp string escape, used to decide
/// whether the enclosing string is unibyte or multibyte.
#[derive(Debug)]
enum ElispEscape {
    /// Numeric (`\x`, octal) escape whose value is at most 255; it was
    /// appended as a single raw byte.
    Unibyte,
    /// Escape that was appended as a UTF-8 encoded character (`\u`, `\U`,
    /// `\N{U+...}`, or a numeric escape above 255).
    Multibyte,
    /// Any other escape; it does not influence the string's representation.
    Indeterminate,
}
143
/// Input source that reads bytes from any `io::Read` implementation.
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader; it also supplies the line, column and
    /// byte offset reported by this source.
    iter: LineColIterator<io::Bytes<R>>,
    /// Byte that has been peeked but not consumed yet, if any.
    ch: Option<u8>,
}
153
/// Input source that reads from a byte slice.
pub struct SliceRead<'a> {
    /// The complete input.
    slice: &'a [u8],
    /// Offset into `slice` of the next byte to be read.
    index: usize,
}
163
/// Input source that reads from a `str`.
pub struct StrRead<'a> {
    /// Reader over the string's bytes; all operations are forwarded to it.
    delegate: SliceRead<'a>,
}
170
// Private module holding the marker trait that seals `Read`.
mod private {
    /// Marker supertrait of `Read`; implemented in this file for the three
    /// reader types.
    pub trait Sealed {}
}
175
176impl<R> IoRead<R>
179where
180 R: io::Read,
181{
182 pub fn new(reader: R) -> Self {
184 IoRead {
185 iter: LineColIterator::new(reader.bytes()),
186 ch: None,
187 }
188 }
189}
190
191impl<R> private::Sealed for IoRead<R> where R: io::Read {}
192
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads R6RS string content into `scratch` until the closing `"`, then
    /// hands the accumulated bytes to `result` for the final conversion.
    ///
    /// Every byte goes through `scratch` here (nothing can be borrowed from
    /// an `io::Read`); backslash escapes are expanded by `parse_r6rs_escape`.
    /// End of input before the closing quote is an `EofWhileParsingString`
    /// error.
    fn parse_r6rs_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let ch = next_or_eof(self)?;
            match ch {
                // Closing quote: the string is complete.
                b'"' => {
                    return result(self, scratch);
                }
                b'\\' => {
                    parse_r6rs_escape(self, scratch)?;
                }
                _ => {
                    scratch.push(ch);
                }
            }
        }
    }

    /// Reads a symbol into `scratch`, stopping in front of the first
    /// delimiter byte or at the end of input, then hands the accumulated
    /// bytes to `result`.
    ///
    /// The delimiter itself is only peeked, never consumed. A symbol that is
    /// exactly `.` is rejected with `InvalidSymbol`.
    fn parse_symbol_bytes<'s, T, F>(&'s mut self, scratch: &'s mut Vec<u8>, result: F) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            match self.peek()? {
                Some(b' ') | Some(b'\n') | Some(b'\t') | Some(b'\r') | Some(b')') | Some(b']')
                | Some(b'(') | Some(b'[') | Some(b';') | None => {
                    if scratch == b"." {
                        return error(self, ErrorCode::InvalidSymbol);
                    }
                    return result(self, scratch);
                }
                Some(ch) => {
                    // Consume the byte we just peeked and keep it.
                    self.discard();
                    scratch.push(ch);
                }
            }
        }
    }
}
244
245impl<'de, R> Read<'de> for IoRead<R>
246where
247 R: io::Read,
248{
249 #[inline]
250 fn next(&mut self) -> Result<Option<u8>> {
251 match self.ch.take() {
252 Some(ch) => Ok(Some(ch)),
253 None => match self.iter.next() {
254 Some(Err(err)) => Err(Error::io(err)),
255 Some(Ok(ch)) => Ok(Some(ch)),
256 None => Ok(None),
257 },
258 }
259 }
260
261 #[inline]
262 fn peek(&mut self) -> Result<Option<u8>> {
263 match self.ch {
264 Some(ch) => Ok(Some(ch)),
265 None => match self.iter.next() {
266 Some(Err(err)) => Err(Error::io(err)),
267 Some(Ok(ch)) => {
268 self.ch = Some(ch);
269 Ok(self.ch)
270 }
271 None => Ok(None),
272 },
273 }
274 }
275
276 #[inline]
277 fn discard(&mut self) {
278 self.ch = None;
279 }
280
281 fn position(&self) -> Position {
282 Position {
283 line: self.iter.line(),
284 column: self.iter.col(),
285 }
286 }
287
288 fn peek_position(&self) -> Position {
289 self.position()
292 }
293
294 fn byte_offset(&self) -> usize {
295 match self.ch {
296 Some(_) => self.iter.byte_offset() - 1,
297 None => self.iter.byte_offset(),
298 }
299 }
300
301 fn parse_r6rs_str<'s>(
302 &'s mut self,
303 scratch: &'s mut Vec<u8>,
304 ) -> Result<Reference<'de, 's, str>> {
305 self.parse_r6rs_str_bytes(scratch, as_str)
306 .map(Reference::Copied)
307 }
308
309 fn parse_elisp_str<'s>(
310 &'s mut self,
311 scratch: &'s mut Vec<u8>,
312 ) -> Result<ElispStrReference<'de, 's>> {
313 let mut seen_ub_escape = false;
314 let mut seen_mb_escape = false;
315 let mut seen_non_ascii = false;
316 loop {
317 let ch = next_or_eof(self)?;
318 match ch {
319 b'"' => {
320 let s = if seen_ub_escape && !(seen_mb_escape || seen_non_ascii) {
321 ElispStr::Unibyte(Reference::Copied(scratch.as_slice()))
322 } else {
323 ElispStr::Multibyte(Reference::Copied(as_str(self, scratch)?))
324 };
325 return Ok(s);
326 }
327 b'\\' => match parse_elisp_escape(self, scratch)? {
328 ElispEscape::Unibyte => seen_ub_escape = true,
329 ElispEscape::Multibyte => seen_mb_escape = true,
330 ElispEscape::Indeterminate => {}
331 },
332 _ => {
333 if ch > 127 {
334 seen_non_ascii = true;
335 }
336 scratch.push(ch);
337 }
338 }
339 }
340 }
341
342 fn parse_symbol<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
343 self.parse_symbol_bytes(scratch, as_str)
344 .map(Reference::Copied)
345 }
346}
347
impl<'a> SliceRead<'a> {
    /// Creates a reader positioned at the start of `slice`.
    pub fn new(slice: &'a [u8]) -> Self {
        SliceRead { slice, index: 0 }
    }

    /// Computes the line/column position reached after the first `i` bytes
    /// of the input by scanning them from the start.
    ///
    /// Lines are counted from 1; the column is the number of bytes since the
    /// most recent `\n` (0 directly after one). Panics if `i` exceeds the
    /// slice length, because of the range index.
    fn position_of_index(&self, i: usize) -> Position {
        let mut position = Position { line: 1, column: 0 };
        for ch in &self.slice[..i] {
            match *ch {
                b'\n' => {
                    position.line += 1;
                    position.column = 0;
                }
                _ => {
                    position.column += 1;
                }
            }
        }
        position
    }

    /// Reads a symbol, stopping in front of the first delimiter byte or at
    /// the end of input, and converts its bytes with `result`.
    ///
    /// When `scratch` is empty on entry the symbol is returned as a
    /// `Borrowed` sub-slice of the input; otherwise the symbol's bytes are
    /// appended to `scratch` and the whole buffer is returned as `Copied`.
    /// A symbol that is exactly `.` is rejected with `InvalidSymbol`.
    fn parse_symbol_bytes<'s, T: ?Sized, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        result: F,
    ) -> Result<Reference<'a, 's, T>>
    where
        T: 's,
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
    {
        // Offset of the symbol's first byte.
        let start = self.index;

        loop {
            match self.peek_byte() {
                None | Some(b' ') | Some(b'\n') | Some(b'\t') | Some(b'\r') | Some(b')')
                | Some(b']') | Some(b'(') | Some(b'[') | Some(b';') => {
                    if scratch.is_empty() {
                        let borrowed = &self.slice[start..self.index];
                        if borrowed == b"." {
                            return error(self, ErrorCode::InvalidSymbol);
                        }
                        return result(self, borrowed).map(Reference::Borrowed);
                    } else {
                        scratch.extend_from_slice(&self.slice[start..self.index]);
                        if scratch == b"." {
                            return error(self, ErrorCode::InvalidSymbol);
                        }
                        let copied = scratch as &[u8];
                        return result(self, copied).map(Reference::Copied);
                    }
                }
                // Any other byte belongs to the symbol.
                _ => self.index += 1,
            }
        }
    }

    /// Returns the byte at the current index without consuming it, or `None`
    /// at the end of input.
    fn peek_byte(&self) -> Option<u8> {
        if self.index < self.slice.len() {
            Some(self.slice[self.index])
        } else {
            None
        }
    }

    /// Reads Emacs Lisp string content up to and including the closing `"`.
    ///
    /// Runs of plain bytes are skipped over in the input and only copied
    /// into `scratch` once an escape forces it; if `scratch` is still empty
    /// at the closing quote the result borrows directly from the input.
    /// The string is unibyte only if a unibyte escape was seen and neither a
    /// multibyte escape nor a non-ASCII input byte was; multibyte strings
    /// are converted with `result`.
    fn parse_elisp_str_bytes<'s, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        result: F,
    ) -> Result<ElispStrReference<'a, 's>>
    where
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f str>,
    {
        // Start of the run of plain bytes not yet copied to `scratch`.
        let mut start = self.index;
        let mut seen_ub_escape = false;
        let mut seen_mb_escape = false;
        let mut seen_non_ascii = false;

        loop {
            // Advance to the next `"` or `\`, noting non-ASCII bytes.
            while self.index < self.slice.len() {
                let ch = self.slice[self.index];
                if ch > 127 {
                    seen_non_ascii = true;
                }
                if needs_escape(ch) {
                    break;
                }
                self.index += 1;
            }
            if self.index == self.slice.len() {
                return error(self, ErrorCode::EofWhileParsingString);
            }
            match self.slice[self.index] {
                b'"' => {
                    if scratch.is_empty() {
                        let borrowed = &self.slice[start..self.index];
                        // Step over the closing quote.
                        self.index += 1;
                        if seen_ub_escape && !(seen_mb_escape || seen_non_ascii) {
                            return Ok(ElispStr::Unibyte(Reference::Borrowed(borrowed)));
                        } else {
                            return result(self, borrowed)
                                .map(|s| ElispStr::Multibyte(Reference::Borrowed(s)));
                        }
                    } else {
                        scratch.extend_from_slice(&self.slice[start..self.index]);
                        // Step over the closing quote.
                        self.index += 1;
                        if seen_ub_escape && !(seen_mb_escape || seen_non_ascii) {
                            return Ok(ElispStr::Unibyte(Reference::Copied(scratch)));
                        } else {
                            return result(self, scratch)
                                .map(|s| ElispStr::Multibyte(Reference::Copied(s)));
                        }
                    }
                }
                b'\\' => {
                    // Flush the plain run, then expand the escape behind it.
                    scratch.extend_from_slice(&self.slice[start..self.index]);
                    self.index += 1;
                    match parse_elisp_escape(self, scratch)? {
                        ElispEscape::Multibyte => seen_mb_escape = true,
                        ElispEscape::Unibyte => seen_ub_escape = true,
                        ElispEscape::Indeterminate => {}
                    }
                    start = self.index;
                }
                // The scan above only stops on bytes accepted by `needs_escape`.
                _ => unreachable!(),
            }
        }
    }

    /// Reads R6RS string content up to and including the closing `"` and
    /// converts its bytes with `result`.
    ///
    /// Runs of plain bytes are skipped over in the input and only copied
    /// into `scratch` once an escape forces it; if `scratch` is still empty
    /// at the closing quote the result is a `Borrowed` sub-slice of the
    /// input, otherwise it is `Copied` from `scratch`.
    fn parse_r6rs_str_bytes<'s, T: ?Sized, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        result: F,
    ) -> Result<Reference<'a, 's, T>>
    where
        T: 's,
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
    {
        // Start of the run of plain bytes not yet copied to `scratch`.
        let mut start = self.index;

        loop {
            // Advance to the next `"` or `\`.
            while self.index < self.slice.len() && !needs_escape(self.slice[self.index]) {
                self.index += 1;
            }
            if self.index == self.slice.len() {
                return error(self, ErrorCode::EofWhileParsingString);
            }
            match self.slice[self.index] {
                b'"' => {
                    if scratch.is_empty() {
                        let borrowed = &self.slice[start..self.index];
                        // Step over the closing quote.
                        self.index += 1;
                        return result(self, borrowed).map(Reference::Borrowed);
                    } else {
                        scratch.extend_from_slice(&self.slice[start..self.index]);
                        // Step over the closing quote.
                        self.index += 1;
                        return result(self, scratch).map(Reference::Copied);
                    }
                }
                b'\\' => {
                    // Flush the plain run, then expand the escape behind it.
                    scratch.extend_from_slice(&self.slice[start..self.index]);
                    self.index += 1;
                    parse_r6rs_escape(self, scratch)?;
                    start = self.index;
                }
                // The scan above only stops on bytes accepted by `needs_escape`.
                _ => unreachable!(),
            }
        }
    }
}
533
534impl<'a> private::Sealed for SliceRead<'a> {}
535
536impl<'a> Read<'a> for SliceRead<'a> {
537 #[inline]
538 fn next(&mut self) -> Result<Option<u8>> {
539 Ok(if self.index < self.slice.len() {
542 let ch = self.slice[self.index];
543 self.index += 1;
544 Some(ch)
545 } else {
546 None
547 })
548 }
549
550 #[inline]
551 fn peek(&mut self) -> Result<Option<u8>> {
552 Ok(if self.index < self.slice.len() {
555 Some(self.slice[self.index])
556 } else {
557 None
558 })
559 }
560
561 #[inline]
562 fn discard(&mut self) {
563 self.index += 1;
564 }
565
566 fn position(&self) -> Position {
567 self.position_of_index(self.index)
568 }
569
570 fn peek_position(&self) -> Position {
571 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
574 }
575
576 fn byte_offset(&self) -> usize {
577 self.index
578 }
579
580 fn parse_r6rs_str<'s>(
581 &'s mut self,
582 scratch: &'s mut Vec<u8>,
583 ) -> Result<Reference<'a, 's, str>> {
584 self.parse_r6rs_str_bytes(scratch, as_str)
585 }
586
587 fn parse_elisp_str<'s>(
588 &'s mut self,
589 scratch: &'s mut Vec<u8>,
590 ) -> Result<ElispStrReference<'a, 's>> {
591 self.parse_elisp_str_bytes(scratch, as_str)
592 }
593
594 fn parse_symbol<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
595 self.parse_symbol_bytes(scratch, as_str)
596 }
597}
598
599impl<'a> StrRead<'a> {
602 pub fn new(s: &'a str) -> Self {
604 StrRead {
605 delegate: SliceRead::new(s.as_bytes()),
606 }
607 }
608}
609
impl<'a> private::Sealed for StrRead<'a> {}

// Every operation is forwarded to the inner `SliceRead`; the only difference
// is that R6RS strings and symbols skip UTF-8 validation.
impl<'a> Read<'a> for StrRead<'a> {
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        self.delegate.next()
    }

    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        self.delegate.peek()
    }

    #[inline]
    fn discard(&mut self) {
        self.delegate.discard();
    }

    fn position(&self) -> Position {
        self.delegate.position()
    }

    fn peek_position(&self) -> Position {
        self.delegate.peek_position()
    }

    fn byte_offset(&self) -> usize {
        self.delegate.byte_offset()
    }

    /// Reads an R6RS string without re-validating it as UTF-8.
    fn parse_r6rs_str<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'a, 's, str>> {
        self.delegate.parse_r6rs_str_bytes(scratch, |_, bytes| {
            // SAFETY (NOTE(review)): the input came from a `&str`
            // (`StrRead::new`), it is only cut at the ASCII bytes `"` and
            // `\`, and `parse_r6rs_escape` appends either ASCII bytes or a
            // UTF-8 encoded `char`. This also assumes `scratch` held valid
            // UTF-8 (e.g. was empty) on entry -- confirm against callers.
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
        })
    }

    /// Reads an Emacs Lisp string. Unlike the other two parsers this one
    /// does validate UTF-8 (`as_str`): numeric escapes up to 255 are appended
    /// as raw bytes, so the collected bytes need not be valid UTF-8.
    fn parse_elisp_str<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<ElispStrReference<'a, 's>> {
        self.delegate.parse_elisp_str_bytes(scratch, as_str)
    }

    /// Reads a symbol without re-validating it as UTF-8.
    fn parse_symbol<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
        self.delegate.parse_symbol_bytes(scratch, |_, bytes| {
            // SAFETY (NOTE(review)): the input came from a `&str` and the
            // symbol is delimited by ASCII bytes or the end of input, so a
            // borrowed symbol is a valid UTF-8 sub-slice. This also assumes
            // `scratch` held valid UTF-8 (e.g. was empty) on entry --
            // confirm against callers.
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
        })
    }
}
669
670fn next_or_eof<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<u8> {
671 match read.next()? {
672 Some(b) => Ok(b),
673 None => error(read, ErrorCode::EofWhileParsingString),
674 }
675}
676
677fn next_or_eof_char<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<u8> {
678 match read.next()? {
679 Some(b) => Ok(b),
680 None => error(read, ErrorCode::EofWhileParsingCharacterConstant),
681 }
682}
683
684fn error<'de, R: ?Sized + Read<'de>, T>(read: &R, reason: ErrorCode) -> Result<T> {
685 let position = read.position();
686 Err(Error::syntax(reason, position.line, position.column))
687}
688
689fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
690 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
691}
692
693fn as_char<'de, 's, R: Read<'de> + ?Sized>(read: &R, value: u32) -> Result<char> {
694 match char::from_u32(value) {
695 None => error(read, ErrorCode::InvalidUnicodeCodePoint),
696 Some(c) => Ok(c),
697 }
698}
699
/// Tells whether `c` interrupts a run of plain string bytes: that is the case
/// for the backslash and the double quote only.
fn needs_escape(c: u8) -> bool {
    match c {
        b'"' | b'\\' => true,
        _ => false,
    }
}
703
704fn decode_r6rs_hex_escape<'de, R: Read<'de>>(read: &mut R) -> Result<u32> {
707 let mut n = 0;
708 loop {
709 let next = next_or_eof(read)?;
710 if next == b';' {
711 return Ok(n);
712 }
713 match decode_hex_val(next) {
714 None => return error(read, ErrorCode::EofWhileParsingString),
715 Some(val) => {
716 if n >= (1 << 24) {
717 return error(read, ErrorCode::InvalidUnicodeCodePoint);
719 }
720 n = (n << 4) + u32::from(val);
721 }
722 }
723 }
724}
725
726fn parse_r6rs_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec<u8>) -> Result<()> {
729 let ch = next_or_eof(read)?;
730
731 match ch {
732 b'"' => scratch.push(b'"'),
733 b'\\' => scratch.push(b'\\'),
734 b'a' => scratch.push(0x07),
735 b'b' => scratch.push(0x08),
736 b'f' => scratch.push(0x0C),
737 b'n' => scratch.push(b'\n'),
738 b'r' => scratch.push(b'\r'),
739 b't' => scratch.push(b'\t'),
740 b'v' => scratch.push(0x0B),
741 b'|' => scratch.push(b'|'),
742 b'x' => {
744 let c = {
745 let n = decode_r6rs_hex_escape(read)?;
746 match char::from_u32(n) {
747 Some(c) => c,
748 None => {
749 return error(read, ErrorCode::InvalidUnicodeCodePoint);
750 }
751 }
752 };
753 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
754 }
755 _ => {
756 return error(read, ErrorCode::InvalidEscape);
757 }
758 }
759
760 Ok(())
761}
762
763fn decode_elisp_hex_escape<'de, R: Read<'de> + ?Sized>(read: &mut R) -> Result<u32> {
767 let mut n = 0;
768 while let Some(c) = read.peek()? {
769 match decode_hex_val(c) {
770 None => break,
771 Some(val) => {
772 read.discard();
773 if n >= (1 << 24) {
774 return error(read, ErrorCode::InvalidUnicodeCodePoint);
776 }
777 n = (n << 4) + u32::from(val);
778 }
779 }
780 }
781 Ok(n)
782}
783
784fn decode_elisp_uni_escape<'de, R: Read<'de> + ?Sized>(read: &mut R, count: u8) -> Result<u32> {
785 let mut n = 0;
786 for _ in 0..count {
787 let c = next_or_eof(read)?;
788 let val = match decode_hex_val(c) {
789 None => return error(read, ErrorCode::InvalidEscape),
790 Some(val) => val,
791 };
792 if n >= (1 << 24) {
793 return error(read, ErrorCode::InvalidUnicodeCodePoint);
795 }
796 n = (n << 4) + u32::from(val);
797 }
798 Ok(n)
799}
800
801fn decode_elisp_octal_escape<'de, R: Read<'de> + ?Sized>(read: &mut R, initial: u8) -> Result<u32> {
806 let mut n = u32::from(initial - b'0');
807 while let Some(c) = read.peek()? {
808 match decode_octal_val(c) {
809 None => break,
810 Some(val) => {
811 read.discard();
812 if n >= (1 << 24) {
813 return error(read, ErrorCode::InvalidUnicodeCodePoint);
815 }
816 n = (n << 3) + u32::from(val);
817 }
818 }
819 }
820 Ok(n)
821}
822
823fn parse_elisp_char_escape<'de, R, F>(
824 read: &mut R,
825 scratch: &mut Vec<u8>,
826 decode: F,
827) -> Result<ElispEscape>
828where
829 R: Read<'de>,
830 F: FnOnce(&mut R) -> Result<u32>,
831{
832 let n = decode(read)?;
833 match char::from_u32(n) {
834 Some(c) => {
835 if n > 255 {
836 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
837 Ok(ElispEscape::Multibyte)
838 } else {
839 scratch.push(n as u8);
840 Ok(ElispEscape::Unibyte)
841 }
842 }
843 None => error(read, ErrorCode::InvalidUnicodeCodePoint),
844 }
845}
846
847fn parse_elisp_uni_char_escape<'de, R, F>(
848 read: &mut R,
849 scratch: &mut Vec<u8>,
850 decode: F,
851) -> Result<ElispEscape>
852where
853 R: Read<'de>,
854 F: FnOnce(&mut R) -> Result<u32>,
855{
856 let n = decode(read)?;
857 match char::from_u32(n) {
858 Some(c) => {
859 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
860 Ok(ElispEscape::Multibyte)
861 }
862 None => error(read, ErrorCode::InvalidUnicodeCodePoint),
863 }
864}
865
866fn parse_elisp_escape<'de, R: Read<'de>>(
876 read: &mut R,
877 scratch: &mut Vec<u8>,
878) -> Result<ElispEscape> {
879 let ch = next_or_eof(read)?;
880
881 match ch {
882 b'"' => scratch.push(b'"'),
883 b'\\' => scratch.push(b'\\'),
884 b' ' => {} b'a' => scratch.push(0x07),
886 b'b' => scratch.push(0x08),
887 b't' => scratch.push(b'\t'),
888 b'n' => scratch.push(b'\n'),
889 b'v' => scratch.push(0x0B),
890 b'f' => scratch.push(0x0C),
891 b'r' => scratch.push(b'\r'),
892 b'e' => scratch.push(0x1B),
893 b's' => scratch.push(b' '),
894 b'd' => scratch.push(0x7F),
895 b'^' => {
896 let ch = next_or_eof(read)?.to_ascii_lowercase();
898 if ch.is_ascii_lowercase() {
899 scratch.push(ch - b'a');
900 } else {
901 return error(read, ErrorCode::InvalidEscape);
902 }
903 }
904 b'N' => {
905 if next_or_eof(read)? != b'{' {
908 return error(read, ErrorCode::InvalidEscape);
909 }
910 if next_or_eof(read)? != b'U' || next_or_eof(read)? != b'+' {
911 return error(read, ErrorCode::InvalidEscape);
912 }
913 let escape = parse_elisp_uni_char_escape(read, scratch, decode_elisp_hex_escape)?;
914 if next_or_eof(read)? != b'}' {
915 return error(read, ErrorCode::InvalidEscape);
916 }
917 return Ok(escape);
918 }
919 b'u' => {
920 return parse_elisp_uni_char_escape(read, scratch, |read| {
923 decode_elisp_uni_escape(read, 4)
924 });
925 }
926 b'U' => {
927 return parse_elisp_uni_char_escape(read, scratch, |read| {
930 decode_elisp_uni_escape(read, 8)
931 });
932 }
933 b'x' => {
934 return parse_elisp_char_escape(read, scratch, decode_elisp_hex_escape);
938 }
939 b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' => {
940 return parse_elisp_char_escape(read, scratch, |read| {
944 decode_elisp_octal_escape(read, ch)
945 });
946 }
947 _ => scratch.push(ch),
948 }
949 Ok(ElispEscape::Indeterminate)
950}
951
952fn parse_r6rs_char<'de, R: Read<'de> + ?Sized>(
955 read: &mut R,
956 scratch: &mut Vec<u8>,
957) -> Result<char> {
958 let initial = next_or_eof_char(read)?;
959 if initial == b'x' {
960 match decode_r6rs_char_hex_escape(read)? {
961 Some(n) => match char::from_u32(n) {
962 Some(c) => Ok(c),
963 None => error(read, ErrorCode::InvalidUnicodeCodePoint),
964 },
965 None => Ok('x'),
966 }
967 } else if initial > 0x7F {
968 decode_utf8_sequence(read, scratch, initial)
969 } else {
970 let next = match read.peek()? {
972 Some(next) => {
973 if is_delimiter(next) {
974 return Ok(char::from(initial));
975 } else {
976 next
977 }
978 }
979 None => return Ok(char::from(initial)),
980 };
981 scratch.clear();
983 scratch.push(initial);
984 scratch.push(next);
985 read.discard();
986 while let Some(next) = read.peek()? {
987 if is_delimiter(next) {
988 break;
989 }
990 scratch.push(next);
991 read.discard();
992 }
993 match scratch.as_slice() {
994 b"nul" => Ok('\x00'),
995 b"alarm" => Ok('\x07'),
996 b"backspace" => Ok('\x08'),
997 b"tab" => Ok('\t'),
998 b"linefeed" => Ok('\n'),
999 b"newline" => Ok('\n'),
1000 b"vtab" => Ok('\x0B'),
1001 b"page" => Ok('\x0C'),
1002 b"return" => Ok('\r'),
1003 b"esc" => Ok('\x1B'),
1004 b"space" => Ok(' '),
1005 b"delete" => Ok('\x7F'),
1006 _ => error(read, ErrorCode::InvalidCharacterConstant),
1007 }
1008 }
1009}
1010
1011fn decode_r6rs_char_hex_escape<'de, R: Read<'de> + ?Sized>(read: &mut R) -> Result<Option<u32>> {
1014 let mut n = 0;
1015 let mut first = true;
1016 loop {
1017 let next = match read.peek()? {
1018 Some(c) => {
1019 if is_delimiter(c) {
1020 return Ok(if first { None } else { Some(n) });
1021 } else {
1022 c
1023 }
1024 }
1025 None => return Ok(if first { None } else { Some(n) }),
1026 };
1027 read.discard();
1028 first = false;
1029 match decode_hex_val(next) {
1030 None => return error(read, ErrorCode::EofWhileParsingCharacterConstant),
1031 Some(val) => {
1032 if n >= (1 << 24) {
1033 return error(read, ErrorCode::InvalidUnicodeCodePoint);
1035 }
1036 n = (n << 4) + u32::from(val);
1037 }
1038 }
1039 }
1040}
1041
1042fn parse_elisp_char<'de, R: Read<'de> + ?Sized>(
1046 read: &mut R,
1047 scratch: &mut Vec<u8>,
1048) -> Result<char> {
1049 let initial = match read.next()? {
1050 None => return error(read, ErrorCode::EofWhileParsingCharacterConstant),
1051 Some(c) => c,
1052 };
1053 if initial > 0x7F {
1054 decode_utf8_sequence(read, scratch, initial)
1055 } else {
1056 match initial {
1058 b'(' | b')' | b'[' | b']' | b';' => error(read, ErrorCode::InvalidCharacterConstant),
1059 b'\\' => decode_elisp_char_escape(read, scratch),
1060 _ => Ok(char::from(initial)),
1061 }
1062 }
1063}
1064
1065fn decode_elisp_char_escape<'de, R: Read<'de> + ?Sized>(
1066 read: &mut R,
1067 scratch: &mut Vec<u8>,
1068) -> Result<char> {
1069 let ch = next_or_eof_char(read)?;
1070 match ch {
1071 b'a' => Ok('\x07'),
1072 b'b' => Ok('\x08'),
1073 b't' => Ok('\t'),
1074 b'n' => Ok('\n'),
1075 b'v' => Ok('\x0B'),
1076 b'f' => Ok('\x0C'),
1077 b'r' => Ok('\r'),
1078 b'e' => Ok('\x1B'),
1079 b's' => Ok(' '),
1080 b'\\' => Ok('\\'),
1081 b'd' => Ok('\x7F'),
1082 b'^' => {
1083 let ch = next_or_eof_char(read)?.to_ascii_lowercase();
1085 if ch.is_ascii_lowercase() {
1086 Ok(char::from(ch - b'a'))
1087 } else {
1088 error(read, ErrorCode::InvalidEscape)
1089 }
1090 }
1091 b'N' => {
1092 if next_or_eof_char(read)? != b'{' {
1095 return error(read, ErrorCode::InvalidEscape);
1096 }
1097 if next_or_eof_char(read)? != b'U' || next_or_eof_char(read)? != b'+' {
1098 return error(read, ErrorCode::InvalidEscape);
1099 }
1100 let n = decode_elisp_hex_escape(read)?;
1101 if next_or_eof(read)? != b'}' {
1102 return error(read, ErrorCode::InvalidEscape);
1103 }
1104 match char::from_u32(n) {
1105 Some(c) => Ok(c),
1106 None => error(read, ErrorCode::InvalidEscape),
1107 }
1108 }
1109 b'u' => {
1110 decode_elisp_uni_escape(read, 4).and_then(|n| as_char(read, n))
1112 }
1113 b'U' => {
1114 decode_elisp_uni_escape(read, 8).and_then(|n| as_char(read, n))
1117 }
1118 b'x' => {
1119 decode_elisp_hex_escape(read).and_then(|n| as_char(read, n))
1121 }
1122 b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' => {
1123 decode_elisp_octal_escape(read, ch).and_then(|n| as_char(read, n))
1125 }
1126 next => {
1127 if next > 0x7F {
1128 decode_utf8_sequence(read, scratch, next)
1129 } else {
1130 Ok(char::from(next))
1131 }
1132 }
1133 }
1134}
1135
// Lookup table from a byte to the value of the hex digit it represents;
// bytes that are not hex digits map to 255. Row `r`, column `c` holds the
// entry for byte `0xrc`.
#[allow(clippy::zero_prefixed_literal)]
static HEX: [u8; 256] = {
    const __: u8 = 255; // not a hex digit
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};

/// Returns the numeric value (0..=15) of the ASCII hex digit `val`, in either
/// letter case, or `None` if `val` is not a hex digit.
fn decode_hex_val(val: u8) -> Option<u8> {
    match HEX[usize::from(val)] {
        255 => None,
        digit => Some(digit),
    }
}
1168
/// Returns the numeric value (0..=7) of the ASCII octal digit `val`, or
/// `None` if `val` is not an octal digit.
fn decode_octal_val(val: u8) -> Option<u8> {
    match val {
        b'0'..=b'7' => Some(val - b'0'),
        _ => None,
    }
}
1176
/// Tells whether `c` is one of the bytes listed in `DELIMITER`, which end a
/// character constant.
fn is_delimiter(c: u8) -> bool {
    DELIMITER.iter().any(|&delimiter| delimiter == c)
}

// Bytes that terminate a character constant: parentheses, square brackets,
// the double quote, `;`, `#`, and the whitespace bytes space, newline, tab,
// carriage return and form feed (0x0C).
static DELIMITER: [u8; 12] = [
    b'(', b')', b'[', b']', b'"', b';', b'#', b' ', b'\n', b'\t', b'\r', 0x0C,
];
1185
1186fn decode_utf8_sequence<'de, R: Read<'de> + ?Sized>(
1189 read: &mut R,
1190 scratch: &mut Vec<u8>,
1191 initial: u8,
1192) -> Result<char> {
1193 if !(0xC2..=0xF4).contains(&initial) {
1201 return error(read, ErrorCode::InvalidUnicodeCodePoint);
1202 }
1203 scratch.clear();
1204 scratch.push(initial);
1205 let len = (initial - 0xC0) >> 4;
1206 for _ in 0..len {
1207 let b = match read.next()? {
1208 Some(c) => c,
1209 None => return error(read, ErrorCode::InvalidUnicodeCodePoint),
1210 };
1211 scratch.push(b);
1212 }
1213 match str::from_utf8(scratch) {
1214 Err(_) => error(read, ErrorCode::InvalidUnicodeCodePoint),
1215 Ok(s) => Ok(s.chars().next().unwrap()),
1216 }
1217}