1use crate::error::io_syntax_error;
4use crate::error::is_eof_io_error;
5use crate::error::syntax_error;
6use crate::error::Error;
7use crate::error::ExpectedKind;
8use crate::error::Received;
9
10use crate::hex;
11
12use crate::value::boundary as B;
13use crate::value::reader::BinarySource;
14use crate::value::reader::ReaderResult;
15use crate::value::repr::Annotations;
16use crate::value::CompoundClass;
17use crate::value::DomainParse;
18use crate::value::IOValue;
19use crate::value::IOValueDomainCodec;
20use crate::value::Map;
21use crate::value::NestedValue;
22use crate::value::Reader;
23use crate::value::Record;
24use crate::value::Set;
25use crate::value::Token;
26use crate::value::Value;
27use crate::value::ViaCodec;
28
29use lazy_static::lazy_static;
30
31use num::bigint::BigInt;
32
33use std::convert::TryInto;
34use std::io;
35use std::marker::PhantomData;
36
37pub struct TextReader<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'de>> {
39 pub source: &'src mut S,
41 pub dec: Dec,
43 pub toplevel_whitespace_mode: ToplevelWhitespaceMode,
45 phantom: PhantomData<&'de N>,
46}
47
/// Selects how [`TextReader`]'s `next` orders whitespace skipping relative to its
/// end-of-input check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToplevelWhitespaceMode {
    /// Whitespace is skipped *before* the end-of-input check, so input that holds nothing
    /// but whitespace yields `Ok(None)`.
    Document,
    /// The end-of-input check runs first and whitespace is skipped afterwards, so whitespace
    /// followed by end-of-input surfaces as an error instead of `Ok(None)`.
    Value,
}
60
61fn decode_utf8(bs: Vec<u8>) -> io::Result<String> {
62 Ok(String::from_utf8(bs).map_err(|_| io_syntax_error("Invalid UTF-8"))?)
63}
64
65fn append_codepoint(bs: &mut Vec<u8>, n: u32) -> io::Result<()> {
66 let c = char::from_u32(n).ok_or_else(|| io_syntax_error("Bad code point"))?;
67 let mut buf = [0; 4];
68 let _ = c.encode_utf8(&mut buf);
69 bs.extend(&buf[0..c.len_utf8()]);
70 Ok(())
71}
72
73impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'de>>
74 TextReader<'de, 'src, N, Dec, S>
75{
76 pub fn new(source: &'src mut S, dec: Dec) -> Self {
78 TextReader {
79 source,
80 dec,
81 toplevel_whitespace_mode: ToplevelWhitespaceMode::Document,
82 phantom: PhantomData,
83 }
84 }
85
86 pub fn toplevel_whitespace_mode(mut self, new_mode: ToplevelWhitespaceMode) -> Self {
87 self.toplevel_whitespace_mode = new_mode;
88 self
89 }
90
91 fn peek(&mut self) -> io::Result<u8> {
92 self.source.peek()
93 }
94
95 fn skip(&mut self) -> io::Result<()> {
96 self.source.skip()
97 }
98
99 fn next_byte(&mut self) -> io::Result<u8> {
100 let b = self.source.peek()?;
101 self.source.skip()?;
102 Ok(b)
103 }
104
105 fn skip_whitespace(&mut self) {
106 self.skip_whitespace_and_maybe_commas(false)
107 }
108
109 fn skip_whitespace_and_maybe_commas(&mut self, skip_commas: bool) {
110 while let Ok(c) = self.peek() {
112 match c {
113 b' ' | b'\t' | b'\r' | b'\n' => {
114 let _ = self.skip();
115 ()
116 }
117 b',' if skip_commas => {
118 let _ = self.skip();
119 ()
120 }
121 _ => break,
122 }
123 }
124 }
125
126 fn expected(&mut self, k: ExpectedKind) -> Error {
128 match Reader::<N>::demand_next(self, true) {
129 Ok(v) => Error::Expected(k, Received::ReceivedOtherValue(format!("{:?}", v))),
130 Err(e) => e.into(),
131 }
132 }
133
134 fn gather_annotations(&mut self, vs: &mut Vec<N>) -> ReaderResult<()> {
135 loop {
136 self.skip_whitespace();
137 match self.peek()? {
138 b'#' => {
139 let m = self.source.mark()?;
140 self.skip()?;
141 match self.next_byte()? {
142 b' ' | b'\t' => vs.push(N::new(self.comment_line()?)),
143 b'\n' | b'\r' => vs.push(N::new("")),
144 b'!' => vs.push(Value::simple_record1(
145 "interpreter", N::new(self.comment_line()?)).wrap()),
146 _ => {
147 self.source.restore(&m)?;
148 return Ok(());
149 }
150 }
151 }
152 b'@' => {
153 self.skip()?;
154 vs.push(self.demand_next(true)?)
155 }
156 _ => return Ok(()),
157 }
158 }
159 }
160
161 fn prepend_annotations_to_next(&mut self, mut annotations: Vec<N>) -> ReaderResult<N> {
162 let (existing_annotations, v) = Reader::<N>::demand_next(self, true)?.pieces();
163 annotations.extend_from_slice(existing_annotations.slice());
164 Ok(N::wrap(Annotations::new(Some(annotations)), v))
165 }
166
167 fn skip_annotations(&mut self) -> ReaderResult<()> {
168 loop {
169 self.skip_whitespace();
170 match self.peek()? {
171 b'#' => {
172 let m = self.source.mark()?;
173 self.skip()?;
174 match self.next_byte()? {
175 b' ' | b'\t' => { self.comment_line()?; () }
176 b'\n' | b'\r' => (),
177 b'!' => { self.comment_line()?; () }
178 _ => {
179 self.source.restore(&m)?;
180 return Ok(());
181 }
182 }
183 }
184 b'@' => {
185 self.skip()?;
186 self.skip_value()?;
187 }
188 _ => return Ok(()),
189 }
190 }
191 }
192
193 pub fn next_iovalue(&mut self, read_annotations: bool) -> io::Result<IOValue> {
195 let mut r = TextReader::new(self.source, ViaCodec::new(IOValueDomainCodec));
196 let v = r.demand_next(read_annotations)?;
197 Ok(v)
198 }
199
200 fn comment_line(&mut self) -> io::Result<String> {
201 let mut bs = Vec::new();
202 loop {
203 let b = self.peek()?;
204 self.skip()?;
205 match b {
206 b'\r' | b'\n' => return Ok(decode_utf8(bs)?),
207 _ => bs.push(b),
208 }
209 }
210 }
211
212 fn read_hex_float(&mut self) -> io::Result<N> {
213 if self.next_byte()? != b'"' {
214 return Err(io_syntax_error(
215 "Missing open-double-quote in hex-encoded floating-point number",
216 ));
217 }
218 let bs = self.read_hex_binary()?;
219 if bs.len() != 8 {
220 return Err(io_syntax_error(
221 "Incorrect number of bytes in hex-encoded floating-point number",
222 ));
223 }
224 Ok(Value::from(f64::from_bits(u64::from_be_bytes(bs.try_into().unwrap()))).wrap())
225 }
226
227 fn read_stringlike<X, H, R>(
228 &mut self,
229 mut seed: R,
230 xform_item: X,
231 terminator: u8,
232 hexescape: u8,
233 hexescaper: H,
234 ) -> io::Result<R>
235 where
236 X: Fn(&mut R, u8) -> io::Result<()>,
237 H: Fn(&mut R, &mut Self) -> io::Result<()>,
238 {
239 loop {
240 match self.next_byte()? {
241 c if c == terminator => return Ok(seed),
242 b'\\' => match self.next_byte()? {
243 c if c == hexescape => hexescaper(&mut seed, self)?,
244 c if c == terminator || c == b'\\' || c == b'/' => xform_item(&mut seed, c)?,
245 b'b' => xform_item(&mut seed, b'\x08')?,
246 b'f' => xform_item(&mut seed, b'\x0c')?,
247 b'n' => xform_item(&mut seed, b'\x0a')?,
248 b'r' => xform_item(&mut seed, b'\x0d')?,
249 b't' => xform_item(&mut seed, b'\x09')?,
250 _ => return Err(io_syntax_error("Invalid escape code")),
251 },
252 c => xform_item(&mut seed, c)?,
253 }
254 }
255 }
256
257 fn hexnum(&mut self, count: usize) -> io::Result<u32> {
258 let mut v: u32 = 0;
259 for _ in 0..count {
260 let c = self.next_byte()?;
261 match (c as char).to_digit(16) {
262 Some(d) => v = v << 4 | d,
263 None => return Err(io_syntax_error("Bad hex escape")),
264 }
265 }
266 Ok(v)
267 }
268
269 fn read_string(&mut self, delimiter: u8) -> io::Result<String> {
270 decode_utf8(self.read_stringlike(
271 Vec::new(),
272 |bs, c| Ok(bs.push(c)),
273 delimiter,
274 b'u',
275 |bs, r| {
276 let n1 = r.hexnum(4)?;
277 if (0xd800..=0xdbff).contains(&n1) {
278 let mut ok = true;
279 ok = ok && r.next_byte()? == b'\\';
280 ok = ok && r.next_byte()? == b'u';
281 if !ok {
282 Err(io_syntax_error("Missing second half of surrogate pair"))
283 } else {
284 let n2 = r.hexnum(4)?;
285 if (0xdc00..=0xdfff).contains(&n2) {
286 let n = ((n1 - 0xd800) << 10) + (n2 - 0xdc00) + 0x10000;
287 append_codepoint(bs, n)
288 } else {
289 Err(io_syntax_error("Bad second half of surrogate pair"))
290 }
291 }
292 } else {
293 append_codepoint(bs, n1)
294 }
295 },
296 )?)
297 }
298
299 fn read_literal_binary(&mut self) -> io::Result<N> {
300 Ok(N::new(
301 &self.read_stringlike(
302 Vec::new(),
303 |bs, b| Ok(bs.push(b)),
304 b'"',
305 b'x',
306 |bs, r| Ok(bs.push(r.hexnum(2)? as u8)),
307 )?[..],
308 ))
309 }
310
311 fn read_hex_binary(&mut self) -> io::Result<Vec<u8>> {
312 let mut s = String::new();
313 loop {
314 self.skip_whitespace();
315 let c1 = self.next_byte()? as char;
316 if c1 == '"' {
317 return Ok(hex::HexParser::Strict.decode(&s).unwrap());
318 }
319 let c2 = self.next_byte()? as char;
320 if !(c1.is_digit(16) && c2.is_digit(16)) {
321 return Err(io_syntax_error("Invalid hex binary"));
322 }
323 s.push(c1);
324 s.push(c2);
325 }
326 }
327
328 fn read_base64_binary(&mut self) -> io::Result<N> {
329 let mut bs = Vec::new();
330 loop {
331 self.skip_whitespace();
332 let mut c = self.next_byte()?;
333 if c == b']' {
334 let bs = base64::decode_config(&decode_utf8(bs)?, base64::STANDARD_NO_PAD)
335 .map_err(|_| io_syntax_error("Invalid base64 character"))?;
336 return Ok(N::new(&bs[..]));
337 }
338 if c == b'-' {
339 c = b'+';
340 }
341 if c == b'_' {
342 c = b'/';
343 }
344 if c == b'=' {
345 continue;
346 }
347 bs.push(c);
348 }
349 }
350
351 fn upto(&mut self, delimiter: u8, read_annotations: bool, skip_commas: bool) -> io::Result<Vec<N>> {
352 let mut vs = Vec::new();
353 loop {
354 self.skip_whitespace_and_maybe_commas(skip_commas);
355 if self.peek()? == delimiter {
356 self.skip()?;
357 return Ok(vs);
358 }
359 vs.push(Reader::<N>::demand_next(self, read_annotations)?);
360 }
361 }
362
363 fn read_set(&mut self, read_annotations: bool) -> io::Result<N> {
364 let items = self.upto(b'}', read_annotations, true)?;
365 let mut s = Set::<N>::new();
366 for i in items {
367 if s.contains(&i) {
368 return Err(io_syntax_error("Duplicate set element"));
369 }
370 s.insert(i);
371 }
372 Ok(N::new(s))
373 }
374
375 fn read_dictionary(&mut self, read_annotations: bool) -> io::Result<N> {
376 let mut d = Map::new();
377 loop {
378 self.skip_whitespace_and_maybe_commas(true);
379 if self.peek()? == b'}' {
380 self.skip()?;
381 return Ok(N::new(d));
382 }
383 let k = Reader::<N>::demand_next(self, read_annotations)?;
384 self.skip_whitespace();
385 if self.next_byte()? != b':' {
386 return Err(io_syntax_error("Missing expected key/value separator"));
387 }
388 if d.contains_key(&k) {
389 return Err(io_syntax_error("Duplicate key"));
390 }
391 let v = Reader::<N>::demand_next(self, read_annotations)?;
392 d.insert(k, v);
393 }
394 }
395
396 fn require_delimiter(&mut self, msg: &'static str) -> io::Result<()> {
397 if self.delimiter_follows()? {
398 Ok(())
399 } else {
400 Err(io_syntax_error(msg))
401 }
402 }
403
404 fn delimiter_follows(&mut self) -> io::Result<bool> {
405 let c = match self.peek() {
406 Err(e) if is_eof_io_error(&e) => return Ok(true),
407 Err(e) => return Err(e)?,
408 Ok(c) if (c as char).is_whitespace() => return Ok(true),
409 Ok(c) => c,
410 };
411 Ok(match c {
412 b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'<' | b'>' | b'"' | b'\'' | b';' | b','
413 | b'@' | b'#' | b':' | b' ' => true,
414 _ => false,
415 })
416 }
417
418 fn read_raw_symbol_or_number(&mut self, mut bs: Vec<u8>) -> io::Result<N> {
419 lazy_static! {
420 static ref NUMBER_RE: regex::Regex =
421 regex::Regex::new(r"^([-+]?\d+)((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))?$")
422 .unwrap();
423 }
424 while !self.delimiter_follows()? {
425 bs.push(self.next_byte()?);
426 }
427 let s = decode_utf8(bs)?;
428 match NUMBER_RE.captures(&s) {
429 None => Ok(N::symbol(&s)),
430 Some(m) => match m.get(2) {
431 None => Ok(N::new(s.parse::<BigInt>().map_err(|_| {
432 io_syntax_error(&format!("Invalid signed-integer number: {:?}", s))})?)),
433 Some(_) => Ok(N::new(s.parse::<f64>().map_err(|_| {
434 io_syntax_error(&format!("Invalid double-precision number: {:?}", s))})?)),
435 },
436 }
437 }
438}
439
440impl<'de, 'src, N: NestedValue, Dec: DomainParse<N::Embedded>, S: BinarySource<'de>> Reader<'de, N>
441 for TextReader<'de, 'src, N, Dec, S>
442{
443 fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
444 'restart: loop {
445 match self.toplevel_whitespace_mode {
446 ToplevelWhitespaceMode::Document => self.skip_whitespace(),
447 ToplevelWhitespaceMode::Value => (),
448 }
449 match self.peek() {
450 Err(e) if is_eof_io_error(&e) => return Ok(None),
451 _ => (),
452 }
453 match self.toplevel_whitespace_mode {
454 ToplevelWhitespaceMode::Document => (),
455 ToplevelWhitespaceMode::Value => self.skip_whitespace(),
456 }
457 return Ok(Some(match self.peek()? {
458 b'"' => {
459 self.skip()?;
460 N::new(self.read_string(b'"')?)
461 }
462 b'\'' => {
463 self.skip()?;
464 N::symbol(&self.read_string(b'\'')?)
465 }
466 b';' => {
467 return Err(io_syntax_error(
468 "Semicolon is reserved syntax"
469 ));
470 }
471 b'@' => {
472 if read_annotations {
473 let mut annotations = Vec::new();
474 self.gather_annotations(&mut annotations)?;
475 self.prepend_annotations_to_next(annotations)?
476 } else {
477 self.skip_annotations()?;
478 self.demand_next(read_annotations)?
479 }
480 }
481 b':' => {
482 return Err(io_syntax_error(
483 "Unexpected key/value separator between items",
484 ));
485 }
486 b'#' => {
487 self.skip()?;
488 match self.next_byte()? {
489 b' ' | b'\t' => {
490 if read_annotations {
491 let mut annotations = vec![N::new(self.comment_line()?)];
492 self.gather_annotations(&mut annotations)?;
493 self.prepend_annotations_to_next(annotations)?
494 } else {
495 self.comment_line()?;
496 continue 'restart;
497 }
498 }
499 b'\n' | b'\r' => {
500 if read_annotations {
501 let mut annotations = vec![N::new("")];
502 self.gather_annotations(&mut annotations)?;
503 self.prepend_annotations_to_next(annotations)?
504 } else {
505 continue 'restart;
506 }
507 }
508 b'!' => {
509 if read_annotations {
510 let mut annotations = vec![
511 Value::simple_record1("interpreter", N::new(self.comment_line()?)).wrap()];
512 self.gather_annotations(&mut annotations)?;
513 self.prepend_annotations_to_next(annotations)?
514 } else {
515 self.comment_line()?;
516 continue 'restart;
517 }
518 }
519 b'f' => { self.require_delimiter("Delimiter must follow #f")?; N::new(false) }
520 b't' => { self.require_delimiter("Delimiter must follow #t")?; N::new(true) }
521 b'{' => self.read_set(read_annotations)?,
522 b'"' => self.read_literal_binary()?,
523 b'x' => match self.next_byte()? {
524 b'"' => N::new(&self.read_hex_binary()?[..]),
525 b'd' => self.read_hex_float()?,
526 _ => return Err(io_syntax_error("Invalid #x syntax")),
527 },
528 b'[' => self.read_base64_binary()?,
529 b':' => {
530 let v = self.next_iovalue(read_annotations)?;
531 Value::Embedded(self.dec.parse_embedded(&v)?).wrap()
532 }
533 other => {
534 return Err(io_syntax_error(&format!("Invalid # syntax: {:?}", other)))
535 }
536 }
537 }
538 b'<' => {
539 self.skip()?;
540 let vs = self.upto(b'>', read_annotations, false)?;
541 if vs.is_empty() {
542 return Err(io_syntax_error("Missing record label"));
543 }
544 Value::Record(Record(vs)).wrap()
545 }
546 b'[' => {
547 self.skip()?;
548 N::new(self.upto(b']', read_annotations, true)?)
549 }
550 b'{' => {
551 self.skip()?;
552 self.read_dictionary(read_annotations)?
553 }
554 b'>' => return Err(io_syntax_error("Unexpected >")),
555 b']' => return Err(io_syntax_error("Unexpected ]")),
556 b'}' => return Err(io_syntax_error("Unexpected }")),
557 b',' => return Err(io_syntax_error("Unexpected ,")),
558 other => {
559 self.skip()?;
560 self.read_raw_symbol_or_number(vec![other])?
561 }
562 }))
563 }
564 }
565
566 fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<()> {
567 self.skip_annotations()?;
568 if self.peek()? != b'<' {
569 return Err(self.expected(ExpectedKind::Record(arity)));
570 }
571 self.skip()?;
572 Ok(())
573 }
574
575 fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
576 self.skip_annotations()?;
577 let mark = Reader::<N>::mark(self)?;
578 match self.next_byte()? {
579 b'#' => match self.next_byte()? {
580 b'{' => return Ok(B::Item::SetValue),
581 _ => (),
582 },
583 b'[' => return Ok(B::Item::SequenceValue),
584 _ => (),
585 }
586 Reader::<N>::restore(self, &mark)?;
587 Err(self.expected(ExpectedKind::SequenceOrSet))
588 }
589
590 fn open_sequence(&mut self) -> ReaderResult<()> {
591 self.skip_annotations()?;
592 if self.peek()? != b'[' {
593 return Err(self.expected(ExpectedKind::Sequence));
594 }
595 self.skip()?;
596 Ok(())
597 }
598
599 fn open_set(&mut self) -> ReaderResult<()> {
600 self.skip_annotations()?;
601 let mark = Reader::<N>::mark(self)?;
602 match self.next_byte()? {
603 b'#' => match self.next_byte()? {
604 b'{' => return Ok(()),
605 _ => (),
606 },
607 _ => (),
608 }
609 Reader::<N>::restore(self, &mark)?;
610 Err(self.expected(ExpectedKind::Set))
611 }
612
613 fn open_dictionary(&mut self) -> ReaderResult<()> {
614 self.skip_annotations()?;
615 if self.peek()? != b'{' {
616 return Err(self.expected(ExpectedKind::Dictionary));
617 }
618 self.skip()?;
619 Ok(())
620 }
621
622 #[inline]
623 fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
624 match b {
625 B::Type {
626 closing: Some(B::Item::DictionaryKey),
627 opening: Some(B::Item::DictionaryValue),
628 } => {
629 self.skip_whitespace();
630 if self.next_byte()? != b':' {
631 return Err(syntax_error("Missing expected key/value separator"));
632 }
633 }
634 B::Type { closing: Some(B::Item::DictionaryValue), opening: None } |
635 B::Type { closing: None, opening: Some(B::Item::DictionaryKey) } |
636 B::Type { closing: Some(B::Item::SetValue), opening: _ } |
637 B::Type { closing: _, opening: Some(B::Item::SetValue) } |
638 B::Type { closing: Some(B::Item::SequenceValue), opening: _ } |
639 B::Type { closing: _, opening: Some(B::Item::SequenceValue) } |
640 B::Type {
641 closing: Some(B::Item::DictionaryValue),
642 opening: Some(B::Item::DictionaryKey),
643 } => {
644 self.skip_whitespace_and_maybe_commas(true);
645 }
646 _ => (),
647 }
648 Ok(())
649 }
650
651 fn close_compound(&mut self, b: &mut B::Type) -> ReaderResult<bool> {
652 self.skip_whitespace();
653 match self.peek()? {
654 b'>' | b']' | b'}' => {
655 self.skip()?;
656 b.shift(None);
657 self.boundary(b)?;
658 Ok(true)
659 }
660 _ => {
661 Ok(false)
662 }
663 }
664 }
665
666 fn open_embedded(&mut self) -> ReaderResult<()> {
667 self.skip_annotations()?;
668 let mark = Reader::<N>::mark(self)?;
669 match self.next_byte()? {
670 b'#' => match self.next_byte()? {
671 b':' => return Ok(()),
672 _ => (),
673 },
674 _ => (),
675 }
676 Reader::<N>::restore(self, &mark)?;
677 Err(self.expected(ExpectedKind::Embedded))
678 }
679
680 fn close_embedded(&mut self) -> ReaderResult<()> {
681 Ok(())
682 }
683
684 type Mark = S::Mark;
685
686 fn mark(&mut self) -> io::Result<Self::Mark> {
687 self.source.mark()
688 }
689
690 fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
691 self.source.restore(mark)
692 }
693
694 fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
695 self.skip_annotations()?;
696 let mark = Reader::<N>::mark(self)?;
697 Ok(match self.next_byte()? {
698 b'<' => Token::Compound(CompoundClass::Record),
699 b'[' => Token::Compound(CompoundClass::Sequence),
700 b'{' => Token::Compound(CompoundClass::Dictionary),
701 b'>' => Token::End,
702 b']' => Token::End,
703 b'}' => Token::End,
704 b'#' => match self.next_byte()? {
705 b':' => {
706 let v = self.next_iovalue(read_embedded_annotations)?;
707 Token::Embedded(self.dec.parse_embedded(&v)?)
708 }
709 b'{' => Token::Compound(CompoundClass::Set),
710 _ => {
711 Reader::<N>::restore(self, &mark)?;
712 Token::Atom(self.demand_next(false)?)
713 }
714 },
715 _ => {
716 Reader::<N>::restore(self, &mark)?;
717 Token::Atom(self.demand_next(false)?)
718 }
719 })
720 }
721
722 fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
723 let mut annotations = Vec::new();
724 self.gather_annotations(&mut annotations)?;
725 Ok((annotations, self.next_token(true)?))
726 }
727}