1use alloc::string::String;
19
20use crate::scan_buffer::ScanBuffer;
21use crate::scanner::{
22 ParsedNumber, ScanError, ScanErrorKind, Scanner, SpannedToken, Token as ScanToken,
23 decode_string_owned, parse_number,
24};
25use facet_reflect::Span;
26
27#[derive(Debug)]
29pub enum ReaderError {
30 Io(std::io::Error),
32 Scan(ScanError),
34}
35
36impl From<std::io::Error> for ReaderError {
37 fn from(err: std::io::Error) -> Self {
38 ReaderError::Io(err)
39 }
40}
41
42impl From<ScanError> for ReaderError {
43 fn from(err: ScanError) -> Self {
44 ReaderError::Scan(err)
45 }
46}
47
48impl core::fmt::Display for ReaderError {
49 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
50 match self {
51 ReaderError::Io(e) => write!(f, "IO error: {e}"),
52 ReaderError::Scan(e) => write!(f, "scan error: {:?}", e.kind),
53 }
54 }
55}
56
57impl std::error::Error for ReaderError {}
58
/// A fully decoded JSON token that owns its payload.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonToken {
    /// `{`
    ObjectStart,
    /// `}`
    ObjectEnd,
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// The literal `null`.
    Null,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// A string value with its decoded contents.
    String(String),
    /// A number parsed as `u64`.
    U64(u64),
    /// A number parsed as `i64`.
    I64(i64),
    /// A number parsed as `u128`.
    U128(u128),
    /// A number parsed as `i128`.
    I128(i128),
    /// A number parsed as `f64`.
    F64(f64),
    /// End of input.
    Eof,
}
95
96#[derive(Debug, Clone)]
98pub struct SpannedJsonToken {
99 pub token: JsonToken,
101 pub span: Span,
103}
104
/// Blocking streaming tokenizer; its methods require `R: std::io::Read`.
#[cfg(feature = "std")]
pub struct JsonReader<R> {
    /// Source the buffer is refilled from.
    reader: R,
    /// Bytes read from `reader` that the scanner works on.
    buffer: ScanBuffer,
    /// Tokenizer state, including its position inside `buffer`.
    scanner: Scanner,
    /// Bytes already dropped from the buffer; added to buffer-relative
    /// offsets when building spans.
    bytes_processed: usize,
}
114
#[cfg(feature = "std")]
impl<R: std::io::Read> JsonReader<R> {
    /// Creates a reader over `reader` with a buffer built by [`ScanBuffer::new`].
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::new(),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Creates a reader over `reader` with a buffer built by
    /// [`ScanBuffer::with_capacity`]. The buffer is still grown by
    /// [`next_token`](Self::next_token) when a token does not fit.
    pub fn with_capacity(reader: R, capacity: usize) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::with_capacity(capacity),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Reads and decodes the next token from the stream.
    ///
    /// Every path returns `Some`; the end of the input is signalled by a
    /// [`JsonToken::Eof`] token rather than by `None`.
    ///
    /// # Errors
    ///
    /// * [`ReaderError::Io`] when refilling the buffer fails.
    /// * [`ReaderError::Scan`] when the scanner rejects the input, a token
    ///   cannot be decoded, or the input ends in the middle of a token.
    pub fn next_token(&mut self) -> Option<Result<SpannedJsonToken, ReaderError>> {
        loop {
            // Nothing buffered and the source is not exhausted: read first.
            if self.buffer.filled() == 0 && !self.buffer.is_eof() {
                match self.buffer.refill(&mut self.reader) {
                    Ok(0) => return Some(Ok(self.eof_at(0))),
                    Ok(_) => {}
                    Err(e) => return Some(Err(ReaderError::Io(e))),
                }
            }

            let spanned = match self.scanner.next_token(self.buffer.data()) {
                Ok(spanned) => spanned,
                Err(e) => return Some(Err(self.absolute_error(e))),
            };

            match &spanned.token {
                ScanToken::NeedMore { .. } => {
                    // The token is cut off by the end of the buffered data.
                    // Make room if the buffer is full, then read more.
                    if self.buffer.filled() == self.buffer.capacity() {
                        self.buffer.grow();
                    }

                    match self.buffer.refill(&mut self.reader) {
                        Ok(0) if self.buffer.is_eof() => {
                            return Some(Err(ReaderError::Scan(ScanError {
                                kind: ScanErrorKind::UnexpectedEof("incomplete token"),
                                span: Span::new(self.bytes_processed, 0),
                            })));
                        }
                        Ok(_) => continue,
                        Err(e) => return Some(Err(ReaderError::Io(e))),
                    }
                }
                ScanToken::Eof => {
                    if !self.buffer.is_eof() {
                        // The scanner only ran out of buffered bytes. Account
                        // for what it consumed, start over with an empty
                        // buffer, and try to read more.
                        self.bytes_processed += self.scanner.pos();
                        self.buffer.reset();
                        self.scanner.set_pos(0);

                        match self.buffer.refill(&mut self.reader) {
                            Ok(0) => return Some(Ok(self.eof_at(0))),
                            Ok(_) => continue,
                            Err(e) => return Some(Err(ReaderError::Io(e))),
                        }
                    }
                    return Some(Ok(self.eof_at(spanned.span.offset)));
                }
                _ => return Some(self.materialize_token(&spanned)),
            }
        }
    }

    /// Builds the end-of-input token located `buffer_offset` bytes into the
    /// current buffer.
    fn eof_at(&self, buffer_offset: usize) -> SpannedJsonToken {
        SpannedJsonToken {
            token: JsonToken::Eof,
            span: Span::new(self.bytes_processed + buffer_offset, 0),
        }
    }

    /// Rebases a buffer-relative scan error onto the stream by adding the
    /// bytes already dropped from the buffer to its span offset.
    fn absolute_error(&self, err: ScanError) -> ReaderError {
        ReaderError::Scan(ScanError {
            kind: err.kind,
            span: Span::new(self.bytes_processed + err.span.offset, err.span.len),
        })
    }

    /// Converts a scanner token into an owned [`JsonToken`], decoding string
    /// and number payloads from the current buffer.
    ///
    /// Decode and parse errors are rebased with the same offset as the token
    /// span, so they agree with scanner errors reported by `next_token`.
    fn materialize_token(&self, spanned: &SpannedToken) -> Result<SpannedJsonToken, ReaderError> {
        let buf = self.buffer.data();
        let span = Span::new(self.bytes_processed + spanned.span.offset, spanned.span.len);

        let token = match &spanned.token {
            ScanToken::ObjectStart => JsonToken::ObjectStart,
            ScanToken::ObjectEnd => JsonToken::ObjectEnd,
            ScanToken::ArrayStart => JsonToken::ArrayStart,
            ScanToken::ArrayEnd => JsonToken::ArrayEnd,
            ScanToken::Colon => JsonToken::Colon,
            ScanToken::Comma => JsonToken::Comma,
            ScanToken::Null => JsonToken::Null,
            ScanToken::True => JsonToken::True,
            ScanToken::False => JsonToken::False,
            ScanToken::String { start, end, .. } => {
                let s = decode_string_owned(buf, *start, *end)
                    .map_err(|e| self.absolute_error(e))?;
                JsonToken::String(s)
            }
            ScanToken::Number { start, end, hint } => {
                let parsed = parse_number(buf, *start, *end, *hint)
                    .map_err(|e| self.absolute_error(e))?;
                match parsed {
                    ParsedNumber::U64(n) => JsonToken::U64(n),
                    ParsedNumber::I64(n) => JsonToken::I64(n),
                    ParsedNumber::U128(n) => JsonToken::U128(n),
                    ParsedNumber::I128(n) => JsonToken::I128(n),
                    ParsedNumber::F64(n) => JsonToken::F64(n),
                }
            }
            // `next_token` handles both of these before calling this method.
            ScanToken::Eof | ScanToken::NeedMore { .. } => unreachable!(),
        };

        Ok(SpannedJsonToken { token, span })
    }
}
253
/// Async streaming tokenizer; its methods require
/// `R: tokio::io::AsyncRead + Unpin`.
#[cfg(feature = "tokio")]
pub struct AsyncJsonReader<R> {
    /// Source the buffer is refilled from.
    reader: R,
    /// Bytes read from `reader` that the scanner works on.
    buffer: ScanBuffer,
    /// Tokenizer state, including its position inside `buffer`.
    scanner: Scanner,
    /// Bytes already dropped from the buffer; added to buffer-relative
    /// offsets when building spans.
    bytes_processed: usize,
}
262
#[cfg(feature = "tokio")]
impl<R: tokio::io::AsyncRead + Unpin> AsyncJsonReader<R> {
    /// Creates a reader over `reader` with a buffer built by [`ScanBuffer::new`].
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::new(),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Creates a reader over `reader` with a buffer built by
    /// [`ScanBuffer::with_capacity`]. The buffer is still grown by
    /// [`next_token`](Self::next_token) when a token does not fit.
    pub fn with_capacity(reader: R, capacity: usize) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::with_capacity(capacity),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Reads and decodes the next token from the stream.
    ///
    /// Every path returns `Some`; the end of the input is signalled by a
    /// [`JsonToken::Eof`] token rather than by `None`.
    ///
    /// # Errors
    ///
    /// * [`ReaderError::Io`] when refilling the buffer fails.
    /// * [`ReaderError::Scan`] when the scanner rejects the input, a token
    ///   cannot be decoded, or the input ends in the middle of a token.
    pub async fn next_token(&mut self) -> Option<Result<SpannedJsonToken, ReaderError>> {
        loop {
            // Nothing buffered and the source is not exhausted: read first.
            if self.buffer.filled() == 0 && !self.buffer.is_eof() {
                match self.buffer.refill_tokio(&mut self.reader).await {
                    Ok(0) => return Some(Ok(self.eof_at(0))),
                    Ok(_) => {}
                    Err(e) => return Some(Err(ReaderError::Io(e))),
                }
            }

            let spanned = match self.scanner.next_token(self.buffer.data()) {
                Ok(spanned) => spanned,
                Err(e) => return Some(Err(self.absolute_error(e))),
            };

            match &spanned.token {
                ScanToken::NeedMore { .. } => {
                    // The token is cut off by the end of the buffered data.
                    // Make room if the buffer is full, then read more.
                    if self.buffer.filled() == self.buffer.capacity() {
                        self.buffer.grow();
                    }

                    match self.buffer.refill_tokio(&mut self.reader).await {
                        Ok(0) if self.buffer.is_eof() => {
                            return Some(Err(ReaderError::Scan(ScanError {
                                kind: ScanErrorKind::UnexpectedEof("incomplete token"),
                                span: Span::new(self.bytes_processed, 0),
                            })));
                        }
                        Ok(_) => continue,
                        Err(e) => return Some(Err(ReaderError::Io(e))),
                    }
                }
                ScanToken::Eof => {
                    if !self.buffer.is_eof() {
                        // The scanner only ran out of buffered bytes. Account
                        // for what it consumed, start over with an empty
                        // buffer, and try to read more.
                        self.bytes_processed += self.scanner.pos();
                        self.buffer.reset();
                        self.scanner.set_pos(0);

                        match self.buffer.refill_tokio(&mut self.reader).await {
                            Ok(0) => return Some(Ok(self.eof_at(0))),
                            Ok(_) => continue,
                            Err(e) => return Some(Err(ReaderError::Io(e))),
                        }
                    }
                    return Some(Ok(self.eof_at(spanned.span.offset)));
                }
                _ => return Some(self.materialize_token(&spanned)),
            }
        }
    }

    /// Builds the end-of-input token located `buffer_offset` bytes into the
    /// current buffer.
    fn eof_at(&self, buffer_offset: usize) -> SpannedJsonToken {
        SpannedJsonToken {
            token: JsonToken::Eof,
            span: Span::new(self.bytes_processed + buffer_offset, 0),
        }
    }

    /// Rebases a buffer-relative scan error onto the stream by adding the
    /// bytes already dropped from the buffer to its span offset.
    fn absolute_error(&self, err: ScanError) -> ReaderError {
        ReaderError::Scan(ScanError {
            kind: err.kind,
            span: Span::new(self.bytes_processed + err.span.offset, err.span.len),
        })
    }

    /// Converts a scanner token into an owned [`JsonToken`], decoding string
    /// and number payloads from the current buffer.
    ///
    /// Decode and parse errors are rebased with the same offset as the token
    /// span, so they agree with scanner errors reported by `next_token`.
    fn materialize_token(&self, spanned: &SpannedToken) -> Result<SpannedJsonToken, ReaderError> {
        let buf = self.buffer.data();
        let span = Span::new(self.bytes_processed + spanned.span.offset, spanned.span.len);

        let token = match &spanned.token {
            ScanToken::ObjectStart => JsonToken::ObjectStart,
            ScanToken::ObjectEnd => JsonToken::ObjectEnd,
            ScanToken::ArrayStart => JsonToken::ArrayStart,
            ScanToken::ArrayEnd => JsonToken::ArrayEnd,
            ScanToken::Colon => JsonToken::Colon,
            ScanToken::Comma => JsonToken::Comma,
            ScanToken::Null => JsonToken::Null,
            ScanToken::True => JsonToken::True,
            ScanToken::False => JsonToken::False,
            ScanToken::String { start, end, .. } => {
                let s = decode_string_owned(buf, *start, *end)
                    .map_err(|e| self.absolute_error(e))?;
                JsonToken::String(s)
            }
            ScanToken::Number { start, end, hint } => {
                let parsed = parse_number(buf, *start, *end, *hint)
                    .map_err(|e| self.absolute_error(e))?;
                match parsed {
                    ParsedNumber::U64(n) => JsonToken::U64(n),
                    ParsedNumber::I64(n) => JsonToken::I64(n),
                    ParsedNumber::U128(n) => JsonToken::U128(n),
                    ParsedNumber::I128(n) => JsonToken::I128(n),
                    ParsedNumber::F64(n) => JsonToken::F64(n),
                }
            }
            // `next_token` handles both of these before calling this method.
            ScanToken::Eof | ScanToken::NeedMore { .. } => unreachable!(),
        };

        Ok(SpannedJsonToken { token, span })
    }
}
389
/// Async streaming tokenizer; its methods require
/// `R: futures_io::AsyncRead + Unpin`.
#[cfg(feature = "futures-io")]
pub struct FuturesJsonReader<R> {
    /// Source the buffer is refilled from.
    reader: R,
    /// Bytes read from `reader` that the scanner works on.
    buffer: ScanBuffer,
    /// Tokenizer state, including its position inside `buffer`.
    scanner: Scanner,
    /// Bytes already dropped from the buffer; added to buffer-relative
    /// offsets when building spans.
    bytes_processed: usize,
}
398
#[cfg(feature = "futures-io")]
impl<R: futures_io::AsyncRead + Unpin> FuturesJsonReader<R> {
    /// Creates a reader over `reader` with a buffer built by [`ScanBuffer::new`].
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::new(),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Creates a reader over `reader` with a buffer built by
    /// [`ScanBuffer::with_capacity`]. The buffer is still grown by
    /// [`next_token`](Self::next_token) when a token does not fit.
    pub fn with_capacity(reader: R, capacity: usize) -> Self {
        Self {
            reader,
            buffer: ScanBuffer::with_capacity(capacity),
            scanner: Scanner::new(),
            bytes_processed: 0,
        }
    }

    /// Reads and decodes the next token from the stream.
    ///
    /// Every path returns `Some`; the end of the input is signalled by a
    /// [`JsonToken::Eof`] token rather than by `None`.
    ///
    /// # Errors
    ///
    /// * [`ReaderError::Io`] when refilling the buffer fails.
    /// * [`ReaderError::Scan`] when the scanner rejects the input, a token
    ///   cannot be decoded, or the input ends in the middle of a token.
    pub async fn next_token(&mut self) -> Option<Result<SpannedJsonToken, ReaderError>> {
        loop {
            // Nothing buffered and the source is not exhausted: read first.
            if self.buffer.filled() == 0 && !self.buffer.is_eof() {
                match self.buffer.refill_futures(&mut self.reader).await {
                    Ok(0) => return Some(Ok(self.eof_at(0))),
                    Ok(_) => {}
                    Err(e) => return Some(Err(ReaderError::Io(e))),
                }
            }

            let spanned = match self.scanner.next_token(self.buffer.data()) {
                Ok(spanned) => spanned,
                Err(e) => return Some(Err(self.absolute_error(e))),
            };

            match &spanned.token {
                ScanToken::NeedMore { .. } => {
                    // The token is cut off by the end of the buffered data.
                    // Make room if the buffer is full, then read more.
                    if self.buffer.filled() == self.buffer.capacity() {
                        self.buffer.grow();
                    }

                    match self.buffer.refill_futures(&mut self.reader).await {
                        Ok(0) if self.buffer.is_eof() => {
                            return Some(Err(ReaderError::Scan(ScanError {
                                kind: ScanErrorKind::UnexpectedEof("incomplete token"),
                                span: Span::new(self.bytes_processed, 0),
                            })));
                        }
                        Ok(_) => continue,
                        Err(e) => return Some(Err(ReaderError::Io(e))),
                    }
                }
                ScanToken::Eof => {
                    if !self.buffer.is_eof() {
                        // The scanner only ran out of buffered bytes. Account
                        // for what it consumed, start over with an empty
                        // buffer, and try to read more.
                        self.bytes_processed += self.scanner.pos();
                        self.buffer.reset();
                        self.scanner.set_pos(0);

                        match self.buffer.refill_futures(&mut self.reader).await {
                            Ok(0) => return Some(Ok(self.eof_at(0))),
                            Ok(_) => continue,
                            Err(e) => return Some(Err(ReaderError::Io(e))),
                        }
                    }
                    return Some(Ok(self.eof_at(spanned.span.offset)));
                }
                _ => return Some(self.materialize_token(&spanned)),
            }
        }
    }

    /// Builds the end-of-input token located `buffer_offset` bytes into the
    /// current buffer.
    fn eof_at(&self, buffer_offset: usize) -> SpannedJsonToken {
        SpannedJsonToken {
            token: JsonToken::Eof,
            span: Span::new(self.bytes_processed + buffer_offset, 0),
        }
    }

    /// Rebases a buffer-relative scan error onto the stream by adding the
    /// bytes already dropped from the buffer to its span offset.
    fn absolute_error(&self, err: ScanError) -> ReaderError {
        ReaderError::Scan(ScanError {
            kind: err.kind,
            span: Span::new(self.bytes_processed + err.span.offset, err.span.len),
        })
    }

    /// Converts a scanner token into an owned [`JsonToken`], decoding string
    /// and number payloads from the current buffer.
    ///
    /// Decode and parse errors are rebased with the same offset as the token
    /// span, so they agree with scanner errors reported by `next_token`.
    fn materialize_token(&self, spanned: &SpannedToken) -> Result<SpannedJsonToken, ReaderError> {
        let buf = self.buffer.data();
        let span = Span::new(self.bytes_processed + spanned.span.offset, spanned.span.len);

        let token = match &spanned.token {
            ScanToken::ObjectStart => JsonToken::ObjectStart,
            ScanToken::ObjectEnd => JsonToken::ObjectEnd,
            ScanToken::ArrayStart => JsonToken::ArrayStart,
            ScanToken::ArrayEnd => JsonToken::ArrayEnd,
            ScanToken::Colon => JsonToken::Colon,
            ScanToken::Comma => JsonToken::Comma,
            ScanToken::Null => JsonToken::Null,
            ScanToken::True => JsonToken::True,
            ScanToken::False => JsonToken::False,
            ScanToken::String { start, end, .. } => {
                let s = decode_string_owned(buf, *start, *end)
                    .map_err(|e| self.absolute_error(e))?;
                JsonToken::String(s)
            }
            ScanToken::Number { start, end, hint } => {
                let parsed = parse_number(buf, *start, *end, *hint)
                    .map_err(|e| self.absolute_error(e))?;
                match parsed {
                    ParsedNumber::U64(n) => JsonToken::U64(n),
                    ParsedNumber::I64(n) => JsonToken::I64(n),
                    ParsedNumber::U128(n) => JsonToken::U128(n),
                    ParsedNumber::I128(n) => JsonToken::I128(n),
                    ParsedNumber::F64(n) => JsonToken::F64(n),
                }
            }
            // `next_token` handles both of these before calling this method.
            ScanToken::Eof | ScanToken::NeedMore { .. } => unreachable!(),
        };

        Ok(SpannedJsonToken { token, span })
    }
}
525
#[cfg(all(test, feature = "std"))]
mod tests {
    use super::*;
    use std::io::{Cursor, Read};

    /// `Read` wrapper that caps how many bytes a single `read` call may
    /// request from the wrapped reader.
    struct ShortReadAdapter<R> {
        inner: R,
        max_bytes_per_read: usize,
    }

    impl<R> ShortReadAdapter<R> {
        fn new(inner: R, max_bytes_per_read: usize) -> Self {
            Self {
                inner,
                max_bytes_per_read,
            }
        }
    }

    impl<R: Read> Read for ShortReadAdapter<R> {
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            let limit = self.max_bytes_per_read.min(buf.len());
            self.inner.read(&mut buf[..limit])
        }
    }

    /// Drains `reader` up to and including the first `Eof` token, panicking
    /// on any error.
    fn collect_tokens<R: Read>(reader: &mut JsonReader<R>) -> Vec<JsonToken> {
        let mut collected = Vec::new();
        loop {
            let token = reader.next_token().unwrap().unwrap().token;
            let reached_end = token == JsonToken::Eof;
            collected.push(token);
            if reached_end {
                return collected;
            }
        }
    }

    /// Shorthand for an owned string token.
    fn text(s: &str) -> JsonToken {
        JsonToken::String(s.to_string())
    }

    /// Token stream expected for `{"hello": "world"}`.
    fn hello_world_tokens() -> [JsonToken; 6] {
        [
            JsonToken::ObjectStart,
            text("hello"),
            JsonToken::Colon,
            text("world"),
            JsonToken::ObjectEnd,
            JsonToken::Eof,
        ]
    }

    #[test]
    fn test_simple() {
        let mut reader = JsonReader::new(Cursor::new(r#"{"name": "test", "value": 42}"#));

        assert_eq!(
            collect_tokens(&mut reader),
            [
                JsonToken::ObjectStart,
                text("name"),
                JsonToken::Colon,
                text("test"),
                JsonToken::Comma,
                text("value"),
                JsonToken::Colon,
                JsonToken::U64(42),
                JsonToken::ObjectEnd,
                JsonToken::Eof,
            ]
        );
    }

    #[test]
    fn test_small_buffer() {
        // A 4-byte buffer is smaller than the string tokens in the input.
        let source = Cursor::new(r#"{"hello": "world"}"#);
        let mut reader = JsonReader::with_capacity(source, 4);

        assert_eq!(collect_tokens(&mut reader), hello_world_tokens());
    }

    #[test]
    fn test_short_reads() {
        // At most two bytes arrive per read, into a 4-byte buffer.
        let source = ShortReadAdapter::new(Cursor::new(r#"{"hello": "world"}"#), 2);
        let mut reader = JsonReader::with_capacity(source, 4);

        assert_eq!(collect_tokens(&mut reader), hello_world_tokens());
    }

    #[test]
    fn test_single_byte_reads() {
        // One byte per read, into a 2-byte buffer.
        let source = ShortReadAdapter::new(Cursor::new(r#"[1, 2, 3]"#), 1);
        let mut reader = JsonReader::with_capacity(source, 2);

        assert_eq!(
            collect_tokens(&mut reader),
            [
                JsonToken::ArrayStart,
                JsonToken::U64(1),
                JsonToken::Comma,
                JsonToken::U64(2),
                JsonToken::Comma,
                JsonToken::U64(3),
                JsonToken::ArrayEnd,
                JsonToken::Eof,
            ]
        );
    }

    #[test]
    fn test_numbers() {
        let mut reader = JsonReader::new(Cursor::new(r#"[1, -5, 3.14, 1e10]"#));
        let tokens = collect_tokens(&mut reader);

        // Odd indices hold the values; even indices hold brackets and commas.
        assert_eq!(tokens[1], JsonToken::U64(1));
        assert_eq!(tokens[3], JsonToken::I64(-5));
        assert!(matches!(tokens[5], JsonToken::F64(_)));
        assert!(matches!(tokens[7], JsonToken::F64(_)));
    }

    #[test]
    fn test_escapes() {
        let mut reader = JsonReader::new(Cursor::new(r#"{"msg": "hello\nworld"}"#));
        let tokens = collect_tokens(&mut reader);

        assert_eq!(tokens[3], text("hello\nworld"));
    }

    #[test]
    fn test_escapes_with_short_reads() {
        // Three bytes per read into a 4-byte buffer.
        let source = ShortReadAdapter::new(Cursor::new(r#"{"msg": "a\nb\tc"}"#), 3);
        let mut reader = JsonReader::with_capacity(source, 4);
        let tokens = collect_tokens(&mut reader);

        assert_eq!(tokens[3], text("a\nb\tc"));
    }
}
689
#[cfg(all(test, feature = "tokio"))]
mod tokio_tests {
    use super::*;
    use std::io::Cursor;

    /// Drains `reader` up to and including the first `Eof` token, panicking
    /// on any error.
    async fn collect_tokens<R>(reader: &mut AsyncJsonReader<R>) -> Vec<JsonToken>
    where
        R: tokio::io::AsyncRead + Unpin,
    {
        let mut collected = Vec::new();
        loop {
            let token = reader.next_token().await.unwrap().unwrap().token;
            let reached_end = token == JsonToken::Eof;
            collected.push(token);
            if reached_end {
                return collected;
            }
        }
    }

    /// Shorthand for an owned string token.
    fn text(s: &str) -> JsonToken {
        JsonToken::String(s.to_string())
    }

    #[tokio::test]
    async fn test_async_simple() {
        let input = r#"{"name": "test", "value": 42}"#;
        let mut reader = AsyncJsonReader::new(Cursor::new(input.as_bytes().to_vec()));

        assert_eq!(
            collect_tokens(&mut reader).await,
            [
                JsonToken::ObjectStart,
                text("name"),
                JsonToken::Colon,
                text("test"),
                JsonToken::Comma,
                text("value"),
                JsonToken::Colon,
                JsonToken::U64(42),
                JsonToken::ObjectEnd,
                JsonToken::Eof,
            ]
        );
    }

    #[tokio::test]
    async fn test_async_small_buffer() {
        // A 4-byte buffer is smaller than the string tokens in the input.
        let input = r#"{"hello": "world"}"#;
        let source = Cursor::new(input.as_bytes().to_vec());
        let mut reader = AsyncJsonReader::with_capacity(source, 4);

        assert_eq!(
            collect_tokens(&mut reader).await,
            [
                JsonToken::ObjectStart,
                text("hello"),
                JsonToken::Colon,
                text("world"),
                JsonToken::ObjectEnd,
                JsonToken::Eof,
            ]
        );
    }
}