1#![deny(
6 ambiguous_glob_reexports,
7 anonymous_parameters,
8 array_into_iter,
9 asm_sub_register,
10 bad_asm_style,
11 bare_trait_objects,
12 break_with_label_and_loop,
13 clashing_extern_declarations,
14 coherence_leak_check,
15 confusable_idents,
16 const_evaluatable_unchecked,
17 const_item_mutation,
18 dead_code,
19 deprecated,
20 deprecated_where_clause_location,
21 deref_into_dyn_supertrait,
22 deref_nullptr,
23 drop_bounds,
24 dropping_copy_types,
25 dropping_references,
26 duplicate_macro_attributes,
27 dyn_drop,
28 ellipsis_inclusive_range_patterns,
29 exported_private_dependencies,
30 for_loops_over_fallibles,
31 forbidden_lint_groups,
32 forgetting_copy_types,
33 forgetting_references,
34 function_item_references,
35 improper_ctypes,
36 improper_ctypes_definitions,
37 incomplete_features,
38 inline_no_sanitize,
39 invalid_doc_attributes,
40 invalid_macro_export_arguments,
41 invalid_value,
42 irrefutable_let_patterns,
43 large_assignments,
44 late_bound_lifetime_arguments,
45 legacy_derive_helpers,
46 map_unit_fn,
47 missing_docs,
48 named_arguments_used_positionally,
49 no_mangle_generic_items,
50 non_camel_case_types,
51 non_fmt_panics,
52 non_shorthand_field_patterns,
53 non_snake_case,
54 non_upper_case_globals,
55 opaque_hidden_inferred_bound,
56 overlapping_range_endpoints,
57 path_statements,
58 redundant_semicolons,
59 renamed_and_removed_lints,
60 repr_transparent_external_private_fields,
61 semicolon_in_expressions_from_macros,
62 special_module_name,
63 stable_features,
64 suspicious_double_ref_op,
65 trivial_bounds,
66 trivial_numeric_casts,
68 type_alias_bounds,
69 tyvar_behind_raw_pointer,
70 uncommon_codepoints,
71 unconditional_recursion,
72 undefined_naked_function_abi,
73 unexpected_cfgs,
74 ungated_async_fn_track_caller,
75 uninhabited_static,
76 unknown_lints,
77 unnameable_test_items,
78 unreachable_code,
79 unreachable_patterns,
80 unsafe_code,
81 unstable_features,
82 unstable_name_collisions,
83 unstable_syntax_pre_expansion,
84 unused_allocation,
85 unused_assignments,
86 unused_attributes,
87 unused_braces,
88 unused_braces,
89 unused_comparisons,
90 unused_doc_comments,
91 unused_features,
92 unused_features,
93 unused_import_braces,
94 unused_imports,
95 unused_imports,
96 unused_labels,
97 unused_labels,
98 unused_macros,
99 unused_macros,
100 unused_must_use,
101 unused_mut,
102 unused_mut,
103 unused_parens,
104 unused_parens,
105 unused_qualifications,
106 unused_unsafe,
107 unused_unsafe,
108 unused_variables,
109 warnings,
110 while_true
111)]
112
113use std::fs::File;
114use std::io;
115use std::io::{BufRead, BufReader, BufWriter, Read, StderrLock, StdinLock, StdoutLock, Write};
116
117use memchr;
118
119mod errors;
120
/// Default length of the tracker's output buffer, and the amount reserved when it overflows.
const BUF_EXTEND_SIZE: usize = 4 * 1024 * 1024;
/// Default length of the tracker's stack of open brackets.
const DEFAULT_MAX_IDENT_DEPTH: usize = 4 * 1024 * 1024;

/// Default value of `Carver::min_size`.
pub const DEFAULT_MIN_JSON_SIZE: usize = 4;

// Structural characters.
const CHAR_LEFT_SQUARE_BRACKET: u8 = b'[';
const CHAR_LEFT_CURLY_BRACKET: u8 = b'{';
const CHAR_RIGHT_SQUARE_BRACKET: u8 = b']';
const CHAR_RIGHT_CURLY_BRACKET: u8 = b'}';
const CHAR_COLON: u8 = b':';
const CHAR_COMMA: u8 = b',';

// Insignificant whitespace.
const CHAR_SPACE: u8 = b' ';
const CHAR_TAB: u8 = b'\t';
const CHAR_NEWLINE: u8 = b'\n';
const CHAR_CARRIAGE_RETURN: u8 = b'\r';
147
// First bytes of the `false`, `null` and `true` literals.
const CHAR_START_FALSE: u8 = b'f';
const CHAR_START_NULL: u8 = b'n';
const CHAR_START_TRUE: u8 = b't';

// Bytes that may appear in a number.
const CHAR_MINUS: u8 = b'-';
const CHAR_PLUS: u8 = b'+';
const CHAR_ZERO: u8 = b'0';
const CHAR_NINE: u8 = b'9';
const CHAR_DECIMAL: u8 = b'.';
const CHAR_EXP_LOWER: u8 = b'e';
const CHAR_EXP_UPPER: u8 = b'E';

// String delimiters and the bytes allowed after a backslash.
const CHAR_QUOT_MARK: u8 = b'"';
const CHAR_ESCAPE: u8 = b'\\';
const CHAR_SLASH: u8 = b'/';
const CHAR_ESC_BACKSPACE: u8 = b'b';
const CHAR_ESC_FORM_FEED: u8 = b'f';
const CHAR_ESC_LINE_FEED: u8 = b'n';
const CHAR_ESC_CARRIAGE_RETURN: u8 = b'r';
const CHAR_ESC_TAB: u8 = b't';
const CHAR_U: u8 = b'u';

/// Why a token handler, or a whole carve attempt, stopped.
enum Cause {
    /// A handler stopped at this byte; `hunt` dispatches on it to pick the next handler.
    Found(u8),
    /// This byte is not acceptable at the current position.
    Corrupted(u8),
    /// The outermost bracket was closed.
    Completed,
    /// The reader ran out of bytes.
    Exhausted,
}
179
/// Returns `true` when `b` is a control character that may not appear unescaped
/// inside a JSON string.
///
/// RFC 8259 requires every control character from U+0000 through U+001F
/// (inclusive) to be escaped, so the upper bound has to include `0x1F`.
fn byte_needs_escape(b: u8) -> bool {
    b <= 0x1F
}
183
184fn byte_can_escape(b: u8) -> bool {
185 match b {
186 CHAR_QUOT_MARK
187 | CHAR_ESCAPE
188 | CHAR_SLASH
189 | CHAR_ESC_BACKSPACE
190 | CHAR_ESC_FORM_FEED
191 | CHAR_ESC_LINE_FEED
192 | CHAR_ESC_CARRIAGE_RETURN
193 | CHAR_ESC_TAB
194 | CHAR_U => true,
195 _ => false,
196 }
197}
198
/// Maps an opening bracket byte to its closing counterpart.
///
/// In ASCII both `[`/`]` and `{`/`}` are exactly two code points apart.
fn _closing_ident(b: u8) -> u8 {
    const OPEN_TO_CLOSE_OFFSET: u8 = 0x02;
    b + OPEN_TO_CLOSE_OFFSET
}
202
203struct Report {
204 status: Cause,
205 start: usize,
206 end: usize,
207 partial_end: usize,
208}
209
210impl Report {
211 fn print(&self, writer: &mut Writer) -> Result<(), errors::Err> {
226 let w = writer.mut_ref();
227
228 let status = match self.status {
229 Cause::Exhausted => "exhausted",
230 Cause::Corrupted(_) => "corrupted",
231 Cause::Completed => "completed",
232 _ => unreachable!(),
233 };
234 w.write_all(
235 format!(
236 "{},{},{},{}\n",
237 status, self.start, self.end, self.partial_end
238 )
239 .as_ref(),
240 )?;
241 Ok(())
242 }
243}
244
245#[derive(Debug)]
246struct JsonTracker {
247 cur: usize,
248 partial_close_end: usize,
249 ident_levels: Vec<u8>,
250 cur_ident_level: usize,
251 in_key: bool,
252 processed: Vec<u8>,
253 replace_newlines: bool,
254}
255
256impl JsonTracker {
257 fn new(max_size: Option<usize>, max_ident_depth: Option<usize>) -> JsonTracker {
258 let _max_size = match max_size {
259 Some(size) => size,
260 None => BUF_EXTEND_SIZE,
261 };
262
263 let _max_ident_depth = match max_ident_depth {
264 Some(size) => size,
265 None => DEFAULT_MAX_IDENT_DEPTH,
266 };
267 JsonTracker {
268 cur: 0,
269 partial_close_end: 0,
270 ident_levels: vec![0u8; _max_ident_depth],
271 cur_ident_level: 0,
272 in_key: false,
273 processed: vec![0u8; _max_size],
274 replace_newlines: false,
275 }
276 }
277
278 fn advance(&mut self, mut b: u8) {
279 if self.replace_newlines && b == CHAR_NEWLINE {
285 b = CHAR_SPACE;
286 }
287 if self.cur < self.processed.len() {
288 self.processed[self.cur] = b;
289 } else {
290 self.processed.reserve(BUF_EXTEND_SIZE);
291 self.processed.push(b);
292 }
293 self.cur += 1;
294 }
295
296 fn last_byte(&self) -> Option<u8> {
297 if self.cur == 0 {
298 return None;
299 }
300
301 Some(self.processed[self.cur - 1])
302 }
303
304 fn last_ident(&self) -> Option<u8> {
305 if self.cur_ident_level == 0 {
306 return None;
307 }
308
309 Some(self.ident_levels[self.cur_ident_level - 1])
310 }
311
312 fn add_ident(&mut self, b: u8) {
313 self.cur_ident_level += 1;
314 self.ident_levels[self.cur_ident_level - 1] = b;
315 self.partial_close_end = self.cur;
316 self.advance(b);
317 }
318
319 fn remove_ident(&mut self, expected: u8) -> Result<bool, ()> {
320 match self.last_ident() {
321 None => return Err(()),
322 Some(ident) => {
323 if ident != expected {
324 return Err(());
325 }
326 }
327 }
328
329 self.partial_close_end = self.cur;
330 self.cur_ident_level -= 1;
331 self.advance(_closing_ident(expected)); match self.cur_ident_level {
334 0 => Ok(true),
335 _ => Ok(false),
336 }
337 }
338
339 fn quick_clean(&mut self) -> () {
340 self.cur = 0;
341 self.partial_close_end = 0;
342 self.cur_ident_level = 0;
343 self.in_key = false;
344 }
345}
346
/// Input sources a `Carver` can read from.
pub enum Reader<'a> {
    /// A buffered file.
    File(BufReader<File>),
    /// Locked standard input.
    Stdin(StdinLock<'a>),
    /// A buffered in-memory byte slice.
    Local(BufReader<&'a [u8]>),
}

impl<'a> Reader<'a> {
    /// Wraps `file` in a buffered reader, using `buf_size` as the buffer capacity
    /// when given and the standard library default otherwise.
    pub fn from_file(file: File, buf_size: Option<usize>) -> Reader<'a> {
        let buffered = if let Some(capacity) = buf_size {
            BufReader::with_capacity(capacity, file)
        } else {
            BufReader::new(file)
        };
        Self::File(buffered)
    }

    /// Locks standard input and reads from it.
    pub fn from_stdin() -> Reader<'a> {
        Self::Stdin(io::stdin().lock())
    }

    /// Borrows the underlying source as a `BufRead` trait object.
    fn mut_ref(&mut self) -> &mut dyn BufRead {
        match self {
            Self::File(inner) => inner,
            Self::Stdin(inner) => inner,
            Self::Local(inner) => inner,
        }
    }
}
381
/// Output sinks a `Carver` can write to.
pub enum Writer<'a> {
    /// A buffered file.
    File(BufWriter<File>),
    /// Locked standard output.
    Stdout(StdoutLock<'a>),
    /// Locked standard error.
    Stderr(StderrLock<'a>),
    /// A buffered in-memory byte vector.
    Local(BufWriter<Vec<u8>>),
}

impl<'a> Writer<'a> {
    /// Wraps `file` in a buffered writer, using `buf_size` as the buffer capacity
    /// when given and the standard library default otherwise.
    pub fn to_file(file: File, buf_size: Option<usize>) -> Writer<'a> {
        let buffered = if let Some(capacity) = buf_size {
            BufWriter::with_capacity(capacity, file)
        } else {
            BufWriter::new(file)
        };
        Writer::File(buffered)
    }

    /// Locks standard output and writes to it.
    pub fn to_stdout() -> Writer<'a> {
        let lock = io::stdout().lock();
        Writer::Stdout(lock)
    }

    /// Locks standard error and writes to it.
    pub fn to_stderr() -> Writer<'a> {
        let lock = io::stderr().lock();
        Writer::Stderr(lock)
    }

    /// Borrows the underlying sink as a `Write` trait object.
    fn mut_ref(&mut self) -> &mut dyn Write {
        match self {
            Writer::File(inner) => inner,
            Writer::Stdout(inner) => inner,
            Writer::Stderr(inner) => inner,
            Writer::Local(inner) => inner,
        }
    }
}
424
425pub struct Carver<'a> {
428 jt: JsonTracker,
429 reader: Reader<'a>,
430 json_writer: Writer<'a>,
431 report_writer: Writer<'a>,
432 pub min_size: usize,
434 pub fix_incomplete: bool,
436 pub report_all: bool,
438}
439
440impl<'a> Carver<'a> {
441 pub fn new(
444 reader: Reader<'a>,
445 json_writer: Writer<'a>,
446 report_writer: Writer<'a>,
447 max_size: Option<usize>,
448 max_ident_depth: Option<usize>,
449 ) -> Self {
450 Carver {
451 jt: JsonTracker::new(max_size, max_ident_depth),
452 reader: reader,
453 json_writer: json_writer,
454 report_writer: report_writer,
455 min_size: DEFAULT_MIN_JSON_SIZE,
456 fix_incomplete: false,
457 report_all: false,
458 }
459 }
460
461 pub fn replace_newlines(&mut self, opt: bool) {
463 self.jt.replace_newlines = opt;
464 }
465
466 fn scout(&mut self) -> Result<Option<(usize, u8)>, io::Error> {
473 let mut read = 0;
474 let mut ch = 0;
475 let r = self.reader.mut_ref();
476 loop {
477 let (done, used) = {
478 let available = match r.fill_buf() {
479 Ok(n) => n,
480 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
481 Err(e) => return Err(e),
482 };
483 match memchr::memchr2(CHAR_LEFT_SQUARE_BRACKET, CHAR_LEFT_CURLY_BRACKET, available)
484 {
485 Some(i) => {
486 ch = available[i];
489 (true, i + 1)
490 }
491 None => (false, available.len()),
492 }
493 };
494 r.consume(used);
495 read += used;
496 if done {
497 return Ok(Some((read, ch)));
498 }
499 if used == 0 {
500 return Ok(None);
501 }
502 }
503 }
504
505 fn handle_left_square_bracket(&mut self) -> Result<Cause, io::Error> {
506 self.jt.add_ident(CHAR_LEFT_SQUARE_BRACKET);
507 for b in self.reader.mut_ref().bytes() {
508 let b = b?;
509 match b {
510 CHAR_LEFT_SQUARE_BRACKET
511 | CHAR_LEFT_CURLY_BRACKET
512 | CHAR_RIGHT_SQUARE_BRACKET
513 | CHAR_QUOT_MARK
514 | CHAR_MINUS
515 | CHAR_ZERO..=CHAR_NINE
516 | CHAR_START_FALSE
517 | CHAR_START_NULL
518 | CHAR_START_TRUE => return Ok(Cause::Found(b)),
519 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => self.jt.advance(b),
520 _ => return Ok(Cause::Corrupted(b)),
521 }
522 }
523 Ok(Cause::Exhausted)
524 }
525
526 fn handle_left_curly_bracket(&mut self) -> Result<Cause, io::Error> {
527 self.jt.add_ident(CHAR_LEFT_CURLY_BRACKET);
528 for b in self.reader.mut_ref().bytes() {
529 let b = b?;
530 match b {
531 CHAR_QUOT_MARK => {
532 self.jt.in_key = true;
533 return Ok(Cause::Found(b));
534 }
535 CHAR_RIGHT_CURLY_BRACKET => return Ok(Cause::Found(b)),
536 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => {
537 self.jt.advance(b);
538 }
539 _ => return Ok(Cause::Corrupted(b)),
540 }
541 }
542 Ok(Cause::Exhausted)
543 }
544
545 fn handle_right_square_bracket(&mut self) -> Result<Cause, io::Error> {
546 match self.jt.remove_ident(CHAR_LEFT_SQUARE_BRACKET) {
547 Ok(true) => return Ok(Cause::Completed),
548 Ok(false) => (),
549 Err(_) => return Ok(Cause::Corrupted(CHAR_RIGHT_SQUARE_BRACKET)),
550 }
551
552 for b in self.reader.mut_ref().bytes() {
553 let b = b?;
554 match b {
555 CHAR_COMMA | CHAR_RIGHT_SQUARE_BRACKET | CHAR_RIGHT_CURLY_BRACKET => {
556 return Ok(Cause::Found(b))
557 }
558 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => self.jt.advance(b),
559 _ => return Ok(Cause::Corrupted(b)),
560 }
561 }
562 Ok(Cause::Exhausted)
563 }
564
565 fn handle_right_curly_bracket(&mut self) -> Result<Cause, io::Error> {
566 match self.jt.remove_ident(CHAR_LEFT_CURLY_BRACKET) {
567 Ok(true) => return Ok(Cause::Completed),
568 Ok(false) => (),
569 Err(_) => return Ok(Cause::Corrupted(CHAR_RIGHT_CURLY_BRACKET)),
570 }
571
572 for b in self.reader.mut_ref().bytes() {
573 let b = b?;
574 match b {
575 CHAR_COMMA | CHAR_RIGHT_SQUARE_BRACKET | CHAR_RIGHT_CURLY_BRACKET => {
576 return Ok(Cause::Found(b))
577 }
578 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => self.jt.advance(b),
579 _ => return Ok(Cause::Corrupted(b)),
580 }
581 }
582 Ok(Cause::Exhausted)
583 }
584
585 fn handle_colon(&mut self) -> Result<Cause, io::Error> {
586 self.jt.in_key = false;
587 self.jt.advance(CHAR_COLON);
588 for b in self.reader.mut_ref().bytes() {
589 let b = b?;
590 match b {
591 CHAR_LEFT_CURLY_BRACKET
592 | CHAR_LEFT_SQUARE_BRACKET
593 | CHAR_MINUS
594 | CHAR_ZERO..=CHAR_NINE
595 | CHAR_QUOT_MARK
596 | CHAR_START_FALSE
597 | CHAR_START_NULL
598 | CHAR_START_TRUE => return Ok(Cause::Found(b)),
599 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => self.jt.advance(b),
600 _ => return Ok(Cause::Corrupted(b)),
601 }
602 }
603 Ok(Cause::Exhausted)
604 }
605
606 fn handle_comma(&mut self) -> Result<Cause, io::Error> {
607 self.jt.advance(CHAR_COMMA);
608 match self.jt.last_ident() {
609 Some(CHAR_LEFT_SQUARE_BRACKET) => {
610 for b in self.reader.mut_ref().bytes() {
611 let b = b?;
612 match b {
613 CHAR_LEFT_CURLY_BRACKET
614 | CHAR_LEFT_SQUARE_BRACKET
615 | CHAR_MINUS
616 | CHAR_ZERO..=CHAR_NINE
617 | CHAR_QUOT_MARK
618 | CHAR_START_FALSE
619 | CHAR_START_NULL
620 | CHAR_START_TRUE => return Ok(Cause::Found(b)),
621 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => {
622 self.jt.advance(b)
623 }
624 _ => return Ok(Cause::Corrupted(b)),
625 }
626 }
627 Ok(Cause::Exhausted)
628 }
629 Some(CHAR_LEFT_CURLY_BRACKET) => {
630 for b in self.reader.mut_ref().bytes() {
631 let b = b?;
632 match b {
633 CHAR_QUOT_MARK => {
634 self.jt.in_key = true;
635 return Ok(Cause::Found(b));
636 }
637 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => {
638 self.jt.advance(b)
639 }
640 _ => return Ok(Cause::Corrupted(b)),
641 }
642 }
643 Ok(Cause::Exhausted)
644 }
645 Some(_) => unreachable!(), None => unreachable!(), }
648 }
649
650 fn handle_string(&mut self) -> Result<Cause, io::Error> {
651 self.jt.advance(CHAR_QUOT_MARK);
652 let mut in_string = true;
653 let mut in_escape = false;
654 let mut in_escaped_unicode = 0;
655
656 for b in self.reader.mut_ref().bytes() {
657 let b = b?;
658 let last_ident = self.jt.last_ident().unwrap();
661
662 if !in_string {
663 match (b, last_ident, self.jt.in_key) {
664 (CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN, _, _) => (),
665 (CHAR_COMMA | CHAR_RIGHT_SQUARE_BRACKET, CHAR_LEFT_SQUARE_BRACKET, _) => {
667 return Ok(Cause::Found(b))
668 }
669 (CHAR_COMMA | CHAR_RIGHT_CURLY_BRACKET, CHAR_LEFT_CURLY_BRACKET, false) => {
671 return Ok(Cause::Found(b))
672 }
673 (CHAR_COLON, CHAR_LEFT_CURLY_BRACKET, true) => return Ok(Cause::Found(b)),
675 (_, _, _) => return Ok(Cause::Corrupted(b)),
676 }
677 } else {
678 match (b, in_escape, in_escaped_unicode) {
679 (CHAR_ESCAPE, false, 0) => in_escape = true,
680 (CHAR_QUOT_MARK, false, 0) => in_string = false,
681 (0x00..0x1F, _, _) => return Ok(Cause::Corrupted(b)),
682 (_, false, 0) => {
683 if byte_needs_escape(b) {
684 return Ok(Cause::Corrupted(b));
685 }
686 }
687 (CHAR_U, true, 0) => {
688 in_escaped_unicode = 4;
689 in_escape = false;
690 }
691 (_, true, 0) => {
692 if byte_can_escape(b) {
693 in_escape = false;
694 } else {
695 return Ok(Cause::Corrupted(b));
696 }
697 }
698 (_, _, 1..=4) => {
699 if b.is_ascii_hexdigit() {
700 in_escaped_unicode -= 1;
701 } else {
702 return Ok(Cause::Corrupted(b));
703 }
704 }
705 (_, _, _) => return Ok(Cause::Corrupted(b)),
706 }
707 }
708 self.jt.advance(b);
709 }
710 Ok(Cause::Exhausted)
711 }
712
713 fn handle_number(&mut self, start_num: u8) -> Result<Cause, io::Error> {
714 self.jt.advance(start_num);
715 let mut in_frac = false;
716 let mut in_exp = false;
717 let mut in_leading_zero: Option<bool> = None;
718
719 for b in self.reader.mut_ref().bytes() {
720 let b = b?;
721 let last_byte = self.jt.last_byte().unwrap();
724
725 if in_leading_zero == None {
730 in_leading_zero = match last_byte {
731 CHAR_MINUS => None,
732 CHAR_ZERO => Some(true),
733 _ => Some(false),
734 }
735 }
736 if in_leading_zero == Some(true) {
737 in_leading_zero = match b {
738 CHAR_ZERO..=CHAR_NINE => return Ok(Cause::Corrupted(b)),
739 _ => Some(false),
740 }
741 }
742
743 match (last_byte, b) {
744 (CHAR_MINUS | CHAR_PLUS | CHAR_DECIMAL, CHAR_ZERO..=CHAR_NINE) => (),
746 (
748 CHAR_EXP_LOWER | CHAR_EXP_UPPER,
749 CHAR_ZERO..=CHAR_NINE | CHAR_MINUS | CHAR_PLUS,
750 ) => (),
751 (
754 CHAR_ZERO..=CHAR_NINE,
755 CHAR_ZERO..=CHAR_NINE | CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE,
756 ) => (),
757 (CHAR_ZERO..=CHAR_NINE, CHAR_DECIMAL) => match (in_frac, in_exp) {
760 (true, _) | (_, true) => return Ok(Cause::Corrupted(b)),
761 (false, _) => in_frac = true,
762 },
763 (CHAR_ZERO..=CHAR_NINE, CHAR_EXP_LOWER | CHAR_EXP_UPPER) => match in_exp {
766 true => return Ok(Cause::Corrupted(b)),
767 false => in_exp = true,
768 },
769 (
772 CHAR_SPACE
773 | CHAR_TAB
774 | CHAR_NEWLINE
775 | CHAR_CARRIAGE_RETURN
776 | CHAR_ZERO..=CHAR_NINE,
777 CHAR_COMMA | CHAR_RIGHT_SQUARE_BRACKET | CHAR_RIGHT_CURLY_BRACKET,
778 ) => return Ok(Cause::Found(b)),
779 (_, _) => return Ok(Cause::Corrupted(b)),
781 }
782 self.jt.advance(b);
783 }
784 Ok(Cause::Exhausted)
785 }
786
787 fn handle_literal(&mut self, start_char: u8) -> Result<Cause, io::Error> {
788 self.jt.advance(start_char);
789 let literal: &[u8] = match start_char {
790 CHAR_START_FALSE => "alse".as_bytes(),
791 CHAR_START_NULL => "ull".as_bytes(),
792 CHAR_START_TRUE => "rue".as_bytes(),
793 _ => unreachable!(),
794 };
795
796 for (i, b) in self.reader.mut_ref().bytes().enumerate() {
797 let b = b?;
798 if literal[i] != b {
799 return Ok(Cause::Corrupted(b));
800 }
801 self.jt.advance(b);
802 if literal.len() == i + 1 {
803 break;
804 }
805 }
806
807 for b in self.reader.mut_ref().bytes() {
808 let b = b?;
809 match b {
810 CHAR_COMMA | CHAR_RIGHT_SQUARE_BRACKET | CHAR_RIGHT_CURLY_BRACKET => {
811 return Ok(Cause::Found(b));
812 }
813 CHAR_SPACE | CHAR_TAB | CHAR_NEWLINE | CHAR_CARRIAGE_RETURN => self.jt.advance(b),
814 _ => return Ok(Cause::Corrupted(b)),
815 }
816 }
817 Ok(Cause::Exhausted)
818 }
819
820 fn hunt(&mut self, mut ch: u8) -> Result<Cause, ()> {
821 loop {
822 let res = match ch {
823 CHAR_LEFT_SQUARE_BRACKET => self.handle_left_square_bracket(),
824 CHAR_LEFT_CURLY_BRACKET => self.handle_left_curly_bracket(),
825 CHAR_RIGHT_SQUARE_BRACKET => self.handle_right_square_bracket(),
826 CHAR_RIGHT_CURLY_BRACKET => self.handle_right_curly_bracket(),
827 CHAR_COLON => self.handle_colon(),
828 CHAR_COMMA => self.handle_comma(),
829 CHAR_QUOT_MARK => self.handle_string(),
830 CHAR_MINUS | CHAR_ZERO..=CHAR_NINE => self.handle_number(ch),
831 CHAR_START_FALSE | CHAR_START_NULL | CHAR_START_TRUE => self.handle_literal(ch),
832 _ => {
833 return Err(());
834 }
835 };
836
837 ch = match res {
838 Ok(Cause::Completed) => {
839 return Ok(Cause::Completed);
840 }
841 Ok(Cause::Found(ch)) => ch,
842 Ok(Cause::Corrupted(ch)) => {
843 return Ok(Cause::Corrupted(ch));
844 }
845 Ok(Cause::Exhausted) => {
846 return Ok(Cause::Exhausted);
847 }
848 Err(_) => {
849 return Err(()); }
851 }
852 }
853 }
854
855 fn _print_incomplete(&mut self) -> Result<(), errors::Err> {
856 let w = self.json_writer.mut_ref();
857 w.write_all(&self.jt.processed[..self.jt.partial_close_end + 1])?;
858 for i in (0..self.jt.cur_ident_level).rev() {
859 let closing_ident = _closing_ident(self.jt.ident_levels[i]);
860 w.write_all(&[closing_ident])?;
861 }
862 w.write_all(&[CHAR_NEWLINE])?;
863 Ok(())
864 }
865
866 pub fn parse(&mut self) -> Result<(), errors::Err> {
868 let mut start = 0;
869 let mut lastb: Option<u8> = None;
870
871 loop {
872 let (read, ch) = match lastb {
873 Some(CHAR_LEFT_CURLY_BRACKET) | Some(CHAR_LEFT_SQUARE_BRACKET) => {
874 (0, lastb.unwrap())
876 }
877 _ => match self.scout() {
878 Ok(None) => {
879 break;
880 }
881 Ok(Some((read, ch))) => (read, ch),
882 Err(_) => {
883 break;
884 }
885 },
886 };
887 start = start + read - 1;
888 if lastb.is_some() {
889 start += 1;
890 }
891
892 match self.hunt(ch) {
893 Ok(Cause::Completed) => {
894 let end = start + self.jt.cur - 1;
895 let w = self.json_writer.mut_ref();
896 if self.jt.cur >= self.min_size {
897 w.write_all(&self.jt.processed[..self.jt.cur])?;
898 w.write_all(&[CHAR_NEWLINE])?;
899 if self.report_all {
900 let report = Report {
901 status: Cause::Completed,
902 start: start,
903 end: end,
904 partial_end: end,
905 };
906 report.print(&mut self.report_writer)?;
907 }
908 }
909 start = end + 1;
910 lastb = None;
911 }
912 Ok(Cause::Corrupted(ch)) => {
913 let corrupted_end = start + self.jt.cur - 1;
914 let partial_end = start + self.jt.partial_close_end;
915 if self.jt.partial_close_end >= self.min_size {
916 let report = Report {
917 status: Cause::Corrupted(ch),
918 start: start,
919 end: corrupted_end,
920 partial_end: partial_end,
921 };
922 report.print(&mut self.report_writer)?;
923 if self.fix_incomplete {
924 self._print_incomplete()?
925 }
926 }
927 start = corrupted_end + 1;
928 lastb = Some(ch);
929 }
930 Ok(Cause::Exhausted) => {
931 let corrupted_end = start + self.jt.cur - 1;
932 let partial_end = start + self.jt.partial_close_end;
933 if self.jt.partial_close_end >= self.min_size {
934 let report = Report {
935 status: Cause::Exhausted,
936 start: start,
937 end: corrupted_end,
938 partial_end: partial_end,
939 };
940 report.print(&mut self.report_writer)?;
941 if self.fix_incomplete {
942 self._print_incomplete()?
943 }
944 }
945 break;
946 }
947 Ok(Cause::Found(_)) => unreachable!(),
948 Err(_) => {
949 break;
950 }
951 };
952 self.jt.quick_clean();
953 }
954 Ok(())
955 }
956}
957
958#[cfg(test)]
959mod tests {
960 use rstest::rstest;
961 use std::fs;
962 use std::path::PathBuf;
963
964 use super::*;
965
966 fn create_carver<'a>(buf: &'a [u8]) -> Carver<'a> {
967 let reader = BufReader::new(buf);
968 let json_writer = BufWriter::new(vec![]);
969 let report_writer = BufWriter::new(vec![]);
970 let mut carver = Carver::new(
971 Reader::Local(reader),
972 Writer::Local(json_writer),
973 Writer::Local(report_writer),
974 None,
975 None,
976 );
977 carver.min_size = 0;
978 carver
979 }
980
981 fn get_buf(writer: &Writer) -> Vec<u8> {
982 let mut res_buf = match writer {
983 Writer::Local(w) => w.buffer().to_vec(),
984 _ => unreachable!(),
985 };
986 if res_buf.last() == Some(&CHAR_NEWLINE) {
987 res_buf.pop();
988 }
989 res_buf
990 }
991
992 fn parse(buf: &[u8]) -> Vec<u8> {
994 let buf_disp = String::from_utf8_lossy(buf);
995 eprintln!("### Evaluating buffer: {buf_disp}");
996 let mut carver = create_carver(buf);
997 let res = carver.parse();
998 assert!(res.is_ok());
999 let res_buf = get_buf(&carver.json_writer);
1000 let res_buf_disp = String::from_utf8_lossy(&res_buf);
1001 eprintln!("### Result is: {res_buf_disp}");
1002 res_buf
1003 }
1004
1005 fn report_incomplete(buf: &[u8], fix: bool) -> (Vec<u8>, Vec<u8>) {
1006 let buf_disp = String::from_utf8_lossy(buf);
1007 eprintln!("### Evaluating buffer: {buf_disp}");
1008 let mut carver = create_carver(buf);
1009 carver.fix_incomplete = fix;
1010 let res = carver.parse();
1011 assert!(res.is_ok());
1012 let json_buf = get_buf(&carver.json_writer);
1013 let report_buf = get_buf(&carver.report_writer);
1014 let json_buf_disp = String::from_utf8_lossy(&json_buf);
1015 let report_buf_disp = String::from_utf8_lossy(&report_buf);
1016 eprintln!("### Result is: {json_buf_disp}");
1017 eprintln!("### Report is: {report_buf_disp}");
1018 (json_buf, report_buf)
1019 }
1020
1021 fn collect(buf: &[u8]) -> Vec<String> {
1024 let buf: Vec<u8> = parse(buf);
1025 let s: String = String::from_utf8(buf)
1026 .unwrap()
1027 .trim_end_matches("\n")
1028 .to_string();
1029 let mut v: Vec<String> = vec![];
1030 for line in s.lines() {
1031 v.push(line.to_owned())
1032 }
1033 v
1034 }
1035
1036 #[test]
1037 fn test_parse_found() {
1038 let buf = "{}";
1039 assert_eq!(collect(buf.as_bytes()), [buf]);
1040 let buf = "[{}]";
1041 assert_eq!(collect(buf.as_bytes()), [buf]);
1042 let buf = "{ {} ]";
1043 assert_eq!(collect(buf.as_bytes()), ["{}"]);
1044 let buf = "{ []";
1045 assert_eq!(collect(buf.as_bytes()), ["[]"]);
1046 let buf = "hey\n{[]}";
1047 assert_eq!(collect(buf.as_bytes()), ["[]"]);
1048 let buf = "hey";
1049 assert_eq!(collect(buf.as_bytes()), vec![] as Vec<String>);
1050 let buf = "[[[[[[[{}]]]]]]]";
1051 assert_eq!(collect(buf.as_bytes()), [buf]);
1052 let buf = "I[{}]want[[]]moar";
1053 assert_eq!(collect(buf.as_bytes()), ["[{}]", "[[]]"]);
1054 let buf = r#"{"hey": "there"}"#;
1055 assert_eq!(collect(buf.as_bytes()), [buf]);
1056 let buf = r#"{"hey": "there"}{"how": "are", "you": "doing?"}"#;
1057 assert_eq!(
1058 collect(buf.as_bytes()),
1059 [r#"{"hey": "there"}"#, r#"{"how": "are", "you": "doing?"}"#,]
1060 );
1061 let buf = r#"["test", ["nested", {"json": "objs"}]]"#;
1062 assert_eq!(collect(buf.as_bytes()), [buf]);
1063 let buf = r#"[1, 2]"#;
1064 assert_eq!(collect(buf.as_bytes()), [buf]);
1065 let buf = r#"[1, {"test": -2}]"#;
1066 assert_eq!(collect(buf.as_bytes()), [buf]);
1067 let buf = r#"[1]{[-9]test: 9}"#;
1068 assert_eq!(collect(buf.as_bytes()), ["[1]", "[-9]"]);
1069 let buf = r#"{"numbers": 9, "literals": true, "lists": ["1", false, {}]}"#;
1070 assert_eq!(collect(buf.as_bytes()), [buf]);
1071 let buf = r#"[trap, [nullify, 1], {"true": true}]"#;
1072 assert_eq!(collect(buf.as_bytes()), [r#"{"true": true}"#]);
1073 let buf = r#"[1]{"key":"val": [2],[fal[3]]]"#;
1074 assert_eq!(collect(buf.as_bytes()), ["[1]", "[2]", "[3]"]);
1075 }
1076
1077 #[test]
1078 fn test_parse_fail() {
1079 let bad_buffers: Vec<&str> = vec![
1080 "hey",
1081 r#"{"hey", "there"}"#,
1082 "{:}",
1083 "{]}",
1084 r#"{9: "9"}"#,
1085 r#"{"more": "colons": "bad"}"#,
1086 r#"{"test":, "bad"}"#,
1087 r#"[:]"#,
1088 r#"["a", "b",]"#,
1089 r#"["a", "b", {": "test"}]"#,
1090 "999",
1091 r#"{999: "666"}"#,
1092 r#"[999: "666"]"#,
1093 r#"[999 , ]"#,
1094 r#"[trap]"#,
1095 r#"[nullify]"#,
1096 r#"{true: false}"#,
1097 r#"[true"#,
1098 r#"[false"#,
1099 r#"[null"#,
1100 r#"[9"#,
1101 r#"["test"#,
1102 r#"["test""#,
1103 "[{",
1104 "[",
1105 "{",
1106 ];
1107 for buf in bad_buffers {
1108 assert_eq!(collect(buf.as_bytes()), vec![] as Vec<String>);
1109 }
1110 }
1111
1112 #[test]
1113 fn test_report_incomplete() {
1114 let buf = "{";
1115 let buf_expected = "{}";
1116 let report_expected = "exhausted,0,0,0";
1117 let (buf, report) = report_incomplete(buf.as_bytes(), true);
1118 assert_eq!(buf, buf_expected.as_bytes());
1119 assert_eq!(report, report_expected.as_bytes());
1120
1121 let buf = "[{[{[[";
1122 let buf_expected = "[{}]\n\
1123 [{}]\n\
1124 [[]]";
1125 let report_expected = "corrupted,0,1,1\n\
1126 corrupted,2,3,3\n\
1127 exhausted,4,5,5";
1128 let (buf, report) = report_incomplete(buf.as_bytes(), true);
1129 assert_eq!(buf, buf_expected.as_bytes());
1130 assert_eq!(report, report_expected.as_bytes());
1131
1132 let buf = r#"{"test": {"inside": [1, 2]"#;
1133 let buf_expected = r#"{"test": {"inside": [1, 2]}}"#;
1134 let report_expected = "exhausted,0,25,25";
1135 let (buf, report) = report_incomplete(buf.as_bytes(), true);
1136 assert_eq!(buf, buf_expected.as_bytes());
1137 assert_eq!(report, report_expected.as_bytes());
1138
1139 let buf = r#"[1, 2, 3, {"test"[true, null, far{"key": "value",[9]"#;
1140 let buf_expected = "[1, 2, 3, {}]\n\
1141 []\n\
1142 {}\n\
1143 [9]";
1144 let report_expected = "corrupted,0,16,10\n\
1145 corrupted,17,31,17\n\
1146 corrupted,33,48,33";
1147 let (buf, report) = report_incomplete(buf.as_bytes(), true);
1148 assert_eq!(buf, buf_expected.as_bytes());
1149 assert_eq!(report, report_expected.as_bytes());
1150
1151 let buf = r#"[1]{"key":"val": [2],[fal[3]]]"#;
1152 let buf_expected = "[1]\n\
1153 {}\n\
1154 [2]\n\
1155 []\n\
1156 [3]";
1157 let report_expected = "corrupted,3,14,3\n\
1158 corrupted,22,25,22";
1159 let (buf, report) = report_incomplete(buf.as_bytes(), true);
1160 assert_eq!(buf, buf_expected.as_bytes());
1161 assert_eq!(report, report_expected.as_bytes());
1162 }
1163
1164 #[rstest]
1165 fn json_test_suite_success(#[files("tests/JSONTestSuite/**/y_*.json")] path: PathBuf) {
1166 let buf: Vec<u8> = fs::read(path).unwrap();
1167 let mut res_buf = buf.clone();
1168 if res_buf.last() == Some(&CHAR_NEWLINE) {
1169 res_buf.pop();
1170 }
1171 assert_eq!(parse(&buf), res_buf);
1172 }
1173
1174 #[rstest]
1175 fn json_test_suite_impl(#[files("tests/JSONTestSuite/**/i_*.json")] path: PathBuf) {
1176 let buf: Vec<u8> = fs::read(path).unwrap();
1177 parse(&buf);
1178 }
1179
1180 #[rstest]
1181 fn json_test_suite_fail(
1182 #[files("tests/JSONTestSuite/**/n_*.json")]
1183 #[files("tests/test_valid_but_no_brackets/*.json")]
1184 path: PathBuf,
1185 ) {
1186 let buf: Vec<u8> = fs::read(path).unwrap();
1187 assert_eq!(parse(&buf).len(), 0);
1188 }
1189
1190 #[rstest]
1191 fn json_test_suite_partial(#[files("tests/test_partial/*.json")] path: PathBuf) {
1192 let buf: Vec<u8> = fs::read(path).unwrap();
1193 let res = parse(&buf);
1194 assert!(res.len() > 0);
1195 assert_ne!(res, buf);
1196 }
1197}