1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
//! Classifiers working on the input stream.
//!
//! - [`quotes`] contains the low-level [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`)
//! computing basic information on which characters are escaped or within quotes.
//! - [`structural`] contains the [`StructuralIterator`](`structural::StructuralIterator`)
//! that wraps over a quote classifier to extract a stream of [`Structural`](`structural::Structural`) characters.
//! - [`depth`] contains the [`DepthIterator`](`depth::DepthIterator`) that works on top of a quote classifier
//! to provide quick fast-forwarding over the stream while keeping track of the depth.
//!
//! This base module provides the [`ResumeClassifierState`] struct common between all
//! higher-level classifiers that work on top of a [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`).
//! It allows saving the state of a classifier, which can later be used to resume classification
//! with a (possibly different) high-level classifier. The index of the saved state can be
//! pushed forward before resuming.
//!
//! # Examples
//! ```rust
//! use rsonpath_lib::classification::quotes::classify_quoted_sequences;
//! use rsonpath_lib::classification::structural::{
//!     classify_structural_characters, resume_structural_classification, Structural,
//!     StructuralIterator,
//! };
//! use aligners::AlignedBytes;
//!
//! let input = AlignedBytes::new_padded(r#"{"a":[42, {}, 44]}"#.as_bytes());
//! let quote_classifier = classify_quoted_sequences(&input);
//! let mut structural_classifier = classify_structural_characters(quote_classifier);
//! structural_classifier.turn_colons_on(0);
//! structural_classifier.turn_commas_on(0);
//!
//! // Classify first two structural characters.
//! assert_eq!(
//!     structural_classifier.next(),
//!     Some(Structural::Opening(0))
//! );
//! assert_eq!(
//!     structural_classifier.next(),
//!     Some(Structural::Colon(4))
//! );
//!
//! // We stop at the first non-classified character, Opening(5).
//! let mut resume_state = structural_classifier.stop();
//! assert_eq!(resume_state.get_idx(), 5);
//!
//! // Skip 6 bytes.
//! resume_state.offset_bytes(6);
//! assert_eq!(resume_state.get_idx(), 11);
//!
//! // Resume.
//! let mut structural_classifier_2 = resume_structural_classification(resume_state);
//! assert_eq!(
//!     structural_classifier_2.next(),
//!     Some(Structural::Closing(11))
//! );
//! ```
pub mod depth;
pub mod quotes;
pub mod structural;

use crate::debug;
use quotes::{QuoteClassifiedBlock, QuoteClassifiedIterator};

/// Snapshot of a stopped classifier from which classification can be resumed.
///
/// Bundles the stopped [`QuoteClassifiedIterator`] with the state of the block
/// at which classification stopped and the comma/colon toggles that were in
/// effect at that time.
pub struct ResumeClassifierState<'a, I>
where
    I: QuoteClassifiedIterator<'a>,
{
    /// The quote classifier that was stopped.
    pub iter: I,
    /// State of the block at which classification was stopped, if there is one.
    ///
    /// When this is `None`, [`get_idx`](Self::get_idx) treats the in-block
    /// index as zero.
    pub block: Option<ResumeClassifierBlockState<'a>>,
    /// Whether comma classification was turned on when the classification was stopped.
    pub are_commas_on: bool,
    /// Whether colon classification was turned on when the classification was stopped.
    pub are_colons_on: bool,
}

/// State of the block at which classification was stopped.
///
/// Pairs the quote-classified block with the position reached inside it.
pub struct ResumeClassifierBlockState<'a> {
    /// Quote classified information about the block.
    pub block: QuoteClassifiedBlock<'a>,
    /// The index within `block` at which classification was stopped.
    ///
    /// This value is relative to the block; [`ResumeClassifierState::get_idx`]
    /// adds the iterator's offset to it to obtain the index in the input.
    pub idx: usize,
}

impl<'a, I: QuoteClassifiedIterator<'a>> ResumeClassifierState<'a, I> {
    /// Get the index in the original bytes input at which classification has stopped.
    ///
    /// The result is the offset reported by the stopped iterator plus the
    /// index stored in the current block state, or plus zero when there is
    /// no current block.
    #[inline(always)]
    pub fn get_idx(&self) -> usize {
        debug!(
            "iter offset: {}, block idx: {:?}",
            self.iter.get_offset(),
            self.block.as_ref().map(|b| b.idx)
        );

        let iter_offset = self.iter.get_offset();
        let idx_in_block = match &self.block {
            Some(state) => state.idx,
            None => 0,
        };

        iter_offset + idx_in_block
    }

    /// Move the state forward by `count` bytes.
    ///
    /// If the new position lies strictly inside the current block, only the
    /// in-block index is bumped. Otherwise the iterator is advanced by the
    /// number of whole blocks that are skipped, the next block is pulled from
    /// it, and the in-block index is set to the leftover byte count. If the
    /// iterator yields no further block, the block state becomes `None`.
    ///
    /// # Panics
    /// If the `count` is not positive.
    #[inline]
    pub fn offset_bytes(&mut self, count: isize) {
        assert!(count > 0);
        let count = count as usize;

        // Current in-block index and the number of bytes from it to the end
        // of the current block; both are zero when there is no current block.
        let (idx_in_block, bytes_left) = match &self.block {
            Some(state) => (state.idx, state.block.len() - state.idx),
            None => (0, 0),
        };

        if count < bytes_left {
            // `bytes_left` is nonzero only when a current block exists,
            // so this branch always finds one to update.
            if let Some(state) = self.block.as_mut() {
                state.idx += count;
            }
        } else {
            let block_size = I::block_size();

            // Whole blocks lying between the end of the current block and the target.
            let whole_blocks = (count - bytes_left) / block_size;
            // Position of the target inside the block that contains it.
            let idx_in_target =
                (idx_in_block + count - whole_blocks * block_size) % block_size;

            self.iter.offset(whole_blocks as isize);
            self.block = self.iter.next().map(|block| ResumeClassifierBlockState {
                block,
                idx: idx_in_target,
            });
        }

        debug!(
            "offset_bytes({count}) results in idx moved to {}",
            self.get_idx()
        );
    }
}