1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
//! Classifiers working on the input stream.
//!
//! - [`quotes`] contains the low-level [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`)
//! computing basic information on which characters are escaped or within quotes.
//! - [`structural`] contains the [`StructuralIterator`](`structural::StructuralIterator`)
//! that wraps over a quote classifier to extract a stream of [`Structural`](`structural::Structural`) characters.
//! - [`depth`] contains the [`DepthIterator`](`depth::DepthIterator`) that works on top of a quote classifier
//! to provide quick fast-forwarding over the stream while keeping track of the depth.
//!
//! This base module provides the [`ResumeClassifierState`] struct common between all
//! higher-level classifiers that work on top of a [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`).
//! It allows saving the state of a classifier, which can later be used to resume classification
//! from a (possibly different) high-level classifier. The index stored in this state can be
//! pushed forward.
//!
//! # Examples
//! ```rust
//! use rsonpath::classification::quotes::classify_quoted_sequences;
//! use rsonpath::classification::structural::{
//!     classify_structural_characters, resume_structural_classification,
//!     BracketType, Structural, StructuralIterator,
//! };
//! use rsonpath::input::{Input, OwnedBytes};
//! use rsonpath::result::empty::EmptyRecorder;
//! use rsonpath::FallibleIterator;
//!
//! let json = r#"{"a":[42, {}, 44]}"#.to_owned();
//! let input = OwnedBytes::try_from(json).unwrap();
//! let iter = input.iter_blocks::<_, 64>(&EmptyRecorder);
//! let quote_classifier = classify_quoted_sequences(iter);
//! let mut structural_classifier = classify_structural_characters(quote_classifier);
//! structural_classifier.turn_colons_on(0);
//! structural_classifier.turn_commas_on(0);
//!
//! // Classify first two structural characters.
//! assert_eq!(
//!     structural_classifier.next().unwrap(),
//!     Some(Structural::Opening(BracketType::Curly, 0))
//! );
//! assert_eq!(
//!     structural_classifier.next().unwrap(),
//!     Some(Structural::Colon(4))
//! );
//!
//! // We stop at the first non-classified character, Opening(5).
//! let mut resume_state = structural_classifier.stop();
//! assert_eq!(resume_state.get_idx(), 5);
//!
//! // Skip to index 11.
//! resume_state.forward_to(11).unwrap();
//! assert_eq!(resume_state.get_idx(), 11);
//!
//! // Resume.
//! let mut structural_classifier_2 = resume_structural_classification(resume_state);
//! assert_eq!(
//!     structural_classifier_2.next().unwrap(),
//!     Some(Structural::Closing(BracketType::Curly, 11))
//! );
//! ```
pub mod depth;
pub(crate) mod mask;
pub mod memmem;
pub mod quotes;
pub mod structural;

use crate::{
    debug,
    input::{error::InputError, InputBlockIterator},
};
use quotes::{QuoteClassifiedBlock, QuoteClassifiedIterator};

/// State allowing resumption of a classifier from a particular place
/// in the input along with the stopped [`QuoteClassifiedIterator`].
///
/// The absolute position represented by this state is the offset reported by the
/// stopped iterator plus the index within the saved [`block`](ResumeClassifierState::block),
/// if any; see [`get_idx`](ResumeClassifierState::get_idx).
///
/// # Type parameters
/// - `I` – the underlying [`InputBlockIterator`] producing blocks of the input.
/// - `Q` – the type of the stopped iterator; the methods on this state require it
///   to be a [`QuoteClassifiedIterator`].
/// - `M` – forwarded to [`QuoteClassifiedBlock`] and [`QuoteClassifiedIterator`]
///   (presumably the mask type used by the quote classifier – confirm against [`quotes`]).
/// - `N` – the length of a single block; distances in the input are divided by `N`
///   to count whole blocks when the state is moved forward.
pub struct ResumeClassifierState<'i, I, Q, M, const N: usize>
where
    I: InputBlockIterator<'i, N>,
{
    /// The stopped iterator.
    pub iter: Q,
    /// The block at which classification was stopped.
    ///
    /// When this is `None` the state carries no block and the index within
    /// the block is treated as zero.
    pub block: Option<ResumeClassifierBlockState<'i, I, M, N>>,
    /// Whether comma classification was turned on when the classification was stopped.
    pub are_commas_on: bool,
    /// Whether colon classification was turned on when the classification was stopped.
    pub are_colons_on: bool,
}

/// State of the block at which classification was stopped.
///
/// Pairs the quote-classified block with the position inside it from which
/// classification should continue.
pub struct ResumeClassifierBlockState<'i, I, M, const N: usize>
where
    I: InputBlockIterator<'i, N>,
{
    /// Quote classified information about the block.
    pub block: QuoteClassifiedBlock<I::Block, M, N>,
    /// The index at which classification was stopped.
    ///
    /// This is relative to the block; [`ResumeClassifierState::get_idx`] adds the
    /// stopped iterator's offset to it to obtain the position in the whole input.
    pub idx: usize,
}

impl<'i, I, Q, M, const N: usize> ResumeClassifierState<'i, I, Q, M, N>
where
    I: InputBlockIterator<'i, N>,
    Q: QuoteClassifiedIterator<'i, I, M, N>,
{
    /// Returns the index in the original input bytes at which classification has stopped.
    ///
    /// The result is the offset reported by the stopped iterator plus the index
    /// within the saved block, or the iterator's offset alone when no block is saved.
    #[inline(always)]
    pub fn get_idx(&self) -> usize {
        let iter_offset = self.iter.get_offset();
        let block_idx = self.block.as_ref().map(|b| b.idx);

        debug!("iter offset: {}, block idx: {:?}", iter_offset, block_idx);

        iter_offset + block_idx.unwrap_or(0)
    }

    /// Advances the state so that [`get_idx`](ResumeClassifierState::get_idx) points at `index`.
    ///
    /// If the target lies within the currently saved block only the in-block index
    /// is updated. Otherwise the stopped iterator is asked to move ahead by the required
    /// number of blocks and the block it returns (if any) replaces the saved one.
    ///
    /// # Errors
    /// When the move crosses a block boundary a new block has to be read from the underlying
    /// [`Input`](crate::input::Input) implementation, and that read can fail.
    ///
    /// # Panics
    /// If `index` is not strictly ahead of the current position of the state
    /// ([`get_idx`](ResumeClassifierState::get_idx)).
    #[inline]
    #[allow(clippy::panic_in_result_fn)]
    pub fn forward_to(&mut self, index: usize) -> Result<(), InputError> {
        let current_block_start = self.iter.get_offset();
        let current_block_idx = self.block.as_ref().map_or(0, |b| b.idx);
        let current_idx = current_block_start + current_block_idx;

        debug!(
            "Calling forward_to({index}) when the inner iter offset is {current_block_start} and block idx is {current_block_idx:?}"
        );

        // Only strictly forward movement is supported.
        assert!(index > current_idx);

        // Measure the distance to the target from the *start* of the current block rather than
        // from the current in-block index; the quotient is then the number of whole blocks
        // to pass and the remainder is the index inside the block we land in.
        let distance_from_block_start = (index - current_idx) + current_block_idx;
        let whole_blocks = distance_from_block_start / N;
        let idx_in_target_block = distance_from_block_start % N;

        // Decide how many blocks the iterator has to advance by, if any.
        let blocks_to_advance = match self.block.as_mut() {
            Some(current) if whole_blocks == 0 => {
                // The target is inside the saved block, no new block is needed.
                current.idx = distance_from_block_start;
                None
            }
            Some(_) => Some(whole_blocks),
            // NOTE(review): with no saved block one extra block is requested – presumably because
            // the iterator's offset then refers to a block that has not been yielded yet;
            // confirm against `QuoteClassifiedIterator::offset`.
            None => Some(whole_blocks + 1),
        };

        if let Some(count) = blocks_to_advance {
            // On failure the previously saved block is left untouched.
            let fetched = self.iter.offset(count as isize)?;
            self.block = fetched.map(|block| ResumeClassifierBlockState {
                block,
                idx: idx_in_target_block,
            });
        }

        debug!("forward_to({index}) results in idx moved to {}", self.get_idx());

        Ok(())
    }
}