//! Classifiers working on the input stream.
//!
//! - [`quotes`] contains the low-level [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`)
//! computing basic information on which characters are escaped or within quotes.
//! - [`structural`] contains the [`StructuralIterator`](`structural::StructuralIterator`)
//! that wraps over a quote classifier to extract a stream of [`Structural`](`structural::Structural`) characters.
//! - [`depth`] contains the [`DepthIterator`](`depth::DepthIterator`) that works on top of a quote classifier
//! to provide quick fast-forwarding over the stream while keeping track of the depth.
//!
//! This base module provides the [`ResumeClassifierState`] struct common between all
//! higher-level classifiers that work on top of a [`QuoteClassifiedIterator`](`quotes::QuoteClassifiedIterator`).
//! It allows saving the state of a classifier so that classification can later be resumed
//! by a (possibly different) high-level classifier. The saved state's index can also be
//! pushed forward; see [`ResumeClassifierState::forward_to`].
#[cfg(test)]
mod classifier_correctness_tests;
pub mod depth;
pub(crate) mod mask;
pub mod memmem;
pub mod quotes;
pub mod simd;
pub mod structural;
use crate::{
    debug,
    input::{error::InputError, InputBlockIterator},
};
use quotes::{QuoteClassifiedBlock, QuoteClassifiedIterator};
/// State allowing resumption of a classifier from a particular place
/// in the input along with the stopped [`QuoteClassifiedIterator`].
///
/// The position in the input that this state represents is the offset reported
/// by [`iter`](ResumeClassifierState::iter) plus the index stored in
/// [`block`](ResumeClassifierState::block), if there is one; see
/// [`get_idx`](ResumeClassifierState::get_idx).
///
/// `N` is the block size: [`forward_to`](ResumeClassifierState::forward_to) divides
/// byte distances by `N` to work out how many blocks to advance.
pub struct ResumeClassifierState<'i, I, Q, M, const N: usize>
where
    I: InputBlockIterator<'i, N>,
{
    /// The stopped iterator.
    ///
    /// Its offset is used as the base for computing the stopped position.
    pub iter: Q,
    /// The block at which classification was stopped.
    ///
    /// When this is `None` the position within the block is treated as zero.
    pub block: Option<ResumeClassifierBlockState<'i, I, M, N>>,
    /// Whether comma classification was turned on when the classification was stopped.
    pub are_commas_on: bool,
    /// Whether colon classification was turned on when the classification was stopped.
    pub are_colons_on: bool,
}
/// State of the block at which classification was stopped.
///
/// Pairs the quote-classified block with the index inside it that had been reached,
/// so that a resumed classifier knows where in the block to continue.
pub struct ResumeClassifierBlockState<'i, I, M, const N: usize>
where
    I: InputBlockIterator<'i, N>,
{
    /// Quote classified information about the block.
    pub block: QuoteClassifiedBlock<I::Block, M, N>,
    /// The index at which classification was stopped.
    ///
    /// This is relative to the block; it is added to the iterator's offset to
    /// obtain the position in the whole input.
    pub idx: usize,
}
impl<'i, I, Q, M, const N: usize> ResumeClassifierState<'i, I, Q, M, N>
where
    I: InputBlockIterator<'i, N>,
    Q: QuoteClassifiedIterator<'i, I, M, N>,
{
    /// Returns the index in the original input bytes at which classification has stopped.
    ///
    /// This is the offset reported by the stopped iterator plus the index within the
    /// saved block, or just the iterator's offset when no block is saved.
    #[inline(always)]
    pub fn get_idx(&self) -> usize {
        debug!(
            "iter offset: {}, block idx: {:?}",
            self.iter.get_offset(),
            self.block.as_ref().map(|b| b.idx)
        );
        let base = self.iter.get_offset();
        let within_block = match &self.block {
            Some(state) => state.idx,
            None => 0,
        };
        base + within_block
    }

    /// Advances the state so that it points at `index`.
    ///
    /// If the target lies within the currently saved block only the in-block index is
    /// updated. Otherwise the inner iterator is advanced by the required number of
    /// blocks and the saved block is replaced with whatever the iterator returns.
    ///
    /// # Errors
    /// If the offset crosses block boundaries, then a new block is read from the underlying
    /// [`Input`](crate::input::Input) implementation, which can fail.
    ///
    /// # Panics
    /// If the `index` is not ahead of the current position of the state ([`get_idx`](ResumeClassifierState::get_idx)).
    #[inline]
    #[allow(clippy::panic_in_result_fn)]
    pub fn forward_to(&mut self, index: usize) -> Result<(), InputError> {
        let current_block_start = self.iter.get_offset();
        let current_block_idx = match &self.block {
            Some(state) => state.idx,
            None => 0,
        };
        let current_idx = current_block_start + current_block_idx;
        debug!(
            "Calling forward_to({index}) when the inner iter offset is {current_block_start} and block idx is {current_block_idx:?}"
        );

        // The target has to be strictly ahead of where we are now.
        assert!(index > current_idx);
        let distance = index - current_idx;

        // Measure the target from the *start* of the current block instead of from the
        // current index. That way the quotient is the number of whole blocks to move past
        // and the modulus is the index inside the block we land in.
        let from_block_start = distance + current_block_idx;
        let whole_blocks = from_block_start / N;
        let idx_in_target = from_block_start % N;

        // Decide how far the inner iterator has to move, if at all.
        let advance_by = match self.block.as_mut() {
            Some(current) if whole_blocks == 0 => {
                // Target is inside the block we already hold; no new block is needed.
                current.idx = from_block_start;
                None
            }
            Some(_) => Some(whole_blocks),
            // NOTE(review): one extra step is taken when no block is saved, presumably
            // because the block at the iterator's current offset has not been yielded
            // yet — confirm against `QuoteClassifiedIterator::offset`.
            None => Some(whole_blocks + 1),
        };

        if let Some(count) = advance_by {
            let landed = self.iter.offset(count as isize)?;
            self.block = landed.map(|block| ResumeClassifierBlockState {
                block,
                idx: idx_in_target,
            });
        }

        debug!("forward_to({index}) results in idx moved to {}", self.get_idx());
        Ok(())
    }
}