// lucisearch 0.8.0

/// One unit of output from the analysis pipeline.
///
/// A token bundles three things: its text (which token filters may have
/// rewritten), the byte span it occupied in the original input, and its
/// logical position in the token stream, which phrase queries and proximity
/// scoring rely on.
///
/// See [[analyzers]].
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Token {
    /// Text of the token; may differ from the source slice if a token
    /// filter transformed it.
    pub text: String,
    /// Start of the token in the original text, as a byte offset.
    pub offset_from: usize,
    /// End of the token in the original text, as a byte offset one past
    /// the last byte of the token.
    pub offset_to: usize,
    /// Monotonically increasing position within the token stream, consumed
    /// by phrase queries and proximity scoring.
    pub position: u32,
}

impl Token {
    /// Create a new token with the given text, offsets, and position.
    pub fn new(
        text: impl Into<String>,
        offset_from: usize,
        offset_to: usize,
        position: u32,
    ) -> Self {
        Self {
            text: text.into(),
            offset_from,
            offset_to,
            position,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor places each argument in the matching field.
    #[test]
    fn token_fields() {
        let Token {
            text,
            offset_from,
            offset_to,
            position,
        } = Token::new("hello", 0, 5, 0);

        assert_eq!(text, "hello");
        assert_eq!((offset_from, offset_to), (0, 5));
        assert_eq!(position, 0);
    }

    /// A cloned token compares equal to the token it was cloned from.
    #[test]
    fn token_clone() {
        let original = Token::new("world", 6, 11, 1);
        let copy = original.clone();
        assert_eq!(original, copy);
    }
}