Skip to main content

luci/analysis/
token.rs

/// A single token produced by the analysis pipeline.
///
/// Carries the token text (possibly transformed by filters), byte offsets into
/// the original text, and a logical position for phrase queries and proximity
/// scoring.
///
/// The offsets always describe the span in the *original* input, so they are
/// not expected to track any rewriting that filters apply to `text`.
///
/// See [[analyzers]].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Token {
    /// The token text, possibly transformed by token filters.
    pub text: String,
    /// Byte offset of the token start in the original text.
    pub offset_from: usize,
    /// Byte offset past the token end in the original text (exclusive end).
    pub offset_to: usize,
    /// Monotonically increasing position in the token stream. Used for
    /// phrase queries and proximity scoring.
    pub position: u32,
}
20
21impl Token {
22    /// Create a new token with the given text, offsets, and position.
23    pub fn new(
24        text: impl Into<String>,
25        offset_from: usize,
26        offset_to: usize,
27        position: u32,
28    ) -> Self {
29        Self {
30            text: text.into(),
31            offset_from,
32            offset_to,
33            position,
34        }
35    }
36}
37
#[cfg(test)]
mod tests {
    use super::*;

    /// `Token::new` stores each argument in the matching field.
    #[test]
    fn token_fields() {
        let token = Token::new("hello", 0, 5, 0);
        assert_eq!(token.text, "hello");
        assert_eq!(token.offset_from, 0);
        assert_eq!(token.offset_to, 5);
        assert_eq!(token.position, 0);
    }

    /// A cloned token compares equal to the token it was cloned from.
    #[test]
    fn token_clone() {
        let token = Token::new("world", 6, 11, 1);
        let cloned = token.clone();
        assert_eq!(token, cloned);
    }
}