luci/analysis/token.rs
/// One unit of output from the analysis pipeline.
///
/// A token pairs its text (which token filters may have rewritten) with the
/// byte range it occupies in the original text and with a logical position
/// that phrase queries and proximity scoring rely on.
///
/// See [[analyzers]].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// Text of the token; token filters may have altered it.
    pub text: String,
    /// Where the token begins in the original text, as a byte offset.
    pub offset_from: usize,
    /// Byte offset just past the end of the token in the original text.
    pub offset_to: usize,
    /// Position within the token stream, monotonically increasing. Consumed
    /// by phrase queries and proximity scoring.
    pub position: u32,
}
20
21impl Token {
22 /// Create a new token with the given text, offsets, and position.
23 pub fn new(
24 text: impl Into<String>,
25 offset_from: usize,
26 offset_to: usize,
27 position: u32,
28 ) -> Self {
29 Self {
30 text: text.into(),
31 offset_from,
32 offset_to,
33 position,
34 }
35 }
36}
37
#[cfg(test)]
mod tests {
    use super::*;

    /// `Token::new` stores each argument in the field of the same name.
    #[test]
    fn token_fields() {
        let Token {
            text,
            offset_from,
            offset_to,
            position,
        } = Token::new("hello", 0, 5, 0);

        assert_eq!(text, "hello");
        assert_eq!((offset_from, offset_to), (0, 5));
        assert_eq!(position, 0);
    }

    /// A clone compares equal to the token it was made from.
    #[test]
    fn token_clone() {
        let original = Token::new("world", 6, 11, 1);
        assert_eq!(original, original.clone());
    }
}