use super::{Token, TokenStream, Tokenizer};
use crate::schema::FACET_SEP_BYTE;
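
/// Tokenizer for facet fields.
///
/// Given the encoded representation of a facet such as `/top/a/b`, it emits one
/// token per facet along the path, from the root facet down to the facet
/// itself: `/`, `/top`, `/top/a`, `/top/a/b`.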
#[derive(Clone)]
pub struct FacetTokenizer;

#[derive(Debug)]
enum State {
    /// The root facet `/` (whose encoded form is the empty string) has not been emitted yet.
    RootFacetNotEmitted,
    /// Tokens have been emitted up to this byte offset in the encoded facet.
    UpToPosition(usize),
    /// All tokens have been emitted.
    Terminated,
}
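
/// Token stream produced by the `FacetTokenizer`. The token text is never
/// cleared between calls to `advance`, so each emitted token extends the
/// previous one by a single facet segment.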
pub struct FacetTokenStream<'a> {
    text: &'a str,
    state: State,
    token: Token,
}
impl<'a> Tokenizer<'a> for FacetTokenizer {
    type TokenStreamImpl = FacetTokenStream<'a>;

    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
        FacetTokenStream {
            text,
            state: State::RootFacetNotEmitted,
            token: Token::default(),
        }
    }
}
impl<'a> TokenStream for FacetTokenStream<'a> {
    fn advance(&mut self) -> bool {
        match self.state {
            State::RootFacetNotEmitted => {
                // Emit the root facet `/` first: the token text stays empty,
                // which is the encoded form of the root facet.
                self.state = if self.text.is_empty() {
                    State::Terminated
                } else {
                    State::UpToPosition(0)
                };
                true
            }
            State::UpToPosition(cursor) => {
                // Grow the token text up to (but excluding) the next separator
                // byte. The text is not cleared, so each token extends the
                // previous one by one facet segment.
                let bytes: &[u8] = self.text.as_bytes();
                if let Some(next_sep_pos) = bytes[cursor + 1..]
                    .iter()
                    .cloned()
                    .position(|b| b == FACET_SEP_BYTE)
                    .map(|pos| cursor + 1 + pos)
                {
                    let facet_part = &self.text[cursor..next_sep_pos];
                    self.token.text.push_str(facet_part);
                    self.state = State::UpToPosition(next_sep_pos);
                } else {
                    // No separator left: emit the remainder of the facet and stop.
                    let facet_part = &self.text[cursor..];
                    self.token.text.push_str(facet_part);
                    self.state = State::Terminated;
                }
                true
            }
            State::Terminated => false,
        }
    }

    fn token(&self) -> &Token {
        &self.token
    }

    fn token_mut(&mut self) -> &mut Token {
        &mut self.token
    }
}
#[cfg(test)]
mod tests {
    use super::FacetTokenizer;
    use crate::schema::Facet;
    use crate::tokenizer::{Token, TokenStream, Tokenizer};

    #[test]
    fn test_facet_tokenizer() {
        let facet = Facet::from_path(vec!["top", "a", "b"]);
        let mut tokens = vec![];
        {
            let mut add_token = |token: &Token| {
                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap();
                tokens.push(format!("{}", facet));
            };
            FacetTokenizer
                .token_stream(facet.encoded_str())
                .process(&mut add_token);
        }
        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], "/");
        assert_eq!(tokens[1], "/top");
        assert_eq!(tokens[2], "/top/a");
        assert_eq!(tokens[3], "/top/a/b");
    }

    #[test]
    fn test_facet_tokenizer_root_facets() {
        let facet = Facet::root();
        let mut tokens = vec![];
        {
            let mut add_token = |token: &Token| {
                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap();
                tokens.push(format!("{}", facet));
            };
            FacetTokenizer
                .token_stream(facet.encoded_str())
                .process(&mut add_token);
        }
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], "/");
    }
}