summavy/termdict/fst_termdict/
streamer.rs

1use std::io;
2
3use tantivy_fst::automaton::AlwaysMatch;
4use tantivy_fst::map::{Stream, StreamBuilder};
5use tantivy_fst::{Automaton, IntoStreamer, Streamer};
6
7use super::TermDictionary;
8use crate::postings::TermInfo;
9use crate::termdict::TermOrdinal;
10
/// `TermStreamerBuilder` is a helper object used to define
/// a range of terms that should be streamed.
///
/// Bounds are added through the chaining methods (`ge`, `gt`, `le`, `lt`),
/// the direction can be flipped with `backward`, and the stream itself is
/// obtained by calling `into_stream`.
pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
where A: Automaton
{
    // Term dictionary handed over to the resulting `TermStreamer`, which uses
    // it to turn a term ordinal into a `TermInfo`.
    fst_map: &'a TermDictionary,
    // Underlying FST stream builder; every bound set on this object is
    // forwarded to it.
    stream_builder: StreamBuilder<'a, A>,
}
19
20impl<'a, A> TermStreamerBuilder<'a, A>
21where A: Automaton
22{
23    pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
24        TermStreamerBuilder {
25            fst_map,
26            stream_builder,
27        }
28    }
29
30    /// Limit the range to terms greater or equal to the bound
31    pub fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
32        self.stream_builder = self.stream_builder.ge(bound);
33        self
34    }
35
36    /// Limit the range to terms strictly greater than the bound
37    pub fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
38        self.stream_builder = self.stream_builder.gt(bound);
39        self
40    }
41
42    /// Limit the range to terms lesser or equal to the bound
43    pub fn le<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
44        self.stream_builder = self.stream_builder.le(bound);
45        self
46    }
47
48    /// Limit the range to terms lesser or equal to the bound
49    pub fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> Self {
50        self.stream_builder = self.stream_builder.lt(bound);
51        self
52    }
53
54    /// Iterate over the range backwards.
55    pub fn backward(mut self) -> Self {
56        self.stream_builder = self.stream_builder.backward();
57        self
58    }
59
60    /// Creates the stream corresponding to the range
61    /// of terms defined using the `TermStreamerBuilder`.
62    pub fn into_stream(self) -> io::Result<TermStreamer<'a, A>> {
63        Ok(TermStreamer {
64            fst_map: self.fst_map,
65            stream: self.stream_builder.into_stream(),
66            term_ord: 0u64,
67            current_key: Vec::with_capacity(100),
68            current_value: TermInfo::default(),
69        })
70    }
71}
72
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
/// Terms are guaranteed to be sorted.
///
/// The cursor keeps its own copy of the current term, its ordinal and its
/// `TermInfo`; these are refreshed on every successful call to `.advance()`.
pub struct TermStreamer<'a, A = AlwaysMatch>
where A: Automaton
{
    // Term dictionary used to look up the `TermInfo` associated with a term ordinal.
    pub(crate) fst_map: &'a TermDictionary,
    // Underlying FST stream yielding `(term bytes, term ordinal)` pairs.
    pub(crate) stream: Stream<'a, A>,
    // Ordinal of the term the cursor is currently positioned on.
    term_ord: TermOrdinal,
    // Buffer holding a copy of the bytes of the current term.
    current_key: Vec<u8>,
    // `TermInfo` of the current term.
    current_value: TermInfo,
}
84
85impl<'a, A> TermStreamer<'a, A>
86where A: Automaton
87{
88    /// Advance position the stream on the next item.
89    /// Before the first call to `.advance()`, the stream
90    /// is an uninitialized state.
91    pub fn advance(&mut self) -> bool {
92        if let Some((term, term_ord)) = self.stream.next() {
93            self.current_key.clear();
94            self.current_key.extend_from_slice(term);
95            self.term_ord = term_ord;
96            self.current_value = self.fst_map.term_info_from_ord(term_ord);
97            true
98        } else {
99            false
100        }
101    }
102
103    /// Returns the `TermOrdinal` of the given term.
104    ///
105    /// May panic if the called as `.advance()` as never
106    /// been called before.
107    pub fn term_ord(&self) -> TermOrdinal {
108        self.term_ord
109    }
110
111    /// Accesses the current key.
112    ///
113    /// `.key()` should return the key that was returned
114    /// by the `.next()` method.
115    ///
116    /// If the end of the stream as been reached, and `.next()`
117    /// has been called and returned `None`, `.key()` remains
118    /// the value of the last key encountered.
119    ///
120    /// Before any call to `.next()`, `.key()` returns an empty array.
121    pub fn key(&self) -> &[u8] {
122        &self.current_key
123    }
124
125    /// Accesses the current value.
126    ///
127    /// Calling `.value()` after the end of the stream will return the
128    /// last `.value()` encountered.
129    ///
130    /// # Panics
131    ///
132    /// Calling `.value()` before the first call to `.advance()` returns
133    /// `V::default()`.
134    pub fn value(&self) -> &TermInfo {
135        &self.current_value
136    }
137
138    /// Return the next `(key, value)` pair.
139    #[allow(clippy::should_implement_trait)]
140    pub fn next(&mut self) -> Option<(&[u8], &TermInfo)> {
141        if self.advance() {
142            Some((self.key(), self.value()))
143        } else {
144            None
145        }
146    }
147}