1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
use std::fmt;

use super::Field;
use crate::common;
use crate::schema::Facet;
use crate::DateTime;
use byteorder::{BigEndian, ByteOrder};
use std::str;

/// Size (in bytes) of the buffer of a numeric (`u64`/`i64`/`f64`/date) term:
/// 4 bytes for the field id followed by 8 bytes for the value.
const INT_TERM_LEN: usize = 4 + 8;

/// A `Term` is the serialized value that a token can take.
///
/// It is a thin wrapper around a byte container `B`, which is a `Vec<u8>`
/// unless another type is specified. Equality, ordering and hashing are
/// derived, and therefore delegated to the wrapped container.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Term<B: AsRef<[u8]> = Vec<u8>>(B);

impl Term {
    /// Builds a term given a field, and a `i64` value.
    ///
    /// Assuming the term has a field id of 1, and a `i64` value of 3234,
    /// the Term will have 12 bytes.
    ///
    /// The first four bytes are dedicated to storing the field id as a `u32`.
    /// The 8 following bytes encode the value, after it has been mapped
    /// to a `u64` by `common::i64_to_u64`.
    pub fn from_field_i64(field: Field, val: i64) -> Term {
        Term::from_field_u64(field, common::i64_to_u64(val))
    }

    /// Builds a term given a field, and a `f64` value.
    ///
    /// Assuming the term has a field id of 1, and a `f64` value of 1.5,
    /// the Term will have 12 bytes.
    ///
    /// The first four bytes are dedicated to storing the field id as a `u32`.
    /// The 8 following bytes encode the value, after it has been mapped
    /// to a `u64` by `common::f64_to_u64`.
    pub fn from_field_f64(field: Field, val: f64) -> Term {
        Term::from_field_u64(field, common::f64_to_u64(val))
    }

    /// Builds a term given a field, and a `DateTime` value.
    ///
    /// Assuming the term has a field id of 1, and a timestamp `i64` value of 3234,
    /// the Term will have 12 bytes.
    ///
    /// The first four bytes are dedicated to storing the field id as a `u32`.
    /// The 8 following bytes encode the `i64` returned by `val.timestamp()`,
    /// exactly as `Term::from_field_i64` would.
    pub fn from_field_date(field: Field, val: &DateTime) -> Term {
        Term::from_field_i64(field, val.timestamp())
    }

    /// Creates a `Term` given a facet.
    ///
    /// The term consists of the 4-byte field id followed by the bytes of
    /// `facet.encoded_str()`.
    pub fn from_facet(field: Field, facet: &Facet) -> Term {
        let bytes = facet.encoded_str().as_bytes();
        // Allocate exactly what is needed: field id + payload.
        let buffer = Vec::with_capacity(4 + bytes.len());
        let mut term = Term(buffer);
        term.set_field(field);
        term.set_bytes(bytes);
        term
    }

    /// Builds a term given a field, and a string value.
    ///
    /// Assuming the term has a field id of 2, and a text value of "abc",
    /// the Term will have 7 bytes.
    /// The first four bytes (0, 0, 0, 2) store the field id as a big endian
    /// `u32`, and the three following bytes are the utf-8
    /// representation of "abc".
    pub fn from_field_text(field: Field, text: &str) -> Term {
        // Allocate exactly what is needed: field id + utf-8 payload.
        let buffer = Vec::with_capacity(4 + text.len());
        let mut term = Term(buffer);
        term.set_field(field);
        term.set_text(text);
        term
    }

    /// Builds a term given a field, and a `u64` value.
    ///
    /// Assuming the term has a field id of 1, and a `u64` value of 3234,
    /// the Term will have 12 bytes.
    ///
    /// The first four bytes are dedicated to storing the field id as a `u32`.
    /// The 8 following bytes are encoding the `u64` value.
    pub fn from_field_u64(field: Field, val: u64) -> Term {
        let mut term = Term(vec![0u8; INT_TERM_LEN]);
        term.set_field(field);
        term.set_u64(val);
        term
    }

    /// Creates a new Term for a given field.
    ///
    /// The returned term only contains the 4-byte field id; its value is
    /// empty. The buffer is preallocated with a capacity of 100 bytes.
    pub(crate) fn for_field(field: Field) -> Term {
        let mut term = Term(Vec::with_capacity(100));
        term.set_field(field);
        term
    }

    /// Sets the field of the term, keeping the value untouched.
    ///
    /// The field id is written as a big endian `u32` in the first
    /// four bytes of the buffer. If the buffer is shorter than four bytes,
    /// it is first zero-padded up to that length.
    pub fn set_field(&mut self, field: Field) {
        if self.0.len() < 4 {
            self.0.resize(4, 0u8);
        }
        BigEndian::write_u32(&mut self.0[0..4], field.field_id());
    }

    /// Sets a u64 value in the term.
    ///
    /// U64 are serialized using (8-byte) BigEndian
    /// representation.
    /// The use of BigEndian has the benefit of preserving
    /// the natural order of the values.
    ///
    /// Any value previously held by the term is discarded: the buffer is
    /// resized to exactly 12 bytes before the value is written.
    pub fn set_u64(&mut self, val: u64) {
        self.0.resize(INT_TERM_LEN, 0u8);
        BigEndian::write_u64(&mut self.0[4..], val);
    }

    /// Sets a `i64` value in the term.
    ///
    /// The value is mapped to a `u64` by `common::i64_to_u64` and
    /// stored with `set_u64`.
    pub fn set_i64(&mut self, val: i64) {
        self.set_u64(common::i64_to_u64(val));
    }

    /// Sets a `f64` value in the term.
    ///
    /// The value is mapped to a `u64` by `common::f64_to_u64` and
    /// stored with `set_u64`.
    pub fn set_f64(&mut self, val: f64) {
        self.set_u64(common::f64_to_u64(val));
    }

    /// Replaces the value of the term with `bytes`, keeping the field untouched.
    fn set_bytes(&mut self, bytes: &[u8]) {
        // Drop whatever value followed the 4-byte field id
        // (or zero-pad if the buffer was shorter than that)...
        self.0.resize(4, 0u8);
        // ... and append the new value.
        self.0.extend_from_slice(bytes);
    }

    /// Builds a term given a field, and an arbitrary byte value.
    ///
    /// The term consists of the 4-byte field id followed by `bytes`.
    pub(crate) fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
        let mut term = Term::for_field(field);
        term.set_bytes(bytes);
        term
    }

    /// Set the texts only, keeping the field untouched.
    ///
    /// The value of the term becomes the utf-8 representation of `text`.
    pub fn set_text(&mut self, text: &str) {
        self.set_bytes(text.as_bytes());
    }
}

impl<B> Term<B>
where
    B: AsRef<[u8]>,
{
    /// Wraps a source of data
    ///
    /// No validation is performed here. The accessors below expect `data`
    /// to start with a 4-byte field id, followed by the value of the term.
    pub fn wrap(data: B) -> Term<B> {
        Term(data)
    }

    /// Returns the field.
    ///
    /// The field id is decoded as a big endian `u32` from the first
    /// four bytes of the term.
    ///
    /// # Panics
    /// If the term holds fewer than 4 bytes.
    pub fn field(&self) -> Field {
        Field::from_field_id(BigEndian::read_u32(&self.0.as_ref()[..4]))
    }

    /// Returns the `u64` value stored in a term.
    ///
    /// The first 8 bytes of the value are decoded as a big endian `u64`.
    /// If the term does not hold a `u64`, the result is meaningless.
    ///
    /// # Panics
    /// If the value of the term is shorter than 8 bytes.
    pub fn get_u64(&self) -> u64 {
        BigEndian::read_u64(self.value_bytes())
    }

    /// Returns the `i64` value stored in a term.
    ///
    /// The `u64` read by `get_u64` is mapped back through `common::u64_to_i64`.
    /// If the term does not hold a `i64`, the result is meaningless.
    ///
    /// # Panics
    /// If the value of the term is shorter than 8 bytes.
    pub fn get_i64(&self) -> i64 {
        common::u64_to_i64(self.get_u64())
    }

    /// Returns the `f64` value stored in a term.
    ///
    /// The `u64` read by `get_u64` is mapped back through `common::u64_to_f64`.
    /// If the term does not hold a `f64`, the result is meaningless.
    ///
    /// # Panics
    /// If the value of the term is shorter than 8 bytes.
    pub fn get_f64(&self) -> f64 {
        common::u64_to_f64(self.get_u64())
    }

    /// Returns the text associated with the term.
    ///
    /// # Panics
    /// If the value is not valid utf-8. This may happen
    /// if the index is corrupted or if you try to
    /// call this method on a non-string type.
    /// Also panics if the term holds fewer than 4 bytes.
    pub fn text(&self) -> &str {
        str::from_utf8(self.value_bytes()).expect("Term does not contain valid utf-8.")
    }

    /// Returns the serialized value of the term.
    /// (this does not include the field.)
    ///
    /// If the term is a string, its value is utf-8 encoded.
    /// If the term is a u64, its value is encoded according
    /// to `byteorder::BigEndian`.
    ///
    /// # Panics
    /// If the term holds fewer than 4 bytes.
    pub fn value_bytes(&self) -> &[u8] {
        // Everything after the 4-byte field id.
        &self.0.as_ref()[4..]
    }

    /// Returns the underlying `&[u8]`
    ///
    /// This is the full serialized term: field id followed by the value.
    pub fn as_slice(&self) -> &[u8] {
        self.0.as_ref()
    }
}

/// Exposes the full serialized term (field id followed by the value)
/// as a byte slice.
impl<B: AsRef<[u8]>> AsRef<[u8]> for Term<B> {
    fn as_ref(&self) -> &[u8] {
        <B as AsRef<[u8]>>::as_ref(&self.0)
    }
}

/// Debug representation of a term, for any underlying byte container.
///
/// Prints the field id and the raw bytes of the value, e.g.
/// `Term(field=1,bytes=[116, 101, 115, 116])`.
///
/// Formatting panics if the term holds fewer than 4 bytes, since both
/// `field()` and `value_bytes()` slice past the 4-byte field id.
impl<B> fmt::Debug for Term<B>
where
    B: AsRef<[u8]>,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Term(field={},bytes={:?})",
            self.field().field_id(),
            self.value_bytes()
        )
    }
}

#[cfg(test)]
mod tests {

    use crate::schema::*;

    /// Checks the byte layout of text and `u64` terms:
    /// a 4-byte big endian field id followed by the value.
    #[test]
    pub fn test_term() {
        let mut schema_builder = Schema::builder();
        // The assertions below expect "title" and "count" to get
        // the field ids 1 and 2 respectively, hence this first field.
        schema_builder.add_text_field("text", STRING);
        let title_field = schema_builder.add_text_field("title", STRING);
        let count_field = schema_builder.add_text_field("count", STRING);
        {
            let term = Term::from_field_text(title_field, "test");
            assert_eq!(term.field(), title_field);
            assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 1u8]);
            assert_eq!(&term.as_slice()[4..], "test".as_bytes());
            assert_eq!(term.text(), "test");
        }
        {
            let term = Term::from_field_u64(count_field, 983u64);
            assert_eq!(term.field(), count_field);
            assert_eq!(&term.as_slice()[0..4], &[0u8, 0u8, 0u8, 2u8]);
            assert_eq!(term.as_slice().len(), 4 + 8);
            // 983 fits in two bytes: the six most significant bytes are zero.
            assert_eq!(&term.as_slice()[4..10], &[0u8; 6]);
            assert_eq!(term.as_slice()[10], (983u64 / 256u64) as u8);
            assert_eq!(term.as_slice()[11], (983u64 % 256u64) as u8);
            assert_eq!(term.get_u64(), 983u64);
        }
    }
}