use std::fmt;
use std::io::Read;
use mem_dbg::MemSize;
use crate::encoding::{geo, range, sortable_bytes};
/// Per-field indexing level, declared from `None` up to
/// `DocsAndFreqsAndPositionsAndOffsets`.
///
/// `PartialOrd`/`Ord` are derived, so variants compare in declaration order
/// (pinned by `test_index_options_ordering`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, MemSize)]
#[mem_size_flat]
pub enum IndexOptions {
/// The field is not indexed; `FieldType::is_indexed` returns `false` for this value.
None = 0,
/// NOTE(review): presumably postings carry document ids only — confirm with the postings writer.
Docs = 1,
/// Lowest level for which `has_freqs` returns `true`.
DocsAndFreqs = 2,
/// Lowest level for which `has_positions` returns `true`.
DocsAndFreqsAndPositions = 3,
/// Highest level; satisfies both `has_freqs` and `has_positions`.
DocsAndFreqsAndPositionsAndOffsets = 4,
}
impl IndexOptions {
    /// Returns `true` for `DocsAndFreqs` and every level declared after it.
    pub fn has_freqs(self) -> bool {
        // The explicit discriminants grow with the level of detail, so comparing them
        // gives the same answer as comparing the variants themselves.
        let threshold = IndexOptions::DocsAndFreqs as u8;
        (self as u8) >= threshold
    }

    /// Returns `true` for `DocsAndFreqsAndPositions` and every level declared after it.
    pub fn has_positions(self) -> bool {
        let threshold = IndexOptions::DocsAndFreqsAndPositions as u8;
        (self as u8) >= threshold
    }
}
/// Kind of doc values attached to a field.
///
/// Unlike `IndexOptions`, no ordering is derived: values are only compared for equality.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, MemSize)]
#[mem_size_flat]
pub enum DocValuesType {
/// No doc values; `FieldType::has_doc_values` returns `false` for this value.
None = 0,
/// Used by `numeric_doc_values_field`.
Numeric = 1,
/// Used by `binary_doc_values_field`.
Binary = 2,
/// Used by `sorted_doc_values_field`.
Sorted = 3,
/// Used by `sorted_numeric_doc_values_field` and by the point-based numeric field constructors.
SortedNumeric = 4,
/// Used by `sorted_set_doc_values_field` and `keyword_field`.
SortedSet = 5,
}
/// Describes how a field is stored and indexed.
///
/// Values are produced by `FieldTypeBuilder::build`; every field is private and is read
/// through the accessor of the same name.
#[derive(Clone, Debug)]
pub struct FieldType {
/// When `false`, `Field::stored_value` returns `None`.
stored: bool,
/// Tokenization flag; the `text_field*` constructors in this file set it to `true`.
tokenized: bool,
/// When `true`, `has_norms` reports `false` even for an indexed field.
omit_norms: bool,
/// Indexing level; `IndexOptions::None` means the field is not indexed.
index_options: IndexOptions,
/// Doc-values kind; `DocValuesType::None` means the field has no doc values.
doc_values_type: DocValuesType,
/// Term-vector flag; set together with positions and offsets by the `*_with_term_vectors` constructors.
store_term_vectors: bool,
/// Term-vector offsets flag.
store_term_vector_offsets: bool,
/// Term-vector positions flag.
store_term_vector_positions: bool,
/// Term-vector payloads flag; no constructor in this file enables it.
store_term_vector_payloads: bool,
/// Number of point dimensions; any value above zero makes `has_points` return `true`.
point_dimension_count: u32,
/// Second argument of `FieldTypeBuilder::point_dimensions`.
/// NOTE(review): presumably the number of dimensions used by the points index — confirm.
point_index_dimension_count: u32,
/// Third argument of `FieldTypeBuilder::point_dimensions`; the constructors in this file pass
/// 4 for 32-bit values and 8 for 64-bit values (presumably bytes per dimension — confirm).
point_num_bytes: u32,
}
impl FieldType {
    /// Returns the `stored` flag; `Field::stored_value` yields `None` when it is `false`.
    pub fn stored(&self) -> bool {
        self.stored
    }

    /// Returns the `tokenized` flag.
    pub fn tokenized(&self) -> bool {
        self.tokenized
    }

    /// Returns the `omit_norms` flag.
    pub fn omit_norms(&self) -> bool {
        self.omit_norms
    }

    /// Returns the configured indexing level.
    pub fn index_options(&self) -> IndexOptions {
        self.index_options
    }

    /// Returns the configured doc-values kind.
    pub fn doc_values_type(&self) -> DocValuesType {
        self.doc_values_type
    }

    /// Returns the `store_term_vectors` flag.
    pub fn store_term_vectors(&self) -> bool {
        self.store_term_vectors
    }

    /// Returns the `store_term_vector_offsets` flag.
    pub fn store_term_vector_offsets(&self) -> bool {
        self.store_term_vector_offsets
    }

    /// Returns the `store_term_vector_positions` flag.
    pub fn store_term_vector_positions(&self) -> bool {
        self.store_term_vector_positions
    }

    /// Returns the `store_term_vector_payloads` flag.
    pub fn store_term_vector_payloads(&self) -> bool {
        self.store_term_vector_payloads
    }

    /// Returns the number of point dimensions (zero when the field has no points).
    pub fn point_dimension_count(&self) -> u32 {
        self.point_dimension_count
    }

    /// Returns the point index dimension count.
    pub fn point_index_dimension_count(&self) -> u32 {
        self.point_index_dimension_count
    }

    /// Returns the configured point byte width.
    pub fn point_num_bytes(&self) -> u32 {
        self.point_num_bytes
    }

    /// Returns `true` unless the indexing level is `IndexOptions::None`.
    pub fn is_indexed(&self) -> bool {
        !matches!(self.index_options, IndexOptions::None)
    }

    /// Returns `true` when at least one point dimension is configured.
    pub fn has_points(&self) -> bool {
        self.point_dimension_count != 0
    }

    /// Returns `true` unless the doc-values kind is `DocValuesType::None`.
    pub fn has_doc_values(&self) -> bool {
        !matches!(self.doc_values_type, DocValuesType::None)
    }

    /// Returns `true` only for an indexed field that does not omit norms.
    pub fn has_norms(&self) -> bool {
        if self.omit_norms {
            false
        } else {
            self.is_indexed()
        }
    }
}
/// Builder for `FieldType`.
///
/// Holds one pending value per `FieldType` field; `build` copies them over unchanged.
/// Setters take and return the builder by value so calls can be chained.
#[derive(Clone, Debug)]
pub struct FieldTypeBuilder {
// Each field below is the pending value for the `FieldType` field of the same name.
stored: bool,
tokenized: bool,
omit_norms: bool,
index_options: IndexOptions,
doc_values_type: DocValuesType,
store_term_vectors: bool,
store_term_vector_offsets: bool,
store_term_vector_positions: bool,
store_term_vector_payloads: bool,
// The three point settings are assigned together by `point_dimensions`.
point_dimension_count: u32,
point_index_dimension_count: u32,
point_num_bytes: u32,
}
impl FieldTypeBuilder {
pub fn new() -> Self {
Self {
stored: false,
tokenized: false,
omit_norms: false,
index_options: IndexOptions::None,
doc_values_type: DocValuesType::None,
store_term_vectors: false,
store_term_vector_offsets: false,
store_term_vector_positions: false,
store_term_vector_payloads: false,
point_dimension_count: 0,
point_index_dimension_count: 0,
point_num_bytes: 0,
}
}
pub fn stored(mut self, value: bool) -> Self {
self.stored = value;
self
}
pub fn tokenized(mut self, value: bool) -> Self {
self.tokenized = value;
self
}
pub fn omit_norms(mut self, value: bool) -> Self {
self.omit_norms = value;
self
}
pub fn index_options(mut self, value: IndexOptions) -> Self {
self.index_options = value;
self
}
pub fn doc_values_type(mut self, value: DocValuesType) -> Self {
self.doc_values_type = value;
self
}
pub fn store_term_vectors(mut self, value: bool) -> Self {
self.store_term_vectors = value;
self
}
pub fn store_term_vector_positions(mut self, value: bool) -> Self {
self.store_term_vector_positions = value;
self
}
pub fn store_term_vector_offsets(mut self, value: bool) -> Self {
self.store_term_vector_offsets = value;
self
}
pub fn store_term_vector_payloads(mut self, value: bool) -> Self {
self.store_term_vector_payloads = value;
self
}
pub fn point_dimensions(mut self, count: u32, index_count: u32, num_bytes: u32) -> Self {
self.point_dimension_count = count;
self.point_index_dimension_count = index_count;
self.point_num_bytes = num_bytes;
self
}
pub fn build(self) -> FieldType {
FieldType {
stored: self.stored,
tokenized: self.tokenized,
omit_norms: self.omit_norms,
index_options: self.index_options,
doc_values_type: self.doc_values_type,
store_term_vectors: self.store_term_vectors,
store_term_vector_offsets: self.store_term_vector_offsets,
store_term_vector_positions: self.store_term_vector_positions,
store_term_vector_payloads: self.store_term_vector_payloads,
point_dimension_count: self.point_dimension_count,
point_index_dimension_count: self.point_index_dimension_count,
point_num_bytes: self.point_num_bytes,
}
}
}
impl Default for FieldTypeBuilder {
fn default() -> Self {
Self::new()
}
}
/// The payload carried by a `Field`.
///
/// `Debug` is implemented by hand for this type (see the `impl fmt::Debug` in this file)
/// rather than derived; the `Reader` payload is printed as a placeholder there.
pub enum FieldValue {
/// Owned text; returned by `Field::string_value`.
Text(String),
/// 32-bit integer; widened to `i64` by `Field::numeric_value`.
Int(i32),
/// 64-bit integer.
Long(i64),
/// 32-bit float; `Field::numeric_value` converts it with `sortable_bytes::float_to_int`.
Float(f32),
/// 64-bit float; `Field::numeric_value` converts it with `sortable_bytes::double_to_long`.
Double(f64),
/// Raw bytes; returned as-is (cloned) by `Field::point_bytes` and `Field::stored_value`.
Bytes(Vec<u8>),
/// Boxed reader. `Field::string_value`, `numeric_value`, `point_bytes` and `stored_value`
/// all return `None` for this variant.
Reader(Box<dyn Read + Send>),
/// Term/frequency pair produced by `feature_field`; `term` is exposed by `Field::string_value`.
Feature {
term: String,
freq: i32,
},
}
impl fmt::Debug for FieldValue {
    /// Formats each variant the way a derived `Debug` would, except that the `Reader`
    /// payload is replaced by the placeholder string `"..."`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every single-payload variant is rendered the same way: `Label(payload)`.
        fn tuple(
            out: &mut fmt::Formatter<'_>,
            label: &str,
            payload: &dyn fmt::Debug,
        ) -> fmt::Result {
            out.debug_tuple(label).field(payload).finish()
        }

        match self {
            FieldValue::Feature { term, freq } => {
                let mut record = f.debug_struct("Feature");
                record.field("term", term);
                record.field("freq", freq);
                record.finish()
            }
            FieldValue::Reader(_) => tuple(f, "Reader", &"..."),
            FieldValue::Text(text) => tuple(f, "Text", text),
            FieldValue::Bytes(bytes) => tuple(f, "Bytes", bytes),
            FieldValue::Int(number) => tuple(f, "Int", number),
            FieldValue::Long(number) => tuple(f, "Long", number),
            FieldValue::Float(number) => tuple(f, "Float", number),
            FieldValue::Double(number) => tuple(f, "Double", number),
        }
    }
}
/// A named value together with the `FieldType` that says how to treat it.
#[derive(Debug)]
pub struct Field {
/// Field name, exposed by `Field::name`.
name: String,
/// Storage/indexing configuration; consulted by `point_bytes` and `stored_value`.
field_type: FieldType,
/// The payload.
value: FieldValue,
}
impl Field {
pub fn new(name: String, field_type: FieldType, value: FieldValue) -> Self {
Self {
name,
field_type,
value,
}
}
pub fn name(&self) -> &str {
&self.name
}
pub fn field_type(&self) -> &FieldType {
&self.field_type
}
pub fn value(&self) -> &FieldValue {
&self.value
}
pub(crate) fn value_mut(&mut self) -> &mut FieldValue {
&mut self.value
}
pub fn string_value(&self) -> Option<&str> {
match &self.value {
FieldValue::Text(s) => Some(s),
FieldValue::Feature { term, .. } => Some(term),
_ => None,
}
}
pub fn numeric_value(&self) -> Option<i64> {
match &self.value {
FieldValue::Int(v) => Some(*v as i64),
FieldValue::Long(v) => Some(*v),
FieldValue::Float(v) => Some(sortable_bytes::float_to_int(*v) as i64),
FieldValue::Double(v) => Some(sortable_bytes::double_to_long(*v)),
_ => None,
}
}
pub fn point_bytes(&self) -> Option<Vec<u8>> {
if !self.field_type.has_points() {
return None;
}
match &self.value {
FieldValue::Int(v) => Some(sortable_bytes::from_int(*v).to_vec()),
FieldValue::Long(v) => Some(sortable_bytes::from_long(*v).to_vec()),
FieldValue::Float(v) => Some(sortable_bytes::from_float(*v).to_vec()),
FieldValue::Double(v) => Some(sortable_bytes::from_double(*v).to_vec()),
FieldValue::Bytes(b) => Some(b.clone()),
_ => None,
}
}
pub fn stored_value(&self) -> Option<StoredValue> {
if !self.field_type.stored() {
return None;
}
match &self.value {
FieldValue::Text(s) => Some(StoredValue::String(s.clone())),
FieldValue::Int(v) => Some(StoredValue::Int(*v)),
FieldValue::Long(v) => Some(StoredValue::Long(*v)),
FieldValue::Float(v) => Some(StoredValue::Float(*v)),
FieldValue::Double(v) => Some(StoredValue::Double(*v)),
FieldValue::Bytes(b) => Some(StoredValue::Bytes(b.clone())),
FieldValue::Reader(_) | FieldValue::Feature { .. } => None,
}
}
}
/// Owned copy of a field value as returned by `Field::stored_value`.
///
/// There is one variant per `FieldValue` variant that `Field::stored_value` maps;
/// `FieldValue::Reader` and `FieldValue::Feature` have no counterpart here.
#[derive(Clone, Debug, MemSize)]
pub enum StoredValue {
/// Produced from `FieldValue::Text`.
String(String),
/// Produced from `FieldValue::Int`.
Int(i32),
/// Produced from `FieldValue::Long`.
Long(i64),
/// Produced from `FieldValue::Float`.
Float(f32),
/// Produced from `FieldValue::Double`.
Double(f64),
/// Produced from `FieldValue::Bytes`.
Bytes(Vec<u8>),
}
/// An ordered collection of fields.
#[derive(Debug, Default)]
pub struct Document {
/// Fields in the order they were added; public, so callers may also edit it directly.
pub fields: Vec<Field>,
}
impl Document {
pub fn new() -> Self {
Self { fields: Vec::new() }
}
pub fn add(&mut self, field: Field) {
self.fields.push(field);
}
}
/// Creates a text-valued field that is stored, indexed at `IndexOptions::Docs` with norms
/// omitted, and tagged with `DocValuesType::SortedSet` doc values.
pub fn keyword_field(name: &str, value: &str) -> Field {
    let field_type = FieldTypeBuilder::new()
        .index_options(IndexOptions::Docs)
        .omit_norms(true)
        .doc_values_type(DocValuesType::SortedSet)
        .stored(true)
        .build();
    let payload = FieldValue::Text(value.to_owned());
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates an unstored `Long` field with one 8-byte point dimension and
/// `DocValuesType::SortedNumeric` doc values.
pub fn long_field(name: &str, value: i64) -> Field {
    let field_type = FieldTypeBuilder::new()
        .doc_values_type(DocValuesType::SortedNumeric)
        .point_dimensions(1, 1, 8)
        .build();
    let payload = FieldValue::Long(value);
    Field::new(name.to_owned(), field_type, payload)
}
/// Shared field type for the `text_field*` constructors: tokenized and indexed at
/// `IndexOptions::DocsAndFreqsAndPositions`.
///
/// When `term_vectors` is `true`, term vectors with positions and offsets are enabled
/// as well (payloads stay off).
fn text_field_type(term_vectors: bool) -> FieldType {
    // A fresh builder starts with all three term-vector flags off, so passing the
    // argument straight through is the same as setting them only when it is `true`.
    FieldTypeBuilder::new()
        .tokenized(true)
        .index_options(IndexOptions::DocsAndFreqsAndPositions)
        .store_term_vectors(term_vectors)
        .store_term_vector_positions(term_vectors)
        .store_term_vector_offsets(term_vectors)
        .build()
}
/// Creates a tokenized text field (see `text_field_type`) without term vectors,
/// holding a copy of `value`.
pub fn text_field(name: &str, value: &str) -> Field {
    let field_type = text_field_type(false);
    let payload = FieldValue::Text(value.to_owned());
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates a tokenized text field (see `text_field_type`) without term vectors whose
/// value is the boxed `reader`.
pub fn text_field_reader(name: &str, reader: impl Read + Send + 'static) -> Field {
    let source: Box<dyn Read + Send> = Box::new(reader);
    let field_type = text_field_type(false);
    Field::new(name.to_owned(), field_type, FieldValue::Reader(source))
}
/// Creates a tokenized text field (see `text_field_type`) with term vectors enabled
/// whose value is the boxed `reader`.
pub fn text_field_reader_with_term_vectors(
    name: &str,
    reader: impl Read + Send + 'static,
) -> Field {
    let source: Box<dyn Read + Send> = Box::new(reader);
    let field_type = text_field_type(true);
    Field::new(name.to_owned(), field_type, FieldValue::Reader(source))
}
/// Creates a tokenized text field (see `text_field_type`) with term vectors enabled,
/// holding a copy of `value`.
pub fn text_field_with_term_vectors(name: &str, value: &str) -> Field {
    let field_type = text_field_type(true);
    let payload = FieldValue::Text(value.to_owned());
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates a text-valued field indexed at `IndexOptions::Docs` with norms omitted and
/// no doc values; `stored` controls whether the value is also stored.
pub fn string_field(name: &str, value: &str, stored: bool) -> Field {
    let field_type = FieldTypeBuilder::new()
        .index_options(IndexOptions::Docs)
        .omit_norms(true)
        .stored(stored)
        .build();
    let payload = FieldValue::Text(value.to_owned());
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates an `Int` field with one 4-byte point dimension and
/// `DocValuesType::SortedNumeric` doc values; `stored` controls whether it is also stored.
pub fn int_field(name: &str, value: i32, stored: bool) -> Field {
    let field_type = FieldTypeBuilder::new()
        .doc_values_type(DocValuesType::SortedNumeric)
        .point_dimensions(1, 1, 4)
        .stored(stored)
        .build();
    let payload = FieldValue::Int(value);
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates a `Float` field with one 4-byte point dimension and
/// `DocValuesType::SortedNumeric` doc values; `stored` controls whether it is also stored.
pub fn float_field(name: &str, value: f32, stored: bool) -> Field {
    let field_type = FieldTypeBuilder::new()
        .doc_values_type(DocValuesType::SortedNumeric)
        .point_dimensions(1, 1, 4)
        .stored(stored)
        .build();
    let payload = FieldValue::Float(value);
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates a `Double` field with one 8-byte point dimension and
/// `DocValuesType::SortedNumeric` doc values; `stored` controls whether it is also stored.
pub fn double_field(name: &str, value: f64, stored: bool) -> Field {
    let field_type = FieldTypeBuilder::new()
        .doc_values_type(DocValuesType::SortedNumeric)
        .point_dimensions(1, 1, 8)
        .stored(stored)
        .build();
    let payload = FieldValue::Double(value);
    Field::new(name.to_owned(), field_type, payload)
}
/// Creates a `Long` field whose only feature is `DocValuesType::Numeric` doc values
/// (not stored, not indexed, no points).
pub fn numeric_doc_values_field(name: &str, value: i64) -> Field {
    let builder = FieldTypeBuilder::new().doc_values_type(DocValuesType::Numeric);
    Field::new(name.to_owned(), builder.build(), FieldValue::Long(value))
}
/// Creates a `Bytes` field whose only feature is `DocValuesType::Binary` doc values;
/// takes ownership of `value`.
pub fn binary_doc_values_field(name: &str, value: Vec<u8>) -> Field {
    let builder = FieldTypeBuilder::new().doc_values_type(DocValuesType::Binary);
    Field::new(name.to_owned(), builder.build(), FieldValue::Bytes(value))
}
/// Creates a `Bytes` field whose only feature is `DocValuesType::Sorted` doc values;
/// `value` is copied into the field.
pub fn sorted_doc_values_field(name: &str, value: &[u8]) -> Field {
    let builder = FieldTypeBuilder::new().doc_values_type(DocValuesType::Sorted);
    let payload = FieldValue::Bytes(Vec::from(value));
    Field::new(name.to_owned(), builder.build(), payload)
}
/// Creates a text-valued field whose only feature is `DocValuesType::SortedSet`
/// doc values.
pub fn sorted_set_doc_values_field(name: &str, value: &str) -> Field {
    let builder = FieldTypeBuilder::new().doc_values_type(DocValuesType::SortedSet);
    let payload = FieldValue::Text(value.to_owned());
    Field::new(name.to_owned(), builder.build(), payload)
}
/// Creates a `Long` field whose only feature is `DocValuesType::SortedNumeric`
/// doc values.
pub fn sorted_numeric_doc_values_field(name: &str, value: i64) -> Field {
    let builder = FieldTypeBuilder::new().doc_values_type(DocValuesType::SortedNumeric);
    Field::new(name.to_owned(), builder.build(), FieldValue::Long(value))
}
/// Shared helper for the `stored_*_field` constructors: wraps `value` in a field whose
/// type has only the `stored` flag set.
fn stored_field(name: &str, value: FieldValue) -> Field {
    let stored_only = FieldTypeBuilder::new().stored(true);
    Field::new(name.to_owned(), stored_only.build(), value)
}
/// Creates a stored-only field holding a copy of `value` as text.
pub fn stored_string_field(name: &str, value: &str) -> Field {
    let payload = FieldValue::Text(String::from(value));
    stored_field(name, payload)
}
/// Creates a stored-only field holding a 32-bit integer.
pub fn stored_int_field(name: &str, value: i32) -> Field {
    let payload = FieldValue::Int(value);
    stored_field(name, payload)
}
/// Creates a stored-only field holding a 64-bit integer.
pub fn stored_long_field(name: &str, value: i64) -> Field {
    let payload = FieldValue::Long(value);
    stored_field(name, payload)
}
/// Creates a stored-only field holding a 32-bit float.
pub fn stored_float_field(name: &str, value: f32) -> Field {
    let payload = FieldValue::Float(value);
    stored_field(name, payload)
}
/// Creates a stored-only field holding a 64-bit float.
pub fn stored_double_field(name: &str, value: f64) -> Field {
    let payload = FieldValue::Double(value);
    stored_field(name, payload)
}
/// Creates a stored-only field that takes ownership of the given bytes.
pub fn stored_bytes_field(name: &str, value: Vec<u8>) -> Field {
    let payload = FieldValue::Bytes(value);
    stored_field(name, payload)
}
/// Creates a two-dimensional point field (4 bytes per dimension) from a latitude and
/// a longitude.
///
/// Each coordinate is encoded with `geo::encode_latitude` / `geo::encode_longitude` and
/// then `sortable_bytes::from_int`; the latitude bytes come first, followed by the
/// longitude bytes.
pub fn lat_lon_point(name: &str, lat: f64, lon: f64) -> Field {
    let lat_code = geo::encode_latitude(lat);
    let lon_code = geo::encode_longitude(lon);

    let mut point = Vec::with_capacity(8);
    for code in [lat_code, lon_code] {
        point.extend_from_slice(&sortable_bytes::from_int(code));
    }

    let field_type = FieldTypeBuilder::new().point_dimensions(2, 2, 4).build();
    Field::new(name.to_owned(), field_type, FieldValue::Bytes(point))
}
/// Creates a point field for an `i32` range, encoded by `range::encode_int`.
///
/// The point dimension count is twice `mins.len()` with 4 bytes per dimension;
/// `maxs.len()` is not inspected here.
/// NOTE(review): presumably `range::encode_int` validates that both slices have the
/// same length — confirm.
pub fn int_range_field(name: &str, mins: &[i32], maxs: &[i32]) -> Field {
    let encoded = range::encode_int(mins, maxs);
    let dimension_count = (mins.len() * 2) as u32;
    let builder = FieldTypeBuilder::new().point_dimensions(dimension_count, dimension_count, 4);
    Field::new(name.to_owned(), builder.build(), FieldValue::Bytes(encoded))
}
/// Creates a point field for an `i64` range, encoded by `range::encode_long`.
///
/// The point dimension count is twice `mins.len()` with 8 bytes per dimension;
/// `maxs.len()` is not inspected here.
/// NOTE(review): presumably `range::encode_long` validates that both slices have the
/// same length — confirm.
pub fn long_range_field(name: &str, mins: &[i64], maxs: &[i64]) -> Field {
    let encoded = range::encode_long(mins, maxs);
    let dimension_count = (mins.len() * 2) as u32;
    let builder = FieldTypeBuilder::new().point_dimensions(dimension_count, dimension_count, 8);
    Field::new(name.to_owned(), builder.build(), FieldValue::Bytes(encoded))
}
/// Creates a point field for an `f32` range, encoded by `range::encode_float`.
///
/// The point dimension count is twice `mins.len()` with 4 bytes per dimension;
/// `maxs.len()` is not inspected here.
/// NOTE(review): presumably `range::encode_float` validates that both slices have the
/// same length — confirm.
pub fn float_range_field(name: &str, mins: &[f32], maxs: &[f32]) -> Field {
    let encoded = range::encode_float(mins, maxs);
    let dimension_count = (mins.len() * 2) as u32;
    let builder = FieldTypeBuilder::new().point_dimensions(dimension_count, dimension_count, 4);
    Field::new(name.to_owned(), builder.build(), FieldValue::Bytes(encoded))
}
/// Creates a point field for an `f64` range, encoded by `range::encode_double`.
///
/// The point dimension count is twice `mins.len()` with 8 bytes per dimension;
/// `maxs.len()` is not inspected here.
/// NOTE(review): presumably `range::encode_double` validates that both slices have the
/// same length — confirm.
pub fn double_range_field(name: &str, mins: &[f64], maxs: &[f64]) -> Field {
    let encoded = range::encode_double(mins, maxs);
    let dimension_count = (mins.len() * 2) as u32;
    let builder = FieldTypeBuilder::new().point_dimensions(dimension_count, dimension_count, 8);
    Field::new(name.to_owned(), builder.build(), FieldValue::Bytes(encoded))
}
/// Creates a feature field: `feature_name` becomes the term and `feature_value` is
/// packed into the term frequency.
///
/// The frequency is the upper 17 bits of the value's IEEE-754 bit pattern
/// (`to_bits() >> 15`), so for example `1.0` is encoded as `32512`. The field type is
/// indexed at `IndexOptions::DocsAndFreqs` with norms omitted; it is neither stored nor
/// tokenized.
///
/// # Panics
///
/// Panics if `feature_value` is NaN, infinite, zero or negative, or if it is a positive
/// subnormal (smaller than `f32::MIN_POSITIVE`).
pub fn feature_field(name: &str, feature_name: &str, feature_value: f32) -> Field {
    assert!(
        feature_value.is_finite() && feature_value > 0.0,
        "feature_value must be finite and positive, got {feature_value}"
    );
    // Subnormal inputs have an all-zero exponent, so the shift below leaves at most a
    // few mantissa bits and yields a frequency of 0 for the smallest ones. Reject them,
    // as Lucene's FeatureField does, instead of emitting an unusable frequency.
    assert!(
        feature_value >= f32::MIN_POSITIVE,
        "feature_value must be a positive normal float, got {feature_value}"
    );
    // The sign bit is clear for positive values, so the shifted pattern always fits in i32.
    let freq = (feature_value.to_bits() >> 15) as i32;
    let field_type = FieldTypeBuilder::new()
        .omit_norms(true)
        .index_options(IndexOptions::DocsAndFreqs)
        .build();
    Field::new(
        name.to_string(),
        field_type,
        FieldValue::Feature {
            term: feature_name.to_string(),
            freq,
        },
    )
}
// Unit tests for the field types, builders and constructor functions above.
//
// NOTE(review): the assertion macros used here (`assert_len_eq_x!`, `assert_some!`,
// `assert_none!`, `assert_lt!`, `assert_matches!`, `assert_contains!`,
// `assert_not_empty!`) are not defined or imported in this module; presumably they are
// made available crate-wide — confirm.
#[cfg(test)]
mod tests {
use super::*;
// keyword_field: stored, Docs-level index, norms omitted, SortedSet doc values.
#[test]
fn test_keyword_field() {
let f = keyword_field("path", "/foo/bar.txt");
assert_eq!(f.name(), "path");
assert_eq!(f.field_type().index_options(), IndexOptions::Docs);
assert!(f.field_type().omit_norms());
assert!(!f.field_type().tokenized());
assert!(f.field_type().stored());
assert_eq!(f.field_type().doc_values_type(), DocValuesType::SortedSet);
assert_eq!(f.string_value(), Some("/foo/bar.txt"));
}
// long_field: unstored, not indexed, one 8-byte point dimension, SortedNumeric doc values.
#[test]
fn test_long_field() {
let f = long_field("modified", 1234567890);
assert_eq!(f.name(), "modified");
assert_eq!(f.field_type().index_options(), IndexOptions::None);
assert!(!f.field_type().stored());
assert_eq!(
f.field_type().doc_values_type(),
DocValuesType::SortedNumeric
);
assert_eq!(f.field_type().point_dimension_count(), 1);
assert_eq!(f.field_type().point_num_bytes(), 8);
assert_eq!(f.numeric_value(), Some(1234567890));
}
// text_field: tokenized, positions-level index, norms kept, no doc values, not stored.
#[test]
fn test_text_field() {
let f = text_field("contents", "hello world");
assert_eq!(f.name(), "contents");
assert_eq!(
f.field_type().index_options(),
IndexOptions::DocsAndFreqsAndPositions
);
assert!(f.field_type().tokenized());
assert!(!f.field_type().stored());
assert!(!f.field_type().omit_norms());
assert_eq!(f.field_type().doc_values_type(), DocValuesType::None);
assert_eq!(f.string_value(), Some("hello world"));
}
// Document::add appends every field it is given.
#[test]
fn test_document() {
let mut doc = Document::new();
doc.add(keyword_field("path", "/foo.txt"));
doc.add(long_field("modified", 100));
doc.add(text_field("contents", "hello"));
assert_len_eq_x!(&doc.fields, 3);
}
// Pins the exact 8-byte point encoding of the long value 42.
#[test]
fn test_point_bytes() {
let f = long_field("modified", 42);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 8);
assert_eq!(pb, [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A]);
}
// stored_value is Some for a stored field and None for an unstored one.
#[test]
fn test_stored_value() {
let f = keyword_field("path", "/foo.txt");
assert_some!(f.stored_value());
let f = text_field("contents", "hello");
assert_none!(f.stored_value()); }
// IndexOptions variants are strictly increasing in declaration order.
#[test]
fn test_index_options_ordering() {
assert_lt!(IndexOptions::None, IndexOptions::Docs);
assert_lt!(IndexOptions::Docs, IndexOptions::DocsAndFreqs);
assert_lt!(
IndexOptions::DocsAndFreqs,
IndexOptions::DocsAndFreqsAndPositions
);
assert_lt!(
IndexOptions::DocsAndFreqsAndPositions,
IndexOptions::DocsAndFreqsAndPositionsAndOffsets
);
}
// is_indexed / has_points / has_doc_values / has_norms for keyword, long and text types.
#[test]
fn test_field_type_helpers() {
let ft_keyword = keyword_field("x", "y").field_type().clone();
assert!(ft_keyword.is_indexed());
assert!(!ft_keyword.has_points());
assert!(ft_keyword.has_doc_values());
assert!(!ft_keyword.has_norms());
let ft_long = long_field("x", 1).field_type().clone();
assert!(!ft_long.is_indexed());
assert!(ft_long.has_points());
assert!(ft_long.has_doc_values());
let ft_text = text_field("x", "y").field_type().clone();
assert!(ft_text.is_indexed());
assert!(!ft_text.has_points());
assert!(!ft_text.has_doc_values());
assert!(ft_text.has_norms());
}
// string_field: Docs-level index, norms omitted, no doc values; `stored` flag is honored.
#[test]
fn test_string_field() {
let f = string_field("title", "hello", true);
assert_eq!(f.name(), "title");
assert_eq!(f.field_type().index_options(), IndexOptions::Docs);
assert!(f.field_type().omit_norms());
assert!(!f.field_type().tokenized());
assert!(f.field_type().stored());
assert_eq!(f.field_type().doc_values_type(), DocValuesType::None);
assert_eq!(f.string_value(), Some("hello"));
let f_unstored = string_field("tag", "rust", false);
assert!(!f_unstored.field_type().stored());
assert_none!(f_unstored.stored_value());
}
// int_field: 4-byte point, SortedNumeric doc values, point bytes and stored value round-trip.
#[test]
fn test_int_field() {
let f = int_field("size", 42, true);
assert_eq!(f.name(), "size");
assert!(f.field_type().stored());
assert_eq!(f.field_type().point_dimension_count(), 1);
assert_eq!(f.field_type().point_num_bytes(), 4);
assert_eq!(
f.field_type().doc_values_type(),
DocValuesType::SortedNumeric
);
assert_eq!(f.numeric_value(), Some(42));
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 4);
assert_eq!(pb, sortable_bytes::from_int(42).to_vec());
if let Some(StoredValue::Int(v)) = f.stored_value() {
assert_eq!(v, 42);
} else {
panic!("expected StoredValue::Int");
}
}
// float_field: 4-byte point; numeric value and point bytes come from sortable_bytes.
#[test]
fn test_float_field() {
let f = float_field("score", 1.5, true);
assert_eq!(f.name(), "score");
assert!(f.field_type().stored());
assert_eq!(f.field_type().point_dimension_count(), 1);
assert_eq!(f.field_type().point_num_bytes(), 4);
assert_eq!(
f.field_type().doc_values_type(),
DocValuesType::SortedNumeric
);
assert_eq!(
f.numeric_value(),
Some(sortable_bytes::float_to_int(1.5) as i64)
);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 4);
assert_eq!(pb, sortable_bytes::from_float(1.5).to_vec());
if let Some(StoredValue::Float(v)) = f.stored_value() {
assert_eq!(v, 1.5);
} else {
panic!("expected StoredValue::Float");
}
}
// double_field: 8-byte point; numeric value and point bytes come from sortable_bytes.
#[test]
fn test_double_field() {
let f = double_field("rating", 9.87, true);
assert_eq!(f.name(), "rating");
assert!(f.field_type().stored());
assert_eq!(f.field_type().point_dimension_count(), 1);
assert_eq!(f.field_type().point_num_bytes(), 8);
assert_eq!(
f.field_type().doc_values_type(),
DocValuesType::SortedNumeric
);
assert_eq!(
f.numeric_value(),
Some(sortable_bytes::double_to_long(9.87))
);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 8);
assert_eq!(pb, sortable_bytes::from_double(9.87).to_vec());
if let Some(StoredValue::Double(v)) = f.stored_value() {
assert_eq!(v, 9.87);
} else {
panic!("expected StoredValue::Double");
}
}
// Each stored_*_field constructor yields the matching StoredValue variant.
#[test]
fn test_stored_field_variants() {
let f = stored_string_field("notes", "hello");
assert!(f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
if let Some(StoredValue::String(s)) = f.stored_value() {
assert_eq!(s, "hello");
} else {
panic!("expected StoredValue::String");
}
let f = stored_int_field("count", 99);
if let Some(StoredValue::Int(v)) = f.stored_value() {
assert_eq!(v, 99);
} else {
panic!("expected StoredValue::Int");
}
let f = stored_long_field("big", 123456789);
if let Some(StoredValue::Long(v)) = f.stored_value() {
assert_eq!(v, 123456789);
} else {
panic!("expected StoredValue::Long");
}
let f = stored_float_field("ratio", 1.5);
if let Some(StoredValue::Float(v)) = f.stored_value() {
assert_eq!(v, 1.5);
} else {
panic!("expected StoredValue::Float");
}
let f = stored_double_field("precise", 7.654);
if let Some(StoredValue::Double(v)) = f.stored_value() {
assert_eq!(v, 7.654);
} else {
panic!("expected StoredValue::Double");
}
let f = stored_bytes_field("raw", vec![1, 2, 3]);
if let Some(StoredValue::Bytes(b)) = f.stored_value() {
assert_eq!(b, vec![1, 2, 3]);
} else {
panic!("expected StoredValue::Bytes");
}
}
// numeric_doc_values_field: Numeric doc values only.
#[test]
fn test_numeric_doc_values_field() {
let f = numeric_doc_values_field("count", 42);
assert_eq!(f.name(), "count");
assert_eq!(f.field_type().doc_values_type(), DocValuesType::Numeric);
assert!(!f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
assert_eq!(f.numeric_value(), Some(42));
}
// binary_doc_values_field: Binary doc values only; bytes are kept verbatim.
#[test]
fn test_binary_doc_values_field() {
let f = binary_doc_values_field("payload", vec![1, 2, 3]);
assert_eq!(f.name(), "payload");
assert_eq!(f.field_type().doc_values_type(), DocValuesType::Binary);
assert!(!f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
if let FieldValue::Bytes(b) = f.value() {
assert_eq!(b, &[1, 2, 3]);
} else {
panic!("expected FieldValue::Bytes");
}
}
// sorted_doc_values_field: Sorted doc values only.
#[test]
fn test_sorted_doc_values_field() {
let f = sorted_doc_values_field("category", b"animals");
assert_eq!(f.name(), "category");
assert_eq!(f.field_type().doc_values_type(), DocValuesType::Sorted);
assert!(!f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
}
// sorted_set_doc_values_field: SortedSet doc values only.
#[test]
fn test_sorted_set_doc_values_field() {
let f = sorted_set_doc_values_field("tag", "rust");
assert_eq!(f.name(), "tag");
assert_eq!(f.field_type().doc_values_type(), DocValuesType::SortedSet);
assert!(!f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
}
// sorted_numeric_doc_values_field: SortedNumeric doc values only.
#[test]
fn test_sorted_numeric_doc_values_field() {
let f = sorted_numeric_doc_values_field("timestamp", 1000);
assert_eq!(f.name(), "timestamp");
assert_eq!(
f.field_type().doc_values_type(),
DocValuesType::SortedNumeric
);
assert!(!f.field_type().stored());
assert!(!f.field_type().is_indexed());
assert!(!f.field_type().has_points());
assert_eq!(f.numeric_value(), Some(1000));
}
// A fresh builder produces a type with every flag off and all counts at zero.
#[test]
fn test_field_type_builder_defaults() {
let ft = FieldTypeBuilder::new().build();
assert!(!ft.stored());
assert!(!ft.tokenized());
assert!(!ft.omit_norms());
assert_eq!(ft.index_options(), IndexOptions::None);
assert_eq!(ft.doc_values_type(), DocValuesType::None);
assert!(!ft.store_term_vectors());
assert!(!ft.store_term_vector_offsets());
assert!(!ft.store_term_vector_positions());
assert!(!ft.store_term_vector_payloads());
assert_eq!(ft.point_dimension_count(), 0);
assert_eq!(ft.point_index_dimension_count(), 0);
assert_eq!(ft.point_num_bytes(), 0);
}
// numeric_value is None for a text value.
#[test]
fn test_numeric_value_non_numeric() {
let f = keyword_field("path", "/foo");
assert_none!(f.numeric_value());
}
// point_bytes is None when the field type has no points.
#[test]
fn test_point_bytes_non_point() {
let f = text_field("contents", "hello");
assert_none!(f.point_bytes());
}
// point_bytes returns a Bytes value unchanged when the type has points.
#[test]
fn test_point_bytes_bytes_field() {
let ft = FieldTypeBuilder::new().point_dimensions(1, 1, 4).build();
let f = Field::new(
"raw_point".to_string(),
ft,
FieldValue::Bytes(vec![0x80, 0x00, 0x00, 0x2A]),
);
let pb = f.point_bytes().unwrap();
assert_eq!(pb, vec![0x80, 0x00, 0x00, 0x2A]);
}
// Debug formatting yields non-empty output for every FieldValue variant.
#[test]
fn test_field_value_debug_all_variants() {
let cases: Vec<FieldValue> = vec![
FieldValue::Text("hello".to_string()),
FieldValue::Int(42),
FieldValue::Long(100),
FieldValue::Float(1.5),
FieldValue::Double(2.5),
FieldValue::Bytes(vec![1, 2]),
FieldValue::Reader(Box::new(std::io::Cursor::new(vec![]))),
FieldValue::Feature {
term: "feat".to_string(),
freq: 100,
},
];
for val in &cases {
let s = format!("{:?}", val);
assert_not_empty!(s);
}
}
// An unstored int_field has no stored value but still has point bytes.
#[test]
fn test_int_field_not_stored() {
let f = int_field("x", 10, false);
assert!(!f.field_type().stored());
assert_none!(f.stored_value());
assert_some!(f.point_bytes());
}
// text_field_reader: same type as text_field; a Reader value has no string/stored/point form.
#[test]
fn test_text_field_reader() {
let f = text_field_reader("contents", std::io::Cursor::new(b"hello world".to_vec()));
assert_eq!(f.name(), "contents");
assert_eq!(
f.field_type().index_options(),
IndexOptions::DocsAndFreqsAndPositions
);
assert!(f.field_type().tokenized());
assert!(!f.field_type().stored());
assert_matches!(f.value(), FieldValue::Reader(_));
assert_none!(f.string_value());
assert_none!(f.stored_value());
assert_none!(f.point_bytes());
}
// Debug output of a Reader value names the variant.
#[test]
fn test_field_value_debug() {
let reader_val = FieldValue::Reader(Box::new(std::io::Cursor::new(vec![])));
let debug_str = format!("{:?}", reader_val);
assert_contains!(debug_str, "Reader");
}
// lat_lon_point: 2 dimensions x 4 bytes, latitude bytes first, then longitude bytes.
#[test]
fn test_lat_lon_point() {
let f = lat_lon_point("location", 40.7128, -74.006);
assert_eq!(f.name(), "location");
assert_eq!(f.field_type().point_dimension_count(), 2);
assert_eq!(f.field_type().point_index_dimension_count(), 2);
assert_eq!(f.field_type().point_num_bytes(), 4);
assert!(!f.field_type().stored());
assert_eq!(f.field_type().doc_values_type(), DocValuesType::None);
assert!(!f.field_type().is_indexed());
assert!(f.field_type().has_points());
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 8);
let expected_lat = sortable_bytes::from_int(geo::encode_latitude(40.7128));
let expected_lon = sortable_bytes::from_int(geo::encode_longitude(-74.006));
assert_eq!(&pb[0..4], &expected_lat);
assert_eq!(&pb[4..8], &expected_lon);
}
// int_range_field with two mins/maxs: 4 dimensions of 4 bytes, 16 bytes in total.
#[test]
fn test_int_range_field() {
let f = int_range_field("range", &[10, 20], &[30, 40]);
assert_eq!(f.name(), "range");
assert_eq!(f.field_type().point_dimension_count(), 4);
assert_eq!(f.field_type().point_index_dimension_count(), 4);
assert_eq!(f.field_type().point_num_bytes(), 4);
assert!(!f.field_type().stored());
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 16); }
// long_range_field with one min/max: 2 dimensions of 8 bytes, 16 bytes in total.
#[test]
fn test_long_range_field() {
let f = long_range_field("range", &[100], &[200]);
assert_eq!(f.field_type().point_dimension_count(), 2);
assert_eq!(f.field_type().point_num_bytes(), 8);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 16); }
// float_range_field with one min/max: 2 dimensions of 4 bytes, 8 bytes in total.
#[test]
fn test_float_range_field() {
let f = float_range_field("range", &[1.0], &[2.0]);
assert_eq!(f.field_type().point_dimension_count(), 2);
assert_eq!(f.field_type().point_num_bytes(), 4);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 8);
}
// double_range_field with two mins/maxs: 4 dimensions of 8 bytes, 32 bytes in total.
#[test]
fn test_double_range_field() {
let f = double_range_field("range", &[1.0, 2.0], &[3.0, 4.0]);
assert_eq!(f.field_type().point_dimension_count(), 4);
assert_eq!(f.field_type().point_num_bytes(), 8);
let pb = f.point_bytes().unwrap();
assert_len_eq_x!(&pb, 32); }
// feature_field: DocsAndFreqs index without norms; 1.0 encodes to frequency 32512.
#[test]
fn test_feature_field() {
let f = feature_field("features", "pagerank", 1.0);
assert_eq!(f.name(), "features");
assert!(!f.field_type().tokenized());
assert!(f.field_type().omit_norms());
assert_eq!(f.field_type().index_options(), IndexOptions::DocsAndFreqs);
assert!(!f.field_type().stored());
assert!(!f.field_type().has_points());
assert_matches!(f.value(), FieldValue::Feature { term, freq }
if term == "pagerank" && *freq == 32512);
assert_eq!(f.string_value(), Some("pagerank"));
assert_none!(f.numeric_value());
assert_none!(f.stored_value());
assert_none!(f.point_bytes());
}
// Pins the frequency encoding of 0.5 and 10.0.
#[test]
fn test_feature_field_encoding_known_values() {
let f = feature_field("f", "x", 0.5);
assert_matches!(f.value(), FieldValue::Feature { freq, .. } if *freq == 32256);
let f = feature_field("f", "x", 10.0);
assert_matches!(f.value(), FieldValue::Feature { freq, .. } if *freq == 33344);
}
// feature_field panics on a zero value.
#[test]
#[should_panic(expected = "finite and positive")]
fn test_feature_field_zero_value() {
feature_field("f", "x", 0.0);
}
// feature_field panics on a negative value.
#[test]
#[should_panic(expected = "finite and positive")]
fn test_feature_field_negative_value() {
feature_field("f", "x", -1.0);
}
// feature_field panics on NaN.
#[test]
#[should_panic(expected = "finite and positive")]
fn test_feature_field_nan_value() {
feature_field("f", "x", f32::NAN);
}
// Debug output of a Feature value names the variant and the term.
#[test]
fn test_feature_field_debug() {
let f = feature_field("features", "pagerank", 1.0);
let debug_str = format!("{:?}", f.value());
assert_contains!(debug_str, "Feature");
assert_contains!(debug_str, "pagerank");
}
// Every builder setter is reflected by the matching FieldType accessor.
#[test]
fn test_field_type_builder_each_setter() {
let ft = FieldTypeBuilder::new()
.stored(true)
.tokenized(true)
.omit_norms(true)
.index_options(IndexOptions::DocsAndFreqsAndPositionsAndOffsets)
.doc_values_type(DocValuesType::SortedNumeric)
.store_term_vectors(true)
.store_term_vector_positions(true)
.store_term_vector_offsets(true)
.store_term_vector_payloads(true)
.point_dimensions(3, 2, 8)
.build();
assert!(ft.stored());
assert!(ft.tokenized());
assert!(ft.omit_norms());
assert_eq!(
ft.index_options(),
IndexOptions::DocsAndFreqsAndPositionsAndOffsets
);
assert_eq!(ft.doc_values_type(), DocValuesType::SortedNumeric);
assert!(ft.store_term_vectors());
assert!(ft.store_term_vector_positions());
assert!(ft.store_term_vector_offsets());
assert!(ft.store_term_vector_payloads());
assert_eq!(ft.point_dimension_count(), 3);
assert_eq!(ft.point_index_dimension_count(), 2);
assert_eq!(ft.point_num_bytes(), 8);
}
// When a setter is called twice, the later value wins.
#[test]
fn test_field_type_builder_chaining() {
let ft = FieldTypeBuilder::new()
.stored(true)
.stored(false)
.tokenized(true)
.build();
assert!(!ft.stored());
assert!(ft.tokenized());
}
// FieldTypeBuilder::default() starts from the same state as new().
#[test]
fn test_field_type_builder_default_trait() {
let ft = FieldTypeBuilder::default().build();
assert!(!ft.stored());
assert!(!ft.tokenized());
assert_eq!(ft.index_options(), IndexOptions::None);
assert_eq!(ft.doc_values_type(), DocValuesType::None);
}
// text_field_with_term_vectors: vectors, positions and offsets on; payloads off.
#[test]
fn test_text_field_with_term_vectors() {
let f = text_field_with_term_vectors("contents", "hello world");
assert_eq!(f.name(), "contents");
assert_eq!(
f.field_type().index_options(),
IndexOptions::DocsAndFreqsAndPositions
);
assert!(f.field_type().tokenized());
assert!(!f.field_type().stored());
assert!(f.field_type().store_term_vectors());
assert!(f.field_type().store_term_vector_positions());
assert!(f.field_type().store_term_vector_offsets());
assert!(!f.field_type().store_term_vector_payloads());
assert!(f.field_type().has_norms());
assert_eq!(f.string_value(), Some("hello world"));
}
// Reader variant of the term-vector text field: same type, Reader value.
#[test]
fn test_text_field_reader_with_term_vectors() {
let reader = std::io::Cursor::new(b"hello world");
let f = text_field_reader_with_term_vectors("contents", reader);
assert_eq!(f.name(), "contents");
assert_eq!(
f.field_type().index_options(),
IndexOptions::DocsAndFreqsAndPositions
);
assert!(f.field_type().tokenized());
assert!(!f.field_type().stored());
assert!(f.field_type().store_term_vectors());
assert!(f.field_type().store_term_vector_positions());
assert!(f.field_type().store_term_vector_offsets());
assert!(!f.field_type().store_term_vector_payloads());
assert!(f.field_type().has_norms());
assert_matches!(f.value(), FieldValue::Reader(_));
}
}