use std::hash::Hash;
use std::net::Ipv6Addr;
use std::{fmt, str};
use columnar::MonotonicallyMappableToU128;
use common::json_path_writer::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP_STR};
use common::JsonPathWriter;
use serde::{Deserialize, Serialize};
use super::date_time_options::DATE_TIME_PRECISION_INDEXED;
use super::{Field, Schema};
use crate::fastfield::FastValue;
use crate::json_utils::split_json_path;
use crate::schema::{Facet, Type};
use crate::DateTime;
/// A field identifier paired with the type-tagged, serialized bytes of a value.
///
/// The first byte of `serialized_value_bytes` is the type code written by the
/// constructors of this type; the bytes after it are the value payload.
///
/// The field order is significant: the derived comparison traits compare
/// `field` first and the serialized bytes second.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
pub struct Term {
    /// Field this term belongs to.
    field: Field,
    /// Type tag byte followed by the value payload.
    serialized_value_bytes: Vec<u8>,
}
/// Number of bytes at the start of `Term::serialized_value_bytes` that are
/// reserved for the type code; the value payload starts right after them.
const TERM_TYPE_TAG_LEN: usize = 1;
impl Term {
    /// Rebuilds a term from the layout produced by [`Term::serialized_term`]:
    /// a 4-byte big-endian field id followed by the type-tagged value bytes.
    ///
    /// The value bytes are copied as-is and are not validated.
    ///
    /// # Panics
    ///
    /// Panics if `serialized` holds fewer than 4 bytes.
    #[deprecated(
        note = "we want to avoid working on the serialized representation directly, replace with \
                typed API calls (add more if needed) or use serde to serialize/deserialize"
    )]
    pub fn wrap(serialized: &[u8]) -> Term {
        let (field_id_bytes, value_bytes) = serialized.split_at(4);
        let field_id_bytes: [u8; 4] = field_id_bytes
            .try_into()
            .expect("split_at(4) yields exactly four bytes");
        Term {
            field: Field::from_field_id(u32::from_be_bytes(field_id_bytes)),
            serialized_value_bytes: value_bytes.to_vec(),
        }
    }

    /// Serializes the term as the big-endian field id followed by the
    /// type-tagged value bytes (the inverse of [`Term::wrap`]).
    #[deprecated(
        note = "we want to avoid working on the serialized representation directly, replace with \
                typed API calls (add more if needed) or use serde to serialize/deserialize"
    )]
    pub fn serialized_term(&self) -> Vec<u8> {
        let field_id_bytes = self.field.field_id().to_be_bytes();
        let mut serialized =
            Vec::with_capacity(field_id_bytes.len() + self.serialized_value_bytes.len());
        serialized.extend_from_slice(&field_id_bytes);
        serialized.extend_from_slice(&self.serialized_value_bytes);
        serialized
    }

    /// Creates an empty term for field id `0` with a zeroed type tag, reserving
    /// room for `capacity` value bytes in addition to the tag.
    pub fn with_capacity(capacity: usize) -> Term {
        let mut data = Vec::with_capacity(TERM_TYPE_TAG_LEN + capacity);
        data.resize(TERM_TYPE_TAG_LEN, 0u8);
        Term {
            field: Field::from_field_id(0u32),
            serialized_value_bytes: data,
        }
    }

    /// Creates a `Type::Json` term whose value consists only of the encoded JSON path.
    ///
    /// `json_path` is split with `split_json_path`, each segment is pushed into a
    /// `JsonPathWriter` configured with `expand_dots_enabled`, and the writer is
    /// terminated with `set_end` before its bytes are appended to the term.
    pub fn from_field_json_path(field: Field, json_path: &str, expand_dots_enabled: bool) -> Term {
        let mut path_writer = JsonPathWriter::with_expand_dots(expand_dots_enabled);
        for segment in split_json_path(json_path) {
            path_writer.push(&segment);
        }
        path_writer.set_end();
        let mut term = Term::with_type_and_field(Type::Json, field);
        term.append_bytes(path_writer.as_str().as_bytes());
        term
    }

    /// Returns the field name looked up in `schema`, followed by `.` and the
    /// JSON path when [`Term::get_json_path`] yields one.
    pub fn get_full_path(&self, schema: &Schema) -> String {
        let mut full_path = schema.get_field_name(self.field()).to_string();
        if let Some(json_path) = self.get_json_path() {
            full_path.push('.');
            full_path.push_str(&json_path);
        }
        full_path
    }

    /// Returns the stored JSON path (without its end-of-path marker), or `None`
    /// when the value cannot be read as a JSON value with a path.
    ///
    /// The stored bytes are not guaranteed to be valid UTF-8 (terms can be built
    /// from raw bytes or deserialized), so they are converted with a checked,
    /// lossy conversion instead of an unchecked one.
    pub fn get_json_path(&self) -> Option<String> {
        // Bind the wrapper first: `as_json` borrows from it.
        let value = self.value();
        let (json_path, _) = value.as_json()?;
        // `as_json` keeps the end-of-path marker as the last byte of the path.
        let (_end_marker, path_bytes) = json_path.split_last()?;
        Some(String::from_utf8_lossy(path_bytes).into_owned())
    }

    /// Creates an empty term of type `typ` for `field`, with room for 8 value bytes.
    pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term {
        let mut term = Self::with_capacity(8);
        term.set_field_and_type(field, typ);
        term
    }

    /// Creates a term of type `typ` for `field` whose value is a copy of `bytes`.
    fn with_bytes_and_field_and_payload(typ: Type, field: Field, bytes: &[u8]) -> Term {
        let mut term = Self::with_capacity(bytes.len());
        term.set_field_and_type(field, typ);
        term.serialized_value_bytes.extend_from_slice(bytes);
        term
    }

    /// Creates a term from a fast value, stored as the big-endian bytes of `val.to_u64()`.
    pub(crate) fn from_fast_value<T: FastValue>(field: Field, val: &T) -> Term {
        let mut term = Self::with_type_and_field(T::to_type(), field);
        term.set_bytes(&val.to_u64().to_be_bytes());
        term
    }

    /// Sets the field and the type tag of a term that holds no value yet.
    ///
    /// # Panics
    ///
    /// Panics if the term is not empty (see [`Term::is_empty`]).
    pub(crate) fn set_field_and_type(&mut self, field: Field, typ: Type) {
        assert!(self.is_empty());
        self.field = field;
        self.serialized_value_bytes[0] = typ.to_code();
    }

    /// Returns `true` when the term holds only its type tag and no value bytes.
    pub fn is_empty(&self) -> bool {
        self.serialized_value_bytes.len() == TERM_TYPE_TAG_LEN
    }

    /// Creates a `Type::IpAddr` term, stored as the big-endian bytes of `ip_addr.to_u128()`.
    pub fn from_field_ip_addr(field: Field, ip_addr: Ipv6Addr) -> Term {
        let mut term = Self::with_type_and_field(Type::IpAddr, field);
        term.set_bytes(&ip_addr.to_u128().to_be_bytes());
        term
    }

    /// Creates a term from a `u64` value.
    pub fn from_field_u64(field: Field, val: u64) -> Term {
        Term::from_fast_value(field, &val)
    }

    /// Creates a term from an `i64` value.
    pub fn from_field_i64(field: Field, val: i64) -> Term {
        Term::from_fast_value(field, &val)
    }

    /// Creates a term from an `f64` value.
    pub fn from_field_f64(field: Field, val: f64) -> Term {
        Term::from_fast_value(field, &val)
    }

    /// Creates a term from a `bool` value.
    pub fn from_field_bool(field: Field, val: bool) -> Term {
        Term::from_fast_value(field, &val)
    }

    /// Creates a term from a `DateTime` value, stored without truncation.
    pub fn from_field_date(field: Field, val: DateTime) -> Term {
        Term::from_fast_value(field, &val)
    }

    /// Creates a term from a `DateTime` value truncated to `DATE_TIME_PRECISION_INDEXED`.
    pub fn from_field_date_for_search(field: Field, val: DateTime) -> Term {
        Term::from_fast_value(field, &val.truncate(DATE_TIME_PRECISION_INDEXED))
    }

    /// Creates a `Type::Facet` term from the encoded string of `facet`.
    pub fn from_facet(field: Field, facet: &Facet) -> Term {
        let facet_encoded_str = facet.encoded_str();
        Term::with_bytes_and_field_and_payload(Type::Facet, field, facet_encoded_str.as_bytes())
    }

    /// Creates a `Type::Str` term from the UTF-8 bytes of `text`.
    pub fn from_field_text(field: Field, text: &str) -> Term {
        Term::with_bytes_and_field_and_payload(Type::Str, field, text.as_bytes())
    }

    /// Creates a `Type::Bytes` term from `bytes`.
    pub fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
        Term::with_bytes_and_field_and_payload(Type::Bytes, field, bytes)
    }

    /// Drops the value bytes and sets the type tag to `typ`; the field is unchanged.
    pub fn clear_with_type(&mut self, typ: Type) {
        self.truncate_value_bytes(0);
        self.serialized_value_bytes[0] = typ.to_code();
    }

    /// Appends the type code of `T` followed by the big-endian bytes of `val.to_u64()`.
    ///
    /// Existing bytes are kept, so this can follow an already written prefix
    /// such as a JSON path.
    pub fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
        self.serialized_value_bytes.push(T::to_type().to_code());
        self.serialized_value_bytes
            .extend_from_slice(&val.to_u64().to_be_bytes());
    }

    /// Appends the `Type::Str` type code followed by the UTF-8 bytes of `val`.
    ///
    /// Existing bytes are kept.
    pub fn append_type_and_str(&mut self, val: &str) {
        self.serialized_value_bytes.push(Type::Str.to_code());
        self.serialized_value_bytes.extend_from_slice(val.as_bytes());
    }

    /// Replaces the value bytes with `bytes`, keeping the type tag.
    pub fn set_bytes(&mut self, bytes: &[u8]) {
        self.truncate_value_bytes(0);
        self.serialized_value_bytes.extend_from_slice(bytes);
    }

    /// Truncates the value to at most `len` bytes; the type tag is not counted
    /// in `len` and is always kept.
    pub fn truncate_value_bytes(&mut self, len: usize) {
        self.serialized_value_bytes
            .truncate(len + TERM_TYPE_TAG_LEN);
    }

    /// Returns the number of value bytes, not counting the type tag.
    ///
    /// Returns `0` instead of underflowing if the buffer does not even hold the
    /// type tag (which can only happen for terms built from raw bytes).
    pub fn len_bytes(&self) -> usize {
        self.serialized_value_bytes
            .len()
            .saturating_sub(TERM_TYPE_TAG_LEN)
    }

    /// Appends `bytes` to the term and returns a mutable view of the bytes that
    /// were just appended.
    #[inline]
    pub fn append_bytes(&mut self, bytes: &[u8]) -> &mut [u8] {
        let len_before = self.serialized_value_bytes.len();
        self.serialized_value_bytes.extend_from_slice(bytes);
        &mut self.serialized_value_bytes[len_before..]
    }

    /// Returns the type encoded in the type tag.
    ///
    /// # Panics
    ///
    /// Panics if the tag is missing or is not a valid type code.
    pub fn typ(&self) -> Type {
        self.value().typ()
    }

    /// Returns the field this term belongs to.
    pub fn field(&self) -> Field {
        self.field
    }

    /// Returns the value bytes without the leading type tag.
    ///
    /// # Panics
    ///
    /// Panics if the buffer does not hold the type tag.
    pub fn serialized_value_bytes(&self) -> &[u8] {
        &self.serialized_value_bytes[TERM_TYPE_TAG_LEN..]
    }

    /// Returns a typed view over the type tag and value bytes of this term.
    pub fn value(&self) -> ValueBytes<&[u8]> {
        ValueBytes::wrap(self.serialized_value_bytes.as_ref())
    }
}
/// Thin wrapper around a byte container (`B`) holding a type-tagged value:
/// one type code byte followed by the value payload.
#[derive(Clone)]
pub struct ValueBytes<B: AsRef<[u8]>>(B);
impl<B> ValueBytes<B>
where B: AsRef<[u8]>
{
    /// Wraps `data`, which is expected to start with a type code byte.
    /// No validation is performed here.
    pub fn wrap(data: B) -> ValueBytes<B> {
        ValueBytes(data)
    }

    /// Copies the wrapped bytes into an owned `ValueBytes`.
    pub fn to_owned(&self) -> ValueBytes<Vec<u8>> {
        ValueBytes(self.0.as_ref().to_vec())
    }

    /// Returns the raw type code (the first byte).
    ///
    /// # Panics
    ///
    /// Panics if the wrapped bytes are empty.
    fn typ_code(&self) -> u8 {
        self.0.as_ref()[0]
    }

    /// Non-panicking type lookup: `None` when the bytes are empty or when the
    /// first byte is not a known type code.
    fn try_typ(&self) -> Option<Type> {
        self.0.as_ref().first().copied().and_then(Type::from_code)
    }

    /// Returns the type of the value.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped bytes are empty or start with an unknown type code.
    pub fn typ(&self) -> Type {
        Type::from_code(self.typ_code()).expect("The term has an invalid type code")
    }

    /// Returns the value as `u64`, or `None` if the type or the payload length
    /// does not match.
    pub fn as_u64(&self) -> Option<u64> {
        self.get_fast_type::<u64>()
    }

    /// Decodes the payload as the fast value type `T`.
    ///
    /// Returns `None` if the type tag is not `T::to_type()` or if the payload
    /// is not exactly the 8 bytes of a big-endian `u64`.
    fn get_fast_type<T: FastValue>(&self) -> Option<T> {
        if self.typ() != T::to_type() {
            return None;
        }
        let value_bytes = self.raw_value_bytes_payload();
        let value_u64 = u64::from_be_bytes(value_bytes.try_into().ok()?);
        Some(T::from_u64(value_u64))
    }

    /// Returns the value as `i64`, or `None` if the type or the payload length
    /// does not match.
    pub fn as_i64(&self) -> Option<i64> {
        self.get_fast_type::<i64>()
    }

    /// Returns the value as `f64`, or `None` if the type or the payload length
    /// does not match.
    pub fn as_f64(&self) -> Option<f64> {
        self.get_fast_type::<f64>()
    }

    /// Returns the value as `bool`, or `None` if the type or the payload length
    /// does not match.
    pub fn as_bool(&self) -> Option<bool> {
        self.get_fast_type::<bool>()
    }

    /// Returns the value as `DateTime`, or `None` if the type or the payload
    /// length does not match.
    pub fn as_date(&self) -> Option<DateTime> {
        self.get_fast_type::<DateTime>()
    }

    /// Returns the payload as a string slice, or `None` if the type is not
    /// `Type::Str` or the payload is not valid UTF-8.
    pub fn as_str(&self) -> Option<&str> {
        if self.typ() != Type::Str {
            return None;
        }
        str::from_utf8(self.raw_value_bytes_payload()).ok()
    }

    /// Returns the payload as a `Facet`, or `None` if the type is not
    /// `Type::Facet` or the payload is not valid UTF-8.
    pub fn as_facet(&self) -> Option<Facet> {
        if self.typ() != Type::Facet {
            return None;
        }
        let facet_encode_str = str::from_utf8(self.raw_value_bytes_payload()).ok()?;
        Some(Facet::from_encoded_string(facet_encode_str.to_string()))
    }

    /// Returns the payload bytes, or `None` if the type is not `Type::Bytes`.
    pub fn as_bytes(&self) -> Option<&[u8]> {
        if self.typ() != Type::Bytes {
            return None;
        }
        Some(self.raw_value_bytes_payload())
    }

    /// Returns the payload as an `Ipv6Addr`, or `None` if the type is not
    /// `Type::IpAddr` or the payload is not exactly the 16 bytes of a
    /// big-endian `u128`.
    pub fn as_ip_addr(&self) -> Option<Ipv6Addr> {
        if self.typ() != Type::IpAddr {
            return None;
        }
        let ip_u128 = u128::from_be_bytes(self.raw_value_bytes_payload().try_into().ok()?);
        Some(Ipv6Addr::from_u128(ip_u128))
    }

    /// Returns the type of the value stored after the JSON path.
    ///
    /// Returns `None` if this is not a JSON value, if no end-of-path marker is
    /// found, or if no valid typed value follows the path (for instance when
    /// only a path has been written so far).
    pub fn json_path_type(&self) -> Option<Type> {
        self.as_json_value_bytes()?.try_typ()
    }

    /// Splits a JSON value into its path bytes (including the trailing
    /// `JSON_END_OF_PATH` marker) and the typed value that follows.
    ///
    /// Returns `None` if the type is not `Type::Json` or if the payload holds
    /// no `JSON_END_OF_PATH` byte. The returned value part may be empty.
    pub(crate) fn as_json(&self) -> Option<(&[u8], ValueBytes<&[u8]>)> {
        if self.typ() != Type::Json {
            return None;
        }
        let bytes = self.raw_value_bytes_payload();
        let pos = bytes.iter().position(|&b| b == JSON_END_OF_PATH)?;
        let (json_path_bytes, term) = bytes.split_at(pos + 1);
        Some((json_path_bytes, ValueBytes::wrap(term)))
    }

    /// Returns only the typed value part of a JSON value (see `as_json`).
    pub(crate) fn as_json_value_bytes(&self) -> Option<ValueBytes<&[u8]>> {
        self.as_json().map(|(_path, value)| value)
    }

    /// Returns the bytes following the type code.
    ///
    /// Returns an empty slice when there is nothing after the type code,
    /// including when the wrapped bytes are themselves empty (the value part
    /// of a JSON term that only holds a path).
    pub(crate) fn raw_value_bytes_payload(&self) -> &[u8] {
        self.0.as_ref().get(1..).unwrap_or(&[])
    }

    /// Returns a copy of the innermost payload: for a JSON value, the payload
    /// of the value stored after the path; otherwise the payload itself.
    pub(crate) fn value_bytes_payload(&self) -> Vec<u8> {
        match self.as_json_value_bytes() {
            Some(value_bytes) => value_bytes.raw_value_bytes_payload().to_vec(),
            None => self.raw_value_bytes_payload().to_vec(),
        }
    }

    /// Returns all wrapped bytes, type code included.
    pub fn as_serialized(&self) -> &[u8] {
        self.0.as_ref()
    }

    /// Writes a human-readable description (`type=..., <value>`) of the value.
    ///
    /// This backs `Debug` output, so it never panics on malformed data: empty
    /// bytes produce no output, an unknown type code is reported as such, and a
    /// JSON path that is not valid UTF-8 is rendered lossily.
    fn debug_value_bytes(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let typ = match self.0.as_ref().first() {
            // Nothing to describe, e.g. the value part of a JSON term that
            // only holds a path.
            None => return Ok(()),
            Some(&code) => match Type::from_code(code) {
                Some(typ) => typ,
                None => return write!(f, "type=<invalid code {code}>"),
            },
        };
        write!(f, "type={typ:?}, ")?;
        match typ {
            Type::Str => write_opt(f, self.as_str()),
            Type::U64 => write_opt(f, self.as_u64()),
            Type::I64 => write_opt(f, self.as_i64()),
            Type::F64 => write_opt(f, self.as_f64()),
            Type::Bool => write_opt(f, self.as_bool()),
            Type::Date => write_opt(f, self.as_date()),
            Type::Facet => write_opt(f, self.as_facet()),
            Type::Bytes => write_opt(f, self.as_bytes()),
            Type::Json => match self.as_json() {
                Some((path_bytes, sub_value_bytes)) => {
                    // Drop the trailing end-of-path marker before printing.
                    let path_only = path_bytes
                        .split_last()
                        .map_or(path_bytes, |(_end_marker, head)| head);
                    let path_pretty = String::from_utf8_lossy(path_only)
                        .replace(JSON_PATH_SEGMENT_SEP_STR, ".");
                    write!(f, "path={path_pretty}, ")?;
                    sub_value_bytes.debug_value_bytes(f)
                }
                None => Ok(()),
            },
            Type::IpAddr => write_opt(f, self.as_ip_addr()),
        }
    }
}
/// Writes the `Debug` representation of the contained value to `f`, or writes
/// nothing at all when `val_opt` is `None`.
fn write_opt<T: std::fmt::Debug>(f: &mut fmt::Formatter, val_opt: Option<T>) -> fmt::Result {
    match val_opt {
        Some(inner) => write!(f, "{inner:?}"),
        None => Ok(()),
    }
}
impl fmt::Debug for Term {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let field_id = self.field.field_id();
write!(f, "Term(field={field_id}, ")?;
let value_bytes = ValueBytes::wrap(&self.serialized_value_bytes);
value_bytes.debug_value_bytes(f)?;
write!(f, ")",)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
    use crate::schema::*;

    /// A text term reports its field, the `Str` type and the original string.
    #[test]
    pub fn test_term_str() {
        let mut builder = Schema::builder();
        // Register another field first, so that "title" is the second field added.
        builder.add_text_field("text", STRING);
        let title = builder.add_text_field("title", STRING);

        let text_term = Term::from_field_text(title, "test");

        assert_eq!(text_term.field(), title);
        assert_eq!(text_term.typ(), Type::Str);
        assert_eq!(text_term.value().as_str(), Some("test"));
    }

    /// A `u64` term is stored on 8 value bytes and round-trips its value.
    #[test]
    pub fn test_term_u64() {
        let mut builder = Schema::builder();
        let count = builder.add_u64_field("count", INDEXED);

        let count_term = Term::from_field_u64(count, 983u64);

        assert_eq!(count_term.field(), count);
        assert_eq!(count_term.typ(), Type::U64);
        assert_eq!(count_term.serialized_value_bytes().len(), 8);
        assert_eq!(count_term.value().as_u64(), Some(983u64));
    }

    /// A `bool` term is stored on 8 value bytes and round-trips its value.
    #[test]
    pub fn test_term_bool() {
        let mut builder = Schema::builder();
        let flag = builder.add_bool_field("bool", INDEXED);

        let flag_term = Term::from_field_bool(flag, true);

        assert_eq!(flag_term.field(), flag);
        assert_eq!(flag_term.typ(), Type::Bool);
        assert_eq!(flag_term.serialized_value_bytes().len(), 8);
        assert_eq!(flag_term.value().as_bool(), Some(true));
    }
}