use std::net::IpAddr;
use std::str::FromStr;
use base64::engine::general_purpose::STANDARD as BASE64;
use base64::Engine;
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use thiserror::Error;
use super::ip_options::IpAddrOptions;
use super::{Cardinality, IntoIpv6Addr};
use crate::schema::bytes_options::BytesOptions;
use crate::schema::facet_options::FacetOptions;
use crate::schema::{
DateOptions, Facet, IndexRecordOption, JsonObjectOptions, NumericOptions, TextFieldIndexing,
TextOptions, Value,
};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
#[derive(Debug, PartialEq, Error)]
pub enum ValueParsingError {
#[error("Overflow error. Expected {expected}, got {json}")]
OverflowError {
expected: &'static str,
json: serde_json::Value,
},
#[error("Type error. Expected {expected}, got {json}")]
TypeError {
expected: &'static str,
json: serde_json::Value,
},
#[error("Parse error on {json}: {error}")]
ParseError {
error: String,
json: serde_json::Value,
},
#[error("Invalid base64: {base64}")]
InvalidBase64 { base64: String },
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum Type {
Str = b's',
U64 = b'u',
I64 = b'i',
F64 = b'f',
Bool = b'o',
Date = b'd',
Facet = b'h',
Bytes = b'b',
Json = b'j',
IpAddr = b'p',
}
const ALL_TYPES: [Type; 10] = [
Type::Str,
Type::U64,
Type::I64,
Type::F64,
Type::Bool,
Type::Date,
Type::Facet,
Type::Bytes,
Type::Json,
Type::IpAddr,
];
impl Type {
pub fn iter_values() -> impl Iterator<Item = Type> {
ALL_TYPES.iter().cloned()
}
pub fn to_code(&self) -> u8 {
*self as u8
}
pub fn name(&self) -> &'static str {
match self {
Type::Str => "Str",
Type::U64 => "U64",
Type::I64 => "I64",
Type::F64 => "F64",
Type::Bool => "Bool",
Type::Date => "Date",
Type::Facet => "Facet",
Type::Bytes => "Bytes",
Type::Json => "Json",
Type::IpAddr => "IpAddr",
}
}
pub fn from_code(code: u8) -> Option<Self> {
match code {
b's' => Some(Type::Str),
b'u' => Some(Type::U64),
b'i' => Some(Type::I64),
b'f' => Some(Type::F64),
b'o' => Some(Type::Bool),
b'd' => Some(Type::Date),
b'h' => Some(Type::Facet),
b'b' => Some(Type::Bytes),
b'j' => Some(Type::Json),
b'p' => Some(Type::IpAddr),
_ => None,
}
}
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", content = "options")]
#[serde(rename_all = "snake_case")]
pub enum FieldType {
#[serde(rename = "text")]
Str(TextOptions),
U64(NumericOptions),
I64(NumericOptions),
F64(NumericOptions),
Bool(NumericOptions),
Date(DateOptions),
Facet(FacetOptions),
Bytes(BytesOptions),
JsonObject(JsonObjectOptions),
IpAddr(IpAddrOptions),
}
impl FieldType {
pub fn value_type(&self) -> Type {
match *self {
FieldType::Str(_) => Type::Str,
FieldType::U64(_) => Type::U64,
FieldType::I64(_) => Type::I64,
FieldType::F64(_) => Type::F64,
FieldType::Bool(_) => Type::Bool,
FieldType::Date(_) => Type::Date,
FieldType::Facet(_) => Type::Facet,
FieldType::Bytes(_) => Type::Bytes,
FieldType::JsonObject(_) => Type::Json,
FieldType::IpAddr(_) => Type::IpAddr,
}
}
pub fn is_ip_addr(&self) -> bool {
matches!(self, FieldType::IpAddr(_))
}
pub fn is_date(&self) -> bool {
matches!(self, FieldType::Date(_))
}
pub fn is_indexed(&self) -> bool {
match *self {
FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(),
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => int_options.is_indexed(),
FieldType::Date(ref date_options) => date_options.is_indexed(),
FieldType::Facet(ref _facet_options) => true,
FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(),
FieldType::JsonObject(ref json_object_options) => json_object_options.is_indexed(),
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_indexed(),
}
}
pub fn index_record_option(&self) -> Option<IndexRecordOption> {
match self {
FieldType::Str(text_options) => text_options
.get_indexing_options()
.map(|text_indexing| text_indexing.index_option()),
FieldType::JsonObject(json_object_options) => json_object_options
.get_text_indexing_options()
.map(|text_indexing| text_indexing.index_option()),
field_type => {
if field_type.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
}
}
pub fn is_fast(&self) -> bool {
match *self {
FieldType::Bytes(ref bytes_options) => bytes_options.is_fast(),
FieldType::Str(ref text_options) => text_options.is_fast(),
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => int_options.is_fast(),
FieldType::Date(ref date_options) => date_options.is_fast(),
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_fast(),
FieldType::Facet(_) => true,
FieldType::JsonObject(_) => false,
}
}
pub fn fastfield_cardinality(&self) -> Option<Cardinality> {
match *self {
FieldType::Bytes(ref bytes_options) => {
bytes_options.is_fast().then_some(Cardinality::SingleValue)
}
FieldType::Str(ref text_options) => {
text_options.is_fast().then_some(Cardinality::MultiValues)
}
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality(),
FieldType::Date(ref date_options) => date_options.get_fastfield_cardinality(),
FieldType::Facet(_) => Some(Cardinality::MultiValues),
FieldType::JsonObject(_) => None,
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.get_fastfield_cardinality(),
}
}
pub fn has_fieldnorms(&self) -> bool {
match *self {
FieldType::Str(ref text_options) => text_options
.get_indexing_options()
.map(|options| options.fieldnorms())
.unwrap_or(false),
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => int_options.fieldnorms(),
FieldType::Date(ref date_options) => date_options.fieldnorms(),
FieldType::Facet(_) => false,
FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(),
FieldType::JsonObject(ref _json_object_options) => false,
FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.fieldnorms(),
}
}
pub fn get_index_record_option(&self) -> Option<IndexRecordOption> {
match *self {
FieldType::Str(ref text_options) => text_options
.get_indexing_options()
.map(TextFieldIndexing::index_option),
FieldType::U64(ref int_options)
| FieldType::I64(ref int_options)
| FieldType::F64(ref int_options)
| FieldType::Bool(ref int_options) => {
if int_options.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
FieldType::Date(ref date_options) => {
if date_options.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
FieldType::Facet(ref _facet_options) => Some(IndexRecordOption::Basic),
FieldType::Bytes(ref bytes_options) => {
if bytes_options.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
FieldType::JsonObject(ref json_obj_options) => json_obj_options
.get_text_indexing_options()
.map(TextFieldIndexing::index_option),
FieldType::IpAddr(ref ip_addr_options) => {
if ip_addr_options.is_indexed() {
Some(IndexRecordOption::Basic)
} else {
None
}
}
}
}
pub fn value_from_json(&self, json: JsonValue) -> Result<Value, ValueParsingError> {
match json {
JsonValue::String(field_text) => {
match self {
FieldType::Date(_) => {
let dt_with_fixed_tz = OffsetDateTime::parse(&field_text, &Rfc3339)
.map_err(|_err| ValueParsingError::TypeError {
expected: "rfc3339 format",
json: JsonValue::String(field_text),
})?;
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
Err(ValueParsingError::TypeError {
expected: "an integer",
json: JsonValue::String(field_text),
})
}
FieldType::Bool(_) => Err(ValueParsingError::TypeError {
expected: "a boolean",
json: JsonValue::String(field_text),
}),
FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))),
FieldType::Bytes(_) => BASE64
.decode(&field_text)
.map(Value::Bytes)
.map_err(|_| ValueParsingError::InvalidBase64 { base64: field_text }),
FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
expected: "a json object",
json: JsonValue::String(field_text),
}),
FieldType::IpAddr(_) => {
let ip_addr: IpAddr = IpAddr::from_str(&field_text).map_err(|err| {
ValueParsingError::ParseError {
error: err.to_string(),
json: JsonValue::String(field_text),
}
})?;
Ok(Value::IpAddr(ip_addr.into_ipv6_addr()))
}
}
}
JsonValue::Number(field_val_num) => match self {
FieldType::I64(_) | FieldType::Date(_) => {
if let Some(field_val_i64) = field_val_num.as_i64() {
Ok(Value::I64(field_val_i64))
} else {
Err(ValueParsingError::OverflowError {
expected: "an i64 int",
json: JsonValue::Number(field_val_num),
})
}
}
FieldType::U64(_) => {
if let Some(field_val_u64) = field_val_num.as_u64() {
Ok(Value::U64(field_val_u64))
} else {
Err(ValueParsingError::OverflowError {
expected: "u64",
json: JsonValue::Number(field_val_num),
})
}
}
FieldType::F64(_) => {
if let Some(field_val_f64) = field_val_num.as_f64() {
Ok(Value::F64(field_val_f64))
} else {
Err(ValueParsingError::OverflowError {
expected: "a f64",
json: JsonValue::Number(field_val_num),
})
}
}
FieldType::Bool(_) => Err(ValueParsingError::TypeError {
expected: "a boolean",
json: JsonValue::Number(field_val_num),
}),
FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
})
}
FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
expected: "a json object",
json: JsonValue::Number(field_val_num),
}),
FieldType::IpAddr(_) => Err(ValueParsingError::TypeError {
expected: "a string with an ip addr",
json: JsonValue::Number(field_val_num),
}),
},
JsonValue::Object(json_map) => match self {
FieldType::Str(_) => {
if let Ok(tok_str_val) = serde_json::from_value::<PreTokenizedString>(
serde_json::Value::Object(json_map.clone()),
) {
Ok(Value::PreTokStr(tok_str_val))
} else {
Err(ValueParsingError::TypeError {
expected: "a string or an pretokenized string",
json: JsonValue::Object(json_map),
})
}
}
FieldType::JsonObject(_) => Ok(Value::JsonObject(json_map)),
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Object(json_map),
}),
},
JsonValue::Bool(json_bool_val) => match self {
FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)),
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Bool(json_bool_val),
}),
},
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: json.clone(),
}),
}
}
}
#[cfg(test)]
mod tests {
use serde_json::json;
use super::FieldType;
use crate::schema::field_type::ValueParsingError;
use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
use crate::time::{Date, Month, PrimitiveDateTime, Time};
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, Document};
#[test]
fn test_deserialize_json_date() {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", INDEXED);
let schema = schema_builder.build();
let doc_json = r#"{"date": "2019-10-12T07:20:50.52+02:00"}"#;
let doc = schema.parse_document(doc_json).unwrap();
let date = doc.get_first(date_field).unwrap();
assert_eq!("Date(2019-10-12T05:20:50.52Z)", format!("{:?}", date));
}
#[test]
fn test_serialize_json_date() {
let mut doc = Document::new();
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", INDEXED);
let schema = schema_builder.build();
let naive_date = Date::from_calendar_date(1982, Month::September, 17).unwrap();
let naive_time = Time::from_hms(13, 20, 0).unwrap();
let date_time = PrimitiveDateTime::new(naive_date, naive_time);
doc.add_date(date_field, DateTime::from_primitive(date_time));
let doc_json = schema.to_json(&doc);
assert_eq!(doc_json, r#"{"date":["1982-09-17T13:20:00Z"]}"#);
}
#[test]
fn test_bytes_value_from_json() {
let result = FieldType::Bytes(Default::default())
.value_from_json(json!("dGhpcyBpcyBhIHRlc3Q="))
.unwrap();
assert_eq!(result, Value::Bytes("this is a test".as_bytes().to_vec()));
let result = FieldType::Bytes(Default::default()).value_from_json(json!(521));
match result {
Err(ValueParsingError::TypeError { .. }) => {}
_ => panic!("Expected parse failure for wrong type"),
}
let result = FieldType::Bytes(Default::default()).value_from_json(json!("-"));
match result {
Err(ValueParsingError::InvalidBase64 { .. }) => {}
_ => panic!("Expected parse failure for invalid base64"),
}
}
#[test]
fn test_pre_tok_str_value_from_json() {
let pre_tokenized_string_json = r#"{
"text": "The Old Man",
"tokens": [
{
"offset_from": 0,
"offset_to": 3,
"position": 0,
"text": "The",
"position_length": 1
},
{
"offset_from": 4,
"offset_to": 7,
"position": 1,
"text": "Old",
"position_length": 1
},
{
"offset_from": 8,
"offset_to": 11,
"position": 2,
"text": "Man",
"position_length": 1
}
]
}"#;
let expected_value = Value::PreTokStr(PreTokenizedString {
text: String::from("The Old Man"),
tokens: vec![
Token {
offset_from: 0,
offset_to: 3,
position: 0,
text: String::from("The"),
position_length: 1,
},
Token {
offset_from: 4,
offset_to: 7,
position: 1,
text: String::from("Old"),
position_length: 1,
},
Token {
offset_from: 8,
offset_to: 11,
position: 2,
text: String::from("Man"),
position_length: 1,
},
],
});
let deserialized_value = FieldType::Str(TextOptions::default())
.value_from_json(serde_json::from_str(pre_tokenized_string_json).unwrap())
.unwrap();
assert_eq!(deserialized_value, expected_value);
let serialized_value_json = serde_json::to_string_pretty(&expected_value).unwrap();
assert_eq!(serialized_value_json, pre_tokenized_string_json);
}
#[test]
fn test_type_codes() {
for type_val in Type::iter_values() {
let code = type_val.to_code();
assert_eq!(Type::from_code(code), Some(type_val));
}
assert_eq!(Type::from_code(b'z'), None);
}
}