use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Handle that identifies a field of a [`Schema`] by its position.
///
/// The wrapped `u32` is the index of the field's [`FieldEntry`] in the schema's
/// field list; [`SchemaBuilder`] hands these out in insertion order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
/// The kind of value a schema field holds.
///
/// Variants are serialized under the snake_case form of their identifier:
/// `"text"`, `"u64"`, `"i64"`, `"f64"`, `"bytes"`, `"sparse_vector"`,
/// `"dense_vector"` and `"json"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FieldType {
    /// String content.
    Text,
    /// Unsigned 64-bit integer.
    U64,
    /// Signed 64-bit integer.
    I64,
    /// 64-bit floating point number.
    F64,
    /// Raw byte string.
    Bytes,
    /// Sparse vector of `(index, weight)` pairs.
    SparseVector,
    /// Dense vector of `f32` components.
    DenseVector,
    /// Arbitrary JSON value.
    Json,
}
/// Schema-level description of a single field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
/// Field name; `Schema` keys its name lookup on this.
pub name: String,
/// Kind of value the field holds.
pub field_type: FieldType,
/// Flag recording whether the field was declared as indexed.
pub indexed: bool,
/// Flag recording whether the field was declared as stored.
pub stored: bool,
/// Tokenizer name, if any. `SchemaBuilder` sets `"default"` for plain text
/// fields and `None` for every non-text field it creates.
pub tokenizer: Option<String>,
/// Marks the field as multi-valued: `Document::to_json` always emits an array
/// for such a field. Falls back to `false` when missing from serialized data.
#[serde(default)]
pub multi: bool,
/// Optional position-tracking mode; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub positions: Option<PositionMode>,
/// Sparse-vector settings; `SchemaBuilder` fills this for sparse vector fields.
/// Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
/// Dense-vector settings; `SchemaBuilder` fills this for dense vector fields.
/// Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dense_vector_config: Option<DenseVectorConfig>,
}
/// Selects which position information is tracked for a field.
///
/// Serialized in snake_case (`ordinal`, `token_position`, `full`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
/// Only `tracks_ordinal` reports `true`.
Ordinal,
/// Only `tracks_token_position` reports `true`.
TokenPosition,
/// Both `tracks_ordinal` and `tracks_token_position` report `true`.
Full,
}
impl PositionMode {
    /// Returns `true` for the modes that record ordinals
    /// (`Ordinal` and `Full`).
    pub fn tracks_ordinal(&self) -> bool {
        match self {
            PositionMode::Ordinal | PositionMode::Full => true,
            PositionMode::TokenPosition => false,
        }
    }

    /// Returns `true` for the modes that record token positions
    /// (`TokenPosition` and `Full`).
    pub fn tracks_token_position(&self) -> bool {
        match self {
            PositionMode::TokenPosition | PositionMode::Full => true,
            PositionMode::Ordinal => false,
        }
    }
}
/// Index structure selected for a dense vector field.
///
/// `RaBitQ` is the `Default`. Serialized names are derived from the variant
/// identifiers by serde's `snake_case` rule, which inserts an underscore before
/// every interior capital letter (so `RaBitQ` is written as `ra_bit_q`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
/// Reported by `DenseVectorConfig::is_flat`; its default build threshold is
/// `usize::MAX`.
Flat,
/// Default index type; default build threshold is 1000.
#[default]
RaBitQ,
/// Counted as IVF by `DenseVectorConfig::uses_ivf`; default build threshold
/// is 10000.
IvfRaBitQ,
/// Counted as IVF by `DenseVectorConfig::uses_ivf` and reported by
/// `uses_scann`; default build threshold is 10000.
ScaNN,
}
/// Settings for a dense vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
/// Dimensionality of the vectors.
pub dim: usize,
/// Index structure to use; falls back to `VectorIndexType::default()` when
/// missing from serialized data.
#[serde(default)]
pub index_type: VectorIndexType,
/// Store-raw flag; deserializes to `true` when missing.
#[serde(default = "default_store_raw")]
pub store_raw: bool,
/// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
/// from the number of vectors.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub num_clusters: Option<usize>,
/// Probe count; deserializes to 32 when missing.
#[serde(default = "default_nprobe")]
pub nprobe: usize,
/// Optional reduced dimension; `index_dim` returns this instead of `dim`
/// when set.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub mrl_dim: Option<usize>,
/// Optional override for the value returned by `default_build_threshold`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub build_threshold: Option<usize>,
}
/// Serde fallback for `DenseVectorConfig::store_raw` when the key is absent.
fn default_store_raw() -> bool {
    const STORE_RAW_BY_DEFAULT: bool = true;
    STORE_RAW_BY_DEFAULT
}
/// Serde fallback for `DenseVectorConfig::nprobe` when the key is absent.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
impl DenseVectorConfig {
    /// Baseline configuration: `RaBitQ` index, raw storage on, `nprobe` of 32,
    /// and no cluster count, MRL dimension or build threshold override.
    pub fn new(dim: usize) -> Self {
        Self {
            dim,
            index_type: VectorIndexType::RaBitQ,
            store_raw: true,
            num_clusters: None,
            nprobe: 32,
            mrl_dim: None,
            build_threshold: None,
        }
    }

    /// `IvfRaBitQ` configuration with the given cluster count and probe count.
    pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
        Self {
            index_type: VectorIndexType::IvfRaBitQ,
            num_clusters,
            nprobe,
            ..Self::new(dim)
        }
    }

    /// `ScaNN` configuration with the given cluster count and probe count.
    pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
        Self {
            index_type: VectorIndexType::ScaNN,
            num_clusters,
            nprobe,
            ..Self::new(dim)
        }
    }

    /// `Flat` configuration; `nprobe` is set to 0.
    pub fn flat(dim: usize) -> Self {
        Self {
            index_type: VectorIndexType::Flat,
            nprobe: 0,
            ..Self::new(dim)
        }
    }

    /// Same as [`DenseVectorConfig::new`] but with `store_raw` turned off.
    pub fn without_raw(dim: usize) -> Self {
        Self {
            store_raw: false,
            ..Self::new(dim)
        }
    }

    /// Builder-style setter for `mrl_dim`.
    pub fn with_mrl_dim(self, mrl_dim: usize) -> Self {
        Self {
            mrl_dim: Some(mrl_dim),
            ..self
        }
    }

    /// Builder-style setter for `build_threshold`.
    pub fn with_build_threshold(self, threshold: usize) -> Self {
        Self {
            build_threshold: Some(threshold),
            ..self
        }
    }

    /// Builder-style setter for `num_clusters`.
    pub fn with_num_clusters(self, num_clusters: usize) -> Self {
        Self {
            num_clusters: Some(num_clusters),
            ..self
        }
    }

    /// Dimension used for indexing: `mrl_dim` when set, otherwise `dim`.
    pub fn index_dim(&self) -> usize {
        match self.mrl_dim {
            Some(reduced) => reduced,
            None => self.dim,
        }
    }

    /// `true` for the cluster-based index types (`IvfRaBitQ` and `ScaNN`).
    pub fn uses_ivf(&self) -> bool {
        match self.index_type {
            VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => true,
            VectorIndexType::Flat | VectorIndexType::RaBitQ => false,
        }
    }

    /// `true` only for the `ScaNN` index type.
    pub fn uses_scann(&self) -> bool {
        matches!(self.index_type, VectorIndexType::ScaNN)
    }

    /// `true` only for the `Flat` index type.
    pub fn is_flat(&self) -> bool {
        matches!(self.index_type, VectorIndexType::Flat)
    }

    /// Build threshold: the explicit `build_threshold` when set, otherwise a
    /// per-index-type default (`usize::MAX` for `Flat`, 1000 for `RaBitQ`,
    /// 10000 for `IvfRaBitQ` and `ScaNN`).
    pub fn default_build_threshold(&self) -> usize {
        if let Some(explicit) = self.build_threshold {
            return explicit;
        }
        match self.index_type {
            VectorIndexType::Flat => usize::MAX,
            VectorIndexType::RaBitQ => 1000,
            VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
        }
    }

    /// Cluster count to use for `num_vectors` vectors: the explicit
    /// `num_clusters` when set, otherwise the truncated square root of
    /// `num_vectors` clamped to `16..=4096`.
    pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
        match self.num_clusters {
            Some(explicit) => explicit,
            None => {
                let root = (num_vectors as f64).sqrt() as usize;
                root.clamp(16, 4096)
            }
        }
    }
}
use super::query_field_router::QueryRouterRule;
/// Immutable description of the fields of an index, produced by
/// `SchemaBuilder::build`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
// Field entries; a `Field` handle is an index into this vector.
fields: Vec<FieldEntry>,
// Lookup from field name to its handle.
name_to_field: HashMap<String, Field>,
// Handles of the default fields; empty when missing from serialized data.
#[serde(default)]
default_fields: Vec<Field>,
// Query routing rules; empty when missing from serialized data.
#[serde(default)]
query_routers: Vec<QueryRouterRule>,
}
impl Schema {
    /// Starts an empty [`SchemaBuilder`].
    pub fn builder() -> SchemaBuilder {
        Default::default()
    }

    /// Looks up a field handle by name.
    pub fn get_field(&self, name: &str) -> Option<Field> {
        let handle = self.name_to_field.get(name)?;
        Some(*handle)
    }

    /// Returns the entry a handle points at, or `None` when the handle is out
    /// of range for this schema.
    pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
        let index = field.0 as usize;
        self.fields.get(index)
    }

    /// Returns the name of the field a handle points at, if the handle is valid.
    pub fn get_field_name(&self, field: Field) -> Option<&str> {
        let entry = self.get_field_entry(field)?;
        Some(entry.name.as_str())
    }

    /// Iterates over all fields as `(handle, entry)` pairs in declaration order.
    pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
        self.fields.iter().enumerate().map(|(position, entry)| {
            let handle = Field(position as u32);
            (handle, entry)
        })
    }

    /// Number of fields in the schema.
    pub fn num_fields(&self) -> usize {
        self.fields.len()
    }

    /// Handles of the default fields.
    pub fn default_fields(&self) -> &[Field] {
        self.default_fields.as_slice()
    }

    /// Replaces the default fields. The handles are stored as given.
    pub fn set_default_fields(&mut self, fields: Vec<Field>) {
        self.default_fields = fields;
    }

    /// Query routing rules attached to this schema.
    pub fn query_routers(&self) -> &[QueryRouterRule] {
        self.query_routers.as_slice()
    }

    /// Replaces the query routing rules.
    pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
        self.query_routers = rules;
    }
}
/// Mutable builder that accumulates field definitions and produces a `Schema`.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
// Field entries in the order they were added; the position becomes the handle.
fields: Vec<FieldEntry>,
// Default fields by name; resolved to handles in `build`.
default_fields: Vec<String>,
// Query routing rules passed through to the built schema.
query_routers: Vec<QueryRouterRule>,
}
impl SchemaBuilder {
pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
self.add_field_with_tokenizer(
name,
FieldType::Text,
indexed,
stored,
Some("default".to_string()),
)
}
pub fn add_text_field_with_tokenizer(
&mut self,
name: &str,
indexed: bool,
stored: bool,
tokenizer: &str,
) -> Field {
self.add_field_with_tokenizer(
name,
FieldType::Text,
indexed,
stored,
Some(tokenizer.to_string()),
)
}
pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
self.add_field(name, FieldType::U64, indexed, stored)
}
pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
self.add_field(name, FieldType::I64, indexed, stored)
}
pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
self.add_field(name, FieldType::F64, indexed, stored)
}
pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
self.add_field(name, FieldType::Bytes, false, stored)
}
pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
self.add_field(name, FieldType::Json, false, stored)
}
pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
self.add_sparse_vector_field_with_config(
name,
indexed,
stored,
crate::structures::SparseVectorConfig::default(),
)
}
pub fn add_sparse_vector_field_with_config(
&mut self,
name: &str,
indexed: bool,
stored: bool,
config: crate::structures::SparseVectorConfig,
) -> Field {
let field = Field(self.fields.len() as u32);
self.fields.push(FieldEntry {
name: name.to_string(),
field_type: FieldType::SparseVector,
indexed,
stored,
tokenizer: None,
multi: false,
positions: None,
sparse_vector_config: Some(config),
dense_vector_config: None,
});
field
}
pub fn set_sparse_vector_config(
&mut self,
field: Field,
config: crate::structures::SparseVectorConfig,
) {
if let Some(entry) = self.fields.get_mut(field.0 as usize) {
entry.sparse_vector_config = Some(config);
}
}
pub fn add_dense_vector_field(
&mut self,
name: &str,
dim: usize,
indexed: bool,
stored: bool,
) -> Field {
self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
}
pub fn add_dense_vector_field_with_config(
&mut self,
name: &str,
indexed: bool,
stored: bool,
config: DenseVectorConfig,
) -> Field {
let field = Field(self.fields.len() as u32);
self.fields.push(FieldEntry {
name: name.to_string(),
field_type: FieldType::DenseVector,
indexed,
stored,
tokenizer: None,
multi: false,
positions: None,
sparse_vector_config: None,
dense_vector_config: Some(config),
});
field
}
fn add_field(
&mut self,
name: &str,
field_type: FieldType,
indexed: bool,
stored: bool,
) -> Field {
self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
}
fn add_field_with_tokenizer(
&mut self,
name: &str,
field_type: FieldType,
indexed: bool,
stored: bool,
tokenizer: Option<String>,
) -> Field {
self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
}
fn add_field_full(
&mut self,
name: &str,
field_type: FieldType,
indexed: bool,
stored: bool,
tokenizer: Option<String>,
multi: bool,
) -> Field {
let field = Field(self.fields.len() as u32);
self.fields.push(FieldEntry {
name: name.to_string(),
field_type,
indexed,
stored,
tokenizer,
multi,
positions: None,
sparse_vector_config: None,
dense_vector_config: None,
});
field
}
pub fn set_multi(&mut self, field: Field, multi: bool) {
if let Some(entry) = self.fields.get_mut(field.0 as usize) {
entry.multi = multi;
}
}
pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
if let Some(entry) = self.fields.get_mut(field.0 as usize) {
entry.positions = Some(mode);
}
}
pub fn set_default_fields(&mut self, field_names: Vec<String>) {
self.default_fields = field_names;
}
pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
self.query_routers = rules;
}
pub fn build(self) -> Schema {
let mut name_to_field = HashMap::new();
for (i, entry) in self.fields.iter().enumerate() {
name_to_field.insert(entry.name.clone(), Field(i as u32));
}
let default_fields: Vec<Field> = self
.default_fields
.iter()
.filter_map(|name| name_to_field.get(name).copied())
.collect();
Schema {
fields: self.fields,
name_to_field,
default_fields,
query_routers: self.query_routers,
}
}
}
/// A single value stored in a document for some field.
///
/// Variants are serialized under the snake_case form of their identifier:
/// `"text"`, `"u64"`, `"i64"`, `"f64"`, `"bytes"`, `"sparse_vector"`,
/// `"dense_vector"` and `"json"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FieldValue {
    /// String content.
    Text(String),
    /// Unsigned 64-bit integer.
    U64(u64),
    /// Signed 64-bit integer.
    I64(i64),
    /// 64-bit floating point number.
    F64(f64),
    /// Raw byte string.
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, weight)` pairs.
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector components.
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    Json(serde_json::Value),
}
impl FieldValue {
    /// Borrows the string if this is a `Text` value.
    pub fn as_text(&self) -> Option<&str> {
        if let FieldValue::Text(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Returns the number if this is a `U64` value.
    pub fn as_u64(&self) -> Option<u64> {
        if let FieldValue::U64(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Returns the number if this is an `I64` value.
    pub fn as_i64(&self) -> Option<i64> {
        if let FieldValue::I64(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Returns the number if this is an `F64` value.
    pub fn as_f64(&self) -> Option<f64> {
        if let FieldValue::F64(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Borrows the bytes if this is a `Bytes` value.
    pub fn as_bytes(&self) -> Option<&[u8]> {
        if let FieldValue::Bytes(bytes) = self {
            Some(bytes.as_slice())
        } else {
            None
        }
    }

    /// Borrows the `(index, weight)` pairs if this is a `SparseVector` value.
    pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
        if let FieldValue::SparseVector(pairs) = self {
            Some(pairs.as_slice())
        } else {
            None
        }
    }

    /// Borrows the components if this is a `DenseVector` value.
    pub fn as_dense_vector(&self) -> Option<&[f32]> {
        if let FieldValue::DenseVector(components) = self {
            Some(components.as_slice())
        } else {
            None
        }
    }

    /// Borrows the JSON value if this is a `Json` value.
    pub fn as_json(&self) -> Option<&serde_json::Value> {
        if let FieldValue::Json(json) = self {
            Some(json)
        } else {
            None
        }
    }
}
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once; `get_all` returns every value for
/// a field and `get_first` the earliest one added.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
// Values in insertion order.
field_values: Vec<(Field, FieldValue)>,
}
impl Document {
pub fn new() -> Self {
Self::default()
}
pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
self.field_values
.push((field, FieldValue::Text(value.into())));
}
pub fn add_u64(&mut self, field: Field, value: u64) {
self.field_values.push((field, FieldValue::U64(value)));
}
pub fn add_i64(&mut self, field: Field, value: i64) {
self.field_values.push((field, FieldValue::I64(value)));
}
pub fn add_f64(&mut self, field: Field, value: f64) {
self.field_values.push((field, FieldValue::F64(value)));
}
pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
self.field_values.push((field, FieldValue::Bytes(value)));
}
pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
self.field_values
.push((field, FieldValue::SparseVector(entries)));
}
pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
self.field_values
.push((field, FieldValue::DenseVector(values)));
}
pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
self.field_values.push((field, FieldValue::Json(value)));
}
pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
self.field_values
.iter()
.find(|(f, _)| *f == field)
.map(|(_, v)| v)
}
pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
self.field_values
.iter()
.filter(move |(f, _)| *f == field)
.map(|(_, v)| v)
}
pub fn field_values(&self) -> &[(Field, FieldValue)] {
&self.field_values
}
pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
use std::collections::HashMap;
let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
HashMap::new();
for (field, value) in &self.field_values {
if let Some(entry) = schema.get_field_entry(*field) {
let json_value = match value {
FieldValue::Text(s) => serde_json::Value::String(s.clone()),
FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
FieldValue::F64(n) => serde_json::json!(n),
FieldValue::Bytes(b) => {
use base64::Engine;
serde_json::Value::String(
base64::engine::general_purpose::STANDARD.encode(b),
)
}
FieldValue::SparseVector(entries) => {
let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
serde_json::json!({
"indices": indices,
"values": values
})
}
FieldValue::DenseVector(values) => {
serde_json::json!(values)
}
FieldValue::Json(v) => v.clone(),
};
field_values_map
.entry(*field)
.or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
.2
.push(json_value);
}
}
let mut map = serde_json::Map::new();
for (_field, (name, is_multi, values)) in field_values_map {
let json_value = if is_multi || values.len() > 1 {
serde_json::Value::Array(values)
} else {
values.into_iter().next().unwrap()
};
map.insert(name, json_value);
}
serde_json::Value::Object(map)
}
pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
let obj = json.as_object()?;
let mut doc = Document::new();
for (key, value) in obj {
if let Some(field) = schema.get_field(key) {
let field_entry = schema.get_field_entry(field)?;
Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
}
}
Some(doc)
}
fn add_json_value(
doc: &mut Document,
field: Field,
field_type: &FieldType,
value: &serde_json::Value,
) {
match value {
serde_json::Value::String(s) => {
if matches!(field_type, FieldType::Text) {
doc.add_text(field, s.clone());
}
}
serde_json::Value::Number(n) => {
match field_type {
FieldType::I64 => {
if let Some(i) = n.as_i64() {
doc.add_i64(field, i);
}
}
FieldType::U64 => {
if let Some(u) = n.as_u64() {
doc.add_u64(field, u);
} else if let Some(i) = n.as_i64() {
if i >= 0 {
doc.add_u64(field, i as u64);
}
}
}
FieldType::F64 => {
if let Some(f) = n.as_f64() {
doc.add_f64(field, f);
}
}
_ => {}
}
}
serde_json::Value::Array(arr) => {
for item in arr {
Self::add_json_value(doc, field, field_type, item);
}
}
serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
if let (Some(indices_val), Some(values_val)) =
(obj.get("indices"), obj.get("values"))
{
let indices: Vec<u32> = indices_val
.as_array()
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_u64().map(|n| n as u32))
.collect()
})
.unwrap_or_default();
let values: Vec<f32> = values_val
.as_array()
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_f64().map(|n| n as f32))
.collect()
})
.unwrap_or_default();
if indices.len() == values.len() {
let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
doc.add_sparse_vector(field, entries);
}
}
}
_ if matches!(field_type, FieldType::Json) => {
doc.add_json(field, value.clone());
}
serde_json::Value::Object(_) => {}
_ => {}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Handles returned by the builder resolve by name once the schema is built.
    #[test]
    fn test_schema_builder() {
        let mut sb = Schema::builder();
        let title = sb.add_text_field("title", true, true);
        let body = sb.add_text_field("body", true, false);
        let count = sb.add_u64_field("count", true, true);
        let schema = sb.build();

        let expectations = vec![
            ("title", Some(title)),
            ("body", Some(body)),
            ("count", Some(count)),
            ("nonexistent", None),
        ];
        for (name, expected) in expectations {
            assert_eq!(schema.get_field(name), expected);
        }
    }

    /// Values added to a document can be read back through `get_first`.
    #[test]
    fn test_document() {
        let mut sb = Schema::builder();
        let title = sb.add_text_field("title", true, true);
        let count = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let first_title = doc.get_first(title).unwrap();
        assert_eq!(first_title.as_text(), Some("Hello World"));
        let first_count = doc.get_first(count).unwrap();
        assert_eq!(first_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut sb = Schema::builder();
        let title = sb.add_text_field("title", true, true);
        let count = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut original = Document::new();
        original.add_text(title, "Hello World");
        original.add_u64(count, 42);

        let json = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", json);
        let restored: Document = serde_json::from_str(&json).unwrap();

        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            restored.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(restored.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// Repeated values for one field are kept and rendered as a JSON array.
    #[test]
    fn test_multivalue_field() {
        let mut sb = Schema::builder();
        let uris = sb.add_text_field("uris", true, true);
        let title = sb.add_text_field("title", true, true);
        let schema = sb.build();

        let mut doc = Document::new();
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));
        let collected: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        let rendered = doc.to_json(&schema);
        let uris_json = rendered.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_items = uris_json.as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));

        let title_json = rendered.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// A JSON array of strings is read as several values of a text field.
    #[test]
    fn test_multivalue_from_json() {
        let mut sb = Schema::builder();
        let uris = sb.add_text_field("uris", true, true);
        let title = sb.add_text_field("title", true, true);
        let schema = sb.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });
        let doc = Document::from_json(&input, &schema).unwrap();

        let collected: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        let rendered = doc.to_json(&schema);
        let uris_items = rendered.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));
    }

    /// The `multi` flag forces an array even when only one value is present.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut sb = Schema::builder();
        let uris = sb.add_text_field("uris", true, true);
        sb.set_multi(uris, true);
        let title = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");
        let rendered = doc.to_json(&schema);

        let uris_json = rendered.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_items = uris_json.as_array().unwrap();
        assert_eq!(uris_items.len(), 1);
        assert_eq!(uris_items[0].as_str(), Some("only_one"));

        let title_json = rendered.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors round-trip through the `indices`/`values` JSON layout.
    #[test]
    fn test_sparse_vector_field() {
        let mut sb = Schema::builder();
        let embedding = sb.add_sparse_vector_field("embedding", true, true);
        let title = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        let entries = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let rendered = doc.to_json(&schema);
        let embedding_json = rendered.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let index_count = embedding_json
            .get("indices")
            .unwrap()
            .as_array()
            .unwrap()
            .len();
        assert_eq!(index_count, 3);

        let restored = Document::from_json(&rendered, &schema).unwrap();
        let restored_entries = restored
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        let expected = [(0u32, 1.0f32), (5, 2.5), (10, 0.5)];
        for (slot, (index, weight)) in expected.iter().enumerate() {
            assert_eq!(restored_entries[slot].0, *index);
            assert!((restored_entries[slot].1 - *weight).abs() < 1e-6);
        }
    }

    /// An object stored in a JSON field round-trips unchanged.
    #[test]
    fn test_json_field() {
        let mut sb = Schema::builder();
        let metadata = sb.add_json_field("metadata", true);
        let title = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, payload.clone());
        doc.add_text(title, "Test Document");

        let stored = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored, &payload);
        assert_eq!(stored.get("author").unwrap().as_str(), Some("John Doe"));

        let rendered = doc.to_json(&schema);
        assert_eq!(rendered.get("metadata").unwrap(), &payload);

        let restored = Document::from_json(&rendered, &schema).unwrap();
        let restored_payload = restored.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(restored_payload, &payload);
    }

    /// `add_json` keeps any JSON value shape as-is: array, string, number,
    /// null and boolean.
    #[test]
    fn test_json_field_various_types() {
        let mut sb = Schema::builder();
        let data = sb.add_json_field("data", true);
        let _schema = sb.build();

        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];
        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &sample);
        }
    }
}