1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
8pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct FieldEntry {
33 pub name: String,
34 pub field_type: FieldType,
35 pub indexed: bool,
36 pub stored: bool,
37 pub tokenizer: Option<String>,
39 #[serde(default)]
41 pub multi: bool,
42}
43
44use super::query_field_router::QueryRouterRule;
45
46#[derive(Debug, Clone, Default, Serialize, Deserialize)]
48pub struct Schema {
49 fields: Vec<FieldEntry>,
50 name_to_field: HashMap<String, Field>,
51 #[serde(default)]
53 default_fields: Vec<Field>,
54 #[serde(default)]
56 query_routers: Vec<QueryRouterRule>,
57}
58
59impl Schema {
60 pub fn builder() -> SchemaBuilder {
61 SchemaBuilder::default()
62 }
63
64 pub fn get_field(&self, name: &str) -> Option<Field> {
65 self.name_to_field.get(name).copied()
66 }
67
68 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
69 self.fields.get(field.0 as usize)
70 }
71
72 pub fn get_field_name(&self, field: Field) -> Option<&str> {
73 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
74 }
75
76 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
77 self.fields
78 .iter()
79 .enumerate()
80 .map(|(i, e)| (Field(i as u32), e))
81 }
82
83 pub fn num_fields(&self) -> usize {
84 self.fields.len()
85 }
86
87 pub fn default_fields(&self) -> &[Field] {
89 &self.default_fields
90 }
91
92 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
94 self.default_fields = fields;
95 }
96
97 pub fn query_routers(&self) -> &[QueryRouterRule] {
99 &self.query_routers
100 }
101
102 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
104 self.query_routers = rules;
105 }
106}
107
108#[derive(Debug, Default)]
110pub struct SchemaBuilder {
111 fields: Vec<FieldEntry>,
112 default_fields: Vec<String>,
113 query_routers: Vec<QueryRouterRule>,
114}
115
116impl SchemaBuilder {
117 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
118 self.add_field_with_tokenizer(
119 name,
120 FieldType::Text,
121 indexed,
122 stored,
123 Some("default".to_string()),
124 )
125 }
126
127 pub fn add_text_field_with_tokenizer(
128 &mut self,
129 name: &str,
130 indexed: bool,
131 stored: bool,
132 tokenizer: &str,
133 ) -> Field {
134 self.add_field_with_tokenizer(
135 name,
136 FieldType::Text,
137 indexed,
138 stored,
139 Some(tokenizer.to_string()),
140 )
141 }
142
143 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
144 self.add_field(name, FieldType::U64, indexed, stored)
145 }
146
147 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
148 self.add_field(name, FieldType::I64, indexed, stored)
149 }
150
151 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
152 self.add_field(name, FieldType::F64, indexed, stored)
153 }
154
155 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
156 self.add_field(name, FieldType::Bytes, false, stored)
157 }
158
159 fn add_field(
160 &mut self,
161 name: &str,
162 field_type: FieldType,
163 indexed: bool,
164 stored: bool,
165 ) -> Field {
166 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
167 }
168
169 fn add_field_with_tokenizer(
170 &mut self,
171 name: &str,
172 field_type: FieldType,
173 indexed: bool,
174 stored: bool,
175 tokenizer: Option<String>,
176 ) -> Field {
177 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
178 }
179
180 fn add_field_full(
181 &mut self,
182 name: &str,
183 field_type: FieldType,
184 indexed: bool,
185 stored: bool,
186 tokenizer: Option<String>,
187 multi: bool,
188 ) -> Field {
189 let field = Field(self.fields.len() as u32);
190 self.fields.push(FieldEntry {
191 name: name.to_string(),
192 field_type,
193 indexed,
194 stored,
195 tokenizer,
196 multi,
197 });
198 field
199 }
200
201 pub fn set_multi(&mut self, field: Field, multi: bool) {
203 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
204 entry.multi = multi;
205 }
206 }
207
208 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
210 self.default_fields = field_names;
211 }
212
213 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
215 self.query_routers = rules;
216 }
217
218 pub fn build(self) -> Schema {
219 let mut name_to_field = HashMap::new();
220 for (i, entry) in self.fields.iter().enumerate() {
221 name_to_field.insert(entry.name.clone(), Field(i as u32));
222 }
223
224 let default_fields: Vec<Field> = self
226 .default_fields
227 .iter()
228 .filter_map(|name| name_to_field.get(name).copied())
229 .collect();
230
231 Schema {
232 fields: self.fields,
233 name_to_field,
234 default_fields,
235 query_routers: self.query_routers,
236 }
237 }
238}
239
240#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
242pub enum FieldValue {
243 #[serde(rename = "text")]
244 Text(String),
245 #[serde(rename = "u64")]
246 U64(u64),
247 #[serde(rename = "i64")]
248 I64(i64),
249 #[serde(rename = "f64")]
250 F64(f64),
251 #[serde(rename = "bytes")]
252 Bytes(Vec<u8>),
253}
254
255impl FieldValue {
256 pub fn as_text(&self) -> Option<&str> {
257 match self {
258 FieldValue::Text(s) => Some(s),
259 _ => None,
260 }
261 }
262
263 pub fn as_u64(&self) -> Option<u64> {
264 match self {
265 FieldValue::U64(v) => Some(*v),
266 _ => None,
267 }
268 }
269
270 pub fn as_i64(&self) -> Option<i64> {
271 match self {
272 FieldValue::I64(v) => Some(*v),
273 _ => None,
274 }
275 }
276
277 pub fn as_f64(&self) -> Option<f64> {
278 match self {
279 FieldValue::F64(v) => Some(*v),
280 _ => None,
281 }
282 }
283
284 pub fn as_bytes(&self) -> Option<&[u8]> {
285 match self {
286 FieldValue::Bytes(b) => Some(b),
287 _ => None,
288 }
289 }
290}
291
292#[derive(Debug, Clone, Default, Serialize, Deserialize)]
294pub struct Document {
295 field_values: Vec<(Field, FieldValue)>,
296}
297
298impl Document {
299 pub fn new() -> Self {
300 Self::default()
301 }
302
303 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
304 self.field_values
305 .push((field, FieldValue::Text(value.into())));
306 }
307
308 pub fn add_u64(&mut self, field: Field, value: u64) {
309 self.field_values.push((field, FieldValue::U64(value)));
310 }
311
312 pub fn add_i64(&mut self, field: Field, value: i64) {
313 self.field_values.push((field, FieldValue::I64(value)));
314 }
315
316 pub fn add_f64(&mut self, field: Field, value: f64) {
317 self.field_values.push((field, FieldValue::F64(value)));
318 }
319
320 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
321 self.field_values.push((field, FieldValue::Bytes(value)));
322 }
323
324 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
325 self.field_values
326 .iter()
327 .find(|(f, _)| *f == field)
328 .map(|(_, v)| v)
329 }
330
331 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
332 self.field_values
333 .iter()
334 .filter(move |(f, _)| *f == field)
335 .map(|(_, v)| v)
336 }
337
338 pub fn field_values(&self) -> &[(Field, FieldValue)] {
339 &self.field_values
340 }
341
342 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
348 use std::collections::HashMap;
349
350 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
352 HashMap::new();
353
354 for (field, value) in &self.field_values {
355 if let Some(entry) = schema.get_field_entry(*field) {
356 let json_value = match value {
357 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
358 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
359 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
360 FieldValue::F64(n) => serde_json::json!(n),
361 FieldValue::Bytes(b) => {
362 use base64::Engine;
363 serde_json::Value::String(
364 base64::engine::general_purpose::STANDARD.encode(b),
365 )
366 }
367 };
368 field_values_map
369 .entry(*field)
370 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
371 .2
372 .push(json_value);
373 }
374 }
375
376 let mut map = serde_json::Map::new();
378 for (_field, (name, is_multi, values)) in field_values_map {
379 let json_value = if is_multi || values.len() > 1 {
380 serde_json::Value::Array(values)
381 } else {
382 values.into_iter().next().unwrap()
383 };
384 map.insert(name, json_value);
385 }
386
387 serde_json::Value::Object(map)
388 }
389
390 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
399 let obj = json.as_object()?;
400 let mut doc = Document::new();
401
402 for (key, value) in obj {
403 if let Some(field) = schema.get_field(key) {
404 let field_entry = schema.get_field_entry(field)?;
405 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
406 }
407 }
408
409 Some(doc)
410 }
411
412 fn add_json_value(
414 doc: &mut Document,
415 field: Field,
416 field_type: &FieldType,
417 value: &serde_json::Value,
418 ) {
419 match value {
420 serde_json::Value::String(s) => {
421 if matches!(field_type, FieldType::Text) {
422 doc.add_text(field, s.clone());
423 }
424 }
425 serde_json::Value::Number(n) => {
426 match field_type {
427 FieldType::I64 => {
428 if let Some(i) = n.as_i64() {
429 doc.add_i64(field, i);
430 }
431 }
432 FieldType::U64 => {
433 if let Some(u) = n.as_u64() {
434 doc.add_u64(field, u);
435 } else if let Some(i) = n.as_i64() {
436 if i >= 0 {
438 doc.add_u64(field, i as u64);
439 }
440 }
441 }
442 FieldType::F64 => {
443 if let Some(f) = n.as_f64() {
444 doc.add_f64(field, f);
445 }
446 }
447 _ => {}
448 }
449 }
450 serde_json::Value::Array(arr) => {
452 for item in arr {
453 Self::add_json_value(doc, field, field_type, item);
454 }
455 }
456 _ => {}
457 }
458 }
459}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// Names registered through the builder resolve to the handles it
    /// returned; unknown names resolve to nothing.
    #[test]
    fn test_schema_builder() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let body_field = schema_builder.add_text_field("body", true, false);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let schema = schema_builder.build();

        let expectations = [
            ("title", title_field),
            ("body", body_field),
            ("count", count_field),
        ];
        for (name, handle) in expectations {
            assert_eq!(schema.get_field(name), Some(handle));
        }
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back by field.
    #[test]
    fn test_document() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let _schema = schema_builder.build();

        let mut document = Document::new();
        document.add_text(title_field, "Hello World");
        document.add_u64(count_field, 42);

        let stored_title = document.get_first(title_field).unwrap();
        assert_eq!(stored_title.as_text(), Some("Hello World"));
        let stored_count = document.get_first(count_field).unwrap();
        assert_eq!(stored_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let _schema = schema_builder.build();

        let mut original = Document::new();
        original.add_text(title_field, "Hello World");
        original.add_u64(count_field, 42);

        let encoded = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", encoded);

        let decoded: Document = serde_json::from_str(&encoded).unwrap();
        assert_eq!(
            decoded.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        let decoded_title = decoded.get_first(title_field).unwrap();
        assert_eq!(decoded_title.as_text(), Some("Hello World"));
        let decoded_count = decoded.get_first(count_field).unwrap();
        assert_eq!(decoded_count.as_u64(), Some(42));
    }

    /// Repeated values for one field are kept in order and rendered as an
    /// array, while a single value stays a bare string.
    #[test]
    fn test_multivalue_field() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        let mut document = Document::new();
        document.add_text(uris_field, "one");
        document.add_text(uris_field, "two");
        document.add_text(title_field, "Test Document");

        let first_uri = document.get_first(uris_field).unwrap();
        assert_eq!(first_uri.as_text(), Some("one"));

        let stored_uris: Vec<Option<&str>> = document
            .get_all(uris_field)
            .map(|value| value.as_text())
            .collect();
        assert_eq!(stored_uris, vec![Some("one"), Some("two")]);

        let rendered = document.to_json(&schema);

        let rendered_uris = rendered.get("uris").unwrap();
        assert!(
            rendered_uris.is_array(),
            "Multi-value field should be an array"
        );
        let uri_strings: Vec<Option<&str>> = rendered_uris
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(uri_strings, vec![Some("one"), Some("two")]);

        let rendered_title = rendered.get("title").unwrap();
        assert!(
            rendered_title.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(rendered_title.as_str(), Some("Test Document"));
    }

    /// A JSON array yields one document value per element and renders back
    /// to the same array.
    #[test]
    fn test_multivalue_from_json() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let document = Document::from_json(&input, &schema).unwrap();

        let stored_uris: Vec<Option<&str>> = document
            .get_all(uris_field)
            .map(|value| value.as_text())
            .collect();
        assert_eq!(stored_uris, vec![Some("one"), Some("two")]);

        let stored_title = document.get_first(title_field).unwrap();
        assert_eq!(stored_title.as_text(), Some("Test Document"));

        let rendered = document.to_json(&schema);
        let uri_strings: Vec<Option<&str>> = rendered
            .get("uris")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(uri_strings, vec![Some("one"), Some("two")]);
    }

    /// A field flagged `multi` renders as an array even when it holds a
    /// single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        schema_builder.set_multi(uris_field, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        assert!(schema.get_field_entry(uris_field).unwrap().multi);
        assert!(!schema.get_field_entry(title_field).unwrap().multi);

        let mut document = Document::new();
        document.add_text(uris_field, "only_one");
        document.add_text(title_field, "Test Document");

        let rendered = document.to_json(&schema);

        let rendered_uris = rendered.get("uris").unwrap();
        assert!(
            rendered_uris.is_array(),
            "Multi field should be array even with single value"
        );
        let uri_strings: Vec<Option<&str>> = rendered_uris
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(uri_strings, vec![Some("only_one")]);

        let rendered_title = rendered.get("title").unwrap();
        assert!(
            rendered_title.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(rendered_title.as_str(), Some("Test Document"));
    }
}