1use std::collections::BTreeMap;
6use std::path::Path;
7
8use serde::{Deserialize, Serialize};
9
10use crate::error::{Result, VaultdbError};
11use crate::record::Value;
12
13#[derive(Debug, Serialize, Deserialize)]
15pub struct VaultSchema {
16 pub collections: BTreeMap<String, CollectionSchema>,
17}
18
19#[derive(Debug, Serialize, Deserialize)]
21pub struct CollectionSchema {
22 #[serde(default, skip_serializing_if = "Option::is_none")]
23 pub description: Option<String>,
24 pub folder: String,
25 #[serde(default, skip_serializing_if = "Vec::is_empty")]
26 pub filter: Vec<String>,
27 #[serde(default, skip_serializing_if = "Vec::is_empty")]
28 pub required: Vec<String>,
29 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
30 pub fields: BTreeMap<String, FieldSchema>,
31}
32
33#[derive(Debug, Serialize, Deserialize)]
35pub struct FieldSchema {
36 #[serde(rename = "type")]
37 pub field_type: String,
38 #[serde(rename = "enum")]
39 #[serde(default, skip_serializing_if = "Vec::is_empty")]
40 pub enum_values: Vec<Value>,
41 #[serde(default, skip_serializing_if = "Option::is_none")]
42 pub min: Option<f64>,
43 #[serde(default, skip_serializing_if = "Option::is_none")]
44 pub max: Option<f64>,
45 #[serde(default, skip_serializing_if = "Option::is_none")]
46 pub required: Option<bool>,
47}
48
49pub fn load_schema(path: &Path) -> Result<VaultSchema> {
56 let content = std::fs::read_to_string(path).map_err(|_| {
57 VaultdbError::SchemaError(format!("cannot read schema file: {}", path.display()))
58 })?;
59 serde_yaml::from_str(&content)
60 .map_err(|e| VaultdbError::SchemaError(format!("parsing {}: {}", path.display(), e)))
61}
62
63pub fn schema_to_yaml(schema: &VaultSchema) -> Result<String> {
65 serde_yaml::to_string(schema)
66 .map_err(|e| VaultdbError::SchemaError(format!("rendering schema as YAML: {}", e)))
67}
68
/// A single schema violation found in one record.
///
/// Derives `Clone`, `PartialEq` and `Eq` so violations can be copied and
/// compared directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Violation {
    /// Name of the file the offending record came from.
    pub file: String,
    /// Name of the field that violated the schema.
    pub field: String,
    /// Human-readable description of the problem.
    pub message: String,
}
76
77impl std::fmt::Display for Violation {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 write!(f, "{}: {} — {}", self.file, self.field, self.message)
80 }
81}
82
83pub fn validate_record(
85 filename: &str,
86 fields: &BTreeMap<String, Value>,
87 schema: &CollectionSchema,
88) -> Vec<Violation> {
89 let mut violations = Vec::new();
90
91 for req in &schema.required {
93 match fields.get(req) {
94 None | Some(Value::Null) => {
95 violations.push(Violation {
96 file: filename.to_string(),
97 field: req.clone(),
98 message: "required field is missing or null".into(),
99 });
100 }
101 _ => {}
102 }
103 }
104
105 for (field_name, field_schema) in &schema.fields {
107 let value = match fields.get(field_name) {
108 Some(v) if !matches!(v, Value::Null) => v,
109 _ => continue, };
111
112 let actual_type = value.type_name();
114 let expected_type = &field_schema.field_type;
115 if !type_matches(actual_type, expected_type) {
116 violations.push(Violation {
117 file: filename.to_string(),
118 field: field_name.clone(),
119 message: format!("expected type '{}', got '{}'", expected_type, actual_type),
120 });
121 }
122
123 if !field_schema.enum_values.is_empty() {
125 let display = value.display_value();
126 let matches_enum = field_schema.enum_values.iter().any(|e| match e {
127 Value::String(s) => s == &display,
128 Value::Integer(i) => i.to_string() == display,
129 Value::Float(f) => f.to_string() == display,
130 Value::Bool(b) => b.to_string() == display,
131 _ => false,
132 });
133 if !matches_enum {
134 violations.push(Violation {
135 file: filename.to_string(),
136 field: field_name.clone(),
137 message: format!(
138 "value '{}' not in allowed values: {:?}",
139 display,
140 field_schema
141 .enum_values
142 .iter()
143 .map(value_display)
144 .collect::<Vec<_>>()
145 ),
146 });
147 }
148 }
149
150 if let Some(min) = field_schema.min
152 && let Some(num) = value.as_float()
153 && num < min
154 {
155 violations.push(Violation {
156 file: filename.to_string(),
157 field: field_name.clone(),
158 message: format!("value {} is below minimum {}", num, min),
159 });
160 }
161 if let Some(max) = field_schema.max
162 && let Some(num) = value.as_float()
163 && num > max
164 {
165 violations.push(Violation {
166 file: filename.to_string(),
167 field: field_name.clone(),
168 message: format!("value {} exceeds maximum {}", num, max),
169 });
170 }
171 }
172
173 violations
174}
175
176fn value_display(v: &Value) -> String {
177 match v {
178 Value::String(s) => s.clone(),
179 Value::Integer(i) => i.to_string(),
180 Value::Float(f) => f.to_string(),
181 Value::Bool(b) => b.to_string(),
182 Value::Null => "null".to_string(),
183 other => format!("{:?}", other),
184 }
185}
186
/// Reports whether a value whose type name is `actual` satisfies the schema
/// type `expected`.
///
/// - `string`, `integer`, `bool`, `list` and `map` require an exact match.
/// - `float` and `number` accept both `integer` and `float`.
/// - Any other expected type is not checked and always matches.
fn type_matches(actual: &str, expected: &str) -> bool {
    const EXACT: [&str; 5] = ["string", "integer", "bool", "list", "map"];

    if EXACT.contains(&expected) {
        return actual == expected;
    }
    if matches!(expected, "float" | "number") {
        return matches!(actual, "integer" | "float");
    }
    // Unknown schema types are deliberately lenient.
    true
}
199
200pub fn infer_schema(folder_name: &str, records: &[crate::record::Record]) -> CollectionSchema {
202 let mut field_types: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
203 let mut field_values: BTreeMap<String, Vec<String>> = BTreeMap::new();
204 let mut field_count: BTreeMap<String, usize> = BTreeMap::new();
205 let total = records.len();
206
207 for record in records {
208 for (key, value) in &record.fields {
209 let type_name = value.type_name().to_string();
210 *field_types
211 .entry(key.clone())
212 .or_default()
213 .entry(type_name)
214 .or_insert(0) += 1;
215 *field_count.entry(key.clone()).or_insert(0) += 1;
216
217 if !matches!(value, Value::Null | Value::List(_) | Value::Map(_)) {
218 field_values
219 .entry(key.clone())
220 .or_default()
221 .push(value.display_value());
222 }
223 }
224 }
225
226 let mut fields = BTreeMap::new();
227 let mut required = Vec::new();
228
229 for (key, types) in &field_types {
230 let dominant_type = types
232 .iter()
233 .filter(|(t, _)| *t != "null")
234 .max_by_key(|(_, count)| *count)
235 .map(|(t, _)| t.clone())
236 .unwrap_or_else(|| "string".to_string());
237
238 let non_null_count = types
240 .iter()
241 .filter(|(t, _)| *t != "null")
242 .map(|(_, c)| c)
243 .sum::<usize>();
244
245 if non_null_count == total && total > 0 {
246 required.push(key.clone());
247 }
248
249 let enum_values = if let Some(values) = field_values.get(key) {
251 let mut unique: Vec<String> = values.clone();
252 unique.sort();
253 unique.dedup();
254 if unique.len() <= 10 && unique.len() < values.len() / 2 {
255 unique
256 .into_iter()
257 .map(|v| {
258 if let Ok(n) = v.parse::<i64>() {
260 Value::Integer(n)
261 } else {
262 Value::String(v)
263 }
264 })
265 .collect()
266 } else {
267 vec![]
268 }
269 } else {
270 vec![]
271 };
272
273 fields.insert(
274 key.clone(),
275 FieldSchema {
276 field_type: dominant_type,
277 enum_values,
278 min: None,
279 max: None,
280 required: None,
281 },
282 );
283 }
284
285 CollectionSchema {
286 description: Some(format!("Auto-inferred schema for {}", folder_name)),
287 folder: folder_name.to_string(),
288 filter: vec![],
289 required,
290 fields,
291 }
292}
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::record::{Record, Value};
    use std::path::PathBuf;

    /// Builds a record at a fixed path from `(name, value)` pairs.
    fn make_record(fields: Vec<(&str, Value)>) -> Record {
        Record {
            path: PathBuf::from("/vault/notes/test.md"),
            fields: fields
                .into_iter()
                .map(|(name, value)| (name.to_string(), value))
                .collect(),
            raw_content: None,
        }
    }

    /// Builds a field schema with no per-field `required` flag.
    fn field_schema(
        field_type: &str,
        enum_values: Vec<Value>,
        min: Option<f64>,
        max: Option<f64>,
    ) -> FieldSchema {
        FieldSchema {
            field_type: field_type.into(),
            enum_values,
            min,
            max,
            required: None,
        }
    }

    /// Builds a schema for the `notes` folder with the given required names
    /// and field constraints.
    fn notes_schema(required: &[&str], fields: Vec<(&str, FieldSchema)>) -> CollectionSchema {
        CollectionSchema {
            description: None,
            folder: "notes".into(),
            filter: vec![],
            required: required.iter().map(|name| name.to_string()).collect(),
            fields: fields
                .into_iter()
                .map(|(name, spec)| (name.to_string(), spec))
                .collect(),
        }
    }

    /// Validates a record made of `fields` against `schema`.
    fn violations_for(schema: &CollectionSchema, fields: Vec<(&str, Value)>) -> Vec<Violation> {
        let record = make_record(fields);
        validate_record("test.md", &record.fields, schema)
    }

    #[test]
    fn validate_required_field_missing() {
        let schema = notes_schema(&["status"], vec![]);

        let violations = violations_for(&schema, vec![("tags", Value::String("x".into()))]);

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("required"));
    }

    #[test]
    fn validate_type_mismatch() {
        let schema = notes_schema(
            &[],
            vec![("year", field_schema("integer", vec![], None, None))],
        );

        let violations =
            violations_for(&schema, vec![("year", Value::String("not a number".into()))]);

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("type"));
    }

    #[test]
    fn validate_enum_violation() {
        let allowed = vec![
            Value::String("to-watch".into()),
            Value::String("watched".into()),
        ];
        let schema = notes_schema(
            &[],
            vec![("status", field_schema("string", allowed, None, None))],
        );

        let violations =
            violations_for(&schema, vec![("status", Value::String("invalid".into()))]);

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("not in allowed"));
    }

    #[test]
    fn validate_min_max() {
        let schema = notes_schema(
            &[],
            vec![(
                "rating",
                field_schema("number", vec![], Some(1.0), Some(10.0)),
            )],
        );

        let violations = violations_for(&schema, vec![("rating", Value::Integer(15))]);

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("exceeds maximum"));
    }

    #[test]
    fn validate_passes_clean_record() {
        let allowed = vec![Value::String("to-watch".into())];
        let schema = notes_schema(
            &["status"],
            vec![("status", field_schema("string", allowed, None, None))],
        );

        let violations =
            violations_for(&schema, vec![("status", Value::String("to-watch".into()))]);

        assert!(violations.is_empty());
    }

    #[test]
    fn infer_schema_basic() {
        let records: Vec<Record> = [("active", 2020), ("draft", 2021)]
            .into_iter()
            .map(|(status, year)| {
                make_record(vec![
                    ("status", Value::String(status.into())),
                    ("year", Value::Integer(year)),
                ])
            })
            .collect();

        let schema = infer_schema("notes", &records);

        assert_eq!(schema.fields.get("status").unwrap().field_type, "string");
        assert_eq!(schema.fields.get("year").unwrap().field_type, "integer");
        assert!(schema.required.contains(&"status".to_string()));
        assert!(schema.required.contains(&"year".to_string()));
    }
}