Skip to main content

vaultdb_core/
schema.rs

1//! Schema inference and validation. `infer_schema` walks records to discover
2//! field types and cardinalities; `validate_record` checks a record against a
3//! schema; `schema_to_yaml` renders a schema to YAML for persistence.
4
5use std::collections::BTreeMap;
6use std::path::{Path, PathBuf};
7
8use serde::{Deserialize, Serialize};
9
10use crate::error::{Result, VaultdbError};
11use crate::record::Value;
12
/// File name under which a vault's schema is persisted, relative to the
/// vault root. CLI and MCP both go through this constant, so they always
/// agree on `<vault>/vaultdb-schema.yaml`.
pub const SCHEMA_FILENAME: &str = "vaultdb-schema.yaml";

/// Build the path of the schema file belonging to `vault_root`.
///
/// Purely lexical: nothing is read from disk, and the file need not exist.
pub fn schema_path(vault_root: &Path) -> PathBuf {
    let mut schema_file = vault_root.to_path_buf();
    schema_file.push(SCHEMA_FILENAME);
    schema_file
}
21
/// Top-level schema file structure.
///
/// This is the root value that is (de)serialized when a schema is read from
/// or rendered to YAML.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VaultSchema {
    /// Per-collection schemas, keyed by collection name. Being a `BTreeMap`,
    /// iteration always visits collections in sorted name order.
    pub collections: BTreeMap<String, CollectionSchema>,
}
27
28impl VaultSchema {
29    /// Collections whose `folder` matches `folder` exactly, or is a path
30    /// under it (e.g. when `folder = "Notes"`, also matches collections
31    /// declared with `folder: Notes/movie`). Used by `schema show` /
32    /// `schema validate` to scope queries to a folder, and by the MCP
33    /// `schema_show` tool's optional folder filter.
34    pub fn collections_for_folder<'a>(
35        &'a self,
36        folder: &str,
37    ) -> Vec<(&'a String, &'a CollectionSchema)> {
38        let prefix = format!("{}/", folder);
39        self.collections
40            .iter()
41            .filter(|(_, c)| c.folder == folder || c.folder.starts_with(&prefix))
42            .collect()
43    }
44
45    /// The single collection whose `folder` matches `folder` exactly.
46    /// Used by `CreateBuilder` to pick the unambiguous schema for a
47    /// `vaultdb create <folder>` invocation. Prefix matches don't apply
48    /// here — for a create, the user means a specific folder.
49    pub fn collection_for_folder<'a>(&'a self, folder: &str) -> Option<&'a CollectionSchema> {
50        self.collections.values().find(|c| c.folder == folder)
51    }
52}
53
/// Schema for a single collection (a folder + optional filter).
///
/// Every field except `folder` is optional in the serialized form: missing
/// keys deserialize to their empty default, and empty values are omitted
/// again on serialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionSchema {
    /// Optional free-text description of the collection.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Folder the collection is declared for; this is what
    /// `VaultSchema::collections_for_folder` / `collection_for_folder`
    /// match against.
    pub folder: String,
    /// Filter expressions narrowing the collection within its folder.
    /// NOTE(review): the expression syntax is not interpreted in this
    /// file — confirm against the consumer.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub filter: Vec<String>,
    /// Names of fields that must be present and non-null in every record
    /// (enforced by `validate_record`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub required: Vec<String>,
    /// Per-field constraints, keyed by field name.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub fields: BTreeMap<String, FieldSchema>,
}
67
/// Schema for a single field.
///
/// Only `type` is mandatory in the serialized form; all other keys are
/// optional and are omitted on serialization when unset/empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldSchema {
    /// Declared type name, serialized as `type`. The names understood by
    /// the validator are `string`, `integer`, `float`, `number`, `bool`,
    /// `list`, `map`, `wikilink`, `date` and `url`; any other name is
    /// accepted but not enforced.
    #[serde(rename = "type")]
    pub field_type: String,
    /// Allowed values, serialized as `enum`. An empty list means the field
    /// is not restricted to a fixed set.
    #[serde(rename = "enum")]
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub enum_values: Vec<Value>,
    /// Inclusive lower bound for numeric values (a value strictly below it
    /// is a violation).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min: Option<f64>,
    /// Inclusive upper bound for numeric values (a value strictly above it
    /// is a violation).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max: Option<f64>,
    /// Static default applied when a record is created without an explicit
    /// value for this field. Validated against `field_type` and `enum_values`
    /// at schema load time, so bad defaults fail loudly rather than silently
    /// landing in user files.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
    /// Dynamic default — one of the closed enum values `today`, `now`,
    /// `epoch`. Resolved at the moment a record is created, not at schema
    /// load. Mutually exclusive with `default`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_expr: Option<String>,
}
92
/// Recognised values for `FieldSchema::default_expr`. Matching is exact
/// (case-sensitive); any other value is rejected at schema load time.
/// Resolution to a concrete `Value` happens in `resolve_default_expr`,
/// which `CreateBuilder` calls at the moment a record is created. The
/// keywords listed here must stay in sync with the arms of that function.
pub const DEFAULT_EXPRS: &[&str] = &["today", "now", "epoch"];
98
99/// Resolve a `default_expr` keyword to a concrete `Value` using
100/// wall-clock now. Returns `SchemaError` for unknown keywords
101/// (defence-in-depth — `load_schema` rejects these earlier, but the
102/// helper stays safe to call from any code path).
103pub fn resolve_default_expr(expr: &str) -> Result<Value> {
104    match expr {
105        "today" => Ok(Value::String(crate::record::today_string())),
106        "now" => Ok(Value::String(crate::record::now_string())),
107        "epoch" => Ok(Value::Integer(crate::record::epoch_seconds())),
108        other => Err(VaultdbError::SchemaError(format!(
109            "unknown default_expr '{}' (expected one of {:?})",
110            other, DEFAULT_EXPRS
111        ))),
112    }
113}
114
115/// Load schema from a file.
116///
117/// Errors are mapped to `VaultdbError::SchemaError` with a human-readable
118/// reason — the underlying YAML parser is an implementation detail and is
119/// deliberately not exposed in the public error type, so consumers don't
120/// transitively depend on whichever YAML crate vaultdb chooses today.
121///
122/// After parsing, every field's `default` and `default_expr` is validated:
123/// - `default_expr` must be one of [`DEFAULT_EXPRS`].
124/// - `default` literals must be compatible with `field_type`.
125/// - `default` literals must satisfy `enum_values` when both are set.
126/// - `default` and `default_expr` are mutually exclusive.
127pub fn load_schema(path: &Path) -> Result<VaultSchema> {
128    let content = std::fs::read_to_string(path).map_err(|_| {
129        VaultdbError::SchemaError(format!("cannot read schema file: {}", path.display()))
130    })?;
131    let parsed: VaultSchema = serde_yaml::from_str(&content)
132        .map_err(|e| VaultdbError::SchemaError(format!("parsing {}: {}", path.display(), e)))?;
133    validate_schema_defaults(&parsed)?;
134    Ok(parsed)
135}
136
137/// Walk every field in every collection and check that any declared
138/// defaults are well-formed. Exposed publicly so consumers that build a
139/// `VaultSchema` in code (not by loading a file) can run the same check.
140pub fn validate_schema_defaults(schema: &VaultSchema) -> Result<()> {
141    for (col_name, col) in &schema.collections {
142        for (field_name, field) in &col.fields {
143            validate_field_defaults(col_name, field_name, field)?;
144        }
145    }
146    Ok(())
147}
148
149fn validate_field_defaults(col: &str, field: &str, schema: &FieldSchema) -> Result<()> {
150    if schema.default.is_some() && schema.default_expr.is_some() {
151        return Err(VaultdbError::SchemaError(format!(
152            "collection '{}', field '{}': `default` and `default_expr` are mutually exclusive",
153            col, field
154        )));
155    }
156
157    if let Some(expr) = &schema.default_expr
158        && !DEFAULT_EXPRS.contains(&expr.as_str())
159    {
160        return Err(VaultdbError::SchemaError(format!(
161            "collection '{}', field '{}': default_expr '{}' is not recognised (expected one of {:?})",
162            col, field, expr, DEFAULT_EXPRS
163        )));
164    }
165
166    if let Some(val) = &schema.default {
167        // Type compatibility check. Reuses `type_matches` so the rules
168        // stay aligned with what `validate_record` enforces at runtime.
169        let actual = val.type_name();
170        if !type_matches(actual, &schema.field_type) {
171            return Err(VaultdbError::SchemaError(format!(
172                "collection '{}', field '{}': default has type '{}', incompatible with field type '{}'",
173                col, field, actual, schema.field_type
174            )));
175        }
176
177        // Format check for constrained string types. A bad `default:
178        // 2024-99-99` should fail at schema load, not when a user
179        // creates a note.
180        if let Value::String(s) = val {
181            let format_ok = match schema.field_type.as_str() {
182                "wikilink" => is_valid_wikilink(s),
183                "date" => is_valid_date(s),
184                "url" => is_valid_url(s),
185                _ => true,
186            };
187            if !format_ok {
188                return Err(VaultdbError::SchemaError(format!(
189                    "collection '{}', field '{}': default '{}' is not a valid {}",
190                    col, field, s, schema.field_type
191                )));
192            }
193        }
194
195        // Enum compatibility.
196        if !schema.enum_values.is_empty() {
197            let display = val.display_value();
198            let matches_enum = schema.enum_values.iter().any(|e| match e {
199                Value::String(s) => s == &display,
200                Value::Integer(i) => i.to_string() == display,
201                Value::Float(f) => f.to_string() == display,
202                Value::Bool(b) => b.to_string() == display,
203                _ => false,
204            });
205            if !matches_enum {
206                return Err(VaultdbError::SchemaError(format!(
207                    "collection '{}', field '{}': default '{}' is not in `enum` values",
208                    col, field, display
209                )));
210            }
211        }
212    }
213
214    Ok(())
215}
216
217/// Serialize a schema to YAML string.
218pub fn schema_to_yaml(schema: &VaultSchema) -> Result<String> {
219    serde_yaml::to_string(schema)
220        .map_err(|e| VaultdbError::SchemaError(format!("rendering schema as YAML: {}", e)))
221}
222
/// One problem found while validating a record against a schema.
#[derive(Debug)]
pub struct Violation {
    /// Name of the file the record came from (as passed to the validator).
    pub file: String,
    /// Name of the offending field.
    pub field: String,
    /// Human-readable description of what is wrong.
    pub message: String,
}

/// Renders as `<file>: <field> — <message>`.
impl std::fmt::Display for Violation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            file,
            field,
            message,
        } = self;
        f.write_str(file)?;
        f.write_str(": ")?;
        f.write_str(field)?;
        f.write_str(" — ")?;
        f.write_str(message)
    }
}
236
237/// Validate a record's fields against a collection schema.
238pub fn validate_record(
239    filename: &str,
240    fields: &BTreeMap<String, Value>,
241    schema: &CollectionSchema,
242) -> Vec<Violation> {
243    let mut violations = Vec::new();
244
245    // Check required fields
246    for req in &schema.required {
247        match fields.get(req) {
248            None | Some(Value::Null) => {
249                violations.push(Violation {
250                    file: filename.to_string(),
251                    field: req.clone(),
252                    message: "required field is missing or null".into(),
253                });
254            }
255            _ => {}
256        }
257    }
258
259    // Check field constraints
260    for (field_name, field_schema) in &schema.fields {
261        let value = match fields.get(field_name) {
262            Some(v) if !matches!(v, Value::Null) => v,
263            _ => continue, // skip absent/null fields (required check handles those)
264        };
265
266        // Type check
267        let actual_type = value.type_name();
268        let expected_type = &field_schema.field_type;
269        if !type_matches(actual_type, expected_type) {
270            violations.push(Violation {
271                file: filename.to_string(),
272                field: field_name.clone(),
273                message: format!("expected type '{}', got '{}'", expected_type, actual_type),
274            });
275        }
276
277        // Enum check
278        if !field_schema.enum_values.is_empty() {
279            let display = value.display_value();
280            let matches_enum = field_schema.enum_values.iter().any(|e| match e {
281                Value::String(s) => s == &display,
282                Value::Integer(i) => i.to_string() == display,
283                Value::Float(f) => f.to_string() == display,
284                Value::Bool(b) => b.to_string() == display,
285                _ => false,
286            });
287            if !matches_enum {
288                violations.push(Violation {
289                    file: filename.to_string(),
290                    field: field_name.clone(),
291                    message: format!(
292                        "value '{}' not in allowed values: {:?}",
293                        display,
294                        field_schema
295                            .enum_values
296                            .iter()
297                            .map(value_display)
298                            .collect::<Vec<_>>()
299                    ),
300                });
301            }
302        }
303
304        // Min/max check for numeric fields
305        if let Some(min) = field_schema.min
306            && let Some(num) = value.as_float()
307            && num < min
308        {
309            violations.push(Violation {
310                file: filename.to_string(),
311                field: field_name.clone(),
312                message: format!("value {} is below minimum {}", num, min),
313            });
314        }
315        if let Some(max) = field_schema.max
316            && let Some(num) = value.as_float()
317            && num > max
318        {
319            violations.push(Violation {
320                file: filename.to_string(),
321                field: field_name.clone(),
322                message: format!("value {} exceeds maximum {}", num, max),
323            });
324        }
325
326        // Format checks for the "string-shaped but constrained" types.
327        // These don't introduce new Value variants — on disk they're
328        // still YAML strings — but validate_record refuses values that
329        // don't match the expected format. Match arms with guards
330        // keep clippy's `collapsible_match` happy.
331        if let Value::String(s) = value {
332            let bad = match expected_type.as_str() {
333                "wikilink" if !is_valid_wikilink(s) => Some(format!(
334                    "value '{}' is not a valid wikilink; expected [[name]], [[name|alias]], [[name#section]], or [[name#section|alias]]",
335                    s
336                )),
337                "date" if !is_valid_date(s) => Some(format!(
338                    "value '{}' is not a valid date; expected YYYY-MM-DD",
339                    s
340                )),
341                "url" if !is_valid_url(s) => Some(format!("value '{}' is not a valid URL", s)),
342                _ => None,
343            };
344            if let Some(message) = bad {
345                violations.push(Violation {
346                    file: filename.to_string(),
347                    field: field_name.clone(),
348                    message,
349                });
350            }
351        }
352    }
353
354    violations
355}
356
357fn value_display(v: &Value) -> String {
358    match v {
359        Value::String(s) => s.clone(),
360        Value::Integer(i) => i.to_string(),
361        Value::Float(f) => f.to_string(),
362        Value::Bool(b) => b.to_string(),
363        Value::Null => "null".to_string(),
364        other => format!("{:?}", other),
365    }
366}
367
/// Decide whether a value whose runtime type name is `actual` satisfies a
/// field declared as `expected`.
///
/// `float` and `number` both accept integers as well as floats. The
/// constrained string types (`wikilink`, `date`, `url`) are stored as plain
/// strings, so at this level they only require `actual == "string"`; their
/// format is checked separately. Unknown `expected` names are not enforced
/// and always match.
fn type_matches(actual: &str, expected: &str) -> bool {
    let accepted: &[&str] = match expected {
        "string" | "wikilink" | "date" | "url" => &["string"],
        "integer" => &["integer"],
        "float" | "number" => &["float", "integer"],
        "bool" => &["bool"],
        "list" => &["list"],
        "map" => &["map"],
        // Unknown declared type: nothing to enforce.
        _ => return true,
    };
    accepted.contains(&actual)
}
384
/// Whether `s` is a syntactically valid wikilink.
///
/// Accepted shapes are `[[target]]`, optionally followed inside the
/// brackets by `#section` and/or `|alias`. The target — everything before
/// the first `|` or `#` — must contain at least one non-whitespace
/// character, and no `[` or `]` may appear between the outer delimiters
/// (so `[[a][b]]` is rejected). Whether the target exists in the vault is
/// NOT checked — that requires a `LinkGraph` and is out of scope for v1.
pub fn is_valid_wikilink(s: &str) -> bool {
    let Some(inner) = s
        .strip_prefix("[[")
        .and_then(|rest| rest.strip_suffix("]]"))
    else {
        return false;
    };
    // Brackets are only legal as the outer delimiters.
    if inner.contains(['[', ']']) {
        return false;
    }
    // An empty `inner` yields an empty target and is rejected here too.
    let target = inner.split(['|', '#']).next().unwrap_or("");
    !target.trim().is_empty()
}
403
/// True if `s` is a calendar date in strict `YYYY-MM-DD` form: exactly four
/// ASCII digits, `-`, two ASCII digits, `-`, two ASCII digits, with the
/// month in `01..=12` and the day in `01..=31`.
///
/// Signs are not digits: `+123-05-13` and `2024-+5-13` are rejected even
/// though the integer parser alone would accept a leading `+`.
///
/// Does NOT validate the day against the month or do leap-year validation —
/// `2024-02-30` passes today; a stricter check can come in Phase 8 if the
/// false-negative cost ever materialises.
pub fn is_valid_date(s: &str) -> bool {
    /// Parse `part` only if it is exactly `width` ASCII digits.
    fn fixed_width_number(part: &str, width: usize) -> Option<u32> {
        let digits_only = part.len() == width && part.bytes().all(|b| b.is_ascii_digit());
        if digits_only {
            part.parse().ok()
        } else {
            None
        }
    }

    // Exactly three `-`-separated components, no more and no fewer.
    let mut parts = s.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (parts.next(), parts.next(), parts.next(), parts.next())
    else {
        return false;
    };

    match (
        fixed_width_number(year, 4),
        fixed_width_number(month, 2),
        fixed_width_number(day, 2),
    ) {
        (Some(_), Some(m), Some(d)) => (1..=12).contains(&m) && (1..=31).contains(&d),
        _ => false,
    }
}
424
425/// True if `s` parses as an absolute URL (i.e. has a scheme like
426/// `https`, `http`, `mailto`, `file`, …). Relative URLs are rejected
427/// on purpose — a vault field declared `type: url` should be navigable
428/// on its own.
429pub fn is_valid_url(s: &str) -> bool {
430    url::Url::parse(s).is_ok()
431}
432
433/// Infer a schema from a set of records.
434pub fn infer_schema(folder_name: &str, records: &[crate::record::Record]) -> CollectionSchema {
435    let mut field_types: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
436    let mut field_values: BTreeMap<String, Vec<String>> = BTreeMap::new();
437    let mut field_count: BTreeMap<String, usize> = BTreeMap::new();
438    let total = records.len();
439
440    for record in records {
441        for (key, value) in &record.fields {
442            let type_name = value.type_name().to_string();
443            *field_types
444                .entry(key.clone())
445                .or_default()
446                .entry(type_name)
447                .or_insert(0) += 1;
448            *field_count.entry(key.clone()).or_insert(0) += 1;
449
450            if !matches!(value, Value::Null | Value::List(_) | Value::Map(_)) {
451                field_values
452                    .entry(key.clone())
453                    .or_default()
454                    .push(value.display_value());
455            }
456        }
457    }
458
459    let mut fields = BTreeMap::new();
460    let mut required = Vec::new();
461
462    for (key, types) in &field_types {
463        // Determine the dominant type
464        let dominant_type = types
465            .iter()
466            .filter(|(t, _)| *t != "null")
467            .max_by_key(|(_, count)| *count)
468            .map(|(t, _)| t.clone())
469            .unwrap_or_else(|| "string".to_string());
470
471        // Check if field is present in all records with non-null values
472        let non_null_count = types
473            .iter()
474            .filter(|(t, _)| *t != "null")
475            .map(|(_, c)| c)
476            .sum::<usize>();
477
478        if non_null_count == total && total > 0 {
479            required.push(key.clone());
480        }
481
482        // Infer enum if there are few unique values
483        let enum_values = if let Some(values) = field_values.get(key) {
484            let mut unique: Vec<String> = values.clone();
485            unique.sort();
486            unique.dedup();
487            if unique.len() <= 10 && unique.len() < values.len() / 2 {
488                unique
489                    .into_iter()
490                    .map(|v| {
491                        // Try to parse as integer
492                        if let Ok(n) = v.parse::<i64>() {
493                            Value::Integer(n)
494                        } else {
495                            Value::String(v)
496                        }
497                    })
498                    .collect()
499            } else {
500                vec![]
501            }
502        } else {
503            vec![]
504        };
505
506        fields.insert(
507            key.clone(),
508            FieldSchema {
509                field_type: dominant_type,
510                enum_values,
511                min: None,
512                max: None,
513                default: None,
514                default_expr: None,
515            },
516        );
517        // `required` is tracked at the collection level (above); kept out
518        // of `FieldSchema` deliberately so there's a single source of truth.
519    }
520
521    CollectionSchema {
522        description: Some(format!("Auto-inferred schema for {}", folder_name)),
523        folder: folder_name.to_string(),
524        filter: vec![],
525        required,
526        fields,
527    }
528}
529
530#[cfg(test)]
531mod tests {
532    use super::*;
533    use crate::record::{Record, Value};
534    use std::path::PathBuf;
535
536    fn make_record(fields: Vec<(&str, Value)>) -> Record {
537        let mut map = BTreeMap::new();
538        for (k, v) in fields {
539            map.insert(k.to_string(), v);
540        }
541        Record {
542            path: PathBuf::from("/vault/notes/test.md"),
543            fields: map,
544            raw_content: None,
545        }
546    }
547
548    #[test]
549    fn validate_required_field_missing() {
550        let schema = CollectionSchema {
551            description: None,
552            folder: "notes".into(),
553            filter: vec![],
554            required: vec!["status".into()],
555            fields: BTreeMap::new(),
556        };
557
558        let record = make_record(vec![("tags", Value::String("x".into()))]);
559        let violations = validate_record("test.md", &record.fields, &schema);
560        assert_eq!(violations.len(), 1);
561        assert!(violations[0].message.contains("required"));
562    }
563
564    #[test]
565    fn validate_type_mismatch() {
566        let mut fields = BTreeMap::new();
567        fields.insert(
568            "year".into(),
569            FieldSchema {
570                field_type: "integer".into(),
571                enum_values: vec![],
572                min: None,
573                max: None,
574                default: None,
575                default_expr: None,
576            },
577        );
578
579        let schema = CollectionSchema {
580            description: None,
581            folder: "notes".into(),
582            filter: vec![],
583            required: vec![],
584            fields,
585        };
586
587        let record = make_record(vec![("year", Value::String("not a number".into()))]);
588        let violations = validate_record("test.md", &record.fields, &schema);
589        assert_eq!(violations.len(), 1);
590        assert!(violations[0].message.contains("type"));
591    }
592
593    #[test]
594    fn validate_enum_violation() {
595        let mut fields = BTreeMap::new();
596        fields.insert(
597            "status".into(),
598            FieldSchema {
599                field_type: "string".into(),
600                enum_values: vec![
601                    Value::String("to-watch".into()),
602                    Value::String("watched".into()),
603                ],
604                min: None,
605                max: None,
606                default: None,
607                default_expr: None,
608            },
609        );
610
611        let schema = CollectionSchema {
612            description: None,
613            folder: "notes".into(),
614            filter: vec![],
615            required: vec![],
616            fields,
617        };
618
619        let record = make_record(vec![("status", Value::String("invalid".into()))]);
620        let violations = validate_record("test.md", &record.fields, &schema);
621        assert_eq!(violations.len(), 1);
622        assert!(violations[0].message.contains("not in allowed"));
623    }
624
625    #[test]
626    fn validate_min_max() {
627        let mut fields = BTreeMap::new();
628        fields.insert(
629            "rating".into(),
630            FieldSchema {
631                field_type: "number".into(),
632                enum_values: vec![],
633                min: Some(1.0),
634                max: Some(10.0),
635                default: None,
636                default_expr: None,
637            },
638        );
639
640        let schema = CollectionSchema {
641            description: None,
642            folder: "notes".into(),
643            filter: vec![],
644            required: vec![],
645            fields,
646        };
647
648        let record = make_record(vec![("rating", Value::Integer(15))]);
649        let violations = validate_record("test.md", &record.fields, &schema);
650        assert_eq!(violations.len(), 1);
651        assert!(violations[0].message.contains("exceeds maximum"));
652    }
653
654    #[test]
655    fn validate_passes_clean_record() {
656        let mut fields = BTreeMap::new();
657        fields.insert(
658            "status".into(),
659            FieldSchema {
660                field_type: "string".into(),
661                enum_values: vec![Value::String("to-watch".into())],
662                min: None,
663                max: None,
664                default: None,
665                default_expr: None,
666            },
667        );
668
669        let schema = CollectionSchema {
670            description: None,
671            folder: "notes".into(),
672            filter: vec![],
673            required: vec!["status".into()],
674            fields,
675        };
676
677        let record = make_record(vec![("status", Value::String("to-watch".into()))]);
678        let violations = validate_record("test.md", &record.fields, &schema);
679        assert!(violations.is_empty());
680    }
681
682    #[test]
683    fn infer_schema_basic() {
684        let records = vec![
685            make_record(vec![
686                ("status", Value::String("active".into())),
687                ("year", Value::Integer(2020)),
688            ]),
689            make_record(vec![
690                ("status", Value::String("draft".into())),
691                ("year", Value::Integer(2021)),
692            ]),
693        ];
694
695        let schema = infer_schema("notes", &records);
696        assert_eq!(schema.fields.get("status").unwrap().field_type, "string");
697        assert_eq!(schema.fields.get("year").unwrap().field_type, "integer");
698        assert!(schema.required.contains(&"status".to_string()));
699        assert!(schema.required.contains(&"year".to_string()));
700    }
701
702    // ── Phase 1: wikilink / date / url type validation ────────────────
703
704    fn schema_with_field(name: &str, field_type: &str) -> CollectionSchema {
705        let mut fields = BTreeMap::new();
706        fields.insert(
707            name.into(),
708            FieldSchema {
709                field_type: field_type.into(),
710                enum_values: vec![],
711                min: None,
712                max: None,
713                default: None,
714                default_expr: None,
715            },
716        );
717        CollectionSchema {
718            description: None,
719            folder: "notes".into(),
720            filter: vec![],
721            required: vec![],
722            fields,
723        }
724    }
725
726    #[test]
727    fn wikilink_accepts_plain() {
728        assert!(is_valid_wikilink("[[name]]"));
729        assert!(is_valid_wikilink("[[kyoto-university-kyoto-yoshida-KG9l]]"));
730    }
731
732    #[test]
733    fn wikilink_accepts_alias_and_section() {
734        assert!(is_valid_wikilink("[[name|alias]]"));
735        assert!(is_valid_wikilink("[[name#section]]"));
736        assert!(is_valid_wikilink("[[name#section|alias]]"));
737    }
738
739    #[test]
740    fn wikilink_rejects_malformed() {
741        assert!(!is_valid_wikilink("name"));
742        assert!(!is_valid_wikilink("[name]"));
743        assert!(!is_valid_wikilink("[[]]"));
744        assert!(!is_valid_wikilink("[[  ]]"));
745        assert!(!is_valid_wikilink("[[a][b]]"));
746    }
747
748    #[test]
749    fn validate_wikilink_field_catches_bad_value() {
750        let schema = schema_with_field("university", "wikilink");
751        let record = make_record(vec![("university", Value::String("kyoto".into()))]);
752        let violations = validate_record("p.md", &record.fields, &schema);
753        assert_eq!(violations.len(), 1, "{:?}", violations);
754        assert!(violations[0].message.contains("wikilink"));
755    }
756
757    #[test]
758    fn validate_wikilink_field_passes_good_value() {
759        let schema = schema_with_field("university", "wikilink");
760        let record = make_record(vec![(
761            "university",
762            Value::String("[[kyoto-university-KG9l]]".into()),
763        )]);
764        let violations = validate_record("p.md", &record.fields, &schema);
765        assert!(violations.is_empty(), "{:?}", violations);
766    }
767
768    #[test]
769    fn date_accepts_iso_calendar() {
770        assert!(is_valid_date("2024-05-13"));
771        assert!(is_valid_date("1999-01-01"));
772        assert!(is_valid_date("2030-12-31"));
773    }
774
775    #[test]
776    fn date_rejects_garbage_and_wrong_components() {
777        assert!(!is_valid_date("not-a-date"));
778        assert!(!is_valid_date("2024/05/13"));
779        assert!(!is_valid_date("2024-13-01")); // bad month
780        assert!(!is_valid_date("2024-00-15")); // bad month
781        assert!(!is_valid_date("2024-05-32")); // bad day
782        assert!(!is_valid_date("24-05-13")); // 2-digit year
783        assert!(!is_valid_date("2024-5-13")); // 1-digit month
784    }
785
786    #[test]
787    fn validate_date_field_catches_bad_value() {
788        let schema = schema_with_field("due", "date");
789        let record = make_record(vec![("due", Value::String("not-a-date".into()))]);
790        let violations = validate_record("t.md", &record.fields, &schema);
791        assert_eq!(violations.len(), 1);
792        assert!(violations[0].message.contains("date"));
793    }
794
795    #[test]
796    fn validate_date_field_passes_good_value() {
797        let schema = schema_with_field("due", "date");
798        let record = make_record(vec![("due", Value::String("2024-05-13".into()))]);
799        let violations = validate_record("t.md", &record.fields, &schema);
800        assert!(violations.is_empty());
801    }
802
803    #[test]
804    fn url_accepts_common_schemes() {
805        assert!(is_valid_url("https://example.com"));
806        assert!(is_valid_url("http://example.com/path?q=1"));
807        assert!(is_valid_url("mailto:a@b.com"));
808        assert!(is_valid_url("file:///tmp/x"));
809    }
810
811    #[test]
812    fn url_rejects_garbage_and_relative() {
813        assert!(!is_valid_url("not a url"));
814        assert!(!is_valid_url("/relative/path"));
815        assert!(!is_valid_url("example.com")); // no scheme
816    }
817
/// A scheme-less host in a `url`-typed field produces exactly one
/// violation, and that violation's message mentions "URL".
#[test]
fn validate_url_field_catches_bad_value() {
    let homepage_schema = schema_with_field("homepage", "url");
    let bad_homepage = Value::String("example.com".into());
    let rec = make_record(vec![("homepage", bad_homepage)]);

    let found = validate_record("p.md", &rec.fields, &homepage_schema);

    assert_eq!(found.len(), 1);
    let message = &found[0].message;
    assert!(message.contains("URL"));
}
826
/// An absolute https URL in a `url`-typed field produces no violations.
#[test]
fn validate_url_field_passes_good_value() {
    let homepage_schema = schema_with_field("homepage", "url");
    let good_homepage = Value::String("https://example.com".into());
    let rec = make_record(vec![("homepage", good_homepage)]);

    let found = validate_record("p.md", &rec.fields, &homepage_schema);

    assert!(found.is_empty());
}
837
/// An integer stored in a wikilink-typed field must trip the type check
/// itself, not only the wikilink-format check.
#[test]
fn constrained_type_still_requires_string_actual() {
    let link_schema = schema_with_field("university", "wikilink");
    let rec = make_record(vec![("university", Value::Integer(42))]);

    let found = validate_record("p.md", &rec.fields, &link_schema);

    // At least one reported violation has to talk about the type mismatch.
    let mentions_type = found
        .iter()
        .any(|violation| violation.message.contains("type"));
    assert!(mentions_type);
}
847
848    // ── Phase 2: default / default_expr validation at load time ────────
849
850    fn schema_with_defaulted_field(
851        name: &str,
852        field_type: &str,
853        default: Option<Value>,
854        default_expr: Option<String>,
855        enum_values: Vec<Value>,
856    ) -> VaultSchema {
857        let mut fields = BTreeMap::new();
858        fields.insert(
859            name.into(),
860            FieldSchema {
861                field_type: field_type.into(),
862                enum_values,
863                min: None,
864                max: None,
865                default,
866                default_expr,
867            },
868        );
869        VaultSchema {
870            collections: BTreeMap::from([(
871                "movies".to_string(),
872                CollectionSchema {
873                    description: None,
874                    folder: "Notes/movie".into(),
875                    filter: vec![],
876                    required: vec![],
877                    fields,
878                },
879            )]),
880        }
881    }
882
/// An integer literal default on an `integer` field is accepted.
#[test]
fn default_literal_matching_type_passes() {
    let default = Some(Value::Integer(2024));
    let schema = schema_with_defaulted_field("year", "integer", default, None, vec![]);

    let outcome = validate_schema_defaults(&schema);
    assert!(outcome.is_ok());
}
894
/// A string literal default on an `integer` field is rejected, and the
/// error text both says "incompatible" and names the offending field.
#[test]
fn default_literal_wrong_type_rejected() {
    let s = schema_with_defaulted_field(
        "year",
        "integer",
        Some(Value::String("nope".into())),
        None,
        vec![],
    );
    let err = validate_schema_defaults(&s).unwrap_err().to_string();
    assert!(err.contains("incompatible"), "got: {}", err);
    // Echo the actual error here too, so a failure shows what was produced
    // instead of only the stringified condition.
    assert!(err.contains("year"), "got: {}", err);
}
908
/// A default that is not one of the field's enum values is rejected with an
/// error mentioning "enum".
#[test]
fn default_literal_outside_enum_rejected() {
    let allowed = vec![
        Value::String("to-watch".into()),
        Value::String("watched".into()),
    ];
    let default = Some(Value::String("invalid".into()));
    let schema = schema_with_defaulted_field("status", "string", default, None, allowed);

    let err = validate_schema_defaults(&schema).unwrap_err().to_string();
    assert!(err.contains("enum"), "got: {}", err);
}
924
/// A default that is one of the field's enum values is accepted.
#[test]
fn default_literal_inside_enum_passes() {
    let allowed = vec![
        Value::String("to-watch".into()),
        Value::String("watched".into()),
    ];
    let default = Some(Value::String("to-watch".into()));
    let schema = schema_with_defaulted_field("status", "string", default, None, allowed);

    let outcome = validate_schema_defaults(&schema);
    assert!(outcome.is_ok());
}
939
/// Every keyword listed in `DEFAULT_EXPRS` is accepted as a `default_expr`
/// on a `date` field.
#[test]
fn default_expr_known_keyword_passes() {
    for keyword in DEFAULT_EXPRS {
        let default_expr = Some(keyword.to_string());
        let schema = schema_with_defaulted_field("due", "date", None, default_expr, vec![]);

        let outcome = validate_schema_defaults(&schema);
        assert!(
            outcome.is_ok(),
            "default_expr '{}' should be valid",
            keyword
        );
    }
}
952
/// A `default_expr` keyword that is not recognised ("tomorrow") is rejected,
/// and the error text mentions both `default_expr` and the bad keyword.
#[test]
fn default_expr_unknown_keyword_rejected() {
    let s = schema_with_defaulted_field("due", "date", None, Some("tomorrow".into()), vec![]);
    let err = validate_schema_defaults(&s).unwrap_err().to_string();
    assert!(err.contains("default_expr"), "got: {}", err);
    // Echo the actual error here too, so a failure shows what was produced
    // instead of only the stringified condition.
    assert!(err.contains("tomorrow"), "got: {}", err);
}
960
/// Supplying both a literal `default` and a `default_expr` on the same field
/// is rejected with a "mutually exclusive" error.
#[test]
fn default_and_default_expr_mutually_exclusive() {
    let literal = Some(Value::String("2024-05-13".into()));
    let keyword = Some(String::from("today"));
    let schema = schema_with_defaulted_field("due", "date", literal, keyword, vec![]);

    let err = validate_schema_defaults(&schema).unwrap_err().to_string();
    assert!(err.contains("mutually exclusive"), "got: {}", err);
}
973
/// A `wikilink` default must be written in `[[...]]` form: a bare word is
/// rejected with an error mentioning "wikilink", the bracketed form passes.
#[test]
fn default_for_wikilink_must_be_well_formed() {
    // Bare text, no brackets: must be refused.
    let bare = Some(Value::String("kyoto".into()));
    let malformed = schema_with_defaulted_field("university", "wikilink", bare, None, vec![]);
    let err = validate_schema_defaults(&malformed).unwrap_err().to_string();
    assert!(err.contains("wikilink"), "got: {}", err);

    // And the right shape passes.
    let bracketed = Some(Value::String("[[kyoto-university-KG9l]]".into()));
    let well_formed =
        schema_with_defaulted_field("university", "wikilink", bracketed, None, vec![]);
    assert!(validate_schema_defaults(&well_formed).is_ok());
}
996
/// A `date` default with out-of-range month and day components is rejected
/// with an error mentioning "date".
#[test]
fn default_for_date_must_be_well_formed() {
    let impossible = Some(Value::String("2024-99-99".into()));
    let schema = schema_with_defaulted_field("due", "date", impossible, None, vec![]);

    let err = validate_schema_defaults(&schema).unwrap_err().to_string();
    assert!(err.contains("date"), "got: {}", err);
}
1009
/// End-to-end check that `load_schema` applies default validation: a schema
/// file on disk whose `integer` field carries a string default must fail to
/// load, with an error that names the field and says "incompatible".
#[test]
fn load_schema_runs_default_validation() {
    use std::io::Write;

    // Kept flush-left: the raw string is written to disk verbatim and YAML
    // nesting is whitespace-sensitive.
    let yaml = r#"
collections:
  movies:
    folder: Notes/movie
    fields:
      year:
        type: integer
        default: "not an integer"
"#;

    let mut file = tempfile::NamedTempFile::new().unwrap();
    writeln!(file, "{}", yaml).unwrap();

    let err = load_schema(file.path()).unwrap_err().to_string();
    assert!(err.contains("year"), "got: {}", err);
    assert!(err.contains("incompatible"), "got: {}", err);
}
1033}