Skip to main content

vaultdb_core/
schema.rs

1//! Schema inference and validation. `infer_schema` walks records to discover
2//! field types and cardinalities; `validate_record` checks a record against a
3//! schema; `schema_to_yaml` renders a schema to YAML for persistence.
4
5use std::collections::BTreeMap;
6use std::path::{Path, PathBuf};
7
8use serde::{Deserialize, Serialize};
9
10use crate::error::{Result, VaultdbError};
11use crate::record::Value;
12
/// File name under which the schema is persisted, relative to the vault
/// root. The CLI and MCP both load `<vault>/vaultdb-schema.yaml` through
/// this constant.
pub const SCHEMA_FILENAME: &str = "vaultdb-schema.yaml";

/// Build the path of the schema file belonging to the vault at `vault_root`.
///
/// This is purely a path computation — nothing is read from disk and the
/// file is not required to exist.
pub fn schema_path(vault_root: &Path) -> PathBuf {
    let mut schema_file = vault_root.to_path_buf();
    schema_file.push(SCHEMA_FILENAME);
    schema_file
}
21
/// Top-level schema file structure.
///
/// This is the type `load_schema` deserializes the schema YAML into and
/// `schema_to_yaml` renders back out.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VaultSchema {
    /// Collection schemas keyed by collection name. The name is what the
    /// schema validators print in error messages; the folder a collection
    /// governs is stored separately in `CollectionSchema::folder`.
    pub collections: BTreeMap<String, CollectionSchema>,
}
27
28impl VaultSchema {
29    /// Collections whose `folder` matches `folder` exactly, or is a path
30    /// under it (e.g. when `folder = "Notes"`, also matches collections
31    /// declared with `folder: Notes/movie`). Used by `schema show` /
32    /// `schema validate` to scope queries to a folder, and by the MCP
33    /// `schema_show` tool's optional folder filter.
34    pub fn collections_for_folder<'a>(
35        &'a self,
36        folder: &str,
37    ) -> Vec<(&'a String, &'a CollectionSchema)> {
38        let prefix = format!("{}/", folder);
39        self.collections
40            .iter()
41            .filter(|(_, c)| c.folder == folder || c.folder.starts_with(&prefix))
42            .collect()
43    }
44
45    /// The single collection whose `folder` matches `folder` exactly.
46    /// Used by `CreateBuilder` to pick the unambiguous schema for a
47    /// `vaultdb create <folder>` invocation. Prefix matches don't apply
48    /// here — for a create, the user means a specific folder.
49    pub fn collection_for_folder<'a>(&'a self, folder: &str) -> Option<&'a CollectionSchema> {
50        self.collections.values().find(|c| c.folder == folder)
51    }
52
53    /// Every collection that *applies to a record* at `record_folder`
54    /// carrying the given fields.
55    ///
56    /// A collection applies when:
57    /// - its `folder` is `==` `record_folder` or an ancestor of it
58    ///   (`"Notes"` is an ancestor of `"Notes/movie"`); AND
59    /// - every parsed `filter:` expression evaluates true against the
60    ///   projected record. Filter parsing failures abort with
61    ///   `SchemaError` — we'd rather block a write than silently treat a
62    ///   broken filter as "no filter."
63    ///
64    /// Unlike `collections_for_folder`, this picks *ancestors*, not
65    /// descendants — given a record, "which collections govern me?"
66    /// rather than "which collections live under this folder?".
67    pub fn applicable_collections<'a>(
68        &'a self,
69        record_folder: &str,
70        projected: &crate::record::Record,
71        vault_root: &Path,
72    ) -> Result<Vec<&'a CollectionSchema>> {
73        let mut out = Vec::new();
74        for c in self.collections.values() {
75            if !folder_is_ancestor_or_equal(&c.folder, record_folder) {
76                continue;
77            }
78            let mut all_pass = true;
79            for f in &c.filter {
80                let expr = crate::query::Expr::parse(f).map_err(|e| {
81                    VaultdbError::SchemaError(format!(
82                        "parsing filter '{}' on collection with folder '{}': {}",
83                        f, c.folder, e
84                    ))
85                })?;
86                if !crate::filter::evaluate_expr(&expr, projected, vault_root, None) {
87                    all_pass = false;
88                    break;
89                }
90            }
91            if all_pass {
92                out.push(c);
93            }
94        }
95        // Stable order: shallowest folder first, so callers that layer
96        // defaults from this list in iteration order get
97        // deepest-folder-wins for free.
98        out.sort_by_key(|c| c.folder.matches('/').count());
99        Ok(out)
100    }
101}
102
/// Path-aware "is `ancestor` at or above `child`?" test.
///
/// Holds when the two folders are identical, or when `child` continues
/// past `ancestor` with a `/` separator — so `"Notes"` covers
/// `"Notes/movie"` but `"Note"` does not. An empty `ancestor` is the
/// root scope and covers every folder.
fn folder_is_ancestor_or_equal(ancestor: &str, child: &str) -> bool {
    if ancestor.is_empty() || ancestor == child {
        return true;
    }
    // Whatever follows the shared prefix must begin with the separator;
    // otherwise the match stopped in the middle of a path segment.
    matches!(child.strip_prefix(ancestor), Some(rest) if rest.starts_with('/'))
}
116
/// Schema for a single collection (a folder + optional filter).
///
/// Every field except `folder` is optional in the YAML and is left out of
/// the serialized output when empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionSchema {
    /// Free-form description of the collection. Not consulted by any
    /// validation in this module.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Folder this collection governs, written with `/` separators. The
    /// collection also applies to records in folders beneath it; an empty
    /// string is treated as the root scope.
    pub folder: String,
    /// Filter expressions in the `crate::query::Expr` syntax. They are
    /// implicitly AND-ed: a record belongs to the collection only when
    /// every expression evaluates true.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub filter: Vec<String>,
    /// Names of fields that must be present and non-null on every record;
    /// `validate_record` reports a violation otherwise.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub required: Vec<String>,
    /// Per-field constraints keyed by field name. Fields a record carries
    /// that are not listed here are not checked by `validate_record`.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub fields: BTreeMap<String, FieldSchema>,
}
130
/// Schema for a single field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldSchema {
    /// Declared type name, spelled `type` in the YAML. `type_matches`
    /// recognises `string`, `integer`, `float`, `number`, `bool`, `list`,
    /// `map`, and the format-checked string types `wikilink`, `date` and
    /// `url`; any other name is accepted and not enforced.
    #[serde(rename = "type")]
    pub field_type: String,
    /// Allowed values, spelled `enum` in the YAML. An empty list means the
    /// field is unrestricted. Values are compared through their display
    /// form during record validation.
    #[serde(rename = "enum")]
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub enum_values: Vec<Value>,
    /// Inclusive lower bound. `validate_record` reports a violation when
    /// the value's `as_float()` reading is below it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min: Option<f64>,
    /// Inclusive upper bound. `validate_record` reports a violation when
    /// the value's `as_float()` reading is above it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max: Option<f64>,
    /// Static default applied when a record is created without an explicit
    /// value for this field. Validated against `field_type` and `enum_values`
    /// at schema load time, so bad defaults fail loudly rather than silently
    /// landing in user files.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
    /// Dynamic default — one of the closed enum values `today`, `now`,
    /// `epoch`. Resolved at the moment a record is created, not at schema
    /// load. Mutually exclusive with `default`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_expr: Option<String>,
}
155
/// Recognised values for `FieldSchema::default_expr`. Any other value is
/// rejected at schema load time. Resolution to a concrete `Value`
/// happens in `resolve_default_expr` below; `CreateBuilder` calls it
/// at the moment a record is created.
///
/// `today` and `now` resolve to string values and `epoch` to an integer
/// value. This list must stay in step with the `match` arms of
/// `resolve_default_expr`.
pub const DEFAULT_EXPRS: &[&str] = &["today", "now", "epoch"];
161
162/// Resolve a `default_expr` keyword to a concrete `Value` using
163/// wall-clock now. Returns `SchemaError` for unknown keywords
164/// (defence-in-depth — `load_schema` rejects these earlier, but the
165/// helper stays safe to call from any code path).
166pub fn resolve_default_expr(expr: &str) -> Result<Value> {
167    match expr {
168        "today" => Ok(Value::String(crate::record::today_string())),
169        "now" => Ok(Value::String(crate::record::now_string())),
170        "epoch" => Ok(Value::Integer(crate::record::epoch_seconds())),
171        other => Err(VaultdbError::SchemaError(format!(
172            "unknown default_expr '{}' (expected one of {:?})",
173            other, DEFAULT_EXPRS
174        ))),
175    }
176}
177
178/// Load schema from a file.
179///
180/// Errors are mapped to `VaultdbError::SchemaError` with a human-readable
181/// reason — the underlying YAML parser is an implementation detail and is
182/// deliberately not exposed in the public error type, so consumers don't
183/// transitively depend on whichever YAML crate vaultdb chooses today.
184///
185/// After parsing, every field's `default` and `default_expr` is validated:
186/// - `default_expr` must be one of [`DEFAULT_EXPRS`].
187/// - `default` literals must be compatible with `field_type`.
188/// - `default` literals must satisfy `enum_values` when both are set.
189/// - `default` and `default_expr` are mutually exclusive.
190pub fn load_schema(path: &Path) -> Result<VaultSchema> {
191    let content = std::fs::read_to_string(path).map_err(|_| {
192        VaultdbError::SchemaError(format!("cannot read schema file: {}", path.display()))
193    })?;
194    let parsed: VaultSchema = serde_yaml::from_str(&content)
195        .map_err(|e| VaultdbError::SchemaError(format!("parsing {}: {}", path.display(), e)))?;
196    validate_schema_defaults(&parsed)?;
197    validate_schema_consistency(&parsed)?;
198    Ok(parsed)
199}
200
201/// Walk every field in every collection and check that any declared
202/// defaults are well-formed. Exposed publicly so consumers that build a
203/// `VaultSchema` in code (not by loading a file) can run the same check.
204pub fn validate_schema_defaults(schema: &VaultSchema) -> Result<()> {
205    for (col_name, col) in &schema.collections {
206        for (field_name, field) in &col.fields {
207            validate_field_defaults(col_name, field_name, field)?;
208        }
209    }
210    Ok(())
211}
212
213fn validate_field_defaults(col: &str, field: &str, schema: &FieldSchema) -> Result<()> {
214    if schema.default.is_some() && schema.default_expr.is_some() {
215        return Err(VaultdbError::SchemaError(format!(
216            "collection '{}', field '{}': `default` and `default_expr` are mutually exclusive",
217            col, field
218        )));
219    }
220
221    if let Some(expr) = &schema.default_expr
222        && !DEFAULT_EXPRS.contains(&expr.as_str())
223    {
224        return Err(VaultdbError::SchemaError(format!(
225            "collection '{}', field '{}': default_expr '{}' is not recognised (expected one of {:?})",
226            col, field, expr, DEFAULT_EXPRS
227        )));
228    }
229
230    if let Some(val) = &schema.default {
231        // Type compatibility check. Reuses `type_matches` so the rules
232        // stay aligned with what `validate_record` enforces at runtime.
233        let actual = val.type_name();
234        if !type_matches(actual, &schema.field_type) {
235            return Err(VaultdbError::SchemaError(format!(
236                "collection '{}', field '{}': default has type '{}', incompatible with field type '{}'",
237                col, field, actual, schema.field_type
238            )));
239        }
240
241        // Format check for constrained string types. A bad `default:
242        // 2024-99-99` should fail at schema load, not when a user
243        // creates a note.
244        if let Value::String(s) = val {
245            let format_ok = match schema.field_type.as_str() {
246                "wikilink" => is_valid_wikilink(s),
247                "date" => is_valid_date(s),
248                "url" => is_valid_url(s),
249                _ => true,
250            };
251            if !format_ok {
252                return Err(VaultdbError::SchemaError(format!(
253                    "collection '{}', field '{}': default '{}' is not a valid {}",
254                    col, field, s, schema.field_type
255                )));
256            }
257        }
258
259        // Enum compatibility.
260        if !schema.enum_values.is_empty() {
261            let display = val.display_value();
262            let matches_enum = schema.enum_values.iter().any(|e| match e {
263                Value::String(s) => s == &display,
264                Value::Integer(i) => i.to_string() == display,
265                Value::Float(f) => f.to_string() == display,
266                Value::Bool(b) => b.to_string() == display,
267                _ => false,
268            });
269            if !matches_enum {
270                return Err(VaultdbError::SchemaError(format!(
271                    "collection '{}', field '{}': default '{}' is not in `enum` values",
272                    col, field, display
273                )));
274            }
275        }
276    }
277
278    Ok(())
279}
280
281/// Cross-collection consistency checks. Runs after `validate_schema_defaults`
282/// at schema load. Rejects schemas where two **folder-overlapping**
283/// collections (one folder is `==` or an ancestor of the other) declare
284/// the same field name in mutually unsatisfiable ways:
285///
286/// - **Type conflict (Tier 1):** different `type:` strings. Two
287///   collections that both apply to the same record can't disagree on
288///   the field's type — no value would satisfy both. The check is
289///   strict-equal on the type string; intentional narrowings between
290///   `string` and `wikilink` are not treated as compatible. The user
291///   should redeclare the field consistently.
292/// - **Default vs sibling (Tier 1):** a `default:` (or resolved
293///   `default_expr:`) on field `X` in collection A must satisfy every
294///   *other* folder-overlapping collection that also declares `X`.
295///   Without this check, the moment the default fires on create, the
296///   sibling collection's validator would refuse the write — turning a
297///   silent schema bug into a confusing user-facing error.
298/// - **Disjoint enum (Tier 2):** both have non-empty `enum:` whose
299///   intersection is empty. Narrowing (subset) is allowed and expected
300///   — that's how `Notes.db-table = [movie, book, ...]` works alongside
301///   `movies.db-table = [movie]`. Only fully-disjoint sets fail.
302/// - **Disjoint range (Tier 2):** intersected `min`/`max` is empty
303///   (`max(min_a, min_b) > min(max_a, max_b)`).
304///
305/// Collection-pair overlap is decided by folder alone in v1 — a filter
306/// pair that's *obviously* disjoint (same field, different equality
307/// scalars) could safely skip the field-consistency check, but the
308/// disjointness analysis isn't worth the complexity yet. Conservative
309/// over-checking means at worst a spurious schema-load error, which the
310/// author can resolve by aligning the field declarations.
311pub fn validate_schema_consistency(schema: &VaultSchema) -> Result<()> {
312    let entries: Vec<(&String, &CollectionSchema)> = schema.collections.iter().collect();
313
314    // Pairwise checks for shared-field consistency. Skip pairs whose
315    // filters are demonstrably disjoint: even if their folders overlap
316    // (e.g. one folder is an ancestor of the other), the filters mean
317    // no record can match both, so their field declarations don't have
318    // to align. See `filters_demonstrably_disjoint` for the limits of
319    // what "demonstrably" covers.
320    for i in 0..entries.len() {
321        let (name_a, col_a) = entries[i];
322        for entry_b in entries.iter().skip(i + 1) {
323            let (name_b, col_b) = *entry_b;
324            if !folders_overlap(&col_a.folder, &col_b.folder) {
325                continue;
326            }
327            if filters_demonstrably_disjoint(&col_a.filter, &col_b.filter)? {
328                continue;
329            }
330            for (field_name, fs_a) in &col_a.fields {
331                let Some(fs_b) = col_b.fields.get(field_name) else {
332                    continue;
333                };
334                check_field_pair(name_a, col_a, fs_a, name_b, col_b, fs_b, field_name)?;
335            }
336        }
337    }
338
339    // Defaults must satisfy every other folder-overlapping collection
340    // that declares the same field — unless the two collections'
341    // filters are demonstrably disjoint, in which case the default
342    // never lands in a record governed by the other collection.
343    for (col_name, col) in &schema.collections {
344        for (field_name, fs) in &col.fields {
345            let resolved: Option<Value> = if let Some(d) = &fs.default {
346                Some(d.clone())
347            } else if let Some(e) = &fs.default_expr {
348                resolve_default_expr(e).ok()
349            } else {
350                None
351            };
352            let Some(val) = resolved else {
353                continue;
354            };
355
356            for (other_name, other_col) in &schema.collections {
357                if other_name == col_name {
358                    continue;
359                }
360                if !folders_overlap(&col.folder, &other_col.folder) {
361                    continue;
362                }
363                if filters_demonstrably_disjoint(&col.filter, &other_col.filter)? {
364                    continue;
365                }
366                let Some(other_fs) = other_col.fields.get(field_name) else {
367                    continue;
368                };
369                if let Err(why) = default_satisfies(&val, other_fs) {
370                    return Err(VaultdbError::SchemaError(format!(
371                        "collection '{}': default for field '{}' would violate overlapping \
372                         collection '{}' (folder '{}'): {}",
373                        col_name, field_name, other_name, other_col.folder, why
374                    )));
375                }
376            }
377        }
378    }
379
380    Ok(())
381}
382
383/// Conservative filter-disjointness check used to skip cross-checks
384/// between collections that can never co-apply to a single record.
385///
386/// Returns `true` only when at least one *forced* equality constraint
387/// in `a` contradicts a forced equality constraint in `b` on the same
388/// field — e.g. `db-table = movie` in one and `db-table = book` in
389/// the other. "Forced" means the predicate appears under top-level
390/// `And` chains or directly; predicates nested inside `Or` or `Not`
391/// are skipped because they don't have to hold. Anything fancier (range
392/// constraints, inequality, multi-value membership) is treated as
393/// non-disjoint — the field-consistency check then runs and may fire
394/// a spurious error, which is the trade-off we accept until a real
395/// schema needs the smarter analysis.
396fn filters_demonstrably_disjoint(a: &[String], b: &[String]) -> Result<bool> {
397    let constraints_a = parse_forced_equalities(a)?;
398    let constraints_b = parse_forced_equalities(b)?;
399    for (fa, va) in &constraints_a {
400        for (fb, vb) in &constraints_b {
401            if fa == fb && va != vb {
402                return Ok(true);
403            }
404        }
405    }
406    Ok(false)
407}
408
409/// Parse a collection's filter strings (implicitly AND-ed by the
410/// validate / applicable_collections logic) and collect every
411/// equality predicate that MUST hold for the combined filter to
412/// match. Returns `(field_name, value)` pairs.
413fn parse_forced_equalities(filters: &[String]) -> Result<Vec<(String, Value)>> {
414    let mut out = Vec::new();
415    for f in filters {
416        let expr = crate::query::Expr::parse(f)
417            .map_err(|e| VaultdbError::SchemaError(format!("parsing filter '{}': {}", f, e)))?;
418        collect_forced_equalities(&expr, &mut out);
419    }
420    Ok(out)
421}
422
423fn collect_forced_equalities(expr: &crate::query::Expr, out: &mut Vec<(String, Value)>) {
424    use crate::query::{Expr, Predicate};
425    match expr {
426        Expr::Predicate(Predicate::Equals { field, value }) => {
427            out.push((field.clone(), value.clone()));
428        }
429        Expr::And(es) => {
430            for e in es {
431                collect_forced_equalities(e, out);
432            }
433        }
434        // `Or`, `Not`, and other predicates don't force a specific
435        // value on a specific field at the top level — skip them.
436        _ => {}
437    }
438}
439
440fn check_field_pair(
441    name_a: &str,
442    col_a: &CollectionSchema,
443    fs_a: &FieldSchema,
444    name_b: &str,
445    col_b: &CollectionSchema,
446    fs_b: &FieldSchema,
447    field_name: &str,
448) -> Result<()> {
449    if fs_a.field_type != fs_b.field_type {
450        return Err(VaultdbError::SchemaError(format!(
451            "collections '{}' (folder '{}') and '{}' (folder '{}') both declare field '{}' \
452             but with incompatible types '{}' vs '{}' — a single record under these folders \
453             must satisfy both, so the types must match",
454            name_a,
455            col_a.folder,
456            name_b,
457            col_b.folder,
458            field_name,
459            fs_a.field_type,
460            fs_b.field_type
461        )));
462    }
463
464    if !fs_a.enum_values.is_empty() && !fs_b.enum_values.is_empty() {
465        let any_shared = fs_a
466            .enum_values
467            .iter()
468            .any(|v| fs_b.enum_values.iter().any(|w| v == w));
469        if !any_shared {
470            return Err(VaultdbError::SchemaError(format!(
471                "collections '{}' and '{}' declare field '{}' with disjoint enum values \
472                 (folders '{}' and '{}' overlap, so no value can satisfy both)",
473                name_a, name_b, field_name, col_a.folder, col_b.folder
474            )));
475        }
476    }
477
478    let lo = match (fs_a.min, fs_b.min) {
479        (Some(a), Some(b)) => Some(a.max(b)),
480        (Some(a), None) => Some(a),
481        (None, Some(b)) => Some(b),
482        (None, None) => None,
483    };
484    let hi = match (fs_a.max, fs_b.max) {
485        (Some(a), Some(b)) => Some(a.min(b)),
486        (Some(a), None) => Some(a),
487        (None, Some(b)) => Some(b),
488        (None, None) => None,
489    };
490    if let (Some(l), Some(h)) = (lo, hi)
491        && l > h
492    {
493        return Err(VaultdbError::SchemaError(format!(
494            "collections '{}' and '{}' declare field '{}' with disjoint numeric ranges: \
495             effective min={} > max={}",
496            name_a, name_b, field_name, l, h
497        )));
498    }
499
500    Ok(())
501}
502
503/// True if `val` would pass `field_type`/`enum_values` validation against
504/// `fs`. Returns a human-readable reason on failure. Used by the
505/// cross-collection default check; mirrors the same single-collection
506/// rules that `validate_field_defaults` and `validate_record` enforce.
507fn default_satisfies(val: &Value, fs: &FieldSchema) -> std::result::Result<(), String> {
508    let actual = val.type_name();
509    if !type_matches(actual, &fs.field_type) {
510        return Err(format!(
511            "value type '{}' incompatible with field type '{}'",
512            actual, fs.field_type
513        ));
514    }
515    if let Value::String(s) = val {
516        let format_ok = match fs.field_type.as_str() {
517            "wikilink" => is_valid_wikilink(s),
518            "date" => is_valid_date(s),
519            "url" => is_valid_url(s),
520            _ => true,
521        };
522        if !format_ok {
523            return Err(format!("value '{}' is not a valid {}", s, fs.field_type));
524        }
525    }
526    if !fs.enum_values.is_empty() {
527        let display = val.display_value();
528        let m = fs.enum_values.iter().any(|e| match e {
529            Value::String(s) => s == &display,
530            Value::Integer(i) => i.to_string() == display,
531            Value::Float(f) => f.to_string() == display,
532            Value::Bool(b) => b.to_string() == display,
533            _ => false,
534        });
535        if !m {
536            return Err(format!("value '{}' not in declared enum values", display));
537        }
538    }
539    Ok(())
540}
541
542/// True if either folder is `==` the other or an ancestor of it.
543/// Used to decide which collection pairs need cross-checks.
544fn folders_overlap(a: &str, b: &str) -> bool {
545    folder_is_ancestor_or_equal(a, b) || folder_is_ancestor_or_equal(b, a)
546}
547
548/// Serialize a schema to YAML string.
549pub fn schema_to_yaml(schema: &VaultSchema) -> Result<String> {
550    serde_yaml::to_string(schema)
551        .map_err(|e| VaultdbError::SchemaError(format!("rendering schema as YAML: {}", e)))
552}
553
/// One problem found while validating a record against a schema.
#[derive(Debug)]
pub struct Violation {
    /// Name of the file the offending record came from, as handed to
    /// `validate_record`.
    pub file: String,
    /// Name of the field that failed the check.
    pub field: String,
    /// Human-readable explanation of what is wrong.
    pub message: String,
}

/// Renders as `<file>: <field> — <message>`.
impl std::fmt::Display for Violation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            file,
            field,
            message,
        } = self;
        write!(f, "{file}: {field} — {message}")
    }
}
567
568/// Validate a record's fields against a collection schema.
569pub fn validate_record(
570    filename: &str,
571    fields: &BTreeMap<String, Value>,
572    schema: &CollectionSchema,
573) -> Vec<Violation> {
574    let mut violations = Vec::new();
575
576    // Check required fields
577    for req in &schema.required {
578        match fields.get(req) {
579            None | Some(Value::Null) => {
580                violations.push(Violation {
581                    file: filename.to_string(),
582                    field: req.clone(),
583                    message: "required field is missing or null".into(),
584                });
585            }
586            _ => {}
587        }
588    }
589
590    // Check field constraints
591    for (field_name, field_schema) in &schema.fields {
592        let value = match fields.get(field_name) {
593            Some(v) if !matches!(v, Value::Null) => v,
594            _ => continue, // skip absent/null fields (required check handles those)
595        };
596
597        // Type check
598        let actual_type = value.type_name();
599        let expected_type = &field_schema.field_type;
600        if !type_matches(actual_type, expected_type) {
601            violations.push(Violation {
602                file: filename.to_string(),
603                field: field_name.clone(),
604                message: format!("expected type '{}', got '{}'", expected_type, actual_type),
605            });
606        }
607
608        // Enum check
609        if !field_schema.enum_values.is_empty() {
610            let display = value.display_value();
611            let matches_enum = field_schema.enum_values.iter().any(|e| match e {
612                Value::String(s) => s == &display,
613                Value::Integer(i) => i.to_string() == display,
614                Value::Float(f) => f.to_string() == display,
615                Value::Bool(b) => b.to_string() == display,
616                _ => false,
617            });
618            if !matches_enum {
619                violations.push(Violation {
620                    file: filename.to_string(),
621                    field: field_name.clone(),
622                    message: format!(
623                        "value '{}' not in allowed values: {:?}",
624                        display,
625                        field_schema
626                            .enum_values
627                            .iter()
628                            .map(value_display)
629                            .collect::<Vec<_>>()
630                    ),
631                });
632            }
633        }
634
635        // Min/max check for numeric fields
636        if let Some(min) = field_schema.min
637            && let Some(num) = value.as_float()
638            && num < min
639        {
640            violations.push(Violation {
641                file: filename.to_string(),
642                field: field_name.clone(),
643                message: format!("value {} is below minimum {}", num, min),
644            });
645        }
646        if let Some(max) = field_schema.max
647            && let Some(num) = value.as_float()
648            && num > max
649        {
650            violations.push(Violation {
651                file: filename.to_string(),
652                field: field_name.clone(),
653                message: format!("value {} exceeds maximum {}", num, max),
654            });
655        }
656
657        // Format checks for the "string-shaped but constrained" types.
658        // These don't introduce new Value variants — on disk they're
659        // still YAML strings — but validate_record refuses values that
660        // don't match the expected format. Match arms with guards
661        // keep clippy's `collapsible_match` happy.
662        if let Value::String(s) = value {
663            let bad = match expected_type.as_str() {
664                "wikilink" if !is_valid_wikilink(s) => Some(format!(
665                    "value '{}' is not a valid wikilink; expected [[name]], [[name|alias]], [[name#section]], or [[name#section|alias]]",
666                    s
667                )),
668                "date" if !is_valid_date(s) => Some(format!(
669                    "value '{}' is not a valid date; expected YYYY-MM-DD",
670                    s
671                )),
672                "url" if !is_valid_url(s) => Some(format!("value '{}' is not a valid URL", s)),
673                _ => None,
674            };
675            if let Some(message) = bad {
676                violations.push(Violation {
677                    file: filename.to_string(),
678                    field: field_name.clone(),
679                    message,
680                });
681            }
682        }
683    }
684
685    violations
686}
687
688fn value_display(v: &Value) -> String {
689    match v {
690        Value::String(s) => s.clone(),
691        Value::Integer(i) => i.to_string(),
692        Value::Float(f) => f.to_string(),
693        Value::Bool(b) => b.to_string(),
694        Value::Null => "null".to_string(),
695        other => format!("{:?}", other),
696    }
697}
698
/// Decide whether a value whose runtime type name is `actual` is
/// acceptable for a field declared as `expected`.
///
/// `float` and `number` both accept integers as well as floats. The
/// constrained string types (`wikilink`, `date`, `url`) are YAML strings
/// on disk, so at this level they only require a string; their format is
/// checked separately. A declared type this function does not know is
/// never enforced.
fn type_matches(actual: &str, expected: &str) -> bool {
    match expected {
        "string" | "wikilink" | "date" | "url" => actual == "string",
        "float" | "number" => matches!(actual, "integer" | "float"),
        // The remaining known types accept exactly their own name.
        "integer" | "bool" | "list" | "map" => actual == expected,
        _ => true,
    }
}
715
/// Syntax check for a wikilink of the form `[[target]]`, optionally
/// followed inside the brackets by `#section` and/or `|alias`.
///
/// The text between the outer `[[` and `]]` may not contain any further
/// bracket, and the target — everything before the first `|` or `#` —
/// must contain something other than whitespace. Whether the target
/// exists in the vault is NOT checked; that needs a `LinkGraph` and is
/// out of scope for v1.
pub fn is_valid_wikilink(s: &str) -> bool {
    let Some(inner) = s
        .strip_prefix("[[")
        .and_then(|rest| rest.strip_suffix("]]"))
    else {
        return false;
    };
    // Brackets are delimiters only; one inside the link (as in
    // `[[a][b]]`) makes it malformed.
    if inner.contains(['[', ']']) {
        return false;
    }
    // The first piece of the split is the target. For an empty `inner`
    // it is the empty string, which is rejected below.
    let target = inner.split(['|', '#']).next().unwrap_or("");
    !target.trim().is_empty()
}
734
/// True if `s` is a calendar date in strict `YYYY-MM-DD` form: exactly
/// four, two and two ASCII digits separated by `-`, with the month in
/// `1..=12` and the day in `1..=31`.
///
/// Each component must consist of digits only. Integer parsing on its own
/// would also accept a leading `+` (so `2024-+1-+5` would slip through),
/// hence the explicit digit check.
///
/// Does NOT validate the day against the month or do leap-year
/// validation — `2024-02-30` passes today; a stricter check can come in
/// Phase 8 if the false-negative cost ever materialises.
pub fn is_valid_date(s: &str) -> bool {
    /// Parse `part` only if it is exactly `width` ASCII digits.
    fn fixed_digits(part: &str, width: usize) -> Option<u32> {
        if part.len() == width && part.bytes().all(|b| b.is_ascii_digit()) {
            part.parse().ok()
        } else {
            None
        }
    }

    // Exactly three `-`-separated components; a fourth one means the
    // input has too many separators.
    let mut parts = s.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (parts.next(), parts.next(), parts.next(), parts.next())
    else {
        return false;
    };

    match (
        fixed_digits(year, 4),
        fixed_digits(month, 2),
        fixed_digits(day, 2),
    ) {
        (Some(_), Some(m), Some(d)) => (1..=12).contains(&m) && (1..=31).contains(&d),
        _ => false,
    }
}
755
756/// True if `s` parses as an absolute URL (i.e. has a scheme like
757/// `https`, `http`, `mailto`, `file`, …). Relative URLs are rejected
758/// on purpose — a vault field declared `type: url` should be navigable
759/// on its own.
760pub fn is_valid_url(s: &str) -> bool {
761    url::Url::parse(s).is_ok()
762}
763
764/// Infer a schema from a set of records.
765pub fn infer_schema(folder_name: &str, records: &[crate::record::Record]) -> CollectionSchema {
766    let mut field_types: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
767    let mut field_values: BTreeMap<String, Vec<String>> = BTreeMap::new();
768    let mut field_count: BTreeMap<String, usize> = BTreeMap::new();
769    let total = records.len();
770
771    for record in records {
772        for (key, value) in &record.fields {
773            let type_name = value.type_name().to_string();
774            *field_types
775                .entry(key.clone())
776                .or_default()
777                .entry(type_name)
778                .or_insert(0) += 1;
779            *field_count.entry(key.clone()).or_insert(0) += 1;
780
781            if !matches!(value, Value::Null | Value::List(_) | Value::Map(_)) {
782                field_values
783                    .entry(key.clone())
784                    .or_default()
785                    .push(value.display_value());
786            }
787        }
788    }
789
790    let mut fields = BTreeMap::new();
791    let mut required = Vec::new();
792
793    for (key, types) in &field_types {
794        // Determine the dominant type
795        let dominant_type = types
796            .iter()
797            .filter(|(t, _)| *t != "null")
798            .max_by_key(|(_, count)| *count)
799            .map(|(t, _)| t.clone())
800            .unwrap_or_else(|| "string".to_string());
801
802        // Check if field is present in all records with non-null values
803        let non_null_count = types
804            .iter()
805            .filter(|(t, _)| *t != "null")
806            .map(|(_, c)| c)
807            .sum::<usize>();
808
809        if non_null_count == total && total > 0 {
810            required.push(key.clone());
811        }
812
813        // Infer enum if there are few unique values
814        let enum_values = if let Some(values) = field_values.get(key) {
815            let mut unique: Vec<String> = values.clone();
816            unique.sort();
817            unique.dedup();
818            if unique.len() <= 10 && unique.len() < values.len() / 2 {
819                unique
820                    .into_iter()
821                    .map(|v| {
822                        // Try to parse as integer
823                        if let Ok(n) = v.parse::<i64>() {
824                            Value::Integer(n)
825                        } else {
826                            Value::String(v)
827                        }
828                    })
829                    .collect()
830            } else {
831                vec![]
832            }
833        } else {
834            vec![]
835        };
836
837        fields.insert(
838            key.clone(),
839            FieldSchema {
840                field_type: dominant_type,
841                enum_values,
842                min: None,
843                max: None,
844                default: None,
845                default_expr: None,
846            },
847        );
848        // `required` is tracked at the collection level (above); kept out
849        // of `FieldSchema` deliberately so there's a single source of truth.
850    }
851
852    CollectionSchema {
853        description: Some(format!("Auto-inferred schema for {}", folder_name)),
854        folder: folder_name.to_string(),
855        filter: vec![],
856        required,
857        fields,
858    }
859}
860
861#[cfg(test)]
862mod tests {
863    use super::*;
864    use crate::record::{Record, Value};
865    use std::path::PathBuf;
866
867    fn make_record(fields: Vec<(&str, Value)>) -> Record {
868        let mut map = BTreeMap::new();
869        for (k, v) in fields {
870            map.insert(k.to_string(), v);
871        }
872        Record {
873            path: PathBuf::from("/vault/notes/test.md"),
874            fields: map,
875            raw_content: None,
876        }
877    }
878
879    #[test]
880    fn validate_required_field_missing() {
881        let schema = CollectionSchema {
882            description: None,
883            folder: "notes".into(),
884            filter: vec![],
885            required: vec!["status".into()],
886            fields: BTreeMap::new(),
887        };
888
889        let record = make_record(vec![("tags", Value::String("x".into()))]);
890        let violations = validate_record("test.md", &record.fields, &schema);
891        assert_eq!(violations.len(), 1);
892        assert!(violations[0].message.contains("required"));
893    }
894
895    #[test]
896    fn validate_type_mismatch() {
897        let mut fields = BTreeMap::new();
898        fields.insert(
899            "year".into(),
900            FieldSchema {
901                field_type: "integer".into(),
902                enum_values: vec![],
903                min: None,
904                max: None,
905                default: None,
906                default_expr: None,
907            },
908        );
909
910        let schema = CollectionSchema {
911            description: None,
912            folder: "notes".into(),
913            filter: vec![],
914            required: vec![],
915            fields,
916        };
917
918        let record = make_record(vec![("year", Value::String("not a number".into()))]);
919        let violations = validate_record("test.md", &record.fields, &schema);
920        assert_eq!(violations.len(), 1);
921        assert!(violations[0].message.contains("type"));
922    }
923
924    #[test]
925    fn validate_enum_violation() {
926        let mut fields = BTreeMap::new();
927        fields.insert(
928            "status".into(),
929            FieldSchema {
930                field_type: "string".into(),
931                enum_values: vec![
932                    Value::String("to-watch".into()),
933                    Value::String("watched".into()),
934                ],
935                min: None,
936                max: None,
937                default: None,
938                default_expr: None,
939            },
940        );
941
942        let schema = CollectionSchema {
943            description: None,
944            folder: "notes".into(),
945            filter: vec![],
946            required: vec![],
947            fields,
948        };
949
950        let record = make_record(vec![("status", Value::String("invalid".into()))]);
951        let violations = validate_record("test.md", &record.fields, &schema);
952        assert_eq!(violations.len(), 1);
953        assert!(violations[0].message.contains("not in allowed"));
954    }
955
956    #[test]
957    fn validate_min_max() {
958        let mut fields = BTreeMap::new();
959        fields.insert(
960            "rating".into(),
961            FieldSchema {
962                field_type: "number".into(),
963                enum_values: vec![],
964                min: Some(1.0),
965                max: Some(10.0),
966                default: None,
967                default_expr: None,
968            },
969        );
970
971        let schema = CollectionSchema {
972            description: None,
973            folder: "notes".into(),
974            filter: vec![],
975            required: vec![],
976            fields,
977        };
978
979        let record = make_record(vec![("rating", Value::Integer(15))]);
980        let violations = validate_record("test.md", &record.fields, &schema);
981        assert_eq!(violations.len(), 1);
982        assert!(violations[0].message.contains("exceeds maximum"));
983    }
984
985    #[test]
986    fn validate_passes_clean_record() {
987        let mut fields = BTreeMap::new();
988        fields.insert(
989            "status".into(),
990            FieldSchema {
991                field_type: "string".into(),
992                enum_values: vec![Value::String("to-watch".into())],
993                min: None,
994                max: None,
995                default: None,
996                default_expr: None,
997            },
998        );
999
1000        let schema = CollectionSchema {
1001            description: None,
1002            folder: "notes".into(),
1003            filter: vec![],
1004            required: vec!["status".into()],
1005            fields,
1006        };
1007
1008        let record = make_record(vec![("status", Value::String("to-watch".into()))]);
1009        let violations = validate_record("test.md", &record.fields, &schema);
1010        assert!(violations.is_empty());
1011    }
1012
1013    #[test]
1014    fn infer_schema_basic() {
1015        let records = vec![
1016            make_record(vec![
1017                ("status", Value::String("active".into())),
1018                ("year", Value::Integer(2020)),
1019            ]),
1020            make_record(vec![
1021                ("status", Value::String("draft".into())),
1022                ("year", Value::Integer(2021)),
1023            ]),
1024        ];
1025
1026        let schema = infer_schema("notes", &records);
1027        assert_eq!(schema.fields.get("status").unwrap().field_type, "string");
1028        assert_eq!(schema.fields.get("year").unwrap().field_type, "integer");
1029        assert!(schema.required.contains(&"status".to_string()));
1030        assert!(schema.required.contains(&"year".to_string()));
1031    }
1032
1033    // ── Phase 1: wikilink / date / url type validation ────────────────
1034
1035    fn schema_with_field(name: &str, field_type: &str) -> CollectionSchema {
1036        let mut fields = BTreeMap::new();
1037        fields.insert(
1038            name.into(),
1039            FieldSchema {
1040                field_type: field_type.into(),
1041                enum_values: vec![],
1042                min: None,
1043                max: None,
1044                default: None,
1045                default_expr: None,
1046            },
1047        );
1048        CollectionSchema {
1049            description: None,
1050            folder: "notes".into(),
1051            filter: vec![],
1052            required: vec![],
1053            fields,
1054        }
1055    }
1056
1057    #[test]
1058    fn wikilink_accepts_plain() {
1059        assert!(is_valid_wikilink("[[name]]"));
1060        assert!(is_valid_wikilink("[[kyoto-university-kyoto-yoshida-KG9l]]"));
1061    }
1062
1063    #[test]
1064    fn wikilink_accepts_alias_and_section() {
1065        assert!(is_valid_wikilink("[[name|alias]]"));
1066        assert!(is_valid_wikilink("[[name#section]]"));
1067        assert!(is_valid_wikilink("[[name#section|alias]]"));
1068    }
1069
1070    #[test]
1071    fn wikilink_rejects_malformed() {
1072        assert!(!is_valid_wikilink("name"));
1073        assert!(!is_valid_wikilink("[name]"));
1074        assert!(!is_valid_wikilink("[[]]"));
1075        assert!(!is_valid_wikilink("[[  ]]"));
1076        assert!(!is_valid_wikilink("[[a][b]]"));
1077    }
1078
1079    #[test]
1080    fn validate_wikilink_field_catches_bad_value() {
1081        let schema = schema_with_field("university", "wikilink");
1082        let record = make_record(vec![("university", Value::String("kyoto".into()))]);
1083        let violations = validate_record("p.md", &record.fields, &schema);
1084        assert_eq!(violations.len(), 1, "{:?}", violations);
1085        assert!(violations[0].message.contains("wikilink"));
1086    }
1087
1088    #[test]
1089    fn validate_wikilink_field_passes_good_value() {
1090        let schema = schema_with_field("university", "wikilink");
1091        let record = make_record(vec![(
1092            "university",
1093            Value::String("[[kyoto-university-KG9l]]".into()),
1094        )]);
1095        let violations = validate_record("p.md", &record.fields, &schema);
1096        assert!(violations.is_empty(), "{:?}", violations);
1097    }
1098
1099    #[test]
1100    fn date_accepts_iso_calendar() {
1101        assert!(is_valid_date("2024-05-13"));
1102        assert!(is_valid_date("1999-01-01"));
1103        assert!(is_valid_date("2030-12-31"));
1104    }
1105
1106    #[test]
1107    fn date_rejects_garbage_and_wrong_components() {
1108        assert!(!is_valid_date("not-a-date"));
1109        assert!(!is_valid_date("2024/05/13"));
1110        assert!(!is_valid_date("2024-13-01")); // bad month
1111        assert!(!is_valid_date("2024-00-15")); // bad month
1112        assert!(!is_valid_date("2024-05-32")); // bad day
1113        assert!(!is_valid_date("24-05-13")); // 2-digit year
1114        assert!(!is_valid_date("2024-5-13")); // 1-digit month
1115    }
1116
1117    #[test]
1118    fn validate_date_field_catches_bad_value() {
1119        let schema = schema_with_field("due", "date");
1120        let record = make_record(vec![("due", Value::String("not-a-date".into()))]);
1121        let violations = validate_record("t.md", &record.fields, &schema);
1122        assert_eq!(violations.len(), 1);
1123        assert!(violations[0].message.contains("date"));
1124    }
1125
1126    #[test]
1127    fn validate_date_field_passes_good_value() {
1128        let schema = schema_with_field("due", "date");
1129        let record = make_record(vec![("due", Value::String("2024-05-13".into()))]);
1130        let violations = validate_record("t.md", &record.fields, &schema);
1131        assert!(violations.is_empty());
1132    }
1133
1134    #[test]
1135    fn url_accepts_common_schemes() {
1136        assert!(is_valid_url("https://example.com"));
1137        assert!(is_valid_url("http://example.com/path?q=1"));
1138        assert!(is_valid_url("mailto:a@b.com"));
1139        assert!(is_valid_url("file:///tmp/x"));
1140    }
1141
1142    #[test]
1143    fn url_rejects_garbage_and_relative() {
1144        assert!(!is_valid_url("not a url"));
1145        assert!(!is_valid_url("/relative/path"));
1146        assert!(!is_valid_url("example.com")); // no scheme
1147    }
1148
1149    #[test]
1150    fn validate_url_field_catches_bad_value() {
1151        let schema = schema_with_field("homepage", "url");
1152        let record = make_record(vec![("homepage", Value::String("example.com".into()))]);
1153        let violations = validate_record("p.md", &record.fields, &schema);
1154        assert_eq!(violations.len(), 1);
1155        assert!(violations[0].message.contains("URL"));
1156    }
1157
1158    #[test]
1159    fn validate_url_field_passes_good_value() {
1160        let schema = schema_with_field("homepage", "url");
1161        let record = make_record(vec![(
1162            "homepage",
1163            Value::String("https://example.com".into()),
1164        )]);
1165        let violations = validate_record("p.md", &record.fields, &schema);
1166        assert!(violations.is_empty());
1167    }
1168
1169    #[test]
1170    fn constrained_type_still_requires_string_actual() {
1171        // A wikilink-typed field receiving an integer should fail the
1172        // type check, not just the wikilink-format check.
1173        let schema = schema_with_field("university", "wikilink");
1174        let record = make_record(vec![("university", Value::Integer(42))]);
1175        let violations = validate_record("p.md", &record.fields, &schema);
1176        assert!(violations.iter().any(|v| v.message.contains("type")));
1177    }
1178
1179    // ── Phase 2: default / default_expr validation at load time ────────
1180
1181    fn schema_with_defaulted_field(
1182        name: &str,
1183        field_type: &str,
1184        default: Option<Value>,
1185        default_expr: Option<String>,
1186        enum_values: Vec<Value>,
1187    ) -> VaultSchema {
1188        let mut fields = BTreeMap::new();
1189        fields.insert(
1190            name.into(),
1191            FieldSchema {
1192                field_type: field_type.into(),
1193                enum_values,
1194                min: None,
1195                max: None,
1196                default,
1197                default_expr,
1198            },
1199        );
1200        VaultSchema {
1201            collections: BTreeMap::from([(
1202                "movies".to_string(),
1203                CollectionSchema {
1204                    description: None,
1205                    folder: "Notes/movie".into(),
1206                    filter: vec![],
1207                    required: vec![],
1208                    fields,
1209                },
1210            )]),
1211        }
1212    }
1213
1214    #[test]
1215    fn default_literal_matching_type_passes() {
1216        let s = schema_with_defaulted_field(
1217            "year",
1218            "integer",
1219            Some(Value::Integer(2024)),
1220            None,
1221            vec![],
1222        );
1223        assert!(validate_schema_defaults(&s).is_ok());
1224    }
1225
1226    #[test]
1227    fn default_literal_wrong_type_rejected() {
1228        let s = schema_with_defaulted_field(
1229            "year",
1230            "integer",
1231            Some(Value::String("nope".into())),
1232            None,
1233            vec![],
1234        );
1235        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1236        assert!(err.contains("incompatible"), "got: {}", err);
1237        assert!(err.contains("year"));
1238    }
1239
1240    #[test]
1241    fn default_literal_outside_enum_rejected() {
1242        let s = schema_with_defaulted_field(
1243            "status",
1244            "string",
1245            Some(Value::String("invalid".into())),
1246            None,
1247            vec![
1248                Value::String("to-watch".into()),
1249                Value::String("watched".into()),
1250            ],
1251        );
1252        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1253        assert!(err.contains("enum"), "got: {}", err);
1254    }
1255
1256    #[test]
1257    fn default_literal_inside_enum_passes() {
1258        let s = schema_with_defaulted_field(
1259            "status",
1260            "string",
1261            Some(Value::String("to-watch".into())),
1262            None,
1263            vec![
1264                Value::String("to-watch".into()),
1265                Value::String("watched".into()),
1266            ],
1267        );
1268        assert!(validate_schema_defaults(&s).is_ok());
1269    }
1270
1271    #[test]
1272    fn default_expr_known_keyword_passes() {
1273        for expr in DEFAULT_EXPRS {
1274            let s =
1275                schema_with_defaulted_field("due", "date", None, Some(expr.to_string()), vec![]);
1276            assert!(
1277                validate_schema_defaults(&s).is_ok(),
1278                "default_expr '{}' should be valid",
1279                expr
1280            );
1281        }
1282    }
1283
1284    #[test]
1285    fn default_expr_unknown_keyword_rejected() {
1286        let s = schema_with_defaulted_field("due", "date", None, Some("tomorrow".into()), vec![]);
1287        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1288        assert!(err.contains("default_expr"), "got: {}", err);
1289        assert!(err.contains("tomorrow"));
1290    }
1291
1292    #[test]
1293    fn default_and_default_expr_mutually_exclusive() {
1294        let s = schema_with_defaulted_field(
1295            "due",
1296            "date",
1297            Some(Value::String("2024-05-13".into())),
1298            Some("today".into()),
1299            vec![],
1300        );
1301        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1302        assert!(err.contains("mutually exclusive"), "got: {}", err);
1303    }
1304
1305    #[test]
1306    fn default_for_wikilink_must_be_well_formed() {
1307        let s = schema_with_defaulted_field(
1308            "university",
1309            "wikilink",
1310            Some(Value::String("kyoto".into())),
1311            None,
1312            vec![],
1313        );
1314        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1315        assert!(err.contains("wikilink"), "got: {}", err);
1316
1317        // And the right shape passes.
1318        let s = schema_with_defaulted_field(
1319            "university",
1320            "wikilink",
1321            Some(Value::String("[[kyoto-university-KG9l]]".into())),
1322            None,
1323            vec![],
1324        );
1325        assert!(validate_schema_defaults(&s).is_ok());
1326    }
1327
1328    #[test]
1329    fn default_for_date_must_be_well_formed() {
1330        let s = schema_with_defaulted_field(
1331            "due",
1332            "date",
1333            Some(Value::String("2024-99-99".into())),
1334            None,
1335            vec![],
1336        );
1337        let err = validate_schema_defaults(&s).unwrap_err().to_string();
1338        assert!(err.contains("date"), "got: {}", err);
1339    }
1340
1341    #[test]
1342    fn load_schema_runs_default_validation() {
1343        // End-to-end: write a YAML with a bad default to disk, load it,
1344        // and verify we get a SchemaError pointing at the field.
1345        use std::io::Write;
1346        let mut tmp = tempfile::NamedTempFile::new().unwrap();
1347        writeln!(
1348            tmp,
1349            r#"
1350collections:
1351  movies:
1352    folder: Notes/movie
1353    fields:
1354      year:
1355        type: integer
1356        default: "not an integer"
1357"#
1358        )
1359        .unwrap();
1360        let err = load_schema(tmp.path()).unwrap_err().to_string();
1361        assert!(err.contains("year"), "got: {}", err);
1362        assert!(err.contains("incompatible"), "got: {}", err);
1363    }
1364
1365    // ── Cross-collection consistency (Tier 1 + Tier 2) ────────────────────
1366
1367    fn fs_basic(field_type: &str) -> FieldSchema {
1368        FieldSchema {
1369            field_type: field_type.into(),
1370            enum_values: vec![],
1371            min: None,
1372            max: None,
1373            default: None,
1374            default_expr: None,
1375        }
1376    }
1377
1378    fn col(folder: &str, fields: Vec<(&str, FieldSchema)>) -> CollectionSchema {
1379        let mut m = BTreeMap::new();
1380        for (k, v) in fields {
1381            m.insert(k.into(), v);
1382        }
1383        CollectionSchema {
1384            description: None,
1385            folder: folder.into(),
1386            filter: vec![],
1387            required: vec![],
1388            fields: m,
1389        }
1390    }
1391
1392    fn schema_of(pairs: Vec<(&str, CollectionSchema)>) -> VaultSchema {
1393        let mut m = BTreeMap::new();
1394        for (k, v) in pairs {
1395            m.insert(k.into(), v);
1396        }
1397        VaultSchema { collections: m }
1398    }
1399
1400    #[test]
1401    fn consistency_rejects_conflicting_field_types() {
1402        // Notes folder is ancestor of Notes/movie; both declare `tags`
1403        // but with incompatible types.
1404        let s = schema_of(vec![
1405            ("Notes", col("Notes", vec![("tags", fs_basic("list"))])),
1406            (
1407                "movies",
1408                col("Notes/movie", vec![("tags", fs_basic("string"))]),
1409            ),
1410        ]);
1411        let err = validate_schema_consistency(&s).unwrap_err().to_string();
1412        assert!(err.contains("tags"), "got: {}", err);
1413        assert!(err.contains("incompatible"), "got: {}", err);
1414    }
1415
1416    #[test]
1417    fn consistency_allows_non_overlapping_folders_with_different_types() {
1418        // Notes/movie and Notes/book are siblings — neither is ancestor
1419        // of the other, so a single record can't be in both. The
1420        // type-conflict check must skip this pair.
1421        let s = schema_of(vec![
1422            (
1423                "movies",
1424                col("Notes/movie", vec![("rating", fs_basic("string"))]),
1425            ),
1426            (
1427                "games",
1428                col("Notes/game", vec![("rating", fs_basic("integer"))]),
1429            ),
1430        ]);
1431        validate_schema_consistency(&s).unwrap();
1432    }
1433
1434    #[test]
1435    fn consistency_allows_enum_narrowing() {
1436        // Subset is the documented "narrowing" pattern (Notes.db-table
1437        // declares all valid values; movies.db-table narrows to one).
1438        let mut catchall = fs_basic("string");
1439        catchall.enum_values = vec![Value::String("movie".into()), Value::String("book".into())];
1440        let mut narrow = fs_basic("string");
1441        narrow.enum_values = vec![Value::String("movie".into())];
1442
1443        let s = schema_of(vec![
1444            ("Notes", col("Notes", vec![("db-table", catchall)])),
1445            ("movies", col("Notes/movie", vec![("db-table", narrow)])),
1446        ]);
1447        validate_schema_consistency(&s).unwrap();
1448    }
1449
1450    #[test]
1451    fn consistency_rejects_disjoint_enums() {
1452        let mut a = fs_basic("string");
1453        a.enum_values = vec![Value::String("movie".into())];
1454        let mut b = fs_basic("string");
1455        b.enum_values = vec![Value::String("book".into())];
1456
1457        let s = schema_of(vec![
1458            ("Notes", col("Notes", vec![("db-table", a)])),
1459            ("movies", col("Notes/movie", vec![("db-table", b)])),
1460        ]);
1461        let err = validate_schema_consistency(&s).unwrap_err().to_string();
1462        assert!(err.contains("disjoint enum"), "got: {}", err);
1463    }
1464
1465    #[test]
1466    fn consistency_rejects_disjoint_ranges() {
1467        let mut a = fs_basic("integer");
1468        a.min = Some(2000.0);
1469        a.max = Some(3000.0);
1470        let mut b = fs_basic("integer");
1471        b.min = Some(1000.0);
1472        b.max = Some(1500.0);
1473        let s = schema_of(vec![
1474            ("Notes", col("Notes", vec![("year", a)])),
1475            ("movies", col("Notes/movie", vec![("year", b)])),
1476        ]);
1477        let err = validate_schema_consistency(&s).unwrap_err().to_string();
1478        assert!(err.contains("disjoint numeric ranges"), "got: {}", err);
1479    }
1480
1481    #[test]
1482    fn consistency_rejects_default_violating_overlapping_collection() {
1483        // Movies declares status default = "to-watch", but Notes
1484        // catch-all narrows status enum to ["a", "b"]. The default
1485        // would silently break every movie create.
1486        let mut catchall = fs_basic("string");
1487        catchall.enum_values = vec![Value::String("a".into()), Value::String("b".into())];
1488        let mut movie = fs_basic("string");
1489        movie.enum_values = vec![
1490            Value::String("a".into()),
1491            Value::String("b".into()),
1492            Value::String("to-watch".into()),
1493        ];
1494        movie.default = Some(Value::String("to-watch".into()));
1495
1496        let s = schema_of(vec![
1497            ("Notes", col("Notes", vec![("status", catchall)])),
1498            ("movies", col("Notes/movie", vec![("status", movie)])),
1499        ]);
1500        let err = validate_schema_consistency(&s).unwrap_err().to_string();
1501        assert!(err.contains("default"), "got: {}", err);
1502        assert!(err.contains("status"), "got: {}", err);
1503    }
1504
1505    #[test]
1506    fn consistency_skips_check_when_filters_are_disjoint() {
1507        // Real-world case from the user's vault: two collections whose
1508        // folders overlap (Notes is ancestor of Notes/archive), but
1509        // whose filters are mutually exclusive equality predicates on
1510        // the same field (`db-table = index` vs `db-table = archive`).
1511        // No record can satisfy both filters at once, so the
1512        // disjoint-enums on `db-table` is *not* an error — it's the
1513        // pattern that makes the filter scheme work.
1514        let mut indexes_db = fs_basic("string");
1515        indexes_db.enum_values = vec![Value::String("index".into())];
1516        let mut archive_db = fs_basic("string");
1517        archive_db.enum_values = vec![Value::String("archive".into())];
1518
1519        let s = schema_of(vec![
1520            (
1521                "indexes",
1522                CollectionSchema {
1523                    description: None,
1524                    folder: "Notes".into(),
1525                    filter: vec!["db-table = index".into()],
1526                    required: vec![],
1527                    fields: {
1528                        let mut m = BTreeMap::new();
1529                        m.insert("db-table".into(), indexes_db);
1530                        m
1531                    },
1532                },
1533            ),
1534            (
1535                "archive",
1536                CollectionSchema {
1537                    description: None,
1538                    folder: "Notes/archive".into(),
1539                    filter: vec!["db-table = archive".into()],
1540                    required: vec![],
1541                    fields: {
1542                        let mut m = BTreeMap::new();
1543                        m.insert("db-table".into(), archive_db);
1544                        m
1545                    },
1546                },
1547            ),
1548        ]);
1549        validate_schema_consistency(&s).unwrap();
1550    }
1551
1552    #[test]
1553    fn consistency_accepts_default_compatible_with_overlapping_collection() {
1554        // Default = "to-watch" satisfies BOTH collections' enums.
1555        let mut catchall = fs_basic("string");
1556        catchall.enum_values = vec![
1557            Value::String("to-watch".into()),
1558            Value::String("watched".into()),
1559        ];
1560        let mut movie = fs_basic("string");
1561        movie.enum_values = vec![
1562            Value::String("to-watch".into()),
1563            Value::String("watched".into()),
1564        ];
1565        movie.default = Some(Value::String("to-watch".into()));
1566
1567        let s = schema_of(vec![
1568            ("Notes", col("Notes", vec![("status", catchall)])),
1569            ("movies", col("Notes/movie", vec![("status", movie)])),
1570        ]);
1571        validate_schema_consistency(&s).unwrap();
1572    }
1573}