// dbmd_core/parser.rs

//! `parser` — read and write db.md markdown files.
//!
//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
//! markdown links, `##` sections, and the structured sections of the `DB.md`
//! config file. Also provides the atomic writer that round-trips a file while
//! preserving the operator-edited body verbatim and emitting frontmatter in
//! canonical key order.
//!
//! Strict on required fields, lenient on unknowns: any frontmatter key the
//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
//! context and round-tripped untouched.
12
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use chrono::{DateTime, FixedOffset};
use serde_norway::{Mapping, Value};
18
/// Folder names of the two canonical layers. A path counts as "content", and
/// a wiki-link as "full-path", only when it resolves under one of these.
const LAYER_DIRS: [&str; 2] = ["sources", "records"];
22
23/// Errors produced while parsing a markdown file or the `DB.md` config.
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    /// The frontmatter block was not valid YAML. Maps to validate code
27    /// `FM_MALFORMED_YAML`.
28    #[error("malformed YAML frontmatter in {file}: {source}")]
29    MalformedYaml {
30        /// The file whose frontmatter failed to parse.
31        file: PathBuf,
32        /// The underlying YAML error.
33        source: serde_norway::Error,
34    },
35
36    /// The file has no `---`-delimited frontmatter block at its very start.
37    #[error("missing frontmatter block in {file}")]
38    MissingFrontmatter {
39        /// The offending file.
40        file: PathBuf,
41    },
42
43    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
44    /// (for `type`) and the per-type required-field codes.
45    #[error("missing required field '{key}' in {file}")]
46    MissingField {
47        /// The file missing the field.
48        file: PathBuf,
49        /// The required key.
50        key: String,
51    },
52
53    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
54    #[error("bad timestamp in field '{key}' of {file}: {value}")]
55    BadTimestamp {
56        /// The file.
57        file: PathBuf,
58        /// The frontmatter key.
59        key: String,
60        /// The unparseable value.
61        value: String,
62    },
63
64    /// An I/O error reading the file.
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67}
68
69/// The parsed YAML frontmatter of a db.md file.
70///
71/// The universal-contract fields are typed accessors; everything else lands in
72/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
73/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
74/// order: `type`, `id`, `created`, `updated`, `summary` first, then
75/// type-specific fields, then `status` / `tags`.
76#[derive(Debug, Clone, Default, PartialEq)]
77pub struct Frontmatter {
78    /// `type` — required on content files; the primary query key.
79    pub type_: Option<String>,
80    /// `meta-type` — records-only; the epistemic class `fact`/`operational`/
81    /// `conclusion`. Absent ⇒ `fact` (the effective default is applied by the
82    /// index/query layer for record-layer files; sources carry none).
83    pub meta_type: Option<String>,
84    /// `id` — optional; derived from the file path when absent.
85    pub id: Option<String>,
86    /// `created` — RFC3339; required and auto-set on content-file create.
87    pub created: Option<DateTime<FixedOffset>>,
88    /// `updated` — RFC3339; required and auto-maintained on content files.
89    pub updated: Option<DateTime<FixedOffset>>,
90    /// `summary` — the one-line catalog line; required on every content file.
91    pub summary: Option<String>,
92    /// `status` — optional lifecycle state.
93    pub status: Option<String>,
94    /// `tags` — optional flat list of short scalar labels.
95    pub tags: Vec<String>,
96    /// All other frontmatter keys (type-specific + custom), preserved verbatim
97    /// in insertion-stable sorted order. Wiki-link-valued fields keep their raw
98    /// YAML form here; [`Frontmatter::link_fields`] surfaces them as
99    /// [`WikiLink`]s.
100    pub extra: BTreeMap<String, Value>,
101}
102
103impl Frontmatter {
104    /// Parse a YAML frontmatter block (the text between the opening and closing
105    /// `---` fences, exclusive) into a [`Frontmatter`].
106    ///
107    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
108    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
109    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
110        // An empty (or whitespace-only) frontmatter block is a valid, empty
111        // mapping — not a YAML error.
112        let value: Value = if yaml.trim().is_empty() {
113            Value::Mapping(Mapping::new())
114        } else {
115            serde_norway::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
116                file: file.to_path_buf(),
117                source,
118            })?
119        };
120
121        // Top-level frontmatter must be a mapping. A scalar or sequence at the
122        // top level is malformed for our purposes; surface it as such.
123        let map = match value {
124            Value::Mapping(m) => m,
125            Value::Null => Mapping::new(),
126            other => {
127                // serde_norway::Error has no public constructor, so let the
128                // deserializer decide: a value that coerces to a Mapping (e.g. a
129                // YAML-tagged mapping `!tag\n k: v`, where the tag is ambient) is
130                // accepted as that mapping; a genuine scalar or sequence top
131                // level fails to coerce and IS the malformed case. (Using a
132                // match here, not `expect_err`, avoids a panic on the
133                // tagged-mapping case, which deserializes to a Mapping just
134                // fine.)
135                match serde_norway::from_value::<Mapping>(other) {
136                    Ok(m) => m,
137                    Err(source) => {
138                        return Err(ParseError::MalformedYaml {
139                            file: file.to_path_buf(),
140                            source,
141                        });
142                    }
143                }
144            }
145        };
146
147        let mut fm = Frontmatter::default();
148        for (k, v) in map {
149            let key = match k.as_str() {
150                Some(s) => s.to_string(),
151                // Non-string keys (`2026:`, `true:`, `3.14:`) are unusual but
152                // valid YAML; per SPEC § "Unknown fields pass through" they must
153                // not be corrupted on re-emit. Stringify them through the YAML
154                // scalar emitter — `2026`, `true`, `3.14` — NOT the Rust `Debug`
155                // formatter (which produced `Number(2026)`, `Bool(true)`, …), so
156                // the key text survives. `extra` is `String`-keyed, so on the
157                // write side the key re-emits as a quoted-string key carrying that
158                // text (e.g. `'2026':`) — the type narrows from number to string,
159                // but the data is no longer destroyed and ordinary string keys are
160                // wholly unaffected.
161                None => yaml_scalar_key(&k),
162            };
163            match key.as_str() {
164                // Coerce scalar values rather than `v.as_str()` (which is None
165                // for Number/Bool/Null). A bare scalar that YAML reads as a
166                // non-string — `summary: 2026`, `id: 100`, `status: 0` — would
167                // otherwise be set to None AND dropped (it is a matched arm, so
168                // the raw value never reaches `extra`), and `to_yaml` then omits
169                // the None field, so `dbmd format` (read_file -> write_file)
170                // silently deletes the line from disk. `scalar_string` mirrors
171                // the coercion `validate`/`store` already apply to these fields,
172                // so a numeric/bool-looking scalar is preserved as its string
173                // form and round-trips instead of being destroyed.
174                //
175                // A sequence/mapping value on a universal key (`status: [a, b]`,
176                // a nested-mapping `summary:`) is NOT a valid scalar; rather than
177                // let the matched arm consume-and-drop it (silent data loss on
178                // the next re-emit), `scalar_string` returns None and we fall
179                // through to preserving the raw value in `extra` so `to_yaml`
180                // re-emits it verbatim. The universal accessors stay None (the
181                // value was never a valid scalar for that field), but the
182                // operator's bytes are never destroyed.
183                "type" => match scalar_string(&v) {
184                    Some(s) => fm.type_ = Some(s),
185                    None => {
186                        fm.extra.insert(key, v);
187                    }
188                },
189                "meta-type" => match scalar_string(&v) {
190                    Some(s) => fm.meta_type = Some(s),
191                    None => {
192                        fm.extra.insert(key, v);
193                    }
194                },
195                "id" => match scalar_string(&v) {
196                    Some(s) => fm.id = Some(s),
197                    None => {
198                        fm.extra.insert(key, v);
199                    }
200                },
201                "created" => fm.created = parse_timestamp(&v, "created", file)?,
202                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
203                "summary" => match scalar_string(&v) {
204                    Some(s) => fm.summary = Some(s),
205                    None => {
206                        fm.extra.insert(key, v);
207                    }
208                },
209                "status" => match scalar_string(&v) {
210                    Some(s) => fm.status = Some(s),
211                    None => {
212                        fm.extra.insert(key, v);
213                    }
214                },
215                "tags" => match parse_tags_preserving(&v) {
216                    Ok(tags) => fm.tags = tags,
217                    // A `tags` value with a non-scalar item (`tags: [[vip]]`,
218                    // `tags: [a, [b]]`) is preserved verbatim in `extra` rather
219                    // than silently filtered down / erased on re-emit. The typed
220                    // `tags` vec stays empty (no valid scalar list was present),
221                    // so `to_yaml` won't ALSO emit a `tags:` from the vec.
222                    Err(raw) => {
223                        fm.extra.insert(key, raw);
224                    }
225                },
226                _ => {
227                    fm.extra.insert(key, v);
228                }
229            }
230        }
231        Ok(fm)
232    }
233
234    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
235    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
236    pub fn to_yaml(&self) -> String {
237        // Build an order-preserving mapping in canonical key order:
238        //   type, meta-type, id, created, updated, summary  (universal head)
239        //   <type-specific extra, BTreeMap-sorted>
240        //   status, tags                          (universal tail)
241        // serde_norway::Mapping preserves insertion order, so one serialize call
242        // emits the block in exactly this order with correct YAML quoting.
243        let mut map = Mapping::new();
244
245        if let Some(t) = &self.type_ {
246            map.insert(Value::String("type".into()), Value::String(t.clone()));
247        }
248        if let Some(mt) = &self.meta_type {
249            map.insert(Value::String("meta-type".into()), Value::String(mt.clone()));
250        }
251        if let Some(id) = &self.id {
252            map.insert(Value::String("id".into()), Value::String(id.clone()));
253        }
254        if let Some(created) = &self.created {
255            map.insert(
256                Value::String("created".into()),
257                Value::String(created.to_rfc3339()),
258            );
259        }
260        if let Some(updated) = &self.updated {
261            map.insert(
262                Value::String("updated".into()),
263                Value::String(updated.to_rfc3339()),
264            );
265        }
266        if let Some(summary) = &self.summary {
267            map.insert(
268                Value::String("summary".into()),
269                Value::String(summary.clone()),
270            );
271        }
272
273        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
274        // is canonicalized so a wiki-link round-trips to the form the writer and
275        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
276        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
277        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
278        // block sequence `- - x` that a verbatim re-emit would produce and that
279        // destroys the link. See [`canonicalize_extra_value`].
280        for (k, v) in &self.extra {
281            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
282        }
283
284        if let Some(status) = &self.status {
285            map.insert(
286                Value::String("status".into()),
287                Value::String(status.clone()),
288            );
289        }
290        if !self.tags.is_empty() {
291            map.insert(
292                Value::String("tags".into()),
293                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
294            );
295        }
296
297        if map.is_empty() {
298            return String::new();
299        }
300        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
301    }
302
303    /// True if the file is content (under `sources/` or `records/`)
304    /// and not an `index.md`. Used by validate to decide which files require a
305    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
306    pub fn is_content_file(path: &Path) -> bool {
307        // index.md is a meta file at every level, never content.
308        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
309            return false;
310        }
311        // Content iff some path component is one of the two layer dirs. This
312        // works for both store-relative (`sources/emails/x.md`) and absolute
313        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
314        // root, under no layer, so they fall through to false.
315        path.components().any(|c| {
316            c.as_os_str()
317                .to_str()
318                .is_some_and(|s| LAYER_DIRS.contains(&s))
319        })
320    }
321
322    /// Resolve the file's effective `id`: the explicit `id` field if present,
323    /// otherwise derived from the store-relative path (filename without `.md`).
324    pub fn effective_id(&self, store_relative_path: &Path) -> String {
325        if let Some(id) = &self.id {
326            if !id.is_empty() {
327                return id.clone();
328            }
329        }
330        // Derived id = filename without the `.md` extension.
331        store_relative_path
332            .file_stem()
333            .and_then(|s| s.to_str())
334            .unwrap_or_default()
335            .to_string()
336    }
337
338    /// The effective `meta-type` for a record: the declared value, or `fact`
339    /// when absent. Records only — sources carry no meta-type; callers apply
340    /// this only to record-layer files.
341    pub fn effective_meta_type(&self) -> &str {
342        self.meta_type.as_deref().unwrap_or("fact")
343    }
344
345    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
346    /// typed fields first and then [`extra`](Frontmatter::extra).
347    pub fn get(&self, key: &str) -> Option<Value> {
348        match key {
349            "type" => self.type_.clone().map(Value::String),
350            "meta-type" => self.meta_type.clone().map(Value::String),
351            "id" => self.id.clone().map(Value::String),
352            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
353            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
354            "summary" => self.summary.clone().map(Value::String),
355            "status" => self.status.clone().map(Value::String),
356            "tags" => {
357                if self.tags.is_empty() {
358                    None
359                } else {
360                    Some(Value::Sequence(
361                        self.tags.iter().cloned().map(Value::String).collect(),
362                    ))
363                }
364            }
365            _ => self.extra.get(key).cloned(),
366        }
367    }
368
369    /// Set a single frontmatter key from a string value, routing universal-
370    /// contract keys to their typed fields and everything else to
371    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
372    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
373        match key {
374            "type" => self.type_ = Some(value.to_string()),
375            "meta-type" => self.meta_type = Some(value.to_string()),
376            "id" => self.id = Some(value.to_string()),
377            "created" => {
378                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
379            }
380            "updated" => {
381                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
382            }
383            "summary" => self.summary = Some(value.to_string()),
384            "status" => self.status = Some(value.to_string()),
385            "tags" => {
386                // Accept either a YAML flow list (`[a, b]`) or a single scalar
387                // tag. Anything that parses to a sequence becomes the tag list;
388                // otherwise the whole string is one tag.
389                self.tags = match serde_norway::from_str::<Value>(value) {
390                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
391                    _ => vec![value.to_string()],
392                };
393            }
394            _ => {
395                // A custom / type-specific field. The value is a scalar string by
396                // default, but the spec's list-valued link fields (e.g.
397                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
398                // block sequence of quoted wiki-links — never the flow-form string
399                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
400                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
401                // sequence whose every item is a clean single wiki-link, store the
402                // canonical sequence so `to_yaml` emits block form. Everything else
403                // — plain text, and a single inline `[[x]]` (which YAML reads as a
404                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
405                // verbatim scalar string, preserving the prior behavior.
406                let stored = parse_link_list_value(value)
407                    .unwrap_or_else(|| Value::String(value.to_string()));
408                self.extra.insert(key.to_string(), stored);
409            }
410        }
411        Ok(())
412    }
413
414    /// Extract every frontmatter field whose value is a wiki-link (scalar
415    /// inline form or a block-sequence list), pairing each with its key. The
416    /// validate engine checks these against `(link)` schema annotations.
417    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
418        let mut out = Vec::new();
419        // `summary` may carry navigational wiki-links (spec encourages it).
420        if let Some(summary) = &self.summary {
421            for link in extract_wiki_links(summary, Path::new("")) {
422                out.push(("summary".to_string(), link));
423            }
424        }
425        // Every type-specific / custom field: a scalar wiki-link or a list of
426        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
427        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
428        for (key, value) in &self.extra {
429            for link in links_in_field_value(value) {
430                out.push((key.clone(), link));
431            }
432        }
433        out
434    }
435}
436
/// A wiki-link reference inside the store: `[[target]]` or
/// `[[target|display]]`.
///
/// `target` is always recorded as written;
/// [`is_full_path`](WikiLink::is_full_path) flags whether it is a full
/// store-relative path (the doctrine) or a short form (a validation error).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// The link target as written, without the `[[ ]]` and without
    /// `|display`.
    pub target: String,
    /// The optional `|display` text override.
    pub display: Option<String>,
    /// True when `target` is a full store-relative path (contains a `/` and
    /// resolves under a known layer); false for short-form targets such as
    /// `sarah-chen`, which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// True when `target` ends in `.md`. Validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Where the link appears: `(file, line, col)`, with 1-based line and
    /// column.
    pub location: (PathBuf, u32, u32),
}
458
/// A standard markdown link `[text](url)` — an external reference. It is kept
/// in a stream separate from [`WikiLink`] so external targets stay visible to
/// the toolkit without being conflated with in-store edges. Not
/// graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// The link text inside `[ ]`.
    pub text: String,
    /// The URL or path inside `( )`.
    pub url: String,
    /// Where the link appears: `(file, line, col)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
471
/// A `##`/`###` section of a markdown body: the heading text plus the part of
/// the body it spans (from the heading line through the line before the next
/// heading of equal-or-shallower depth).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// The heading text, without the leading `#`s.
    pub heading: String,
    /// Heading depth (number of leading `#`s).
    pub level: u8,
    /// The 1-based line on which the heading appears.
    pub line: u32,
    /// The section text, from the heading line up to (but excluding) the next
    /// sibling-or-shallower heading, copied from the original body.
    pub body: String,
}
487
488/// The parsed structured content of a store's `DB.md` config file.
489///
490/// All four parts are optional in the source; absent parts fall back to spec
491/// defaults. Produced by [`parse_db_md`].
492#[derive(Debug, Clone, Default, PartialEq)]
493pub struct Config {
494    /// Body of the `## Agent instructions` section — free-form prose passed to
495    /// the agent's system prompt.
496    pub agent_instructions: Option<String>,
497    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
498    /// refuses to write (`POLICY_FROZEN_PAGE`).
499    pub frozen_pages: Vec<PathBuf>,
500    /// `## Policies` → `### Ignored types`: type names the curator never
501    /// synthesizes (still readable as ambient context).
502    pub ignored_types: Vec<String>,
503    /// `## Schemas` → one entry per `### <type>` sub-section.
504    pub schemas: BTreeMap<String, Schema>,
505}
506
507impl Config {
508    /// The `### Frozen pages` entry that matches a store-relative `target`, if
509    /// any. The **single** frozen-page matcher every write surface must funnel
510    /// through so the policy is enforced identically on `write` / `fm set` /
511    /// `fm init` / `link` / `rename` / `format`.
512    ///
513    /// Comparison is normalized so a policy line and a write target match
514    /// regardless of incidental spelling differences:
515    /// - `/` path separators on every OS,
516    /// - a single leading `./` dropped,
517    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
518    ///   frozen entries verbatim, so an operator who writes the natural
519    ///   extensionless spelling (`records/decisions/q1`) must protect the file
520    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
521    ///
522    /// Returns the matched config entry verbatim (its original spelling) so the
523    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
524    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
525        let want = normalize_frozen_path(target);
526        self.frozen_pages
527            .iter()
528            .find(|frozen| {
529                let pat = normalize_frozen_path(frozen);
530                // A literal entry matches by exact normalized equality; an entry
531                // carrying a `*`/`**` glob matches by segment-wise glob so a
532                // pattern like `records/decisions/*` actually protects the
533                // concrete files under it instead of silently failing open.
534                if pat.contains('*') {
535                    frozen_glob_matches(&pat, &want)
536                } else {
537                    pat == want
538                }
539            })
540            .cloned()
541    }
542
543    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
544    /// over [`Config::frozen_match`] for callers that only need presence.
545    pub fn is_frozen(&self, target: &Path) -> bool {
546        self.frozen_match(target).is_some()
547    }
548}
549
/// Normalize a path for frozen-page comparison.
///
/// The result is the path's segments joined with `/`, with:
/// - any root (`/`), drive prefix and `.` components dropped, so a
///   leading-slash entry such as `/records/decisions/q1.md` normalizes to the
///   same text as the store-relative target instead of silently failing the
///   freeze open;
/// - `..` resolved lexically against the preceding segment (a `..` with
///   nothing before it is ignored), so a target spelled
///   `records/drafts/../decisions/q1.md` cannot slip past a freeze on
///   `records/decisions/q1.md`;
/// - one trailing `.md` removed.
///
/// Segments that are not valid UTF-8 are converted lossily rather than
/// dropped, which keeps the `..` resolution aligned with the real segment
/// count.
fn normalize_frozen_path(p: &Path) -> String {
    use std::path::Component;

    let mut segments: Vec<String> = Vec::new();
    for component in p.components() {
        match component {
            Component::Normal(segment) => {
                segments.push(segment.to_string_lossy().into_owned());
            }
            // Step back over the previous segment; popping an empty list is a
            // no-op, so a leading `..` is simply ignored.
            Component::ParentDir => {
                segments.pop();
            }
            Component::RootDir | Component::Prefix(_) | Component::CurDir => {}
        }
    }

    let joined = segments.join("/");
    match joined.strip_suffix(".md") {
        Some(stem) => stem.to_string(),
        None => joined,
    }
}
575
576/// Match a normalized frozen-page glob `pat` against a normalized target `path`,
577/// segment by segment. `*` matches any run of characters *within a single path
578/// segment* (never crossing `/`); `**` as a whole segment matches zero or more
579/// whole segments. Both sides are already `normalize_frozen_path`-normalized, so
580/// this only deals with `/`-joined segment text. Keeps the substrate dependency-
581/// free (no glob crate) while making `records/decisions/*` actually freeze the
582/// files beneath it instead of failing open.
583fn frozen_glob_matches(pat: &str, path: &str) -> bool {
584    let pat_segs: Vec<&str> = pat.split('/').collect();
585    let path_segs: Vec<&str> = path.split('/').collect();
586    glob_segments(&pat_segs, &path_segs)
587}
588
589/// Recursive segment matcher for [`frozen_glob_matches`]. `**` consumes any
590/// number of path segments; every other pattern segment must match exactly one
591/// path segment (with `*` wildcards inside it).
592fn glob_segments(pat: &[&str], path: &[&str]) -> bool {
593    match pat.split_first() {
594        None => path.is_empty(),
595        Some((&"**", rest_pat)) => {
596            // `**` matches zero segments here, or one-or-more by consuming a path
597            // segment and recursing on the same `**`.
598            if glob_segments(rest_pat, path) {
599                return true;
600            }
601            !path.is_empty() && glob_segments(pat, &path[1..])
602        }
603        Some((&first_pat, rest_pat)) => match path.split_first() {
604            Some((&first_path, rest_path)) => {
605                glob_segment_text(first_pat, first_path) && glob_segments(rest_pat, rest_path)
606            }
607            None => false,
608        },
609    }
610}
611
/// Match a single glob segment against a single path segment. `*` matches any
/// run of characters (possibly empty) within the segment; every other
/// character is literal.
///
/// The pattern is read as literal fragments separated by `*`: the first
/// fragment must be a prefix of `seg`, the last a suffix of whatever remains,
/// and the fragments in between must occur in order, each taken at its
/// earliest position after the previous one.
fn glob_segment_text(pat: &str, seg: &str) -> bool {
    // No wildcard at all: plain equality.
    let Some((prefix, after_first_star)) = pat.split_once('*') else {
        return pat == seg;
    };

    // Leading literal (possibly empty) must open the segment.
    let Some(mut remaining) = seg.strip_prefix(prefix) else {
        return false;
    };

    // Everything after the final `*` is the required suffix; anything between
    // the first and final `*` is a run of interior fragments.
    let (interior, suffix) = match after_first_star.rsplit_once('*') {
        Some((interior, suffix)) => (interior, suffix),
        None => ("", after_first_star),
    };

    // Empty fragments come from adjacent `*`s and constrain nothing.
    for fragment in interior.split('*').filter(|fragment| !fragment.is_empty()) {
        match remaining.find(fragment) {
            Some(at) => remaining = &remaining[at + fragment.len()..],
            None => return false,
        }
    }

    // Checking the suffix against `remaining` (not the whole segment) stops
    // it from overlapping text already claimed by earlier fragments.
    remaining.ends_with(suffix)
}
645
/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
/// The store's `## Schemas` is the **only** source of schema enforcement — the
/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
///
/// [`parse_db_md`] builds one `Schema` per `### <type>` heading found under
/// `## Schemas` and stores it under the heading text as written (trimmed).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Schema {
    /// One [`FieldSpec`] per bulleted field line, in source order.
    pub fields: Vec<FieldSpec>,
    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
    /// uniqueness constraint over the listed field(s) (compound when >1). Two
    /// records of this type whose listed values collide warn as
    /// `DUP_UNIQUE_KEY`. A `unique:` directive that names no fields is dropped
    /// at parse time, so no inner vec is ever empty when built by
    /// [`parse_db_md`].
    pub unique_keys: Vec<Vec<String>>,
    /// `- summary_template: <template>` directive — the `{field}` interpolation
    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
    /// for this type. `None` falls back to the body's first paragraph. An
    /// empty template is ignored by the parser (left as `None`); when the
    /// directive appears more than once the last non-empty one is kept.
    pub summary_template: Option<String>,
    /// `- shard: by-date | flat` directive — whether records of this type are
    /// date-sharded on disk (`records/<type>/<YYYY>/<MM>/…`) or kept flat.
    /// `None` = no directive declared, so the store's built-in default for the
    /// type applies ([`crate::store::Store::type_shards`]); `Some(true)` forces
    /// date-sharding (e.g. a custom event type the toolkit has no built-in for);
    /// `Some(false)` forces flat. This is the v0.2 generic-model way to declare
    /// sharding — the toolkit ships no implicit per-type behavior beyond the
    /// example-type defaults.
    ///
    /// The parser also accepts `date` / `sharded` / `true` as synonyms for
    /// `by-date` and `none` / `false` for `flat` (case-insensitive); any other
    /// value is ignored and leaves this field unchanged.
    pub shard: Option<bool>,
}
672
/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
///
/// Modifiers are comma-separated inside the parens; this captures the
/// recognized ones as typed fields and stashes anything unrecognized in
/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
/// [`parse_field_spec`] also accepts the colon spelling
/// `- <name>: <modifiers>` and parses its modifier list the same way.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FieldSpec {
    /// The field name (the trimmed text before the `(` / `:` delimiter, or the
    /// whole bullet text when neither delimiter is present).
    pub name: String,
    /// `required` modifier present.
    pub required: bool,
    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
    /// `url`), if any. When several shape modifiers are listed the last one
    /// wins.
    pub shape: Option<Shape>,
    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
    /// start with. The trailing slash is required in the source syntax; the
    /// parser strips it before storing, so this path carries no trailing `/`.
    pub link_prefix: Option<PathBuf>,
    /// `default <value>` — the value written when the field is absent. The
    /// parser always stores the trimmed text after the keyword as a
    /// [`Value::String`], keeping any commas that appear inside the value.
    pub default: Option<Value>,
    /// `enum: <v1>, <v2>, ...` (or bare `enum, <v1>, ...`) — the allowed
    /// values. The list is greedy because of its own commas: it runs from the
    /// keyword to the next greedy clause (e.g. a following `default …`) or to
    /// the end of the modifier list, so plain modifiers such as `required`
    /// should be written before it.
    pub enum_values: Option<Vec<String>>,
    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
    /// validate surfaces these as `Info`, never errors.
    pub unknown_modifiers: Vec<String>,
}
699
/// A recognized shape modifier for a schema field. Validate enforces the
/// corresponding value shape (`SCHEMA_SHAPE_MISMATCH` on violation).
///
/// [`parse_field_spec`] maps a lowercased modifier token to a variant and
/// stores it in [`FieldSpec::shape`]; a token that is not a shape keyword is
/// handled by the other modifier rules instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// Any scalar string.
    String,
    /// Integer.
    Int,
    /// Boolean.
    Bool,
    /// RFC3339 / ISO-8601 date.
    Date,
    /// `<local>@<domain>` email address.
    Email,
    /// A currency amount.
    Currency,
    /// A URL.
    Url,
}
719
/// The result of splitting a raw file into its frontmatter block and body.
///
/// `body` is the verbatim remainder after the closing `---` fence — the writer
/// preserves it byte-for-byte so operator edits are never reflowed.
///
/// Produced by [`split_frontmatter`], which strips a single leading UTF-8 BOM
/// before slicing, so neither field ever starts with that BOM.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// The raw frontmatter YAML (between the fences, exclusive of them). Each
    /// YAML line keeps its original line terminator; the string is empty when
    /// the closing fence immediately follows the opening one.
    pub frontmatter_yaml: String,
    /// The verbatim body (everything after the closing `---`), starting right
    /// after the closing fence line's terminator. Empty when the closing
    /// fence is the last line of the file.
    pub body: String,
}
731
732/// Split a file's full text into its frontmatter block and body. The
733/// frontmatter block must be the very first thing in the file, delimited by
734/// `---` on its own line at start and end. Returns
735/// [`ParseError::MissingFrontmatter`] if absent.
736pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
737    // Tolerate a single leading UTF-8 BOM (U+FEFF) before the opening fence,
738    // matching `store::frontmatter_block` and `index::extract_frontmatter_block`
739    // which already strip it. Without this, a BOM-prefixed file (common from
740    // Windows / exported markdown dropped into `sources/`) gets walked and
741    // indexed by `dbmd index` yet hard-fails every write/edit surface that
742    // routes through `read_file` (`fm get/set`, `format`, `link`, `write`). The
743    // BOM is dropped from the emitted body so the canonical writer never carries
744    // it forward.
745    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
746
747    // The opening fence must be the very first line: `---`, no leading
748    // whitespace, nothing before it. Trailing whitespace on the fence line is
749    // tolerated via `trim_end()` (which strips spaces/tabs as well as CR/LF) so
750    // this matches `index::extract_frontmatter_block` and
751    // `validate::split_frontmatter`, both of which use `trim_end()`. Without this
752    // agreement a fence written `--- ` (a single trailing space — invisible in an
753    // editor, easily produced by hand edits or exporters) was indexed and
754    // validated clean by those scanners yet hard-failed every write/edit surface
755    // routed through `read_file` (`fm get/set`, `format`, `link`, `write`) — the
756    // same cross-scanner drift class already fixed for the UTF-8 BOM above.
757    let mut lines = text.split_inclusive('\n');
758    let first = lines.next().unwrap_or("");
759    if first.trim_end() != "---" {
760        return Err(ParseError::MissingFrontmatter {
761            file: file.to_path_buf(),
762        });
763    }
764
765    // Scan for the closing fence line. Track byte offsets so we can slice the
766    // YAML (between fences, exclusive) and the body (verbatim, after the
767    // closing fence's line terminator).
768    let opening_len = first.len();
769    let mut offset = opening_len;
770    for line in lines {
771        if line.trim_end() == "---" {
772            let yaml = &text[opening_len..offset];
773            let body_start = offset + line.len();
774            let body = &text[body_start..];
775            return Ok(ParsedFile {
776                frontmatter_yaml: yaml.to_string(),
777                body: body.to_string(),
778            });
779        }
780        offset += line.len();
781    }
782
783    // Opening fence present but no closing fence: malformed frontmatter block.
784    Err(ParseError::MissingFrontmatter {
785        file: file.to_path_buf(),
786    })
787}
788
789/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
790/// verbatim body string.
791pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
792    let text = std::fs::read_to_string(path)?;
793    let parsed = split_frontmatter(&text, path)?;
794    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
795    Ok((fm, parsed.body))
796}
797
798/// Atomically write a markdown file from frontmatter + body: emit the
799/// frontmatter in canonical key order, then the body verbatim, via a
800/// temp-file-rename so a reader never sees a half-written file. Preserves the
801/// operator-edited body exactly as given.
802pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
803    let contents = render_file(frontmatter, body);
804
805    // One durable, atomic write for all primary data (see `crate::fsx`):
806    // temp-file + fsync + rename + parent-fsync. Content records are primary
807    // data, so they get the durable path (unlike the rebuildable index).
808    crate::fsx::write_atomic(path, contents.as_bytes())?;
809    Ok(())
810}
811
812/// Atomically create a markdown file from frontmatter + body, refusing with
813/// [`std::io::ErrorKind::AlreadyExists`] if the destination already exists.
814///
815/// This is the create-new sibling of [`write_file`]: same canonical rendering
816/// and durable temp-file path, but backed by [`crate::fsx::write_atomic_new`] so
817/// two concurrent creators for the same path cannot both succeed.
818pub fn write_file_new(
819    path: &Path,
820    frontmatter: &Frontmatter,
821    body: &str,
822) -> Result<(), ParseError> {
823    let contents = render_file(frontmatter, body);
824    crate::fsx::write_atomic_new(path, contents.as_bytes())?;
825    Ok(())
826}
827
828fn render_file(frontmatter: &Frontmatter, body: &str) -> String {
829    let yaml = frontmatter.to_yaml();
830    // `to_yaml` already terminates each block with a newline. Compose the file
831    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
832    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
833    contents.push_str("---\n");
834    contents.push_str(&yaml);
835    contents.push_str("---\n");
836    contents.push_str(body);
837    contents
838}
839
840/// Extract every wiki-link from a body (and inline frontmatter), returning the
841/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
842/// `(file, line, col)` locations set.
843pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
844    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
845    let re = RE.get_or_init(|| {
846        // [[target]] or [[target|display]]; target/display exclude brackets and
847        // (for target) the `|` separator so nested forms don't over-match.
848        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
849    });
850
851    let mut out = Vec::new();
852    for (line_idx, line) in body.lines().enumerate() {
853        for caps in re.captures_iter(line) {
854            let whole = caps.get(0).expect("group 0 always present");
855            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
856            let display = caps.get(2).map(|m| m.as_str().to_string());
857            out.push(WikiLink {
858                is_full_path: target_is_full_path(&target),
859                has_md_extension: target_has_md_extension(&target),
860                target,
861                display,
862                location: (
863                    file.to_path_buf(),
864                    (line_idx as u32) + 1,
865                    char_column(line, whole.start()),
866                ),
867            });
868        }
869    }
870    out
871}
872
873/// Extract every standard markdown link `[text](url)` from a body into a
874/// separate stream, kept distinct from wiki-links.
875pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
876    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
877    let re = RE.get_or_init(|| {
878        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
879        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
880        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
881    });
882
883    let mut out = Vec::new();
884    for (line_idx, line) in body.lines().enumerate() {
885        for caps in re.captures_iter(line) {
886            let whole = caps.get(0).expect("group 0 always present");
887            out.push(MarkdownLink {
888                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
889                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
890                location: (
891                    file.to_path_buf(),
892                    (line_idx as u32) + 1,
893                    char_column(line, whole.start()),
894                ),
895            });
896        }
897    }
898    out
899}
900
901/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
902/// written so YAML parses it as nested sequences instead of a clean list of
903/// strings. Returns the offending keys so validate can emit
904/// `WIKI_LINK_FLOW_FORM_LIST`.
905///
906/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
907/// shapes nest:
908///
909/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
910///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
911/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
912///   (triple-nested). The list mis-encoding — flagged.
913/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
914///   is flagged too; the canonical list form must quote each item
915///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
916///
917/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
918/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
919/// single item is a sequence-of-scalars.
920pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
921    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
922        Ok(v) => v,
923        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
924        Err(_) => return Vec::new(),
925    };
926    let Value::Mapping(map) = value else {
927        return Vec::new();
928    };
929
930    let mut out = Vec::new();
931    for (k, v) in &map {
932        if let Value::Sequence(items) = v {
933            // Triple-nesting: some outer item is a sequence that itself holds a
934            // sequence. Scalar inline `[[x]]` is only double-nested, so it
935            // never matches.
936            let is_link_list = items.iter().any(|item| match item {
937                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
938                _ => false,
939            });
940            if is_link_list {
941                if let Some(key) = k.as_str() {
942                    out.push(key.to_string());
943                }
944            }
945        }
946    }
947    out
948}
949
950/// Extract the `##`/`###` sections of a markdown body into a flat list with
951/// body slices.
952pub fn extract_sections(body: &str) -> Vec<Section> {
953    // Keep each line's start so we can slice the body verbatim (exact newlines).
954    let lines: Vec<&str> = body.split_inclusive('\n').collect();
955
956    // First pass: classify heading levels (0 = not a heading), honoring fenced
957    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
958    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
959    let mut fence: Option<(u8, usize)> = None;
960    for line in &lines {
961        let content = line.trim_end_matches(['\n', '\r']);
962        if let Some(f) = fence {
963            if is_closing_fence(content, f) {
964                fence = None;
965            }
966            levels.push(0);
967            continue;
968        }
969        if let Some(opened) = opening_fence(content) {
970            fence = Some(opened);
971            levels.push(0);
972            continue;
973        }
974        levels.push(heading_level(content));
975    }
976
977    // Second pass: emit `##`+ headings; each section body runs from its heading
978    // line to the next heading at an equal-or-shallower level (exclusive).
979    let mut sections = Vec::new();
980    for (i, &lvl) in levels.iter().enumerate() {
981        if lvl < 2 {
982            continue;
983        }
984        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
985        let heading = heading_text(heading_line, lvl);
986
987        let mut end = lines.len();
988        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
989            if other != 0 && other <= lvl {
990                end = j;
991                break;
992            }
993        }
994
995        sections.push(Section {
996            heading,
997            level: lvl,
998            line: (i + 1) as u32,
999            body: lines[i..end].concat(),
1000        });
1001    }
1002    sections
1003}
1004
1005/// Extract the `##`/`###` sections of a **whole file** (frontmatter + body),
1006/// returning each [`Section`] with `line` numbered against the *source file*,
1007/// not the body.
1008///
1009/// [`extract_sections`] numbers headings 1-based within the body it is handed —
1010/// the right frame for callers that already track the frontmatter offset
1011/// (`validate` adds `fm_end_line`). But the single-file views (`dbmd sections`,
1012/// `dbmd outline`) present `Section::line` as a source line an agent can jump to;
1013/// because every db.md file opens with a frontmatter block, the body-relative
1014/// number is off by the block's length (`opening fence + frontmatter lines +
1015/// closing fence`) for every file. This helper does the offset once, in the
1016/// parser, so those surfaces report true file lines. A file with no leading
1017/// frontmatter block is treated as all-body (offset 0), so the function never
1018/// fails just because a file lacks frontmatter.
1019pub fn extract_sections_in_file(text: &str) -> Vec<Section> {
1020    // Tolerate a leading BOM the same way `split_frontmatter` does, so the line
1021    // count and the body slice agree with the read path.
1022    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
1023
1024    // Find the body and how many source lines precede it. The body begins right
1025    // after the closing fence; the number of lines consumed by the frontmatter
1026    // block (both fences + the YAML between) is the offset to add to each
1027    // body-relative heading line.
1028    let (body, offset) = match split_frontmatter(text, Path::new("<sections>")) {
1029        Ok(parsed) => {
1030            // Lines before the body = total lines in `text` minus lines in body.
1031            let total_lines = count_lines(text);
1032            let body_lines = count_lines(&parsed.body);
1033            (parsed.body, total_lines.saturating_sub(body_lines))
1034        }
1035        // No frontmatter block: the whole text is body, no offset.
1036        Err(_) => (text.to_string(), 0),
1037    };
1038
1039    let mut sections = extract_sections(&body);
1040    for s in &mut sections {
1041        s.line += offset;
1042    }
1043    sections
1044}
1045
/// Number of lines `s` spans, for line-number offsetting: each `\n` ends one
/// line, and a non-empty tail without a trailing newline counts as one more.
/// The empty string spans zero lines.
fn count_lines(s: &str) -> u32 {
    let terminated = s.bytes().filter(|&b| b == b'\n').count() as u32;
    // An empty string has no tail; otherwise the tail exists exactly when the
    // text does not end on a newline.
    let has_unterminated_tail = !s.is_empty() && !s.ends_with('\n');
    if has_unterminated_tail {
        terminated + 1
    } else {
        terminated
    }
}
1060
1061/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
1062/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
1063/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
1064/// ignored; absent sections leave their [`Config`] fields at default.
1065pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
1066    // The structured sections live in the body (after frontmatter). DB.md must
1067    // still start with a valid `---` block (`type: db-md`); if it's missing we
1068    // surface MissingFrontmatter like any other file.
1069    let parsed = split_frontmatter(text, file)?;
1070    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
1071    let sections = extract_sections(&parsed.body);
1072
1073    let mut config = Config::default();
1074    // Track which H2 region each H3 belongs to as we walk the flat list.
1075    let mut current_h2: Option<String> = None;
1076
1077    for section in &sections {
1078        match section.level {
1079            2 => {
1080                let name = section.heading.trim().to_ascii_lowercase();
1081                current_h2 = Some(name.clone());
1082                if name == "agent instructions" {
1083                    let prose = section_prose(&section.body);
1084                    if !prose.is_empty() {
1085                        config.agent_instructions = Some(prose);
1086                    }
1087                }
1088            }
1089            3 => {
1090                let h2 = current_h2.as_deref().unwrap_or("");
1091                let h3 = section.heading.trim().to_ascii_lowercase();
1092                match (h2, h3.as_str()) {
1093                    ("policies", "frozen pages") => {
1094                        config.frozen_pages = bullet_lines(&section.body)
1095                            .into_iter()
1096                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
1097                            .collect();
1098                    }
1099                    ("policies", "ignored types") => {
1100                        config.ignored_types = bullet_lines(&section.body)
1101                            .into_iter()
1102                            .flat_map(|b| extract_type_list_bullet(&b))
1103                            .collect();
1104                    }
1105                    ("schemas", _) => {
1106                        // The H3 heading text (as written) is the type name.
1107                        let type_name = section.heading.trim().to_string();
1108                        let mut schema = Schema::default();
1109                        for b in bullet_lines(&section.body) {
1110                            match parse_schema_bullet(&b) {
1111                                SchemaBullet::Field(f) => schema.fields.push(f),
1112                                SchemaBullet::Unique(k) if !k.is_empty() => {
1113                                    schema.unique_keys.push(k)
1114                                }
1115                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
1116                                    schema.summary_template = Some(t)
1117                                }
1118                                SchemaBullet::Shard(Some(b)) => schema.shard = Some(b),
1119                                // Empty `unique:`/`summary_template:`, or a `shard:`
1120                                // with an unrecognized value — ignored.
1121                                SchemaBullet::Unique(_)
1122                                | SchemaBullet::SummaryTemplate(_)
1123                                | SchemaBullet::Shard(None) => {}
1124                            }
1125                        }
1126                        config.schemas.insert(type_name, schema);
1127                    }
1128                    _ => {}
1129                }
1130            }
1131            _ => {}
1132        }
1133    }
1134
1135    Ok(config)
1136}
1137
/// One parsed bullet inside a `### <type>` schema block: an ordinary field, or a
/// reserved directive (`unique:` / `summary_template:` / `shard:`). The names
/// `unique`, `summary_template`, and `shard` are reserved and cannot be used as
/// field names.
///
/// Produced by [`parse_schema_bullet`] and consumed by [`parse_db_md`], which
/// folds each variant into the [`Schema`] being built.
#[derive(Debug)]
enum SchemaBullet {
    /// An ordinary `- <name> (<modifiers>)` field.
    Field(FieldSpec),
    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key.
    /// Field names are trimmed and blank entries removed; the vec is empty
    /// when the directive lists nothing, in which case the consumer drops it.
    Unique(Vec<String>),
    /// `- summary_template: <template>` — the default-`summary` pattern,
    /// trimmed. May be empty, in which case the consumer drops it.
    SummaryTemplate(String),
    /// `- shard: by-date | flat` — date-shard records of this type, or keep them
    /// flat. `None` = an unrecognized value, ignored like an unknown modifier.
    Shard(Option<bool>),
}
1154
1155/// Classify one `## Schemas` bullet as a directive or a field. The directive
1156/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
1157/// guards against false positives — a field like `- status (enum: a, b)` has a
1158/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
1159/// as a [`FieldSpec`].
1160fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
1161    let line = bullet_line.trim();
1162    let line = line
1163        .strip_prefix("- ")
1164        .or_else(|| line.strip_prefix("* "))
1165        .or_else(|| line.strip_prefix("+ "))
1166        .or_else(|| line.strip_prefix('-'))
1167        .unwrap_or(line)
1168        .trim();
1169
1170    if let Some((head, rest)) = line.split_once(':') {
1171        match head.trim().to_ascii_lowercase().as_str() {
1172            "unique" => {
1173                let fields = rest
1174                    .split(',')
1175                    .map(|f| f.trim().to_string())
1176                    .filter(|f| !f.is_empty())
1177                    .collect();
1178                return SchemaBullet::Unique(fields);
1179            }
1180            "summary_template" => {
1181                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
1182            }
1183            "shard" => {
1184                // `by-date` (synonyms: date/sharded/true) enables date-sharding;
1185                // `flat` (none/false) forces flat; anything else is ignored.
1186                let v = match rest.trim().to_ascii_lowercase().as_str() {
1187                    "by-date" | "date" | "sharded" | "true" => Some(true),
1188                    "flat" | "none" | "false" => Some(false),
1189                    _ => None,
1190                };
1191                return SchemaBullet::Shard(v);
1192            }
1193            _ => {}
1194        }
1195    }
1196
1197    SchemaBullet::Field(parse_field_spec(bullet_line))
1198}
1199
1200/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
1201/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
1202/// in [`FieldSpec::unknown_modifiers`].
1203pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
1204    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
1205    let line = bullet_line.trim();
1206    let line = line
1207        .strip_prefix("- ")
1208        .or_else(|| line.strip_prefix("* "))
1209        .or_else(|| line.strip_prefix("+ "))
1210        .or_else(|| line.strip_prefix('-'))
1211        .unwrap_or(line)
1212        .trim();
1213
1214    // Split `<name> (<modifiers>)` — the canonical paren form — OR the natural
1215    // mis-spelling `<name>: <modifiers>` (colon instead of parens). The two
1216    // delimiters are interchangeable for the field head; whichever appears FIRST
1217    // wins, so a paren form whose modifiers contain a colon (`status (enum: a,
1218    // b)`) still parses by parens (the `(` precedes the `:`), while a bare
1219    // `title: string, required` parses by colon instead of being swallowed whole
1220    // into the field name with every modifier silently dropped.
1221    let paren = line.find('(');
1222    let colon = line.find(':');
1223    // Choose the head delimiter. The paren form wins when its `(` precedes any
1224    // `:` (so `status (enum: a, b)` parses by parens, the colon being inside the
1225    // modifiers); otherwise a `:` before the paren — or with no paren at all —
1226    // selects the colon form `<name>: <modifiers>`, the natural mis-spelling that
1227    // must NOT be swallowed whole into the field name with every modifier lost.
1228    let use_paren = matches!((paren, colon), (Some(p), c) if c.is_none_or(|c| p < c));
1229    let (name, modifiers) = if use_paren {
1230        let open = paren.expect("use_paren implies a paren");
1231        let name = line[..open].trim().to_string();
1232        let after = &line[open + 1..];
1233        let mods = match after.rfind(')') {
1234            Some(close) => &after[..close],
1235            None => after, // tolerate a missing close paren
1236        };
1237        (name, mods.trim())
1238    } else if let Some(c) = colon {
1239        // Colon form: everything after the first colon is the modifier list,
1240        // parsed identically to the parenthesized modifiers below.
1241        let name = line[..c].trim().to_string();
1242        (name, line[c + 1..].trim())
1243    } else {
1244        // Neither delimiter: a free-form optional field of any shape — name only.
1245        (line.to_string(), "")
1246    };
1247
1248    let mut spec = FieldSpec {
1249        name,
1250        ..FieldSpec::default()
1251    };
1252
1253    if modifiers.is_empty() {
1254        return spec;
1255    }
1256
1257    // Modifiers are comma-separated. `enum` and `default` are special: their own
1258    // values may contain commas, so each is a *greedy* clause that runs from its
1259    // keyword to the start of the next recognized greedy clause (or end of line).
1260    // This lets `default North America, EMEA fallback` keep its comma and lets a
1261    // `default …` written after an `enum …` still be recognized, instead of the
1262    // value being truncated at the first comma or absorbed into the enum list.
1263    let raw: Vec<&str> = modifiers.split(',').collect();
1264    let mut i = 0;
1265    while i < raw.len() {
1266        let token = raw[i].trim();
1267        if token.is_empty() {
1268            i += 1;
1269            continue;
1270        }
1271        let lower = token.to_ascii_lowercase();
1272
1273        if lower == "required" {
1274            spec.required = true;
1275            i += 1;
1276        } else if let Some(shape) = shape_from_str(&lower) {
1277            spec.shape = Some(shape);
1278            i += 1;
1279        } else if let Some(rest) = lower.strip_prefix("link to ") {
1280            // The trailing slash is required in the source; store the prefix
1281            // without it so `Path::starts_with` comparisons are clean.
1282            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
1283            let _ = rest; // lowercase form only used for the keyword match
1284            spec.link_prefix = Some(PathBuf::from(prefix));
1285            i += 1;
1286        } else if token.len() >= "default ".len() && lower.starts_with("default ") {
1287            // Greedy `default <value>`: the value is this token (after the
1288            // keyword) plus every following comma-token up to the next greedy
1289            // clause, rejoined with the commas the split removed — so a comma
1290            // inside the default value is preserved. Original case is kept.
1291            let end = next_greedy_clause(&raw, i + 1);
1292            let mut value = token["default ".len()..].to_string();
1293            for tok in &raw[i + 1..end] {
1294                value.push(',');
1295                value.push_str(tok);
1296            }
1297            spec.default = Some(Value::String(value.trim().to_string()));
1298            i = end;
1299        } else if lower == "enum" || lower.starts_with("enum:") {
1300            // Greedy `enum` (bare `enum, a, b` or `enum: a, b`): the values run
1301            // from here to the next greedy clause (e.g. a trailing `default …`),
1302            // NOT unconditionally to end-of-line — so a `default` after `enum` is
1303            // parsed instead of swallowed as a bogus enum member.
1304            let end = next_greedy_clause(&raw, i + 1);
1305            // Rejoin this clause's tokens (trimmed so the `enum` head sits at the
1306            // start), drop the leading `enum`/`enum:` head, then re-split the
1307            // remainder into values.
1308            let joined = raw[i..end].join(",");
1309            let joined = joined.trim();
1310            let after_kw = match joined.find(':') {
1311                // `enum: a, b` — values follow the colon.
1312                Some(colon) => &joined[colon + 1..],
1313                // bare `enum, a, b` — values follow the keyword itself.
1314                None => joined.get("enum".len()..).unwrap_or(""),
1315            };
1316            let values: Vec<String> = after_kw
1317                .split(',')
1318                .map(|v| v.trim().to_string())
1319                .filter(|v| !v.is_empty())
1320                .collect();
1321            spec.enum_values = Some(values);
1322            i = end;
1323        } else {
1324            // Unrecognized modifier — captured verbatim, surfaced as Info.
1325            spec.unknown_modifiers.push(token.to_string());
1326            i += 1;
1327        }
1328    }
1329
1330    spec
1331}
1332
1333// ── Private helpers ─────────────────────────────────────────────────────────
1334
1335/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1336/// is treated as absent; anything else must be an RFC3339 string.
1337fn parse_timestamp(
1338    value: &Value,
1339    key: &str,
1340    file: &Path,
1341) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1342    match value {
1343        Value::Null => Ok(None),
1344        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1345        other => Err(ParseError::BadTimestamp {
1346            file: file.to_path_buf(),
1347            key: key.to_string(),
1348            value: format!("{other:?}"),
1349        }),
1350    }
1351}
1352
1353/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1354fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1355    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1356        file: file.to_path_buf(),
1357        key: key.to_string(),
1358        value: s.to_string(),
1359    })
1360}
1361
1362/// Coerce a YAML scalar value to its string form for the universal-contract
1363/// fields (`type`/`id`/`summary`/`status`). Mirrors `validate::scalar_string`
1364/// and `store::yaml_scalar_string` so the four modules agree on one coercion
1365/// rule: a bare numeric/bool scalar (`id: 100`, `summary: 2026`, `status: 0`)
1366/// is preserved as its string form rather than being read as None and silently
1367/// dropped on the next `to_yaml` re-emit. Returns `None` only for genuinely
1368/// non-scalar values (sequences, mappings, null), which were never a valid
1369/// shape for these fields.
1370fn scalar_string(value: &Value) -> Option<String> {
1371    match value {
1372        Value::String(s) => Some(s.clone()),
1373        Value::Number(n) => Some(n.to_string()),
1374        Value::Bool(b) => Some(b.to_string()),
1375        _ => None,
1376    }
1377}
1378
1379/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1380/// (the canonical form) or a single scalar (coerced to a one-element list).
1381fn parse_tags(value: &Value) -> Vec<String> {
1382    match value {
1383        Value::Sequence(items) => items
1384            .iter()
1385            .filter_map(|v| match v {
1386                Value::String(s) => Some(s.clone()),
1387                Value::Number(n) => Some(n.to_string()),
1388                Value::Bool(b) => Some(b.to_string()),
1389                _ => None,
1390            })
1391            .collect(),
1392        Value::String(s) => vec![s.clone()],
1393        _ => Vec::new(),
1394    }
1395}
1396
1397/// Read a `tags` value into a flat `Vec<String>` **without losing data**: a
1398/// sequence of clean scalars (the canonical form) or a single scalar coerce to a
1399/// string list. Any other shape — a sequence with a non-scalar item
1400/// (`tags: [[vip]]` → `Seq[Seq[String]]`, `tags: [a, [b]]`), or a mapping — is
1401/// rejected as `Err(value.clone())` so the caller preserves the raw value in
1402/// `extra` rather than silently filtering items out / erasing the field on the
1403/// next re-emit. This is the `tags` analog of routing a non-scalar universal
1404/// value to pass-through instead of the destroy path.
1405fn parse_tags_preserving(value: &Value) -> Result<Vec<String>, Value> {
1406    match value {
1407        Value::Sequence(items) => {
1408            let mut out = Vec::with_capacity(items.len());
1409            for item in items {
1410                match item {
1411                    Value::String(s) => out.push(s.clone()),
1412                    Value::Number(n) => out.push(n.to_string()),
1413                    Value::Bool(b) => out.push(b.to_string()),
1414                    // A non-scalar item (nested sequence/mapping/null) means this
1415                    // is not a clean tag list; preserve the whole value verbatim.
1416                    _ => return Err(value.clone()),
1417                }
1418            }
1419            Ok(out)
1420        }
1421        Value::String(s) => Ok(vec![s.clone()]),
1422        Value::Number(n) => Ok(vec![n.to_string()]),
1423        Value::Bool(b) => Ok(vec![b.to_string()]),
1424        // A mapping / null `tags` value is not a list; preserve it verbatim.
1425        _ => Err(value.clone()),
1426    }
1427}
1428
1429/// Render a non-string YAML mapping key as the scalar text YAML would emit for
1430/// it (`2026`, `true`, `3.14`, …), so a numeric/bool/float frontmatter key
1431/// preserves its key *text* on round-trip instead of being rewritten to its Rust
1432/// `Debug` form (`Number(2026)`, `Bool(true)`, `'Null'`). The key re-emits as a
1433/// string-typed key carrying the original text (`'2026':`) — the type narrows to
1434/// string, but the operator's data is no longer corrupted, and ordinary string
1435/// keys are wholly unaffected. Falls back to `Debug` only for a key shape that
1436/// cannot be a scalar (a sequence/mapping key — not expressible in our
1437/// `String`-keyed `extra`), which never occurs in practice.
1438fn yaml_scalar_key(key: &Value) -> String {
1439    match key {
1440        Value::String(s) => s.clone(),
1441        Value::Number(n) => n.to_string(),
1442        Value::Bool(b) => b.to_string(),
1443        Value::Null => "null".to_string(),
1444        // Non-scalar key: not representable as a plain `extra` string key; keep
1445        // the defensive Debug form so nothing panics (unreachable in practice).
1446        other => format!("{other:?}"),
1447    }
1448}
1449
1450/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1451/// location, or `None` if the string is not a bare wiki-link. Used for
1452/// frontmatter-valued links where there is no body position to report.
1453fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1454    let s = s.trim();
1455    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1456    // Reject anything with further brackets (e.g. the nested flow-form item),
1457    // which is not a clean single wiki-link.
1458    if inner.contains('[') || inner.contains(']') {
1459        return None;
1460    }
1461    let (target, display) = match inner.split_once('|') {
1462        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1463        None => (inner.to_string(), None),
1464    };
1465    Some(WikiLink {
1466        is_full_path: target_is_full_path(&target),
1467        has_md_extension: target_has_md_extension(&target),
1468        target,
1469        display,
1470        location: (PathBuf::new(), 0, 0),
1471    })
1472}
1473
1474/// Extract every wiki-link from a single frontmatter field value, accepting the
1475/// two canonical forms the spec defines (SPEC § Linking):
1476///
1477/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1478///   canonical unquoted inline (`f: [[x]]`) form, and
1479/// - a **list** field whose items are quoted wiki-link strings
1480///   (`- "[[x]]"`).
1481///
1482/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1483/// so the parsed [`Value`] shapes are not what one would naively expect:
1484///
1485/// | source                         | parsed `Value`                     | here |
1486/// |--------------------------------|------------------------------------|------|
1487/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1488/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1489/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1490/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1491///
1492/// The last row — an *unquoted list* — parses identically to the flow-form list
1493/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1494/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1495/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1496/// edge enumerator only ever sees the valid canonical forms.
1497///
1498/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1499/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1500/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1501fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1502    // Quoted scalar: `field: "[[x]]"`.
1503    if let Value::String(s) = value {
1504        return parse_wiki_link_str(s).into_iter().collect();
1505    }
1506    let Value::Sequence(items) = value else {
1507        return Vec::new();
1508    };
1509    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1510    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1511    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1512    if items.len() == 1 {
1513        if let Some(link) = unquoted_inline_link(&items[0]) {
1514            return vec![link];
1515        }
1516    }
1517    // Otherwise a list of quoted wiki-link strings; non-string items (the
1518    // unquoted-list mis-encoding) are left for validate to flag.
1519    items
1520        .iter()
1521        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1522        .collect()
1523}
1524
1525/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1526///
1527/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1528/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1529/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1530/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1531/// Re-emitting that verbatim yields the block sequence
1532///
1533/// ```text
1534/// company:
1535/// - - records/companies/northstar
1536/// ```
1537///
1538/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1539/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1540/// a value back into the canonical emitted form before it is written:
1541///
1542/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1543///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1544///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1545///   that [`links_in_field_value`] reads back as the same scalar link;
1546/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1547///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1548///   `set` write-in path and the canonical list form;
1549/// - everything else → returned verbatim (the common no-op for non-link values).
1550///
1551/// `|display` is preserved in both link branches. This is the single point that
1552/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1553/// corrupting a pre-existing canonical link, since they all funnel through
1554/// `to_yaml`.
1555fn canonicalize_extra_value(value: &Value) -> Value {
1556    match value {
1557        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1558        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1559        // YAML nested sequence). Non-link strings are returned untouched.
1560        Value::String(s) => match parse_wiki_link_str(s) {
1561            Some(link) => Value::String(wiki_link_literal(&link)),
1562            None => value.clone(),
1563        },
1564        Value::Sequence(items) => {
1565            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1566            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1567            // scalar string so the link is preserved rather than block-emitted.
1568            if items.len() == 1 {
1569                if let Some(link) = unquoted_inline_link(&items[0]) {
1570                    return Value::String(wiki_link_literal(&link));
1571                }
1572            }
1573            // List of wiki-links: re-emit as a block sequence of quoted-link
1574            // strings, the canonical list form `to_yaml` renders block-style and
1575            // `links_in_field_value` accepts. Only canonicalize when *every* item
1576            // is a clean single wiki-link; a list with any non-link item is left
1577            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1578            // validate flags) are untouched.
1579            let mut links = Vec::with_capacity(items.len());
1580            for item in items {
1581                match link_from_flow_list_item(item) {
1582                    Some(link) => links.push(link),
1583                    None => return value.clone(),
1584                }
1585            }
1586            if links.is_empty() {
1587                return value.clone();
1588            }
1589            Value::Sequence(
1590                links
1591                    .iter()
1592                    .map(|l| Value::String(wiki_link_literal(l)))
1593                    .collect(),
1594            )
1595        }
1596        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1597        _ => value.clone(),
1598    }
1599}
1600
1601/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1602/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1603fn wiki_link_literal(link: &WikiLink) -> String {
1604    match &link.display {
1605        Some(d) => format!("[[{}|{}]]", link.target, d),
1606        None => format!("[[{}]]", link.target),
1607    }
1608}
1609
1610/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1611/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1612/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1613/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1614/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1615/// from being mistaken for a wiki-link.
1616fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1617    let Value::Sequence(items) = v else {
1618        return None;
1619    };
1620    if items.len() != 1 {
1621        return None;
1622    }
1623    let s = items[0].as_str()?;
1624    // A clean unquoted wiki-link has no further brackets inside it.
1625    if s.contains('[') || s.contains(']') {
1626        return None;
1627    }
1628    parse_wiki_link_str(&format!("[[{s}]]"))
1629}
1630
1631/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1632/// wiki-links** that should be stored as a YAML block sequence, returning the
1633/// canonical `Value::Sequence` of quoted-link strings when so.
1634///
1635/// The value path of every write surface stringifies its argument; without this
1636/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1637/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1638/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1639/// two list spellings an agent naturally types and normalizes both to the block
1640/// form the canonical writer emits and `dbmd validate` accepts:
1641///
1642/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1643/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1644///
1645/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1646/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1647/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1648/// renders inline), an empty list, or a list with any non-link item. A single
1649/// link must stay scalar; only genuine multi-item-or-explicit lists become
1650/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1651/// validator never disagree.
1652fn parse_link_list_value(value: &str) -> Option<Value> {
1653    let trimmed = value.trim();
1654    // Only a YAML *flow sequence* literal is a list candidate; anything not
1655    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1656    // the single-inline-link guard below).
1657    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1658        return None;
1659    }
1660    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1661        return None;
1662    };
1663    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1664    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1665    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1666    // list. `links_in_field_value` reads it back as a scalar link either way.
1667    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1668        return None;
1669    }
1670    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1671    // spellings an agent types (see [`link_from_flow_list_item`]).
1672    let mut links = Vec::with_capacity(items.len());
1673    for item in &items {
1674        links.push(link_from_flow_list_item(item)?);
1675    }
1676    if links.is_empty() {
1677        return None;
1678    }
1679    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1680    // renders block-style and `links_in_field_value` accepts. `|display` is
1681    // preserved.
1682    let normalized = links
1683        .iter()
1684        .map(|l| Value::String(wiki_link_literal(l)))
1685        .collect();
1686    Some(Value::Sequence(normalized))
1687}
1688
1689/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1690/// across the spellings an agent types for a list. After top-level flow parsing,
1691/// a list item arrives in one of:
1692///
1693/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1694/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1695///   (one level deeper than a bare unquoted scalar, because the surrounding list
1696///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1697///   `Seq[String(x)]` with [`unquoted_inline_link`].
1698///
1699/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1700/// caller falls back to a scalar string and never fabricates a partial list.
1701fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1702    match item {
1703        Value::String(s) => parse_wiki_link_str(s),
1704        Value::Sequence(inner) => {
1705            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1706            // wrapper to expose the inline-link shape `Seq[String(x)]`.
1707            //
1708            // Only this triple-nested shape is a wiki-link. We deliberately do
1709            // NOT fall back to `unquoted_inline_link(item)` on the bare double
1710            // nesting `Seq[String(x)]` (a plain one-element string list `[x]`):
1711            // that fallback fabricated a wiki-link out of an ordinary nested
1712            // string list — `groups: [[alpha], [beta]]` (data `[["alpha"],
1713            // ["beta"]]`) was rewritten to `- '[[alpha]]'` / `- '[[beta]]'`,
1714            // silently changing the field's type and manufacturing short-form
1715            // links the tool then flags as `WIKI_LINK_SHORT_FORM`. An unknown
1716            // nested string list must pass through verbatim (SPEC § "Unknown
1717            // fields pass through").
1718            if inner.len() == 1 {
1719                if let Some(link) = unquoted_inline_link(&inner[0]) {
1720                    return Some(link);
1721                }
1722            }
1723            None
1724        }
1725        _ => None,
1726    }
1727}
1728
1729/// A target is a full store-relative path when its first path segment is one of
1730/// the three canonical layer dirs and at least one `/` separator follows. A
1731/// trailing `.md` does not affect this classification.
1732fn target_is_full_path(target: &str) -> bool {
1733    let target = target.trim();
1734    match target.split_once('/') {
1735        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1736        None => false,
1737    }
1738}
1739
/// Whether the trimmed target ends in `.md` (the original note ties this to the
/// validate warning `WIKI_LINK_HAS_EXTENSION`).
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
1745
/// 1-based column of `byte_offset` within `line`, counted in Unicode scalar
/// values rather than bytes.
///
/// # Panics
///
/// Slices `line` at `byte_offset`, so the offset must lie on a character
/// boundary and must not exceed `line.len()`.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let prefix = &line[..byte_offset];
    let preceding = prefix.chars().count() as u32;
    preceding + 1
}
1750
/// Position of the first token at or after `from` that begins a greedy
/// modifier clause, or `raw.len()` if there is none (including when `from` is
/// already past the end).
///
/// A token begins a greedy clause when, trimmed and ASCII-lowercased, it is
/// exactly `enum`, starts with `enum:`, or starts with `default ` (with the
/// trailing space). This bounds a greedy `default`/`enum` value so that it
/// stops at the next such clause rather than at the first comma or at the end
/// of the line.
fn next_greedy_clause(raw: &[&str], from: usize) -> usize {
    raw.iter()
        .enumerate()
        .skip(from)
        .find(|(_, token)| {
            let lowered = token.trim().to_ascii_lowercase();
            lowered == "enum" || lowered.starts_with("enum:") || lowered.starts_with("default ")
        })
        .map_or(raw.len(), |(index, _)| index)
}
1767
1768/// Map a lowercase shape keyword to its [`Shape`].
1769fn shape_from_str(s: &str) -> Option<Shape> {
1770    match s {
1771        "string" => Some(Shape::String),
1772        "int" => Some(Shape::Int),
1773        "bool" => Some(Shape::Bool),
1774        "date" => Some(Shape::Date),
1775        "email" => Some(Shape::Email),
1776        "currency" => Some(Shape::Currency),
1777        "url" => Some(Shape::Url),
1778        _ => None,
1779    }
1780}
1781
/// ATX heading level of `line`: the length of its leading `#` run, or 0 when
/// the line is not a heading.
///
/// A line counts as a heading when, following CommonMark, it has at most three
/// leading spaces, then a run of one to six `#`, and then either nothing, a
/// space, or a tab.
fn heading_level(line: &str) -> u8 {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        // Four or more spaces of indent is not a heading.
        return 0;
    }
    let after = rest.trim_start_matches('#');
    let hashes = rest.len() - after.len();
    if !(1..=6).contains(&hashes) {
        return 0;
    }
    match after.chars().next() {
        None | Some(' ') | Some('\t') => hashes as u8,
        // `#Title` with no separator is plain text.
        Some(_) => 0,
    }
}
1802
/// Text of an ATX heading: what follows the leading `#` run, trimmed, minus a
/// CommonMark *closing* `#` sequence if there is one.
///
/// `level` is the length of the opening `#` run (as returned by
/// `heading_level`); the slice starts right after the leading spaces plus
/// `level` bytes.
///
/// A trailing run of `#` is a closing sequence only when it is preceded by a
/// space or tab, or when it makes up the whole content:
///
/// - `## Title ##` → `Title`
/// - `## ##` → empty string
/// - `## C#` → `C#` (the `#` abuts the word, so it is literal text)
///
/// Keeping the abutting `#` matters because, per the original note, the
/// heading text is used verbatim as a schema type key: stripping it would bind
/// a `### c#` schema to `type: c`.
fn heading_text(line: &str, level: u8) -> String {
    let leading = line.len() - line.trim_start_matches(' ').len();
    let content = line[leading + usize::from(level)..].trim();

    let body = content.trim_end_matches('#');
    let has_trailing_run = body.len() != content.len();
    let run_is_closer = body.is_empty() || body.ends_with([' ', '\t']);

    if has_trailing_run && run_is_closer {
        // Drop the closing sequence together with the whitespace before it.
        body.trim_end().to_string()
    } else {
        // Either no trailing `#`, or the run is literal heading text.
        content.to_string()
    }
}
1835
/// Detect a fenced-code-block opener.
///
/// Returns `Some((fence byte, run length))` when `line` has at most three
/// leading spaces followed by a run of at least three backticks or three
/// tildes. A backtick fence is rejected if the rest of the line (its info
/// string) contains another backtick; tilde fences have no such restriction.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let marker = *rest.as_bytes().first()?;
    if !matches!(marker, b'`' | b'~') {
        return None;
    }
    let info = rest.trim_start_matches(char::from(marker));
    let run = rest.len() - info.len();
    if run < 3 {
        return None;
    }
    // A backtick fence's info string may not itself contain a backtick.
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
1857
/// Whether `line` closes the fence described by `fence` (`(fence byte, opening
/// run length)`).
///
/// The line must have at most three leading spaces, then a run of the same
/// fence character at least as long as the opening run, and nothing but
/// whitespace after that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();
    let tail = rest.trim_start_matches(char::from(marker));
    let run = rest.len() - tail.len();
    indent <= 3 && run >= open_len && tail.trim().is_empty()
}
1873
/// Prose of a section: everything after the first line (the heading), trimmed.
/// A body consisting of a single line has no prose and yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .split_once('\n')
        .map(|(_, rest)| rest.trim().to_string())
        .unwrap_or_default()
}
1881
/// Bullet lines of a section body.
///
/// The first line (the heading) is skipped. Each remaining line is trimmed on
/// both sides and kept when it starts with `- `, `* ` or `+ ` (marker followed
/// by a space). The kept lines still include their marker.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];

    let mut bullets = Vec::new();
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|&marker| line.starts_with(marker)) {
            bullets.push(line.to_string());
        }
    }
    bullets
}
1893
/// Drop a trailing comment from a bullet's content, returning the trimmed text
/// before the earliest comment separator.
///
/// Recognized separators, each with a space on both sides: em-dash (` — `),
/// double hyphen (` -- `), en-dash (` – `) and a single ASCII hyphen (` - `).
/// When several occur, the one that appears first in `content` wins; when none
/// occurs, the whole content is returned, trimmed.
///
/// The single-hyphen form is included because operators naturally write
/// `records/decisions/q3.md - finalized`; without it the comment stayed part
/// of the path and the entry never matched. The original note says
/// store-relative paths are `/`-joined and spaceless, so ` - ` does not cut
/// into legitimate path text.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – ", " - "]
        .iter()
        .filter_map(|&sep| content.find(sep))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1911
/// Content of a bullet line with its marker removed.
///
/// The line is trimmed, the first matching marker among `- `, `* `, `+ ` is
/// stripped, and the remainder is trimmed again. A line without a marker is
/// returned trimmed and otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    for marker in ["- ", "* ", "+ "] {
        if let Some(rest) = line.strip_prefix(marker) {
            return rest.trim();
        }
    }
    line
}
1921
1922/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1923/// wrapped in backticks and followed by an em-dash comment.
1924fn extract_path_bullet(bullet: &str) -> String {
1925    let content = bullet_content(bullet);
1926    // Prefer a backtick-delimited span if present.
1927    if let Some(start) = content.find('`') {
1928        if let Some(end_rel) = content[start + 1..].find('`') {
1929            return content[start + 1..start + 1 + end_rel].trim().to_string();
1930        }
1931    }
1932    // Otherwise take the text up to a comment separator, stripping quotes.
1933    strip_bullet_comment(content)
1934        .trim_matches('"')
1935        .trim_matches('\'')
1936        .trim()
1937        .to_string()
1938}
1939
1940/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1941/// backticks/quotes and any trailing em-dash comment.
1942fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1943    let content = strip_bullet_comment(bullet_content(bullet));
1944    content
1945        .split(',')
1946        .map(|t| {
1947            t.trim()
1948                .trim_matches('`')
1949                .trim_matches('"')
1950                .trim_matches('\'')
1951                .trim()
1952                .to_string()
1953        })
1954        .filter(|t| !t.is_empty())
1955        .collect()
1956}
1957
1958#[cfg(test)]
1959mod tests {
1960    use super::*;
1961    use std::path::Path;
1962    use tempfile::tempdir;
1963
1964    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1965
1966    #[test]
1967    fn frozen_match_is_md_insensitive_both_directions() {
1968        // A policy entry stored WITHOUT `.md` (the natural extensionless
1969        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1970        // write target — the regression every write surface had.
1971        let cfg = Config {
1972            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1973            ..Config::default()
1974        };
1975        assert_eq!(
1976            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1977            Some(PathBuf::from("records/decisions/q1")),
1978            "extensionless policy entry must freeze the .md file"
1979        );
1980        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1981
1982        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1983        let cfg = Config {
1984            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1985            ..Config::default()
1986        };
1987        assert_eq!(
1988            cfg.frozen_match(Path::new("records/decisions/q1")),
1989            Some(PathBuf::from("records/decisions/q1.md")),
1990        );
1991        // And the same-spelling cases still match.
1992        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1993    }
1994
1995    #[test]
1996    fn frozen_match_drops_leading_dot_slash() {
1997        let cfg = Config {
1998            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1999            ..Config::default()
2000        };
2001        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
2002        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
2003    }
2004
2005    #[test]
2006    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
2007        let cfg = Config {
2008            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
2009            ..Config::default()
2010        };
2011        assert!(cfg
2012            .frozen_match(Path::new("records/decisions/q2.md"))
2013            .is_none());
2014        // A prefix is not a match: `q1` must not freeze `q1-draft`.
2015        assert!(cfg
2016            .frozen_match(Path::new("records/decisions/q1-draft.md"))
2017            .is_none());
2018        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
2019    }
2020
2021    // ── split_frontmatter ───────────────────────────────────────────────────
2022
2023    #[test]
2024    fn split_frontmatter_separates_yaml_and_verbatim_body() {
2025        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
2026        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2027        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
2028        // Body is everything after the closing fence's newline, byte-for-byte.
2029        assert_eq!(p.body, "# Heading\n\nBody line.\n");
2030    }
2031
2032    #[test]
2033    fn split_frontmatter_preserves_body_without_trailing_newline() {
2034        let text = "---\ntype: x\n---\nno trailing newline";
2035        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2036        assert_eq!(p.body, "no trailing newline");
2037    }
2038
2039    #[test]
2040    fn split_frontmatter_empty_body_when_nothing_after_fence() {
2041        let text = "---\ntype: x\n---\n";
2042        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2043        assert_eq!(p.body, "");
2044    }
2045
2046    #[test]
2047    fn split_frontmatter_missing_opening_fence_errors() {
2048        let text = "# No frontmatter here\ntype: x\n";
2049        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2050        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2051    }
2052
2053    #[test]
2054    fn split_frontmatter_leading_content_before_fence_rejected() {
2055        // The opening fence must be the very first line; a blank line first is
2056        // not allowed.
2057        let text = "\n---\ntype: x\n---\nbody";
2058        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2059        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2060    }
2061
2062    #[test]
2063    fn split_frontmatter_unterminated_block_errors() {
2064        let text = "---\ntype: x\nsummary: y\n";
2065        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2066        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2067    }
2068
2069    // ── Frontmatter::parse ───────────────────────────────────────────────────
2070
2071    #[test]
2072    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
2073        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
2074        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
2075        assert_eq!(fm.type_.as_deref(), Some("contact"));
2076        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
2077        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
2078        assert_eq!(fm.status.as_deref(), Some("active"));
2079        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
2080        // Type-specific fields are NOT promoted to typed slots.
2081        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
2082        assert!(!fm.extra.contains_key("tags"));
2083        assert_eq!(
2084            fm.extra.get("email").and_then(|v| v.as_str()),
2085            Some("sarah@northstar.io")
2086        );
2087        assert_eq!(
2088            fm.extra.get("role").and_then(|v| v.as_str()),
2089            Some("Director")
2090        );
2091    }
2092
2093    #[test]
2094    fn parse_reads_rfc3339_timestamps() {
2095        let yaml =
2096            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
2097        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
2098        let created = fm.created.expect("created parsed");
2099        // -07:00 offset is 7 * 3600 seconds west.
2100        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
2101        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
2102        assert!(fm.updated.is_some());
2103    }
2104
2105    #[test]
2106    fn parse_rejects_non_rfc3339_timestamp() {
2107        // A date-only value is not a full RFC3339 timestamp; created/updated
2108        // require the full form.
2109        let yaml = "type: email\ncreated: 2026-05-27";
2110        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
2111        match err {
2112            ParseError::BadTimestamp { key, value, .. } => {
2113                assert_eq!(key, "created");
2114                assert_eq!(value, "2026-05-27");
2115            }
2116            other => panic!("expected BadTimestamp, got {other:?}"),
2117        }
2118    }
2119
2120    #[test]
2121    fn parse_malformed_yaml_errors() {
2122        // Unclosed flow mapping is invalid YAML.
2123        let yaml = "type: contact\n  bad: : :\n- nope";
2124        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
2125        assert!(matches!(err, ParseError::MalformedYaml { .. }));
2126    }
2127
2128    #[test]
2129    fn frontmatter_with_yaml_tag_on_mapping_does_not_panic() {
2130        // Regression: a YAML tag on the top-level mapping made the old
2131        // `expect_err` path PANIC, because a tagged mapping deserializes to a
2132        // `Mapping` just fine. It must now be handled — accepted as the inner
2133        // mapping, never a panic.
2134        let fm = Frontmatter::parse("!mytag\ntype: contact\nsummary: hi\n", Path::new("x.md"))
2135            .expect("tagged-mapping frontmatter must parse, not panic");
2136        assert_eq!(fm.type_.as_deref(), Some("contact"));
2137        // A genuine scalar/sequence top level is still malformed (and still
2138        // doesn't panic).
2139        assert!(Frontmatter::parse("- a\n- b\n", Path::new("x.md")).is_err());
2140    }
2141
2142    #[test]
2143    fn parse_empty_block_is_empty_frontmatter() {
2144        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
2145        assert_eq!(fm, Frontmatter::default());
2146    }
2147
2148    #[test]
2149    fn parse_scalar_top_level_is_malformed() {
2150        // A bare scalar at the top level is not a frontmatter mapping.
2151        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
2152        assert!(matches!(err, ParseError::MalformedYaml { .. }));
2153    }
2154
2155    // ── to_yaml canonical order ──────────────────────────────────────────────
2156
2157    #[test]
2158    fn to_yaml_emits_canonical_key_order() {
2159        let mut fm = Frontmatter {
2160            type_: Some("contact".into()),
2161            id: Some("sarah-chen".into()),
2162            summary: Some("Director of Ops".into()),
2163            status: Some("active".into()),
2164            tags: vec!["vip".into()],
2165            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
2166            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
2167            ..Default::default()
2168        };
2169        // Two type-specific fields, inserted in NON-alphabetical order to prove
2170        // the writer sorts them (BTreeMap) between the universal head and tail.
2171        fm.extra
2172            .insert("role".into(), Value::String("Director".into()));
2173        fm.extra.insert(
2174            "company".into(),
2175            Value::String("[[records/companies/northstar]]".into()),
2176        );
2177
2178        let yaml = fm.to_yaml();
2179        let keys: Vec<&str> = yaml
2180            .lines()
2181            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
2182            .map(|l| l.split(':').next().unwrap())
2183            .collect();
2184        assert_eq!(
2185            keys,
2186            vec![
2187                "type", "id", "created", "updated", "summary", // universal head
2188                "company", "role",   // type-specific, sorted
2189                "status", // universal tail
2190                "tags",
2191            ],
2192            "canonical order violated; got:\n{yaml}"
2193        );
2194        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
2195        assert!(
2196            yaml.contains("2026-05-27T08:00:00-07:00"),
2197            "created timestamp missing; got:\n{yaml}"
2198        );
2199        // The value re-parses to the same instant regardless of quoting.
2200        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
2201        assert_eq!(reparsed.created, fm.created);
2202        assert_eq!(reparsed.updated, fm.updated);
2203    }
2204
2205    #[test]
2206    fn to_yaml_omits_absent_optional_fields() {
2207        let fm = Frontmatter {
2208            type_: Some("note".into()),
2209            ..Default::default()
2210        };
2211        let yaml = fm.to_yaml();
2212        assert!(yaml.contains("type: note"));
2213        assert!(!yaml.contains("status"));
2214        assert!(!yaml.contains("tags"));
2215        assert!(!yaml.contains("summary"));
2216    }
2217
2218    // ── Regression: non-string scalar universal fields round-trip (finding #1) ─
2219
2220    #[test]
2221    fn regression_parse_preserves_non_string_scalar_universal_fields() {
2222        // A hand/externally-authored file whose universal fields are bare
2223        // scalars YAML reads as Number/Bool — `id: 100`, `summary: 2026`,
2224        // `status: 0`, `type: 42` — must be PRESERVED as their string form, not
2225        // read as None. Before the fix, `v.as_str()` returned None for these and
2226        // the matched arm discarded the value entirely (never reaching `extra`).
2227        let yaml = "type: 42\nid: 100\nsummary: 2026\nstatus: 0";
2228        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
2229        assert_eq!(fm.type_.as_deref(), Some("42"), "type scalar dropped");
2230        assert_eq!(fm.id.as_deref(), Some("100"), "id scalar dropped");
2231        assert_eq!(
2232            fm.summary.as_deref(),
2233            Some("2026"),
2234            "summary scalar dropped"
2235        );
2236        assert_eq!(fm.status.as_deref(), Some("0"), "status scalar dropped");
2237        // The values must surface through the public `get` accessor too.
2238        assert_eq!(
2239            fm.get("summary")
2240                .and_then(|v| v.as_str().map(str::to_string)),
2241            Some("2026".to_string())
2242        );
2243    }
2244
2245    #[test]
2246    fn regression_format_round_trip_does_not_delete_numeric_frontmatter() {
2247        // The exact finding-#1 trigger: `dbmd format` is read_file -> write_file.
2248        // A file whose `id`/`summary`/`status` are bare numeric scalars must
2249        // still carry those fields after the canonical re-emit. Before the fix,
2250        // the lines were silently deleted from disk (only `type` survived).
2251        let dir = tempdir().unwrap();
2252        let path = dir.path().join("x.md");
2253        let original = "---\ntype: contact\nid: 100\nsummary: 2026\nstatus: 0\n---\nbody\n";
2254        std::fs::write(&path, original).unwrap();
2255
2256        // Re-emit through the canonical writer, exactly as `dbmd format` does.
2257        let (fm, body) = read_file(&path).unwrap();
2258        write_file(&path, &fm, &body).unwrap();
2259
2260        let after = std::fs::read_to_string(&path).unwrap();
2261        // None of the four fields may vanish; they survive as string scalars.
2262        let reparsed = Frontmatter::parse(
2263            &split_frontmatter(&after, &path).unwrap().frontmatter_yaml,
2264            &path,
2265        )
2266        .unwrap();
2267        assert_eq!(reparsed.type_.as_deref(), Some("contact"));
2268        assert_eq!(reparsed.id.as_deref(), Some("100"), "id deleted by format");
2269        assert_eq!(
2270            reparsed.summary.as_deref(),
2271            Some("2026"),
2272            "summary deleted by format"
2273        );
2274        assert_eq!(
2275            reparsed.status.as_deref(),
2276            Some("0"),
2277            "status deleted by format"
2278        );
2279        // The body is preserved verbatim.
2280        assert_eq!(body, "body\n");
2281    }
2282
2283    // ── Regression: BOM-prefixed files parse like store/index (finding #19) ────
2284
2285    #[test]
2286    fn regression_split_frontmatter_tolerates_leading_utf8_bom() {
2287        // A BOM-prefixed file (EF BB BF + `---\n...`) is walked and indexed by
2288        // `dbmd index` (store/index strip the BOM) but, before the fix, every
2289        // write/edit surface routed through `read_file` hard-failed with
2290        // MissingFrontmatter. `split_frontmatter` must now strip a single leading
2291        // U+FEFF and emit a BOM-free body.
2292        let text = "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n";
2293        let parsed = split_frontmatter(text, Path::new("note.md")).unwrap();
2294        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
2295        // Body never carries the BOM forward into the canonical writer.
2296        assert_eq!(parsed.body, "body\n");
2297        assert!(!parsed.body.starts_with('\u{feff}'));
2298    }
2299
2300    #[test]
2301    fn regression_read_file_parses_bom_prefixed_file() {
2302        // End-to-end through the same `read_file` path `dbmd fm get/set`,
2303        // `format`, `link`, and `write` use. Before the fix this returned
2304        // Err(MissingFrontmatter) on a file the catalog had already indexed.
2305        let dir = tempdir().unwrap();
2306        let path = dir.path().join("note.md");
2307        std::fs::write(&path, "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n").unwrap();
2308
2309        let (fm, body) = read_file(&path).expect("BOM-prefixed file must parse");
2310        assert_eq!(fm.type_.as_deref(), Some("note"));
2311        assert_eq!(fm.summary.as_deref(), Some("x"));
2312        assert_eq!(body, "body\n");
2313    }
2314
2315    #[test]
2316    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
2317        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
2318        // *unquoted* inline `company: [[records/companies/northstar]]`
2319        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
2320        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
2321        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
2322        //     company:
2323        //     - - records/companies/northstar
2324        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
2325        // any `fm set` / `link` write) silently destroyed the link.
2326        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
2327        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2328        // Sanity: it really parsed as the nested sequence, not a string.
2329        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2330
2331        let out = fm.to_yaml();
2332        // The link must survive as a quoted inline scalar — brackets intact, and
2333        // never the bracket-less block sequence `- - records/...`.
2334        assert!(
2335            out.contains("[[records/companies/northstar]]"),
2336            "canonical writer dropped the wiki-link brackets; got:\n{out}"
2337        );
2338        assert!(
2339            !out.contains("- - "),
2340            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
2341        );
2342
2343        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
2344        // one link with the right target (the edge graph/backlinks rely on).
2345        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
2346        let fields = reparsed.link_fields();
2347        let links: Vec<(&str, &str, Option<&str>)> = fields
2348            .iter()
2349            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2350            .collect();
2351        assert_eq!(
2352            links,
2353            vec![("company", "records/companies/northstar", None)]
2354        );
2355
2356        // A second re-emit is a fixed point — no progressive corruption across
2357        // repeated curator-loop writes.
2358        assert_eq!(
2359            reparsed.to_yaml(),
2360            out,
2361            "to_yaml is not idempotent on links"
2362        );
2363    }
2364
2365    #[test]
2366    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
2367        // The `|display` segment must survive the unquoted-inline round-trip too.
2368        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
2369        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2370        let out = fm.to_yaml();
2371        assert!(
2372            out.contains("[[records/companies/northstar|Northstar]]"),
2373            "display segment lost on round-trip; got:\n{out}"
2374        );
2375        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
2376        let f = reparsed.link_fields();
2377        assert_eq!(f.len(), 1);
2378        assert_eq!(f[0].1.target, "records/companies/northstar");
2379        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
2380    }
2381
2382    #[test]
2383    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
2384        // A genuine quoted block list of links round-trips as a clean string
2385        // list — never collapsed to a scalar — and a plain nested sequence that
2386        // is NOT a wiki-link is left exactly as written (no false conversion).
2387        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
2388        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2389        let out = fm.to_yaml();
2390
2391        // Both attendee links survive as quoted strings.
2392        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
2393        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
2394
2395        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
2396        let fields = reparsed.link_fields();
2397        let attendees: Vec<&str> = fields
2398            .iter()
2399            .filter(|(k, _)| k == "attendees")
2400            .map(|(_, l)| l.target.as_str())
2401            .collect();
2402        assert_eq!(
2403            attendees,
2404            vec!["records/contacts/elena", "records/contacts/sarah"]
2405        );
2406        // The non-link nested sequence is preserved verbatim, not touched.
2407        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
2408    }
2409
2410    // ── read_file / write_file round-trip ────────────────────────────────────
2411
2412    #[test]
2413    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
2414        let dir = tempdir().unwrap();
2415        let path = dir.path().join("sources/emails/x.md");
2416        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
2417        let mut fm = Frontmatter {
2418            type_: Some("email".into()),
2419            summary: Some("renewal note".into()),
2420            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
2421            ..Default::default()
2422        };
2423        fm.extra
2424            .insert("from".into(), Value::String("elena@northstar.io".into()));
2425
2426        write_file(&path, &fm, body).unwrap();
2427
2428        let (read_fm, read_body) = read_file(&path).unwrap();
2429        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
2430        assert_eq!(read_fm.type_.as_deref(), Some("email"));
2431        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
2432        assert_eq!(
2433            read_fm.extra.get("from").and_then(|v| v.as_str()),
2434            Some("elena@northstar.io")
2435        );
2436        // The on-disk file starts with a fence and ends with the verbatim body.
2437        let raw = std::fs::read_to_string(&path).unwrap();
2438        assert!(raw.starts_with("---\n"));
2439        assert!(raw.ends_with(body));
2440    }
2441
2442    #[test]
2443    fn roundtrip_modify_summary_then_write_changes_only_summary() {
2444        let dir = tempdir().unwrap();
2445        let path = dir.path().join("records/contacts/sarah.md");
2446        let body = "Long-form operator notes about Sarah.\n";
2447        let fm = Frontmatter {
2448            type_: Some("contact".into()),
2449            summary: Some("old summary".into()),
2450            ..Default::default()
2451        };
2452        write_file(&path, &fm, body).unwrap();
2453
2454        // Read → modify summary → write back.
2455        let (mut fm2, body2) = read_file(&path).unwrap();
2456        fm2.summary = Some("new summary".into());
2457        write_file(&path, &fm2, &body2).unwrap();
2458
2459        let (fm3, body3) = read_file(&path).unwrap();
2460        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
2461        assert_eq!(fm3.type_.as_deref(), Some("contact"));
2462        assert_eq!(body3, body, "body unchanged across the round-trip");
2463    }
2464
2465    #[test]
2466    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
2467        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
2468        // a hand-written file carrying the canonical UNQUOTED scalar link
2469        // `company: [[records/companies/northstar]]`, read from disk then written
2470        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
2471        // value to the bracket-less block sequence `company:\n- - records/...`,
2472        // and every reader (validate/graph/backlinks) then lost the edge.
2473        let dir = tempdir().unwrap();
2474        let path = dir.path().join("records/contacts/sarah-chen.md");
2475        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
2476        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2477        std::fs::write(&path, file).unwrap();
2478
2479        // Read → write back unchanged (the canonical no-op re-emit).
2480        let (fm, body) = read_file(&path).unwrap();
2481        write_file(&path, &fm, &body).unwrap();
2482
2483        // On-disk bytes still carry the bracketed link, never `- - records/...`.
2484        let raw = std::fs::read_to_string(&path).unwrap();
2485        assert!(
2486            raw.contains("[[records/companies/northstar]]"),
2487            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
2488        );
2489        assert!(
2490            !raw.contains("- - "),
2491            "on-disk value became a nested block sequence; got:\n{raw}"
2492        );
2493
2494        // And the edge is still readable after the round-trip.
2495        let (fm2, _) = read_file(&path).unwrap();
2496        let fields = fm2.link_fields();
2497        let links: Vec<(&str, &str)> = fields
2498            .iter()
2499            .map(|(k, l)| (k.as_str(), l.target.as_str()))
2500            .collect();
2501        assert_eq!(links, vec![("company", "records/companies/northstar")]);
2502    }
2503
2504    #[test]
2505    fn write_file_does_not_leave_temp_files_behind() {
2506        let dir = tempdir().unwrap();
2507        let path = dir.path().join("records/x.md");
2508        let fm = Frontmatter {
2509            type_: Some("note".into()),
2510            ..Default::default()
2511        };
2512        write_file(&path, &fm, "body\n").unwrap();
2513        // The directory should contain only the target file, no `.x.md.tmp.*`.
2514        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
2515            .unwrap()
2516            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
2517            .collect();
2518        assert_eq!(entries, vec!["x.md".to_string()]);
2519    }
2520
2521    // ── is_content_file ──────────────────────────────────────────────────────
2522
2523    #[test]
2524    fn is_content_file_recognizes_layers_and_excludes_meta() {
2525        assert!(Frontmatter::is_content_file(Path::new(
2526            "sources/emails/2026-05-22.md"
2527        )));
2528        assert!(Frontmatter::is_content_file(Path::new(
2529            "records/contacts/sarah-chen.md"
2530        )));
2531        // A synthesis profile the agent authored lives under `records/` (the
2532        // old `wiki/` layer is gone, so a `wiki/...` path is NOT content).
2533        assert!(Frontmatter::is_content_file(Path::new(
2534            "records/profiles/sarah-chen.md"
2535        )));
2536        assert!(!Frontmatter::is_content_file(Path::new(
2537            "wiki/people/sarah-chen.md"
2538        )));
2539        // Absolute paths under a layer are still content.
2540        assert!(Frontmatter::is_content_file(Path::new(
2541            "/home/db/records/companies/northstar.md"
2542        )));
2543        // index.md at any level is meta.
2544        assert!(!Frontmatter::is_content_file(Path::new(
2545            "records/contacts/index.md"
2546        )));
2547        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2548        // Root meta files.
2549        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2550        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2551    }
2552
2553    // ── effective_id ─────────────────────────────────────────────────────────
2554
2555    #[test]
2556    fn effective_id_prefers_explicit_then_derives_from_path() {
2557        let with_id = Frontmatter {
2558            id: Some("explicit-id".into()),
2559            ..Default::default()
2560        };
2561        assert_eq!(
2562            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2563            "explicit-id"
2564        );
2565        let no_id = Frontmatter::default();
2566        assert_eq!(
2567            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2568            "sarah-chen"
2569        );
2570    }
2571
2572    // ── get / set ────────────────────────────────────────────────────────────
2573
2574    #[test]
2575    fn set_routes_universal_and_custom_keys() {
2576        let mut fm = Frontmatter::default();
2577        fm.set("type", "contact").unwrap();
2578        fm.set("summary", "hi").unwrap();
2579        fm.set("company", "[[records/companies/northstar]]")
2580            .unwrap();
2581        assert_eq!(fm.type_.as_deref(), Some("contact"));
2582        assert_eq!(fm.summary.as_deref(), Some("hi"));
2583        // Custom key landed in extra, not a typed slot.
2584        assert_eq!(
2585            fm.extra.get("company").and_then(|v| v.as_str()),
2586            Some("[[records/companies/northstar]]")
2587        );
2588        // get reads from both typed fields and extra.
2589        assert_eq!(
2590            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2591            Some("contact".into())
2592        );
2593        assert_eq!(
2594            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2595            Some("[[records/companies/northstar]]".into())
2596        );
2597        assert!(fm.get("nonexistent").is_none());
2598    }
2599
2600    #[test]
2601    fn set_timestamp_validates_rfc3339() {
2602        let mut fm = Frontmatter::default();
2603        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2604        assert!(fm.created.is_some());
2605        let err = fm.set("updated", "not-a-date").unwrap_err();
2606        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2607    }
2608
2609    // ── extract_wiki_links ───────────────────────────────────────────────────
2610
2611    #[test]
2612    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2613        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[records/profiles/sarah-chen.md|Sarah]].\n";
2614        let links = extract_wiki_links(body, Path::new("doc.md"));
2615        assert_eq!(links.len(), 3);
2616
2617        // Full path, no extension, no display.
2618        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2619        assert!(links[0].is_full_path);
2620        assert!(!links[0].has_md_extension);
2621        assert_eq!(links[0].display, None);
2622        assert_eq!(links[0].location.1, 1, "first link on line 1");
2623
2624        // Short form: not a full path.
2625        assert_eq!(links[1].target, "sarah-chen");
2626        assert!(!links[1].is_full_path, "bare target is short-form");
2627
2628        // Full path WITH .md extension and a display override on line 2.
2629        assert_eq!(links[2].target, "records/profiles/sarah-chen.md");
2630        assert!(links[2].is_full_path);
2631        assert!(links[2].has_md_extension);
2632        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2633        assert_eq!(links[2].location.1, 2);
2634    }
2635
2636    #[test]
2637    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2638        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2639        let body = "café [[records/x/y]]";
2640        let links = extract_wiki_links(body, Path::new("d.md"));
2641        assert_eq!(links.len(), 1);
2642        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2643        assert_eq!(links[0].location.2, 6);
2644    }
2645
2646    #[test]
2647    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
2648        let links = extract_wiki_links(
2649            "records/contacts/sarah-chen is not a link",
2650            Path::new("d.md"),
2651        );
2652        assert!(links.is_empty());
2653    }
2654
2655    // ── extract_markdown_links ───────────────────────────────────────────────
2656
2657    #[test]
2658    fn extract_markdown_links_captures_external_and_not_wiki_links() {
2659        let body =
2660            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
2661        let md = extract_markdown_links(body, Path::new("d.md"));
2662        assert_eq!(
2663            md.len(),
2664            1,
2665            "wiki-link must not be captured as a markdown link"
2666        );
2667        assert_eq!(md[0].text, "the thread");
2668        assert_eq!(md[0].url, "https://x.com/a");
2669        assert_eq!(md[0].location.1, 1);
2670
2671        // And the wiki-link extractor must not pick up the markdown link.
2672        let wl = extract_wiki_links(body, Path::new("d.md"));
2673        assert_eq!(wl.len(), 1);
2674        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
2675    }
2676
2677    // ── link_fields ──────────────────────────────────────────────────────────
2678
2679    #[test]
2680    fn link_fields_extracts_scalar_list_and_summary_links() {
2681        // The canonical list form quotes each item so YAML parses it as clean
2682        // strings; a scalar field may be quoted OR written in the canonical
2683        // unquoted inline form `company: [[x]]` (SPEC § Linking).
2684        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
2685        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2686        // Sanity: company really did parse as a scalar string here.
2687        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
2688        let fields = fm.link_fields();
2689
2690        // company (scalar) once, with the right target.
2691        let company: Vec<&str> = fields
2692            .iter()
2693            .filter(|(k, _)| k == "company")
2694            .map(|(_, l)| l.target.as_str())
2695            .collect();
2696        assert_eq!(company, vec!["records/companies/northstar"]);
2697        // attendees (block list) twice.
2698        let attendees: Vec<&str> = fields
2699            .iter()
2700            .filter(|(k, _)| k == "attendees")
2701            .map(|(_, l)| l.target.as_str())
2702            .collect();
2703        assert_eq!(
2704            attendees,
2705            vec!["records/contacts/elena", "records/contacts/sarah"]
2706        );
2707        // summary link surfaced.
2708        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
2709        // Plain-text field is not a link.
2710        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
2711    }
2712
2713    #[test]
2714    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
2715        // Regression: the canonical scalar wiki-link form is the *unquoted*
2716        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
2717        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
2718        // a naive `as_str()`-only walk drops it. link_fields() must still
2719        // surface exactly one link with the correct target.
2720        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
2721        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2722        // Sanity: it really did parse as the nested sequence form, NOT a string.
2723        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2724
2725        let fields = fm.link_fields();
2726        let links: Vec<(&str, &str, Option<&str>)> = fields
2727            .iter()
2728            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2729            .collect();
2730        assert_eq!(
2731            links,
2732            vec![("company", "records/companies/northstar", None)]
2733        );
2734
2735        // The `|display` segment survives the unquoted inline form too.
2736        let fm2 = Frontmatter::parse(
2737            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
2738            Path::new("m.md"),
2739        )
2740        .unwrap();
2741        let f2 = fm2.link_fields();
2742        assert_eq!(f2.len(), 1);
2743        assert_eq!(f2[0].0, "company");
2744        assert_eq!(f2[0].1.target, "records/companies/northstar");
2745        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
2746    }
2747
2748    #[test]
2749    fn link_fields_ignores_plain_one_item_flow_list() {
2750        // A plain one-item flow list `aliases: [foo]` parses to `Seq[String]`
2751        // — one nesting level shallower than an unquoted `[[foo]]` — and must
2752        // NOT be mistaken for a wiki-link.
2753        let yaml = "type: contact\naliases: [foo]";
2754        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2755        assert_eq!(fm.link_fields(), Vec::new());
2756    }
2757
2758    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2759
2760    #[test]
2761    fn detect_flow_form_flags_list_misencodings_not_scalars() {
2762        // The flow-form list mis-encoding (triple-nested) IS flagged; a scalar
2763        // inline wiki-link (double-nested) is NOT.
2764        let bad = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
2765        let flagged = detect_flow_form_link_lists(bad);
2766        assert_eq!(flagged, vec!["attendees".to_string()]);
2767
2768        // An UNquoted block list is also a mis-encoding (parses triple-nested).
2769        let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
2770        assert_eq!(
2771            detect_flow_form_link_lists(unquoted_block),
2772            vec!["attendees".to_string()]
2773        );
2774
2775        // The canonical QUOTED block form parses to clean strings — NOT flagged.
2776        let good = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
2777        assert!(detect_flow_form_link_lists(good).is_empty());
2778
2779        // A plain scalar list of strings is not flagged.
2780        let plain = "tags: [a, b, c]";
2781        assert!(detect_flow_form_link_lists(plain).is_empty());
2782    }
2783
2784    // ── extract_sections ─────────────────────────────────────────────────────
2785
2786    #[test]
2787    fn extract_sections_levels_nesting_and_boundaries() {
2788        let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
2789        let secs = extract_sections(body);
2790        let headings: Vec<(&str, u8)> =
2791            secs.iter().map(|s| (s.heading.as_str(), s.level)).collect();
2792        assert_eq!(headings, vec![("First", 2), ("Sub", 3), ("Second", 2)]);
2793
2794        // "First" (H2) body extends through its H3 child, stopping at "Second".
2795        let first = &secs[0];
2796        assert!(first.body.contains("alpha"));
2797        assert!(first.body.contains("### Sub"));
2798        assert!(first.body.contains("beta"));
2799        assert!(!first.body.contains("Second"));
2800
2801        // "Sub" (H3) stops at the next equal-or-shallower heading ("Second").
2802        let sub = &secs[1];
2803        assert!(sub.body.contains("beta"));
2804        assert!(!sub.body.contains("gamma"));
2805
2806        // 1-based line numbers within the body.
2807        assert_eq!(first.line, 2);
2808        assert_eq!(secs[2].line, 6);
2809    }
2810
2811    #[test]
2812    fn extract_sections_ignores_headings_in_fenced_code() {
2813        let body = "## Real\n```\n## Fake heading in code\n```\nafter\n";
2814        let secs = extract_sections(body);
2815        assert_eq!(secs.len(), 1);
2816        assert_eq!(secs[0].heading, "Real");
2817        // The fenced "## Fake" is part of Real's body, not its own section.
2818        assert!(secs[0].body.contains("## Fake heading in code"));
2819    }
2820
2821    // ── parse_field_spec ─────────────────────────────────────────────────────
2822
2823    #[test]
2824    fn parse_field_spec_required_and_shape() {
2825        let f = parse_field_spec("- email (required, email)");
2826        assert_eq!(f.name, "email");
2827        assert!(f.required);
2828        assert_eq!(f.shape, Some(Shape::Email));
2829        assert!(f.unknown_modifiers.is_empty());
2830    }
2831
2832    #[test]
2833    fn parse_field_spec_link_prefix_strips_trailing_slash() {
2834        let f = parse_field_spec("- company (required, link to records/companies/)");
2835        assert!(f.required);
2836        assert_eq!(f.link_prefix, Some(PathBuf::from("records/companies")));
2837        assert_eq!(f.shape, None);
2838    }
2839
2840    #[test]
2841    fn parse_field_spec_default_preserves_case_and_value() {
2842        let f = parse_field_spec("- currency (default USD)");
2843        assert_eq!(f.name, "currency");
2844        assert_eq!(f.default, Some(Value::String("USD".into())));
2845    }
2846
2847    #[test]
2848    fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
2849        let f = parse_field_spec("- status (required, enum: open, closed, pending)");
2850        assert!(f.required);
2851        assert_eq!(
2852            f.enum_values,
2853            Some(vec![
2854                "open".to_string(),
2855                "closed".to_string(),
2856                "pending".to_string()
2857            ])
2858        );
2859    }
2860
2861    #[test]
2862    fn parse_field_spec_bare_enum_keyword_is_not_itself_a_value() {
2863        // `enum` with no colon: the values are the remaining tokens; the keyword
2864        // itself must NOT leak in as an allowed value.
2865        let f = parse_field_spec("- status (required, enum, open, closed)");
2866        assert!(f.required);
2867        assert_eq!(
2868            f.enum_values,
2869            Some(vec!["open".to_string(), "closed".to_string()])
2870        );
2871    }
2872
2873    #[test]
2874    fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
2875        let f = parse_field_spec("- weird (required, frobnicate, string)");
2876        assert!(f.required);
2877        assert_eq!(f.shape, Some(Shape::String));
2878        assert_eq!(f.unknown_modifiers, vec!["frobnicate".to_string()]);
2879    }
2880
2881    #[test]
2882    fn parse_field_spec_no_parens_is_freeform_optional() {
2883        let f = parse_field_spec("- nickname");
2884        assert_eq!(f.name, "nickname");
2885        assert!(!f.required);
2886        assert_eq!(f.shape, None);
2887        assert!(f.link_prefix.is_none());
2888        assert!(f.enum_values.is_none());
2889        assert!(f.unknown_modifiers.is_empty());
2890    }
2891
2892    // ── parse_schema_bullet (directives) ─────────────────────────────────────
2893
2894    #[test]
2895    fn schema_bullet_unique_single_field() {
2896        match parse_schema_bullet("- unique: email") {
2897            SchemaBullet::Unique(fields) => assert_eq!(fields, vec!["email".to_string()]),
2898            other => panic!("expected Unique, got {other:?}"),
2899        }
2900    }
2901
2902    #[test]
2903    fn schema_bullet_unique_compound_trims_and_splits() {
2904        match parse_schema_bullet("- unique: date, amount , vendor") {
2905            SchemaBullet::Unique(fields) => assert_eq!(
2906                fields,
2907                vec![
2908                    "date".to_string(),
2909                    "amount".to_string(),
2910                    "vendor".to_string()
2911                ]
2912            ),
2913            other => panic!("expected Unique, got {other:?}"),
2914        }
2915    }
2916
2917    #[test]
2918    fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
2919        match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
2920            SchemaBullet::SummaryTemplate(t) => assert_eq!(t, "{role} at {company} (x: y)"),
2921            other => panic!("expected SummaryTemplate, got {other:?}"),
2922        }
2923    }
2924
2925    #[test]
2926    fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
2927        // A field whose modifiers contain a colon (`enum:`) parses as a field, not
2928        // a directive — its head has a `(` before any `:`.
2929        match parse_schema_bullet("- status (enum: open, closed)") {
2930            SchemaBullet::Field(f) => {
2931                assert_eq!(f.name, "status");
2932                assert_eq!(
2933                    f.enum_values,
2934                    Some(vec!["open".to_string(), "closed".to_string()])
2935                );
2936            }
2937            other => panic!("expected Field, got {other:?}"),
2938        }
2939    }
2940
2941    #[test]
2942    fn parse_db_md_schema_captures_unique_and_summary_template() {
2943        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
2944        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2945        let s = config.schemas.get("contact").expect("contact schema");
2946        assert_eq!(s.fields.len(), 1, "directives are not parsed as fields");
2947        assert_eq!(s.unique_keys, vec![vec!["email".to_string()]]);
2948        assert_eq!(s.summary_template.as_deref(), Some("{role} at {company}"));
2949    }
2950
2951    #[test]
2952    fn schema_bullet_shard_directive_parses_values() {
2953        assert!(matches!(
2954            parse_schema_bullet("- shard: by-date"),
2955            SchemaBullet::Shard(Some(true))
2956        ));
2957        assert!(matches!(
2958            parse_schema_bullet("- shard: flat"),
2959            SchemaBullet::Shard(Some(false))
2960        ));
2961        // An unrecognized value is ignored (None), like an unknown modifier.
2962        assert!(matches!(
2963            parse_schema_bullet("- shard: weekly"),
2964            SchemaBullet::Shard(None)
2965        ));
2966        // A field whose name has a `(` before any `:` is still a field — the same
2967        // guard that keeps `- status (enum: a, b)` a field, not a directive.
2968        assert!(matches!(
2969            parse_schema_bullet("- shardiness (string)"),
2970            SchemaBullet::Field(_)
2971        ));
2972    }
2973
2974    #[test]
2975    fn parse_db_md_schema_captures_shard_directive() {
2976        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### shipment\n- carrier (string)\n- shard: by-date\n\n### contact\n- shard: flat\n";
2977        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2978        let shipment = config.schemas.get("shipment").expect("shipment schema");
2979        assert_eq!(shipment.shard, Some(true));
2980        assert_eq!(
2981            shipment.fields.len(),
2982            1,
2983            "`shard:` is a directive, not a field"
2984        );
2985        assert_eq!(config.schemas.get("contact").unwrap().shard, Some(false));
2986    }
2987
2988    // ── parse_db_md ──────────────────────────────────────────────────────────
2989
2990    const CANONICAL_DB_MD: &str = "---\ntype: db-md\nscope: company\nowner: Sarah Chen\n---\n\n# Acme operations knowledge base\n\nCompany-scale institutional memory for Acme.\n\n## Agent instructions\n\nPrioritize creating `contact` records from new-sender emails. Use British English.\n\n## Policies\n\n### Frozen pages\n- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n\n### Ignored types\n- `test`, `temp` — read but never synthesize.\n\n## Schemas\n\n### contact\n- name (required)\n- email (required, email)\n- company (required, link to records/companies/)\n- role (string)\n\n### expense\n- date (required, date)\n- amount (required)\n- currency (default USD)\n";
2991
2992    #[test]
2993    fn parse_db_md_extracts_all_canonical_sections() {
2994        let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();
2995
2996        // Agent instructions: free-form prose, heading line stripped.
2997        let ai = config
2998            .agent_instructions
2999            .expect("agent instructions present");
3000        assert!(ai.starts_with("Prioritize creating"));
3001        assert!(!ai.contains("## Agent instructions"));
3002
3003        // Frozen pages: paths extracted from backticked bullets, comments dropped.
3004        assert_eq!(
3005            config.frozen_pages,
3006            vec![
3007                PathBuf::from("records/decisions/2026-q1-strategy.md"),
3008                PathBuf::from("wiki/synthesis/2026-annual-plan.md"),
3009            ]
3010        );
3011
3012        // Ignored types: comma list, backticks/comment stripped.
3013        assert_eq!(
3014            config.ignored_types,
3015            vec!["test".to_string(), "temp".to_string()]
3016        );
3017
3018        // Schemas: two types, each with its fields in source order.
3019        assert_eq!(config.schemas.len(), 2);
3020        let contact = config.schemas.get("contact").expect("contact schema");
3021        let names: Vec<&str> = contact.fields.iter().map(|f| f.name.as_str()).collect();
3022        assert_eq!(names, vec!["name", "email", "company", "role"]);
3023        assert!(contact.fields[0].required); // name
3024        assert_eq!(contact.fields[1].shape, Some(Shape::Email)); // email
3025        assert_eq!(
3026            contact.fields[2].link_prefix,
3027            Some(PathBuf::from("records/companies"))
3028        ); // company
3029
3030        let expense = config.schemas.get("expense").expect("expense schema");
3031        let cur = expense
3032            .fields
3033            .iter()
3034            .find(|f| f.name == "currency")
3035            .unwrap();
3036        assert_eq!(cur.default, Some(Value::String("USD".into())));
3037    }
3038
3039    #[test]
3040    fn parse_db_md_handles_malformed_and_unknown_modifiers() {
3041        // corpus-b shape: a `## Schemas` section with a malformed bullet, an
3042        // unknown modifier, and bullets that appear with NO `### <type>`
3043        // heading (so they belong to no schema and are dropped).
3044        let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
3045        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
3046
3047        // The orphan bullet under `## Schemas` with no `### type` heading is not
3048        // captured as a schema.
3049        assert_eq!(config.schemas.len(), 1);
3050        let ticket = config.schemas.get("ticket").expect("ticket schema");
3051        assert_eq!(ticket.fields.len(), 2);
3052
3053        let priority = &ticket.fields[0];
3054        assert!(priority.required);
3055        assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
3056        assert_eq!(
3057            priority.enum_values,
3058            Some(vec!["low".to_string(), "high".to_string()])
3059        );
3060
3061        // A bullet with an unclosed paren still yields a usable name.
3062        let broken = &ticket.fields[1];
3063        assert_eq!(broken.name, "broken");
3064    }
3065
3066    #[test]
3067    fn parse_db_md_missing_frontmatter_errors() {
3068        let text = "# No frontmatter\n\n## Agent instructions\nhi\n";
3069        let err = parse_db_md(text, Path::new("DB.md")).unwrap_err();
3070        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
3071    }
3072
3073    #[test]
3074    fn parse_db_md_absent_sections_default_empty() {
3075        let text = "---\ntype: db-md\n---\n\n# Title only\n";
3076        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
3077        assert_eq!(config, Config::default());
3078    }
3079
3080    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
3081
3082    /// `Frontmatter::set` is the value path every write surface (`fm set`,
3083    /// `write --fm`) funnels through. A list-of-wiki-links value (the SPEC's
3084    /// `meeting.attendees` shape) must serialize as a YAML **block sequence** of
3085    /// quoted links — readable back by [`links_in_field_value`] and accepted by
3086    /// `dbmd validate` — never the flow-form scalar string that trips
3087    /// `WIKI_LINK_FLOW_FORM_LIST`. Both the unquoted (`[[[a]], [[b]]]`) and
3088    /// quoted (`["[[a]]", "[[b]]"]`) spellings an agent types must normalize.
3089    #[test]
3090    fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
3091        for value in [
3092            "[[[records/contacts/a]], [[records/contacts/b]]]",
3093            r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
3094        ] {
3095            let mut fm = Frontmatter::default();
3096            fm.set("attendees", value).unwrap();
3097
3098            // Stored as a 2-element sequence of clean quoted links.
3099            let stored = fm.extra.get("attendees").expect("attendees set");
3100            let Value::Sequence(items) = stored else {
3101                panic!("attendees must be a Sequence, got {stored:?} for input {value}");
3102            };
3103            assert_eq!(items.len(), 2, "input {value}");
3104            assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
3105            assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));
3106
3107            // The edge enumerator reads exactly the two links back (no stray
3108            // bracket targets, the flow-form-string symptom).
3109            let links: Vec<_> = links_in_field_value(stored)
3110                .into_iter()
3111                .map(|l| l.target)
3112                .collect();
3113            assert_eq!(
3114                links,
3115                vec!["records/contacts/a", "records/contacts/b"],
3116                "input {value}"
3117            );
3118
3119            // And the canonical writer renders it block-style, not as a scalar.
3120            let yaml = fm.to_yaml();
3121            assert!(
3122                yaml.contains("attendees:\n"),
3123                "expected block list in:\n{yaml}"
3124            );
3125            assert!(
3126                !yaml.contains("attendees: '[["),
3127                "must not be a flow-form scalar string in:\n{yaml}"
3128            );
3129        }
3130    }
3131
3132    /// A *single* inline wiki-link stays a scalar string (renders inline
3133    /// `field: [[x]]`), and a single link must never be widened to a one-item
3134    /// list — preserving the common `contact.company` / `expense.vendor` shape.
3135    #[test]
3136    fn set_single_inline_wiki_link_stays_scalar() {
3137        let mut fm = Frontmatter::default();
3138        fm.set("company", "[[records/companies/tideform]]").unwrap();
3139        assert_eq!(
3140            fm.extra.get("company"),
3141            Some(&Value::String("[[records/companies/tideform]]".into())),
3142        );
3143        // Still recognized as one link.
3144        let links: Vec<_> = links_in_field_value(fm.extra.get("company").unwrap())
3145            .into_iter()
3146            .map(|l| l.target)
3147            .collect();
3148        assert_eq!(links, vec!["records/companies/tideform"]);
3149    }
3150
3151    /// Plain text and a non-link flow list are left as verbatim scalar strings —
3152    /// the list normalization only triggers when every item is a clean wiki-link.
3153    #[test]
3154    fn set_non_link_values_stay_scalar_strings() {
3155        let mut fm = Frontmatter::default();
3156        fm.set("location", "Video call (remote)").unwrap();
3157        assert_eq!(
3158            fm.extra.get("location"),
3159            Some(&Value::String("Video call (remote)".into())),
3160        );
3161
3162        // A flow list whose items are NOT wiki-links must not be reinterpreted as
3163        // a link sequence; it stays the scalar string the agent passed.
3164        fm.set("note", "[draft, wip]").unwrap();
3165        assert_eq!(
3166            fm.extra.get("note"),
3167            Some(&Value::String("[draft, wip]".into()))
3168        );
3169    }
3170
3171    // ── Regression: non-string YAML keys round-trip (no Rust Debug corruption) ─
3172
3173    #[test]
3174    fn regression_non_string_yaml_keys_keep_their_text_on_round_trip() {
3175        // A numeric/bool/null/float frontmatter key is valid YAML and must NOT be
3176        // rewritten to its Rust `Debug` form (`Number(2026)`, `Bool(true)`,
3177        // `'Null'`). After the fix the key text survives (the key narrows to a
3178        // string-typed key, but the operator's data is no longer corrupted).
3179        let yaml = "type: note\n2026: planning notes\ntrue: yes-key\n3.14: f\n";
3180        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3181        // Keys are stored as their scalar text, not the Debug string.
3182        assert!(fm.extra.contains_key("2026"), "numeric key text lost");
3183        assert!(fm.extra.contains_key("true"), "bool key text lost");
3184        assert!(fm.extra.contains_key("3.14"), "float key text lost");
3185        assert!(!fm.extra.keys().any(|k| k.starts_with("Number(")));
3186        assert!(!fm.extra.keys().any(|k| k.starts_with("Bool(")));
3187
3188        // And a re-emit never produces the Debug forms on disk.
3189        let out = fm.to_yaml();
3190        assert!(!out.contains("Number("), "Debug-form key emitted:\n{out}");
3191        assert!(!out.contains("Bool("), "Debug-form key emitted:\n{out}");
3192        // The key text is still present (quoted, since it now reads as a string).
3193        assert!(out.contains("2026"), "numeric key dropped:\n{out}");
3194        assert!(out.contains("planning notes"), "value dropped:\n{out}");
3195    }
3196
3197    // ── Regression: universal-key sequence/mapping values are preserved (#2) ───
3198
3199    #[test]
3200    fn regression_universal_key_non_scalar_value_is_preserved_not_deleted() {
3201        // A universal key carrying a sequence/mapping (`status: [active, draft]`)
3202        // is not a valid scalar for that field. Before the fix, the matched arm
3203        // consumed-and-dropped it (scalar_string -> None) and `to_yaml` then
3204        // omitted the field — `dbmd format` silently DELETED it. It must now pass
3205        // through `extra` and re-emit verbatim.
3206        let yaml = "type: note\nstatus:\n  - active\n  - draft\nsummary:\n  a: 1\n  b: 2\n";
3207        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3208        // The typed accessors stay None (no valid scalar), but the data lives in
3209        // extra so nothing is lost.
3210        assert!(fm.status.is_none());
3211        assert!(fm.summary.is_none());
3212        assert!(fm.extra.contains_key("status"), "status value destroyed");
3213        assert!(fm.extra.contains_key("summary"), "summary value destroyed");
3214
3215        // A re-emit keeps both fields' data on disk.
3216        let out = fm.to_yaml();
3217        assert!(out.contains("status"), "status deleted on re-emit:\n{out}");
3218        assert!(out.contains("active"), "status items deleted:\n{out}");
3219        assert!(
3220            out.contains("summary"),
3221            "summary deleted on re-emit:\n{out}"
3222        );
3223
3224        // Round-trips as a fixed point — repeated curator-loop writes don't lose
3225        // the data.
3226        let reparsed = Frontmatter::parse(&out, Path::new("x.md")).unwrap();
3227        assert!(reparsed.extra.contains_key("status"));
3228        assert!(reparsed.extra.contains_key("summary"));
3229    }
3230
3231    // ── Regression: non-scalar tags items don't erase the tags field (#5) ──────
3232
3233    #[test]
3234    fn regression_non_scalar_tags_value_is_preserved_not_erased() {
3235        // `tags: [[vip]]` (an authoring slip — wiki-link brackets around a tag)
3236        // parses to a nested sequence; before the fix `parse_tags` filtered the
3237        // non-scalar item out and `to_yaml` then omitted the now-empty tags vec,
3238        // silently DELETING the tags line. It must now survive the re-emit (the
3239        // key data is preserved; the field is never dropped).
3240        let yaml = "type: note\ntags: [[vip]]\n";
3241        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3242        // The typed tags vec is empty (no clean scalar list), but the raw value
3243        // is preserved in extra so nothing is destroyed.
3244        assert!(fm.tags.is_empty());
3245        assert!(fm.extra.contains_key("tags"), "tags value destroyed");
3246
3247        let out = fm.to_yaml();
3248        assert!(out.contains("tags"), "tags deleted on re-emit:\n{out}");
3249        // The `vip` text survives on disk in some form (never erased).
3250        assert!(out.contains("vip"), "tag content erased:\n{out}");
3251
3252        // A clean tag list still parses to the typed vec (not regressed).
3253        let clean =
3254            Frontmatter::parse("type: note\ntags: [vip, renewal]\n", Path::new("x.md")).unwrap();
3255        assert_eq!(clean.tags, vec!["vip".to_string(), "renewal".to_string()]);
3256        assert!(!clean.extra.contains_key("tags"));
3257    }
3258
3259    // ── Regression: plain nested string lists are NOT fabricated into links (#3) ─
3260
3261    #[test]
3262    fn regression_plain_nested_string_list_is_not_turned_into_wiki_links() {
3263        // `groups: [[alpha], [beta]]` is the data [["alpha"],["beta"]] — an
3264        // unknown nested string list that must pass through verbatim. Before the
3265        // fix, canonicalize_extra_value fabricated `- '[[alpha]]'` / `- '[[beta]]'`
3266        // (short-form links the tool then flagged), changing the field's type.
3267        let yaml = "type: note\ngroups: [[alpha], [beta]]\n";
3268        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3269        let before = fm.extra.get("groups").cloned();
3270
3271        let out = fm.to_yaml();
3272        // No fabricated wiki-link brackets in the emitted YAML.
3273        assert!(!out.contains("[[alpha]]"), "fabricated a wiki-link:\n{out}");
3274        assert!(!out.contains("[[beta]]"), "fabricated a wiki-link:\n{out}");
3275
3276        // The value is unchanged across the canonical re-emit.
3277        let reparsed = Frontmatter::parse(&out, Path::new("x.md")).unwrap();
3278        assert_eq!(
3279            reparsed.extra.get("groups"),
3280            before.as_ref(),
3281            "nested string list mutated by canonicalize_extra_value"
3282        );
3283        // And it surfaces no links.
3284        assert!(reparsed.link_fields().is_empty());
3285    }
3286
3287    // ── Regression: fence-line trailing whitespace is tolerated (#4) ───────────
3288
3289    #[test]
3290    fn regression_split_frontmatter_tolerates_trailing_whitespace_on_fences() {
3291        // A fence written `--- ` (trailing space — invisible in editors) is
3292        // indexed/validated clean by index.rs/validate.rs (both use `trim_end()`)
3293        // but, before the fix, hard-failed every read/edit surface routed through
3294        // `split_frontmatter`. All three must now agree.
3295        let text = "--- \ntype: note\nsummary: x\n---\t\nbody\n";
3296        let parsed = split_frontmatter(text, Path::new("f.md")).unwrap();
3297        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
3298        assert_eq!(parsed.body, "body\n");
3299
3300        // End to end through read_file's parse.
3301        let fm = Frontmatter::parse(&parsed.frontmatter_yaml, Path::new("f.md")).unwrap();
3302        assert_eq!(fm.type_.as_deref(), Some("note"));
3303    }
3304
3305    // ── Regression: CommonMark trailing-'#' heading rule (#6) ──────────────────
3306
3307    #[test]
3308    fn regression_heading_text_keeps_abutting_hash_drops_closing_sequence() {
3309        // `## C#` → `C#` (the `#` abuts content, not a closing sequence).
3310        assert_eq!(heading_text("## C#", 2), "C#");
3311        assert_eq!(heading_text("## F#", 2), "F#");
3312        assert_eq!(heading_text("## issue-123#", 2), "issue-123#");
3313        // A genuine ATX closing sequence (space before the `#` run) is dropped.
3314        assert_eq!(heading_text("## Title ##", 2), "Title");
3315        assert_eq!(heading_text("## Title #", 2), "Title");
3316        // All-hashes content collapses to empty.
3317        assert_eq!(heading_text("## ##", 2), "");
3318        // No trailing hashes — unchanged.
3319        assert_eq!(heading_text("## Plain", 2), "Plain");
3320    }
3321
3322    #[test]
3323    fn regression_extract_sections_keeps_csharp_heading_and_schema_type_binds() {
3324        // `dbmd sections` must report `C#`, not `C`.
3325        let secs = extract_sections("## C#\nbody\n");
3326        assert_eq!(secs.len(), 1);
3327        assert_eq!(secs[0].heading, "C#");
3328
3329        // And a `### c#` schema must register under `c#`, not `c`.
3330        let db = "---\ntype: db-md\n---\n\n## Schemas\n\n### c#\n- name (required)\n";
3331        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
3332        assert!(
3333            config.schemas.contains_key("c#"),
3334            "schema bound to wrong key"
3335        );
3336        assert!(!config.schemas.contains_key("c"));
3337    }
3338
3339    // ── Regression: section line numbers offset by the frontmatter block (#7) ──
3340
3341    #[test]
3342    fn regression_extract_sections_in_file_reports_source_line_numbers() {
3343        // A heading on file line 6 (after a 4-line frontmatter block + 1 body
3344        // line) must be reported as L6, not the body-relative L2.
3345        let text = "---\ntype: note\nsummary: x\n---\nbody line\n## Heading\nmore\n";
3346        let secs = extract_sections_in_file(text);
3347        assert_eq!(secs.len(), 1);
3348        assert_eq!(secs[0].heading, "Heading");
3349        assert_eq!(secs[0].line, 6, "section line not offset by frontmatter");
3350
3351        // The body-relative helper is unchanged (validate relies on that frame).
3352        let body_secs = extract_sections("body line\n## Heading\nmore\n");
3353        assert_eq!(body_secs[0].line, 2);
3354
3355        // No frontmatter: whole text is body, no offset.
3356        let plain = extract_sections_in_file("## Top\nx\n## Next\n");
3357        assert_eq!(plain[0].line, 1);
3358        assert_eq!(plain[1].line, 3);
3359    }
3360
3361    // ── Regression: colon-form schema field bullet parses modifiers (#8) ───────
3362
3363    #[test]
3364    fn regression_colon_form_field_bullet_parses_modifiers() {
3365        // `- title: string, required` is the natural mis-spelling of
3366        // `- title (string, required)`; before the fix the whole text became the
3367        // field name and every modifier was silently lost.
3368        let f = parse_field_spec("- title: string, required");
3369        assert_eq!(f.name, "title");
3370        assert!(f.required, "required modifier lost on colon-form");
3371        assert_eq!(f.shape, Some(Shape::String));
3372
3373        // Through the schema-bullet classifier (the real path), it is a Field.
3374        match parse_schema_bullet("- title: string, required") {
3375            SchemaBullet::Field(f) => {
3376                assert_eq!(f.name, "title");
3377                assert!(f.required);
3378                assert_eq!(f.shape, Some(Shape::String));
3379            }
3380            other => panic!("expected Field, got {other:?}"),
3381        }
3382
3383        // A paren form whose modifiers contain a colon still parses by parens.
3384        let g = parse_field_spec("- status (enum: open, closed)");
3385        assert_eq!(g.name, "status");
3386        assert_eq!(
3387            g.enum_values,
3388            Some(vec!["open".to_string(), "closed".to_string()])
3389        );
3390    }
3391
3392    // ── Regression: comma inside a `default` value is preserved (#9) ───────────
3393
3394    #[test]
3395    fn regression_default_value_preserves_internal_commas() {
3396        let f = parse_field_spec("- title (default Director, Operations)");
3397        assert_eq!(
3398            f.default,
3399            Some(Value::String("Director, Operations".into())),
3400            "comma-bearing default truncated"
3401        );
3402
3403        let g = parse_field_spec("- region (default North America, EMEA fallback)");
3404        assert_eq!(
3405            g.default,
3406            Some(Value::String("North America, EMEA fallback".into()))
3407        );
3408
3409        // A single-token default still works (no regression).
3410        let h = parse_field_spec("- currency (default USD)");
3411        assert_eq!(h.default, Some(Value::String("USD".into())));
3412    }
3413
3414    // ── Regression: a `default` after `enum` is parsed, not swallowed (#10) ────
3415
3416    #[test]
3417    fn regression_default_after_enum_is_parsed_not_an_enum_member() {
3418        let f = parse_field_spec("- status (enum: open, closed, default open)");
3419        assert_eq!(
3420            f.enum_values,
3421            Some(vec!["open".to_string(), "closed".to_string()]),
3422            "`default open` leaked into the enum list"
3423        );
3424        assert_eq!(
3425            f.default,
3426            Some(Value::String("open".into())),
3427            "default after enum was dropped"
3428        );
3429
3430        // The bare `enum` keyword form, with a trailing default.
3431        let g = parse_field_spec("- status (enum, open, closed, default open)");
3432        assert_eq!(
3433            g.enum_values,
3434            Some(vec!["open".to_string(), "closed".to_string()])
3435        );
3436        assert_eq!(g.default, Some(Value::String("open".into())));
3437    }
3438
3439    // ── Regression: frozen-page policy does not fail open (#11) ────────────────
3440
3441    #[test]
3442    fn regression_frozen_match_handles_leading_slash() {
3443        let cfg = Config {
3444            frozen_pages: vec![PathBuf::from("/records/decisions/q1.md")],
3445            ..Config::default()
3446        };
3447        assert!(
3448            cfg.is_frozen(Path::new("records/decisions/q1.md")),
3449            "leading-slash entry failed open"
3450        );
3451        assert!(cfg.is_frozen(Path::new("records/decisions/q1")));
3452    }
3453
3454    #[test]
3455    fn regression_frozen_match_supports_globs() {
3456        let cfg = Config {
3457            frozen_pages: vec![PathBuf::from("records/decisions/*")],
3458            ..Config::default()
3459        };
3460        assert!(
3461            cfg.is_frozen(Path::new("records/decisions/q1.md")),
3462            "glob entry failed to protect a concrete file"
3463        );
3464        assert!(cfg.is_frozen(Path::new("records/decisions/q2.md")));
3465        // The glob does not cross a `/` segment.
3466        assert!(!cfg.is_frozen(Path::new("records/decisions/sub/q1.md")));
3467        // `**` crosses segments.
3468        let deep = Config {
3469            frozen_pages: vec![PathBuf::from("records/**")],
3470            ..Config::default()
3471        };
3472        assert!(deep.is_frozen(Path::new("records/decisions/sub/q1.md")));
3473        assert!(deep.is_frozen(Path::new("records/x.md")));
3474        assert!(!deep.is_frozen(Path::new("wiki/x.md")));
3475        // A `*.md`-style intra-segment glob.
3476        let suffix = Config {
3477            frozen_pages: vec![PathBuf::from("records/decisions/q*")],
3478            ..Config::default()
3479        };
3480        assert!(suffix.is_frozen(Path::new("records/decisions/q1.md")));
3481        assert!(!suffix.is_frozen(Path::new("records/decisions/draft.md")));
3482    }
3483
3484    #[test]
3485    fn regression_frozen_entry_single_hyphen_comment_is_stripped() {
3486        // `records/decisions/q3.md - finalized` (single ASCII hyphen comment, no
3487        // backticks): the comment must be stripped so the entry is just the path.
3488        let path = extract_path_bullet("- records/decisions/q3.md - finalized");
3489        assert_eq!(path, "records/decisions/q3.md");
3490
3491        // End to end: such a bullet freezes the file.
3492        let cfg = Config {
3493            frozen_pages: vec![PathBuf::from(extract_path_bullet(
3494                "- records/decisions/q3.md - finalized",
3495            ))],
3496            ..Config::default()
3497        };
3498        assert!(
3499            cfg.is_frozen(Path::new("records/decisions/q3.md")),
3500            "single-hyphen-comment entry failed open"
3501        );
3502    }
3503}