Skip to main content

dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_norway::{Mapping, Value};
18
/// Names of the three layer directories of a store.
///
/// [`Frontmatter::is_content_file`] treats a path as content only when one of
/// its components equals one of these names, and a wiki-link target counts as
/// "full-path" only when it resolves under one of them.
const LAYER_DIRS: [&str; 3] = ["sources", "records", "wiki"];
22
23/// Errors produced while parsing a markdown file or the `DB.md` config.
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    /// The frontmatter block was not valid YAML. Maps to validate code
27    /// `FM_MALFORMED_YAML`.
28    #[error("malformed YAML frontmatter in {file}: {source}")]
29    MalformedYaml {
30        /// The file whose frontmatter failed to parse.
31        file: PathBuf,
32        /// The underlying YAML error.
33        source: serde_norway::Error,
34    },
35
36    /// The file has no `---`-delimited frontmatter block at its very start.
37    #[error("missing frontmatter block in {file}")]
38    MissingFrontmatter {
39        /// The offending file.
40        file: PathBuf,
41    },
42
43    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
44    /// (for `type`) and the per-type required-field codes.
45    #[error("missing required field '{key}' in {file}")]
46    MissingField {
47        /// The file missing the field.
48        file: PathBuf,
49        /// The required key.
50        key: String,
51    },
52
53    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
54    #[error("bad timestamp in field '{key}' of {file}: {value}")]
55    BadTimestamp {
56        /// The file.
57        file: PathBuf,
58        /// The frontmatter key.
59        key: String,
60        /// The unparseable value.
61        value: String,
62    },
63
64    /// An I/O error reading the file.
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67}
68
69/// The parsed YAML frontmatter of a db.md file.
70///
71/// The universal-contract fields are typed accessors; everything else lands in
72/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
73/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
74/// order: `type`, `id`, `created`, `updated`, `summary` first, then
75/// type-specific fields, then `status` / `tags`.
76#[derive(Debug, Clone, Default, PartialEq)]
77pub struct Frontmatter {
78    /// `type` — required on content files; the primary query key.
79    pub type_: Option<String>,
80    /// `id` — optional; derived from the file path when absent.
81    pub id: Option<String>,
82    /// `created` — RFC3339; required and auto-set on content-file create.
83    pub created: Option<DateTime<FixedOffset>>,
84    /// `updated` — RFC3339; required and auto-maintained on content files.
85    pub updated: Option<DateTime<FixedOffset>>,
86    /// `summary` — the one-line catalog line; required on every content file.
87    pub summary: Option<String>,
88    /// `status` — optional lifecycle state.
89    pub status: Option<String>,
90    /// `tags` — optional flat list of short scalar labels.
91    pub tags: Vec<String>,
92    /// All other frontmatter keys (type-specific + custom), preserved verbatim
93    /// in insertion-stable sorted order. Wiki-link-valued fields keep their raw
94    /// YAML form here; [`Frontmatter::link_fields`] surfaces them as
95    /// [`WikiLink`]s.
96    pub extra: BTreeMap<String, Value>,
97}
98
99impl Frontmatter {
100    /// Parse a YAML frontmatter block (the text between the opening and closing
101    /// `---` fences, exclusive) into a [`Frontmatter`].
102    ///
103    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
104    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
105    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
106        // An empty (or whitespace-only) frontmatter block is a valid, empty
107        // mapping — not a YAML error.
108        let value: Value = if yaml.trim().is_empty() {
109            Value::Mapping(Mapping::new())
110        } else {
111            serde_norway::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
112                file: file.to_path_buf(),
113                source,
114            })?
115        };
116
117        // Top-level frontmatter must be a mapping. A scalar or sequence at the
118        // top level is malformed for our purposes; surface it as such.
119        let map = match value {
120            Value::Mapping(m) => m,
121            Value::Null => Mapping::new(),
122            other => {
123                // serde_norway::Error has no public constructor, so let the
124                // deserializer decide: a value that coerces to a Mapping (e.g. a
125                // YAML-tagged mapping `!tag\n k: v`, where the tag is ambient) is
126                // accepted as that mapping; a genuine scalar or sequence top
127                // level fails to coerce and IS the malformed case. (Using a
128                // match here, not `expect_err`, avoids a panic on the
129                // tagged-mapping case, which deserializes to a Mapping just
130                // fine.)
131                match serde_norway::from_value::<Mapping>(other) {
132                    Ok(m) => m,
133                    Err(source) => {
134                        return Err(ParseError::MalformedYaml {
135                            file: file.to_path_buf(),
136                            source,
137                        });
138                    }
139                }
140            }
141        };
142
143        let mut fm = Frontmatter::default();
144        for (k, v) in map {
145            let key = match k.as_str() {
146                Some(s) => s.to_string(),
147                // Non-string keys are unusual; stringify defensively and keep
148                // them in `extra` so nothing is silently dropped.
149                None => format!("{k:?}"),
150            };
151            match key.as_str() {
152                // Coerce scalar values rather than `v.as_str()` (which is None
153                // for Number/Bool/Null). A bare scalar that YAML reads as a
154                // non-string — `summary: 2026`, `id: 100`, `status: 0` — would
155                // otherwise be set to None AND dropped (it is a matched arm, so
156                // the raw value never reaches `extra`), and `to_yaml` then omits
157                // the None field, so `dbmd format` (read_file -> write_file)
158                // silently deletes the line from disk. `scalar_string` mirrors
159                // the coercion `validate`/`store` already apply to these fields,
160                // so a numeric/bool-looking scalar is preserved as its string
161                // form and round-trips instead of being destroyed.
162                "type" => fm.type_ = scalar_string(&v),
163                "id" => fm.id = scalar_string(&v),
164                "created" => fm.created = parse_timestamp(&v, "created", file)?,
165                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
166                "summary" => fm.summary = scalar_string(&v),
167                "status" => fm.status = scalar_string(&v),
168                "tags" => fm.tags = parse_tags(&v),
169                _ => {
170                    fm.extra.insert(key, v);
171                }
172            }
173        }
174        Ok(fm)
175    }
176
177    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
178    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
179    pub fn to_yaml(&self) -> String {
180        // Build an order-preserving mapping in canonical key order:
181        //   type, id, created, updated, summary  (universal head)
182        //   <type-specific extra, BTreeMap-sorted>
183        //   status, tags                          (universal tail)
184        // serde_norway::Mapping preserves insertion order, so one serialize call
185        // emits the block in exactly this order with correct YAML quoting.
186        let mut map = Mapping::new();
187
188        if let Some(t) = &self.type_ {
189            map.insert(Value::String("type".into()), Value::String(t.clone()));
190        }
191        if let Some(id) = &self.id {
192            map.insert(Value::String("id".into()), Value::String(id.clone()));
193        }
194        if let Some(created) = &self.created {
195            map.insert(
196                Value::String("created".into()),
197                Value::String(created.to_rfc3339()),
198            );
199        }
200        if let Some(updated) = &self.updated {
201            map.insert(
202                Value::String("updated".into()),
203                Value::String(updated.to_rfc3339()),
204            );
205        }
206        if let Some(summary) = &self.summary {
207            map.insert(
208                Value::String("summary".into()),
209                Value::String(summary.clone()),
210            );
211        }
212
213        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
214        // is canonicalized so a wiki-link round-trips to the form the writer and
215        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
216        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
217        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
218        // block sequence `- - x` that a verbatim re-emit would produce and that
219        // destroys the link. See [`canonicalize_extra_value`].
220        for (k, v) in &self.extra {
221            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
222        }
223
224        if let Some(status) = &self.status {
225            map.insert(
226                Value::String("status".into()),
227                Value::String(status.clone()),
228            );
229        }
230        if !self.tags.is_empty() {
231            map.insert(
232                Value::String("tags".into()),
233                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
234            );
235        }
236
237        if map.is_empty() {
238            return String::new();
239        }
240        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
241    }
242
243    /// True if the file is content (under `sources/`, `records/`, or `wiki/`)
244    /// and not an `index.md`. Used by validate to decide which files require a
245    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
246    pub fn is_content_file(path: &Path) -> bool {
247        // index.md is a meta file at every level, never content.
248        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
249            return false;
250        }
251        // Content iff some path component is one of the three layer dirs. This
252        // works for both store-relative (`sources/emails/x.md`) and absolute
253        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
254        // root, under no layer, so they fall through to false.
255        path.components().any(|c| {
256            c.as_os_str()
257                .to_str()
258                .is_some_and(|s| LAYER_DIRS.contains(&s))
259        })
260    }
261
262    /// Resolve the file's effective `id`: the explicit `id` field if present,
263    /// otherwise derived from the store-relative path (filename without `.md`).
264    pub fn effective_id(&self, store_relative_path: &Path) -> String {
265        if let Some(id) = &self.id {
266            if !id.is_empty() {
267                return id.clone();
268            }
269        }
270        // Derived id = filename without the `.md` extension.
271        store_relative_path
272            .file_stem()
273            .and_then(|s| s.to_str())
274            .unwrap_or_default()
275            .to_string()
276    }
277
278    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
279    /// typed fields first and then [`extra`](Frontmatter::extra).
280    pub fn get(&self, key: &str) -> Option<Value> {
281        match key {
282            "type" => self.type_.clone().map(Value::String),
283            "id" => self.id.clone().map(Value::String),
284            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
285            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
286            "summary" => self.summary.clone().map(Value::String),
287            "status" => self.status.clone().map(Value::String),
288            "tags" => {
289                if self.tags.is_empty() {
290                    None
291                } else {
292                    Some(Value::Sequence(
293                        self.tags.iter().cloned().map(Value::String).collect(),
294                    ))
295                }
296            }
297            _ => self.extra.get(key).cloned(),
298        }
299    }
300
301    /// Set a single frontmatter key from a string value, routing universal-
302    /// contract keys to their typed fields and everything else to
303    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
304    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
305        match key {
306            "type" => self.type_ = Some(value.to_string()),
307            "id" => self.id = Some(value.to_string()),
308            "created" => {
309                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
310            }
311            "updated" => {
312                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
313            }
314            "summary" => self.summary = Some(value.to_string()),
315            "status" => self.status = Some(value.to_string()),
316            "tags" => {
317                // Accept either a YAML flow list (`[a, b]`) or a single scalar
318                // tag. Anything that parses to a sequence becomes the tag list;
319                // otherwise the whole string is one tag.
320                self.tags = match serde_norway::from_str::<Value>(value) {
321                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
322                    _ => vec![value.to_string()],
323                };
324            }
325            _ => {
326                // A custom / type-specific field. The value is a scalar string by
327                // default, but the spec's list-valued link fields (e.g.
328                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
329                // block sequence of quoted wiki-links — never the flow-form string
330                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
331                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
332                // sequence whose every item is a clean single wiki-link, store the
333                // canonical sequence so `to_yaml` emits block form. Everything else
334                // — plain text, and a single inline `[[x]]` (which YAML reads as a
335                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
336                // verbatim scalar string, preserving the prior behavior.
337                let stored = parse_link_list_value(value)
338                    .unwrap_or_else(|| Value::String(value.to_string()));
339                self.extra.insert(key.to_string(), stored);
340            }
341        }
342        Ok(())
343    }
344
345    /// Extract every frontmatter field whose value is a wiki-link (scalar
346    /// inline form or a block-sequence list), pairing each with its key. The
347    /// validate engine checks these against `(link)` schema annotations.
348    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
349        let mut out = Vec::new();
350        // `summary` may carry navigational wiki-links (spec encourages it).
351        if let Some(summary) = &self.summary {
352            for link in extract_wiki_links(summary, Path::new("")) {
353                out.push(("summary".to_string(), link));
354            }
355        }
356        // Every type-specific / custom field: a scalar wiki-link or a list of
357        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
358        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
359        for (key, value) in &self.extra {
360            for link in links_in_field_value(value) {
361                out.push((key.clone(), link));
362            }
363        }
364        out
365    }
366}
367
/// One wiki-link occurrence: `[[target]]` or `[[target|display]]`.
///
/// The target is stored exactly as written. Whether it is a full
/// store-relative path (the expected form) or a short form (a validation
/// error) is recorded in [`is_full_path`](WikiLink::is_full_path).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// The text between `[[` and `]]`, minus any `|display` part.
    pub target: String,
    /// The display override after `|`, when one was written.
    pub display: Option<String>,
    /// `true` for a full store-relative path (contains a `/` and resolves
    /// under a known layer); `false` for a short form such as `sarah-chen`,
    /// which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// `true` when the target ends in `.md`. Validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Position of the link as `(file, line, column)`; line and column are
    /// 1-based.
    pub location: (PathBuf, u32, u32),
}
389
/// One standard markdown link, `[text](url)`.
///
/// These are external references. They are collected separately from
/// [`WikiLink`]s so they stay visible to the toolkit without being mistaken
/// for in-store edges, and they are not graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// What was written between `[` and `]`.
    pub text: String,
    /// What was written between `(` and `)`: a URL or a path.
    pub url: String,
    /// Position of the link as `(file, line, column)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
402
/// One `##` / `###` section of a markdown body.
///
/// Holds the heading text and an owned copy of the text the section spans:
/// from the heading line up to, but not including, the next heading of equal
/// or shallower depth.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// Heading text with the leading `#` characters removed.
    pub heading: String,
    /// Heading depth, i.e. how many leading `#` characters it has.
    pub level: u8,
    /// 1-based line number of the heading.
    pub line: u32,
    /// Text of the section, starting at the heading line and ending before
    /// the next sibling-or-shallower heading; copied from the original body.
    pub body: String,
}
418
419/// The parsed structured content of a store's `DB.md` config file.
420///
421/// All four parts are optional in the source; absent parts fall back to spec
422/// defaults. Produced by [`parse_db_md`].
423#[derive(Debug, Clone, Default, PartialEq)]
424pub struct Config {
425    /// Body of the `## Agent instructions` section — free-form prose passed to
426    /// the agent's system prompt.
427    pub agent_instructions: Option<String>,
428    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
429    /// refuses to write (`POLICY_FROZEN_PAGE`).
430    pub frozen_pages: Vec<PathBuf>,
431    /// `## Policies` → `### Ignored types`: type names the curator never
432    /// synthesizes (still readable as ambient context).
433    pub ignored_types: Vec<String>,
434    /// `## Schemas` → one entry per `### <type>` sub-section.
435    pub schemas: BTreeMap<String, Schema>,
436}
437
438impl Config {
439    /// The `### Frozen pages` entry that matches a store-relative `target`, if
440    /// any. The **single** frozen-page matcher every write surface must funnel
441    /// through so the policy is enforced identically on `write` / `fm set` /
442    /// `fm init` / `link` / `rename` / `format`.
443    ///
444    /// Comparison is normalized so a policy line and a write target match
445    /// regardless of incidental spelling differences:
446    /// - `/` path separators on every OS,
447    /// - a single leading `./` dropped,
448    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
449    ///   frozen entries verbatim, so an operator who writes the natural
450    ///   extensionless spelling (`records/decisions/q1`) must protect the file
451    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
452    ///
453    /// Returns the matched config entry verbatim (its original spelling) so the
454    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
455    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
456        let want = normalize_frozen_path(target);
457        self.frozen_pages
458            .iter()
459            .find(|frozen| normalize_frozen_path(frozen) == want)
460            .cloned()
461    }
462
463    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
464    /// over [`Config::frozen_match`] for callers that only need presence.
465    pub fn is_frozen(&self, target: &Path) -> bool {
466        self.frozen_match(target).is_some()
467    }
468}
469
/// Reduce a path to the canonical text form used for frozen-page comparison.
///
/// The path's components are joined with `/`, then one leading `./` and one
/// trailing `.md` are removed. Components that are not valid UTF-8 are left
/// out. Policy entries and write targets both go through this function
/// before being compared for equality.
fn normalize_frozen_path(p: &Path) -> String {
    let mut joined = String::new();
    let parts = p.components().filter_map(|c| c.as_os_str().to_str());
    for (index, part) in parts.enumerate() {
        if index > 0 {
            joined.push('/');
        }
        joined.push_str(part);
    }

    let without_dot = joined.strip_prefix("./").unwrap_or(&joined);
    let without_ext = without_dot.strip_suffix(".md").unwrap_or(without_dot);
    without_ext.to_owned()
}
483
484/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
485/// The store's `## Schemas` is the **only** source of schema enforcement — the
486/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
487#[derive(Debug, Clone, Default, PartialEq)]
488pub struct Schema {
489    /// One [`FieldSpec`] per bulleted field line, in source order.
490    pub fields: Vec<FieldSpec>,
491    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
492    /// uniqueness constraint over the listed field(s) (compound when >1). Two
493    /// records of this type whose listed values collide warn as
494    /// `DUP_UNIQUE_KEY`.
495    pub unique_keys: Vec<Vec<String>>,
496    /// `- summary_template: <template>` directive — the `{field}` interpolation
497    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
498    /// for this type. `None` falls back to the body's first paragraph.
499    pub summary_template: Option<String>,
500    /// `- shard: by-date | flat` directive — whether records of this type are
501    /// date-sharded on disk (`records/<type>/<YYYY>/<MM>/…`) or kept flat.
502    /// `None` = no directive declared, so the store's built-in default for the
503    /// type applies ([`crate::store::Store::type_shards`]); `Some(true)` forces
504    /// date-sharding (e.g. a custom event type the toolkit has no built-in for);
505    /// `Some(false)` forces flat. This is the v0.2 generic-model way to declare
506    /// sharding — the toolkit ships no implicit per-type behavior beyond the
507    /// example-type defaults.
508    pub shard: Option<bool>,
509}
510
511/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
512///
513/// Modifiers are comma-separated inside the parens; this captures the
514/// recognized ones as typed fields and stashes anything unrecognized in
515/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
516#[derive(Debug, Clone, Default, PartialEq)]
517pub struct FieldSpec {
518    /// The field name.
519    pub name: String,
520    /// `required` modifier present.
521    pub required: bool,
522    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
523    /// `url`), if any.
524    pub shape: Option<Shape>,
525    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
526    /// start with. The trailing slash is required in the source syntax.
527    pub link_prefix: Option<PathBuf>,
528    /// `default <value>` — the value written when the field is absent.
529    pub default: Option<Value>,
530    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
531    /// on the line because of its own commas).
532    pub enum_values: Option<Vec<String>>,
533    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
534    /// validate surfaces these as `Info`, never errors.
535    pub unknown_modifiers: Vec<String>,
536}
537
/// The value shapes a schema field can be declared with. Validate checks a
/// field's value against its shape and reports `SCHEMA_SHAPE_MISMATCH` when
/// it does not fit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// Any scalar string (`string`).
    String,
    /// An integer (`int`).
    Int,
    /// A boolean (`bool`).
    Bool,
    /// An RFC3339 / ISO-8601 date (`date`).
    Date,
    /// An email address of the form `<local>@<domain>` (`email`).
    Email,
    /// A currency amount (`currency`).
    Currency,
    /// A URL (`url`).
    Url,
}
557
/// A raw file separated into its frontmatter block and its body.
///
/// `body` is everything after the closing `---` fence, untouched, so the
/// writer can put it back byte-for-byte and never reflow operator edits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// The YAML text between the two fences; the fences are not included.
    pub frontmatter_yaml: String,
    /// The text following the closing `---` fence, verbatim.
    pub body: String,
}
569
570/// Split a file's full text into its frontmatter block and body. The
571/// frontmatter block must be the very first thing in the file, delimited by
572/// `---` on its own line at start and end. Returns
573/// [`ParseError::MissingFrontmatter`] if absent.
574pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
575    // Tolerate a single leading UTF-8 BOM (U+FEFF) before the opening fence,
576    // matching `store::frontmatter_block` and `index::extract_frontmatter_block`
577    // which already strip it. Without this, a BOM-prefixed file (common from
578    // Windows / exported markdown dropped into `sources/`) gets walked and
579    // indexed by `dbmd index` yet hard-fails every write/edit surface that
580    // routes through `read_file` (`fm get/set`, `format`, `link`, `write`). The
581    // BOM is dropped from the emitted body so the canonical writer never carries
582    // it forward.
583    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
584
585    // The opening fence must be the very first line: `---` (optionally with a
586    // trailing CR), no leading whitespace, nothing before it.
587    let mut lines = text.split_inclusive('\n');
588    let first = lines.next().unwrap_or("");
589    if first.trim_end_matches(['\r', '\n']) != "---" {
590        return Err(ParseError::MissingFrontmatter {
591            file: file.to_path_buf(),
592        });
593    }
594
595    // Scan for the closing fence line. Track byte offsets so we can slice the
596    // YAML (between fences, exclusive) and the body (verbatim, after the
597    // closing fence's line terminator).
598    let opening_len = first.len();
599    let mut offset = opening_len;
600    for line in lines {
601        if line.trim_end_matches(['\r', '\n']) == "---" {
602            let yaml = &text[opening_len..offset];
603            let body_start = offset + line.len();
604            let body = &text[body_start..];
605            return Ok(ParsedFile {
606                frontmatter_yaml: yaml.to_string(),
607                body: body.to_string(),
608            });
609        }
610        offset += line.len();
611    }
612
613    // Opening fence present but no closing fence: malformed frontmatter block.
614    Err(ParseError::MissingFrontmatter {
615        file: file.to_path_buf(),
616    })
617}
618
619/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
620/// verbatim body string.
621pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
622    let text = std::fs::read_to_string(path)?;
623    let parsed = split_frontmatter(&text, path)?;
624    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
625    Ok((fm, parsed.body))
626}
627
628/// Atomically write a markdown file from frontmatter + body: emit the
629/// frontmatter in canonical key order, then the body verbatim, via a
630/// temp-file-rename so a reader never sees a half-written file. Preserves the
631/// operator-edited body exactly as given.
632pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
633    let yaml = frontmatter.to_yaml();
634    // `to_yaml` already terminates each block with a newline. Compose the file
635    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
636    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
637    contents.push_str("---\n");
638    contents.push_str(&yaml);
639    contents.push_str("---\n");
640    contents.push_str(body);
641
642    // One durable, atomic write for all primary data (see `crate::fsx`):
643    // temp-file + fsync + rename + parent-fsync. Content records are primary
644    // data, so they get the durable path (unlike the rebuildable index).
645    crate::fsx::write_atomic(path, contents.as_bytes())?;
646    Ok(())
647}
648
649/// Extract every wiki-link from a body (and inline frontmatter), returning the
650/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
651/// `(file, line, col)` locations set.
652pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
653    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
654    let re = RE.get_or_init(|| {
655        // [[target]] or [[target|display]]; target/display exclude brackets and
656        // (for target) the `|` separator so nested forms don't over-match.
657        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
658    });
659
660    let mut out = Vec::new();
661    for (line_idx, line) in body.lines().enumerate() {
662        for caps in re.captures_iter(line) {
663            let whole = caps.get(0).expect("group 0 always present");
664            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
665            let display = caps.get(2).map(|m| m.as_str().to_string());
666            out.push(WikiLink {
667                is_full_path: target_is_full_path(&target),
668                has_md_extension: target_has_md_extension(&target),
669                target,
670                display,
671                location: (
672                    file.to_path_buf(),
673                    (line_idx as u32) + 1,
674                    char_column(line, whole.start()),
675                ),
676            });
677        }
678    }
679    out
680}
681
682/// Extract every standard markdown link `[text](url)` from a body into a
683/// separate stream, kept distinct from wiki-links.
684pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
685    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
686    let re = RE.get_or_init(|| {
687        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
688        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
689        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
690    });
691
692    let mut out = Vec::new();
693    for (line_idx, line) in body.lines().enumerate() {
694        for caps in re.captures_iter(line) {
695            let whole = caps.get(0).expect("group 0 always present");
696            out.push(MarkdownLink {
697                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
698                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
699                location: (
700                    file.to_path_buf(),
701                    (line_idx as u32) + 1,
702                    char_column(line, whole.start()),
703                ),
704            });
705        }
706    }
707    out
708}
709
710/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
711/// written so YAML parses it as nested sequences instead of a clean list of
712/// strings. Returns the offending keys so validate can emit
713/// `WIKI_LINK_FLOW_FORM_LIST`.
714///
715/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
716/// shapes nest:
717///
718/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
719///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
720/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
721///   (triple-nested). The list mis-encoding — flagged.
722/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
723///   is flagged too; the canonical list form must quote each item
724///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
725///
726/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
727/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
728/// single item is a sequence-of-scalars.
729pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
730    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
731        Ok(v) => v,
732        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
733        Err(_) => return Vec::new(),
734    };
735    let Value::Mapping(map) = value else {
736        return Vec::new();
737    };
738
739    let mut out = Vec::new();
740    for (k, v) in &map {
741        if let Value::Sequence(items) = v {
742            // Triple-nesting: some outer item is a sequence that itself holds a
743            // sequence. Scalar inline `[[x]]` is only double-nested, so it
744            // never matches.
745            let is_link_list = items.iter().any(|item| match item {
746                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
747                _ => false,
748            });
749            if is_link_list {
750                if let Some(key) = k.as_str() {
751                    out.push(key.to_string());
752                }
753            }
754        }
755    }
756    out
757}
758
759/// Extract the `##`/`###` sections of a markdown body into a flat list with
760/// body slices.
761pub fn extract_sections(body: &str) -> Vec<Section> {
762    // Keep each line's start so we can slice the body verbatim (exact newlines).
763    let lines: Vec<&str> = body.split_inclusive('\n').collect();
764
765    // First pass: classify heading levels (0 = not a heading), honoring fenced
766    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
767    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
768    let mut fence: Option<(u8, usize)> = None;
769    for line in &lines {
770        let content = line.trim_end_matches(['\n', '\r']);
771        if let Some(f) = fence {
772            if is_closing_fence(content, f) {
773                fence = None;
774            }
775            levels.push(0);
776            continue;
777        }
778        if let Some(opened) = opening_fence(content) {
779            fence = Some(opened);
780            levels.push(0);
781            continue;
782        }
783        levels.push(heading_level(content));
784    }
785
786    // Second pass: emit `##`+ headings; each section body runs from its heading
787    // line to the next heading at an equal-or-shallower level (exclusive).
788    let mut sections = Vec::new();
789    for (i, &lvl) in levels.iter().enumerate() {
790        if lvl < 2 {
791            continue;
792        }
793        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
794        let heading = heading_text(heading_line, lvl);
795
796        let mut end = lines.len();
797        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
798            if other != 0 && other <= lvl {
799                end = j;
800                break;
801            }
802        }
803
804        sections.push(Section {
805            heading,
806            level: lvl,
807            line: (i + 1) as u32,
808            body: lines[i..end].concat(),
809        });
810    }
811    sections
812}
813
814/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
815/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
816/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
817/// ignored; absent sections leave their [`Config`] fields at default.
818pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
819    // The structured sections live in the body (after frontmatter). DB.md must
820    // still start with a valid `---` block (`type: db-md`); if it's missing we
821    // surface MissingFrontmatter like any other file.
822    let parsed = split_frontmatter(text, file)?;
823    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
824    let sections = extract_sections(&parsed.body);
825
826    let mut config = Config::default();
827    // Track which H2 region each H3 belongs to as we walk the flat list.
828    let mut current_h2: Option<String> = None;
829
830    for section in &sections {
831        match section.level {
832            2 => {
833                let name = section.heading.trim().to_ascii_lowercase();
834                current_h2 = Some(name.clone());
835                if name == "agent instructions" {
836                    let prose = section_prose(&section.body);
837                    if !prose.is_empty() {
838                        config.agent_instructions = Some(prose);
839                    }
840                }
841            }
842            3 => {
843                let h2 = current_h2.as_deref().unwrap_or("");
844                let h3 = section.heading.trim().to_ascii_lowercase();
845                match (h2, h3.as_str()) {
846                    ("policies", "frozen pages") => {
847                        config.frozen_pages = bullet_lines(&section.body)
848                            .into_iter()
849                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
850                            .collect();
851                    }
852                    ("policies", "ignored types") => {
853                        config.ignored_types = bullet_lines(&section.body)
854                            .into_iter()
855                            .flat_map(|b| extract_type_list_bullet(&b))
856                            .collect();
857                    }
858                    ("schemas", _) => {
859                        // The H3 heading text (as written) is the type name.
860                        let type_name = section.heading.trim().to_string();
861                        let mut schema = Schema::default();
862                        for b in bullet_lines(&section.body) {
863                            match parse_schema_bullet(&b) {
864                                SchemaBullet::Field(f) => schema.fields.push(f),
865                                SchemaBullet::Unique(k) if !k.is_empty() => {
866                                    schema.unique_keys.push(k)
867                                }
868                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
869                                    schema.summary_template = Some(t)
870                                }
871                                SchemaBullet::Shard(Some(b)) => schema.shard = Some(b),
872                                // Empty `unique:`/`summary_template:`, or a `shard:`
873                                // with an unrecognized value — ignored.
874                                SchemaBullet::Unique(_)
875                                | SchemaBullet::SummaryTemplate(_)
876                                | SchemaBullet::Shard(None) => {}
877                            }
878                        }
879                        config.schemas.insert(type_name, schema);
880                    }
881                    _ => {}
882                }
883            }
884            _ => {}
885        }
886    }
887
888    Ok(config)
889}
890
891/// One parsed bullet inside a `### <type>` schema block: an ordinary field, or a
892/// reserved directive (`unique:` / `summary_template:` / `shard:`). The names
893/// `unique`, `summary_template`, and `shard` are reserved and cannot be used as
894/// field names.
895#[derive(Debug)]
896enum SchemaBullet {
897    /// An ordinary `- <name> (<modifiers>)` field.
898    Field(FieldSpec),
899    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key.
900    Unique(Vec<String>),
901    /// `- summary_template: <template>` — the default-`summary` pattern.
902    SummaryTemplate(String),
903    /// `- shard: by-date | flat` — date-shard records of this type, or keep them
904    /// flat. `None` = an unrecognized value, ignored like an unknown modifier.
905    Shard(Option<bool>),
906}
907
908/// Classify one `## Schemas` bullet as a directive or a field. The directive
909/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
910/// guards against false positives — a field like `- status (enum: a, b)` has a
911/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
912/// as a [`FieldSpec`].
913fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
914    let line = bullet_line.trim();
915    let line = line
916        .strip_prefix("- ")
917        .or_else(|| line.strip_prefix("* "))
918        .or_else(|| line.strip_prefix("+ "))
919        .or_else(|| line.strip_prefix('-'))
920        .unwrap_or(line)
921        .trim();
922
923    if let Some((head, rest)) = line.split_once(':') {
924        match head.trim().to_ascii_lowercase().as_str() {
925            "unique" => {
926                let fields = rest
927                    .split(',')
928                    .map(|f| f.trim().to_string())
929                    .filter(|f| !f.is_empty())
930                    .collect();
931                return SchemaBullet::Unique(fields);
932            }
933            "summary_template" => {
934                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
935            }
936            "shard" => {
937                // `by-date` (synonyms: date/sharded/true) enables date-sharding;
938                // `flat` (none/false) forces flat; anything else is ignored.
939                let v = match rest.trim().to_ascii_lowercase().as_str() {
940                    "by-date" | "date" | "sharded" | "true" => Some(true),
941                    "flat" | "none" | "false" => Some(false),
942                    _ => None,
943                };
944                return SchemaBullet::Shard(v);
945            }
946            _ => {}
947        }
948    }
949
950    SchemaBullet::Field(parse_field_spec(bullet_line))
951}
952
953/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
954/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
955/// in [`FieldSpec::unknown_modifiers`].
956pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
957    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
958    let line = bullet_line.trim();
959    let line = line
960        .strip_prefix("- ")
961        .or_else(|| line.strip_prefix("* "))
962        .or_else(|| line.strip_prefix("+ "))
963        .or_else(|| line.strip_prefix('-'))
964        .unwrap_or(line)
965        .trim();
966
967    // Split `<name> (<modifiers>)`. A bullet without parens is a free-form
968    // optional field of any shape — name only, no modifiers.
969    let (name, modifiers) = match line.find('(') {
970        Some(open) => {
971            let name = line[..open].trim().to_string();
972            let after = &line[open + 1..];
973            let mods = match after.rfind(')') {
974                Some(close) => &after[..close],
975                None => after, // tolerate a missing close paren
976            };
977            (name, mods.trim())
978        }
979        None => (line.to_string(), ""),
980    };
981
982    let mut spec = FieldSpec {
983        name,
984        ..FieldSpec::default()
985    };
986
987    if modifiers.is_empty() {
988        return spec;
989    }
990
991    // Modifiers are comma-separated. `enum:` is special: because its own value
992    // list contains commas, it must be last and swallows the remainder.
993    let raw: Vec<&str> = modifiers.split(',').collect();
994    let mut i = 0;
995    while i < raw.len() {
996        let token = raw[i].trim();
997        if token.is_empty() {
998            i += 1;
999            continue;
1000        }
1001        let lower = token.to_ascii_lowercase();
1002
1003        if lower == "required" {
1004            spec.required = true;
1005        } else if let Some(shape) = shape_from_str(&lower) {
1006            spec.shape = Some(shape);
1007        } else if let Some(rest) = lower.strip_prefix("link to ") {
1008            // The trailing slash is required in the source; store the prefix
1009            // without it so `Path::starts_with` comparisons are clean.
1010            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
1011            let _ = rest; // lowercase form only used for the keyword match
1012            spec.link_prefix = Some(PathBuf::from(prefix));
1013        } else if let Some(_rest) = lower.strip_prefix("default ") {
1014            // Value is everything after the keyword on this comma-token,
1015            // preserving original case.
1016            let value = token["default ".len()..].trim().to_string();
1017            spec.default = Some(Value::String(value));
1018        } else if lower == "enum" {
1019            // Bare `enum` keyword (`enum, open, closed`): the values are the
1020            // REMAINING tokens — the keyword itself must not leak in as a value.
1021            let values: Vec<String> = raw[i + 1..]
1022                .iter()
1023                .map(|v| v.trim().to_string())
1024                .filter(|v| !v.is_empty())
1025                .collect();
1026            spec.enum_values = Some(values);
1027            break; // enum consumed the rest of the line
1028        } else if lower.starts_with("enum:") {
1029            // `enum: open, closed` form: rejoin this token and the rest, then
1030            // drop everything up to and including the `:`.
1031            let mut joined = raw[i..].join(",");
1032            if let Some(colon) = joined.find(':') {
1033                joined = joined[colon + 1..].to_string();
1034            }
1035            let values: Vec<String> = joined
1036                .split(',')
1037                .map(|v| v.trim().to_string())
1038                .filter(|v| !v.is_empty())
1039                .collect();
1040            spec.enum_values = Some(values);
1041            break; // enum consumed the rest of the line
1042        } else {
1043            // Unrecognized modifier — captured verbatim, surfaced as Info.
1044            spec.unknown_modifiers.push(token.to_string());
1045        }
1046        i += 1;
1047    }
1048
1049    spec
1050}
1051
1052// ── Private helpers ─────────────────────────────────────────────────────────
1053
1054/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1055/// is treated as absent; anything else must be an RFC3339 string.
1056fn parse_timestamp(
1057    value: &Value,
1058    key: &str,
1059    file: &Path,
1060) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1061    match value {
1062        Value::Null => Ok(None),
1063        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1064        other => Err(ParseError::BadTimestamp {
1065            file: file.to_path_buf(),
1066            key: key.to_string(),
1067            value: format!("{other:?}"),
1068        }),
1069    }
1070}
1071
1072/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1073fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1074    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1075        file: file.to_path_buf(),
1076        key: key.to_string(),
1077        value: s.to_string(),
1078    })
1079}
1080
1081/// Coerce a YAML scalar value to its string form for the universal-contract
1082/// fields (`type`/`id`/`summary`/`status`). Mirrors `validate::scalar_string`
1083/// and `store::yaml_scalar_string` so the four modules agree on one coercion
1084/// rule: a bare numeric/bool scalar (`id: 100`, `summary: 2026`, `status: 0`)
1085/// is preserved as its string form rather than being read as None and silently
1086/// dropped on the next `to_yaml` re-emit. Returns `None` only for genuinely
1087/// non-scalar values (sequences, mappings, null), which were never a valid
1088/// shape for these fields.
1089fn scalar_string(value: &Value) -> Option<String> {
1090    match value {
1091        Value::String(s) => Some(s.clone()),
1092        Value::Number(n) => Some(n.to_string()),
1093        Value::Bool(b) => Some(b.to_string()),
1094        _ => None,
1095    }
1096}
1097
1098/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1099/// (the canonical form) or a single scalar (coerced to a one-element list).
1100fn parse_tags(value: &Value) -> Vec<String> {
1101    match value {
1102        Value::Sequence(items) => items
1103            .iter()
1104            .filter_map(|v| match v {
1105                Value::String(s) => Some(s.clone()),
1106                Value::Number(n) => Some(n.to_string()),
1107                Value::Bool(b) => Some(b.to_string()),
1108                _ => None,
1109            })
1110            .collect(),
1111        Value::String(s) => vec![s.clone()],
1112        _ => Vec::new(),
1113    }
1114}
1115
1116/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1117/// location, or `None` if the string is not a bare wiki-link. Used for
1118/// frontmatter-valued links where there is no body position to report.
1119fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1120    let s = s.trim();
1121    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1122    // Reject anything with further brackets (e.g. the nested flow-form item),
1123    // which is not a clean single wiki-link.
1124    if inner.contains('[') || inner.contains(']') {
1125        return None;
1126    }
1127    let (target, display) = match inner.split_once('|') {
1128        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1129        None => (inner.to_string(), None),
1130    };
1131    Some(WikiLink {
1132        is_full_path: target_is_full_path(&target),
1133        has_md_extension: target_has_md_extension(&target),
1134        target,
1135        display,
1136        location: (PathBuf::new(), 0, 0),
1137    })
1138}
1139
1140/// Extract every wiki-link from a single frontmatter field value, accepting the
1141/// two canonical forms the spec defines (SPEC § Linking):
1142///
1143/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1144///   canonical unquoted inline (`f: [[x]]`) form, and
1145/// - a **list** field whose items are quoted wiki-link strings
1146///   (`- "[[x]]"`).
1147///
1148/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1149/// so the parsed [`Value`] shapes are not what one would naively expect:
1150///
1151/// | source                         | parsed `Value`                     | here |
1152/// |--------------------------------|------------------------------------|------|
1153/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1154/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1155/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1156/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1157///
1158/// The last row — an *unquoted list* — parses identically to the flow-form list
1159/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1160/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1161/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1162/// edge enumerator only ever sees the valid canonical forms.
1163///
1164/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1165/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1166/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1167fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1168    // Quoted scalar: `field: "[[x]]"`.
1169    if let Value::String(s) = value {
1170        return parse_wiki_link_str(s).into_iter().collect();
1171    }
1172    let Value::Sequence(items) = value else {
1173        return Vec::new();
1174    };
1175    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1176    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1177    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1178    if items.len() == 1 {
1179        if let Some(link) = unquoted_inline_link(&items[0]) {
1180            return vec![link];
1181        }
1182    }
1183    // Otherwise a list of quoted wiki-link strings; non-string items (the
1184    // unquoted-list mis-encoding) are left for validate to flag.
1185    items
1186        .iter()
1187        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1188        .collect()
1189}
1190
1191/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1192///
1193/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1194/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1195/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1196/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1197/// Re-emitting that verbatim yields the block sequence
1198///
1199/// ```text
1200/// company:
1201/// - - records/companies/northstar
1202/// ```
1203///
1204/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1205/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1206/// a value back into the canonical emitted form before it is written:
1207///
1208/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1209///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1210///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1211///   that [`links_in_field_value`] reads back as the same scalar link;
1212/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1213///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1214///   `set` write-in path and the canonical list form;
1215/// - everything else → returned verbatim (the common no-op for non-link values).
1216///
1217/// `|display` is preserved in both link branches. This is the single point that
1218/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1219/// corrupting a pre-existing canonical link, since they all funnel through
1220/// `to_yaml`.
1221fn canonicalize_extra_value(value: &Value) -> Value {
1222    match value {
1223        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1224        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1225        // YAML nested sequence). Non-link strings are returned untouched.
1226        Value::String(s) => match parse_wiki_link_str(s) {
1227            Some(link) => Value::String(wiki_link_literal(&link)),
1228            None => value.clone(),
1229        },
1230        Value::Sequence(items) => {
1231            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1232            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1233            // scalar string so the link is preserved rather than block-emitted.
1234            if items.len() == 1 {
1235                if let Some(link) = unquoted_inline_link(&items[0]) {
1236                    return Value::String(wiki_link_literal(&link));
1237                }
1238            }
1239            // List of wiki-links: re-emit as a block sequence of quoted-link
1240            // strings, the canonical list form `to_yaml` renders block-style and
1241            // `links_in_field_value` accepts. Only canonicalize when *every* item
1242            // is a clean single wiki-link; a list with any non-link item is left
1243            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1244            // validate flags) are untouched.
1245            let mut links = Vec::with_capacity(items.len());
1246            for item in items {
1247                match link_from_flow_list_item(item) {
1248                    Some(link) => links.push(link),
1249                    None => return value.clone(),
1250                }
1251            }
1252            if links.is_empty() {
1253                return value.clone();
1254            }
1255            Value::Sequence(
1256                links
1257                    .iter()
1258                    .map(|l| Value::String(wiki_link_literal(l)))
1259                    .collect(),
1260            )
1261        }
1262        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1263        _ => value.clone(),
1264    }
1265}
1266
1267/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1268/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1269fn wiki_link_literal(link: &WikiLink) -> String {
1270    match &link.display {
1271        Some(d) => format!("[[{}|{}]]", link.target, d),
1272        None => format!("[[{}]]", link.target),
1273    }
1274}
1275
1276/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1277/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1278/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1279/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1280/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1281/// from being mistaken for a wiki-link.
1282fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1283    let Value::Sequence(items) = v else {
1284        return None;
1285    };
1286    if items.len() != 1 {
1287        return None;
1288    }
1289    let s = items[0].as_str()?;
1290    // A clean unquoted wiki-link has no further brackets inside it.
1291    if s.contains('[') || s.contains(']') {
1292        return None;
1293    }
1294    parse_wiki_link_str(&format!("[[{s}]]"))
1295}
1296
1297/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1298/// wiki-links** that should be stored as a YAML block sequence, returning the
1299/// canonical `Value::Sequence` of quoted-link strings when so.
1300///
1301/// The value path of every write surface stringifies its argument; without this
1302/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1303/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1304/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1305/// two list spellings an agent naturally types and normalizes both to the block
1306/// form the canonical writer emits and `dbmd validate` accepts:
1307///
1308/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1309/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1310///
1311/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1312/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1313/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1314/// renders inline), an empty list, or a list with any non-link item. A single
1315/// link must stay scalar; only genuine multi-item-or-explicit lists become
1316/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1317/// validator never disagree.
1318fn parse_link_list_value(value: &str) -> Option<Value> {
1319    let trimmed = value.trim();
1320    // Only a YAML *flow sequence* literal is a list candidate; anything not
1321    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1322    // the single-inline-link guard below).
1323    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1324        return None;
1325    }
1326    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1327        return None;
1328    };
1329    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1330    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1331    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1332    // list. `links_in_field_value` reads it back as a scalar link either way.
1333    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1334        return None;
1335    }
1336    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1337    // spellings an agent types (see [`link_from_flow_list_item`]).
1338    let mut links = Vec::with_capacity(items.len());
1339    for item in &items {
1340        links.push(link_from_flow_list_item(item)?);
1341    }
1342    if links.is_empty() {
1343        return None;
1344    }
1345    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1346    // renders block-style and `links_in_field_value` accepts. `|display` is
1347    // preserved.
1348    let normalized = links
1349        .iter()
1350        .map(|l| Value::String(wiki_link_literal(l)))
1351        .collect();
1352    Some(Value::Sequence(normalized))
1353}
1354
1355/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1356/// across the spellings an agent types for a list. After top-level flow parsing,
1357/// a list item arrives in one of:
1358///
1359/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1360/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1361///   (one level deeper than a bare unquoted scalar, because the surrounding list
1362///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1363///   `Seq[String(x)]` with [`unquoted_inline_link`].
1364///
1365/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1366/// caller falls back to a scalar string and never fabricates a partial list.
1367fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1368    match item {
1369        Value::String(s) => parse_wiki_link_str(s),
1370        Value::Sequence(inner) => {
1371            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1372            // wrapper to expose the inline-link shape.
1373            if inner.len() == 1 {
1374                if let Some(link) = unquoted_inline_link(&inner[0]) {
1375                    return Some(link);
1376                }
1377            }
1378            // Defensive: also accept the inline-link shape directly.
1379            unquoted_inline_link(item)
1380        }
1381        _ => None,
1382    }
1383}
1384
1385/// A target is a full store-relative path when its first path segment is one of
1386/// the three canonical layer dirs and at least one `/` separator follows. A
1387/// trailing `.md` does not affect this classification.
1388fn target_is_full_path(target: &str) -> bool {
1389    let target = target.trim();
1390    match target.split_once('/') {
1391        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1392        None => false,
1393    }
1394}
1395
/// Report whether the (whitespace-trimmed) target ends in a literal `.md`.
/// The comparison is case-sensitive. Validate uses this to warn
/// `WIKI_LINK_HAS_EXTENSION`.
fn target_has_md_extension(target: &str) -> bool {
    let cleaned = target.trim();
    cleaned.strip_suffix(".md").is_some()
}
1401
/// Convert a byte offset into `line` to a 1-based column counted in Unicode
/// scalar values (`char`s), not bytes.
///
/// `byte_offset` must lie on a `char` boundary within `line`; the prefix slice
/// panics otherwise.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let prefix = &line[..byte_offset];
    let chars_before = prefix.chars().count() as u32;
    chars_before + 1
}
1406
1407/// Map a lowercase shape keyword to its [`Shape`].
1408fn shape_from_str(s: &str) -> Option<Shape> {
1409    match s {
1410        "string" => Some(Shape::String),
1411        "int" => Some(Shape::Int),
1412        "bool" => Some(Shape::Bool),
1413        "date" => Some(Shape::Date),
1414        "email" => Some(Shape::Email),
1415        "currency" => Some(Shape::Currency),
1416        "url" => Some(Shape::Url),
1417        _ => None,
1418    }
1419}
1420
/// Classify `line` as an ATX heading and return its level (1–6), or 0 when it
/// is not a heading.
///
/// A heading may be indented by at most three spaces, must open with a run of
/// one to six `#`, and that run must be followed by a space, a tab, or the end
/// of the line.
fn heading_level(line: &str) -> u8 {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return 0;
    }
    let after_run = unindented.trim_start_matches('#');
    let run = unindented.len() - after_run.len();
    let run_in_range = (1..=6).contains(&run);
    let run_terminated = after_run.is_empty() || after_run.starts_with([' ', '\t']);
    if run_in_range && run_terminated {
        run as u8
    } else {
        0
    }
}
1441
/// The heading text after the opening `#` run, trimmed, with an optional ATX
/// closing sequence removed (`## Title ##` → `Title`).
///
/// `level` is the length of the opening `#` run, as returned by
/// `heading_level` for the same `line`.
///
/// A trailing run of `#` only counts as a closing sequence when it is
/// separated from the text by a space or tab, or when it is the entire
/// content (`## ##` → empty heading). A `#` glued to the text is part of the
/// heading, so `## C#` yields `C#`, not `C`.
fn heading_text(line: &str, level: u8) -> String {
    let indent = line.len() - line.trim_start_matches(' ').len();
    let content = line[indent + usize::from(level)..].trim();
    let without_closer = content.trim_end_matches('#');
    let has_trailing_hashes = without_closer.len() < content.len();
    // CommonMark: the closing sequence must be preceded by a space or tab
    // (or stand alone as the whole content).
    let closer_is_detached = without_closer.is_empty() || without_closer.ends_with([' ', '\t']);
    if has_trailing_hashes && closer_is_detached {
        without_closer.trim_end().to_string()
    } else {
        content.to_string()
    }
}
1455
/// Detect a fenced-code-block opener.
///
/// Returns `Some((fence byte, run length))` when `line`, after at most three
/// leading spaces, starts with a run of three or more backticks or tildes.
/// For a backtick fence the text after the run must not contain another
/// backtick. Every other line yields `None`.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let bytes = line.as_bytes();
    let indent = bytes.iter().take_while(|&&b| b == b' ').count();
    if indent > 3 {
        return None;
    }
    let candidate = &bytes[indent..];
    let marker = *candidate.first()?;
    if !matches!(marker, b'`' | b'~') {
        return None;
    }
    let run = candidate.iter().take_while(|&&b| b == marker).count();
    if run < 3 {
        return None;
    }
    // A backtick fence's info string may not itself contain a backtick.
    let info = &candidate[run..];
    if marker == b'`' && info.contains(&b'`') {
        return None;
    }
    Some((marker, run))
}
1477
/// Check whether `line` closes the fence described by `fence`
/// (`(fence byte, opening run length)`).
///
/// The line qualifies when it is indented by at most three spaces, starts with
/// a run of the fence character at least as long as the opening run, and
/// carries nothing but whitespace after that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (byte, open_len) = fence;
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return false;
    }
    let tail = unindented.trim_start_matches(char::from(byte));
    let run = unindented.len() - tail.len();
    run >= open_len && tail.trim().is_empty()
}
1493
/// Return a section's prose: the text following its first line (the heading),
/// with surrounding whitespace trimmed. A body with no newline has no prose
/// and yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .split_once('\n')
        .map(|(_heading, prose)| prose.trim().to_string())
        .unwrap_or_default()
}
1501
/// Collect the bullet lines of a section body.
///
/// The first line (the heading) is skipped. Every remaining line is trimmed on
/// both sides and kept when it begins with `- `, `* `, or `+ `.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];
    let mut bullets = Vec::new();
    // `skip(1)` drops the heading line.
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|&marker| line.starts_with(marker)) {
            bullets.push(line.to_owned());
        }
    }
    bullets
}
1513
/// Drop a trailing comment from a bullet's content.
///
/// The content is cut at the earliest occurrence of any space-padded comment
/// separator — em dash (` — `), double hyphen (` -- `), or en dash (` – `) —
/// and the remaining prefix is returned trimmed. Content without a separator
/// is returned trimmed and otherwise unchanged.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – "]
        .iter()
        .filter_map(|&sep| content.find(sep))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1525
/// Return what follows a bullet's leading marker (`- `, `* `, or `+ `),
/// trimmed. A line that carries no marker is returned trimmed as-is.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    for marker in ["- ", "* ", "+ "] {
        if let Some(rest) = line.strip_prefix(marker) {
            return rest.trim();
        }
    }
    line
}
1535
1536/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1537/// wrapped in backticks and followed by an em-dash comment.
1538fn extract_path_bullet(bullet: &str) -> String {
1539    let content = bullet_content(bullet);
1540    // Prefer a backtick-delimited span if present.
1541    if let Some(start) = content.find('`') {
1542        if let Some(end_rel) = content[start + 1..].find('`') {
1543            return content[start + 1..start + 1 + end_rel].trim().to_string();
1544        }
1545    }
1546    // Otherwise take the text up to a comment separator, stripping quotes.
1547    strip_bullet_comment(content)
1548        .trim_matches('"')
1549        .trim_matches('\'')
1550        .trim()
1551        .to_string()
1552}
1553
1554/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1555/// backticks/quotes and any trailing em-dash comment.
1556fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1557    let content = strip_bullet_comment(bullet_content(bullet));
1558    content
1559        .split(',')
1560        .map(|t| {
1561            t.trim()
1562                .trim_matches('`')
1563                .trim_matches('"')
1564                .trim_matches('\'')
1565                .trim()
1566                .to_string()
1567        })
1568        .filter(|t| !t.is_empty())
1569        .collect()
1570}
1571
1572#[cfg(test)]
1573mod tests {
1574    use super::*;
1575    use std::path::Path;
1576    use tempfile::tempdir;
1577
1578    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1579
1580    #[test]
1581    fn frozen_match_is_md_insensitive_both_directions() {
1582        // A policy entry stored WITHOUT `.md` (the natural extensionless
1583        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1584        // write target — the regression every write surface had.
1585        let cfg = Config {
1586            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1587            ..Config::default()
1588        };
1589        assert_eq!(
1590            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1591            Some(PathBuf::from("records/decisions/q1")),
1592            "extensionless policy entry must freeze the .md file"
1593        );
1594        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1595
1596        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1597        let cfg = Config {
1598            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1599            ..Config::default()
1600        };
1601        assert_eq!(
1602            cfg.frozen_match(Path::new("records/decisions/q1")),
1603            Some(PathBuf::from("records/decisions/q1.md")),
1604        );
1605        // And the same-spelling cases still match.
1606        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1607    }
1608
1609    #[test]
1610    fn frozen_match_drops_leading_dot_slash() {
1611        let cfg = Config {
1612            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1613            ..Config::default()
1614        };
1615        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
1616        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
1617    }
1618
1619    #[test]
1620    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
1621        let cfg = Config {
1622            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1623            ..Config::default()
1624        };
1625        assert!(cfg
1626            .frozen_match(Path::new("records/decisions/q2.md"))
1627            .is_none());
1628        // A prefix is not a match: `q1` must not freeze `q1-draft`.
1629        assert!(cfg
1630            .frozen_match(Path::new("records/decisions/q1-draft.md"))
1631            .is_none());
1632        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
1633    }
1634
1635    // ── split_frontmatter ───────────────────────────────────────────────────
1636
1637    #[test]
1638    fn split_frontmatter_separates_yaml_and_verbatim_body() {
1639        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
1640        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1641        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
1642        // Body is everything after the closing fence's newline, byte-for-byte.
1643        assert_eq!(p.body, "# Heading\n\nBody line.\n");
1644    }
1645
1646    #[test]
1647    fn split_frontmatter_preserves_body_without_trailing_newline() {
1648        let text = "---\ntype: x\n---\nno trailing newline";
1649        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1650        assert_eq!(p.body, "no trailing newline");
1651    }
1652
1653    #[test]
1654    fn split_frontmatter_empty_body_when_nothing_after_fence() {
1655        let text = "---\ntype: x\n---\n";
1656        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1657        assert_eq!(p.body, "");
1658    }
1659
1660    #[test]
1661    fn split_frontmatter_missing_opening_fence_errors() {
1662        let text = "# No frontmatter here\ntype: x\n";
1663        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1664        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1665    }
1666
1667    #[test]
1668    fn split_frontmatter_leading_content_before_fence_rejected() {
1669        // The opening fence must be the very first line; a blank line first is
1670        // not allowed.
1671        let text = "\n---\ntype: x\n---\nbody";
1672        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1673        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1674    }
1675
1676    #[test]
1677    fn split_frontmatter_unterminated_block_errors() {
1678        let text = "---\ntype: x\nsummary: y\n";
1679        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1680        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1681    }
1682
1683    // ── Frontmatter::parse ───────────────────────────────────────────────────
1684
1685    #[test]
1686    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
1687        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
1688        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1689        assert_eq!(fm.type_.as_deref(), Some("contact"));
1690        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
1691        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
1692        assert_eq!(fm.status.as_deref(), Some("active"));
1693        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
1694        // Type-specific fields are NOT promoted to typed slots.
1695        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
1696        assert!(!fm.extra.contains_key("tags"));
1697        assert_eq!(
1698            fm.extra.get("email").and_then(|v| v.as_str()),
1699            Some("sarah@northstar.io")
1700        );
1701        assert_eq!(
1702            fm.extra.get("role").and_then(|v| v.as_str()),
1703            Some("Director")
1704        );
1705    }
1706
1707    #[test]
1708    fn parse_reads_rfc3339_timestamps() {
1709        let yaml =
1710            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
1711        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1712        let created = fm.created.expect("created parsed");
1713        // -07:00 offset is 7 * 3600 seconds west.
1714        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
1715        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
1716        assert!(fm.updated.is_some());
1717    }
1718
1719    #[test]
1720    fn parse_rejects_non_rfc3339_timestamp() {
1721        // A date-only value is not a full RFC3339 timestamp; created/updated
1722        // require the full form.
1723        let yaml = "type: email\ncreated: 2026-05-27";
1724        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1725        match err {
1726            ParseError::BadTimestamp { key, value, .. } => {
1727                assert_eq!(key, "created");
1728                assert_eq!(value, "2026-05-27");
1729            }
1730            other => panic!("expected BadTimestamp, got {other:?}"),
1731        }
1732    }
1733
1734    #[test]
1735    fn parse_malformed_yaml_errors() {
1736        // Unclosed flow mapping is invalid YAML.
1737        let yaml = "type: contact\n  bad: : :\n- nope";
1738        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1739        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1740    }
1741
1742    #[test]
1743    fn frontmatter_with_yaml_tag_on_mapping_does_not_panic() {
1744        // Regression: a YAML tag on the top-level mapping made the old
1745        // `expect_err` path PANIC, because a tagged mapping deserializes to a
1746        // `Mapping` just fine. It must now be handled — accepted as the inner
1747        // mapping, never a panic.
1748        let fm = Frontmatter::parse("!mytag\ntype: contact\nsummary: hi\n", Path::new("x.md"))
1749            .expect("tagged-mapping frontmatter must parse, not panic");
1750        assert_eq!(fm.type_.as_deref(), Some("contact"));
1751        // A genuine scalar/sequence top level is still malformed (and still
1752        // doesn't panic).
1753        assert!(Frontmatter::parse("- a\n- b\n", Path::new("x.md")).is_err());
1754    }
1755
1756    #[test]
1757    fn parse_empty_block_is_empty_frontmatter() {
1758        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
1759        assert_eq!(fm, Frontmatter::default());
1760    }
1761
1762    #[test]
1763    fn parse_scalar_top_level_is_malformed() {
1764        // A bare scalar at the top level is not a frontmatter mapping.
1765        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
1766        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1767    }
1768
1769    // ── to_yaml canonical order ──────────────────────────────────────────────
1770
1771    #[test]
1772    fn to_yaml_emits_canonical_key_order() {
1773        let mut fm = Frontmatter {
1774            type_: Some("contact".into()),
1775            id: Some("sarah-chen".into()),
1776            summary: Some("Director of Ops".into()),
1777            status: Some("active".into()),
1778            tags: vec!["vip".into()],
1779            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1780            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
1781            ..Default::default()
1782        };
1783        // Two type-specific fields, inserted in NON-alphabetical order to prove
1784        // the writer sorts them (BTreeMap) between the universal head and tail.
1785        fm.extra
1786            .insert("role".into(), Value::String("Director".into()));
1787        fm.extra.insert(
1788            "company".into(),
1789            Value::String("[[records/companies/northstar]]".into()),
1790        );
1791
1792        let yaml = fm.to_yaml();
1793        let keys: Vec<&str> = yaml
1794            .lines()
1795            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
1796            .map(|l| l.split(':').next().unwrap())
1797            .collect();
1798        assert_eq!(
1799            keys,
1800            vec![
1801                "type", "id", "created", "updated", "summary", // universal head
1802                "company", "role",   // type-specific, sorted
1803                "status", // universal tail
1804                "tags",
1805            ],
1806            "canonical order violated; got:\n{yaml}"
1807        );
1808        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
1809        assert!(
1810            yaml.contains("2026-05-27T08:00:00-07:00"),
1811            "created timestamp missing; got:\n{yaml}"
1812        );
1813        // The value re-parses to the same instant regardless of quoting.
1814        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
1815        assert_eq!(reparsed.created, fm.created);
1816        assert_eq!(reparsed.updated, fm.updated);
1817    }
1818
1819    #[test]
1820    fn to_yaml_omits_absent_optional_fields() {
1821        let fm = Frontmatter {
1822            type_: Some("note".into()),
1823            ..Default::default()
1824        };
1825        let yaml = fm.to_yaml();
1826        assert!(yaml.contains("type: note"));
1827        assert!(!yaml.contains("status"));
1828        assert!(!yaml.contains("tags"));
1829        assert!(!yaml.contains("summary"));
1830    }
1831
1832    // ── Regression: non-string scalar universal fields round-trip (finding #1) ─
1833
1834    #[test]
1835    fn regression_parse_preserves_non_string_scalar_universal_fields() {
1836        // A hand/externally-authored file whose universal fields are bare
1837        // scalars YAML reads as Number/Bool — `id: 100`, `summary: 2026`,
1838        // `status: 0`, `type: 42` — must be PRESERVED as their string form, not
1839        // read as None. Before the fix, `v.as_str()` returned None for these and
1840        // the matched arm discarded the value entirely (never reaching `extra`).
1841        let yaml = "type: 42\nid: 100\nsummary: 2026\nstatus: 0";
1842        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
1843        assert_eq!(fm.type_.as_deref(), Some("42"), "type scalar dropped");
1844        assert_eq!(fm.id.as_deref(), Some("100"), "id scalar dropped");
1845        assert_eq!(
1846            fm.summary.as_deref(),
1847            Some("2026"),
1848            "summary scalar dropped"
1849        );
1850        assert_eq!(fm.status.as_deref(), Some("0"), "status scalar dropped");
1851        // The values must surface through the public `get` accessor too.
1852        assert_eq!(
1853            fm.get("summary")
1854                .and_then(|v| v.as_str().map(str::to_string)),
1855            Some("2026".to_string())
1856        );
1857    }
1858
1859    #[test]
1860    fn regression_format_round_trip_does_not_delete_numeric_frontmatter() {
1861        // The exact finding-#1 trigger: `dbmd format` is read_file -> write_file.
1862        // A file whose `id`/`summary`/`status` are bare numeric scalars must
1863        // still carry those fields after the canonical re-emit. Before the fix,
1864        // the lines were silently deleted from disk (only `type` survived).
1865        let dir = tempdir().unwrap();
1866        let path = dir.path().join("x.md");
1867        let original = "---\ntype: contact\nid: 100\nsummary: 2026\nstatus: 0\n---\nbody\n";
1868        std::fs::write(&path, original).unwrap();
1869
1870        // Re-emit through the canonical writer, exactly as `dbmd format` does.
1871        let (fm, body) = read_file(&path).unwrap();
1872        write_file(&path, &fm, &body).unwrap();
1873
1874        let after = std::fs::read_to_string(&path).unwrap();
1875        // None of the four fields may vanish; they survive as string scalars.
1876        let reparsed = Frontmatter::parse(
1877            &split_frontmatter(&after, &path).unwrap().frontmatter_yaml,
1878            &path,
1879        )
1880        .unwrap();
1881        assert_eq!(reparsed.type_.as_deref(), Some("contact"));
1882        assert_eq!(reparsed.id.as_deref(), Some("100"), "id deleted by format");
1883        assert_eq!(
1884            reparsed.summary.as_deref(),
1885            Some("2026"),
1886            "summary deleted by format"
1887        );
1888        assert_eq!(
1889            reparsed.status.as_deref(),
1890            Some("0"),
1891            "status deleted by format"
1892        );
1893        // The body is preserved verbatim.
1894        assert_eq!(body, "body\n");
1895    }
1896
1897    // ── Regression: BOM-prefixed files parse like store/index (finding #19) ────
1898
1899    #[test]
1900    fn regression_split_frontmatter_tolerates_leading_utf8_bom() {
1901        // A BOM-prefixed file (EF BB BF + `---\n...`) is walked and indexed by
1902        // `dbmd index` (store/index strip the BOM) but, before the fix, every
1903        // write/edit surface routed through `read_file` hard-failed with
1904        // MissingFrontmatter. `split_frontmatter` must now strip a single leading
1905        // U+FEFF and emit a BOM-free body.
1906        let text = "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n";
1907        let parsed = split_frontmatter(text, Path::new("note.md")).unwrap();
1908        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
1909        // Body never carries the BOM forward into the canonical writer.
1910        assert_eq!(parsed.body, "body\n");
1911        assert!(!parsed.body.starts_with('\u{feff}'));
1912    }
1913
1914    #[test]
1915    fn regression_read_file_parses_bom_prefixed_file() {
1916        // End-to-end through the same `read_file` path `dbmd fm get/set`,
1917        // `format`, `link`, and `write` use. Before the fix this returned
1918        // Err(MissingFrontmatter) on a file the catalog had already indexed.
1919        let dir = tempdir().unwrap();
1920        let path = dir.path().join("note.md");
1921        std::fs::write(&path, "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n").unwrap();
1922
1923        let (fm, body) = read_file(&path).expect("BOM-prefixed file must parse");
1924        assert_eq!(fm.type_.as_deref(), Some("note"));
1925        assert_eq!(fm.summary.as_deref(), Some("x"));
1926        assert_eq!(body, "body\n");
1927    }
1928
1929    #[test]
1930    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
1931        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
1932        // *unquoted* inline `company: [[records/companies/northstar]]`
1933        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
1934        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
1935        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
1936        //     company:
1937        //     - - records/companies/northstar
1938        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
1939        // any `fm set` / `link` write) silently destroyed the link.
1940        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
1941        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1942        // Sanity: it really parsed as the nested sequence, not a string.
1943        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
1944
1945        let out = fm.to_yaml();
1946        // The link must survive as a quoted inline scalar — brackets intact, and
1947        // never the bracket-less block sequence `- - records/...`.
1948        assert!(
1949            out.contains("[[records/companies/northstar]]"),
1950            "canonical writer dropped the wiki-link brackets; got:\n{out}"
1951        );
1952        assert!(
1953            !out.contains("- - "),
1954            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
1955        );
1956
1957        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
1958        // one link with the right target (the edge graph/backlinks rely on).
1959        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1960        let fields = reparsed.link_fields();
1961        let links: Vec<(&str, &str, Option<&str>)> = fields
1962            .iter()
1963            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
1964            .collect();
1965        assert_eq!(
1966            links,
1967            vec![("company", "records/companies/northstar", None)]
1968        );
1969
1970        // A second re-emit is a fixed point — no progressive corruption across
1971        // repeated curator-loop writes.
1972        assert_eq!(
1973            reparsed.to_yaml(),
1974            out,
1975            "to_yaml is not idempotent on links"
1976        );
1977    }
1978
1979    #[test]
1980    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
1981        // The `|display` segment must survive the unquoted-inline round-trip too.
1982        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
1983        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1984        let out = fm.to_yaml();
1985        assert!(
1986            out.contains("[[records/companies/northstar|Northstar]]"),
1987            "display segment lost on round-trip; got:\n{out}"
1988        );
1989        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1990        let f = reparsed.link_fields();
1991        assert_eq!(f.len(), 1);
1992        assert_eq!(f[0].1.target, "records/companies/northstar");
1993        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
1994    }
1995
1996    #[test]
1997    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
1998        // A genuine quoted block list of links round-trips as a clean string
1999        // list — never collapsed to a scalar — and a plain nested sequence that
2000        // is NOT a wiki-link is left exactly as written (no false conversion).
2001        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
2002        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2003        let out = fm.to_yaml();
2004
2005        // Both attendee links survive as quoted strings.
2006        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
2007        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
2008
2009        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
2010        let fields = reparsed.link_fields();
2011        let attendees: Vec<&str> = fields
2012            .iter()
2013            .filter(|(k, _)| k == "attendees")
2014            .map(|(_, l)| l.target.as_str())
2015            .collect();
2016        assert_eq!(
2017            attendees,
2018            vec!["records/contacts/elena", "records/contacts/sarah"]
2019        );
2020        // The non-link nested sequence is preserved verbatim, not touched.
2021        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
2022    }
2023
2024    // ── read_file / write_file round-trip ────────────────────────────────────
2025
2026    #[test]
2027    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
2028        let dir = tempdir().unwrap();
2029        let path = dir.path().join("sources/emails/x.md");
2030        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
2031        let mut fm = Frontmatter {
2032            type_: Some("email".into()),
2033            summary: Some("renewal note".into()),
2034            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
2035            ..Default::default()
2036        };
2037        fm.extra
2038            .insert("from".into(), Value::String("elena@northstar.io".into()));
2039
2040        write_file(&path, &fm, body).unwrap();
2041
2042        let (read_fm, read_body) = read_file(&path).unwrap();
2043        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
2044        assert_eq!(read_fm.type_.as_deref(), Some("email"));
2045        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
2046        assert_eq!(
2047            read_fm.extra.get("from").and_then(|v| v.as_str()),
2048            Some("elena@northstar.io")
2049        );
2050        // The on-disk file starts with a fence and ends with the verbatim body.
2051        let raw = std::fs::read_to_string(&path).unwrap();
2052        assert!(raw.starts_with("---\n"));
2053        assert!(raw.ends_with(body));
2054    }
2055
2056    #[test]
2057    fn roundtrip_modify_summary_then_write_changes_only_summary() {
2058        let dir = tempdir().unwrap();
2059        let path = dir.path().join("records/contacts/sarah.md");
2060        let body = "Long-form operator notes about Sarah.\n";
2061        let fm = Frontmatter {
2062            type_: Some("contact".into()),
2063            summary: Some("old summary".into()),
2064            ..Default::default()
2065        };
2066        write_file(&path, &fm, body).unwrap();
2067
2068        // Read → modify summary → write back.
2069        let (mut fm2, body2) = read_file(&path).unwrap();
2070        fm2.summary = Some("new summary".into());
2071        write_file(&path, &fm2, &body2).unwrap();
2072
2073        let (fm3, body3) = read_file(&path).unwrap();
2074        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
2075        assert_eq!(fm3.type_.as_deref(), Some("contact"));
2076        assert_eq!(body3, body, "body unchanged across the round-trip");
2077    }
2078
2079    #[test]
2080    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
2081        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
2082        // a hand-written file carrying the canonical UNQUOTED scalar link
2083        // `company: [[records/companies/northstar]]`, read from disk then written
2084        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
2085        // value to the bracket-less block sequence `company:\n- - records/...`,
2086        // and every reader (validate/graph/backlinks) then lost the edge.
2087        let dir = tempdir().unwrap();
2088        let path = dir.path().join("records/contacts/sarah-chen.md");
2089        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
2090        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2091        std::fs::write(&path, file).unwrap();
2092
2093        // Read → write back unchanged (the canonical no-op re-emit).
2094        let (fm, body) = read_file(&path).unwrap();
2095        write_file(&path, &fm, &body).unwrap();
2096
2097        // On-disk bytes still carry the bracketed link, never `- - records/...`.
2098        let raw = std::fs::read_to_string(&path).unwrap();
2099        assert!(
2100            raw.contains("[[records/companies/northstar]]"),
2101            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
2102        );
2103        assert!(
2104            !raw.contains("- - "),
2105            "on-disk value became a nested block sequence; got:\n{raw}"
2106        );
2107
2108        // And the edge is still readable after the round-trip.
2109        let (fm2, _) = read_file(&path).unwrap();
2110        let fields = fm2.link_fields();
2111        let links: Vec<(&str, &str)> = fields
2112            .iter()
2113            .map(|(k, l)| (k.as_str(), l.target.as_str()))
2114            .collect();
2115        assert_eq!(links, vec![("company", "records/companies/northstar")]);
2116    }
2117
2118    #[test]
2119    fn write_file_does_not_leave_temp_files_behind() {
2120        let dir = tempdir().unwrap();
2121        let path = dir.path().join("records/x.md");
2122        let fm = Frontmatter {
2123            type_: Some("note".into()),
2124            ..Default::default()
2125        };
2126        write_file(&path, &fm, "body\n").unwrap();
2127        // The directory should contain only the target file, no `.x.md.tmp.*`.
2128        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
2129            .unwrap()
2130            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
2131            .collect();
2132        assert_eq!(entries, vec!["x.md".to_string()]);
2133    }
2134
2135    // ── is_content_file ──────────────────────────────────────────────────────
2136
2137    #[test]
2138    fn is_content_file_recognizes_layers_and_excludes_meta() {
2139        assert!(Frontmatter::is_content_file(Path::new(
2140            "sources/emails/2026-05-22.md"
2141        )));
2142        assert!(Frontmatter::is_content_file(Path::new(
2143            "records/contacts/sarah-chen.md"
2144        )));
2145        assert!(Frontmatter::is_content_file(Path::new(
2146            "wiki/people/sarah-chen.md"
2147        )));
2148        // Absolute paths under a layer are still content.
2149        assert!(Frontmatter::is_content_file(Path::new(
2150            "/home/db/records/companies/northstar.md"
2151        )));
2152        // index.md at any level is meta.
2153        assert!(!Frontmatter::is_content_file(Path::new(
2154            "records/contacts/index.md"
2155        )));
2156        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2157        // Root meta files.
2158        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2159        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2160    }
2161
2162    // ── effective_id ─────────────────────────────────────────────────────────
2163
2164    #[test]
2165    fn effective_id_prefers_explicit_then_derives_from_path() {
2166        let with_id = Frontmatter {
2167            id: Some("explicit-id".into()),
2168            ..Default::default()
2169        };
2170        assert_eq!(
2171            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2172            "explicit-id"
2173        );
2174        let no_id = Frontmatter::default();
2175        assert_eq!(
2176            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2177            "sarah-chen"
2178        );
2179    }
2180
2181    // ── get / set ────────────────────────────────────────────────────────────
2182
2183    #[test]
2184    fn set_routes_universal_and_custom_keys() {
2185        let mut fm = Frontmatter::default();
2186        fm.set("type", "contact").unwrap();
2187        fm.set("summary", "hi").unwrap();
2188        fm.set("company", "[[records/companies/northstar]]")
2189            .unwrap();
2190        assert_eq!(fm.type_.as_deref(), Some("contact"));
2191        assert_eq!(fm.summary.as_deref(), Some("hi"));
2192        // Custom key landed in extra, not a typed slot.
2193        assert_eq!(
2194            fm.extra.get("company").and_then(|v| v.as_str()),
2195            Some("[[records/companies/northstar]]")
2196        );
2197        // get reads from both typed fields and extra.
2198        assert_eq!(
2199            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2200            Some("contact".into())
2201        );
2202        assert_eq!(
2203            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2204            Some("[[records/companies/northstar]]".into())
2205        );
2206        assert!(fm.get("nonexistent").is_none());
2207    }
2208
2209    #[test]
2210    fn set_timestamp_validates_rfc3339() {
2211        let mut fm = Frontmatter::default();
2212        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2213        assert!(fm.created.is_some());
2214        let err = fm.set("updated", "not-a-date").unwrap_err();
2215        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2216    }
2217
2218    // ── extract_wiki_links ───────────────────────────────────────────────────
2219
2220    #[test]
2221    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2222        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[wiki/people/sarah-chen.md|Sarah]].\n";
2223        let links = extract_wiki_links(body, Path::new("doc.md"));
2224        assert_eq!(links.len(), 3);
2225
2226        // Full path, no extension, no display.
2227        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2228        assert!(links[0].is_full_path);
2229        assert!(!links[0].has_md_extension);
2230        assert_eq!(links[0].display, None);
2231        assert_eq!(links[0].location.1, 1, "first link on line 1");
2232
2233        // Short form: not a full path.
2234        assert_eq!(links[1].target, "sarah-chen");
2235        assert!(!links[1].is_full_path, "bare target is short-form");
2236
2237        // Full path WITH .md extension and a display override on line 2.
2238        assert_eq!(links[2].target, "wiki/people/sarah-chen.md");
2239        assert!(links[2].is_full_path);
2240        assert!(links[2].has_md_extension);
2241        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2242        assert_eq!(links[2].location.1, 2);
2243    }
2244
2245    #[test]
2246    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2247        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2248        let body = "café [[records/x/y]]";
2249        let links = extract_wiki_links(body, Path::new("d.md"));
2250        assert_eq!(links.len(), 1);
2251        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2252        assert_eq!(links[0].location.2, 6);
2253    }
2254
2255    #[test]
2256    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
2257        let links = extract_wiki_links(
2258            "records/contacts/sarah-chen is not a link",
2259            Path::new("d.md"),
2260        );
2261        assert!(links.is_empty());
2262    }
2263
2264    // ── extract_markdown_links ───────────────────────────────────────────────
2265
2266    #[test]
2267    fn extract_markdown_links_captures_external_and_not_wiki_links() {
2268        let body =
2269            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
2270        let md = extract_markdown_links(body, Path::new("d.md"));
2271        assert_eq!(
2272            md.len(),
2273            1,
2274            "wiki-link must not be captured as a markdown link"
2275        );
2276        assert_eq!(md[0].text, "the thread");
2277        assert_eq!(md[0].url, "https://x.com/a");
2278        assert_eq!(md[0].location.1, 1);
2279
2280        // And the wiki-link extractor must not pick up the markdown link.
2281        let wl = extract_wiki_links(body, Path::new("d.md"));
2282        assert_eq!(wl.len(), 1);
2283        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
2284    }
2285
2286    // ── link_fields ──────────────────────────────────────────────────────────
2287
2288    #[test]
2289    fn link_fields_extracts_scalar_list_and_summary_links() {
2290        // The canonical list form quotes each item so YAML parses it as clean
2291        // strings; a scalar field may be quoted OR written in the canonical
2292        // unquoted inline form `company: [[x]]` (SPEC § Linking).
2293        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
2294        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2295        // Sanity: company really did parse as a scalar string here.
2296        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
2297        let fields = fm.link_fields();
2298
2299        // company (scalar) once, with the right target.
2300        let company: Vec<&str> = fields
2301            .iter()
2302            .filter(|(k, _)| k == "company")
2303            .map(|(_, l)| l.target.as_str())
2304            .collect();
2305        assert_eq!(company, vec!["records/companies/northstar"]);
2306        // attendees (block list) twice.
2307        let attendees: Vec<&str> = fields
2308            .iter()
2309            .filter(|(k, _)| k == "attendees")
2310            .map(|(_, l)| l.target.as_str())
2311            .collect();
2312        assert_eq!(
2313            attendees,
2314            vec!["records/contacts/elena", "records/contacts/sarah"]
2315        );
2316        // summary link surfaced.
2317        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
2318        // Plain-text field is not a link.
2319        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
2320    }
2321
2322    #[test]
2323    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
2324        // Regression: the canonical scalar wiki-link form is the *unquoted*
2325        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
2326        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
2327        // a naive `as_str()`-only walk drops it. link_fields() must still
2328        // surface exactly one link with the correct target.
2329        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
2330        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2331        // Sanity: it really did parse as the nested sequence form, NOT a string.
2332        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2333
2334        let fields = fm.link_fields();
2335        let links: Vec<(&str, &str, Option<&str>)> = fields
2336            .iter()
2337            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2338            .collect();
2339        assert_eq!(
2340            links,
2341            vec![("company", "records/companies/northstar", None)]
2342        );
2343
2344        // The `|display` segment survives the unquoted inline form too.
2345        let fm2 = Frontmatter::parse(
2346            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
2347            Path::new("m.md"),
2348        )
2349        .unwrap();
2350        let f2 = fm2.link_fields();
2351        assert_eq!(f2.len(), 1);
2352        assert_eq!(f2[0].0, "company");
2353        assert_eq!(f2[0].1.target, "records/companies/northstar");
2354        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
2355    }
2356
2357    #[test]
2358    fn link_fields_ignores_plain_one_item_flow_list() {
2359        // A plain one-item flow list `aliases: [foo]` parses to `Seq[String]`
2360        // — one nesting level shallower than an unquoted `[[foo]]` — and must
2361        // NOT be mistaken for a wiki-link.
2362        let yaml = "type: contact\naliases: [foo]";
2363        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2364        assert_eq!(fm.link_fields(), Vec::new());
2365    }
2366
2367    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2368
2369    #[test]
2370    fn detect_flow_form_flags_list_misencodings_not_scalars() {
2371        // The flow-form list mis-encoding (triple-nested) IS flagged; a scalar
2372        // inline wiki-link (double-nested) is NOT.
2373        let bad = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
2374        let flagged = detect_flow_form_link_lists(bad);
2375        assert_eq!(flagged, vec!["attendees".to_string()]);
2376
2377        // An UNquoted block list is also a mis-encoding (parses triple-nested).
2378        let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
2379        assert_eq!(
2380            detect_flow_form_link_lists(unquoted_block),
2381            vec!["attendees".to_string()]
2382        );
2383
2384        // The canonical QUOTED block form parses to clean strings — NOT flagged.
2385        let good = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
2386        assert!(detect_flow_form_link_lists(good).is_empty());
2387
2388        // A plain scalar list of strings is not flagged.
2389        let plain = "tags: [a, b, c]";
2390        assert!(detect_flow_form_link_lists(plain).is_empty());
2391    }
2392
2393    // ── extract_sections ─────────────────────────────────────────────────────
2394
2395    #[test]
2396    fn extract_sections_levels_nesting_and_boundaries() {
2397        let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
2398        let secs = extract_sections(body);
2399        let headings: Vec<(&str, u8)> =
2400            secs.iter().map(|s| (s.heading.as_str(), s.level)).collect();
2401        assert_eq!(headings, vec![("First", 2), ("Sub", 3), ("Second", 2)]);
2402
2403        // "First" (H2) body extends through its H3 child, stopping at "Second".
2404        let first = &secs[0];
2405        assert!(first.body.contains("alpha"));
2406        assert!(first.body.contains("### Sub"));
2407        assert!(first.body.contains("beta"));
2408        assert!(!first.body.contains("Second"));
2409
2410        // "Sub" (H3) stops at the next equal-or-shallower heading ("Second").
2411        let sub = &secs[1];
2412        assert!(sub.body.contains("beta"));
2413        assert!(!sub.body.contains("gamma"));
2414
2415        // 1-based line numbers within the body.
2416        assert_eq!(first.line, 2);
2417        assert_eq!(secs[2].line, 6);
2418    }
2419
2420    #[test]
2421    fn extract_sections_ignores_headings_in_fenced_code() {
2422        let body = "## Real\n```\n## Fake heading in code\n```\nafter\n";
2423        let secs = extract_sections(body);
2424        assert_eq!(secs.len(), 1);
2425        assert_eq!(secs[0].heading, "Real");
2426        // The fenced "## Fake" is part of Real's body, not its own section.
2427        assert!(secs[0].body.contains("## Fake heading in code"));
2428    }
2429
2430    // ── parse_field_spec ─────────────────────────────────────────────────────
2431
2432    #[test]
2433    fn parse_field_spec_required_and_shape() {
2434        let f = parse_field_spec("- email (required, email)");
2435        assert_eq!(f.name, "email");
2436        assert!(f.required);
2437        assert_eq!(f.shape, Some(Shape::Email));
2438        assert!(f.unknown_modifiers.is_empty());
2439    }
2440
2441    #[test]
2442    fn parse_field_spec_link_prefix_strips_trailing_slash() {
2443        let f = parse_field_spec("- company (required, link to records/companies/)");
2444        assert!(f.required);
2445        assert_eq!(f.link_prefix, Some(PathBuf::from("records/companies")));
2446        assert_eq!(f.shape, None);
2447    }
2448
2449    #[test]
2450    fn parse_field_spec_default_preserves_case_and_value() {
2451        let f = parse_field_spec("- currency (default USD)");
2452        assert_eq!(f.name, "currency");
2453        assert_eq!(f.default, Some(Value::String("USD".into())));
2454    }
2455
2456    #[test]
2457    fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
2458        let f = parse_field_spec("- status (required, enum: open, closed, pending)");
2459        assert!(f.required);
2460        assert_eq!(
2461            f.enum_values,
2462            Some(vec![
2463                "open".to_string(),
2464                "closed".to_string(),
2465                "pending".to_string()
2466            ])
2467        );
2468    }
2469
2470    #[test]
2471    fn parse_field_spec_bare_enum_keyword_is_not_itself_a_value() {
2472        // `enum` with no colon: the values are the remaining tokens; the keyword
2473        // itself must NOT leak in as an allowed value.
2474        let f = parse_field_spec("- status (required, enum, open, closed)");
2475        assert!(f.required);
2476        assert_eq!(
2477            f.enum_values,
2478            Some(vec!["open".to_string(), "closed".to_string()])
2479        );
2480    }
2481
2482    #[test]
2483    fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
2484        let f = parse_field_spec("- weird (required, frobnicate, string)");
2485        assert!(f.required);
2486        assert_eq!(f.shape, Some(Shape::String));
2487        assert_eq!(f.unknown_modifiers, vec!["frobnicate".to_string()]);
2488    }
2489
2490    #[test]
2491    fn parse_field_spec_no_parens_is_freeform_optional() {
2492        let f = parse_field_spec("- nickname");
2493        assert_eq!(f.name, "nickname");
2494        assert!(!f.required);
2495        assert_eq!(f.shape, None);
2496        assert!(f.link_prefix.is_none());
2497        assert!(f.enum_values.is_none());
2498        assert!(f.unknown_modifiers.is_empty());
2499    }
2500
2501    // ── parse_schema_bullet (directives) ─────────────────────────────────────
2502
2503    #[test]
2504    fn schema_bullet_unique_single_field() {
2505        match parse_schema_bullet("- unique: email") {
2506            SchemaBullet::Unique(fields) => assert_eq!(fields, vec!["email".to_string()]),
2507            other => panic!("expected Unique, got {other:?}"),
2508        }
2509    }
2510
2511    #[test]
2512    fn schema_bullet_unique_compound_trims_and_splits() {
2513        match parse_schema_bullet("- unique: date, amount , vendor") {
2514            SchemaBullet::Unique(fields) => assert_eq!(
2515                fields,
2516                vec![
2517                    "date".to_string(),
2518                    "amount".to_string(),
2519                    "vendor".to_string()
2520                ]
2521            ),
2522            other => panic!("expected Unique, got {other:?}"),
2523        }
2524    }
2525
2526    #[test]
2527    fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
2528        match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
2529            SchemaBullet::SummaryTemplate(t) => assert_eq!(t, "{role} at {company} (x: y)"),
2530            other => panic!("expected SummaryTemplate, got {other:?}"),
2531        }
2532    }
2533
2534    #[test]
2535    fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
2536        // A field whose modifiers contain a colon (`enum:`) parses as a field, not
2537        // a directive — its head has a `(` before any `:`.
2538        match parse_schema_bullet("- status (enum: open, closed)") {
2539            SchemaBullet::Field(f) => {
2540                assert_eq!(f.name, "status");
2541                assert_eq!(
2542                    f.enum_values,
2543                    Some(vec!["open".to_string(), "closed".to_string()])
2544                );
2545            }
2546            other => panic!("expected Field, got {other:?}"),
2547        }
2548    }
2549
2550    #[test]
2551    fn parse_db_md_schema_captures_unique_and_summary_template() {
2552        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
2553        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2554        let s = config.schemas.get("contact").expect("contact schema");
2555        assert_eq!(s.fields.len(), 1, "directives are not parsed as fields");
2556        assert_eq!(s.unique_keys, vec![vec!["email".to_string()]]);
2557        assert_eq!(s.summary_template.as_deref(), Some("{role} at {company}"));
2558    }
2559
2560    #[test]
2561    fn schema_bullet_shard_directive_parses_values() {
2562        assert!(matches!(
2563            parse_schema_bullet("- shard: by-date"),
2564            SchemaBullet::Shard(Some(true))
2565        ));
2566        assert!(matches!(
2567            parse_schema_bullet("- shard: flat"),
2568            SchemaBullet::Shard(Some(false))
2569        ));
2570        // An unrecognized value is ignored (None), like an unknown modifier.
2571        assert!(matches!(
2572            parse_schema_bullet("- shard: weekly"),
2573            SchemaBullet::Shard(None)
2574        ));
2575        // A field whose name has a `(` before any `:` is still a field — the same
2576        // guard that keeps `- status (enum: a, b)` a field, not a directive.
2577        assert!(matches!(
2578            parse_schema_bullet("- shardiness (string)"),
2579            SchemaBullet::Field(_)
2580        ));
2581    }
2582
2583    #[test]
2584    fn parse_db_md_schema_captures_shard_directive() {
2585        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### shipment\n- carrier (string)\n- shard: by-date\n\n### contact\n- shard: flat\n";
2586        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2587        let shipment = config.schemas.get("shipment").expect("shipment schema");
2588        assert_eq!(shipment.shard, Some(true));
2589        assert_eq!(
2590            shipment.fields.len(),
2591            1,
2592            "`shard:` is a directive, not a field"
2593        );
2594        assert_eq!(config.schemas.get("contact").unwrap().shard, Some(false));
2595    }
2596
2597    // ── parse_db_md ──────────────────────────────────────────────────────────
2598
    // Fixture: a complete `DB.md` document used by the `parse_db_md` tests.
    // It holds a frontmatter block, a title with a one-line description, an
    // `## Agent instructions` section, a `## Policies` section with
    // `### Frozen pages` (two backticked paths with trailing comments) and
    // `### Ignored types` (a backticked comma list with a trailing comment),
    // and a `## Schemas` section defining the `contact` and `expense` types.
    const CANONICAL_DB_MD: &str = "---\ntype: db-md\nscope: company\nowner: Sarah Chen\n---\n\n# Acme operations knowledge base\n\nCompany-scale institutional memory for Acme.\n\n## Agent instructions\n\nPrioritize creating `contact` records from new-sender emails. Use British English.\n\n## Policies\n\n### Frozen pages\n- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n\n### Ignored types\n- `test`, `temp` — read but never synthesize.\n\n## Schemas\n\n### contact\n- name (required)\n- email (required, email)\n- company (required, link to records/companies/)\n- role (string)\n\n### expense\n- date (required, date)\n- amount (required)\n- currency (default USD)\n";
2600
2601    #[test]
2602    fn parse_db_md_extracts_all_canonical_sections() {
2603        let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();
2604
2605        // Agent instructions: free-form prose, heading line stripped.
2606        let ai = config
2607            .agent_instructions
2608            .expect("agent instructions present");
2609        assert!(ai.starts_with("Prioritize creating"));
2610        assert!(!ai.contains("## Agent instructions"));
2611
2612        // Frozen pages: paths extracted from backticked bullets, comments dropped.
2613        assert_eq!(
2614            config.frozen_pages,
2615            vec![
2616                PathBuf::from("records/decisions/2026-q1-strategy.md"),
2617                PathBuf::from("wiki/synthesis/2026-annual-plan.md"),
2618            ]
2619        );
2620
2621        // Ignored types: comma list, backticks/comment stripped.
2622        assert_eq!(
2623            config.ignored_types,
2624            vec!["test".to_string(), "temp".to_string()]
2625        );
2626
2627        // Schemas: two types, each with its fields in source order.
2628        assert_eq!(config.schemas.len(), 2);
2629        let contact = config.schemas.get("contact").expect("contact schema");
2630        let names: Vec<&str> = contact.fields.iter().map(|f| f.name.as_str()).collect();
2631        assert_eq!(names, vec!["name", "email", "company", "role"]);
2632        assert!(contact.fields[0].required); // name
2633        assert_eq!(contact.fields[1].shape, Some(Shape::Email)); // email
2634        assert_eq!(
2635            contact.fields[2].link_prefix,
2636            Some(PathBuf::from("records/companies"))
2637        ); // company
2638
2639        let expense = config.schemas.get("expense").expect("expense schema");
2640        let cur = expense
2641            .fields
2642            .iter()
2643            .find(|f| f.name == "currency")
2644            .unwrap();
2645        assert_eq!(cur.default, Some(Value::String("USD".into())));
2646    }
2647
2648    #[test]
2649    fn parse_db_md_handles_malformed_and_unknown_modifiers() {
2650        // corpus-b shape: a `## Schemas` section with a malformed bullet, an
2651        // unknown modifier, and bullets that appear with NO `### <type>`
2652        // heading (so they belong to no schema and are dropped).
2653        let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
2654        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2655
2656        // The orphan bullet under `## Schemas` with no `### type` heading is not
2657        // captured as a schema.
2658        assert_eq!(config.schemas.len(), 1);
2659        let ticket = config.schemas.get("ticket").expect("ticket schema");
2660        assert_eq!(ticket.fields.len(), 2);
2661
2662        let priority = &ticket.fields[0];
2663        assert!(priority.required);
2664        assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
2665        assert_eq!(
2666            priority.enum_values,
2667            Some(vec!["low".to_string(), "high".to_string()])
2668        );
2669
2670        // A bullet with an unclosed paren still yields a usable name.
2671        let broken = &ticket.fields[1];
2672        assert_eq!(broken.name, "broken");
2673    }
2674
2675    #[test]
2676    fn parse_db_md_missing_frontmatter_errors() {
2677        let text = "# No frontmatter\n\n## Agent instructions\nhi\n";
2678        let err = parse_db_md(text, Path::new("DB.md")).unwrap_err();
2679        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2680    }
2681
2682    #[test]
2683    fn parse_db_md_absent_sections_default_empty() {
2684        let text = "---\ntype: db-md\n---\n\n# Title only\n";
2685        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2686        assert_eq!(config, Config::default());
2687    }
2688
2689    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
2690
2691    /// `Frontmatter::set` is the value path every write surface (`fm set`,
2692    /// `write --fm`) funnels through. A list-of-wiki-links value (the SPEC's
2693    /// `meeting.attendees` shape) must serialize as a YAML **block sequence** of
2694    /// quoted links — readable back by [`links_in_field_value`] and accepted by
2695    /// `dbmd validate` — never the flow-form scalar string that trips
2696    /// `WIKI_LINK_FLOW_FORM_LIST`. Both the unquoted (`[[[a]], [[b]]]`) and
2697    /// quoted (`["[[a]]", "[[b]]"]`) spellings an agent types must normalize.
2698    #[test]
2699    fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
2700        for value in [
2701            "[[[records/contacts/a]], [[records/contacts/b]]]",
2702            r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
2703        ] {
2704            let mut fm = Frontmatter::default();
2705            fm.set("attendees", value).unwrap();
2706
2707            // Stored as a 2-element sequence of clean quoted links.
2708            let stored = fm.extra.get("attendees").expect("attendees set");
2709            let Value::Sequence(items) = stored else {
2710                panic!("attendees must be a Sequence, got {stored:?} for input {value}");
2711            };
2712            assert_eq!(items.len(), 2, "input {value}");
2713            assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
2714            assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));
2715
2716            // The edge enumerator reads exactly the two links back (no stray
2717            // bracket targets, the flow-form-string symptom).
2718            let links: Vec<_> = links_in_field_value(stored)
2719                .into_iter()
2720                .map(|l| l.target)
2721                .collect();
2722            assert_eq!(
2723                links,
2724                vec!["records/contacts/a", "records/contacts/b"],
2725                "input {value}"
2726            );
2727
2728            // And the canonical writer renders it block-style, not as a scalar.
2729            let yaml = fm.to_yaml();
2730            assert!(
2731                yaml.contains("attendees:\n"),
2732                "expected block list in:\n{yaml}"
2733            );
2734            assert!(
2735                !yaml.contains("attendees: '[["),
2736                "must not be a flow-form scalar string in:\n{yaml}"
2737            );
2738        }
2739    }
2740
2741    /// A *single* inline wiki-link stays a scalar string (renders inline
2742    /// `field: [[x]]`), and a single link must never be widened to a one-item
2743    /// list — preserving the common `contact.company` / `expense.vendor` shape.
2744    #[test]
2745    fn set_single_inline_wiki_link_stays_scalar() {
2746        let mut fm = Frontmatter::default();
2747        fm.set("company", "[[records/companies/tideform]]").unwrap();
2748        assert_eq!(
2749            fm.extra.get("company"),
2750            Some(&Value::String("[[records/companies/tideform]]".into())),
2751        );
2752        // Still recognized as one link.
2753        let links: Vec<_> = links_in_field_value(fm.extra.get("company").unwrap())
2754            .into_iter()
2755            .map(|l| l.target)
2756            .collect();
2757        assert_eq!(links, vec!["records/companies/tideform"]);
2758    }
2759
2760    /// Plain text and a non-link flow list are left as verbatim scalar strings —
2761    /// the list normalization only triggers when every item is a clean wiki-link.
2762    #[test]
2763    fn set_non_link_values_stay_scalar_strings() {
2764        let mut fm = Frontmatter::default();
2765        fm.set("location", "Video call (remote)").unwrap();
2766        assert_eq!(
2767            fm.extra.get("location"),
2768            Some(&Value::String("Video call (remote)".into())),
2769        );
2770
2771        // A flow list whose items are NOT wiki-links must not be reinterpreted as
2772        // a link sequence; it stays the scalar string the agent passed.
2773        fm.set("note", "[draft, wip]").unwrap();
2774        assert_eq!(
2775            fm.extra.get("note"),
2776            Some(&Value::String("[draft, wip]".into()))
2777        );
2778    }
2779}